awk模式匹配学习笔记(180813)

学习参考资料:https://coolshell.cn/articles/9070.html
1. 首先通过netstat命令获取测试文件netstat.txt(netstat - Print network connections, routing tables, interface statistics, masquerade connections, and multicast memberships)
2. #格式化输出
$ awk '{printf "%-8s %-8s %-8s %-8s %-22s %-15s\n", $1,$2,$3,$4,$5,$6}' netstat.txt
3. #因为我的列表中LISTEN并不存在,所以我的和作者不一样
awk '$3 == 0 && $6 == "LISTEN"' netstat.txt
4. #条件过滤
awk '$3>0 {print $0}' netstat.txt
5. #需要表头,引入内建变量,如果是excel表格的话,就是第一行的内容,每列名称
awk '$3 == 0 && $6 == "LISTEN" || NR == 1' netstat.txt

去掉 NR == 1 的话,表头(第一行)就不会输出了:

awk '$3 == 0 && $6 == "LISTEN"' netstat.txt
6. #再复杂点,格式化输出
awk '$3 == 0 && $6 == "ESTABLISHED" || NR == 1 {printf "%-20s %-20s %s\n", $4, $5, $6}' netstat.txt

7. #输出行号
awk '$3 == 0 && $6 == "ESTABLISHED" || NR == 1 {printf "%-20s %-20s %-20s %-20s %s\n", NR, FNR, $4, $5, $6}' netstat.txt
8. #指定分隔符
awk 'BEGIN{FS = ":"} {print $1, $3, $6}' /etc/passwd

等同于-F

awk -F: '{print $1, $3, $6}' /etc/passwd

可以指定多个分隔符

awk -F '[;:,]'

9. #\t作为分隔符输出
awk -F: '{print $1, $3, $6}' OFS="\t" /etc/passwd
10. #字符串匹配,正则表达式匹配,~模式开始,//中是模式
awk '$6 ~ /FIN/ || NR == 1 {print NR,$4,$5,$6}' OFS="\t" netstat.txt

再看一例

awk '$6 ~ /WAIT/ || NR == 1 {print NR,$4,$5,$6}' OFS="\t" netstat.txt

再看一例

awk /tcp/ netstat.txt
11. #用 '/FIN|TIME/' 来匹配FIN 或TIME(| 两侧不能有空格,否则空格也会成为模式的一部分)
awk '$6 ~ /FIN|TIME/ || NR == 1 {print NR,$4,$5,$6}' OFS="\t" netstat.txt
12. #模式取反
awk '$6 !~ /WAIT/ || NR == 1 {print NR,$4,$5,$6}' OFS="\t" netstat.txt

再举一例

awk '$1 !~ /tcp/ || NR == 1 {print NR,$4,$5,$6}' OFS="\t" netstat.txt

也可以这样

awk '!/WAIT/' netstat.txt
13. #拆分文件
awk 'NR!=1{print > $6}' netstat.txt
14. #指定的列输出到文件
awk 'NR!=1{print $4,$5,$6 > $6}' netstat.txt
15. #复杂一点,awk结合if else语句
awk 'NR!=1{if($6 ~ /TIME|ESTABLISHED/) print > "1.txt"; else if ($6 ~ /CONNECTED/) print > "2.txt"; else print > "3.txt"}' netstat.txt
16. #计算所有某种类型的文件的大小总和
$ ls
G_24hCFvsG_24hCM.DE_down.xls
G_24hCFvsG_24hCM.DElist_down.txt
G_24hCFvsG_24hCM.DElist.txt
G_24hCFvsG_24hCM.DElist_up.txt
G_24hCFvsG_24hCM.DE_up.xls
G_24hCFvsG_24hCM.DE.xls
G_24hCFvsG_24hCM.Differential_analysis_results.xls
ls -l *xls *txt | awk '{sum += $5} END {print sum}'
21361
17. # awk结合for循环语句
$ awk 'NR!=1{a[$6]++;} END {for (i in a) print i "," a[i];}' netstat.txt
18. #统计每个用户的进程占了多少内存
$ ps aux | awk 'NR!=1{a[$1]+=$6;} END {for (i in a) print i "," a[i] "KB";}'
19. #BEGIN{这里面是执行前的语句}、END{这里面是处理完所有的行后要执行的语句}、{这里是处理每一行时要执行的语句},下面是一个awk命令文件(cal.awk),运行该脚本的方式有 $ awk -f cal.awk students_score.txt,还有 ./cal.awk students_score.txt(需先执行 chmod +x cal.awk)等等
#!/bin/awk -f
# cal.awk - print a per-student score table followed by column totals and
# per-subject averages.
#
# Input records are whitespace-separated; the fields used are:
#   $1, $2  printed as strings in the first two columns
#   $3, $4, $5  numeric scores accumulated as math, english and computer
#
# Usage: awk -f cal.awk students_score.txt

# Before any input is read: reset the accumulators and print the header.
BEGIN {
    math = 0
    english = 0
    computer = 0

    # printf (not print) so the "\n" in the string is the only line ending.
    printf "NAME NO. MATH ENGLISH COMPUTER TOTAL\n"
    printf "------------------------------------\n"
}

# For every input line: add the three scores to the running sums and print
# the row.
{
    math += $3
    english += $4
    computer += $5
    # Six values are passed, so the format needs six conversions; the last
    # %8d is the per-row total ($3+$4+$5).
    printf "%-6s %-6s %4d %8d %8d %8d\n", $1, $2, $3, $4, $5, $3 + $4 + $5
}

# After the last line: print the column totals and the averages.
END {
    printf "-------------------------------------------------\n"
    printf " TOTAL: %10d %8d %8d \n", math, english, computer
    # NR is 0 when the input is empty; skip the averages instead of
    # dividing by zero.
    if (NR > 0)
        printf "AVERAGE: %10.2f %8.2f %8.2f\n", math / NR, english / NR, computer / NR
}

20. #使用-v参数和ENVIRON,使用ENVIRON的环境变量需要export
$ x=5
$ y=10
$ export y
$ echo $x $y
5 10

$ awk -v val="$x" '{print $1, $2, $3, $4+val, $5+ENVIRON["y"]}' OFS="\t" students_score.txt
21. #找出文件中长度大于50 的行
awk 'length>50' students_score.txt
22. #按连接数查看客户端IP
$ netstat -ntu | awk '{print $5}' | cut -d: -f1 | sort | uniq -c | sort -n
23. #打印99乘法表
$ seq 9 | sed 'H;g' | awk -v RS='' '{for(i=1;i<=NF;i++) printf("%dx%d=%d%s", i, NR, i*NR, i==NR?"\n":"\t")}'

你可能感兴趣的:(Linux_Learning)