一、处理数据库文件
1、要处理的数据文件
[root@watchout2 ~]# cat datafile
M H:(424)2222233:250:890:102323111
Y U:(433)3334443:250:890:124234
M H:(424)4444433:250:890:12222
M H:(424)2222888:250:890:100
M H:(424)6666633:250:830:100
M H:(424)7777233:250:890:134
M H:(424)2222833:250:890:103
M H:(424)9999933:250:890:100
M H:(424)0000003:250:890:100
2、运行的脚本
[root@watchout2 ~]# cat awk
#!/usr/bin/awk -f
# (The shebang line above is optional when the script is run as
#  "awk -f awk datafile"; it is only needed to execute the file directly.)
#
# Prints a report from colon-separated records of the form
#   name:phone:Jan:Feb:Mar
# Each record is echoed with a per-record total ($3 + $4 + $5) appended,
# followed by the column totals for fields 3, 4 and 5.
BEGIN {
    FS = ":"        # input fields are separated by colons
    OFS = "\t"
    print "name\tphone\t\tJan\tFeb\tMar\t\tTotal"
    print "________________________________________________________________________________"
}

{
    # Store the per-record total in a new sixth field.
    $6 = $3 + $4 + $5
    print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t\t"$6

    # Running column totals, reported in END.
    total3 += $3
    total4 += $4
    total5 += $5
}

END {
    print "_________________________________________________________________________________"
    print "this is Jan total: " total3
    print "this is Feb total: " total4
    print "this is Mar total: " total5
}
[root@watchout2 ~]#
3、执行脚本
[root@watchout2 ~]# awk -f awk datafile
name phone Jan Feb Mar Total
________________________________________________________________________________
M H (424)2222233 250 890 102323111 102324251
Y U (433)3334443 250 890 124234 125374
M H (424)4444433 250 890 12222 13362
M H (424)2222888 250 890 100 1240
M H (424)6666633 250 830 100 1180
M H (424)7777233 250 890 134 1274
M H (424)2222833 250 890 103 1243
M H (424)9999933 250 890 100 1240
M H (424)0000003 250 890 100 1240
_________________________________________________________________________________
this is Jan total: 2250
this is Feb total: 7950
this is Mar total: 102460204
[root@watchout2 ~]#
4、存在问题
输出文件不够合理,total的行不够整齐,目前这个问题无法解决。
更新:
root:/root>vi testt
#!/usr/bin/awk -f
# Prints an aligned report from colon-separated records of the form
#   name:phone:Jan:Feb:Mar
# Each record is printed with a per-record total ($3 + $4 + $5), followed by
# the column totals for fields 3, 4 and 5.
BEGIN {
    FS = ":"
    OFS = "\t"
    # One fixed-width layout shared by the header and every data row, so the
    # column headings line up with the values printed beneath them.
    row_fmt = "%-8s%-15s%-8s%-8s%-15s%-10s\n"

    print "*** aaaaaaaaaaaaaaaaaaa ***"
    print "*** 2000 ***"
    printf row_fmt, "name", "phone", "Jan", "Feb", "Mar", "Total"
    print "______________________________________________"
}

{
    # Store the per-record total in a new sixth field.
    $6 = $3 + $4 + $5
    printf row_fmt, $1, $2, $3, $4, $5, $6

    # Running column totals, reported in END.
    total3 += $3
    total4 += $4
    total5 += $5
}

END {
    print "_____________________________________________"
    print "is Jan total; " total3
    print "is Feb total: " total4
    print "is Mar total: " total5
}
执行脚本
root:/root>awk -f testt new
*** aaaaaaaaaaaaaaaaaaa ***
*** 2000 ***
name phone Jan Feb Mar Total
______________________________________________
M H (424)2222888 250 890 100 1240
M H (424)2222888 250 890 100 1240
M H (424)2222833 250 890 103 1243
M H (424)2222833 250 890 103 1243
M H (424)4444433 250 890 12222 13362
M H (424)4444433 250 890 12222 13362
Y U (433)3334443 250 890 124234 125374
Y U (433)3334443 250 890 124234 125374
M H (424)2222233 250 890 102323111 102324251
M H (424)2222233 250 890 102323111 102324251
_____________________________________________
is Jan total; 2500
is Feb total: 8900
is Mar total: 204919540
二、抓取ping结果中的IP地址和ping的时间
[root@watchout2 ~]# awk 'BEGIN{ FS="[ =:]";OFS="\t"};{print "\t\thostname\t\ttime"};{print "\t\t"$4"\t\t"$11}' ping
hostname time
202.108.33.32 55.9
hostname time
202.108.33.32 18.5
[root@watchout2 ~]#
使hostname 和time只出现在首行
[root@watchout2 ~]# awk 'BEGIN{ FS="[ =:]";OFS="\t"; print "\t\thostname\t\ttime"};{print "\t\t"$4"\t\t"$11}' ping
hostname time
202.108.33.32 55.9
202.108.33.32 18.5
[root@watchout2 ~]#
将上面的命令行写入文件中
[root@watchout2 ~]# vi awkping
# Prints a two-column listing (hostname, time) taken from fields 4 and 11
# of each input line. Input lines are split on a space, "=" or ":".
BEGIN {
    OFS = "\t"
    FS = "[ =:]"
    header = "\t\t hostname\t\ttime"
    print header                 # heading is printed once, before any input
}

{
    host = $4
    rtt = $11
    print "\t\t" host "\t\t" rtt
}
保存退出
[root@watchout2 ~]# awk -f awkping ping
hostname time
202.108.33.32 55.9
202.108.33.32 18.5
[root@watchout2 ~]#
三、找出一个文件中有多个相同的行
i.net 1
sp.com.cn 1
xun.com 1
bai.net.cn 1
da.com.cn 1
port.com.cn 1
[root@ht-store control]# awk '{count[$1]++}END{for (name in count)print name,count[name] }' locals.bak
四、处理文本文件中的$和 “,” 并进行数值运算
数据文件
[root@watchout2 ~]# cat qian
tom:gp:$,70
she:uy:$,90
all:yy:$,80
rey:oo:$,50
[root@watchout2 ~]#
[root@watchout2 ~]# awk -F: '{gsub (/\$/,"");gsub(/,/,""); cost += $3}; END{print "the total cost is $" cost}' qian
the total cost is $290
[root@watchout2 ~]#
五、规范文件(多行记录)
数据文件
[root@watchout2 ~]# cat checkbook
Saj
1/2/3330
+999
Tyoie
2/5/8880
-123
Ulio
5/5/555
+8879
脚本文件
[root@watchout2 ~]# cat awkcheck
# Multi-line records: each input line is one field. Prints the record number
# followed by the first two lines of every record, with a blank line after
# each output record.
BEGIN {
    ORS = "\n\n"    # blank line after each output record
    FS = "\n"       # every line of a record is a separate field
    RS = ""         # empty RS: records are separated by blank lines
}

{
    print NR, $1, $2
}
[root@watchout2 ~]#
运行脚本
[root@watchout2 ~]# awk -f awkcheck checkbook
1 Saj 1/2/3330
2 Tyoie 2/5/8880
3 Ulio 5/5/555
六、用awk从ls -al的输出中查找目录下的最大文件
#!/usr/bin/awk -f
# Reads a long-format directory listing (e.g. the output of "ls -al") and
# reports the entry with the largest value in field 5 (the size column),
# ignoring directories and symbolic links.
#
# Only field 9 is taken as the name, so a name containing whitespace is
# reported up to its first blank only.
BEGIN {
    filename = ""
    len = 0
}

{
    # Field 1 is the mode string; skip entries starting with "d" or "l".
    if (($1 !~ /^d/) && ($1 !~ /^l/)) {
        # "+ 0" forces a numeric (not string) comparison of the size field.
        if ($5 + 0 > len) {
            len = $5 + 0
            filename = $9
        }
    }
}

END {
    print filename " size is " len
}
实际应用
[root@watchout2 ~]# ll
total 244
-rw-r--r-- 1 root root 349 Jul 5 18:59 2datafile
-rw-r--r-- 1 root root 903 Nov 30 2007 anaconda-ks.cfg
-rw-r--r-- 1 root root 116 Jul 5 23:20 array
-rwxr-xr-x 1 root root 495 Jul 5 20:08 awk
-rw-r--r-- 1 root root 54 Jul 6 04:43 awkcheck
-rw-r--r-- 1 root root 127 Jul 5 23:44 awkping
-rw-r--r-- 1 root root 98 Jul 6 04:20 awkq
-rw-r--r-- 1 root root 60 Jul 6 04:39 checkbook
-rw-r--r-- 1 root root 2267 Jul 2 09:58 check_mem.sh
-rw-r--r-- 1 root root 544 Jul 5 23:08 datafile
-rw------- 1 root root 10 Jun 30 17:27 dead.letter
-rwxr--r-- 1 root root 216 Jul 7 07:41 fileawk
-rw-r--r-- 1 root root 53784 Nov 30 2007 install.log
-rw-r--r-- 1 root root 9243 Nov 30 2007 install.log.syslog
-rw------- 1 root root 12865 Jun 30 17:46 mbox
-rw-r--r-- 1 root root 18 Jul 6 18:06 nu
drwxr-xr-x 2 root root 4096 Jul 7 01:04 perl
-rw-r--r-- 1 root root 122 Jul 5 11:26 ping
-rw-r--r-- 1 root root 49 Jul 6 03:58 qian
drwxr-xr-x 2 root root 4096 Jul 6 01:55 sql
-rw-r--r-- 1 root root 85 Jul 6 00:52 test
[root@watchout2 ~]# ls -al |./fileawk
install.log size is 53784
[root@watchout2 ~]#
七、打印文件中的重复行
测试文件内容:
awk '{dup[$1]++; if (dup[$1] > 1) { domain[$1]++ }} END{ for ( i in domain ) { print i,domain[i]} }' file_awk
因为当dup[$1]大于1时,数组domain才会增加1,所以结果a 是1 、b 是 2 ,与实际上相差1。
awk '{dup[$1]++; if (dup[$1] > 1) { domain[$1]++ }} END{ for ( i in domain ) { print i,domain[i]+1} }' file_awk
八、awk 的变量和操作符
FS 输入域分隔符 OFS 输出域分隔符
RS 输入记录分隔符 ORS 输出记录分隔符
NR 当前记录号(已读入的记录数) NF 当前记录的域个数
x==y x等于y
x!=y x不等于y
x>y x大于y
x>=y x大于或等于y
x<y x 小于y
x<=y x小于或等于y
x~re x匹配正则表达式re
x!~re x不匹配正则表达式re
awk的操作符(按优先级升序排列)
= 、+=、 -=、 *= 、/= 、 %=
||
&&
~ !~
> >= < <= == !=
xy (字符串连结,'x''y'变成"xy")
+ -
* / %
++ --
九、使用系统变量.
定义一个变量
[root@old01 logs]# domain=www.bo.com
转换变量并进行测试
[root@old01 logs]# awk 'BEGIN{print test_domain="'$domain'"}'
www.bo.com
测试的数据文件
[root@old01 logs]# cat /tmp/do
www.bo.com
www.bo.com
将环境变量转化为awk 变量.
[root@old01 logs]# awk 'BEGIN{test_domain="'$domain'"} ; {if($1 ~ test_domain) {print $0}}' /tmp/do
www.bo.com
www.bo.com
[root@old01 logs]#
模板中使用系统变量
[root@changchun-old01 logs]# a=bo
[root@changchun-old01 logs]# awk -F"." '$2 ~ $a {print $2}' /tmp/do
[root@changchun-old01 logs]# awk -F"." '$2 ~ /$a/ {print $2}' /tmp/do
[root@changchun-old01 logs]# awk -F"." '$2 ~ /'$a'/ {print $2}' /tmp/do
bo
bo
[root@changchun-old01 logs]# cat /tmp/do
www.bo.com
www.bo.com
[root@changchun-old01 logs]#
自定义变量使用。(使用单引号)
[root@localhost test]# var=2010093008
[root@localhost test]# echo $var
2010093008
[root@localhost test]# awk '/'$var'/ {print $0}' 20100930.log |grep '46K' |tail -n 1
201009300857 46K 200 33672.000 1.404
[root@localhost test]#