接上一篇,这里使用的grade.txt 也是和上一篇中的相同。
先来总结一下awk内置变量:
ARGC 命令行参数个数
ARGV 命令行参数排列
ENVIRON 支持队列中系统环境变量的使用
FILENAME awk浏览文件名
FNR 浏览文件的记录数
FS 设置输入域分隔符,等价于命令行-F选项
NF 浏览记录的域个数
NR 已读的记录数
OFS 输出域分隔符
ORS 输出记录分隔符
RS 控制记录分隔符
[root@wuke shell2]# awk '{print NF,NR,$0}END{print FILENAME}' grade.txt
7 1 M.Tansley 05/99 48311 Green 8 40 44
7 2 J.Lulu 06/99 48317 green 9 24 26
7 3 P.Bunny 02/99 48 Yellow 12 35 28
7 4 J.Troll 07/99 4842 Brown-3 12 26 26
7 5 L.Tansley 05/99 4712 Brown-2 12 30 28
grade.txt
#使用 -F 参数指定分隔符
[root@wuke shell2]# echo $PWD
/opt/shell2
[root@wuke shell2]# echo $PWD |awk -F/ '{print $NF"\t"NF}'
shell2	3
#设置变量名,将27 赋值给变量BASELINE
[root@wuke shell2]# awk 'BEGIN{BASELINE="27"}$6<BASELINE{print $0}' grade.txt J.Lulu 06/99 48317 green 9 24 26 J.Troll 07/99 4842 Brown-3 12 26 26 #修改数值域取值,注意‘{}’ [root@wuke shell2]# awk '{if($1=="M.Tansley");print $1,$6,$7}' grade.txt M.Tansley 40 44 J.Lulu 24 26 P.Bunny 35 28 J.Troll 26 26 L.Tansley 30 28 [root@wuke shell2]# awk '{if($1=="M.Tansley") $6=$6-1;print $1,$6,$7}' grade.txt M.Tansley 39 44 J.Lulu 24 26 P.Bunny 35 28 J.Troll 26 26 L.Tansley 30 28 #修改文本域取值 (1)
[root@wuke shell2]# awk '{if($1=="J.Troll")print $0}' grade.txt J.Troll 07/99 4842 Brown-3 12 26 26 [root@wuke shell2]# awk '{if($1=="J.Troll");print $0}' grade.txt M.Tansley 05/99 48311 Green 8 40 44 J.Lulu 06/99 48317 green 9 24 26 P.Bunny 02/99 48 Yellow 12 35 28 J.Troll 07/99 4842 Brown-3 12 26 26 L.Tansley 05/99 4712 Brown-2 12 30 28 (2) [root@wuke shell2]# awk '{if($1=="J.Troll")print $1}' grade.txt J.Troll [root@wuke shell2]# awk '{if($1=="J.Troll");print $1}' grade.txt M.Tansley J.Lulu P.Bunny J.Troll L.Tansley [root@wuke shell2]# awk '{if($1=="J.Troll")$1="J.L.Troll";print $1}' grade.txt M.Tansley J.Lulu P.Bunny J.L.Troll L.Tansley #创建新的输出域,这里新的输出域为 diff [root@localhost shell2]# awk 'BEGIN{print "Name \t Difference"}{if($6<$7) diff=$7-$6;print $1,diff}' grade.txt Name Difference M.Tansley 4 J.Lulu 2 P.Bunny 2 J.Troll 2 L.Tansley 2 [root@localhost shell2]# awk 'BEGIN{print "Name \t Difference"}{if($6<$7){diff=$7-$6;print $1,diff}}' grade.txt Name Difference M.Tansley 4 J.Lulu 2 #统计某一个域的和,使用‘+=’ 下面的例子统计第三个域的和 [root@localhost shell2]# awk '(tot+=$3);END{print "Club student points:" tot}' grade.txt M.Tansley 05/99 48311 Green 8 40 44 J.Lulu 06/99 48317 green 9 24 26 P.Bunny 02/99 48 Yellow 12 35 28 J.Troll 07/99 4842 Brown-3 12 26 26 L.Tansley 05/99 4712 Brown-2 12 30 28 Club student points:106230 #注意区别,加‘{}’则不打印文件内容 [root@localhost shell2]# awk '{(tot+=$3)};END{print "Club student points:" tot}' grade.txt Club student points:106230 awk 内置字符串函数 gsub(r,s) 在整个$0中用s替代r gsub(r,s,t) 在整个t中使用s替代r index(s,t) 返回s中字符串t第一次出现的位置 length(s) 返回s的长度 match(s,r) 测试s是否包含匹配r的字符串 split(s,a,fs) 在fs上将s分成序列a sprintf(fmt,exp) 返回经fmt格式化后的exp sub(r,s) 用s替代$0中最左边最长的匹配r的子串 substr(s,p) 返回字符串s中从p开始的后缀部分 substr(s,p,n) 返回字符串s中从p开始长度为n的子串 #替换,目标串使用正则表达式格式‘//’ [root@localhost shell2]# grep '4842' grade.txt J.Troll 07/99 4842 Brown-3 12 26 26 [root@localhost shell2]# awk 'gsub(/4842/,4899){print $0}' grade.txt J.Troll 07/99 4899 Brown-3 12 26 26 #查询字符串第一次出现的位置,注意使用BEGIN,否则每一行都会打印,字符串使用引号括起来
[root@localhost shell2]# awk 'BEGIN{print index("Bunny","ny")}' grade.txt 4 [root@localhost shell2]# awk '$1=="J.Troll"{print length($1)" "$1}' grade.txt 7 J.Troll #match 使用: 找不到返回0,找到则返回模式串在匹配串中的位置, #注:单独使用 加BEGIN [root@localhost shell2]# awk 'BEGIN{print match("ABCD",/d/)}' 0 [root@localhost shell2]# awk 'BEGIN{print match("ABCD",/D/)}' 4 #以下两种模式都正确 [root@wuke shell2]# awk '$1=="J.Lulu"{print match($1,"u")}' grade.txt 4 [root@wuke shell2]# awk '$1=="J.Lulu"{print match($1,/u/)}' grade.txt 4 #split 返回字符串数组元素个数 [root@wuke shell2]# awk 'BEGIN{print split("123#456#789",myarry,"#")}' 3 [root@wuke shell2]# awk 'BEGIN{print split("123#456#789",myarry,"#");print myarry[1],myarry[2],myarry[3]}' 3 123 456 789 #sub,查找并替换模式第一次出现的位置 [root@wuke shell2]# awk '$1=="J.Troll"{print $0}' grade.txt J.Troll 07/99 4842 Brown-3 12 26 26 [root@wuke shell2]# awk '$1=="J.Troll"{sub(26,29,$0) ;print $0}' grade.txt J.Troll 07/99 4842 Brown-3 12 29 26 [root@wuke shell2]# awk '$1=="J.Troll"{sub(26,29,$0)} {print $0}' grade.txt M.Tansley 05/99 48311 Green 8 40 44 J.Lulu 06/99 48317 green 9 24 26 P.Bunny 02/99 48 Yellow 12 35 28 J.Troll 07/99 4842 Brown-3 12 29 26 L.Tansley 05/99 4712 Brown-2 12 30 28 #substr,返回字符串指定范围内的子串 [root@wuke shell2]# awk '$1=="L.Tansley"{print substr($1,1,5)}' grade.txt L.Tan #使用substr返回指定位置开始的后缀部分,范围只给了一个参数,注意和上一个例子相对比 [root@wuke shell2]# awk '{print $1}' grade.txt M.Tansley J.Lulu P.Bunny J.Troll L.Tansley [root@wuke shell2]# awk '{print substr($1,3)}' grade.txt Tansley Lulu Bunny Troll Tansley #从shell中向awk传递字符串,通过 echo 加管道的方式 [root@wuke shell2]# echo "Test" |awk '{print length($0)}' 4 [root@wuke shell2]# STR="mydoc.txt" [root@wuke shell2]# echo $STR|awk '{print substr($STR,7)}' txt (注意:单引号内的 $STR 不会被shell展开;在awk中STR是未赋值的变量,$STR 等价于 $0,所以结果恰好正确,更清晰的写法是 substr($0,7)) awk 使用printf
#printf使用类似于C语言 #字符转换 [root@wuke shell2]# echo "65"|awk '{printf "%c\n",$0}' A [root@wuke shell2]# echo "99"|awk '{printf "%f\n",$0}' 99.000000 [root@wuke shell2]# echo "99" |awk '{printf "%10.2f\n",$0}'      99.00
#格式化输出 #打印名字,左对齐,使用‘-’ [root@wuke shell2]# awk '{printf "%-15s %s\n",$1,$3}' grade.txt M.Tansley 48311 J.Lulu 48317 P.Bunny 48 J.Troll 4842 L.Tansley 4712 #向awk传入参数 [root@wuke shell2]# awk '{if($5<AGE) print $0}' AGE=10 grade.txt M.Tansley 05/99 48311 Green 8 40 44 J.Lulu 06/99 48317 green 9 24 26 [root@wuke shell2]# df -k | awk '($4~/^[0-9]/){if ($4>TRIGGER)print $6"\t"$4}' TRIGGER=80000 /	13286720 /dev/shm	506096 /boot	243511 #awk脚本 下面的脚本是将该命令翻译成为一个完整脚本的形式:awk '(tot+=$6); END{print "Club student total points: " tot}' grade.txt #!/bin/awk -f #print a header first BEGIN{ print "Student Date Member No. Grade Age Points Max" print "Name Joined Gained Point Available" print "===================================================================" } #let's add the scores of points gained (tot+=$6) #finished processing END{ print "Club student total points :" tot print "Average Club Student points:" tot/NR } [root@wuke shell2]# ./test.awk grade.txt Student Date Member No. Grade Age Points Max Name Joined Gained Point Available =================================================================== M.Tansley 05/99 48311 Green 8 40 44 J.Lulu 06/99 48317 green 9 24 26 P.Bunny 02/99 48 Yellow 12 35 28 J.Troll 07/99 4842 Brown-3 12 26 26 L.Tansley 05/99 4712 Brown-2 12 30 28 Club student total points :155 Average Club Student points:31 #一个文件中如果有相同的行连续出现就只打印一次 strip.txt: INVALID LCSD 98GJ23 strip.awk: #!/bin/awk -f #error_strip.awk #to call: error_strip.awk #strips out the ERROR* lines if there are more than one #ERROR* lines after each filed record. BEGIN{ error_line="" } #tell awk the whole is "ERROR *" { if ($0 == "ERROR*" && error_line == "ERROR*") next; error_line = $0; print }
[root@wuke shell2]# ./strip.awk strip.txt INVALID LCSD 98GJ23 ERROR* CAUTION LPSS ERROR ON ACC NO. ERROR* PASS FILED INVALID ON GHSI ERROR* CUTION LPSS ERROR ON ACC NO. ERROR* strip2.awk:
#!/bin/awk -f #error_strip.awk #to call: error_strip.awk #strips out the ERROR* lines if there are more than one #ERROR* lines after each filed record. BEGIN{ error_line="" } #tell awk the whole is "ERROR *" { if ($0 == "ERROR*" && error_line == "ERROR*") error_line = $0; print } [root@wuke shell2]# ./strip2.awk strip.txt INVALID LCSD 98GJ23 ERROR* ERROR* CAUTION LPSS ERROR ON ACC NO. ERROR* ERROR* ERROR* ERROR* ERROR* PASS FILED INVALID ON GHSI ERROR* CUTION LPSS ERROR ON ACC NO. ERROR* ERROR* #在awk中使用FS变量指定分隔符的时候,FS一定要放在BEGIN部分 #!/bin/awk -f #to call :passwd.awk /etc/passwd #print out the first and fifth fields BEGIN{FS=":"} {print $1"\t"$5} #第一域是帐号名,第五域是账号所有者 [root@wuke shell2]# ./passwd.awk /etc/passwd root	root bin	bin daemon	daemon adm	adm lp	lp sync	sync shutdown	shutdown halt	halt mail	mail #向AWK脚本传递参数 #!/bin/awk -f #name: age.awk #to call : age.awk AGE=n grade.txt #prints ages that are lower than the age supplied on the command line { if ($5 < AGE) print $0 } [root@wuke shell2]# ./age.awk AGE=10 grade.txt M.Tansley 05/99 48311 Green 8 40 44 J.Lulu 06/99 48317 green 9 24 26 #awk 数组,awk数组是类似于一个键值对,既可以使用数字做下标,也可以使用字符串做下标 前面介绍过split函数,并使用了一个例子: $awk 'BEGIN {print split("123#456#789",myarray,"#")}' 3 上面例子中,split返回数组myarray的元素个数,实际上myarray数组为: myarray[1]="123" myarray[2]="456" myarray[3]="789"
[root@wuke shell2]# awk 'BEGIN{print split("123#456#789",myarray,"#");print myarray[1],myarray[2],myarray[3]}' 3 123 456 789
数组使用前不必定义,也不必指定数组元素个数。经常使用循环来访问数组,一般这样使用循环: for(element in array ) print array[element] #下面脚本先将"123#456#789" 使用split拆分,再循环打印各个数组元素
#!/bin/awk -f #name: arraytest.awk #prints out an array BEGIN{ a="123#456#789"; split(a,myarry,"#") } END{ for (i in myarry){ print myarry[i] } } #要运行脚本 需要使用/dev/null作为输入文件
[root@wuke shell2]# ./myarry.awk /dev/null 123 456 789 grade_student.txt: Yellow#Junior belts.awk:
#!/bin/awk -f #name: belts.awk #to call: belts.awk grade2.txt #loops through the grade2.txt file and counts how many #belts we have in(yellow,orange,red) #also count how many adults and juniors we have # #start of BEGIN #set FS and load the arrays and our values BEGIN{ FS="#" #load the belt colours we are interested in only belt["Yellow"] belt["Orange"] belt["Red"] #end of BEGIN #load the student type student["Junior"] student["Senior"] } #loop thru array that holds the belt colours against field-1 #if we have a match,keep a running total {for (colour in belt) { if($1==colour) belt[colour]++ } } #loop thru array that holds the student type against #field-2 if we have a match, keep a running total {for (b in student) { if($2==b) student[b]++ } }
脚本的作用: 1.统计Yellow、Orange和Red级别的人各是多少 2.俱乐部中有多少成年(Senior)和未成年人(Junior) [root@wuke shell2]# ./belts.awk grade_student.txt the club has 2 Orange Belts the club has 2 Red Belts the club has 3 Yellow Belts The club has 7 Senior students The club has 8 Junior students 以下有一个总结了很多关于AWK内容的文件,使用CherryTree(百度上搜索可以下载)打开,
#长度
ERROR*
ERROR*
CAUTION LPSS ERROR ON ACC NO.
ERROR*
ERROR*
ERROR*
ERROR*
ERROR*
PASS FILED INVALID ON GHSI
ERROR*
CUTION LPSS ERROR ON ACC NO.
ERROR*
ERROR*
Orange#Senior
Yellow#Junior
Purple#Junior
Brown-2#Junior
White#Senior
Orange#Senior
Red#Junior
Brown-2#Senior
Yellow#Senior
Red#Junior
Blue#Senior
Green#Senior
Purple#Junior
White#Junior