AWK程序可用于选择文件中的特定记录并对它们执行操作。对文本处理有非常好的支持。功能强大。
[root@localhost Awk0-script]# awk 'BEGIN{print "Don\47t Panic!"}'
Don't Panic!
注意: awk 程序整体由单引号 ' ' 括起,程序内部不能再直接出现单引号,需写成八进制转义 \47,否则 shell 会执行错误
[root@localhost Awk0-script]# vim demo-01 #编辑 demo-01
# demo-01: print a fixed message from a BEGIN rule.
BEGIN { print "Don't Panic!" }
[root@localhost Awk0-script]# awk -f demo-01 #运行 awk程序
Don't Panic!
[root@localhost Awk0-script]# vim demo-02
[root@localhost Awk0-script]# chmod +x demo-02
############################################
#!/bin/awk -f
# demo-02: executable awk program consisting of a single BEGIN rule
# that prints one fixed line.
BEGIN { print "Awk Executable Programs" }
##########################################
[root@localhost Awk0-script]# ./demo-02
Awk Executable Programs
[root@localhost Awk0-script]# awk 'BEGIN { print "hello Awk" } # let's be cute'
> '
hello Awk
[root@localhost Awk0-script]# awk '/li/ {print $0}' mail-list.txt
Amelia 555-5553 [email protected] F
Broderick 555-0542 [email protected] R
Julie 555-6699 [email protected] F
Samuel 555-3430 [email protected] A
[root@localhost Awk0-script]# awk 'BEGIN { for (i = 1; i <= 7; i++)
> print int(101 * rand()) }'
24
29
85
15
59
19
81
[root@localhost Awk0-script]# awk '{print length($0) }' mail-list.txt
59
59
59
91
[root@localhost Awk0-script]# ll -l mail-list.txt
-rw-r--r--. 1 root root 752 Dec 8 16:28 mail-list.txt
# 查看单个文件所占的字节数
[root@localhost Awk0-script]# ls -l mail-list.txt|awk '{print $5 " bytes"}'
752 bytes
#查看 当前目录占用的总字节数
[root@localhost Awk0-script]# ls -l ./ |awk 'BEGIN{print "The Directory Size(Bytes):"}{x+= $5}END{print x " bytes"}'
The Directory Size(Bytes):
1654 bytes
[root@localhost Awk0-script]# awk 'END{print NR}' data.txt
17
[root@localhost Awk0-script]# awk 'NR % 2 == 0' data.txt #打印偶数行;打印奇数行用: awk 'NR % 2 == 1' data.txt
Feb 15 32 24 226
Apr 31 52 63 420
Jun 31 42 75 492
Aug 15 34 47 316
Oct 29 54 68 525
Dec 17 35 61 401
Jan 21 36 64 620
Mar 24 75 70 495
[root@localhost Awk0-script]# ls -l | awk '$6 == "Dec" { sum += $5 }END { print sum }'
1654
[root@localhost Awk0-script]# echo a b | gawk 'BEGIN { FS = "" }
{
for (i = 1; i <= NF; i = i + 1)
print "Field", i, "is", $i,NF
}'
Field 1 is a 3
Field 2 is   3
Field 3 is b 3
[root@localhost Awk0-script]# vim w_do.awk
# w_do.awk -- reformat the output of w(1) into a short login/idle report.
# Usage: w | awk -f w_do.awk
BEGIN {
    # Fields are split on whitespace (awk's default). Do not rely on
    # "FIELDWIDTHs": awk variable names are case-sensitive, so that spelling
    # is just an unused variable and never enables fixed-width splitting.
    printf "User LOGIN LOGIN_IP TIME \n"
    printf "---------------------------------------------\n"
}

# Process every line after the first two.
# NOTE(review): presumably those are w's summary line and column headings -- confirm.
NR > 2 {
    # NOTE(review): $5 is assumed to be w's IDLE column; this shifts if an
    # earlier column is empty -- verify on the target system.
    idle = $5

    # Strip leading blanks from idle itself. The three-argument form is
    # required: with only two arguments sub() would use idle as the
    # replacement text and modify $0 instead.
    sub(/^ +/, "", idle)

    if (idle == "")
        idle = 0

    # "a:b" form: convert to a single number (first part * 60 + second part).
    if (idle ~ /:/) {
        split(idle, t, ":")
        idle = t[1] * 60 + t[2]
    }

    # "Ndays" form: the leading number is taken and scaled to seconds.
    if (idle ~ /days/)
        idle *= 24 * 60 * 60

    printf "%2s %10s %15s %8s \n", $1, $2, $3, idle
}
[root@localhost Awk0-script]# w |awk -f w_do.awk
User LOGIN LOGIN_IP TIME
---------------------------------------------
root pts/0 192.168.10.102 6.00s
[root@localhost Awk0-script]# gawk -f fw.awk fw.in
3 12 34a bcde
### fw.awk #####
# Split every record into three fixed-width fields of 2, 3 and 4 characters.
BEGIN {
    FIELDWIDTHS = "2 3 4"
}

# Print the field count followed by the three fields.
{
    print NF, $1, $2, $3
}
#############
@@@@ fw.in @@@@@@@
1234abcdefghi
注: gawk 4.2 及以后的版本支持把 FIELDWIDTHS 的最后一个值写成 "*",用来匹配记录中余下的全部字符,
详见: The GNU Awk User's Guide 4.6.3 Capturing Optional Trailing Data
[root@localhost Awk0-script]# vim address.csv
Robbins,Arnold,"1234 A Pretty Street, NE",MyTown,MyState,12345-6789,USA
[root@localhost Awk0-script]# vim process-csv.awk
########
# process-csv.awk -- split CSV records by what a field looks like (FPAT)
# instead of by what separates fields.
BEGIN {
    # A field is either a run of non-comma characters
    # or a double-quoted string.
    FPAT = "([^,]+)|(\"[^\"]+\")"
}

# For each record: report the number of fields, then list every field.
{
    print "NF = ", NF
    i = 1
    while (i <= NF) {
        printf("$%d = <%s>\n", i, $i)
        i++
    }
}
#######
[root@localhost Awk0-script]# gawk -f process-csv.awk address.csv
NF =  7
$1 = <Robbins>
$2 = <Arnold>
$3 = <"1234 A Pretty Street, NE"> ## 字段两端仍带有双引号,需要去掉引号
$4 = <MyTown>
$5 = <MyState>
$6 = <12345-6789>
$7 = <USA>
改进方法与改进之后的结果如下:
###改进的方法是在循环体内、printf 之前,使用awk的 substr(str,index,len) 去掉字段两端的双引号
# If the field starts with a double quote, keep only the text between
# its first and last characters.
if ($i ~ /^"/) {
    $i = substr($i, 2, length($i) - 2)
}
[root@localhost Awk0-script]# gawk -f process-csv.awk address.csv
NF =  7
$1 = <Robbins>
$2 = <Arnold>
$3 = <1234 A Pretty Street, NE>
$4 = <MyTown>
$5 = <MyState>
$6 = <12345-6789>
$7 = <USA>
[root@localhost Awk0-script]# gawk -f addr.awk address
######## addr.awk #####
# addr.awk -- print name, address and city/state from multi-line records.
# RS = "" makes blank lines the record separator; FS = "\n" makes every
# line of a record one field.
BEGIN {
    RS = ""
    FS = "\n"
}

{
    printf "Name is:%s,Address is:%s,City and State are :%s \n", $1, $2, $3
}
######################
Name is:Jane Doe,Address is:123 Main Street,City and State are :Anywhere, SE 12345-6789
Name is:John Smith,Address is:456 Tree-lined Avenue,City and State are :Smallville, MW 98765-4321
[root@localhost Awk0-script]# cat address
Jane Doe
123 Main Street
Anywhere, SE 12345-6789

John Smith
456 Tree-lined Avenue
Smallville, MW 98765-4321
变量(Variable) | 说明(Description) | 示例(Example) |
---|---|---|
$n | 当前记录的第n个字段,默认由FS分隔(tab或空格) | awk '{print $1}' file |
$0 | 完整的输入记录 | awk '{print $0}' file |
ARGC | 命令行参数的数目 | |
ARGIND | 当前正在处理的文件在 ARGV 数组中的下标(gawk 扩展) | awk '/^A/ {print ARGIND}' mail-list.txt |
ARGV | 包含命令行参数的数组 | |
CONVFMT | 数字转换格式(默认值为%.6g) | |
ENVIRON | 环境变量关联数组 | |
ERRNO | 最后一个系统错误的描述 | |
FIELDWIDTHS | 字段宽度列表(用空格分隔) | |
FILENAME | 当前文件名 | awk '/^A/ {print FILENAME}' file |
FNR | 各文件分别计数的行号 | awk '/^A/ {print FNR}' file |
FS | 字段分隔符(默认是任何空格) | |
IGNORECASE | 如果为1,则进行忽略大小写的匹配 | |
NF | 一条记录的字段的数目(即列) | |
NR | 已读取的记录总数(即当前记录的行号) | |
OFMT | 数字的输出格式(默认值是%.6g) | |
OFS | 输出字段分隔符(默认值是一个空格),输出时用它分隔各个字段 | |
ORS | 输出记录分隔符(默认值是一个换行符) | |
RLENGTH | 由match函数所匹配的字符串的长度 | |
RS | 记录分隔符(默认是一个换行符) | |
RSTART | 由match函数所匹配的字符串的第一个位置 | |
SUBSEP | 数组下标分隔符(默认值是\034) | |
参考:
The GNU Awk User’s Guide