应用介绍
文本间隔:
1.
在每一行后面增加一空行
#sed 'G' passwd
#awk 'NR;{print ""}' passwd
#awk '{print $0 "\n"}' passwd
#awk '{printf("%s\n\n",$0)}' passwd (在每一行后面增加一空行 )
#awk 'NR{print $0 "\n"}' passwd
#awk '1;{print ""}' passwd
#awk 'BEGIN{ORS="\n\n"};1' passwd
#awk 'BEGIN{ORS="\n\n"};{print}' passwd
2. 每行后面增加两行空行
#sed 'G;G' passwd
#awk '{print $0 "\n\n"}' passwd
#awk '1;{print "\n"}' passwd
#awk 'NR;{print "\n"}' passwd
3.将原来的所有空行删除并在每一行后面增加一空行。
#sed '/^$/d;G' passwd
#awk '!/^$/{printf("%s\n\n",$0)}' passwd
#awk 'NF{print $0 "\n"}' passwd
#awk 'NF;NF{print ""}' passwd
4.数字每3位以逗号为分隔符(eg:12,345,678)
#sed ':a;s/\B[0-9]\{3\}\>/,&/;ta'
#sed -r -e :a -e 's/(.*[0-9])([0-9]{3})/\1,\2/;ta'
#awk '{while(match($0,/[0-9][0-9][0-9][0-9]+/)){$0=sprintf("%s,%s",substr($0,0,RSTART+RLENGTH-4),substr($0,RSTART+RLENGTH-3))}print $0}'
5. 显示文件中的前10行 (模拟“head”的行为)
sed '10q' passwd
awk 'NR<11' passwd
awk '{print;if(NR==10)exit}' passwd
6.显示文件中的第10行
sed -n '10p' passwd
awk 'NR==10' passwd
awk 'NR==10{print}' passwd
7.# 显示部分文本——指定行号范围(从第8至第12行,含8和12行)
sed -n '8,12p' # 方法1
sed '8,12!d' # 方法2
awk '{if(NR>=8 && NR<=12)print}'
awk 'NR==8,NR==12' passwd
8.显示文件中的后10行(tail)
#sed -e :a -e '$q;N;11,$D;ba'
9.显示文件中的最后一行(模拟“tail -1”)
#sed -n '$p' passwd
#sed '$!d' passwd
#awk 'END{print}' passwd
10.显示文中的倒数第二行
sed -e '$!{h;d;}' -e x # 当文件中只有一行时,输出空行
sed -e '1{$q;}' -e '$!{h;d;}' -e x # 当文件中只有一行时,显示该行
sed -e '1{$d;}' -e '$!{h;d;}' -e x # 当文件中只有一行时,不输出
awk '{B=A;A=$0}END{print B}'
11.删除空白行
#sed '/^$/d' passwd
#awk 'NF' passwd
#sed '/^\s*$/d' passwd
#sed -r 's@^$|^#|\s@@g;/^$/d' passwd (删除空行,空格,tab行,#开头,删除的比较彻底,空行之间也删除了)
12.打印第10行和第15行
#sed -n '10p;15p' passwd
#awk 'NR==10;NR==15' passwd
13.打印前12行,并显示行号(行号与内容用空格隔开)
#awk 'NR<13{print NR " " $0}' passwd (行号与内容用空格隔开)
#sed -n '1,12=;1,12p' passwd (行号在内容上方)
14.打印前12行,并显示行号(行号与内容用制表符\t隔开)
#awk 'NR<13{print NR "\t" $0}' passwd
#sed = passwd | sed -n 'N;s/\n/\t/;1,24p'
15. 打印空白行
#grep -n "^$" passwd
#sed -n '/^$/=' passwd
#awk '/^$/{print NR}' passwd
16.求和
#awk '{A+=$1}END{print A}' 1 (第一列全是数字,相加求和)
17.把123456求和
#echo 123456|awk 'BEGIN{FS=""}{for (i=1;i<=NF;i++) sum+=$i;print sum}'
#echo 123456|awk 'BEGIN{FS="";OFS="+"}{$1=$1;print $0}'|bc
#echo 123456|sed 's/[0-9]/&+/g'|sed 's/\(.*\)+/\1/g'|bc
18.只显示父目录(意思就是除去最后一个)
#echo "/usr/local/nginx/conf/"|sed -r 's#^(/.*/)[^/]+/?#\1#g'
#echo "/usr/local/nginx/conf"|sed -r 's#^(/.*/)([^/]+/?)#\2#g'
(只显示子目录)
19.只显示奇数行,并显示行号。
#sed -n '1~2=;1~2p' passwd
以下内容用sed和awk分别实现
20.# 删除8的倍数行
sed '0~8d' passwd # 只对GNU sed有效
sed 'n;n;n;n;n;n;n;d;' passwd # 其他sed
awk '{if(NR%8!=0)print}' passwd
21.将文件单双行互换
#seq 10|awk 'BEGIN{OFS="\n"}{if (NR%2==1) getline a;print a,$0}'
#seq 10|awk 'BEGIN{OFS="\n"}{getline a;print a,$0}'
#seq 10|sed -r 'N;s#(.*)\n(.*)#\2\n\1#g'
#seq 10|sed -n 'h;n;G;p'
1 在每一行后面增加一空行
sed G
awk '{printf("%s\n\n",$0)}'
2 # 将原来的所有空行删除并在每一行后面增加一空行。
# 这样在输出的文本中每一行后面将有且只有一空行。
sed '/^$/d;G'
awk '!/^$/{printf("%s\n\n",$0)}'
# 在每一行后面增加两行空行
sed 'G;G'
awk '{printf("%s\n\n\n",$0)}'
# 在匹配式样“regex”的行之前插入一空行
sed '/regex/{x;p;x;}'
awk '{if(/regex/)printf("\n%s\n",$0);else print $0}'
# 在匹配式样“regex”的行之后插入一空行
sed '/regex/G'
awk '{if(/regex/)printf("%s\n\n",$0);else print $0}'
# 为文件中的每一行进行编号(简单的左对齐方式)。这里使用了“制表符”
# (tab,见本文末尾关于’\t’的用法的描述)而不是空格来对齐边缘。
sed = filename | sed 'N;s/\n/\t/'
awk '{i++;printf("%d\t%s\n",i,$0)}'
# 计算行数 (模拟 “wc -l”)
sed -n '$='
awk '{i++}END{print i}'
# 只在行中出现字串“baz”的情况下将“foo”替换成“bar”
sed '/baz/s/foo/bar/g'
awk '{if(/baz/)gsub(/foo/,"bar");print $0}'
# 将“foo”替换成“bar”,并且只在行中未出现字串“baz”的情况下替换
sed '/baz/!s/foo/bar/g'
awk '{if(!/baz/)gsub(/foo/,"bar");print $0}'
# 不管是“scarlet”“ruby”还是“puce”,一律换成“red”
sed 's/scarlet/red/g;s/ruby/red/g;s/puce/red/g' #对多数的sed都有效
gsed 's/scarlet\|ruby\|puce/red/g' # 只对GNU sed有效
awk '{gsub(/scarlet|ruby|puce/,"red");print $0}'
# 倒置所有行,第一行成为最后一行,依次类推(模拟“tac”)。
# 由于某些原因,使用下面命令时HHsed v1.5会将文件中的空行删除
sed '1!G;h;$!d' # 方法1
sed -n '1!G;h;$p' # 方法2
awk '{A[i++]=$0}END{for(j=i-1;j>=0;j--)print A[j]}'
# 将行中的字符逆序排列,第一个字成为最后一字,……(模拟“rev”)
sed '/\n/!G;s/\(.\)\(.*\n\)/&\2\1/;//D;s/.//'
awk '{for(i=length($0);i>0;i--)printf("%s",substr($0,i,1));printf("\n")}'
# 将每两行连接成一行(类似“paste”)
sed '$!N;s/\n/ /'
awk '{f=!f;if(f)printf("%s",$0);else printf(" %s\n",$0)}'
# 为数字字串增加逗号分隔符号,将“1234567”改为“1,234,567”
sed ':a;s/\B[0-9]\{3\}\>/,&/;ta' # GNU sed
sed -e :a -e 's/\(.*[0-9]\)\([0-9]\{3\}\)/\1,\2/;ta' # 其他sed
#awk的正则没有后向匹配和引用,搞的比较狼狈,呵呵。
awk '{while(match($0,/[0-9][0-9][0-9][0-9]+/)){$0=sprintf("%s,%s",substr($0,0,RSTART+RLENGTH-4),substr($0,RSTART+RLENGTH-3))}print $0}'
# 显示文件中的前10行 (模拟“head”的行为)
sed 10q
awk '{print;if(NR==10)exit}'
# 显示文件中的第一行 (模拟“head -1”命令)
sed q
awk '{print;exit}'
# 显示文件中的最后10行 (模拟“tail”)
sed -e :a -e '$q;N;11,$D;ba'
# 显示文件中的最后一行(模拟“tail -1”)
sed '$!d' # 方法1
sed -n '$p' # 方法2
#这个比较好办,只存最后一行了。
awk '{A=$0}END{print A}'
# 显示文件中的倒数第二行
sed -e '$!{h;d;}' -e x # 当文件中只有一行时,输出空行
sed -e '1{$q;}' -e '$!{h;d;}' -e x # 当文件中只有一行时,显示该行
sed -e '1{$d;}' -e '$!{h;d;}' -e x # 当文件中只有一行时,不输出
#存两行呗(当文件中只有一行时,输出空行)
awk '{B=A;A=$0}END{print B}'
# 只显示匹配正则表达式的行(模拟“grep”)
sed -n '/regexp/p' # 方法1
sed '/regexp/!d' # 方法2
awk '/regexp/{print}'
# 只显示“不”匹配正则表达式的行(模拟“grep -v”)
sed -n '/regexp/!p' # 方法1,与前面的命令相对应
sed '/regexp/d' # 方法2,类似的语法
awk '!/regexp/{print}'
# 显示包含“AAA”、“BBB”和“CCC”的行(任意次序)
sed '/AAA/!d; /BBB/!d; /CCC/!d' # 字串的次序不影响结果
awk '{if(match($0,/AAA/) && match($0,/BBB/) && match($0,/CCC/))print}'
# 显示包含“AAA”、“BBB”和“CCC”的行(固定次序)
sed '/AAA.*BBB.*CCC/!d'
awk '{if(match($0,/AAA.*BBB.*CCC/))print}'
# 显示包含65个或以上字符的行
sed -n '/^.\{65\}/p'
cat ll.txt | awk '{if(length($0)>=65)print}'
# 显示包含65个以下字符的行
sed -n '/^.\{65\}/!p' # 方法1,与上面的脚本相对应
sed '/^.\{65\}/d' # 方法2,更简便一点的方法
awk '{if(length($0)<65)print}'
# 显示部分文本——指定行号范围(从第8至第12行,含8和12行)
sed -n '8,12p' # 方法1
sed '8,12!d' # 方法2
awk '{if(NR>=8 && NR<=12)print}'
# 显示第52行
sed -n '52p' # 方法1
sed '52!d' # 方法2
sed '52q;d' # 方法3, 处理大文件时更有效率
awk '{if(NR==52){print;exit}}'
# 从第3行开始,每7行显示一次
gsed -n '3~7p' # 只对GNU sed有效
sed -n '3,${p;n;n;n;n;n;n;}' # 其他sed
awk '{if(NR==3)F=1}{if(F){i++;if(i%7==1)print}}'
# 删除文件中相邻的重复行(模拟“uniq”)
# 只保留重复行中的第一行,其他行删除
sed '$!N; /^\(.*\)\n\1$/!P; D'
awk '{if($0!=B)print;B=$0}'
# 删除文件中的最后两行
sed 'N;$!P;$!D;$d'
awk '{B[NR]=$0}END{for(i=1;i<=NR-2;i++)print B[i]}'
# 删除文件中的最后10行
sed -e :a -e '$d;N;2,10ba' -e 'P;D' # 方法1
sed -n -e :a -e '1,10!{P;N;D;};N;ba' # 方法2
awk '{B[NR]=$0}END{for(i=1;i<=NR-10;i++)print B[i]}'
# 删除8的倍数行
sed '0~8d' # 只对GNU sed有效
sed 'n;n;n;n;n;n;n;d;' # 其他sed
awk '{if(NR%8!=0)print}' |head
#seq 1 10|sed '1!G;h;$!d' # 倒序排列
#ls -l|sed -n '/^.rwx.*/p' # 查找属主权限为7的文件
#sed = filename | sed 'N;s/\n/\t/' # 为文件中的每一行进行编号
(简单的左对齐方式)
#sed = filename | sed 'N;s/^/ /; s/ *\(.\{6,\}\)\n/\1 /' # 对文件中的所有行编号(行号在左,文字右端对齐)
#
100 100
a 100 a 100
b -50 200
c -20 b -50
d -30 150
c -20
130
d -30
#awk 'NR==1{sum=$1;print $0}NR!=1{print $0;sum=sum-$2;print sum}'
将passwd全文归为一行
awk '{printf"%s",$0}' passwd
#awk '/xxoo/{system("mail xxoo")}' urfile (neizhimingling)
#join <(sort file1) <(sort file2)
#awk 'NR==FNR{a[$1]=$2;next}{print $0,a[$1]}' 1.test 2.test
韩林海 男 22岁
海林韩 男 23岁
韩海林 男 21岁
林海韩 男 24岁
文件file:
A XXOO 22
B XXODO 13
C XXOOD 32
D XXOOD 9
要求的结果是 (在原文的基础上增加一列,对第三列的值依次累加求和,效果如下)
#awk '{a+=$3;print $0,a}' file
A XXOO 22 22
B XXODO 13 35
C XXOOD 32 67
D XXOOD 9 76
#cat 1.txt
A XXOO 22
B XXODO 13
C XXOOD 32
D XXOOD 9
#cat 1.txt |awk '{a+=$3;print $0,a}'
A XXOO 22 22
B XXODO 13 35
C XXOOD 32 67
D XXOOD 9 76
#匹配以数字开头,并且下一行是以字母开头的行
#sed -nr 'N;/^[0-9]+.+\n[a-z]/p;D'
#awk '/^[a-z]/&&s~/^[0-9]/{print s"\n"$0}{s=$0}'
#打印以abc结尾并且下一行是以bcd开头的行
#awk '/^bcd/&&s~/abc$/{print s"\n"$0}{s=$0}'
#awk 'NR==FNR{a[$0]=$0;next}{print a[$0],$0}' file1 file2
(对比两个文件不一样的东西)
#awk 'NR==FNR{a[$0]=$0;next}NR!=FNR{if(a[$0]!=$0)print}' file1 file
#diff -u rc3.d rc5.d (对比两个文件不一样的东西)
#awk -F: '{a[$1]=a[$1] OFS $2}END{for(i in a)print i,a[i]}' file1
#awk '{a[$1]=a[$1]"\t"$2;b[$1]=b[$1]"\t"$3;}END{for (i in a){printf "date %-s\n%-s %-s\n" ,a[i],i,b[i]}}'
#将【admin】开头的IP取出来
[root@liudehua test1]# cat 1.txt
[test-admin-web]
1.1.1.1
2.2.2.2
333.333.4.5
2.3.4.5
[test-admin-db]
1.2.3.4
1.2.4.5
1.2.4.4
1.1.1.1
[test-xxxx-db]
333.333.333.333
1)
[root@liudehua test1]# awk '/admin/{p=1;next};/^\[/&&!/admin/{p=0}p' 1.txt
1.1.1.1
2.2.2.2
333.333.4.5
2.3.4.5
1.2.3.4
1.2.4.5
1.2.4.4
1.1.1.1
2)
[root@liudehua test1]# awk -vRS='\\[[^]]+\\]\n' 'p~/admin/;{p=RT}' 1.txt
1.1.1.1
2.2.2.2
333.333.4.5
2.3.4.5
1.2.3.4
1.2.4.5
1.2.4.4
1.1.1.1
> # 每行后面增加一行空行
> awk '1;{print ""}'
> awk 'BEGIN{ORS="\n\n"};1'
> # 每行后面增加一行空行。输出文件不会包含连续的两个或两个以上的空行
> # 注意:在Unix系统, DOS行包括的 CRLF (\r\n) 通常会被作为非空行对待
> # 因此 'NF' 将会返回TRUE。
> awk 'NF{print $0 "\n"}'
> # 每行后面增加两行空行
> awk '1;{print "\n"}'
> 编号和计算:
> # 以文件为单位,在每句行前加上编号 (左对齐).
> # 使用制表符 (\t) 来代替空格可以有效保护页边的空白。
> awk '{print FNR "\t" $0}' files*
> # 用制表符 (\t) 给所有文件加上连贯的编号。
> awk '{print NR "\t" $0}' files*
> # number each line of a file (number on left, right-aligned)
> # Double the percent signs if typing from the DOS command prompt.
> awk '{printf("%5d : %s\n", NR,$0)}'
> # 给非空白行的行加上编号
> # 记得Unix对于 \r 的处理的特殊之处。(上面已经提到)
> awk 'NF{$0=++a " :" $0};{print}'
> awk '{print (NF? ++a " :" :"") $0}'
> # 计算行数 (模拟 "wc -l")
> awk 'END{print NR}'
> # 计算每行所有区域(列)之和
> awk '{s=0; for (i=1; i<=NF; i++) s=s+$i; print s}'
> # 计算所有行所有区域的总和
> awk '{for (i=1; i<=NF; i++) s=s+$i}; END{print s}'
> # 打印每行每区域的绝对值
> awk '{for (i=1; i<=NF; i++) if ($i < 0) $i = -$i; print }'
> awk '{for (i=1; i<=NF; i++) $i = ($i < 0) ? -$i : $i; print }'
> # 计算所有行所有区域(词)的个数
> awk '{ total = total + NF }; END {print total}' file
> # 打印包含 "Beth" 的行数
> awk '/Beth/{n++}; END {print n+0}' file
> # 打印第一列最大的行
> # 并且在行前打印出这个最大的数
> awk '$1 > max {max=$1; maxline=$0}; END{ print max, maxline}'
> # 打印每行的列数,并在后面跟上此行内容
> awk '{ print NF ":" $0 } '
> # 打印每行的最后一列
> awk '{ print $NF }'
> # 打印最后一行的最后一列
> awk '{ field = $NF }; END{ print field }'
> # 打印列数超过4的行
> awk 'NF > 4'
> # 打印最后一列大于4的行
> awk '$NF > 4'
> 文本转换和替代:
> # 在Unix环境:转换DOS新行 (CR/LF) 为Unix格式
> awk '{sub(/\r$/,"");print}' # 假设每行都以Ctrl-M结尾
> # 在Unix环境:转换Unix新行 (LF) 为DOS格式
> awk '{sub(/$/,"\r");print}'
> # 在DOS环境:转换Unix新行 (LF) 为DOS格式
> awk 1
> # 在DOS环境:转换DOS新行 (CR/LF) 为Unix格式
> # DOS版本的awk不能运行, 只能用gawk:
> gawk -v BINMODE="w" '1' infile >outfile
> # 用 "tr" 替代的方法。
> tr -d \r <infile >outfile # GNU tr 版本为 1.22 或者更高
> # 删除每行前的空白(包括空格符和制表符)
> # 使所有文本左对齐
> awk '{sub(/^[ \t]+/, ""); print}'
> # 删除每行结尾的空白(包括空格符和制表符)
> awk '{sub(/[ \t]+$/, "");print}'
> # 删除每行开头和结尾的所有空白(包括空格符和制表符)
> awk '{gsub(/^[ \t]+|[ \t]+$/,"");print}'
> awk '{$1=$1;print}' # 每列之间的空白也被删除
> # 在每一行开头处插入5个空格 (做整页的左位移)
> awk '{sub(/^/, " ");print}'
> # 用79个字符为宽度,将全部文本右对齐
> awk '{printf "%79s\n", $0}' file*
> # 用79个字符为宽度,将全部文本居中对齐
> awk '{l=length();s=int((79-l)/2); printf "%"(s+l)"s\n",$0}' file*
> # 每行用 "bar" 查找替换 "foo"
> awk '{sub(/foo/,"bar");print}' # 仅仅替换第一个找到的"foo"
> gawk '{$0=gensub(/foo/,"bar",4);print}' # 仅仅替换第四个找到的"foo"
> awk '{gsub(/foo/,"bar");print}' # 全部替换
> # 在包含 "baz" 的行里,将 "foo" 替换为 "bar"
> awk '/baz/{gsub(/foo/, "bar")};{print}'
> # 在不包含 "baz" 的行里,将 "foo" 替换为 "bar"
> awk '!/baz/{gsub(/foo/, "bar")};{print}'
> # 将 "scarlet" 或者 "ruby" 或者 "puce" 替换为 "red"
> awk '{gsub(/scarlet|ruby|puce/, "red"); print}'
> # 倒排文本 (模拟 "tac")
> awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }' file*
> # 如果一行结尾为反斜线符,将下一行接到这行后面
> # (如果有连续多行后面带反斜线符,将会失败)
> awk '/\\$/ {sub(/\\$/,""); getline t; print $0 t; next}; 1' file*
> # 排序并打印所有登录用户的姓名
> awk -F ":" '{ print $1 | "sort" }' /etc/passwd
> # 以相反的顺序打印出每行的前两列
> awk '{print $2, $1}' file
> # 调换前两列的位置
> awk '{temp = $1; $1 = $2; $2 = temp; print}' file
> # 打印每行,并删除第二列
> awk '{ $2 = ""; print }'
> # 倒置每行并打印
> awk '{for (i=NF; i>0; i--) printf("%s ",$i);printf ("\n")}' file
> # 删除重复连续的行 (模拟 "uniq")
> awk 'a !~ $0; {a=$0}'
> # 删除重复的、非连续的行
> awk '! a[$0]++' # 最简练
> awk '!($0 in a) {a[$0];print}' # 最有效
> # 用逗号链接每5行
> awk 'ORS=NR%5?",":"\n"' file # 注意:原写法 'ORS=%NR%5?...' 中多了一个 %,会导致语法错误
> 选择性的打印某些行:
> # 打印文件的前十行 (模拟 "head")
> awk 'NR < 11'
> # 打印文件的第一行 (模拟 "head -1")
> awk 'NR>1{exit};1'
> # 打印文件的最后两行 (模拟 "tail -2")
> awk '{y=x "\n" $0; x=$0};END{print y}'
> # 打印文件的最后一行 (模拟 "tail -1")
> awk 'END{print}'
> # 打印匹配正则表达式的行 (模拟 "grep")
> awk '/regex/'
> # 打印不匹配正则表达式的行 (模拟 "grep -v")
> awk '!/regex/'
> # 打印匹配正则表达式的前一行,但是不打印当前行
> awk '/regex/{print x};{x=$0}'
> awk '/regex/{print (x=="" ? "match on line 1" : x)};{x=$0}'
> # 打印匹配正则表达式的后一行,但是不打印当前行
> awk '/regex/{getline;print}'
> # 以任何顺序查找包含 AAA、BBB 和 CCC 的行
> awk '/AAA/ && /BBB/ && /CCC/'
> # 以指定顺序查找包含 AAA、BBB 和 CCC 的行
> awk '/AAA.*BBB.*CCC/'
> # 打印长度大于64个字节的行
> awk 'length > 64'
> # 打印长度小于64个字节的行
> awk 'length < 64'
> # 打印从匹配正则起到文件末尾的内容
> awk '/regex/,0'
> awk '/regex/,EOF'
> # 打印指定行之间的内容 (8-12行, 包括第8和第12行)
> awk 'NR==8,NR==12'
> # 打印第52行
> awk 'NR==52'
> awk 'NR==52 {print;exit}' # 对于大文件更有效率
> # 打印两个正则匹配间的内容 (包括正则的内容)
> awk '/Iowa/,/Montana/' # 大小写敏感
> 选择性的删除某些行:
> # 删除所有空白行 (类似于 "grep '.' ")
> awk NF
> awk '/./'
awk不显示最后一列
awk 'NF--' file
此命令遇到空行会报错,所以使用:
awk 'NF{NF--}1' file等同于awk 'NF{NF=NF-1}1' file
1或任意数字类似print$0
awk '{$NF=null;print $0}' file
null是指将最后一列置空,也可以将其它列置空,如awk '{$1=null;print $0}' file
awk '$NF=" "' file
sed '/^\s*$/d' test | awk 'NF--'
先去掉空行再去掉最后一列
列求和
awk 'BEGIN{total=0}{total+=$1}END{print total}'
awk 'pattern{action}' file
一般awk的格式是这样的。
如果写成awk 'pattern' file,那么action默认为{print $0};
pattern如果是个定值,则判断该值是否为0(0为false,非0为true);
pattern如果是个计算表达式,则判断计算结果是否为0;
pattern如果是个awk 命令,则判断其返回值是否为0。
这样就能理解awk '1' file的意义了:
在这里action为空,因此默认为{print $0};1为定值,非0,因此执行action,即print $0。
awk '!a[$1]++' file
打印每行第一列重复的行中的第一行
去掉重复的行
awk '!a[$0]++' file
按列分类求和
awk '{a[$1]+=$2}END{for(i in a)print i,a[i]}' file
--------------------------------------------------------------------------------------
1.取行的最后几个字符
echo "SYBASE4"|sed 's/.*\(....\)$/\1/'
2.取行的前几个字符
echo "SYBASE4"|sed 's/^\(........\).*/\1/'
3.删除文件中的空行
sed '/^$/d' filename
4.替换文件中的空行为0
sed 's/^$/0/g' -i filename
5、将两行合并成一行:
sed 'N;s/\n/ /g'
6、将一个文件中的内容将另一个文件中的每行内容通过变量进行替换
#!/bin/sh
for sheng in `cat shenglist`
do
sed -i "s/$sheng.*/$sheng/g" liuliangfenbu.txt
done
按位相加
sed -r 's/[0-9]/&+/g' urfile|sed -r 's/[\+][^\+]*$//'
awk 'BEGIN{FS=""}{for (i=1;i<=NF;i++) sum+=$i;print sum}' urfile
echo 65734843|awk 'BEGIN{FS="";OFS="+"}{$1=$1;print $0}'|bc
打印4001行至行尾:
sed -n '4001,$p' del_domainlist
在shell中遇到字符串问题,首先考虑的是grep ,sed ,awk , cut
不先讲这几个命令了,先来看一下字符串截取中用简单方法就能做到的事情 ${}
(1)shell中的单引号与双引号的区别:单引号关闭所有有特殊作用的字符,而双引号只要求shell忽略大多数,具体的说,就是①美元符号②反引号③反斜杠,这3种特殊字符不被忽略
(2)求字符串长度 ----- (1)expr "$x" : '.*' (2)${#x}
(3)求字符串子串 ----- ${x:$pos:$len},其中$pos位置,$len长度,其中$len可省略
(4)字符串替换 ----- ${x/a/b} 用b替换a ; ${x//a/b}用b替换所有的a
(5)字符串首尾截取----- ${x##*/}去掉所有/左边的字符,也可用其它匹配代替*/ ${x#*/}只去掉第一次出现/左边所有的字符。顺序为从左到右.${x%%/*}去掉所有/右边的字符,${x%/*}去掉第一次出现/右边的字符,顺序为从右到左.
eg,
#!/bin/bash
y=kdjfkd:dfkdjfkd:8888:9899899:kdjfkdfj
q=`echo $y | cut -d":" -f4` # 以:截取,取第四个字段即9899899
m=${q##*8} # 去掉最后一个8及其左边的所有字符
echo $m
n=${q#*8} # 去掉第一个8及其左边的字符
echo $n
result:99
99899
(6)Shell数组定义 a=(1 2 3 4),等号两边不能有空格,比如a =(1 2 3 4)和a= (1 2 3 4)都是不允许的。
(7)数组长度: ${#a[@]}或者 ${#a[*]} ;全部数组 ${a[@]}或${a[*]}返回1 2 3 4
(8)数组元素的长度 ${#a[i]},i为下标,和其它语言一样,从0开始,数组元素${a[i]}
(9)awk默认使用空格作为域分隔符,
"+"和"?"只适用awk而不适用sed和grep
awk条件操作符:<,<=,>=,==,!=,~,!~
awk字符串处理函数:gsub(r,s),gsub(r,s,t),index(s,t)等
awk内置变量:FILENAME,FNR,FS,NF,NR等
awk -F : '{print $1}' /etc/passwd 打印第一列的值, $0 打印全部的值
awk -F : 'BEGIN {print "name passwd\n-----------------"} {print $1"\t"$5} END {print "End of file"}' /etc/passwd 输出增加头尾
awk '{if($0~/root/) print $0}' /etc/passwd输出匹配"root"的行,等价于awk '$0~/root/' /etc/passwd
(10)无论命令是什么?sed并不与初始化文件打交道,它操作的只是文件的一个拷贝。如果不重定向到一个文件,直接在标准输出(显示屏)显示。
搜索替换是sed的拿手好戏。
sed -n '1,$p' /etc/passwd 打印从1到最后一行的所有内容
sed -e '/root/=' /etc/passwd打印出现"root"的行号和所有行
sed -n '/root/=' /etc/passwd只打印行号
sed -n -e '/root/p' /etc/passwd 只打印出现"root"的匹配行
sed -n -e '/root/p' -e '/root/=' /etc/passwd 打印匹配行和行号
sed 's/^M//g' /etc/passwd 删除行尾控制字符(^M)= ctrl+v+m
sed 's/^0*//g' /etc/passwd 删除行首的多个零
(11)grep 一般用来搜索字段或字串,sed用来搜索或者替换,awk可以进行复杂的运算和定制操作
(12)先不要管Shell的版本,来看看Shell 变量,在Shell中有三种变量:系统变量,环境变量,用户变量
系统变量:
$# 传递到脚本的参数个数;
$$脚本运行的当前进程id;
$?最后命令的退出状态,0表成功;
$!最后一个后台命令的PID ;
$@ 以"参数1" "参数2" ... 形式保存所有参数 ;
$* 以"参数1 参数2 ... " 形式保存所有参数 ;$0表示脚本名称
用户变量:用set命令查看
环境变量:用setenv查看
(13)不同的shell版本有不同数组赋值方法,而bourne shell (sh)中不支持数组方式(bash支持数组)。
(14)查看目录或者文件,符号链接:ls -l | grep '^d' 或者 ls -l | grep '^-',ls -l | grep '^l'
(15)查看最后一列的值:ls -l | grep '^l' | awk '{print $NF}' (默认以空格分隔),在awk中,NF表示字段数,$NF表示最后一个字段
FILE SPACING:
# double space a file
-> sed G
-> awk '$0=$0 ORS' or awk '1;{print ""}' or: awk -vORS="\n\n" '1'
# double space a file which already has blank lines in it. Output file
# should contain no more than one blank line between lines of text.
-> sed '/^$/d;G'
-> awk NF ORS='\n\n'
# triple space a file
-> sed 'G;G'
-> awk '$0=$0 ORS ORS'
# undo double-spacing (assumes even-numbered lines are always blank)
-> sed 'n;d'
-> awk 'i=!i'
(有很多可以实现这个的awk方法,这里只给出黑哥的码)
# insert a blank line above every line which matches "regex"
-> sed '/regex/{x;p;x;}'
-> awk '/regex/{$0=ORS $0}1'
# insert a blank line below every line which matches "regex"
-> sed '/regex/G'
-> awk '/regex/{$0=$0 ORS}1' or: awk '{ORS=(/regex/)?"\n\n":"\n"}1'
# insert a blank line above and below every line which matches "regex"
-> sed '/regex/{x;p;x;G;}'
-> awk '/regex/{$0=ORS $0 ORS}1'
NUMBERING:
# number each line of a file (simple left alignment). Using a tab (see
# note on '\t' at end of file) instead of space will preserve margins.
-> sed = filename | sed 'N;s/\n/\t/'
-> awk '{print NR"\t"$0}' filename
# number each line of a file (number on left, right-aligned)
-> sed = filename | sed 'N; s/^/ /; s/ *\(.\{6,\}\)\n/\1 /'
-> awk '{printf "%6s %s\n", NR,$0}' filename
# number each line of file, but only print numbers if line is not blank
-> sed '/./=' filename | sed '/./N; s/\n/ /'
-> awk '{print NF?NR" "$0:$0}' filename
# count lines (emulates "wc -l")
-> sed -n '$='
-> awk 'END{print NR}'
TEXT CONVERSION AND SUBSTITUTION:
# IN UNIX ENVIRONMENT: convert DOS newlines (CR/LF) to Unix format.
-> sed 's/.$//' # assumes that all lines end with CR/LF
-> awk '{sub(/.$/,"",$0);print}'
-> sed 's/^M$//' # in bash/tcsh, press Ctrl-V then Ctrl-M
-> awk '{sub(/^M$/,"",$0);print}'
-> sed 's/\x0D$//' # works on ssed, gsed 3.02.80 or higher
-> gawk '{sub(/\x0D$/,"",$0);print}'
# IN UNIX ENVIRONMENT: convert Unix newlines (LF) to DOS format.
-> sed "s/$/`echo -e \\\r`/" # command line under ksh
-> sed 's/$'"/`echo \\\r`/" # command line under bash
-> sed "s/$/`echo \\\r`/" # command line under zsh
-> sed 's/$/\r/' # gsed 3.02.80 or higher
-> gawk '{sub(/$/,"\r",$0);print}'
# delete leading whitespace (spaces, tabs) from front of each line
# aligns all text flush left
-> sed 's/^[ \t]*//' # see note on '\t' at end of file
-> awk '{sub(/^[ \t]*/,"",$0);print}'
# delete trailing whitespace (spaces, tabs) from end of each line
-> sed 's/[ \t]*$//' # see note on '\t' at end of file
-> awk '{sub(/[ \t]*$/,"",$0);print}'
# delete BOTH leading and trailing whitespace from each line
-> sed 's/^[ \t]*//;s/[ \t]*$//'
-> awk '{sub(/^[ \t]*/,"",$0);sub(/[ \t]*$/,"",$0);print}'
# insert 5 blank spaces at beginning of each line (make page offset)
-> sed 's/^/ /'
-> awk '{sub(/^/," ",$0);print}'
# align all text flush right on a 79-column width
-> sed -e :a -e 's/^.\{1,78\}$/ &/;ta' # set at 78 plus 1 space
-> awk '{printf "%79s\n",$0}'
# center all text in the middle of 79-column width. In method 1,
# spaces at the beginning of the line are significant, and trailing
# spaces are appended at the end of the line. In method 2, spaces at
# the beginning of the line are discarded in centering the line, and
# no trailing spaces appear at the end of lines.
-> sed -e :a -e 's/^.\{1,77\}$/ & /;ta' # method 1
-> sed -e :a -e 's/^.\{1,77\}$/ &/;ta' -e 's/\( *\)\1/\1/' # method 2
-> awk '{t=(79-length)/2+length;printf "%"t"s%"79-t"s\n",$0," "}' #length=length($0)
# substitute (find and replace) "foo" with "bar" on each line
-> sed 's/foo/bar/' # replaces only 1st instance in a line
-> awk '{sub(/foo/,"bar");print}'
-> sed 's/foo/bar/4' # replaces only 4th instance in a line
-> gawk '{$0=gensub(/foo/,"bar",4,$0);print}'
-> sed 's/foo/bar/g' # replaces ALL instances in a line
-> awk '{gsub(/foo/,"bar");print}'
-> sed 's/\(.*\)foo\(.*foo\)/\1bar\2/' # replace the next-to-last case
-> gawk '{$0=gensub(/(.*)foo(.*foo)/,"\\1bar\\2","g",$0);print}'
-> sed 's/\(.*\)foo/\1bar/' # replace only the last case
-> gawk '{$0=gensub(/(.*)foo/,"\\1bar","g",$0);print}'
# substitute "foo" with "bar" ONLY for lines which contain "baz"
-> sed '/baz/s/foo/bar/g'
-> awk '/baz/{gsub(/foo/,"bar")}1'
# substitute "foo" with "bar" EXCEPT for lines which contain "baz"
-> sed '/baz/!s/foo/bar/g'
-> awk '!/baz/{gsub(/foo/,"bar")}1'
# change "scarlet" or "ruby" or "puce" to "red"
-> sed 's/scarlet/red/g;s/ruby/red/g;s/puce/red/g' # most seds
-> gsed 's/scarlet\|ruby\|puce/red/g' # GNU sed only
-> gawk '{gsub(/scarlet|ruby|puce/,"red");print}'
# reverse order of lines (emulates "tac")
# bug/feature in HHsed v1.5 causes blank lines to be deleted
-> sed '1!G;h;$!d' # method 1
-> sed -n '1!G;h;$p' # method 2
-> awk '{a[NR]=$0}END{i=NR;while(i>=1) print a[i--]}'
# reverse each character on the line (emulates "rev")
-> sed '/\n/!G;s/\(.\)\(.*\n\)/&\2\1/;//D;s/.//'
-> awk -vFS= '{i=NF;while(i>=1) printf $(i--);print ""}'
# join pairs of lines side-by-side (like "paste")
-> sed '$!N;s/\n/ /'
-> awk 'ORS=NR%2?FS:RS' or: awk '{getline line;print $0 FS line}'
# if a line ends with a backslash, append the next line to it
-> sed -e :a -e '/\\$/N; s/\\\n//; ta'
-> awk '/\\$/{sub(/\\$/,"");printf $0;next}1'
# if a line begins with an equal sign, append it to the previous line
# and replace the "=" with a single space
-> sed -e :a -e '$!N;s/\n=/ /;ta' -e 'P;D'
-> awk -vRS='\n=' -vORS=' ' '{print}'
# add commas to numeric strings, changing "1234567" to "1,234,567"
-> gsed ':a;s/\B[0-9]\{3\}\>/,&/;ta' # GNU sed
-> sed -e :a -e 's/\(.*[0-9]\)\([0-9]\{3\}\)/\1,\2/;ta' # other seds
-> awk -v FS=OFS= '{for(i=2;i<=NF;i++) $i=(NF-i+1)%3?$i:","$i}1'
# add commas to numbers with decimal points and minus signs (GNU sed)
-> gsed -r ':a;s/(^|[^0-9.])([0-9]+)([0-9]{3})/\1\2,\3/g;ta'
-> awk -v FS=OFS= '{for(i=$1=="-"?3:2;i<=NF;i++) if($i==".") break; else {$i=(NF-i+1)%3?$i:","$i}}1'
# add a blank line every 5 lines (after lines 5, 10, 15, 20, etc.)
-> gsed '0~5G' # GNU sed only
-> sed 'n;n;n;n;G;' # other seds
-> awk '{$0=NR%5?$0:$0 ORS}1'
SELECTIVE PRINTING OF CERTAIN LINES:
# print first 10 lines of file (emulates behavior of "head")
-> sed 10q
-> awk 'NR==10{print;exit}1' or:awk '1;NR==10{exit}' (多谢Tim兄)
# print first line of file (emulates "head -1")
-> sed q
-> awk 'NR==1{print;exit}' or: awk '{print;exit}' (多谢Tim兄)
# print the last 10 lines of a file (emulates "tail")
-> sed -e :a -e '$q;N;11,$D;ba'
-> awk '{a[NR]=$0;delete a[NR-10]}END{for(i=NR-9;i<=NR;i++) print a[i]}'
# print the last 2 lines of a file (emulates "tail -2")
-> sed '$!N;$!D'
-> awk '{a=b;b=$0}END{print a ORS b}'
# print the last line of a file (emulates "tail -1")
-> sed '$!d' # method 1
-> sed -n '$p' # method 2
-> awk 'END{print $0}'
# print the next-to-the-last line of a file
-> sed -e '$!{h;d;}' -e x # for 1-line files, print blank line
-> awk '{a=b;b=$0}END{print a}'
-> sed -e '1{$q;}' -e '$!{h;d;}' -e x # for 1-line files, print the line
-> awk '{a=b;b=$0}END{print a?a:b}'
-> sed -e '1{$d;}' -e '$!{h;d;}' -e x # for 1-line files, print nothing
-> awk '{a=b;b=$0}END{printf a?a ORS:""}'
# print only lines which match regular expression (emulates "grep")
-> sed -n '/regexp/p' # method 1
-> sed '/regexp/!d' # method 2
-> awk '/regexp/'
# print only lines which do NOT match regexp (emulates "grep -v")
-> sed -n '/regexp/!p' # method 1, corresponds to above
-> sed '/regexp/d' # method 2, simpler syntax
-> awk '!/regexp/'
# print the line immediately before a regexp, but not the line
# containing the regexp
-> sed -n '/regexp/{g;1!p;};h'
-> awk '{a=b;b=$0}/regexp/{print a}'
# print the line immediately after a regexp, but not the line
# containing the regexp
-> sed -n '/regexp/{n;p;}'
-> awk '/regexp/{getline line;print line}'
# print 1 line of context before and after regexp, with line number
# indicating where the regexp occurred (similar to "grep -A1 -B1")
-> sed -n -e '/regexp/{=;x;1!p;g;$!N;p;D;}' -e h
-> awk '{a=b;b=c;c=$0}b~/6/{printf "%s\n%s\n%s\n%s\n",NR-1,a,b,c}'
-> awk '{a=b;b=$0}/6/{getline c;print NR-1"\n"a"\n"b"\n"c}'
# grep for AAA and BBB and CCC (in any order)
-> sed '/AAA/!d; /BBB/!d; /CCC/!d'
-> awk '/AAA/&&/BBB/&&/CCC/'
# grep for AAA and BBB and CCC (in that order)
-> sed '/AAA.*BBB.*CCC/!d'
-> awk '/AAA.*BBB.*CCC/'
# grep for AAA or BBB or CCC (emulates "egrep")
-> sed -e '/AAA/b' -e '/BBB/b' -e '/CCC/b' -e d # most seds
-> gsed '/AAA\|BBB\|CCC/!d' # GNU sed only
-> awk '/AAA|BBB|CCC/'
# print paragraph if it contains AAA (blank lines separate paragraphs)
# HHsed v1.5 must insert a 'G;' after 'x;' in the next 3 scripts below
-> sed -e '/./{H;$!d;}' -e 'x;/AAA/!d;'
-> awk -vRS='\n\n' '/AAA/'
# print paragraph if it contains AAA and BBB and CCC (in any order)
-> sed -e '/./{H;$!d;}' -e 'x;/AAA/!d;/BBB/!d;/CCC/!d'
-> awk -vRS='\n\n' '/AAA/&&/BBB/&&/CCC/'
# print paragraph if it contains AAA or BBB or CCC
-> sed -e '/./{H;$!d;}' -e 'x;/AAA/b' -e '/BBB/b' -e '/CCC/b' -e d
-> gsed '/./{H;$!d;};x;/AAA\|BBB\|CCC/b;d' # GNU sed only
-> awk -vRS='\n\n' '/AAA|BBB|CCC/'
# print only lines of 65 characters or longer
-> sed -n '/^.\{65\}/p'
-> awk 'length>=65'
# print only lines of less than 65 characters
-> sed -n '/^.\{65\}/!p' # method 1, corresponds to above
-> sed '/^.\{65\}/d' # method 2, simpler syntax
-> awk 'length<65' or awk 'NF<65' FS=
# print section of file from regular expression to end of file
-> sed -n '/regexp/,$p'
-> awk '/regexp/,0' or awk '/regexp/,EOF'
# print section of file based on line numbers (lines 8-12, inclusive)
-> sed -n '8,12p' # method 1
-> sed '8,12!d' # method 2
-> awk 'NR==8,NR==12' OR awk 'NR>=8&&NR<=12'
# print line number 52
-> sed -n '52p' # method 1
-> sed '52!d' # method 2
-> sed '52q;d' # method 3, efficient on large files
-> awk 'NR==52{print;exit}'
# beginning at line 3, print every 7th line
-> gsed -n '3~7p' # GNU sed only
-> sed -n '3,${p;n;n;n;n;n;n;}' # other seds
-> awk 'NR>=3&&!((NR-3)%7)'
# print section of file between two regular expressions (inclusive)
-> sed -n '/Iowa/,/Montana/p' # case sensitive
-> awk '/Iowa/,/Montana/'
SELECTIVE DELETION OF CERTAIN LINES:
# print all of file EXCEPT section between 2 regular expressions
-> sed '/Iowa/,/Montana/d'
-> awk '/Iowa/,/Montana/{next}1'
# delete duplicate, consecutive lines from a file (emulates "uniq").
# First line in a set of duplicate lines is kept, rest are deleted.
-> sed '$!N; /^\(.*\)\n\1$/!P; D'
-> awk '{printf $0==v?"":$0 ORS;v=$0}' or: awk 'a !~ $0; {a=$0}'
# delete duplicate, nonconsecutive lines from a file. Beware not to
# overflow the buffer size of the hold space, or else use GNU sed.
-> sed -n 'G; s/\n/&&/; /^\([ -~]*\n\).*\n\1/d; s/\n//; h; P'
-> awk '!a[$0]++'
# delete all lines except duplicate lines (emulates "uniq -d").
-> sed '$!N; s/^\(.*\)\n\1$/\1/; t; D'
-> awk '!(a[$0]++-1)'
# delete the first 10 lines of a file
-> sed '1,10d'
-> awk '++p>10'
# delete the last line of a file
-> sed '$d'
-> awk '{a=b;b=$0}NR>1{print a}' OR: awk '{a[NR+1]=$0;if(NR>1)print a[NR]}'
# delete the last 2 lines of a file
-> sed 'N;$!P;$!D;$d'
-> awk '{a[NR+2]=$0;if(NR>2)print a[NR]}' OR: awk '{a=b;b=c;c=$0}NR>2{print a}'
# delete the last 10 lines of a file
-> sed -e :a -e '$d;N;2,10ba' -e 'P;D' # method 1
-> sed -n -e :a -e '1,10!{P;N;D;};N;ba' # method 2
-> awk '{a[NR+10]=$0;if(NR>10)print a[NR]}'
(如果文件非常大,内存有限的话,加个delete,如下
-> awk '{a[NR+10]=$0;if(NR>10){print a[NR]
;delete a[NR]}}')
# delete every 8th line
-> gsed '0~8d' # GNU sed only
-> sed 'n;n;n;n;n;n;n;d;' # other seds
-> awk 'NR%8'
# delete lines matching pattern
-> sed '/pattern/d'
-> awk '/pattern/{next}1'
# delete ALL blank lines from a file (same as "grep '.' ")
-> sed '/^$/d' # method 1
-> sed '/./!d' # method 2
-> awk NF
# delete all CONSECUTIVE blank lines from file except the first; also
# deletes all blank lines from top and end of file (emulates "cat -s")
-> sed '/./,/^$/!d' # method 1, allows 0 blanks at top, 1 at EOF
-> awk '{/^$/?p--:p=1}!p>=0'
-> sed '/^$/N;/\n$/D' # method 2, allows 1 blank at top, 0 at EOF
-> awk -v p=1 '{/^$/?p--:p=1}!p>=0'
# delete all CONSECUTIVE blank lines from file except the first 2:
-> sed '/^$/N;/\n$/N;//D'
-> awk -v p=2 '{/^$/?p--:p=2}!p>=0'
# delete all leading blank lines at top of file
-> sed '/./,$!d'
-> awk '!/^$/{p=1}p' OR: awk '!/^$/,0'
# delete all trailing blank lines at end of file
-> sed -e :a -e '/^\n*$/{$d;N;ba' -e '}' # works on all seds
-> sed -e :a -e '/^\n*$/N;/\n$/ba' # ditto, except for gsed 3.02.*
-> awk '/^$/{n=n RS}; /./{printf "%s",n; n=""; print}'
# delete the last line of each paragraph
-> sed -n '/^$/{p;h;};/./{x;/./p;}'
-> awk '{a=b;b=$0}NR>1{printf /^$/?"":a ORS}'
©版权声明:本文内容由互联网用户自发贡献,版权归原创作者所有,本站不拥有所有权,也不承担相关法律责任。如果您发现本站中有涉嫌抄袭的内容,欢迎发送邮件至: www_apollocode_net@163.com 进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。
转载请注明出处: apollocode » awk sed 大全
文件列表(部分)
名称 | 大小 | 修改日期 |
---|
发表评论 取消回复