号段提取工具

1、先把xls文件导出为以制表符（Tab）分隔的txt文件

2、过滤txt文件内容并输出

vi haoduan.awk

#!/bin/awk -f

# version 0.2
# Purpose: extract number segments (hao duan) from tab-separated text files.


#天津	22
#海南	898

# shquote(s): return s wrapped in single quotes so that /bin/sh treats it as
# one literal word; every embedded single quote is rewritten as '\''.
function shquote(s) {
	gsub(/'/, "'\\''", s);
	return "'" s "'";
}

# Pre-flight pass over every command-line argument, executed before any
# record is read:
#   1. the file must be readable and non-empty, otherwise the run is aborted;
#   2. if file(1) reports CRLF line endings, the file is converted in place
#      with dos2unix;
#   3. if file(1) does not report UTF, the file is converted in place from
#      gb2312 to utf-8 with iconv.
# Afterwards FS is set to a tab and the banner flag x is armed.
#
# NOTE: awk still runs the END rule after an exit issued from BEGIN.
BEGIN {
	all = 0;	# total of emitted lines over all finished files

	if (ARGC == 1) {
		printf("Input: file.txt...\n");
		exit 1
	}

	for (i = 1; i <= ARGC - 1; i++) {
		src = ARGV[i];
		# File names are passed to the shell only in quoted form, so
		# blanks and shell metacharacters in a name are harmless.
		qsrc = shquote(src);

		# Probe readability by reading into a scratch variable (this
		# leaves $0 alone) and close the stream again right away.
		if ((getline probe < src) == -1) {
			print "\nError: " src " file not exist!\n";
			exit 1;
		}
		close(src);

		if (system("test -s " qsrc)) {
			print "\nWarn: " src " size is zero!\n";
			exit 1;
		}

		# Line endings: only claim success when dos2unix really succeeded.
		if (system("file " qsrc " | grep CRLF >/dev/null 2>&1") == 0) {
			if (system("dos2unix " qsrc " >/dev/null 2>&1") == 0)
				printf("\nconvert %s format is dos to unix ok...\n", src);
			else
				printf("\nWarn: convert %s format dos to unix failed!\n", src);
		}
		else
			printf("\nfile %s format is unix file ok...\n", src);

		# Encoding: convert into a private mktemp file and overwrite the
		# input only after iconv succeeded, so a failed conversion can no
		# longer destroy the input. Copying with cat (instead of mv)
		# keeps the input file's own mode and owner.
		if (system("file " qsrc " | grep UTF >/dev/null 2>&1") != 0) {
			cmd = "tmp=$(mktemp) || exit 1; ";
			cmd = cmd "if iconv -f gb2312 -t utf-8 " qsrc " > \"$tmp\"";
			cmd = cmd " && cat \"$tmp\" > " qsrc "; then rc=0; else rc=1; fi; ";
			cmd = cmd "rm -f \"$tmp\"; exit $rc";

			if (system(cmd) == 0)
				printf("\nconvert %s coding is gb2312 to utf-8 ok...\n", src);
			else
				printf("\nWarn: convert %s coding gb2312 to utf-8 failed!\n", src);
		}
		else
			printf("\nfile %s coding is utf-8 ok...\n", src);

		# A longer separator closes the report of the last file.
		if (i == ARGC - 1)
			printf("\n*************************************************\n\n");
		else
			printf("\n***************************************\n");
	}

	FS = "\t"

	x = 1;	# 1 = the file-name banner still has to be printed
}

# start_table(col, nf): treat the current record as the header row of a new
# table. Remembers the current file name, resets the per-file line counter
# sum, re-arms the banner flag x, stores col in COL as the first data column
# and keeps the first four characters of each header cell col..nf in h[].
function start_table(col, nf,    c) {
	file_name = FILENAME;
	sum = 0;
	x = 1;
	COL = col;
	for (c = col; c <= nf; c++)
		h[c] = substr($c, 1, 4);
}

# head(nf): consume the leading lines of a file until its header row is found.
#   nf - number of fields of the current record as seen by the caller.
#
# Skipped lines decrement FNR and NR, so the following record is again seen
# as "record 1" and is routed back into this function by the calling rule.
# Every path ends in `next`, i.e. the function never returns to its caller.
#
# The parameter was formerly spelled NF; gawk refuses to use a predefined
# variable as a parameter name, so it is named nf here. The loop index is
# now local as well.
function head(nf) {
	# Title line (contains both 联通 and 调整) or a blank/tab-only line:
	# skip it and remember in y that a title was seen.
	if (($0 ~ "联通" && $0 ~ "调整") || $0 ~ "^\t*$") {
		FNR--;
		NR--;
		y = 1;
		next;
	}

	# First non-blank line after a title line: header row of a table whose
	# data columns start at field 7.
	if (y == 1) {
		y = 0;
		sub("\t*$", "");	# strip trailing tabs from the record
		start_table(7, nf);
		next;
	}

	# Lines mentioning 移动 or 电信 are skipped like title lines, but
	# without setting y. (Blank lines never get here; the first test
	# above already consumed them.)
	if ($0 ~ /移动/ || $0 ~ /电信/) {
		FNR--;
		NR--;
		next;
	}

	# Otherwise this record is the header row of a table whose data
	# columns start at field 4.
	start_table(4, nf);
	next;
}

# Runs for every record that awk currently counts as the first one of its
# file. While x is 1 it prints the banner for the file being started; for
# any file after the first it prints the line count of the previous file
# first and adds that count to the grand total. The record is then handed to
# head(), which always ends in `next`.
FNR == 1 {
	if (x == 1) {
		if (NR == 1)
			printf("************** file name = %s **************\n",FILENAME);
		else {
			printf("****** file %s line = %d ******\n\n************** file name = %s **************\n",file_name,sum,FILENAME);
			all += sum;
		}
		x = 0;	# banner done; head() re-arms x at the header row
	}

	head(NF);
}

# range(str): print one output line per number described by str and count
# each printed line in sum.
#   str - either a single number ("012") or an inclusive range ("000-099").
#
# Every line consists of the column prefix h[i], the number zero-padded to
# three digits, two tabs, then fields 1 and 2 of the current record. The
# column index i is the global loop variable of the calling rule.
#
# The range bounds are taken from both sides of the "-" instead of fixed
# character offsets, so pieces such as " 000-099" (blank left over after
# splitting a list) or "1-5" are no longer silently dropped. bounds, lo, hi
# and k are locals (extra parameters), callers pass str only.
function range(str,    bounds, lo, hi, k) {
	# A blank piece (e.g. from "000,,100") describes no number at all.
	if (str ~ /^[ \t]*$/)
		return;

	if (str ~ /-/) {
		split(str, bounds, "-");
		lo = bounds[1] + 0;
		hi = bounds[2] + 0;
		for (k = lo; k <= hi; k++) {
			printf("%s%03d\t\t%s\t%s\n", h[i], k, $1, $2);
			sum++;
		}
	}
	else {
		printf("%s%03d\t\t%s\t%s\n", h[i], str, $1, $2);
		sum++;
	}
}

# Data rows: expand every non-empty cell from column COL onwards through
# range(). A cell is either a double-quoted comma-separated list, a list
# separated by "、", or a single number / range.
{
	# Row filters. HAODUAN is computed but not used while ALLHAOD (the
	# whole record, true for any non-empty line) is the active condition.
	# 22 and 898 are the values listed next to 天津 and 海南 in the
	# header comments of this file.
#	HAODUAN = ($3 == 22 || $3 == 898);
	HAODUAN = ($3 == 22);
#	HAODUAN = ($3 == 898);
	ALLHAOD = $0;

	# Lines containing 本张表 or 已网间 are never processed.
	if ($0 ~ /本张表/ || $0 ~ /已网间/)
		next;

	# To filter on column 3 instead, test HAODUAN here.
	if (ALLHAOD) {
		sub("\t*$", "");	# strip trailing tabs so NF ends at the last used cell

		for (i = COL; i <= NF; i++) {
			if ($i == "")
				continue;

			if ($i ~ /"/) {
				# Quoted list: remove the quotes, split on commas.
				gsub(/"/, "", $i);
				n = split($i, r, ",");
				j = 1;
				while (j <= n) {
					range(r[j]);
					j++;
				}
			}
			else if ($i ~ /、/) {
				n = split($i, r, "、");
				j = 1;
				while (j <= n) {
					range(r[j]);
					j++;
				}
			}
			else {
				# Single number or a single "a-b" range.
				range($i);
			}
		}
	}
}

# Final report: line count of the last file, then the grand total over all
# files.
#
# There is deliberately no `exit 0` here. awk also runs this rule after an
# `exit` in BEGIN, and an explicit `exit 0` would replace the failure status
# set there (e.g. the `exit 1` of the usage message) with success. Without
# it, a normal run still ends with status 0.
END {
	printf("****** file %s line = %d ******\n", file_name, sum);

	all += sum;
	printf("\nAll file process ok. all line = %d\n\n", all);
}

 

 

你可能感兴趣的:(号段提取工具)