import os
import random
import re  # regular-expression module
import shutil
import sys
inpudir = sys.argv[1]
outputDir = sys.argv[2]
names = ["@*126.com", "@*sina.com"]
subdirs = os.listdir(inputdir) #读取inputdir目录下所有子目录名
outputdir = os.path.join(inputdir, outputDir)
if(not(os.path.exists(outputdir))):
os.mkdir(outputdir) #在inputdir目录下新建子目录outputDir
fileNum = 1 #文件新名字
for subdir in subdirs:
inputDir = os.path.join(inputDir, subdir)
flist = os.listdir(inputDir) #分别读取子目录,将文件名存入flist
count = len(flist) #count为文件数量
for i in range(0, count):
filepath = os.path.join(inputDir, flist[i])
lines = open(filepath) #读取文件每一行,并存入lines中
for line in lines.readlines():
regex = "^china"
if re.match(regex, line): #匹配是否以"china"开头
for j in range(0, 2):
if re.search(names[j], line): #查看是否含有"@...126.com" 或 "@... sina.com"的字符串
outputpath = os.path.join(outputdir, str(fileNum) + ".txt")
os.system("cp '%s' '%s'"%(filepath, outputpath)) #拷贝文件
fileNum += 1
break
# End of the file-filtering script
#
#
# Regular-expression examples run against one sample sentence.
regex = "^china"  # "^" anchors at the start: text beginning with "china"
regex2 = "ever$"  # "$" anchors at the end: text ending with "ever"
sample = "china will survive forever"
# re.search returns a Match object (truthy) on success and None on failure.
exist = re.search(regex, sample)  # Match: sample starts with "china"
exist2 = re.search(regex2, sample)  # Match: sample ends with "ever"
# "and" yields the second Match when both searches succeed.
exist4 = re.search(regex, sample) and re.search(regex2, sample)
# ".*" means "any run of characters"; a bare "*" would only repeat the
# preceding "l" and so would not span " survive ".
regex3 = "will.*forever"
exist3 = re.search(regex3, sample)  # Match: "will survive forever"