# 读取目录 正则表达式 用例

import os
import random
import re  # regular-expression module
import shutil
import sys

 

# Collect files that mention a known mail domain.
#
# Usage: <script> INPUT_DIR OUTPUT_NAME
#
# Every file located in an immediate sub-directory of INPUT_DIR is scanned.
# A file is selected when at least one of its lines starts with "china" and
# also contains an address at 126.com or sina.com.  Selected files are copied
# to INPUT_DIR/OUTPUT_NAME as 1.txt, 2.txt, ...

# Literal prefix a line must start with to be considered.
LINE_PREFIX = "china"

# Mail-domain patterns a candidate line must contain.  The earlier spellings
# ("@*126.com", "@*sina.com") were glob-style: as regular expressions "@*"
# means "zero or more '@'" and "." matches any character, so they accepted
# text such as "126xcom" with no "@" at all.
names = [r"@.*126\.com", r"@.*sina\.com"]

# Compiled once here because the patterns are applied to every scanned line.
ADDRESS_PATTERNS = [re.compile(pattern) for pattern in names]


def line_matches(line):
    """Return True if *line* starts with "china" and contains a known address.

    Args:
        line: One line of text, with or without its trailing newline.

    Returns:
        True when the line begins with ``LINE_PREFIX`` and at least one of
        ``ADDRESS_PATTERNS`` is found anywhere in it, otherwise False.
    """
    if not line.startswith(LINE_PREFIX):
        return False
    return any(pattern.search(line) for pattern in ADDRESS_PATTERNS)


def file_matches(filepath):
    """Return True if any line of the file at *filepath* satisfies line_matches.

    Scanning stops at the first matching line.  Undecodable bytes are
    replaced instead of raising, so one oddly encoded file does not abort
    the whole run.
    """
    with open(filepath, errors="replace") as handle:
        return any(line_matches(line) for line in handle)


def copy_matching_files(inputdir, output_name):
    """Copy every matching file below *inputdir* into ``inputdir/output_name``.

    Args:
        inputdir: Directory whose immediate sub-directories are scanned.
        output_name: Name of the destination directory, created inside
            *inputdir* when it does not exist yet.

    Returns:
        The number of files copied.  Copies are named ``1.txt``, ``2.txt``,
        ... in sorted (sub-directory, file name) order.
    """
    outputdir = os.path.join(inputdir, output_name)
    if not os.path.isdir(outputdir):
        os.makedirs(outputdir)
    output_abs = os.path.abspath(outputdir)

    copied = 0
    # Sorted listings make the numbering of the copies reproducible;
    # os.listdir() returns entries in arbitrary order.
    for subdir in sorted(os.listdir(inputdir)):
        subdir_path = os.path.join(inputdir, subdir)
        # Plain files at the top level cannot be listed, and the destination
        # lives inside inputdir, so it must not be rescanned on a second run.
        if not os.path.isdir(subdir_path):
            continue
        if os.path.abspath(subdir_path) == output_abs:
            continue

        for filename in sorted(os.listdir(subdir_path)):
            filepath = os.path.join(subdir_path, filename)
            if not os.path.isfile(filepath):
                continue
            # Each file is copied at most once, however many of its lines or
            # patterns match.
            if file_matches(filepath):
                copied += 1
                outputpath = os.path.join(outputdir, str(copied) + ".txt")
                # shutil.copy is portable and is not confused by quotes or
                # other shell metacharacters in file names.
                shutil.copy(filepath, outputpath)
    return copied


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("usage: %s INPUT_DIR OUTPUT_NAME" % sys.argv[0])
    copy_matching_files(sys.argv[1], sys.argv[2])
#结束

#

#

 

# Anchor and wildcard examples for the re module.
# NOTE: re.search() returns a Match object (truthy) when the pattern is found
# and None (falsy) otherwise; it never returns the literal True or False.

regex = r"^china"  # "^" anchors at the start: text beginning with "china"

regex2 = r"ever$"  # "$" anchors at the end: text ending with "ever"

sample = "china will survive forever"

exist = re.search(regex, sample)  # truthy: sample starts with "china"

exist2 = re.search(regex2, sample)  # truthy: sample ends with "ever"

# `and` yields its second operand when the first is truthy, so this holds the
# Match for regex2 (truthy) because both patterns are found.
exist4 = re.search(regex, sample) and re.search(regex2, sample)

# ".*" stands for any run of characters.  The glob-style "will*forever" would
# only allow extra "l" characters between "wil" and "forever", so it would
# not match the sample.
regex3 = r"will.*forever"

exist3 = re.search(regex3, sample)  # truthy: matches "will survive forever"

# 你可能感兴趣的:(正则表达式,OS,regex,import)