Python系列(4)-- Python 正则表达式匹配字符串替换、格式修改

有如下数据:每行以"8 位数字 + . + 2 位数字"形式的编码开头(例如 84011000.00),需要把编码中的 . 去掉。
(原文此处为示例数据截图)

# -*- coding: utf-8 -*-
"""
Created on Mon Sep 25 20:47:33 2017

@author: Don
"""
import re


# A line must start with eight digits, a literal dot, then two digits.
# The dot is escaped so that it cannot match an arbitrary character.
CODE_PREFIX = re.compile(r'(\d{8})\.(\d{2})')


def strip_code_dot(line):
    """Remove the dot from the numeric code at the start of *line*.

    Only the leading code is rewritten; identical text appearing later in
    the line is left untouched.

    Args:
        line: One line of input text, without its trailing newline.

    Returns:
        The line with the leading ``dddddddd.dd`` code rewritten as
        ``dddddddddd``, or ``None`` when the line does not start with such
        a code.
    """
    found = CODE_PREFIX.match(line)
    if found is None:
        return None
    return found.group(1) + found.group(2) + line[found.end():]


def convert_84_file(src="84.txt", dst="84_result.txt"):
    """Copy *src* to *dst*, removing the dot from each line's leading code.

    Lines that do not start with a ``dddddddd.dd`` code are skipped, as in
    the original script. Input is read as UTF-8 (a leading BOM is ignored)
    and output is written as UTF-8, so the result does not depend on the
    platform's default encoding.

    Args:
        src: Path of the input text file.
        dst: Path of the output text file; overwritten if it exists.
    """
    # "utf-8-sig" drops a BOM, which would otherwise stop the first line
    # from matching. Text mode also normalises CRLF line endings.
    with open(src, encoding="utf-8-sig") as reader, \
            open(dst, "w", encoding="utf-8") as writer:
        for raw in reader:
            fixed = strip_code_dot(raw.rstrip("\n"))
            if fixed is not None:
                writer.write(fixed + "\n")


if __name__ == "__main__":
    convert_84_file()

有如下数据:需要去掉每行最后一个字段(报关码)中的 .,并在该字段不足十位时于末尾补零。
(原文此处为示例数据截图)

# -*- coding: utf-8 -*-
"""
Created on Tue Sep 26 10:46:12 2017

@author: Don
"""

import re
import copy

# Width that the last field is right-padded to with zeros.
CODE_WIDTH = 10


def normalize_index_line(line, width=CODE_WIDTH):
    """Normalise the last whitespace-separated field of *line*.

    The first ``.`` in the last field is removed and the field is then
    right-padded with ``0`` up to *width* characters. Everything before
    the last field is kept exactly as it was, including its spacing.
    Trailing whitespace (for example a stray ``\\r``) is dropped so that it
    cannot shift the position at which the last field is cut.

    Args:
        line: One line of input text.
        width: Minimum length of the rewritten last field.

    Returns:
        The rewritten line, or ``None`` for a blank or whitespace-only line.
    """
    body = line.rstrip()
    fields = body.split()
    if not fields:
        return None
    code = fields[-1]
    # After rstrip() the last field ends exactly at the end of `body`,
    # so slicing by its length yields the untouched prefix.
    prefix = body[:len(body) - len(code)]
    return prefix + code.replace(".", "", 1).ljust(width, "0")


def convert_index_file(src="index.txt", dst="index_result.txt"):
    """Copy *src* to *dst*, normalising the last field of every line.

    Blank lines are skipped, as in the original script. Input is read as
    UTF-8 (a leading BOM is ignored) and output is written as UTF-8, so
    the result does not depend on the platform's default encoding.

    Args:
        src: Path of the input text file.
        dst: Path of the output text file; overwritten if it exists.
    """
    with open(src, encoding="utf-8-sig") as reader, \
            open(dst, "w", encoding="utf-8") as writer:
        for raw in reader:
            fixed = normalize_index_line(raw)
            if fixed is not None:
                writer.write(fixed + "\n")


if __name__ == "__main__":
    convert_index_file()

你可能感兴趣的:(python)