python读取txt文档数据并筛选某一列满足条件的所有行

import xlwt

# Read a space-separated text file, split every line into its non-empty
# fields, and keep the rows whose 4th field equals the target value.
# An .xls worksheet has a hard row limit (65536), so the counts printed
# below show how many sheets the export step will need.

# Zero-based index of the column to test, and the value a row must have there.
TARGET_COLUMN = 3
TARGET_VALUE = '184'

# The file is read exactly once; `with` closes it even if reading fails.
# The encoding is left at the platform default, as no encoding is given here.
with open('123.txt', 'r') as f:
    # One string per line of the file (newline characters still attached).
    data = f.readlines()

# Number of lines in the input (the author's run reported 2794729).
print(len(data))

# Tokenize each line: drop the newline, split on single spaces, and discard
# the empty strings produced by runs of consecutive spaces.
B = [
    [field for field in line.strip('\n').split(' ') if field]
    for line in data
]

# Rows whose target column matches TARGET_VALUE.
C = []
for i, row in enumerate(B):
    if len(row) <= TARGET_COLUMN:
        # Too few fields to have the target column (e.g. a blank line):
        # report the row index and move on instead of failing.
        print('第{0}条数据处理失败'.format(i))
    elif row[TARGET_COLUMN] == TARGET_VALUE:
        C.append(row)

# Number of matching rows.
print(len(C))

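# Check whether the 4th column of row i equals '184' (the target value); rows that match are collected into the new list C.
# C turned out to contain 696767 rows, so at 50,000 rows per sheet (about 700,000 / 50,000 = 14) the data is written to 14 sheets.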

# Write the filtered rows in C to 'amazing.xls', spreading them over as many
# worksheets as needed. An .xls worksheet holds at most 65536 rows, so rows
# are written in chunks of ROWS_PER_SHEET, which stays safely below that.
ROWS_PER_SHEET = 50000

wbk = xlwt.Workbook(encoding='utf-8')

# One sheet per chunk, named Sheet1, Sheet2, ... in order. The number of
# sheets follows from len(C) instead of being fixed in advance, so the export
# works for any number of matching rows.
for sheet_number, start in enumerate(range(0, len(C), ROWS_PER_SHEET), start=1):
    sheet = wbk.add_sheet("Sheet{0}".format(sheet_number), cell_overwrite_ok=True)
    # Row indices restart at 0 on every sheet; each field goes to its own column.
    for row_index, row in enumerate(C[start:start + ROWS_PER_SHEET]):
        for col_index, value in enumerate(row):
            sheet.write(row_index, col_index, value)

if not C:
    # No matching rows: still create one empty sheet, because a workbook
    # needs at least one worksheet to be saved as a usable file.
    wbk.add_sheet("Sheet1", cell_overwrite_ok=True)

wbk.save('amazing.xls')

你可能感兴趣的:(python,开发语言)