Python(利用 shlex 库)逐行读取超大 CSV 文件,并以逗号分列,双引号内的逗号不作为分隔符

方法一:使用 shlex 逐行读取并拆分
import os,glob
import csv,re
import pandas as pd
import shlex


# Method 1: stream a very large CSV line by line and split each line on
# commas with shlex, so that commas inside double quotes do not split a field.
#
# NOTE: shlex treats runs of "whitespace" (here: commas) as one separator, so
# empty fields such as `a,,c` are collapsed and later columns shift left.
# Because shlex runs in its default non-POSIX mode, the surrounding double
# quotes are kept as part of a quoted token.

# Loop-invariant pattern, compiled once; raw string keeps the regex literal.
cell_pattern = re.compile(r".*EUtranCell(.*),")

# Both files are managed by `with` so the output handle is always closed.
# The output file is still created/truncated up front, exactly as before.
with open('cellrelation.csv', 'w') as save_f, \
        open('./EUtranCellRelation.csv', 'r') as cellrelation:
    for line in cellrelation:
        # Drop the line terminator: only ',' is declared as whitespace below,
        # so otherwise the newline would stay glued to the last token.
        s = shlex.shlex(line.rstrip('\r\n'))
        s.quotes = '"'
        s.whitespace = ','
        s.whitespace_split = True
        # shlex's default comment character is '#'; disable it so a '#'
        # inside the data does not silently truncate the rest of the row.
        s.commenters = ''

        b = list(s)
        # Blank or short rows have no third column to inspect.
        if len(b) < 3:
            continue

        if b[0] == "10.124.100.100.log":
            cell = cell_pattern.findall(b[2])
            print(b[0], b[2], cell)
            # To persist matching rows instead of only printing them:
            # save_f.write(b[0] + '\n')



方法二:使用 pandas 按块(chunksize)读取,只保留第 0、2 两列
import csv
import os
import pandas as pd
# Method 2: let pandas stream the CSV in chunks instead of loading it whole.
# Only columns 0 and 2 are kept, the file is read without a header row, and
# each chunk holds at most 1,000,000 rows.
chunk_iter = pd.read_csv(
    './EUtranCellRelation.csv',
    sep=',',
    header=None,
    usecols=[0, 2],
    chunksize=1000000,
)
for chunk in chunk_iter:
    # Each chunk is already a DataFrame; append it to the output file without
    # an index column or header so successive chunks concatenate cleanly.
    chunk.to_csv('cellrelation1.csv', mode='a', index=False, header=None)
    # Row-filtering conditions would go here ...

你可能感兴趣的:(PYTHON)