python中读取特定字符串,文件自然排序,单列时间前后时间差,批量读取csv文件

 python3.6

1,读取特定字符串

从字符串list中取出特定的字符串,可以借助列表推导式和字符串方法(更复杂的匹配也可以借助正则表达式),很方便,举例说明:

# Sample names; some carry a '_cont' suffix that should be filtered out.
cli = [
    'cm_00102_cont', 'cm_00102', 'cm_00103_cont',
    'cm_00103', 'cm_00104', 'cm_00104_cont',
]
# Keep the names that contain 'cm_' and do not contain '_cont'.
match = [name for name in cli if 'cm_' in name and '_cont' not in name]
# Split every kept name on '_' into its component parts.
mats = [name.split('_') for name in match]
print(mats)

输出结果

[['cm', '00102'], ['cm', '00103'], ['cm', '00104']]

如果cli是bytes格式而非str,则可以先将bytes转换为str,得到str格式的cli之后再按同样的方法处理。

# The same sample names, this time supplied as bytes objects.
clim = [
    b'cm_00102_cont', b'cm_00102', b'cm_00103_cont',
    b'cm_00103', b'cm_00104', b'cm_00104_cont',
]
# Decode every entry as UTF-8 so the str-based filtering below can be used.
cli = [raw.decode("utf-8") for raw in clim]
# Keep the names that contain 'cm_' and do not contain '_cont'.
match = [name for name in cli if 'cm_' in name and '_cont' not in name]
# Split every kept name on '_' into its component parts.
mats = [name.split('_') for name in match]
print(mats)

2,对文件夹下文件进行自然排序(文件有编号)

def natural_sort(l):
    """Return a new list with the strings of *l* in natural (human) order.

    Every string is split into alternating text and ASCII-digit runs. Digit
    runs are compared as integers and text runs are compared
    case-insensitively, so ``'f2'`` sorts before ``'f10'``.

    Args:
        l: Iterable of strings, e.g. file names.

    Returns:
        A new sorted list; the input is left untouched.
    """
    def alphanum_key(key):
        # re.split with a capturing group alternates text / digit chunks,
        # so the odd positions are exactly the [0-9]+ runs. Deciding by
        # position rather than str.isdigit() avoids calling int() on
        # characters such as '²', which isdigit() accepts but int() rejects,
        # and keeps the element types aligned between any two keys.
        chunks = re.split('([0-9]+)', key)
        return [int(c) if i % 2 else c.lower() for i, c in enumerate(chunks)]

    return sorted(l, key=alphanum_key)

调用该函数,对csvfilepath下的所有文件名进行自然排序:

 # Names of all entries in the csvfilepath directory, in natural order.
 allfile_list = natural_sort(os.listdir(csvfilepath))

3,矩阵中时间错位,计算时间差,时间的格式为'%Y-%m-%d %H:%M:%S',c_dett存放前后时间差,newtim存放以0开始的时间累计

def get_dettime(dataori):
    """Return the gap in seconds between each timestamp and the previous one.

    The cumulative elapsed time (``newtim`` in the accompanying notes) is not
    part of the return value; apply ``itertools.accumulate`` to the result if
    it is needed.

    Args:
        dataori: Iterable of timestamp strings formatted as
            ``'%Y-%m-%d %H:%M:%S'``.

    Returns:
        A list of ints with one entry per input timestamp. The first entry
        is 0; each later entry is the signed number of seconds elapsed since
        the preceding timestamp. Empty input gives an empty list.

    Raises:
        ValueError: If a timestamp does not match the expected format.
    """
    import datetime

    time_format = '%Y-%m-%d %H:%M:%S'
    # Parse every timestamp once instead of twice per adjacent pair.
    parsed = [datetime.datetime.strptime(t, time_format) for t in dataori]
    if not parsed:
        # Keep the output the same length as the input.
        return []
    # total_seconds() includes the whole-day part of the difference, which
    # timedelta.seconds silently drops for gaps of a day or more.
    return [0] + [
        int((later - earlier).total_seconds())
        for earlier, later in zip(parsed, parsed[1:])
    ]
# Convert the string timestamps in the 'time' column into datetime objects.
newdfa['ntime']=[datetime.strptime(x,'%Y-%m-%d %H:%M:%S') for x in newdfa['time']]
# Seconds elapsed since the first row (whole days included).
sectim = newdfa['ntime']-newdfa['ntime'].iloc[0]
newdfa['timeseconds']=[(x.days*86400 +x.seconds) for x in sectim]

# Pair every row with its predecessor by shifting the column one position.
dettim1 = newdfa['ntime'].iloc[:-1].reset_index(drop=True)
dettim2 = newdfa['ntime'].iloc[1:].reset_index(drop=True)
# Count whole days as well: .seconds alone drops them for gaps of a day or
# more, which would disagree with the 'timeseconds' column computed above.
det = [(x.days*86400 + x.seconds) for x in (dettim2-dettim1)]
# The first row has no predecessor, so its gap is 0.
det.insert(0,0)
newdfa['dettime']= det


4,批量读取csv文件

def get_csvdatas(csvfilepath):
    """Load every ``.csv`` file in a directory, in natural file-name order.

    Each file is read with ``pandas.read_csv`` and receives a ``'dettime'``
    column holding the result of ``get_dettime`` applied to its ``'time'``
    column, so every file must contain a ``'time'`` column.

    Args:
        csvfilepath: Directory containing the CSV files. A trailing path
            separator is optional.

    Returns:
        A list of DataFrames, one per CSV file, ordered by ``natural_sort``
        of the file names.
    """
    import os
    import pandas as pd

    allfile_list = natural_sort(os.listdir(csvfilepath))
    csv_list = [x for x in allfile_list if x.endswith('.csv')]
    cs_data = []
    for csv_name in csv_list:
        # os.path.join inserts the separator when csvfilepath lacks a
        # trailing one; plain concatenation would build a wrong path.
        frame = pd.read_csv(os.path.join(csvfilepath, csv_name))
        frame['dettime'] = get_dettime(frame['time'])
        cs_data.append(frame)
    return cs_data

 

 

 

 

 

 

你可能感兴趣的:(python)