python3.6
1,读取特定字符串
从字符串list中筛选特定字符串,可以用in判断配合split完成(更复杂的模式也可以借助正则表达式),很方便,举例说明:
# Sample identifiers: every 'cm_<number>' name also has a '_cont' twin.
cli = [
    'cm_00102_cont', 'cm_00102', 'cm_00103_cont',
    'cm_00103', 'cm_00104', 'cm_00104_cont',
]
# Keep the names that contain 'cm_' and do not contain '_cont'.
match = [name for name in cli if 'cm_' in name if '_cont' not in name]
# Break every kept name into its underscore-separated fields.
mats = [kept.split('_') for kept in match]
print(mats)
输出结果
[['cm', '00102'], ['cm', '00103'], ['cm', '00104']]
如果cli是bytes格式,而非str,则可以先将bytes逐个转换为str,得到str格式的cli后再按上面的方法处理。
# The same identifiers as above, this time supplied as bytes objects.
clim = [
    b'cm_00102_cont', b'cm_00102', b'cm_00103_cont',
    b'cm_00103', b'cm_00104', b'cm_00104_cont',
]
# Decode each entry as UTF-8 so the str-based filtering below can be reused.
cli = [raw.decode("utf-8") for raw in clim]
# Keep the names that contain 'cm_' and do not contain '_cont'.
match = [name for name in cli if 'cm_' in name if '_cont' not in name]
# Break every kept name into its underscore-separated fields.
mats = [kept.split('_') for kept in match]
print(mats)
2,对文件夹下文件进行自然排序(文件有编号)
def natural_sort(l):
    """Return the strings of *l* sorted in natural (human) order.

    Runs of ASCII digits are compared by their numeric value and the text
    between them is compared case-insensitively, so ``'f2.csv'`` sorts
    before ``'f10.csv'``.

    Args:
        l: Iterable of strings, for example the result of ``os.listdir``.

    Returns:
        A new sorted list; the input is left unmodified.
    """
    # Local import so the function does not depend on a module-level
    # ``import re`` being present.
    import re

    digit_runs = re.compile('([0-9]+)')

    def alphanum_key(key):
        # Splitting on a capturing group always yields text at the even
        # indices and the captured ASCII digit runs at the odd indices.
        # Converting by position (instead of asking str.isdigit()) keeps
        # non-ASCII digits such as '²' or '٣' as text: int('²') raises
        # ValueError, and an int in a text slot cannot be compared with the
        # strings found in that slot for other keys.
        return [
            int(chunk) if index % 2 else chunk.lower()
            for index, chunk in enumerate(digit_runs.split(key))
        ]

    return sorted(l, key=alphanum_key)
调用该函数,对csvfilepath目录下的所有文件名进行自然排序:
# Example call: list the entries of the directory `csvfilepath` and put the
# names in natural order. NOTE(review): `os` must be imported and
# `csvfilepath` defined by the surrounding code - neither is shown here.
allfile_list=natural_sort(os.listdir(csvfilepath))
3,矩阵中时间错位,计算时间差,时间的格式为'%Y-%m-%d %H:%M:%S',c_dett存放前后时间差,newtim存放以0开始的时间累计
def get_dettime(dataori, *, cumulative=False):
    """Compute the gaps, in whole seconds, between consecutive timestamps.

    Args:
        dataori: Iterable of timestamp strings formatted as
            ``'%Y-%m-%d %H:%M:%S'`` (a list or a pandas Series both work,
            because the values are iterated rather than indexed).
        cumulative: When true, return the running total of the gaps
            (elapsed seconds since the first timestamp) instead of the
            individual gaps.

    Returns:
        A list with one int per input timestamp. The first element is
        always 0. An empty input yields an empty list so that the result
        always has the same length as the input.

    Raises:
        ValueError: If a timestamp does not match the expected format.
    """
    import datetime

    time_format = '%Y-%m-%d %H:%M:%S'
    # Parse every timestamp exactly once.
    stamps = [datetime.datetime.strptime(t, time_format) for t in dataori]
    if not stamps:
        return []

    c_dett = [0]   # gap to the previous timestamp
    newtim = [0]   # elapsed time since the first timestamp
    for earlier, later in zip(stamps, stamps[1:]):
        delta = later - earlier
        # timedelta.seconds alone drops whole days and wraps negative gaps
        # into 0..86399; combining it with .days gives the true signed gap.
        gap = delta.days * 86400 + delta.seconds
        c_dett.append(gap)
        newtim.append(newtim[-1] + gap)
    return newtim if cumulative else c_dett
# `datetime.strptime` below needs the class, not the module.
from datetime import datetime

# Convert the textual timestamps in newdfa['time'] into datetime objects.
newdfa['ntime'] = [datetime.strptime(x, '%Y-%m-%d %H:%M:%S') for x in newdfa['time']]
# Elapsed whole seconds since the first row.
sectim = newdfa['ntime'] - newdfa['ntime'].iloc[0]
newdfa['timeseconds'] = [(x.days * 86400 + x.seconds) for x in sectim]
# Gap between each row and the previous one: align rows 0..n-2 with rows
# 1..n-1 by resetting both indexes before subtracting.
dettim1 = newdfa['ntime'].iloc[:-1].reset_index(drop=True)
dettim2 = newdfa['ntime'].iloc[1:].reset_index(drop=True)
# Include .days as well: .seconds alone silently drops gaps of a day or more
# and wraps negative gaps, unlike the 'timeseconds' column above.
det = [(x.days * 86400 + x.seconds) for x in (dettim2 - dettim1)]
# The first row has no predecessor, so its gap is 0.
det.insert(0, 0)
newdfa['dettime'] = det
4,批量读取csv文件
def get_csvdatas(csvfilepath):
    """Read every ``.csv`` file in a directory, in natural file-name order.

    Each file is loaded with ``pandas.read_csv`` and gets an extra
    ``'dettime'`` column holding the result of ``get_dettime`` applied to
    its ``'time'`` column.

    Args:
        csvfilepath: Path of the directory to scan; a trailing path
            separator is optional.

    Returns:
        A list of DataFrames, one per ``.csv`` file, ordered by
        ``natural_sort`` of the file names. Empty when the directory
        contains no ``.csv`` files.
    """
    import os
    import pandas as pd

    csv_list = [
        name
        for name in natural_sort(os.listdir(csvfilepath))
        if name.endswith('.csv')
    ]
    cs_data = []
    for name in csv_list:
        # os.path.join instead of '+': plain concatenation only produced a
        # valid path when csvfilepath already ended with a separator.
        frame = pd.read_csv(os.path.join(csvfilepath, name))
        frame['dettime'] = get_dettime(frame['time'])
        cs_data.append(frame)
    return cs_data