总结下这一个月以来的代码小trick呀~
list之间可以直接+,效果等于extend
a = [1,2,3]
b = [4,5,6]
a+=b
print(a)
$ [1, 2, 3, 4, 5, 6]
a = [1,2,3]
b = [4,5,6]
a.append(b)
print(a)
$ [1, 2, 3, 4, 5, 6]
### append是将list整体向后加,而extend是将list中的元素向后加
a = [1,2,3]
b = [4,5,6]
a.append(b)
print(a)
$ [1, 2, 3, [4, 5, 6]]
### list在append / extend之后千万不可以赋值哦
a = [1,2,3]
b = [4,5,6]
a.extend(b)
print(a)
$ None
但是python list 不支持减法
只能使用py的投机取巧了
a = [1, 2, 3, 4, 5, 6]
b = [4,5,6]
a = list(set(a)-set(b))
print(a)
$ [1, 2, 3]
a = a - b
print(a)
$ TypeError: unsupported operand type(s) for -: 'list' and 'list'
为了使(取json文件中值的)函数可以正常运行,就不可以读入不是json的形式,用os.path.splitext(files[i])[-1] == ".xml"来判断/过滤掉隐藏文件。
detail_data = data_df[‘name’].str.split(’-’,expand=True)
因为expend=True,所以要将其新赋给一个dataframe。
pose_data[‘pose’] = [x.strip().replace(’_’, ‘,’) for x in pose_data[‘pose’]]
哇,就感觉这个还是很厉害诶!!!
emo_data['expression'] = emo_data[['angry','disgusted','fearful','happy','sad','surprised','neutral']].apply(lambda x: emo_data.columns[list(x).index(np.max(x))], axis=1)
不带apply的lambda函数~
alg_emo.rename(columns=lambda x:'alg_'+str(x), inplace=True)
nonalg_time = time_out[np.isnan(time_out['alg_time'])]['mark_time']
len(nonalg_time)
pose_diff.describe().T.sort_values(by='mean', ascending = False)
# 转成list:
if 'unknown' in list(face_id_r['face_id']):
face_id_r = face_id_r[face_id_r['face_id'] != 'unknown']
if 'unknown' in df.columns:
pass
lambda 对df数据行操作的两种写法
list中直接写函数
# way1
new_df['name'] = [int(x) for x in new_df['name']]
# way2
new_df['name'] = new_df['name'] .apply(lambd x:int(x))
将秒数转化为string时间格式
divmod:除余函数
def sec2time(sec_number):
"""
将秒数转化为string时间格式
return time_str
"""
sec_number = sec_number/10
if sec_number< 3600:
minute_num, second_num = divmod(sec_number, 60)
time_str = ("%02d:%02d" % (minute_num, second_num))
else:
minute_num, second_num = divmod(sec_number, 60)
hour_num, minute_num = divmod(minute_num, 60)
time_str = ("%02d:%02d:%02d" % (hour_num, minute_num, second_num))
return time_str
lambda函数中使用之前自定义的函数
# lambda apply 自定义函数sec2time()
stu_df['stu_speak_dur_conv'] = stu_df[['stu_speak_dur']].apply(lambda x: sec2time(int(x)), axis=1)
stu_df = stu_df[stu_df['stu_speak_dur_conv'] != str('00:00')]
愉悦度赋分,用字典极大的减少了loop次数,提高了效率
emo = [0,1,2,3,4,5,6]
score = [-8,-2,-6,9,-10,10,5]
x = dict(zip(emo,score))
emo_score = []
for i in range(len(emo_tb)):
res = 0
for emo, score in x.items():
# 直接找到对于的分数,而不用loop + if
res = res + emo_tb[emo].iloc[i]*score
emo_score.append(res)
emo_tb['positive_score1'] = emo_score
选取行数据:loc
emo_tb.loc[emo_tb['target_face_id'].isin(['34846125','34845936','34846002','34845556'])])
超级重要!!!
找到最大数据的index
emo_df['expression'] = emo_df[[0,1,2,3,4,5,6]].apply(lambda x: emo_df.columns[list(x).index(np.max(x))+2], axis=1)