本文介绍如何用 Python 汇总与整理数据,涵盖 pandas 数据合并、正则表达式提取、列表/字典去重以及模型与字典的序列化(pickle、json、joblib)等常用代码片段,供读者参考。
# Reset to a clean 0..n-1 index after earlier filtering/sorting.
location_data = location_data.reset_index(drop=True)
# Drop duplicate rows, keeping the first occurrence for each
# (customer id, customer name, certificate address) key.
data = result_data.drop_duplicates(subset = ['集团客户ID', '集团客户名称', '证件地址'], keep = 'first')
# Left-join the deduplicated columns (plus 'output') back onto the full
# dataset; left_on == right_on, so this is a plain equi-join on the four
# identifying columns.
merge_data = pd.merge(boss_data, data[['ID', '名称', '地址', '省名称',
'output',]], how = 'left', left_on = [ 'ID', '名称', '地址', '省名称'], right_on = ['ID', '名称', '地址', '省名称'])
# NOTE(review): assigning a plain dict to a DataFrame column aligns the
# dict KEYS against the row index, not per-row values -- this looks like
# placeholder/demo data for the formatrow example below; confirm what the
# real pipeline assigns here.
data['enterprise_information'] = {"企业名称":1, "企业地址":2,"经度":3,"纬度":4}
def formatrow(row):
    """Expand a row's nested enterprise-information mapping into four values.

    Intended for ``DataFrame.apply(..., axis=1, result_type="expand")``.

    Args:
        row: a mapping/Series with an 'enterprise_information' entry that is
            either a dict with keys 企业名称/企业地址/经度/纬度, or a missing
            value (None/NaN).

    Returns:
        tuple: (name, address, longitude, latitude), or four empty lists
        when the mapping is absent (matching the original fallback).
    """
    info = row['enterprise_information']
    # After a left merge, missing cells arrive as NaN -- a float, which is
    # TRUTHY, so the original bare `if info:` crashed on subscripting.
    # Guard on the actual mapping type instead.
    if isinstance(info, dict):
        return info["企业名称"], info["企业地址"], info["经度"], info["纬度"]
    return [], [], [], []
# Fan the nested mapping out into four flat columns in a single pass;
# result_type="expand" turns each returned 4-tuple into four cells.
data[["企业名称", "企业地址","经度","纬度"]] = data.apply(formatrow, axis=1, result_type="expand")
import re

# Raw string: '\d' in a plain literal is an invalid escape sequence
# (SyntaxWarning since Python 3.12); r'...' keeps the regex intact.
# The lone '.' matches the measure word (个 / 张) between the count
# and the noun.
pattern = re.compile(r'(\d*).(椅子|桌子)')
s = '这里有3个椅子和10张桌子'
f = re.finditer(pattern, s)
print(f)  # a callable_iterator, not the matches themselves
for i in f:
    print(i)           # the match object
    print(i.group())   # whole match, e.g. '3个椅子'
    print(i.group(1))  # the digits
    print(i.group(2))  # the noun
    print(i.span())    # (start, end) offsets within s
输出:
3个椅子
3
椅子
(3, 7)
10张桌子
10
桌子
(8, 13)
添加非贪婪限定符 `?`(使 `.*` 进行最短匹配):
import re

# The trailing '?' makes '.*' non-greedy, so each match captures the
# SHORTEST run of characters sitting between '账户' and '不存在'.
meanwhile = '账户(.*?)不存在'
pattern = re.compile(meanwhile)
s = '账户不存在,显示银行账户已经不存在或者已销户,但客户表示未做更改。 '
f = re.finditer(pattern, s)
print(f)  # the lazy iterator object, not the matches
for i in f:
    print(i)          # match object
    print(i.group())  # whole match
    print(i.group(1)) # captured middle ('' for the literal '账户不存在')
    print(i.span())   # offsets within s
输出:
账户不存在
(0, 5)
账户已经不存在
已经
(10, 17)
# Index of the smallest distance.  NOTE(review): `value` is presumably a
# dict / DataFrame row holding a numeric 'distance' sequence -- confirm
# against the caller; the extra parentheses are redundant.
np.argmin((value['distance']))
# enumerate yields (index, element) pairs; this demo only prints the
# running index (0, 1, 2), discarding the element itself.
list_data = [1, 2, 3]
for index, _element in enumerate(list_data):
    print(index)
def list_unqiue(word_list):
    """Deduplicate a list while preserving first-occurrence order.

    Works for unhashable elements (e.g. a list of dicts), which rules
    out the usual ``set`` trick; the linear membership test makes this
    O(n^2), fine for short lists.

    Args:
        word_list: iterable of (possibly unhashable) items.

    Returns:
        list: the unique items in order of first appearance.
    """
    # NOTE: keeps the original 'unqiue' spelling so existing callers
    # are not broken.  The original built this via a list comprehension
    # executed purely for its append side effect, with the resulting
    # list of Nones bound to an unused variable -- replaced by a loop.
    unique_items = []
    for item in word_list:
        if item not in unique_items:
            unique_items.append(item)
    return unique_items
score_dict = {'a': 1, 'b': 2, 'c': 3}
# Compare (key, score) pairs by the score component and keep the key:
# the highest-priority rule is the one with the largest score.
top_rule = max(score_dict.items(), key=lambda kv: kv[1])[0]
输出:
'c'
score_dict = {'a': 1, 'b': 2, 'c': 3}
# Keep only entries scoring at least 2.  The original evaluated this
# expression and threw the result away; bind it so it is usable.
high_scores = dict(filter(lambda item: item[1] >= 2, score_dict.items()))
输出:
{'b': 2, 'c': 3}
from collections import Counter

total_list = [1, 2, 3, 3]
topn_word = 2
counter = Counter(total_list)  # frequency of each distinct work-order item
# Bind the top-N result -- the bare expression in the original was
# evaluated and discarded.
top_items = counter.most_common(topn_word)  # e.g. [(3, 2), (1, 1)]
输出:
counter
Counter({3: 2, 1: 1, 2: 1})
counter.most_common(topn_word)
[(3, 2), (1, 1)]
import pickle
# Persist the trained model with pickle.
# NOTE: the target directory must already exist, otherwise open() raises.
with open('/order_analysis/model/svc.pickle', 'wb') as f:
    pickle.dump(model, f)
# Restore the model from disk.
with open('/order_analysis/model/svc.pickle', 'rb') as f:
    model_load = pickle.load(f)
# Sanity-check the restored model on a few held-out samples, mapping
# predicted class ids back to category names.
print([id_to_cat[i] for i in model_load.predict(X_test[2:7])])
# joblib was removed from sklearn.externals in scikit-learn 0.23; it is
# now a standalone package with the identical dump/load API.
import joblib
joblib.dump(model_load, '/order_analysis/model/svc.pkl')
# Restore the model from disk.
model_load = joblib.load('/order_analysis/model/svc.pkl')
# Sanity-check the restored model.
# X_test must be preprocessed exactly the same way as the training set.
print([id_to_cat[i] for i in model_load.predict(X_test[2:7])])
import json
sentence_dict_path = 'keyword.json'
# ensure_ascii=False writes the Chinese keywords verbatim, so the file
# MUST be opened with an explicit utf-8 encoding -- the platform default
# (e.g. gbk on Chinese Windows) would otherwise corrupt or reject it.
# NOTE(review): the `self.` prefix implies this snippet was lifted from
# a method; confirm where sentence_keyword_dict actually lives.
with open(sentence_dict_path, "w", encoding="utf-8") as f:
    f.write(json.dumps(self.sentence_keyword_dict, ensure_ascii=False, indent=4, separators=(',', ':')))
import json
# Merge in the manually curated keywords.
sentence_dict_path = 'keyword.json'
# Load the word dictionary.  Explicit utf-8 so the non-ASCII content
# written with ensure_ascii=False round-trips on every platform
# (the locale default encoding is not guaranteed to be utf-8).
with open(sentence_dict_path, 'r', encoding='utf-8') as fp:
    sentence_dict = json.load(fp)
print(sentence_dict)
import pickle
# NOTE(review): the payload written here is pickle, not JSON -- the
# .json extension is misleading; consider .pickle/.pkl.
keyword_path = 'keyword.json'
with open(keyword_path, 'wb') as f_json:
    # HIGHEST_PROTOCOL: most compact/fastest format for this interpreter.
    pickle.dump(keyword_dict, f_json, pickle.HIGHEST_PROTOCOL)
import pickle
# Reload the keyword dict persisted above.  pickle.load on files from
# untrusted sources can execute arbitrary code -- only read files this
# program wrote itself.
with open(keyword_path, 'rb') as f:
    keyword_dict = pickle.load(f)