1、DataFrame遍历行和新增行示例
import pandas as pd
df1 = pd.DataFrame([['1,2,3', 'a'], ['4', 'b']], columns=list('AB'))
#######
# For every row whose column A holds several comma-separated values, create
# one extra row per individual value (all other columns copied unchanged).
# Rows are collected in a plain list and turned into a DataFrame once, which
# avoids growing a DataFrame row by row inside the loop.
new_rows = []
for _, row in df1.iterrows():
    parts = row['A'].split(',')
    if len(parts) > 1:
        for part in parts:
            # Copy the row's values and override only column A.
            new_rows.append({**row.to_dict(), 'A': part})
add_df = pd.DataFrame(new_rows, columns=df1.columns)
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# The original rows are kept and the new rows are added after them; index
# labels are not renumbered, so they may repeat (pass ignore_index=True to
# concat if a fresh 0..n-1 index is wanted).
df1 = pd.concat([df1, add_df])
2、记录数统计
import pandas as pd
# Sample data: column B holds the labels whose occurrences are counted.
records = [['1,2,3', 'a'], ['4', 'b'], ['4', 'b1']]
df1 = pd.DataFrame(records, columns=['A', 'B'])
# Count how many rows carry each distinct value of column B. As a bare
# expression the resulting Series is only displayed in an interactive
# session; assign it to a name to use it in a script.
df1['B'].value_counts()
3、重复记录清理
import pandas as pd
# Sample data containing one exact duplicate of the row ('4', 'b').
records = [['1,2,3', 'a'], ['4', 'b'], ['4', 'b'], ['4', 'b1']]
df1 = pd.DataFrame(records, columns=['A', 'B'])
# Flag every row that is followed by an identical (A, B) pair, then keep the
# unflagged rows, so only the last occurrence of each duplicate survives.
# The surviving rows keep their original index labels.
is_earlier_duplicate = df1.duplicated(subset=['A', 'B'], keep='last')
df1 = df1[~is_earlier_duplicate]
4、读取Json文件
import pandas as pd
from io import StringIO

# Read the file explicitly as UTF-8 (the standard encoding for JSON) instead
# of relying on the platform's locale encoding.
with open('data/net.json', encoding='utf-8') as f:
    json_str = f.read()
# Passing a literal JSON string to read_json is deprecated since pandas 2.1;
# wrapping the text in StringIO hands it over as a file-like object instead.
df1 = pd.read_json(StringIO(json_str), orient='records')