# coding: utf-8
import codecs
import json
import re
from collections import OrderedDict, defaultdict
import pandas as pd
# Path of the Excel workbook that is loaded with pd.read_excel further down.
# The file name is Chinese for "processed data".
FILE_PATH = u'处理的数据.xlsx'
def clean_name(name):
    """Return *name* with digits, whitespace and region suffix characters removed.

    The characters 省 (province), 市 (city) and 区 (district) are removed
    wherever they occur in the string, not only at the end.

    Args:
        name: Text of a region label.

    Returns:
        The label with every digit, whitespace character and 省/市/区 removed.
    """
    # Inside a character class ``|`` is a literal pipe, not alternation, so the
    # members are listed without separators to avoid stripping pipes by
    # accident.  Backslashes are doubled (rather than using a raw string)
    # because the ``ur''`` prefix is not valid on Python 3 while a plain
    # ``u'\d'`` triggers an invalid-escape warning there.
    return re.sub(u'[\\d\\s省市区]', u'', name)
# Load the workbook and cut out the numeric matrix plus its row/column labels.
# NOTE(review): the slice bounds are kept exactly as in the original script;
# presumably they skip header rows/columns and trailing summary rows/columns
# of the sheet -- confirm against the actual workbook layout.
df = pd.read_excel(FILE_PATH)
new_df = df.iloc[3:-2, 2:].fillna('')  # empty cells are exported as ''

# Build real lists: on Python 3 ``map`` returns a one-shot iterator that
# supports neither ``len()`` nor indexing, which the pivot loop relies on.
exporter = [clean_name(name) for name in df.iloc[3:-2, 0]]  # supplier side
importer = [clean_name(name) for name in df.iloc[1, 2:-3]]  # demand side

# Every cell is recorded twice: once under the importer (as an 'import' from
# the exporter) and once under the exporter (as an 'export' to the importer).
data = defaultdict(list)
for col, importer_name in enumerate(importer):
    for row, exporter_name in enumerate(exporter):
        amount = new_df.iloc[row, col]
        # pandas may hand back numpy scalars, which the stdlib json encoder
        # cannot serialise (e.g. numpy.int64); convert to a plain Python value.
        if hasattr(amount, 'item'):
            amount = amount.item()
        data[importer_name].append(OrderedDict(
            [('name', exporter_name), ('import', amount)]))  # import
        data[exporter_name].append(OrderedDict(
            [('name', importer_name), ('export', amount)]))  # export

# Dump as a list of [region, records] pairs.  ``list(...)`` is required on
# Python 3, where ``dict.items()`` is a view that json cannot serialise.
with codecs.open('data1.json', 'wb', 'utf-8') as F:
    json.dump(list(data.items()), F, ensure_ascii=False, indent=4)