读取json文件格式数据,整理导出成csv格式
import json,csv
#加载数据
def loadData(json_path='jifenluohu.json', csv_path='jifenluohu.csv'):
    """Export the ``rows`` list of a JSON document to a CSV file.

    Reads ``json_path``, takes the list stored under its top-level ``"rows"``
    key, and writes one CSV record per entry to ``csv_path`` with the columns
    pxid, id, idCard, name, score, unit and ranking, preceded by a header
    line. Keys of a row that are not in that column list are ignored.

    Args:
        json_path: JSON file to read. Defaults to the originally hard-coded
            file name.
        csv_path: CSV file to create (an existing file is overwritten).

    Returns:
        None. The result is the file written to ``csv_path``; a completion
        message is printed to stdout.

    Raises:
        KeyError: if the document has no ``"rows"`` key, or a row lacks one
            of the exported columns.
    """
    fieldnames = ["pxid", "id", "idCard", "name", "score", "unit", "ranking"]  # CSV column names

    # Name the encoding explicitly so the result does not depend on the
    # platform's locale default.
    with open(json_path, 'r', encoding='utf-8') as f:
        rows = json.load(f)['rows']

    # newline='' is required by the csv module: the writer emits its own
    # "\r\n" terminators, and text-mode newline translation would otherwise
    # turn them into "\r\r\n" on Windows (a blank line after every record).
    with open(csv_path, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()  # header line
        for row in rows:
            writer.writerow({key: row[key] for key in fieldnames})
    print("写csv完成")
if __name__ == "__main__":
    # loadData() writes the CSV and prints its own completion message; it has
    # no return statement, so printing its result would only show "None".
    # The guard keeps the export from running when this file is imported.
    loadData()
后面又增加了年龄、生肖、星座、省份、城市等属性,为后续进一步分析做准备。
import json,csv
from datetime import datetime
#根据出生年份获取生肖
def chinese_zodiac(year):
    """Return the Chinese zodiac animal (one character) for a birth year.

    The animal is chosen purely from ``year % 12``; a remainder of 0 maps to
    the first character of the table below (猴).

    Args:
        year: the birth year as an integer.

    Returns:
        A one-character string naming the zodiac animal.
    """
    animals = '猴鸡狗猪鼠牛虎兔龙蛇马羊'
    position = year % 12
    return animals[position]
# 根据出生日期获取星座
def get_constellation(month, date):
    """Return the Western zodiac sign for a birth month and day.

    Each month has a cutoff day: a day before the cutoff belongs to the sign
    at that month's own slot in the table, any later day (cutoff included)
    belongs to the next slot.

    Args:
        month: month number, 1-12.
        date: day of the month.

    Returns:
        The sign's Chinese name without the trailing "座" (e.g. "白羊").
    """
    cutoffs = (21, 20, 21, 21, 22, 22, 23, 24, 24, 24, 23, 22)
    # 13 entries: "摩羯" appears at both ends because it spans the
    # December/January boundary.
    signs = ("摩羯", "水瓶", "双鱼", "白羊", "金牛", "双子", "巨蟹", "狮子", "处女", "天秤", "天蝎", "射手", "摩羯")
    slot = month - 1 if date < cutoffs[month - 1] else month
    return signs[slot]
#city
def citydict(path="city.csv", encoding="utf-8"):
    """Load a code -> name lookup table from a two-column CSV file.

    The first column of each record becomes the key and the second column
    the value; any further columns are ignored. When a code occurs more than
    once, the last occurrence wins.

    Records that cannot form a mapping are skipped instead of raising:
    blank lines, records with fewer than two columns, and placeholder
    records whose code is empty (such as a bare ","). The previous
    line-based check compared the raw line, trailing newline included,
    against "," and therefore never matched.

    Args:
        path: CSV file to read. Defaults to the originally hard-coded name.
        encoding: text encoding of the file. Pass a different value (for
            example "gbk") if the table is not stored as UTF-8.

    Returns:
        dict mapping code strings to name strings.
    """
    citys = {}
    # newline="" lets the csv module do its own line-ending handling.
    with open(path, encoding=encoding, newline="") as file:
        for record in csv.reader(file):
            if len(record) < 2 or not record[0]:
                continue
            citys[record[0]] = record[1]
    return citys
#加载数据
def loadData(json_path='jifenluohu.json', csv_path='jifenluohu.csv', reference_year=2018):
    """Export the JSON ``rows`` to CSV, adding fields derived from ``idCard``.

    Besides the original columns (pxid, id, idCard, name, score, unit,
    ranking) every record gets:

    * province / city: the first 2 and first 6 characters of ``idCard``;
    * provincename / cityname: those codes looked up in ``citydict()``
      (left empty in the CSV when the code is not in the table);
    * birthday: characters 6-13 of ``idCard`` formatted as YYYY-MM-DD;
    * age: ``reference_year`` minus the birth year;
    * zoo / star: Chinese zodiac animal and Western zodiac sign.

    The slicing assumes ``idCard`` carries a 6-character region code followed
    by an 8-digit birth date -- NOTE(review): confirm against the input data.

    Args:
        json_path: JSON file to read. Defaults to the originally hard-coded
            file name.
        csv_path: CSV file to create (an existing file is overwritten).
        reference_year: year against which ``age`` is computed. Defaults to
            2018, the value that was previously hard-coded.

    Returns:
        None. The result is the file written to ``csv_path``; a completion
        message is printed to stdout.

    Raises:
        KeyError: if ``"rows"`` or one of the exported columns is missing.
        ValueError: if the birth-date part of an ``idCard`` is not numeric.
    """
    fieldnames = ["pxid", "id", "idCard", "name", "score", "unit", "ranking", "province", "city", "provincename", "cityname", "birthday", "age", "zoo", "star"]  # CSV column names

    # Name the encoding explicitly so the result does not depend on the
    # platform's locale default.
    with open(json_path, 'r', encoding='utf-8') as f:
        rows = json.load(f)['rows']

    # Load the lookup table before the output file is opened, so a missing
    # or broken table does not leave a truncated CSV behind.
    citys = citydict()

    # newline='' is required by the csv module; without it the writer's
    # "\r\n" terminators become "\r\r\n" on Windows.
    with open(csv_path, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()  # header line
        for row in rows:
            idCard = row["idCard"]
            province = idCard[0:2]
            city = idCard[0:6]
            year = idCard[6:10]
            month = idCard[10:12]
            day = idCard[12:14]
            writer.writerow({
                "pxid": row["pxid"],
                "id": row["id"],
                "idCard": idCard,
                "name": row["name"],
                "score": row["score"],
                "unit": row["unit"].strip(),
                "ranking": row["ranking"],
                "province": province,
                "city": city,
                "provincename": citys.get(province),
                "cityname": citys.get(city),
                "birthday": year + '-' + month + '-' + day,
                "age": reference_year - int(year),
                "zoo": chinese_zodiac(int(year)),
                "star": get_constellation(int(month), int(day)),
            })
    print("写csv完成")
if __name__ == "__main__":
    # loadData() writes the CSV and prints its own completion message; it has
    # no return statement, so printing its result would only show "None".
    # The guard keeps the export from running when this file is imported.
    loadData()
资料包以及用 pyspark 进行分析的过程,可从以下地址下载:
https://download.csdn.net/download/huoyongliang/10723220
百度云
https://pan.baidu.com/s/1XyoyO3AgkVwVRRBnGZq2Gg