《利用Python进行数据分析》 实例:USDA食品数据库

USDA食品数据库:

from pandas import DataFrame,Series
from pylab import *
import pandas as pd
import json

def groupby(ndata,nutrient='Zinc, Zn'):
	"""Plot the median 'value' of one nutrient for each 'groupp' as a horizontal bar chart.

	The frame is grouped by the 'nutrient' and 'groupp' columns, the 0.5
	quantile of 'value' is taken per group, and the rows for the requested
	nutrient are sorted in ascending order and plotted.

	Parameters:
		ndata: DataFrame providing the 'nutrient', 'groupp' and 'value' columns.
		nutrient: label from the 'nutrient' column to plot; defaults to
			'Zinc, Zn', which was previously hard-coded.

	Raises:
		KeyError: if `nutrient` is not present in the grouped result.
	"""
	# The 0.5 quantile is the median of each (nutrient, groupp) group.
	medians = ndata.groupby(['nutrient','groupp'])['value'].quantile(0.5)
	# Selecting the outer index level leaves a Series keyed by 'groupp'.
	medians[nutrient].sort_values().plot(kind='barh')
	show()

def combination(info,nutrients):
	"""Outer-join the nutrient rows with the food info rows on 'id' and plot the result.

	Parameters:
		info: DataFrame used as the right side of the join.
		nutrients: DataFrame used as the left side of the join.
	"""
	# An outer join keeps ids that appear in only one of the two frames.
	merged = pd.merge(left=nutrients,right=info,how='outer',on='id')
	groupby(merged)

def changename(nutrients,data):
	"""Build the food info frame, rename clashing columns, then merge and plot.

	Both frames carry 'description' and 'group' columns, so each side gets
	distinct names before they are joined.

	Parameters:
		nutrients: DataFrame of nutrient rows; its 'description' and 'group'
			columns are renamed to 'nutrient' and 'groupq'.
		data: records passed to the DataFrame constructor; only the
			'description', 'group', 'id' and 'manufacturer' columns are kept.
	"""
	# Only these four columns are taken from the food records.
	wanted_columns = ['description','group','id','manufacturer']
	# Rename the info side so its column names do not clash with the nutrient side.
	info = DataFrame(data,columns=wanted_columns).rename(
		columns={'description':'breed','group':'groupp'},
		copy=False,
	)
	# Rename the nutrient side for the same reason.
	nutrients = nutrients.rename(
		columns={'description':'nutrient','group':'groupq'},
		copy=False,
	)
	print(info)
	combination(info,nutrients)

def lists(data):
	"""Flatten every record's nutrient list into one DataFrame and pass it on.

	Parameters:
		data: iterable of records, each providing a 'nutrients' entry and an
			'id' entry.
	"""
	# One frame per record, each tagged with the id of the record it came from.
	per_food = [
		DataFrame(food['nutrients']).assign(id=food['id'])
		for food in data
	]
	# Stack all frames under a fresh index, then drop duplicated rows.
	combined = pd.concat(per_food,ignore_index=True).drop_duplicates()
	changename(combined,data)

def decode(path):
	"""Load the JSON file at `path` and hand the parsed data to `lists`.

	Parameters:
		path: filesystem path of the JSON file to read.

	Raises:
		OSError: if the file cannot be opened.
		json.JSONDecodeError: if the file content is not valid JSON.
	"""
	# The context manager closes the file handle even if parsing fails;
	# the previous bare open() call leaked it.
	with open(path) as source:
		data = json.load(source)		# parse JSON into Python objects
	lists(data)


if __name__=="__main__":
	# Script entry point: run the pipeline on a hard-coded path to the JSON file.
	decode(r"D:\pythonAnalysis\Python for Data Analysis-1st-edition\pydata-book-1st-edition\ch07\foods-2011-10-03.json")


《利用Python进行数据分析》 实例:USDA食品数据库_第1张图片



你可能感兴趣的:(数据挖掘)