pandas 练习题

(1)请计算每一年的宫颈癌患病百分比,以及每一年龄段的宫颈癌患病百分比,并将结果保存到ratioUS.csv中

import pandas as pd

# Raw inputs: death counts and population counts. populationUS must contain
# every column that deathUS has, because the ratios below are looked up by
# deathUS's column labels.
deathUS = pd.read_csv(r'C:\Users\David\Desktop\deathUS.csv')
populationUS = pd.read_csv(r'C:\Users\David\Desktop\populationUS.csv')

# Element-wise deaths / population, one output column per deathUS column
# (column order is preserved).
ratioUS = pd.DataFrame(
    {label: deathUS[label] / populationUS[label] for label in deathUS.columns}
)
# The division above also divided 'Year' by itself, so restore the real years.
ratioUS['Year'] = deathUS['Year']

ratioUS.to_csv(r'C:\Users\David\Desktop\ratioUS.csv', index=False)

(2)找出患病百分比最低的年龄段和患病百分比最高的年龄段,将这两个年龄段对应的列都删除,并将结果保存到DownRatioUS.csv中

# Overall rate per column: deaths summed over all rows divided by population
# summed over all rows. The first two entries are skipped by position
# (presumably the 'Year' and 'total' columns -- confirm against the CSV
# layout), leaving one rate per remaining column.
death_totals = deathUS.sum(axis=0)
population_totals = populationUS.sum(axis=0)
df = (death_totals / population_totals).to_frame().iloc[2:]

# Column label of the age group with the highest overall rate.
idmax = df[0].idxmax()

# Column label of the age group with the lowest overall rate.
idmin = df[0].idxmin()

# Remove both extreme columns from the ratio table, in place.
for extreme_label in (idmax, idmin):
    ratioUS.drop(columns=extreme_label, inplace=True)

ratioUS.to_csv(r'C:\Users\David\Desktop\DownRatioUS.csv', index=False)

(3)请按照每个年龄段做一张图,表示1950-2017的死亡率变化曲线

# 导入包

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the ratio table from ratioUS.csv; each column other than 'Year' and
# 'total' is drawn as its own line below.
Dataset = pd.read_csv(r'C:\Users\David\Desktop\ratioUS.csv')

# SimHei is selected so the Chinese title and y-axis label can be rendered.
plt.rcParams['font.sans-serif']='SimHei'
plt.figure(figsize=(15, 7))

# Fixed palette, one colour per plotted column.
color = ['yellow',"gray","pink","magenta","red","salmon","orange","gold","brown","lime","green","cyan","steelblue","blue","purple","chocolate","blueviolet"]

# One line per column, with 'Year' on the x axis. The palette index wraps
# around so that a table with more columns than colours reuses colours
# instead of raising IndexError.
for num, column in enumerate(Dataset.drop(['Year','total'],axis=1)):
    plt.plot(Dataset['Year'], Dataset[column], marker='',
             c=color[num % len(color)], linewidth=2, alpha=0.9, label=column)
plt.legend(loc=1, ncol=2)
plt.title("1950-2017年死亡率变化曲线", loc='center', fontsize=20, fontweight=0, color='r')
plt.xlabel("year")
plt.ylabel("死亡人数/总人数")
plt.show()

你可能感兴趣的:(python)