(1)请绘制全球 1950~2021 年总人口随时间变化的趋势折线图,并分析整体人口的变化趋势。
(2)然后分别统计所有国家 2011~2021 年的人口增长率和增长人数。
(3)给出人口增长率最大的前 10 个国家和最小的后 10 个国家名单,以及人口增长数最多的前 10 个国家和最少的后 10 个国家名单。
# Sub-question 1: line chart of the global total population over time.
from matplotlib import pyplot as plt

# Select the 'World' rows here instead of relying on a `world` variable:
# this file only assigns `world` further down (in the forecasting section),
# so when read top to bottom the original plot raised NameError.
world_trend = df.loc[df['Country name'] == 'World', ['Year', 'Population']]

plt.figure(figsize=(15, 6), facecolor='#fff')
plt.plot(world_trend['Year'], world_trend['Population'])
plt.title('世界人口时序图')
plt.show()
# Sub-question 2: per-country population growth rate and growth count.
# Keep the 2011-2021 rows and the first three columns of `df`
# (presumably country name, year and population -- confirm the column order).
in_window = df['Year'].between(2011, 2021)
res2 = df[in_window].iloc[:, :3].reset_index(drop=True)

# Both statistics are row-to-row changes inside each country's group.
# NOTE(review): these are year-over-year values, and the first retained year
# of every country is NaN because 2010 is filtered out before differencing --
# confirm this matches the intended "2011~2021" statistic.
population_by_country = res2.groupby('Country name')['Population']
relative_change = population_by_country.pct_change()
absolute_change = population_by_country.diff()
res2['growth_rate'] = relative_change
res2['growth_count'] = absolute_change
res2.head()
# Sub-question 3: rank countries by their largest growth rate / growth count.
# Each pivot holds one row per country with the maximum of that metric.
peak_rate = pd.pivot_table(
    res2, index='Country name', values='growth_rate', aggfunc='max'
)
peak_count = pd.pivot_table(
    res2, index='Country name', values='growth_count', aggfunc='max'
)

# Ten highest, then ten lowest, growth rates.
peak_rate.sort_values('growth_rate', ascending=False).head(10)
peak_rate.sort_values('growth_rate', ascending=True).head(10)
# Ten highest, then ten lowest, growth counts.
peak_count.sort_values('growth_count', ascending=False).head(10)
peak_count.sort_values('growth_count', ascending=True).head(10)
(1)请比较 2021 年不同国家的总人口数,绘制不同国家总人口数的柱状图。
(2)给出总人口最多的前 10 个国家和最少的后 10 个国家名单。
# Sub-question 1: bar chart of the 2021 total population per 'Country name'.
pop_2021 = df[df['Year'] == 2021]
pop_2021  # notebook-style display of the 2021 rows
names_2021 = pop_2021['Country name']

plt.figure(figsize=(15, 6), facecolor='#fff')
plt.bar(names_2021, pop_2021['Population'])
# Only every 20th name is used as a tick label (presumably to keep the
# axis readable).
tick_names = names_2021.values[::20].tolist()
plt.xticks(tick_names, rotation=330)
plt.show()
# Sub-question 2: the ten most and ten least populous entries in 2021.
snapshot_2021 = df.loc[df['Year'] == 2021, ['Country name', 'Population']]
snapshot_2021.sort_values('Population', ascending=False).head(10)
snapshot_2021.sort_values('Population', ascending=True).head(10)
请任选三个国家,分别绘制不同年龄段人口的直方图,对比分析年龄分布的异同点。并结合这三个国家的国情,分析不同年龄段人口的分布原因。
# Names of the age-band population columns, one per line, youngest first.
a = '''Population aged 1 to 4 years
Population aged 5 to 9 years
Population aged 10 to 14 years
Population aged 15 to 19 years
Population aged 20 to 29 years
Population aged 30 to 39 years
Population aged 40 to 49 years
Population aged 50 to 59 years
Population aged 60 to 69 years
Population aged 70 to 79 years
Population aged 80 to 89 years
Population aged 90 to 99 years
Population older than 100 years'''
# One column name per non-blank line. Stripping guards against stray
# whitespace or '\r' if the literal is ever re-pasted with other line endings.
columns = [line.strip() for line in a.splitlines() if line.strip()]
# Grouped bar chart: 2021 population per age band for three countries.
# The original held India's data in `usa_*` variables and North Korea's in
# `india_*`; driving the plot from one (country, colour) list keeps every
# name, filter and legend label in agreement.
res3 = df[['Country name', 'Year'] + columns]
plt.figure(figsize=(15, 6), facecolor='#fff')

# Age-band labels are every column after 'Country name' and 'Year'.
ages = res3.columns.tolist()[2:]
# (country, bar colour), in left-to-right order inside each age band.
countries = [
    ('China', '#867BA9'),
    ('India', '#A69ABD'),
    ('North Korea', '#924F8E'),
]
# Width of one bar; the three bars of a band are drawn side by side.
bar_width = 0.25
# Left-most bar position of every age band.
band_position = np.arange(len(ages))

for slot, (country, color) in enumerate(countries):
    in_2021 = (res3['Country name'] == country) & (res3['Year'] == 2021)
    # First matching row, restricted to the age-band columns.
    population = res3.loc[in_2021, ages].iloc[0].tolist()
    plt.bar(
        band_position + slot * bar_width,
        population,
        color=color,
        width=bar_width,
        label=country,
    )

# Put each tick under the middle bar of its band.
plt.xticks(band_position + bar_width, ages, rotation=330)
plt.title('2021年按年龄划分的人口分布')
plt.legend()
plt.show()
请建立人口预测的数学模型,然后分别预测到 2100 年底中国、印度和全球的总人口数,并分析人口的变化趋势。
# Per-entity time series: columns 1 and 2 of `df` (used below as 'Year' and
# 'Population') for China, India and the 'World' rows, re-indexed from 0.
china, india, world = (
    df[df['Country name'] == entity].iloc[:, 1:3].reset_index(drop=True)
    for entity in ('China', 'India', 'World')
)

# One line chart per country: population against year.
for series, chart_title in ((china, '中国人口时序图'), (india, '印度人口时序图')):
    plt.figure(figsize=(15, 6), facecolor='#fff')
    plt.plot(series['Year'], series['Population'])
    plt.title(chart_title)
    plt.show()
def _moving_average_fit(frame, window, chart_title):
    """Fit a simple moving-average forecast and plot it against the data.

    The mean of ``window`` consecutive populations is used as the forecast
    for the value that follows them, so the first ``window`` observations
    have no forecast and are left out of the comparison.

    Args:
        frame: DataFrame with 'Year' and 'Population' columns.
        window: Number of consecutive values averaged per forecast (>= 1).
        chart_title: Title of the actual-vs-forecast chart.

    Returns:
        Tuple ``(averages, rmse, r2)``: every full-window moving average
        (the last one has no observation to compare against), the root mean
        squared forecast error, and the ``r2_score`` of the forecasts.
    """
    population = frame['Population']
    # mode='valid' keeps exactly the full-window averages; the original
    # `[n1-1:-n1+1]` slice of the full convolution gives the same values for
    # window >= 2 but collapses to an empty array when window == 1.
    averages = np.convolve(np.ones(window) / window, population, mode='valid')
    forecast = averages[:-1]
    # Slice by `window`, not a hard-coded 2, so the plot and the metrics stay
    # aligned if the window size is changed.
    actual = population.iloc[window:]
    years = frame['Year'].iloc[window:]

    rmse = np.sqrt(((actual - forecast) ** 2).mean())
    # r2_score is not defined in this section; presumably
    # sklearn.metrics.r2_score imported elsewhere -- confirm.
    r2 = r2_score(actual, forecast)

    plt.figure(figsize=(15, 6), facecolor='#fff')
    plt.plot(years, actual)
    plt.plot(years, forecast)
    plt.title(chart_title)
    plt.legend(['实际值', '预测值'])
    plt.show()
    return averages, rmse, r2


# Window size shared by the three fits below.
n1 = 2

yt1_china, s1_china, r2_china = _moving_average_fit(china, n1, '中国人口时序图')
s1_china, r2_china  # notebook-style display of (RMSE, R^2)

yt1_india, s1_india, r2_india = _moving_average_fit(india, n1, '印度人口时序图')
s1_india, r2_india

yt1_world, s1_world, r2_world = _moving_average_fit(world, n1, '世界人口时序图')
s1_world, r2_world