导入包
# Data-processing tools
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
对月份及所在政党进行定义
# Map each upper-case three-letter month abbreviation to its month number
# (JAN -> 1 ... DEC -> 12), numbered in calendar order.
months = {
    abbreviation: number
    for number, abbreviation in enumerate(
        (
            'JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
            'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC',
        ),
        start=1,
    )
}
# Map each candidate name (as written in the 'cand_nm' column) to the
# political party the candidate belongs to.
parties = {
    'Bachmann, Michelle': 'Republican',
    'Romney, Mitt': 'Republican',
    'Obama, Barack': 'Democrat',
    "Roemer, Charles E. 'Buddy' III": 'Reform',
    'Pawlenty, Timothy': 'Republican',
    'Johnson, Gary Earl': 'Libertarian',
    'Paul, Ron': 'Republican',
    'Santorum, Rick': 'Republican',
    'Cain, Herman': 'Republican',
    'Gingrich, Newt': 'Republican',
    'McCotter, Thaddeus G': 'Republican',
    'Huntsman, Jon': 'Republican',
    'Perry, Rick': 'Republican',
}
读取数据
数据下载地址: 链接:https://pan.baidu.com/s/19_-s3Xv_fiYkMtIca-stdw 提取码:iwjt
# Load the donation records from the local text file into a DataFrame.
data = pd.read_csv('./data/usa_election.txt')

# Preview the first five rows.
data.head()
字段解释
cmte_id :竞选委员会ID
cand_nm :候选人姓名
contbr_nm : 捐赠人姓名
contbr_st :捐赠人所在州
contbr_employer : 捐赠人所在公司
contbr_occupation : 捐赠人职业
contb_receipt_amt :捐赠数额(美元)
contb_receipt_dt : 捐款的日期
新增一列party,表示各个候选人所属的党派
# Add a 'party' column by looking up every row's candidate name ('cand_nm')
# in the `parties` mapping.
data['party'] = data['cand_nm'].map(parties)

# Preview the first five rows, now including the new column.
data.head()
party这一列中有哪些元素
# List the distinct values found in the 'party' column.
# Recorded output:
#   array(['Republican', 'Democrat', 'Reform', 'Libertarian'], dtype=object)
data['party'].unique()
统计party列中各个元素出现的次数:value_counts()是Series的方法,不传参数即可调用,返回一个包含每个元素出现次数的Series
# Count how many rows fall under each party.
# Recorded output:
#   Democrat       292400
#   Republican     237575
#   Reform           5364
#   Libertarian       702
#   Name: party, dtype: int64
data['party'].value_counts()
查看各个党派收到的政治献金总数contb_receipt_amt
# Total donation amount ('contb_receipt_amt') received by each party.
# The explicit `axis=0` argument was dropped: it equals the default, and the
# `axis` keyword of DataFrame.groupby is deprecated in recent pandas releases.
# Recorded output:
#   party
#   Democrat       8.105758e+07
#   Libertarian    4.132769e+05
#   Reform         3.390338e+05
#   Republican     1.192255e+08
#   Name: contb_receipt_amt, dtype: float64
data.groupby(by='party')['contb_receipt_amt'].sum()
查看具体每天各个党派收到的政治献金总数contb_receipt_amt
# Total donation amount ('contb_receipt_amt') for every (party, receipt date)
# pair, i.e. what each party received on each day.
# The explicit `axis=0` argument was dropped: it equals the default, and the
# `axis` keyword of DataFrame.groupby is deprecated in recent pandas releases.
data.groupby(by=['party', 'contb_receipt_dt'])['contb_receipt_amt'].sum()
将表中日期格式转换为'yyyy-mm-dd'
def transform_date(d):
    """Convert a 'DD-MON-YY' date string into 'YYYY-MM-DD' form.

    Args:
        d: Date text made of day, month abbreviation and two-digit year
            joined by '-', e.g. '20-JUN-11'. The month abbreviation must be
            a key of the module-level ``months`` mapping.

    Returns:
        The date as a string such as '2011-06-20'.

    Raises:
        ValueError: If ``d`` does not split into exactly three '-' parts.
        KeyError: If the month abbreviation is not a key of ``months``.
    """
    day, month, year = d.split('-')
    # The two-digit year is prefixed with '20', so every date is treated as
    # belonging to the 2000s.
    # Month and day are zero-padded to two digits so the result really is
    # 'YYYY-MM-DD' (previously June came out as '2011-6-20'), which also
    # makes the strings sort chronologically.
    return '20{}-{:02d}-{}'.format(year, months[month], day.zfill(2))


# Rewrite the receipt-date column with the normalised date strings.
date = data['contb_receipt_dt'].map(transform_date)
data['contb_receipt_dt'] = date

# Preview the first five rows to check the converted dates.
data.head()
查看老兵(捐献者职业)DISABLED VETERAN主要支持谁 :查看老兵们捐赠给谁的钱最多
# Boolean mask: True for rows whose donor occupation is exactly
# 'DISABLED VETERAN' (shown on its own for inspection).
data['contbr_occupation'] == 'DISABLED VETERAN'

# Keep only the donations made by disabled veterans.
old_bing_df = data[data['contbr_occupation'].eq('DISABLED VETERAN')]

# Preview the first five matching rows.
old_bing_df.head()
对竞选者进行分组,统计每位竞选者收到的捐款总额
# Total amount donated by disabled veterans to each candidate ('cand_nm').
# The explicit `axis=0` argument was dropped: it equals the default, and the
# `axis` keyword of DataFrame.groupby is deprecated in recent pandas releases.
old_bing_df.groupby(by='cand_nm')['contb_receipt_amt'].sum()
找出捐赠金额的最大值
# Largest single donation amount in the whole data set.
data['contb_receipt_amt'].max()
找出候选人的捐赠者中,捐赠金额最大的人的职业以及捐献额。通过 query("查询条件") 来查找捐献人的职业
# Select the row(s) whose donation equals the overall maximum; the result
# shows the donor's occupation together with the amount.
# The maximum is handed to query() through an `@` variable reference instead
# of being formatted with '%f': '%f' keeps only six decimal places, so the
# equality comparison could miss the row when the amount has more fractional
# digits than that.
max_receipt_amt = data['contb_receipt_amt'].max()
data.query('contb_receipt_amt == @max_receipt_amt')