python爬取股票数据

先通过 `pip install tushare` 安装 tushare,然后用它爬取银行股票数据。

对数据进行初步整理:用今日的收盘价减去昨日的收盘价,定义 value 表示涨跌,并绘图直观展示最近的股票走势。其实也可以直接根据 p_change>0 来做标签分类 -_-

import tushare as ts
import pandas as pd 
import pandas as pd
# Download the daily history of Bank of China (stock code 601988).
# NOTE(review): get_hist_data looks like tushare's legacy interface — confirm
# it is still served before relying on this snippet.
cb_csv_path = 'G:\\Project\\data\\CB.csv'
df_CB = ts.get_hist_data('601988', start='2015-01-01', end='2017-11-01')
# Keep a local copy; index=True also writes the index as a column.
df_CB.to_csv(cb_csv_path, sep=',', index=True)
# Reload from exactly the path written above. The original read call used a
# raw string with doubled backslashes, which spells a different path.
df_CB = pd.read_csv(cb_csv_path, encoding='gbk')

# Use the date column as the index and sort it in ascending order.
df_CB = df_CB.set_index('date')
df_CB = df_CB.sort_index()
# Each row holds the day's open, close, volume, price change and the
# 10-/20-day averages (see the printed columns).
print(df_CB.head())
        open  high  close   low      volume  price_change  p_change  \

date
2015-01-05 4.18 4.50 4.42 4.18 23084548.0 0.27 6.51
2015-01-06 4.38 4.74 4.56 4.28 23127260.0 0.14 3.17
2015-01-07 4.46 4.64 4.54 4.44 15485755.0 -0.02 -0.44
2015-01-08 4.55 4.57 4.33 4.31 14892726.0 -0.21 -4.63
2015-01-09 4.28 4.76 4.47 4.23 22776194.0 0.14 3.23

          ma5   ma10   ma20       v_ma5      v_ma10       v_ma20  turnover  

date
2015-01-05 4.036 3.862 3.736 20236083.4 20700121.8 18068874.18 1.13
2015-01-06 4.184 3.964 3.791 21016213.4 21901637.0 18406099.90 1.13
2015-01-07 4.322 4.029 3.838 20103937.2 19875380.1 18223578.34 0.76
2015-01-08 4.400 4.090 3.867 19874622.2 18551524.5 18027645.85 0.73
2015-01-09 4.464 4.180 3.901 19873296.6 19332925.8 18291454.08 1.11

# `value` is the day-over-day move: today's close minus the previous close.
# Series.diff() computes exactly close - close.shift(1) on the same index.
value = df_CB['close'].diff()
# The first entry has no previous row and is NaN; back-fill it from the
# following row.
value = value.bfill()
# Turn the move into a binary label: a non-negative change (flat days
# included) becomes 1, a drop becomes 0. The order of the two assignments
# matters because the first one rewrites the values the second one tests.
value[value >= 0] = 1
value[value < 0] = 0
df_CB['Value'] = value
# Back-fill any remaining missing cells. DataFrame.bfill() replaces the
# deprecated fillna(method='bfill') and matches the Series call above.
df_CB = df_CB.bfill()
df_CB = df_CB.astype('float64')
print(df_CB.head())
        open  high  close   low      volume  price_change  p_change  \

date
2015-01-05 4.18 4.50 4.42 4.18 23084548.0 0.27 6.51
2015-01-06 4.38 4.74 4.56 4.28 23127260.0 0.14 3.17
2015-01-07 4.46 4.64 4.54 4.44 15485755.0 -0.02 -0.44
2015-01-08 4.55 4.57 4.33 4.31 14892726.0 -0.21 -4.63
2015-01-09 4.28 4.76 4.47 4.23 22776194.0 0.14 3.23

          ma5   ma10   ma20       v_ma5      v_ma10       v_ma20  \

date
2015-01-05 4.036 3.862 3.736 20236083.4 20700121.8 18068874.18
2015-01-06 4.184 3.964 3.791 21016213.4 21901637.0 18406099.90
2015-01-07 4.322 4.029 3.838 20103937.2 19875380.1 18223578.34
2015-01-08 4.400 4.090 3.867 19874622.2 18551524.5 18027645.85
2015-01-09 4.464 4.180 3.901 19873296.6 19332925.8 18291454.08

        turnover  Value  

date
2015-01-05 1.13 1.0
2015-01-06 1.13 1.0
2015-01-07 0.76 0.0
2015-01-08 0.73 0.0
2015-01-09 1.11 1.0

#绘图
%matplotlib inline
Data = df_CB[['open','close','ma5','ma10','ma20']]
Data=Data.astype(float)
Data.plot()
Data.ix['2016-01-01':'2017-01-01'].plot()

python爬取股票数据_第1张图片
python爬取股票数据_第2张图片

# Same procedure for the other banks, here ICBC (stock code 601398).
gongshang_csv_path = 'G:\\Project\\data\\gongshang.csv'
df_gongshang = ts.get_hist_data('601398', start='2015-01-01', end='2017-11-01')

# Save a local copy, then reload it from exactly the same path. The original
# read call used a raw string with doubled backslashes, which spells a
# different path.
df_gongshang.to_csv(gongshang_csv_path, sep=',', index=True)
df_gongshang = pd.read_csv(gongshang_csv_path, encoding='gbk')
print(df_gongshang.tail())
       date  open  high  close   low      volume  price_change  p_change  \

685 2015-01-09 4.82 5.15 4.88 4.74 9915904.0 0.05 1.03
686 2015-01-08 5.05 5.07 4.83 4.80 6920490.5 -0.21 -4.17
687 2015-01-07 5.00 5.10 5.04 4.95 8105967.5 -0.06 -1.18
688 2015-01-06 5.00 5.36 5.10 4.95 14975615.0 0.04 0.79
689 2015-01-05 4.93 5.15 5.06 4.90 13658517.0 0.19 3.90

   ma5   ma10   ma20       v_ma5      v_ma10      v_ma20  turnover  

685 4.982 4.818 4.628 10715298.8 9210890.05 8513672.29 0.37
686 4.980 4.763 4.614 10832707.4 8843879.40 8431188.07 0.26
687 4.956 4.728 4.600 10841814.8 8930133.55 8525606.67 0.30
688 4.870 4.695 4.571 10993154.7 9866091.80 8553582.62 0.56
689 4.774 4.626 4.533 9339578.6 8973967.90 8094866.02 0.51

# Use the date column as the index and sort it in ascending order.
df_gongshang = df_gongshang.set_index('date')
df_gongshang = df_gongshang.sort_index()
# print(df_gongshang.tail())  # uncomment to inspect the sorted frame
# Plot the close price together with the ma10/ma20 columns.
Data_gs = df_gongshang[['close', 'ma10', 'ma20']]
Data_gs = Data_gs.astype(float)
Data_gs.plot()
# Second chart: only the rows labelled 2016-01-01 through 2017-01-01.
# `.ix` has been removed from pandas; `.loc` performs the same label-based
# slice on the sorted date index.
Data_gs.loc['2016-01-01':'2017-01-01'].plot()

python爬取股票数据_第3张图片
python爬取股票数据_第4张图片

你可能感兴趣的:(python数据挖掘)