有的朋友在做量化投资时,苦恼于数据源问题。
一般来说,质量较好的数据源中,收费的有Wind,免费的有Tushare。前者数据大而全,后者的数据在团队的努力下逐步完善。
下附Tushare链接:
https://tushare.pro/document/2?doc_id=96
本人为了获取Tushare数据并保存在本地,写了部分代码,共享给网友,避免重复劳动。
import os
import tushare as ts
import pandas as pd
import datetime,time
# Register the Tushare Pro token (available after signing up for a Tushare
# account) and create the module-level API client used by DataTushare.
ts.set_token('***')
pro = ts.pro_api()
class DataTushare():
    """Download A-share data from Tushare Pro and store it as pickle files.

    Every public method queries the module-level ``pro`` client and writes its
    result into the directory given as ``output_path``. Dates are passed to the
    API as ``YYYYMMDD`` strings and converted with ``pd.to_datetime`` before
    saving. The ``wind_*`` file names are kept as in the original script.
    """

    def getCurrentTime(self):
        """Return the local time as ``[YYYY-MM-DD HH:MM:SS]`` for log prefixes."""
        return time.strftime('[%Y-%m-%d %H:%M:%S]')

    def _fetch_each(self, keys, fetch, label):
        """Call ``fetch(key)`` for every key and return the successful results.

        Best effort: a key whose fetch raises is logged with ``label``, followed
        by a 2 second pause, and then skipped. The returned list may therefore
        be shorter than ``keys`` (or empty).
        """
        results = []
        for key in keys:
            try:
                results.append(fetch(key))
            except Exception as e:
                print(self.getCurrentTime(), ":%s %s : Exception :%s" % (label, key, str(e)))
                time.sleep(2)
        return results

    @staticmethod
    def _append_to_pickle(df_new, pickle_path, **concat_kwargs):
        """Append ``df_new`` to the pickle at ``pickle_path`` and return the saved frame.

        When the file does not exist yet (first run) ``df_new`` alone is
        written. ``concat_kwargs`` are forwarded to ``pd.concat``. The saved
        frame always has a fresh 0..n-1 index.
        """
        # NOTE: pickle files can execute code on load; only point this at files
        # that this script produced itself.
        if os.path.exists(pickle_path):
            df_new = pd.concat([pd.read_pickle(pickle_path), df_new], **concat_kwargs)
        df_new = df_new.reset_index(drop=True)
        df_new.to_pickle(pickle_path)
        return df_new

    def AStockTradingDate(self, output_path):
        """Save the open trading days from 2002-12-31 to 2025-12-31.

        Writes a single-column frame (``date``) to ``wind_trading_date.pkl``,
        overwriting any previous file.
        """
        df_trading_date = pro.trade_cal(exchange_id='', start_date='20021231', end_date='20251231', fields='cal_date', is_open='1')
        df_trading_date.columns = ['date']
        df_trading_date['date'] = pd.to_datetime(df_trading_date['date'])
        df_trading_date.to_pickle(os.path.join(output_path, 'wind_trading_date.pkl'))

    def AStockListDelist(self, output_path):
        """Save listing and delisting dates of stocks.

        Author's notes: the Tushare stock list differs from Wind's
        "suspended-listing A shares" list because Wind's list also contains
        B shares; once B shares are removed the two agree. ``list_date`` and
        ``delist_date`` agree between the two sources.

        ``delist_date``: the stock is delisted on that day and was still listed
        before it. Stocks without a delisting date get 2099-12-31.

        Writes ``wind_stock_name_list_delist.pkl`` (overwrites), with ``ts_code``
        renamed to ``symbol`` and ``name`` renamed to ``symbolname``.
        """
        print(self.getCurrentTime(), ": AStockListDelist Download Start!")
        fields = 'ts_code,symbol,name,list_date,delist_date, list_status'
        # One request per list status. Per the author's notes, stocks with a
        # suspended listing come back under 'L', and 'P' returned no stocks.
        frames = [pro.stock_basic(exchange_id='', list_status=status, fields=fields)
                  for status in ('L', 'D', 'P')]
        # The numeric 'symbol' column is dropped so that ts_code can take its name.
        df_list_delist = pd.concat(frames).drop(['symbol'], axis=1).reset_index(drop=True)
        df_list_delist.rename(columns={'name': 'symbolname', 'ts_code': 'symbol'}, inplace=True)
        df_list_delist['list_date'] = pd.to_datetime(df_list_delist['list_date'])
        df_list_delist['delist_date'] = pd.to_datetime(df_list_delist['delist_date'])
        # Only the delisting date gets the far-future placeholder; a frame-wide
        # fillna would also write a Timestamp into any other missing field.
        df_list_delist['delist_date'] = df_list_delist['delist_date'].fillna(pd.Timestamp(2099, 12, 31))
        df_list_delist.to_pickle(os.path.join(output_path, 'wind_stock_name_list_delist.pkl'))
        print(self.getCurrentTime(), ": AStockListDelist Download Complete!")

    def AStockDailyData(self, start_time, end_time, output_path):
        """Download daily quotes, adjustment factors and share counts, then append them.

        For every open trading day in ``[start_time, end_time]`` (``YYYYMMDD``
        strings) this fetches ``pro.daily``, ``pro.adj_factor`` and
        ``pro.daily_basic``, merges them on symbol and date, converts units and
        appends the result to ``wind_stock_daily_quote.pkl`` (created on the
        first run). If nothing could be fetched, nothing is written.

        Author's notes on the data:
        - ``pro.daily`` includes suspended stocks (their ``vol`` and ``amount``
          are 0). It excludes stocks whose listing is suspended, and excludes
          data after a delisting, but includes non-trading days before it.
        - The stocks returned by ``suspend`` exclude suspended-listing stocks.
        - Wind API ``wset`` suspension data includes non-A-share instruments
          and excludes suspended-listing stocks; with non-A instruments removed
          it matches Tushare ``suspend``.
        - Wind data browser: with an explicit date it matches Tushare; without
          one it may include some suspended-listing stocks and other normally
          trading stocks.
        """
        trading_days = pro.trade_cal(exchange_id='', start_date=start_time, end_date=end_time, fields='cal_date', is_open='1').cal_date
        print(self.getCurrentTime(), ": AStockDailyData Download Start!")

        def fetch_price_and_adj(day):
            # Both requests must succeed for the day to be kept.
            return pro.daily(trade_date=day), pro.adj_factor(trade_date=day)

        price_adj_pairs = self._fetch_each(trading_days, fetch_price_and_adj, 'AStockDailyData')
        basic_frames = self._fetch_each(trading_days, lambda day: pro.daily_basic(trade_date=day), 'AStockDailyData')
        if not price_adj_pairs or not basic_frames:
            # No open trading day in the range, or every request failed.
            print(self.getCurrentTime(), ": AStockDailyData: no data fetched, nothing saved!")
            return

        df_price = pd.concat([price for price, _ in price_adj_pairs])
        df_adj = pd.concat([adj for _, adj in price_adj_pairs])
        df_price_adj = pd.merge(df_price, df_adj, on=['ts_code', 'trade_date'], how='left')
        df_price_adj = df_price_adj.sort_values(['trade_date', 'ts_code']).reset_index(drop=True)
        df_price_adj['trade_date'] = pd.to_datetime(df_price_adj['trade_date'])
        df_price_adj.rename(columns={'trade_date': 'date', 'ts_code': 'symbol'}, inplace=True)

        df_basic = pd.concat(basic_frames)
        df_basic['trade_date'] = pd.to_datetime(df_basic['trade_date'])
        df_basic.rename(columns={'trade_date': 'date', 'ts_code': 'symbol'}, inplace=True)

        share_columns = ['total_share', 'float_share', 'free_share']
        xy = pd.merge(df_price_adj, df_basic.loc[:, ['symbol', 'date'] + share_columns], on=['symbol', 'date'], how='left')
        xy['vol'] = 100 * xy['vol']  # source unit: lots ("hands")
        xy['amount'] = 1000 * xy['amount']  # source unit: thousand yuan
        for column in share_columns:
            xy[column] = 10000 * xy[column]  # source unit: ten thousand shares

        self._append_to_pickle(xy, os.path.join(output_path, 'wind_stock_daily_quote.pkl'), sort=False)
        print(self.getCurrentTime(), ": AStockDailyData Download Complete!")

    def AStockSuspend(self, start_time, end_time, output_path):
        """Download suspended stocks per trading day and append them.

        Queries ``pro.suspend`` for every open trading day in
        ``[start_time, end_time]`` and appends the rows to
        ``wind_stock_suspend.pkl`` (created on the first run), with
        ``suspend_date`` renamed to ``date`` and ``ts_code`` to ``symbol``.
        If nothing could be fetched, nothing is written.
        """
        trading_days = pro.trade_cal(exchange_id='', start_date=start_time, end_date=end_time, fields='cal_date', is_open='1').cal_date
        print(self.getCurrentTime(), ": AStockSuspend Download Start!")
        frames = self._fetch_each(
            trading_days,
            lambda day: pro.suspend(suspend_date=day, fields='ts_code,suspend_date,resume_date,suspend_reason'),
            'AStockSuspend')
        if not frames:
            print(self.getCurrentTime(), ": AStockSuspend: no data fetched, nothing saved!")
            return

        df_suspend = pd.concat(frames)
        df_suspend = df_suspend.sort_values(['suspend_date', 'ts_code']).reset_index(drop=True)
        df_suspend['suspend_date'] = pd.to_datetime(df_suspend['suspend_date'])
        df_suspend['resume_date'] = pd.to_datetime(df_suspend['resume_date'])
        df_suspend.rename(columns={'suspend_date': 'date', 'ts_code': 'symbol'}, inplace=True)
        self._append_to_pickle(df_suspend, os.path.join(output_path, 'wind_stock_suspend.pkl'))
        print(self.getCurrentTime(), ": AStockSuspend Download Complete!")

    def AIndexDailyData(self, indexSymbolList, start_time, end_time, output_path):
        """Download daily index quotes and append them.

        Queries ``pro.index_daily`` for every code in ``indexSymbolList`` over
        ``[start_time, end_time]`` and appends the rows to
        ``wind_index_daily_quote.pkl`` (created on the first run).
        If nothing could be fetched, nothing is written.
        """
        print(self.getCurrentTime(), ": AIndexDailyData Download Start!")
        frames = self._fetch_each(
            indexSymbolList,
            lambda code: pro.index_daily(ts_code=code, start_date=start_time, end_date=end_time),
            'AIndexDailyData')
        if not frames:
            print(self.getCurrentTime(), ": AIndexDailyData: no data fetched, nothing saved!")
            return

        df_index_daily = pd.concat(frames)
        df_index_daily = df_index_daily.sort_values(['trade_date', 'ts_code']).reset_index(drop=True)
        df_index_daily['trade_date'] = pd.to_datetime(df_index_daily['trade_date'])
        df_index_daily.rename(columns={'trade_date': 'date', 'ts_code': 'symbol'}, inplace=True)
        df_index_daily['vol'] = df_index_daily['vol'] * 100  # source unit: lots ("hands")
        df_index_daily['amount'] = df_index_daily['amount'] * 1000  # source unit: thousand yuan
        self._append_to_pickle(df_index_daily, os.path.join(output_path, 'wind_index_daily_quote.pkl'))
        print(self.getCurrentTime(), ": AIndexDailyData Download Complete!")
def download_from_tushare():
    """Run one download pass for today's date.

    Refreshes the trading calendar and the list/delist table, then downloads
    today's suspensions, daily stock data and daily quotes for four indices.
    Files are written to ``$HOME_DATA/stock``.

    Raises:
        KeyError: if the ``HOME_DATA`` environment variable is not set.
    """
    # Adjust this to wherever you want the data saved.
    output_path = os.path.join(os.environ['HOME_DATA'], r'stock')
    datatushare = DataTushare()
    # Start and end are both today, i.e. a single-day incremental download.
    start_time = end_time = time.strftime('%Y%m%d')
    datatushare.AStockTradingDate(output_path)
    datatushare.AStockListDelist(output_path)
    datatushare.AStockSuspend(start_time, end_time, output_path)
    datatushare.AStockDailyData(start_time, end_time, output_path)
    datatushare.AIndexDailyData(['000016.SH', '000300.SH', '000905.SH', '399006.SZ'], start_time, end_time, output_path)
if __name__ == "__main__":
    # Probe connectivity with a single ping before downloading. The count flag
    # is '-n' on Windows and '-c' elsewhere; without it ping never returns on
    # POSIX systems.
    count_flag = '-n' if os.name == 'nt' else '-c'
    exit_code = os.system('ping %s 1 www.baidu.com' % count_flag)
    # os.system returns 0 on success. The earlier `~exit_code` test was truthy
    # for every value except -1, so a failed ping was never detected.
    if exit_code == 0:
        download_from_tushare()
    else:
        raise ConnectionError(str(datetime.datetime.now()) + ': Internet Connection Failed!')