通过tushare获取量化投资研究数据源

有的朋友在做量化投资时,苦恼于数据源问题。
一般来说,质量较好的数据源中,收费的有Wind,免费的有Tushare。前者数据大而全,后者的数据在团队的努力下正逐步完善。

下附Tushare链接:
https://tushare.pro/document/2?doc_id=96

本人为了获取Tushare数据并保存在本地,写了部分代码,共享给网友,避免重复劳动。

import os
import tushare as ts
import pandas as pd
import datetime,time

# Register the Tushare Pro token and create the module-level API client that
# DataTushare uses. The token is obtained after applying for a Tushare account;
# replace the '***' placeholder with your own.
ts.set_token('***')
pro = ts.pro_api()

class DataTushare():
    """Download A-share data from Tushare Pro and store it as pickle files.

    All download methods use the module-level ``pro`` client. Output file
    names keep their historical ``wind_`` prefix so existing readers of the
    pickles keep working.
    """

    def getCurrentTime(self):
        """Return the local time formatted as ``[YYYY-mm-dd HH:MM:SS]`` for log lines."""
        return time.strftime('[%Y-%m-%d %H:%M:%S]')

    @staticmethod
    def _concat_frames(frames):
        """Concatenate the non-empty frames; return an empty DataFrame when there are none."""
        usable = [frame for frame in frames if frame is not None and not frame.empty]
        return pd.concat(usable) if usable else pd.DataFrame()

    @staticmethod
    def _append_to_pickle(file_path, df_new, subset=None):
        """Merge ``df_new`` into the pickle at ``file_path`` and write the result back.

        The file is created when it does not exist yet, so the first run does
        not fail. Duplicate rows (judged on ``subset`` columns, or on the whole
        row when ``subset`` is None) are dropped keeping the newest one, so
        re-running a download for the same day does not duplicate data.
        """
        if os.path.exists(file_path):
            # The pickle is a local file previously written by this class.
            df_new = pd.concat([pd.read_pickle(file_path), df_new], sort=False)
        df_new = df_new.drop_duplicates(subset=subset, keep='last').reset_index(drop=True)
        df_new.to_pickle(file_path)

    def _download_each(self, keys, fetch, label):
        """Call ``fetch(key)`` for every key and return the list of successful results.

        A failing key is logged with ``label``, followed by a 2 second pause,
        and is then skipped (best effort, no retry).
        """
        results = []
        for key in keys:
            try:
                results.append(fetch(key))
            except Exception as e:
                print(self.getCurrentTime(), ":%s %s : Exception :%s" % (label, key, str(e)))
                time.sleep(2)
        return results

    def AStockTradingDate(self, output_path, start_date='20021231', end_date='20251231'):
        """Save the open trading days between ``start_date`` and ``end_date``.

        Args:
            output_path: Directory that receives ``wind_trading_date.pkl``.
            start_date: First calendar day requested, as ``YYYYMMDD``.
            end_date: Last calendar day requested, as ``YYYYMMDD``.

        The pickle holds a single ``date`` column converted with ``pd.to_datetime``.
        """
        df_trading_date = pro.trade_cal(exchange_id='', start_date=start_date, end_date=end_date, fields='cal_date', is_open='1')
        df_trading_date.columns = ['date']
        df_trading_date['date'] = pd.to_datetime(df_trading_date['date'])
        df_trading_date.to_pickle(os.path.join(output_path, 'wind_trading_date.pkl'))

    def AStockListDelist(self, output_path):
        """Save listing and delisting dates of stocks to ``wind_stock_name_list_delist.pkl``.

        Author's notes: the Tushare data and Wind's "suspended-listing A shares"
        data disagree on the stock list because the Wind set also contains
        B shares; once B shares are removed the two agree, and their
        ``list_date`` / ``delist_date`` values are consistent with each other.
        ``delist_date`` is the day the stock is delisted; before that day it is
        still listed.

        Missing dates are replaced by 2099-12-31. Only the two date columns are
        filled, so text columns never receive a Timestamp.
        """
        print(self.getCurrentTime(), ": AStockListDelist Download Start!")
        # Author's note: stocks whose listing is suspended show up in the 'L' result.
        df_basicL = pro.stock_basic(exchange_id='', list_status='L', fields='ts_code,symbol,name,list_date,delist_date, list_status')
        df_basicD = pro.stock_basic(exchange_id='', list_status='D', fields='ts_code,symbol,name,list_date,delist_date, list_status')
        # Author's note: the 'P' query returns no suspended-listing stocks.
        df_basicP = pro.stock_basic(exchange_id='', list_status='P', fields='ts_code,symbol,name,list_date,delist_date, list_status')
        df_list_delist = pd.concat([df_basicL, df_basicD, df_basicP]).drop(['symbol'], axis=1).reset_index(drop=True)
        df_list_delist = df_list_delist.rename(columns={'name': 'symbolname', 'ts_code': 'symbol'})
        date_cols = ['list_date', 'delist_date']
        for col in date_cols:
            df_list_delist[col] = pd.to_datetime(df_list_delist[col])
        df_list_delist[date_cols] = df_list_delist[date_cols].fillna(pd.Timestamp(2099, 12, 31))
        df_list_delist.to_pickle(os.path.join(output_path, 'wind_stock_name_list_delist.pkl'))
        print(self.getCurrentTime(), ": AStockListDelist Download Complete!")

    def AStockDailyData(self, start_time, end_time, output_path):
        """Download daily quotes, adjustment factors and share counts, then append them
        to ``wind_stock_daily_quote.pkl``.

        Args:
            start_time: First day of the range, as ``YYYYMMDD``.
            end_time: Last day of the range, as ``YYYYMMDD``.
            output_path: Directory holding the pickle file.

        Returns early, writing nothing, when no quotes could be downloaded
        (for example when the range contains no trading day).

        Author's notes:
        - Data from ``pro.daily`` includes trading-halted stocks, whose ``vol``
          and ``amount`` are 0. It excludes stocks whose listing is suspended.
          It excludes data after a stock's delisting but includes the
          non-trading days before delisting.
        - The halted stocks returned by ``suspend`` do not include
          suspended-listing stocks.
        - Wind API ``wset`` halt data includes non-A-share instruments and
          excludes suspended-listing stocks; with non-A instruments removed it
          matches Tushare ``suspend``.
        - Wind data browser: with an explicit date it matches Tushare (no
          suspended-listing stocks); without an explicit date (e.g. previous
          trading day) the result may contain some suspended-listing stocks and
          other normally trading stocks.
        """
        df_trading_date = pro.trade_cal(exchange_id='', start_date=start_time, end_date=end_time, fields='cal_date', is_open='1')
        print(self.getCurrentTime(), ": AStockDailyData Download Start!")
        trade_days = list(df_trading_date.cal_date)

        # Quotes and adjustment factors are fetched together so that a day is
        # either present in both or skipped in both.
        quote_pairs = self._download_each(
            trade_days,
            lambda day: (pro.daily(trade_date=day), pro.adj_factor(trade_date=day)),
            'AStockDailyData',
        )
        df_price = self._concat_frames([price for price, _ in quote_pairs])
        df_adj = self._concat_frames([adj for _, adj in quote_pairs])
        if df_price.empty:
            print(self.getCurrentTime(), ": AStockDailyData No Data Downloaded!")
            return

        if df_adj.empty:
            df_price_adj = df_price.assign(adj_factor=float('nan'))
        else:
            df_price_adj = pd.merge(df_price, df_adj, on=['ts_code', 'trade_date'], how='left')
        df_price_adj = df_price_adj.sort_values(['trade_date', 'ts_code']).reset_index(drop=True)
        df_price_adj['trade_date'] = pd.to_datetime(df_price_adj['trade_date'])
        df_price_adj = df_price_adj.rename(columns={'trade_date': 'date', 'ts_code': 'symbol'})

        df_basic = self._concat_frames(
            self._download_each(trade_days, lambda day: pro.daily_basic(trade_date=day), 'AStockDailyData')
        )
        share_cols = ['total_share', 'float_share', 'free_share']
        if df_basic.empty:
            # Keep the quotes even when the share counts could not be fetched.
            xy = df_price_adj.assign(**{col: float('nan') for col in share_cols})
        else:
            df_basic['trade_date'] = pd.to_datetime(df_basic['trade_date'])
            df_basic = df_basic.rename(columns={'trade_date': 'date', 'ts_code': 'symbol'})
            xy = pd.merge(df_price_adj, df_basic.loc[:, ['symbol', 'date'] + share_cols], on=['symbol', 'date'], how='left')

        xy['vol'] = 100 * xy['vol']  # source unit: lots
        xy['amount'] = 1000 * xy['amount']  # source unit: thousand yuan
        for col in share_cols:
            xy[col] = 10000 * xy[col]  # source unit: ten thousand shares

        self._append_to_pickle(os.path.join(output_path, 'wind_stock_daily_quote.pkl'), xy, ['symbol', 'date'])
        print(self.getCurrentTime(), ": AStockDailyData Download Complete!")

    def AStockSuspend(self, start_time, end_time, output_path):
        """Download trading-halt records and append them to ``wind_stock_suspend.pkl``.

        Args:
            start_time: First day of the range, as ``YYYYMMDD``.
            end_time: Last day of the range, as ``YYYYMMDD``.
            output_path: Directory holding the pickle file.

        Returns early, writing nothing, when no record was downloaded.
        """
        df_trading_date = pro.trade_cal(exchange_id='', start_date=start_time, end_date=end_time, fields='cal_date', is_open='1')
        print(self.getCurrentTime(), ": AStockSuspend Download Start!")
        df_suspend = self._concat_frames(
            self._download_each(
                list(df_trading_date.cal_date),
                # The keyword was misspelled as 'fiedls', so the column selection never took effect.
                lambda day: pro.suspend(suspend_date=day, fields='ts_code,suspend_date,resume_date,suspend_reason'),
                'AStockSuspend',
            )
        )
        if df_suspend.empty:
            print(self.getCurrentTime(), ": AStockSuspend No Data Downloaded!")
            return

        df_suspend = df_suspend.sort_values(['suspend_date', 'ts_code']).reset_index(drop=True)
        df_suspend['suspend_date'] = pd.to_datetime(df_suspend['suspend_date'])
        df_suspend['resume_date'] = pd.to_datetime(df_suspend['resume_date'])
        df_suspend = df_suspend.rename(columns={'suspend_date': 'date', 'ts_code': 'symbol'})

        self._append_to_pickle(os.path.join(output_path, 'wind_stock_suspend.pkl'), df_suspend)
        print(self.getCurrentTime(), ": AStockSuspend Download Complete!")

    def AIndexDailyData(self, indexSymbolList, start_time, end_time, output_path):
        """Download daily index quotes and append them to ``wind_index_daily_quote.pkl``.

        Args:
            indexSymbolList: Iterable of index codes passed as ``ts_code``.
            start_time: First day of the range, as ``YYYYMMDD``.
            end_time: Last day of the range, as ``YYYYMMDD``.
            output_path: Directory holding the pickle file.

        Returns early, writing nothing, when no quote was downloaded.
        """
        print(self.getCurrentTime(), ": AIndexDailyData Download Start!")
        df_index_daily = self._concat_frames(
            self._download_each(
                indexSymbolList,
                lambda code: pro.index_daily(ts_code=code, start_date=start_time, end_date=end_time),
                'AIndexDailyData',
            )
        )
        if df_index_daily.empty:
            print(self.getCurrentTime(), ": AIndexDailyData No Data Downloaded!")
            return

        df_index_daily = df_index_daily.sort_values(['trade_date', 'ts_code']).reset_index(drop=True)
        df_index_daily['trade_date'] = pd.to_datetime(df_index_daily['trade_date'])
        df_index_daily = df_index_daily.rename(columns={'trade_date': 'date', 'ts_code': 'symbol'})
        df_index_daily['vol'] = df_index_daily['vol'] * 100  # source unit: lots
        df_index_daily['amount'] = df_index_daily['amount'] * 1000  # source unit: thousand yuan

        self._append_to_pickle(os.path.join(output_path, 'wind_index_daily_quote.pkl'), df_index_daily, ['symbol', 'date'])
        print(self.getCurrentTime(), ": AIndexDailyData Download Complete!")

def download_from_tushare(output_path=None):
    """Run today's full download: trading calendar, list/delist dates, halts,
    daily stock quotes and daily index quotes.

    Args:
        output_path: Directory that receives the pickle files. When omitted,
            ``$HOME_DATA/stock`` is used (adjust to your own storage location).

    Raises:
        KeyError: If ``output_path`` is omitted and ``HOME_DATA`` is not set.
    """
    if output_path is None:
        output_path = os.path.join(os.environ['HOME_DATA'], r'stock')
    # Make sure the target directory exists before any pickle is written.
    os.makedirs(output_path, exist_ok=True)

    datatushare = DataTushare()
    # Only the current local day is downloaded.
    start_time = end_time = time.strftime('%Y%m%d')
    datatushare.AStockTradingDate(output_path)
    datatushare.AStockListDelist(output_path)
    datatushare.AStockSuspend(start_time, end_time, output_path)
    datatushare.AStockDailyData(start_time, end_time, output_path)
    datatushare.AIndexDailyData(['000016.SH', '000300.SH', '000905.SH', '399006.SZ'], start_time, end_time, output_path)

if __name__ == "__main__":
    # Send a single echo request so the check terminates; the count flag is
    # '-n' on Windows and '-c' elsewhere.
    count_flag = '-n' if os.name == 'nt' else '-c'
    exit_code = os.system('ping %s 1 www.baidu.com' % count_flag)
    # os.system reports success as 0. The former bitwise test `~exit_code` was
    # truthy for 0 and for failure codes alike, so it never blocked the download.
    if exit_code == 0:
        download_from_tushare()
    else:
        raise Exception(str(datetime.datetime.now()) + ': Internet Connection Failed!')

你可能感兴趣的:(通过tushare获取量化投资研究数据源)