各种股票软件,例如通达信、同花顺、大智慧,都可以实时查看股票价格和走势,做一些简单的选股和定量分析,但是如果你想做更复杂的分析,例如回归分析、关联分析等就有点捉襟见肘,所以最好能够获取股票历史及实时数据并存储到数据库,然后再通过其他工具,例如SPSS、SAS、EXCEL或者其他高级编程语言连接数据库获取股票数据进行定量分析,这样就能实现更多目的了。
为此,首先需要找到可以获取股票数据的接口,新浪、雅虎、腾讯等都有接口可以实时获取股票数据,历史数据选择了雅虎接口,收盘数据选择了腾讯接口。
(1)项目结构
(2)数据库连接池
connectionpool.py
# -*- coding: UTF-8 -*-
'''
create a connection pool
'''
from DBUtils import PooledDB
import MySQLdb
import string

maxconn = 30      # maximum number of connections
mincached = 10    # minimum number of idle connections
maxcached = 20    # maximum number of idle connections
maxshared = 30    # maximum number of shared connections
# Connection settings packed as "user#password#host#port#database#charset".
connstring = "root#root#127.0.0.1#3307#pystock#utf8"
dbtype = "mysql"  # MySQL is used as the storage database


def createConnectionPool(connstring, dbtype):
    '''
    Build a DBUtils connection pool from a '#'-separated connection string.

    connstring -- "user#password#host#port#database#charset"
    dbtype     -- backend name; only 'mysql' is handled

    Returns the PooledDB instance, or None when dbtype is not 'mysql'.
    Raises ValueError when connstring has fewer than six fields, and
    Exception (wrapping the underlying error) when the pool cannot be built.
    '''
    if dbtype != 'mysql':
        return None

    db_conn = connstring.split("#")
    if len(db_conn) < 6:
        # Fail with a clear message instead of an IndexError further down.
        raise ValueError(
            'connstring must look like user#password#host#port#db#charset')

    try:
        return PooledDB.PooledDB(
            MySQLdb,
            user=db_conn[0],
            passwd=db_conn[1],
            host=db_conn[2],
            port=int(db_conn[3]),  # int() replaces the deprecated string.atoi
            db=db_conn[4],
            charset=db_conn[5],
            mincached=mincached,
            maxcached=maxcached,
            maxshared=maxshared,
            maxconnections=maxconn)
    except Exception as e:
        raise Exception(
            'conn datasource Excepts,%s!!!(%s).' % (db_conn[2], str(e)))


# Module-level pool shared by every importer of this module.
pool = createConnectionPool(connstring, dbtype)
(3)数据库操作
DBOperator.py
# -*- coding: UTF-8 -*-
'''
Created on 2015-3-13
@author: Casey
'''
import MySQLdb
from stockmining.stocks.setting import LoggerFactory
import connectionpool


class DBOperator(object):
    '''Convenience wrapper around one connection taken from the pool.'''

    def __init__(self):
        self.logger = LoggerFactory.getLogger('DBOperator')
        # Start without a connection so every method can safely test
        # self.conn even when connDB() has not been called yet.
        self.conn = None

    def connDB(self):
        '''Take a connection from connectionpool.pool, store and return it.'''
        self.conn = connectionpool.pool.connection()
        return self.conn

    def closeDB(self):
        '''Close the current connection, if there is one.'''
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def _openCursor(self):
        '''Return a new cursor; raise MySQLdb.Error when not connected.'''
        if self.conn is None:
            raise MySQLdb.Error('No connection')
        return self.conn.cursor()

    def _closeCursor(self, cursor):
        '''Close cursor when one was actually opened.'''
        if cursor is not None:
            cursor.close()

    def _logError(self, e):
        '''Log a MySQLdb error whether or not it carries (errno, message).'''
        if len(e.args) >= 2:
            self.logger.error("Mysql Error %d: %s" % (e.args[0], e.args[1]))
        else:
            # e.g. the single-argument 'No connection' error raised above.
            self.logger.error("Mysql Error: %s" % str(e))

    def insertIntoDB(self, table, dict):
        '''
        Insert one row into table and commit.

        table -- table name (concatenated into the SQL, must be trusted)
        dict  -- mapping of column name -> value; values are passed as
                 query parameters, column names are concatenated

        Errors are logged and the transaction rolled back; nothing is
        returned.
        '''
        if not dict:
            # An empty mapping would produce malformed SQL.
            self.logger.error("insertIntoDB called with no columns for " + table)
            return

        cursor = None
        try:
            cursor = self._openCursor()
            columns = list(dict)
            param = tuple(dict.get(key) for key in columns)
            sql = ("insert into " + table + "(" + ",".join(columns) +
                   ") values(" + ",".join(["%s"] * len(columns)) + ")")
            self.logger.debug(sql % param)
            cursor.execute(sql, param)
            self.conn.commit()
        except MySQLdb.Error as e:
            self._logError(e)
            if self.conn is not None:
                self.conn.rollback()
        finally:
            self._closeCursor(cursor)

    def execute(self, sql):
        '''
        Run sql and return what cursor.execute() returns.

        Returns None when a MySQLdb error occurs (the error is logged).
        No commit is issued here.
        '''
        cursor = None
        try:
            cursor = self._openCursor()
            return cursor.execute(sql)
        except MySQLdb.Error as e:
            self._logError(e)
        finally:
            self._closeCursor(cursor)

    def findBySQL(self, sql):
        '''
        Run sql and return all fetched rows.

        Returns None when a MySQLdb error occurs (the error is logged).
        '''
        cursor = None
        try:
            cursor = self._openCursor()
            cursor.execute(sql)
            return cursor.fetchall()
        except MySQLdb.Error as e:
            self._logError(e)
        finally:
            self._closeCursor(cursor)

    def findByCondition(self, table, fields, wheres):
        '''
        Select fields from table where every condition in wheres matches.

        table  -- table name (concatenated into the SQL, must be trusted)
        fields -- iterable of column names to select
        wheres -- iterable of objects exposing .key (column name) and
                  .value (value compared with '='); may be empty

        Returns the fetched rows, or None when a MySQLdb error occurs.
        '''
        cursor = None
        try:
            cursor = self._openCursor()
            conditions = list(wheres)
            sql = "select " + ",".join(fields) + " from " + table
            if conditions:
                # Values go in as query parameters rather than being
                # interpolated into the statement text.
                sql += " where " + " and ".join(
                    where.key + "=%s" for where in conditions)
            param = tuple(where.value for where in conditions)
            self.logger.debug(sql)
            if param:
                cursor.execute(sql, param)
            else:
                cursor.execute(sql)
            rows = cursor.fetchall()
            self.conn.commit()  # kept from the original implementation
            return rows
        except MySQLdb.Error as e:
            self._logError(e)
        finally:
            self._closeCursor(cursor)
(4)日志
LoggerFactory.py
# -*- coding: UTF-8 -*-
'''
Created on 2015-3-11
@author: Casey
'''
import logging
import time


def getLogger(name):
    '''
    Return the logger called name, writing to the log file and the console.

    name -- logger name; it appears in every record through %(name)s

    The file handler is installed by logging.basicConfig, which only has an
    effect the first time the root logger is configured. The console handler
    is attached to the named logger once, so repeated calls with the same
    name do not duplicate console output.
    '''
    logging.basicConfig(
        level=logging.DEBUG,
        # %(asctime)s / %(name)s are filled in per record; building the
        # format from a precomputed time string and the first caller's name
        # would freeze both for the lifetime of the process.
        format='%(asctime)s : %(name)s LINE %(lineno)-4d %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        filename="d:\\stocks\\stock.log",
        filemode='w')

    logger = logging.getLogger(name)

    # FileHandler subclasses StreamHandler, so exclude it explicitly when
    # looking for an existing console handler.
    hasConsole = any(
        isinstance(handler, logging.StreamHandler) and
        not isinstance(handler, logging.FileHandler)
        for handler in logger.handlers)
    if not hasConsole:
        console = logging.StreamHandler()
        console.setLevel(logging.DEBUG)
        console.setFormatter(logging.Formatter(
            '%(name)s: LINE %(lineno)-4d : %(levelname)-8s %(message)s'))
        logger.addHandler(console)
    return logger


if __name__ == '__main__':
    getLogger("www").debug("www")
(5)获取股票历史数据
采用雅虎的接口:http://ichart.yahoo.com/table.csv?s=<string>&a=<int>&b=<int>&c=<int>&d=<int>&e=<int>&f=<int>&g=d&ignore=.csv
参 数:s ― 股票名称
a ― 起始时间,月
b ― 起始时间,日
c ― 起始时间,年
d ― 结束时间,月
e ― 结束时间,日
f ― 结束时间,年
g― 时间周期。
(一定注意月份参数,其值比真实数据-1。如需要9月数据,则写为08。)
示例:查询浦发银行 2010.09.25 – 2010.10.8 之间的日线数据
http://ichart.yahoo.com/table.csv?s=600000.SS&a=08&b=25&c=2010&d=09&e=8&f=2010&g=d
返回:
Date,Open,High,Low,Close,Volume,Adj Close
2010-09-30,12.37,12.99,12.32,12.95,76420500,12.95
2010-09-29,12.20,12.69,12.12,12.48,79916400,12.48
2010-09-28,12.92,12.92,12.57,12.58,63988100,12.58
2010-09-27,13.00,13.02,12.89,12.94,43203600,12.94
因为数据量比较大,需要跑很久,所以也可以考虑多线程模式来获取相关数据,单线程模式:
# -*- coding: UTF-8 -*-
'''
Created on 2015-3-1
@author: Casey
'''
import urllib
import re
import sys
from setting import params
import urllib2
from db import *

dbOperator = DBOperator()
table = "stock_quote_yahoo"


def isStockExitsInDate(table, stock, date):
    '''
    Tell whether table already holds a row for stock on date.

    table -- table name
    stock -- numeric stock code (formatted with %d)
    date  -- date string as it appears in the CSV

    Returns True when dbOperator.execute reports at least one row,
    otherwise False (including when execute returns None after an error).
    '''
    sql = "select * from " + table + " where code = '%d' and date='%s'" % (stock, date)
    n = dbOperator.execute(sql)
    return n is not None and n >= 1


def getHistoryStockData(code, dataurl):
    '''
    Download the CSV at dataurl and store every new daily row for code.

    code    -- numeric stock code written to the "code" column
    dataurl -- URL returning "Date,Open,High,Low,Close,Volume,Adj Close" rows

    Returns the downloaded text (double quotes removed), or None when the
    download or the processing fails; failures are printed.
    '''
    try:
        request = urllib2.Request(dataurl)
        try:
            response = urllib2.urlopen(request, data=None, timeout=3)
        except Exception as e:
            print(">>>>>> Exception: " + str(e))
            return None
        try:
            raw = response.read()
        finally:
            response.close()

        stdoutInfo = raw.decode(params.codingtype).encode('utf-8')
        tempData = stdoutInfo.replace('"', '')

        # Every line is parsed; anything that is not a 7-field data row
        # (header, blank line, error text) is rejected by the check below.
        # The earlier guard only parsed bodies that contained the text
        # '404', which is the opposite of what was intended.
        for stockQuote in tempData.split("\n"):
            stockInfo = stockQuote.strip().split(",")
            if len(stockInfo) != 7 or stockInfo[0] == 'Date':
                continue
            if isStockExitsInDate(table, code, stockInfo[0]):
                continue
            stockDetail = {
                "date": stockInfo[0],
                "open": stockInfo[1],       # opening price
                "high": stockInfo[2],       # highest price
                "low": stockInfo[3],        # lowest price
                "close": stockInfo[4],      # closing price
                "volume": stockInfo[5],     # traded volume
                "adj_close": stockInfo[6],  # adjusted closing price
                "code": code,               # stock code
            }
            dbOperator.insertIntoDB(table, stockDetail)
        return tempData
    except Exception as err:
        print(">>>>>> Exception: " + str(dataurl) + " " + str(err))
        return None


def get_stock_history():
    '''
    Fetch and store 2005-2015 daily history for each code range below.

    Prints whatever getHistoryStockData returns for every code.
    '''
    # NOTE(review): the Shanghai range starts at 601999 here while the
    # real-time collector in this project starts at 600001 -- confirm
    # whether this is a leftover from a resumed run.
    batches = (
        # Shanghai market
        (range(601999, 602100),
         "http://ichart.yahoo.com/table.csv?s=%d.SS&a=01&b=01&c=2005&d=01&e=01&f=2015&g=d"),
        # Shenzhen market
        (range(1, 1999),
         "http://ichart.yahoo.com/table.csv?s=%06d.SZ&a=01&b=01&c=2005&d=01&e=01&f=2015&g=d"),
        # SME board
        (range(2001, 2999),
         "http://ichart.yahoo.com/table.csv?s=%06d.SZ&a=01&b=01&c=2005&d=01&e=01&f=2015&g=d"),
        # ChiNext board
        (range(300001, 300400),
         "http://ichart.yahoo.com/table.csv?s=%d.SZ&a=01&b=01&c=2005&d=01&e=01&f=2015&g=d"),
    )
    for codes, urlPattern in batches:
        for code in codes:
            dataUrl = urlPattern % code
            print(getHistoryStockData(code, dataUrl))


def main():
    "main function"
    dbOperator.connDB()
    try:
        get_stock_history()
    finally:
        # Release the connection even when collection is interrupted.
        dbOperator.closeDB()


if __name__ == '__main__':
    main()
(6)获取实时价格和现金流数据
A:实时价格数据采用腾讯的接口:沪市:http://qt.gtimg.cn/q=sh<int>,深市:http://qt.gtimg.cn/q=sz<int>
如获取平安银行的股票实时数据:http://qt.gtimg.cn/q=sz000001,会返回一个包含股票数据的字符串:
v_sz000001="51~平安银行~000001~11.27~11.27~11.30~316703~151512~165192~11.27~93~11.26~ 4352~11.25~4996~11.24~1037~11.23~1801~11.28~1181~11.29~2108~11.30~1075~11.31~1592~11.32~ 1118~15:00:24/11.27/3146/S/3545407/17948|14:56:59/11.26/15/S/16890/17787| 14:56:56/11.25/404/S/454693/17783|14:56:54/11.26/173/B/194674/17780|14:56:51 /11.26/306/B/344526/17777|14:56:47/11.26/16/B/18016/17773~ 20151029150142~0.00~0.00~11.36~11.25~ 11.26/313557/354285045~ 316703~35783~0.27~7.38~~11.36~11.25~0.98~1330.32~1612.59~1.03~12.40~10.14~";
数据比较多,比较有用的是(下标从0开始,以“~”分隔):1-名称;2-代码;3-价格;4-昨日收盘;5-今日开盘;6-交易量(手);7-外盘;8-内盘;9-买一;10-买一量;11-买二;12-买二量;13-买三;14-买三量;15-买四;16-买四量;17-买五;18-买五量;19-卖一;20-卖一量;21-卖二;22-卖二量;23-卖三;24-卖三量;25-卖四;26-卖四量;27-卖五;28-卖五量;30-时间;31-涨跌;32-涨跌率;33-最高价;34-最低价;35-价格/成交量(手)/成交额;38-换手率;39-市盈率;43-振幅;44-流通市值;45-总市值;46-市净率(对照上面的示例:第40位为空,第41、42位重复最高价和最低价)
B:现金流数据仍然采用腾讯接口:沪市:http://qt.gtimg.cn/q=ff_sh<int>,深市:http://qt.gtimg.cn/q=ff_sz<int>
例如平安银行的现金流数据http://qt.gtimg.cn/q=ff_sz000001:
v_ff_sz000001="sz000001~21162.20~24136.40~-2974.20~-8.31~14620.87~11646.65~2974.22~ 8.31~35783.07~261502.0~261158.3~平安银行~20151029~20151028^37054.20^39358.20~ 20151027^39713.50^42230.70~20151026^82000.80^83689.90~20151023^81571.30^71743.10";
比较重要的:1-主力流入;2-主力流出;3-主力净流量;4-主力流入/主力总资金;5-散户流入;6-散户流出;7-散户净流量;8-散户流入/散户总资金;9-总资金流量;12-名字;13-日期
采用多线程、数据库连接池实现股票实时价格和现金流数据的获取:
# -*- coding: UTF-8 -*-
'''
Created on 2015-03-02
@author: Casey
'''
import time
import threading
# Code ranges:
#   Shanghai:  '600001' .. '602100'
#   Shenzhen:  '000001' .. '001999'
#   SME board: '002001' .. '002999'
#   ChiNext:   '300001' .. '300400'
import urllib2
from datetime import date
from db import *
from setting import *


class StockTencent(object):
    '''Collects cash-flow and quotation data from qt.gtimg.cn into MySQL.'''

    # database tables
    __stockTables = {'cash': 'stock_cash_tencent',
                     'quotation': 'stock_quotation_tencent'}

    def __init__(self):
        '''Create the logger and the database helper.'''
        self.__logger = LoggerFactory.getLogger('StockTencent')
        self.__dbOperator = DBOperator()

    def main(self):
        '''Run both collectors in parallel and close the DB when they end.'''
        self.__dbOperator.connDB()
        workers = [threading.Thread(target=self.getStockCash),
                   threading.Thread(target=self.getStockQuotation)]
        # NOTE(review): both workers share one DBOperator and therefore one
        # connection; confirm the driver allows that or give each worker
        # its own DBOperator.
        try:
            for worker in workers:
                worker.start()
            # Wait for the workers; closing right after start() would pull
            # the connection away from under them.
            for worker in workers:
                worker.join()
        finally:
            self.__dbOperator.closeDB()

    def __isStockExitsInDate(self, table, stock, date):
        '''
        Tell whether table already holds a row for stock on date.

        Returns True when at least one row is reported, otherwise False
        (including when the query failed and execute returned None).
        '''
        sql = "select * from " + table + " where code = '%s' and date='%s'" % (stock, date)
        n = self.__dbOperator.execute(sql)
        return n is not None and n >= 1

    def __fetchWithRetry(self, dataUrl):
        '''Fetch dataUrl; on failure wait 10 seconds and try once more.'''
        tempData = self.__getDataFromUrl(dataUrl)
        if tempData is None:
            time.sleep(10)
            tempData = self.__getDataFromUrl(dataUrl)
        return tempData

    @staticmethod
    def __isZeroPrice(text):
        '''Return True only when text parses to the number zero.'''
        try:
            return float(text) == 0
        except ValueError:
            return False

    def __getStockCashDetail(self, dataUrl):
        '''
        Fetch one cash-flow record and insert it unless already stored.

        Returns False when the data could not be downloaded (after one
        retry), otherwise None.
        '''
        tempData = self.__fetchWithRetry(dataUrl)
        if tempData is None:
            return False

        # parse the cash-flow fields
        stockInfo = tempData.split('~')
        if len(stockInfo) < 14:  # index 13 (date) is read below
            return
        if stockInfo[0].find('pv_none') != -1:
            return

        table = self.__stockTables['cash']
        # Field 0 looks like "v_ff_sz000001=sz000001"; drop the two-letter
        # market prefix after '=' to get the bare code.
        code = stockInfo[0].partition('=')[2][2:]
        date = stockInfo[13]
        if self.__isStockExitsInDate(table, code, date):
            return

        stockCash = {
            'code': code,
            'main_in_cash': stockInfo[1],
            'main_out_cash': stockInfo[2],
            'main_net_cash': stockInfo[3],
            'main_net_rate': stockInfo[4],
            'private_in_cash': stockInfo[5],
            'private_out_cash': stockInfo[6],
            'private_net_cash': stockInfo[7],
            'private_net_rate': stockInfo[8],
            'total_cash': stockInfo[9],
            'name': stockInfo[12].decode('utf8'),
            'date': date,
        }
        # insert into the database
        self.__dbOperator.insertIntoDB(table, stockCash)

    def getStockQuotationDetail(self, dataUrl):
        '''
        Fetch one quotation record and insert it unless already stored.

        Records whose price is zero are skipped. Returns False when the
        data could not be downloaded (after one retry), otherwise None.
        '''
        tempData = self.__fetchWithRetry(dataUrl)
        if tempData is None:
            return False

        stockInfo = tempData.split('~')
        if len(stockInfo) < 47:  # index 46 (P/B ratio) is read below
            return
        if stockInfo[0].find('pv_none') != -1:
            return
        # Compare numerically: a substring test for '0.00' would also
        # reject prices such as 10.00.
        if self.__isZeroPrice(stockInfo[3]):
            return
        # Field 35 is "price/volume/amount"; the amount is the third part.
        dealParts = stockInfo[35].split('/')
        if len(dealParts) < 3:
            return

        table = self.__stockTables['quotation']
        code = stockInfo[2]
        date = stockInfo[30]
        # NOTE(review): the existence check filters on a `date` column while
        # the row below is written with a `datetime` key -- confirm the
        # table schema.
        if self.__isStockExitsInDate(table, code, date):
            return

        stockQuotation = {
            'code': code,
            'name': stockInfo[1].decode('utf8'),
            'price': stockInfo[3],
            'yesterday_close': stockInfo[4],
            'today_open': stockInfo[5],
            'volume': stockInfo[6],
            'outer_sell': stockInfo[7],
            'inner_buy': stockInfo[8],
            'buy_one': stockInfo[9],
            'buy_one_volume': stockInfo[10],
            'buy_two': stockInfo[11],
            'buy_two_volume': stockInfo[12],
            'buy_three': stockInfo[13],
            'buy_three_volume': stockInfo[14],
            'buy_four': stockInfo[15],
            'buy_four_volume': stockInfo[16],
            'buy_five': stockInfo[17],
            'buy_five_volume': stockInfo[18],
            'sell_one': stockInfo[19],
            'sell_one_volume': stockInfo[20],
            'sell_two': stockInfo[21],  # price; index 22 is its volume
            'sell_two_volume': stockInfo[22],
            'sell_three': stockInfo[23],
            'sell_three_volume': stockInfo[24],
            'sell_four': stockInfo[25],
            'sell_four_volume': stockInfo[26],
            'sell_five': stockInfo[27],
            'sell_five_volume': stockInfo[28],
            'datetime': stockInfo[30],
            'updown': stockInfo[31],
            'updown_rate': stockInfo[32],
            'heighest_price': stockInfo[33],
            'lowest_price': stockInfo[34],
            'volume_amout': dealParts[2],
            'turnover_rate': stockInfo[38],
            'pe_rate': stockInfo[39],
            # Field 40 is empty and 41/42 repeat high/low in the sample
            # response, so amplitude, market caps and P/B sit at 43..46.
            'viberation_rate': stockInfo[43],
            'circulated_stock': stockInfo[44],
            'total_stock': stockInfo[45],
            'pb_rate': stockInfo[46],
        }
        self.__dbOperator.insertIntoDB(table, stockQuotation)

    def __getDataFromUrl(self, dataUrl):
        '''
        Download dataUrl and return its text as UTF-8 without double quotes.

        Returns None when the request fails (the error is logged).
        '''
        request = urllib2.Request(dataUrl)
        try:
            response = urllib2.urlopen(request, data=None, timeout=3)
        except Exception as e:
            self.__logger.error(">>>>>> Exception: " + str(e))
            return None
        try:
            raw = response.read()
        finally:
            response.close()
        stdoutInfo = raw.decode(params.codingtype).encode('utf-8')
        tempData = stdoutInfo.replace('"', '')
        self.__logger.debug(tempData)
        return tempData

    def __collect(self, sources, handler):
        '''
        Call handler(url) for every code of every (urlPattern, codes) pair.

        A failure on one code is logged and the sweep continues, so a single
        malformed response does not stop the whole collection.
        '''
        for urlPattern, codes in sources:
            for code in codes:
                try:
                    handler(urlPattern % code)
                except Exception as err:
                    self.__logger.error(">>>>>> Exception: " + str(code) + " " + str(err))

    def getStockCash(self):
        '''Collect cash-flow data for every code range.'''
        self.__logger.debug("开始:收集股票现金流信息")
        self.__collect((
            ("http://qt.gtimg.cn/q=ff_sh%d", range(600001, 602100)),   # Shanghai
            ("http://qt.gtimg.cn/q=ff_sz%06d", range(1, 1999)),        # Shenzhen
            ("http://qt.gtimg.cn/q=ff_sz%06d", range(2001, 2999)),     # SME board
            ("http://qt.gtimg.cn/q=ff_sz%d", range(300001, 300400)),   # ChiNext
        ), self.__getStockCashDetail)
        self.__logger.debug("结束:股票现金流收集")

    def getStockQuotation(self):
        '''Collect quotation data for every code range.'''
        self.__logger.debug("开始:收集股票交易行情数据")
        self.__collect((
            ("http://qt.gtimg.cn/q=sh%d", range(600001, 602100)),      # Shanghai
            ("http://qt.gtimg.cn/q=sz%06d", range(1, 1999)),           # Shenzhen
            ("http://qt.gtimg.cn/q=sz%06d", range(2001, 2999)),        # SME board
            ("http://qt.gtimg.cn/q=sz%d", range(300001, 300400)),      # ChiNext
        ), self.getStockQuotationDetail)
        self.__logger.debug("结束:收集股票交易行情数据")


if __name__ == '__main__':
    StockTencent().main()
(7)加入到系统任务计划中收集盘后数据
(8)收集后的数据可以用以分析了,例如:
求取10月28日主力净流入最大的股票:select * from stock_cash_tencent where date = '20151028' and main_net_cash = (select max(main_net_cash) from stock_cash_tencent where date = '20151028')(外层查询也要限定日期,否则其他日期中数值相同的记录也会被选出)
原来是“兴蓉环境”,当日放量上涨,次日收跌,连续多日有主力资金流入。
excel中做分析:
平安银行的资金流量分析