各种股票软件,例如通达信、同花顺、大智慧,都可以实时查看股票价格和走势,做一些简单的选股和定量分析,但是如果你想做更复杂的分析,例如回归分析、关联分析等就有点捉襟见肘,所以最好能够获取股票历史及实时数据并存储到数据库,然后再通过其他工具,例如SPSS、SAS、EXCEL或者其他高级编程语言连接数据库获取股票数据进行定量分析,这样就能实现更多目的了。
为此,首先需要找到可以获取股票数据的接口,新浪、雅虎、腾讯等都有接口可以实时获取股票数据,历史数据选择了雅虎接口,收盘数据选择了腾讯接口。
(1)项目结构
(2)数据库连接池
connectionpool.py
#-*- coding: UTF-8 -*-
'''
create a connection pool
'''
from DBUtils import PooledDB
import MySQLdb
import string
# Settings for the pooled MySQL connections created by createConnectionPool.
maxconn = 30 # maximum number of connections
mincached = 10 # minimum number of idle connections
maxcached = 20 # maximum number of idle connections
maxshared = 30 # maximum number of shared connections
# "#"-separated fields, in order: user, password, host, port, database, charset
connstring="root#root#127.0.0.1#3307#pystock#utf8" # database address
dbtype = "mysql" # use MySQL as the storage database
def createConnectionPool(connstring, dbtype):
    '''
    Create a DBUtils PooledDB connection pool for the given data source.

    connstring -- "user#password#host#port#database#charset"
    dbtype     -- database type; only 'mysql' is handled

    Returns the pool for 'mysql' and None for any other dbtype.
    Raises Exception when the connection string does not have exactly six
    fields or when the pool cannot be created.
    '''
    if dbtype != 'mysql':
        return None
    db_conn = connstring.split("#")
    if len(db_conn) != 6:
        # Do not echo connstring in the message: it contains the password.
        raise Exception('conn datasource Excepts, expected 6 "#"-separated fields but got %d.' % len(db_conn))
    user, passwd, host, port, db, charset = db_conn
    try:
        return PooledDB.PooledDB(
            MySQLdb,
            user=user,
            passwd=passwd,
            host=host,
            port=int(port),
            db=db,
            charset=charset,
            mincached=mincached,
            maxcached=maxcached,
            maxshared=maxshared,
            maxconnections=maxconn)
    except Exception as e:
        raise Exception('conn datasource Excepts,%s!!!(%s).' % (host, str(e)))
# Module-level pool, created when this module is imported, from the settings defined in this module.
pool = createConnectionPool(connstring, dbtype)
(3)数据库操作
DBOperator.py
#-*- coding: UTF-8 -*-
'''
Created on 2015-3-13
@author: Casey
'''
import MySQLdb
from stockmining.stocks.setting import LoggerFactory
import connectionpool
class DBOperator(object):
    '''
    Small helper around one database connection taken from the connection pool.

    Call connDB() before any query method and closeDB() when finished.
    MySQLdb errors are logged instead of being propagated; the query
    methods return None when that happens.
    '''

    def __init__(self):
        self.logger = LoggerFactory.getLogger('DBOperator')
        # Start without a connection so every method can detect "not connected".
        self.conn = None

    def connDB(self):
        '''Take a connection from the pool, remember it and return it.'''
        self.conn = connectionpool.pool.connection()
        return self.conn

    def closeDB(self):
        '''Close the current connection, if any, and forget it.'''
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def _cursor(self):
        '''Return a new cursor; raise MySQLdb.Error when there is no connection.'''
        if self.conn is None:
            raise MySQLdb.Error('No connection')
        return self.conn.cursor()

    def _logError(self, e):
        '''Log a MySQLdb error, with or without a separate error code in e.args.'''
        if len(e.args) >= 2:
            self.logger.error("Mysql Error %s: %s" % (e.args[0], e.args[1]))
        else:
            self.logger.error("Mysql Error: %s" % str(e))

    def insertIntoDB(self, table, dict):
        '''
        Insert one row into table and commit.

        table -- table name
        dict  -- mapping of column name to value (the parameter name shadows
                 the builtin but is kept because callers may pass it by name)

        Returns the value returned by cursor.execute, or None when nothing
        was inserted (empty mapping or database error, both logged).
        '''
        if not dict:
            self.logger.error("Mysql Error: no columns to insert into %s" % table)
            return None
        cursor = None
        try:
            cursor = self._cursor()
            columns = list(dict)
            param = tuple(dict[column] for column in columns)
            placeholders = ",".join(["%s"] * len(columns))
            sql = "insert into " + table + "(" + ",".join(columns) + ") values(" + placeholders + ")"
            self.logger.debug(sql % param)
            # Values are passed separately so the driver escapes them.
            n = cursor.execute(sql, param)
            self.conn.commit()
            return n
        except MySQLdb.Error as e:
            self._logError(e)
            if self.conn is not None:
                self.conn.rollback()
            return None
        finally:
            if cursor is not None:
                cursor.close()

    def execute(self, sql):
        '''
        Run sql and return the value returned by cursor.execute.

        Nothing is committed here. Returns None when a MySQLdb error occurred.
        '''
        cursor = None
        try:
            cursor = self._cursor()
            return cursor.execute(sql)
        except MySQLdb.Error as e:
            self._logError(e)
            return None
        finally:
            if cursor is not None:
                cursor.close()

    def findBySQL(self, sql):
        '''Run sql and return all fetched rows, or None when a MySQLdb error occurred.'''
        cursor = None
        try:
            cursor = self._cursor()
            cursor.execute(sql)
            return cursor.fetchall()
        except MySQLdb.Error as e:
            self._logError(e)
            return None
        finally:
            if cursor is not None:
                cursor.close()

    def findByCondition(self, table, fields, wheres):
        '''
        Select fields from table where every condition in wheres holds.

        table  -- table name
        fields -- iterable of column names to select
        wheres -- iterable of objects with .key (column name) and .value
                  attributes; the conditions are combined with "and".
                  When empty, no where clause is added.

        Returns the fetched rows, or None when a MySQLdb error occurred.
        '''
        cursor = None
        try:
            cursor = self._cursor()
            wheres = list(wheres)
            sql = "select " + ",".join(fields) + " from " + table
            if wheres:
                sql += " where " + " and ".join(where.key + "=%s" for where in wheres)
            param = tuple(where.value for where in wheres)
            self.logger.debug(sql)
            # Values are passed separately so the driver escapes them.
            cursor.execute(sql, param)
            return cursor.fetchall()
        except MySQLdb.Error as e:
            self._logError(e)
            return None
        finally:
            if cursor is not None:
                cursor.close()
(4)日志
LoggerFactory.py
#-*- coding: UTF-8 -*-
'''
Created on 2015-3-11
@author: Casey
'''
import logging
import time
def getLogger(name):
    '''
    Return the logger called name, writing to the log file and to the console.

    name -- logger name; it is printed in every record of this logger
    '''
    # basicConfig only configures the root logger when it has no handlers yet,
    # so the format must not embed values of one particular call: %(asctime)s
    # and %(name)s are filled in per record instead of being frozen here.
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s : %(name)s LINE %(lineno)-4d %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        filename="d:\\stocks\\stock.log",
        filemode='w')
    logger = logging.getLogger(name)
    # logging.getLogger returns the same object for the same name; add the
    # console handler only once so repeated calls do not duplicate output.
    if not logger.handlers:
        console = logging.StreamHandler()
        console.setLevel(logging.DEBUG)
        console.setFormatter(logging.Formatter('%(name)s: LINE %(lineno)-4d : %(levelname)-8s %(message)s'))
        logger.addHandler(console)
    return logger
# Manual check: emit a single debug record when this module is run directly.
if __name__ == '__main__':
    demo_logger = getLogger("www")
    demo_logger.debug("www")
(5)获取股票历史数据
采用雅虎的接口:http://ichart.yahoo.com/table.csv?s=
参 数:s — 股票名称
a — 起始时间,月
b — 起始时间,日
c — 起始时间,年
d — 结束时间,月
e — 结束时间,日
f — 结束时间,年
g — 时间周期(例如 d 表示日线)。
(一定注意月份参数,其值比真实数据-1。如需要9月数据,则写为08。)
示例 查询浦发银行2010.09.25 – 2010.10.8之间日线数据
http://ichart.yahoo.com/table.csv?s=600000.SS&a=08&b=25&c=2010&d=09&e=8&f=2010&g=d
返回:
Date,Open,High,Low,Close,Volume,Adj Close
2010-09-30,12.37,12.99,12.32,12.95,76420500,12.95
2010-09-29,12.20,12.69,12.12,12.48,79916400,12.48
2010-09-28,12.92,12.92,12.57,12.58,63988100,12.58
2010-09-27,13.00,13.02,12.89,12.94,43203600,12.94
因为数据量比较大,需要跑很久,所以也可以考虑用多线程模式来获取相关数据。下面是单线程模式的实现:
#-*- coding: UTF-8 -*-
'''
Created on 2015-3-1
@author: Casey
'''
import urllib
import re
import sys
from setting import params
import urllib2
from db import *
# Database helper shared by the functions of this script.
dbOperator = DBOperator()
# Table that receives the Yahoo historical quotes.
table = "stock_quote_yahoo"
def isStockExitsInDate(table, stock, date):
    '''
    Tell whether table already holds a row for this stock code and date.

    table -- table name
    stock -- numeric stock code (formatted with %d)
    date  -- value compared with the "date" column

    Returns True when at least one matching row is reported, otherwise False.
    '''
    # NOTE(review): date comes from the downloaded CSV and is interpolated
    # into the SQL text; consider a parameterized query.
    sql = "select * from " + table + " where code = '%d' and date='%s'" % (stock, date)
    n = dbOperator.execute(sql)
    # A None result (no count available) is treated as "not found".
    return n is not None and n >= 1
def getHistoryStockData(code, dataurl):
    '''
    Download the CSV at dataurl and store every row that is not yet in the table.

    code    -- numeric stock code, written to the "code" column
    dataurl -- full URL of the table.csv request

    Returns the downloaded text with double quotes removed, or None when
    the download or the processing failed (the error is printed).
    '''
    try:
        try:
            stdout = urllib2.urlopen(urllib2.Request(dataurl), data=None, timeout=3)
        except Exception as e:
            # HTTP errors such as 404 are raised by urlopen and end up here,
            # so the body does not need to be searched for "404".
            print(">>>>>> Exception: " + str(e))
            return None
        try:
            stdoutInfo = stdout.read().decode(params.codingtype).encode('utf-8')
        finally:
            stdout.close()
        tempData = stdoutInfo.replace('"', '')
        for stockQuote in tempData.split("\n"):
            # strip() drops a trailing "\r" so the last column stays clean.
            stockInfo = stockQuote.strip().split(",")
            # Keep only data rows: seven columns and not the header line.
            if len(stockInfo) != 7 or stockInfo[0] == 'Date':
                continue
            if isStockExitsInDate(table, code, stockInfo[0]):
                continue
            stockDetail = {
                "date": stockInfo[0],
                "open": stockInfo[1],       # opening price
                "high": stockInfo[2],       # highest price
                "low": stockInfo[3],        # lowest price
                "close": stockInfo[4],      # closing price
                "volume": stockInfo[5],     # traded volume
                "adj_close": stockInfo[6],  # adjusted closing price
                "code": code,               # stock code
            }
            dbOperator.insertIntoDB(table, stockDetail)
        return tempData
    except Exception as err:
        print(">>>>>> Exception: " + str(dataurl) + " " + str(err))
        return None
def get_stock_history():
    '''
    Download and store the history of every stock code in the scanned ranges.

    The text returned by getHistoryStockData is printed for each code.
    '''
    # NOTE: the month parameters of this interface are zero-based, so
    # a=01 / d=01 select February of 2005 and 2015.
    period = "&a=01&b=01&c=2005&d=01&e=01&f=2015&g=d"
    # (first code, end code -- excluded as in range(), exchange suffix)
    codeRanges = (
        # Shanghai: start at 600000 so the whole range is scanned.
        (600000, 602100, "SS"),
        # Shenzhen main board
        (1, 1999, "SZ"),
        # Shenzhen SME board
        (2001, 2999, "SZ"),
        # ChiNext
        (300001, 300400, "SZ"),
    )
    for first, end, suffix in codeRanges:
        for code in range(first, end):
            dataUrl = "http://ichart.yahoo.com/table.csv?s=%06d.%s%s" % (code, suffix, period)
            print(getHistoryStockData(code, dataUrl))
def main():
    '''Connect to the database, download the history, and always release the connection.'''
    dbOperator.connDB()
    try:
        get_stock_history()
    finally:
        # Release the connection even when the download raised.
        dbOperator.closeDB()


if __name__ == '__main__':
    main()
(6)获取实时价格和现金流数据
A:实时价格数据采用腾讯的接口:沪市:http://qt.gtimg.cn/q=sh+股票代码,深市:http://qt.gtimg.cn/q=sz+股票代码
如获取平安银行的股票实时数据:http://qt.gtimg.cn/q=sz000001,会返回一个包含股票数据的字符串:
v_sz000001="51~平安银行~000001~11.27~11.27~11.30~316703~151512~165192~11.27~93~11.26~ 4352~11.25~4996~11.24~1037~11.23~1801~11.28~1181~11.29~2108~11.30~1075~11.31~1592~11.32~ 1118~15:00:24/11.27/3146/S/3545407/17948|14:56:59/11.26/15/S/16890/17787| 14:56:56/11.25/404/S/454693/17783|14:56:54/11.26/173/B/194674/17780|14:56:51 /11.26/306/B/344526/17777|14:56:47/11.26/16/B/18016/17773~ 20151029150142~0.00~0.00~11.36~11.25~ 11.26/313557/354285045~ 316703~35783~0.27~7.38~~11.36~11.25~0.98~1330.32~1612.59~1.03~12.40~10.14~";
数据比较多,比较有用的是:1-名称;2-代码;3-价格;4-昨日收盘;5-今日开盘;6-交易量(手);7-外盘;8-内盘;9-买一;10-买一量;11-买二;12-买二量;13-买三;14-买三量;15-买四;16-买四量;17-买五;18-买五量;19-卖一;20-卖一量;21-卖二;22-卖二量;23-卖三;24-卖三量;25-卖四;26-卖四量;27-卖五;28-卖五量;30-时间;31-涨跌;32-涨跌率;33-最高价;34-最低价;35-成交量(万);38-换手率;39-市盈率;42-振幅;43-流通市值;44-总市值;45-市净率
B:现金流数据仍然采用腾讯接口:沪市:http://qt.gtimg.cn/q=ff_sh+股票代码,深市:http://qt.gtimg.cn/q=ff_sz+股票代码
例如平安银行的现金流数据http://qt.gtimg.cn/q=ff_sz000001:
v_ff_sz000001="sz000001~21162.20~24136.40~-2974.20~-8.31~14620.87~11646.65~2974.22~ 8.31~35783.07~261502.0~261158.3~平安银行~20151029~20151028^37054.20^39358.20~ 20151027^39713.50^42230.70~20151026^82000.80^83689.90~20151023^81571.30^71743.10";
比较重要的:1-主力流入;2-主力流出;3-主力净流量;4-主力流入/主力总资金;5-散户流入;6-散户流出;7-散户净流量;8-散户流入/散户总资金;9-总资金流量;12-名字;13-日期
采用多线程、数据库连接池实现股票实时价格和现金流数据的获取:
#-*- coding: UTF-8 -*-
'''
Created on 2015年3月2日
@author: Casey
'''
import time
import threading
'''
上证编码:'600001' .. '602100'
深圳编码:'000001' .. '001999'
中小板:'002001' .. '002999'
创业板:'300001' .. '300400'
'''
import urllib2
from datetime import date
from db import *
from setting import *
class StockTencent(object):
    '''
    Collect quotation and cash-flow data from the qt.gtimg.cn interface and
    store it through DBOperator.
    '''

    # Target table for each kind of data.
    __stockTables = {'cash': 'stock_cash_tencent', 'quotation': 'stock_quotation_tencent'}

    # Seconds to wait before the single retry of a failed download.
    retryDelaySeconds = 10

    # (market prefix, first code, end code -- excluded as in range()).
    __codeRanges = (
        ('sh', 600001, 602100),   # Shanghai
        ('sz', 1, 1999),          # Shenzhen main board
        ('sz', 2001, 2999),       # Shenzhen SME board
        ('sz', 300001, 300400),   # ChiNext
    )

    # (column, index in the "~"-separated cash-flow payload).
    __cashColumns = (
        ('main_in_cash', 1), ('main_out_cash', 2), ('main_net_cash', 3),
        ('main_net_rate', 4), ('private_in_cash', 5), ('private_out_cash', 6),
        ('private_net_cash', 7), ('private_net_rate', 8), ('total_cash', 9),
        ('date', 13),
    )

    # (column, index in the "~"-separated quotation payload).
    __quotationColumns = (
        ('code', 2), ('price', 3), ('yesterday_close', 4), ('today_open', 5),
        ('volume', 6), ('outer_sell', 7), ('inner_buy', 8),
        ('buy_one', 9), ('buy_one_volume', 10),
        ('buy_two', 11), ('buy_two_volume', 12),
        ('buy_three', 13), ('buy_three_volume', 14),
        ('buy_four', 15), ('buy_four_volume', 16),
        ('buy_five', 17), ('buy_five_volume', 18),
        ('sell_one', 19), ('sell_one_volume', 20),
        ('sell_two', 21), ('sell_two_volume', 22),
        ('sell_three', 23), ('sell_three_volume', 24),
        ('sell_four', 25), ('sell_four_volume', 26),
        ('sell_five', 27), ('sell_five_volume', 28),
        ('datetime', 30), ('updown', 31), ('updown_rate', 32),
        ('heighest_price', 33), ('lowest_price', 34),
        ('turnover_rate', 38), ('pe_rate', 39), ('viberation_rate', 42),
        ('circulated_stock', 43), ('total_stock', 44), ('pb_rate', 45),
    )

    def __init__(self):
        '''Create the logger and the database helper.'''
        self.__logger = LoggerFactory.getLogger('StockTencent')
        self.__dbOperator = DBOperator()

    def main(self):
        '''Run both collectors in their own threads and wait for them to finish.'''
        self.__dbOperator.connDB()
        # NOTE(review): both threads use the single connection held by
        # self.__dbOperator -- confirm the driver allows that.
        workers = [
            threading.Thread(target=self.getStockCash),
            threading.Thread(target=self.getStockQuotation),
        ]
        try:
            for worker in workers:
                worker.start()
            # The connection must stay open until the workers have returned.
            for worker in workers:
                worker.join()
        finally:
            self.__dbOperator.closeDB()

    def __isStockExitsInDate(self, table, stock, date, column='date'):
        '''
        Tell whether table already holds a row for this stock code and date.

        column -- name of the column compared with date
        '''
        sql = "select * from %s where code = '%s' and %s='%s'" % (table, stock, column, date)
        n = self.__dbOperator.execute(sql)
        # A None result (no count available) is treated as "not found".
        return n is not None and n >= 1

    @staticmethod
    def __toUnicode(text):
        '''Decode UTF-8 byte strings; return any other value unchanged.'''
        if isinstance(text, bytes):
            return text.decode('utf8')
        return text

    @staticmethod
    def __isZeroPrice(price):
        '''Tell whether price is numerically zero; unparsable text is not zero.'''
        try:
            return float(price) == 0.0
        except ValueError:
            return False

    def __fetchWithRetry(self, dataUrl):
        '''Download dataUrl, retrying once after retryDelaySeconds; None when both attempts fail.'''
        tempData = self.__getDataFromUrl(dataUrl)
        if tempData is None:
            time.sleep(self.retryDelaySeconds)
            tempData = self.__getDataFromUrl(dataUrl)
        return tempData

    def __getStockCashDetail(self, dataUrl):
        '''
        Download one cash-flow record and insert it unless it is already stored.

        Returns True when a row was passed to the database for insertion,
        False otherwise.
        '''
        tempData = self.__fetchWithRetry(dataUrl)
        if tempData is None:
            return False
        stockInfo = tempData.split('~')
        # Index 13 (the date) is read below, so 14 fields are required.
        if len(stockInfo) < 14 or 'pv_none' in stockInfo[0]:
            return False
        table = self.__stockTables['cash']
        # Text after "=" with its first two characters (the market prefix) dropped.
        code = stockInfo[0].split('=')[1][2:]
        if self.__isStockExitsInDate(table, code, stockInfo[13]):
            return False
        stockCash = dict((column, stockInfo[index]) for column, index in self.__cashColumns)
        stockCash['code'] = code
        stockCash['name'] = self.__toUnicode(stockInfo[12])
        self.__dbOperator.insertIntoDB(table, stockCash)
        return True

    def getStockQuotationDetail(self, dataUrl):
        '''
        Download one quotation record and insert it unless it is already stored.

        Records whose price is zero are skipped.
        Returns True when a row was passed to the database for insertion,
        False otherwise.
        '''
        tempData = self.__fetchWithRetry(dataUrl)
        if tempData is None:
            return False
        stockInfo = tempData.split('~')
        # Index 45 is read below, so 46 fields are required.
        if len(stockInfo) < 46 or 'pv_none' in stockInfo[0]:
            return False
        # Compare numerically: a substring test would also drop e.g. "10.00".
        if self.__isZeroPrice(stockInfo[3]):
            return False
        table = self.__stockTables['quotation']
        # Rows are written with a "datetime" column, so check that column.
        if self.__isStockExitsInDate(table, stockInfo[2], stockInfo[30], 'datetime'):
            return False
        stockQuotation = dict((column, stockInfo[index]) for column, index in self.__quotationColumns)
        stockQuotation['name'] = self.__toUnicode(stockInfo[1])
        # Field 35 is "/"-separated; its third part is stored.
        stockQuotation['volume_amout'] = stockInfo[35].split('/')[2]
        self.__dbOperator.insertIntoDB(table, stockQuotation)
        return True

    def __getDataFromUrl(self, dataUrl):
        '''
        Download dataUrl and return its text re-encoded as UTF-8 with double
        quotes removed, or None when the download failed (the error is logged).
        '''
        try:
            response = urllib2.urlopen(urllib2.Request(dataUrl), data=None, timeout=3)
            try:
                raw = response.read()
            finally:
                response.close()
        except Exception as e:
            self.__logger.error(">>>>>> Exception: " + str(e))
            return None
        stdoutInfo = raw.decode(params.codingtype).encode('utf-8')
        tempData = stdoutInfo.replace('"', '')
        self.__logger.debug(tempData)
        return tempData

    def __scan(self, urlTemplate, handler):
        '''
        Call handler(url) for every code of every range in __codeRanges.

        urlTemplate -- format string taking (market prefix, code)
        An exception raised for one code is logged and the scan continues.
        '''
        for market, first, end in self.__codeRanges:
            for code in range(first, end):
                try:
                    handler(urlTemplate % (market, code))
                except Exception as err:
                    self.__logger.error(">>>>>> Exception: " + str(code) + " " + str(err))

    def getStockCash(self):
        '''Collect the cash-flow data of every scanned stock code.'''
        self.__logger.debug("开始:收集股票现金流信息")
        self.__scan("http://qt.gtimg.cn/q=ff_%s%06d", self.__getStockCashDetail)
        self.__logger.debug("结束:股票现金流收集")

    def getStockQuotation(self):
        '''Collect the quotation data of every scanned stock code.'''
        self.__logger.debug("开始:收集股票交易行情数据")
        self.__scan("http://qt.gtimg.cn/q=%s%06d", self.getStockQuotationDetail)
        self.__logger.debug("结束:收集股票交易行情数据")
# Script entry point: build the collector and run it.
if __name__ == '__main__':
    collector = StockTencent()
    collector.main()
(7)加入到系统任务计划中收集盘后数据
(8)收集后的数据可以用以分析了,例如:
求取10月28日主力净流入最大的股票:select * from stock_cash_tencent where main_net_cash = (select max(main_net_cash) from stock_cash_tencent where date = '20151028' )
原来是“兴蓉环境”,当日放量上涨,次日收跌,连续多日有主力资金流入。
excel中做分析:
平安银行的资金流量分析