# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import tushare as ts
import pandas as pd
import lxml
import time
import datetime
import pymysql
pymysql.install_as_MySQLdb()
from sqlalchemy import create_engine

'''
Stock basic information acquisition module
'''

# Call the stock_basic API to fetch basic stock information
def get_security_info():
    # Fields: TS code, stock code, stock name, industry, market type, exchange, listing date
    stock_list = pro.stock_basic(list_status='L', fields='ts_code,symbol,name,industry,market,exchange,list_date')
    cursor = conn.cursor()
    # Clear the existing records
    cursor.execute('delete from Quantized_data.stock_info')
    conn.commit()
    for index, row in stock_list.iterrows():
        ts_code = row['ts_code']
        stk_code = row['symbol']
        stk_name = row['name']
        industry = row['industry']
        market = row['market']
        exchange = row['exchange']
        list_date = datetime.datetime.strptime(row['list_date'], '%Y%m%d')
        cursor = conn.cursor()
        # Insert the new record
        cursor.execute('insert into Quantized_data.stock_info(ts_code,stk_code,stk_name,industry,market,exchange,list_date) '
                       'values (%s, %s, %s, %s, %s, %s, %s)',
                       (ts_code, stk_code, stk_name, industry, market, exchange, list_date))
        conn.commit()

# Call the stock_company API to fetch basic company information
def get_company_info():
    # Fields: TS code, province, city, number of employees, main business; exchange is SSE (Shanghai) or SZSE (Shenzhen), default SSE
    company_list1 = pro.stock_company(exchange='SSE', fields='ts_code,province,city,employees,main_business')
    company_list2 = pro.stock_company(exchange='SZSE', fields='ts_code,province,city,employees,main_business')
    cursor = conn.cursor()
    # Clear the existing records
    cursor.execute('delete from Quantized_data.company_info')
    conn.commit()
    for index, row in company_list1.iterrows():
        ts_code = row['ts_code']
        province = row['province']
        city = row['city']
        employees = int(row['employees'])
        main_business = row['main_business']
        cursor = conn.cursor()
        # Insert the new record
        cursor.execute('insert into Quantized_data.company_info(ts_code,province,city,employees,main_business) '
                       'values (%s, %s, %s, %s, %s)',
                       (ts_code, province, city, employees, main_business))
        conn.commit()
    for index, row in company_list2.iterrows():
        ts_code = row['ts_code']
        province = row['province']
        city = row['city']
        employees = int(row['employees'])
        main_business = row['main_business']
        cursor = conn.cursor()
        # Insert the new record
        cursor.execute('insert into Quantized_data.company_info(ts_code,province,city,employees,main_business) '
                       'values (%s, %s, %s, %s, %s)',
                       (ts_code, province, city, employees, main_business))
        conn.commit()

# Call the concept_detail API to fetch the concept-stock list
def get_concept_info():
    # Fields: TS code, concept_name
    stock_list = pro.stock_basic(list_status='L', fields='ts_code')
    cursor = conn.cursor()
    # Clear the existing records
    cursor.execute('delete from Quantized_data.concept_info')
    conn.commit()
    for index, row in stock_list.iterrows():
        ts_code = row['ts_code']
        concept_list = pro.concept_detail(ts_code=ts_code, fields='ts_code,concept_name')
        for index, row in concept_list.iterrows():
            ts_code = row['ts_code']
            concept_name = row['concept_name']
            cursor = conn.cursor()
            # Insert the new record
            cursor.execute('insert into Quantized_data.concept_info(ts_code,concept_name) values (%s, %s)',
                           (ts_code, concept_name))
            conn.commit()

# Call the stk_holdernumber API to fetch shareholder counts (published irregularly);
# at most 3000 rows per call, no total limit, up to 100 calls per minute
def get_stk_holder():
    # Fields: TS code, announcement date, number of shareholders
    stock_list = pro.stock_basic(list_status='L', fields='ts_code')
    for index, row in stock_list.iterrows():
        ts_code = row['ts_code']
        stk_holdernum = pro.stk_holdernumber(ts_code=ts_code, fields='ts_code,ann_date,holder_num')
        time.sleep(5)
        for index, row in stk_holdernum.iterrows():
            ts_code = row['ts_code']
            ann_date = datetime.datetime.strptime(row['ann_date'], '%Y%m%d')
            holder_num = float(row['holder_num'])
            cursor = conn.cursor()
            # Insert the new record
            cursor.execute('insert into Quantized_data.stock_holdernum(ts_code,ann_date,holder_num) values (%s, %s, %s)',
                           (ts_code, ann_date, holder_num))
            conn.commit()
'''
Daily stock market data acquisition module
'''

# Call the daily API to fetch daily quotes; at most 200 calls per minute, 4000 rows per call
def get_market_data():
    # Fields: trade date, TS code, open, high, low, close, previous close, percent change, turnover (thousand CNY)
    stock_list = pro.stock_basic(list_status='L', fields='ts_code')
    for index, row in stock_list.iterrows():
        ts_code = row['ts_code']
        daily_info = pro.daily(ts_code=ts_code, fields='trade_date,ts_code,open,high,low,close,pre_close,pct_chg,amount')
        for index, row in daily_info.iterrows():
            trade_date = datetime.datetime.strptime(row['trade_date'], '%Y%m%d')
            ts_code = row['ts_code']
            open_price = float(row['open'])   # renamed to avoid shadowing the built-in open()
            high = float(row['high'])
            low = float(row['low'])
            close = float(row['close'])
            pre_close = float(row['pre_close'])
            pct_chg = float(row['pct_chg'])
            amount = float(row['amount'])
            cursor = conn.cursor()
            # Insert the new record
            cursor.execute('insert into Quantized_data.daily_market_data(trade_date,ts_code,open,high,low,close,pre_close,pct_chg,amount) '
                           'values (%s, %s, %s, %s, %s, %s, %s, %s, %s)',
                           (trade_date, ts_code, open_price, high, low, close, pre_close, pct_chg, amount))
            conn.commit()

# Scrape the Sina Finance "stock radar" unusual-movement data
def sina_stock_radar_Spider():
    for page in range(1, 16):
        url = 'http://finance.sina.com.cn/stockradar/stockradar' + str(page) + '.html'
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"}
        html = requests.get(url=url, headers=headers)
        html.encoding = html.apparent_encoding
        soup = BeautifulSoup(html.text, 'lxml')
        tr_list = soup.find_all('tr')
        for index, tr in enumerate(tr_list):
            if index != 0:   # skip the header row
                th_list = tr.find_all('th')
                trade_date = time.strftime("%Y%m%d", time.localtime())
                stk_code = th_list[1].string
                stk_name = th_list[2].string
                change_time = th_list[0].string
                change_type = th_list[3].string
                cursor = conn.cursor()
                # Insert the new record
                cursor.execute('insert into Quantized_data.sina_stock_radar(trade_date,stk_code,stk_name,change_time,change_type) '
                               'values (%s, %s, %s, %s, %s)',
                               (trade_date, stk_code, stk_name, change_time, change_type))
                conn.commit()

# Call the top_inst API to fetch the daily billboard (top institutions) data; at most 10000 rows per call
def get_top_tiger():
    # Fields: trade date, TS code, brokerage branch, buy amount (10k CNY), buy ratio, sell amount (10k CNY), sell ratio, net buy amount (10k CNY)
    lhb_list = pro.top_inst(trade_date='20180928', fields='trade_date,ts_code,exalter,buy,buy_rate,sell,sell_rate,net_buy')
    for index, row in lhb_list.iterrows():
        trade_date = row['trade_date']
        ts_code = row['ts_code']
        yyb = row['exalter']
        buy = row['buy']
        buy_rate = row['buy_rate']
        sell = row['sell']
        sell_rate = row['sell_rate']
        net_buy = row['net_buy']
        cursor = conn.cursor()
        # Insert the new record (target table name is assumed; the original statement was a copy-paste of another insert)
        cursor.execute('insert into Quantized_data.top_inst_data(trade_date,ts_code,yyb,buy,buy_rate,sell,sell_rate,net_buy) '
                       'values (%s, %s, %s, %s, %s, %s, %s, %s)',
                       (trade_date, ts_code, yyb, buy, buy_rate, sell, sell_rate, net_buy))
        conn.commit()

# Call the limit_list API to fetch limit-up/limit-down data; at most 1000 rows per call, no total limit
def get_zdt_info():
    # Fields: trade date, TS code, stock name, close, sealed order amount, sealed amount / daily turnover,
    # first limit time, last sealing time, number of board openings, limit strength, limit flag (D = limit-down, U = limit-up)
    # Trade date assumed to be today; the original left this argument blank
    zdt_info = pro.limit_list(trade_date=time.strftime("%Y%m%d", time.localtime()),
                              fields='trade_date,ts_code,name,close,fd_amount,fc_ratio,first_time,last_time,open_times,strth,limit')
    for index, row in zdt_info.iterrows():
        trade_date = row['trade_date']
        ts_code = row['ts_code']
        name = row['name']
        close = row['close']
        fd_amount = row['fd_amount']
        fc_ratio = row['fc_ratio']
        first_time = row['first_time']
        last_time = row['last_time']
        open_times = row['open_times']
        strth = row['strth']
        limit = row['limit']
        cursor = conn.cursor()
        # Insert the new record (target table name is assumed; the original statement was a copy-paste of another insert)
        cursor.execute('insert into Quantized_data.limit_list_data(trade_date,ts_code,name,close,fd_amount,fc_ratio,first_time,last_time,open_times,strth,`limit`) '
                       'values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
                       (trade_date, ts_code, name, close, fd_amount, fc_ratio, first_time, last_time, open_times, strth, limit))
        conn.commit()

# Call the moneyflow API to fetch per-stock money-flow data;
# at most 4000 rows per call, no total limit
def moneyflow():
    # Fields: trade date, TS code, large-order buy/sell amount (10k CNY), extra-large-order buy/sell amount (10k CNY), net inflow (10k CNY)
    stk = pro.stock_basic(list_status='L', fields='ts_code')
    for index, row in stk.iterrows():
        ts_code = row['ts_code']
        # Trade date assumed to be today; the original left this argument blank
        moneyflow_info = pro.moneyflow(ts_code=ts_code, trade_date=time.strftime("%Y%m%d", time.localtime()),
                                       fields='trade_date,ts_code,buy_lg_amount,sell_lg_amount,buy_elg_amount,sell_elg_amount,net_mf_amount')
        for index, row in moneyflow_info.iterrows():
            trade_date = row['trade_date']
            ts_code = row['ts_code']
            buy_lg_amount = row['buy_lg_amount']
            sell_lg_amount = row['sell_lg_amount']
            buy_elg_amount = row['buy_elg_amount']
            sell_elg_amount = row['sell_elg_amount']
            net_mf_amount = row['net_mf_amount']
            cursor = conn.cursor()
            # Insert the new record (target table name is assumed; the original statement was a copy-paste of another insert)
            cursor.execute('insert into Quantized_data.moneyflow_data(trade_date,ts_code,buy_lg_amount,sell_lg_amount,buy_elg_amount,sell_elg_amount,net_mf_amount) '
                           'values (%s, %s, %s, %s, %s, %s, %s)',
                           (trade_date, ts_code, buy_lg_amount, sell_lg_amount, buy_elg_amount, sell_elg_amount, net_mf_amount))
            conn.commit()

'''
Market-wide risk-control data acquisition module
'''

# Scrape Sina Finance real-time large orders
def spider():
    for page in range(0, 3000):
        url = 'http://vip.stock.finance.sina.com.cn/quotes_service/view/cn_bill_all.php?num=100&sort=ticktime&asc=0&volume=10000&type=0&' + str(page)
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"}
        html = requests.get(url=url, headers=headers)
        html.encoding = html.apparent_encoding
        soup = BeautifulSoup(html.text, 'lxml')
        tr_list = soup.find_all('tr')
        for index, tr in enumerate(tr_list):
            if index != 0:   # skip the header row
                th_list = tr.find_all('th')
                td_list = tr.find_all('td')
                change_time = th_list[2].string
                symbol = th_list[1].string
                name = th_list[0].string
                cjj = th_list[3]
                cjl = change_value = th_list[3].string
                trade_date = time.strftime("%Y%m%d", time.localtime())
                cursor = conn.cursor()
                # Insert the new record
                cursor.execute('insert into stock.stock_list(symbol,name,trade_date,change_time,change_value) '
                               'values (%s, %s, %s, %s, %s)',
                               (symbol, name, trade_date, change_time, change_value))
                conn.commit()

'''
Market news acquisition module
'''

'''
Per-stock risk-control data acquisition module
'''

# Entry point
if __name__ == '__main__':
    ts.set_token('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
    pro = ts.pro_api()
    # engine = create_engine('mysql://root:[email protected]/stock?charset=utf8')
    # Open the database connection
    conn = pymysql.connect(host="127.0.0.1", port=3306, user='root', password='123456', database='Quantized_data', charset="utf8")
    # Get a cursor with cursor()
    cursor = conn.cursor()
    # Fetch basic stock information
    get_security_info()
    # Fetch basic company information
    get_company_info()
    # Scrape Sina Finance stock-radar unusual-movement data
    sina_stock_radar_Spider()
    # Close the database connection
    conn.close()


# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import time
import tushare as ts
import pandas as pd
import lxml

def spider():
    for page in range(0, 3000):
        url = 'http://vip.stock.finance.sina.com.cn/quotes_service/view/cn_bill_all.php?num=100&sort=ticktime&asc=0&volume=10000&type=0&' + str(page)
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"}
        html = requests.get(url=url, headers=headers)
        html.encoding = html.apparent_encoding
        soup = BeautifulSoup(html.text, 'lxml')
        tr_list = soup.find_all('tr')
        # print(tr_list)
        for index, tr in enumerate(tr_list):
            if index != 0:
                th_list = tr.find_all('th')
                print(th_list)
                print(type(th_list))
                td_list = tr.find_all('td')
                value = tr_list[3].string
                print(value)
                # cjl = td_list[1].content
                # print(cjl)
                '''
                name = th_list[0].string
                print(name)
                code = th_list[1].string
                print(code)
                mytime = tr_list[2].string
                print(mytime)
                cjj = td_list[0].string
                print(cjj)
                '''

if __name__ == '__main__':
    spider()
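
# A minimal sketch (not part of the original scripts): the first script imports sqlalchemy's
# create_engine but only uses row-by-row cursor.execute() inserts. Since every tushare pro API
# call returns a pandas DataFrame, the same data could be bulk-loaded with DataFrame.to_sql.
# The connection URL, token placeholder, and target table name below are illustrative assumptions.
import tushare as ts
from sqlalchemy import create_engine

def bulk_load_daily(ts_code, token, mysql_url):
    """Fetch daily quotes for one stock and append the whole DataFrame to MySQL in one call."""
    ts.set_token(token)
    pro = ts.pro_api()
    df = pro.daily(ts_code=ts_code,
                   fields='trade_date,ts_code,open,high,low,close,pre_close,pct_chg,amount')
    engine = create_engine(mysql_url)  # e.g. 'mysql+pymysql://root:123456@127.0.0.1/Quantized_data?charset=utf8'
    # to_sql replaces the per-row INSERT loop used above; 'daily_market_data' mirrors the table in get_market_data
    df.to_sql('daily_market_data', engine, if_exists='append', index=False)

# Example usage (assumed credentials):
# bulk_load_daily('000001.SZ', '<your tushare token>', 'mysql+pymysql://root:123456@127.0.0.1/Quantized_data?charset=utf8')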