程序运行截图:
MySQL 代码:
-- Create the schema that the loader script connects to, then select it.
-- IF NOT EXISTS keeps the script re-runnable against a server that already has the schema.
CREATE DATABASE IF NOT EXISTS financial;
USE financial;
-- Income-statement table (利润表) filled by the Python loader in this file.
-- Every metric column is stored as text exactly as the loader writes it.
-- IF NOT EXISTS makes the script safe to re-run. No AUTO_INCREMENT start value
-- is given, so a fresh table numbers its rows from 1.
CREATE TABLE IF NOT EXISTS `lrb` (
    `id` int(11) NOT NULL AUTO_INCREMENT,
    `报告期` datetime DEFAULT NULL,
    `股票名` varchar(45) DEFAULT NULL,
    `股票代码` varchar(45) DEFAULT NULL,
    `净利润` varchar(45) DEFAULT NULL,
    `净利润同比` varchar(45) DEFAULT NULL,
    `扣非归母净利润` varchar(45) DEFAULT NULL,
    `扣非归母净利润同比` varchar(45) DEFAULT NULL,
    `营业总收入` varchar(45) DEFAULT NULL,
    `营业总收入同比` varchar(45) DEFAULT NULL,
    `营业支出` varchar(45) DEFAULT NULL,
    `营业支出同比` varchar(45) DEFAULT NULL,
    `销售费用` varchar(45) DEFAULT NULL,
    `管理费用` varchar(45) DEFAULT NULL,
    `财务费用` varchar(45) DEFAULT NULL,
    `营业总支出` varchar(45) DEFAULT NULL,
    `营业总支出同比` varchar(45) DEFAULT NULL,
    `营业利润` varchar(45) DEFAULT NULL,
    `营业利润同比` varchar(45) DEFAULT NULL,
    `利润总额` varchar(45) DEFAULT NULL,
    `公告日期` datetime DEFAULT NULL,
    PRIMARY KEY (`id`),
    -- The loader checks for an existing row by report period and stock before
    -- every insert; this non-unique index keeps that lookup off a full scan.
    KEY `lrb_stock_period_idx` (`股票代码`, `报告期`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- Research-report detail table (研报明细) filled by the Python loader in this file.
-- IF NOT EXISTS makes the script safe to re-run. No AUTO_INCREMENT start value
-- is given, so a fresh table numbers its rows from 1.
CREATE TABLE IF NOT EXISTS `ybmx` (
    `id` int(11) NOT NULL AUTO_INCREMENT,
    `报告期` datetime DEFAULT NULL,
    `股票名` varchar(45) DEFAULT NULL,
    `股票代码` varchar(45) DEFAULT NULL,
    `报告名称` varchar(100) DEFAULT NULL,
    `评级类别` varchar(45) DEFAULT NULL,
    `作者` varchar(100) DEFAULT NULL,
    `机构` varchar(45) DEFAULT NULL,
    PRIMARY KEY (`id`),
    -- The loader checks for an existing row by title, author and report date
    -- before every insert; this non-unique index keeps that lookup off a full scan.
    KEY `ybmx_period_title_idx` (`报告期`, `报告名称`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- Performance-report table (业绩报表) filled by the Python loader in this file.
-- Every metric column is stored as text exactly as the loader writes it.
-- IF NOT EXISTS makes the script safe to re-run. No AUTO_INCREMENT start value
-- is given, so a fresh table numbers its rows from 1.
CREATE TABLE IF NOT EXISTS `yjb` (
    `id` int(11) NOT NULL AUTO_INCREMENT,
    `报告期` datetime DEFAULT NULL,
    `股票名` varchar(45) DEFAULT NULL,
    `股票代码` varchar(45) DEFAULT NULL,
    `每股收益` varchar(45) DEFAULT NULL,
    `每股收益扣除` varchar(45) DEFAULT NULL,
    `营业收入` varchar(45) DEFAULT NULL,
    `营业收入同比增长` varchar(45) DEFAULT NULL,
    `营业收入季度环比增长` varchar(45) DEFAULT NULL,
    `净利润` varchar(45) DEFAULT NULL,
    `净利润同比增长` varchar(45) DEFAULT NULL,
    `净利润季度环比增长` varchar(45) DEFAULT NULL,
    `每股净资产` varchar(45) DEFAULT NULL,
    `净资产收益率` varchar(45) DEFAULT NULL,
    `每股经营现金流量` varchar(45) DEFAULT NULL,
    `销售毛利率` varchar(45) DEFAULT NULL,
    `利润分配` varchar(90) DEFAULT NULL,
    `股息率` varchar(45) DEFAULT NULL,
    `首次公告日期` datetime DEFAULT NULL,
    `最新公告日期` datetime DEFAULT NULL,
    PRIMARY KEY (`id`),
    -- The loader checks for an existing row by report period and stock before
    -- every insert; this non-unique index keeps that lookup off a full scan.
    KEY `yjb_stock_period_idx` (`股票代码`, `报告期`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Python 代码:
import json
import re
import sys
import time
from ast import literal_eval
from 股吧数据抓取.MysqlHelper import MysqlHelper
import requests
"""mysql数据库类"""
# Module-level database helper; the classes in this file call helper.all(...)
# for lookups and helper.execute(...) for writes.
# The arguments are positional; judging by the values they are presumably
# host, port, user, password, database and charset — confirm against MysqlHelper.
# NOTE(review): MySQL's default port is 3306, so 8080 presumably reflects a
# custom local setup — confirm. The credentials are hard-coded here.
helper = MysqlHelper('localhost', 8080, 'root', '123', 'financial', 'utf8')
"""深圳证券交易所获取指定股票代码公A司基本信息"""
class ForumBaseData:
    """Fetch one company's basic profile from the szse.cn API and store it.

    Constructing an instance requests the profile for ``code`` and immediately
    calls :meth:`saveBaseData`. ``str(instance)`` returns the ``message`` field
    of the response that this particular instance parsed.
    """

    # Value returned by ``__str__`` until a response has been parsed.
    result = ""

    # Response fields copied into the ``forumBaseData_Sz`` table, in column order.
    _FIELDS = ("gsqc", "ywqc", "zcdz", "agdm", "agjc", "agssrq", "agzgb", "agltgb",
               "bgdm", "bgjc", "bgssrq", "bgzgb", "bgltgb", "dldq", "sheng", "shi",
               "sshymc", "http")

    def __init__(self, code):
        """Request the profile for ``code`` (stock code string) and save it.

        A ``requests`` connection error is retried exactly once; a second
        failure propagates to the caller.
        """
        self.url = "http://www.szse.cn/api/report/index/companyGeneralization?random=0.23242534566812312&secCode=" + code
        try:
            self.respose = requests.get(self.url)
        except requests.exceptions.ConnectionError:
            self.respose = requests.get(self.url)
        self.saveBaseData()

    def saveBaseData(self):
        """Parse the stored response and insert the profile if it is new.

        The response's ``message`` is kept on the instance (not in a module
        global) so that several instances do not overwrite each other's
        result. Nothing is written unless the message equals "成功".
        """
        # Parse the body once and reuse it for both the message and the data.
        payload = json.loads(self.respose.text)
        self.result = payload["message"]
        if self.result != "成功":
            return

        data = payload["data"]
        # Skip the insert when a row with the same agdm/bgdm pair already exists.
        sql = 'select agdm,bgdm from forumBaseData_Sz where agdm=%s and bgdm=%s'
        point = helper.all(sql, [data["agdm"], data["bgdm"]])
        if len(point) == 0:
            sql = "insert into forumBaseData_Sz(" + ",".join(self._FIELDS) + ")values(" \
                  + ",".join(["%s"] * len(self._FIELDS)) + ")"
            params = [data[field] for field in self._FIELDS]
            helper.execute(sql, params)
        else:
            print("此条公司股票基本信息已存在数据库中!")

    def __str__(self):
        """Return the API ``message`` parsed by this instance ("" if none yet)."""
        return str(self.result)
class FinancialData:
    """Download eastmoney.com report tables for one stock code and store them in MySQL.

    Three tables are handled, always in this order: ``yjb`` (performance
    report), ``lrb`` (income statement) and ``ybmx`` (research-report details).
    ``getFinancialData`` fills ``self.data_list`` with one entry for ``yjb``,
    one for ``lrb`` and then one entry per ``ybmx`` page;
    ``saveFinancialData`` downloads and writes every row that is not already
    in the database through the module-level ``helper``.
    """

    # Rows per page requested from the research-report list API (``pageSize``).
    YBMX_PAGE_SIZE = 50
    # Seconds to pause after each progress update; set to 0 to disable the pause.
    progress_delay = 0.1

    def __init__(self, code):
        """Build every URL and column mapping for ``code``; performs no I/O.

        :param code: stock code as a string; it is validated only in
            ``saveFinancialData``.
        """
        self.code = code
        # Landing page of the performance report for this stock.
        self.mainpage = "http://data.eastmoney.com/bbsj/yjbb/%s.html" % self.code
        self.base = "http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?type="
        self.tail = "&token=70f12f2f4f091e459a279469fe49eca5&filter=(scode=" + self.code + ")&st=reportdate&sr=1&p=1&ps=50&js=var%20AOADDBxl={pages:(tp),data:%20(x),font:(font)}&rt=52235310"
        # Performance report endpoint.
        self.yjserver = self.base + "YJBB21_YJBB" + self.tail
        # Income statement endpoint.
        self.lrserver = self.base + "CWBB_LRB20" + self.tail
        # Research-report list endpoint (paged); ``page`` is the current page
        # and ``pagenum`` the total page count once it is known.
        self.page = 1
        self.pagenum = 0
        self.ybmxserver = self._ybmx_url(self.page)
        # API field name -> column name of table ``yjb``.
        self.yjb_dict = {'reportdate': '报告期', 'sname': '股票名', 'scode': '股票代码',
                         'basiceps': '每股收益', 'cutbasiceps': '每股收益扣除', 'totaloperatereve': '营业收入',
                         'ystz': '营业收入同比增长', 'yshz': '营业收入季度环比增长', 'parentnetprofit': '净利润',
                         'sjltz': '净利润同比增长', 'sjlhz': '净利润季度环比增长', 'bps': '每股净资产',
                         'roeweighted': '净资产收益率', 'mgjyxjje': '每股经营现金流量', 'xsmll': '销售毛利率',
                         'assigndscrpt': '利润分配', 'gxl': '股息率', 'firstnoticedate': '首次公告日期',
                         'latestnoticedate': '最新公告日期'}
        # API field name -> column name of table ``lrb``.
        self.lrb_dict = {'reportdate': '报告期', 'sname': '股票名', 'scode': '股票代码',
                         'parentnetprofit': '净利润', 'sjltz': '净利润同比', 'kcfjcxsyjlr': '扣非归母净利润',
                         'sjlktz': '扣非归母净利润同比', 'totaloperatereve': '营业总收入', 'tystz': '营业总收入同比',
                         'operateexp': '营业支出', 'operateexp_tb': '营业支出同比', 'saleexp': '销售费用',
                         'manageexp': '管理费用', 'financeexp': '财务费用', 'totaloperateexp': '营业总支出',
                         'totaloperateexp_tb': '营业总支出同比', 'operateprofit': '营业利润', 'yltz': '营业利润同比',
                         'sumprofit': '利润总额', 'noticedate': '公告日期'}
        # API field name -> column name of table ``ybmx``.
        self.ybmx_dict = {'publishDate': '报告期', 'stockName': '股票名', 'stockCode': '股票代码',
                          'title': '报告名称', 'emRatingName': '评级类别', 'researcher': '作者', 'orgSName': '机构'}
        # Table name -> its field mapping.
        self.table_dict = {'yjb': self.yjb_dict, 'lrb': self.lrb_dict, 'ybmx': self.ybmx_dict}
        # Table names, in the same order as ``url_list``.
        self.table_name = ['yjb', 'lrb', 'ybmx']
        # Data URLs, in the same order as ``table_name``.
        self.url_list = [self.yjserver, self.lrserver, self.ybmxserver]
        # Downloaded rows: [yjb rows, lrb rows, ybmx page 1, ybmx page 2, ...].
        self.data_list = []
        # Browser-like request headers sent with every request.
        self.headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3722.400 QQBrowser/10.5.3738.400'}

    def _ybmx_url(self, page):
        """Return the research-report list URL for ``page`` of this stock."""
        return "http://reportapi.eastmoney.com/report/list?pageNo=%s&pageSize=%s&code=%s&industryCode=*&industry=*&rating=*&ratingchange=*&beginTime=&endTime=&fields=&qType=0" % (
            page, self.YBMX_PAGE_SIZE, self.code)

    def _get_text(self, url):
        """GET ``url`` with the instance headers and return the body decoded as UTF-8."""
        req = requests.get(url=url, headers=self.headers)
        req.encoding = 'utf-8'
        return req.text

    def _fetch_jsonp_rows(self, url):
        """Download one ``var X={pages:..,data:..,font:..}`` payload and return its rows."""
        html = self._get_text(url)
        # Strip the JavaScript assignment and quote the three bare keys so the
        # remainder parses as JSON.
        data = html.replace("var AOADDBxl=", "").replace("var CiBQPcHj=", "")
        data = data.replace('pages:', '"pages":').replace('data:', '"data":').replace('font:', '"font":')
        data = json.loads(data)
        # The rows contain substitute codes; ``FontMapping`` lists code -> value
        # pairs, which are replaced in the stringified rows before re-parsing.
        return literal_eval(
            keymap_replace(str(data['data']), MappingToDict(data['font']['FontMapping'])))

    def _fetch_ybmx_pages(self, first_url):
        """Download every research-report page and append each page to ``data_list``."""
        data = json.loads(self._get_text(first_url))
        self.data_list.append(data['data'] or [])
        # Ceiling division: an exact multiple of the page size must not yield
        # an extra, empty page (``hits // size + 1`` did).
        size = self.YBMX_PAGE_SIZE
        self.pagenum = (data['hits'] + size - 1) // size
        while self.page < self.pagenum:
            self.page = self.page + 1
            self.changeybmxserver(self.page)
            data = json.loads(self._get_text(self.ybmxserver))
            self.data_list.append(data['data'] or [])
        # Leave the paging state pointing at page 1 again.
        self.page = 1
        self.changeybmxserver(self.page)

    # Fetch the financial data
    def getFinancialData(self):
        """Download all three tables into ``self.data_list``.

        The list and the paging state are reset first, so calling this method
        again replaces the previous download instead of appending to it.
        """
        self.data_list = []
        self.page = 1
        self.pagenum = 0
        self.changeybmxserver(self.page)
        # Dispatch on the table name rather than on URL equality, which stops
        # working once ``ybmxserver`` has been advanced to another page.
        for name, url in zip(self.table_name, self.url_list):
            if name != 'ybmx':
                self.realdata = self._fetch_jsonp_rows(url)
                self.data_list.append(self.realdata)
            else:
                self._fetch_ybmx_pages(url)

    def changeybmxserver(self, page):
        """Point ``self.ybmxserver`` at ``page`` of the research-report list."""
        self.ybmxserver = self._ybmx_url(page)

    def _show_progress(self, table, done, total):
        """Write the in-place progress line and announce completion of ``table``."""
        sys.stdout.write('\r %s:[正在下载 %.2f%%] ' % (table, (done / total) * 100))
        sys.stdout.flush()
        if self.progress_delay:
            time.sleep(self.progress_delay)
        if done == total:
            print(table + '表下载完成')

    def _save_report_rows(self, table, rows):
        """Insert the ``yjb``/``lrb`` rows that are not in ``table`` yet."""
        mapping = self.table_dict[table]
        total = len(rows)
        for j, data in enumerate(rows):
            if j == 0:
                print("股票名称:%s" % data['sname'])
                print('开始下载' + table + '表')
            self._show_progress(table, j + 1, total)
            # Column -> value for this row; the three key columns come first.
            row = {'报告期': data['reportdate'], '股票名': data['sname'], '股票代码': data['scode']}
            for key, value in data.items():
                if key in mapping:
                    row[mapping[key]] = value
            # ``table`` and the column names come from this class's own fixed
            # lists, never from downloaded data; every value is bound as a
            # parameter.
            lookup = 'select `报告期` from {} where `报告期`=%s and `股票名`=%s and `股票代码`=%s'.format(table)
            point = helper.all(lookup, [data['reportdate'], data['sname'], data['scode']])
            if len(point) == 0:
                # One INSERT carrying all columns: no follow-up UPDATE that
                # could touch another stock's row for the same period.
                columns = list(row)
                insert = 'insert into {} ({}) values ({})'.format(
                    table,
                    ','.join('`%s`' % column for column in columns),
                    ','.join(['%s'] * len(columns)))
                helper.execute(insert, [row[column] for column in columns])
            # Otherwise the row already exists and is left untouched.

    def _save_ybmx_rows(self, table, rows, page_no):
        """Insert the research-report rows of one page that are not stored yet."""
        total = len(rows)
        for j, data in enumerate(rows):
            if j == 0:
                print("股票名称:%s" % data['stockName'])
                print('开始下载' + table + '表' + '(第%i页/共%i页)' % (page_no, self.pagenum))
            self._show_progress(table, j + 1, total)
            lookup = 'select `报告名称` from ybmx where `报告名称`=%s and `作者`=%s and `报告期`=%s'
            point = helper.all(lookup, [data['title'], data['researcher'], data['publishDate']])
            if len(point) == 0:
                insert = "insert into ybmx (`报告期`,`股票名`,`股票代码`,`报告名称`,`评级类别`,`作者`,`机构`) values (%s,%s,%s,%s,%s,%s,%s)"
                params = [data['publishDate'], data['stockName'], data['stockCode'],
                          data['title'], data['emRatingName'], data['researcher'], data['orgSName']]
                helper.execute(insert, params)
            # Otherwise the row already exists and is left untouched.

    # Save the financial data to the database
    def saveFinancialData(self):
        """Validate the stock code, download all tables and store the new rows.

        Prints an error message and does nothing else when the code is not
        made of digits or is longer than six characters.
        """
        if not (len(self.code) <= 6 and self.code.isdigit()):
            print("股票代码输入有误!")
            return
        self.getFinancialData()
        for i, table in enumerate(self.table_name):
            if i < 2:
                self._save_report_rows(table, self.data_list[i])
            else:
                # Every remaining entry of data_list is one research-report
                # page; entry ``k`` is page ``k - 1``.
                for k in range(i, len(self.data_list)):
                    self._save_ybmx_rows(table, self.data_list[k], k - 1)
        print("金融数据下载完成!")
# Convert a mapping list into a dict
def MappingToDict(mapplist):
    """Build a ``{code: str(value)}`` dict from a list of mapping entries.

    Each entry must provide the keys ``"code"`` and ``"value"``; when a code
    occurs more than once, the last entry wins.
    """
    return {entry["code"]: str(entry["value"]) for entry in mapplist}
# Replace substrings according to a dict
def keymap_replace(
        string: str,
        mappings: dict,
        lower_keys=False,
        lower_values=False,
        lower_string=False,
) -> str:
    """Apply every ``key -> value`` replacement of ``mappings`` to ``string``.

    Replacements run one after another in the dict's iteration order, so a
    later pair also sees the text produced by earlier ones.

    :param string: text to transform.
    :param mappings: substring -> replacement pairs.
    :param lower_keys: lowercase each key before searching for it.
    :param lower_values: lowercase each replacement before inserting it.
    :param lower_string: lowercase ``string`` before any replacement.
    :return: the transformed text.
    """
    if lower_string:
        text = string.lower()
    else:
        text = string
    for needle, substitute in mappings.items():
        if lower_keys:
            needle = needle.lower()
        if lower_values:
            substitute = substitute.lower()
        text = text.replace(needle, substitute)
    return text
if __name__ == '__main__':
    # Interactive entry point: show a banner, ask for a stock code, then
    # download that stock's tables and store them.
    rule = '*' * 60
    print(rule)
    print('\t\t\t\t\t金融数据下载工具')
    print(rule)
    code = input('请输入股票代码:')
    print(rule)
    downloader = FinancialData(code)
    downloader.saveFinancialData()
    print(rule)
程序可能仍存在一些 bug,欢迎交流指正。