"""Scrape the names and trading info of all SSE and SZSE stocks.

Sources considered:
    NetEase Finance: https://money.163.com/stock/
    Sina Finance:    http://finance.sina.com.cn/stock

Technique: requests + BeautifulSoup + re. The quote data is embedded
statically in the HTML (not generated by JavaScript) and there is no
robots.txt restriction; page structure was inspected via browser F12 /
view-source.

Steps:
    1. Fetch the stock code list from NetEase Finance.
    2. Fetch each stock's quote page from NetEase Finance.
    3. Store the results to a file.
"""
import re
import requests
from bs4 import BeautifulSoup
# File that accumulates the scraped stock codes, one code per line.
codepath = 'code.txt'
def gethtml(url):
    """Fetch *url* and return the decoded page text.

    Returns '' on any request failure; callers treat an empty string as
    "no page" (best-effort crawling).
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        # Timeout so one hung server cannot stall the whole crawl.
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
        # These pages mix GBK/UTF-8; trust the content-sniffed encoding.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Narrowed from a bare except: only swallow network/HTTP errors,
        # never KeyboardInterrupt or programming bugs.
        return ''
def getcodelist(url, page):
    """Collect stock codes from the paginated listing at *url*.

    Fetches pages 0..page (inclusive) by appending the page number to
    *url*, extracts every 6-digit stock code from the HTML, and appends
    them to the file at ``codepath``, one code per line.
    """
    # A-share stock codes are exactly 6 digits. The previous pattern
    # r'[0-9]*' matched the empty string at every position, flooding the
    # output file with blank lines.
    code_pattern = re.compile(r'\d{6}')
    for p in range(page + 1):
        html = gethtml(url + str(p))
        codes = code_pattern.findall(html)
        # Open the file once per page (not once per code) and write in bulk.
        with open(codepath, 'a') as out:
            out.writelines(code + '\n' for code in codes)
def _parse_stock(html):
    """Parse a NetEase quote page; return the stock fields as a dict.

    Raises IndexError or ValueError when the page does not have the
    expected layout (e.g. wrong exchange prefix or an empty page).
    """
    soup = BeautifulSoup(html, 'html.parser')
    scripts = soup.select('body script')
    # The quote values live in the 5th inline <script> tag as a series of
    # single-quoted strings -- assumed from the original index; confirm
    # against a live page if the site layout changes.
    text = scripts[4].get_text().replace(' ', '')
    fields = re.findall(r'\'.*\'', text)
    keys = ('name', 'code', 'price', 'change',
            'yesterday', 'today', 'high', 'low')
    if len(fields) < len(keys):
        raise ValueError('unexpected quote page layout')
    return dict(zip(keys, fields))


def savestocklist(stockurl):
    """Fetch the quote page for every code listed in ``codepath`` and
    append the parsed fields to stock.txt.

    NetEase URLs prefix SSE codes with '0' and SZSE codes with '1'; we
    try '0' first and fall back to '1' when parsing fails. Codes that
    fail under both prefixes are skipped instead of crashing the run.
    """
    with open(codepath, 'r') as codes:
        for line in codes:
            code = line.strip()
            if not code:
                continue  # skip blank lines left over in the code file
            for prefix in ('0', '1'):
                html = gethtml(stockurl + prefix + code + '.html')
                try:
                    stock = _parse_stock(html)
                    break
                except (IndexError, ValueError):
                    continue  # wrong exchange prefix or bad page; retry/skip
            else:
                continue  # neither prefix worked; move on to the next code
            with open('stock.txt', 'a', encoding='utf-8') as out:
                out.write(str(stock) + '\n')
def main():
    """Crawl the earnings-preview listing for stock codes, then save
    each stock's quote data to stock.txt."""
    # Paginated listing whose pages contain the 6-digit stock codes.
    listurl = ('http://quotes.money.163.com/data/caibao/yjyg_00.html'
               '?reportdate=20190930&sort=reportdate&order=desc&page=')
    stockurl = 'http://quotes.money.163.com/'
    getcodelist(listurl, 2)
    savestocklist(stockurl)


# Guard the entry point so importing this module does not start a crawl.
if __name__ == '__main__':
    main()