import requests
from bs4 import BeautifulSoup
import pymongo
# MongoDB handles used by the rest of the script: database "stock",
# collection "stock_data". With connect=False the client connects on the
# first operation instead of at construction time.
client = pymongo.MongoClient(host='localhost', port=27017, connect=False)
stocks = client['stock']
stock_data = stocks['stock_data']

# Start every run from an empty collection: discard whatever a previous
# run left behind. The handle itself is the same whether or not the
# collection already exists, so only the drop is conditional.
if "stock_data" in stocks.list_collection_names():
    stock_data.drop()
def getcodeUrl(url, timeout=10):
    """Collect the text and target URL of the anchors on a listing page.

    Downloads ``url``, parses it with BeautifulSoup (lxml parser) and walks
    every ``<a>`` element except the last eight.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``(names, hrefs)`` tuple of two equally long lists. ``names[i]`` is
        the text of an anchor and ``hrefs[i]`` is that anchor's ``href``
        prefixed with ``http://quote.cfi.cn/``.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the timeout expires.
    """
    server = 'http://quote.cfi.cn/'
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'lxml')

    # The trailing eight anchors are left out; presumably they are page
    # chrome rather than stock links -- TODO confirm against the live page.
    anchors = soup.select('a')[:-8]

    nameLi = []
    hrefLi = []
    for anchor in anchors:
        href = anchor.get('href')
        if href is None:
            # An <a> without an href has nothing to crawl. Indexing it with
            # ['href'] would raise KeyError and abort the whole run.
            continue
        nameLi.append(anchor.text)
        hrefLi.append(server + href)
    return nameLi, hrefLi
def _value_after_colon(text):
    """Return the piece of ``text`` between its first and second ':'."""
    return text.split(":")[1]


def _ratio_or_zero(text):
    """Like ``_value_after_colon``, but map '--' and '正无穷大' to "0"."""
    value = _value_after_colon(text)
    return "0" if value in ("--", '正无穷大') else value


def getcodeInfo(name, code_url, timeout=10):
    """Scrape one quote page and insert its figures into ``stock_data``.

    Downloads ``code_url``, takes the ``<td>`` cells at positions 29..41 of
    the parsed page, extracts the value part of each "label:value" cell and
    inserts one document into the module-level ``stock_data`` collection.

    This is a best-effort operation: any exception raised while fetching,
    parsing, converting to ``float`` or inserting is printed and swallowed,
    so one bad page does not stop the caller's loop.

    Args:
        name: Stock name, stored unchanged under the '股票名称' key.
        code_url: Address of the quote page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        None.
    """
    try:
        # The fetch sits inside the try so that a network failure for one
        # stock is reported and skipped exactly like a parse failure.
        response = requests.get(code_url, timeout=timeout)
        soup = BeautifulSoup(response.text, 'lxml')

        # NOTE(review): select() takes a CSS selector, so the id_ keyword
        # probably does not filter by id; the 29:42 slice therefore appears
        # to index into every <td> of the page. Confirm before changing
        # either of them.
        datas = soup.select('td', id_="act_quote")
        texts = [cell.text for cell in datas[29:42]]

        # Cells 0-5 are plain "label:value" pairs.
        (today_open, high, change, turnover_rate,
         prev_close, low) = [_value_after_colon(texts[i]) for i in range(6)]

        # Cells 6 and 7 carry a unit suffix that must go before float().
        volume = _value_after_colon(texts[6].replace("手", ""))
        amount = _value_after_colon(texts[7].replace("万元", ""))

        # Cells 8 and 9 may hold a placeholder instead of a number.
        pe = _ratio_or_zero(texts[8])
        pe_adjusted = _ratio_or_zero(texts[9].replace("计算公式说明", ""))

        pb = _value_after_colon(texts[10])

        # Cell 11: the value is taken from the second space-separated piece;
        # a cell without any ':' is stored as 0.
        if ":" in texts[11]:
            eps = texts[11].split(" ")[1].split(":")[1].replace("元", "")
        else:
            eps = "0"

        # Cell 12 packs three fields into one string. The replace chain
        # inserts ':' in front of the two average-P/E labels so that a
        # single split separates labels (even indexes) from values (odd).
        h_info = texts[12].replace("平均市盈率", ":平均市盈率").replace("扣除后平均市盈率", ":扣除后平均市盈率").replace("扣除后:", ":扣除后").split(":")
        industry = h_info[1]
        avg_pe = h_info[3]
        avg_pe_adjusted = h_info[5]

        # Turn the relative industry link of cell 12 into an absolute URL
        # sorted by stock code.
        industry_url = datas[29:42][12].select('a')[0]['href'].replace("quotelist.aspx?bklb=hy&bkid=", "http://quote.cfi.cn/quotelist.aspx?sortcol=stockcode&sortway=asc&bklb=hy&bkid=")

        # NOTE(review): '还手率' is probably a typo for '换手率'; it is left
        # unchanged because it is the field name stored in the database.
        data = {
            '股票名称': name,
            '今开': float(today_open),
            '最高': float(high),
            '增幅': float(change.strip("%")),
            '还手率': float(turnover_rate.strip("%")),
            '昨收': float(prev_close),
            '最低': float(low),
            '成交量': float(volume),
            '成交金额': float(amount),
            '市盈率': float(pe),
            '扣除后市盈率': float(pe_adjusted),
            '市净率': float(pb),
            '每股收益': float(eps),
            '所属行业': industry,
            "所属产业链接": industry_url,
            '平均市盈率': float(avg_pe),
            '扣除后平均市盈率': float(avg_pe_adjusted)
        }
        stock_data.insert_one(data)
    except Exception as e:
        # Deliberately broad: report the problem and let the caller move on
        # to the next stock.
        print(e)
# Listing pages to crawl: one URL per page number 2..17.
# NOTE(review): the query string is spelled '?=t<n>'; confirm that it is not
# meant to be '?t=<n>'.
urlList = ['http://quote.cfi.cn/stockList.aspx?=t' + str(n) for n in range(2, 18)]

# Running record counter used in the progress message (1-based).
n = 1
for url in urlList:
    # Fetch each listing page once and unpack both parallel lists from that
    # single call; calling getcodeUrl twice would download the page twice.
    names, hrefs = getcodeUrl(url)
    for name, href in zip(names, hrefs):
        getcodeInfo(name, href)
        # getcodeInfo prints its own failures and does not report them to
        # the caller, so this line counts attempts rather than confirmed
        # inserts.
        print("插入成功第%d条数据" % n, name)
        n += 1