# -*- coding: utf-8 -*-
import requests
from lxml import etree
import pandas as pd
import tushare as ts
from redis import Redis
import hashlib
import pymysql

def stock():
    # Initialize the tushare.pro API client
    pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')

    # Create the MySQL connection
    conn1 = pymysql.connect(user='root', password='123456', database='stock', charset='utf8')
    cursor = conn1.cursor()

    # Create the Redis connection
    conn2 = Redis(host='127.0.0.1', port=6379)

    # Counters for inserted, failed, and already-existing rows
    success = 0
    fail = 0
    exist = 0

    # Codes collected for the Sina company-info crawler
    code_list = []

    # Fetch all listed stocks (ts_code and symbol) from tushare
    stock_basic = pro.stock_basic(list_status='L', fields='ts_code,symbol')
    for index, row in stock_basic.iterrows():
        ts_code = row['ts_code']
        symbol = row['symbol']
        # Rearrange '000001.SZ' into the Sina-style code 'SZ000001'
        xl_code = ts_code[-2:] + ts_code[:6]

        code_list.append(xl_code)

        # Hash the record into a unique fingerprint for Redis-based deduplication
        source = ts_code
        source_id = hashlib.sha256(source.encode()).hexdigest()
        # sadd returns 1 if the fingerprint is new to the 'data_id' set, 0 otherwise
        ex = conn2.sadd('data_id', source_id)

        # Only insert records whose fingerprint has not been seen before
        if ex == 1:
            try:
                # Parameterized query: let the driver escape the values
                # instead of interpolating them into the SQL string
                sql1 = 'insert into stk_code_list(ts_code, symbol, xl_code) values(%s, %s, %s)'
                cursor.execute(sql1, (ts_code, symbol, xl_code))
                conn1.commit()
                success += 1
            except Exception:
                conn1.rollback()
                fail += 1

        # Records already fingerprinted in Redis on a previous run
        else:
            exist += 1

    # Close the MySQL connection
    cursor.close()
    conn1.close()

    print('Rows inserted successfully: %d' % success)
    print('Rows failed to insert: %d' % fail)
    print('Rows already in MySQL: %d' % exist)

    return code_list
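
# Hypothetical setup helper: stock() assumes the stk_code_list table already
# exists; the column widths here are guesses inferred from its INSERT statement.
def ensure_table():
    conn = pymysql.connect(user='root', password='123456', database='stock', charset='utf8')
    with conn.cursor() as cursor:
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS stk_code_list (
                ts_code VARCHAR(16),
                symbol  VARCHAR(16),
                xl_code VARCHAR(16)
            )
        ''')
    conn.commit()
    conn.close()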

def companyInfo(code_list):
    basic_url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CorpInfo/stockid/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    for code in code_list:
        # Strip the exchange prefix: 'SZ000001' -> '000001'
        url = basic_url + code[2:] + '.phtml'
        resp = requests.get(url=url, headers=headers, timeout=10)
        # Sina's corp pages are GBK-encoded; set the encoding explicitly
        # so the body is not mis-decoded
        resp.encoding = 'gbk'

        tree = etree.HTML(resp.text)
        # Row 3 / column 4 of the comInfo1 table holds the listing date,
        # wrapped in an <a> tag; '//tr' tolerates a missing or implicit <tbody>
        list_date = tree.xpath('//*[@id="comInfo1"]//tr[3]/td[4]/a/text()')
        print(list_date)
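
# A hedged sketch generalizing the cell lookup above: pair the <td> texts in
# each row of comInfo1 as label -> value. The alternating label/value layout
# is an assumption inferred from the row/column indices used in companyInfo().
def parse_com_info(html):
    tree = etree.HTML(html)
    info = {}
    for tr in tree.xpath('//*[@id="comInfo1"]//tr'):
        # Collect the non-empty cell texts, then zip labels with values
        cells = [c.strip() for c in tr.xpath('./td//text()') if c.strip()]
        info.update(zip(cells[::2], cells[1::2]))
    return info
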
def main():
    code_list = stock()
    companyInfo(code_list)

if __name__=='__main__':
    main()
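
With the hypothetical ensure_table() helper above, a one-off ensure_table() call before main() prepares the schema. Because each ts_code is fingerprinted into the Redis set data_id, re-running the script only inserts codes it has not already stored, so repeated runs are safe.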
