python 爬取东方财富股吧论坛个股评论信息

import os
import re
import sys
import urllib
import urllib.request#在python3.x中没有urllib2,必须使用此语句
from urllib import request  
from bs4 import BeautifulSoup
import requests            #Beautiful Soup是一个可以从HTML或XML文件中提取结构化数据的Python库 
    
        
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
referer = 'https://tushare.pro/login?next=%2Fnews%2Fnews_sina'
headers = {
         'User-Agent' : user_agent,
         'Host' : 'tushare.pro',
         'Origin' : 'https://tushare.pro',
         'Referer' : referer
 }
         
        
#downpicture = urllib.request.urlopen('http://guba.eastmoney.com/list,603121.html').read()

stockPageRequest = urllib.request.urlopen('http://guba.eastmoney.com/list,603121.html')
htmlTitleContent = str(stockPageRequest.read(), 'utf-8')
#正则匹配标题
titlePattern = re.compile('title="(.*?)"', re.S)
p_title = 'title="(.*?)"(.*?)'
title = re.findall(p_title,htmlTitleContent)
#循环输出对应的标题
for i in range(len(title)):
        print(title[i])
        

你可能感兴趣的:(python)