# 爬虫程序(获取新闻网站的URL) 2018-11-03 -- crawler that collects article URLs from a news site


import pymysql

import requests as re

from bs4 import BeautifulSoup

# Crawl listing pages 1..388 of http://by.cuc.edu.cn/zcyw/ and print, for each
# page, the page number followed by every headline's link target and text.
# NOTE: `re` is the `requests` library (imported under that alias at the top
# of the file), not the standard-library regex module.
try:
    for i in range(1, 389):
        url = "http://by.cuc.edu.cn/zcyw/" + str(i)
        # A timeout keeps one unresponsive page from hanging the crawl forever.
        r = re.get(url, timeout=10)
        soup = BeautifulSoup(r.text, 'html.parser')
        # Select <h3 class="tit"> elements. The previous attrs={'class','tit'}
        # was a set, not a dict, and also matched a class literally named
        # "class".
        title = soup.find_all('h3', class_='tit')
        print(i)
        for t in title:
            link = t.find('a')
            if link is None:
                # Heading without a link: there is no URL to report, skip it
                # instead of aborting the whole crawl with an IndexError.
                continue
            # Read the href attribute directly rather than slicing the tag's
            # string form at fixed offsets, which depended on attribute order
            # and on a `target` attribute being present.
            print(link.get('href', ''))
            print(t.get_text())
except re.RequestException as exc:
    # Only network/HTTP failures are expected here; anything else (including
    # Ctrl-C) is allowed to propagate instead of being silently swallowed.
    print("error", url, exc)

# 你可能感兴趣的:(爬虫程序(获取新闻网站的URL)2018-11-03)