第5课-中国天气网爬虫案例

一、中国天气网爬虫案例

#中国天气网爬虫

import  requests
from pyecharts.charts import Bar
from bs4 import BeautifulSoup
import copy

import html5lib
# Accumulator for scraped rows; weather_spider_dome() appends a shallow copy
# of ``data`` here for every city row it reads.
datas = list()

# Reusable record template: the scraper overwrites these fields in place and
# snapshots the dict into ``datas``.
data = dict.fromkeys(("city", "day", "higher_temp", "lower_temp"))

# HTTP headers passed to requests.get() for every page download.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
    "Referer": "http://www.weather.com.cn/textFC/db.shtml",
}
def _first_text(cell):
    """Return the first non-blank text fragment found inside *cell*."""
    return list(cell.stripped_strings)[0]


def _record_rows(trs):
    """Append one snapshot of ``data`` to ``datas`` for every city row in *trs*.

    Row 0 is read as the header that carries the forecast date, row 1 is
    skipped, and every row from index 2 onwards is read as a city row.
    """
    for i, tr in enumerate(trs):
        tds = tr.find_all("td")
        if i == 0:
            # The date is the text between the parentheses of the third
            # header cell.
            header = tds[2].string
            data["day"] = header[header.find("(") + 1:header.find(")")]
        elif i >= 2:
            # Row 2 is read one cell further to the right than later rows --
            # presumably because it starts with an extra province cell that
            # spans the whole group (TODO confirm against the live page).
            shift = 1 if i == 2 else 0
            data["city"] = _first_text(tds[shift])
            data["higher_temp"] = tds[3 + shift].string
            data["lower_temp"] = tds[6 + shift].string
            # ``data`` is reused for every row, so store a copy.
            datas.append(copy.copy(data))


def weather_spider_dome(url):
    """Download one forecast page and append its city rows to ``datas``.

    The page is fetched with ``HEADERS``, decoded as UTF-8 and parsed with
    the ``html5lib`` parser.  Every element with class ``conMidtab`` is
    scanned:

    * when *url* contains ``"gat"``, only the tables inside the first
      ``conMidtab2`` element of each ``conMidtab`` are read;
    * otherwise the rows of every ``conMidtab2`` element are read.

    Args:
        url: Address of the page to scrape.

    Returns:
        None.  Results are appended to the module-level ``datas`` list as
        dicts with the keys ``city``, ``day``, ``higher_temp`` and
        ``lower_temp``; the shared ``data`` template is mutated on the way.

    Raises:
        Whatever ``requests.get`` raises on network failure,
        ``UnicodeDecodeError`` if the body is not valid UTF-8, and
        ``IndexError``/``AttributeError`` if a row has fewer cells than
        expected or the date cell holds no single string.
    """
    html = requests.get(url=url, headers=HEADERS).content.decode("utf-8")
    soup = BeautifulSoup(html, "html5lib")
    # str.find() returns the int -1 on a miss, so it must never be compared
    # with the string "-1"; a membership test states the intent directly.
    is_gat = "gat" in url
    for conMidtab in soup.find_all(attrs={"class": "conMidtab"}):
        if is_gat:
            first_block = conMidtab.find(attrs={"class": "conMidtab2"})
            if first_block is None:
                # Nothing to read in this container; skip it instead of
                # failing on None.
                continue
            for table in first_block.find_all("table"):
                _record_rows(table.find_all("tr"))
        else:
            for conMidtab2 in conMidtab.find_all(attrs={"class": "conMidtab2"}):
                _record_rows(conMidtab2.find_all("tr"))




if __name__ == "__main__":
    # Pages to crawl, one per entry; each is handed to weather_spider_dome().
    urls = [
        "http://www.weather.com.cn/textFC/hb.shtml",
        "http://www.weather.com.cn/textFC/db.shtml",
        "http://www.weather.com.cn/textFC/hd.shtml",
        "http://www.weather.com.cn/textFC/hz.shtml",
        "http://www.weather.com.cn/textFC/hn.shtml",
        "http://www.weather.com.cn/textFC/xb.shtml",
        "http://www.weather.com.cn/textFC/xn.shtml",
        "http://www.weather.com.cn/textFC/gat.shtml",
    ]

    # Scrape every page first, then dump the collected records one per line.
    for url in urls:
        weather_spider_dome(url)
    for record in datas:
        print(record)

    # Disabled sketch: plot the high/low temperatures of a single day as a
    # pyecharts bar chart (each city is listed twice so the labels line up
    # with the two temperature values).
    # cities = []
    # temp = []
    # for i in datas:
    #     if i["day"] == "12月11日":
    #         cities.append(i["city"])
    #         cities.append(i["city"])
    #         temp.append(i["higher_temp"])
    #         temp.append(i["lower_temp"])
    # print(cities)
    # print(temp)
    # bar = Bar()
    # bar.add_xaxis(cities)
    # bar.add_yaxis("12月11日", temp)
    # bar.render("weather.html")

  

你可能感兴趣的:(第5课-中国天气网爬虫案例)