# beautiful_获取中国天气网数据 (BeautifulSoup: fetch forecast data from weather.com.cn)

import re
import os
import time
import pandas as pd
import requests
from bs4 import BeautifulSoup
import random
# Complete script: download the forecast page for city code 101010100 from
# weather.com.cn, extract one row per listed day, and save the rows to Excel.
url = 'http://www.weather.com.cn/weather/101010100.shtml'
header = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.42 Safari/537.36'}
# Optional proxy pool (disabled); pass proxies=random.choice(proxy) to requests.get to use it.
# proxy = [{'http': 'http://58.212.42.116:36708'}, {'http':'http://117.57.91.53:9999'}, {'http':'123.169.35.184:9999'}]

# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, headers=header, timeout=10)
# Fail loudly on an HTTP error instead of trying to parse an error page.
response.raise_for_status()
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'lxml')  # parse the downloaded HTML
# print(soup.prettify())  # debug aid: dump the parsed document with standard indentation

# The per-day entries are the <li> children of <ul class="t clearfix">.
ul_tag = soup.find(name='ul', class_='t clearfix')
if ul_tag is None:
    # Without this check the failure would be an opaque AttributeError on None.
    raise RuntimeError(
        "forecast list <ul class='t clearfix'> not found in the page; "
        "the page layout may have changed"
    )
li_tag = ul_tag.find_all(name='li')

datalist = []
for tag in li_tag:
    try:
        # Date label
        date = tag.find(name='h1').text

        # Weather description
        weather = tag.find(name='p', class_='wea').text

        # Minimum temperature. The maximum (<span> inside p.tem) was already
        # disabled in the original script - presumably because the span is
        # not always present; TODO confirm.
        minc = tag.find(name='p', class_='tem').i.text

        # Wind direction and wind force both live under p.win; look it up once.
        win_tag = tag.find(name='p', class_='win')
        win = win_tag.span['title']
        seed = win_tag.i.text
    except (AttributeError, TypeError, KeyError) as e:
        # A missing element shows up as None (AttributeError/TypeError) and a
        # missing attribute as KeyError. Skip only this entry so that one
        # malformed <li> does not discard the remaining days.
        print(f'skipping forecast entry with unexpected markup: {e!r}')
        continue
    datalist.append([date, weather, minc, win, seed])

df = pd.DataFrame(datalist, columns=['日期', '天气', '最低温', '风向', '风力'])
# NOTE(review): writing the legacy .xls format relies on an Excel writer engine
# that newer pandas releases may no longer provide - confirm the installed
# pandas version, or switch the output to .xlsx if that is acceptable.
df.to_excel('./中国天气.xls', index=False)

 

# 你可能感兴趣的:(爬虫,python,url,json)