# 获取城市天气数据(Python 代码)

import datetime
import pandas
import requests
import json
import pymongo
import time
import numpy as np
import dateutil.parser


def request(year, month, codenum, timeout=10):
    """Fetch one month of calendar forecast data for a city from weather.com.cn.

    Args:
        year: Four-digit year, e.g. ``"2021"``.
        month: Two-digit, zero-padded month, e.g. ``"04"``.
        codenum: City code used by weather.com.cn in its URLs.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection, which would hang the whole crawl.

    Returns:
        The ``requests.Response`` object of the GET request.

    Raises:
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    url = "http://d1.weather.com.cn/calendar_new/{0}/{1}_{0}{2}.html".format(
        year, codenum, month
    )
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
        # The Referer mirrors the 40-day forecast page of the same city.
        "Referer": "http://www.weather.com.cn/weather40d/{}.shtml".format(codenum),
    }
    return requests.get(url, headers=headers, timeout=timeout)


def parse(res):
    """Decode a response body and return the JSON payload it carries.

    The body is decoded as UTF-8, its first 11 characters (a non-JSON
    prefix) are discarded, and the remainder is parsed as JSON.

    Args:
        res: Object exposing the raw body bytes as ``res.content``.

    Returns:
        The deserialized JSON value.
    """
    prefix_length = 11  # number of leading characters that precede the JSON text
    body_text = res.content.decode('utf-8')
    payload = body_text[prefix_length:]
    return json.loads(payload)


def date_now():
    """Return the current local date and time as a naive ``datetime``."""
    return datetime.datetime.now()


def date_40(today):
    """Return the date 40 days after ``today`` formatted as ``YYYY-MM-DD``.

    Args:
        today: A ``datetime`` (or ``date``) to offset from.

    Returns:
        The shifted date as a ``'%Y-%m-%d'`` string.
    """
    forty_days = datetime.timedelta(days=40)
    return (today + forty_days).strftime('%Y-%m-%d')


def save(list, local):
    """Upsert forecast entries dated within the next 40 days into MongoDB.

    For every entry a sub-dictionary is built with the keys of ``subkey``
    renamed to their Chinese field names. If either the forecast maximum or
    minimum temperature is empty, both are replaced by the historical
    averages. The region is added, the date is normalized to ``YYYY-MM-DD``,
    and entries whose date lies between today and today + 40 days
    (inclusive) are written to the module-level ``forecast`` collection.

    Args:
        list: Iterable of raw forecast dicts; each must contain every key of
            ``subkey``. (The name shadows the builtin but is kept so existing
            callers are unaffected.)
        local: Region name stored under the ``"地区"`` field.

    Raises:
        KeyError: If an entry lacks one of the keys listed in ``subkey``.
    """
    # Raw key -> field name under which the value is stored.
    subkey = {'date': '日期', 'max': '最高温度', 'min': '最低温度', 'hgl': '降水概率', 'fe': '节日', 'wk': '星期', 'time': '发布时间',
              'hmax': '历史平均最高温度', 'hmin': '历史平均最低温度', 'w1': '天气'}

    # The accepted date window is identical for every entry: compute it once.
    today = date_now()
    window_start = today.strftime('%Y-%m-%d')
    window_end = date_40(today)

    for entry in list:
        # Keep only the keys of interest, renamed to their Chinese labels.
        subdict = {value: entry[key] for key, value in subkey.items()}
        if subdict['最高温度'] == '' or subdict['最低温度'] == '':
            # No forecast temperatures: fall back to the historical averages.
            subdict['最高温度'] = subdict['历史平均最高温度']
            subdict['最低温度'] = subdict['历史平均最低温度']

        subdict["地区"] = local
        subdict["日期"] = dateutil.parser.parse(subdict["日期"]).strftime('%Y-%m-%d')

        # Zero-padded ISO dates order chronologically when compared as strings.
        if window_start <= subdict["日期"] <= window_end:
            # Collection.update() was removed in PyMongo 4; update_one() with
            # upsert=True inserts the document only when no identical one exists.
            forecast.update_one(subdict, {'$setOnInsert': subdict}, upsert=True)


# Load the city spreadsheet without treating any row as a header, so columns
# are addressed by integer position, then report how many rows it contains.
df = pandas.read_excel(r"salecity.xlsx", header=None, engine='openpyxl')
max_row = len(df.index)
print("表格地区数:", max_row)


def messcol_2(num_row, num_col):
    """Return the content of the cell at (``num_row``, ``num_col``) of ``df``.

    Args:
        num_row: Row label in the module-level DataFrame ``df``.
        num_col: Column label in ``df``.

    Returns:
        The cell value, converted to a plain Python object via ``tolist()``.
    """
    # Select a 1x1 frame, turn it into a nested list and unwrap the value.
    return np.array(df.loc[[num_row], [num_col]]).tolist()[0][0]


if __name__ == '__main__':
    # Crawl the forecast pages for every city in the spreadsheet, store the
    # entries in MongoDB, then export the whole collection to data.xlsx.
    year = "2021"
    month = 4  # first month to fetch; the loop below stops before month 6
    client = pymongo.MongoClient('localhost', 27017)  # local MongoDB on port 27017
    test = client['test']  # database "test"
    forecast = test['forecast416']  # collection used by save()

    # Row 0 is skipped (presumably a header row - confirm against salecity.xlsx).
    for i in range(1, max_row):
        url = str(messcol_2(i, 1))  # second column: city code
        local = str(messcol_2(i, 0))  # first column: region name

        for j in range(month, 6):
            months = str(j).zfill(2)  # months below 10 need a leading zero
            par = parse(request(year, months, url))
            save(par, local)

    data = pandas.DataFrame(list(forecast.find()))
    # The `encoding` argument of to_excel() was removed in pandas 2.0 and now
    # raises TypeError; it had no effect on .xlsx output anyway.
    data.to_excel('data.xlsx', index=False)

    client.close()

 

# 你可能感兴趣的:(天气爬虫,python)