【Python】自动获取法定假日


数据源


官网, 一年自动获取一次即可
体验: 本年节假日接口体验

national_holidays.csv:

,name,startDate,endDate,year
0,元旦,01-01,01-01,2021
1,春节,02-11,02-17,2021
2,清明节,04-03,04-05,2021
3,劳动节,05-01,05-05,2021
4,端午节,06-12,06-14,2021
5,中秋节,09-19,09-21,2021
6,国庆节,10-01,10-07,2021

statutory_holiday_process.py

import datetime
import re
import pandas as pd
import requests

URL_PATTERN = f'
STATUTORY_HOLIDAY_TEXT_PATTERN = '

.*?、(.*?):(.*?)

'
Y_M_D_BETWEEN_PATTERN = r"(\d{4})年(\d{1,2})月(\d{1,2})日至(\d{4})年(\d{1,2})月(\d{1,2})日" Y_M_D_PATTERN = r"(\d{4})年(\d{1,2})月(\d{1,2})日" M_D_BETWEEN_PATTERN = r"(\d{1,2})月(\d{1,2})日至(\d{1,2})月(\d{1,2})日" M_D_BETWEEN_PATTERN2 = r"(\d{1,2})月(\d{1,2})日至(\d{1,2})日" M_D_PATTERN = r"(\d{1,2})月(\d{1,2})日" STATUTORY_HOLIDAY_PATH = "ETL/statutory_holiday_server/statutory_holiday/" GOV_OPEN_INFO_URL = r"http://sousuo.gov.cn/s.htm?t=zhengce&q=%E8%8A%82%E5%81%87%E6%97%A5%E5%AE%89%E6%8E%92%E9%80%" \ r"9A%E7%9F%A5&timetype=&mintime=&maxtime=&sort=&sortType=&searchfield=&pcodeJiguan=&childtype=" \ r"&subchildtype=&tsbq=&pubtimeyear=&puborg=&pcodeYear=&pcodeNum=&filetype=&p=&n=&inpro=&sug_t=" CHINA_STATUTORY_HOLIDAY = ["元旦", "春节", "清明节", "劳动节", "端午节", "中秋节", "国庆节"] def get_statutory_holiday_info_url(): YEAR = str(datetime.datetime.now().year) response = requests.get(url=GOV_OPEN_INFO_URL) text = response.text result = re.findall(URL_PATTERN % YEAR, text, re.M) if result: return result[0] raise ValueError("Cannot get statutory holiday info url") def get_date_info(text): "2018年123月30日至2019年1月1日放假调休" "2月4日至10日" "10月1日至7日放假调休" "4月4日放假,与周末连休" "1月27日至2月2日" date_info = {} YEAR = str(datetime.datetime.now().year) result = re.search(Y_M_D_BETWEEN_PATTERN, text) if result: start_year = result[1] start_month = result[2] if len(result[2]) == 2 else f"0{result[2]}" start_day = result[3] if len(result[3]) == 2 else f"0{result[3]}" date_info["start_date"] = "-".join([start_year, start_month, start_day]) end_year = result[4] end_month = result[5] if len(result[5]) == 2 else f"0{result[5]}" end_day = result[6] if len(result[6]) == 2 else f"0{result[6]}" date_info["end_date"] = "-".join([end_year, end_month, end_day]) return date_info result = re.search(M_D_BETWEEN_PATTERN, text) if result: start_year = YEAR start_month = result[1] if len(result[1]) == 2 else f"0{result[1]}" start_day = result[2] if len(result[2]) == 2 else f"0{result[2]}" date_info["start_date"] = "-".join([start_year, start_month, start_day]) end_year = YEAR end_month = result[3] if len(result[3]) == 2 else f"0{result[3]}" end_day = result[4] if len(result[4]) == 2 else f"0{result[4]}" date_info["end_date"] = "-".join([end_year, end_month, end_day]) return date_info result = re.search(M_D_BETWEEN_PATTERN2, text) if result: start_year = YEAR start_month = result[1] if len(result[1]) == 2 else f"0{result[1]}" start_day = result[2] if len(result[2]) == 2 else f"0{result[2]}" date_info["start_date"] = "-".join([start_year, start_month, start_day]) end_year = YEAR end_month = start_month end_day = result[3] if len(result[3]) == 2 else f"0{result[3]}" date_info["end_date"] = "-".join([end_year, end_month, end_day]) return date_info result = re.search(Y_M_D_PATTERN, text) if result: start_year = result[1] start_month = result[2] if len(result[2]) == 2 else f"0{result[2]}" start_day = result[3] if len(result[3]) == 2 else f"0{result[3]}" date_info["end_date"] = date_info["start_date"] = "-".join([start_year, start_month, start_day]) return date_info result = re.search(M_D_PATTERN, text) if result: start_year = YEAR start_month = result[1] if len(result[1]) == 2 else f"0{result[1]}" start_day = result[2] if len(result[2]) == 2 else f"0{result[2]}" date_info["end_date"] = date_info["start_date"] = "-".join([start_year, start_month, start_day]) return date_info raise ValueError("statutory holiday info Error") def get_statutory_holiday_info(): statutory_holiday_info = {} response = requests.get(url=get_statutory_holiday_info_url()) text = response.content.decode("utf8") result = re.findall(STATUTORY_HOLIDAY_TEXT_PATTERN, text, re.M) for info in result: holiday_name_info = info[0] holiday_name = [holiday for holiday in CHINA_STATUTORY_HOLIDAY if holiday == holiday_name_info] if holiday_name: statutory_holiday_info[holiday_name[0]] = get_date_info(info[1].split("。")[0]) else: holiday_name = [holiday_name_info for holiday in CHINA_STATUTORY_HOLIDAY if holiday in holiday_name_info] statutory_holiday_info[holiday_name[0]] = get_date_info(info[1].split("。")[0]) return statutory_holiday_info def statutory_holiday_info_to_csv(): YEAR = str(datetime.datetime.now().year) statutory_holiday_info = get_statutory_holiday_info() statutory_holiday = [] for name, info in statutory_holiday_info.items(): statutory_holiday.append({"name": name, "startDate": info["start_date"][-5:], "endDate": info["end_date"][-5:], "year": YEAR}) df = pd.DataFrame(statutory_holiday) df.to_csv(STATUTORY_HOLIDAY_PATH + "national_holidays.csv", encoding="utf8")

get_statutory_holiday() 获取信息

STATUTORY_HOLIDAY_PATH = "./"
def get_statutory_holiday():
    year = datetime.datetime.now().year
    df = pd.read_csv(STATUTORY_HOLIDAY_PATH + "national_holidays.csv", encoding="utf8")

    df = df.loc[df["year"] == year]
    if df.empty:
        statutory_holiday_info_to_csv()

    df = pd.read_csv(STATUTORY_HOLIDAY_PATH + "national_holidays.csv", encoding="utf8")
    data = pd.DataFrame(df, columns=["name", "startDate", "endDate"]).to_dict(orient="records")
    response = json.dumps(data, indent=4, ensure_ascii=False)
    return response

你可能感兴趣的:(数据获取,Tool工具,爬虫,python,开发语言,后端)