# 获取节假日信息(来源:国务院)
import datetime, re, os, csv
from bs4 import BeautifulSoup
from urllib import parse, request


class Holiday:
    """Build a day-by-day calendar for one year from a published holiday notice.

    The notice is located through the search page at ``self.url``; its text is
    parsed into holiday / make-up-workday dates, merged with ordinary weekdays
    and weekends, and can be exported to CSV.
    """

    # Ordinal characters that open each holiday item of the notice ("一、元旦...").
    _ITEM_PREFIXES = "一二三四五六七"
    # Holiday name sits between the first "、" and the colon (full- or half-width).
    _TITLE_RE = re.compile(r"、(.+?)[::]")
    # "共N天" states the length of the holiday; N may have more than one digit.
    _TOTAL_RE = re.compile(r"共(\d+)天")
    # "M月D日", optionally preceded by an explicit "YYYY年".
    _DATE_RE = re.compile(r"(?:(\d{4})年)?(\d{1,2})月(\d{1,2})日")
    _MAKEUP_TAG = "补班"
    _WORKING_TAGS = ("补班", "工作日")

    def __init__(self, year=None):
        """Prepare the search query and the date range for *year*.

        :param year: calendar year as an int; defaults to the current year.
        """
        self.url = "http://sousuo.gov.cn/s.htm?"
        self.year = year if year else datetime.datetime.now().year
        self.oneDay = datetime.timedelta(days=1)
        self.kwargs = {"t": "zhengce", "q": "{}节假日".format(self.year)}
        self.firstDay = datetime.datetime(self.year, 1, 1)
        self.finalDay = datetime.datetime(self.year + 1, 1, 1) - self.oneDay
        self.totalDay = (self.finalDay - self.firstDay + self.oneDay).days

    @staticmethod
    def get_url(url, kwargs=None, timeout=5):
        """Fetch a page and return it parsed with BeautifulSoup (lxml parser).

        :param url: base URL; when *kwargs* is given it is appended url-encoded.
        :param kwargs: optional dict of query parameters.
        :param timeout: socket timeout in seconds.
        """
        if kwargs:
            url = url + parse.urlencode(kwargs)
        # Close the HTTP response deterministically instead of leaking it.
        with request.urlopen(url=url, timeout=timeout) as response:
            return BeautifulSoup(response.read(), "lxml")

    def _parse_dates(self, text):
        """Return a datetime for every "M月D日" (optionally "Y年M月D日") in *text*.

        Dates without an explicit year are placed in ``self.year``.
        """
        return [
            datetime.datetime(int(y) if y else self.year, int(m), int(d))
            for y, m, d in self._DATE_RE.findall(text)
        ]

    def holidays(self, data: list):
        """Parse the notice lines into a ``{"YYYY-MM-DD": label}`` mapping.

        Holiday days map to the holiday's name; make-up workdays map to "补班".

        :param data: lines of the notice text.
        :return: dict keyed by ISO date string. Keys may fall outside
            ``self.year`` when the notice names dates in an adjacent year.
        """
        result = {}
        for raw in data:
            row = raw.strip()
            if not row or row[0] not in self._ITEM_PREFIXES:
                continue

            total_match = self._TOTAL_RE.search(row)
            if total_match:
                holiday_str = row[:total_match.start()]
                workday_str = row[total_match.end():]
                total = int(total_match.group(1))
            else:
                # NOTE(review): assumes an item without "共N天" describes a
                # single-day holiday with no make-up workdays -- confirm
                # against the published notice text.
                holiday_str, workday_str, total = row, "", 1

            title_match = self._TITLE_RE.search(holiday_str)
            holiday_dates = self._parse_dates(holiday_str)
            if not title_match or not holiday_dates:
                # Not a holiday item after all; nothing to record.
                continue
            title = title_match.group(1)

            # The first date is the start of the holiday; the stated total
            # gives its length, so cross-month and cross-year spans work.
            start_date = holiday_dates[0]
            for offset in range(total):
                day = start_date + self.oneDay * offset
                result[day.strftime("%Y-%m-%d")] = title

            # Every date after the total is a make-up workday, however many.
            for workday in self._parse_dates(workday_str):
                result[workday.strftime("%Y-%m-%d")] = self._MAKEUP_TAG
        return result

    def all_days(self, holidays: dict):
        """Combine parsed holidays with every day of the year.

        :param holidays: mapping as returned by :meth:`holidays`.
        :return: list of ``{"Date", "Holiday", "Tag"}`` dicts, one per day of
            ``self.year`` in chronological order.
        """
        result = []
        date = self.firstDay
        for _ in range(self.totalDay):
            date_str = date.strftime("%Y-%m-%d")
            label = holidays.get(date_str)
            if label:
                tag = "补班" if label == self._MAKEUP_TAG else "假期休息"
                row = {"Date": date_str, "Holiday": label, "Tag": tag}
            else:
                # weekday(): Monday is 0, so 0-4 are Monday to Friday.
                tag = "工作日" if date.weekday() <= 4 else "周末休息"
                row = {"Date": date_str, "Holiday": "", "Tag": tag}
            result.append(row)
            date += self.oneDay
        return result

    def get_data(self):
        """Download the notice for ``self.year`` and return the full calendar.

        :return: list of row dicts as produced by :meth:`all_days`.
        :raises SystemExit: when the search page reports no results (the
            message is printed first).
        :raises RuntimeError: when the page layout is not what the scraper
            expects.
        """
        resp = self.get_url(url=self.url, kwargs=self.kwargs)
        links = resp.select("a[href][onclick][target]")
        if not links:
            notice = resp.select("div[class='jg_box'] p")
            if notice and "抱歉" in notice[0].get_text():
                print("抱歉, 没有找到相关结果")
                raise SystemExit
            raise RuntimeError("search page contained no result link")
        href = links[0]["href"]

        cells = self.get_url(url=href).select("td[class='b12c']")
        if not cells:
            raise RuntimeError("notice page contained no td.b12c content cell")
        text = cells[0].get_text().split("\n")
        return self.all_days(self.holidays(text))

    def export(self, filepath=None, filename=None):
        """Write the calendar to a CSV file.

        :param filepath: target directory; defaults to the working directory.
        :param filename: file name without extension; defaults to
            ``"<year>Holiday"``.
        """
        data = self.get_data()
        filename = filename + ".csv" if filename else f"{self.year}Holiday.csv"
        path = os.path.join(filepath, filename) if filepath else filename
        with open(path, "w", newline="", encoding="utf-8-sig") as f:
            writer = csv.DictWriter(f, ["Date", "Holiday", "Tag"])
            writer.writeheader()
            writer.writerows(data)

    @staticmethod
    def get_workday(filepath, date=None, which="last"):
        """Find the nearest working day before or after a date.

        :param filepath: CSV file previously written by :meth:`export`.
        :param date: reference date as ``"YYYY-MM-DD"``; defaults to now (the
            returned value then keeps the current time of day).
        :param which: ``"last"`` to search backwards, anything else to search
            forwards.
        :return: ``datetime.datetime`` of the working day found. At most 10
            days are examined; if none is a working day, the day 10 days away
            is returned.
        """
        # Read with the same encoding export() writes, so the Chinese tags
        # decode correctly whatever the platform default is.
        with open(filepath, "r", newline="", encoding="utf-8-sig") as f:
            rows = csv.reader(f)
            next(rows, None)  # skip the header row
            data = {row[0]: row[2] for row in rows if len(row) >= 3}

        if date:
            day = datetime.datetime.strptime(date, "%Y-%m-%d")
        else:
            day = datetime.datetime.now()
        step = datetime.timedelta(days=-1 if which == "last" else 1)
        for _ in range(10):
            day += step
            if data.get(day.strftime("%Y-%m-%d")) in Holiday._WORKING_TAGS:
                break
        return day


if __name__ == '__main__':
    Holiday(year=2021).export(filepath=None, filename=None)
# 本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!