python爬虫练习-爬取洛谷题目

采用re正则表达式和bs4
并将结果写入problem.txt

import requests
import re
from bs4 import BeautifulSoup

# Luogu problem list page that this script downloads and parses.
url = "https://www.luogu.com.cn/problem/list"

# Browser-like User-Agent header sent with the request.
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3877.400 QQBrowser/10.8.4533.400"
}

# The timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops on an HTTP error instead of parsing an error page.
resp = requests.get(url, headers=header, timeout=10)
resp.raise_for_status()

page_content = BeautifulSoup(resp.text, "html.parser")
page_main_content = page_content.find("div", attrs={"class": "lg-container"})
if page_main_content is None:
    # Without this check the next line fails with an opaque AttributeError.
    raise RuntimeError('no <div class="lg-container"> found in the response from ' + url)

# Every <li> inside the container is treated as one candidate problem entry.
problem_list = page_main_content.find_all("li")

# NOTE(review): the pattern below is a reconstruction. The published source
# lost its HTML tags and named-group labels; what survived shows two lazy
# groups between <li> and </li>, and the loop below reads the group "name".
# The first group's label ("pid") and the exact form of the <a ...> tag are
# assumptions -- confirm against the live page markup.
# Sample item (visible text taken from the original test string; tags assumed):
#   <li>P1035 <a href="...">[NOIP2002 普及组] 级数求和</a></li>
obj = re.compile(r'<li>(?P<pid>.*?)<a\b[^>]*>(?P<name>.*?)</a></li>')

# Echo each matched problem name and write it to problem.txt, one per line.
with open("problem.txt", mode="w", encoding="utf-8") as f:
    for i in problem_list:
        for j in obj.finditer(str(i)):
            print(j.group("name"))
            f.write(j.group("name") + "\n")

    你可能感兴趣的:(python,python爬虫,python,爬虫)