# A simple wrapper for scraping Tencent News (qq.com) content with urllib.

import urllib.request
import re


def get_new_qq(url: str, pat: str, file: str) -> list:
    """Fetch a page, extract every regex match, and save the matches to a file.

    The response body is decoded as GBK (undecodable bytes are dropped) and
    stripped of surrounding whitespace before matching.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.
        pat: Regular expression applied to the decoded page with ``findall``.
        file: Path of the output file. It is overwritten with ``str(matches)``.

    Returns:
        The list produced by ``re.findall`` (strings, or tuples when the
        pattern has several groups).
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url) as response:
        data = response.read().decode("gbk", "ignore").strip()

    res = re.findall(pat, data)

    # Explicit UTF-8 so non-ASCII matches are written reliably regardless of
    # the platform's default locale encoding.
    with open(file, "w", encoding="utf-8") as f:
        f.write(str(res))
    return res


if __name__ == '__main__':
    # Script entry point: scrape the qq.com front page and print the matches.
    url = "https://www.qq.com/?pgv_ref=1"
    # NOTE(review): the original pattern literal was destroyed when the page
    # this code was copied from rendered its HTML tags as list bullets. The
    # pattern below is a reconstruction -- confirm it against the real markup.
    pat = r"<li>(.*)</li>"
    file = "../腾讯新闻.txt"
    print(get_new_qq(url, pat, file))

    # Blog page footer: "You may also be interested in: (Python)"