Python学习之内建模块(5):HTMLParser

下面的代码用于解析 Python 官网的 HTML 源码,从中提取我们需要的 Python 会议信息:名称、时间和地点。

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from html.parser import HTMLParser
from html.entities import name2codepoint

class MyHTMLParser(HTMLParser):
    """Print conference title, time and location found in an HTML page.

    The parser is a small state machine driven by start tags:

    * an element whose ``class`` contains ``event-title`` opens a record and
      prints ``{`` followed by ``title:``;
    * a ``<time>`` element prints ``time: `` plus its timestamp attribute;
    * an element whose ``class`` contains ``event-location`` prints
      ``location:``; the next text chunk closes the record with ``}``.

    While a field is active every text chunk is printed stripped.  Only the
    location state ends after one chunk; the title and time states stay
    active until another recognised start tag replaces them, so all text
    chunks in between (including whitespace-only ones) are printed as well.
    """

    # Values held by ``self._mark``: the field that incoming text belongs to.
    _MARK_IDLE = 0
    _MARK_TITLE = 1
    _MARK_TIME = 2
    _MARK_LOCATION = 3

    def __init__(self):
        """Create the parser in the idle state (no field active)."""
        super().__init__()
        self._mark = self._MARK_IDLE

    @staticmethod
    def _has_class(attrs, wanted):
        """Return True if a ``class`` attribute in *attrs* lists *wanted*.

        Every attribute is inspected, not just the first one, and the class
        value is split on whitespace so multi-class elements match too.
        """
        for name, value in attrs:
            # ``value`` is None for attributes written without a value.
            if name == 'class' and value and wanted in value.split():
                return True
        return False

    @staticmethod
    def _time_value(attrs):
        """Return the stripped timestamp text of a ``<time>`` tag, or ``''``.

        The ``datetime`` attribute is preferred; when it is absent the value
        of the first attribute is used.  A tag without attributes, or whose
        chosen attribute has no value, yields an empty string instead of
        raising.
        """
        if not attrs:
            return ''
        value = dict(attrs).get('datetime')
        if value is None:
            value = attrs[0][1]
        return (value or '').strip()

    def handle_starttag(self, tag, attrs):
        """Switch state and print the field label for recognised start tags.

        Args:
            tag: Tag name as supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs; ``value`` may be ``None``.
        """
        # The order of the checks matters: a title class wins over a <time>
        # tag, which in turn wins over a location class.
        if self._has_class(attrs, 'event-title'):
            self._mark = self._MARK_TITLE
            print('{\ntitle:', end='')
        elif tag == 'time':
            self._mark = self._MARK_TIME
            print('time: %s' % self._time_value(attrs), end='')
        elif self._has_class(attrs, 'event-location'):
            self._mark = self._MARK_LOCATION
            print('location:', end='')

    def handle_data(self, data):
        """Print *data* (stripped) while a field is active.

        A location chunk is followed by a newline and ``}`` to close the
        record, after which the parser returns to the idle state.  Title and
        time chunks leave the state unchanged.
        """
        if self._mark == self._MARK_IDLE:
            return
        closing = '\n}' if self._mark == self._MARK_LOCATION else ''
        # Two positional arguments: print() puts a single space between the
        # text and the (possibly empty) closing marker.
        print(data.strip(), closing)
        if self._mark == self._MARK_LOCATION:
            self._mark = self._MARK_IDLE

    def handle_endtag(self, tag):
        """Ignore end tags; they do not affect the state machine."""

    def handle_startendtag(self, tag, attrs):
        """Ignore self-closing tags.

        Overriding this keeps the base class from forwarding such tags to
        ``handle_starttag``.
        """

    def handle_comment(self, data):
        """Ignore HTML comments."""

    def handle_entityref(self, name):
        """Ignore named character references."""

    def handle_charref(self, name):
        """Ignore numeric character references."""

# Load the saved HTML page as UTF-8 text.
with open(r'C:\Users\admin\Desktop\test.txt', mode='r', encoding='utf-8') as f:
    raw_text = f.read()
# Skip the first character of the file before parsing
# (presumably a leading BOM -- TODO confirm against the input file).
s = raw_text[1:]
# Run the document through the parser defined above.
parser = MyHTMLParser()
parser.feed(s)

你可能感兴趣的:(Python学习之内建模块(5):HTMLParser)