Element 转 HTML 字符串
from html.parser import HTMLParser
from lxml import etree, html
import requests
# Fetch the page; the timeout keeps the request from blocking indefinitely.
response = requests.get('https://www.baidu.com', timeout=10)
# `etree` is a module and cannot be called: parse with etree.HTML().
# The raw bytes are passed so the parser can determine the document
# encoding itself instead of relying on requests' guessed text decoding.
html_element = etree.HTML(response.content)
# Serialize the parsed element back to an HTML string;
# encoding='unicode' makes tostring() return str directly.
html_text = html.tostring(html_element, encoding='unicode')
print(html_text)
html清洗标签
from html.parser import HTMLParser
from lxml import etree, html
import requests
from lxml.html.clean import Cleaner
# Fetch the page; the timeout keeps the request from blocking indefinitely.
response = requests.get('https://www.baidu.com', timeout=10)
# `etree` is a module and cannot be called: parse with etree.HTML().
# The raw bytes are passed so the parser can determine the document
# encoding itself instead of relying on requests' guessed text decoding.
html_element = etree.HTML(response.content)
1. etree清洗标签
# The bare name `html` is bound to lxml.html by the imports above, and that
# module has no unescape(); import the standard-library function explicitly.
from html import unescape

# Serialize the element tree to pretty-printed markup (bytes -> str).
content = etree.tostring(html_element, pretty_print=True).decode('utf-8')
# Turn character/entity references (e.g. &#20320; or &amp;) back into text.
desc = unescape(content)
print(desc)
2. 使用 lxml.html.clean.Cleaner 清洗标签
# Build a Cleaner with default settings, then switch on the options that
# drop JavaScript and style content. The flags are assigned after
# construction (not passed to the constructor) to keep the exact same
# configuration as plain attribute assignment.
cleaner = Cleaner()
for option_name in ('javascript', 'style'):
    setattr(cleaner, option_name, True)

# Run the cleaner over the parsed document.
result = cleaner.clean_html(html_element)
清洗标签的办法还有很多种(如标准库 html 模块、re 正则表达式、XPath 等),挑一个自己最顺手的即可。