import re
from requests_html import HTMLSession
from selenium import webdriver
from bisect import bisect
def parseAndReplaceSvgNode(d_tags):
    """Replace each sprite-backed element with the character it displays.

    For every element, the ``background-position`` CSS value is read, the
    two integer offsets in it are extracted, and ``bisect`` is used to map
    them to a row/column in the module-level lookup tables. The element is
    then swapped in the live DOM for a plain text node holding that
    character.

    Relies on the module-level globals ``browser``, ``xs``, ``ys`` and
    ``data`` being populated before this function is called.

    Args:
        d_tags: Iterable of Selenium elements whose background image is the
            SVG sprite.

    Raises:
        ValueError: If ``background-position`` does not contain exactly two
            digit runs.
        IndexError: If an offset falls outside the lookup tables.
    """
    for d_tag in d_tags:
        position = d_tag.value_of_css_property("background-position")
        # Only digit runs are kept, so any sign on the offsets is dropped.
        x, y = map(int, re.findall(r"\d+", position))
        num = data[bisect(ys, y)][bisect(xs, x)]
        # Replace the node with plain text. The character is passed as a
        # script argument rather than interpolated into the JavaScript
        # source, so quotes or backslashes cannot break the script.
        browser.execute_script(
            """
            var element = arguments[0];
            element.parentNode.replaceChild(
                document.createTextNode(arguments[1]), element);
            """,
            d_tag,
            num,
        )
browser = webdriver.Chrome()
try:
    # NOTE(review): this URL looks truncated ("..."); confirm the full
    # address before running.
    url = 'http://www.porters.vip/confus...'
    browser.get(url)

    # NOTE(review): the find_element_by_* helpers used below were removed in
    # Selenium 4.3; confirm the installed Selenium version supports them.
    d_tag = browser.find_element_by_css_selector('d[class^="vhk"]')
    background_image_url = d_tag.value_of_css_property("background-image")
    # Strip the leading 'url("' and trailing '")' wrapper from the CSS value.
    svg_url = background_image_url[5:-2]

    session = HTMLSession()
    html_session = session.get(svg_url)

    # Lookup tables read by parseAndReplaceSvgNode:
    #   xs   - x coordinates, taken from the first <text> row only
    #   ys   - y coordinate of each <text> row
    #   data - the characters of each <text> row
    xs = []
    ys = []
    data = []
    for text_tag in html_session.html.xpath(r"//text"):
        if not xs:
            xs.extend(map(int, text_tag.xpath(".//@x")[0].split()))
        ys.append(int(text_tag.xpath(".//@y")[0]))
        data.append(list(text_tag.xpath(".//text()")[0]))

    # Replace every SVG-sprite node in the DOM with its text in one pass.
    parseAndReplaceSvgNode(
        browser.find_elements_by_css_selector('d[class^="vhk"]'))

    # Remove the <a> tag inside the title.
    element = browser.find_element_by_css_selector('.title a')
    browser.execute_script("""
    var element = arguments[0];
    element.parentNode.removeChild(element);
    """, element)

    # Title
    title = browser.find_element_by_class_name("title").text
    # Comments
    comment = browser.find_element_by_class_name("comments").text
    # Average price per person
    avgPrice = browser.find_element_by_class_name('avgPriceTitle').text
    # Taste, environment and service scores
    comment_score_tags = browser.find_elements_by_css_selector(
        ".comment_score .item")
    taste = comment_score_tags[0].text
    environment = comment_score_tags[1].text
    service = comment_score_tags[2].text
    # Address
    address = browser.find_element_by_css_selector(
        '.address .address_detail').text
    # Specialties
    characteristic = browser.find_element_by_css_selector(
        '.characteristic .info-name').text
    # Phone
    phone = browser.find_element_by_class_name("more").text

    print(title, comment, avgPrice, taste, environment,
          service, address, characteristic, phone)
finally:
    # Always shut the browser down, even if scraping fails part-way.
    browser.quit()