猫眼电影评分字体截图识别

from io import BytesIO

import pytesseract
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By

# Open the Maoyan film page in Chrome, screenshot the rating element and run
# Tesseract OCR on the cropped image to read the score.
#
# NOTE(review): passing the chromedriver path as the first positional argument
# is only accepted by older Selenium releases; newer ones expect a Service
# object instead -- confirm against the installed Selenium version.
browser = webdriver.Chrome('./chromedriver')

try:
    browser.get("https://maoyan.com/films/1206875?_v_=yes")

    # Restyle the page so the score is rendered as black text on a white
    # banner before the screenshot is taken.
    browser.execute_script('document.querySelector(".banner").style.background = "white"')
    browser.execute_script('document.querySelector(".stonefont").style.color = "black"')

    # find_element(By.CLASS_NAME, ...) replaces the find_element_by_class_name
    # helper, which no longer exists in current Selenium releases.
    score_element = browser.find_element(By.CLASS_NAME, "stonefont")

    # Element location/size are reported in CSS pixels, while the screenshot
    # is in device pixels; on HiDPI or scaled displays the two differ by
    # window.devicePixelRatio. Fall back to 1.0 if the browser reports nothing.
    scale = float(browser.execute_script("return window.devicePixelRatio") or 1.0)

    # Bounding box of the score element, expressed in screenshot pixels.
    x1 = score_element.location["x"] * scale
    y1 = score_element.location["y"] * scale
    x2 = x1 + score_element.size["width"] * scale
    y2 = y1 + score_element.size["height"] * scale

    # Full-window screenshot, decoded in memory.
    full_img_data = browser.get_screenshot_as_png()
    full_img = Image.open(BytesIO(full_img_data))

    # Crop out just the Maoyan rating.
    cut_img = full_img.crop((x1, y1, x2, y2))

    # Keep both images on disk so the crop box can be inspected by eye.
    full_img.save("19-full_img.png")
    cut_img.save('19-cut_img.png')

    # A paid captcha-solving platform could recognise the score as well, but
    # it costs money and is not recommended:
    # code = decode('19-cut_img.png')

    # --psm 7 tells Tesseract to treat the image as a single line of text.
    print(pytesseract.image_to_string(cut_img, config='--psm 7'))
finally:
    # Always shut the browser down, even when a step above fails, so no
    # Chrome/chromedriver process is left behind.
    browser.quit()

 

你可能感兴趣的:(python爬虫)