# Without further ado, here is the code.
import requests, json, time, re
from fontTools.ttLib import TTFont
from io import BytesIO
# Book-info page whose statistics are scraped below.
test_url = 'https://book.qidian.com/info/1016035304'
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of parsing an error page.
resp = requests.get(test_url, timeout=10)
resp.raise_for_status()
# The page references its font as ".../qd_anti_spider/<name>.eot?') format('eot')";
# capture <name> so the matching .ttf file can be downloaded.
# Raw string: the pattern contains regex escapes (\. \? \( \)) that are not
# valid Python string escapes.
font_matches = re.findall(
    r"https://qidian.gtimg.com/qd_anti_spider/(.{1,10})\.eot\?'\) format\('eot'\)",
    resp.text,
)
if not font_matches:
    raise RuntimeError('anti-spider font reference not found in %s' % test_url)
font_class = font_matches[0]
font_url = 'https://qidian.gtimg.com/qd_anti_spider/%s.ttf' % font_class
# Glyph names that get_encode() understands, mapped to the character each
# one stands for.
_GLYPH_CHARS = {
    'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4',
    'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9',
    'period': '.',
}


def get_encode(cmap, values):
    """Decode a run of numeric character references into digits.

    Args:
        cmap: Mapping from integer code point to glyph name (for example
            ``{100234: 'one'}``).
        values: Text made of ``;``-terminated tokens such as
            ``'&#100234;&#100235;'``. Empty tokens and surrounding
            whitespace are ignored.

    Returns:
        The decoded string, consisting of the characters ``0``-``9`` and
        ``.``; an empty string when ``values`` holds no tokens.

    Raises:
        ValueError: If a token is not a decimal integer after the optional
            ``&#`` prefix is removed.
        KeyError: If a code point is missing from ``cmap`` or its glyph
            name is not one of the known digit/period names.
    """
    decoded = []
    for token in values.split(';'):
        token = token.strip()
        if not token:
            continue
        # Only strip the entity prefix when it is actually present, rather
        # than blindly dropping the first two characters.
        if token.startswith('&#'):
            token = token[2:]
        decoded.append(_GLYPH_CHARS[cmap[int(token)]])
    return ''.join(decoded)
def get_font_cmap(font_url, timeout=10):
    """Download a font file and return its best Unicode character map.

    Args:
        font_url: URL of the font file to download.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The result of ``TTFont.getBestCmap()`` for the downloaded font.

    Raises:
        requests.RequestException: If the download fails, times out, or
            the server answers with an HTTP error status.
    """
    resp = requests.get(font_url, timeout=timeout)
    resp.raise_for_status()
    # The context manager closes the font even if reading the cmap fails.
    with TTFont(BytesIO(resp.content)) as font:
        return font.getBestCmap()
# Character map of the font this page response refers to.
cmap = get_font_cmap(font_url)
# NOTE(review): the pattern previously read '(.*?)' % font_class, which raises
# TypeError because the string has no %s placeholder; the markup around the
# capture group appears to have been lost. Reconstructed on the assumption
# that each obfuscated number sits in <span class="FONT_CLASS">...</span>
# -- confirm against the live page source.
pattern = '<span class="%s">(.*?)</span>' % re.escape(font_class)
numberlist = re.findall(pattern, resp.text)
if len(numberlist) < 2:
    raise RuntimeError(
        'expected at least 2 obfuscated numbers, found %d' % len(numberlist)
    )
# Drop the second match: it has no corresponding label in key_tuple.
numberlist.pop(1)
key_tuple = ['总字数', '总推荐', '周推荐']
value_tupe = [get_encode(cmap, each_num) for each_num in numberlist]
print(dict(zip(key_tuple, value_tupe)))