爬虫字体加密之起点中文网

话不多说,直接上代码:

import requests, json, time, re
from fontTools.ttLib import TTFont
from io import BytesIO


# Book detail page used as the example input.
test_url = 'https://book.qidian.com/info/1016035304'

# A timeout keeps the script from hanging forever on an unresponsive server.
resp = requests.get(test_url, timeout=10)
# Capture the font's base name from the .eot URL embedded in the page text.
# Raw string: the backslash escapes are meant for the regex engine, and a
# non-raw literal with "\." / "\?" / "\)" triggers invalid-escape warnings.
font_classes = re.findall(
    r"https://qidian.gtimg.com/qd_anti_spider/(.{1,10})\.eot\?'\) format\('eot'\)",
    resp.text,
)
if not font_classes:
    # Fail with an explanation instead of a bare IndexError on [0].
    raise RuntimeError('anti-spider font URL not found in %s' % test_url)
font_class = font_classes[0]
# The same base name is reused to build the .ttf URL that is downloaded later.
font_url = 'https://qidian.gtimg.com/qd_anti_spider/%s.ttf' % font_class


def get_encode(cmap, values):
    """Translate a ';'-separated run of encoded characters into a digit string.

    Every non-empty piece of *values* has its first two characters dropped
    (presumably the '&#' of a numeric character reference -- confirm against
    the page markup); the remainder is parsed as a decimal integer, looked up
    in *cmap* to obtain a glyph name, and that name is mapped to one of
    '0'-'9' or '.'.

    Args:
        cmap: Mapping from integer code to glyph name ('zero', 'one', ...).
        values: String of encoded characters separated by ';'.

    Returns:
        The decoded characters concatenated in input order.

    Raises:
        ValueError: If a piece does not parse as an integer after trimming.
        KeyError: If a code is missing from *cmap* or the glyph name is not
            one of the eleven known names.
    """
    glyph_to_char = {
        'zero': '0',
        'one': '1',
        'two': '2',
        'three': '3',
        'four': '4',
        'five': '5',
        'six': '6',
        'seven': '7',
        'eight': '8',
        'nine': '9',
        'period': '.',
    }
    pieces = values.split(';')
    # Empty pieces (e.g. after a trailing ';') are skipped.
    return ''.join(
        glyph_to_char[cmap[int(piece[2:])]] for piece in pieces if piece
    )


def get_font_cmap(font_url, timeout=10):
    """Download a font file and return its best Unicode character map.

    Args:
        font_url: URL of a font file that fontTools can parse.
        timeout: Seconds to wait on the server, forwarded to ``requests.get``.

    Returns:
        Whatever ``TTFont.getBestCmap()`` returns for the downloaded font
        (per the fontTools docs, a mapping of code point to glyph name).

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status.
    """
    resp = requests.get(font_url, timeout=timeout)
    # Stop on 4xx/5xx rather than handing an error page to the font parser.
    resp.raise_for_status()
    font = TTFont(BytesIO(resp.content))
    try:
        return font.getBestCmap()
    finally:
        # Close the font even when the cmap lookup raises.
        font.close()


cmap = get_font_cmap(font_url)

# NOTE(review): the published pattern was '(.*?)' % font_class, which has no
# %s placeholder and therefore raises TypeError before any matching happens.
# The HTML around the capture group appears to have been stripped from the
# post; it is reconstructed here as a <span> carrying the font class --
# confirm against the live page markup.
# re.escape keeps any regex metacharacters in font_class literal.
pattern = '<span class="%s">(.*?)</span>' % re.escape(font_class)
numberlist = re.findall(pattern, resp.text)
# Drop the second match; presumably it is a counter with no entry in
# key_tuple below -- verify against the page.
numberlist.pop(1)
key_tuple = ['总字数', '总推荐', '周推荐']
# Decode each captured run of encoded characters into a plain digit string.
value_tupe = [get_encode(cmap, each_num) for each_num in numberlist]

print(dict(zip(key_tuple, value_tupe)))

 

你可能感兴趣的:(python,爬虫)