突破canvas指纹识别(semiee)

canvas指纹是前端根据浏览器信息和客户端设备的各种特征, 使用特定算法生成的、可以精准标识浏览器唯一性的特征码。
反爬解决思路: 批量canvas指纹+批量账号捆绑发送请求
1、账号申请
2、canvas指纹获取: https://www.cnblogs.com/xiezhengcai/p/4252008.html

# config.py
# Account pool consumed by getToken.py: 'u' is the login name, 'p' the password and
# 'device_id' the value sent as deviceId when logging in. Each device_id below is
# shared by two accounts.
# NOTE(review): credentials are stored in plain text in source — consider loading
# them from the environment or a secrets store instead.
semiee_users = [  
    {'u': '17138523956', 'p': 'bmMKzV', 'device_id': '244ae093'},
    {'u': '17189005506', 'p': 'sc37otuUP6Lk', 'device_id': '244ae093'},
    {'u': '17159821573', 'p': 'CJ2w03&z', 'device_id': '7e41cc0f'},
    {'u': '17038344632', 'p': 'f#!bXx', 'device_id': '7e41cc0f'},
]

3、登录获取Token, 验证有效后保存到Redis

# getToken.py
import base64
import json
import time
import random
import redis
import requests
import os
import sys
import execjs, string
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
sys.path.insert(0, BASE_DIR)
from Material.settings import REDIS_HOST, REDIS_PARAMS, REDIS_PORT
from Material.tools.utils import get_proxys


# Proxy settings passed to every request below; obtained once, at import time,
# from the project helper get_proxys().
# NOTE(review): confirm whether the proxy should be refreshed per request instead.
proxies = get_proxys()
# One HTTP session shared by login() and check_token().
session = requests.session()
login_url = "https://www.semiee.com/bdxx-api/index/login"
# Base headers sent with every request made through the session.
# NOTE(review): 'deviceId' stays empty here and is never filled in by login() or
# check_token(), while parseUrl.py sends the account's deviceId as a header —
# confirm which form the API expects.
headers = {
  'Accept-Language': 'zh-CN,zh;q=0.8',
  'User-Agent': 'okhttp-okgo/jeasonlzy',
  'deviceId': '',
  'source': '0',
  'os': '0',
  'Content-Type': 'application/json;charset=utf-8',
  'Host': 'www.semiee.com'
}


def login(username, password, deviceId):
    """Log one account in and return its token.

    Posts the credentials together with ``deviceId`` as a JSON body to
    ``login_url`` through the shared ``session`` and prints the outcome.

    Args:
        username: Account login name.
        password: Account password.
        deviceId: Device identifier sent in the login payload.

    Returns:
        A ``(username, token)`` tuple. ``token`` is falsy (``''`` or ``None``)
        when the response carries no usable ``result``.

    Raises:
        requests.RequestException: On a network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    payload = {
        'username': username,
        'password': password,
        'type': '1',
        'deviceId': deviceId,
    }
    # A timeout keeps one dead proxy from blocking the whole batch forever.
    response = session.request(
        "POST",
        login_url,
        headers=headers,
        data=json.dumps(payload),
        proxies=proxies,
        timeout=30,
    )
    # Decode the body once and read both fields from it.
    body = json.loads(response.text)
    result = body.get('result')
    remark = body.get('remark')
    token = result.get('token') if result else ''
    if token:
        print('loginsucc: ', username, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    else:
        print('loginerror: {}_{}'.format(username, remark))
    return username, token


def check_token(token):
    """Report whether ``token`` yields a result from a sample detail endpoint.

    Sends a GET to a fixed chip-detail URL with the base ``headers`` plus the
    ``token`` header and checks that the JSON response has a truthy ``result``.

    Args:
        token: Token string returned by ``login``.

    Returns:
        ``True`` if the response contains a non-empty ``result``, else ``False``.

    Raises:
        requests.RequestException: On a network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    check_url = 'https://www.semiee.com/bdxx-api/chipChoice/detail/2b279fb6-cdb0-4625-ae17-59c183d79cc4'
    # Copy so the shared base headers never carry one account's token.
    check_headers = dict(headers)
    check_headers['token'] = token
    # A timeout keeps one dead proxy from blocking the whole batch forever.
    response = session.request("GET", check_url, headers=check_headers, proxies=proxies, timeout=30)
    result = json.loads(response.text).get('result')
    # Always return a real boolean rather than falling through to None.
    return bool(result)


def save_cookie(datas, rds_name):
    """Store each account's token in the Redis hash ``rds_name``.

    The hash field is the username and the value has the layout
    ``token|||deviceId|||YYYY-mm-dd HH:MM:SS`` (local time of the save).
    No expiry is set on the hash.

    Args:
        datas: Iterable of dicts with 'username', 'token' and 'deviceId' keys.
        rds_name: Name of the Redis hash to write to.
    """
    # NOTE(review): host, port and db are hard-coded here although REDIS_HOST and
    # REDIS_PORT are imported from settings — confirm which one is intended.
    rds = redis.StrictRedis(host='10.8.104.247', password=REDIS_PARAMS['password'], port=6379, db='10', health_check_interval=30)
    # enumerate gives the true position even when two entries compare equal.
    for index, data in enumerate(datas):
        try:
            username = data.get('username')
            deviceId = data.get('deviceId')
            stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
            token = data.get('token') + '|||' + deviceId + '|||' + stamp
            rds.hset(rds_name, username, token)
            print('{} saveToken succ!'.format(index))
        except Exception as e:
            # Best effort: one bad record must not stop the rest, but the
            # failure is reported instead of being silently dropped.
            print('{} saveToken error: {}'.format(index, e))


def run(users, rds_name):
    """Log in every account, keep the tokens that validate, and save them.

    Args:
        users: Iterable of dicts with 'u' (username), 'p' (password) and an
            optional 'device_id' key.
        rds_name: Name of the Redis hash the valid tokens are saved to.
    """
    cookies = []
    for user in users:
        username = user.get('u')
        password = user.get('p')
        deviceId = user.get('device_id', '')
        try:
            username, token = login(username, password, deviceId)
            if token and check_token(token):
                cookies.append({'username': username, 'token': token, 'deviceId': deviceId})
        except Exception as e:
            # Report why this account failed, then back off before the next one.
            print('login/check error: {}_{}'.format(username, e))
            time.sleep(5)
    try:
        save_cookie(cookies, rds_name)
    except Exception as e:
        print('save_cookie error: ', e)


if __name__ == '__main__':
    # A relative import has no parent package when this file is executed
    # directly (python getToken.py) and raises ImportError; fall back to the
    # sibling config module, which is importable from the script's directory.
    try:
        from . import config
    except ImportError:
        import config
    rds_name = 'semiee:token'
    run(config.semiee_users, rds_name)

4、携带Token请求

# parseUrl.py
import json
import random

import redis
import requests

# Detail endpoints fetched by run().
urls = [
    "https://www.semiee.com/bdxx-api/chipChoice/detail/2b279fb6-cdb0-4625-ae17-59c183d79cc4",
    "https://www.semiee.com/bdxx-api/chipChoice/detail/d2d6eaaa-71a3-4300-aabd-f42d9762dbe6"
]
payload = {}
# NOTE(review): save_cookie() in getToken.py writes to host '10.8.104.247', db '10',
# while this connects to 127.0.0.1 with the default db — confirm both point at
# the same Redis.
rds = redis.StrictRedis(host='127.0.0.1', port=6379, health_check_interval=30)
rds_token = 'semiee:token'
keys = rds.hkeys(rds_token)
if not keys:
    # random.choice on an empty list would fail with an opaque IndexError.
    raise RuntimeError('no token found in redis hash {!r}'.format(rds_token))
# Pick one stored account at random for this run.
user = random.choice(keys).decode()
token_str = rds.hget(rds_token, user)
# Stored value layout: token|||deviceId|||timestamp — decode and split once.
token, deviceId = token_str.decode().split('|||')[:2]
headers = {
  'deviceId': deviceId,
  'token': token,
  'User-Agent': 'okhttp-okgo/jeasonlzy'
}

def run():
    """Fetch every URL in ``urls`` and print its model, or the server remark.

    Each request is a GET carrying the module-level ``headers`` (token and
    deviceId). When the JSON response has a ``result`` with a truthy
    ``model``, that model is printed; otherwise the ``remark`` field is.

    Raises:
        requests.RequestException: On a network failure or timeout.
        ValueError: If a response body is not valid JSON.
    """
    for url in urls:
        # A timeout keeps an unresponsive server from hanging the script.
        response = requests.request("GET", url, headers=headers, data=payload, timeout=30)
        # Decode the body once and read both fields from it.
        body = json.loads(response.text)
        result = body.get('result')
        remark = body.get('remark')
        model = result.get('model') if result else ''
        if model:
            print(model)
        else:
            print(remark)


if __name__ == '__main__':
    # Script entry point: request the sample URLs with the stored token.
    run()

你可能感兴趣的:(python,爬虫)