import json
import requests
import execjs
import json
import re, time
from typing import Literal, Union
import subprocess
import numpy
import cv2
# 第一步:获取 https://passport.bilibili.com/x/passport-login/captcha 中的 token、challenge 和 gt
# 第二步获取https://api.geetest.com/gettype.php中的信息
# 第三步获取https://api.geetest.com/get.php中间的图片信息
# 以上步骤要拿到点选坐标,验证图片链接,gt,challenge, c,s, token
#
class Bilibili:
    """Walk through the Bilibili web login flow guarded by a Geetest click captcha.

    The flow implemented by this class is:

    1. ``get_challenge`` asks the Bilibili passport API for ``token``,
       ``challenge`` and ``gt``.
    2. ``get_all_info`` queries the Geetest endpoints and downloads the captcha
       picture; the click coordinates are collected by hand in an OpenCV window.
    3. ``do_verify`` builds the ``w`` parameter with an external JavaScript file
       and exchanges it for a ``validate`` value.
    4. ``do_login`` fetches the login key, encrypts the password with another
       JavaScript file, and posts the login form.
    """

    # Divisor applied to the clicked pixel coordinates before they are scaled
    # by 100 * 100.  NOTE(review): presumably the rendered captcha size in
    # pixels -- confirm against the Geetest front-end.
    CLICK_SCALE = 333.375

    # Cookie sent with the login request when the caller supplies none.  Per
    # the original author the cookie values take part in login-environment
    # detection, so callers should normally pass their own cookie.
    DEFAULT_COOKIE = "buvid3=F1AE06DB-6D8E-48CD-E01C-1307C8BECA2D62566infoc; b_nut=1693214862; i-wanna-go-back=-1; b_ut=7; _uuid=BCD510C23-92B7-B46B-63F8-1AF6CEF721DD62843infoc; buvid4=D454060E-19E2-C4BE-1FF5-8E4A728DE46A63496-023082817-Pk1O31qDhl50PsOstdGqIo2YiPsiX%2FyVIYMjQZyFFb8GnCewWe1Ujw%3D%3D; header_theme_version=CLOSE; rpdid=0zbfAHUodz|TOaOLDwz|xEq|3w1QAyy6; CURRENT_QUALITY=80; CURRENT_BLACKGAP=0; CURRENT_FNVAL=4048; buvid_fp_plain=undefined; hit-dyn-v2=1; LIVE_BUVID=AUTO9016963162137110; enable_web_push=ENABLE; is-2022-channel=1; home_feed_column=4; bp_video_offset_1215933159=894868444895248389; fingerprint=f8b9ad7d06542e211fb67f921ad6b5b4; buvid_fp=f8b9ad7d06542e211fb67f921ad6b5b4; PVID=1; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MDc3MDQyMzUsImlhdCI6MTcwNzQ0NDk3NSwicGx0IjotMX0.KKuuBKEFLvwyHNJF0MY0i2HoUm2w-qQs1i2GHXOSZQk; bili_ticket_expires=1707704175; b_lsid=10FBEBB1E_18D92A2F532; sid=4viotcmf; browser_resolution=955-645; iflogin_when_web_push=0"

    def __init__(self, timeout=10):
        """Create the shared HTTP session and the default request headers.

        Args:
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0",
            "Origin": "https://www.bilibili.com",
            "Referer": "https://www.bilibili.com/",
        }
        self.session = requests.Session()
        self.timeout = timeout

    @staticmethod
    def _timestamp_ms():
        """Return the current Unix time in whole milliseconds as a string."""
        return str(int(time.time() * 1000))

    @staticmethod
    def _parse_jsonp(text):
        """Decode the JSON payload wrapped in a ``callback(...)`` JSONP response.

        The match is greedy so that a ``)`` inside the payload does not cut
        the JSON short.

        Raises:
            ValueError: If ``text`` contains no parenthesised payload.
        """
        match = re.search(r"\((.*)\)", text, re.S)
        if match is None:
            raise ValueError(f"not a JSONP response: {text[:200]!r}")
        return json.loads(match.group(1))

    @staticmethod
    def _format_click_points(xx, yy):
        """Convert clicked pixel coordinates into a ``"x_y,x_y"`` string.

        Each coordinate is divided by ``CLICK_SCALE``, multiplied by
        ``100 * 100`` and rounded to an integer.
        """
        scale = Bilibili.CLICK_SCALE
        return ",".join(
            f"{round(int(x) / scale * 100 * 100)}_{round(int(y) / scale * 100 * 100)}"
            for x, y in zip(xx, yy)
        )

    @staticmethod
    def _load_js(path):
        """Read a JavaScript source file as UTF-8 and compile it with execjs."""
        with open(path, "r", encoding="utf-8") as f:
            return execjs.compile(f.read())

    def get_challenge(self):
        """Fetch the captcha bootstrap values from the Bilibili passport API.

        Returns:
            A ``(token, challenge, gt)`` tuple.

        Raises:
            requests.exceptions.RequestException: If the HTTP request fails.
        """
        url = "https://passport.bilibili.com/x/passport-login/captcha"
        params = {
            "source": "main-fe-header",
            "t": self._timestamp_ms(),
        }
        try:
            resp = self.session.get(url=url, params=params, headers=self.headers, timeout=self.timeout)
        except requests.exceptions.RequestException as e:
            # Re-raise: returning None here only made the caller fail later
            # with an unrelated unpacking TypeError.
            print(e)
            raise
        payload = json.loads(resp.text)["data"]
        geetest = payload["geetest"]
        return payload["token"], geetest["challenge"], geetest["gt"]

    def get_pic_xy(self, pic):
        """Download the captcha picture and collect click coordinates by hand.

        The picture is saved as ``pic.jpg`` and shown in an OpenCV window.
        Every left click is recorded and drawn on the picture; pressing any
        key ends the collection.

        Args:
            pic: Path of the picture relative to ``https://static.geetest.com``.

        Returns:
            A ``(xx, yy)`` pair of lists holding the clicked x and y pixels.

        Raises:
            requests.exceptions.RequestException: If the download fails.
            RuntimeError: If the downloaded file cannot be decoded as an image.
        """
        url = "https://static.geetest.com" + pic
        try:
            resp = self.session.get(url=url, headers=self.headers, timeout=self.timeout)
            print(resp.url)
        except requests.exceptions.RequestException:
            print("图片请求错误")
            raise
        # Manual step: the original author found automatic recognition
        # (ddddocr) not reliable enough for this captcha.
        print("请手动获取验证码点选坐标")
        with open("pic.jpg", "wb") as f:
            f.write(resp.content)
        img = cv2.imread("pic.jpg")
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise RuntimeError("pic.jpg could not be decoded as an image")
        xx = []
        yy = []

        def click_callback(event, x, y, flags, param):
            # Record each left click and mark it on the displayed picture.
            if event == cv2.EVENT_LBUTTONDOWN:
                xx.append(x)
                yy.append(y)
                cv2.circle(img, (x, y), 1, (0, 0, 255), thickness=-1)
                cv2.putText(img, "%d, %d" % (x, y), (x, y), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), thickness=1)
                cv2.imshow("image", img)

        cv2.namedWindow("image")
        cv2.setMouseCallback("image", click_callback)
        cv2.imshow("image", img)
        try:
            cv2.waitKey(0)
        finally:
            # Close the window so it does not linger for the rest of the run.
            cv2.destroyAllWindows()
        return xx, yy

    def get_all_info(self):
        """Gather everything needed to answer the Geetest click captcha.

        Returns:
            A tuple ``(c, s, pic, xx, yy, challenge, token, gt)``.

        Raises:
            requests.exceptions.RequestException: If a required request fails.
            ValueError: If the ``get.php`` response is not JSONP.
        """
        token, challenge, gt = self.get_challenge()
        self.session.get(
            "https://api.geetest.com/gettype.php",
            params={"gt": gt, "callback": "geetest_" + self._timestamp_ms()},
            headers=self.headers,
            timeout=self.timeout,
        )
        type_params = {
            "gt": gt,
            "challenge": challenge,
            "lang": "zh-cn",
            "pt": "0",
            "client_type": "web",
            "w": "",
            "callback": "geetest_" + self._timestamp_ms(),
        }
        try:
            # Requests the captcha type; the response is not used, so a
            # failure here is reported and otherwise ignored (best effort).
            self.session.get(
                url="https://api.geetest.com/ajax.php",
                params=type_params,
                headers=self.headers,
                timeout=self.timeout,
            )
        except requests.exceptions.RequestException as e:
            print(e)
        pic_params = {
            "is_next": "true",
            "type": "click",
            "gt": gt,
            "challenge": challenge,
            "lang": "zh-cn",
            # Fixed typos in the original query: key "https:" and host
            # "aip.geetest.com".
            "https": "false",
            "protocol": "https://",
            "offline": "false",
            "product": "embed",
            "api_server": "api.geetest.com",
            "isPC": "true",
            "autoReset": "true",
            "width": "100%",
            # Millisecond timestamp, consistent with the other callbacks.
            "callback": "geetest_" + self._timestamp_ms(),
        }
        # Fetch the captcha picture information.
        try:
            resp = self.session.get(
                url="https://api.geetest.com/get.php",
                params=pic_params,
                headers=self.headers,
                timeout=self.timeout,
            )
        except requests.exceptions.RequestException as e:
            print(e)
            raise
        data = self._parse_jsonp(resp.text)["data"]
        pic = data["pic"]
        c = data["c"]
        s = data["s"]
        xx, yy = self.get_pic_xy(pic)
        return c, s, pic, xx, yy, challenge, token, gt

    def do_verify(self):
        """Solve the captcha and obtain the ``validate`` value from Geetest.

        The ``w`` parameter is produced by the ``get_w`` function of the
        JavaScript stored in ``biliget_w.txt``.

        Returns:
            A ``(validate, token, challenge)`` tuple.  ``validate`` is an
            empty string when the verification request itself fails.
        """
        cc, ss, pic, xx, yy, challenge, token, gt = self.get_all_info()
        points = self._format_click_points(xx, yy)
        w = self._load_js("biliget_w.txt").call("get_w", gt, challenge, cc, ss, points, pic)
        params = {
            "gt": gt,
            "challenge": challenge,
            "lang": "zh-cn",
            "pt": 0,
            "client_type": "web",
            "w": w,
        }
        validate = ""
        try:
            # Obtain validate.
            resp = self.session.get(
                url="https://api.geetest.com/ajax.php",
                params=params,
                headers=self.headers,
                timeout=self.timeout,
            )
        except requests.exceptions.RequestException as e:
            print("请求错误", e)
        else:
            validate = self._parse_jsonp(resp.text)["data"]["validate"]
        return validate, token, challenge

    def get_password(self, hash, public_key, password):
        """Encrypt ``password`` with the ``get_password`` function in ``bilipw.txt``.

        Args:
            hash: Hash value returned by the login-key endpoint.
            public_key: Public key returned by the login-key endpoint.
            password: Plain-text password.

        Returns:
            Whatever the JavaScript ``get_password`` function returns.
        """
        # NOTE (original author): the encoding used by subprocess (as invoked
        # through execjs) has to be changed to utf-8 for this to work.
        return self._load_js("bilipw.txt").call("get_password", hash, public_key, password)

    def do_login(self, username="用户名", password="密码", cookie=None):
        """Run the whole flow and post the login form, printing the response.

        Args:
            username: Account name to log in with.
            password: Plain-text password; it is encrypted before sending.
            cookie: ``Cookie`` header value for the login request.  When
                ``None``, ``DEFAULT_COOKIE`` is used.
        """
        get_key_url = "https://passport.bilibili.com/x/passport-login/web/key"
        # Whole milliseconds; the original sent a float string here.
        key_params = {"_": self._timestamp_ms()}
        resp = self.session.get(get_key_url, params=key_params, headers=self.headers, timeout=self.timeout)
        key_data = json.loads(resp.text)["data"]
        key_hash = key_data["hash"]
        public_key = key_data["key"]
        login_url = "https://passport.bilibili.com/x/passport-login/web/login"
        validate, token, challenge = self.do_verify()
        form = {
            "source": "main-fe-header",
            "username": username,
            "password": self.get_password(key_hash, public_key, password),
            "validate": validate,
            "token": token,
            "seccode": validate + "|jordan",
            "challenge": challenge,
        }
        # Supply your own cookie: per the original author its values take
        # part in the login-environment detection.
        self.headers["Cookie"] = self.DEFAULT_COOKIE if cookie is None else cookie
        try:
            resp = self.session.post(url=login_url, data=form, headers=self.headers, timeout=self.timeout)
            # Compare the userId in the returned data to tell whether the
            # login succeeded.
            print(resp.status_code)
            print(resp.text)
        except requests.exceptions.RequestException as e:
            print("登录请求错误", e)
# Script entry point: run the complete login flow once.
if __name__ == "__main__":
    Bilibili().do_login()
# 以上是主体代码,具体的验证逻辑网上有很多参考资料和教学视频。
# w 参数值和 password 的 JS 逆向代码在我的 GitHub 上:https://github.com/KTVICTORY18/js- (仅供学习使用)。
# 其中 get_w 对应 w 参数,pw 对应 password。