“知道”题库脚本——升级版

#!/usr/bin/env python
# encoding: utf-8
'''
@author: JHC
@license: None
@contact: [email protected]
@file: ttt.py
@time: 2022/4/22 23:27
@desc:替换self.uuid就能把题和答案写入mysql数据库
		uuid在cookies里
'''
import json
import requests
import pymysql.cursors
import sys
from tqdm import tqdm


class Spider():
    """Scrape a zhihuishu random-exercise question bank into MySQL.

    For every exercise id returned by the answer-sheet endpoint the spider
    fetches the question detail, discovers the correct answer by submitting
    candidate answers until the server reports success, and inserts the
    question, its options and the answer into the ``xingce`` table.

    Attributes set by ``__init__``: ``con`` (pymysql connection), ``uuid``,
    ``courseId``, ``randomExerciseStyle``, ``isFirst``, ``params`` and the
    two query URLs.
    """

    # Labels assigned to options by position; at most six options are stored.
    _OPTION_LETTERS = "ABCDEF"
    _URL_SUBMIT_ANSWER = ('https://hike-examstu.zhihuishu.com/zhsathome/'
                          'randomExercise/submitAnswer?uuid={}')

    def __init__(self, uuid="Vj1vy1A7", courseId="10464858"):
        """Open the database connection and prepare request parameters.

        Args:
            uuid: Session identifier taken from the site's cookies.
            courseId: Identifier of the course whose exercises are scraped.
        """
        self.con = pymysql.connect(host='localhost', port=3306, user='root',
                                   password='123456', db='questionsbank',
                                   charset='utf8',
                                   cursorclass=pymysql.cursors.DictCursor)
        self.uuid = uuid
        self.courseId = courseId
        self.randomExerciseStyle = "0"
        self.isFirst = True

        self.params = (
            ('courseId', self.courseId),
            ('randomExerciseStyle', self.randomExerciseStyle),
            ('isFirst', self.isFirst),
            ('uuid', self.uuid),
        )
        self.url_queryAnswerSheet = 'https://hike-examstu.zhihuishu.com/zhsathome/randomExercise/queryAnswerSheet'
        self.url_queryRandomExerciseDetail = 'https://hike-examstu.zhihuishu.com/zhsathome/randomExercise/queryRandomExerciseDetail'

    # ------------------------------------------------------------------
    # Fetching
    # ------------------------------------------------------------------
    def get_exerciseId_list(self):
        """Return the ids of all exercises listed on the answer sheet."""
        res = requests.get(url=self.url_queryAnswerSheet, params=self.params)
        return [item["exerciseId"] for item in res.json()["rt"]["lists"]]

    def get_content(self):
        """Fetch every question, resolve its answer and store it.

        Questions are numbered from 1 in answer-sheet order; the number is
        used as the row id. A question whose detail payload is missing is
        reported and skipped, but still consumes its number.
        """
        # Each question type has its own answer probe and its own writer.
        handlers = {
            '单选题': (lambda sort_id, ids: self.get_answer_radio(sort_id, ids),
                    self.radio),
            '多选题': (lambda sort_id, ids: self.get_answer_multi(sort_id, ids),
                    self.multi),
            '判断题': (lambda sort_id, ids: self.get_answer_judge(sort_id),
                    self.judge),
        }
        exerciseId_list = self.get_exerciseId_list()
        for INDEX, questionId in enumerate(tqdm(exerciseId_list), start=1):
            params = (
                ('courseId', self.courseId),
                ('questionId', questionId),
                ('times', '2'),
                ('randomExerciseStyle', self.randomExerciseStyle),
                ('uuid', self.uuid),
            )
            response = requests.get(url=self.url_queryRandomExerciseDetail,
                                    params=params)
            detail = response.json()["rt"]  # parse the payload only once here
            if detail is None:
                print("异常题号为{}".format(INDEX))
                continue

            randomExerciseSortId = detail["randomAnswerDetailDto"]["randomExerciseId"]
            answerList = [option["id"] for option in detail["optionList"]]
            questionType = detail["questionName"]

            handler = handlers.get(questionType)
            if handler is None:
                print('异常')
                continue
            find_answer, store = handler
            answer = find_answer(randomExerciseSortId, answerList)
            store(response, INDEX, questionType, answer)

    # ------------------------------------------------------------------
    # Storing
    # ------------------------------------------------------------------
    def _store_question(self, response, INDEX, questionType, answer):
        """Extract options and title from ``response`` and insert one row."""
        key1, key2, key3, key4, key5, key6 = self.set_opt(response)
        row_id, title, kind = self.process_data(INDEX, questionType, response)
        self.insert_data(id=row_id, title=title, type=kind,
                         key1=key1, key2=key2, key3=key3,
                         key4=key4, key5=key5, key6=key6, answer=answer)

    def judge(self, response, INDEX, questionType, answer):
        """Store a true/false question."""
        self._store_question(response, INDEX, questionType, answer)

    def multi(self, response, INDEX, questionType, answer):
        """Store a multiple-choice question."""
        self._store_question(response, INDEX, questionType, answer)

    def radio(self, response, INDEX, questionType, answer):
        """Store a single-choice question."""
        self._store_question(response, INDEX, questionType, answer)

    @staticmethod
    def _clean_text(text):
        """Return ``text`` as a string without spaces, CR or LF characters.

        NOTE(review): the escape sequences of the original ``replace`` calls
        were lost when the script was pasted; stripping space, ``\\r`` and
        ``\\n`` is the reconstruction — confirm against the real payloads.
        """
        return str(text).replace(" ", "").replace("\r", "").replace("\n", "")

    def process_data(self, INDEX, questionType, response):
        """Build the ``(id, title, type)`` triple for one question.

        The title is the question content with whitespace noise removed.
        """
        content = response.json()["rt"]["content"]
        return INDEX, self._clean_text(content), questionType

    def set_opt(self, response):
        """Return six labelled option strings (``'A. ...'`` .. ``'F. ...'``).

        Positions without an option are ``None``; options beyond the sixth
        are ignored.
        """
        optionList = response.json()["rt"]["optionList"]
        labelled = [
            letter + '. ' + self._clean_text(option["content"])
            for letter, option in zip(self._OPTION_LETTERS, optionList)
        ]
        padding = [None] * (len(self._OPTION_LETTERS) - len(labelled))
        return tuple(labelled + padding)

    def insert_data(self, id, title, type, key1=None, key2=None, key3=None,
                    key4=None, key5=None, key6=None, answer=None):
        """Insert one question row into MySQL.

        Insertion is best-effort: on a database error the transaction is
        rolled back and the failure is reported, but scraping continues.
        """
        sql = ('INSERT INTO xingce (id, title,key1,key2,key3,key4,key5,key6,type,answer) '
               'VALUES (%s,%s, %s, %s, %s, %s,%s,%s,%s,%s)')
        row = (id, title, key1, key2, key3, key4, key5, key6, type, answer)
        try:
            with self.con.cursor() as cur:
                cur.execute(sql, row)
            self.con.commit()
        except pymysql.MySQLError as err:
            # Only database failures are tolerated; anything else (including
            # KeyboardInterrupt) propagates instead of being swallowed.
            self.con.rollback()
            print("写入题号{}失败: {}".format(id, err))

    # ------------------------------------------------------------------
    # Answer discovery
    # ------------------------------------------------------------------
    def _submit_answer(self, randomExerciseSortId, answerContent):
        """Submit one candidate answer; return whether the server accepted it."""
        url = self._URL_SUBMIT_ANSWER.format(self.uuid)
        headers = {"Content-Type": "application/json"}
        data = {"data": {"answerContent": answerContent,
                         "fileList": [],
                         "isReview": True,
                         "randomExerciseSortId": randomExerciseSortId,
                         "courseId": self.courseId}}
        res = requests.post(url, headers=headers, data=json.dumps(data))
        # Equality (not identity) is kept on purpose to match the original check.
        return res.json()["rt"] == True  # noqa: E712

    def get_answer_judge(self, randomExerciseSortId):
        """Find the answer of a true/false question.

        Candidate values 0..9 are submitted in order; an accepted 0 maps to
        ``"B"`` and any other accepted value to ``"A"``. Returns ``None``
        when nothing is accepted.
        """
        for key in range(0, 10):
            if self._submit_answer(randomExerciseSortId, key):
                return "B" if key == 0 else "A"
        return None

    def get_answer_radio(self, randomExerciseSortId, answerList):
        """Find the answer of a single-choice question.

        Each option id is submitted in turn; the letter of the first
        accepted option is returned, or ``None`` if none is accepted.
        """
        for position, key in enumerate(answerList):
            accepted = self._submit_answer(randomExerciseSortId, key)
            if accepted and position < len(self._OPTION_LETTERS):
                return self._OPTION_LETTERS[position]
        return None

    def get_answer_multi(self, randomExerciseSortId, answerList):
        """Find the answer of a multiple-choice question.

        Every non-empty combination of option ids is submitted as a
        comma-separated string; the letters of the first accepted
        combination are returned joined by commas, or ``None``.
        """
        letter_of = dict(zip(answerList, self._OPTION_LETTERS))
        for combination in self.get_key(answerList):
            if not combination:
                continue  # the empty selection is never a valid answer
            key_str = ",".join(str(option_id) for option_id in combination)
            if self._submit_answer(randomExerciseSortId, key_str):
                return ",".join(letter_of[option_id]
                                for option_id in combination
                                if option_id in letter_of)
        return None

    def get_key(self, answerList):
        """Return every subset of ``answerList`` (including the empty one).

        Subsets are produced in bitmask order: subset ``i`` contains element
        ``j`` exactly when bit ``j`` of ``i`` is set.
        """
        size = len(answerList)
        return [
            [answerList[j] for j in range(size) if (mask >> j) & 1]
            for mask in range(2 ** size)
        ]


if __name__ == '__main__':
    sp = Spider()
    try:
        sp.get_content()
    finally:
        # Release the MySQL connection even if scraping aborts midway.
        sp.con.close()

你可能感兴趣的:(爬虫,python)