python3爬虫实战之三
每学期都要对老师的教学质量进行评价,emmm,老师们都很棒,但评价很繁琐,就写了个爬虫脚本自动评价,每一项的评价结果不是 A 就是 B
环境:先安装 requests 库、beautifulsoup4 库(脚本使用 lxml 解析器,所以还需要安装 lxml 库)
友情链接,前端js评价方法: https://blog.csdn.net/lxfhahaha/article/details/72638659
看心情,啥时候补个详细步骤介绍,如果我有动力的话= =
hdu_spider.py
import requests
from bs4 import BeautifulSoup
import re
import sys,io,os
import hashlib
import random
def loginToHDU(headers,conn):
    """Log in to the HDU CAS page and follow the ticket redirect into the portal.

    Fetches the CAS login page, extracts its hidden ``lt`` token, prompts on
    stdin for the student number and password, and posts the login form with
    the password replaced by its hex MD5 digest.

    Args:
        headers: HTTP headers sent with every request.
        conn: Session-like object providing ``get`` and ``post`` (the script
            passes a ``requests`` session).

    Returns:
        ``[conn, r4]`` on success, where ``r4`` is the response for the portal
        home page; ``None`` when the login POST comes back without cookies,
        which this script treats as wrong credentials.

    Raises:
        RuntimeError: If the login page has no ``lt`` token, or the page
            returned after a successful login has no ticket.
    """
    urlInital='http://cas.hdu.edu.cn/cas/login?service=http%3A%2F%2Fi.hdu.edu.cn%2Fdcp%2Findex.jsp'
    r1=conn.get(urlInital,headers=headers)
    lt=re.search(r'name="lt"\svalue="(.*?)"',r1.text)
    # Fail before asking for credentials: without the token the form cannot
    # be submitted, and returning None here would make the caller retry forever.
    if lt is None:
        raise RuntimeError('loginToHDU: no "lt" token found on the CAS login page')

    username=input('请输入您的学号:')
    password=input('亲输入您的密码:')
    # The form posts the MD5 hex digest of the password, not the plain text.
    password=hashlib.md5(password.encode(encoding='utf-8')).hexdigest()
    formData={
        'encodedService': 'http%3a%2f%2fi.hdu.edu.cn%2fdcp%2findex.jsp',
        'service': 'http://i.hdu.edu.cn/dcp/index.jsp',
        'serviceName': None,
        'loginErrCnt': '0',
        'username': username,
        'password': password,
        'lt': lt.group(1)
    }
    urlLogin='http://cas.hdu.edu.cn/cas/login'
    r2=conn.post(urlLogin,headers=headers,data=formData)
    # A login response that sets no cookies is taken as a rejected login.
    if len(r2.cookies)==0:
        print ('账户错误,重新输入!')
        return None
    print ('登陆成功,等待爬取数据...')

    ticket=re.search(r'ticket=(.*?)"',r2.text)
    if ticket is None:
        raise RuntimeError('loginToHDU: no ticket found in the post-login page')
    urlRedirct='http://i.hdu.edu.cn/dcp/index.jsp?ticket='+ticket.group(1)
    # The response body is not needed; the request is made for its effect on the session.
    conn.get(urlRedirct,headers=headers)
    urlRedirct2='http://i.hdu.edu.cn/dcp/forward.action?path=/portal/portal&p=wkHomePage'
    r4=conn.get(urlRedirct2,headers=headers)
    return [conn,r4]
def _requireGroup(pattern, text, what):
    """Return group 1 of the first match of pattern in text.

    Raises:
        RuntimeError: If the pattern does not match; ``what`` names the
            missing piece in the message.
    """
    found = re.search(pattern, text)
    if found is None:
        raise RuntimeError('loginToXUANKE: ' + what + ' not found in response')
    return found.group(1)


def loginToXUANKE(headers,conn):
    """Walk the ticket hand-offs from the portal into jxgl.hdu.edu.cn.

    Each step requests a page, pulls the next ``ticket=...`` value (or, at the
    end, the ``window.open('...')`` target) out of the response text, and
    requests the next URL built from it. The final page is also dumped to
    ``test.html`` in the current directory.

    Args:
        headers: HTTP headers sent with every request.
        conn: Session-like object providing ``get``; assumed to be already
            logged in to the portal.

    Returns:
        ``[conn, r10]`` where ``r10`` is the response for the final page.

    Raises:
        RuntimeError: If any response lacks the ticket or the
            ``window.open`` target needed for the next step.
    """
    ticketPattern = r'ticket=(.*?)"'

    urlXuanKe='http://i.hdu.edu.cn/dcp/forward.action?path=dcp/apps/sso/jsp/ssoDcpSelf&appid=1142'
    r5=conn.get(urlXuanKe,headers=headers)
    ticket=_requireGroup(ticketPattern,r5.text,'ticket (SSO entry page)')

    r6=conn.get('http://jxgl.hdu.edu.cn/default.aspx?ticket='+ticket,headers=headers)
    ticket=_requireGroup(ticketPattern,r6.text,'ticket (default.aspx)')

    # Response body unused; the request is made for its effect on the session.
    conn.get('http://jxdc.hdu.edu.cn/index.php?ticket='+ticket,headers=headers)

    r8=conn.get('http://jxgl.hdu.edu.cn/index.aspx',headers=headers)
    ticket=_requireGroup(ticketPattern,r8.text,'ticket (index.aspx)')

    r9=conn.get('http://jxgl.hdu.edu.cn/index.aspx?ticket='+ticket,headers=headers)
    finalPath=_requireGroup(r"window.open\('(.*?)'",r9.text,'window.open target')

    r10=conn.get('http://jxgl.hdu.edu.cn/'+finalPath,headers=headers)
    # Debug dump of the landing page; explicit UTF-8 so the write does not
    # depend on the platform's default encoding.
    with open('test.html','w',encoding='utf-8') as f:
        f.write(r10.text)
    return [conn,r10]
def randomRate(keNum,rateA,rateB):
    """Build one list of ten random '0'/'1' rating codes per course.

    Every code is drawn uniformly from a pool made of ``rateA`` copies of
    '0' followed by ``rateB`` copies of '1', so the two counts act as
    relative weights. After the ten draws, a list containing no '0' gets one
    random position overwritten with '0', and then a list containing no '1'
    gets one random position overwritten with '1'.

    Args:
        keNum: Number of courses, i.e. number of lists to build.
        rateA: Weight of the '0' code.
        rateB: Weight of the '1' code.

    Returns:
        A list of ``keNum`` lists, each holding ten one-character strings.
    """
    pool = '0' * rateA + '1' * rateB
    lastIndex = len(pool) - 1
    allRatings = []
    for _ in range(keNum):
        picks = [pool[random.randint(0, lastIndex)] for _ in range(10)]
        # Checked in this order so a list of identical codes ends up mixed.
        for required in ('0', '1'):
            if required not in picks:
                picks[random.randint(0, 9)] = required
        allRatings.append(picks)
    return allRatings
def _findEvaluationHref(pageText):
    """Return the href of the '理论教学质量评价' link in pageText, or None.

    NOTE(review): the pattern in the published script was truncated and no
    longer compiled (unbalanced parenthesis). This is a reconstruction that
    accepts quoted or unquoted href values — confirm against the live page.
    """
    found = re.search(
        r'<a\b[^>]*?\bhref\s*=\s*(["\']?)([^"\'\s>]+)\1[^>]*>\s*理论教学质量评价\s*</a>',
        pageText)
    return found.group(2) if found else None


def _fitRatings(base, selectCount):
    """Stretch one course's rating codes to cover selectCount dropdowns.

    When the count equals len(base) the codes are used as they are.
    Otherwise the codes are repeated cyclically up to selectCount and the
    first two are forced to '1', which is what the original did for counts
    that are larger multiples of ten.
    """
    if selectCount == len(base):
        return list(base)
    ratings = [base[k % len(base)] for k in range(selectCount)]
    for k in range(min(2, len(ratings))):
        ratings[k] = '1'
    return ratings


def GOTOXuePingJiao(headers,conn,ReOld,rateA=9,rateB=1):
    """Fill in and optionally submit the teaching-quality evaluation forms.

    Locates the '理论教学质量评价' link in ``ReOld.text``, opens that page
    (also dumping it to ``test.html``), and prints a randomly generated A/B
    preview for every course listed in ``#pjkc``. After the user confirms
    with Y/y, one form is posted per course, each time parsing the response
    to the previous post as the next form. After the last course the user is
    asked whether to send the final "submit" request as well.

    Args:
        headers: HTTP headers sent with every request.
        conn: Session-like object providing ``get`` and ``post``.
        ReOld: Response-like object whose ``text`` holds the page containing
            the evaluation link.
        rateA: Weight of rating A, passed to ``randomRate``.
        rateB: Weight of rating B, passed to ``randomRate``.

    Returns:
        None.

    Raises:
        RuntimeError: If the evaluation link is not present in ``ReOld.text``.
        IndexError: If a form page lacks one of the elements read with
            ``select(...)[0]`` (presumably a changed page layout — verify).
    """
    href = _findEvaluationHref(ReOld.text)
    if href is None:
        raise RuntimeError('GOTOXuePingJiao: evaluation link not found in the page')
    r1=conn.get('http://jxgl.hdu.edu.cn/'+href,headers=headers)
    # Debug dump of the evaluation page; explicit UTF-8 so the write does
    # not depend on the platform's default encoding.
    with open('test.html','w',encoding='utf-8') as f:
        f.write(r1.text)

    b1=BeautifulSoup(r1.text,'lxml')
    keNum=b1.select('#pjkc option')
    print( '现在有 '+str(len(keNum))+' 门课程,进行预选择:')
    listResult=randomRate(len(keNum),rateA,rateB)
    for option,codes in zip(keNum,listResult):
        d = ['A' if c=='0' else 'B' for c in codes]
        print(option.get_text()+':'+str(d))
    nextDa=input('这是预评价结果,如若对该结果无异议,请输入 Y/y 来完成继续,或者按任意键退出本程序.')
    if nextDa.upper()!='Y':
        return

    print('开始评价...')
    postPingJia={
        '0':'A(非常满意)',
        '1':'B(满意)',
        '2':'C(基本满意)',
        '3':'D(不满意)'
    }
    r=None
    for i in range(len(keNum)):
        # From the second course on, the form to fill is the page returned
        # by the previous post.
        if r is not None:
            b1=BeautifulSoup(r.text,'lxml')
        postUrl='http://jxgl.hdu.edu.cn/'+b1.select('#Form1')[0].get('action')
        formData={
            '__EVENTTARGET': None,
            '__EVENTARGUMENT': None,
            '__LASTFOCUS': None,
            '__VIEWSTATE':(b1.select('#__VIEWSTATE')[0].get('value')).encode("gb2312") ,
            '__EVENTVALIDATION': (b1.select('#__EVENTVALIDATION')[0].get('value')).encode("gb2312") ,
            'pjkc':(b1.select('#pjkc option[selected="selected"]')[0].get('value')).encode("gb2312") ,
            'pjxx': None,
            'txt1': None,
            'TextBox1': 0,
            'Button1': (b1.select('#Button1')[0].get('value')).encode("gb2312") ,
        }
        selectPj=b1.select('#DataGrid1 select')
        # One rating per dropdown, whatever the number of dropdowns is.
        ratings=_fitRatings(listResult[i],len(selectPj))
        for one,code in zip(selectPj,ratings):
            formData[one.get('name')]=(postPingJia[code]).encode("gb2312")
        r=conn.post(postUrl,headers=headers,data=formData)
        if i == len(keNum)-1 :
            pd2=input('现在已经完成各项评价,您可以按 Y/y 来进行最后一步的“提交”操作,或者按任意键退出,自行到网络上提交:')
            if pd2.upper()=='Y':
                # Same form data as the last save, with Button2 sent in
                # place of Button1.
                formData['Button2']=(b1.select('#Button2')[0].get('value')).encode("gb2312")
                formData.pop('Button1')
                r = conn.post(postUrl, headers=headers, data=formData)
    print('结束评价...')
if __name__ == '__main__':
    # Script entry point: log in (retrying until it succeeds), hop into the
    # academic system, then run the evaluation flow.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
        'Referer': 'http://cas.hdu.edu.cn'
    }

    # loginToHDU returns None on a failed login; start over with a fresh
    # session each time.
    loginResult = None
    while loginResult is None:
        loginResult = loginToHDU(headers, requests.session())
    session = loginResult[0]

    session, landingPage = loginToXUANKE(headers, session)
    GOTOXuePingJiao(headers, session, landingPage)