import uuid
import datetime
import requests
import json
import hashlib
import random
from random import choice
from court.ip_lists import ip_lists
import time
import pymysql
def devid():
    """Return a device id: a version-1 UUID as 32 hex characters, no dashes.

    A new UUID is generated on every call.
    """
    return uuid.uuid1().hex
def timespan():
    """Return the current local time formatted as ``YYYYMMDDHHMMSS``."""
    stamp_format = '%Y%m%d%H%M%S'
    now = datetime.datetime.now()
    return now.strftime(stamp_format)
def nonce():
    """Return a random 4-character nonce of lowercase letters and digits.

    Characters are drawn independently with ``random.choice``, so the
    result is reproducible under ``random.seed`` and is not suitable as a
    cryptographic secret.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
    # Join the drawn characters directly instead of stringifying a list
    # and stripping the brackets, quotes, commas and spaces afterwards.
    return ''.join(random.choice(alphabet) for _ in range(4))
def signature(timespan, nonce, devid):
    """Return the hex MD5 digest of ``timespan + nonce + devid``.

    The three strings are concatenated in that order and encoded as UTF-8
    before hashing.
    """
    digest = hashlib.md5()
    digest.update((timespan + nonce + devid).encode('utf-8'))
    return digest.hexdigest()
def token():
    """Request a token from the GetToken endpoint, print it and return it.

    A single timestamp, nonce and device id are generated once and reused
    for the headers, the signature and the request body, so the signature
    matches the values that are actually sent.

    Returns:
        The value of the ``token`` field in the JSON response.

    Raises:
        requests.RequestException: if the request fails or times out.
        ValueError: if the response body is not valid JSON.
        KeyError: if the response JSON has no ``token`` field.
    """
    times = timespan()
    dev = devid()
    nonc = nonce()
    url = 'http://wenshuapp.court.gov.cn/MobileServices/GetToken'
    headers = {
        'Content-Type': 'application/json',
        # Send the same values the signature is computed from; generating
        # fresh ones here would make the signature disagree with the headers.
        'timespan': times,
        'nonce': nonc,
        'devid': dev,
        'signature': signature(times, nonc, dev),
        'User-Agent': 'Dalvik/1.6.0 (Linux; U; Android 4.4.2; vivo X20A Build/NMF26X)',
        'Host': 'wenshuapp.court.gov.cn',
        'Connection': 'Keep-Alive',
        'Accept-Encoding': 'gzip',
    }
    # Format the generated id, not the devid() function object.
    data = '{"app":"cpws","devid":"%s","apptype":"1"}' % dev
    html = requests.post(
        url=url,
        headers=headers,
        data=data,
        timeout=6,  # same limit as the other requests in this file
        proxies={'http': choice(ip_lists)},
    )
    value = json.loads(html.text)['token']
    print(value)
    return value
def parse():
    """Fetch the GetAddCountAndTotalAndPVCount response and print it.

    The request is retried indefinitely: an empty body or any exception
    leads to a 2-second pause followed by another attempt with a fresh
    token, timestamp, nonce and device id. The function returns ``None``
    once a non-empty body has been printed.
    """
    url = 'http://wenshuapp.court.gov.cn/MobileServices/GetAddCountAndTotalAndPVCount'
    while True:
        try:
            times = timespan()
            dev = devid()
            nonc = nonce()
            toke = token()
            # No Content-Length is set by hand: the body length varies
            # with the token, and requests derives the header from the
            # encoded body.
            headers = {
                'Content-Type': 'application/json',
                'timespan': times,
                'nonce': nonc,
                'devid': dev,
                'signature': signature(times, nonc, dev),
                'User-Agent': 'Dalvik/1.6.0 (Linux; U; Android 4.4.2; vivo X20A Build/NMF26X)',
                'Host': 'wenshuapp.court.gov.cn',
                'Connection': 'Keep-Alive',
                'Accept-Encoding': 'gzip',
            }
            data = ('{"app":"cpws","reqtoken":"%s"}' % toke).encode('utf-8')
            html = requests.post(url=url, headers=headers, data=data, timeout=6)
            if html.text == '':
                print('html.text=null')
                time.sleep(2)
                continue
            print(html.text)
            return
        except Exception as e:
            # Deliberately broad: any failure (network, bad token response)
            # is reported and retried. Pause first so a persistent failure
            # does not turn into a tight request loop.
            print(e)
            time.sleep(2)
            continue
def stare_request():
    """Page through GetLawListData, 20 records per request.

    The ``skip`` offset starts at 0 and advances by 20 after each accepted
    response until it reaches 118876. The query condition is fixed to the
    upload-date range 2018-06-21 to 2018-06-22. Every attempt uses a fresh
    token, timestamp, nonce and device id, and goes through a randomly
    chosen proxy from ``ip_lists``.

    A response is rejected, and the same offset retried after a short
    pause, when the body is empty, contains ``Request Error``, or the
    status code is not 200. Exceptions are printed and retried likewise.

    NOTE(review): the response body is only checked here, not parsed or
    stored; this function just prints the current offset.
    """
    url = 'http://wenshuapp.court.gov.cn/MobileServices/GetLawListData'
    i = 0
    while i < 118876:
        try:
            times = timespan()
            dev = devid()
            nonc = nonce()
            headers = {
                'Content-Type': 'application/json',
                'timespan': times,
                'nonce': nonc,
                'devid': dev,
                'signature': signature(times, nonc, dev),
                'User-Agent': 'Dalvik/1.6.0 (Linux; U; Android 4.4.2; vivo X20A Build/NMF26X)',
                'Host': 'wenshuapp.court.gov.cn',
                'Connection': 'Keep-Alive',
                'Accept-Encoding': 'gzip',
            }
            toke = token()
            data = '{"dicval":"asc","reqtoken":"%s",' \
                   '"condition":"/CaseInfo/案/@上传日期=[2018-06-21 TO 2018-06-22]",' \
                   '"skip":"%s","app":"cpws","limit":"20","dickey":"/CaseInfo/案/@法院层级"}' % (toke, str(i))
            data = data.encode('utf-8')
            html = requests.post(
                url=url,
                headers=headers,
                data=data,
                timeout=6,
                proxies={'http': choice(ip_lists)},
            )
            if html.text == '':
                print('html.text=null')
                time.sleep(0.5)
                continue
            elif 'Request Error' in html.text:
                print('Request Error')
                time.sleep(0.5)
                continue
            elif html.status_code != 200:
                # Pause like the other rejection paths instead of
                # retrying immediately.
                time.sleep(0.5)
                continue
            print('爬取到第几条数据:', i)
            i += 20
        except Exception as e:
            # Deliberately broad: report the failure and retry the same
            # offset, after a pause so a persistent error does not spin.
            print(e)
            time.sleep(0.5)
            continue
if __name__ == '__main__':
    # Script entry point: print the summary counts first, then walk the
    # paginated case list.
    parse()
    stare_request()
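# A single request does not always succeed; repeat it a few times and the data will come back.
# The key fields for decrypting the data are toke, times, the uuid and the returned payload; the decryption method itself is not covered here.
# That said, crawling the web version still feels somewhat faster, perhaps because I am less familiar with the app.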