代码如下
def predeal_page(self, response):
    """Schedule one page of the TRADE_004 report and the next pagination step.

    Builds the form payload for a POST to the report endpoint, queues a
    request handled by ``self.detail_page`` for the current page number, then
    re-queues this method with the page number incremented by one. Nothing is
    scheduled once ``response.save['num']`` reaches the computed number of
    pages, which is what terminates the recursion.

    Args:
        response: Response object exposing ``doc`` (a selector-queryable
            document), ``cookies`` and ``save``. ``save`` must contain
            ``'isFirst'`` (0 on the initial call, 1 afterwards) and ``'num'``
            (current page number); ``'count'`` (total record count) is read
            whenever ``'isFirst'`` is not 0.

    Raises:
        KeyError: If a required key is missing from ``response.save``.
        TypeError, ValueError: If the record-count field read on the first
            call cannot be converted to ``int``.
    """
    # NOTE(review): the nesting of the two trailing ``if`` bodies was
    # reconstructed from a snippet whose indentation had been lost; the
    # self-rescheduling call has to sit under the ``num < times`` guard or the
    # recursion would never end. Confirm against the running spider.
    state = response.save
    is_first = state['isFirst']
    page_size = 1000

    # On the first call the total record count is read from a form field of
    # the returned page; afterwards it travels along in ``save``.
    if is_first == 0:
        # Raw string so the backslash reaches the selector engine unchanged.
        page_count = int(response.doc("html").find(r"#form\:j_id260").val())
    else:
        page_count = state['count']
    print("pagecount=" + str(page_count))

    # Query window: the day before yesterday through yesterday (local time),
    # both derived from a single clock reading.
    one_day = 24 * 60 * 60
    now = time.time()
    start_time = time.strftime('%Y%m%d', time.localtime(now - 2 * one_day))
    end_time = time.strftime('%Y%m%d', time.localtime(now - 1 * one_day))
    print("共搜索" + str(page_count) + "条记录")

    action_url = "http://xxx.com/pages/erpt/rpt_main_report.jsf"

    # Report parameters; serialised to JSON (spaces stripped) and base64
    # encoded into the ``form:j_id23`` field below.
    report_params = {
        "P_AUTHTAG": {"index": 1, "value": "1111", "name": "1111", "label": "", "displayCondition": ""},
        "P_DATE_TYPE": {"index": 2, "value": "1", "name": "订单时间", "label": "日期类型", "displayCondition": "true"},
        "P_START_DATE": {"index": 3, "value": start_time, "name": start_time, "label": "订单开始日期", "displayCondition": "true"},
        "P_END_DATE": {"index": 4, "value": end_time, "name": end_time, "label": "订单结束日期", "displayCondition": "true"},
    }
    compact_json = json.dumps(report_params, ensure_ascii=False).replace(' ', '')
    formj_id23 = base64.b64encode(compact_json.encode()).decode("utf-8")
    print(formj_id23)

    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "Origin": "http://report.mall.10010.com",
    }

    data = {
        "AJAXREQUEST": "_viewRoot",
        "_authKey": "",
        "form:j_id23": formj_id23,
        "rpt_id": "TRADE_004",
        "form:j_id26": "TRADE_004",
        "form:j_id27": "/mallrpt/pages/trade_004.xhtml",
        "form:j_id258": str(page_size),
        "form:j_id259": "0",
        "form:j_id260": "0",
        "form:P_BROWSER_TYPE": "Chrome",
        "form:P_BROWSER_VERSION": "43.0.2357.130",
        "form:exportFormat": "",
        "form:motionFlag": "query",
        "form_SUBMIT": "1",
        "javax.faces.ViewState": "rO0ABXVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAN0AAVqX2lkMXB0ACEv cGFnZXMvZXJwdC9ycHRfbWFpbl9yZXBvcnQueGh0bWw=",
    }

    # Check whether this is the first call: the first call and the follow-up
    # calls submit different form fields.
    if is_first == 0:
        data["form:j_id28"] = "form:j_id28"
    if is_first == 1:
        data["form:j_id259"] = str(page_size * (state['num'] - 1))
        data["param1"] = "next"
        data["form:j_id255"] = "form:j_id255"
        data["form:j_id260"] = str(page_count)

    # Recursive scheduling. Number of pages via integer arithmetic; the
    # max() keeps the result at 1 page when page_count is 0.
    times = max(page_count - 1, 0) // page_size + 1
    print("times=" + str(times))
    if state['num'] < times:
        # Fetch the current page.
        self.crawl(action_url + "?id=" + str(state['num']), data=data,
                   cookies=response.cookies, callback=self.detail_page,
                   headers=headers, method="POST", priority=1)
        # Re-enter this method for the following page.
        num = state['num'] + 1
        next_url = action_url + "?id2=" + str(num)
        self.crawl(next_url, data=data, cookies=response.cookies,
                   callback=self.predeal_page, headers=headers, method="POST",
                   save={'isFirst': 1, 'num': num, 'count': page_count})