废话不多说,咱们直接上代码(每段代码下方的文字是对该段代码的说明):
def startGetData(self, rounds=3, delay=2):
    """Request the Tencent News unread-list endpoint several times in a row.

    Every round rebuilds the request URL with a fresh value from
    ``self.getRondomStr()`` appended to the ``__qnr`` query parameter, waits
    ``delay`` seconds, and then passes the URL to ``self.parse_url`` together
    with ``self.getBody()`` and ``self.getHeader()``.

    Args:
        rounds: Number of requests to issue. Defaults to 3, the count that
            used to be hard-coded.
        delay: Seconds to sleep before each request. Defaults to 2, the
            pause that used to be hard-coded; values <= 0 skip the sleep.
    """
    for _ in range(rounds):
        # The URL of the latest round is kept on the instance, as before.
        self.url = (
            "http://r.inews.qq.com/getQQNewsUnreadList?idfa=18454932-A441-4720-8973-776284A58B7F&apptype=ios&rtAd=1&screen_height=667&network_type=wifi&"
            "store=1&activefrom=&global_info=0%7C1%7C1%7C1%7C1%7C4%7C2%7C1%7C2%7C0%7C1%7C2%7C2%7C0%7C&screen_scale=2&adcode=440112&screen_width=375&__qnr=2"
            "08e"
            + str(self.getRondomStr())
            + "&isJailbreak=1&qqnews_refpage=QNLaunchWindowViewController&omgid=96192500048fe94e120b203d6b2be528edb2001011321e&device_model=iPhone7%2"
            "C2&pagestartFrom=icon&startFrom=icon&startarticleid=&devid=F3D6D07C-4D87-40B5-8533-A6E8D1331C89&omgbizid=1a9a8d53c482a14b7e7bd33737409def8cae006011"
            "321e&appver=9.3.1_qqnews_5.5.60"
        )
        if delay > 0:
            time.sleep(delay)
        self.parse_url(self.url, self.getBody(), self.getHeader())
上面的 startGetData 是启动函数:循环请求 3 次,每次请求前等待 2 秒。
def parse_url(self, url, body, header, timeout=10):
    """POST ``body`` to ``url`` and hand the decoded response to ``parse_json``.

    Args:
        url: Address to post to.
        body: Request payload, passed to ``requests.post`` as ``data``.
        header: HTTP headers, passed to ``requests.post`` as ``headers``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one unresponsive
            server would stall the whole crawl.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires; the error is left to the caller.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # NOTE(review): verify=False disables TLS certificate checks; it is kept
    # unchanged here and only has an effect for https URLs.
    response = requests.post(
        url,
        data=body,
        headers=header,
        verify=False,
        timeout=timeout,
    )
    self.parse_json(response.content.decode("utf-8"))
上面的 parse_url 负责发起网络请求抓取数据,并把响应内容交给 parse_json 解析。
def getBody(self):
    """Return the fixed, URL-encoded form body sent with every request.

    The value is one constant string; it is split into adjacent literals
    purely to keep the source lines manageable. Nothing is read from
    ``self``.
    """
    return (
        "cachedCount=12&adReqData=%7B%22chid%22%3A2%2C%22adtype%22%3A0%2C%22pf%22%3A%22iphone%22%2C%22launch%22%3A%220%22%2C%22ext%2"
        "2%3A%7B%22mob%22%3A%7B%22mobstr%22%3A%22Aejy45%2BNeSZw4VxymYnnIhMV%2BMEM%2B6sW9RSCUl%5C%2F9xdgy%2BxrCQ6TAqjN60uZp%5C%2FkwPlA4"
        "%2BwjM1T8AXgIGEry2mILkUiCjeN70vErvi4%5C%2FmstXaNzifAOoa7z%5C%2FtBFHEaFMuXU7nT4QWZqSqFPkQZtFndYKCzXlF0BpxSjiix7NQ55fW7kauVHkYlEI"
        "UF%2BKIlWeD42St4AymSqB7MqMT343a%2BdeUKG9Qm6YMZrmG%5C%2FWgZZZ7LiwfgC7hox%5C%2FhhCwOHmOwSbiZKRdUGgs%2ByCxL6FPLofYZdVDSABgNzm95ie%2"
        "B2Wdp25gHy%5C%2FyadnSpx6BokKCKMXOa6oZsz%5C%2FLtqRwFTPco7vKuptgpNHTn29wMeH2zfK4Kmb8Gg%2B9fTe8y3xQuC1lV81EZdDhL5QZfYkXh5%2BTcri%5C"
        "%2F6sH0i3714dC0jB475JLTIeKL5H3i3sYt8CECIpGKyexU6tUwrYl%5C%2FlL%2BGfQy15pG09WxlMBWraD1iQuS9jLwcCrRRlsOe86Y8gun1094i74NgAsud%2B9HTiMsB"
        "V4%5C%2FEItskEhTGXXV1V7Ps7yB8vzgsjWtHXyEacaUyWAnSfJDD%2BCFbwcPxNWuz%5C%2FYfeTryrTWyuMJh5mLiX2Ly9cuIuSYDciv679xopKEGDMtEC4tUq0x%2BKY"
        "QY9R8EaEDfTcPZrG6BPhIwRA9WiG0oTCOzhEiewq7FTG373fbMjpLuikxQ%5C%2FUbf7B5SAnLj%5C%2F4MsIfX5XeJAfmbsvzqG8zZbEpYwbebgjYthwoEwqgmNcO28KEISy2"
        "Z8uNW95qyuRlXokQBsVYPMb7l8isNsVZr0r9rCRFueMIlMtke6lkI1peXQNJbrhoOgqAUhcloxe7Ot%2Bqn9o0YutSr0RbWhycUV0%2Bc2DMAipZM4vtct7cMBYsVUuXP1GLBP1G"
        "TInbkGKdpRPDKl7HXaLq0Zn9Cvs59zCbJc6ND0wQXfq%2BgGTFCIAcysbbNIejC2CiRcjlyUBLdsqp4tqD6uGTX3FByULEkDrE1DO7AHE%5C%2FPqg3An7CFR0BkRh1KsCd34GWerx"
        "mB9WQIEa0tIUKZuRWkW3qZKJyo5eUieVcAI78Ul09C5JHwebRfPLQzSy1fTl4lgaKtmM2y3Lo6WY48P9PLCeQbA1lDSKw1Ku8U3wzOnmyieimQvdNAc0lEpOgykMhSfAva4lGvYGhvd"
        "M7RzTq%2BaoJh4p2ip2Oa30gojevgjc%22%7D%7D%2C%22ver%22%3A%225.5.60%22%2C%22slot%22%3A%5B%7B%22islocal%22%3A0%2C%22orders_info%22%3A%5B%22503856"
        "81%2C2120191%2C3602870493%2C19%2C101%2C110%2C1%22%2C%2250708076%2C2787691%2C2897359378%2C19%2C101%2C110%2C1%22%2C%2249679576%2C1918850882%2C28"
        "90192620%2C19%2C4307%2C110%2C1%22%2C%2250645127%2C6868108%2C4173790238%2C1000%2C705%2C110%2C2%22%5D%2C%22channel%22%3A%22news_news_top%22%2C%22r"
        "efresh_type%22%3A1%2C%22loid%22%3A%221%2C13%22%2C%22recent_rot%22%3A%5B%221%2C2%2C3%22%2C%224%22%5D%7D%5D%2C%22appversion%22%3A%22180319%22%7D&lon"
        "=113.4367974175347&uid=A6D2B510-4986-4884-8250-964B34B8FB22&chlid=news_news_top&is_new_user=0&feedbackNewsId=NEW2018042302871600%7C0%2CTWF20180423"
        "04111100%7C1%2CNEW2016111603351800%7C3%2CFIN2018042400905700%7C0%2C20180424A0FFXO00%7C0%2C20180424A0CP7I00%7C2%2C20180422A0UM4R00%7C0%2C20180423A1SOC"
        "P00%7C2%2C20180423A0PJNH00%7C2%2CHVD2017TOP000000000%7C10%2C20180422A127CW00%7C0%2C20180423A1VAON00%7C0%2C20180423A1WOUH00%7C2%2C20180423A02FOZ00%7C0"
        "%2C20180423A1VFAF00%7C0&newsTopPage=1&user_chlid=news_video_top%2Cnews_news_19%2Cnews_news_gz%2Cnews_news_ent%2Cnews_news_sports%2Cnews_news_mil&town_n"
        "ame=Unknown&addPushNews=0&lat=23.10431803385417&feedbackModulePos=%28null%29%7C3%2C10&channelPosition=0&page=2&picType=0%2C0%2C0%2C0%2C0%2C2%2C0%2C2%2C"
        "2%2C0%2C0%2C0%2C2%2C0%2C0&forward=0&adcode=440112&village_name=Unknown"
    )
上面的 getBody 返回网络请求需要携带的参数(即 POST 请求体)。
def parse_json(self, jsonStr):
    """Decode a response body and save every advertisement it contains.

    The response is printed, ``DataInfo.time`` is stamped with
    ``Util().getCurrTime()``, and then each entry of the ``"order"`` list
    found inside the ``"adList"`` field is passed to ``self.saveDataInfo``.
    A response without ``"adList"`` is ignored.

    Malformed input is reported with ``print`` instead of raising: this
    covers a body that is not JSON, an ``"adList"`` value that is empty or
    not a JSON string, and a missing ``"order"`` key. An advertisement that
    lacks an expected key is reported and skipped without dropping the
    advertisements that follow it.

    Args:
        jsonStr: Response body as text.
    """
    print(jsonStr)
    DataInfo.time = Util().getCurrTime()

    try:
        payload = json.loads(jsonStr)
        if "adList" not in payload:
            return
        # "adList" holds a JSON document encoded as a string, so it needs a
        # second decode before "order" can be read.
        orders = json.loads(payload["adList"])["order"]
    except (KeyError, TypeError, ValueError) as err:
        # json.JSONDecodeError is a ValueError subclass.
        print(err)
        return

    for order in orders:
        try:
            self.saveDataInfo(order)
        except KeyError as err:
            print(err)
上面的 parse_json 负责解析抓取到的数据。
def saveDataInfo(self, json_str):
    """Copy one advertisement into ``DataInfo`` and trigger the MySQL insert.

    Args:
        json_str: Decoded advertisement mapping; ``"title"`` and ``"url"``
            are read directly, and the whole mapping is forwarded to
            ``self.getBitmap`` to collect its pictures.

    Raises:
        KeyError: If ``"title"`` or ``"url"`` is missing.
    """
    ad = json_str

    # Values are staged on class attributes of DataInfo; presumably
    # insert_inspection_list reads them from there -- confirm in MySqlManager.
    DataInfo.title = ad["title"]
    DataInfo.channel = "tengxunxinwen"
    DataInfo.appdownload = ad["url"]
    DataInfo.pic_list = self.getBitmap(ad)
    DataInfo.device_type, DataInfo.type = "ios", 1

    storage = MySqlManager()
    storage.insert_inspection_list(1)
上面的 saveDataInfo 负责把数据保存到 MySQL。
def getBitmap(self, json_str):
    """Download an advertisement's pictures and return their source URLs.

    Two layouts are recognised:

    * ``"resource_url0"`` -- a single picture URL; ``DataInfo.source_type``
      is set to 1.
    * ``"resource_urlList"`` -- a list of mappings with a ``"url"`` key;
      ``DataInfo.source_type`` is set to 2 and at most the first three
      entries are used. A list with fewer than three entries is accepted
      rather than raising ``IndexError``.

    Each picture is saved through ``Util().save_img`` under ``self.path``
    with the name ``pic<N>_<microsecond timestamp>.jpg``, and the local
    paths are stored in ``DataInfo.pic_path`` as ``{"pic_path<N>": path}``.

    Args:
        json_str: Decoded advertisement mapping.

    Returns:
        dict: ``{"pic<N>": url}`` for every picture handled; empty when the
        advertisement has neither picture field.
    """
    file_path = self.path
    filename = str(int(time.time() * 1000000)) + ".jpg"
    bitmap = {}
    bitmap_path = {}

    if "resource_url0" in json_str:
        DataInfo.source_type = 1
        sources = [json_str["resource_url0"]]
    elif "resource_urlList" in json_str:
        DataInfo.source_type = 2
        # Lazy on purpose: each entry's "url" is read right before its
        # download, in the same order as the unrolled original.
        sources = (entry["url"] for entry in json_str["resource_urlList"][:3])
    else:
        # NOTE(review): DataInfo.source_type and DataInfo.pic_path are left
        # untouched here, so they keep whatever an earlier call stored --
        # confirm that is intended.
        return bitmap

    for position, image_url in enumerate(sources, start=1):
        local_name = "pic" + str(position) + "_" + filename
        bitmap["pic" + str(position)] = image_url
        bitmap_path["pic_path" + str(position)] = file_path + local_name
        Util().save_img(image_url, local_name, file_path)

    DataInfo.pic_path = bitmap_path
    return bitmap
上面的 getBitmap 负责把抓取到的图片保存到本地。