饿了么需要准备多个账号来获取 cookie,因为 cookie 很容易被封。
1. 评论版本(下厨房;评论抓取代码已注释掉)
import requests
from lxml import etree
import re
import json
import csv
import pandas as pd
import hashlib
# Accumulates one row per scraped recipe; the rows are written to CSV further down.
product_lists=[]
def down_load(url):
    """Fetch a xiachufang page and return ``(final_url, parsed_tree)``.

    The first request is sent with redirects disabled so that a 302 can be
    detected. On a 302 the ``Location`` target is fetched and returned together
    with its URL; for any other status the original ``url`` is returned
    alongside the parsed page.

    :param url: page URL to request.
    :return: tuple ``(url_the_content_came_from, etree.HTML tree)``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    # Hard-coded browser session headers (desktop Chrome user agent).
    headers1 = {
        "Connection": "keep-alive",
        "Cookie": "bid=YTzZGjuP; gr_user_id=97f998e4-aa93-486a-99f8-854c784f6bcd; BAIDU_SSP_lcr=https://www.baidu.com/link?url=Pwq6pphyjxk3oQRJg05jjUGiwtLjL7FBVFdqA8oFqpZoXc0hIxm9p0sa3fyWRSSs&wd=&eqid=c9c1816400092dd0000000035c218bdd; Hm_lvt_ecd4feb5c351cc02583045a5813b5142=1545271479,1545702369; __utmc=177678124; __utmz=177678124.1545702370.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; gr_session_id_8187ff886f0929da=3a10677b-5117-45e8-9de4-d80fc87ffc01; gr_session_id_da48e7b9eb89482489897fc1e45e98b6=3f128e3a-5f5c-42fd-9c45-e0e1ddeb97b2; _ga=GA1.2.377109310.1545271479; _gid=GA1.2.1629322193.1545716866; __utma=177678124.377109310.1545271479.1545702370.1545717020.3; __utmt=1; gr_session_id_8187ff886f0929da_3a10677b-5117-45e8-9de4-d80fc87ffc01=true; Hm_lpvt_ecd4feb5c351cc02583045a5813b5142=1545717163; __utmb=177678124.4.10.1545717020",
        "Host": "www.xiachufang.com",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
    }
    html = requests.get(url=url, headers=headers1, allow_redirects=False)
    print(html.status_code)
    if html.status_code == 302:
        # Location may be a path or a full URL; urljoin resolves both, whereas
        # plain concatenation would corrupt an absolute or slash-less target.
        new_id_url = urljoin("http://www.xiachufang.com", html.headers["location"])
        print(new_id_url)
        new_html = requests.get(url=new_id_url, headers=headers1).text
        return new_id_url, etree.HTML(new_html)

    print("++++++++++++++++")
    print(url)
    if html.status_code == 200:
        # The body is already in hand; do not spend a second request on it.
        return url, etree.HTML(html.text)
    # Any other status: request again with redirects enabled, as before.
    return url, etree.HTML(requests.get(url=url, headers=headers1).text)
def down_load1(url):
    """Download ``url`` with a fixed header set and parse the response.

    The request carries a hard-coded session cookie and a Safari-on-Mac
    user agent; redirects are followed (the ``requests`` default).

    :param url: page URL to request.
    :return: the ``etree.HTML`` tree built from the response text.
    """
    request_headers = {
        "Connection": "keep-alive",
        "Cookie": "bid=YTzZGjuP; gr_user_id=97f998e4-aa93-486a-99f8-854c784f6bcd; BAIDU_SSP_lcr=https://www.baidu.com/link?url=Pwq6pphyjxk3oQRJg05jjUGiwtLjL7FBVFdqA8oFqpZoXc0hIxm9p0sa3fyWRSSs&wd=&eqid=c9c1816400092dd0000000035c218bdd; Hm_lvt_ecd4feb5c351cc02583045a5813b5142=1545271479,1545702369; __utmc=177678124; __utmz=177678124.1545702370.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; gr_session_id_8187ff886f0929da=3a10677b-5117-45e8-9de4-d80fc87ffc01; gr_session_id_da48e7b9eb89482489897fc1e45e98b6=3f128e3a-5f5c-42fd-9c45-e0e1ddeb97b2; _ga=GA1.2.377109310.1545271479; _gid=GA1.2.1629322193.1545716866; __utma=177678124.377109310.1545271479.1545702370.1545717020.3; __utmt=1; gr_session_id_8187ff886f0929da_3a10677b-5117-45e8-9de4-d80fc87ffc01=true; Hm_lpvt_ecd4feb5c351cc02583045a5813b5142=1545717163; __utmb=177678124.4.10.1545717020",
        "Host": "www.xiachufang.com",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    }
    response = requests.get(url=url, headers=request_headers)
    page_source = response.text
    return etree.HTML(page_source)
def down_load2(url):
    """Fetch ``url`` and return it as a parsed lxml HTML tree.

    Sends the same cookie, host and Safari-on-Mac user agent that
    ``down_load1`` sends.

    :param url: page URL to request.
    :return: the ``etree.HTML`` tree built from the response text.
    """
    session_cookie = "bid=YTzZGjuP; gr_user_id=97f998e4-aa93-486a-99f8-854c784f6bcd; BAIDU_SSP_lcr=https://www.baidu.com/link?url=Pwq6pphyjxk3oQRJg05jjUGiwtLjL7FBVFdqA8oFqpZoXc0hIxm9p0sa3fyWRSSs&wd=&eqid=c9c1816400092dd0000000035c218bdd; Hm_lvt_ecd4feb5c351cc02583045a5813b5142=1545271479,1545702369; __utmc=177678124; __utmz=177678124.1545702370.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; gr_session_id_8187ff886f0929da=3a10677b-5117-45e8-9de4-d80fc87ffc01; gr_session_id_da48e7b9eb89482489897fc1e45e98b6=3f128e3a-5f5c-42fd-9c45-e0e1ddeb97b2; _ga=GA1.2.377109310.1545271479; _gid=GA1.2.1629322193.1545716866; __utma=177678124.377109310.1545271479.1545702370.1545717020.3; __utmt=1; gr_session_id_8187ff886f0929da_3a10677b-5117-45e8-9de4-d80fc87ffc01=true; Hm_lpvt_ecd4feb5c351cc02583045a5813b5142=1545717163; __utmb=177678124.4.10.1545717020"
    user_agent = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"
    headers3 = {
        "Connection": "keep-alive",
        "Cookie": session_cookie,
        "Host": "www.xiachufang.com",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": user_agent,
    }
    body = requests.get(url=url, headers=headers3).text
    tree = etree.HTML(body)
    return tree
# ---------------------------------------------------------------------------
# xiachufang recipe scraper.
# For every keyword in the input CSV, read the first result page plus pages
# 2 and 3, and collect one row per recipe card into product_lists.
# NOTE: per-comment scraping is not performed; rows hold recipe-level fields
# only.
# ---------------------------------------------------------------------------

XCF_ROOT = "http://www.xiachufang.com"
# Selects one li node per recipe card on a listing page.
XCF_CARD_XPATH = (
    '//div[contains(@class,"pure-u-3-4")]'
    '//div[@class="normal-recipe-list"]//ul[@class="list"]/li'
)
XCF_CSV_HEADER = ["关键词", "作者", "作者信息", "产品名称", "产品链接", "内容",
                  "产品评分", "做过人数", "7天做过", "产品收藏", "创建时间"]


def _first_or(values, default):
    """Return the first XPath hit, or ``default`` when there is none."""
    return values[0] if values else default


def _squash_text(node):
    """Return the whole text of ``node`` with spaces and newlines removed."""
    return node.xpath('string(.)').strip().replace(" ", "").replace("\n", "")


def _collect_recipes(listing_html, key_word):
    """Append one row to ``product_lists`` per recipe card in ``listing_html``.

    For each card this downloads the author's profile page and the recipe's
    detail page. A missing mandatory element raises ``IndexError``, which the
    caller treats as a failure of the current keyword.
    """
    # Iterate the card nodes directly and query relative to each one, instead
    # of re-running a positional li[n] query on the whole page per field.
    for card in listing_html.xpath(XCF_CARD_XPATH):
        print("**************")
        href = card.xpath('.//p[@class="name"]/a/@href')[0]
        product_url = XCF_ROOT + href
        print(product_url)
        product_name = card.xpath('.//p[@class="name"]/a/text()')[0].strip()
        product_star = _first_or(
            card.xpath('.//p[@class="stats"]/span[1]/text()'), "暂无")
        product_make = _first_or(
            card.xpath('.//p[@class="stats"]/span[@class="bold score"]/text()'), 0)
        product_author = card.xpath('.//p[@class="author"]/a[1]/text()')[0]
        product_author_url = XCF_ROOT + card.xpath('.//p[@class="author"]/a[1]/@href')[0]

        person_html = down_load2(product_author_url)
        person_info = _squash_text(
            person_html.xpath('//div[@class="gray-font"]/div[1]')[0])
        print(person_info)
        print(href)
        print(product_name)

        # down_load1 returns the parsed tree itself. The former "[1]" index
        # picked a child element of the root and only worked because every
        # query below is an absolute "//" path.
        detil_html = down_load1(product_url)
        product_save = detil_html.xpath('//div[@class="pv"]/text()')[0]
        product_content = _squash_text(
            detil_html.xpath('//div[contains(@class,"recipe-show")]')[0])
        product_time = detil_html.xpath('//div[@class="time"]/span/text()')[0]
        product_meke_all = detil_html.xpath(
            '//div[contains(@class,"cooked")]/span[@class="number"]/text()')[0]
        print(product_meke_all)
        product_lists.append(
            [key_word, product_author, person_info, product_name, product_url,
             product_content, product_star, product_meke_all, product_make,
             product_save, product_time])


def _write_recipe_csv(path):
    """Write the header row and everything in ``product_lists`` to ``path``."""
    with open(path, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.writer(f, dialect="excel")
        writer.writerow(XCF_CSV_HEADER)
        writer.writerows(product_lists)


word_pd = pd.read_csv(r"C:\Users\Lavector\Desktop\百事小红书\ele1.csv", engine='python', header=None).values.tolist()
print(word_pd)
try:
    # Row 0 is skipped, exactly as the original range(1, len(word_pd)) did.
    for keyword_row in word_pd[1:]:
        key_word = keyword_row[0]
        print(key_word)
        url1 = "http://www.xiachufang.com/search/?keyword={}".format(key_word)
        try:
            product_html_all = down_load(url1)
            product_html = product_html_all[1]
            print(len(product_html.xpath(XCF_CARD_XPATH)))
            _collect_recipes(product_html, key_word)
            print(product_html[0])
            print("++++++++")
            # A URL containing "category" gets "?page=", any other gets "&page=".
            # Presumably the category URL carries no query string yet; confirm.
            page_sep = "?" if "category" in product_html_all[0] else "&"
            for page_no in (2, 3):
                page_url = "{}{}page={}".format(product_html_all[0], page_sep, page_no)
                _collect_recipes(down_load2(page_url), key_word)
        except Exception as exc:
            # Best effort: report, snapshot what has been collected so far and
            # move on to the next keyword. KeyboardInterrupt is not caught here.
            print("keyword failed:", key_word, repr(exc))
            _write_recipe_csv("下厨房61.csv")
finally:
    # Also runs when the crawl is interrupted, so collected rows reach disk.
    print(product_lists)
    print(len(product_lists))
    _write_recipe_csv("下厨房62.csv")
2. 无评论版(饿了么;评论抓取代码已注释掉)
import requests
import json
import csv
import pandas as pd
import time
# ---------------------------------------------------------------------------
# Ele.me restaurant search scraper.
# For every keyword in the input CSV, query three result pages (offsets 0, 10
# and 20) of the search API and collect one row per restaurant.
# NOTE: customer reviews are not fetched; rows hold shop-level fields only.
# ---------------------------------------------------------------------------

# Search endpoint with a fixed latitude/longitude; the two placeholders are
# the keyword and the result offset.
ELE_SEARCH_URL = "https://mainsite-restapi.ele.me/shopping/v2/restaurants/search?extras[]=activities&search_item_type=3&is_rewrite=1&latitude=36.04272&longitude=103.871645&keyword={}&order_by=0&offset={}&limit=10&terminal=weapp"
ELE_CSV_HEADER = ["关键词", "地点", "店铺名称", "店铺链接", "店铺评论数", "热销产品"]

# Request headers are identical for every call, so they are built once.
headers = {
    "charset": "utf-8",
    "Accept-Encoding": "gzip",
    "referer": "https://servicewechat.com/wxece3a9a4c82f58c9/185/page-frame.html",
    "x-shard": "loc=103.871645,36.04272",
    "cookie": "SID=8mNKOGfiLGqBnp1xixcAE3TAtI2B2q6kheHg",
    "content-type": "application/json",
    "User-Agent": "Mozilla/5.0 (Linux; Android 7.1.1; OD103 Build/NMF26F; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/62.0.3202.84 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/appbrand0",
    "Host": "mainsite-restapi.ele.me",
    "Connection": "Keep-Alive",
}


def _write_shop_csv(path):
    """Write the header row and everything in ``lists_content`` to ``path``."""
    with open(path, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.writer(f, dialect="excel")
        writer.writerow(ELE_CSV_HEADER)
        writer.writerows(lists_content)


lists_content = []
word_pd = pd.read_csv(r"C:\Users\Lavector\Desktop\百事小红书\ele1.csv", engine='python', header=None).values.tolist()
print(word_pd)
try:
    for keyword_row in word_pd:
        key_word = keyword_row[0]
        print(key_word)
        try:
            # A distinct name for the page index; the original reused "m" for
            # both the keyword loop and this loop.
            for page_index in range(3):
                url = ELE_SEARCH_URL.format(key_word, page_index * 10)
                # NOTE(review): verify=False turns off TLS certificate checks.
                # Kept as in the original; confirm whether it is still needed.
                html = requests.get(url, headers=headers, verify=False).text
                print(html)
                html = json.loads(html)
                restaurants = html['inside']["3"]["restaurant_with_foods"]
                print(restaurants)
                print(len(restaurants))
                # The original branched on len(restaurants) >= 5, but both
                # branches ran the same code, so one loop is enough.
                for entry in restaurants:
                    shop = entry["restaurant"]
                    shop_name = shop["name"]
                    shop_id = shop["id"]
                    shop_rate = shop["rating"]
                    shop_rely = shop["rating_count"]
                    shop_url = shop["scheme"]
                    # "{{{{{}}}}}" is a literal separator appended after every
                    # dish name (including the last one).
                    shop_foods_name = "".join(
                        food['name'] + "{{{{{}}}}}" for food in entry["foods"])
                    print(shop_name, shop_id, shop_rate, shop_rely, shop_foods_name)
                    lists_content.append(
                        [key_word, "兰州", shop_name, shop_url, shop_rely, shop_foods_name])
        except Exception as exc:
            # Best effort: report, snapshot what has been collected so far and
            # move on to the next keyword. KeyboardInterrupt is not caught here.
            print("keyword failed:", key_word, repr(exc))
            _write_shop_csv("佳吉兰州3.csv")
finally:
    # Also runs when the crawl is interrupted, so collected rows reach disk.
    print(lists_content)
    _write_shop_csv("佳吉兰州5.csv")