# 1、去哪儿 (Qunar)
# -*- coding:utf-8 -*-
import re
import json
import requests
import pandas as pd
# Scrape the comment list of Qunar sight 5759 page by page (pages 1-999) and
# write the 'date' and 'content' fields of every comment to F:/qunaer.csv.
date = []
content = []
for i in range(1, 1000):
    print("正在抓取第" + str(i) + "页")
    url = (
        "https://touch.piao.qunar.com/touch/queryCommentsAndTravelTips.json"
        "?type=mp&pageSize=10&fromType=SIGHT&pageNum=" + str(i)
        + "&sightId=5759&tagType=0"
    )
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        html = requests.get(url, timeout=10).text
        html = json.loads(html)
        # list(...) forces a non-iterable payload to fail here, inside the try.
        commentList = list(html['data']['commentList'] or [])
    except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
        # Best effort: report the failed page and move on, instead of
        # silently swallowing every error (including Ctrl-C).
        print("第" + str(i) + "页抓取失败，已跳过：" + repr(exc))
        continue
    for each in commentList:
        try:
            content1 = each['content']
            date1 = each['date']
            txt = re.sub("♬ ", "", content1)
        except (KeyError, TypeError):
            # Skip only the malformed comment, not the rest of the page.
            continue
        date.append(date1)
        # Store the cleaned text; the original computed it but then saved
        # the raw string, so the "♬ " marker was never actually removed.
        content.append(txt)
# NOTE(review): the 'date:' column name (trailing colon) looks like a typo;
# it is kept so the CSV header stays the same for existing consumers.
result = pd.DataFrame({'date:': date, 'content': content})
result.to_csv('F:/qunaer.csv', index=False)
# 2、携程 (Ctrip)
# -*- coding:utf-8 -*-
import re
import requests
import json
import pandas as pd

# Scrape the comment list for Ctrip business id 20485 page by page
# (pages 1-129) and write each comment's play year-month and cleaned text
# to F:/xiecheng.csv.
date = []
comment = []
url = "https://m.ctrip.com/restapi/soa2/10491/json/GetCommentListAndHotTagList?_fxpcqlniredt=09031014411533277785"
# The request head is identical for every page, so build it once.
data2 = {
    "auth": "null",
    "cid": "09031014411533277785",
    "ctok": "",
    "cver": "1.0",
    "lang": "01",
    "sid": "8888",
    "syscode": "09"
}
for i in range(1, 130):
    print('正在抓取第' + str(i) + "页")
    data1 = {
        "BusinessId": "20485",
        "BusinessType": "11",
        "ChannelType": "7",
        "CommentTagId": "0",
        "ImageFilter": "false",
        "PageIndex": i,
        "PageSize": "10",
        "PoiId": "0",
        "SortType": "3",
        "StarType": "0",
        "TouristType": "0",
        "VideoImageHeight": "392",
        "VideoImageWidth": "700"
    }
    data = {
        "CommentResultInfoEntity": data1,
        "head": data2
    }
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        html = requests.post(url, data=json.dumps(data), timeout=10).text
        html = json.loads(html)
        # list(...) forces a non-iterable payload to fail here, inside the try.
        CommentInfo = list(html['CommentResult']['CommentInfo'] or [])
    except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
        # Best effort: report the failed page and move on, instead of
        # silently swallowing every error (including Ctrl-C).
        print("第" + str(i) + "页抓取失败，已跳过：" + repr(exc))
        continue
    for each in CommentInfo:
        try:
            Content = re.sub("♬ ", "", each['Content'])
            # Only year and month are recorded, as in the original output.
            date1 = str(each['PlayYear']) + '-' + str(each['PlayMonth'])
        except (KeyError, TypeError):
            # Skip only the malformed comment, not the rest of the page.
            continue
        print(Content)
        comment.append(Content)
        date.append(date1)
# NOTE(review): the 'date:' column name (trailing colon) looks like a typo;
# it is kept so the CSV header stays the same for existing consumers.
result = pd.DataFrame({'date:': date, 'content': comment})
result.to_csv('F:/xiecheng.csv', index=False)