Each line of a txt file holds one search term; the task is to submit each term to a website's search form and save the relevant results.
Approach:
Read question.txt line by line, POST each search term to the site, parse the returned page with BeautifulSoup, and write the matched results to 相似问题.txt. The request headers can be captured with Fiddler.
import urllib.request
import urllib.parse

from bs4 import BeautifulSoup

# Output file for the scraped results
file = open('相似问题.txt', 'w', encoding='utf-8')

with open('question.txt', 'r', encoding='utf-8') as question_file:
    for line in question_file:
        # Each line of question.txt holds one search term
        line = line.strip()
        if not line:
            continue
        url = 'http://127.0.0.1:8000/'
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36'
                                 ' (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
                   'Referer': 'http://47.93.34.71:2021/',
                   'Accept': 'text/html,application/xhtml+xml,application/xml;'
                             'q=0.9,image/webp,image/apng,*/*;q=0.8',
                   # Ask for an uncompressed body: urllib does not decompress gzip automatically
                   'Accept-Encoding': 'identity',
                   'Accept-Language': 'zh-CN,zh;q=0.9'
                   }
        postdata = {'desc_q_by_user': line,
                    'csrfmiddlewaretoken': 'I3SeW9DO0l289Kvtc2wke1v4zwcr6Rxj'}
        data = urllib.parse.urlencode(postdata).encode('utf-8')
        request = urllib.request.Request(url, data=data, headers=headers)
        with urllib.request.urlopen(request) as response:
            soup_string = BeautifulSoup(response, 'html.parser')
        # The related questions sit inside the element with class "t1"
        question_related = soup_string.find(attrs={'class': 't1'})
        file.write(line + '\n')
        if question_related is not None:
            # stripped_strings yields the text of each child with whitespace trimmed
            for question in question_related.stripped_strings:
                file.write(question)
                print(question)
        file.write('\n\n\n\n')

file.close()
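
One caveat: the csrfmiddlewaretoken above is hard-coded. The field name suggests a Django backend, which validates that token against the csrftoken cookie sent with the POST, so a pasted value can stop working once the session or token rotates. Below is a minimal sketch with a hypothetical fetch_csrf_token helper, assuming the form page at the same url embeds the token in a hidden input named csrfmiddlewaretoken and sets a csrftoken cookie:

import http.cookiejar
import urllib.request

from bs4 import BeautifulSoup

def fetch_csrf_token(url, headers):
    # GET the form page while keeping its cookies, then pull the hidden
    # csrfmiddlewaretoken out of the HTML. Returns (opener, token); the
    # opener carries the csrftoken cookie needed by the later POST.
    cookie_jar = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie_jar))
    request = urllib.request.Request(url, headers=headers)
    with opener.open(request) as response:
        soup = BeautifulSoup(response, 'html.parser')
    token_input = soup.find('input', attrs={'name': 'csrfmiddlewaretoken'})
    token = token_input['value'] if token_input is not None else ''
    return opener, token

The returned opener's open() would then replace urllib.request.urlopen for the POST, and the returned token would replace the hard-coded string in postdata.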