import csv
import json
import os
import re
import time

import jsonpath
import requests
# Scrapes the Douyu live-room directory page by page and appends one CSV row
# per room (room name, streamer name, room URL, popularity) to OUTPUT_PATH.

# URL template for one directory page; "{}" is replaced by the page number.
bese_url = "https://www.douyu.com/gapi/rkc/directory/0_0/{}"
# Request headers: a desktop browser user-agent is sent with every request.
head = {
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
}

ROOM_URL_PREFIX = "https://www.douyu.com/"
OUTPUT_DIR = "E:/斗鱼/"
OUTPUT_PATH = OUTPUT_DIR + "斗鱼.csv"
PAGE_COUNT = 100             # pages 0 .. PAGE_COUNT - 1 are requested
PAGE_DELAY_SECONDS = 5       # pause between page requests
REQUEST_TIMEOUT_SECONDS = 10  # do not hang forever on a stalled connection


def room_to_row(room):
    """Build one CSV row from a single room record.

    Args:
        room: dict describing one room. Only the keys "rn", "nn", "url" and
            "ol" are read. Judging by the original variable names these are
            the room name, the streamer name, the room path and the
            popularity value -- TODO confirm against the API.

    Returns:
        A list of four strings: [rn, nn, absolute room URL, ol]. A missing
        key yields an empty string (the URL then is just the site prefix).
    """
    def text(key):
        value = room.get(key)
        return "" if value is None else str(value)

    path = text("url")
    if path.startswith("/"):
        # The API path carries a leading slash; the prefix already ends in one.
        path = path[1:]
    return [text("rn"), text("nn"), ROOM_URL_PREFIX + path, text("ol")]


def fetch_rooms(page):
    """Download one directory page and return its room records.

    Args:
        page: page number substituted into ``bese_url``.

    Returns:
        A list of dicts taken from the first ``data.rl`` match in the JSON
        response; an empty list when the response has no such list.

    Raises:
        requests.RequestException: on network failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    # Request the formatted URL for this page, not the raw "{}" template.
    response = requests.get(
        bese_url.format(page), headers=head, timeout=REQUEST_TIMEOUT_SECONDS
    )
    payload = json.loads(response.text)
    matches = jsonpath.jsonpath(payload, "$..data.rl")
    # The lookup result is falsy when nothing matched, so test before indexing.
    if not matches or not isinstance(matches[0], list):
        return []
    return [room for room in matches[0] if isinstance(room, dict)]


def main():
    """Scrape every page and append the rows to the CSV file."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for page in range(0, PAGE_COUNT):
        rows = [room_to_row(room) for room in fetch_rooms(page)]
        for row in rows:
            print(",".join(row))
        # csv.writer quotes fields that contain commas or quotes; newline=""
        # lets the csv module control line endings itself.
        with open(OUTPUT_PATH, "a", encoding="utf-8-sig", newline="") as f:
            csv.writer(f).writerows(rows)
        # NOTE(review): the original file lost its indentation; the pause is
        # assumed to be once per page rather than once per room.
        time.sleep(PAGE_DELAY_SECONDS)


if __name__ == "__main__":
    main()
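# Output: a CSV spreadsheet (it opens in Excel) stored under E:\斗鱼.
# The "斗鱼" folder must exist on drive E: when the file is written, so create it beforehand if it is missing.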