爬取斗鱼所有房间及直播源

# -*- coding: utf-8 -*-
from douyu import main
import requests
import json,sys
import random



reload(sys)
sys.setdefaultencoding("utf-8")

Max=165#斗鱼页数



def createRandomString(len):
    print ('wet'.center(10,'*'))
    raw = ""
    range1 = range(58, 65) # between 0~9 and A~Z
    range2 = range(91, 97) # between A~Z and a~z

    i = 0
    while i < len:
        seed = random.randint(48, 122)
        if ((seed in range1) or (seed in range2)):
            continue;
        raw += chr(seed);
        i += 1
    return raw



def getNumber():
    p = 0
    urls = ['https://www.douyu.com/gapi/rkc/directory/0_0/{}'.format(page) for page in range(1, Max)]
    fp=open("douyu_"+createRandomString(4)+".txt","w")
    fp.write("「斗鱼」\n")
    for url in urls:
        res = requests.get(url)
        j = json.loads(res.text)
        l1 = j['data']
        l2 = l1['rl']
        p = p+1
        fp.write("==第%d页==\n"% p)
        for i in range(len(l2)):
            Anchor = l2[i]['nn']            
            RoomNumber = l2[i]['rid']        
            print Anchor+","+main(RoomNumber)+"\n"
            if not "未开播" in main(RoomNumber):
                fp.write(Anchor+","+main(RoomNumber)+"\n")

    fp.write("\n")
    fp.close()
    print(u'斗鱼房间数据已保存')
    
getNumber()

爬取斗鱼所有房间及直播源_第1张图片
爬取斗鱼所有房间及直播源_第2张图片
cmd加入显示直播源;
txt加入分页,每120条数据为一页,最后一页除外;

main模块请看上一篇

你可能感兴趣的:(python,爬虫)