利用B站API爬取多个视频的播放信息

B站有个API,专门用来返回视频的播放数、硬币数等信息
最后在工程文件夹生成一个txt文件
在Scrapy中,修改pipeline、item、spider即可简单实现,这里直接贴出代码。

import json
import time

import scrapy
from lxml import etree

from tutoral.items import DmozItem

def next_url(c, rnum):
    """Return the counter value that follows ``c``, bounded by ``rnum``.

    Args:
        c: The current counter value.
        rnum: The largest value the counter is allowed to reach (inclusive).

    Returns:
        ``c + 1`` while that value does not exceed ``rnum``; ``None`` once
        the bound has been passed.
    """
    following = c + 1
    return following if following <= rnum else None

class DmozSpider(scrapy.spiders.Spider):
    """Walk Bilibili's archive-stat API one ``aid`` at a time.

    The crawl starts at ``aid=1`` and, after every response, requests the
    next consecutive ``aid``. A response whose ``code`` is 0 is converted
    into a ``DmozItem``; any other code yields no item but the crawl still
    moves on to the next id.

    Spider arguments:
        max_aid: Optional inclusive upper bound on the ``aid`` to request,
            e.g. ``scrapy crawl dmoz -a max_aid=1000``. When omitted the
            spider keeps requesting ids until it is stopped externally.
    """

    name = 'dmoz'
    # Scrapy reads ``allowed_domains``; the misspelled ``allow_domains`` was
    # silently ignored, so the domain filter was never applied.
    allowed_domains = ['bilibili.com']
    start_urls = [
        "https://api.bilibili.com/x/web-interface/archive/stat?aid=1"
    ]
    # Throttle through the downloader instead of calling time.sleep() in the
    # callback, which would block Scrapy's event loop.
    custom_settings = {'DOWNLOAD_DELAY': 0.1}

    STAT_URL = "https://api.bilibili.com/x/web-interface/archive/stat?aid="
    # Keys copied verbatim from the response's ``data`` object into the item.
    STAT_FIELDS = (
        'aid',
        'bvid',
        'view',
        'danmaku',
        'reply',
        'favorite',
        'coin',
        'share',
        'like',
    )

    def __init__(self, *args, max_aid=None, **kwargs):
        """Initialise the id counter and the optional crawl bound.

        Args:
            *args: Forwarded unchanged to ``scrapy.Spider.__init__``.
            max_aid: Largest ``aid`` to request (inclusive). Accepts an int
                or a numeric string, since ``-a`` arguments arrive as
                strings. ``None`` means no bound.
            **kwargs: Forwarded unchanged to ``scrapy.Spider.__init__``.
        """
        super().__init__(*args, **kwargs)
        # ``c`` is the aid requested most recently; ``start_urls`` covers
        # aid=1, so the first follow-up request is aid=2. (Starting at 2 and
        # incrementing before use skipped aid=2 entirely.)
        self.c = 1
        self.max_aid = int(max_aid) if max_aid is not None else None

    def parse(self, response):
        """Emit the stats item for this response and queue the next ``aid``.

        Args:
            response: The response for one archive-stat API call; its body
                is parsed as JSON.

        Yields:
            A ``DmozItem`` when the payload's ``code`` is 0, followed by a
            ``scrapy.Request`` for the next ``aid`` unless ``max_aid`` has
            been reached.
        """
        payload = json.loads(response.text)

        # Only code == 0 payloads are read for ``data``; other codes are
        # skipped without producing an item.
        if payload.get('code') == 0:
            stats = payload['data']
            item = DmozItem()
            for field in self.STAT_FIELDS:
                item[field] = stats[field]
            yield item

        self.c += 1
        if self.max_aid is not None and self.c > self.max_aid:
            return

        yield scrapy.Request(url=self.STAT_URL + str(self.c))


    
    


import json

class TutoralPipeline(object):
    """Item pipeline that appends each item to ``c.txt`` as a JSON line."""

    def process_item(self, item, spider):
        """Serialise ``item`` to JSON and append it to the output file.

        The file is ``c.txt``, resolved against the current working
        directory. The previous name ``'c,txt'`` had a comma where the
        extension dot belongs, producing a file without a ``.txt`` suffix.

        Args:
            item: The scraped item; anything ``dict()`` can convert.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipeline stages still receive it.
        """
        content = json.dumps(dict(item)) + '\n'
        # ``with`` closes the handle even if the write raises.
        with open('c.txt', 'a', encoding='utf-8') as out:
            out.write(content)
        return item

import scrapy


class DmozItem(scrapy.Item):
    """Container for one video's statistics.

    Each field is filled by ``DmozSpider.parse`` from the same-named key of
    the API response's ``data`` object. The per-field meanings below are
    inferred from the key names only -- TODO confirm against the API docs.
    """

    # Identifiers.
    aid = scrapy.Field()  # same name as the ``aid`` query parameter of the request
    bvid = scrapy.Field()  # presumably the BV-style video id
    # Counters (presumed meanings).
    view = scrapy.Field()  # play count
    danmaku = scrapy.Field()  # on-screen comment count
    reply = scrapy.Field()  # comment/reply count
    favorite = scrapy.Field()  # favourites count
    coin = scrapy.Field()  # coins given
    share = scrapy.Field()  # share count
    like = scrapy.Field()  # like count
    

你可能感兴趣的:(利用B站API爬取多个视频的播放信息)