# Bilibili: fetch an uploader's ("UP主") basic profile information

# -*- coding: utf-8 -*-

import scrapy
import json
from scrapy_project.items import BilibiliItem

class BilibiliSpider(scrapy.Spider):
    """Spider that fetches the basic profile of a single Bilibili uploader.

    A single form request is sent to the ``GetInfo`` endpoint with the
    uploader id (``mid``) taken from ``data_dict``. The JSON reply is
    expected to carry a ``data`` object with ``name``, ``sex`` and ``sign``
    keys, which are copied into a ``BilibiliItem``.
    """

    name = 'bilibili'
    allowed_domains = ['bilibili.com']
    # Kept as a plain string rather than the usual list: start_requests()
    # below passes it straight to FormRequest as the request URL.
    start_urls = 'https://space.bilibili.com/ajax/member/GetInfo'
    # Form fields submitted with the request; 'mid' is the uploader id.
    data_dict = {
        'mid': '116683',
        'csrf': '',
    }

    def start_requests(self):
        """Build the initial request for the uploader-info endpoint.

        Returns:
            A one-element list holding the ``scrapy.FormRequest`` that
            submits ``data_dict`` to ``start_urls`` with explicit
            ``Referer`` and ``User-Agent`` headers.
        """
        headers = {
            'Referer': 'https://space.bilibili.com/116683/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36',
        }
        return [
            scrapy.FormRequest(
                url=self.start_urls,
                formdata=self.data_dict,
                headers=headers,
            )
        ]

    def parse(self, response):
        """Extract the uploader's name, sex and signature from the reply.

        Args:
            response: The response to the request built in
                ``start_requests``; its body is decoded as UTF-8 JSON.

        Yields:
            A ``BilibiliItem`` with the ``name``, ``sex`` and ``sign``
            fields filled from the ``data`` object of the JSON payload.

        Raises:
            KeyError: If ``data`` or one of the three fields is missing.
        """
        payload = json.loads(response.body.decode('utf-8'))
        profile = payload['data']

        item = BilibiliItem()
        item['name'] = profile['name']
        item['sex'] = profile['sex']
        item['sign'] = profile['sign']
        print(item)
        # Yield the item so Scrapy passes it on to the item pipeline.
        yield item

# You may also be interested in: (homework)