day5、scrapy有意思吧音乐

intrest.py

# -*- coding: utf-8 -*-
import scrapy
from u148.items import U148Item
# Scrape the music section of the u148 ("有意思吧") website.
# Listing page: http://www.u148.cn/music


class IntrestSpider(scrapy.Spider):
    """Spider that scrapes the music listing page of u148.cn.

    For every ``<article>`` whose class starts with ``ajaxpost box`` it
    yields one ``U148Item`` holding the title, thumbnail URL, excerpt,
    author and date of the entry.
    """

    name = 'intrest'
    allowed_domains = ['u148.cn']
    start_urls = ['http://www.u148.cn/music/']

    def parse(self, response):
        """Extract one ``U148Item`` per music article in ``response``.

        Args:
            response: The downloaded listing page.

        Yields:
            U148Item: One item per matching article. A field whose node is
            missing from the page is set to an empty string instead of
            raising, so one incomplete article does not abort the rest of
            the page.
        """
        self.logger.debug("Parsing music listing: %s", response.url)
        articles = response.xpath("//article[starts-with(@class,'ajaxpost box')]")
        for article in articles:
            item = U148Item()
            # extract_first(default="") avoids the IndexError that
            # extract()[0] raises when an XPath matches nothing.
            item["title"] = article.xpath(
                ".//h2/a/@title").extract_first(default="")
            item["img"] = article.xpath(
                ".//img[@class='thumb']/@src").extract_first(default="")
            item["zhaiyao"] = article.xpath(
                ".//div[@class='excerpt']/text()").extract_first(default="")
            item["author"] = article.xpath(
                ".//span[@class='author']/a/text()").extract_first(default="")
            item["time"] = article.xpath(
                ".//span[@class='date']/text()").extract_first(default="")
            yield item

items.py

import scrapy


class U148Item(scrapy.Item):
    """Item holding the fields scraped for one music entry on u148.cn."""

    # Title of the entry (the spider reads it from the heading link's title attribute).
    title = scrapy.Field()
    # Thumbnail image URL (the spider reads the `src` of the `thumb` image).
    img = scrapy.Field()
    # Excerpt text of the entry; "zhaiyao" is presumably pinyin for "summary".
    zhaiyao = scrapy.Field()
    # Author name as shown on the listing.
    author = scrapy.Field()
    # Date text as shown on the listing (stored as the raw scraped string).
    time = scrapy.Field()

pipelines.py

class U148Pipeline(object):
    """Pass-through item pipeline: items leave exactly as they arrive."""

    def process_item(self, item, spider):
        """Return ``item`` untouched.

        Args:
            item: The scraped item handed to the pipeline.
            spider: The spider that produced the item (unused).

        Returns:
            The very same ``item`` object, unmodified.
        """
        return item

你可能感兴趣的:(day5、scrapy有意思吧音乐)