爬虫笔记

学习网址：https://www.51zxw.net/study.asp?vip=20451602

import scrapy
from scrapy.linkextractors import LinkExtractor
import requests,random,re
from myspoder.items import MyspoderItem


class MusicspiderSpider(scrapy.Spider):
    """Spider registered as "music" that requests the htqyy.com hot-music list.

    Only the first list page (pageIndex=0, pageSize=20) is requested, and the
    response is decoded but not yet parsed into items.
    """

    name = "music"
    # Left disabled in the original notes:
    # allowed_domains = ['htqyy.com']
    # start_urls = ['http://www.htqyy.com/top']

    def start_requests(self):
        """Yield the single seed request with explicit browser-like headers.

        No callback is passed to the Request.
        """
        first_page_url = "http://www.htqyy.com/top/musicList/hot?pageIndex=0&pageSize=20"
        request_headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.54 Safari/536.5",
            "Referer": "http://www.htqyy.com/",
        }
        yield scrapy.Request(first_page_url, headers=request_headers)

    def parse(self, response):
        """Decode the response body; nothing is extracted or returned yet."""
        # filename = "music.html"
        # Response content; decode() is called without an explicit encoding.
        page_text = response.body.decode()
        # open(filename, "wb").write(data)  # save a local copy of the page
      

你可能感兴趣的:(python,笔记)