# Python 爬虫：bs4（BeautifulSoup）简单使用

import scrapy
from bs4 import BeautifulSoup


class BdSpider(scrapy.Spider):
    """Spider that fetches a Baidu News search page and prints result titles.

    The page is parsed with BeautifulSoup (``lxml`` parser) instead of
    Scrapy's built-in selectors.
    """

    name = 'bd'
    allowed_domains = ['news.baidu.com']
    # The ``word`` query parameter carries the percent-encoded search keyword.
    start_urls = ['http://news.baidu.com/ns?word=%E4%B9%A0%E8%BF%91%E5%B9%B3&tn=news&from=news&cl=2&rn=20&ct=1']

    def parse(self, response):
        """Print the headline text of every search result in the response.

        This must be a method of the spider class: Scrapy invokes
        ``self.parse`` as the default callback for ``start_urls``.

        Args:
            response: The Scrapy response for a start URL; only its raw
                ``body`` is read and handed to BeautifulSoup.
        """
        soup = BeautifulSoup(response.body, 'lxml')
        for news in soup.select('div.result'):
            link = news.select_one('h3 a')
            if link is None:
                # A result block without a headline link: skip it rather
                # than fail the whole page with an IndexError.
                continue
            print(link.text.strip())

# 你可能感兴趣的：Python 爬虫 bs4 简用