scrapy中解决中文乱码问题

场景一:输出到文件中
spiders/test.py

def parse(self, response):
    """Pull the text of the page's ``<title>`` element out of the response.

    The value is only looked up here (the snippet does not store or
    return it); the lookup raises ``IndexError`` when nothing matches.
    """
    matches = response.css('title::text').extract()
    # Key point 1: take the first match as-is, with no manual .encode() call.
    matches[0]

pipelines.py

import json
import codecs

class TestPipeline(object):
    """Item pipeline that appends every item to ``test.json``, one JSON document per line."""

    def open_spider(self, spider):
        """Create the output file handle and keep it on ``self.file``."""
        # Key point 2: open the file through codecs with an explicit UTF-8 encoding.
        self.file = codecs.open('test.json', mode='w', encoding='utf-8')

    def close_spider(self, spider):
        """Close the output file handle stored on ``self.file``."""
        self.file.close()

    def process_item(self, item, spider):
        """Serialize ``item`` to one JSON line, write it, and return the item unchanged."""
        # Key point 3: ensure_ascii=False leaves non-ASCII characters
        # unescaped instead of turning them into \uXXXX sequences.
        serialized = json.dumps(dict(item), ensure_ascii=False)
        self.file.write(serialized + "\n")
        return item

场景二:入数据库MySQL
spiders/test.py

def parse(self, response):
    """Look up the page's ``<title>`` text and encode it as UTF-8.

    The encoded value is not stored or returned by this snippet; the
    lookup raises ``IndexError`` when nothing matches.
    """
    title = response.css('title::text').extract()[0]
    # Key point 1: explicitly encode the extracted title as UTF-8.
    title.encode('utf-8')

pipelines.py

import json
import codecs

class TestPipeline(object):
    """Item pipeline that inserts each item's ``name`` into the ``tmp1`` table."""

    def open_spider(self, spider):
        """Set up the database handle when the spider starts."""
        # Elided in the original snippet ("self.db = ......"): create the
        # MySQL connection here.
        self.db = ...

    def close_spider(self, spider):
        """Release the database handle when the spider finishes."""
        # Elided in the original snippet ("self.db......"): release the
        # handle stored in self.db here.
        ...

    def process_item(self, item, spider):
        """Insert ``item['name']`` into ``tmp1`` and return the item.

        The item is returned so that any pipeline running after this one
        still receives it.
        """
        sql = """insert into tmp1 (name) values (%s)"""
        # Key point 2: pass the value as a query parameter. The trailing
        # comma matters: (x) is just x, while (x,) is a one-element tuple.
        # NOTE(review): the visible setup code only assigns self.db;
        # self.write_db is presumably created in the elided part -- confirm.
        self.write_db.execute(sql, (item['name'],))
        return item

你可能感兴趣的:(python)