爬虫通用MysqlItem

items.py
class GeneralMysqlItem(scrapy.Item):
    """Generic Scrapy item carrying a batch of rows bound for one MySQL table.

    The item holds a list of row mappings under ``items`` and an optional
    target table under ``table_name``. ``get_insert_sql`` turns the batch
    into a single multi-row parameterized INSERT statement.
    """

    # List of row mappings; every row is expected to provide the keys of
    # the first row, because the column list is taken from ``items[0]``.
    items = scrapy.Field()
    # Target table name; when falsy, the spider's ``TABLE_NAME`` custom
    # setting is used instead.
    table_name = scrapy.Field()

    def __init__(self, table_name=None, *args, **kwargs):
        """Create the item and record the (optional) target table name.

        Args:
            table_name: Table to insert into, or ``None`` to defer to the
                spider's ``TABLE_NAME`` custom setting.
            *args: Forwarded unchanged to ``scrapy.Item.__init__``.
            **kwargs: Forwarded unchanged to ``scrapy.Item.__init__``.
        """
        super(GeneralMysqlItem, self).__init__(*args, **kwargs)
        self['table_name'] = table_name

    def get_insert_sql(self, spider):
        """Build a multi-row INSERT statement and its flat parameter list.

        Args:
            spider: Object exposing ``custom_settings``. The keys read are
                ``MYSQL_INSERT_MODEL`` (statement verb, defaulting to
                ``'insert ignore'``) and ``TABLE_NAME`` (only when this
                item has no table name of its own).

        Returns:
            A ``(sql, params)`` tuple. ``sql`` contains one ``(%s,...)``
            group per row and ``params`` holds the row values flattened in
            the same order.

        Raises:
            IndexError: If ``items`` is an empty list.
            KeyError: If ``items`` was never set, if no table name is
                available, or if a row lacks one of the first row's keys.
        """
        rows = self['items']
        # Column order is defined by the first row of the batch.
        keys = list(rows[0].keys())
        # To add a timestamp column: keys.append('time_stamp')

        # ``custom_settings`` is None on spiders that do not define it;
        # fall back to an empty mapping so the defaults below still apply.
        settings = spider.custom_settings or {}

        row_placeholder = '(' + ','.join(['%s'] * len(keys)) + ')'
        # NOTE: the statement verb and table name are interpolated as-is,
        # so they must come from trusted configuration. Row values are
        # passed separately as query parameters.
        sql = '''
            {} into {}({})
            values {}
        '''.format(
            settings.get('MYSQL_INSERT_MODEL', 'insert ignore'),
            self['table_name'] or settings['TABLE_NAME'],
            ','.join(['`{}`'.format(key) for key in keys]),
            ','.join([row_placeholder] * len(rows))
        )

        params = []
        for item in rows:
            params.extend(self.format_sql_values(item, keys))
        return sql, params

    def format_sql_values(self, item, keys):
        """Return the values of ``item`` ordered by ``keys``.

        Args:
            item: A single row mapping.
            keys: Column names, in the order used by the INSERT statement.

        Returns:
            A list with one value per key.

        Raises:
            KeyError: If ``item`` does not contain one of ``keys``.
        """
        # With a timestamp column, iterate over keys[:-1] here and then
        # append int(time.time()) as the final value.
        return [item[key] for key in keys]

spider.py
class XxxSpider(scrapy.Spider):
    """Template spider showing how to batch rows into a GeneralMysqlItem."""

    def parse(self, response):
        """Collect rows into one batch item and yield it if non-empty.

        Args:
            response: The response passed to the callback; unused by this
                template.

        Yields:
            The batch item, only when at least one row was collected.
        """
        batch = self.create_item_object()
        # Placeholder: replace the empty list with the row mappings
        # extracted from ``response``.
        for item in []:
            batch['items'].append(item)

        # Skip empty batches so no statement is built for zero rows.
        if batch['items']:
            yield batch

    def create_item_object(self, table_name=None):
        """Create a GeneralMysqlItem with an empty ``items`` list.

        Args:
            table_name: Optional table name passed to the item.

        Returns:
            A new ``GeneralMysqlItem`` whose ``items`` field is ``[]``.
        """
        items = GeneralMysqlItem(table_name)
        items['items'] = []
        return items

你可能感兴趣的:(爬虫通用MysqlItem)