weixin_30642029

Python——Scrapy爬取链家网站所有房源信息

用 Scrapy 爬取链家全国各城市的房源信息，包括二手房、新房、租房、商业办公和小区五类：

路径：

items.py

# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class LianItem(scrapy.Item):
    """Empty item that declares no fields.

    It is not imported by the pipeline or the spider published alongside
    this module.
    """
    # define the fields for your item here like:
    # name = scrapy.Field()
    pass


class ErShouFangItem(scrapy.Item):
    """One second-hand (resale) home listing, filled from its detail page."""
    # Province
    province = scrapy.Field()
    # City
    city = scrapy.Field()
    # Total price
    total_price = scrapy.Field()
    # Unit price
    single_price = scrapy.Field()
    # Floor (original label); the spider fills this by joining the two text
    # cells of the detail page's "room" block with '-'
    room_info = scrapy.Field()
    # Location of the residence
    region = scrapy.Field()
    # Orientation and decoration (fit-out) of the home
    direction = scrapy.Field()
    # Floor area
    area = scrapy.Field()
    # Building type
    house_struct = scrapy.Field()
    # Layout (room configuration)
    huxing = scrapy.Field()
    # Purchase time
    buy_time = scrapy.Field()
    # URL of the listing's detail page
    ershou_detail_url = scrapy.Field()


class NewHouseItem(scrapy.Item):
    """One new-build development, filled from the new-home listing page."""
    # Province
    province = scrapy.Field()
    # City
    city = scrapy.Field()
    # Title
    title = scrapy.Field()
    # Location
    region = scrapy.Field()
    # Room information
    room_info = scrapy.Field()
    # Floor area
    area = scrapy.Field()
    # Price
    price = scrapy.Field()
    # URL of the detail page
    newHouse_detail_url = scrapy.Field()


class RentHouseItem(scrapy.Item):
    """One rental listing, filled from its detail page."""
    # Province
    province = scrapy.Field()
    # City
    city = scrapy.Field()
    # Title
    title = scrapy.Field()
    # Price
    price = scrapy.Field()
    # Room information (layout, orientation, area, rental mode)
    house_info = scrapy.Field()

    # Publication date
    pub_time = scrapy.Field()
    # Move-in date
    in_time = scrapy.Field()
    # Lease term
    lease = scrapy.Field()
    # Floor
    floor = scrapy.Field()
    # Elevator
    lift = scrapy.Field()
    # Parking space
    carport = scrapy.Field()
    # Water supply
    use_water = scrapy.Field()
    # Electricity supply
    use_electricity = scrapy.Field()
    # Gas supply
    use_gas = scrapy.Field()
    # URL of the listing's detail page
    rent_detail_url = scrapy.Field()

class OfficeHouseItem(scrapy.Item):
    """One commercial-office listing, filled from the office result list."""
    # Province
    province = scrapy.Field()
    # City
    city = scrapy.Field()
    # Title
    title = scrapy.Field()
    # Price
    price = scrapy.Field()
    # Quantity
    num = scrapy.Field()
    # Area
    area = scrapy.Field()
    # URL of the listing
    office_detail_url = scrapy.Field()

class XiaoquHouseItem(scrapy.Item):
    """One residential community (xiaoqu), filled from its detail page."""
    # Province
    province = scrapy.Field()
    # City
    city = scrapy.Field()
    # Title
    title = scrapy.Field()
    # District / area
    region = scrapy.Field()
    # Unit price
    single_price = scrapy.Field()
    # Year built
    build_time = scrapy.Field()
    # Building type
    house_struct = scrapy.Field()
    # Property-management fee
    service_fees = scrapy.Field()
    # Property-management company
    service_company = scrapy.Field()
    # Developer
    build_company = scrapy.Field()
    # Number of buildings
    building_nums = scrapy.Field()
    # Total number of homes
    house_nums = scrapy.Field()
    # URL of the community's detail page
    xiaoqu_detail_url = scrapy.Field()

View Code

pipelines.py

# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
from scrapy.exporters import JsonLinesItemExporter
from lian.items import ErShouFangItem, NewHouseItem,RentHouseItem,OfficeHouseItem,XiaoquHouseItem  # 已经导入成功，不用管


class LianPipeline(object):
    """Write every scraped item to a JSON-lines file chosen by its item class.

    One output file and one ``JsonLinesItemExporter`` per item type are
    created when the pipeline is constructed; ``close_spider`` closes all of
    the files again.
    """

    def __init__(self):
        # ensure_ascii=False keeps Chinese text readable in the output instead
        # of being written as \uXXXX escapes.
        self.ershoufang_fp = open('ershoufang.json', 'wb')
        self.ershoufang_exporter = JsonLinesItemExporter(self.ershoufang_fp, ensure_ascii=False)

        self.newhouse_fp = open('newhouse.json', 'wb')
        self.newhouse_exporter = JsonLinesItemExporter(self.newhouse_fp, ensure_ascii=False)

        self.renthouse_fp = open('renthouse.json', 'wb')
        self.renthouse_exporter = JsonLinesItemExporter(self.renthouse_fp, ensure_ascii=False)

        self.officehouse_fp = open('officehouse.json', 'wb')
        self.officehouse_exporter = JsonLinesItemExporter(self.officehouse_fp, ensure_ascii=False)

        self.xiaoquhouse_fp = open('xiaoquhouse.json', 'wb')
        self.xiaoquhouse_exporter = JsonLinesItemExporter(self.xiaoquhouse_fp, ensure_ascii=False)

        # (item class, exporter) pairs consulted in order by process_item.
        self._routes = (
            (ErShouFangItem, self.ershoufang_exporter),
            (NewHouseItem, self.newhouse_exporter),
            (RentHouseItem, self.renthouse_exporter),
            (OfficeHouseItem, self.officehouse_exporter),
            (XiaoquHouseItem, self.xiaoquhouse_exporter),
        )

    def process_item(self, item, spider):
        """Export ``item`` with the exporter registered for its class.

        Items of an unrecognised class are passed through untouched rather
        than being written into the community (xiaoqu) file.

        Returns the item unchanged so later pipelines still receive it.
        """
        for item_cls, exporter in self._routes:
            if isinstance(item, item_cls):
                exporter.export_item(item)
                break
        return item

    def close_spider(self, spider):
        """Close all five output files, including the office file."""
        for fp in (
            self.ershoufang_fp,
            self.newhouse_fp,
            self.renthouse_fp,
            self.officehouse_fp,
            self.xiaoquhouse_fp,
        ):
            fp.close()

View Code

lian_spider.py

# -*- coding: utf-8 -*-
import scrapy
import re
from lian.items import ErShouFangItem,NewHouseItem,RentHouseItem,OfficeHouseItem,XiaoquHouseItem # 已经导入成功，不用管
class LianSpiderSpider(scrapy.Spider):
    name = 'lian_spider'
    allowed_domains = ['lianjia.com']
    start_urls = ['https://www.lianjia.com/city/']
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
        'Cookie': 'select_city=510700; lianjia_uuid=8bd3d017-2c99-49a5-826e-986f56ce99b9; _smt_uid=5cd3cd13.44c49764; UM_distinctid=16a9b59145a158-0442ba7704d667-3b654406-c0000-16a9b59146011e; _jzqckmp=1; _ga=GA1.2.822868133.1557384475; _gid=GA1.2.801531476.1557384475; all-lj=ed5a77c9e9ec3809d0c1321ec78803ae; lianjia_ssid=50fd11a7-d48c-4dde-b281-287224c40487; TY_SESSION_ID=ae45e1a4-b6d9-46bb-81c8-7cff32931953; Hm_lvt_9152f8221cb6243a53c83b956842be8a=1557384618,1557389971,1557392984,1557446598; _jzqc=1; _jzqy=1.1557384468.1557446599.1.jzqsr=baidu|jzqct=%E9%93%BE%E5%AE%B6.-; _qzjc=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2216a9b5916632a6-01ac8dcdbbb8a7-3b654406-786432-16a9b59166452e%22%2C%22%24device_id%22%3A%2216a9b5916632a6-01ac8dcdbbb8a7-3b654406-786432-16a9b59166452e%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D; _jzqa=1.1500973956232310800.1557384468.1557451920.1557454945.6; _jzqx=1.1557451920.1557454945.2.jzqsr=mianyang%2Elianjia%2Ecom|jzqct=/ershoufang/pag1/.jzqsr=mianyang%2Elianjia%2Ecom|jzqct=/ershoufang/; CNZZDATA1255604082=609852050-1557381958-https%253A%252F%252Fwww.baidu.com%252F%7C1557455869; CNZZDATA1254525948=1645681089-1557382543-https%253A%252F%252Fwww.baidu.com%252F%7C1557458144; CNZZDATA1255633284=262578687-1557381275-https%253A%252F%252Fwww.baidu.com%252F%7C1557458627; Hm_lpvt_9152f8221cb6243a53c83b956842be8a=1557459240; _qzja=1.677427564.1557384472885.1557451920228.1557454945305.1557459200351.1557459240226.0.0.0.62.6; _qzjb=1.1557454945305.13.0.0.0; _qzjto=33.3.0; _jzqb=1.13.10.1557454945.1'
    }

    # 每个城市
    def parse(self, response):
        """Walk the city index page and schedule one request per city.

        Every request targets the city's home page and is handled by
        ``parse_rent_type``; ``meta['city_name']`` carries the tuple
        ``(province, city)``.
        """
        province_nodes = response.xpath('//div[@class="city_list_section"]/ul/li')
        for province_node in province_nodes:
            province = province_node.xpath('.//div[@class="city_list_tit c_b"]/text()').extract_first()
            for city_node in province_node.xpath('.//div[@class="city_province"]/ul/li'):
                city = city_node.xpath('./a/text()').extract_first()
                city_link = city_node.xpath('./a/@href').extract_first()
                if city_link is None:
                    # Without a link there is nothing to request, and passing
                    # url=None to scrapy.Request would raise inside this
                    # generator and stop the remaining cities from being
                    # scheduled.
                    continue
                yield scrapy.Request(
                    url=city_link,
                    headers=self.headers,
                    callback=self.parse_rent_type,
                    meta={'city_name': (province, city)}
                )
        # Overseas listings: the overseas entry in the header is the same for
        # every city, so it only needs to be crawled once (currently disabled).
        # yield scrapy.Request(
        #     url='https://i.lianjia.com/us',
        #     headers=self.headers,
        #     callback=self.parse_haiwai
        # )

    # 类型（二手房，新房，租房，商业办公，小区）
    def parse_rent_type(self, response):
        """Dispatch from a city's home page to each listing category.

        Reads the navigation bar and, depending on the entry's label,
        schedules:

        * second-hand homes: pages 1-100 -> ``parse_ershoufang``,
          meta ``(province, city, page)``
        * new homes: the ``/loupan/`` page -> ``parse_xinfang``,
          meta ``(province, city)``
        * rentals: pages 1-100 -> ``parse_zufang``,
          meta ``(page_url, province, city, page)``
        * commercial offices: ``/xzl/rent/mlist`` -> ``parse_shangyebangong``,
          meta ``(province, city)``
        * communities: the linked page -> ``parse_xiaoqu``,
          meta ``(province, city)``

        Navigation entries without an href are skipped.
        """
        province, city_name = response.meta.get('city_name')
        for nav_item in response.xpath('//div[@class="nav typeUserInfo"]/ul/li'):
            category = nav_item.xpath('./a/text()').extract_first()
            link = nav_item.xpath('./a/@href').extract_first()
            if link is None:
                # Checked before any concatenation: appending a suffix to None
                # would raise, and str(None) would produce a bogus URL.
                continue

            if category == '二手房':
                # The suffix is appended verbatim (no slash normalisation) so
                # the generated URLs keep the shape used so far.
                for page in range(1, 101):
                    yield scrapy.Request(
                        url=link + '/pg{}/'.format(page),
                        headers=self.headers,
                        callback=self.parse_ershoufang,
                        meta={'city_name': (province, city_name, page)}
                    )
            # Page numbers are hard to locate for new homes: first page only.
            elif category == '新房':
                yield scrapy.Request(
                    url=link + '/loupan/',
                    headers=self.headers,
                    callback=self.parse_xinfang,
                    meta={'city_name': (province, city_name)}
                )
            elif category == '租房':
                # parse_zufang recovers the site root from this URL with a
                # regex, so the '/pg{}/' suffix must keep this exact form.
                for page in range(1, 101):
                    page_url = link + '/pg{}/'.format(page)
                    yield scrapy.Request(
                        url=page_url,
                        headers=self.headers,
                        callback=self.parse_zufang,
                        meta={'city_name': (page_url, province, city_name, page)}
                    )
            # Page numbers are hard to locate for offices: first page only.
            elif category == '商业办公':
                # TODO: there is a redirect, so only one page gets crawled.
                yield scrapy.Request(
                    url=link + "/xzl/rent/mlist",
                    headers=self.headers,
                    callback=self.parse_shangyebangong,
                    meta={'city_name': (province, city_name)}
                )
            # Page numbers are hard to locate for communities: first page only.
            elif category == '小区':
                yield scrapy.Request(
                    url=link,
                    headers=self.headers,
                    callback=self.parse_xiaoqu,
                    meta={'city_name': (province, city_name)}
                )

    # 获取二手房主页item+
    def parse_ershoufang(self, response):
        """Schedule a detail-page request for every second-hand listing.

        ``meta['city_name']`` arrives as ``(province, city, page)`` and is
        forwarded as ``(detail_url, province, city, page)`` to
        ``parse_ershoufang_detail``.
        """
        province, city_name, i = response.meta.get('city_name')
        detail_links = (
            entry.xpath('.//div[@class="title"]/a/@href').extract_first()
            for entry in response.xpath('//ul[@class="sellListContent"]/li')
        )
        for ershou_detail_link in detail_links:
            # Some list entries carry no detail link; those are skipped.
            if ershou_detail_link is not None:
                yield scrapy.Request(
                    url=ershou_detail_link,
                    headers=self.headers,
                    callback=self.parse_ershoufang_detail,
                    meta={'city_name': (ershou_detail_link, province, city_name, i)}
                )
    # 二手房item详情页
    def parse_ershoufang_detail(self, response):
        """Build an ``ErShouFangItem`` from a second-hand listing detail page.

        ``meta['city_name']`` arrives as ``(detail_url, province, city, page)``.
        Composite values (prices, room info, region, direction) are joined
        from whichever parts are present on the page; a composite is ``None``
        only when all of its parts are missing. The title is printed for
        progress output but not stored, because the item declares no title
        field.
        """
        def first(xpath):
            # First matching text node, or None when the node is absent.
            return response.xpath(xpath).extract_first()

        def join_present(parts, sep=''):
            # Join only the parts that exist so a missing node neither raises
            # on concatenation nor leaks the literal text "None".
            present = [part for part in parts if part is not None]
            return sep.join(present) if present else None

        ershou_detail_link, province, city_name, i = response.meta.get('city_name')
        title = first('//div[@class="sellDetailHeader"]//div[@class="title"]/h1/text()')

        total_unit = first('//div[@class="price "]/span[@class="unit"]/span/text()')
        total_price = join_present([
            first('//div[@class="price "]/span[@class="total"]/text()'),
            total_unit.strip() if total_unit is not None else None,
        ])
        single_price = join_present([
            first('//span[@class="unitPriceValue"]/text()'),
            first('//span[@class="unitPriceValue"]/i/text()'),
        ])
        room_info = join_present([
            first('//div[@class="room"]/div[1]/text()'),
            first('//div[@class="room"]/div[2]/text()'),
        ], sep='-')
        region = join_present([
            first('//div[@class="areaName"]/span[@class="info"]/a[1]/text()'),
            first('//div[@class="areaName"]/span[@class="info"]/a[2]/text()'),
        ], sep='-')
        direction = join_present([
            first('//div[@class="type"]/div[1]/text()'),
            first('//div[@class="type"]/div[2]/text()'),
        ], sep='-')
        area = first('//div[@class="area"]/div[1]/text()')
        house_struct = first('//div[@class="area"]/div[2]/text()')
        huxing = first('//div[@class="introContent"]/div[1]/div[2]/ul/li[1]/text()')
        buy_time = first('//div[@class="transaction"]/div[2]/ul/li[3]/span[2]/text()')
        print("***第{}页*** 城市：{}   二手房   标题：{} 总价：{} 单价：{} 楼层：{} 住宅位置：{} 房屋朝向：{} 建筑面积：{} 建筑类型：{} 房屋户型：{} 购买时间：{}".format(i, city_name, title,total_price,single_price,room_info,region,direction,area,house_struct,huxing,buy_time))

        yield ErShouFangItem(
            province=province,
            city=city_name,
            total_price=total_price,
            single_price=single_price,
            room_info=room_info,
            region=region,
            direction=direction,
            area=area,
            house_struct=house_struct,
            huxing=huxing,
            buy_time=buy_time,
            ershou_detail_url=ershou_detail_link
        )

    # 新房楼盘主页
    def parse_xinfang(self, response):
        """Yield one ``NewHouseItem`` per development on a new-home list page.

        ``meta['city_name']`` arrives as ``(province, city)``. The detail URL
        is resolved against the page that was actually fetched, so it points
        at the current city's host rather than a fixed one; it is ``None``
        when the entry has no link.
        """
        province, city_name = response.meta.get('city_name')
        for entry in response.xpath('//ul[@class="resblock-list-wrapper"]/li'):
            title = entry.xpath('./a[@class="resblock-img-wrapper "]/@title').extract_first()

            region = ''.join(
                text.replace('\n', '').strip(' ')
                for text in entry.xpath('.//div[@class="resblock-location"]//text()').extract()
            )
            room_info = ''.join(
                text.strip(' ')
                for text in entry.xpath('.//a[@class="resblock-room"]/span//text()').extract()
            )
            # NOTE(review): "area" is read from the same main-price block as
            # the price below; confirm whether a dedicated area node was meant.
            area = ''.join(
                text.strip(' ')
                for text in entry.xpath('.//div[@class="main-price"]/span//text()').extract()
            )

            # Price value followed by its unit (unit stripped of whitespace);
            # missing parts are left out instead of raising or adding "None".
            price_value = entry.xpath('.//div[@class="main-price"]/span[1]/text()').extract_first()
            price_unit = entry.xpath('.//div[@class="main-price"]/span[2]/text()').extract_first()
            price_parts = [
                part for part in (
                    price_value,
                    price_unit.strip() if price_unit is not None else None,
                )
                if part is not None
            ]
            price = ''.join(price_parts) if price_parts else None

            detail_href = entry.xpath('./a[@class="resblock-img-wrapper "]/@href').extract_first()
            newhouse_detail_url = response.urljoin(detail_href) if detail_href is not None else None
            print("城市：{}   新房  {}  {}".format(city_name,title, newhouse_detail_url))
            yield NewHouseItem(
                province=province,
                city=city_name,
                title=title,
                region=region,
                room_info=room_info,
                area=area,
                price=price,
                newHouse_detail_url=newhouse_detail_url
            )

    # 租房首页
    def parse_zufang(self, response):
        """Schedule a detail-page request for every rental on a list page.

        ``meta['city_name']`` arrives as ``(page_url, province, city, page)``.
        Detail hrefs are appended to the site root recovered from
        ``page_url``; when that URL does not have the expected
        ``.../zufang/pgN/`` shape, the href is resolved against the response
        URL instead. Entries without a detail link are skipped.
        """
        zufang_link, province, city_name, i = response.meta.get('city_name')
        # Strip the "/zufang/.../pgN/" tail to get the site root. One or more
        # slashes are accepted before "pg" so both ".../zufang//pg3/" and
        # ".../zufang/pg3/" work.
        root_match = re.search(r'(.*?)/zufang/+pg\d+/', zufang_link)
        site_root = root_match.group(1) if root_match else None

        for entry in response.xpath('//div[@class="content__list"]/div'):
            href = entry.xpath('./a[@class="content__list--item--aside"]/@href').extract_first()
            # Checked before building the URL: concatenating first would turn
            # a missing href into a "...None" URL.
            if href is None:
                continue
            if site_root is not None:
                zufang_detail_link = site_root + href
            else:
                zufang_detail_link = response.urljoin(href)
            yield scrapy.Request(
                url=zufang_detail_link,
                headers=self.headers,
                callback=self.parse_zufang_detail,
                meta={'city_name': (zufang_detail_link, province, city_name, i)}
            )
    # 租房信息详情
    def parse_zufang_detail(self, response):
        """Build a ``RentHouseItem`` from a rental detail page.

        ``meta['city_name']`` arrives as ``(detail_url, province, city, page)``.
        The page number is kept intact for the progress line printed after
        the item is yielded.
        """
        def info(position):
            # Text of the N-th <li> in the article-info list. The positions
            # are fixed, so a page with a different list layout yields
            # shifted or missing values.
            return response.xpath(
                '//div[@class="content__article__info"]/ul/li[{}]/text()'.format(position)
            ).extract_first()

        zufang_detail_link, province, city_name, i = response.meta.get('city_name')
        title = response.xpath('//div[@class="content clear w1150"]/p/text()').extract_first()
        price = response.xpath('//div[@class="content__aside fr"]/p/span/text()').extract_first()

        # Concatenate the text fragments, turning newlines into '/' separators.
        # The loop variable is deliberately not named ``i`` so the page number
        # from meta is not overwritten.
        house_info = ''.join(
            fragment.replace('\n', '/').strip(' ')
            for fragment in response.xpath('//ul[@class="content__aside__list"]/p//text()').extract()
        )

        # Publication date: first YYYY-M-D pattern in the subtitle text.
        subtitle = str(response.xpath('string(//div[@class="content__subtitle"])').extract_first())
        date_match = re.search(r'\d{4}-\d{1,2}-\d{1,2}', subtitle)
        pub_time = date_match.group(0) if date_match else None

        in_time = info(3)           # move-in date
        lease = info(5)             # lease term
        floor = info(8)             # floor
        lift = info(9)              # elevator
        carport = info(11)          # parking space
        use_water = info(12)        # water supply
        use_electricity = info(14)  # electricity supply
        use_gas = info(15)          # gas supply

        yield RentHouseItem(
            province=province,
            city=city_name,
            title=title,
            price=price,
            house_info=house_info,
            pub_time=pub_time,
            in_time=in_time,
            lease=lease,
            floor=floor,
            lift=lift,
            carport=carport,
            use_water=use_water,
            use_electricity=use_electricity,
            use_gas=use_gas,
            rent_detail_url=zufang_detail_link
        )
        print("***第{}页*** 城市：{}   租房   {}   {}".format(i, city_name, title, price))

    # 海外房源信息
    # def parse_haiwai(self,response):
    #     items = response.xpath('//*[@id="env"]/div[4]/div/div[2]')
    #     for i in items:
    #         title = i.xpath('.//div[class="titles"]/a/div/text()').extract_first()
    #         price = i.xpath('.//span[@class="fr"]/text()').extract_first()
    #         print("城市：美国   标题：{}   价格：{}".format(title,price))

    # 商业办公主页item详情
    def parse_shangyebangong(self, response):
        """Yield one ``OfficeHouseItem`` per entry of the office result list.

        ``meta['city_name']`` arrives as ``(province, city)``. The listing URL
        is read from each result anchor's own ``href`` and resolved against
        the response URL; it is ``None`` when the anchor has no href.
        """
        province, city_name = response.meta.get('city_name')
        for anchor in response.xpath('//div[@class="result__ul"]/a'):
            # Read the href from this anchor (not from the whole response) and
            # extract it to a string, so the item stores a URL rather than a
            # selector list.
            href = anchor.xpath('./@href').extract_first()
            office_detail_url = response.urljoin(href) if href is not None else None
            title = anchor.xpath('./div/p[@class="result__li-title"]/text()').extract_first()
            area = anchor.xpath('./div/p[@class="result__li-features"]/text()').extract_first()
            nums = anchor.xpath('./div/p[@class="result__li-other"]/text()').extract_first()
            price = anchor.xpath('./div/p[@class="result__li-price"]/span/text()').extract_first()
            yield OfficeHouseItem(
                province=province,
                city=city_name,
                title=title,
                price=price,
                num=nums,
                area=area,
                office_detail_url=office_detail_url
            )
            print("城市：{}   商业办公   标题：{}   面积：{}   数量：{}   价格：{}   url:{}".format(city_name, title, area, nums, price, office_detail_url))

    # 小区主页item
    def parse_xiaoqu(self, response):
        """Schedule a detail-page request for every community on a list page.

        ``meta['city_name']`` arrives as ``(province, city)`` and is forwarded
        as ``(detail_url, province, city)`` to ``parse_xiaoqu_detail``.
        """
        province, city_name = response.meta.get('city_name')
        detail_links = (
            entry.xpath('.//a[@class="img"]/@href').extract_first()
            for entry in response.xpath('//ul[@class="listContent"]/li')
        )
        for xiaoqu_detail_link in detail_links:
            # Entries without a detail link are skipped.
            if xiaoqu_detail_link is not None:
                yield scrapy.Request(
                    url=xiaoqu_detail_link,
                    headers=self.headers,
                    callback=self.parse_xiaoqu_detail,
                    meta={'city_name': (xiaoqu_detail_link, province, city_name)}
                )
    # 小区item详情
    def parse_xiaoqu_detail(self, response):
        """Build a ``XiaoquHouseItem`` from a community detail page.

        ``meta['city_name']`` arrives as ``(detail_url, province, city)``.
        The info block is read by position. Some communities have no
        build-year row, which shifts every later row up by one, so the first
        row is inspected to decide where the remaining six values start.
        """
        def info_value(position):
            # Value cell (second span) of the N-th row of the info block.
            return response.xpath(
                '//div[@class="xiaoquInfo"]/div[{}]/span[2]/text()'.format(position)
            ).extract_first()

        xiaoqu_detail_link, province, city_name = response.meta.get('city_name')
        title = response.xpath('//h1[@class="detailTitle"]/text()').extract_first()
        region = response.xpath('//div[@class="detailDesc"]/text()').extract_first()
        single_price = response.xpath('//span[@class="xiaoquUnitPrice"]/text()').extract_first()

        # The first row counts as the build year only when it contains a
        # digit; otherwise it is taken to be the building type already.
        # NOTE(review): a build-year row without digits would be treated as
        # the building type - confirm against real pages.
        first_value = str(info_value(1)).strip()
        if re.search('[0-9]+', first_value):
            build_time = first_value
            start = 2
        else:
            build_time = None
            start = 1

        (house_struct, service_fees, service_company,
         build_company, building_nums, house_nums) = [
            info_value(start + offset) for offset in range(6)
        ]

        yield XiaoquHouseItem(
            province=province,
            city=city_name,
            title=title,
            region=region,
            single_price=single_price,
            build_time=build_time,
            house_struct=house_struct,
            service_fees=service_fees,
            service_company=service_company,
            build_company=build_company,
            building_nums=building_nums,
            house_nums=house_nums,
            xiaoqu_detail_url=xiaoqu_detail_link
        )
        print("省份：{} 城市：{}   小区   {}   {}   {}   {}   {}   {}   {}".format(province, city_name, build_time,house_struct,service_fees,service_company,build_company,building_nums,house_nums))

View Code

settings.py

# -*- coding: utf-8 -*-

# Scrapy settings for lian project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://doc.scrapy.org/en/latest/topics/settings.html
#     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://doc.scrapy.org/en/latest/topics/spider-middleware.html

# Project name and the package that holds (and receives newly generated) spiders.
BOT_NAME = 'lian'

SPIDER_MODULES = ['lian.spiders']
NEWSPIDER_MODULE = 'lian.spiders'

# Only log messages at WARNING level or above.
LOG_LEVEL = "WARNING"

# Crawl responsibly by identifying yourself (and your website) on the user-agent
# NOTE(review): the spider published with this project also passes its own
# headers dict, with a different User-Agent string, on every request -
# confirm which of the two is meant to apply.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36'

# Obey robots.txt rules
ROBOTSTXT_OBEY = True

# Presumably set so that truncated/broken responses are passed to the spider
# instead of being reported as download failures - see the Scrapy settings docs.
DOWNLOAD_FAIL_ON_DATALOSS = False
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
#}

# Enable or disable spider middlewares
# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'lian.middlewares.LianSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
#    'lian.middlewares.LianDownloaderMiddleware': 543,
#}

# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
# Enables lian.pipelines.LianPipeline as the only item pipeline; 300 is the
# order value assigned to it.
ITEM_PIPELINES = {
   'lian.pipelines.LianPipeline': 300,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

View Code

结果：

转载于:https://www.cnblogs.com/Jery-9527/p/10875017.html

系统学习Python——并发模型和异步编程：进程、线程和GIL
分类目录：《系统学习Python》总目录在文章《并发模型和异步编程：基础知识》我们简单介绍了Python中的进程、线程和协程。本文就着重介绍Python中的进程、线程和GIL的关系。Python解释器的每个实例都是一个进程。使用multiprocessing或concurrent.futures库可以启动额外的Python进程。Python的subprocess库用于启动运行外部程序（不管使用何种
为什么会出现“与此站点的连接不安全”警告？
当浏览器弹出“与此站点的连接不安全”的红色警告时，不仅会让访客感到不安，还可能直接导致用户流失、品牌信誉受损，甚至引发数据泄露风险。作为网站运营者，如何快速解决这一问题？一、为什么会出现“与此站点的连接不安全”警告？浏览器提示“不安全连接”，本质上是检测到当前网站与用户之间的数据传输未经过加密保护。以下是触发警告的常见原因：1.未安装SSL证书SSL（SecureSocketsLayer）证书是网
Flask框架入门：快速搭建轻量级Python网页应用「已注销」 python-AI python基础网站网络 python flask 后端
转载：Flask框架入门：快速搭建轻量级Python网页应用1.Flask基础Flask是一个使用Python编写的轻量级Web应用框架。它的设计目标是让Web开发变得快速简单，同时保持应用的灵活性。Flask依赖于两个外部库：Werkzeug和Jinja2，Werkzeug作为WSGI工具包处理Web服务的底层细节，Jinja2作为模板引擎渲染模板。安装Flask非常简单，可以使用pip安装命令
求是网：“内卷式”竞争的突出表现和主要危害有哪些？加百力财经研究科技知识人工智能大数据
"内卷式"竞争主要表现为：企业层面的低价竞争、同质化竞争和营销"逐底竞争"；地方政府层面的违规优惠政策、盲目重复建设和设置市场壁垒。危害体现在三个层面：微观上导致"劣币驱逐良币"，损害消费者利益；中观上破坏行业生态，挤压产业链利润空间；宏观上扭曲资源配置，抑制创新活力。什么是“内卷式”竞争？概括其一般特征，是指经济主体为了维持市场地位或争夺有限市场，不断投入大量精力和资源，却没有带来整体收益增长的
Python Flask 框架入门：快速搭建 Web 应用的秘诀 Python编程之道 Python人工智能与大数据 Python编程之道 python flask 前端 ai
PythonFlask框架入门：快速搭建Web应用的秘诀关键词Flask、微框架、路由系统、Jinja2模板、请求处理、WSGI、Web开发摘要想快速用Python搭建一个灵活的Web应用？Flask作为“微框架”代表，凭借轻量、可扩展的特性，成为初学者和小型项目的首选。本文将从Flask的核心概念出发，结合生活化比喻、代码示例和实战案例，带你一步步掌握：如何用Flask搭建第一个Web应用？路由
上位机知识篇---SD卡&U盘镜像
常用的镜像烧录软件balenaEtcherbalenaEtcher是一个开源的、跨平台的工具，用于将操作系统镜像文件（如ISO和IMG文件）烧录到SD卡和USB驱动器中。以下是其使用方法、使用场景和使用注意事项的介绍：使用方法下载安装：根据自己的操作系统，从官方网站下载对应的安装包。Windows系统下载.exe文件后双击安装；Linux系统若下载的是.deb文件，可在终端执行“sudodpkg-
JavaScript 树形菜单总结 Auscy microsoft
树形菜单是前端开发中常见的交互组件，用于展示具有层级关系的数据（如文件目录、分类列表、组织架构等）。以下从核心概念、实现方式、常见功能及优化方向等方面进行总结。一、核心概念层级结构：数据以父子嵌套形式存在，如{id:1,children:[{id:2}]}。节点：树形结构的基本单元，包含自身信息及子节点（若有）。展开/折叠：子节点的显示与隐藏切换，是树形菜单的核心交互。递归渲染：因数据层级不固定，
python_虚拟环境阿_焦 python
第一、配置虚拟环境：virtualenv（1）pipvirtualenv>安装虚拟环境包（2）pipinstallvirtualenvwrapper-win>安装虚拟环境依赖包（3）c盘创建虚拟目录>C:\virtualenv>配置环境变量【了解一下】：（1）如何使用virtualenv创建虚拟环境a、cd到C:\virtualenv目录下：b、mkvirtualenvname>创建虚拟环境nam
全面触摸屏输入法设计与实现长野君
本文还有配套的精品资源，点击获取简介：触摸屏输入法是针对触摸设备优化的文字输入方案，包括虚拟键盘、手写、语音识别和手势等多种输入方式。本方案通过提供主程序文件、用户手册、界面截图、示例图、说明文本和音效文件，旨在为用户提供一个完整的、多样的文字输入体验。开发者通过持续优化算法和用户界面，使用户在无物理键盘环境下也能高效准确地进行文字输入。1.触摸屏输入法概述简介在现代信息技术飞速发展的今天，触摸屏
高效批量单词翻译工具的设计与应用
本文还有配套的精品资源，点击获取简介：在信息技术飞速发展的今天，批量单词翻译工具通过计算机的数据处理能力，大大提高了语言学习和文字处理的效率。用户通过简单输入单词列表到一个文本文件，并运行翻译程序，即可获得翻译结果并保存至指定文件。该工具集成了内置或外部翻译引擎，利用自然语言处理技术实现快速准确的翻译，并可能提供词性识别等附加功能。尽管机器翻译无法完全取代人工校对，但它为用户提供了一种高效的翻译解
FPGA小白到项目实战：Verilog+Vivado全流程通关指南（附光学类岗位技能映射）阿牛的药铺算法移植部署 fpga开发 verilog
FPGA小白到项目实战：Verilog+Vivado全流程通关指南（附光学类岗位技能映射）引言：为什么这个FPGA入门路线能帮你快速上岗？本文设计了一条**"Verilog语法→工具链操作→光学项目实战→岗位技能对标"的阶梯式学习路径。不同于泛泛而谈的FPGA教程，我们聚焦光学类产品开发**核心能力（时序接口设计、图像处理算法移植、高速接口应用），通过3个递进式项目（从LED闪烁到图像边缘检测），
Python爱心光波
系列文章序号直达链接Tkinter1Python李峋同款可写字版跳动的爱心2Python跳动的双爱心3Python蓝色跳动的爱心4Python动漫烟花5Python粒子烟花Turtle1Python满屏飘字2Python蓝色流星雨3Python金色流星雨4Python漂浮爱心5Python爱心光波①6Python爱心光波②7Python满天繁星8Python五彩气球9Python白色飘雪10Pyt
Python流星雨 Want595 python 开发语言
文章目录系列文章写在前面技术需求完整代码代码分析1.模块导入2.画布设置3.画笔设置4.颜色列表5.流星类(Star)6.流星对象创建7.主循环8.流星运动逻辑9.视觉效果10.总结写在后面系列文章序号直达链接表白系列1Python制作一个无法拒绝的表白界面2Python满屏飘字表白代码3Python无限弹窗满屏表白代码4Python李峋同款可写字版跳动的爱心5Python流星雨代码6Python
Android ViewBinding 使用与封装教程积跬步DEV Android 开发实战大全 android
AndroidViewBinding使用与封装教程：一、ViewBinding是什么？核心功能：为每个XML布局文件自动生成一个绑定类（如ActivityMainBinding），直接暴露所有带ID的视图引用。优点：避免繁琐的findViewById()，类型安全且编译时检查。对比DataBinding：ViewBinding仅处理视图引用，无数据绑定功能。DataBinding支持双向数据绑定，
基于链家网的二手房数据采集清洗与可视化分析 Mint_Datazzh 项目 selenium 网络爬虫
个人学习内容笔记，仅供参考。项目链接：https://gitee.com/rongwu651/lianjia原文链接：基于链家网的二手房数据采集清洗与可视化分析–笔墨云烟研究内容该课题的主要目的是通过将二手房网站上的存量与已销售房源，构建一个二手房市场行情情况与房源特点的可视化平台。该平台通过HTML架构和Echarts完成可视化的搭建。因此，该课题的主要研究内容就是如何利用相关技术设计并实现这样
Python之七彩花朵代码实现 PlutoZuo Python python 开发语言
Python之七彩花朵代码实现文章目录Python之七彩花朵代码实现下面是一个简单的使用Python的七彩花朵。这个示例只是一个简单的版本，没有很多高级功能，但它可以作为一个起点，你可以在此基础上添加更多功能。importturtleastuimportrandomasraimportmathtu.setup(1.0,1.0)t=tu.Pen()t.ht()colors=['red','skybl
Python 脚本最佳实践2025版
前文可以直接把这篇文章喂给AI,可以放到AI角色设定里,也可以直接作为提示词.这样,你只管提需求,写脚本就让AI来.概述追求简洁和清晰：脚本应简单明了。使用函数(functions)、常量(constants)和适当的导入(import)实践来有逻辑地组织你的Python脚本。使用枚举(enumerations)和数据类(dataclasses)等数据结构高效管理脚本状态。通过命令行参数增强交互性
《分片终章的哈希裂痕：藏在数据拼接里的隐形逻辑》前端
在大文件分片传输里，有一个令人费解的现象：当所有分片的校验都显示正常，拼接后的整体文件却与源文件的哈希值不符，而问题往往精准地指向最后一片。这并非偶然的技术故障，而是数据传输链条中多重隐形逻辑交织的必然结果，如同钟表的齿轮在最后一圈突然出现难以察觉的错位。文件被切割成固定大小的分片时，最后一片往往是规则的例外。它如同拼图中形状特异的收尾piece，尺寸可能小于其他分片，却承担着衔接整体的关键作用。
（Python基础篇）了解和使用分支结构 EternityArt 基础篇 python
目录一、引言二、Python分支结构的类型与语法（一）if语句（单分支）（二）if-else语句（双分支）（三）if-elif-else语句（多分支）三、分支结构的应用场景（一）提示用户输入用户名，然后再提示输入密码，如果用户名是“admin”并且密码是“88888”则提示正确，否则，如果用户名不是admin还提示用户用户名不存在,（二）提示用户输入用户名，然后再提示输入密码，如果用户名是“adm
（Python基础篇）循环结构 EternityArt 基础篇 python
一、什么是Python循环结构？循环结构是编程中重复执行代码块的机制。在Python中，循环允许你：1.迭代处理数据：遍历列表、字典、文件内容等。2.自动化重复任务：如批量处理数据、生成序列等。3.控制执行流程：根据条件决定是否继续或终止循环。二、为什么需要循环结构？假设你需要打印1到100的所有偶数：没有循环：需手动编写100行print()语句。print(0)print(2)print(4)
（Python基础篇）字典的操作 EternityArt 基础篇 python 开发语言
一、引言在Python编程中，字典（Dictionary）是一种极具灵活性的数据结构，它通过“键-值对”（key-valuepair）的形式存储数据，如同现实生活中的字典——通过“词语（键）”快速查找“释义（值）”。相较于列表和元组的有序索引访问，字典的优势在于基于键的快速查找，这使得它在处理需要频繁通过唯一标识获取数据的场景中极为高效。掌握字典的操作，能让我们更高效地组织和管理复杂数据，是Pyt
LeetCode算法题：电话号码的字母组合吱屋猪_ 算法 leetcode java
题目描述：给定一个仅包含数字2-9的字符串，返回所有它能表示的字母组合。答案可以按任意顺序返回。给出数字到字母的映射如下（与电话按键相同）。注意1不对应任何字母。2->"abc"3->"def"4->"ghi"5->"jkl"6->"mno"7->"pqrs"8->"tuv"9->"wxyz"例如，给定digits="23"，返回["ad","ae","af","bd","be","bf","cd
基于开源AI智能名片链动2+1模式与S2B2C商城小程序的渠道选择策略研究说私域人工智能小程序
摘要：在数字化商业环境下，品牌与产品的渠道选择对其市场推广和运营成功至关重要。本文聚焦于如何依据自身品牌和产品特性，结合开源AI智能名片链动2+1模式与S2B2C商城小程序，运用科学的渠道选择方法，慎重挑选1-2个适宜平台，集中资源发力并取得成绩后再拓展其他渠道。通过理论分析与案例研究，探讨该策略的有效性和可行性，为企业渠道布局提供参考。关键词：渠道选择；开源AI智能名片；链动2+1模式；S2B2
Python七彩花朵 Want595 python 开发语言
系列文章序号直达链接Tkinter1Python李峋同款可写字版跳动的爱心2Python跳动的双爱心3Python蓝色跳动的爱心4Python动漫烟花5Python粒子烟花Turtle1Python满屏飘字2Python蓝色流星雨3Python金色流星雨4Python漂浮爱心5Python爱心光波①6Python爱心光波②7Python满天繁星8Python五彩气球9Python白色飘雪10Pyt
洛谷 P11120 [ROIR 2024 Day 1] 登机题解殇之夜洛谷 c++c语言算法
Part0前言这种题一看就是签到题，也是特水，建议评红或橙。Part1思路就是先将已有位置先填对称，然后将剩余还未添加的乘客以对称方式填入。首先可以特判掉需要的位置大于空位的情况，直接输出Impossible。然后用数组记录.和X的位置，先遍历所有X的位置，然后看他的对称位置是否为空，若为空，则填入X，然后m--。最后若musingnamespacestd;chara[1010][10];stru
Redis Sentinel（哨兵）和 Redis Cluster（集群） G丶AEOM 八股普通学习区 Redis redis 数据库缓存
哨兵机制和集群有什么区别Redis集群主要有两种，一种是RedisSentinel哨兵集群，一种是RedisCluster。主从集群，包括一个Master和多个Slave节点，Master负责数据的读写，Slave负责数据的读取，Master上收到的数据变更会同步到Slave节点上实现数据同步，但不提供容错和恢复，在Master宕机时不会选出新的Master，导致后续客户端所有写请求直接失败。所以
CentOS7环境卸载MySQL5.7 Hadoop_Liang mysql 数据库 mysql
备份重要数据切记，卸载之前先备份mysql重要的数据。备份一个数据库例如：备份名为mydatabase的数据库到backup.sql的文件中mysqldump-uroot-ppassword123mydatabase>backup.sql备份所有数据库mysqldump-uroot-ppassword123--all-databases>all_databases_backup.sql注意：-p后
php SPOF 贵哥的编程之路(热爱分享为后来者) PHP语言经典程序100题 php 开发语言
1.什么是单点故障（SPOF）？单点故障指的是系统中某个组件一旦失效，整个系统或服务就会不可用。常见的单点有：数据库、缓存、Web服务器、负载均衡、网络设备等。2.常见单点故障场景只有一台数据库服务器，宕机后所有业务不可用只有一台Redis缓存，挂掉后缓存全部失效只有一台Web服务器，挂掉后网站无法访问只有一个负载均衡节点，挂掉后流量无法分发只有一条网络链路，断开后所有服务失联3.消除单点故障的主
用OpenCV标定相机内参应用示例（C++和Python）
下面是一个完整的使用OpenCV进行相机内参标定（CameraCalibration）的示例，包括C++和Python两个版本，基于棋盘格图案标定。一、目标：相机标定通过拍摄多张带有棋盘格图案的图像，估计相机的内参：相机矩阵（内参）K畸变系数distCoeffs可选外参（R,T）标定精度指标（如重投影误差）二、棋盘格参数设置（根据自己的棋盘格设置）：棋盘格角点数：9x6（内角点，9列×6行）；每个
Anaconda 详细下载与安装教程
Anaconda详细下载与安装教程1.简介Anaconda是一个用于科学计算的开源发行版，包含了Python和R的众多常用库。它还包括了conda包管理器，可以方便地安装、更新和管理各种软件包。2.下载Anaconda2.1访问官方网站首先，打开浏览器，访问Anaconda官方网站。2.2选择适合的版本在页面中，你会看到两个主要的下载选项：AnacondaIndividualEdition：适用于
多线程编程之理财周凡杨 java 多线程生产者消费者理财
现实生活中，我们一边工作，一边消费，正常情况下会把多余的钱存起来，比如存到余额宝，还可以多挣点钱，现在就有这个情况：我每月可以发工资20000元（暂定每月的1号），每月消费5000元（租房+生活费）（暂定每月的1号），其中租金是大头占90%，交房租的方式可以选择（一月一交，两月一交、三月一交），理财：1万元存余额宝一天可以赚1元钱，
[Zookeeper学习笔记之三]Zookeeper会话超时机制 bit1129 zookeeper
首先，会话超时是由Zookeeper服务端通知客户端会话已经超时，客户端不能自行决定会话已经超时，不过客户端可以通过调用Zookeeper.close()主动的发起会话结束请求，如下的代码输出内容 Created /zoo-739160015 CONNECTEDCONNECTED .............CONNECTEDCONNECTED CONNECTEDCLOSEDCLOSED
SecureCRT快捷键 daizj secureCRT 快捷键
ctrl + a : 移动光标到行首ctrl + e ：移动光标到行尾ctrl + b: 光标前移1个字符ctrl + f: 光标后移1个字符ctrl + h : 删除光标之前的一个字符ctrl + d ：删除光标之后的一个字符ctrl + k ：删除光标到行尾所有字符ctrl + u : 删除光标至行首所有字符ctrl + w: 删除光标至行首
Java 子类与父类这间的转换周凡杨 java 父类与子类的转换
最近同事调的一个服务报错，查看后是日期之间转换出的问题。代码里是把 java.sql.Date 类型的对象强制转换为 java.sql.Timestamp 类型的对象。报java.lang.ClassCastException。代码：
可视化swing界面编辑朱辉辉33 eclipse swing
今天发现了一个WindowBuilder插件，功能好强大，啊哈哈，从此告别手动编辑swing界面代码，直接像VB那样编辑界面，代码会自动生成。首先在Eclipse中点击help，选择Install New Software,然后在Work with中输入WindowBui
web报表工具FineReport常用函数的用法总结（文本函数）老A不折腾 finereport web报表工具报表软件 java报表
文本函数 CHAR CHAR(number):根据指定数字返回对应的字符。CHAR函数可将计算机其他类型的数字代码转换为字符。 Number:用于指定字符的数字，介于1和65535之间（包括1和65535）。示例: CHAR(88)等于“X”。 CHAR(45)等于“-”。 CODE CODE(text):计算文本串中第一个字
mysql安装出错林鹤霄 mysql安装
[root@localhost ~]# rpm -ivh MySQL-server-5.5.24-1.linux2.6.x86_64.rpm Preparing... #####################
linux下编译libuv aigo libuv
下载最新版本的libuv源码，解压后执行： ./autogen.sh 这时会提醒找不到automake命令，通过以下命令执行安装（redhat系用yum，Debian系用apt-get）： # yum -y install automake # yum -y install libtool 如果提示错误：make: *** No targe
中国行政区数据及三级联动菜单 alxw4616
近期做项目需要三级联动菜单,上网查了半天竟然没有发现一个能直接用的! 呵呵,都要自己填数据....我了个去这东西麻烦就麻烦的数据上. 哎,自己没办法动手写吧. 现将这些数据共享出了,以方便大家.嗯,代码也可以直接使用文件说明 lib\area.sql -- 县及县以上行政区划分代码（截止2013年8月31日)来源：国家统计局发布时间：2014-01-17 15:0
哈夫曼加密文件百合不是茶哈夫曼压缩哈夫曼加密二叉树
在上一篇介绍过哈夫曼编码的基础知识,下面就直接介绍使用哈夫曼编码怎么来做文件加密或者压缩与解压的软件,对于新手来是有点难度的,主要还是要理清楚步骤; 加密步骤: 1,统计文件中字节出现的次数,作为权值 2,创建节点和哈夫曼树 3,得到每个子节点01串 4,使用哈夫曼编码表示每个字节
JDK1.5 Cyclicbarrier实例 bijian1013 java thread java多线程 Cyclicbarrier
CyclicBarrier类一个同步辅助类，它允许一组线程互相等待，直到到达某个公共屏障点 (common barrier point)。在涉及一组固定大小的线程的程序中，这些线程必须不时地互相等待，此时 CyclicBarrier 很有用。因为该 barrier 在释放等待线程后可以重用，所以称它为循环的 barrier。 CyclicBarrier支持一个可选的 Runnable 命令，
九项重要的职业规划 bijian1013 工作学习
一. 学习的步伐不停止古人说，活到老，学到老。终身学习应该是您的座右铭。世界在不断变化，每个人都在寻找各自的事业途径。您只有保证了足够的技能储
【Java范型四】范型方法 bit1129 java
范型参数不仅仅可以用于类型的声明上，例如 package com.tom.lang.generics; import java.util.List; public class Generics<T> { private T value; public Generics(T value) { this.value =
【Hadoop十三】HDFS Java API基本操作 bit1129 hadoop
package com.examples.hadoop; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoo
lua实现split字符串分隔 ronin47 lua split
LUA并不象其它许多"大而全"的语言那样，包括很多功能，比如网络通讯、图形界面等。但是LUA可以很容易地被扩展：由宿主语言(通常是C或 C++)提供这些功能，LUA可以使用它们，就像是本来就内置的功能一样。LUA只包括一个精简的核心和最基本的库。这使得LUA体积小、启动速度快，从而适合嵌入在别的程序里。因此在lua中并没有其他语言那样多的系统函数。习惯了其他语言的字符串分割函
java-从先序遍历和中序遍历重建二叉树 bylijinnan java
public class BuildTreePreOrderInOrder { /** * Build Binary Tree from PreOrder and InOrder * _______7______ / \ __10__ ___2 / \ / 4
openfire开发指南《连接和登陆》开窍的石头 openfire 开发指南 smack
第一步官网下载smack.jar包下载地址：http://www.igniterealtime.org/downloads/index.jsp#smack 第二步把smack里边的jar导入你新建的java项目中开始编写smack连接openfire代码 p
[移动通讯]手机后盖应该按需要能够随时开启 comsci 移动
看到新的手机，很多由金属材质做的外壳，内存和闪存容量越来越大，CPU速度越来越快，对于这些改进，我们非常高兴，也非常欢迎但是，对于手机的新设计，有几点我们也要注意第一：手机的后盖应该能够被用户自行取下来，手机的电池的可更换性应该是必须保留的设计,
20款国外知名的php开源cms系统 cuiyadll cms
内容管理系统，简称CMS，是一种简易的发布和管理新闻的程序。用户可以在后端管理系统中发布，编辑和删除文章，即使您不需要懂得HTML和其他脚本语言，这就是CMS的优点。在这里我决定介绍20款目前国外市面上最流行的开源的PHP内容管理系统，以便没有PHP知识的读者也可以通过国外内容管理系统建立自己的网站。 1. Wordpress WordPress的是一个功能强大且易于使用的内容管
Java生成全局唯一标识符 darrenzhu java uuid unique identifier id
How to generate a globally unique identifier in Java http://stackoverflow.com/questions/21536572/generate-unique-id-in-java-to-label-groups-of-related-entries-in-a-log http://stackoverflow
php安装模块检测是否已安装过, 使用的SQL语句 dcj3sjt126com sql
SHOW [FULL] TABLES [FROM db_name] [LIKE 'pattern'] SHOW TABLES列举了给定数据库中的非TEMPORARY表。您也可以使用mysqlshow db_name命令得到此清单。本命令也列举数据库中的其它视图。支持FULL修改符，这样SHOW FULL TABLES就可以显示第二个输出列。对于一个表，第二列的值为BASE T
5天学会一种 web 开发框架 dcj3sjt126com Web 框架 framework
web framework层出不穷，特别是ruby/python,各有10+个,php/java也是一大堆根据我自己的经验写了一个to do list,按照这个清单，一条一条的学习，事半功倍，很快就能掌握一共25条，即便很磨蹭，2小时也能搞定一条，25*2=50。只需要50小时就能掌握任意一种web框架各类web框架大同小异:现代web开发框架的6大元素，把握主线，就不会迷路建议把本文
Gson使用三(Map集合的处理,一对多处理) eksliang json gson Gson map Gson 集合处理
转载请出自出处：http://eksliang.iteye.com/blog/2175532 一、概述 Map保存的是键值对的形式，Json的格式也是键值对的，所以正常情况下，map跟json之间的转换应当是理所当然的事情。二、Map参考实例 package com.ickes.json; import java.lang.refl
cordova实现“再点击一次退出”效果 gundumw100 android
基本的写法如下： document.addEventListener("deviceready", onDeviceReady, false); function onDeviceReady() { //navigator.splashscreen.hide(); document.addEventListener("b
openldap configuration learning note iwindyforest configuration
hostname // to display the computer name hostname <changed name> // to change go to: /etc/sysconfig/network, add/modify HOSTNAME=NEWNAME to change permanently; don't forget to change /etc/hosts
Nullability and Objective-C 啸笑天 Objective-C
https://developer.apple.com/swift/blog/?id=25 http://www.cocoachina.com/ios/20150601/11989.html http://blog.csdn.net/zhangao0086/article/details/44409913 http://blog.sunnyxx
jsp中实现参数隐藏的两种方法 macroli JavaScript jsp
在一个JSP页面有一个链接，//确定是一个链接?点击弹出一个页面，需要传给这个页面一些参数。//正常的方法是设置弹出页面的src="***.do?p1=aaa&p2=bbb&p3=ccc"//确定目标URL是Action来处理?但是这样会在页面上看到传过来的参数，可能会不安全。要求实现src="***.do"，参数通过其他方法传！//////
Bootstrap A标签关闭modal并打开新的链接解决方案 qiaolevip 每天进步一点点学习永无止境 bootstrap 纵观千象
Bootstrap里面的js modal控件使用起来很方便，关闭也很简单。只需添加标签 data-dismiss="modal" 即可。可是偏偏有时候需要a标签既要关闭modal，又要打开新的链接，尝试多种方法未果。只好使用原始js来控制。 <a href="#/group-buy" class="btn bt
二维数组在Java和C中的区别流淚的芥末 java c 二维数组数组
Java代码： public class test03 { public static void main(String[] args) { int[][] a = {{1},{2,3},{4,5,6}}; System.out.println(a[0][1]); } } 运行结果： Exception in thread "mai
systemctl命令用法 wmlJava linux systemctl
对比表，以 apache / httpd 为例任务旧指令新指令使某服务自动启动 chkconfig --level 3 httpd on systemctl enable httpd.service 使某服务不自动启动 chkconfig --level 3 httpd off systemctl disable httpd.service 检查服务状态 service h

Python——Scrapy爬取链家网站所有房源信息

你可能感兴趣的:(Python——Scrapy爬取链家网站所有房源信息)