Scraping tourist attraction information from Qunar (去哪儿网) with Python

Overview of the scraping process:
1. Target URL: 'https://piao.qunar.com/ticket/list.htm?keyword=北京&page=1'
2. The page is fetched with a plain HTTP GET request.
3. BeautifulSoup is used to extract the required fields (a short sketch of steps 2-3 follows this list).
4. The scraped records are written to a local CSV file.
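
Before the full script, here is a minimal sketch of steps 2-3: fetch one listing page and print the data-* attributes that each result item carries. The class names result_list and the data-* attribute names reflect the page markup used by the script below and may change on the live site, so treat this as an illustration rather than a guaranteed interface.

import requests
from bs4 import BeautifulSoup

# Step 2: plain GET request for the first page of Beijing results
url = 'https://piao.qunar.com/ticket/list.htm?keyword=北京&page=1'
response = requests.get(url)
response.encoding = 'utf-8'

# Step 3: parse the HTML and locate the result container
bs_obj = BeautifulSoup(response.text, 'html.parser')
result_list = bs_obj.find('div', {'class': 'result_list'})

# Each attraction is a direct child element whose data-* attributes
# hold the fields the full script extracts
if result_list is not None:
    for item in result_list.find_all('div', recursive=False):
        print(item.get('data-sight-name'), item.get('data-address'))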

The full code is as follows:

import os

import requests
from bs4 import BeautifulSoup

class QuNaEr():
    def __init__(self, keyword, page=1):
        self.keyword = keyword
        self.page = page

    def qne_spider(self):
        # Step 2: fetch the listing page with a plain GET request
        url = 'https://piao.qunar.com/ticket/list.htm?keyword=%s&page=%s' % (self.keyword, self.page)
        response = requests.get(url)
        response.encoding = 'utf-8'
        text = response.text
        # Step 3: parse the HTML and walk the children of the result list
        bs_obj = BeautifulSoup(text, 'html.parser')

        arr = bs_obj.find('div', {'class': 'result_list'}).contents
        with open('./qunaer/tour.csv', 'a', encoding='utf-8') as f:
            for i in arr:
                # .contents also yields bare text nodes (whitespace);
                # skip anything that is not a tag carrying attributes
                if not hasattr(i, 'attrs') or not i.attrs:
                    continue
                info = i.attrs
                # Attraction name
                name = info.get('data-sight-name')
                print(name)
                # Address
                address = info.get('data-address')
                # Recent ticket sales
                count = info.get('data-sale-count')
                # Latitude/longitude
                point = info.get('data-point')

                # Starting price: the first <em> inside the price span
                try:
                    price = i.find('span', {'class': 'sight_item_price'})
                    price = price.find_all('em')
                    price = price[0].text

                    f.write('{},{},{},{},{}\n'.format(name, address, count, price, point))
                except Exception as e:
                    print(e)

if __name__ == '__main__':
    cities = ['北京', '上海', '成都', '三亚', '广州', '重庆', '深圳', '西安', '杭州', '厦门', '武汉', '大连', '苏州']
    # Make sure the output directory exists, then write the CSV header row
    os.makedirs('./qunaer', exist_ok=True)
    with open('./qunaer/tour.csv', 'a', encoding='utf-8') as f:
        f.write('{},{},{},{},{}\n'.format('景区名称', '地址', '售票数', '起始价格', '经纬度'))
    for city in cities:
        for page in range(1, 10):
            qne = QuNaEr(city, page=page)
            qne.qne_spider()
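
The script above sends bare requests.get calls in a tight loop. If the site starts rejecting or throttling requests, a common adjustment is to send a browser-like User-Agent, set a timeout, and pause briefly between pages. The sketch below shows one way to do that; the header string and the one-second delay are illustrative assumptions, not values required by Qunar.

import time

import requests

# Browser-like User-Agent (illustrative value, not mandated by the site)
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

def fetch_page(keyword, page):
    url = 'https://piao.qunar.com/ticket/list.htm?keyword=%s&page=%s' % (keyword, page)
    # Time out after 10 seconds instead of hanging indefinitely
    response = requests.get(url, headers=HEADERS, timeout=10)
    response.encoding = 'utf-8'
    # Be polite: short pause between successive requests
    time.sleep(1)
    return response.text

qne_spider() could then call a helper like fetch_page() instead of calling requests.get() directly.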
