爬取大众点评网的酒店信息

输入城市的拼音,即可爬取大众点评上该城市的酒店信息,并将数据写入以城市名命名的 CSV 文件。

不完善点:

  1. 只能输入城市名的拼音;如需支持中文输入,可以借助第三方库(如 pinyin)先将中文转换为拼音。
  2. 未对输入的城市是否有效进行判断。

代码如下:
import requests
from lxml import etree
import csv
import re

class DPHotel():
	"""Scrape Dianping hotel listings for one city into ``<city>.csv``.

	The constructor downloads the first listing page to learn how many pages
	exist and opens the output file. ``get_info`` then walks every page,
	writes one CSV row per hotel and closes the file.
	"""

	# Seconds to wait on a request before giving up; override per instance if needed.
	timeout = 10

	def __init__(self,city):
		"""Prepare a crawl for *city*.

		Args:
			city: City identifier substituted into the listing URL path and
				used as the CSV file name (the prompt in this file asks for
				the city's pinyin).
		"""
		self.city = city
		self.base_url = "http://www.dianping.com/{city}/hotel/n10p{page}"
		self.headers = {"User-Agent":r"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36" }
		# Handle of the CSV file; set by create_file() and released by close().
		self.file = None
		# Total number of listing pages, read from the pagination links of page 1.
		self.page = self._parse_page_count(self.get_response().xpath('//div[@class="page"]//a[@rel="nofollow"]//text()'))
		self.writer = self.create_file()

	def get_response(self,page=1):
		"""Download listing page *page* and return it parsed by ``etree.HTML``.

		Args:
			page: 1-based page number substituted into the listing URL.
		"""
		url = self.base_url.format(city=self.city,page=page)
		# Without a timeout a stalled connection would block the crawl forever.
		html = requests.get(url, headers=self.headers, timeout=self.timeout).text
		return etree.HTML(html)

	def create_file(self):
		"""Open ``<city>.csv`` for writing, emit the header row and return the writer.

		The file handle is kept on ``self.file`` so that ``close`` can release it.

		Returns:
			A ``csv.DictWriter`` bound to the freshly opened file.
		"""
		# Release a handle left over from an earlier call before replacing it.
		self.close()
		# Explicit encoding: rows contain Chinese text, which the platform default
		# codec may be unable to encode. The BOM written by utf-8-sig lets
		# spreadsheet programs detect UTF-8.
		self.file = open('{city}.csv'.format(city=self.city),'w',newline='',encoding='utf-8-sig')
		fieldnames = ['name','bottom-price','rank','place','tags']
		writer = csv.DictWriter(self.file,fieldnames=fieldnames)
		writer.writeheader()
		return writer

	def close(self):
		"""Flush and close the output file; calling it repeatedly is harmless."""
		handle = getattr(self,'file',None)
		if handle is not None and not handle.closed:
			handle.close()

	def get_info(self):
		"""Crawl every listing page and write one CSV row per hotel.

		The output file is closed when the crawl ends, whether it succeeded or
		raised, so buffered rows always reach the disk. Calling the method again
		reopens the file and rewrites it from scratch.
		"""
		handle = getattr(self,'file',None)
		if handle is None or handle.closed:
			self.writer = self.create_file()
		try:
			for page in range(1,self.page+1):
				response = self.get_response(page)
				data = response.xpath('//div[@class="list-wrapper"]//div[@class="content"]//ul[@class="hotelshop-list"]//li[@class="hotel-block"]')
				for hotel in data:
					self.writer.writerow(self._parse_hotel(hotel))
		finally:
			self.close()

	@classmethod
	def _parse_hotel(cls,hotel):
		"""Build the CSV row dict for one ``hotel-block`` element.

		A field missing from the markup becomes an empty string instead of
		raising ``IndexError``, so one incomplete entry cannot abort the crawl.
		"""
		tags = (text.strip() for text in hotel.xpath('.//p[@class="hotel-tags"]//text()'))
		return {
			'name': cls._pick(hotel.xpath('.//div[1]//div[1]//h2//a[1]/text()'),0),
			'place': ''.join(hotel.xpath('.//p[@class="place"]//text()')).strip(),
			# Whitespace-only text nodes are dropped so they do not show up as empty tags.
			'tags': '、'.join(text for text in tags if text),
			# The price is taken from the third text node of the price paragraph.
			'bottom-price': cls._pick(hotel.xpath('.//div[@class="price"]/p//text()'),2),
			'rank': cls._parse_rank(cls._pick(hotel.xpath('.//div[@class="item-rank-ctn"]/span/@class'),0)),
		}

	@staticmethod
	def _pick(values,index,default=''):
		"""Return ``values[index]``, or *default* when that position does not exist."""
		try:
			return values[index]
		except IndexError:
			return default

	@staticmethod
	def _parse_page_count(texts):
		"""Return the largest page number among pagination link texts (1 if none).

		Non-numeric link texts are ignored, and an empty list (no pagination bar
		on the page) yields a single page.
		"""
		numbers = [int(text.strip()) for text in texts if re.fullmatch(r'[0-9]+',text.strip())]
		return max(numbers,default=1)

	@staticmethod
	def _parse_rank(class_attr):
		"""Turn the digits embedded in a rank CSS class into a dotted score.

		The first run of digits is split into single characters joined by
		``'.'``, e.g. ``'sml-str45'`` gives ``'4.5'``. Returns ``''`` when the
		attribute contains no digits.
		"""
		digits = re.findall('[0-9]+',class_attr)
		return '.'.join(digits[0]) if digits else ''
			

if __name__ == '__main__':
	
	# Script entry point: ask for a city, then crawl its hotel listings into <city>.csv.
	# The answer is interpolated verbatim into the listing URL and the CSV file name,
	# so surrounding whitespace is stripped first.
	city = input('请输入要爬取的城市(拼音): ').strip()
	if not city:
		# An empty name would build a malformed URL and a file called ".csv".
		raise SystemExit('未输入城市名称')
	hotel = DPHotel(city)
	hotel.get_info()

ps:代码比较简单,没有写注释

你可能感兴趣的:(爬虫)