住哪儿网上面酒店信息爬虫源代码

import urllib.parse
import urllib.request
import json
import time
import pymysql.cursors
#from datetime import date, datetime, timedelta
def getjson(page, timeout=30):
    """Fetch one page of the zhuna.cn hotel search listing as parsed JSON.

    The query is fixed to the city name '北京' with ``ecityid=0101``; only
    the page number varies.

    Args:
        page: Page number to request; converted with ``str()``.
        timeout: Seconds to wait on the network before giving up, so a
            stalled connection cannot hang the crawl indefinitely.

    Returns:
        Whatever ``json.loads`` produces for the response body.

    Raises:
        OSError: On network failure or timeout (``urllib.error.URLError``
            is a subclass).
        ValueError: If the body is not valid UTF-8 or not valid JSON.
    """
    url = 'http://www.zhuna.cn/?m=search.getList'
    cityname = urllib.parse.quote('北京')
    page = str(page)
    uri = url + '&page=' + page + '&sort=0&distance=3' + '&ecityid=0101' + '&cityname=' + cityname + '&hid=&youhui=0&assure=0&lat=&lng=&mapbarname=&mapbarid=0&lsid='
    # The context manager closes the HTTP response (and its socket) even if
    # reading or decoding fails.
    with urllib.request.urlopen(uri, timeout=timeout) as response:
        body = response.read().decode()
    return json.loads(body)
# Connection settings for the local MySQL server that stores the results.
# NOTE(review): credentials are hard-coded; consider loading them from the
# environment or a config file kept out of version control.
config = {
    'host': '127.0.0.1',
    'port': 3306,
    'user': 'root',
    'password': 'oaken',
    'db': 'hotel',
    'charset': 'utf8mb4',
    'cursorclass': pymysql.cursors.DictCursor,
}
connection = pymysql.connect(**config)
try:
    # Resolve the dated table name once so a run that crosses midnight keeps
    # writing to the table that was actually created.
    table_name = 'hotel_' + time.strftime("%Y%m%d")
    with connection.cursor() as cursor:
        # IF NOT EXISTS lets the script be re-run on the same day; duplicate
        # hotel names are then rejected by the primary key and reported below.
        sqlCreate = 'CREATE TABLE IF NOT EXISTS ' + table_name + '(hotel_name varchar(100) not null,hotel_province varchar(20),hotel_price float,PRIMARY KEY ( hotel_name ));'
        cursor.execute(sqlCreate)
        connection.commit()
    sql = 'INSERT INTO ' + table_name + ' (hotel_name, hotel_province, hotel_price) VALUES (%s, %s, %s)'

    i = 1
    while True:
        # Any fetch/parse failure ends the crawl, as before, but the reason is
        # now reported and Ctrl-C is no longer swallowed.
        try:
            result = getjson(i)
        except Exception as exc:
            print('stopped at page', i, ':', exc)
            break
        i = i + 1
        time.sleep(5)  # throttle requests to the remote site

        try:
            hotels = result['data']['hotels']
        except (KeyError, TypeError):
            break
        # An empty or malformed hotel list marks the end of the listing;
        # without this check the loop would keep requesting pages forever.
        if not isinstance(hotels, list) or not hotels:
            break

        for hotel in hotels:
            # A failed insert (duplicate name, bad price, DB error) is
            # flagged with '***' and the crawl carries on.
            try:
                with connection.cursor() as cursor:
                    cursor.execute(sql, (hotel['hotelname'], '北京', float(hotel['Min_Jiage'])))
                    connection.commit()
            except (pymysql.MySQLError, KeyError, TypeError, ValueError):
                print('***')
            try:
                print(hotel['hotelname'], end='  ')
                print(hotel['Min_Jiage'], end='  ')
                print(hotel['comment_scores']['score'])
            except (KeyError, TypeError):
                # Record lacks an expected field: finish the partial output
                # line and move on to the next hotel.
                print()
finally:
    # Always release the database connection, even when the crawl aborts.
    connection.close()
    
import urllib.parse
import urllib.request
import json
import time
import pymysql.cursors
#from datetime import date, datetime, timedelta
def getjson(page, hotel_id=14380, timeout=30):
    """Fetch one page of comments for a zhuna.cn hotel as parsed JSON.

    Args:
        page: Page number to request; converted with ``str()``.
        hotel_id: Value sent as the ``id`` query parameter. Defaults to
            14380, the id that was previously hard-coded.
        timeout: Seconds to wait on the network before giving up, so a
            stalled connection cannot hang the crawl indefinitely.

    Returns:
        Whatever ``json.loads`` produces for the response body.

    Raises:
        OSError: On network failure or timeout (``urllib.error.URLError``
            is a subclass).
        ValueError: If the body is not valid UTF-8 or not valid JSON.
    """
    url = 'http://www.zhuna.cn/index.php?m=hotel.comment'
    h_id = str(hotel_id)
    page = str(page)
    uri = url + '&id=' + h_id + '&type=' + '2' + '&page=' + page
    # The context manager closes the HTTP response (and its socket) even if
    # reading or decoding fails.
    with urllib.request.urlopen(uri, timeout=timeout) as response:
        body = response.read().decode()
    return json.loads(body)
# Connection settings for the local MySQL server that stores the results.
# NOTE(review): credentials are hard-coded; consider loading them from the
# environment or a config file kept out of version control.
config = {
    'host': '127.0.0.1',
    'port': 3306,
    'user': 'root',
    'password': 'oaken',
    'db': 'hotel',
    'charset': 'utf8mb4',
    'cursorclass': pymysql.cursors.DictCursor,
}
connection = pymysql.connect(**config)
try:
    # Resolve the dated table name once so a run that crosses midnight keeps
    # writing to the table that was actually created.
    table_name = 'hotel_comment_' + time.strftime("%Y%m%d")
    with connection.cursor() as cursor:
        # Deliberately not IF NOT EXISTS: comment ids restart at 1 on every
        # run, so re-running against an existing table would fail every
        # insert on the primary key. Failing fast here is clearer.
        sqlCreate = 'CREATE TABLE ' + table_name + '(comment_id varchar(20) not null,hotel_comment varchar(400),PRIMARY KEY ( comment_id ));'
        cursor.execute(sqlCreate)
        connection.commit()
    sql = 'INSERT INTO ' + table_name + ' (comment_id, hotel_comment) VALUES (%s, %s)'

    i = 1  # page currently being fetched
    n = 1  # next comment id; advances only after a successful insert
    while True:
        # Any fetch/parse failure ends the crawl, as before, but the reason is
        # now reported and Ctrl-C is no longer swallowed.
        try:
            result = getjson(i)
        except Exception as exc:
            print('stopped at page', i, ':', exc)
            break
        time.sleep(5)  # throttle requests to the remote site

        try:
            data = result['data']
            total_pages = int(data['pages'])
        except (KeyError, TypeError, ValueError):
            print('unexpected response on page', i)
            break
        comments = data.get('comment') if isinstance(data, dict) else None
        if not isinstance(comments, list):
            comments = []

        for comment in comments:
            try:
                content = comment['df_content']
            except (KeyError, TypeError):
                print('***')
                continue
            # A failed insert is flagged with '***'; the text is still echoed.
            try:
                with connection.cursor() as cursor:
                    cursor.execute(sql, (str(n), content))
                    n = n + 1
                    connection.commit()
            except pymysql.MySQLError:
                print('***')
            print(content, end='  ')

        # Check the page count only after storing the current page, so the
        # final page is no longer fetched and then thrown away.
        if i >= total_pages:
            break
        i = i + 1
finally:
    # Always release the database connection, even when the crawl aborts.
    connection.close()
    

【注意】:运行前需要先完成 MySQL 的相关配置

 

你可能感兴趣的:(Python)