import base64
import datetime
import zlib
from urllib.parse import urlencode
import requests
import time
import pymysql
# Module-level MySQL connection used by save_to_mongo() for every insert.
# NOTE: host/port/user/password/db are redacted placeholders ("xxxx") and
# must be replaced with real settings before this script can run.
db = pymysql.connect(
    host="xxxx",
    port=xxxx,
    user="xxxx",
    password="xxxx",
    db="xxxx",
    charset="utf8",
)
# Single cursor reused for all statements issued by this script.
cursor = db.cursor()
# Endpoint of the deal-filter API; the encoded query string is appended to it.
base_url = 'https://apimeishi.meituan.com/meishi/filter/v6/deal/select?'

# HTTP headers sent with every request.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/75.0.3770.90 Safari/537.36'
    ),
}

# Category id (sent as ``cateId``) -> category name stored with each record.
# Insertion order is the order in which categories are crawled.
meishi_dict = dict((
    ('36', '小吃快餐'),        # snacks & fast food
    ('11', '面包甜点'),        # bakery & desserts
    ('21329', '饮品店'),       # beverage shops
    ('57', '粤菜'),            # Cantonese cuisine
    ('17', '火锅'),            # hot pot
    ('20097', '生日蛋糕'),     # birthday cakes
    ('55', '川湘菜'),          # Sichuan & Hunan cuisine
    ('35', '西餐'),            # Western food
    ('28', '日韩料理'),        # Japanese & Korean cuisine
    ('54', '烧烤烤串'),        # barbecue & skewers
    ('395', '聚餐宴请'),       # group dining & banquets
    ('20059', '日本菜'),       # Japanese cuisine
    ('20004', '香锅烤鱼'),     # spicy pot & grilled fish
    ('20638', '生鲜蔬果'),     # fresh produce
    ('400', '中式烧烤/烤串'),  # Chinese barbecue / skewers
    ('21404', '特色菜'),       # specialty dishes
))
def save_to_mongo(product):
    """Insert one scraped shop record into the MySQL ``meishi`` table.

    Despite its name, this function writes to MySQL through the module-level
    ``cursor`` and ``db`` objects.

    Args:
        product: Mapping whose values, in iteration order, are bound
            positionally to the columns listed in the INSERT statement.

    Any exception is printed and the transaction is rolled back; nothing is
    re-raised to the caller.
    """
    columns = (
        'front_img', 'shop_name', 'avg_score', 'food_cate_name',
        'area_name', 'avg_price', 'poi_id', 'cate_name',
    )
    try:
        # One "%s" placeholder per value supplied by the caller.
        placeholders = ', '.join(['%s'] * len(product))
        statement = 'INSERT INTO meishi({}) VALUES ({})'.format(
            ', '.join(columns), placeholders
        )
        # Execute the statement with the record's values as parameters.
        cursor.execute(statement, tuple(product.values()))
        db.commit()
    except Exception as error:
        print(error)
        # Roll back when anything goes wrong.
        db.rollback()
def url_change(url):
    """Return the MySQL-escaped form of every element of *url*.

    Args:
        url: Iterable of strings. Passing a single ``str`` iterates it
            character by character.

    Returns:
        A list with one escaped string per element, in the same order.
    """
    # ``pymysql.escape_string`` was removed from the package top level in
    # PyMySQL 1.0; ``pymysql.converters.escape_string`` exists in both the
    # old and the new releases.
    return [pymysql.converters.escape_string(part) for part in url]
def get_offset(offset, cate_id, timeout=10):
    """Request one batch of shops for a category from the deal-filter API.

    Args:
        offset: Value sent as the ``offset`` query parameter.
        cate_id: Category id sent as the ``cateId`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body when the server answers with HTTP 200.
        ``None`` for any other status code, for a network error or timeout,
        and for a body that is not valid JSON.
    """
    print('正在爬取第:' + str(offset) + '页')
    params = {
        'cateId': cate_id,
        'areaId': '-1',
        'sort': 'defaults',
        'limit': '25',
        'offset': offset,
        'utm_medium': 'WEIXINPROGRAM',
        'userId': '599391734',
        'uuid': 'C293FFD24758854B729512AA42F7D9E2C8F3B0541D4F5E73F5E751166F1907D9',
        'token': encode_token(),
        'version': '9.6.0',
        'version_name': '9.6.0',
        'utm_term': '0',
        'utm_campaign': '0',
        'cityId': '20',
        'lat': '22.51595',
        'lng': '113.3926',
        'myLat': '22.51595',
        'myLng': '113.3926',
        # Raw comma on purpose: urlencode() turns it into "%2C". A value that
        # is already percent-encoded would be encoded again ("%252C").
        'mypos': '22.51595,113.3926',
    }
    url = base_url + urlencode(params)
    # Certificate verification is disabled below, so silence the warning
    # urllib3 would otherwise emit on every request.
    requests.packages.urllib3.disable_warnings()
    try:
        # NOTE(security): verify=False skips TLS certificate validation.
        response = requests.get(
            url=url, headers=headers, verify=False, timeout=timeout
        )
        if response.status_code == 200:
            print(url)
            return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts; ValueError
        # covers a 200 response whose body cannot be decoded as JSON.
        print('Error', e.args)
    return None
def _to_float(value):
    """Return *value* as a float, or None when it is missing or not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def parse_offset(json, cateName):
    """Yield one flat shop record per entry of ``data.poiList.poiInfos``.

    Args:
        json: Decoded API response (a dict), or a falsy value when the
            request failed.
        cateName: Category name copied into every yielded record.

    Yields:
        Dicts with the keys ``frontImg``, ``name``, ``avgScore``,
        ``foodCateName``, ``areaName``, ``avgPrice``, ``poiId`` and
        ``cateName``, in that order. Fields absent from an item are ``None``;
        scores and prices that cannot be converted to float are ``None``.
        Nothing is yielded when any level of the nested path is missing.
    """
    if not json:
        return
    # Each level may be absent or null in the response; treat that as
    # "no results" instead of failing on None.get(...).
    data = json.get('data') or {}
    poi_list = data.get('poiList') or {}
    for item in poi_list.get('poiInfos') or []:
        front_img = item.get('frontImg')
        # Key order matters: save_to_mongo() binds product.values()
        # positionally to the columns of its INSERT statement.
        yield {
            # Image URL, cut at the first '%'.
            'frontImg': front_img.split('%')[0] if front_img else front_img,
            'name': item.get('name'),  # shop name
            'avgScore': _to_float(item.get('avgScore')),  # rating
            'foodCateName': item.get('cateName'),  # category
            'areaName': item.get('areaName'),  # shop area / address
            'avgPrice': _to_float(item.get('avgPrice')),  # average spend per person
            'poiId': item.get('poiid'),
            'cateName': cateName,
        }
def encode_token():
    """Build the value sent as the ``token`` query parameter.

    A dict of fixed fields plus the current millisecond timestamp is rendered
    with ``str()``, zlib-compressed and base64-encoded.

    Returns:
        The base64 text of the compressed payload, as a ``str``.
    """
    now_ms = int(datetime.datetime.now().timestamp() * 1000)
    signature = 'eJwdjktOwzAQhu/ShXeJ4zYNKpIXqKtKFTsOMLUn6Yj4ofG4UjkM10CsOE3vgWH36df/2gAjnLwdlAPBBsYoR3J/hYD28f3z+PpUnmJEPqYa5UWEm0mlLBRqOSaP1qjEtFB849VeRXJ51nr56AOSVIi9S0E3LlfSzhitMix/mQwsrdWa7aTyCjInDk1mKu9nvOHauCQWq2rB/8laqd3cX+adv0zdzm3nbjTOdzCi69A/HQAHOOyHafMLmEtKXg=='
    payload = {
        'rId': 100900,
        'ver': '1.0.6',
        'ts': now_ms,
        # 'cts' is always 100 seconds (in milliseconds) after 'ts'.
        'cts': now_ms + 100 * 1000,
        'brVD': [1010, 750],
        'brR': [[1920, 1080], [1920, 1040], 24, 24],
        'bI': ['https://gz.meituan.com/meishi/c11/', ''],
        'mT': [],
        'kT': [],
        'aT': [],
        'tT': [],
        'aM': '',
        'sign': signature,
    }
    # Python repr of the dict -> bytes -> zlib -> base64 -> text.
    raw_bytes = str(payload).encode()
    packed = zlib.compress(raw_bytes)
    return base64.b64encode(packed).decode('utf-8')
if __name__ == '__main__':
    # Crawl every category for each offset and store each parsed shop.
    # NOTE(review): offset advances by 1 while every request asks for
    # limit=25; if the API treats offset as an item index, consecutive
    # requests overlap and rows are inserted repeatedly -- confirm.
    try:
        for offset in range(0, 300):
            # Pause for two seconds on every fifth offset (including 0).
            if offset % 5 == 0:
                time.sleep(2)
                print('暂时休眠一波')
            for cate_id, cate_name in meishi_dict.items():
                payload = get_offset(offset, cate_id)
                for store in parse_offset(payload, cate_name):
                    save_to_mongo(store)
    finally:
        # Release the database resources even when the crawl is interrupted
        # or fails part-way through.
        cursor.close()
        db.close()