#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author : zhibo.wang
# E-mail : [email protected]
# Desc : ofo
import time
import random
import socket
import hashlib
import datetime
import threading
import numpy as np
from more_itertools import chunked
from requests_toolbelt import MultipartEncoder
class Crawler:
    """Crawl the ofo "nearby bikes" endpoint over the grid cells of one city.

    Grid cells are read from the ``active_grids`` MongoDB collection. Each
    cell is covered with a lattice of points spaced ``self.offset`` apart,
    and the endpoint is queried once per lattice point.

    NOTE(review): ``OSS2``, ``Weixin``, ``mongo_con_keepalive``,
    ``bd09togcj02`` and ``requests`` are used by this class but are not
    imported in the visible part of this file -- confirm they are brought
    into scope elsewhere.
    """

    # Helper clients created once, when the class body is executed.
    # NOTE(review): no method of this class uses them -- confirm they are
    # still needed.
    oss = OSS2()
    W = Weixin()

    def __init__(self):
        """Set the target city, request settings and the MongoDB handle."""
        # City to crawl (code + display name).
        # Guangzhou alternative: {"citycode": 257, "cityname": "广州市"}
        self.city_code = {"citycode": 131, "cityname": "北京市"}
        self.timeout = 10  # request timeout passed to requests.post
        self.offset = 0.0022  # lattice step used for both lat and lng
        self.indexs = None
        self.db = mongo_con_keepalive()
        self.start_time = datetime.datetime.now()
        self.url = "https://san.ofo.so/ofo/Api/nearbyofoCar"
        # Pause between two requests, picked at random (time.sleep seconds).
        self.wait_time = [0.9, 1, 1.1, 1.2, 1.3]
        # User credentials, obtainable with a packet-capture tool.
        # NOTE(review): request() also reads a "token" field from the chosen
        # entry; the entries below do not carry one, so start() refuses to
        # run until a token is added to every entry.
        self.keys = [
            {
                "Content-Type": "multipart/form-data; boundary=--------FormDataxxx",
                "boundary": "--------FormDataxxx",
            },
            {
                "Content-Type": "multipart/form-data; boundary=--------FormDataxxx",
                "boundary": "--------FormDataxxx",
            },
        ]
        self.headers = {
            "Accept": "*/*",
            "Host": "san.ofo.so",
            "Accept-Language": "zh-CN",
            "Origin": "https://common.ofo.so",
            "Accept-Encoding": "gzip, deflate",
            "Referer": "https://common.ofo.so/newdist/?Journey",
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_1 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C153 MicroMessenger/6.6.0 NetType/WIFI Language/zh_CN"
        }

    def request(self, lat, lng, gridid):
        """Query the endpoint for one point and return the enriched payload.

        Args:
            lat: Latitude of the query point, as a string.
            lng: Longitude of the query point, as a string.
            gridid: Identifier of the grid cell the point belongs to.

        Returns:
            The decoded JSON response with ``center_lng``, ``center_lat``,
            ``citycode``, ``cityname`` and ``gridid`` added, when the
            response has ``errorCode`` 200 and a non-empty ``cars`` list;
            otherwise ``None``.

        Raises:
            KeyError: If the randomly chosen entry of ``self.keys`` lacks
                ``token``, ``boundary`` or ``Content-Type``.
        """
        key = random.choice(self.keys)
        fields = {
            "token": key["token"],
            "lat": lat,
            "lng": lng,
            "source": "-5",
        }
        multipart_encoder = MultipartEncoder(fields=fields,
                                             boundary=key["boundary"])
        # Work on a copy: self.headers is shared by every worker thread, and
        # the Content-Type must match the boundary of *this* request's key.
        headers = dict(self.headers)
        headers["Content-Type"] = key["Content-Type"]
        try:
            # The network call is inside the try so that one timeout or
            # connection error only skips this point, not the whole grid.
            response = requests.post(url=self.url, headers=headers,
                                     data=multipart_encoder,
                                     timeout=self.timeout)
            data = response.json()
            # NOTE(review): the original indentation was lost; the else
            # branch is assumed to pair with the errorCode check.
            if data["errorCode"] == 200:
                if data["values"]["info"]["cars"]:
                    data["center_lng"], data["center_lat"] = float(lng), float(lat)
                    data["citycode"] = self.city_code["citycode"]
                    data["cityname"] = self.city_code["cityname"]
                    data["gridid"] = gridid
                    return data
            else:
                print(data)
        except Exception as e:
            # Best effort: report the failure and carry on with the crawl.
            print("request error: ", e)
        return None

    def get_city_gridid(self, gridid_data, db):
        """Query every lattice point of each grid cell in ``gridid_data``.

        Args:
            gridid_data: Iterable of grid documents providing ``gridid``,
                ``left_lng``, ``right_lng``, ``top_lat`` and ``bottom_lat``.
            db: Database handle; accepted for call compatibility but not
                used here.
        """
        for grid in gridid_data:
            gridid = grid["gridid"]
            print("gridid: ", gridid)
            try:
                # Convert both corners of the cell before building the
                # lattice (presumably BD-09 -> GCJ-02, going by the helper's
                # name -- confirm).
                left_lng, top_lat = bd09togcj02(grid["left_lng"], grid["top_lat"])
                right_lng, bottom_lat = bd09togcj02(grid["right_lng"], grid["bottom_lat"])
                # [1:] drops the first value so the lattice starts one step
                # inside the bottom/left edge of the cell.
                lat_range = np.arange(float(bottom_lat), float(top_lat), self.offset)[1:]
                # Same for every latitude row, so compute it once.
                lng_range = np.arange(float(left_lng), float(right_lng), self.offset)[1:]
                for lat in lat_range:
                    for lng in lng_range:
                        self.request(str(lat), str(lng), gridid)
                        # NOTE(review): the pause is assumed to follow every
                        # request (original indentation was lost).
                        time.sleep(random.choice(self.wait_time))
            except Exception as e:
                # Best effort: a bad cell must not stop the remaining cells.
                print("get_city_gridid error:", grid, e)

    def start(self):
        """Load the city's grid cells and crawl them with worker threads.

        The cells are split into at most ``len(self.keys)`` chunks, each
        handled by its own thread; the call returns once all threads finish.

        Raises:
            ValueError: If ``self.keys`` is empty or an entry has no token,
                since every request would fail in that case.
        """
        if not self.keys or any(not key.get("token") for key in self.keys):
            raise ValueError(
                "every entry of self.keys needs a non-empty 'token' "
                "before crawling can start")
        cursor = self.db.get_collection("active_grids").find(
            {"citycode": self.city_code["citycode"]}, no_cursor_timeout=True)
        try:
            grids = list(cursor)
        finally:
            # Cursors opened with no_cursor_timeout are not reaped by the
            # server, so close this one explicitly.
            cursor.close()
        total = len(grids)
        print("count: ", total)
        if not grids:
            return
        # Ceiling division: never a chunk size of 0, never more chunks than
        # there are keys.
        chunk_size = max(1, -(-total // len(self.keys)))
        workers = [
            threading.Thread(target=self.get_city_gridid, args=(chunk, self.db))
            for chunk in chunked(grids, chunk_size)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
if __name__ == "__main__":
    # Script entry point: build the crawler and run one full crawl.
    crawler = Crawler()
    crawler.start()
# NOTE(review): this is a bare dict literal used as an expression statement;
# it is evaluated and discarded, and nothing in this file reads it.
# It looks like a sample bike record kept for reference (presumably one entry
# of the response's "cars" list) -- confirm before relying on the field names.
{ "carno" : "EXxvn8",
"ordernum" : "",
"userIdLast" : "1",
"lng" : 113.24468731813714,
"lat" : 23.273194605097277,
"Time" : "2018-03-27 19:37:16",
"recordBatchNo" : "19"}