# -*- coding: utf-8 -*-
import codecs
import sys
# Python 2 only: sys.setdefaultencoding is not reachable on a normally started
# interpreter, so sys is reloaded first to get it back. The rest of this file
# relies on the resulting utf8 default for implicit str/unicode conversions.
reload(sys)
sys.setdefaultencoding('utf8') # set the default encoding to 'utf-8'
import json
import urllib2;
import xlrd
import re
import numpy as np
import io
import unicodecsv as ucsv
import requests
# Output CSV path; Scrap_hotel_lists opens it in append mode ('ab+').
filename = "D:/a/lonlat02.csv"
def readExcel():
    """Open the site-info workbook and pass its sheet to Scrap_hotel_lists.

    Prints the sheet's row count before processing starts.
    """
    book = xlrd.open_workbook("D:/lonlat/industry_site_info.xlsx")
    site_sheet = book.sheet_by_name("industry_site_info")
    row_total = site_sheet.nrows
    print(row_total)
    Scrap_hotel_lists(site_sheet)
def Scrap_hotel_lists(sheet):
    """Geocode every data row of *sheet* and append the results to ``filename``.

    Row 0 is skipped. For each remaining row, column 0 is written out as
    ``id``, column 3 as ``jdmc`` and column 4 (``dzmc``) is the address handed
    to getlnglat(). When getlnglat() reports ``comprehension >= 80`` and
    ``confidence >= 75`` the coordinate is converted with getgoglelnglat();
    otherwise the row is written with '0' for both coordinates.

    Rows whose processing raises are reported on stdout and left out of the
    output. KeyboardInterrupt and SystemExit are not swallowed, so the run
    can still be stopped.

    Args:
        sheet: Worksheet object providing ``nrows`` and ``row_values(i)``.
    """
    ids = []
    latitudes = []
    longitudes = []
    dzmcs = []
    jdmcs = []
    for i in range(1, sheet.nrows):
        print(i)
        # Bound before the try so the handler never reports a stale address
        # (or hits a NameError) when reading the row itself fails.
        dzmc = None
        try:
            values = sheet.row_values(i)
            dzmc = values[4]
            row_id = values[0]
            jdmc = values[3]
            lng, lat, comprehension, confidence = getlnglat(dzmc)
            if comprehension >= 80 and confidence >= 75:
                goglelat, goglelng = getgoglelnglat(lng, lat)
            else:
                # Low-quality match: skip the conversion request entirely,
                # its result would be discarded anyway.
                goglelat, goglelng = '0', '0'
        except Exception:
            print('BaseException', dzmc)
            continue
        ids.append(row_id)
        latitudes.append(goglelat)
        longitudes.append(goglelng)
        dzmcs.append(dzmc)
        jdmcs.append(jdmc)
    print('end!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
    # One column per list, transposed so that each array row is one record.
    hotel_array = np.array((ids, latitudes, longitudes, dzmcs, jdmcs)).T
    list_header = ['id', 'Latitude', 'longitude', 'dzmc', 'jdmc']
    array_header = np.array(list_header)
    hotellists = np.vstack((array_header, hotel_array))
    # Append mode: the header row is written again on every run.
    with io.open(filename, 'ab+') as f:
        w = ucsv.writer(f, encoding='gbk')
        w.writerows(hotellists)
def getlnglat(address, timeout=10):
    """Look up *address* through the Baidu geocoding v3 endpoint.

    The query parameters are percent-encoded, so characters such as ``&``,
    ``#`` or spaces inside the address can no longer corrupt the query string.

    Args:
        address: Address text (unicode or UTF-8 byte string). The ``city``
            parameter of the request is fixed by this function.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        Tuple ``(lng, lat, comprehension, confidence)`` read from the
        ``result`` object of the decoded response.

    Raises:
        ValueError: If the body is not wrapped as ``callback(...)``.
        KeyError: If the decoded payload lacks ``result`` or one of the
            fields read from it.
    """
    import urllib  # Python 2 stdlib; only urlencode is needed here.

    url = 'http://api.map.baidu.com/geocoding/v3/'
    output = 'json'
    ak = '百度地图ak'
    if not isinstance(address, bytes):
        # urlencode() must be given bytes, otherwise it falls back to the
        # interpreter's default encoding for non-ASCII text.
        address = address.encode('utf-8')
    query = urllib.urlencode([
        ('address', address),
        ('city', '珠海'),
        ('output', output),
        ('ak', ak),
        ('callback', 'showLocation'),
    ])
    uri = url + '?' + query
    req = urllib2.urlopen(uri, timeout=timeout)
    try:
        res = req.read().decode('utf-8')
    finally:
        req.close()
    # Unwrap the JSONP envelope: everything between the first '(' and the
    # last ')' is the JSON document, even if it contains ')' itself.
    start = res.find('(')
    end = res.rfind(')')
    if start == -1 or end < start:
        raise ValueError('unexpected geocoding response: %r' % res[:200])
    content = json.loads(res[start + 1:end])
    result = content['result']
    location = result['location']
    return location['lng'], location['lat'], result['comprehension'], result['confidence']
def getgoglelnglat(lng, lat, timeout=10):
    """Convert a coordinate through the gpsspg.com geo endpoint.

    Args:
        lng: Longitude to convert (number or numeric string).
        lat: Latitude to convert (number or numeric string).
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        Tuple ``(lat, lng)`` -- note the order -- built from the first
        element of the ``lat`` and ``lng`` fields of the first ``result``
        entry in the response.

    Raises:
        requests.RequestException: On connection errors, timeouts or a
            non-2xx HTTP status.
        ValueError: If the body is not wrapped as ``callback(...)``.
    """
    url = 'http://www.gpsspg.com/apis/maps/geo/'
    # A list of pairs keeps the parameters in the same order as before.
    params = [
        ('output', 'jsonp'),
        ('lat', str(lat)),
        ('lng', str(lng)),
        ('type', '2'),
        ('callback', 'jQuery1102007221953702775341_1565922430294'),
        ('_', '1565922430295'),
    ]
    headers = {
        "Referer": "http://www.gpsspg.com/iframe/maps/qq_181109.htm?mapi=2",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
        "Host": "www.gpsspg.com",
    }
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    response.raise_for_status()
    raw = response.content
    # Unwrap the JSONP envelope: everything between the first '(' and the
    # last ')' is the JSON document.
    start = raw.find(b'(')
    end = raw.rfind(b')')
    if start == -1 or end < start:
        raise ValueError('unexpected conversion response: %r' % raw[:200])
    content = json.loads(raw[start + 1:end])
    first = content['result'][0]
    return first['lat'][0], first['lng'][0]
# Module-level entry point: the whole pipeline runs as soon as this file is
# executed (or imported).
readExcel()