朋友需要帮忙获取资源信息,于是我利用周末时间,用 Python 写了个工具,从高德地图上抓取信息(包括位置、电话、经纬度等),得到了他想要的数据,绝对可靠。
# Fetch every POI of type self.cityType inside city self.cityID from the Amap
# place-text search API and write one spreadsheet row per POI.
#
# Uses names set up before this fragment: offset (records per page, sent as
# the API's "offset" parameter), maxPage, poiTag, poiSoupTag, pattern, sheet
# and poiExcel.
#
# NOTE(review): the original indentation was lost; the nesting below is
# reconstructed from the control flow (the page loop needs the page count
# computed inside the `if`). Confirm against the real script.
url = "http://restapi.amap.com/v3/place/text?&keywords=&types=" + self.cityType + "&city=" + self.cityID + "&citylimit=true&output=xml&offset=" + str(offset) + "&page="+ str(maxPage) + "&key=你的key&extensions=base"
if self.getHtml(url) == 0:  # presumably 0 signals success -- verify in getHtml
    print('parsing page 1 ... ...')
    # Parse the XML file and get the total record number.
    totalRecord_str = self.parseXML()
    totalRecord = int(str(totalRecord_str))
    # Ceiling division: number of pages needed to hold totalRecord records.
    # (The original referenced an undefined `total_record` here and
    # overshot the page count by one.)
    maxPage = (totalRecord + offset - 1) // offset
    print(totalRecord)
    print(maxPage)
    for pageIndex in range(1, maxPage + 1):
        # Each page is fetched best-effort: a failure is printed and the
        # loop moves on to the next page.
        try:
            url = "http://restapi.amap.com/v3/place/text?&keywords=&types=" + self.cityType + "&city=" + self.cityID + "&citylimit=true&output=xml&offset=" + str(offset) + "&page="+ str(pageIndex) + "&key=你的key&extensions=base"
            # Structured request URL as above; use your own key, see:
            # http://lbs.amap.com/api/webservice/guide/api/search/
            poiSoup = BeautifulSoup(urllib.urlopen(url).read(),"xml")  # load the page for this page number
            for tagIndex, tagName in enumerate(poiTag):
                # Collect this page's elements for each POI tag.
                poiSoupTag[tagIndex] = poiSoup.findAll(tagName)
            # Rows are numbered by the fixed page size, not by the number
            # of records on the current page, so a short last page cannot
            # land on rows already written. The +1 skips row 0
            # (presumably a header row -- confirm).
            firstRow = offset * (pageIndex - 1) + 1
            for rowIndex in range(len(poiSoupTag[0])):
                for colIndex, column in enumerate(poiSoupTag):
                    # Extract the content with the regular expression and
                    # write it to the matching row and column.
                    sheet.write(firstRow + rowIndex, colIndex, re.findall(pattern,u" "+str(column[rowIndex])))
        except Exception as e:
            print(e)  # report the error for this page
    poiExcel.save("E:/POI&" + self.cityType + "&" + self.cityID + ".xls")  # save the workbook
有需求的同学请通过 QQ 联系:934012496