https://lbs.amap.com/
服务示例
https://restapi.amap.com/v3/geocode/regeo?output=xml&location=116.310003,39.991957&key=<用户的key>&radius=1000&extensions=all
参数:
1.key:用户申请的key
2. location: 经纬度 , 格式 : 经度,纬度
3. radius: 搜索半径
4. extensions: 返回结果控制
5. output: 输出内容格式,可选 JSON,XML
需求: JSON格式 / 只要businessAreas信息
https://restapi.amap.com/v3/geocode/regeo?location=%s&key=47a5fdc15f2b7cd24653073319ed5763
解释:
默认output = JSON格式
location=%s 占位作用,方便后续根据经纬度进行参数提交
URL="http://restapi.amap.com/v3/geocode/regeo?location=%s&key=47a5fdc15f2b7cd24653073319ed5763"
使用默认经纬度的显示结果:
需求:获取businessAreas
格式: regeocode.addressComponent.businessAreas
{"status":"1","regeocode":{"addressComponent":{"city":[],"province":"北京市","adcode":"110108","district":"海淀区","towncode":"110108015000","streetNumber":{"number":"5号","location":"116.310454,39.9927339","direction":"东北","distance":"94.5489","street":"颐和园路"},"country":"中国","township":"燕园街道","businessAreas":[{"location":"116.303364,39.97641","name":"万泉河","id":"110108"},{"location":"116.314222,39.98249","name":"中关村","id":"110108"},{"location":"116.294214,39.99685","name":"西苑","id":"110108"}],"building":{"name":"北京大学","type":"科教文化服务;学校;高等院校"},"neighborhood":{"name":"北京大学","type":"科教文化服务;学校;高等院校"},"citycode":"010"},"formatted_address":"北京市海淀区燕园街道北京大学"},"info":"OK","infocode":"10000"}
--scala语言编程
import org.apache.commons.httpclient.HttpClient
import org.apache.commons.httpclient.methods.{GetMethod, PostMethod}
/**
 * Small HTTP helper built on Apache Commons HttpClient.
 */
object HttpUtils {

  /**
   * Sends a GET request and hands back the response body.
   *
   * @param url the complete request URL
   * @return the response body when the status code is 200; an empty string
   *         for any other status code or when the response carries no body
   */
  def get(url: String): String = {
    val client = new HttpClient()
    // GetMethod issues a GET request; PostMethod would be the POST counterpart.
    val getMethod = new GetMethod(url)
    try {
      // executeMethod returns the HTTP status code.
      val code: Int = client.executeMethod(getMethod)
      if (code == 200) {
        // The body accessor can yield null when no body is available;
        // normalize that to the same empty-string sentinel as a failed request.
        Option(getMethod.getResponseBodyAsString).getOrElse("")
      } else {
        ""
      }
    } finally {
      // Always hand the connection back, even when the request throws.
      getMethod.releaseConnection()
    }
  }
}
package com.wonderland.utils
import java.util
import com.alibaba.fastjson.{JSON, JSONObject}
/**
 * JSON helper that extracts business-area names from a reverse-geocoding response.
 */
object ParseJsonUtils {

  /**
   * Reads `regeocode.addressComponent.businessAreas` from the given JSON text
   * and joins the `name` of every entry with commas.
   *
   * No null checks are performed on the intermediate objects, so a response
   * that lacks any level of that path makes this method throw; callers are
   * expected to guard the call themselves.
   *
   * @param json response text to parse (parsed with Alibaba fastjson)
   * @return the business-area names separated by ","
   */
  def parseJson(json: String): String = {
    // Explicit converters instead of the deprecated implicit JavaConversions.
    import scala.collection.JavaConverters._

    val obj: JSONObject = JSON.parseObject(json)
    // Outermost key.
    val regeocode = obj.getJSONObject("regeocode")
    // Second-level key.
    val addressComponent = regeocode.getJSONObject("addressComponent")
    // Target array.
    val businessAreas = addressComponent.getJSONArray("businessAreas")
    // Turn every JSON entry of the array into a BusinessArea instance.
    val areas: util.List[BusinessArea] = businessAreas.toJavaList(classOf[BusinessArea])
    areas.asScala.map(_.name).mkString(",")
  }
}
/**
 * One entry of the `businessAreas` array, whose JSON form looks like
 * {"location":"116.303364,39.97641","name":"万泉河","id":"110108"}.
 *
 * The three fields mirror the three keys of that JSON object; this class is
 * the element type handed to `toJavaList` in ParseJsonUtils.
 *
 * @param id       value of the "id" key
 * @param location value of the "location" key
 * @param name     value of the "name" key
 */
case class BusinessArea(
  id: String,
  location: String,
  name: String
)
package com.proc
import ch.hsr.geohash.GeoHash
import com.wonderland.utils._
import org.apache.kudu.client.CreateTableOptions
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
import scala.util.Try
/**
 * Spark job that reads distinct longitude/latitude pairs from the ODS Kudu
 * table, looks up the business areas of each pair through the HTTP endpoint
 * configured in ConfigUtils.URL, and writes (geoHashCode, areas) rows to a
 * Kudu sink table.
 */
object BusinessAreaProcess {
  // Name of the sink table, suffixed with the value of DateUtils.getDate().
  val TABLENAME = s"Business_Area_Process_${DateUtils.getDate()}"

  /**
   * Job entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSessionUtils.getSparkSession()
    import spark.implicits._
    // Brings the `.kudu` reader extension into scope.
    import org.apache.kudu.spark.kudu._

    // Read the ODS table. The `s` interpolator is required here: without it
    // the literal text "${DateUtils.getDate()}" ends up in the table name.
    val source: DataFrame = spark.read
      .option("kudu.master", ConfigUtils.KUDU_MASTER)
      .option("kudu.table", s"ODS_${DateUtils.getDate()}")
      .kudu

    // Column pruning, null filtering and de-duplication of the coordinates.
    // Only a null check is applied; the original author notes that filtering
    // on '' removed every row.
    val ds1: Dataset[(String, String)] = source.selectExpr("longitude", "latitude")
      .filter("longitude is not null and latitude is not null ")
      .distinct()
      .as[(String, String)]

    // Resolve the business areas of each (longitude, latitude) pair.
    val result = ds1.map(item => {
      val longitude = item._1
      val latitude = item._2
      // The URL template holds a %s placeholder for "longitude,latitude".
      val params = s"${longitude},${latitude}"
      val url: String = ConfigUtils.URL.format(params)
      // Issue the request and fetch the response text.
      val responseJson: String = HttpUtils.get(url)
      // A response that cannot be parsed yields "" and is filtered out below.
      val areas: String = Try(ParseJsonUtils.parseJson(responseJson)).getOrElse("")
      // Geohash the coordinate with 8 characters so that coordinates in the
      // same geohash cell share one key.
      // Argument order: latitude, longitude, numberOfCharacters.
      val geoHashCode: String = GeoHash.geoHashStringWithCharacterPrecision(latitude.toDouble, longitude.toDouble, 8)
      (geoHashCode, areas)
    }).toDF("geoHashCode", "areas")
      .filter("areas is not null and areas != '' ")
      .distinct()

    // Persist the result into Kudu.
    val schema = result.schema
    val keys = Seq("geoHashCode")
    val options = new CreateTableOptions
    // NOTE(review): the implicit conversions are kept because addHashPartitions
    // receives a Scala Seq below and the parameter types of WriterUtils.write
    // are not visible from here; switch to explicit `.asJava` once confirmed.
    import scala.collection.JavaConversions._
    options.addHashPartitions(Seq("geoHashCode"), 3)
    options.setNumReplicas(1)
    WriterUtils.write(spark, TABLENAME, schema, keys, options, result)
  }
}
注:
未显示的工具类有:
application.conf 配置文件,存放URL
ConfigUtils 配置文件工具类,用于获取配置信息
WriterUtils kudu写入工具类,用于把数据写入kudu表格中