一、 GeoSpark 安装:
我采用 Gradle 方式安装,环境为 Spark 3.0.3 + Scala 2.12.3。GeoSpark 现已更名为 Apache Sedona,因此下面的依赖使用的是 org.apache.sedona 的坐标。build.gradle 内容如下:
// Gradle build for a Spark 3.0.3 / Scala 2.12 job that uses Apache Sedona
// (formerly GeoSpark) together with Delta, Hive, Kafka, MySQL and ClickHouse.
plugins {
    id 'java'
    id 'scala'
}

group 'org.example'
version '1.0-SNAPSHOT'

// Name of the produced jar. This is a property of the `jar` task, so it is set
// here rather than inside `configurations {}` (which declares dependency
// configurations, not task settings).
jar {
    archiveName = 'OutputToDeltaV3.jar'
}

repositories {
    // Aliyun mirror of the public Maven repositories; HTTPS so that downloaded
    // artifacts cannot be tampered with in transit.
    maven { url 'https://maven.aliyun.com/nexus/content/groups/public' }
}

dependencies {
    // NOTE(review): the accompanying notes mention Scala 2.12.3 while this pins
    // 2.12.2 — confirm which patch version is intended.
    compile group: 'org.scala-lang', name: 'scala-library', version: '2.12.2'
    compile group: 'org.apache.spark', name: 'spark-core_2.12', version: '3.0.3'
    compile group: 'org.apache.spark', name: 'spark-sql_2.12', version: '3.0.3'
    compile group: 'org.apache.spark', name: 'spark-hive_2.12', version: '3.0.3'
    compile group: 'org.apache.hive', name: 'hive-jdbc', version: '1.2.0'
    compile group: 'io.delta', name: 'delta-core_2.12', version: '0.7.0'
    compile group: 'org.apache.spark', name: 'spark-sql-kafka-0-10_2.12', version: '3.0.3'
    compile group: 'mysql', name: 'mysql-connector-java', version: '5.1.41'
    compile group: 'org.apache.hadoop', name: 'hadoop-common', version: '2.7.7'
    compile group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-core', version: '2.7.7'
    // compile group: 'org.apache.logging.log4j', name: 'log4j-api', version: '2.14.1'
    // compile group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.14.1'

    // --- GeoSpark (Apache Sedona) dependencies start here ---
    compile group: 'org.apache.sedona', name: 'sedona-core-3.0_2.12', version: '1.0.1-incubating'
    compile group: 'org.apache.sedona', name: 'sedona-sql-3.0_2.12', version: '1.0.1-incubating'
    compile group: 'org.apache.sedona', name: 'sedona-viz-3.0_2.12', version: '1.0.1-incubating'
    compile group: 'org.locationtech.jts', name: 'jts-core', version: '1.18.0'
    compile group: 'org.datasyslab', name: 'geotools-wrapper', version: 'geotools-24.0'
    compile('org.wololo:jts2geojson:0.16.1') {
        // Drop the transitive jts-core so that only the 1.18.0 declared above is
        // used. The group must be 'org.locationtech.jts'; the previous value
        // 'org.locationtech.jt' matched nothing, so the exclusion had no effect.
        exclude group: 'org.locationtech.jts', module: 'jts-core'
        exclude group: 'com.fasterxml.jackson.core', module: '*'
    }
    // --- GeoSpark (Apache Sedona) dependencies end here ---

    compile('ru.yandex.clickhouse:clickhouse-jdbc:0.1.53') {
        exclude group: 'com.fasterxml.jackson.core', module: 'jackson-databind'
        exclude group: 'com.fasterxml.jackson.core', module: 'jackson-core'
    }

    // Local jars dropped into ./lib. Declared once: `implementation` extends
    // `compile`, so listing the same file tree on both was redundant.
    compile fileTree(dir: 'lib', includes: ['*.jar'])
}
二、 GeoSpark 能够读取的文件格式有:CSV、TSV、Shapefile、GeoJSON。
S1.CSV结构如下:
// 单点坐标
-88.331492,32.324142,hotel
-88.175933,32.360763,gas
-88.388954,32.357073,bar
-88.221102,32.35078,restaurant
// 多点坐标
-88.331492,32.324142,-88.331492,32.324142,-88.331492,32.324142,-88.331492,32.324142,-88.331492,32.324142,hotel
-88.175933,32.360763,-88.175933,32.360763,-88.175933,32.360763,-88.175933,32.360763,-88.175933,32.360763,gas
-88.388954,32.357073,-88.388954,32.357073,-88.388954,32.357073,-88.388954,32.357073,-88.388954,32.357073,bar
-88.221102,32.35078,-88.221102,32.35078,-88.221102,32.35078,-88.221102,32.35078,-88.221102,32.35078,restaurant
C1.CSV读取方式:
// Build a PointRDD from a CSV file in which each row describes a single point
// (longitude, latitude, then an attribute such as "hotel").
val pointRDDInputLocation = "/Download/checkin.csv"
val pointRDDOffset = 0 // The point long/lat starts from Column 0
val pointRDDSplitter = FileDataSplitter.CSV
val carryOtherAttributes = true // Carry Column 2 (hotel, gas, bar...)
// NOTE(review): `sc` is not defined in this snippet — presumably the active SparkContext; confirm.
var objectRDD = new PointRDD(sc, pointRDDInputLocation, pointRDDOffset, pointRDDSplitter, carryOtherAttributes)
// Build a PolygonRDD from a CSV file in which each row describes one polygon:
// columns 0-9 hold the coordinate values and column 10 holds an attribute.
// NOTE(review): `carryOtherAttributes` and `objectRDD` are also declared by the
// point snippet above, so the two snippets cannot share one scope as written.
val polygonRDDInputLocation = "/Download/checkinshape.csv"
val polygonRDDStartOffset = 0 // The coordinates start from Column 0
val polygonRDDEndOffset = 9 // The coordinates end at Column 9
val polygonRDDSplitter = FileDataSplitter.CSV
val carryOtherAttributes = true // Carry Column 10 (hotel, gas, bar...)
// NOTE(review): `sc` is not defined in this snippet — presumably the active SparkContext; confirm.
var objectRDD = new PolygonRDD(sc, polygonRDDInputLocation, polygonRDDStartOffset, polygonRDDEndOffset, polygonRDDSplitter, carryOtherAttributes)
S2. TSV结构如下:
POINT (-88.331492 32.324142) hotel
POINT (-88.175933 32.360763) gas
POINT (-88.388954 32.357073) bar
POINT (-88.221102 32.35078) restaurant
C2. TSV读取方式:
// Read a TSV file whose rows start with a WKT geometry string into a spatial RDD.
val inputLocation = "/Download/checkin.tsv"
val wktColumn = 0 // The WKT string starts from Column 0
// NOTE(review): judging by the names, the two flags below control whether
// topologically invalid geometries are kept and whether rows that fail to parse
// are skipped — confirm against the Sedona WktReader documentation.
val allowTopologyInvalidGeometries = true // Optional
val skipSyntaxInvalidGeometries = false // Optional
// NOTE(review): `sparkSession` is not defined in this snippet — presumably the active SparkSession; confirm.
val spatialRDD = WktReader.readToGeometryRDD(sparkSession.sparkContext, inputLocation, wktColumn, allowTopologyInvalidGeometries, skipSyntaxInvalidGeometries)
S3.Shapefile结构如下:
- shapefile1
- shapefile2
- myshapefile
- myshapefile.shp
- myshapefile.shx
- myshapefile.dbf
- myshapefile...
- ...
C3. Shapefile 读取方式:
// Read a shapefile into a spatial RDD. The location is the folder that holds
// the .shp/.shx/.dbf files (see the layout above), not an individual file.
val shapefileInputLocation="/Download/myshapefile"
// NOTE(review): `sparkSession` is not defined in this snippet — presumably the active SparkSession; confirm.
val spatialRDD = ShapefileReader.readToGeometryRDD(sparkSession.sparkContext, shapefileInputLocation)
S4. Geojson结构如下:
{ "type": "Feature", "properties": { "STATEFP": "01", "COUNTYFP": "077", "TRACTCE": "011501", "BLKGRPCE": "5", "AFFGEOID": "1500000US010770115015", "GEOID": "010770115015", "NAME": "5", "LSAD": "BG", "ALAND": 6844991, "AWATER": 32636 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -87.621765, 34.873444 ], [ -87.617535, 34.873369 ], [ -87.6123, 34.873337 ], [ -87.604049, 34.873303 ], [ -87.604033, 34.872316 ], [ -87.60415, 34.867502 ], [ -87.604218, 34.865687 ], [ -87.604409, 34.858537 ], [ -87.604018, 34.851336 ], [ -87.603716, 34.844829 ], [ -87.603696, 34.844307 ], [ -87.603673, 34.841884 ], [ -87.60372, 34.841003 ], [ -87.603879, 34.838423 ], [ -87.603888, 34.837682 ], [ -87.603889, 34.83763 ], [ -87.613127, 34.833938 ], [ -87.616451, 34.832699 ], [ -87.621041, 34.831431 ], [ -87.621056, 34.831526 ], [ -87.62112, 34.831925 ], [ -87.621603, 34.8352 ], [ -87.62158, 34.836087 ], [ -87.621383, 34.84329 ], [ -87.621359, 34.844438 ], [ -87.62129, 34.846387 ], [ -87.62119, 34.85053 ], [ -87.62144, 34.865379 ], [ -87.621765, 34.873444 ] ] ] } },
{ "type": "Feature", "properties": { "STATEFP": "01", "COUNTYFP": "045", "TRACTCE": "021102", "BLKGRPCE": "4", "AFFGEOID": "1500000US010450211024", "GEOID": "010450211024", "NAME": "4", "LSAD": "BG", "ALAND": 11360854, "AWATER": 0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -85.719017, 31.297901 ], [ -85.715626, 31.305203 ], [ -85.714271, 31.307096 ], [ -85.69999, 31.307552 ], [ -85.697419, 31.307951 ], [ -85.675603, 31.31218 ], [ -85.672733, 31.312876 ], [ -85.672275, 31.311977 ], [ -85.67145, 31.310988 ], [ -85.670622, 31.309524 ], [ -85.670729, 31.307622 ], [ -85.669876, 31.30666 ], [ -85.669796, 31.306224 ], [ -85.670356, 31.306178 ], [ -85.671664, 31.305583 ], [ -85.67177, 31.305299 ], [ -85.671878, 31.302764 ], [ -85.671344, 31.302123 ], [ -85.668276, 31.302076 ], [ -85.66566, 31.30093 ], [ -85.665687, 31.30022 ], [ -85.669183, 31.297677 ], [ -85.668703, 31.295638 ], [ -85.671985, 31.29314 ], [ -85.677177, 31.288211 ], [ -85.678452, 31.286376 ], [ -85.679236, 31.28285 ], [ -85.679195, 31.281426 ], [ -85.676865, 31.281049 ], [ -85.674661, 31.28008 ], [ -85.674377, 31.27935 ], [ -85.675714, 31.276882 ], [ -85.677938, 31.275168 ], [ -85.680348, 31.276814 ], [ -85.684032, 31.278848 ], [ -85.684387, 31.279082 ], [ -85.692398, 31.283499 ], [ -85.705032, 31.289718 ], [ -85.706755, 31.290476 ], [ -85.718102, 31.295204 ], [ -85.719132, 31.29689 ], [ -85.719017, 31.297901 ] ] ] } },
{ "type": "Feature", "properties": { "STATEFP": "01", "COUNTYFP": "055", "TRACTCE": "001300", "BLKGRPCE": "3", "AFFGEOID": "1500000US010550013003", "GEOID": "010550013003", "NAME": "3", "LSAD": "BG", "ALAND": 1378742, "AWATER": 247387 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.000685, 34.00537 ], [ -85.998837, 34.009768 ], [ -85.998012, 34.010398 ], [ -85.987865, 34.005426 ], [ -85.986656, 34.004552 ], [ -85.985, 34.002659 ], [ -85.98851, 34.001502 ], [ -85.987567, 33.999488 ], [ -85.988666, 33.99913 ], [ -85.992568, 33.999131 ], [ -85.993144, 33.999714 ], [ -85.994876, 33.995153 ], [ -85.998823, 33.989548 ], [ -85.999925, 33.994237 ], [ -86.000616, 34.000028 ], [ -86.000685, 34.00537 ] ] ] } },
{ "type": "Feature", "properties": { "STATEFP": "01", "COUNTYFP": "089", "TRACTCE": "001700", "BLKGRPCE": "2", "AFFGEOID": "1500000US010890017002", "GEOID": "010890017002", "NAME": "2", "LSAD": "BG", "ALAND": 1040641, "AWATER": 0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.574172, 34.727375 ], [ -86.562684, 34.727131 ], [ -86.562797, 34.723865 ], [ -86.562957, 34.723168 ], [ -86.562336, 34.719766 ], [ -86.557381, 34.719143 ], [ -86.557352, 34.718322 ], [ -86.559921, 34.717363 ], [ -86.564827, 34.718513 ], [ -86.567582, 34.718565 ], [ -86.570572, 34.718577 ], [ -86.573618, 34.719377 ], [ -86.574172, 34.727375 ] ] ] } },
C4. GeoJSON 读取方式:
// Read a GeoJSON file (Feature objects like the sample above) into a spatial RDD.
val inputLocation = "/Download/polygon.json"
// NOTE(review): judging by the names, the two flags below control whether
// topologically invalid geometries are kept and whether records that fail to
// parse are skipped — confirm against the Sedona GeoJsonReader documentation.
val allowTopologyInvalidGeometries = true // Optional
val skipSyntaxInvalidGeometries = false // Optional
// NOTE(review): `sparkSession` is not defined in this snippet — presumably the active SparkSession; confirm.
val spatialRDD = GeoJsonReader.readToGeometryRDD(sparkSession.sparkContext, inputLocation, allowTopologyInvalidGeometries, skipSyntaxInvalidGeometries)