Hadoop 2.4.0 + Eclipse 3.7.0 + gis-tools-for-hadoop-master
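This sample runs earthquake points from a CSV file through a point-in-polygon test against California county polygons and counts quakes per county with MapReduce. The county feature class (california-counties.json) is Esri JSON, which the mapper below parses via EsriFeatureClass.fromJson(); a minimal sketch of the expected structure (attribute and coordinate values are illustrative, not taken from the real file):

{
  "geometryType" : "esriGeometryPolygon",
  "spatialReference" : { "wkid" : 4326 },
  "features" : [ {
    "attributes" : { "NAME" : "Kern" },
    "geometry" : { "rings" : [ [ [-118.0, 35.0], [-118.0, 36.0], [-117.0, 36.0], [-118.0, 35.0] ] ] }
  } ]
}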
package esri.hadoop.text;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import com.esri.core.geometry.Envelope;
import com.esri.core.geometry.Envelope2D;
import com.esri.core.geometry.GeometryEngine;
import com.esri.core.geometry.Point;
import com.esri.core.geometry.QuadTree;
import com.esri.core.geometry.QuadTree.QuadTreeIterator;
import com.esri.core.geometry.SpatialReference;
import com.esri.json.EsriFeatureClass;

public class MapperClass extends Mapper<LongWritable, Text, Text, IntWritable> {
    int longitudeIndex;
    int latitudeIndex;
    String labelAttribute;
    EsriFeatureClass featureClass;
    SpatialReference spatialReference;
    QuadTree quadTree;
    QuadTreeIterator quadTreeIter;

    // Build a quadtree over the envelopes of all features, so each point
    // only has to be tested against the few candidate polygons near it.
    private void buildQuadTree() {
        quadTree = new QuadTree(new Envelope2D(-180, -90, 180, 90), 8);
        Envelope envelope = new Envelope();
        for (int i = 0; i < featureClass.features.length; i++) {
            featureClass.features[i].geometry.queryEnvelope(envelope);
            quadTree.insert(i, new Envelope2D(envelope.getXMin(), envelope.getYMin(),
                    envelope.getXMax(), envelope.getYMax()));
        }
        quadTreeIter = quadTree.getIterator();
    }

    // Return the index of the feature containing the point, or -1 if none does.
    private int queryQuadTree(Point pt) {
        quadTreeIter.resetIterator(pt, 0);
        int elmHandle = quadTreeIter.next();
        while (elmHandle >= 0) {
            int featureIndex = quadTree.getElement(elmHandle);
            // The quadtree only matches envelopes; do the exact containment test here.
            if (GeometryEngine.contains(featureClass.features[featureIndex].geometry, pt, spatialReference)) {
                return featureIndex;
            }
            elmHandle = quadTreeIter.next();
        }
        return -1;
    }

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration config = context.getConfiguration();
        spatialReference = SpatialReference.create(4326); // WGS84
        String featuresPath = config.get("sample.features.input");
        labelAttribute = config.get("sample.features.keyattribute", "NAME");
        latitudeIndex = config.getInt("samples.csvdata.columns.lat", 1);
        longitudeIndex = config.getInt("samples.csvdata.columns.long", 2);

        // Read the polygon feature class (Esri JSON) from HDFS.
        FSDataInputStream iStream = null;
        try {
            Path p = new Path(featuresPath);
            FileSystem hdfs = p.getFileSystem(config);
            iStream = hdfs.open(p);
            featureClass = EsriFeatureClass.fromJson(iStream);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (iStream != null) {
                try {
                    iStream.close();
                } catch (IOException e) {
                }
            }
        }
        if (featureClass != null) {
            buildQuadTree();
        }
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Skip the CSV header row (the record at byte offset 0).
        if (key.get() == 0) return;

        String line = value.toString();
        String[] values = line.split(",");
        float latitude = Float.parseFloat(values[latitudeIndex]);
        float longitude = Float.parseFloat(values[longitudeIndex]);
        Point point = new Point(longitude, latitude);
        IntWritable one = new IntWritable(1);
        int featureIndex = queryQuadTree(point);
        if (featureIndex >= 0) {
            String name = (String) featureClass.features[featureIndex].attributes.get(labelAttribute);
            if (name == null) name = "???";
            context.write(new Text(name), one);
        } else {
            context.write(new Text("*Outside Feature Set"), one);
        }
    }
}
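For reference, map() expects comma-separated records with latitude in column 1 and longitude in column 2 (zero-based, matching the samples.csvdata.columns.* defaults) and skips the header row at byte offset 0. Illustrative lines (values are made up, not taken from the real earthquakes.csv):

datetime,latitude,longitude,magnitude
2014-03-28T12:00:00Z,35.986,-117.876,2.6
2014-03-29T03:15:00Z,38.120,-122.210,3.1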
package esri.hadoop.text;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class ReduceClass extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the per-record counts emitted by the mapper for this feature name.
        int sumCount = 0;
        for (IntWritable sum : values) {
            sumCount += sum.get();
        }
        context.write(key, new IntWritable(sumCount));
    }
}
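With TextOutputFormat, the results land in the output directory's part files as tab-separated name/count pairs. Hypothetical output (counts are made up):

*Outside Feature Set	12
Kern	102
Los Angeles	37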
package esri.hadoop.text;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Sample {
    public static void main(String[] args1) throws Exception {
        Configuration config = new Configuration();
        // This line is critical: it points the job at the remote cluster.
        config.set("mapred.job.tracker", "10.2.173.15:9000");

        // Command-line arguments are ignored; the three paths (feature JSON,
        // point CSV, output directory) are hard-coded here.
        String[] ioArgs = new String[] {
                "/user/guest/esri/counties-data/california-counties.json",
                "/user/guest/esri/earthquake-data/earthquakes.csv",
                "/user/guest/esri/DataCount" };
        String[] args = new GenericOptionsParser(config, ioArgs).getRemainingArgs();
        if (args.length != 3) {
            System.out.println("Invalid Arguments");
            throw new IllegalArgumentException();
        }
        config.set("sample.features.input", args[0]);
        config.set("sample.features.keyattribute", "NAME");
        config.setInt("samples.csvdata.columns.lat", 1);
        config.setInt("samples.csvdata.columns.long", 2);

        Job job = Job.getInstance(config);
        job.setJobName("Earthquake Data Aggregation Sample");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(MapperClass.class);
        job.setReducerClass(ReduceClass.class);
        job.setCombinerClass(ReduceClass.class); // safe as a combiner: it just sums counts
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextInputFormat.setInputPaths(job, new Path(args[1]));
        TextOutputFormat.setOutputPath(job, new Path(args[2]));
        job.setJarByClass(Sample.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
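To run the job, export the three classes as a jar from Eclipse and submit it with hadoop jar. The jar name below is an assumption; the Esri Geometry API jar (and whatever jar provides com.esri.json.EsriFeatureClass) must be bundled into it or otherwise available on the task classpath. Because the output directory is hard-coded, the job fails if it already exists, so remove it first:

hadoop fs -rm -r /user/guest/esri/DataCount
hadoop jar earthquake-sample.jar esri.hadoop.text.Sample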