Hadoop 的 FileSplit 简单使用

Hadoop 的 FileSplit 简单使用


FileSplit类继承关系:

hadoop的FileSplit简单使用_第1张图片


FileSplit类中的属性和方法:

hadoop的FileSplit简单使用_第2张图片



作业输入:
hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath1.txt
hadoop  a
spark   a
hive    a
hbase   a
tachyon a
storm   a
redis   a
hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath2.txt
hadoop  b
spark   b
kafka   b
tachyon b
oozie   b
flume   b
sqoop   b
solr    b
hadoop@hadoop:/home/hadoop/blb$ 


代码:
import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SplitLocationInfo;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;


public class GetSplitMapReduce {
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();
		String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
		if(otherArgs.length!=2){
			System.err.println("Usage databaseV1 <inputpath> <outputpath>");
		}


		Job job = Job.getInstance(conf, GetSplitMapReduce.class.getSimpleName() + "1");
		job.setJarByClass(GetSplitMapReduce.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(NullWritable.class);
		job.setMapperClass(MyMapper1.class);
		job.setNumReduceTasks(0);
		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
		FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
		job.waitForCompletion(true);
	}
	public static class MyMapper1 extends Mapper<LongWritable, Text, Text, NullWritable>{
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, NullWritable>.Context context)
				throws IOException, InterruptedException {
		
			FileSplit fileSplit=(FileSplit) context.getInputSplit();
			String pathname=fileSplit.getPath().getName();	//获取目录名字
			int depth = fileSplit.getPath().depth();	//获取目录深度
			Class<? extends FileSplit> class1 = fileSplit.getClass(); //获取当前类
			long length = fileSplit.getLength();	//获取文件长度
			SplitLocationInfo[] locationInfo = fileSplit.getLocationInfo();	//获取位置信息
			String[] locations = fileSplit.getLocations();	//获取位置
			long start = fileSplit.getStart(); //The position of the first byte in the file to process.
			String string = fileSplit.toString();
			//fileSplit.
			
			context.write(new Text("===================================================================================="), NullWritable.get());
			context.write(new Text("pathname--"+pathname), NullWritable.get());
			context.write(new Text("depth--"+depth), NullWritable.get());
			context.write(new Text("class1--"+class1), NullWritable.get());
			context.write(new Text("length--"+length), NullWritable.get());
			context.write(new Text("locationInfo--"+locationInfo), NullWritable.get());
			context.write(new Text("locations--"+locations), NullWritable.get());
			context.write(new Text("start--"+start), NullWritable.get());
			context.write(new Text("string--"+string), NullWritable.get());
		}
	}
}



对应inputpath2.txt文件的输出:
hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/out2/part-m-00000
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@4ff41ba0
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@2341ce62
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@35549603
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@4444ba4f
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@7c23bb8c
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@dee2400
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@d7d8325
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@2b2cf90e
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66



对应inputpath1.txt文件的输出:
hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/out2/part-m-00001
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@4ff41ba0
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@2341ce62
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@35549603
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@4444ba4f
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@7c23bb8c
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@dee2400
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@d7d8325
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
hadoop@hadoop:/home/hadoop/blb$ 


你可能感兴趣的:(hadoop的FileSplit简单使用)