package com.taobao.app;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class AccessLogMapper extends Mapper<Object, Text, Text, Text> {
    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // Each input line looks like "1 name1--txt1"; the first token
        // becomes the output key, the second the output value.
        String[] temp = value.toString().split(" ");
        String k = temp[0];
        String v = temp[1];
        System.out.println("mapper: key " + k + "value " + v);
        context.write(new Text(k), new Text(v));
    }
}
package com.taobao.app;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

public class AccessLogReducer extends TableReducer<Text, Text, ImmutableBytesWritable> {
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        String k = key.toString();
        // Keys are unique in this sample, so only the first value matters.
        String v = values.iterator().next().toString();
        ImmutableBytesWritable outKey = new ImmutableBytesWritable(Bytes.toBytes(k));
        // One Put per key: column family "f1", qualifier "qualifier".
        Put put = new Put(Bytes.toBytes(k));
        put.add(Bytes.toBytes("f1"), Bytes.toBytes("qualifier"), Bytes.toBytes(v));
        System.out.println("reduce key:" + k + " value:" + v);
        context.write(outKey, put);
    }
}
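For the reducer's Puts to land in a real table, a table with column family f1 has to exist beforehand. Below is a minimal creation sketch, assuming the HBase 0.90-era client API used above; the table name access_log is a placeholder, since the post never names one.

package com.taobao.app;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateAccessLogTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        // Table with the single column family "f1" that AccessLogReducer writes to.
        HTableDescriptor desc = new HTableDescriptor("access_log"); // hypothetical name
        desc.addFamily(new HColumnDescriptor("f1"));
        admin.createTable(desc);
    }
}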
package com.taobao.app;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;

public class LogAnalysiser extends Configured implements Tool {
    public int run(String[] args) throws Exception {
        // Reuse the Configuration injected by ToolRunner so that
        // generic command-line options take effect.
        Configuration conf = getConf();
        conf.set("hbase.rootdir", "hdfs://my031068.sqa.cm4.tbsite.net:9000/hbase");
        conf.set("hbase.zookeeper.quorum", "10.232.31.68");
        conf.set("hbase.zookeeper.property.clientPort", "2222");

        Job job = new Job(conf, "Sample MR Application");
        job.setJarByClass(AccessLogSampleApplication.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(AccessLogMapper.class);
        job.setReducerClass(AccessLogReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path("c:/input"));
        FileOutputFormat.setOutputPath(job, new Path("c:/output"));
        // Submit the job exactly once and block until it completes.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
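Note that this driver pairs a TableReducer with FileOutputFormat, which is why the Puts end up serialized as text under c:/output rather than in HBase. To write into the table itself, the conventional wiring is TableMapReduceUtil.initTableReducerJob; the two lines below would go inside run() in place of the setReducerClass and FileOutputFormat.setOutputPath calls (again assuming the hypothetical table name access_log):

import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

// Sets up TableOutputFormat, the reducer class, and its output types in one call.
TableMapReduceUtil.initTableReducerJob("access_log", AccessLogReducer.class, job);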
package com.taobao.app;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class AccessLogSampleApplication {
    public static void main(String[] args) throws Exception {
        // ToolRunner parses generic options, then hands the Configuration
        // to LogAnalysiser via Configured.setConf() before calling run().
        int rc = ToolRunner.run(new Configuration(), new LogAnalysiser(), args);
        System.exit(rc);
    }
}
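To run the sample against a cluster (rather than the local runner shown below), the usual invocation is along these lines; the jar name is hypothetical:

hadoop jar accesslog-sample.jar com.taobao.app.AccessLogSampleApplication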
The contents of 1.txt are as follows:
1 name1--txt1-www.javabloger.com
2 name2--txt1
3 name3--txt1
4 name4--txt1
5 name5--txt1
The contents of 2.txt are as follows:
6 name6--txt2-www.javabloger.com
7 name7--txt2
8 name8--txt2
9 name9--txt2
10 name10--txt2
The contents of 3.txt are as follows:
11 name11--txt3-www.javabloger.com
12 name12--txt3
13 name13--txt3
14 name14--txt3
15 name15--txt3
Below is the information printed on the console:
11/07/30 22:07:31 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
11/07/30 22:07:31 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
11/07/30 22:07:32 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
11/07/30 22:07:32 INFO input.FileInputFormat: Total input paths to process : 3
11/07/30 22:07:32 INFO mapred.JobClient: Running job: job_local_0001
11/07/30 22:07:32 INFO input.FileInputFormat: Total input paths to process : 3
11/07/30 22:07:32 INFO mapred.MapTask: io.sort.mb = 100
11/07/30 22:07:32 INFO mapred.MapTask: data buffer = 79691776/99614720
11/07/30 22:07:32 INFO mapred.MapTask: record buffer = 262144/327680
mapper: key 1value name1--txt1-www.javabloger.com
11/07/30 22:07:32 INFO mapred.MapTask: Starting flush of map output
mapper: key 2value name2--txt1
mapper: key 3value name3--txt1
mapper: key 4value name4--txt1
mapper: key 5value name5--txt1
11/07/30 22:07:32 INFO mapred.MapTask: Finished spill 0
11/07/30 22:07:32 INFO mapred.TaskRunner: Task:attempt_local_0001_m_000000_0 is done. And is in the process of commiting
11/07/30 22:07:32 INFO mapred.LocalJobRunner:
11/07/30 22:07:32 INFO mapred.TaskRunner: Task 'attempt_local_0001_m_000000_0' done.
11/07/30 22:07:32 INFO mapred.MapTask: io.sort.mb = 100
11/07/30 22:07:32 INFO mapred.MapTask: data buffer = 79691776/99614720
11/07/30 22:07:32 INFO mapred.MapTask: record buffer = 262144/327680
mapper: key 6value name6--txt2-www.javabloger.com
mapper: key 7value name7--txt2
mapper: key 8value name8--txt2
mapper: key 9value name9--txt2
mapper: key 10value name10--txt2
11/07/30 22:07:32 INFO mapred.MapTask: Starting flush of map output
11/07/30 22:07:32 INFO mapred.MapTask: Finished spill 0
11/07/30 22:07:32 INFO mapred.TaskRunner: Task:attempt_local_0001_m_000001_0 is done. And is in the process of commiting
11/07/30 22:07:32 INFO mapred.LocalJobRunner:
11/07/30 22:07:32 INFO mapred.TaskRunner: Task 'attempt_local_0001_m_000001_0' done.
11/07/30 22:07:32 INFO mapred.MapTask: io.sort.mb = 100
11/07/30 22:07:32 INFO mapred.MapTask: data buffer = 79691776/99614720
11/07/30 22:07:32 INFO mapred.MapTask: record buffer = 262144/327680
mapper: key 11value name11--txt3-www.javabloger.com
mapper: key 12value name12--txt3
mapper: key 13value name13--txt3
mapper: key 14value name14--txt3
mapper: key 15value name15--txt3
11/07/30 22:07:32 INFO mapred.MapTask: Starting flush of map output
11/07/30 22:07:32 INFO mapred.MapTask: Finished spill 0
11/07/30 22:07:32 INFO mapred.TaskRunner: Task:attempt_local_0001_m_000002_0 is done. And is in the process of commiting
11/07/30 22:07:32 INFO mapred.LocalJobRunner:
11/07/30 22:07:32 INFO mapred.TaskRunner: Task 'attempt_local_0001_m_000002_0' done.
11/07/30 22:07:32 INFO mapred.LocalJobRunner:
11/07/30 22:07:32 INFO mapred.Merger: Merging 3 sorted segments
11/07/30 22:07:32 INFO mapred.Merger: Down to the last merge-pass, with 3 segments left of total size: 315 bytes
11/07/30 22:07:32 INFO mapred.LocalJobRunner:
reduce key:1 value:name1--txt1-www.javabloger.com
reduce key:10 value:name10--txt2
reduce key:11 value:name11--txt3-www.javabloger.com
reduce key:12 value:name12--txt3
reduce key:13 value:name13--txt3
reduce key:14 value:name14--txt3
reduce key:15 value:name15--txt3
reduce key:2 value:name2--txt1
reduce key:3 value:name3--txt1
reduce key:4 value:name4--txt1
reduce key:5 value:name5--txt1
reduce key:6 value:name6--txt2-www.javabloger.com
reduce key:7 value:name7--txt2
reduce key:8 value:name8--txt2
reduce key:9 value:name9--txt2
The output is written to the c:/output directory; the contents of part-r-00000 are as follows. The leading space-separated hex bytes (31 is "1", 31 30 is "10", and so on) are the ImmutableBytesWritable row keys as rendered by their toString():
31 row=1, families={(family=f1, keyvalues=(1/f1:qualifier/9223372036854775807/Put/vlen=30)}
31 30 row=10, families={(family=f1, keyvalues=(10/f1:qualifier/9223372036854775807/Put/vlen=12)}
31 31 row=11, families={(family=f1, keyvalues=(11/f1:qualifier/9223372036854775807/Put/vlen=31)}
31 32 row=12, families={(family=f1, keyvalues=(12/f1:qualifier/9223372036854775807/Put/vlen=12)}
31 33 row=13, families={(family=f1, keyvalues=(13/f1:qualifier/9223372036854775807/Put/vlen=12)}
31 34 row=14, families={(family=f1, keyvalues=(14/f1:qualifier/9223372036854775807/Put/vlen=12)}
31 35 row=15, families={(family=f1, keyvalues=(15/f1:qualifier/9223372036854775807/Put/vlen=12)}
32 row=2, families={(family=f1, keyvalues=(2/f1:qualifier/9223372036854775807/Put/vlen=11)}
33 row=3, families={(family=f1, keyvalues=(3/f1:qualifier/9223372036854775807/Put/vlen=11)}
34 row=4, families={(family=f1, keyvalues=(4/f1:qualifier/9223372036854775807/Put/vlen=11)}
35 row=5, families={(family=f1, keyvalues=(5/f1:qualifier/9223372036854775807/Put/vlen=11)}
36 row=6, families={(family=f1, keyvalues=(6/f1:qualifier/9223372036854775807/Put/vlen=30)}
37 row=7, families={(family=f1, keyvalues=(7/f1:qualifier/9223372036854775807/Put/vlen=11)}
38 row=8, families={(family=f1, keyvalues=(8/f1:qualifier/9223372036854775807/Put/vlen=11)}
39 row=9, families={(family=f1, keyvalues=(9/f1:qualifier/9223372036854775807/Put/vlen=11)}
11/07/30 22:07:32 INFO mapred.TaskRunner: Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting
11/07/30 22:07:32 INFO mapred.LocalJobRunner:
11/07/30 22:07:32 INFO mapred.TaskRunner: Task attempt_local_0001_r_000000_0 is allowed to commit now
11/07/30 22:07:33 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0001_r_000000_0' to c:/output
11/07/30 22:07:33 INFO mapred.LocalJobRunner: reduce > reduce
11/07/30 22:07:33 INFO mapred.TaskRunner: Task 'attempt_local_0001_r_000000_0' done.
11/07/30 22:07:33 INFO mapred.JobClient: map 100% reduce 100%
11/07/30 22:07:33 INFO mapred.JobClient: Job complete: job_local_0001
11/07/30 22:07:33 INFO mapred.JobClient: Counters: 12
11/07/30 22:07:33 INFO mapred.JobClient: FileSystemCounters
11/07/30 22:07:33 INFO mapred.JobClient: FILE_BYTES_READ=55525
11/07/30 22:07:33 INFO mapred.JobClient: FILE_BYTES_WRITTEN=113650
11/07/30 22:07:33 INFO mapred.JobClient: Map-Reduce Framework
11/07/30 22:07:33 INFO mapred.JobClient: Reduce input groups=15
11/07/30 22:07:33 INFO mapred.JobClient: Combine output records=0
11/07/30 22:07:33 INFO mapred.JobClient: Map input records=15
11/07/30 22:07:33 INFO mapred.JobClient: Reduce shuffle bytes=0
11/07/30 22:07:33 INFO mapred.JobClient: Reduce output records=15
11/07/30 22:07:33 INFO mapred.JobClient: Spilled Records=30
11/07/30 22:07:33 INFO mapred.JobClient: Map output bytes=279
11/07/30 22:07:33 INFO mapred.JobClient: Combine input records=0
11/07/30 22:07:33 INFO mapred.JobClient: Map output records=15
11/07/30 22:07:33 INFO mapred.JobClient: Reduce input records=15