1. Writing the record class (DBRecoder)
package hadoop.db;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

/**
 * One row of T_SYS_XTZD. Implements DBWritable so DBInputFormat can populate it
 * from a JDBC ResultSet, and Writable so it can be serialized between tasks.
 */
public class DBRecoder implements Writable, DBWritable {

    int xxbh;            // primary key; read directly by DBMapper in the same package
    private String zbh;
    private String zmc;
    private String bh;
    private String mc;

    // DBWritable: read one record from the JDBC result set. The column order
    // must match the SELECT passed to DBInputFormat.setInput().
    @Override
    public void readFields(ResultSet rs) throws SQLException {
        this.xxbh = rs.getInt(1);
        this.zbh = rs.getString(2);
        this.zmc = rs.getString(3);
        this.bh = rs.getString(4);
        this.mc = rs.getString(5);
    }

    // DBWritable: write one record back to the database (used with DBOutputFormat).
    @Override
    public void write(PreparedStatement ps) throws SQLException {
        ps.setInt(1, this.xxbh);
        ps.setString(2, zbh);
        ps.setString(3, zmc);
        ps.setString(4, bh);
        ps.setString(5, mc);
    }

    // Writable: deserialize from intermediate (shuffle) data.
    @Override
    public void readFields(DataInput in) throws IOException {
        this.xxbh = in.readInt();
        this.zbh = Text.readString(in);
        this.zmc = Text.readString(in);
        this.bh = Text.readString(in);
        this.mc = Text.readString(in);
    }

    // Writable: serialize to intermediate (shuffle) data.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(xxbh);
        Text.writeString(out, zbh);
        Text.writeString(out, zmc);
        Text.writeString(out, bh);
        Text.writeString(out, mc);
    }

    @Override
    public String toString() {
        return xxbh + ":" + zbh + ":" + zmc + ":" + bh + ":" + mc;
    }
}
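Before wiring the class into a job, the DBWritable half can be exercised with plain JDBC against the same table. The following is only a quick sanity-check sketch, reusing the connection settings and SELECT from the test driver below; the class name DBRecoderCheck is made up for illustration.

package hadoop.db;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class DBRecoderCheck {
    public static void main(String[] args) throws Exception {
        Class.forName("oracle.jdbc.driver.OracleDriver");
        // Same URL and credentials as the MapReduce test driver below.
        Connection conn = DriverManager.getConnection(
                "jdbc:oracle:thin:@172.16.60.33:1521:igatest0", "igate320_sz", "igate320_sz");
        try {
            Statement st = conn.createStatement();
            ResultSet rs = st.executeQuery("SELECT xxbh, zbh, zmc, bh, mc FROM t_sys_xtzd");
            DBRecoder record = new DBRecoder();
            while (rs.next()) {
                record.readFields(rs);   // column order must match the DBWritable mapping
                System.out.println(record);
            }
            rs.close();
            st.close();
        } finally {
            conn.close();
        }
    }
}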
2. The Mapper
package hadoop.db;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Emits (xxbh, "xxbh:zbh:zmc:bh:mc") for every database record. Note the
 * new-API (org.apache.hadoop.mapreduce) signature: map() takes a Context, not
 * the old OutputCollector/Reporter pair; with the old signature the method
 * would never override map() and the default identity map would run instead.
 */
public class DBMapper extends Mapper<LongWritable, DBRecoder, LongWritable, Text> {

    @Override
    protected void map(LongWritable key, DBRecoder value, Context context)
            throws IOException, InterruptedException {
        context.write(new LongWritable(value.xxbh), new Text(value.toString()));
    }
}
3. The Reducer
package hadoop.db;

import java.io.IOException;

import org.apache.hadoop.mapreduce.Reducer;

/** Writes all keys and values directly to output (new-API pass-through reducer). */
public class IdentityReducer<K, V> extends Reducer<K, V, K, V> {

    @Override
    protected void reduce(K key, Iterable<V> values, Context context)
            throws IOException, InterruptedException {
        for (V value : values) {
            System.out.println(value.toString());   // debug: dump each value to the task log
            context.write(key, value);
        }
    }
}
This has the same effect as org.apache.hadoop.mapred.lib.IdentityReducer from the old API.
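Since the new-API base class org.apache.hadoop.mapreduce.Reducer already passes every key/value pair through unchanged, the custom class above is only needed for the debug println. If that is not wanted, a minimal alternative is to register the base class directly in the driver:

// Pass-through behaviour without writing a custom reducer:
job.setReducerClass(org.apache.hadoop.mapreduce.Reducer.class);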
4. The test driver
package hadoop.db;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Test {

    private static String driverClass = "oracle.jdbc.driver.OracleDriver";
    private static String dbUrl = "jdbc:oracle:thin:@172.16.60.33:1521:igatest0";

    public static void main(String[] args) throws Exception {
        // Variant using the old org.apache.hadoop.mapred API:
        /*
        JobClient client = new JobClient();
        JobConf conf = new JobConf(hadoop.db.Test.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);
        conf.setInputFormat(DBInputFormat.class);
        FileOutputFormat.setOutputPath(conf, new Path(
                "hdfs://172.16.60.33:9000/home/hadoop/db/output-" + System.currentTimeMillis()));
        DBConfiguration.configureDB(conf, driverClass, dbUrl, "igate320_sz", "igate320_sz");
        String[] fields = {"xxbh", "zbh", "zmc", "bh", "mc"};
        // DBInputFormat.setInput(conf, DBRecoder.class, "T_SYS_XTZD", null, "xxbh", fields);
        DBInputFormat.setInput(conf, DBRecoder.class,
                "SELECT xxbh,zbh,zmc,bh,mc FROM t_sys_xtzd ORDER BY xxbh ",
                "SELECT count(1) FROM t_sys_xtzd ");
        conf.setMapperClass(DBMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        client.setConf(conf);
        try {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
        */

        // Variant using the new org.apache.hadoop.mapreduce API:
        Configuration conf = new Configuration();
        Job job = new Job(conf, "db input test");
        job.setJarByClass(Test.class);
        job.setOutputKeyClass(LongWritable.class);
        // DBMapper emits Text values, so Text (not DBRecoder) is the output value class.
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path(
                "hdfs://172.16.60.33:9000/home/hadoop/db/output-" + System.currentTimeMillis()));
        DBConfiguration.configureDB(job.getConfiguration(), driverClass, dbUrl,
                "igate320_sz", "igate320_sz");
        String[] fields = {"xxbh", "zbh", "zmc", "bh", "mc"};
        // DBInputFormat.setInput(job, DBRecoder.class, "T_SYS_XTZD", "", "xxbh", fields);
        DBInputFormat.setInput(job, DBRecoder.class,
                "select xxbh,zbh,zmc,bh,mc from T_SYS_XTZD",
                "select count(1) from T_SYS_XTZD");
        job.setMapperClass(DBMapper.class);
        job.setReducerClass(IdentityReducer.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
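The Oracle JDBC driver has to be visible to the map tasks, not just to the client that submits the job. One way in hadoop-1.x is to ship the jar via the distributed cache before submission; the sketch below assumes the driver jar has already been uploaded to HDFS (the path and jar name are placeholders, adjust them to your cluster).

// Add to main() before job.waitForCompletion(); the HDFS path is hypothetical.
org.apache.hadoop.filecache.DistributedCache.addFileToClassPath(
        new Path("/lib/ojdbc14.jar"), job.getConfiguration());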
Hadoop 1.1.1 does not support Oracle with DBInputFormat out of the box; you can modify the getSelectQuery() method in org.apache.hadoop.mapred.lib.db.DBInputFormat so that it generates SQL suited to the target database, as sketched below.
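The stock record reader paginates each split with a LIMIT ... OFFSET clause, which Oracle rejects. A minimal sketch of the kind of query the modified getSelectQuery() needs to produce, using the nested-ROWNUM idiom; the helper name and parameters are illustrative, not the actual Hadoop internals.

// Illustrative only: wrap the split's base query in Oracle ROWNUM pagination
// instead of the LIMIT/OFFSET that the default record reader appends.
static String oracleSelectQuery(String baseQuery, long splitStart, long splitLength) {
    StringBuilder sql = new StringBuilder();
    sql.append("SELECT * FROM (SELECT a.*, ROWNUM dbif_rno FROM ( ");
    sql.append(baseQuery);                                   // e.g. the ORDER BY xxbh query above
    sql.append(" ) a WHERE ROWNUM <= ").append(splitStart + splitLength);
    sql.append(" ) WHERE dbif_rno > ").append(splitStart);
    return sql.toString();
}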