Importing Data from an HDFS File into HBase

To import data from an HDFS file into HBase, we run a map-only MapReduce job: the inner class ImportMapper extends Mapper and turns each input line into a Put, which TableOutputFormat writes directly to the target table. The code is as follows:

package hbase.mr;

import java.io.IOException;

import hbase.curd.HTableUtil;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class ImportFromFile {

    /**
     * Import data from a file into HBase.
     */
    public static final String NAME = "ImportFromFile";
    public enum Counters { LINES }

    // Map-only job: each input line becomes one Put keyed by the MD5 of the line.
    static class ImportMapper extends Mapper<LongWritable, Text,
        ImmutableBytesWritable, Writable> {
        private byte[] family = null;
        private byte[] qualifier = null;

        @Override
        protected void setup(Context cxt) {
            // Read the target column ("family:qualifier") passed in via the configuration.
            String column = cxt.getConfiguration().get("conf.column");
            byte[][] colkey = KeyValue.parseColumn(Bytes.toBytes(column));
            family = colkey[0];
            if (colkey.length > 1) {
                qualifier = colkey[1];
            }
        }

        @Override
        public void map(LongWritable offset, Text line, Context cxt) {
            try {
                String lineString = line.toString();
                // Use the MD5 hash of the line as the row key to spread rows across regions.
                byte[] rowkey = DigestUtils.md5(lineString);
                Put put = new Put(rowkey);
                put.add(family, qualifier, Bytes.toBytes(lineString));
                cxt.write(new ImmutableBytesWritable(rowkey), put);
                cxt.getCounter(Counters.LINES).increment(1);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    // Parse the command-line options: target table, target column, and input path.
    private static CommandLine parseArgs(String[] args) {
        Options options = new Options();
        Option o = new Option("t", "table", true, "table to import into (must exist)");
        o.setArgName("table-name");
        o.setRequired(true);
        options.addOption(o);

        o = new Option("c", "column", true, "column to store row data into");
        o.setArgName("family:qualifier");
        o.setRequired(true);
        options.addOption(o);

        o = new Option("i", "input", true,
            "the directory or file to read from");
        o.setArgName("path-in-HDFS");
        o.setRequired(true);
        options.addOption(o);
        options.addOption("d", "debug", false, "switch on DEBUG log level");
        CommandLineParser parser = new PosixParser();
        CommandLine cmd = null;
        try {
            cmd = parser.parse(options, args);
        } catch (Exception e) {
            System.err.println("ERROR: " + e.getMessage() + "\n");
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(NAME + " ", options, true);
            System.exit(-1);
        }
        return cmd;
    }

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

        Configuration conf = HTableUtil.getConf();
        String[] otherArgs = new GenericOptionsParser(conf, initialArg()).getRemainingArgs();
        CommandLine cmd = parseArgs(otherArgs);
        String table = cmd.getOptionValue("t");
        String input = cmd.getOptionValue("i");
        String column = cmd.getOptionValue("c");
        conf.set("conf.column", column);
        Job job = new Job(conf, "Import from file " + input + " into table " + table);
        job.setJarByClass(ImportFromFile.class);
        job.setMapperClass(ImportMapper.class);
        // Write directly to HBase via TableOutputFormat; no reduce phase is needed.
        job.setOutputFormatClass(TableOutputFormat.class);
        job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, table);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Writable.class);
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPath(job, new Path(input));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    // Default arguments used in place of real command-line input.
    private static String[] initialArg() {
        String[] args = new String[6];
        args[0] = "-c";
        args[1] = "fam:data";
        args[2] = "-i";
        args[3] = "/user/hadoop/input/picdata";
        args[4] = "-t";
        args[5] = "testtable";
        return args;
    }
}
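As the -t option help text notes, the target table must already exist before the job runs. Below is a minimal sketch of creating it with the same 0.9x-era HBase client API that the job code uses (new Job(...), put.add(...)). The class name CreateImportTable is hypothetical, and HBaseConfiguration.create() is used here in place of the project's own HTableUtil.getConf() helper; the table and family names match initialArg() ("testtable", "fam:data").

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateImportTable {
    public static void main(String[] args) throws IOException {
        // Assumption: standard client configuration; the import job itself uses HTableUtil.getConf().
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        if (!admin.tableExists("testtable")) {
            HTableDescriptor desc = new HTableDescriptor("testtable");
            // Column family must match the "fam:data" column passed to the import job.
            desc.addFamily(new HColumnDescriptor("fam"));
            admin.createTable(desc);
        }
        admin.close();
    }
}

After the import job finishes, the Counters.LINES counter in the job output reports how many lines were written; scanning testtable should then show rows keyed by 16-byte MD5 hashes with each line's text stored in fam:data.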
