Delete HBase rows example
The MRDeleteRows tool supports two ways of selecting the rows to delete. Delete by explicit start/stop row key:

$ hadoop jar ./sponge-hserver.jar com.citi.sponge.mapreduce.MRDeleteRows -Dtable="elf_log" -DstartKey="10000:1365663164575:88888:testhome" -DstopKey="10000:1365663164575:88890:testhome" -Dquorum="vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net"

Delete by application id and time window (times in yyyy-MM-dd-HH-mm format):

$ hadoop jar ./sponge-hserver.jar com.citi.sponge.mapreduce.MRDeleteRows -Dtable="elf_log" -Dappid="10000" -DstartTime="2010-01-01-01-01" -DstopTime="2014-01-01-01-01" -Dquorum="vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net"
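Before running the job, it can help to preview which rows actually fall inside the targeted key range. The command below is a sketch from the HBase shell, reusing the elf_log table and the start/stop keys from the first invocation above; adjust LIMIT as needed.

hbase> scan 'elf_log', {STARTROW => '10000:1365663164575:88888:testhome', STOPROW => '10000:1365663164575:88890:testhome', LIMIT => 10}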
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MRDeleteRows extends Configured implements Tool {

    String startRowKey;
    String stopRowKey;
    String quorum;
    String table;
    String startTime;
    String stopTime;
    String appID;

    public String getStartTime() { return startTime; }
    public String getStopTime() { return stopTime; }
    public String getAppID() { return appID; }
    public String getQuorum() { return quorum; }
    public String getStartRowKey() { return startRowKey; }
    public String getStopRowKey() { return stopRowKey; }
    public String getTable() { return table; }

    // Pull the -D options parsed by ToolRunner out of the job configuration.
    @Override
    public int run(String[] arg0) throws Exception {
        Configuration conf = getConf();
        for (Entry<String, String> entry : conf) {
            if (entry.getKey().equals("startKey")) {
                this.startRowKey = entry.getValue();
            }
            if (entry.getKey().equals("stopKey")) {
                this.stopRowKey = entry.getValue();
            }
            if (entry.getKey().equals("quorum")) {
                this.quorum = entry.getValue();
            }
            if (entry.getKey().equals("table")) {
                this.table = entry.getValue();
            }
            if (entry.getKey().equals("startTime")) {
                this.startTime = entry.getValue();
            }
            if (entry.getKey().equals("stopTime")) {
                this.stopTime = entry.getValue();
            }
            if (entry.getKey().equals("appid")) {
                this.appID = entry.getValue();
            }
        }
        return 0;
    }

    // Build a row key prefix of the form "<appId>:<epochMillis>" from a
    // timestamp in yyyy-MM-dd-HH-mm format.
    static String getRowKey(String appID, String time) {
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd-HH-mm");
        Date date = null;
        try {
            date = df.parse(time);
        } catch (ParseException e) {
            System.out.println("Please input the date in yyyy-MM-dd-HH-mm format");
            System.exit(1);
        }
        return appID + ":" + date.getTime();
    }

    // Emits one Delete per scanned row; the identity table reducer set up in
    // main() applies the deletes back to the same table.
    static class DeleteMapper extends TableMapper<ImmutableBytesWritable, Delete> {

        public DeleteMapper() {
        }

        @Override
        public void map(ImmutableBytesWritable row, Result value, Context context)
                throws IOException {
            ImmutableBytesWritable userKey = new ImmutableBytesWritable(row.get());
            try {
                Delete delete = new Delete(row.get());
                context.write(userKey, delete);
            } catch (InterruptedException e) {
                e.printStackTrace();
                throw new IOException(e);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        MRDeleteRows deleteElf = new MRDeleteRows();
        ToolRunner.run(deleteElf, args);

        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", deleteElf.getQuorum());

        Job job = new Job(config, "DeleteHbaseRowkeys");
        job.setJarByClass(MRDeleteRows.class);

        Scan scan = new Scan();
        System.out.println("quorum: " + deleteElf.getQuorum());
        System.out.println("table: " + deleteElf.getTable());

        // Mode 1: explicit start/stop row keys.
        if (deleteElf.getStartRowKey() != null && deleteElf.getStopRowKey() != null) {
            System.out.println("startkey: " + deleteElf.getStartRowKey());
            System.out.println("stopkey: " + deleteElf.getStopRowKey());
            scan.setStartRow(deleteElf.getStartRowKey().getBytes());
            scan.setStopRow(deleteElf.getStopRowKey().getBytes());
        }

        // Mode 2: application id plus a start/stop time window.
        if (deleteElf.getAppID() != null && deleteElf.getStartTime() != null
                && deleteElf.getStopTime() != null) {
            System.out.println("AppID: " + deleteElf.getAppID());
            System.out.println("start time: " + deleteElf.getStartTime());
            System.out.println("stop time: " + deleteElf.getStopTime());
            scan.setStartRow(getRowKey(deleteElf.getAppID(), deleteElf.getStartTime()).getBytes());
            scan.setStopRow(getRowKey(deleteElf.getAppID(), deleteElf.getStopTime()).getBytes());
        }
        scan.setCacheBlocks(false);

        TableMapReduceUtil.initTableMapperJob(deleteElf.getTable(), scan,
                DeleteMapper.class, ImmutableBytesWritable.class, Delete.class, job);
        TableMapReduceUtil.initTableReducerJob(deleteElf.getTable(), null, job);

        boolean b = job.waitForCompletion(true);
        if (!b) {
            throw new IOException("error with job!");
        }
    }
}
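For a small number of rows, the same delete-by-scan idea can also be run from a plain client program without MapReduce. The sketch below is illustrative rather than part of the tool above; it assumes the same 0.90-era HBase client API and reuses the elf_log table, quorum hosts, and key range from the example invocation.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;

public class ClientSideDeleteRows {
    public static void main(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        // Same quorum hosts as the -Dquorum option in the examples above.
        config.set("hbase.zookeeper.quorum",
                "vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net");
        HTable table = new HTable(config, "elf_log");
        // Scan only the key range we want to remove.
        Scan scan = new Scan("10000:1365663164575:88888:testhome".getBytes(),
                "10000:1365663164575:88890:testhome".getBytes());
        scan.setCacheBlocks(false);
        ResultScanner scanner = table.getScanner(scan);
        List<Delete> deletes = new ArrayList<Delete>();
        for (Result r : scanner) {
            deletes.add(new Delete(r.getRow()));
        }
        scanner.close();
        int count = deletes.size();
        // Apply the whole batch; the list may be drained as deletes succeed.
        table.delete(deletes);
        table.close();
        System.out.println("Deleted " + count + " rows");
    }
}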
HBase Loader MapReduce Example
import java.io.IOException;
import java.util.Calendar;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Sample uploader MapReduce job.
 * <p>
 * This is EXAMPLE code. You will need to change it to work for your context.
 * <p>
 * Uses the {@link TableReducer} plumbing set up by
 * {@link TableMapReduceUtil#initTableReducerJob} to put the data into HBase.
 * Change the InputFormat to suit your data. In this example each input line
 * becomes one row: the line is stored as the log body, and the appId, env,
 * hostname, and log metadata supplied on the command line are stored with it.
 * <p>
 * The table and the column families we insert into (sysInfo and content) must
 * preexist.
 * <p>
 * There is no reducer in this example, as it is not necessary and adds
 * significant overhead. If you need to do any massaging of data before
 * inserting it into HBase, you can do this in the map as well.
 * <p>
 * Do the following to start the MR job:
 *
 * <pre>
 * ./bin/hadoop BulkLoaderToHbase input tablename appId env hostname logpath logFileName logType
 * </pre>
 * <p>
 * This code was written against HBase 0.21 trunk.
 *
 * Before running this job, make sure HADOOP_CLASSPATH is set; it needs to
 * include zookeeper.jar and hbase-0.90.4-cdh3u3.jar.
 */
public class BulkLoaderToHbase {

    private static final String NAME = "BulkLoaderToHbase";

    // Column families and qualifiers.
    private static byte[] SYSINFO;
    private static byte[] CONTENT;
    private static byte[] APP_ID;
    private static byte[] ENV;
    private static byte[] HOSTNAME;
    private static byte[] BODY;
    private static byte[] LOG_FILE_NAME;
    private static byte[] LOG_TYPE;
    private static byte[] LOG_FILE_PATH;

    // Values taken from the command line and written to every row.
    // NOTE: these statics are populated in configureJob() in the client JVM.
    // For a fully distributed run, pass the values through the Configuration
    // and read them back in Mapper.setup() instead.
    private static byte[] appId_v;
    private static byte[] env_v;
    private static byte[] hostname_v;
    private static byte[] logPath_v;
    private static byte[] logFileName_v;
    private static byte[] logType_v;

    // Sequence number appended to the row key to keep keys unique within a millisecond.
    private static long nano = 0;

    static class Uploader extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        private long checkpoint = 100;
        private long count = 0;

        @Override
        public void map(LongWritable key, Text line, Context context) throws IOException {
            // Row key format: <appId>:<epochMillis>:<sequence>:<hostname>
            Calendar cal = Calendar.getInstance();
            String rowkey = Bytes.toString(appId_v) + ":" + cal.getTimeInMillis() + ":"
                    + (nano++) + ":" + Bytes.toString(hostname_v);
            byte[] rowKeyValue = Bytes.toBytes(rowkey);

            Put put = new Put(rowKeyValue);
            put.add(SYSINFO, APP_ID, appId_v);
            put.add(SYSINFO, ENV, env_v);
            put.add(SYSINFO, HOSTNAME, hostname_v);
            // Text.getBytes() may return trailing bytes beyond getLength(), so
            // copy the line via toString() before storing it.
            put.add(CONTENT, BODY, Bytes.toBytes(line.toString()));
            put.add(CONTENT, LOG_FILE_PATH, logPath_v);
            put.add(CONTENT, LOG_FILE_NAME, logFileName_v);
            put.add(CONTENT, LOG_TYPE, logType_v);

            // Uncomment below to disable WAL. This will improve performance but
            // means you will experience data loss in the case of a RegionServer crash.
            // put.setWriteToWAL(false);

            try {
                context.write(new ImmutableBytesWritable(rowKeyValue), put);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }

            // Set status every checkpoint lines.
            if (++count % checkpoint == 0) {
                context.setStatus("Emitting Put " + count);
            }
        }
    }

    /**
     * Job configuration.
     */
    public static Job configureJob(Configuration conf, String[] args) throws IOException {
        SYSINFO = Bytes.toBytes("sysInfo");
        CONTENT = Bytes.toBytes("content");
        APP_ID = Bytes.toBytes("appId");
        ENV = Bytes.toBytes("env");
        HOSTNAME = Bytes.toBytes("hostName");
        BODY = Bytes.toBytes("body");
        LOG_FILE_PATH = Bytes.toBytes("logFilePath");
        LOG_FILE_NAME = Bytes.toBytes("logFileName");
        LOG_TYPE = Bytes.toBytes("logType");

        Path inputPath = new Path(args[0]);
        String tableName = args[1];
        appId_v = Bytes.toBytes(args[2]);
        env_v = Bytes.toBytes(args[3]);
        hostname_v = Bytes.toBytes(args[4]);
        logPath_v = Bytes.toBytes(args[5]);
        logFileName_v = Bytes.toBytes(args[6]);
        logType_v = Bytes.toBytes(args[7]);

        Job job = new Job(conf, NAME + "_" + tableName);
        job.setJarByClass(Uploader.class);
        FileInputFormat.setInputPaths(job, inputPath);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(Uploader.class);

        // No reducers. Just write straight to the table. Call initTableReducerJob
        // because it sets up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
        return job;
    }

    /**
     * Main entry point.
     *
     * @param args the command line parameters
     * @throws Exception when running the job fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 8) {
            System.err.println("Wrong number of arguments: " + otherArgs.length);
            System.err.println("Usage: " + NAME
                    + " <input> <tablename> <appId> <env> <hostname> <logpath> <logFileName> <logType>");
            System.exit(-1);
        }
        Job job = configureJob(conf, otherArgs);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
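The loader expects the target table and the sysInfo and content column families to exist before the job runs. The commands below are a sketch only: the table name elf_log, the env value UAT, the log path/file/type values, and the unqualified class name are placeholders chosen to match the earlier examples, so substitute your own table, package, and metadata.

hbase> create 'elf_log', 'sysInfo', 'content'

$ export HADOOP_CLASSPATH=/path/to/zookeeper.jar:/path/to/hbase-0.90.4-cdh3u3.jar
$ hadoop jar ./sponge-hserver.jar BulkLoaderToHbase /tmp/input.csv elf_log 10000 UAT testhome /var/log/app app.log elf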