Because the HBase-to-Hive mapping cannot carry the HBase cell timestamp over, as the official documentation explains (https://cwiki.apache.org/confluence/display/Hive/HBaseIntegration):
There are two SERDEPROPERTIES that control the mapping of HBase columns to Hive:

- hbase.columns.mapping
- hbase.table.default.storage.type: can have a value of either string (the default) or binary; this option is only available as of Hive 0.9, and the string behavior is the only one available in earlier versions.

The column mapping support currently available is somewhat cumbersome and restrictive:

- For each Hive column, there must be a corresponding entry in the comma-delimited hbase.columns.mapping string (so for a Hive table with n columns, the string should have n entries); whitespace should not be used in between entries since it will be interpreted as part of the column name, which is almost certainly not what you want.
- A mapping entry must be either :key or of the form column-family-name:[column-name][#(binary|string)] (the type specification delimited by # was added in Hive 0.9.0; earlier versions interpreted everything as strings).
  - If no type specification is given, the value of hbase.table.default.storage.type will be used.
  - Any prefix of a valid value is also valid (i.e. #b instead of #binary).
  - If you specify a column as binary, the bytes in the corresponding HBase cells are expected to be of the form that HBase's Bytes class yields.
- There must be exactly one :key mapping (compound keys are not supported yet).
- (Note that before Hive 0.6, :key was not supported and the first Hive column implicitly mapped to the key; as of Hive 0.6 it is strongly recommended that you always specify the key explicitly, and support for implicit key mapping will be dropped in the future.)
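For reference, this is roughly what such a mapping looks like (table, column family and column names here are hypothetical). Note that the mapping syntax offers no way to expose the HBase cell timestamp as a Hive column, which is exactly why the export job below is needed:

-- hypothetical table and column names; the mapping string follows the rules quoted above
CREATE EXTERNAL TABLE user_profile (
  rowkey       STRING,
  visit_count  BIGINT
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
  "hbase.columns.mapping" = ":key,cf:visit_count#binary"
)
TBLPROPERTIES ("hbase.table.name" = "user_profile");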
So instead we write our own MapReduce job to export the HBase table data to a path on HDFS,
and then load that output into Hive for querying (a sketch of the Hive side is given after the driver code below).
The HBase MapReduce API provides the TableMapper and TableReducer base classes in org.apache.hadoop.hbase.mapreduce:

/**
 * Extends the base <code>Mapper</code> class to add the required input key
 * and value classes.
 *
 * @param <KEYOUT>  The type of the key.
 * @param <VALUEOUT>  The type of the value.
 * @see org.apache.hadoop.mapreduce.Mapper
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class TableMapper<KEYOUT, VALUEOUT>
        extends Mapper<ImmutableBytesWritable, Result, KEYOUT, VALUEOUT> {
}
/**
 * Extends the basic <code>Reducer</code> class to add the required key and
 * value input/output classes. While the input key and value as well as the
 * output key can be anything handed in from the previous map phase the output
 * value <u>must</u> be either a {@link org.apache.hadoop.hbase.client.Put Put}
 * or a {@link org.apache.hadoop.hbase.client.Delete Delete} instance when
 * using the {@link TableOutputFormat} class.
 * <p>
 * This class is extended by {@link IdentityTableReducer} but can also be
 * subclassed to implement similar features or any custom code needed. It has
 * the advantage to enforce the output value to a specific basic type.
 *
 * @param <KEYIN>  The type of the input key.
 * @param <VALUEIN>  The type of the input value.
 * @param <KEYOUT>  The type of the output key.
 * @see org.apache.hadoop.mapreduce.Reducer
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class TableReducer<KEYIN, VALUEIN, KEYOUT>
        extends Reducer<KEYIN, VALUEIN, KEYOUT, Mutation> {
}
The mapper reads each row from HBase and emits the row key together with the column family, qualifier, value and cell timestamp (DumpUtils is a helper class of the project; a sketch follows the code):

import java.io.IOException;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.Text;

public class HBaseTableMapper extends TableMapper<Text, Text> {

    private String userId;
    private String columnFamily;
    private String qualifier;
    private Long timestamp;
    private Object val;
    private KeyValue[] kv;
    private static Text keyOut;
    private static Text valOut;

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // get user id from the row key
        userId = new String(key.get());
        kv = value.raw();
        // the Scan in the driver selects a single column, so kv[0] is the only cell
        timestamp = kv[0].getTimestamp();
        val = new String(kv[0].getValue());
        qualifier = new String(kv[0].getQualifier());
        columnFamily = new String(kv[0].getFamily());

        // alternative: combine timestamp and userId as the key so the reducer receives records sorted by timestamp
        // keyOut.set(DumpUtils.combineString(timestamp, userId));

        // only use userId as key, be sure that the userId record is unique
        keyOut.set(userId);
        valOut.set(DumpUtils.combineString(columnFamily, qualifier, val, timestamp));
        context.write(keyOut, valOut);
    }

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        keyOut = new Text();
        valOut = new Text();
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        super.cleanup(context);
    }
}
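DumpUtils.combineString is not shown in this post; a minimal sketch of what such a helper presumably does (joining the fields into one delimited line) might look like the following. The tab delimiter is an assumption, chosen so the output can later be read as a delimited text file:

// Hypothetical sketch of the DumpUtils helper used above; the actual project
// class is not shown, and the tab delimiter is an assumption.
public final class DumpUtils {

    private static final String DELIMITER = "\t";

    private DumpUtils() {
    }

    // Joins the given fields into a single delimited line.
    public static String combineString(Object... fields) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < fields.length; i++) {
            if (i > 0) {
                sb.append(DELIMITER);
            }
            sb.append(fields[i]);
        }
        return sb.toString();
    }
}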
The driver reads the job settings from a config file, configures the scan, and submits the job (ConfigParser and Constant are helper classes of the project, not shown here):

import java.io.IOException;
import java.util.Map;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class HBaseDump {

    private static Map<String, Object> parseCmd(final String[] args) throws ParseException {
        Options options = new Options();
        options.addOption("config", true, "project configuration");
        options.addOption("help", false, "print usage");
        CommandLineParser parser = new BasicParser();
        CommandLine line = parser.parse(options, args);
        if (line.hasOption("config")) {
            return ConfigParser.load(line.getOptionValue("config"));
        } else if (line.hasOption("help")) {
            usage();
        }
        return null;
    }

    private static void usage() {
        System.out.println("hadoop jar xxxx.jar <main.class> -config <config file path>");
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException, ParseException {
        Map<String, Object> map = parseCmd(args);
        if (map == null) {
            // -help was given or no config was supplied
            return;
        }
        System.out.println(map.toString());

        final String tableName = (String) map.get(Constant.TABLE_NAME);
        final String cf = (String) map.get(Constant.TABLE_COLUMN_FAMILY);
        final String qualifier = (String) map.get(Constant.TABLE_QUALIFIER);
        final String jobName = (String) map.get(Constant.JOB_NAME);
        final String output = (String) map.get(Constant.OUTPUT_PATH);
        final String HBASE_RPC_ENGINE = (String) map.get(Constant.HBASE_RPC_ENGINE);
        final int reduceTasks = Integer.parseInt((String) map.get(Constant.MAPRED_REDUCE_TASKS));

        Configuration configuration = HBaseConfiguration.create();
        configuration.set(Constant.HBASE_ZOOKEEPER_QUORUM,
                (String) map.get(Constant.HBASE_ZOOKEEPER_QUORUM));
        configuration.set(Constant.HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT,
                (String) map.get(Constant.HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT));
        configuration.set(Constant.HBASE_RPC_ENGINE, HBASE_RPC_ENGINE);
        configuration.set(Constant.ZOOKEEPER_ZNODE_PARENT,
                (String) map.get(Constant.ZOOKEEPER_ZNODE_PARENT));

        Job job = new Job(configuration, jobName);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(reduceTasks);

        // remove a stale output directory if it exists
        Path path = new Path(output);
        FileSystem fs = FileSystem.get(configuration);
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        FileOutputFormat.setOutputPath(job, new Path(output));

        // only scan the single column we want to dump
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes(cf), Bytes.toBytes(qualifier));
        // scan.setFilter(new FirstKeyOnlyFilter());

        TableMapReduceUtil.initTableMapperJob(tableName, scan, HBaseTableMapper.class,
                Text.class, Text.class, job);
        // no reducer class is set, so the identity reducer passes the mapper output straight to the text files
        // TableMapReduceUtil.initTableReducerJob(tableName, HBaseDumpReducer.class, job);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
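TextOutputFormat separates key and value with a tab, so each output line carries the userId followed by column family, qualifier, value and timestamp. To finish the flow described above, the dump can then be put behind a Hive table. A minimal sketch, assuming the tab delimiter from the hypothetical DumpUtils above and hypothetical table name, column names and output path:

-- hypothetical table name, column names and HDFS path
CREATE TABLE user_dump (
  user_id        STRING,
  column_family  STRING,
  qualifier      STRING,
  val            STRING,
  ts             BIGINT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;

-- point Hive at the MapReduce output directory configured as the job's output path
LOAD DATA INPATH '/path/to/dump/output' INTO TABLE user_dump;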