https://issues.apache.org/jira/browse/HBASE-9867
IllegalAccessError: class com.google.protobuf.HBaseZeroCopyByteString cannot access its superclass com.google.protobuf.LiteralByteString
This error appears when hbase-protocol is missing from the MapReduce classpath (HBaseZeroCopyByteString lives in the com.google.protobuf package so it can subclass the package-private LiteralByteString, and both must load from the same classpath). Workaround: download the 0.98 release, upload hbase-protocol-0.98.6.1-hadoop2.jar to the cluster, and add it to the classpath before submitting the job:
export HADOOP_CLASSPATH="/usr/local/hbase-0.96.2-hadoop2/utiljar/hbase-protocol-0.98.6.1-hadoop2.jar"
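With the classpath exported, the import job below can then be submitted. A sketch of the invocation, assuming the code is packaged in a jar named hbase-mr.jar (the jar name, table name, and paths are illustrative, not from the original):

hadoop jar hbase-mr.jar HbMRImportFromFile -t testtable -c data:json -i /input/lines.txt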
// Required imports for the import job (HBase 0.96-era client APIs).
import java.io.IOException;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

public class HbMRImportFromFile {
  public static final String NAME = "ImportFromFile";

  // Mapper: emits one Put per input line, keyed by the MD5 hash of the line.
  static class ImportMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Mutation> {
    private byte[] family = null;
    private byte[] qualifier = null;

    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String line = value.toString();
      byte[] rowKey = DigestUtils.md5(line);  // MD5 of the whole line as the row key
      Put put = new Put(rowKey);
      put.add(family, qualifier, line.getBytes());
      context.write(new ImmutableBytesWritable(rowKey), put);
      context.getCounter("ImportFromFile", "lines").increment(1);
    }

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      // Split the configured "family:qualifier" spec into its parts.
      String column = context.getConfiguration().get("conf.column");
      byte[][] colKey = KeyValue.parseColumn(column.getBytes());
      family = colKey[0];
      if (colKey.length > 1) {
        qualifier = colKey[1];
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration config = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(config, args).getRemainingArgs();
    CommandLine cmd = parseArgs(otherArgs);
    if (cmd.hasOption("d")) {
      config.set("conf.debug", "true");
    }
    // Read the parsed option values.
    String table = cmd.getOptionValue("t");
    String input = cmd.getOptionValue("i");
    String column = cmd.getOptionValue("c");
    config.set("conf.column", column);
    Job job = Job.getInstance(config, "import file " + input + " into " + table);
    job.setJarByClass(HbMRImportFromFile.class);
    job.setMapperClass(ImportMapper.class);
    job.setOutputFormatClass(TableOutputFormat.class);
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, table);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    job.setNumReduceTasks(0);  // map-only job: the Puts go straight to the table
    FileInputFormat.addInputPaths(job, input);
    job.waitForCompletion(true);
  }

  // Parse command-line arguments.
  private static CommandLine parseArgs(String[] otherArgs) {
    Options options = new Options();
    Option option = new Option("t", "table", true, "table to import into (must exist)");
    option.setArgName("table-name");
    option.setRequired(true);
    options.addOption(option);
    option = new Option("c", "column", true, "column to store row data into");
    option.setArgName("family:qualifier");
    option.setRequired(true);
    options.addOption(option);
    option = new Option("i", "input", true, "directory or file to read from");
    option.setArgName("path-in-HDFS");
    option.setRequired(true);
    options.addOption(option);
    option = new Option("d", "debug", false, "switch on debug log level");
    options.addOption(option);
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
      cmd = parser.parse(options, otherArgs);
    } catch (ParseException e) {
      System.err.println(e.getMessage());
      System.exit(-1);
    }
    if (cmd.hasOption("d")) {
      Logger log = Logger.getLogger("mapreduce");
      log.setLevel(Level.DEBUG);
    }
    return cmd;
  }
}
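To spot-check an import, a row can be fetched back by recomputing the MD5 row key of a known input line. A minimal sketch, assuming a table named testtable, the column data:json, and the 0.96-era HTable client API (the table name, column, and sample line are assumptions, not from the original):

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class CheckImport {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HTable table = new HTable(conf, "testtable");    // hypothetical table name
    String line = "{\"author\":\"alice\"}";          // one line from the input file
    Get get = new Get(DigestUtils.md5(line));        // row key = MD5 of the line
    Result result = table.get(get);
    byte[] stored = result.getValue(Bytes.toBytes("data"), Bytes.toBytes("json"));
    System.out.println(Bytes.toString(stored));      // should print the original line
    table.close();
  }
}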
The second example reads from the table and writes the analysis results to a specified output file.
// Required imports for the analyze job (fastjson is assumed for the JSON
// parsing, matching the JSONObject.parseObject() call below).
import java.io.IOException;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import com.alibaba.fastjson.JSONObject;

public class HbMRAnalyze {
  public static final String NAME = "AnalyzeData";

  public enum Counters { ROWS, COLS, ERROR, VALID }

  // Mapper: parses each cell value as JSON and emits (author, 1).
  static class M extends TableMapper<Text, IntWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result columns,
        Context context) throws IOException, InterruptedException {
      context.getCounter(Counters.ROWS).increment(1);
      for (Cell cell : columns.listCells()) {
        context.getCounter(Counters.COLS).increment(1);
        String value = Bytes.toString(cell.getValueArray(),
            cell.getValueOffset(), cell.getValueLength());
        try {
          JSONObject json = JSONObject.parseObject(value);
          String author = json.getString("author");
          context.write(new Text(author), new IntWritable(1));
          context.getCounter(Counters.VALID).increment(1);
        } catch (Exception e) {
          // Count malformed JSON values instead of failing the task.
          context.getCounter(Counters.ERROR).increment(1);
        }
      }
    }
  }

  // Reducer: sums the counts per author.
  static class R extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      int count = 0;
      for (IntWritable one : values) {
        count += one.get();
      }
      context.write(key, new IntWritable(count));
    }
  }

  public static void main(String[] args) throws IOException, ParseException,
      ClassNotFoundException, InterruptedException {
    Configuration config = HBaseConfiguration.create();
    String[] args2 = new GenericOptionsParser(config, args).getRemainingArgs();
    CommandLine cmd = parseArgs(args2);
    // Read the parsed option values.
    String tableName = cmd.getOptionValue("t");
    String column = cmd.getOptionValue("c");
    String output = cmd.getOptionValue("o");
    // Restrict the scan to the requested column, or the whole family if no
    // qualifier was given.
    Scan scan = new Scan();
    if (column != null) {
      byte[][] colKey = KeyValue.parseColumn(Bytes.toBytes(column));
      if (colKey.length > 1) {
        scan.addColumn(colKey[0], colKey[1]);
      } else {
        scan.addFamily(colKey[0]);
      }
    }
    Job job = Job.getInstance(config, "HbaseAnalyse");
    job.setJarByClass(HbMRAnalyze.class);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, M.class, Text.class, IntWritable.class, job);
    job.setReducerClass(R.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.waitForCompletion(true);
  }

  // Parse command-line arguments.
  private static CommandLine parseArgs(String[] args2) throws ParseException {
    Options options = new Options();
    Option option = new Option("t", "tableName", true, "table to read from (must exist)");
    option.setArgName("table-name");
    option.setRequired(true);
    options.addOption(option);
    option = new Option("c", "column", true, "column holding the JSON data");
    option.setArgName("family:qualifier");
    option.setRequired(true);
    options.addOption(option);
    option = new Option("o", "output", true, "output path to write the results to");
    option.setArgName("path-in-HDFS");
    option.setRequired(true);
    options.addOption(option);
    option = new Option("d", "debug", false, "switch on debug log level");
    options.addOption(option);
    CommandLineParser parser = new PosixParser();
    return parser.parse(options, args2);
  }
}
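Assuming both jobs are packaged in the same jar, the analyze job can then be run over the imported column like this (jar name, table name, and paths are again illustrative):

hadoop jar hbase-mr.jar HbMRAnalyze -t testtable -c data:json -o /output/authors

The output directory then holds part files with one author and count per line. Because R only sums integer counts, it could also be registered as a combiner (job.setCombinerClass(R.class)) to shrink the data shuffled to the single reducer.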