Exploring HBase: Importing and Analyzing Data with MapReduce

Importing Data into HBase

The HBase 0.96 Bug

https://issues.apache.org/jira/browse/HBASE-9867

IllegalAccessError: class com.google.protobuf.HBaseZeroCopyByteString cannot access its superclass com.google.protobuf.LiteralByteString

Workaround

Download the jar from the 0.98 release, upload hbase-protocol-0.98.6.1-hadoop2.jar to the cluster, and put it on the classpath; the job will then run:

 export HADOOP_CLASSPATH="/usr/local/hbase-0.96.2-hadoop2/utiljar/hbase-protocol-0.98.6.1-hadoop2.jar"  

Main Code

import java.io.IOException;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

public class HbMRImportFromFile {

    public static final String NAME = "ImportFromFile";
    // Mapper: turns each input line into a Put keyed by the MD5 hash of the line
    static class ImportMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Mutation> {

        private byte[] family = null;
        private byte[] qualifier = null;

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

            String line = value.toString();
            // Use the MD5 hash of the line as a fixed-length, evenly distributed row key
            byte[] rowKey = DigestUtils.md5(line);
            Put put = new Put(rowKey);
            put.add(family, qualifier, Bytes.toBytes(line));
            context.write(new ImmutableBytesWritable(rowKey), put);
            context.getCounter("ImportFromFile", "lines").increment(1);
        }

        @Override
        protected void setup(Context context) throws IOException,
                InterruptedException {
            String key = context.getConfiguration().get("conf.column");
            // Split the "family:qualifier" spec passed via -c
            byte[][] colKey = KeyValue.parseColumn(Bytes.toBytes(key));
            family = colKey[0];
            if (colKey.length > 1) {
                qualifier = colKey[1];
            }
        }


    }


    public static void main(String[] args) throws Exception {

        Configuration config = HBaseConfiguration.create();

        String[] otherArgs = new GenericOptionsParser(config, args).getRemainingArgs();

        CommandLine cmd = parseArgs(otherArgs);

        if (cmd.hasOption("d")) {
            config.set("conf.debug", "true");
        }
        // Read the parsed command-line options
        String table = cmd.getOptionValue("t");
        String input = cmd.getOptionValue("i");
        String column = cmd.getOptionValue("c");

        config.set("conf.column", column);

        Job job = Job.getInstance(config, "import file " + input + " into " + table);
        job.setJarByClass(HbMRImportFromFile.class);
        job.setMapperClass(ImportMapper.class);
        job.setOutputFormatClass(TableOutputFormat.class);

        job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, table);

        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Writable.class);
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPaths(job, input);

        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }
    // Parse command-line options with Commons CLI
    private static CommandLine parseArgs(String[] otherArgs) {

        Options options = new Options();
        Option option = new Option("t", "table", true, "table name is needed!");
        option.setArgName("table-name");
        option.setRequired(true);
        options.addOption(option);

        option = new Option("c", "column", true, "column to store row dat");
        option.setArgName("family:qualifier");
        option.setRequired(true);
        options.addOption(option);

        option = new Option("i", "input", true, "directory or file read from");
        option.setArgName("path-in-HDFS");
        option.setRequired(true);
        options.addOption(option);

        option = new Option("d", "debug", false, "switch on debug log level");
        options.addOption(option);

        CommandLineParser parser = new PosixParser();
        CommandLine cmd = null;
        try {
            cmd = parser.parse(options, otherArgs);
        } catch (ParseException e) {
            System.err.println(e.getMessage());
            System.exit(-1);
        }

        if (cmd.hasOption("d")) {
            Logger log = Logger.getLogger("mapreduce");
            log.setLevel(Level.DEBUG);
        }

        return cmd;
    }
}
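For completeness, here is one way the import might be driven end to end. The input file, table name, column, and jar name below are illustrative assumptions, not part of the original post; the target table and its column family must already exist, since TableOutputFormat only writes Puts and does not create tables:

 $ cat articles.txt
 {"author":"Alice","title":"HBase basics"}
 {"author":"Bob","title":"MapReduce tips"}
 $ hadoop fs -put articles.txt /input/
 $ echo "create 'testtable', 'data'" | hbase shell
 $ export HADOOP_CLASSPATH="/usr/local/hbase-0.96.2-hadoop2/utiljar/hbase-protocol-0.98.6.1-hadoop2.jar"
 $ hadoop jar hbase-mr-examples.jar HbMRImportFromFile -t testtable -c data:json -i /input/articles.txt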

Analyzing HBase Data

This job reads the JSON documents stored in the table, counts the number of rows per author, and writes the result to a specified file in HDFS.
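The mapper uses fastjson (com.alibaba.fastjson) to pull the author field out of each cell value, so the job assumes the import step above stored one JSON document per cell. A minimal standalone sketch of just that parsing step (the sample document and class name are illustrative assumptions):

import com.alibaba.fastjson.JSONObject;

public class ParseAuthorDemo {
    public static void main(String[] args) {
        // A cell value as written by the import job: one JSON document
        String value = "{\"author\":\"Alice\",\"title\":\"HBase basics\"}";
        JSONObject json = JSONObject.parseObject(value);
        System.out.println(json.getString("author")); // prints: Alice
    }
}

The full job: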

import java.io.IOException;

import com.alibaba.fastjson.JSONObject;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class HbMRAnalyze {

    public static final String NAME = "AnalyzeData";
    public enum Counters { ROWS, COLS, ERROR, VALID }
    // Mapper: parses the JSON document stored in each cell and emits (author, 1)
    static class M extends TableMapper<Text, IntWritable> {

        @Override
        protected void map(ImmutableBytesWritable key, Result columns,
                Context context) throws IOException, InterruptedException {

            context.getCounter(Counters.ROWS).increment(1);
            String value = null;

            for (Cell cell : columns.listCells()) {

                context.getCounter(Counters.COLS).increment(1);

                value = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
                try {
                    JSONObject json = JSONObject.parseObject(value);
                    String author = json.getString("author");
                    context.write(new Text(author), new IntWritable(1));
                    context.getCounter(Counters.VALID).increment(1);
                } catch (Exception e) {
                    // Malformed JSON: record the error and keep scanning
                    context.getCounter(Counters.ERROR).increment(1);
                }
            }

        }


    }
    // Reducer: sums the per-author counts
    static class R extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {

            int count = 0;
            for (IntWritable one : values) {
                // Sum the values rather than counting elements, so this class
                // would also be correct as a combiner
                count += one.get();
            }
            context.write(key, new IntWritable(count));
        }


    }

    public static void main(String[] args) throws IOException, ParseException, ClassNotFoundException, InterruptedException {
        Configuration config = HBaseConfiguration.create();

        String[] args2 = new GenericOptionsParser(config, args).getRemainingArgs();

        CommandLine cmd = parseArgs(args2);
        // Read the parsed options
        String tableName = cmd.getOptionValue("t");

        String column = cmd.getOptionValue("c");
        String output = cmd.getOptionValue("o");

        Scan scan = new Scan();
        // Limit the scan to the requested column, or to the whole family if no qualifier was given
        if (column != null) {
            byte[][] colKey = KeyValue.parseColumn(Bytes.toBytes(column));
            if(colKey.length > 1) {
                scan.addColumn(colKey[0], colKey[1]);
            } else {
                scan.addFamily(colKey[0]);
            }
        }

        Job job = Job.getInstance(config, "HbaseAnalyse");
        job.setJarByClass(HbMRAnalyze.class);
        TableMapReduceUtil.initTableMapperJob(tableName, scan, M.class, Text.class, IntWritable.class, job);
        job.setReducerClass(R.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(1);

        FileOutputFormat.setOutputPath(job, new Path(output));
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }
    // Parse command-line options
    private static CommandLine parseArgs(String[] args2) throws ParseException {

        Options options = new Options();
        Option option = new Option("t", "tableName", true, "needed");
        option.setArgName("table-name");
        option.setRequired(true);
        options.addOption(option);

        option = new Option("c", "column", true, "needed");
        option.setArgName("family:qulifier");
        option.setRequired(true);
        options.addOption(option);

        option = new Option("o", "output", true, "needed");
        option.setArgName("output");
        option.setRequired(true);
        options.addOption(option);

        option = new Option("d", "debug", false, "debug");
        options.addOption(option);

        CommandLineParser parser = new PosixParser();
        CommandLine cmd = null;
        cmd = parser.parse(options, args2);
        return cmd;
    }
}
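A hypothetical run against the data imported earlier (jar name, paths, and the sample output all follow the assumptions above). Each output line is an author and the number of rows attributed to them, tab separated:

 $ hadoop jar hbase-mr-examples.jar HbMRAnalyze -t testtable -c data:json -o /output/authors
 $ hadoop fs -cat /output/authors/part-r-00000
 Alice   1
 Bob     1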
