Connecting to HBase Data in MapReduce

1. Create the EMPLOYEE table in HBase: create 'EMPLOYEE','cf1', then load a batch of sales order data including, but not limited to, product id, salesperson id, sale time, and sale amount (a shell sketch follows this list);

2. Create the TotalSale table in HBase: create 'TotalSale','cf1', then load a batch of sales data including, but not limited to, employee id and total sales;

3. Write the mapper, reducer, and driver source code;

4. Write the testDriver program to output the employee id, the number of sales orders, and the total sales.
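
A minimal HBase shell sketch for steps 1 and 2. The employeeId#orderId row-key layout matches what the mapper below expects, but the specific ids and values are illustrative assumptions:

create 'EMPLOYEE','cf1'
put 'EMPLOYEE','1001#order001','cf1:productId','p01'
put 'EMPLOYEE','1001#order001','cf1:salesmanId','1001'
put 'EMPLOYEE','1001#order001','cf1:saleTime','2016-01-01'
put 'EMPLOYEE','1001#order001','cf1:sales','250'
put 'EMPLOYEE','1001#order002','cf1:sales','300'
create 'TotalSale','cf1'

Note that cf1:sales is written as a string here; the mapper parses it back to an integer.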


testMapper

package com.hbasepackage;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class testMapper extends TableMapper<Text, IntWritable> {

	@Override
	public void map(ImmutableBytesWritable rowKey, Result columns, Context context)
			throws IOException, InterruptedException {
		try {
			// get the row key and convert it to a string
			String inKey = new String(rowKey.get());
			// the row key is assumed to be employeeId#orderId; keep only the employee id
			String oKey = inKey.split("#")[0];
			// read the sales column as bytes, then convert to a string
			// (it was stored as a string from the hbase shell)
			byte[] bSales = columns.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("sales"));
			String sSales = new String(bSales);
			int sales = Integer.parseInt(sSales);
			// emit the employee id and the sales value
			context.write(new Text(oKey), new IntWritable(sales));
		} catch (RuntimeException e) {
			e.printStackTrace();
		}
	}
}
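
TableMapper fixes the map input types to ImmutableBytesWritable (the row key) and Result (the row's columns), so only the output key and value types are declared above; they must match the Text.class and IntWritable.class arguments passed to initTableMapperJob in the driver.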



testReducer

package com.hbasepackage;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class testReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

	@Override
	public void reduce(Text key, Iterable<IntWritable> values, Context context)
			throws IOException, InterruptedException {
		try {
			int sum = 0;
			int count = 0;
			// loop through the sales values, counting orders and summing sales
			for (IntWritable sales : values) {
				sum += sales.get();
				count++;
			}
			// print employee id, number of orders, and total sales
			System.out.println(key.toString() + "\t" + count + "\t" + sum);
			// create an hbase Put with the employee id as the row key
			// (Text.getBytes() can return trailing padding, so copy via toString())
			Put insHBase = new Put(Bytes.toBytes(key.toString()));
			// insert the order count and the total sales into hbase
			insHBase.add(Bytes.toBytes("cf1"), Bytes.toBytes("Order count"), Bytes.toBytes(count));
			insHBase.add(Bytes.toBytes("cf1"), Bytes.toBytes("Total sales"), Bytes.toBytes(sum));
			// write the row to the hbase table
			context.write(null, insHBase);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}
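
Because Bytes.toBytes(sum) stores each value as a 4-byte binary integer, a shell scan of TotalSale shows \x.. escapes rather than readable numbers. Below is a minimal sketch of reading one row back in Java, using the same old-style HTable client API as the code above; the row key "1001" is a hypothetical employee id from the sample data:

package com.hbasepackage;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class readTotalSale {

	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();
		HTable table = new HTable(conf, "TotalSale");
		// "1001" is a hypothetical employee id from the sample data above
		Result r = table.get(new Get(Bytes.toBytes("1001")));
		int count = Bytes.toInt(r.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("Order count")));
		int total = Bytes.toInt(r.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("Total sales")));
		System.out.println("1001\t" + count + "\t" + total);
		table.close();
	}
}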



testDriver

package com.hbasepackage;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class testDriver {

	public static void main(String[] args) throws Exception {
		// HBaseConfiguration picks up hbase-site.xml so the job can find the cluster
		Configuration conf = HBaseConfiguration.create();
		// define the scan and the column families to scan
		Scan scan = new Scan();
		scan.addFamily(Bytes.toBytes("cf1"));
		Job job = Job.getInstance(conf, "testDriver");
		job.setJarByClass(testDriver.class);
		// define the input hbase table; this also sets the mapper class
		TableMapReduceUtil.initTableMapperJob(
				"EMPLOYEE",        // input table
				scan,              // scan with the column family to read
				testMapper.class,  // mapper
				Text.class,        // mapper output key
				IntWritable.class, // mapper output value
				job);
		// define the output table; this also sets the reducer class
		TableMapReduceUtil.initTableReducerJob(
				"TotalSale",       // output table
				testReducer.class, // reducer
				job);
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
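
One way to run the job and verify the result, assuming the three classes are packed into a jar named hbase-mr.jar (the jar name is hypothetical) and that hbase classpath is used to put the HBase jars on the job classpath:

export HADOOP_CLASSPATH=$(hbase classpath)
hadoop jar hbase-mr.jar com.hbasepackage.testDriver
hbase shell
hbase> scan 'TotalSale'

Each row of TotalSale then holds one employee id with its order count and total sales, matching the per-employee lines the reducer prints.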

