When running an HBase MapReduce job on Hadoop, many jars are missing from Hadoop's classpath and have to be added in hadoop-env.sh: pull in the hbase-prefixed jars under $HBASE_HOME/lib, plus metrics-core-2.2.0.jar, which is placed under extlib:
for f in $HBASE_HOME/lib/hbase*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

for f in $HADOOP_HOME/extlib/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done
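If the hbase launcher script is available on the node, a simpler alternative on recent HBase releases is to let HBase print its own dependency list, e.g. export HADOOP_CLASSPATH=$($HBASE_HOME/bin/hbase classpath). Note also that initTableMapperJob ships the HBase jars to the map and reduce tasks through the distributed cache by default, so the classpath set here mainly serves the client JVM that submits the job.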
./hadoop jar /tmp/idStatics.jar sea.hbase.IdStatics
Source code (the mapper reverses each id value and emits it with a count of 1; the reducer sums the counts and writes out the ids that occur more than once):
import java.io.IOException;
import java.util.List;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class IdStatics extends Configured implements Tool {

    public static final String table = "id_records";
    // "idValue" is used below as the column *family* name, not a qualifier
    public static final byte[] column = Bytes.toBytes("idValue");
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.250:2181,192.168.1.250:2182,192.168.1.250:2183");
        conf.set("zookeeper.znode.parent", "/hbase13");

        Job job = Job.getInstance(conf, "read_data_from_hbase");
        job.setJarByClass(IdStatics.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(ReadReducer.class);
        job.setSpeculativeExecution(false);
        // scan the whole column family
        Scan scan = new Scan();
        scan.addFamily(column);
        scan.setMaxVersions(5); // read up to 5 versions of each cell
        scan.setCaching(10);    // rows fetched per RPC round trip
        scan.setBatch(2);       // cap cells per Result; wide rows are split across map() calls
        TableMapReduceUtil.initTableMapperJob(table,
                scan,
                ReadMapper.class,
                Text.class,
                Text.class,
                job);

        // delete any previous output so the job can be rerun cleanly
        String output = "/result";
        FileSystem.get(job.getConfiguration()).delete(new Path(output), true);
        FileOutputFormat.setOutputPath(job, new Path(output));

        return job.waitForCompletion(true) ? 0 : 1;
    }
    static class ReadMapper extends TableMapper<Text, Text> {
        @Override
        protected void map(ImmutableBytesWritable key, Result result, Context context) throws IOException, InterruptedException {
            if (result == null || result.isEmpty()) return;
            // listCells() flattens every cell of the current row into one list
            List<Cell> cells = result.listCells();
            if (CollectionUtils.isNotEmpty(cells)) {
                for (Cell cell : cells) {
                    //String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    context.write(new Text(StringUtils.reverse(value)), new Text("1"));
                }
            }
        }
    }
    static class ReadReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int total = 0;
            for (Text each : values) {
                total += Integer.parseInt(each.toString());
            }
            // only ids seen more than once are written out
            if (total > 1) {
                context.write(key, new Text(String.valueOf(total)));
            }
        }
    }
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new IdStatics(), args));
    }
}
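For a quick end-to-end check, a sketch like the following can seed a few duplicate ids into id_records before running the job. The table name and the "idValue" family come from the job above, but the qualifier "v", the row keys, and the sample ids are assumptions made purely for illustration:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class SeedIdRecords {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.250:2181,192.168.1.250:2182,192.168.1.250:2183");
        conf.set("zookeeper.znode.parent", "/hbase13");
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("id_records"))) {
            // two rows share the id "1234", so the job should emit it with count 2
            String[] ids = {"1234", "5678", "1234"};
            for (int i = 0; i < ids.length; i++) {
                Put put = new Put(Bytes.toBytes("row-" + i));   // hypothetical row key
                put.addColumn(Bytes.toBytes("idValue"),         // family scanned by the job
                        Bytes.toBytes("v"),                     // hypothetical qualifier
                        Bytes.toBytes(ids[i]));
                table.put(put);
            }
        }
    }
}

With this data, /result should contain a single line for the duplicated id, written in reversed form ("4321") with the value 2; ids seen only once are filtered out by the reducer.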