Hadoop + ES + HBase: writing HBase data to Elasticsearch with MapReduce
1. Add the dependencies (pom.xml):
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.kexion</groupId>
  <artifactId>HBaseMapReduce</artifactId>
  <version>1.0-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>HBaseMapReduce</name>
  <url>http://www.example.com</url>

  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.2</version>
      <exclusions>
        <exclusion><artifactId>jackson-xc</artifactId><groupId>org.codehaus.jackson</groupId></exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.7.2</version>
      <exclusions>
        <exclusion><artifactId>netty</artifactId><groupId>io.netty</groupId></exclusion>
        <exclusion><artifactId>slf4j-api</artifactId><groupId>org.slf4j</groupId></exclusion>
        <exclusion><artifactId>guava</artifactId><groupId>com.google.guava</groupId></exclusion>
        <exclusion><artifactId>slf4j-log4j12</artifactId><groupId>org.slf4j</groupId></exclusion>
        <exclusion><artifactId>log4j</artifactId><groupId>log4j</groupId></exclusion>
        <exclusion><artifactId>commons-logging</artifactId><groupId>commons-logging</groupId></exclusion>
        <exclusion><artifactId>commons-compress</artifactId><groupId>org.apache.commons</groupId></exclusion>
        <exclusion><artifactId>httpclient</artifactId><groupId>org.apache.httpcomponents</groupId></exclusion>
        <exclusion><artifactId>jackson-core-asl</artifactId><groupId>org.codehaus.jackson</groupId></exclusion>
        <exclusion><artifactId>commons-codec</artifactId><groupId>commons-codec</groupId></exclusion>
        <exclusion><artifactId>jackson-mapper-asl</artifactId><groupId>org.codehaus.jackson</groupId></exclusion>
        <exclusion><artifactId>commons-collections</artifactId><groupId>commons-collections</groupId></exclusion>
        <exclusion><artifactId>commons-lang</artifactId><groupId>commons-lang</groupId></exclusion>
        <exclusion><artifactId>jackson-jaxrs</artifactId><groupId>org.codehaus.jackson</groupId></exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.7.2</version>
      <exclusions>
        <exclusion><artifactId>jsr305</artifactId><groupId>com.google.code.findbugs</groupId></exclusion>
        <exclusion><artifactId>netty-all</artifactId><groupId>io.netty</groupId></exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>1.3.1</version>
      <exclusions>
        <exclusion><artifactId>commons-logging</artifactId><groupId>commons-logging</groupId></exclusion>
        <exclusion><artifactId>junit</artifactId><groupId>junit</groupId></exclusion>
        <exclusion><artifactId>guava</artifactId><groupId>com.google.guava</groupId></exclusion>
        <exclusion><artifactId>hadoop-common</artifactId><groupId>org.apache.hadoop</groupId></exclusion>
        <exclusion><artifactId>slf4j-api</artifactId><groupId>org.slf4j</groupId></exclusion>
        <exclusion><artifactId>hadoop-auth</artifactId><groupId>org.apache.hadoop</groupId></exclusion>
        <exclusion><artifactId>netty-all</artifactId><groupId>io.netty</groupId></exclusion>
        <exclusion><artifactId>hadoop-mapreduce-client-core</artifactId><groupId>org.apache.hadoop</groupId></exclusion>
        <exclusion><artifactId>commons-codec</artifactId><groupId>commons-codec</groupId></exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-core</artifactId>
      <version>2.9.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>1.3.1</version>
      <exclusions>
        <exclusion><artifactId>netty-all</artifactId><groupId>io.netty</groupId></exclusion>
        <exclusion><artifactId>guava</artifactId><groupId>com.google.guava</groupId></exclusion>
        <exclusion><artifactId>commons-logging</artifactId><groupId>commons-logging</groupId></exclusion>
        <exclusion><artifactId>commons-codec</artifactId><groupId>commons-codec</groupId></exclusion>
        <exclusion><artifactId>junit</artifactId><groupId>junit</groupId></exclusion>
        <exclusion><artifactId>hadoop-common</artifactId><groupId>org.apache.hadoop</groupId></exclusion>
        <exclusion><artifactId>hadoop-auth</artifactId><groupId>org.apache.hadoop</groupId></exclusion>
        <exclusion><artifactId>hadoop-client</artifactId><groupId>org.apache.hadoop</groupId></exclusion>
        <exclusion><artifactId>hadoop-hdfs</artifactId><groupId>org.apache.hadoop</groupId></exclusion>
        <exclusion><artifactId>hadoop-mapreduce-client-core</artifactId><groupId>org.apache.hadoop</groupId></exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>
      <version>1.3.1</version>
      <exclusions>
        <exclusion><artifactId>guava</artifactId><groupId>com.google.guava</groupId></exclusion>
        <exclusion><artifactId>commons-logging</artifactId><groupId>commons-logging</groupId></exclusion>
        <exclusion><artifactId>commons-codec</artifactId><groupId>commons-codec</groupId></exclusion>
        <exclusion><artifactId>junit</artifactId><groupId>junit</groupId></exclusion>
        <exclusion><artifactId>hadoop-common</artifactId><groupId>org.apache.hadoop</groupId></exclusion>
        <exclusion><artifactId>hadoop-mapreduce-client-core</artifactId><groupId>org.apache.hadoop</groupId></exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>com.oracle</groupId>
      <artifactId>ojdbc14</artifactId>
      <version>10.2.0.2.0</version>
    </dependency>
    <dependency>
      <groupId>org.elasticsearch</groupId>
      <artifactId>elasticsearch</artifactId>
      <version>5.6.1</version>
    </dependency>
    <dependency>
      <groupId>io.netty</groupId>
      <artifactId>netty-all</artifactId>
      <version>4.1.13.Final</version>
    </dependency>
    <dependency>
      <groupId>org.elasticsearch.client</groupId>
      <artifactId>transport</artifactId>
      <version>5.6.1</version>
      <exclusions>
        <exclusion><artifactId>commons-codec</artifactId><groupId>commons-codec</groupId></exclusion>
        <exclusion><artifactId>netty</artifactId><groupId>io.netty</groupId></exclusion>
        <exclusion><artifactId>compiler</artifactId><groupId>com.github.spullara.mustache.java</groupId></exclusion>
        <exclusion><artifactId>httpcore</artifactId><groupId>org.apache.httpcomponents</groupId></exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <resources>
      <resource>
        <directory>src/main/java</directory>
        <includes>
          <include>**/*.properties</include>
          <include>**/*.xml</include>
        </includes>
        <filtering>false</filtering>
      </resource>
    </resources>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-surefire-plugin</artifactId>
        <configuration>
          <!-- assumption: the two boolean flags in the original are skipTests and testFailureIgnore -->
          <skipTests>true</skipTests>
          <testFailureIgnore>true</testFailureIgnore>
        </configuration>
      </plugin>
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.3.2</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <archive>
            <manifest>
              <mainClass>com.kexion.mapReduce.hbasetohbase.HbaseToHbaseDriver</mainClass>
            </manifest>
          </archive>
          <descriptors>
            <descriptor>assembly.xml</descriptor>
          </descriptors>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

**2. Write the job:**

package com.kexion.mapReduce.hbasetohbase;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import com.kexion.mapReduce.hbasetohdfs.HbaseToHdfs;
import com.kexion.mapReduce.hbasetohdfs.MyMapper;
import com.kexion.utils.HbaseUtils;
import com.kexion.utils.OracleConnect;
public class HbaseToHbaseDriver {
    private static PreparedStatement stm = null;
    private static ResultSet rs = null;
    private static PreparedStatement stm_clo = null;
    private static ResultSet rs_clo = null;

    public static void main(String[] args) throws Exception {
        // 1. Get all sjzybh (resource table IDs) from Oracle
        List<String> sjzybhs = new ArrayList<>();
        //String sql = "select sjzybh from T_ZYK_XGSJX where sjzybh in ('D-340500170000-ZNB-17-3400032','D-340500170000-ZNB-17-1400034','D-340500170000-ZNB-17-2100002','D-340500170000-ZNB-17-2300011') group by sjzybh";
        String sql = "select sjzybh from T_ZYK_XGSJX where sjzybh in ('D-340500170000-ZNB-17-3400032') group by sjzybh";
        Connection connection = OracleConnect.getConn();
        stm = connection.prepareStatement(sql);
        rs = stm.executeQuery();
        while (rs.next()) {
            sjzybhs.add(rs.getString("sjzybh"));
        }
        OracleConnect.colseResource(connection, stm, rs);
        System.out.println("-----------------------");
        // 2. Launch one MapReduce job per existing resource table
        for (String sjzybh : sjzybhs) {
            HbaseUtils hbaseUtils = new HbaseUtils();
            boolean exists = hbaseUtils.tableExists(sjzybh);
            if (exists) {
                String cloums_sql = "SELECT glid,zykid,zwmc,ssmlsjxywmc,sjzybh FROM T_ZYK_XGSJX WHERE SJZYBH='" + sjzybh + "'";
                Connection connection_cloum = OracleConnect.getConn();
                stm_clo = connection_cloum.prepareStatement(cloums_sql);
                rs_clo = stm_clo.executeQuery();
                String cloums_string = "";
                String cloums_info = "";
                while (rs_clo.next()) {
                    cloums_string += "," + rs_clo.getString("ssmlsjxywmc");
                    cloums_info += "|" + rs_clo.getInt("zykid") + "," + rs_clo.getString("zwmc") + "," + rs_clo.getString("ssmlsjxywmc");
                }
                OracleConnect.colseResource(connection_cloum, stm_clo, rs_clo);
                cloums_string = cloums_string.substring(1);
                String[] strings = cloums_string.split(",");
                String hbaseTableName = sjzybh;
                Configuration config = HBaseConfiguration.create();
                config.set("hbase.zookeeper.quorum", "hadoop101,hadoop102,hadoop103");
                config.set("hbase.zookeeper.property.clientPort", "2181");
                //config.set("zookeeper.znode.parent", "/hbase");
                config.set("hbase.rootdir", "hdfs://mycluster/hbase");
                config.set("table_name", hbaseTableName);
                config.set("cloums_info", cloums_info.substring(1));
                config.set("index", "bshk");
                Job job = Job.getInstance(config, "Hbase2HbaseMrTest");
                job.setJarByClass(HbaseToHbaseDriver.class);
                Scan scan = new Scan();
                scan.addFamily(Bytes.toBytes("A"));
                for (String str : strings) {
                    scan.addColumn(Bytes.toBytes("A"), Bytes.toBytes(str));
                }
                scan.setCaching(500);
                scan.setCacheBlocks(false);
                TableMapReduceUtil.initTableMapperJob(hbaseTableName, scan, ReadHbaseMapper.class, ImmutableBytesWritable.class, Put.class, job);
                TableMapReduceUtil.initTableReducerJob("aaa_bak", HbaseWriteReducer.class, job);
                job.setNumReduceTasks(3);
                boolean b = job.waitForCompletion(true);
                if (b) {
                    System.out.println("hbase to hbase ok");
                }
            }
        }
    }
}
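
The driver also depends on two small utility classes that the post does not show: `OracleConnect` (plain JDBC access to the Oracle metadata table) and `HbaseUtils` (an existence check for the source HBase table). The following is only a minimal sketch of what they might look like; the Oracle URL, user and password are placeholders, not values from the original project.

// OracleConnect.java (sketch) - URL, user and password below are placeholders
package com.kexion.utils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

public class OracleConnect {
    private static final String URL = "jdbc:oracle:thin:@//oracle-host:1521/orcl"; // placeholder
    private static final String USER = "user";         // placeholder
    private static final String PASSWORD = "password"; // placeholder

    public static Connection getConn() throws Exception {
        Class.forName("oracle.jdbc.driver.OracleDriver");
        return DriverManager.getConnection(URL, USER, PASSWORD);
    }

    // the driver calls this "colseResource", so the sketch keeps that name
    public static void colseResource(Connection conn, PreparedStatement stm, ResultSet rs) {
        try { if (rs != null) rs.close(); } catch (Exception ignored) { }
        try { if (stm != null) stm.close(); } catch (Exception ignored) { }
        try { if (conn != null) conn.close(); } catch (Exception ignored) { }
    }
}

// HbaseUtils.java (sketch) - checks whether the source table exists,
// reusing the same ZooKeeper quorum the driver configures
package com.kexion.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class HbaseUtils {
    public boolean tableExists(String tableName) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop101,hadoop102,hadoop103");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        try (org.apache.hadoop.hbase.client.Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            return admin.tableExists(TableName.valueOf(tableName));
        }
    }
}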
3. Write the map (the map method of ReadHbaseMapper):
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
    String tableName = context.getConfiguration().get("table_name");
    String cloums_info = context.getConfiguration().get("cloums_info");
    System.out.println(cloums_info);
    String[] split = cloums_info.split("\\|");
    // get the primary-key value (row key)
    String row = Bytes.toString(CellUtil.cloneRow(value.rawCells()[0]));
    for (String string : split) {
        System.out.println(string);
        String cloum = string.split(",")[2];   // column name
        String zykid = string.split(",")[0];   // resource library id
        String sylx = string.split(",")[1];    // index type
        System.out.println("-------------22-----------------");
        List<String> array = StringUtils.getStringArray(split, string);
        // build the identifier rowkey
        String rowkey = "";
        String rowValue = "";
        String qualifier = "";
        for (Cell cell : value.rawCells()) {
            qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            rowValue = Bytes.toString(CellUtil.cloneValue(cell));
            System.out.println(rowValue);
            if (cloum.equals(qualifier) && rowValue != null && !"".equals(rowValue)) {
                rowkey = StringUtils.getNumStr() + "" + StringUtils.getDateStr() + "" + (zykid.length() == 2 ? zykid : ("0" + zykid)) + "_" + rowValue;
                continue;
            }
        }
        if ("".equals(rowkey)) {
            continue;
        }
        Put put = new Put(Bytes.toBytes(rowkey));
        put.add(Bytes.toBytes("A"), Bytes.toBytes("SYLX"), Bytes.toBytes(sylx));
        put.add(Bytes.toBytes("A"), Bytes.toBytes("SYZ"), Bytes.toBytes(rowValue));
        put.add(Bytes.toBytes("A"), Bytes.toBytes("LYZD"), Bytes.toBytes(qualifier));
        put.add(Bytes.toBytes("A"), Bytes.toBytes("LYB"), Bytes.toBytes(tableName));
        put.add(Bytes.toBytes("A"), Bytes.toBytes("ZJZ"), Bytes.toBytes(row));
        put.add(Bytes.toBytes("A"), Bytes.toBytes("RKSJ"), Bytes.toBytes(StringUtils.getDateStr()));
        String glsylx = "";
        String glsyz = "";
        // collect the related indexes
        for (String cloumInfo : array) {
            String glsy_cloum = cloumInfo.split(",")[2];  // column name
            String glsy_sylx = cloumInfo.split(",")[1];   // index type
            for (Cell cell : value.rawCells()) {
                String glsy_qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                String glsy_rowValue = Bytes.toString(CellUtil.cloneValue(cell));
                if (glsy_cloum.equals(glsy_qualifier) && glsy_rowValue != null && !"".equals(glsy_rowValue)) {
                    glsylx += "," + glsy_sylx;
                    glsyz += "," + glsy_rowValue;
                    continue;
                }
            }
        }
        Map<String, Object> json = new HashMap<String, Object>();
        Map<String, Object> infoJson = new HashMap<String, Object>();
        json.put("SYLX", sylx);
        json.put("SYZ", rowValue);
        json.put("LYZD", qualifier);
        json.put("LYB", tableName);
        json.put("ZJZ", row);
        json.put("RKSJ", StringUtils.getDateStr());
        if (!"".equals(glsyz)) {
            put.add(Bytes.toBytes("A"), Bytes.toBytes("GLSYLX"), Bytes.toBytes(glsylx.substring(1)));
            put.add(Bytes.toBytes("A"), Bytes.toBytes("GLSYZ"), Bytes.toBytes(glsyz.substring(1)));
            json.put("GLSYLX", glsylx.substring(1));
            json.put("GLSYZ", glsyz.substring(1));
        }
        infoJson.put("info", json);
        addUpdateBuilderToBulk(client.prepareUpdate(index, index, rowkey).setDocAsUpsert(true).setDoc(infoJson));
        ImmutableBytesWritable rowkeyWritable = new ImmutableBytesWritable(Bytes.toBytes(rowkey));
        context.write(rowkeyWritable, put);
    }
}
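
The `client`, `index` and `addUpdateBuilderToBulk(...)` used at the end of the map method are fields and a helper of the mapper class that the post does not show. Below is a minimal sketch of how `ReadHbaseMapper` could be declared against the 5.6 transport client listed in the pom; the ES cluster name, node address and bulk-flush threshold are assumptions, not values taken from the original code.

import java.io.IOException;
import java.net.InetAddress;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.update.UpdateRequestBuilder;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

// Sketch only: output key/value types match what the driver registers in initTableMapperJob.
public class ReadHbaseMapper extends TableMapper<ImmutableBytesWritable, Put> {

    private TransportClient client;
    private BulkRequestBuilder bulk;
    private String index;

    @Override
    protected void setup(Context context) throws IOException {
        index = context.getConfiguration().get("index");   // "bshk" in the driver
        Settings settings = Settings.builder()
                .put("cluster.name", "my-es-cluster")       // assumed cluster name
                .build();
        client = new PreBuiltTransportClient(settings)
                .addTransportAddress(new InetSocketTransportAddress(
                        InetAddress.getByName("hadoop101"), 9300)); // assumed ES node
        bulk = client.prepareBulk();
    }

    // Buffer the upsert and flush every 1000 documents (threshold is an assumption).
    private void addUpdateBuilderToBulk(UpdateRequestBuilder builder) {
        bulk.add(builder);
        if (bulk.numberOfActions() >= 1000) {
            bulk.get();
            bulk = client.prepareBulk();
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException {
        if (bulk.numberOfActions() > 0) {
            bulk.get();
        }
        client.close();
    }

    // ... the map() method shown above goes here ...
}

In this layout the map method becomes the body of `ReadHbaseMapper.map()`, and each bulk flush pushes the upsert documents into the index configured by the driver.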
4. Write the reduce (the reduce method of HbaseWriteReducer):
@Override
protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
    // write every Put of the group into the target table
    for (Put put : values) {
        System.out.println("running reduce");
        List<Cell> list = put.get(Bytes.toBytes("A"), Bytes.toBytes("GLSYZ"));
        for (Cell cell : list) {
            String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            String rowValue = Bytes.toString(CellUtil.cloneValue(cell));
        }
        context.write(NullWritable.get(), put);
    }
}
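
The reducer class itself is not shown either; for `initTableReducerJob("aaa_bak", HbaseWriteReducer.class, job)` to accept it, it presumably extends `TableReducer`. A minimal skeleton consistent with the reduce method above:

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

public class HbaseWriteReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    // the reduce(...) method shown above goes here; it forwards each Put
    // unchanged to the output table ("aaa_bak" in the driver)
}

With the assembly plugin configured in the pom (main class `HbaseToHbaseDriver`), `mvn package` produces a runnable jar that can then be submitted to the cluster with `hadoop jar`.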