hadoop + es + hbase: writing HBase data into ES with MapReduce


1. Add dependencies (pom.xml):


<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.kexion</groupId>
  <artifactId>HBaseMapReduce</artifactId>
  <version>1.0-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>HBaseMapReduce</name>
  <url>http://www.example.com</url>

  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.2</version>
      <exclusions>
        <exclusion><groupId>org.codehaus.jackson</groupId><artifactId>jackson-xc</artifactId></exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.7.2</version>
      <exclusions>
        <exclusion><groupId>io.netty</groupId><artifactId>netty</artifactId></exclusion>
        <exclusion><groupId>org.slf4j</groupId><artifactId>slf4j-api</artifactId></exclusion>
        <exclusion><groupId>com.google.guava</groupId><artifactId>guava</artifactId></exclusion>
        <exclusion><groupId>org.slf4j</groupId><artifactId>slf4j-log4j12</artifactId></exclusion>
        <exclusion><groupId>log4j</groupId><artifactId>log4j</artifactId></exclusion>
        <exclusion><groupId>commons-logging</groupId><artifactId>commons-logging</artifactId></exclusion>
        <exclusion><groupId>org.apache.commons</groupId><artifactId>commons-compress</artifactId></exclusion>
        <exclusion><groupId>org.apache.httpcomponents</groupId><artifactId>httpclient</artifactId></exclusion>
        <exclusion><groupId>org.codehaus.jackson</groupId><artifactId>jackson-core-asl</artifactId></exclusion>
        <exclusion><groupId>commons-codec</groupId><artifactId>commons-codec</artifactId></exclusion>
        <exclusion><groupId>org.codehaus.jackson</groupId><artifactId>jackson-mapper-asl</artifactId></exclusion>
        <exclusion><groupId>commons-collections</groupId><artifactId>commons-collections</artifactId></exclusion>
        <exclusion><groupId>commons-lang</groupId><artifactId>commons-lang</artifactId></exclusion>
        <exclusion><groupId>org.codehaus.jackson</groupId><artifactId>jackson-jaxrs</artifactId></exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.7.2</version>
      <exclusions>
        <exclusion><groupId>com.google.code.findbugs</groupId><artifactId>jsr305</artifactId></exclusion>
        <exclusion><groupId>io.netty</groupId><artifactId>netty-all</artifactId></exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>1.3.1</version>
      <exclusions>
        <exclusion><groupId>commons-logging</groupId><artifactId>commons-logging</artifactId></exclusion>
        <exclusion><groupId>junit</groupId><artifactId>junit</artifactId></exclusion>
        <exclusion><groupId>com.google.guava</groupId><artifactId>guava</artifactId></exclusion>
        <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-common</artifactId></exclusion>
        <exclusion><groupId>org.slf4j</groupId><artifactId>slf4j-api</artifactId></exclusion>
        <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-auth</artifactId></exclusion>
        <exclusion><groupId>io.netty</groupId><artifactId>netty-all</artifactId></exclusion>
        <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-mapreduce-client-core</artifactId></exclusion>
        <exclusion><groupId>commons-codec</groupId><artifactId>commons-codec</artifactId></exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-core</artifactId>
      <version>2.9.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>1.3.1</version>
      <exclusions>
        <exclusion><groupId>io.netty</groupId><artifactId>netty-all</artifactId></exclusion>
        <exclusion><groupId>com.google.guava</groupId><artifactId>guava</artifactId></exclusion>
        <exclusion><groupId>commons-logging</groupId><artifactId>commons-logging</artifactId></exclusion>
        <exclusion><groupId>commons-codec</groupId><artifactId>commons-codec</artifactId></exclusion>
        <exclusion><groupId>junit</groupId><artifactId>junit</artifactId></exclusion>
        <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-common</artifactId></exclusion>
        <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-auth</artifactId></exclusion>
        <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId></exclusion>
        <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-hdfs</artifactId></exclusion>
        <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-mapreduce-client-core</artifactId></exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>
      <version>1.3.1</version>
      <exclusions>
        <exclusion><groupId>com.google.guava</groupId><artifactId>guava</artifactId></exclusion>
        <exclusion><groupId>commons-logging</groupId><artifactId>commons-logging</artifactId></exclusion>
        <exclusion><groupId>commons-codec</groupId><artifactId>commons-codec</artifactId></exclusion>
        <exclusion><groupId>junit</groupId><artifactId>junit</artifactId></exclusion>
        <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-common</artifactId></exclusion>
        <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-mapreduce-client-core</artifactId></exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>com.oracle</groupId>
      <artifactId>ojdbc14</artifactId>
      <version>10.2.0.2.0</version>
    </dependency>

    <dependency>
      <groupId>org.elasticsearch</groupId>
      <artifactId>elasticsearch</artifactId>
      <version>5.6.1</version>
    </dependency>

    <dependency>
      <groupId>io.netty</groupId>
      <artifactId>netty-all</artifactId>
      <version>4.1.13.Final</version>
    </dependency>

    <dependency>
      <groupId>org.elasticsearch.client</groupId>
      <artifactId>transport</artifactId>
      <version>5.6.1</version>
      <exclusions>
        <exclusion><groupId>commons-codec</groupId><artifactId>commons-codec</artifactId></exclusion>
        <exclusion><groupId>io.netty</groupId><artifactId>netty</artifactId></exclusion>
        <exclusion><groupId>com.github.spullara.mustache.java</groupId><artifactId>compiler</artifactId></exclusion>
        <exclusion><groupId>org.apache.httpcomponents</groupId><artifactId>httpcore</artifactId></exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
  <build>
    <resources>
      <resource>
        <directory>src/main/java</directory>
        <includes>
          <include>**/*.properties</include>
          <include>**/*.xml</include>
        </includes>
        <filtering>false</filtering>
      </resource>
    </resources>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-surefire-plugin</artifactId>
        <configuration>
          <skip>true</skip>
          <skipTests>true</skipTests>
        </configuration>
      </plugin>
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.3.2</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <archive>
            <manifest>
              <mainClass>com.kexion.mapReduce.hbasetohbase.HbaseToHbaseDriver</mainClass>
            </manifest>
          </archive>
          <descriptors>
            <descriptor>assembly.xml</descriptor>
          </descriptors>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

2. Write the job:

package com.kexion.mapReduce.hbasetohbase;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;

import com.kexion.mapReduce.hbasetohdfs.HbaseToHdfs;
import com.kexion.mapReduce.hbasetohdfs.MyMapper;
import com.kexion.utils.HbaseUtils;
import com.kexion.utils.OracleConnect;

public class HbaseToHbaseDriver {

    private static PreparedStatement stm = null;
    private static ResultSet rs = null;
    private static PreparedStatement stm_clo = null;
    private static ResultSet rs_clo = null;

    public static void main(String[] args) throws Exception {
        // 1. Get every sjzybh (resource id) from Oracle
        List<String> sjzybhs = new ArrayList<String>();
        //String sql = "select sjzybh from T_ZYK_XGSJX where sjzybh in ('D-340500170000-ZNB-17-3400032','D-340500170000-ZNB-17-1400034','D-340500170000-ZNB-17-2100002','D-340500170000-ZNB-17-2300011') group by sjzybh";
        String sql = "select sjzybh from T_ZYK_XGSJX where sjzybh in ('D-340500170000-ZNB-17-3400032') group by sjzybh";
        Connection connection = OracleConnect.getConn();
        stm = connection.prepareStatement(sql);
        rs = stm.executeQuery();
        while (rs.next()) {
            sjzybhs.add(rs.getString("sjzybh"));
        }
        OracleConnect.colseResource(connection, stm, rs);
        System.out.println("-----------------------");
        // 2. For each resource id that has an HBase table, submit one MapReduce job
        for (String sjzybh : sjzybhs) {
            HbaseUtils hbaseUtils = new HbaseUtils();
            boolean exists = hbaseUtils.tableExists(sjzybh);
            if (exists) {
                // Column metadata for this table
                String cloums_sql = "SELECT glid,zykid,zwmc,ssmlsjxywmc,sjzybh FROM T_ZYK_XGSJX WHERE SJZYBH='" + sjzybh + "'";

                Connection connection_cloum = OracleConnect.getConn();
                stm_clo = connection_cloum.prepareStatement(cloums_sql);
                rs_clo = stm_clo.executeQuery();
                String cloums_string = "";
                String cloums_info = "";
                while (rs_clo.next()) {
                    cloums_string += "," + rs_clo.getString("ssmlsjxywmc");
                    cloums_info += "|" + rs_clo.getInt("zykid") + "," + rs_clo.getString("zwmc") + "," + rs_clo.getString("ssmlsjxywmc");
                }
                OracleConnect.colseResource(connection_cloum, stm_clo, rs_clo);

                cloums_string = cloums_string.substring(1);
                String[] strings = cloums_string.split(",");
                String hbaseTableName = sjzybh;
                Configuration config = HBaseConfiguration.create();
                config.set("hbase.zookeeper.quorum", "hadoop101,hadoop102,hadoop103");
                config.set("hbase.zookeeper.property.clientPort", "2181");
                //config.set("zookeeper.znode.parent", "/hbase");
                config.set("hbase.rootdir", "hdfs://mycluster/hbase");
                config.set("table_name", hbaseTableName);
                config.set("cloums_info", cloums_info.substring(1));
                config.set("index", "bshk");
                Job job = Job.getInstance(config, "Hbase2HbaseMrTest");
                job.setJarByClass(HbaseToHbaseDriver.class);
                Scan scan = new Scan();
                scan.addFamily(Bytes.toBytes("A"));
                for (String str : strings) {
                    scan.addColumn(Bytes.toBytes("A"), Bytes.toBytes(str));
                }
                scan.setCaching(500);
                scan.setCacheBlocks(false);

                TableMapReduceUtil.initTableMapperJob(hbaseTableName, scan, ReadHbaseMapper.class, ImmutableBytesWritable.class, Put.class, job);
                TableMapReduceUtil.initTableReducerJob("aaa_bak", HbaseWriteReducer.class, job);
                job.setNumReduceTasks(3);
                boolean b = job.waitForCompletion(true);
                if (b) {
                    System.out.println("hbase to hbase ok");
                }
            }
        }
    }
}
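The driver above references two helper classes, OracleConnect and HbaseUtils, that the post never shows. Below is a minimal sketch of what they might look like, assuming a plain JDBC connection through the ojdbc14 driver already declared in the pom and the HBase 1.3 Admin API; the JDBC URL, user and password are placeholders, and the helper internals are assumptions rather than the author's actual code.

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ConnectionFactory;

// Hypothetical sketch of com.kexion.utils.OracleConnect (each class in its own file in practice).
class OracleConnect {
    private static final String URL = "jdbc:oracle:thin:@//oracle-host:1521/orcl"; // placeholder
    private static final String USER = "user";         // placeholder
    private static final String PASSWORD = "password"; // placeholder

    public static Connection getConn() throws Exception {
        Class.forName("oracle.jdbc.driver.OracleDriver");
        return DriverManager.getConnection(URL, USER, PASSWORD);
    }

    // The driver calls this method "colseResource", so the sketch keeps that name.
    public static void colseResource(Connection conn, PreparedStatement stm, ResultSet rs) {
        try { if (rs != null) rs.close(); } catch (Exception ignored) { }
        try { if (stm != null) stm.close(); } catch (Exception ignored) { }
        try { if (conn != null) conn.close(); } catch (Exception ignored) { }
    }
}

// Hypothetical sketch of com.kexion.utils.HbaseUtils.
class HbaseUtils {
    // Returns true if an HBase table with the given name exists.
    public boolean tableExists(String tableName) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop101,hadoop102,hadoop103");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        try (org.apache.hadoop.hbase.client.Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin()) {
            return admin.tableExists(TableName.valueOf(tableName));
        }
    }
}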

3. Write the map:

@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
    String tableName = context.getConfiguration().get("table_name");
    String cloums_info = context.getConfiguration().get("cloums_info");
    System.out.println(cloums_info);
    String[] split = cloums_info.split("\\|");
    // Row key of the source row
    String row = Bytes.toString(CellUtil.cloneRow(value.rawCells()[0]));
    for (String string : split) {
        System.out.println(string);
        String cloum = string.split(",")[2];  // column name
        String zykid = string.split(",")[0];  // resource library id
        String sylx = string.split(",")[1];   // index type
        System.out.println("-------------22-----------------");
        List<String> array = StringUtils.getStringArray(split, string);
        // Build the identifier rowkey
        String rowkey = "";
        String rowValue = "";
        String qualifier = "";
        for (Cell cell : value.rawCells()) {
            qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            rowValue = Bytes.toString(CellUtil.cloneValue(cell));
            System.out.println(rowValue);
            if (cloum.equals(qualifier) && rowValue != null && !"".equals(rowValue)) {
                rowkey = StringUtils.getNumStr() + "_" + StringUtils.getDateStr() + "_" + (zykid.length() == 2 ? zykid : ("0" + zykid)) + "_" + rowValue;
                continue;
            }
        }
        if ("".equals(rowkey)) {
            continue;
        }
        Put put = new Put(Bytes.toBytes(rowkey));
        put.add(Bytes.toBytes("A"), Bytes.toBytes("SYLX"), Bytes.toBytes(sylx));
        put.add(Bytes.toBytes("A"), Bytes.toBytes("SYZ"), Bytes.toBytes(rowValue));
        put.add(Bytes.toBytes("A"), Bytes.toBytes("LYZD"), Bytes.toBytes(qualifier));
        put.add(Bytes.toBytes("A"), Bytes.toBytes("LYB"), Bytes.toBytes(tableName));
        put.add(Bytes.toBytes("A"), Bytes.toBytes("ZJZ"), Bytes.toBytes(row));
        put.add(Bytes.toBytes("A"), Bytes.toBytes("RKSJ"), Bytes.toBytes(StringUtils.getDateStr()));
        String glsylx = "";
        String glsyz = "";
        // Collect the related indexes
        for (String cloumInfo : array) {
            String glsy_cloum = cloumInfo.split(",")[2]; // column name
            String glsy_sylx = cloumInfo.split(",")[1];  // index type
            for (Cell cell : value.rawCells()) {
                String glsy_qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                String glsy_rowValue = Bytes.toString(CellUtil.cloneValue(cell));
                if (glsy_cloum.equals(glsy_qualifier) && glsy_rowValue != null && !"".equals(glsy_rowValue)) {
                    glsylx += "," + glsy_sylx;
                    glsyz += "," + glsy_rowValue;
                    continue;
                }
            }
        }
        Map<String, Object> json = new HashMap<String, Object>();
        Map<String, Object> infoJson = new HashMap<String, Object>();
        json.put("SYLX", sylx);
        json.put("SYZ", rowValue);
        json.put("LYZD", qualifier);
        json.put("LYB", tableName);
        json.put("ZJZ", row);
        json.put("RKSJ", StringUtils.getDateStr());
        if (!"".equals(glsyz)) {
            put.add(Bytes.toBytes("A"), Bytes.toBytes("GLSYLX"), Bytes.toBytes(glsylx.substring(1)));
            put.add(Bytes.toBytes("A"), Bytes.toBytes("GLSYZ"), Bytes.toBytes(glsyz.substring(1)));
            json.put("GLSYLX", glsylx.substring(1));
            json.put("GLSYZ", glsyz.substring(1));
        }
        infoJson.put("info", json);
        // Queue the same record as an upsert into Elasticsearch
        addUpdateBuilderToBulk(client.prepareUpdate(index, index, rowkey).setDocAsUpsert(true).setDoc(infoJson));
        ImmutableBytesWritable rowkeyWritable = new ImmutableBytesWritable(Bytes.toBytes(rowkey));
        context.write(rowkeyWritable, put);
    }
}
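The map() body above uses an Elasticsearch client field (client), the target index name (index, set to "bshk" by the driver) and a helper addUpdateBuilderToBulk(), none of which are shown in the post. Below is a minimal sketch of how they could be wired into the mapper class, assuming an ES 5.6 TransportClient opened in setup() and flushed and closed in cleanup(); the cluster name, the node address hadoop101:9300 and the batch size of 1000 are assumptions for illustration, not values from the original article.

import java.io.IOException;
import java.net.InetAddress;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.update.UpdateRequestBuilder;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

public class ReadHbaseMapper extends TableMapper<ImmutableBytesWritable, Put> {

    private TransportClient client;
    private BulkRequestBuilder bulkRequest;
    private String index;

    @Override
    protected void setup(Context context) throws IOException {
        index = context.getConfiguration().get("index"); // "bshk", set by the driver
        // Placeholder cluster name and node address; adjust to the actual ES 5.6 cluster.
        Settings settings = Settings.builder().put("cluster.name", "my-es-cluster").build();
        client = new PreBuiltTransportClient(settings)
                .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName("hadoop101"), 9300));
        bulkRequest = client.prepareBulk();
    }

    // Buffers upserts and flushes them to ES in batches of 1000 actions.
    private void addUpdateBuilderToBulk(UpdateRequestBuilder update) {
        bulkRequest.add(update);
        if (bulkRequest.numberOfActions() >= 1000) {
            bulkRequest.get();
            bulkRequest = client.prepareBulk();
        }
    }

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // ... body as shown above ...
    }

    @Override
    protected void cleanup(Context context) throws IOException {
        // Flush whatever is still buffered and release the client.
        if (bulkRequest.numberOfActions() > 0) {
            bulkRequest.get();
        }
        client.close();
    }
}

TableMapper<ImmutableBytesWritable, Put> matches the output key and value classes the driver passes to initTableMapperJob.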
4. Write the reduce:

@Override
protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
    // Write each Put of this group into the target table
    for (Put put : values) {
        System.out.println("executing reduce");
        List<Cell> list = put.get(Bytes.toBytes("A"), Bytes.toBytes("GLSYZ"));
        for (Cell cell : list) {
            String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            String rowValue = Bytes.toString(CellUtil.cloneValue(cell));
        }
        context.write(NullWritable.get(), put);
    }
}
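Only the reduce() body is shown in the post. Since the driver registers the class with TableMapReduceUtil.initTableReducerJob("aaa_bak", HbaseWriteReducer.class, job), the surrounding class presumably looks like the sketch below: a TableReducer keyed by ImmutableBytesWritable over Put values, emitting NullWritable keys.

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

// Sketch of the reducer class wrapper registered for the output table "aaa_bak".
public class HbaseWriteReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        // Simplified body; the full version above also reads the GLSYZ cells for debugging.
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}

Note that the Elasticsearch upserts happen on the map side; the reducer only writes the Puts into the target HBase table.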
