验证hive2es的一个例子

package com.peidw.hive2es;

import com.alibaba.fastjson.JSONObject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.htrace.commons.logging.Log;
import org.apache.htrace.commons.logging.LogFactory;
import org.elasticsearch.hadoop.mr.EsOutputFormat;

import java.io.IOException;

/**
 * MapReduce job that copies the Hive table mydb.tmp_stud1 into Elasticsearch.
 *
 * <p>Each tab-separated line of the table's text files is turned into a JSON
 * document and handed to {@link EsOutputFormat}.
 *
 * <p>Default table location:
 * hdfs://192.168.177.139:8888/user/hive/warehouse/mydb.db/tmp_stud1
 */
public class Hive2Es2TmpStud1 {
    private static final Log LOG = LogFactory.getLog(Hive2Es2TmpStud1.class);

    /** Input path used when no path is passed on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "hdfs://192.168.177.139:8888/user/hive/warehouse/mydb.db/tmp_stud1";

    /**
     * Converts one tab-separated input line into a JSON string.
     *
     * <p>The type arguments are required: with a raw {@code Mapper} the
     * {@code map(LongWritable, Text, Context)} method below would not override
     * the framework's {@code map} method.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, NullWritable, Text> {

        /** JSON field names, in the same order as the columns of an input line. */
        private static final String[] FIELD_NAMES = {"name", "vage", "height", "isok"};

        /** Counter group / name used to report lines that could not be converted. */
        private static final String COUNTER_GROUP = "Hive2Es2TmpStud1";
        private static final String MALFORMED_COUNTER = "MALFORMED_ROWS";

        /**
         * Emits one JSON document per well-formed input line.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one line of the table's text file, columns separated by tabs
         * @param context task context used to emit the JSON document
         * @throws IOException          if writing the output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            LOG.info("value.toString().trim().getBytes()--->" + line.trim());

            String[] columns = StringUtils.split(line, '\t');
            if (columns.length < FIELD_NAMES.length) {
                // A short or blank line would otherwise throw
                // ArrayIndexOutOfBoundsException and fail the whole task.
                context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                LOG.warn("Skipping line with " + columns.length + " column(s), expected "
                        + FIELD_NAMES.length + ": " + line);
                return;
            }

            JSONObject document = new JSONObject();
            for (int i = 0; i < FIELD_NAMES.length; i++) {
                document.put(FIELD_NAMES[i], columns[i]);
            }

            context.write(NullWritable.get(), new Text(document.toJSONString()));
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success and 1 on failure.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a job class cannot be loaded
     * @throws InterruptedException   if waiting for job completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        Configuration conf = new Configuration();
        conf.setBoolean("mapred.map.tasks.speculative.execution", false);
        conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
        conf.set("es.nodes", "centos.hadoop:9200");
        conf.set("es.resource", "tmp_stud1/tmp_stud1");
        conf.set("es.mapping.id", "name");
        // The mapper already emits serialized JSON strings.
        conf.set("es.input.json", "true");
        conf.setInt("es.mapred.number_of_shards", 2);
        conf.setInt("es.mapred.number_of_replicas", 0);

        Job job = Job.getInstance(conf, "hadoop es write test");
        job.setMapperClass(Hive2Es2TmpStud1.MyMapper.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(EsOutputFormat.class);

        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Input path: first command-line argument if present, otherwise the default table location.
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        FileInputFormat.setInputPaths(job, new Path(inputPath));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

 

pom



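<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.peidw</groupId>
    <artifactId>mr</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <hadoop.version>2.7.7</hadoop.version>
        <hbase.version>1.4.13</hbase.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-minicluster</artifactId>
            <version>${hadoop.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-api</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch-hadoop</artifactId>
            <version>5.5.0</version>
            <exclusions>
                <exclusion>
                    <artifactId>cascading-hadoop</artifactId>
                    <groupId>cascading</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>cascading-local</artifactId>
                    <groupId>cascading</groupId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.5</version>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.1.1</version>
                <configuration>
                    <descriptors>
                        <descriptor>src/main/resources/assembly.xml</descriptor>
                    </descriptors>
                    <archive>
                        <manifest>
                            <mainClass>com.peidw.hive2es.Hive2Es3</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>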


assembly.xml


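<assembly>
    <id>job</id>
    <formats>
        <format>jar</format>
    </formats>
    <includeBaseDirectory>false</includeBaseDirectory>
    <dependencySets>
        <dependencySet>
            <unpack>false</unpack>
            <scope>runtime</scope>
            <outputDirectory>lib</outputDirectory>
            <excludes>
                <exclude>${groupId}:${artifactId}</exclude>
            </excludes>
        </dependencySet>
        <dependencySet>
            <unpack>true</unpack>
            <includes>
                <include>${groupId}:${artifactId}</include>
            </includes>
        </dependencySet>
    </dependencySets>
</assembly>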

 

你可能感兴趣的:(ElasticSearch)