How do you write data into a Hive partitioned table in real time?

In projects we often need to write data into Hive in real time, so that it can be queried from Hive right away. How can this be done? There are currently three approaches:
1. Obtain Hive's connection details (metadata/HiveServer2 address, account and password) and write over JDBC.
2. Write the data directly into the HDFS directory backing the Hive table, then add a partition that maps to that directory.
3. Use the Spark API to write into Hive.
Below I walk through the first two approaches; the third is covered in my previous post:
Approach 1:
a. Operate HiveServer2 over JDBC. This approach is generally not recommended: it requires a HiveServer2 username and password, and most companies will not allow it because of the security risk.
b. Enable HiveServer2 authentication in hive-site.xml. The default is NONE; here XXXX stands for a custom authenticator:

<property>
  <name>hive.server2.authentication</name>
  <value>XXXX</value>
</property>
<property>
  <name>hive.server2.custom.authentication.class</name>
  <value>org.apache.hadoop.hive.contrib.auth.XXXXPasswdAuthenticator</value>
</property>

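For completeness, a custom authenticator only needs to implement Hive's PasswdAuthenticationProvider interface. The class below is a minimal sketch of my own, not the exact implementation from the original project; the hard-coded credentials are placeholders for whatever user store you actually validate against.

package org.apache.hadoop.hive.contrib.auth;

import javax.security.sasl.AuthenticationException;

import org.apache.hive.service.auth.PasswdAuthenticationProvider;

public class XXXXPasswdAuthenticator implements PasswdAuthenticationProvider {

    @Override
    public void Authenticate(String user, String password) throws AuthenticationException {
        // placeholder check; replace with a lookup against your own credential store
        if (!"custom".equals(user) || !"hive".equals(password)) {
            throw new AuthenticationException("Authentication failed for user: " + user);
        }
    }
}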
c. Implementation:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

public class Write2Hive {
    public static void main(String[] args) throws IOException {
        List<List<String>> dataList = new ArrayList<>();
        List<String> list = new ArrayList<>();
        list.add("11111");
        list.add("xiaofei");
        dataList.add(list);
        list = new ArrayList<>();
        list.add("2222");
        list.add("xiaolong");
        dataList.add(list);
        String dst = "/test/kk.txt";
        createFile(dst, dataList);
        loadData2Hive(dst);
    }

    /**
     * Write the data to HDFS so it can later be loaded into the Hive table.
     * Hive's default field delimiter "\001" is used.
     * @param dst      target HDFS path
     * @param dataList rows to write
     * @throws IOException
     */
    public static void createFile(String dst, List<List<String>> dataList) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path dstPath = new Path(dst); // target path
        // open an output stream
        FSDataOutputStream outputStream = fs.create(dstPath);
        StringBuilder sb = new StringBuilder();
        for (List<String> data : dataList) {
            for (String value : data) {
                sb.append(value).append("\001");
            }
            sb.deleteCharAt(sb.length() - 1); // drop the trailing field delimiter
            sb.append("\n");
        }
        sb.deleteCharAt(sb.length() - 1); // drop the trailing newline
        byte[] contents = sb.toString().getBytes();
        outputStream.write(contents);
        outputStream.close();
        fs.close();
        System.out.println("File created successfully!");
    }

    /**
     * Load the HDFS file into the Hive table; the database is given in the JDBC URL.
     * @param dst HDFS path of the file to load
     */
    public static void loadData2Hive(String dst) {
        String JDBC_DRIVER = "org.apache.hive.jdbc.HiveDriver";
        String CONNECTION_URL = "jdbc:hive2://node2:10000/db;auth=noSasl";
        String username = "custom";
        String password = "hive";
        Connection con = null;
        try {
            Class.forName(JDBC_DRIVER);
            con = DriverManager.getConnection(CONNECTION_URL, username, password);
            Statement stmt = con.createStatement();

            String sql = "load data inpath '" + dst + "' into table population.population_information";

            stmt.execute(sql);
            System.out.println("Data loaded into the Hive table successfully!");
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } finally {
            // close the connection
            if (con != null) {
                try {
                    con.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
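Since the goal of this post is partitioned tables, note that the same JDBC route can load a file straight into a specific partition. The helper below is a sketch of my own, reusing the placeholder connection details above and the test2(day, hour) table defined later in this post; the file's delimiter must match the table's (a comma for test2).

/**
 * Sketch: load an HDFS file into one partition of the test2 table over JDBC.
 * URL and credentials are the same placeholders as above; adjust for your cluster.
 */
public static void loadData2HivePartition(String dst, String day, String hour) throws Exception {
    Class.forName("org.apache.hive.jdbc.HiveDriver");
    try (Connection con = DriverManager.getConnection("jdbc:hive2://node2:10000/db;auth=noSasl", "custom", "hive");
         Statement stmt = con.createStatement()) {
        String sql = "load data inpath '" + dst + "' into table test2"
                + " partition(day='" + day + "', hour='" + hour + "')";
        stmt.execute(sql);
    }
}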

Approach 2:
Write directly into the HDFS directory backing a Hive external table.
1. Create the table. Note that the field delimiter is set to a comma and the table is partitioned by day and hour:
create external table test2(id String,name string,addr string) partitioned by(day string,hour String) row format delimited fields terminated by ',' location 'hdfs://node3:8020/app/hive/test2/';
2. Implementation:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

public class Write2Hive {

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // the following settings are required, otherwise the append below fails
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        conf.set("dfs.support.append", "true");
        // get the partition time, e.g. time=201809161559000 gives day=20180916, hour=15
        String time = DateTimeTool.GetDateTimeNow();
        String dayTime = time.substring(0, 8);
        String hourTime = time.substring(8, 10);
        // target path for the data; test2 is the table name
        String filePath = "hdfs://node3:8020/app/hive/test2/" + dayTime + "/" + hourTime + "/test2";
        Path path = new Path(filePath);
        try {
            FileSystem fileSystem = FileSystem.get(new URI(filePath), conf);
            if (!fileSystem.exists(path)) {
                // if the file does not exist, create it, close the stream, then reopen it.
                // Why close and reopen? The stream from the create call is still held,
                // and without refreshing it the subsequent appends fail.
                fileSystem.create(path, true);
                fileSystem.close();
                fileSystem = FileSystem.get(new URI(filePath), conf);
            }
            // FSDataOutputStream out = fileSystem.create(path, false); // for a one-off write
            // append in binary mode to avoid character-set issues
            FSDataOutputStream append = fileSystem.append(path);
            // a scheduled shell job later maps the directory into the table, e.g.:
            // alter table test4 add partition(day='20180906') location '/app/hive/test4/20180906';
            append.write("1111".getBytes(), 0, "1111".getBytes().length);
            append.write(",".getBytes(), 0, ",".getBytes().length);
            append.write("xiaofei".getBytes(), 0, "xiaofei".getBytes().length);
            append.write(",".getBytes(), 0, ",".getBytes().length);
            append.write("henan".getBytes(), 0, "henan".getBytes().length);
            append.write("\n".getBytes(), 0, "\n".getBytes().length);
            append.flush();
            append.close();
            fileSystem.close();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }
}
3. Map the directory to a table partition.
The HDFS layout is: /app/hive/test2/20180916/15/test2
Map it to a partition:
ALTER TABLE test2 ADD PARTITION (day='20180916', hour='15') location '/app/hive/test2/20180916/15';
Note: the partition location points at the hour-level directory that actually holds the data file; once the partition is added, day and hour can be used in queries just like ordinary columns.
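In the original setup a scheduled shell job adds these partitions; as an alternative sketch of my own, the same statement can be issued from Java over JDBC (the URL and credentials are the same placeholders as in approach 1):

/**
 * Sketch: register the current hour's directory as a partition of a day/hour-partitioned table.
 * "if not exists" makes the call idempotent, so a scheduler can safely run it every hour.
 */
public static void addHourPartition(String table, String day, String hour) throws Exception {
    Class.forName("org.apache.hive.jdbc.HiveDriver");
    try (Connection con = DriverManager.getConnection("jdbc:hive2://node2:10000/default;auth=noSasl", "custom", "hive");
         Statement stmt = con.createStatement()) {
        String location = "/app/hive/" + table + "/" + day + "/" + hour;
        stmt.execute("alter table " + table + " add if not exists partition(day='" + day + "', hour='" + hour + "')"
                + " location '" + location + "'");
    }
}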
Query the table:
Full scan:
hive> select * from test2;
OK
1111 xiaofei henan 20180916 15
Time taken: 0.198 seconds, Fetched: 1 row(s)

Query a specific partition:
hive> select * from test2 where hour='15';
OK
1111 xiaofei henan 20180916 15
Time taken: 1.119 seconds, Fetched: 1 row(s)

Approach 3 is described in detail, including the full write path, in my previous post.
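As a rough orientation only (see the previous post for the full write-up), a minimal sketch of the Spark route might look like the following; the session settings and the source DataFrame are assumptions, not the original project's code:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class SparkWrite2Hive {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("SparkWrite2Hive")
                .enableHiveSupport()          // lets Spark talk to the Hive metastore
                .getOrCreate();

        // placeholder source; columns must match test2's schema, with the
        // partition columns (day, hour) as the last columns
        Dataset<Row> df = spark.read().json("hdfs://node3:8020/app/input/events");

        // allow dynamic partitioning so each row lands in its own day/hour partition
        spark.sql("set hive.exec.dynamic.partition.mode=nonstrict");
        df.write().mode(SaveMode.Append).insertInto("test2");
    }
}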
For reference, the DDL used for testing:
create external table test2(id String,name string,addr string) partitioned by(day string,hour String) row format delimited fields terminated by ',' location 'hdfs://node3:8020/app/hive/test2/';

ALTER TABLE test2 ADD PARTITION (day='20180916',hour='15') location '/app/hive/test2/20180916/15';

The code above has a bug when used in a distributed, multi-threaded setting, so here is an updated version.
Why a singleton with a synchronized write method? Appending to a given HDFS file is single-writer and does not support concurrent writes; without the singleton and the lock you can run into NullPointerExceptions and errors from the output stream already being in use.
public class Function {

    private static Function function = null;

    // private constructor: the instance is only obtained through getInstance()
    private Function() {
    }

    public synchronized static Function getInstance() {
        if (function == null) {
            function = new Function();
        }

        return function;
    }

/**
 * Append one record to the HDFS file backing the buffer table.
 * DDL used for testing:
 * create external table ods_ott_deviceonline_buffer(rowkey String,terminalId string,onlineDate string) partitioned by(day string,hour String) row format delimited fields terminated by ',' location 'hdfs://node3:8020/app/hive/ods_ott_deviceonline_buffer/';
 * ALTER TABLE ods_ott_deviceonline_buffer ADD PARTITION (day='20180917',hour='09') location '/app/hive/ods_ott_deviceonline_buffer/<day>/<hour>'
 * The table is partitioned by day and hour.
 *
 * @param tableName
 * @return
 */
public synchronized Boolean write2Hive(String tableName, String time, String terminalId, String deviceid) {
    Boolean result = true;
    String dayTime = time.substring(0, 8);
    String hourTime = time.substring(8, 10);
    Configuration conf = new Configuration();
    conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
    conf.set("dfs.support.append", "true");
    String filePath = "hdfs://node3:8020/app/hive/" + tableName + "/" + dayTime + "/" + hourTime + "/" + tableName;
    Path path = null;
    FSDataOutputStream append = null;
    FileSystem fileSystem =null;
    try {
        path = new Path(filePath);
        fileSystem = FileSystem.get(new URI(filePath), conf);
        if (!fileSystem.exists(path)) {
            fileSystem.create(path, true);
            fileSystem.close();
            fileSystem = FileSystem.get(new URI(filePath), conf);
        }
        append = fileSystem.append(path);
        String rowKey = deviceid + "_" + time;
        append.write(rowKey.getBytes(), 0, rowKey.getBytes().length);
        append.write(",".getBytes(), 0, ",".getBytes().length);
        append.write(time.getBytes(), 0, time.getBytes().length);
        append.write("\n".getBytes(), 0, "\n".getBytes().length);
        append.flush();
    } catch (Exception e) {
        result = false;
        e.printStackTrace();
    } finally {
        if (append != null) {
            try {
                append.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (fileSystem != null) {
            try {
                fileSystem.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return result;
}


/**
 * Write a user-behavior record to HDFS, backing a Hive external table partitioned per hour, e.g.:
 * ALTER TABLE test3 ADD PARTITION (day='20180911',hour='16') location '/app/hive/test3/20180911/16'
 * The table is partitioned by day and hour.
 *
 * @return
 */
public synchronized Boolean userBehavior2Hive(String tableName, UserBehavior userBehavior, String time) {
    Boolean result = true;
    String dayTime = time.substring(0, 8);
    String hourTime = time.substring(8, 10);
    Configuration conf = new Configuration();
    conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
    conf.set("dfs.support.append", "true");
    String filePath = "hdfs://node3:8020/app/hive/" + tableName + "/" + dayTime + "/" + hourTime + "/" + tableName;
    Path path = null;
    FSDataOutputStream append = null;
    FileSystem fileSystem =null;
    try {
        path = new Path(filePath);
        fileSystem = FileSystem.get(new URI(filePath), conf);
        if (!fileSystem.exists(path)) {
            fileSystem.create(path, true);
            fileSystem.close();
            fileSystem = FileSystem.get(new URI(filePath), conf);
        }
        append = fileSystem.append(path);
        // assemble the record as one comma-separated line, then write it in a single call
        String record = String.join(",",
                userBehavior.getId(),
                userBehavior.getIp(),
                userBehavior.getCreateTime(),
                userBehavior.getCode(),
                userBehavior.getVideoType(),
                userBehavior.getVideoSource(),
                userBehavior.getFirstLevel(),
                userBehavior.getSecondLevel(),
                userBehavior.getThreeLevel(),
                userBehavior.getPannelLocation(),
                userBehavior.getPostUrl(),
                userBehavior.getOpenPostTime(),
                userBehavior.getChannelSwitchTime(),
                userBehavior.getChannelName(),
                userBehavior.getVideoQuality(),
                userBehavior.getVideoName(),
                userBehavior.getDirectName(),
                userBehavior.getActorName(),
                userBehavior.getVideoPlot(),
                userBehavior.getVideoScore(),
                userBehavior.getVideoRegion(),
                userBehavior.getVideoSize(),
                userBehavior.getVideoTimeLength(),
                userBehavior.getVideoStream(),
                userBehavior.getContentSource(),
                userBehavior.getVideoStartTime(),
                userBehavior.getVideoEndTime(),
                userBehavior.getVideoErrorTime(),
                userBehavior.getWatchTime(),
                userBehavior.getFirstFrameTime(),
                userBehavior.getKadunTime(),
                userBehavior.getAppName(),
                userBehavior.getSearchValue(),
                userBehavior.getBootTimeDelay(),
                userBehavior.getOpenAppTime(),
                userBehavior.getLoadPageTime(),
                userBehavior.getLogs()) + "\n";
        append.write(record.getBytes(), 0, record.getBytes().length);
        append.flush();
    } catch (Exception e) {
        result = false;
        e.printStackTrace();
    } finally {
        if (append != null) {
            try {
                append.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (fileSystem != null) {
            try {
                fileSystem.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return result;
}

}
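For completeness, a usage sketch of my own (the thread count and data are illustrative only): worker threads share the singleton, and the synchronized methods serialize their appends to the same HDFS file.

public class FunctionUsageDemo {
    public static void main(String[] args) {
        for (int i = 0; i < 4; i++) {
            final int id = i;
            new Thread(() -> {
                // e.g. time=201809161559000 -> day=20180916, hour=15
                String time = DateTimeTool.GetDateTimeNow();
                Function.getInstance().write2Hive("ods_ott_deviceonline_buffer", time, "terminal-" + id, "device-" + id);
            }).start();
        }
    }
}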
