本文所做示例是将hive表格中的数据读取出来并存入mysql数据库。前提是以mysql作为hive的元数据库已经调通,因为连接mysql数据库的时候所用的数据库,用户名和密码都要hive的配置文件一致。
1、在eclipse下建立工程,并将1)hive中lib目录下的所有包都导入到该项目。2)将hadoop中的相关包导入。在hadoop-2.x以前只要导入hadoop-xxx-core.jar就可以,hadoop-2.x以后的版本比较蛋疼,被分散在hadoop安装目录的share/hadoop/下的各个子目录里,以防万一全部导入。3)导入mysql的jdbc驱动包。
2、该项目第一个文件叫做ConnectDB,里面两个方法getHiveConnection和getMysqlConnection,分别返回连接到hive或者mysql数据库的一个connection。主要是在Class.forName中导入驱动文件和在getConnection获取连接的时候要对应相应的数据库,要与hive中的配置文件保持一致。
package com.scutcv.mrshen; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; import org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.nullCondition_return; import org.codehaus.groovy.ast.stmt.ThrowStatement; public class ConnectDB { private static Connection connection = null; private static Connection connection2mysql = null; private ConnectDB() { } public static Connection getHiveConnection() throws SQLException { if (connection == null) { try { Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver"); } catch (ClassNotFoundException e) { // TODO: handle exception e.printStackTrace(); System.exit(1); } connection = DriverManager.getConnection("jdbc:hive://master:50031/default","hive","hive"); System.out.println("to hive: " + connection.toString()); } return connection; } public static Connection getMysqlConnection() throws SQLException { if (connection2mysql == null) { try { Class.forName("com.mysql.jdbc.Driver"); } catch (ClassNotFoundException e) { // TODO: handle exception e.printStackTrace(); System.exit(1); } connection2mysql = DriverManager.getConnection("jdbc:mysql://master:3306/hive?createDataBaseIfNotExist","hive","hive"); System.out.println("to mysql: " + connection2mysql.toString()); } return connection2mysql; } public static void closeHive() throws SQLException { if (connection != null) { connection.close(); } } public static void closeMysql() throws SQLException { if (connection2mysql != null) { connection2mysql.close(); } } }
package com.scutcv.mrshen; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; public class hiveUtil { public static void createTablseOrloadData(String hql)throws SQLException { Connection connection = ConnectDB.getHiveConnection(); Statement statement = connection.createStatement(); ResultSet resultSet = statement.executeQuery(hql); } public static ResultSet queryHive(String hql) throws SQLException { Connection connection = ConnectDB.getHiveConnection(); Statement statement = connection.createStatement(); ResultSet resultSet = statement.executeQuery(hql); return resultSet; } public static void hive2Mysql(ResultSet resultSet) throws SQLException { Connection connection = ConnectDB.getMysqlConnection(); Statement statement = connection.createStatement(); while(resultSet.next()) { /* int cid = resultSet.getInt(1); int type = resultSet.getInt(3); int aid = resultSet.getInt(5); String time = resultSet.getString(2); String adv = resultSet.getString(4); String conutry = resultSet.getString(6); String prov = resultSet.getString(7); String city = resultSet.getString(8); String snp = resultSet.getString(9); String ptype = resultSet.getString(10); String phone = resultSet.getString(11); int stateCode = statement.executeUpdate(getSQLString(cid, time, type, adv, aid, conutry, prov, city, snp, ptype, phone)); */ int key = resultSet.getInt(1); String val = resultSet.getString(2); int stateCode = statement.executeUpdate("insert into KVtest values(" + key + ",'" + val + "')"); } } public static String getSQLString(int cid, String time, int type, String adv, int aid, String country, String prov, String city, String snp, String ptype, String phone) { String sql = "insert into dmining values(" + cid + "," + time + "," + type + "," + adv + "," + aid + "," + country + "," + prov + "," + city + "," + snp + "," + ptype + "," + phone + ")"; return sql; } public static void printQueryResult(ResultSet resultSet) throws SQLException { while(resultSet.next()) { int cid = resultSet.getInt(1); int type = resultSet.getInt(3); int aid = resultSet.getInt(5); String time = resultSet.getString(2); String adv = resultSet.getString(4); String country = resultSet.getString(6); String prov = resultSet.getString(7); String city = resultSet.getString(8); String snp = resultSet.getString(9); String ptype = resultSet.getString(10); String phone = resultSet.getString(11); System.out.println("cid: " + cid); System.out.println("time: " + time); System.out.println("ad_type: " + type); System.out.println("advertisment: " + adv); System.out.println("aid: " + aid); System.out.println("country: " + country); System.out.println("province: " + prov); System.out.println("city: " + city); System.out.println("service network provider: " + snp); System.out.println("phone_type: " + ptype); System.out.println("phone: " + phone + '\n'); } } }
1)由于创建表格和导入数据逻辑步骤完全一样,只是查询的语句有区别,因此我把他们合并写作一个方法createTablesOrloadData。
2)queryHive方法返回hive的查询结果
3)hive2Mysql方法将方法2)中的结果通过迭代读取并插入mysql中的数据库。由于一开始我建立的表格要插入的数据含有中文导致乱码,因为只是测试,所以新加了printQueryResult方法将查询结果简单打印出来。为了节约时间我在hive中新建了表格kv2,并将hive的测试用例导入,因此注释掉的部分忽略不计。值得注意的是,在返回的查询结果resultSet中,取数据的时候下标是从1而不是从0开始的,一开始我从0读取,结果报了数据越界的异常。如下图。
package com.scutcv.mrshen; import java.sql.ResultSet; import java.sql.SQLException; public class execHive { public static void main(String[] args) throws SQLException { ResultSet resultSet = hiveUtil.queryHive("select * from kv2"); hiveUtil.hive2Mysql(resultSet); // hiveUtil.printQueryResult(resultSet); // String createHql = "create table kv2(id INT, name string)"; // hiveUtil.createTablseOrloadData(createHql); // String loadHql = "load data local inpath '/home/hduser/hive-0.13.1/examples/files/kv2.txt' overwrite into table kv2"; // hiveUtil.createTablseOrloadData(loadHql); ConnectDB.closeHive(); ConnectDB.closeMysql(); } }上述是将hive的查询数据写入mysql的测试,另外注释掉的还有在hive中创建kv2,导入数据等。按需自取。在运行execHive之前必须将hive服务开起来,指定的端口号要和ConnectDB中获取hive连接的配置一致。