How to Operate Hive Data with Spring Data

Contents

1. Create the project and import the dependencies

2. Write the Spring configuration file

3. Start HiveServer2, Hive's remote connection service

4. Operate Hive through the API


1. Create the project and import the dependencies


pom.xml:

<project xmlns="http://maven.apache.org/POM/4.0.0"
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>SpringDataHive_Demo01</groupId>
	<artifactId>SpringDataHive_Demo01</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>SpringDataHive_Demo01</name>
	<url>http://maven.apache.org</url>

	<properties>
		<spring.version>4.1.6.RELEASE</spring.version>
		<slf4j.version>1.7.6</slf4j.version>
		<log4j.version>1.2.17</log4j.version>
		<!-- (a property with value 1.3 was also defined here) -->
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<spring.hadoop.version>2.3.0.M1</spring.hadoop.version>
		<hadoop.version>2.7.1</hadoop.version>
		<hive.version>1.2.1</hive.version>
	</properties>

	<dependencies>
		<!-- tools.jar from the local JDK -->
		<dependency>
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.8</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>

		<!-- Testing -->
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.10</version>
			<scope>test</scope>
		</dependency>

		<!-- Spring core (commons-logging excluded in favour of SLF4J) -->
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-core</artifactId>
			<version>${spring.version}</version>
			<exclusions>
				<exclusion>
					<groupId>commons-logging</groupId>
					<artifactId>commons-logging</artifactId>
				</exclusion>
			</exclusions>
		</dependency>

		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-beans</artifactId>
			<version>${spring.version}</version>
		</dependency>

		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-context</artifactId>
			<version>${spring.version}</version>
		</dependency>

		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-aop</artifactId>
			<version>${spring.version}</version>
		</dependency>

		<!-- Logging: SLF4J over Log4j -->
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-api</artifactId>
			<version>${slf4j.version}</version>
		</dependency>

		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>jcl-over-slf4j</artifactId>
			<version>${slf4j.version}</version>
		</dependency>

		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>${slf4j.version}</version>
			<scope>runtime</scope>
		</dependency>

		<dependency>
			<groupId>log4j</groupId>
			<artifactId>log4j</artifactId>
			<version>${log4j.version}</version>
			<scope>runtime</scope>
		</dependency>

		<!-- Spring for Apache Hadoop (Spring Data Hadoop) -->
		<dependency>
			<groupId>org.springframework.data</groupId>
			<artifactId>spring-data-hadoop</artifactId>
			<version>${spring.hadoop.version}</version>
			<exclusions>
				<exclusion>
					<groupId>org.springframework</groupId>
					<artifactId>spring-context-support</artifactId>
				</exclusion>
			</exclusions>
		</dependency>

		<!-- Spring JDBC -->
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-jdbc</artifactId>
			<version>${spring.version}</version>
		</dependency>

		<!-- Spring test support -->
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-test</artifactId>
			<version>${spring.version}</version>
		</dependency>

		<!-- Spring transactions -->
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-tx</artifactId>
			<version>${spring.version}</version>
		</dependency>

		<!-- Hadoop -->
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>${hadoop.version}</version>
			<scope>compile</scope>
		</dependency>

		<!-- Hive -->
		<dependency>
			<groupId>org.apache.hive</groupId>
			<artifactId>hive-metastore</artifactId>
			<version>${hive.version}</version>
		</dependency>

		<dependency>
			<groupId>org.apache.hive</groupId>
			<artifactId>hive-service</artifactId>
			<version>${hive.version}</version>
		</dependency>

		<dependency>
			<groupId>org.apache.thrift</groupId>
			<artifactId>libfb303</artifactId>
			<version>0.9.1</version>
		</dependency>

		<dependency>
			<groupId>org.apache.hive</groupId>
			<artifactId>hive-common</artifactId>
			<version>${hive.version}</version>
			<scope>runtime</scope>
		</dependency>

		<!-- Hive JDBC driver used to talk to HiveServer2 -->
		<dependency>
			<groupId>org.apache.hive</groupId>
			<artifactId>hive-jdbc</artifactId>
			<version>${hive.version}</version>
			<scope>runtime</scope>
		</dependency>

		<dependency>
			<groupId>org.apache.hive</groupId>
			<artifactId>hive-shims</artifactId>
			<version>${hive.version}</version>
			<scope>runtime</scope>
		</dependency>

		<dependency>
			<groupId>org.apache.hive</groupId>
			<artifactId>hive-serde</artifactId>
			<version>${hive.version}</version>
			<scope>runtime</scope>
		</dependency>

		<dependency>
			<groupId>org.apache.hive</groupId>
			<artifactId>hive-contrib</artifactId>
			<version>${hive.version}</version>
			<scope>runtime</scope>
		</dependency>

		<!-- Groovy (runtime dependency) -->
		<dependency>
			<groupId>org.codehaus.groovy</groupId>
			<artifactId>groovy</artifactId>
			<version>1.8.5</version>
			<scope>runtime</scope>
		</dependency>

	</dependencies>
</project>

2. Write the Spring configuration file

applicationContext.xml, a minimal sketch of the Spring for Apache Hadoop wiring the test class below relies on: the Hadoop configuration, a Hive JDBC DataSource pointing at HiveServer2, a HiveClient factory, and a HiveTemplate. The HiveServer2 address (host lj01 with the default port 10000) is an assumption; adjust it to your environment.

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xmlns:hdp="http://www.springframework.org/schema/hadoop"
	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
		http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd">

	<!-- Hadoop configuration: points at the HDFS namenode -->
	<hdp:configuration>
		fs.defaultFS=hdfs://lj01:9000
	</hdp:configuration>

	<!-- Hive JDBC driver and DataSource for HiveServer2 (URL is assumed: host lj01, default port 10000) -->
	<bean id="hiveDriver" class="org.apache.hive.jdbc.HiveDriver"/>
	<bean id="hiveDataSource" class="org.springframework.jdbc.datasource.SimpleDriverDataSource">
		<constructor-arg index="0" ref="hiveDriver"/>
		<constructor-arg index="1" value="jdbc:hive2://lj01:10000/default"/>
	</bean>

	<!-- Factory that hands HiveClient instances to HiveClientCallback -->
	<hdp:hive-client-factory id="hiveClientFactory" hive-data-source-ref="hiveDataSource"/>

	<!-- HiveTemplate, looked up as "hiveTemplate" by the test class -->
	<hdp:hive-template id="hiveTemplate"/>
</beans>

3. Start HiveServer2, Hive's remote connection service

./hive --service hiveserver2
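
The command above is run from the bin directory of the Hive installation, and by default HiveServer2 listens on port 10000. Before wiring it into Spring, it can be worth verifying connectivity with a plain Hive JDBC client. A minimal sketch, assuming HiveServer2 is reachable at lj01:10000 with no authentication (both assumptions):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class HiveConnectivityCheck {
	public static void main(String[] args) throws Exception {
		// Register the Hive JDBC driver (provided by the hive-jdbc dependency in the pom)
		Class.forName("org.apache.hive.jdbc.HiveDriver");
		// Assumed address: host lj01, default HiveServer2 port 10000, default database
		try (Connection conn = DriverManager.getConnection("jdbc:hive2://lj01:10000/default", "", "");
		     Statement stmt = conn.createStatement();
		     ResultSet rs = stmt.executeQuery("show tables")) {
			while (rs.next()) {
				System.out.println(rs.getString(1));
			}
		}
	}
}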


4. Operate Hive through the API

Entity class:

package com.lj.domain;

public class Stu {
	private int id;
	private String name;
	public Stu() {
	}
	
	public Stu(int id, String name) {
		super();
		this.id = id;
		this.name = name;
	}

	public int getId() {
		return id;
	}
	public void setId(int id) {
		this.id = id;
	}
	public String getName() {
		return name;
	}
	public void setName(String name) {
		this.name = name;
	}
	@Override
	public String toString() {
		return "Stu [id=" + id + ", name=" + name + "]";
	}
	
}
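
The queries below assume a Hive table named stu whose columns match this entity (id int, name string). If the table does not exist yet, it can be created over the same Hive JDBC connection; a minimal sketch (the field delimiter and the HiveServer2 address are assumptions):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class CreateStuTable {
	public static void main(String[] args) throws Exception {
		Class.forName("org.apache.hive.jdbc.HiveDriver");
		// Assumed HiveServer2 address, as in the connectivity check above
		try (Connection conn = DriverManager.getConnection("jdbc:hive2://lj01:10000/default", "", "");
		     Statement stmt = conn.createStatement()) {
			// Columns mirror the Stu entity; the ',' delimiter is only an example
			stmt.execute("create table if not exists stu (id int, name string) "
					+ "row format delimited fields terminated by ','");
		}
	}
}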

Test class:

package com.lj.test;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.List;

import org.junit.Before;
import org.junit.Test;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.data.hadoop.hive.HiveClient;
import org.springframework.data.hadoop.hive.HiveClientCallback;
import org.springframework.data.hadoop.hive.HiveTemplate;

import com.lj.domain.Stu;

public class Test01 {
	private ApplicationContext context = null;
	
	@Before
	public void before(){
		context = new ClassPathXmlApplicationContext("applicationContext.xml");
	}
	
	@Test
	public void test02(){
		// 1. Get the HiveTemplate bean from the Spring context
		HiveTemplate hiveTemplate = (HiveTemplate) context.getBean("hiveTemplate");
		// 2. Query Hive through the template, mapping every column of each row ourselves
		List<Stu> list = hiveTemplate.execute(new HiveClientCallback<List<Stu>>() {
			public List<Stu> doInHive(HiveClient hiveClient) throws Exception {
				List<Stu> list = new ArrayList<Stu>();
				Connection conn = hiveClient.getConnection();
				PreparedStatement ps = conn.prepareStatement("select * from stu");
				ResultSet rs = ps.executeQuery();
				while(rs.next()){
					Stu stu = new Stu();
					stu.setId(rs.getInt("id"));
					stu.setName(rs.getString("name"));
					list.add(stu);
				}
				return list;
			}
		});
		// 3. Print the result
		System.out.println(list);
	}
	

	@Test
	public void test01(){
		// 1. Get the HiveTemplate bean from the Spring context
		HiveTemplate hiveTemplate = (HiveTemplate) context.getBean("hiveTemplate");
		// 2. Query Hive through the template; query() returns only the first column of each row, as a List<String>
		List<String> list = hiveTemplate.query("select * from stu");
		System.out.println(list);
	}
}

Summary:

Notice that in test01 the query result contains only the first column of each row. That is how HiveTemplate.query() behaves: its implementation returns just the first column of every row, as a List<String>.

To get full rows back we can write the row mapping ourselves, as test02 does; a reusable version is sketched below.

The basic flow: get the HiveTemplate bean, call its execute() method and pass in a HiveClientCallback. Inside the callback's doInHive() method Spring hands us a HiveClient, from which we obtain a JDBC Connection; we run the query on that connection, map each row of the ResultSet into an object, and return the collection. Whatever the callback returns becomes the return value of execute(), so the caller can simply print it.
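
As an illustration of that flow, here is a minimal sketch of a reusable helper that returns every column of every row as a list of maps. It relies only on the API already used above (HiveTemplate.execute, HiveClientCallback.doInHive, HiveClient.getConnection); the class and method names are invented for this example:

package com.lj.test;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.springframework.data.hadoop.hive.HiveClient;
import org.springframework.data.hadoop.hive.HiveClientCallback;
import org.springframework.data.hadoop.hive.HiveTemplate;

// Hypothetical helper class, not part of Spring Data Hadoop
public class HiveQueryHelper {

	private final HiveTemplate hiveTemplate;

	public HiveQueryHelper(HiveTemplate hiveTemplate) {
		this.hiveTemplate = hiveTemplate;
	}

	// Runs the query and returns each row as a column-name -> value map
	public List<Map<String, Object>> queryForList(final String sql) {
		return hiveTemplate.execute(new HiveClientCallback<List<Map<String, Object>>>() {
			public List<Map<String, Object>> doInHive(HiveClient hiveClient) throws Exception {
				List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
				Connection conn = hiveClient.getConnection();
				PreparedStatement ps = conn.prepareStatement(sql);
				ResultSet rs = ps.executeQuery();
				ResultSetMetaData meta = rs.getMetaData();
				while (rs.next()) {
					Map<String, Object> row = new HashMap<String, Object>();
					for (int i = 1; i <= meta.getColumnCount(); i++) {
						row.put(meta.getColumnLabel(i), rs.getObject(i));
					}
					rows.add(row);
				}
				rs.close();
				ps.close();
				return rows;
			}
		});
	}
}

Usage from the test class: new HiveQueryHelper(hiveTemplate).queryForList("select * from stu") returns one map per row of stu instead of only the first column.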

 
