Hadoop入门学习笔记-第六天(hive数据库命令相关操作)

hive命令行操作/user/hive/warehouse(hdfs目录)
说明:hive创建的数据库和数据库表都存在hdfs,一般默认目录为:/user/hive/warehouse

1.创建hive的数据库
1.hive创建数据库的最简单写法和mysql差不多:
create database foo;

2.仅当名为foo的数据库当前不存在时才创建:
create database if not exists foo;

3.创建数据库时指定位置,这个位置一般是在hdfs上的位置:(可以忽略这个操作,不指定会使用xml配置的默认位置,一般位于:/user/hive/warehouse(hdfs目录))
create database foo location '/db/foo';

4.查看已经创建的数据库:
show databases ;
使用foo数据库
use foo;
查看当前数据库中的所有表
show tables;

5.在元数据管理库中查看foo数据库管理位置(mysql 数据库中,use hive ,查询dbs数据表)  -- 元数据库中执行
	MariaDB [hive]> select * from DBS;
	+-------+-----------------------+--------------------------------------------+---------+------------+------------+-----------+
	| DB_ID | DESC                  | DB_LOCATION_URI                            | NAME    | OWNER_NAME | OWNER_TYPE | CTLG_NAME |
	+-------+-----------------------+--------------------------------------------+---------+------------+------------+-----------+
	|     1 | Default Hive database | hdfs://cluster1/user/hive/warehouse        | default | public     | ROLE       | hive      |
	|     6 | NULL                  | hdfs://cluster1/user/hive/warehouse/foo.db | foo     | root       | USER       | hive      |
	+-------+-----------------------+--------------------------------------------+---------+------------+------------+-----------+
	2 rows in set (0.00 sec)

	MariaDB [hive]>

2.创建数据库表(需要指定库名)
1.创建数据表和mysql 大同小异
create table t(id int,name string,age int);
drop table t;
create table foo.t(id int,name string,age int);

2.查询表结构在元数据库mysql中的储存位置
select * from TBLS;
	+--------+-------------+-------+------------------+-------+------------+-----------+-------+----------+---------------+--------------------+--------------------+--------------------+
	| TBL_ID | CREATE_TIME | DB_ID | LAST_ACCESS_TIME | OWNER | OWNER_TYPE | RETENTION | SD_ID | TBL_NAME | TBL_TYPE      | VIEW_EXPANDED_TEXT | VIEW_ORIGINAL_TEXT | IS_REWRITE_ENABLED |
	+--------+-------------+-------+------------------+-------+------------+-----------+-------+----------+---------------+--------------------+--------------------+--------------------+
	|      2 |  1589005827 |     6 |                0 | root  | USER       |         0 |     2 | t        | MANAGED_TABLE | NULL               | NULL               |                    |
	+--------+-------------+-------+------------------+-------+------------+-----------+-------+----------+---------------+--------------------+--------------------+--------------------+
	1 row in set (0.00 sec)
3.插入数据
	insert into t(id,name,age)  values(1,'andy',25);

3.jdbc java 项目实例
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

/**
 * Minimal JDBC client demo for HiveServer2.
 *
 * Connects to the "foo" database on HiveServer2 (port 10000), runs a
 * SELECT over table t, and prints each row as "id,name,age".
 *
 * NOTE(review): host/credentials are hard-coded — acceptable for a demo,
 * but externalize them (args or config) for real use.
 */
public class HiveJdbcDemo {
	public static void main(String[] args) throws Exception {
		// Explicit driver registration; optional on JDBC 4+ drivers, but harmless
		// and required by some older Hive driver packagings.
		Class.forName("org.apache.hive.jdbc.HiveDriver");
		// try-with-resources guarantees connection, statement and result set are
		// closed (in reverse order) even if the query or row processing throws.
		// The original leaked all three on any exception before its close() calls.
		try (Connection conn = DriverManager.getConnection(
					"jdbc:hive2://192.168.18.10:10000/foo", "root", "root");
			Statement st = conn.createStatement();
			ResultSet rs = st.executeQuery("select id , name ,age from t")) {
			while (rs.next()) {
				// Columns are read positionally: 1=id, 2=name, 3=age.
				System.out.println(rs.getInt(1) + "," + rs.getString(2) + "," + rs.getInt(3));
			}
		}
	}
}

jdbc 链接hiveserver2 服务时,出现的常见问题。(参考:https://blog.csdn.net/qq_16633405/article/details/82190440 这篇文章讲的很仔细)


Error: Could not open client transport with JDBC Uri: jdbc:hive2://localhost:10000/foo: Failed to open new session:
	java.lang.RuntimeException: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.authorize.AuthorizationException): 
	User: root is not allowed to impersonate root (state=08S01,code=0)

解决方法:
	1.对root用户的权限全部放开:(hadoop.proxyuser.root.hosts 如果用户名为hadoop 那么这里就是hadoop.proxyuser.hadoop.hosts)
		在hadoop的配置文件core-site.xml中添加如下属性:
		
			hadoop.proxyuser.root.hosts
			*
		
		
			hadoop.proxyuser.root.groups
			*
		
	2.查看安装hive服务的节点的nameNode 是否为Active (假设nn1为nameNode Master 节点,上面安装了hive,那么此台节点必须是active的)
		hdfs haadmin -getServiceState nn1  查看服务状态(是Active还是Standby)
		如果nn1为Standby,那么可以把另外一台nameNode 节点杀掉,nn1会自动激活成Active ,然后再重启另一台nameNode节点	

hive 表

1.managed table
	托管表。
	删除表时,数据也删除了。
2.external table 
	drop table t;
	hdfs dfs -ls /user/hive/warehouse
	查看hdfs,数据还在保存着。
	外部表
	删除表时,数据不被删除。

hive命令

//创建表,external 外部表
$hive>CREATE external TABLE IF NOT EXISTS t2(id int,name string,age int)
COMMENT 'xx' ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE ; 

//查看表数据
$hive>desc t2 ;
$hive>desc formatted t2 ;

//加载数据到hive表
$hive>load data local inpath '/home/centos/customers.txt' into table t2 ;	//local上传文件
$hive>load data inpath '/user/centos/customers.txt' [overwrite] into table t2 ;	//移动文件

//复制表
mysql>create table tt as select * from users ;		//携带数据和表结构
mysql>create table tt like users ;			//不带数据,只有表结构

hive>create table tt as select * from users ;	
hive>create table tt like users ;	


//count()查询要转成mr
$hive>select count(*) from t2 ;
$hive>select id,name from t2 ;


//
$hive>select * from t2 order by id desc ;				//MR
	
//启用/禁用表
$hive>ALTER TABLE t2 ENABLE NO_DROP;	//不允许删除
$hive>ALTER TABLE t2 DISABLE NO_DROP;	//允许删除


//分区表,优化手段之一,从目录的层面控制搜索数据的范围。
//创建分区表.
$hive>CREATE TABLE t3(id int,name string,age int) PARTITIONED BY (Year INT, Month INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;

//显示表的分区信息
$hive>SHOW PARTITIONS t3;

//添加分区,创建目录
$hive>alter table t3 add partition (year=2014, month=12);

//删除分区
hive>ALTER TABLE employee_partitioned DROP IF EXISTS PARTITION (year=2014, month=11);

//分区结构
hive>/user/hive/warehouse/mydb2.db/t3/year=2014/month=11
hive>/user/hive/warehouse/mydb2.db/t3/year=2014/month=12


//加载数据到分区表
hive>load data local inpath '/home/centos/customers.txt' into table t3 partition(year=2014,month=11);

//创建桶表
$hive>CREATE TABLE t4(id int,name string,age int) CLUSTERED BY (id) INTO 3 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;

//加载数据不会进行分桶操作
$hive>load data local inpath '/home/centos/customers.txt' into table t4 ;

//查询t3表数据插入到t4中。
$hive>insert into t4 select id,name,age from t3 ;

//桶表的数量如何设置?
//评估数据量,保证每个桶的数据量block的2倍大小。


//连接查询
$hive>CREATE TABLE customers(id int,name string,age int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
$hive>CREATE TABLE orders(id int,orderno string,price float,cid int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;

//加载数据到表
//内连接查询
hive>select a.*,b.* from customers a , orders b where a.id = b.cid ;
//左外
hive>select a.*,b.* from customers a left outer join orders b on a.id = b.cid ;
hive>select a.*,b.* from customers a right outer join orders b on a.id = b.cid ;
hive>select a.*,b.* from customers a full outer join orders b on a.id = b.cid ;

//explode,炸裂,表生成函数。
//使用hive实现单词统计
//1.建表
$hive>CREATE TABLE doc(line string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;

你可能感兴趣的:(Hadoop入门学习笔记,hadoop,数据库,java,hive,mysql)