1. Creating a Hive database
1. The simplest way to create a database in Hive is much the same as in MySQL:
create database foo;
2. Create the database only if no database named foo exists yet:
create database if not exists foo;
3. Specify a location when creating the database; this is normally a path on HDFS. (This step can be skipped: if no location is given, Hive uses the default from the XML configuration, usually the HDFS directory /user/hive/warehouse.)
create database foo location '/db/foo';
4. List the databases that have been created:
show databases;
Switch to the foo database:
use foo;
List its tables:
show tables;
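The database's HDFS location can also be checked from inside Hive itself, which is a quick cross-check before querying the metastore below:
describe database foo;
describe database extended foo;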
5. Check where the foo database is registered in the metastore (in the MySQL metastore database, run use hive; and query the DBS table) -- run this against the metastore database
MariaDB [hive]> select * from DBS;
+-------+-----------------------+--------------------------------------------+---------+------------+------------+-----------+
| DB_ID | DESC                  | DB_LOCATION_URI                            | NAME    | OWNER_NAME | OWNER_TYPE | CTLG_NAME |
+-------+-----------------------+--------------------------------------------+---------+------------+------------+-----------+
|     1 | Default Hive database | hdfs://cluster1/user/hive/warehouse        | default | public     | ROLE       | hive      |
|     6 | NULL                  | hdfs://cluster1/user/hive/warehouse/foo.db | foo     | root       | USER       | hive      |
+-------+-----------------------+--------------------------------------------+---------+------------+------------+-----------+
2 rows in set (0.00 sec)
MariaDB [hive]>
2. Creating tables (the database name should be specified)
1. Creating a table is much the same as in MySQL:
create table t(id int,name string,age int);
drop table t;
create table foo.t(id int,name string,age int);
2. Check where the table definition is stored in the MySQL metastore:
select * from TBLS;
+--------+-------------+-------+------------------+-------+------------+-----------+-------+----------+---------------+--------------------+--------------------+--------------------+
| TBL_ID | CREATE_TIME | DB_ID | LAST_ACCESS_TIME | OWNER | OWNER_TYPE | RETENTION | SD_ID | TBL_NAME | TBL_TYPE      | VIEW_EXPANDED_TEXT | VIEW_ORIGINAL_TEXT | IS_REWRITE_ENABLED |
+--------+-------------+-------+------------------+-------+------------+-----------+-------+----------+---------------+--------------------+--------------------+--------------------+
|      2 |  1589005827 |     6 |                0 | root  | USER       |         0 |     2 | t        | MANAGED_TABLE | NULL               | NULL               |                    |
+--------+-------------+-------+------------------+-------+------------+-----------+-------+----------+---------------+--------------------+--------------------+--------------------+
1 row in set (0.00 sec)
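The table's own HDFS location is kept in the SDS table and linked from TBLS through SD_ID; assuming the standard metastore schema, a query along these lines shows it:
select t.TBL_NAME, s.LOCATION from TBLS t join SDS s on t.SD_ID = s.SD_ID;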
3. Insert data:
insert into t(id,name,age) values(1,'andy',25);
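The insert statement itself runs as a MapReduce (or Tez) job, depending on the configured execution engine; once it finishes, the row can be verified with an ordinary query:
select * from t;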
3. JDBC Java example
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
public class HiveJdbcDemo {
    public static void main(String[] args) throws Exception {
        // Register the HiveServer2 JDBC driver
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        // Connect to HiveServer2 (port 10000) and use the foo database
        Connection conn = DriverManager.getConnection("jdbc:hive2://192.168.18.10:10000/foo", "root", "root");
        Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery("select id, name, age from t");
        while (rs.next()) {
            System.out.println(rs.getInt(1) + "," + rs.getString(2) + "," + rs.getInt(3));
        }
        rs.close();
        st.close();
        conn.close();
    }
}
Common problems when connecting to the hiveserver2 service over JDBC. (See https://blog.csdn.net/qq_16633405/article/details/82190440 for a detailed write-up.)
Error: Could not open client transport with JDBC Uri: jdbc:hive2://localhost:10000/foo: Failed to open new session:
java.lang.RuntimeException: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.authorize.AuthorizationException):
User: root is not allowed to impersonate root (state=08S01,code=0)
Solutions:
1. Open up the proxy-user permissions for the root user (the property is hadoop.proxyuser.root.hosts; if the user name were hadoop, it would be hadoop.proxyuser.hadoop.hosts instead).
Add the following properties to Hadoop's core-site.xml, then restart the Hadoop services (or refresh the configuration) so the change takes effect:
<property>
  <name>hadoop.proxyuser.root.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.root.groups</name>
  <value>*</value>
</property>
2. Check whether the NameNode on the node where the Hive service is installed is Active (say nn1 is the NameNode master node with Hive installed on it; that node must be Active).
hdfs haadmin -getServiceState nn1    (check whether the service is Active or Standby)
If nn1 is Standby, you can kill the other NameNode so that nn1 automatically fails over to Active, then restart the other NameNode.
1. managed table
A managed (internal) table: when the table is dropped, its data is deleted as well.
2. external table
An external table: when the table is dropped, the data is not deleted.
drop table t;
hdfs dfs -ls /user/hive/warehouse
Check HDFS: the data files are still there.
//Create a table; external marks it as an external table
$hive>CREATE external TABLE IF NOT EXISTS t2(id int,name string,age int)
COMMENT 'xx' ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE ;
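An external table is usually pointed at a directory that already holds data; a variant of the statement above with an explicit location (the path '/data/t2' is only an example):
$hive>CREATE external TABLE IF NOT EXISTS t2(id int,name string,age int)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE LOCATION '/data/t2' ;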
//Inspect the table definition
$hive>desc t2 ;
$hive>desc formatted t2 ;
//Load data into the Hive table
$hive>load data local inpath '/home/centos/customers.txt' into table t2 ;        //local: copies the file from the local filesystem
$hive>load data inpath '/user/centos/customers.txt' [overwrite] into table t2 ;  //without local: the HDFS file is moved
//Copy a table
mysql>create table tt as select * from users ;    //copies both the structure and the data
mysql>create table tt like users ;                //structure only, no data
hive>create table tt as select * from users ;
hive>create table tt like users ;
//A count() query is converted into a MapReduce job
$hive>select count(*) from t2 ;
$hive>select id,name from t2 ;
//order by also runs as a MapReduce job
$hive>select * from t2 order by id desc ;    //MR
//Enable/disable dropping of a table
$hive>ALTER TABLE t2 ENABLE NO_DROP;     //the table cannot be dropped
$hive>ALTER TABLE t2 DISABLE NO_DROP;    //the table can be dropped again
//Partitioned tables: one of the optimization techniques; they narrow the range of data scanned at the directory level.
//Create a partitioned table.
$hive>CREATE TABLE t3(id int,name string,age int) PARTITIONED BY (year INT, month INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
//Show the table's partitions
$hive>SHOW PARTITIONS t3;
//Add a partition (this creates the corresponding directory)
$hive>alter table t3 add partition (year=2014, month=12);
//Drop a partition
hive>ALTER TABLE t3 DROP IF EXISTS PARTITION (year=2014, month=11);
//Resulting partition directory layout on HDFS:
/user/hive/warehouse/mydb2.db/t3/year=2014/month=11
/user/hive/warehouse/mydb2.db/t3/year=2014/month=12
//Load data into a specific partition
hive>load data local inpath '/home/centos/customers.txt' into table t3 partition(year=2014,month=11);
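The benefit of partitioning shows up when the partition columns appear in the WHERE clause: Hive then reads only the matching year=/month= directories instead of scanning the whole table, for example:
$hive>select id,name,age from t3 where year=2014 and month=11 ;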
//Create a bucketed table
$hive>CREATE TABLE t4(id int,name string,age int) CLUSTERED BY (id) INTO 3 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
//load data does not perform any bucketing
$hive>load data local inpath '/home/centos/customers.txt' into table t4 ;
//Query t3 and insert the result into t4 so the data actually gets bucketed.
$hive>insert into t4 select id,name,age from t3 ;
//How should the number of buckets be chosen?
//Estimate the data volume and aim for each bucket holding roughly twice the HDFS block size.
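On older Hive releases the insert-select only buckets the data if hive.enforce.bucketing is enabled (newer releases always enforce it); once bucketed, individual buckets can be sampled. A sketch, assuming the 3 buckets defined above:
$hive>set hive.enforce.bucketing = true ;                       //needed on older Hive versions, before the insert
$hive>select * from t4 tablesample(bucket 1 out of 3 on id) ;   //reads only the first of the 3 buckets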
//Join queries
$hive>CREATE TABLE customers(id int,name string,age int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
$hive>CREATE TABLE orders(id int,orderno string,price float,cid int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
//Load data into the two tables (a sample load is sketched below)
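The file names below are placeholders; point them at your own comma-delimited data files:
hive>load data local inpath '/home/centos/customers.txt' into table customers ;
hive>load data local inpath '/home/centos/orders.txt' into table orders ;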
//Inner join
hive>select a.*,b.* from customers a , orders b where a.id = b.cid ;
//Left outer, right outer and full outer joins
hive>select a.*,b.* from customers a left outer join orders b on a.id = b.cid ;
hive>select a.*,b.* from customers a right outer join orders b on a.id = b.cid ;
hive>select a.*,b.* from customers a full outer join orders b on a.id = b.cid ;
//explode: a table-generating function that turns an array (or map) into rows.
//Word count implemented in Hive
//1. Create the table
$hive>CREATE TABLE doc(line string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
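//2. Load the text and run the count. A sketch of the usual approach: split each line into words, explode the array into rows, then group (the path '/home/centos/doc.txt' is only a placeholder):
$hive>load data local inpath '/home/centos/doc.txt' into table doc ;
$hive>select word, count(*) as cnt from (select explode(split(line, ' ')) as word from doc) w group by word order by cnt desc ;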