hive元数据存储在mysql中,因此需要进入mysql中创建hive元数据库;如果已存在hive元数据库,则修改元数据库字符格式
##修改已存在的hive元数据库,字符编码格式为utf-8
mysql> alter database metastore character set utf8;
##进入hive元数据库
mysql> use metastore;
##查看元数据库字符编码格式
mysql> show variables like 'character_set_database';
hive启动后,修改hive的元数据信息,无需重启mysql和hive就能生效
解决desc命令注释中文乱码:修改hive存储在mysql里的元数据相关信息
# 1).修改字段注释字符集
mysql> alter table COLUMNS_V2 modify column COMMENT varchar(256) character set utf8;
# 2).修改表注释字符集
mysql> alter table TABLE_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
# 3).修改分区表参数,以支持分区键能够用中文表示
mysql> alter table PARTITION_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
alter table PARTITION_KEYS modify column PKEY_COMMENT varchar(4000) character set utf8;
# 4).修改索引注解
mysql> alter table INDEX_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
默认不支持创建中文分区
hive (default)> alter table table_partation add partition(dt='中文分区');
解决:
##进入hive元数据库
mysql> use metastore;
##查看元数据库下配置分区的表
mysql> show create table PARTITIONS;
##设置成功
mysql> alter table PARTITIONS modify column `PART_NAME` varchar(250) character set utf8;
======================================================================================
hive (default)> create table test_part_table(
word string,
num bigint
)partitioned by(dt string)
row format delimited fields terminated by '\t';
hive (default)> alter table test_part_table add partition(dt='中文分区aaa');
hive (default)> ALTER TABLE address SET TBLPROPERTIES('comment' = '注释注释注释');
## 非分区表
SELECT
a.TBL_ID,
d.`NAME` dbName,
a.TBL_NAME,
b.PARAM_VALUE numRows,
c.PARAM_VALUE totalSize
FROM
TBLS AS a
left JOIN TABLE_PARAMS AS b
on a.TBL_ID = b.TBL_ID
left JOIN TABLE_PARAMS AS c
on a.TBL_ID = c.TBL_ID
left JOIN DBS as d
on d.DB_ID = a.DB_ID
where
b.PARAM_KEY = "numRows"
and
c.PARAM_KEY = "totalSize"
select
table1.`name` as dbName,
table1.TBL_NAME,
table1.PARAM_VALUE
from ( select
result.TBL_ID,
result.`NAME`,
result.TBL_NAME,
sum(f.PARAM_VALUE) as PARAM_VALUE
from ( select
r.TBL_ID,
r.TBL_NAME,
s.DB_ID,
s.`NAME`,
r.PART_ID
from ( select
b.TBL_ID,
b.PART_ID,
a.TBL_NAME,
a.DB_ID from TBLS a
inner join `PARTITIONS` b
on a.TBL_ID = b.TBL_ID
) as r
inner join DBS s
on r.DB_ID = s. DB_ID
) AS result
inner join PARTITION_PARAMS f
on result.PART_ID = f.PART_ID
WHERE f.PARAM_KEY = 'numRows' group by result.TBL_ID) as `table1` -- 分区表数量
union all
select
table2.dbName,
table2.TBL_NAME,
table2.numRows
from (SELECT
a.TBL_ID,
d.`NAME` dbName,
a.TBL_NAME,
b.PARAM_VALUE numRows
FROM
TBLS AS a
left JOIN TABLE_PARAMS AS b
on a.TBL_ID = b.TBL_ID
left JOIN DBS as d
on d.DB_ID = a.DB_ID
where
b.PARAM_KEY = "numRows") as `table2` -- 非分区表数量
order by TBL_NAME asc