2019/2/28 星期四

cdh中hive创建表出现中文乱码问题

0: jdbc:hive2://192.168.0.141:10000> create table movie(
. . . . . . . . . . . . . . . . . .> userID int comment '用户ID',
. . . . . . . . . . . . . . . . . .> movieID int comment '电影ID',
. . . . . . . . . . . . . . . . . .> rating int comment '电影评分',
. . . . . . . . . . . . . . . . . .> timestamped bigint comment '评分时间戳',
. . . . . . . . . . . . . . . . . .> movieName string comment '电影名字', 
. . . . . . . . . . . . . . . . . .> movieType string comment '电影类型', 
. . . . . . . . . . . . . . . . . .> sex string comment '性别', 
. . . . . . . . . . . . . . . . . .> age int comment '年龄', 
. . . . . . . . . . . . . . . . . .> occupation string comment '职业', 
. . . . . . . . . . . . . . . . . .> zipcode string comment '邮政编码'
. . . . . . . . . . . . . . . . . .> ) comment '影评三表合一' 
. . . . . . . . . . . . . . . . . .> row format delimited fields terminated by ","
. . . . . . . . . . . . . . . . . .> ;

0: jdbc:hive2://192.168.0.141:10000> desc movie;
INFO : Compiling command(queryId=hive_20190228181919_b6eb4f8a-7d17-4437-b089-750bf2e81d0f): desc movie
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:col_name, type:string, comment:from deserializer), FieldSchema(name:data_type, type:string, comment:from deserializer), FieldSchema(name:comment, type:string, comment:from deserializer)], properties:null)
INFO : Completed compiling command(queryId=hive_20190228181919_b6eb4f8a-7d17-4437-b089-750bf2e81d0f); Time taken: 0.036 seconds
INFO : Executing command(queryId=hive_20190228181919_b6eb4f8a-7d17-4437-b089-750bf2e81d0f): desc movie
INFO : Starting task [Stage-0:DDL] in serial mode
INFO : Completed executing command(queryId=hive_20190228181919_b6eb4f8a-7d17-4437-b089-750bf2e81d0f); Time taken: 0.016 seconds
INFO : OK
+--------------+------------+----------+--+
| col_name | data_type | comment |
+--------------+------------+----------+--+
| userid | int | ??ID |
| movieid | int | ??ID |
| rating | int | ???? |
| timestamped | bigint | ????? |
| moviename | string | ???? |
| movietype | string | ???? |
| sex | string | ?? |
| age | int | ?? |
| occupation | string | ?? |
| zipcode | string | ???? |
+--------------+------------+----------+--+

进入mysql
[root@NewCDH-0--141 ~]# mysql -uroot -p
Enter password:
mysql> show create database hive;
+----------+---------------------------------------------------------------+
| Database | Create Database |
+----------+---------------------------------------------------------------+
| hive | CREATE DATABASE `hive` /*!40100 DEFAULT CHARACTER SET utf8 */ |
+----------+---------------------------------------------------------------+
1 row in set (0.02 sec)
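从上面可以看到 hive 库的默认字符集已经是 utf8,但元数据表中保存注释的列本身仍不是 utf8(Hive 的 MySQL 建表脚本通常使用 latin1),所以需要在 hive 库中直接执行下面的命令,把这些列的字符集改为 utf8: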

mysql> use hive;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

mysql>  alter table COLUMNS_V2 modify column COMMENT varchar(256) character set utf8;
Query OK, 57 rows affected (0.20 sec)
Records: 57  Duplicates: 0  Warnings: 0

mysql> alter table TABLE_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
Query OK, 58 rows affected (0.03 sec)
Records: 58  Duplicates: 0  Warnings: 0

mysql> alter table PARTITION_PARAMS  modify column PARAM_VALUE varchar(4000) character set utf8;
Query OK, 0 rows affected (0.03 sec)
Records: 0  Duplicates: 0  Warnings: 0

mysql> alter table PARTITION_KEYS  modify column PKEY_COMMENT varchar(4000) character set utf8;
Query OK, 0 rows affected (0.04 sec)
Records: 0  Duplicates: 0  Warnings: 0

mysql> alter table  INDEX_PARAMS  modify column PARAM_VALUE  varchar(4000) character set utf8;
Query OK, 0 rows affected (0.19 sec)
Records: 0  Duplicates: 0  Warnings: 0

修改字符集只对之后写入的元数据生效:之前创建的表(如 movie)的注释已经以乱码存入元数据库,仍然显示为 ??,因此需要重新创建表:


create table movie02(
userID int comment '用户ID',
movieID int comment '电影ID',
rating int comment '电影评分',
timestamped bigint comment '评分时间戳',
movieName string comment '电影名字', 
movieType string comment '电影类型', 
sex string comment '性别', 
age int comment '年龄', 
occupation string comment '职业', 
zipcode string comment '邮政编码'
) comment '影评三表合一' 
row format delimited fields terminated by ",";

0: jdbc:hive2://192.168.0.141:10000> desc movie02;
+--------------+------------+----------+--+
| col_name | data_type | comment |
+--------------+------------+----------+--+
| userid | int | 用户ID |
| movieid | int | 电影ID |
| rating | int | 电影评分 |
| timestamped | bigint | 评分时间戳 |
| moviename | string | 电影名字 |
| movietype | string | 电影类型 |
| sex | string | 性别 |
| age | int | 年龄 |
| occupation | string | 职业 |
| zipcode | string | 邮政编码 |
+--------------+------------+----------+--+
0: jdbc:hive2://192.168.0.141:10000> show create table movie02;
+----------------------------------------------------+--+
| createtab_stmt |
+----------------------------------------------------+--+
| CREATE TABLE movie02( |
| userid int COMMENT '用户ID', |
| movieid int COMMENT '电影ID', |
| rating int COMMENT '电影评分', |
| timestamped bigint COMMENT '评分时间戳', |
| moviename string COMMENT '电影名字', |
| movietype string COMMENT '电影类型', |
| sex string COMMENT '性别', |
| age int COMMENT '年龄', |
| occupation string COMMENT '职业', |
| zipcode string COMMENT '邮政编码') |
| COMMENT '影评三表合一' |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' |
| WITH SERDEPROPERTIES ( |
| 'field.delim'=',', |
| 'serialization.format'=',') |
| STORED AS INPUTFORMAT |
| 'org.apache.hadoop.mapred.TextInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
| LOCATION |
| 'hdfs://nameservice1/user/hive/warehouse/movie02' |
| TBLPROPERTIES ( |
| 'transient_lastDdlTime'='1551350122') |
+----------------------------------------------------+--+
25 rows selected (0.167 seconds)
乱码问题解决了。

参考链接
https://www.cnblogs.com/qingyunzong/p/8724155.html
cdh hive 中文注释乱码解决方法(简单几步):https://blog.csdn.net/lingbo229/article/details/81324624