环境:CDH 5.5.1
hive创建表时带有中文就报错,如下:
CREATE TABLE IF NOT EXISTS test(pid bigint
COMMENT '测试id',fid int COMMENT 'luntanid')COMMT 'zhuti' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. MetaException(message:javax.jdo.JDODataStoreException: Transaction failed to commit
at org.datanucleus.api.jdo.NucleusJDOHelper.getJDOExceptionForNucleusException(NucleusJDOHelper.java:451)
at org.datanucleus.api.jdo.JDOTransaction.rollback(JDOTransaction.java:186)
at org.apache.hadoop.hive.metastore.ObjectStore.rollbackTransaction(ObjectStore.java:539)
at org.apache.hadoop.hive.metastore.ObjectStore.createTable(ObjectStore.java:893)
at sun.reflect.GeneratedMethodAccessor12.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:98)
at com.sun.proxy.$Proxy10.createTable(Unknown Source)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.create_table_core(HiveMetaStore.java:1411)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.create_table_with_environment_context(HiveMetaStore.java:1444)
at sun.reflect.GeneratedMethodAccessor11.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invokeInternal(RetryingHMSHandler.java:133)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:99)
at com.sun.proxy.$Proxy12.create_table_with_environment_context(Unknown Source)
at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Processor$create_table_with_environment_context.getResult(ThriftHiveMetastore.java:9136)
at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Processor$create_table_with_environment_context.getResult(ThriftHiveMetastore.java:9120)
at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
at org.apache.hadoop.hive.metastore.TUGIBasedProcessor$1.run(TUGIBasedProcessor.java:110)
at org.apache.hadoop.hive.metastore.TUGIBasedProcessor$1.run(TUGIBasedProcessor.java:106)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1671)
at org.apache.hadoop.hive.metastore.TUGIBasedProcessor.process(TUGIBasedProcessor.java:118)
at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:285)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
NestedThrowablesStackTrace:
Exception thrown flushing changes to datastore
org.datanucleus.exceptions.NucleusDataStoreException: Exception thrown flushing changes to datastore
搜索了一圈,都说是将字符集设置为utf8,
mysql
mysql> show variables like '%char%';
+--------------------------+----------------------------+
| Variable_name | Value |
+--------------------------+----------------------------+
| character_set_client | utf8 |
| character_set_connection | utf8 |
|
character_set_database | utf8 |
| character_set_filesystem | binary |
| character_set_results | utf8 |
| character_set_server | utf8 |
| character_set_system | utf8 |
| character_sets_dir | /usr/share/mysql/charsets/ |
+--------------------------+----------------------------+
hive-site
javax.jdo.option.ConnectionURL
jdbc:mysql://hadoop01.test:3306/hive?useUnicode=true&characterEncoding=UTF-8
都没有问题啊
后来在网上无意中看到以下内容:
"""
默认情况下我们需要将数据库的编码设置为lanin1.
alter database hive character set latin1;
但为了以下是为了支持hive建表时插入中文注释 需要在mysql中做如下设置:
use hive;
//修改字段注释字符集
alter table COLUMNS_V2 modify column COMMENT varchar(256) character set utf8;
//修改表注释字符集
alter table TABLE_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
//修改分区注释字符集
alter table PARTITION_KEYS modify column PKEY_COMMENT varchar(4000) character set utf8;
"""
尝试执行以上4条命令,执行这个命令后就正常了。
mysql> show variables like '%char%';
+--------------------------+----------------------------+
| Variable_name | Value |
+--------------------------+----------------------------+
| character_set_client | utf8 |
| character_set_connection | utf8 |
|
character_set_database | latin1 |
| character_set_filesystem | binary |
| character_set_results | utf8 |
| character_set_server | utf8 |
| character_set_system | utf8 |
| character_sets_dir | /usr/share/mysql/charsets/ |
+--------------------------+----------------------------+
8 rows in set (0.00 sec)
数据库字符集变latin1
当时我创建hive库的时候使用如下命令:
create database hive DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
不知道为什么会这样,但是目前解决的建表中文问题,先这样处理了。