Notes compiled mostly during 2011 and 2012.
1. Starting the server and the metastore
nohup hive --service hiveserver2 --hiveconf hive.server2.thrift.port=10000 > /dw/log/hiveserver.log 2>&1 & # HiveServer2 reads its port from hive.server2.thrift.port, not a positional argument
nohup hive --service metastore -p 9083 > /dw/log/metastore.log 2>&1 & # the metastore service takes its port via -p
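To verify HiveServer2 is accepting connections, a quick check with beeline (a minimal sketch assuming the default unauthenticated setup; host and user name are illustrative):
beeline -u jdbc:hive2://localhost:10000 -n hadooper -e 'show databases;'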
2. Character set
When MySQL stores the Hive metadata, the database cannot simply be switched to the utf-8 character set: the PKEY_NAME column of the PARTITION_KEYS table sits in an index, and at utf-8's 3 bytes per character its declared length blows past MySQL's index-key limit. Shortening PKEY_NAME fixes it: at most 255 characters on InnoDB (767-byte limit) or 333 on MyISAM (1000-byte limit).
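A minimal sketch of the fix, run against the metastore database (255 chosen so that 255 x 3 bytes fits InnoDB's 767-byte index limit; adjust to your schema version):
use hive;
alter table PARTITION_KEYS modify PKEY_NAME varchar(255) not null; -- keep NOT NULL: the column is part of the primary key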
3. Hadoop commands
hadoop fs -ls /data/stg/s_user_cndt/2011/08/ # list a directory
hadoop fs -mkdir /data/stg/s_user_cndt/2011/08/04 # create a directory
hadoop fs -copyFromLocal /home/hadooper/fdp/data/s_user_cndt.txt /data/stg/s_user_cndt/2011/09/21/ # upload (put) a local file
hadoop dfsadmin -report # report cluster status
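A few companion commands for the reverse direction (all standard HDFS shell commands of this era; the paths are illustrative):
hadoop fs -cat /data/stg/s_user_cndt/2011/09/21/s_user_cndt.txt # print a file to stdout
hadoop fs -copyToLocal /data/stg/s_user_cndt/2011/09/21/s_user_cndt.txt /tmp/ # fetch a file from HDFS
hadoop fs -rmr /data/stg/s_user_cndt/2011/08/04 # remove a directory recursively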
4. Adding partitions
alter table s_user_cndt add partition(year=2011, month=08, day=04);
ALTER TABLE s_prod_down ADD
  PARTITION (year=2011, month=10, day=11) LOCATION '/data/stg/feiliao/2011/10/11'
  PARTITION (year=2011, month=10, day=12) LOCATION '/data/stg/feiliao/2011/10/12';
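To inspect or roll back what was added (standard HiveQL, same table as above):
SHOW PARTITIONS s_prod_down;
ALTER TABLE s_prod_down DROP PARTITION (year=2011, month=10, day=11);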
5. Loading data
LOAD DATA LOCAL INPATH '/tmp/test.txt' OVERWRITE INTO TABLE test1 PARTITION (day_id=20120101);
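Without the LOCAL keyword the path is taken from HDFS instead, and Hive moves (rather than copies) the file into the table's directory; a sketch with an illustrative path:
LOAD DATA INPATH '/data/stg/test.txt' OVERWRITE INTO TABLE test1 PARTITION (day_id=20120102);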
6. Exporting files
insert overwrite local directory '/tmp/fdp/data/tmp/'
select INTERNAL_ID,LAST_OP_TIME,CLIENT_TYPE,PLATFORM_TYPE,CLIENT_VERSION,ITEM,ITEM_DIRE,ITEM_VALUE,ITEM_TYPE,CREATE_DATE from s_user_cndt where year=2011 and month=09 and day=18 and client_type = 15;
cat * > tmp.txt # concatenate the exported part files
sed 's/\x01/,/g' tmp.txt > s_user_cndt.txt # \x01 (Ctrl-A) is Hive's default field delimiter; replace it with commas
scp s_user_cndt.txt appuser@172.21.0.240:/data/fdp/tmp/
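On Hive 0.11 and later the delimiter can be set in the export itself, which removes the need for the sed step (a sketch; the column list is shortened here for brevity):
insert overwrite local directory '/tmp/fdp/data/tmp/'
row format delimited fields terminated by ','
select INTERNAL_ID, LAST_OP_TIME, CLIENT_TYPE from s_user_cndt where year=2011 and month=09 and day=18 and client_type = 15;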
7. If the SELECT produces no rows, INSERT OVERWRITE leaves the target's existing data in place instead of overwriting it with an empty result.
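A hypothetical illustration of the pitfall (both table names are invented):
insert overwrite table t_daily partition (day_id=20120101)
select * from t_src where 1 = 0; -- returns no rows, so the partition's old contents survive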
8. Using the map type
-- exploding the map directly yields matched (key, value) pairs; the original pair of
-- LATERAL VIEWs over map_keys()/map_values() cross-joined every key with every value
select user_id, t_key, t_value
from test2 LATERAL VIEW explode(word_ids) t as t_key, t_value;
9. Creating a table with a map column
create table test2(user_id int, word_ids map<string,int>); -- type parameters assumed: the original line was truncated at '<'
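A fuller variant with the text-format delimiters spelled out (the delimiter choices are illustrative, not from the original notes):
create table test2(user_id int, word_ids map<string,int>)
row format delimited
fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':';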
10. Functions (UDFs)
add jar /home/hadoop/tmp/participle.jar;
create temporary function f_split_word as 'com.dmp.participle.ParticipleUDF';
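Once registered, the function is usable for the rest of the session. A hypothetical call (the table, column, and the UDF's real signature are not known from these notes):
select f_split_word(content) from articles limit 10;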
11. Garbled Chinese in the MySQL metastore
use hive;
alter table COLUMNS_V2 modify column COMMENT varchar(256) character set utf8;
alter table TABLE_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
alter table PARTITION_KEYS modify column PKEY_COMMENT varchar(4000) character set utf8;
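Switching these columns to utf8 only helps comments written afterwards; comments that are already garbled typically have to be re-entered. The usual guidance is to leave the metastore database itself on latin1 and convert only the comment columns (a sketch, assuming the metastore database is named hive):
alter database hive default character set latin1;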