建表
-- 新建Hive表: --
drop table if exists dev.table;
create table if not exists dev.dtable(
datag_time string comment '字段名称',
group_type char(10) comment '字段名称',
rationality_index double comment '字段名称',
deep_value_index double comment '字段名称'
)
comment '表名称注释'
partitioned by (
dt string comment 'partition : date'
)
row format delimited
fields terminated by '\001'
tblproperties (
'author'='tian'
);
插入数据
--插入数据
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
INSERT INTO TABLE app.* PARTITION (dt)
select
distinct *, *,*,*,*,dt
from *.*
where name is not NULL AND name like '%桌%' AND dt='2023-02-25'
LIMIT 2000000;
统计表的行数:
select count(*) from 表名 where dt='';
--两个表做差集
select
id
from
表1
where dt>='2022-10-07'
union
select
id
from
表2
where dt>='2022-10-07'
;
left outer join 和left join是一样的。 left join 包含左表的所有行。
分区排序,全局排序, MapReduce中的内部排序
https://blog.csdn.net/qq_43192537/article/details/102293995
https://blog.csdn.net/glittledream/article/details/84789571
Alter table 表名 change column 原字段名称 现字段名称 数据类型
https://my.oschina.net/u/2380815/blog/4453765
Hive0.14版本之前是不支持update和delete操作的,之后的Hive数据表必须要满足一定的条件,比如ORC存储、ACID支持等,才可以进行update和delete操作,本篇文章讲一下传统的hive数据表如果通过写SQL的方式实现数据的更新。
https://blog.csdn.net/weixin_30416497/article/details/97950750
SELECT item_name,brand_name_full
FROM 表名
WHERE dt>="2021-05-25" and brand_name_full<>"NO BRAND"
LIMIT 3000
(1) 去除重复项,模糊查询
select
distinct sku_name
from 表名
where sku_name is not NULL AND sku_name like '%桌%' AND dt='2023-02-25';
(2) 去重后统计数量
select count(distinct AccountID) from CharacterLogin where day="27" and month="10";
(3) 对某一列去重
distinct 只能加到最前面,同时存在多个列时,不能对单列去重复,所以可以使用row_number()等,具体如下:
对sku_name去重复
SELECT dropre.sku_name,
dropre.m1,
dropre.m2,
dropre.m3,
dropre.mm
FROM
(SELECT aa.sku_name,aa.
m1,
aa.m2,
aa.m3,aa.mm,row_number()
over (partition by aa.sku_name order BY aa.mm) as rn
FROM app.app_*** AS aa )
AS dropre
WHERE dropre.rn =1
https://www.cnblogs.com/rrttp/p/9026359.html
https://www.cnblogs.com/likai198981/archive/2013/03/29/2989740.html
https://blog.csdn.net/love_java_cc/article/details/52234889
hive -f t.sql
bin/hive -f sql.q >> res.csv
https://stackoverflow.com/questions/28674753/hive-runtime-error-while-processing-row-in-hive(向量化参数调优)
https://codeantenna.com/a/BD4HSLCWqZ (对某一列去重)