select count(*) from (select disctinct column from xxx) x
Select count(distinct column) from xxx
结果不一样,因为count(distin column)已经去null了
use mid_trafficwisdom;
drop table if exists city_with_station;
create table if not exists city_with_station(
start_city_id bigint comment '起始城市ID',
start_city_name string comment '起始城市名',
end_city_id bigint comment '到达城市ID',
end_city_name string comment '到达城市名',
com_start_city_id bigint comment '公共起始城市ID',
com_start_city_name string comment '公共起始城市名',
com_end_city_id bigint comment '公共到达城市ID',
com_end_city_name string comment '公共到达城市名',
traffic_type string comment '交通类型',
section_station_set string comment '站到站集合',
create_time string comment '数据插入时间'
)comment '城市包含站-站集合'
location '/data/train/trafficwisdom/mid/city_with_station'
INSERT OVERWRITE TABLE mid_trafficwisdom.search_transfer_log PARTITION(push_date ='2019-01-29' )
SELECT from_city_name,to_city_name,start_date,traffic_type,from_station_name,to_station_name,from_station_code,to_station_code,send_time
from tmp_trafficwisdom.search_transfer_log_tmp where from_city_name is not null and to_city_name is not null
ALTER TABLE mid_trafficwisdom.order_train_log DROP PARTITION (log_date = '2019-01-29');
//批量删除
ALTER TABLE ota_interface_check_info_log DROP PARTITION(log_date <='2017-08-10')
修改表名
ALTER TABLE name RENAME TO new_name
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE mid_trafficwisdom.ml_cross_data PARTITION (log_date)
SELECT D.userid, D.city, D.category, D.from_place, D.to_place
, D.isclick, D.future_day, D.hour_time, D.banner_min_price, D.banner_min_time
, D.start_city_id, D.start_city_name, D.end_city_id, D.end_city_name, D.push_date
FROM tmp_trafficwisdom.ml_cross_data D
ALTER TABLE app_trafficwisdom.route_traffic_transfer_without_date CHANGE transfer_overtime transfer_overtime bigint comment '第一段途径时间'
select transport_code, start_station_code, start_station_name, end_station_code, end_station_name, run_time, price, create_date
from (SELECT *, ROW_NUMBER() OVER (PARTITION BY transport_code, start_station_code , end_station_code ORDER BY create_date DESC) AS rn
FROM mid_trafficwisdom.section_ticket_price where traffic_type='F') A where A.rn=1
SELECT *
FROM (
SELECT from_city_name, to_city_name, COUNT(*) AS search_num
, Array('TT', 'TB', 'TF', 'FB', 'FT', 'BF', 'BT') AS traffic_types
FROM mid_trafficwisdom.search_line_log
WHERE push_date = '2019-01-29'
AND from_city_name IS NOT NULL
AND to_city_name IS NOT NULL
GROUP BY from_city_name, to_city_name
) d
LATERAL VIEW explode(traffic_types) adTable AS traffic_type limit 100;
多列合并一列
SELECT transport_code, duration_date, start_station_code, start_station_name, end_station_code
, end_station_name, seat_infos.seat_name, seat_infos.seat_price
FROM (
SELECT transport_code, duration_date, start_station_code, start_station_name, end_station_code
, end_station_name
, array(
named_struct('seat_name', '硬座', 'seat_price', hard_seat_price),
named_struct('seat_name', '无座', 'seat_price', no_seat_price),
named_struct('seat_name', '软卧上', 'seat_price', soft_sleeper_up_price),
named_struct('seat_name', '软卧下', 'seat_price', soft_sleeper_down_price),
named_struct('seat_name', '硬卧上', 'seat_price', hard_sleeper_up_price),
named_struct('seat_name', '硬卧中', 'seat_price', hard_sleeper_mid_price),
named_struct('seat_name', '硬卧下', 'seat_price', hard_sleeper_down_price),
named_struct('seat_name', '二等座', 'seat_price', second_seat_price),
named_struct('seat_name', '一等座', 'seat_price', first_seat_price),
named_struct('seat_name', '商务座', 'seat_price', business_seat_price)) AS seat_info
FROM mid_trafficwisdom.ticket_train_log
WHERE duration_date >= '{today,yyyy-MM-dd}'
) M
LATERAL VIEW explode(seat_info) seat_info AS seat_infos
WHERE seat_infos.seat_price IS NOT NULL AND seat_infos.seat_price > 0;
set hive.groupby.mapaggr.checkinterval=100000 ;--这个是group的键对应的记录条数超过这个值则会进行分拆,值根据具体数据量设置
set hive.groupby.skewindata=true; --如果是group by过程出现倾斜 应该设置为true
set hive.skewjoin.key=100000; --这个是join的键对应的记录条数超过这个值则会进行分拆,值根据具体数据量设置
set hive.optimize.skewjoin=true;--如果是join 过程出现倾斜 应该设置为true
o498X0bh9ySoH2h3ijQWMDzYSrjk_852_010002_2_319527329_南通_商丘_2019-01-29_1_367347825_780_1小时50分钟"
split(value,'_')[size(split(value, "_" )) - 1 ] as min_time,
if(regexp_extract(min_time,'^([0-9]*?)小?时.*$',1)>0,regexp_extract(min_time,'^([0-9]*?)小?时.*$',1),0)*3600+if(regexp_extract(min_time,'([0-9]*?)分钟.*$',1)>0 ,regexp_extract(min_time,'([0-9]*?)分钟.*$',1),0)*60 as min_time
SELECT from_city_name, to_city_name, search_num, traffic_type
FROM (
SELECT from_city_name, to_city_name, COUNT(*) AS search_num
, Array('TT', 'TB', 'TF', 'FB', 'FT', 'BF', 'BT') AS traffic_types
FROM mid_trafficwisdom.search_direct_log
WHERE log_date = '2019-02-26'
AND from_city_name IS NOT NULL
AND to_city_name IS NOT NULL
GROUP BY from_city_name, to_city_name
) d
LATERAL VIEW explode(traffic_types) adTable AS traffic_type
from_unixtime(cast (time as int) , 'yyyy-MM-dd HH:mm:ss') as creat_time
from_unixtime(cast(substring(time,1,10) as int),'yyyy-MM-dd HH:mm:ss') as creat_time
from_unixtime(unix_timestamp(date, 'yyyyMMdd'), 'yyyy-MM-dd') as create_date
CONCAT_WS('+',M.wtboscheduleno,M.wtbobusno) AS bus_code
{
"abTest": [{
"expNo": "20180716_JLH201807",
"expGroup": "Z"
}, {
"expNo": "20181210_BI",
"expGroup": "Z"
}, {
"expNo": "20190114_guolv",
"expGroup": "Z"
}, {
"expNo": "20181022_PaiXu",
"expGroup": "Z"
}],
"requestId": "c6e8bf08a64ffee900988b3124ee4442"
}
SELECT *,GET_JSON_OBJECT(wtodatasourcenew,'$.requestId')
FROM base_tcdctrafficwisdomtravelorder.wisdomtravelorder where wtocreatedate BETWEEN '2019-03-17 00:00:00' AND '2019-03-17 23:59:59'
LIMIT 100
CASE
WHEN round(b.order_num * 1.0 / a.search_num, 6) IS NULL THEN 0
ELSE round(b.order_num * 1.0 / a.search_num, 6)
END AS allpercent