SELECT database AS `数据库名`,table AS `表名`, name AS `列名`,type AS `类型`,default_expression AS `默认值`, comment AS `字段说明` FROM system.columns WHERE database='dbName' and `table` ='tableName' #clickhouse查看某个数据库的某个表有多少字段
SELECT DISTINCT table AS `表名` FROM system.columns WHERE database=‘dbname’
SELECT DISTINCT table AS `表名` FROM system.columns WHERE database=currentDatabase() #当前数据库
select count(*) FROM system.columns WHERE database=currentDatabase() and "table" ='tableName' #大于0存在
select count(*) FROM system.columns WHERE database='defualt' and "table" ='tableName'#大于0存在
select
sum(rows) as row,--总行数
formatReadableSize(sum(data_uncompressed_bytes)) as ysq,--原始大小
formatReadableSize(sum(data_compressed_bytes)) as ysh,--压缩大小
round(sum(data_compressed_bytes) / sum(data_uncompressed_bytes) * 100, 0) ys_rate--压缩率
from system.parts
select database,
table,
sum(bytes) as size,
sum(rows) as rows,
min(min_date) as min_date,
max(max_date) as max_date,
sum(bytes_on_disk) as bytes_on_disk,
sum(data_uncompressed_bytes) as data_uncompressed_bytes,
sum(data_compressed_bytes) as data_compressed_bytes,
(data_compressed_bytes / data_uncompressed_bytes) * 100 as compress_rate,
max_date - min_date as days,
size / (max_date - min_date) as avgDaySize
from system.parts
where active
and database = 'default'
and table = 'user'
group by database, table
select
database,
table,
formatReadableSize(size) as size,
formatReadableSize(bytes_on_disk) as bytes_on_disk,
formatReadableSize(data_uncompressed_bytes) as data_uncompressed_bytes,
formatReadableSize(data_compressed_bytes) as data_compressed_bytes,
compress_rate,
rows,
days,
formatReadableSize(avgDaySize) as avgDaySize
from
(
select
database,
table,
sum(bytes) as size,
sum(rows) as rows,
min(min_date) as min_date,
max(max_date) as max_date,
sum(bytes_on_disk) as bytes_on_disk,
sum(data_uncompressed_bytes) as data_uncompressed_bytes,
sum(data_compressed_bytes) as data_compressed_bytes,
(data_compressed_bytes / data_uncompressed_bytes) * 100 as compress_rate,
max_date - min_date as days,
size / (max_date - min_date) as avgDaySize
from system.parts
where active
and database = 'default'
and table = 'user'
group by
database,
table
)
select partition
from system.parts
where active
and database = 'default'
and table = 'user'
SELECT database,
table,
count() AS parts,
uniq(partition) AS partitions,
sum(marks) AS marks,
sum(rows) AS rows,
formatReadableSize(sum(data_compressed_bytes)) AS compressed,
formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed,
round((sum(data_compressed_bytes) / sum(data_uncompressed_bytes)) * 100.,2) AS percentage
FROM system.parts
WHERE active
and database = 'default'
and table = 'user'
GROUP BY database, table
SELECT table,
formatReadableSize(sum(data_compressed_bytes)) AS tc,
formatReadableSize(sum(data_uncompressed_bytes)) AS tu,
round((sum(data_compressed_bytes) / sum(data_uncompressed_bytes)) * 100,2) AS ratio
FROM system.columns
WHERE database = 'default'
and table = 'user'
GROUP BY table
ORDER BY sum(data_compressed_bytes) ASC
SELECT column,
any(type),
sum(column_data_compressed_bytes) AS compressed,
sum(column_data_uncompressed_bytes) AS uncompressed,
sum(rows)
FROM system.parts_columns
WHERE database = 'default'
and table = 'user'
AND active
GROUP BY column
ORDER BY column ASC
select
query_id,read_rows,total_rows_approx,memory_usage,
initial_user,initial_address,elapsed,query
from system.processes order by read_rows desc;
SHOW PROCESSLIST
select count(*) from system.mutations where database = 'DBNAME' and is_done =0;
KILL MUTATION WHERE 1=1 AND database = 'DBNAME' AND table = ' TABLENAME';
kill query where query_id='query_id查询ID'; #删掉某个查询
select (case when 1=1 and 2=2 or 1!=2 then 1 else 0 end )as t
SELECT number, if(number < 5, 'less than 5', 'greater than or equal to 5')
FROM (SELECT number FROM numbers(10));
– 另外 ClickHouse 本身也支持三元表达式,底层依旧会转成 if
SELECT number, number < 5 ? 'less than 5' : 'greater than or equal to 5'
FROM (SELECT number FROM numbers(10));
SELECT number, CASE WHEN number < 5 THEN 'less than 5' ELSE 'greater than or equal to 5' END
FROM (SELECT number FROM numbers(10));
我们看到底层转化成了 multiIf,那么这个 multiIf 是做什么的呢?首先 if 函数的参数如下:
if(cond, then, else)
而 multiIf 函数的参数如下:
multiIf(cond1, then1, cond2, then2, cond3, then3, ..., else)
所以从名字上也能看出来 multiIf 是干什么的,if 只能有一个条件,相当于编程语言中的 if … else;而 multiIf 可以接收多个条件,相当于编程语言中的 if … else if … else if … else。
multiIf(number < 5, 'less than 5', number = 5, 'equal to 5', 'greater than 5')
等价于
SELECT number,
CASE WHEN number < 5 THEN 'less than 5' WHEN number = 5 THEN 'equal to 5' ELSE 'greater than 5' END
FROM (SELECT number FROM numbers(10));
a < b 等价于 less(a, b)
a = b 等价于 equals(a, b)
a > b 等价于 greater(a, b)
a <= b 等价于 lessOrEquals(a, b)
a >= b 等价于 greaterOrEquals(a, b)
a != b 等价于 notEquals(a, b)
SELECT e();
SELECT pi();
SELECT exp(1), exp(2);
SELECT exp2(2), exp10(2);
SELECT log(e()), ln(e() * e());
SELECT log2(8), log10(1000);
SELECT sqrt(9);
SELECT cbrt(27);
SELECT pow(3, 4); #结果81
sin、cos、tan:计算正弦值、余弦值、正切值
asin、acos、atan:计算反正弦值、反余弦值、反正切值
sinh、cosh、tanh:计算双曲正弦值、双曲余弦值、双曲正切值
asinh、acosh、atanh:计算反双曲正弦值、反双曲余弦值、反双曲正切值
atan2:atan 的增强版,具体细节可以百度或者谷歌
SELECT floor(3.14), ceil(3.14);
SELECT floor(3.14, 1), ceil(3.14, 2), ceiling(3.14, 3);
SELECT trunc(3.14), trunc(-2.17); #truncate、trunc:截断小数点
SELECT trunc(3.14, 1), trunc(-2.17, 1); #仍然可以选择保留位数
SELECT round(3.1415926, 3), round(3.1415926); #不指定位数,将一位都不保留
注意:ClickHouse 中的 round 还有一种特殊用法,那就是对整数四舍五入。
– 当指定为负数时,表示对整数或者小数点前面的进行四舍五入
– -1 表示针对最后一位,所以 round(222, -1) 得到的结果是 220,round(228, -1) 得到的结果是 230
SELECT round(222, -1), round(228, -1);
– -2 表示针对最后两位,所以 round(-350, -2) 得到的结果是 -400,round(349, -2) 得到的结果是 300
– 因为 50 达到了 100 的一半,49 没有达到 100 的一半
SELECT round(-350, -2), round(349, -2);
– -3 表示针对最后三位,所以 round(499, -3) 得到的结果是 0,round(500, -3) 得到的结果是 1000
– 因为 499 没有达到 1000 的一半,500 达到了 1000 的一半
SELECT round(499, -3), round(500, -3);
roundToExp2:将数值转为某个最接近的 2 的整数次幂,比如 roundToExp2(33) 得到的就是 32,因为 32 是 2 的 5 次幂;roundToExp2(31) 得到的就是 16,因为 16 是 2 的 4 次幂
SELECT roundToExp2(33), roundToExp2(31), roundToExp2(1), roundToExp2(-11);
roundAge:如果一个数值小于 18,返回其本身;否则将其转成 18、25、35、45、55 当中与之最接近的一个值,很明显这个函数是针对 Yandex 公司的业务而专门设计的
SELECT roundAge(15), roundAge(20), roundAge(29), roundAge(38), roundAge(1000);
roundDown:将一个数值四舍五入到某个数组中与之最接近的值,如果数值小于数组中的最小值,那么等于最小值
WITH [18, 25, 35, 45, 55] AS arr
SELECT roundDown(15, arr), roundDown(20, arr), roundDown(29, arr), roundDown(38, arr), roundDown(1000, arr)
SELECT rand32(), rand64();
SELECT rand32(), randConstant() FROM numbers(3);
SELECT char(97), char(97, 98, 99);
SELECT hex(97), hex(98), hex(99);
SELECT hex('abc'); # 十六进制:a -> 61, b -> 62, c->63
SELECT unhex('616263');
SELECT hostName();#getMacro:从服务器的宏配置中获取指定的值
<macros>
<name>XXX</name>
</macros>
然后即可通过 getMacro(name) 获取,当然也可以查看所有的宏。
SELECT * FROM system.macros;
SELECT fqdn();
SELECT '/root/girls/1.csv' file_path, basename(file_path) file_name;
SELECT visibleWidth(3.1415), visibleWidth('satori'), visibleWidth(Null);
SELECT visibleWidth([1,2,3,4,Null]), length('[1,2,3,4,Null]');
SELECT toTypeName([1, 2, 3]), toTypeName(123), toTypeName((11, '22'));
SELECT currentDatabase();
SELECT currentUser();
https://clickhouse.com/docs/en/sql-reference/functions
先来一个完整的例子,该示例根据行为日志计算用户访问的top路径
select data, count(1) cn from (
with maxIf( c_t , cat='page_view'and act='页面浏览') as max_time, -- 目标事件时间
arraySort(
e -> e.1,
arrayFilter(x->x.1<=toUInt64OrZero(max_time),groupArray((toUInt64OrZero(c_t), (cat,act) )))
) as sorted_array,
-- 按时间排序后的数据
-- arrayPushFront( sorted_array, sorted_array[1] ) as e_arr,
arrayFilter(
(i, e,z) -> z.1 < toUInt64OrZero(max_time)
and (e > 1800000 or (z.2.1='page_view' and z.2.2='页面浏览')),
arrayEnumerate(sorted_array), arrayDifference( sorted_array.1 ),sorted_array
) as arr_indx, -- 过滤目标事件、时间差后的数据
arrayReduce('max',arr_indx) +1 as smIndx,
arrayFilter(
(e,i) -> i>=smIndx and e.1<=toUInt64OrZero(max_time) ,
sorted_array, arrayEnumerate(sorted_array)
) as data_
select u_i,
arrayFilter((x,y,i)-> i=1 or i>1 and y<>0 ,data_.2,arrayDifference(arrayEnumerateDense(data_.2)),arrayEnumerate(data_)) as data__,
arraySlice(data__,-7,7 ) as data,
-- arrayStringConcat(data,'->') as path,
hasAll(data, [ ('page_view','页面_浏览') ]) as has_way_point --路径中必须经过的点
from app.scene_tracker where c_p='PC' and length(u_i)>20
group by u_i having length(data)>1
) tab
where has_way_point=1 group by data order by cn desc limit 100
示例1:
SELECT a, neighbor( a,-1 ) from (SELECT arrayJoin( [1,2,3,6,34,3,11] ) as a,'u' as b)
# 还是上面的例子
SELECT a, neighbor( a,-1 ) from (SELECT arrayJoin( [1,2,3,6,34,3,11] ) as a,'u' as b)
# 还是上面的例子 略作修改,可对比示例1和示例3的结果区别
SELECT a, neighbor( a,-1 ) from (SELECT arrayJoin( arraySort([1,2,3,6,34,3,11]) ) as a,'u' as b)
# 我们只获取数组中的偶数部分
SELECT a, neighbor( a,-1 ) from (SELECT arrayJoin( arraySort(arrayFilter(x->x%2=0, [1,2,3,6,34,3,11])) ) as a,'u' as b)
SELECT arrayEnumerate( [1,2,3,6,34,3,11] )
SELECT arrayDifference( [1,2,3,6,34,3,11] )
SELECT arrayReduce('avg', [1,2,3,6,34,3,11] )
SELECT arrayEnumerateDense( [1,2,3,6,34,3,11] )
SELECT arraySlice( [1,2,3,6,34,3,11] , -3, 2)
# 返回:34 3
SELECT hasAny( [1,2,3,6,34,3,11] , [3,1])
SELECT arrayStringConcat( [1,2,3,6,34,3,11] , '-')
SELECT arrayPushFront( [1,2,3,6,34,3,11] , 8)
SELECT arrayPopFront( [1,2,3,6,34,3,11] )
#生成长度为3 的数组
SELECT arrayWithConstant( 3, 'a')
#范围值为['a','a','a']
SELECT arrayUniq( [1,2,3,6,34,3,11])
select a,runningDifference(a) from (SELECT arrayJoin( [1,2,3,6,34,3,11] ) as a,'u' as b)
SELECT arrayCompact([1, 2, 2, 3, 2, 3, 3])
#返回值为 [1,2,3,2,3]