创建数据库
CREATE DATABASE db_name [[WITH] OWNER [=] user_name];
删除数据库
DROP DATABASE [IF EXISTS] db_name;
创建数据表
CREATE TABLE [IF NOT EXISTS]
[schema_name.]table_name
([{column_name column_type [column_constraints, [...]]
| table_constraints
[, ...]
}]);
删除表
DROP TABLE [IF EXISTS] table_name [, ...];
修改表
仅支持重命名表(RENAME TABLE)和增加列(ADD COLUMN)。对于外部表(foreign table)没有限制
-- Rename an existing table.
ALTER TABLE table_name
    RENAME TO new_table_name;
-- Add a new column to the table, provided the table exists.
ALTER TABLE IF EXISTS table_name
    ADD COLUMN new_column_name data_type;
增加注释
Holo支持给表,外表,列等增加注释的功能
-- Attach a comment to a table.
COMMENT ON TABLE table_name
    IS 'my comments on table table_name';
-- Attach a comment to a single column of a table.
COMMENT ON COLUMN table_name.col1
    IS 'This my first col1';
-- Attach a comment to a foreign table.
COMMENT ON FOREIGN TABLE foreign_table
    IS 'comment on my foreign table';
模式(SCHEMA)
-- Show the schema currently in effect.
SELECT current_schema();
-- Create a new schema.
CREATE SCHEMA my_schema;
-- Create a table in another schema by qualifying the table name.
CREATE TABLE my_schema.mytest (
    name text,
    id   int,
    age  int
);
-- Rename a schema; all tables of the old schema are then found under the new name.
ALTER SCHEMA oldschema
    RENAME TO newschema;
分区表
PostgreSQL分区表用于将一张大表分成同构的若干张子表,利于加速查询、方便管理
-- 创建分区表
CREATE TABLE [IF NOT EXISTS] [schema_name.]table_name PARTITION OF parent_table FOR VALUES IN (string_literal);
-- 分离目标表的指定分区
ALTER TABLE [IF EXISTS] table_name DETACH PARTITION partition_name;
-- 删除分区表
DROP TABLE table_name;
外部表
指不存储于Hologres中的表。Hologres与大数据生态无缝打通,可对外部表直接加速查询,也可以将外部表的数据导入到Hologres中进行数据处理
目前Holo只支持MaxCompute中的表作为外部表
-- Create a foreign table backed by table 'test' in project 'odps_project'.
CREATE FOREIGN TABLE src_pt_odps (
    key text
)
SERVER odps_server
OPTIONS (
    project_name 'odps_project',
    table_name 'test'
);
-- 删除外部表
DROP FOREIGN TABLE [IF EXISTS] table_name [, ...]
[ CASCADE | RESTRICT];
-- 查看外部表
SELECT * FROM table_name;
**CREATE CAST 用于定义数据类型之间的转换**
CREATE CAST(source_type AS target_type)
WITH INOUT
[ AS ASSIGNMENT | AS IMPLICIT]
注:
source_type:该转换的源数据类型
target_type:该转换的目标数据类型
示例:创建数据类型之间的转换
CREATE CAST (text AS integer) WITH INOUT AS IMPLICIT;
**DROP CAST语句用于删除已定义的数据类型转换**
DROP CAST [IF EXISTS] (source_type AS target_type)
示例:
DROP CAST IF EXISTS (text AS timestamptz);
DROP CAST IF EXISTS (text AS integer);
创建视图
CREATE [TEMP | TEMPORARY] VIEW
view_name AS
SELECT column1, column2 ...
FROM table_name
WHERE [condition];
-- Create a view over a foreign table (t1_foreign).
CREATE VIEW view1 AS
SELECT * FROM t1_foreign;
-- Create a view that combines an internal table (t2_holo) with a foreign table (t1_foreign).
CREATE VIEW view2 AS
SELECT * FROM t2_holo
UNION ALL
SELECT * FROM t1_foreign;
删除视图
DROP VIEW view_name;
插入
INSERT INTO table[(column [,...])] VALUES ({expression} [,...]) [, ...] | query
在Hologres中,INSERT支持两种形式
1.插入确定的value
-- Insert five literal rows with one multi-row VALUES list.
INSERT INTO rh_holo2
    (cate_id, cate_name)
VALUES
    (3, 't1'),
    (3, 'f1'),
    (3, 'trxxue'),
    (3, 'x'),
    (4, 'sajojsaio');
2.插入select的结果
INSERT INTO test2
SELECT * FROM test1;
DELETE:对表指定列的行数据进行删除
DELETE FROM table_name [*]
[ [AS] alias] [WHERE condition]
alias:别名,目标表的替代名称
condition:删除的条件
用法举例:
-- Delete the rows whose column a equals 10.
DELETE FROM delete_test AS dt
WHERE dt.a = 10;
-- Delete the rows whose column b is NULL.
DELETE FROM delete_test AS dt
WHERE dt.b IS NULL;
-- Delete the rows whose column b is the empty string.
-- String literals take single quotes; double quotes denote identifiers.
DELETE FROM delete_test AS dt
WHERE dt.b = '';
UPDATE:对表指定列的行数据进行更新
UPDATE table [*] [ [AS] alias]
SET column = {expression}
[ FROM from_list] [WHERE condition]
alias:别名,目标表的代称
expression:表达式
condition:更新条件
举例:
-- Add 10 to b for the rows where a is 'b1'.
UPDATE update_test
SET b = b + 10
WHERE a = 'b1';
-- Assign two columns at once for the rows where b is 20.
UPDATE update_test
SET c = 'new_' || a,
    d = NULL
WHERE b = 20;
-- Multi-column assignment. There is no WHERE clause, so every row of the table is updated.
-- String literals take single quotes; double quotes denote identifiers.
UPDATE update_test
SET (b, c, d) = (1, 'test_c', 'd');
SELECT查询语法
[WITH with_query [, ...]]
SELECT [ALL | DISTINCT [ ON (expression [, ...])]]
* | expression [[AS] output_name] [, ...]
[FROM from_item [, ...]]
[WHERE condition]
[GROUP BY grouping_element [, ...]]
[HAVING condition [, ...]]
[{UNION | INTERSECT | EXCEPT} [ALL] SELECT]
[ORDER BY expression [ASC | DESC | USING operator] [, ...]]
[LIMIT {count | ALL}]
DISTINCT:去除重复行,只保留一行
FROM:为SELECT指定一个或更多源表
WHERE:展示condition指定的内容
GROUP BY:按照指定的表达式分组
HAVING:过滤
UNION:SELECT语句所返回的行的并集
INTERSECT:select语句返回的行的交集
EXCEPT:计算位于左SELECT语句的结果中但不在右SELECT语句结果中的行集合
ORDER BY:按照指定的表达式排序
LIMIT:count指定要返回的最大行数
示例:
select * from sale_detail where shop_name like 'hang%';
从sale_detail表中查询店铺名称(shop_name)以hang开头的所有记录
select region from sale_detail group by region;
从sale_detail表中查询region信息并以region进行分组
select * from sale_detail order by region limit 100;
从sale_detail表中查询所有列,按region排序,并返回前100行数据
UNION并集
求两个数据集的并集。即,将两个数据集合并成一个数据集
未去重:
-- UNION ALL keeps duplicate rows.
-- A VALUES list used as a FROM item must be parenthesized and given an alias.
SELECT * FROM (VALUES (1, 2), (1, 2), (3, 4)) AS t(a, b)
UNION ALL
SELECT * FROM (VALUES (1, 2), (3, 4)) AS t(a, b);
去重:
-- UNION removes duplicate rows from the combined result.
-- A VALUES list used as a FROM item must be parenthesized and given an alias.
SELECT * FROM (VALUES (1, 2), (1, 2), (3, 4)) AS t(a, b)
UNION
SELECT * FROM (VALUES (1, 2), (3, 4)) AS t(a, b);
INTERSECT交集
求两个数据集的交集。即,输出两个数据集均包含的记录
未去重:
-- INTERSECT ALL keeps duplicate rows that occur in both inputs.
-- A VALUES list used as a FROM item must be parenthesized and given an alias.
SELECT * FROM (VALUES (1, 2), (1, 2), (3, 4)) AS t(a, b)
INTERSECT ALL
SELECT * FROM (VALUES (1, 2), (1, 2), (7, 8)) AS t(a, b);
去重:
-- INTERSECT returns each row common to both inputs once.
-- A VALUES list used as a FROM item must be parenthesized and given an alias.
SELECT * FROM (VALUES (1, 2), (1, 2), (3, 4)) AS t(a, b)
INTERSECT
SELECT * FROM (VALUES (1, 2), (1, 2), (7, 8)) AS t(a, b);
EXCEPT补集
求第二个数据集在第一个数据集中的补集。即,输出第一个数据集包含而第二个数据集不包含的记录
未去重:
-- EXCEPT ALL subtracts the right-hand rows from the left-hand rows, counting duplicates.
-- A VALUES list used as a FROM item must be parenthesized and given an alias.
SELECT * FROM (VALUES (1, 2), (1, 2), (3, 4)) AS t(a, b)
EXCEPT ALL
SELECT * FROM (VALUES (1, 2), (1, 2), (7, 8)) AS t(a, b);
去重:
-- EXCEPT returns the distinct left-hand rows that do not appear on the right-hand side.
-- A VALUES list used as a FROM item must be parenthesized and given an alias.
SELECT * FROM (VALUES (1, 2), (1, 2), (3, 4)) AS t(a, b)
EXCEPT
SELECT * FROM (VALUES (1, 2), (1, 2), (7, 8)) AS t(a, b);
INSERT ON CONFLICT语句用于在指定列插入某行数据时,如果主键存在重复的行数据,则对该数据执行更新或跳过操作
应用场景:适用于SQL方式导入数据的场景
-- Source table for the upsert examples.
create table conflict_2 (
    a int not null primary key,
    b int,
    c int
);
-- Target table for the upsert examples. The statements below write to conflict_1,
-- so it is created here with the same column layout as conflict_2.
create table if not exists conflict_1 (
    a int not null primary key,
    b int,
    c int
);
insert into conflict_2 (a, b, c) values (1, 5, 6);
-- On a primary-key conflict, overwrite conflict_1.b with the incoming row's b.
insert into conflict_1 (a, b, c)
select a, b, c from conflict_2
on conflict (a) do update set b = excluded.b;
-- On a primary-key conflict, overwrite b and c with the incoming values,
-- but only when the existing row has c = 4.
insert into conflict_1 (a, b, c) values (2, 7, 8)
on conflict (a) do update set b = excluded.b, c = excluded.c
where conflict_1.c = 4;
-- On a primary-key conflict, skip the incoming row.
insert into conflict_1 (a, b, c)
select a, b, c from conflict_2
on conflict (a) do nothing;
-- With no conflict target, DO NOTHING applies to any uniqueness conflict (here: the primary key).
insert into conflict_1 (a, b, c)
select a, b, c from conflict_2
on conflict do nothing;
TRUNCATE语句用于清空目标表
语法:
TRUNCATE [TABLE] name [, ...] [CONTINUE IDENTITY | RESTART IDENTITY]
# 默认为CONTINUE IDENTITY
call set_table_property('table_name', 'orientation', '[column | row]');
call set_table_property('table_name', 'clustering_key', '[columnName{: [desc | asc]} [, ...]]');
call set_table_property('table_name', 'segment_key', '[columnName [, ...]]');
call set_table_property('table_name', 'bitmap_columns', '[columnName [, ...]]');
call set_table_property('table_name', 'dictionary_encoding_columns', '[columnName [,...]]');
call set_table_property('table_name', 'time_to_live_in_seconds', '<non_negative_literal>');
call set_table_property('table_name', 'distribution_key', '[columnName [, ...]]');
-- Row-oriented storage.
-- The property is set in the same transaction that creates the table.
begin;
create table tb1 (a int not null, b text not null, primary key (a));
-- The arguments are string literals, so they take single quotes;
-- double quotes would be parsed as identifiers.
call set_table_property('tb1', 'orientation', 'row');
commit;
-- Column-oriented storage.
-- The property is set in the same transaction that creates the table.
begin;
create table tb1 (a int not null, b text not null, primary key (a));
-- The arguments are string literals, so they take single quotes;
-- double quotes would be parsed as identifiers.
call set_table_property('tb1', 'orientation', 'column');
commit;
-- Create tb1 and declare column a as its clustering key in one transaction.
BEGIN;
CREATE TABLE tb1 (
    a int  NOT NULL,
    b text NOT NULL
);
CALL set_table_property('tb1', 'clustering_key', 'a');
COMMIT;
-- Create tb1 and declare the timestamp column ts as its segment key in one transaction.
BEGIN;
CREATE TABLE tb1 (
    a  int       NOT NULL,
    ts timestamp NOT NULL
);
CALL set_table_property('tb1', 'segment_key', 'ts');
COMMIT;
-- Create tb1 and list column a under bitmap_columns in one transaction.
BEGIN;
CREATE TABLE tb1 (
    a int  NOT NULL,
    b text NOT NULL
);
CALL set_table_property('tb1', 'bitmap_columns', 'a');
COMMIT;
-- Create tb1 and list column b under dictionary_encoding_columns in one transaction.
BEGIN;
CREATE TABLE tb1 (
    a int  NOT NULL,
    b text NOT NULL
);
CALL set_table_property('tb1', 'dictionary_encoding_columns', 'b');
COMMIT;
-- tb1 is distributed by column a.
begin;
create table tb1 (a int, b int, c int);
call set_table_property('tb1', 'distribution_key', 'a');
commit;
-- tb2 is distributed by column b. The property must name tb2, the table created
-- in this transaction.
begin;
create table tb2 (a int, b int, c int);
call set_table_property('tb2', 'distribution_key', 'b');
commit;
-- The join key on each side is that table's distribution key.
select count(1)
from tb1
inner join tb2
    on tb1.a = tb2.b;
-- Create tb1 and set its time_to_live_in_seconds property in one transaction.
-- NOTE(review): the value passed is the string '3.1415926'; confirm that a fractional
-- number of seconds is intended here.
BEGIN;
CREATE TABLE tb1 (
    a int  NOT NULL,
    b text NOT NULL
);
CALL set_table_property('tb1', 'time_to_live_in_seconds', '3.1415926');
COMMIT;
通过explain查看执行计划
执行analyze,生成统计信息