Postgresql分表与优化

--1、创建主表
CREATE TABLE tbl_partition
(
  date_key date,
  hour_key smallint,
  client_key integer,
  item_key integer,
  account integer,
  expense numeric
);

--2、创建多个分表。每个分区表必须继承自主表,并且正常情况下都不要为这些分区表添加任何新的列。
CREATE TABLE tbl_partition_2016_01() inherits (tbl_partition);
CREATE TABLE tbl_partition_2016_02() inherits (tbl_partition);
CREATE TABLE tbl_partition_2016_03() inherits (tbl_partition);
--CREATE TABLE tbl_partition_2016_04() inherits (tbl_partition);

--3、TODO为分区表添加限制。这些限制决定了该表所能允许保存的数据集范围。这里必须保证各个分区表之间的限制不能有重叠。
ALTER TABLE tbl_partition_2016_01
ADD CONSTRAINT tbl_partition_2016_01_check_date_key
CHECK (date_Key >= '2016-01-01'::date AND date_Key < '2016-02-01'::date);
ALTER TABLE tbl_partition_2016_02
ADD CONSTRAINT tbl_partition_2016_02_check_date_key
CHECK (date_Key >= '2016-02-01'::date AND date_Key < '2016-03-01'::date);
ALTER TABLE tbl_partition_2016_03
ADD CONSTRAINT tbl_partition_2016_03_check_date_key
CHECK (date_Key >= '2016-03-01'::date AND date_Key < '2016-04-01'::date);

--4、为每一个分区表,在主要的列上创建索引。该索引并不是严格必须创建的,但在大部分场景下,它都非常有用。
---CREATE INDEX tbl_partition_all_date_key ON tbl_partition_all (date_key,client_key);
CREATE INDEX tbl_partition_date_key_2016_01
ON tbl_partition_2016_01 (date_key,client_key);
CREATE INDEX tbl_partition_date_key_2016_02
ON tbl_partition_2016_02 (date_key,client_key);
CREATE INDEX tbl_partition_date_key_2016_03
ON tbl_partition_2016_03 (date_key,client_key);

--5、定义一个trigger或者rule把对主表的数据插入操作重定向到对应的分区表。
--创建分区函数
CREATE OR REPLACE FUNCTION tbl_partition_trigger()
  RETURNS TRIGGER AS $$
BEGIN
  IF NEW.date_key >= DATE '2016-01-01' AND NEW.date_Key < DATE '2016-02-01'
  THEN
    INSERT INTO tbl_partition_2016_01 VALUES (NEW.*);
  ELSIF NEW.date_key >= DATE '2016-02-01' AND NEW.date_Key < DATE '2016-03-01'
    THEN
      INSERT INTO tbl_partition_2016_02 VALUES (NEW.*);
  ELSIF NEW.date_key >= DATE '2016-03-01' AND NEW.date_Key < DATE '2016-04-01'
    THEN
      INSERT INTO tbl_partition_2016_03 VALUES (NEW.*);
  END IF;
  RETURN NULL;
END;
$$
LANGUAGE plpgsql;

--6、挂载分区Trigger
CREATE TRIGGER insert_tbl_partition_trigger
BEFORE INSERT ON tbl_partition
FOR EACH ROW EXECUTE PROCEDURE tbl_partition_trigger();

--7、创建全表
CREATE TABLE tbl_partition_all
(
  date_key date,
  hour_key smallint,
  client_key integer,
  item_key integer,
  account integer,
  expense numeric
);

--8、自动建表触发器
CREATE OR REPLACE FUNCTION tbl_partition_trigger()
  RETURNS TRIGGER AS $$
DECLARE month_text TEXT;
        this_month_first_day_text TEXT;
        next_month_first_day_text TEXT;
        insert_statement TEXT;
BEGIN
  SELECT to_char(NEW.date_key, 'YYYY_MM') INTO month_text;
  SELECT get_month_first_day(NEW.date_key) INTO this_month_first_day_text;
  SELECT to_char(to_date(this_month_first_day_text,'YYYY-MM-DD') + interval '1 month', 'YYYY-MM-DD') INTO next_month_first_day_text;
  insert_statement := 'INSERT INTO tbl_partition_'
                      || month_text||' VALUES ($1.*)';
  EXECUTE insert_statement USING NEW;
  RETURN NULL;
  EXCEPTION
  WHEN UNDEFINED_TABLE
    THEN
      EXECUTE
      'CREATE TABLE IF NOT EXISTS tbl_partition_'
      || month_text
      || '(CHECK (date_key >= '''
      || this_month_first_day_text
      || ''' and date_key<'''
      || next_month_first_day_text
      || ''')) INHERITS (tbl_partition)';
      RAISE NOTICE 'CREATE NON-EXISTANT TABLE tbl_partition_%', month_text;
      EXECUTE
      'CREATE INDEX tbl_partition_date_key_'
      || month_text
      || ' ON tbl_partition_'
      || month_text
      || '(date_key)';
      EXECUTE insert_statement USING NEW;
      RETURN NULL;
END;
$$
LANGUAGE plpgsql;

--get_month_first_day
CREATE OR REPLACE FUNCTION get_month_first_day(in in_date date,out out_date text)
AS $$
BEGIN
  SELECT to_char(in_date, 'YYYY_MM')||'_01' INTO out_date;
END;
$$
LANGUAGE plpgsql;

--插入数据脚本
INSERT INTO
    tbl_partition_all
select
    (select
        array_agg(i::date)
    from
        generate_series(
        '2015-12-01'::date,
        '2015-12-30'::date,
        '1 day'::interval) as t(i)
    )[floor(random()*4)+1] as date_key,
    floor(random()*24) as hour_key,
    floor(random()*1000000)+1 as client_key,
    floor(random()*100000)+1 as item_key,
    floor(random()*20)+1 as account,
    floor(random()*10000)+1 as expense
from
    generate_series(1,300000000,1);
    
INSERT INTO tbl_partition SELECT * FROM tbl_partition_all;


-------------------------------------------------主体结束-----------------------------------------
-------------------------------------------------主体结束-----------------------------------------

--使用rule创建分表
CREATE RULE tbl_partition_rule_2016_01 AS
ON INSERT TO tbl_partition
  WHERE
    date_key >= DATE '2016-01-01' AND date_Key < DATE '2016-02-01'
DO INSTEAD
  INSERT INTO tbl_partition_2016_01 VALUES (NEW.*);

--删除继承关系
ALTER TABLE tbl_partition_2016_01 NO INHERIT tbl_partition;

--查询对比测试
EXPLAIN  ANALYZE
select count(account) ,client_key  from  tbl_partition  v
where v.date_key >='2016-03-02'   and v.date_key <='2016-03-07' group by client_key ;

EXPLAIN  ANALYZE
select count(account) ,client_key  from  tbl_partition_all  v
where v.date_key >='2016-03-02'   and v.date_key <='2016-03-12' group by client_key ;

 

你可能感兴趣的:(Postgresql分表与优化)