Greenplum或DeepGreen数据库查看表倾斜的方法总结(2)

根据上次写的博客:

http://blog.csdn.net/jiangshouzhuang/article/details/51792580

下面整理了一个更详细的查看数据倾斜的函数,如下:

-- my_create_func_for_extended_skew()
--
-- Reports, for every ordinary user table in the current database, how evenly
-- its data files are spread across the segments.
--
-- How it works:
--   1. Looks up the OID of the current database.
--   2. (Re)creates the external web table my_db_files_external, which runs
--      `ls -l` on the database directory of every segment and returns one row
--      per file: segment id, file name, full path and size as printed by ls.
--   3. Joins those files to pg_class on relfilenode, sums the file sizes per
--      (schema, table, segment) and then aggregates per (schema, table).
--   4. Drops the external table again.
--
-- Returned columns (one row per table):
--   schema_name             schema of the table
--   table_name              name of the table
--   total_size_GB           sum of the per-segment sizes, in GB (bytes / 1024^3)
--   seg_min_size_GB         size on the smallest segment, in GB
--   seg_max_size_GB         size on the largest segment, in GB
--   seg_avg_size_GB         average size per segment, in GB
--   seg_gap_min_max_percent 100 * (max - min) / max, i.e. the skew in percent
--   seg_gap_min_max_GB      max - min, in GB
--   nb_empty_seg            number of segments on which the table's size is 0
--
-- Errors: any failure is re-raised as '(function_name:location:message)',
-- where location is the last v_location checkpoint reached.
--
-- NOTE(review): the file listing is filtered with `grep gpadmin`, so it
-- assumes the data files are owned by an OS user named gpadmin -- confirm for
-- your installation.
-- NOTE(review): the join assumes pg_class.relfilenode on the master matches
-- the file names on the segments -- verify for your Greenplum version.
CREATE OR REPLACE FUNCTION my_create_func_for_extended_skew(
    OUT schema_name             varchar,
    OUT table_name              varchar,
    OUT total_size_GB           numeric(15,2),
    OUT seg_min_size_GB         numeric(15,2),
    OUT seg_max_size_GB         numeric(15,2),
    OUT seg_avg_size_GB         numeric(15,2),
    OUT seg_gap_min_max_percent numeric(6,2),
    OUT seg_gap_min_max_GB      numeric(15,2),
    OUT nb_empty_seg            int)
RETURNS SETOF record AS
$$
DECLARE
    v_function_name text := 'my_create_func_for_extended_skew';
    v_location      int;     -- checkpoint marker reported by the exception handler
    v_sql           text;    -- dynamic statement about to be executed
    v_db_oid        text;    -- OID of the current database, used as directory name
    v_res           record;  -- one aggregated row per (schema, table)
BEGIN
    -- v_location records how far the function got, to help locate failures.
    v_location := 1000;

    -- Fetch the OID of the current database.
    SELECT oid INTO v_db_oid
    FROM pg_database
    WHERE datname = current_database();

    -- Remove any leftover external table from a previous run.
    v_location := 2200;
    v_sql := 'DROP EXTERNAL TABLE IF EXISTS my_db_files_external';

    v_location := 2300;
    EXECUTE v_sql;

    -- Build the external web table. The generated command is:
    --   ls -l $GP_SEG_DATADIR/base/<oid> | grep gpadmin |
    --     awk {'print ENVIRON["GP_SEGMENT_ID"] "<TAB>" $9 "<TAB>"
    --           ENVIRON["GP_SEG_DATADIR"] "/base/<oid>/" $9 "<TAB>" $5'}
    -- i.e. $9 is the file name and $5 the size column of `ls -l`.
    -- The path prefix includes /base/ so that the filename column matches the
    -- directory that is actually listed.
    v_location := 3000;
    v_sql := 'CREATE EXTERNAL WEB TABLE my_db_files_external ' ||
            '(segment_id int, relfilenode text, filename text, ' ||
            'size numeric) ' ||
            'execute E''ls -l $GP_SEG_DATADIR/base/' || v_db_oid ||
            ' | ' ||
            'grep gpadmin | ' ||
            E'awk {''''print ENVIRON["GP_SEGMENT_ID"] "\\t" $9 "\\t" ' ||
            'ENVIRON["GP_SEG_DATADIR"] "/base/' || v_db_oid ||
            E'/" $9 "\\t" $5''''}'' on all ' || 'format ''text''';

    /*
    Example content of the external table (one row per file per segment):

    select * from my_db_files_external;
     segment_id | relfilenode |               filename                | size
    ------------+-------------+---------------------------------------+-------
              1 | 10774       | /data1/primary/gpseg1/base/29786/10774 | 65536
              1 | 10776       | /data1/primary/gpseg1/base/29786/10776 |     0
              1 | 10778       | /data1/primary/gpseg1/base/29786/10778 | 32768
              1 | 10779       | /data1/primary/gpseg1/base/29786/10779 | 32768
     ...
    */

    v_location := 3100;
    EXECUTE v_sql;

    /*
    The inner subquery below lists the size of every table on every segment:

     vschema_name |    vtable_name    | segment_id | size
    --------------+-------------------+------------+------
     os           | ao_queue          |         36 | 1120
     os           | ao_variables      |         30 |    0
     os           | ao_schedule       |         12 |    0
     os           | ao_ext_connection |          3 |    0
     os           | ao_schedule       |         36 |  208
     ...

    To inspect a single table, run the inner subquery on its own with an extra
    predicate such as:  AND c.relname = 'my_bigtable'

    To report only large, skewed tables (bigger than 1 GB and with a skew
    above 20%), append to the outer query:
        HAVING sum(sub.size)/(1024^3) > 1
           AND (100*(max(sub.size) - min(sub.size))
                /greatest(max(sub.size),1))::numeric(6,2) > 20
    */
    v_location := 4000;
    -- Column aliases carry a "v" prefix to keep them distinct from the OUT
    -- parameters of the same name.
    FOR v_res IN (
        SELECT  sub.vschema_name,
                sub.vtable_name,
                (sum(sub.size)/(1024^3))::numeric(15,2) AS vtotal_size_GB,
                -- Smallest, largest and average per-segment size.
                (min(sub.size)/(1024^3))::numeric(15,2) AS vseg_min_size_GB,
                (max(sub.size)/(1024^3))::numeric(15,2) AS vseg_max_size_GB,
                (avg(sub.size)/(1024^3))::numeric(15,2) AS vseg_avg_size_GB,
                -- Gap between the smallest and the largest segment, as a
                -- percentage of the largest; greatest(..., 1) avoids a
                -- division by zero for tables that are empty everywhere.
                (100*(max(sub.size) - min(sub.size))/greatest(max(sub.size),1))::numeric(6,2) AS vseg_gap_min_max_percent,
                ((max(sub.size) - min(sub.size))/(1024^3))::numeric(15,2) AS vseg_gap_min_max_GB,
                count(sub.size) FILTER (WHERE sub.size = 0) AS vnb_empty_seg
        FROM (
            SELECT  n.nspname AS vschema_name,
                    c.relname AS vtable_name,
                    db.segment_id,
                    sum(db.size) AS size
            FROM ONLY my_db_files_external db
                -- File names look like <relfilenode>[.<n>]; strip the suffix.
                JOIN pg_class c ON split_part(db.relfilenode, '.'::text, 1) = c.relfilenode::text
                JOIN pg_namespace n ON c.relnamespace = n.oid
            WHERE c.relkind = 'r'::"char"
                AND n.nspname NOT IN ('pg_catalog','information_schema','gp_toolkit')
                AND NOT n.nspname LIKE 'pg_temp%'
            GROUP BY n.nspname, c.relname, db.segment_id
        ) sub
        GROUP BY sub.vschema_name, sub.vtable_name
    ) LOOP
        -- Copy the row into the OUT parameters and emit it.
        schema_name             := v_res.vschema_name;
        table_name              := v_res.vtable_name;
        total_size_GB           := v_res.vtotal_size_GB;
        seg_min_size_GB         := v_res.vseg_min_size_GB;
        seg_max_size_GB         := v_res.vseg_max_size_GB;
        seg_avg_size_GB         := v_res.vseg_avg_size_GB;
        seg_gap_min_max_percent := v_res.vseg_gap_min_max_percent;
        seg_gap_min_max_GB      := v_res.vseg_gap_min_max_GB;
        nb_empty_seg            := v_res.vnb_empty_seg;
        RETURN NEXT;
    END LOOP;

    -- Clean up the helper external table.
    v_location := 4100;
    v_sql := 'DROP EXTERNAL TABLE IF EXISTS my_db_files_external';

    v_location := 4200;
    EXECUTE v_sql;

    RETURN;

    EXCEPTION
        WHEN OTHERS THEN
            -- Re-raise with the function name and last checkpoint for diagnosis.
            RAISE EXCEPTION '(%:%:%)', v_function_name, v_location, sqlerrm;
END;
$$
LANGUAGE plpgsql;

 

 

 

 

你可能感兴趣的:(Greenplum或DeepGreen数据库查看表倾斜的方法总结(2))