```sql
--一 日期计算和转换
--1.1 系统时间
SELECT
SYSDATE -- 2021-01-27 DATE
,SYSTIME -- 18:01:57
,SYSTIMESTAMP --2021-01-27 18:01:57 DATE
,current_date --2020-05-14 DATE
,current_timestamp --2020-05-14 10:26:57.613
FROM SYSTEM.DUAL;
--1.2 将不规则的字符串格式化成YYYY-MM-DD的字符串
SELECT
TDH_TODATE('20200101') --2020-01-01
,TDH_TODATE('2020/01/01') --2020-01-01
,date_format('2020-05-01 12:00:00','yyyy-MM-dd') --2020-05-01
,date_format('2020-05-01 12:00:00','yyyy-MM') --2020-05
FROM SYSTEM.DUAL;
--1.3 将字符串和日期相互转换
SELECT
-- STRING 转换为 DATE
DATE '20200101' -- 2020-01-01(DATE)
,DATE('2020-01-01') -- 2020-01-01(DATE)
,CAST('20200101' AS DATE) -- 2020-01-01(DATE)
,CAST('2020-01-01' AS DATE) -- 2020-01-01(DATE)
,CAST('2020-01-01 12:25:50' AS TIMESTAMP)-- 2020-01-01 12:25:50(DATE)
-- DATE转换成STRING
,TO_CHAR(SYSDATE,'YYYY-MM-DD') -- 2022-02-07
,TO_CHAR(SYSDATE,'YYYYMMDD') -- 20220207
,TO_CHAR(SYSDATE,'YYYY/MM/DD') -- 2022/02/07
FROM SYSTEM.DUAL;
--1.4 获取日期月初、月末、年初、年末
SELECT
TRUNC('2016-10-22','MM') --2016-10-01
,LAST_DAY('2016-10-22') --2016-10-31(DATE)
,TRUNC('2016-10-22','YYYY') --2016-01-01
,DATE_ADD(TRUNC(ADD_MONTHS('2016-10-22',12),'YYYY'),-1) --2016-12-31
FROM SYSTEM.DUAL LIMIT 1;
--1.5 截取年或月或天
SELECT
SYSTIMESTAMP --2022-02-09 11:13:15 DATE
,SYSDATE --2022-02-09 DATE
--方式一
,EXTRACT(YEAR FROM '2015-01-15') --2015 INT
,EXTRACT(DAY FROM '2015-01-15') --15 INT
,EXTRACT(MONTH FROM '2015-01-15') --1 INT
,EXTRACT(HOUR FROM '2022-08-20 02:30:58') --2 INT 12小时制
,EXTRACT(MINUTE FROM '2022-08-20 02:30:58') --30 INT
,EXTRACT(SECOND FROM '2022-08-20 02:30:58') --58 INT
,EXTRACT(YEAR FROM SYSDATE) --2022 INT
,EXTRACT(MONTH FROM SYSDATE) --2 INT
,EXTRACT(DAY FROM SYSDATE) --9 INT
,EXTRACT(HOUR FROM SYSTIMESTAMP) --11 INT
,EXTRACT(MINUTE FROM SYSTIMESTAMP) --13 INT
,EXTRACT(SECOND FROM SYSTIMESTAMP) --15 INT
--方式二
,dayofmonth('2020-05-06') --6 INT 查询该日期在本月的第多少天
,weekofyear('2020-05-14') --20 INT 日期所在年份的第多少周
,year('2020-05-14 12:00:00') --2020
,month('2020-05-14 12:00:00') --5
,day('2020-05-14 12:00:00') --14
,minute('2020-05-14 12:00:00') --0
,second('2020-05-14 12:00:00') --0
FROM SYSTEM.DUAL LIMIT 1;
--1.6 时间间隔写法
SELECT SYSDATE --2022-02-09 DATE
-- 间隔写法一(常用写法)
,DATE_ADD('2020-09-30', 1) --2020-10-01
,DATE_SUB('2020-09-30', 1) --2020-09-29
,ADD_MONTHS('2020-09-30',1) --2020-10-30 ***结果只是月份+1,并不是下月月底日期,想要下月月底加LAST_DAY()
,ADD_MONTHS('2020-09-30',-1) --2020-08-30
-- 间隔写法二
,SYSDATE + INTERVAL '2' YEAR --2024-02-09 DATE
,SYSDATE + INTERVAL '2' MONTH --2022-04-09 DATE
,SYSDATE + INTERVAL '2' DAY --2022-02-11 11:31:59 DATE
,SYSDATE + INTERVAL '2' HOUR --2022-02-09 13:31:59 DATE
,SYSDATE + INTERVAL '2' MINUTE --2022-02-09 11:33:59 DATE
,SYSDATE + INTERVAL '2' SECOND --2022-02-09 11:32:01 DATE
-- 间隔写法三
,SYSDATE + TO_YEAR_INTERVAL(2) --2024-02-09 DATE
,SYSDATE + TO_MONTH_INTERVAL(2) --2022-04-09 DATE
,SYSDATE + TO_DAY_INTERVAL(2) --2022-02-11 11:31:59 DATE
,SYSDATE + TO_HOUR_INTERVAL(2) --2022-02-09 13:31:59 DATE
,SYSDATE + TO_MINUTE_INTERVAL(2) --2022-02-09 11:33:59 DATE
,SYSDATE + TO_SECOND_INTERVAL(2) --2022-02-09 11:32:01 DATE
-- 间隔日期按照周末计算
,next_day('2022-08-01','Monday') --取该日期的下一个周几的日期
--星期一到星期日的英文(Monday,Tuesday、Wednesday、Thursday、Friday、Saturday、Sunday) 2022-08-08 00:00:00
-- 日期相减
,datediff('2020-05-01','2020-05-05') --4
FROM SYSTEM.DUAL;
--1.7天数转换成日期
DATE_ADD('1899-12-31',CAST(T.STOP_DATE AS INT))
--二 存储测试、函数创建
--2.1 测验hive存储的语句以及循环调用
DECLARE
next_pim_day String;
a INT;
b INT;
c INT := 0;
BEGIN
SELECT date_add(max(txdate),1) into next_pim_day FROM CDP_t_pim_info;
--输出变量,测验结果
DBMS_OUTPUT.PUT_LINE(next_pim_day)
--循环执行方式一
a := 10;
b := 1;
WHILE b < a
LOOP
DBMS_OUTPUT.PUT_LINE(a || '参与循环 ' || b);
b := b + 1;
DBMS_OUTPUT.PUT_LINE(b );
END LOOP;
DBMS_OUTPUT.PUT_LINE(a || '不参与循环 ' ||b);
--循环执行方式二
LOOP
c := c +1;
DBMS_OUTPUT.PUT_LINE(c || '参与循环 ');
EXIT WHEN c=10;
END LOOP;
END ;
--2.2游标
DECLARE
transactions_type sjzt_jxcx_req_result%ROWTYPE;
CURSOR cur IS SELECT distinct req_code,model_type FROM sjzt_jxcx_req_result;
BEGIN
FOR transactions_type IN cur
LOOP
DBMS_OUTPUT.PUT_LINE(transactions_type.req_code || ' ' || transactions_type.model_type)
END LOOP;
END;
--2.3.1 hive自定义函数
create or replace function FUN_NL(iden_no in STRING) return STRING is
res STRING;
-----通过身份证号获取年龄
begin
select case
when length(trim(iden_no)) = 18 then
to_char(sysdate,'yyyy') - substr(trim(iden_no), 7, 4)
else
'99'
end as brnl
into res
from SYSTEM.DUAL t;
return(res);
end FUN_NL;
--SELECT FUN_NL('123') FROM SYSTEM.DUAL ; --调用
--2.3.2 调用
DECLARE
v1 STRING
BEGIN
v1 := FUN_NL('1236')
dbms_output.put_line(v1)
end ;
--三 存储中常用常用语法语句
--1.0 匹配
-- 正则匹配字母
v_sql := 'DELETE FROM bc_pmctl_user_lsb_instr WHERE length(user_id) <> 11 or user_id rLIKE \'[a-z,A-Z]+\';' -- +表示一个到多个; *表示0到多个; ?表示0或者1个;
DBMS_OUTPUT.PUT_LINE(v_sql);
-- 替换特殊字符的函数 其中 chr(a int)函数根据传进来的参数,返回对应ASCII的字符类型
TRANSLATE(TRANSLATE(A.PARTY_NAME,'?@!^┦∫~',''),chr(13)||chr(10),','); -- 10->换行键 13->回车键
SELECT TRANSLATE('What is this','ait','-*@') FROM system.dual; --Wh-@ *s @h*s
TRANSLATE(a,b,c) 将a中出现的b,将b中各个字符替换成c中位置顺序与其相同的c中的字符;实质上是过滤
--split 的用法转义字符
select '603115018200475408|45456454564',SPLIT('603115018200475408|45456454564','\\\|')[1] from system.dual limit 200 ;
--2.0 动态sql拼接变量的方法 单引号可以使用'\'转义
--将日期格式由monthUp_last_day名称'monthUp_last_day'方便拼接sql; 拼接动态sql
monthUp_last_day := '\''||monthUp_last_day||'\'';
在存储中可以将删除sql语句直接放在存储中,不用拼接;
一般动态sql适用于:动态表名或者分区的拼接;
--3.1.1 行转列示例
SELECT name,b FROM XJ02 LATERAL VIEW explode(split(old_org_id,',')) a AS b; -- 侧写视图和炸裂函数
--3.1.2 行转列时存在空值自定义赋值
SELECT
req_code,
max(CASE WHEN query_check = 'paper_no_all' THEN query_check_content ELSE NULL end) paper_no_all,
nvl(max(CASE WHEN query_check = 'date_start' THEN query_check_content ELSE NULL end),'1900-01-01') date_start,
nvl(max(CASE WHEN query_check = 'date_end' THEN query_check_content ELSE NULL end),substr(SYSDATE,1,10)) date_end
FROM sjzt_jxcx_req_result_orc WHERE model_type = '3-1' GROUP BY req_code;
--3.2 列转行示例
SELECT
concat_ws('|',collect_set(trim(二分)) )AS 二分,
sum(loan_bal)AS 贷款余额,
REPAY_ACC_ACC_NO
FROM dhfx_n_hch_nj_acct_2 a --where FUN_LOANTYPE_new (prodt_no)<>'3'
GROUP BY REPAY_ACC_ACC_NO ;
--4 条件判断
SELECT
IF(1!=1,'trans','age'),--age
COALESCE(NULL,NULL,1), -- 返回第一个非null值 1
CASE 3 WHEN 2 THEN 'a' when 1 THEN 'b' ELSE 'c' END --c
FROM system.dual LIMIT 1;
--5 在系统默认表中hive生成统一ID、转换字段类型示例
select
regexp_replace(reflect("java.util.UUID", "randomUUID"), "-", ""), --6525f793dc1b476ca5d01279a2e3bd5e
CAST('2015-01-01' AS DATE),--2015-01-01 date
CAST(1.78 AS DOUBLE),--1.78 number
CAST(123456789.3456 AS DECIMAL(11,2))--123,456,789.35 number
from system.dual;
-- 多表关联的方式,不建议使用
with dsf as
(
语句一
),
fx as(
语句二
),
...
,
yl as(
语句三
)
select
*
from on_zong_body t
left join zxbg_zszb_jy zx on t.bus_id=zx.bus_id and t.certificate_code=zx.certno --本人征信指标.
left join dsf s on s.bus_id=t.bus_id
left join on_zong_send se on se.bus_id=t.bus_id
where dh.line_id is not null
order by t.createtime desc;
--四 存储中书写注意事项
--存储书写注意事项
正确写法: v_sql := ;
错误写法:v_sql : = ;
-- not in 使用注意事项 如果判断字段存在空值则会失效,使用如下写法不会失效
select * from cdp_t_emp_loan_use_middle_temp1 WHERE rela_type not IN ('01','02','03') OR rela_type IS null;
--五 优化,排错
--六 进程相关
--七 DDL语句
-- 跑数reduce数量不够
SET mapred.reduce.tasks=500;
-- 修改表中的字段sql
alter table JWCX_FK_ZKHCXLC change column qqdbs qqdbs string COMMENT '请求单标识';
-- 添加字段
ALTER TABLE base_card_fk ADD COLUMNS (txdate DATE DEFAULT NULL COMMENT '数据日期');
--21.144.1.100 21.144.1.102 表赋权
GRANT all ON TABLE cdp.cdp_t_emp_loan_cx_gcb1 TO user cdp;
## 创建表的相关操作
--ORC表
clustered by (rh_org) into 101 buckets stored as orc tblproperties ("transactional"="true");
--external文本表
row format delimited fields terminated by '!';
----原样复制表结构
CREATE TABLE T_CDP_EMP_LABEL LIKE cdp_t_emp_label [row format delimited fields terminated by '!'];
--ES映射表 创表时末尾添加 字段增加分词器 设置查询模式 查询语句示例(模糊和精确查询)
1.STORED AS ES WITH SHARD NUMBER 10 REPLICATION 1;
2.cust_base_khdz string append analyzer 'ZH' 'ik'
3.SET ngmr.exec.mode=LOCAL;
4.SELECT * FROM cpd_t_cust_label_es WHERE CONTAINS(cust_base_khdz,"'江苏省苏州市相城区*'")
AND cust_base_sex='男' AND cust_base_age >'5' AND new_org_id= '32007708' LIMIT 10;
--hbase 映射表
STORED AS HYPERDRIVE
tblproperties(
'COMPRESSION'='SNAPPY',
'hyperdrive.table.splitkey'='"1","2","3","4","5","6","7","8","9","a","b","c","d","e","f"'
);
--hbase映射表中 es同步全文索引及查询
CREATE FULLTEXT INDEX ON cpd_t_cust_label_hd
(cust_base_khdz,cust_base_name,cust_base_phone)SHARD NUM 10;
SELECT * FROM cpd_t_cust_label_hd WHERE CONTAINS(cust_base_khdz,"prefix '江苏省'");
SELECT * FROM cpd_t_cust_label_hd WHERE CONTAINS(cust_base_khdz,"wildcard '*苏州市*'");
--hbase映射表中 hbase二级索引
CREATE GLOBAL INDEX cpd_t_cust_label_hd_cust_base_zzhm_new_org_id ON cpd_t_cust_label_hd
(cust_base_zzhm(18),new_org_id(8));
--创建分区表
DROP TABLE T_CDM_DWD_CUST_INFO_TQ;
CREATE TABLE IF NOT EXISTS T_CDM_DWD_CUST_INFO_TQ (
cs_field1 varchar(1) DEFAULT NULL COMMENT 'varchar(1)测试字段'
,cs_field2 varchar(8) DEFAULT NULL COMMENT 'varchar(8)测试字段'
,cs_field3 varchar(20) DEFAULT NULL COMMENT 'varchar(20)测试字段'
,cs_field4 varchar(60) DEFAULT NULL COMMENT 'varchar(60)测试字段'
,cs_field5 string DEFAULT NULL COMMENT 'string测试字段'
,cs_field6 int DEFAULT NULL COMMENT 'int测试字段'
,cs_field7 decimal(10,2) DEFAULT NULL COMMENT 'decimal(10,2)测试字段'
,cs_field8 float DEFAULT NULL COMMENT 'float测试字段'
,cs_field9 char(1) DEFAULT NULL COMMENT 'char(1)测试字段'
) COMMENT '数据接口测试多字段大数据量测试表'
PARTITIONED BY (bds_etl_job_dt date)
row format delimited fields terminated by '`';
INSERT INTO cdp.TB_BUSINESS_UPDATEPHONE PARTITION (bds_etl_job_dt = '2021-12-01')
select bankid
,cardno
,cnname
,didno
,updatephone
,updatedate
,bds_etl_job_dt
FROM T_ZP_TB_BUSINESS_UPDATEPHONE;
如果A表 join B表,同时右表存在一对多的关系,结果的条数会比预想的多
```
SHOW PLSQL FUNCTIONS db_name;
DESC PLSQL FUNCTION EXTENDED SPLIT;
SHOW PLSQL PACKAGES db_name;
DESC PLSQL PACKAGE EXTENDED SPLIT; eg: DESC PLSQL PACKAGE EXTENDED CDP_PRO_prod_gjhk;
CREATE (OR REPLACE) FUNCTION/PROCEDURE/PACKAGE BODY
DROP PLSQL FUNCTION/PROCEDURE/PACKAGE
CREATE DATABASE LINK demo02hive using 'jdbc:hive://IP:端口号/default';
CREATE DATABASE LINK hive119 CONNECT TO hive IDENTIFIED BY '123' USING 'jdbc:hive2://IP:端口号/default';
CREATE DATABASE LINK zxb_20210305 CONNECT TO dev IDENTIFIED BY '密码' USING 'jdbc:oracle:thin:@IP:端口号/cpddpdb';
CREATE DATABASE LINK mysql_bob_118 CONNECT TO bob IDENTIFIED BY '123456' USING 'jdbc:mysql://IP:3306/test';
CREATE DATABASE LINK data_center CONNECT TO data_center IDENTIFIED BY '密码' USING 'jdbc:postgresql://IP:端口号/data_center'
SELECT * FROM public.a_work_bug@data_center LIMIT 10;
SHOW DATABASE LINKS
DESC DATABASE LINK app_mpm
DROP DATABASE LINK zxb_20210305;
SHOW DATABASES;
desc CDP_t_kjzf_info;
SHOW TABLES [in 数据库名] LIKE '*BASE*';
查询表的最新加载日期
SELECT max(last_txdate) INTO c1_date FROM etl_job@mysql_jspsbc t
where t.etl_job = 'STD_LGF_PK_FIX_LEG' AND last_jobstatus='Done'
beeline -u 'jdbc:hive2://IP:端口号' -n app_data -p 密码
beeline -u 'jdbc:hive2://IP:端口号' -n cdp -p 密码
beeline -u 'jdbc:hive2://IP:端口号' -n mpm -p 密码
beeline -u jdbc:hive2:
hdfs dfs -rm -R hdfs:
hadoop fs -scp root@IP:hdfs:
hadoop distcp -update -async -skipcrccheck hdfs:
hive