sql用于特征工程-阿里智联招聘天池比赛复赛部分代码

阿里智联招聘天池比赛部分代码 --阿里复赛平台


--onehot(以下查询实际做的是标签编码:为每个去重后的取值分配一个整数编号)

-- Label-encoding lookup: one row per distinct desire_jd_1p_coss_pairs value,
-- numbered 1..N in descending order of that value.
SELECT
    feature,
    ROW_NUMBER() OVER (ORDER BY d.feature DESC) AS desire_jd_1p_coss_pairs_label
FROM (
    SELECT DISTINCT
        desire_jd_1p_coss_pairs AS feature
    FROM ${t1}
) d;

-- Attach the integer label of city_coss_pairs to every row of ${t1}.
-- ${t2} is read as a lookup exposing (feature, city_coss_pairs_label);
-- because of the LEFT JOIN, rows without a matching feature get a NULL label.
SELECT
    t1.*,
    t2.city_coss_pairs_label
FROM ${t1} t1
LEFT JOIN ${t2} t2
    ON t1.city_coss_pairs = t2.feature;


-- Distinct users present in ${t1}.
SELECT DISTINCT user_id
FROM ${t1};

-- Regex column-spec hint (presumably "every t2 column except jd_no" -- confirm on the target engine):
-- t2.`(jd_no)?+.+` from ${t1} t1 left join ${t2} t2 on t1.jd_no = t2.jd_no

-- Keep only the rows of ${t1} whose user_id also appears in ${t2}.
SELECT *
FROM ${t1} t1
WHERE t1.user_id IN (SELECT t2.user_id FROM ${t2} t2);

-- Row count of ${t1}.
SELECT COUNT(*)
FROM ${t1};

-- CASE, simple form:   CASE col WHEN 'a' THEN 1 WHEN ... THEN ... ELSE ... END AS alias
-- CASE, searched form: CASE WHEN col = 'a' THEN 1 ELSE 0 END AS alias
SELECT
    age,
    -- Equivalent searched form: CASE WHEN sex = 'male' THEN 1 ELSE 0 END
    CASE sex WHEN 'male' THEN 1 ELSE 0 END AS sex,
    -- Append further "WHEN <value> THEN <code>" branches as needed;
    -- values without a matching branch yield NULL.
    CASE cp WHEN 'angina' THEN 0 END AS cp
FROM ${t1}
WHERE col2 IS NOT NULL  -- write "IS NULL" instead to keep only the missing values
-- LIMIT 5
;
-- Per-group count / mean: SELECT sex, COUNT(*), AVG(col) AS new_name FROM ... GROUP BY sex;

-- Update: double every tip that is below 2.
-- pandas equivalent: tips.loc[tips['tip'] < 2, 'tip'] *= 2
UPDATE tips
SET tip = tip * 2
WHERE tip < 2;

 -- Delete: remove every row whose tip is greater than 9.
 DELETE FROM tips
 WHERE tip > 9;

 -- Sort by tip, largest first, then skip the first 5 rows and return the next 10.
 SELECT *
 FROM tips
 ORDER BY tip DESC
 LIMIT 10 OFFSET 5;

 -- Ranking inside groups with ROW_NUMBER(): PARTITION BY picks the group, ORDER BY the rank order.
 -- Returns the two largest total_bill rows of every day (rn < 3).
SELECT *
FROM (
    SELECT
        t.*,
        ROW_NUMBER() OVER (PARTITION BY day ORDER BY total_bill DESC) AS rn
    FROM tips t
) ranked  -- derived tables need an alias on most engines (Hive, MySQL, PostgreSQL, ...)
WHERE rn < 3
ORDER BY day, rn;

-- Stack two tables vertically (the SQL counterpart of a concat); UNION ALL keeps duplicate rows.
SELECT city, rank FROM df1
UNION ALL
SELECT city, rank FROM df2;

-- Table join on key. FULL OUTER JOIN is shown here; the other join types
-- (LEFT / RIGHT / INNER JOIN) are written the same way.
SELECT *
FROM df1
FULL OUTER JOIN df2 ON df1.key = df2.key;
-- Searched CASE: map the textual class to an integer code.
SELECT
    CASE
        WHEN class = 'largeDoses' THEN 0
        WHEN class = 'smallDoses' THEN 1
        WHEN class = 'didntLike'  THEN 2
        ELSE NULL  -- explicit fall-through: any other class value stays NULL
    END AS class,
    mileage,
    play,
    ice_cream
FROM ${t1};
-- Number the rows inside each (user_id, jd_no) pair; the number is exposed as the new column rn.
-- The partition uses the two columns directly: concat(user_id, jd_no) can merge
-- different pairs ('1' + '23' and '12' + '3' both give '123').
-- The ordering expression is kept as written: a descending sort on the
-- concatenation of the three flag columns.
SELECT
    ROW_NUMBER() OVER (
        PARTITION BY user_id, jd_no
        ORDER BY concat(satisfied, delivered, browsed) DESC
    ) AS rn
FROM table1;

-- Find the top-scoring student(s) of every subject.
-- T-SQL syntax: #Student is a temp table and [rank] is a bracket-quoted column name.
-- RANK() gives tied scores the same rank, so every student tied for first is returned.
WITH A AS (
    SELECT
        *,
        RANK() OVER (PARTITION BY subjects ORDER BY score DESC) AS [rank]
    FROM #Student
)
SELECT *
FROM A
WHERE [rank] = 1;

-- Label encoder: one row per distinct desire_jd_industry_id value,
-- numbered 1..N in descending order of that value.
SELECT
    feature,
    ROW_NUMBER() OVER (ORDER BY t.feature DESC) AS desire_jd_industry_label
FROM (
    SELECT DISTINCT
        desire_jd_industry_id AS feature
    FROM ${t1}
) t;

-- Table merge: add the desired-industry and current-industry labels to ${t1}.
-- ${t3} and ${t4} are read as lookups keyed by feature; with LEFT JOIN,
-- ids that have no lookup entry keep a NULL label.
SELECT
    t1.*,
    t3.desire_jd_industry_label,
    t4.cur_industry_label
FROM ${t1} t1
LEFT JOIN ${t3} t3
    ON t1.desire_jd_industry_id = t3.feature
LEFT JOIN ${t4} t4
    ON t1.cur_industry_id = t4.feature;

-- Keep only the rows that carry a desire_jd_industry_id.
SELECT *
FROM ${t1} t
WHERE t.desire_jd_industry_id IS NOT NULL;


-- Turn the predicted label back into an industry id: take feature from the lookup ${t2}
-- (matched on prediction_result = desire_jd_industry_label) and expose it as desire_jd_industry_id.
-- The backquoted regex column spec presumably selects every t1 column except the
-- original desire_jd_industry_id -- confirm that regex column specs are enabled on the target engine.
SELECT
    t1.`(desire_jd_industry_id)?+.+`,
    t2.feature AS desire_jd_industry_id
FROM ${t1} t1
LEFT JOIN ${t2} t2
    ON t1.prediction_result = t2.desire_jd_industry_label;

--0827
文档
6、在使用 ORDER BY 排序时,NULL 会被认为比任何值都小,这个行为与 MYSQL 一致,但是
与 ORACLE 不一致。ORDER BY 后面必须加 LIMIT。ORDER BY 后面必须加列的别名,当 SELECT
某列时,如果没有指定列的别名,将列名作为列的别名。
7、[LIMIT number]的 number 是常数,限制输出行数。当使用无 LIMIT 的 SELECT 语句直接
从屏幕输出查看结果时,最多只输出 1000 行。每个项目空间的这个屏显最大限制可能
不同,可以通过控制台面板控制。
8、SORT BY 前必须加 DISTRIBUTE BY;
9、 ORDER BY 不和 DISTRIBUTE BY/SORT BY 共用,同时 GROUP BY 也不和 DISTRIBUTE BY/SORT
BY 共用;


10、MAPJOIN HINT

 --0827,18.00

-- Rows whose cluster_index is 10.
SELECT *
FROM ${t1}
WHERE cluster_index = 10;

-- node1 values of the rows whose grp_id is 'Noah'.
SELECT node1
FROM ${t1}
WHERE grp_id = 'Noah';

-- Create a new table holding one row per (user_id, jd_no) pair of
-- zhaopin_round2_action_test, with the three label columns set to 0.
-- The inner ROW_NUMBER() numbers the rows of each pair and rn = 1 keeps a single one.
-- The partition uses the two columns directly, because concat(user_id, jd_no)
-- can merge different pairs into the same key.
CREATE TABLE clean_zhaopin_round2_action_test AS
SELECT
    t.user_id,
    t.jd_no,
    0 AS satisfied,
    0 AS delivered,
    0 AS browsed
FROM (
    SELECT
        user_id,
        jd_no,
        ROW_NUMBER() OVER (PARTITION BY user_id, jd_no ORDER BY jd_no) AS rn
    FROM zhaopin_round2_action_test
) t
WHERE t.rn = 1;


--0828
-- Type conversion: cast a string literal to DATETIME and compare it with another string literal.
SELECT
    CAST('2019-02-16 00:00:02' AS DATETIME) > '2019-02-16 00:00:01';
-- DOUBLE values carry some precision error, so comparing two DOUBLE values
-- directly with the equals sign is not recommended.

-- Table comment
ALTER TABLE table_name SET COMMENT 'tbl comment';

-- Column operations
-- Add columns.
ALTER TABLE table_name ADD COLUMNS (col_name1 type1 comment 'XXX', col_name2 type2 comment 'XXX');

-- Rename a column.
ALTER TABLE table_name CHANGE COLUMN old_col_name RENAME TO new_col_name;
-- Change a column comment.
ALTER TABLE table_name CHANGE COLUMN col_name COMMENT comment_string;
-- Change the column name and the column comment in one statement.
ALTER TABLE table_name CHANGE COLUMN old_col_name new_col_name column_type COMMENT column_comment;

--SELECT_EXPR正则表达式:sql正则:https://blog.csdn.net/zjx2016/article/details/80784449

order by不和distribute by/sort by共用,同时group by也不和distribute by/sort by共用;order by/sort by/distribute by的key必须使用select的输出列别名。

--语序
order by不和distribute by/sort by共用,同时group by也不和distribute by/sort by共用;order by/sort by/distribute by的key必须使用select的输出列别名。

 

你可能感兴趣的:(编程,数据库)