其他:
sql语句的执行顺序:from-->join-->on-->where-->groupby-->having-->select-->orderby
sql语句的分类:DQL查询语句:select from where
DDL定义语句:create alter drop
DML操作语句:update delete insert
DCL控制语句:grant revoke(commit、rollback属于TCL事务控制语句)
删除表:drop table t:整个表删除,包括表里的内容和表结构
truncate table t:删除表里的全部内容,但保留表结构
delete from t where col = 'cc':删除满足条件的记录
表操作:1.创建:create table t() 主键:primary key 自增:auto_increment 非null:not null
2.插入:insert into table t values();
3.更新:update t set col = 'aaa' where col = '123'
排序:同时对两列进行排序:order by col1 desc,col2 asc
子查询:查询语句里的查询嵌套
组合查询:union语句
1.表order有三个字段,店铺ID,订单时间,订单金额,查询一个月内每周都有销量的店铺。weekofyear(day)函数
-- Shops that recorded sales in every week of July 2019.
-- weekofyear(dt) maps each order date to its week-of-year number.
with tmp_order as (
    select
        id
        ,weekofyear(dt) as dt_new
    from `order`  -- `order` is a reserved word, so it has to be back-quoted
    where dt >= '2019-07-01'
      and dt < '2019-08-01'  -- half-open range keeps orders placed during 2019-07-31 when dt carries a time part
)
-- July 2019 touches 5 distinct weeks (27 to 31), so a shop qualifies only
-- when its orders cover 5 distinct week numbers. The count must be taken
-- per shop over DISTINCT weeks; counting rows per (shop, week) would
-- measure the number of orders in one week instead.
select
    id
from tmp_order
group by id
having count(distinct dt_new) = 5
2. 如何找出每一个级别下面购买最多的十个用户?求每个班级成绩排名前三的学生?row_number()over()窗口函数
-- With a window function: the ten highest-amount rows inside every item.
-- NOTE(review): assumes orders holds one row per (item, tuid); if a user can
-- appear several times per item, aggregate amount per user first - confirm.
select
    ranked.item
    ,ranked.tuid
from (
    select
        item
        ,tuid
        -- tuid is a tie-breaker so equal amounts are ranked deterministically
        ,row_number() over (partition by item order by amount desc, tuid) as flag
    from orders
) as ranked  -- a derived table must carry an alias in Hive / MySQL
where ranked.flag <= 10
-- Without a window function: a row is kept when fewer than ten rows of the
-- SAME item have a strictly larger amount (rank semantics, ties share a rank).
-- The correlation has to be on item, the grouping key of the ranking;
-- correlating on tuid would rank each user's own orders instead.
-- For the "top three students per class" variant, replace 10 with 3.
select
    o1.item
    ,o1.tuid
from orders as o1
where (
    select count(*)
    from orders as o2
    where o2.item = o1.item
      and o2.amount > o1.amount
) < 10
3. 查找每个用户最近一次和最早登录APP的时间。first_value、last_value
-- Earliest and latest dt per user, using first_value / last_value.
select
    login_bounds.tuid
    ,login_bounds.flag_first
    ,login_bounds.flag_last
from (
    select
        tuid
        ,first_value(dt) over (
            partition by tuid
            order by dt
            rows between unbounded preceding and unbounded following
        ) as flag_first
        ,last_value(dt) over (
            partition by tuid
            order by dt
            rows between unbounded preceding and unbounded following
        ) as flag_last
    from orders
) as login_bounds
-- every row of a user carries the same pair, so grouping by all three
-- columns collapses them to one row per user
group by
    login_bounds.tuid
    ,login_bounds.flag_first
    ,login_bounds.flag_last
-- Note: when ORDER BY is present and no frame is given, the window frame ends
-- at the current row, so last_value(dt) would just return the current row's
-- dt. The explicit "unbounded preceding and unbounded following" frame is
-- what makes last_value see the whole partition.
4. 拼多多笔试:
给一个表ord,包括user_id(用户id),goods_id(商品id),goods_num(购买商品数),ord_amt(购买金额),create_time(购买时间)。求最近30天每件商品的平均售价
-- Average selling price per goods over the last 30 days:
-- total amount paid divided by total units bought.
select
    goods_id
    ,sum(ord_amt) / sum(goods_num) as avg_price
from ord
-- unix_timestamp() with no argument is "now"; it is formatted down to a date
-- so datediff compares calendar days
where datediff(from_unixtime(unix_timestamp(), 'yyyy-MM-dd'), to_date(create_time)) <= 30
group by goods_id
5.拼多多笔试:
pv_log(用户浏览记录表):pv_id(页面id),user_id(用户id),create_time(访问时间)
dim_user(用户注册记录表):user_id(用户id),age(年龄),create_time(注册时间)
统计浏览不同页面数的用户对应的平均年龄
-- Average age of users, bucketed by how many distinct pages they viewed.
-- Step 1: number of distinct pages per user.
with user_pv_cnt as (
    select
        user_id
        ,count(distinct pv_id) as pv_cnt
    from pv_log
    group by user_id
)
-- Step 2: attach the age and average it inside every page-count bucket.
-- Each user contributes exactly once, so heavy browsers do not skew the mean.
select
    a1.pv_cnt
    ,avg(a2.age) as avg_age
from user_pv_cnt as a1
inner join dim_user as a2  -- users without a registration row have no age to average
    on a1.user_id = a2.user_id
where a2.age is not null
group by a1.pv_cnt
6.拼多多笔试:
tbl_ordr(用户订单表):user_id(用户id), ordr_id(订单号), ordr_goods(订单商品id), ordr_time(预定时间)
tbl_clk(用户商品点击明细表):clk_id(点击id), user_id, clk_time(点击时间), clk_goods(点击的商品id,和ordr_goods对应)
用户点击商品之后的订单算是这次点击产生的订单;多次点击后产生的订单,算订单创建前最后一次点击产生的订单,求有订单商品的点击及订单号
-- Attribute every order to the LAST click on the same goods by the same user
-- that happened before the order was placed.
-- NOTE(review): ordr_time and clk_time are compared directly; this assumes
-- both columns share one type/format (timestamp, or a fixed-width
-- 'yyyy-MM-dd HH:mm:ss' string) - confirm against the table definitions.
with tmp_ord_clk as (
    select
        a1.user_id
        ,a1.ordr_id
        ,a1.ordr_goods
        ,a1.ordr_time
        ,a2.clk_id
        ,a2.clk_time
        -- rank the candidate clicks of one order line, newest first;
        -- clk_id breaks ties between clicks with the same timestamp
        ,row_number() over (
            partition by a1.ordr_id, a1.ordr_goods
            order by a2.clk_time desc, a2.clk_id desc
        ) as flag
    from tbl_ordr as a1
    inner join tbl_clk as a2  -- orders without any click on the goods are dropped
        on a1.user_id = a2.user_id
        and a1.ordr_goods = a2.clk_goods
    -- only clicks strictly before the order can have produced it
    where a2.clk_time < a1.ordr_time
)
-- flag = 1 is the click closest to (and before) the order
select
    ordr_id
    ,clk_id
from tmp_ord_clk
where flag = 1
7.拼多多笔试
表1——订单表orders,大概字段有(user_id‘用户编号’, order_pay‘订单金额’ , order_time‘下单时间’)。
表2——活动报名表act_apply,大概字段有(act_id‘活动编号’, user_id‘报名用户’,act_time‘报名时间’)
要求:
1.
-- Per activity: number of orders and total order amount placed by its
-- applicants after they applied.
select
    a2.act_id
    ,count(*) as order_num_total
    ,sum(a1.order_pay) as order_pay_total
from orders as a1
inner join act_apply as a2  -- only orders of users who applied to an activity matter
    on a1.user_id = a2.user_id
-- compare against the apply-time column; act_apply is the table name, not a column
where a1.order_time > a2.act_time
group by a2.act_id
2.
-- Earliest apply time of every activity, taken as the activity start.
with tmp_begin_date as (
    select
        act_id
        ,min(act_time) as begin_time
    from act_apply
    group by act_id
)
-- Pair every order with the activities its user applied to.
,tmp_order_act as (
    select
        a1.user_id
        ,a1.order_time
        ,a2.act_id
        ,a2.act_time
    from orders as a1
    inner join act_apply as a2
        on a1.user_id = a2.user_id
)
-- Orders produced from the activity start until now, and their daily average.
select
    b2.act_id
    ,count(*) as order_num_total
    -- +1 counts the start day itself, which also prevents a zero divisor
    ,count(*) / (datediff(from_unixtime(unix_timestamp(), 'yyyy-MM-dd'), to_date(b2.begin_time)) + 1) as order_num_per_day
from tmp_order_act as b1
inner join tmp_begin_date as b2
    on b1.act_id = b2.act_id
where b1.order_time > b2.begin_time
  and b1.order_time <= from_unixtime(unix_timestamp(), 'yyyy-MM-dd HH:mm:ss')
-- begin_time is constant per act_id; it is listed only because the select uses it
group by
    b2.act_id
    ,b2.begin_time
8.拼多多笔试
表1——用户行为表tracking_log,大概字段有(user_id‘用户编号’,opr_id‘操作编号’,log_time‘操作时间’),时间dt分区(自己加的)
要求:
2.统计每天符合以下条件的用户数:A操作之后是B操作,AB操作必须相邻。
1.
-- Per day: number of distinct users and average operations per user.
select
    to_date(log_time) as dt
    ,count(distinct user_id) as user_total
    ,count(*) / count(distinct user_id) as avg_opr
from tracking_log
group by to_date(log_time)
2.
-- Per day: users who performed operation A immediately followed by B.
select
    a1.dt
    ,count(distinct a1.user_id) as user_total
from (
    select
        user_id
        ,to_date(log_time) as dt
        ,opr_id
        -- next operation of the same user on the same day;
        -- 'none' marks the last operation of the day
        ,lead(opr_id, 1, 'none') over (
            partition by user_id, to_date(log_time)
            order by log_time
        ) as opr_flag
    from tracking_log
) as a1
where a1.opr_id = 'A'
  and a1.opr_flag = 'B'
group by a1.dt
9.拼多多笔试
表1——用户登陆表user_log,大概字段有(user_id‘用户编号’,log_time‘登陆时间’)
要求:每天新增用户数,以及他们第2天、30天的回访比例
-- First login day of every user, i.e. the day the user counts as "new".
with tmp_new_add as (
    select
        user_id
        ,min(to_date(log_time)) as start_log_time
    from user_log
    group by user_id
)
-- Day distance between every login and the user's first login day.
,tmp_date_gap as (
    select
        a1.user_id
        ,a2.start_log_time
        ,datediff(to_date(a1.log_time), a2.start_log_time) as day_gap
    from user_log as a1
    inner join tmp_new_add as a2  -- every login row has a first-login row by construction
        on a1.user_id = a2.user_id
)
-- New users per day plus day-1 and day-30 return ratios.
-- Returning users are counted with count(distinct ...) so that several
-- logins on the return day still count the user once.
select
    start_log_time
    ,count(distinct user_id) as new_user_total
    ,count(distinct case when day_gap = 1 then user_id end) / count(distinct user_id) as remain_1
    ,count(distinct case when day_gap = 30 then user_id end) / count(distinct user_id) as remain_30
from tmp_date_gap
group by start_log_time