https://blog.csdn.net/moguxiansheng1106/article/details/44258499
-- Share of '好评' ratings among the joined rows for user 小明, business unit
-- 母婴, brand 花王, submitted during January 2017.
-- NOTE(review): user_name, sub_time and sat_time stay unqualified as in the
-- original; confirm which of a / b owns each column.
SELECT
    -- 1.0 forces decimal division: on engines with integer division an
    -- integer SUM divided by an integer COUNT truncates to 0.
    -- NULLIF avoids a divide-by-zero error when no row matches.
    SUM(CASE WHEN sat_time = '好评' THEN 1.0 ELSE 0 END)
        / NULLIF(COUNT(sat_time), 0) AS "好评率"
FROM a
INNER JOIN b
    ON a.good_id = b.good_id
WHERE user_name = '小明'
  -- Half-open range with ISO dates: BETWEEN '2017-1-1' AND '2017-1-31' would
  -- drop rows stamped on 2017-01-31 after 00:00:00.
  AND sub_time >= '2017-01-01'
  AND sub_time < '2017-02-01'
  AND b.bu_name = '母婴'
  AND b.brand_name = '花王'
(1)
-- For each brand owned by 小明, the five 2017 rows with the highest sale_amt.
select c.brand_name, c.logday, c.num
from (
    select
        b.brand_name,
        a.logday,
        -- rank rows inside each brand, largest sale_amt first
        row_number() over (partition by b.brand_name order by a.sale_amt desc) as num
    from a
    inner join b
        on a.sku_id = b.sku_id
    -- range predicate instead of year(a.logday) = '2017': no function on the
    -- column and no string/number comparison
    where a.logday >= '2017-01-01'
      and a.logday < '2018-01-01'
      and b.user_name = '小明'
) c
where c.num <= 5
-- The original trailing "group by b.brand_name" was dropped: alias b is not
-- visible outside the derived table, and grouping would collapse the top-5 rows.
(2)两表合并c->按品牌和时间分组,计算对应的销售额->前一天表和当天表按品牌合并->计算每个品牌在每一天的增长率->(未实现)
https://bbs.csdn.net/topics/392421965?page=1
create table a(
SKU_ID INT NOT NULL,
logday datetime NOT NULL,
sale_amt INT NOT NULL);
insert into a values (1,'2019-06-01',100);
insert into a values (2,'2019-06-01',200);
insert into a values (1,'2019-06-01',100);
insert into a values (4,'2019-06-01',300);
insert into a values (1,'2019-06-02',200);
insert into a values (2,'2019-06-02',150);
insert into a values (3,'2019-06-02',200);
insert into a values (4,'2019-06-02',100);
insert into a values (1,'2019-06-03',400);
insert into a values (3,'2019-06-03',80);
insert into a values (4,'2019-06-03',190);
insert into a values (4,'2019-06-04',390);
insert into a values (3,'2019-06-04',150);
insert into a values (1,'2019-06-05',150);
insert into a values (2,'2019-06-05',300);
insert into a values (3,'2019-06-05',200);
insert into a values (4,'2019-06-05',180);
insert into a values (2,'2019-06-05',300);
insert into a values (2,'2019-06-06',250);
insert into a values (2,'2019-06-06',350);
create table b(
SKU_ID INT NOT NULL,
brand_name varchar(12) NOT NULL,
user_name varchar(12) NOT NULL
);
insert into b values (1,'A','xm');
insert into b values (2,'A','xm');
insert into b values (3,'B','xm');
insert into b values (4,'B','xm');
-- cte_1: total sale_amt per (logday, brand_name) for rows where user_name = 'xm'.
-- cte_2: only the (logday, brand_name) pairs of cte_1.
WITH cte_1 AS (
    SELECT
        A.logday,
        B.brand_name,
        SUM(A.sale_amt) AS sum_amt
    FROM A
    INNER JOIN B
        ON A.SKU_ID = B.SKU_ID
    WHERE user_name = 'xm'
    GROUP BY
        A.logday,
        B.brand_name
),
cte_2 AS (
    SELECT
        logday,
        brand_name
    FROM cte_1
)
SELECT *
FROM cte_2
create table t_tree
(
id int not null
,node_name varchar(50) not null
,parent_id int not null
,[description] varchar(255) null
)
insert into t_tree
(
id, node_name, parent_id
)
values
(1,'A',0),
(2 ,'B' ,1),
(3 ,'C' ,1),
(4 ,'D' ,2),
(5 ,'E' ,2),
(6 ,'F' ,2),
(7 ,'G' ,8)
with
district as
(
select * from t_tree where node_name= 'A'
union all
select a.* from t_tree a, district b
where a.parent_id = b.id
)
select * from district
https://www.w3school.com.cn/sql/sql_union.asp
6. INNER JOIN
INNER JOIN 与 JOIN 是相同的。INNER JOIN 关键字在表中存在至少一个匹配时返回行。
7. LEFT OUTER JOIN
left join 是 left outer join 的简写,两者含义一样的。一个LEFT OUTER JOIN包含“左”表中的所有记录,即使它与在此连接中指定的“右”表并不存在任何匹配。
8. PRIMARY KEY 约束
PRIMARY KEY 约束唯一标识数据库表中的每条记录。
主键必须包含唯一的值。
主键列不能包含 NULL 值。
每个表都应该有一个主键,并且每个表只能有一个主键。在表中,此主键可以包含单个或多个列(字段)。
9. 创建sql表
MYSQL5.6
-- borrowed from https://stackoverflow.com/q/7745609/808921
CREATE TABLE IF NOT EXISTS `docs` (
`id` int(6) unsigned NOT NULL,
`rev` int(3) unsigned NOT NULL,
`content` varchar(200) NOT NULL,
PRIMARY KEY (`id`,`rev`)
) DEFAULT CHARSET=utf8;
INSERT INTO `docs` (`id`, `rev`, `content`) VALUES
('1', '1', 'The earth is flat'),
('2', '1', 'One hundred angels can dance on the head of a pin'),
('1', '2', 'The earth is flat and rests on a bull\'s horn'),
('1', '3', 'The earth is like a ball.');
sql server中的real数据类型
float 和 real 数据类型被称为近似数据类型。近似数值数据类型并不存储为许多数字指定的精确值,它们只储存这些值的最近似值。float 和 real 的使用遵循有关近似数值数据类型的 IEEE 754 规范。IEEE 754 规范提供四种舍入模式:舍入到最近、向上舍入、向下舍入以及舍入到零。Microsoft SQL Server 2005 使用向上舍入。
decimal(numeric)用于精确存储定点数值(可指定精度和小数位数,并不限于整数)
money 用于精确存储货币数值(定点数,精确到万分之一货币单位,并不是浮点型)
float 和 real 不能精确存储数值
利用SQL创建表时的 NOT NULL 约束:
CREATE TABLE test
(
age int(10),
sex varchar(20),
name varchar(20) NOT NULL
)
NOT NULL 约束强制列不接受 NULL 值。
约束,就是限制某些东西不能干什么,或者说不能是什么样子。
create table ord1(
user_id INT NOT NULL,
ord_id INT NOT NULL,
ord_amt REAL NOT NULL,
create_time datetime NOT NULL
);
INSERT INTO ord1 VALUES(1,10,100,'2019-07-02 10:00:01');
INSERT INTO ord1 VALUES(1,11,120,'2019-07-01 11:00:01');
INSERT INTO ord1 VALUES(1,11,120,'2019-07-02 11:00:01');
INSERT INTO ord1 VALUES(1,12,150,'2019-07-03 10:00:01');
INSERT INTO ord1 VALUES(2,13,200,'2019-07-02 10:00:01');
INSERT INTO ord1 VALUES(2,14,300,'2019-07-05 11:00:01');
INSERT INTO ord1 VALUES(3,15,100,'2019-07-02 10:00:01');
INSERT INTO ord1 VALUES(3,16,300,'2019-07-03 10:00:01');
INSERT INTO ord1 VALUES(4,17,200,'2019-07-02 10:00:01');
INSERT INTO ord1 VALUES(5,18,100,'2019-07-02 10:00:01');
create table act1(
act_id VARCHAR(15) NOT NULL,
user_id INT NOT NULL,
create_time datetime NOT NULL
);
INSERT INTO act1 VALUES('A',1,'2019-07-02 10:00:01');
INSERT INTO act1 VALUES('A',2,'2019-07-02 11:00:01');
INSERT INTO act1 VALUES('A',3,'2019-07-02 10:00:01');
INSERT INTO act1 VALUES('B',4,'2019-07-02 10:00:01');
INSERT INTO act1 VALUES('B',5,'2019-07-02 10:00:01');
INSERT INTO act1 VALUES('C',6,'2019-07-02 10:00:01');
统计每个活动类型所有用户的总订单额,订单数
select
b.act_id
,count(ord_id)
,sum(ord_amt)
from ord1 a
join act1 b
on a.user_id = b.user_id
where a.create_time >= b.create_time
group by b.act_id;
每个活动类型活动开始时间(第一个用户报名的时间)到今天,平均每天产生的订单数
-- Average number of orders per day for each activity, measured from the
-- activity's start (earliest sign-up time in act1) to the report date.
-- An order counts for an activity when it was placed by a signed-up user at or
-- after that user's own sign-up time.
select
    s.act_id
    -- count orders (the original summed ord_amt, which is an amount, not a count);
    -- * 1.0 avoids integer division, NULLIF avoids dividing by a zero-day span
    ,count(o.ord_id) * 1.0
        / nullif(DATEDIFF(day, s.start_time, '2019-07-28'), 0) as avg
from (
    -- activity start = first sign-up, taken over ALL sign-ups so that users
    -- without orders still define the start time
    select act_id, min(create_time) as start_time
    from act1
    group by act_id
) s
join act1 u
    on u.act_id = s.act_id
-- inner join: the original LEFT JOIN was cancelled by its WHERE filter anyway
join ord1 o
    on o.user_id = u.user_id
   and o.create_time >= u.create_time
group by s.act_id, s.start_time;
create table logtable(
user_id INT NOT NULL,
opr_type VARCHAR(15) NOT NULL,
log_time datetime NOT NULL);
insert into logtable values (1,'A','2019-06-01 10:10:01');
insert into logtable values (1,'B','2019-06-01 10:11:01');
insert into logtable values (1,'C','2019-06-01 10:12:01');
insert into logtable values (9,'A','2019-06-01 10:13:01');
insert into logtable values (9,'B','2019-06-01 10:14:01');
insert into logtable values (2,'A','2019-06-01 10:10:02');
insert into logtable values (2,'C','2019-06-01 10:11:01');
insert into logtable values (2,'B','2019-06-01 10:12:01');
insert into logtable values (3,'A','2019-06-02 10:10:01');
insert into logtable values (3,'B','2019-06-02 10:11:01');
insert into logtable values (4,'A','2019-06-03 10:10:01');
insert into logtable values (4,'C','2019-06-03 10:11:01');
insert into logtable values (4,'B','2019-06-03 10:12:01');
每天的访客数量
select date_format(log_time,'%y-%m-%d')
,count(distinct user_id)
from logtable
group by date_format(log_time,'%y-%m-%d');
每天执行opr_type=A到B,先A后B,而且必须紧紧挨着的用户数
思路:为每条日志增加新字段usetime(同一user_id下晚于当前记录log_time的最早日志时间,即该用户下一条操作的时间);再与原表按user_id连接,并要求t1为A操作、t2为B操作且t2.log_time = t1.usetime,以保证先A后B且两者紧挨着。最后按A操作的日期分组,利用count(distinct user_id)计算符合条件的用户数。
-- Per day (date of the 'A' row), the number of distinct users whose 'A'
-- operation is directly followed by a 'B' operation.
select
    date_format(cur.log_time, '%y-%m-%d')
    ,count(distinct cur.user_id)
from (
    -- attach to every log row the time of the same user's next log row
    select
        l1.user_id
        ,l1.opr_type
        ,l1.log_time
        ,(select min(l2.log_time)
          from logtable l2
          where l2.user_id = l1.user_id
            and l2.log_time > l1.log_time) as usetime
    from logtable as l1
) as cur
-- the WHERE filter on cur removes every NULL-extended row, so an inner join
-- returns the same rows as the outer join it replaces
inner join logtable as nxt
    on nxt.user_id = cur.user_id
where nxt.opr_type = 'B'
  and cur.opr_type = 'A'
  and nxt.log_time = cur.usetime
group by date_format(cur.log_time, '%y-%m-%d');
注:usetime是同一user_id下晚于当前记录log_time的最早日志时间(即该用户的下一条操作时间)
create table logtable(
user_id INT NOT NULL,
opr_type VARCHAR(15) NOT NULL,
log_time datetime NOT NULL);
insert into logtable values (1,'A','2019-06-01 10:10:01');
insert into logtable values (1,'B','2019-06-01 10:11:01');
insert into logtable values (1,'C','2019-06-01 10:12:01');
insert into logtable values (9,'A','2019-06-01 10:13:01');
insert into logtable values (9,'B','2019-06-01 10:14:01');
insert into logtable values (2,'A','2019-06-01 10:10:02');
insert into logtable values (2,'C','2019-06-01 10:11:01');
insert into logtable values (2,'B','2019-06-01 10:12:01');
insert into logtable values (3,'A','2019-06-02 10:10:01');
insert into logtable values (3,'B','2019-06-02 10:11:01');
insert into logtable values (4,'A','2019-06-03 10:10:01');
insert into logtable values (4,'C','2019-06-03 10:11:01');
insert into logtable values (4,'B','2019-06-03 10:12:01');
insert into logtable values (1,'A','2019-06-02 10:10:01');
insert into logtable values (1,'B','2019-06-02 10:11:01');
insert into logtable values (1,'C','2019-06-03 10:12:01');
insert into logtable values (9,'A','2019-06-02 10:13:01');
insert into logtable values (9,'B','2019-06-03 10:14:01');
insert into logtable values (2,'A','2019-06-03 10:10:02');
insert into logtable values (2,'C','2019-06-03 10:11:01');
insert into logtable values (2,'B','2019-06-03 10:12:01');
insert into logtable values (3,'A','2019-06-03 10:10:01');
insert into logtable values (3,'B','2019-06-05 10:11:01');
insert into logtable values (4,'A','2019-06-07 10:10:01');
insert into logtable values (4,'C','2019-06-05 10:11:01');
insert into logtable values (4,'B','2019-06-06 10:12:01');
select date_format(t1.first_time,'%y-%m-%d')
,count(distinct case when t1.log_time = t1.first_time then user_id else null end) as aa
,count(distinct case when date_format(t1.first_time,'%y-%m-%d') = date_format(date_sub(t1.log_time,interval 1 day),'%y-%m-%d') then user_id else null end ) as bb
from(
select *,(select min(log_time) from logtable l2 where l1.user_id = l2.user_id) as first_time
from logtable l1) as t1
group by date_format(t1.first_time,'%y-%m-%d');
https://blog.csdn.net/kylin_learn/article/details/97616259
select distinct name from table where name not in (select distinct name from table where fenshu<=80)
select name from table group by name having min(fenshu)>80
-- Remove duplicate rows: within each group of identical
-- (学号, 姓名, 课程编号, 课程名称, 分数) keep only the row with the smallest 自动编号.
delete from tablename
where 自动编号 not in (
    select min(自动编号)
    from tablename
    -- a space is required after "group by"; "group by学号" does not parse
    group by 学号, 姓名, 课程编号, 课程名称, 分数
)
select a.name, b.name
from team a, team b
where a.name < b.name
create table logtable(
year INT NOT NULL,
mouth INT NOT NULL,
amount FLOAT(2,1) NOT NULL);
insert into logtable values (1991,1,1.1);
insert into logtable values (1991,2,1.2);
insert into logtable values (1991,3,1.3);
insert into logtable values (1991,4,1.4);
insert into logtable values (1992,1,2.1);
insert into logtable values (1992,2,2.2);
insert into logtable values (1992,3,2.3);
insert into logtable values (1992,4,2.4);
select year,
(select amount from logtable m where mouth=1 and m.year=logtable.year) as m1,
(select amount from logtable m where mouth=2 and m.year=logtable.year) as m2,
(select amount from logtable m where mouth=3 and m.year=logtable.year) as m3,
(select amount from logtable m where mouth=4 and m.year=logtable.year) as m4
from logtable group by year
-- Pivot logtable to one row per year with the amount of months 1-4 in columns m1-m4.
-- The month column is spelled "mouth" in the table definition.
select
    logtable.year,
    -- sum the stored amount; the original summed the literals 1.1/1.2/1.3/1.4,
    -- which returned the 1991 values for every year
    sum(case when logtable.mouth = 1 then logtable.amount else 0 end) as m1,
    sum(case when logtable.mouth = 2 then logtable.amount else 0 end) as m2,
    sum(case when logtable.mouth = 3 then logtable.amount else 0 end) as m3,
    sum(case when logtable.mouth = 4 then logtable.amount else 0 end) as m4
from logtable
group by logtable.year
select a.title,a.username,max(a.adddate)from table a
group by a.title,a.username
select a.a, a.b, a.c, b.c, b.d, b.f from a LEFT OUTER JOIN b ON a.a = b.c
select * from 日程安排 where datediff(minute,getdate(),开始时间)>5
DELETE FROM
B
WHERE NOT EXISTS(
SELECT 1
FROM
A
WHERE
B.id=A.id
)
sql语句查询中exists中为什么要用select 1?
EXISTS 只判断子查询是否返回了行,并不使用子查询返回的具体列值,因此 select 列表写什么都可以;写 select 1 只是一种惯用写法,表明并不关心返回的内容。主流数据库的优化器会忽略 EXISTS 子查询中的 select 列表,所以 select 1 与 select * 在性能上通常没有差别。
以select * from A where exists(select * from B where A.a=B.a)为例,
exists表示, 对于A中的每一个记录, 如果在表B中有记录,其属性a的值与表A这个记录的属性a的值相同,则表A的这个记录是符合条件的记录,
如果是NOT exists, 则表示如果表B中没有记录能与表A这个记录连接, 则表A的这个记录是符合条件的记录。
select sname from s where sno not in
(select distinct sno from sc join c on sc.cno = c.cno where c.cteacher='小易');
select s.sname, avg(sc.score) as avg
from s join sc
on s.sno = sc.sno
where sc.score > 90
group by s.sname having count(*) > 2;
create table logtable(
courseid INT NOT NULL,
coursename VARCHAR(10) NOT NULL,
score INT NULL);
insert into logtable values (1,'JAVA',70);
insert into logtable values (2,'ORACLE',90);
insert into logtable values (3,'XML',40);
insert into logtable values (4,'JSP',30);
insert into logtable values (5,'SERVELET',80);
(mysql, ms sql server)
select courseid, coursename ,score ,(case when score < 60 then 'fail' else 'pass' end)as mark from logtable;
(oracle)
-- Oracle variant of the pass/fail mark: SIGN(score - 60) is -1 for scores below 60,
-- which DECODE maps to 'fail'; every other value maps to 'pass'.
-- Reads logtable, the table created for this example (the original read "course").
select courseid, coursename, score, decode(sign(score-60),-1,'fail','pass') as mark from logtable
create table testtable1(
ID INT NOT NULL,
department VARCHAR(12) NOT NULL);
insert into testtable1 values(1,'A');
insert into testtable1 values(2,'B');
insert into testtable1 values(3,'C');
create table testtable2
(
dptID INT NOT NULL,
name VARCHAR(12) NOT NULL
);
insert into testtable2 values(1,'D');
insert into testtable2 values(1,'E');
insert into testtable2 values(2,'F');
insert into testtable2 values(3,'G');
insert into testtable2 values(4,'H');
SELECT testtable2.* , ISNULL(department,'I')
FROM testtable1 right join testtable2 on testtable2.dptID = testtable1.ID
-- Sample data: one row per (p_ID, s_id) with a quantity p_Num.
create table testtable1(
p_ID INT NOT NULL,
p_Num INT NOT NULL,
s_id VARCHAR(12) NOT NULL);
insert into testtable1 (p_ID, p_Num, s_id) values (1,10,'01');
insert into testtable1 (p_ID, p_Num, s_id) values (1,12,'02');
-- s_id was ',01' (stray comma inside the literal), which matched none of the
-- '01' / '02' / '03' values used by the other rows
insert into testtable1 (p_ID, p_Num, s_id) values (2,8,'01');
insert into testtable1 (p_ID, p_Num, s_id) values (3,11,'01');
insert into testtable1 (p_ID, p_Num, s_id) values (3,8,'03');
select p_id ,
sum(case when s_id='01' then p_num else 0 end) as s1_id
,sum(case when s_id='02' then p_num else 0 end) as s2_id
,sum(case when s_id='03' then p_num else 0 end) as s3_id
from testtable1 group by p_id
create table logtable1(
title VARCHAR NOT NULL,
user_id INT NOT NULL,
log_time datetime NOT NULL);
insert into logtable1 values ('A',1,'2019-06-01 ');
insert into logtable1 values ('A',2,'2019-06-02 ');
insert into logtable1 values ('A',3,'2019-06-03 ');
insert into logtable1 values ('B',4,'2019-06-02 ');
insert into logtable1 values ('B',5,'2019-06-07 ');
insert into logtable1 values ('B',6,'2019-06-05 ');
insert into logtable1 values ('B',7,'2019-06-03 ');
create table logtable2(
title VARCHAR NOT NULL,
user_id INT NOT NULL,
name varchar(12) NOT NULL);
insert into logtable2 values ('A',7,'a ');
insert into logtable2 values ('A',6,'b ');
insert into logtable2 values ('A',5,'c ');
insert into logtable2 values ('B',4,'c ');
insert into logtable2 values ('B',3,'e');
insert into logtable2 values ('B',2,'d ');
insert into logtable2 values ('B',1,'e');
select log_time from logtable1,logtable2 where logtable1.user_id =logtable2.user_id and name='c'
(2) 使用标准SQL嵌套语句查询选修课程编号为’C2’的学员姓名和所属单位?
答:select sn,sd from s,sc where s.s#=sc.s# and sc.c#='C2'
(3) 使用标准SQL嵌套语句查询不选修课程编号为’C5’的学员姓名和所属单位?
答:select sn,sd from s where s# not in(select s# from sc where c#='C5')
(4)查询选修了课程的学员人数
答:select count(distinct s#) as number from sc
select number = count(distinct s#) from sc
(5) 查询选修课程超过5门的学员学号和所属单位?
答:select sn,sd from s where s# in(select s# from sc group by s# having count(distinct c#)>5)
31. 查询表A中存在ID重复三次以上的记录,完整的查询语句如下:
select *
from A
where id in(select ID from A group by id having count(id)>3)
create table B as select Member_ID, min(Log_time), URL from A group by Member_ID
select * from A where trade_no not in (select trade_no from B)
SELECT Company, OrderNumber FROM Orders ORDER BY Company DESC, OrderNumber ASC
groupby分组了以后,order需要是分组的列
https://developer.aliyun.com/ask/82363?spm=a2c6h.13159736
一张成绩表有如下字段,班级ID,英语成绩,数据成绩,语文成绩,查询出 每个班级英语成绩最高的前两名的记录。
方法一
SELECT * FROM CJ m
where(
select COUNT(*) from CJ n
where m.Classid = n.Classid and n.English > m.English)<2
order by Classid, English desc
方法二(官方说最好的方法)
select a.Classid,a.English from
(select Classid,English,row_number() over(partition by Classid order by English desc) as n
from CJ) a
where n<=2
row_number() OVER (PARTITION BY COL1 ORDER BY COL2) 表示根据COL1分组,在分组内部根据 COL2排序,而此函数计算的值就表示每组内部排序后的顺序编号(组内连续的唯一的)
https://www.cnblogs.com/hxfcodelife/p/10226934.html
-- Retention matrix: for each first-login day, the number of (user, login day)
-- pairs that fall 0, 1, ... 6 and 7-or-more days after that first login.
SELECT
    f.first_day,
    SUM(CASE WHEN f.by_day = 0 THEN 1 ELSE 0 END) AS day_0,
    SUM(CASE WHEN f.by_day = 1 THEN 1 ELSE 0 END) AS day_1,
    SUM(CASE WHEN f.by_day = 2 THEN 1 ELSE 0 END) AS day_2,
    SUM(CASE WHEN f.by_day = 3 THEN 1 ELSE 0 END) AS day_3,
    SUM(CASE WHEN f.by_day = 4 THEN 1 ELSE 0 END) AS day_4,
    SUM(CASE WHEN f.by_day = 5 THEN 1 ELSE 0 END) AS day_5,
    SUM(CASE WHEN f.by_day = 6 THEN 1 ELSE 0 END) AS day_6,
    SUM(CASE WHEN f.by_day >= 7 THEN 1 ELSE 0 END) AS day_7plus
FROM (
    SELECT
        b.user_id,
        b.login_time,
        c.first_day,
        -- MySQL form: DATEDIFF(later, earlier) returns whole days. The original
        -- three-argument DATEDIFF(day, ...) is SQL Server syntax and fails next
        -- to STR_TO_DATE, which is MySQL-only.
        DATEDIFF(b.login_time, c.first_day) AS by_day
    FROM (
        -- one row per user and calendar day with at least one login
        SELECT DISTINCT
            user_id,
            STR_TO_DATE(login_time, '%Y/%m/%d') AS login_time
        FROM user_info
    ) AS b
    LEFT JOIN (
        -- each user's first login day
        SELECT
            user_id,
            MIN(STR_TO_DATE(login_time, '%Y/%m/%d')) AS first_day
        FROM user_info
        GROUP BY user_id
    ) AS c
        ON b.user_id = c.user_id
) AS f
GROUP BY f.first_day
ORDER BY f.first_day
https://www.jianshu.com/p/be2cb8880df6
注:order by1,2在mysql数据库系统中表示先对第一列排序,如果第一列值相同则按第二列排序。
MYSQL
-- Sample data: one row per (p_ID, s_id) with a quantity p_Num.
create table testtable1(
p_ID INT NOT NULL,
p_Num INT NOT NULL,
s_id VARCHAR(12) NOT NULL);
insert into testtable1 (p_ID, p_Num, s_id) values (1,10,'01');
insert into testtable1 (p_ID, p_Num, s_id) values (1,12,'02');
-- s_id was ',01' (stray comma inside the literal)
insert into testtable1 (p_ID, p_Num, s_id) values (2,8,'01');
insert into testtable1 (p_ID, p_Num, s_id) values (3,11,'01');
insert into testtable1 (p_ID, p_Num, s_id) values (3,8,'03');
select p_ID,p_Num from testtable1 group by 1,2;
CREATE TABLE graduates (
name varchar(255) ,
income int
)
go
INSERT INTO graduates VALUES ('桑普森', '400000');
INSERT INTO graduates VALUES ('迈克', '30000');
INSERT INTO graduates VALUES ('怀特', '20000');
INSERT INTO graduates VALUES ('阿诺德', '20000');
INSERT INTO graduates VALUES ('史密斯', '20000');
INSERT INTO graduates VALUES ('劳伦斯', '15000');
INSERT INTO graduates VALUES ('哈德逊', '15000');
INSERT INTO graduates VALUES ('肯特', '10000');
INSERT INTO graduates VALUES ('贝克', '10000');
INSERT INTO graduates VALUES ('斯科特', '10000');
通过count(*)求每个数出现的次数->通过max()求出现的最多次数->通过having过滤出出现次数最多的income
-- Mode of income: the income value(s) whose occurrence count equals the
-- highest occurrence count of any income.
select income, count(*) as cnt
from graduates
group by income
having count(*) >= (
    select max(tmp.cnt)
    from (select count(*) as cnt from graduates group by income) tmp
)  -- closing parenthesis of the scalar subquery was missing in the original
注:GO语句把程序分成一个个代码块,即使一个代码块执行错误,它后面的代码块仍然会执行。
2)有极端值,且无某数据重复出现多次的情况下集中趋势的刻画。
求中位数
-- Median of income: keep every income value that has at least half of all
-- rows at or above it AND at least half at or below it, then average the
-- (one or two) values that remain.
select avg(distinct tmp.income * 1.0)  -- * 1.0: integer AVG would truncate
from (
    select T1.income
    from graduates T1
    cross join graduates T2  -- every row compared against every row
    group by T1.income
    -- divide by 2.0, not 2: with integer division an odd row count rounds the
    -- threshold down and lets non-median values through
    having sum(case when T2.income >= T1.income then 1 else 0 end) >= count(*) / 2.0
       and sum(case when T2.income <= T1.income then 1 else 0 end) >= count(*) / 2.0
) tmp
go
注:
平均数应用场合:没有极端值的情况下数据集中趋势的刻画。
1)统计不同月份的下单人数
select month(paidTime) ,count(distinct userId) from data.orderinfo
where isPaid = '已支付'
group by month(paidTime)
2)统计用户三月份的回购率和复购率
复购率指当月消费者中消费次数多于一次的人数占比
回购率指本月消费者中在下月再次消费的占比
复购率:
先统计三月份每个购买者的购买次数,作为一个子查询返回,
外层使用count+if函数统计大于一次消费的购买者人数,将其与总人数相除,即可得到复购率。
select count(ct) ,count(if(ct>1,1,null)),count(if(ct>1,1,null))/count(ct) as ratio
from ( select userId,count(userId) as ct from data.orderinfo
where isPaid = '已支付'
and month(paidTime) = 3
group by userId) t
回购率:
根据用户id和月份相差访问月份相差一个月使用左连接,统计在本月购买的用户数量,本月购买并在下月继续购买的客户数量,进而计算回购率。
select t1.m,count(t1.m),count(t2.m),count(t2.m)/count(t1.m) from (
select userId,date_format(paidTime,'%Y-%m-01') as m from data.orderinfo
where isPaid = '已支付'
group by userId,date_format(paidTime,'%Y-%m-01')) t1
left join (
select userId,date_format(paidTime,'%Y-%m-01') as m from data.orderinfo
where isPaid = '已支付'
group by userId,date_format(paidTime,'%Y-%m-01')) t2
on t1.userId = t2.userId and t1.m = date_sub(t2.m,interval 1 month)
group by t1.m
3)统计男女用户平均消费频次
通过内连接过滤空值,连接两个表,通过count统计单个购买者的购买次数,
根据性别分组,统计均值,得到男女平均消费频次。
-- Average number of orders per user, split by sex.
-- Users whose sex is the empty string are excluded by the inner join.
select t2.sex, avg(t2.ct)
from (
    -- orders per user
    select o.userId, t.sex, count(1) as ct
    from data.orderinfo o
    inner join (
        select userId, sex
        from data.userinfo
        where sex != ''
    ) t
        on o.userId = t.userId
    -- qualified: a bare "userId" exists in both o and t and is ambiguous;
    -- sex is grouped too so the query is valid under ONLY_FULL_GROUP_BY
    group by o.userId, t.sex
) t2
group by t2.sex
count(1) 与 count( * ) 都统计结果集的全部行数,不会忽略含 NULL 的行(只有 count(列名) 才会忽略该列为 NULL 的行)。二者在性能上没有区别:MySQL(InnoDB)对 count(1) 和 count( * ) 的处理方式完全相同。
4)统计多次消费的用户,第一次和最后一次消费间隔是多少
提取多次消费用户,用datediff计算max和min的差值
-- For users with more than one paid order: last and first payment time and
-- the number of days between them.
select
    userId,
    max(paidTime),
    min(paidTime),
    -- MySQL form DATEDIFF(later, earlier); the three-argument
    -- DATEDIFF(day, ...) is SQL Server syntax, while the other queries on
    -- data.orderinfo use MySQL functions (date_format, if)
    datediff(max(paidTime), min(paidTime))
from data.orderinfo
where isPaid = '已支付'
group by userId
having count(1) > 1
5)统计不同年龄段用户消费频次是否有差异
先计算用户所属的不同年龄段,并利用内连接使之加入表中,然后不同用户id下的消费频次,最后根据年龄段分组,统计不同年龄段下用户的平均消费频次。
CEIL(N):上取整,取大于等于N的最小整数。
select age,avg(ct) from (
select o.userId,age,count(o.userId) as ct
from data.orderinfo o
inner join (
select userId,ceil((year(now()) - year(birth)) / 10) as age
from data.userinfo
where birth > '1901-00-00') t
on o.userId = t.userId
group by o.userId,age) t2
group by age
count(o.userId)统计消费频次
6)统计消费的二八法则,消费top 20%用户,贡献了多少额度?
select count(userId),sum(total) from (
select userId,sum(price) as total from data.orderinfo o
where isPaid = '已支付'
group by userId
order by total desc
limit 17000)t
top = order.groupby('userID').sum().sort_values('price',ascending = False)
top.shape[0]*0.2#计算top前20%有多少人, 计算结果为17129.8
top = top.head(17130)
top.price.sum()#计算top前20%的人的总贡献度
为行分配序号
SELECT
ROW_NUMBER() OVER (
ORDER BY productName
) row_num,
productName,
msrp
FROM
products
ORDER BY
productName;
找到每组的前N行
WITH inventory
AS (SELECT
productLine,
productName,
quantityInStock,
ROW_NUMBER() OVER (
PARTITION BY productLine
ORDER BY quantityInStock DESC) row_num
FROM
products
)
SELECT
productLine,
productName,
quantityInStock
FROM
inventory
WHERE
row_num <= 3;
CREATE TABLE t (
id INT,
name VARCHAR(10) NOT NULL
);
INSERT INTO t(id,name)
VALUES(1,'A'),
(2,'B'),
(2,'B'),
(3,'C'),
(3,'C'),
(3,'C'),
(4,'D');
使用该ROW_NUMBER()函数进行分区编号
SELECT
id,
name,
ROW_NUMBER() OVER (PARTITION BY id, name ORDER BY id) AS row_num
FROM t;
从输出中可以看出,唯一的行是行号等于1的行。
版本低于8.0的MySQL不支持row_number(),但幸运的是,MySQL提供了可用于模拟row_number()函数的会话变量 。
SET @row_number = 0;
SELECT
(@row_number:=@row_number + 1) AS num, firstName, lastName
FROM
employees
LIMIT 5;
在第一个语句中,我们定义了一个名为 @row_number 的变量,并将其值设置为0。@row_number 是由 @ 前缀标识的会话变量。
在第二个语句中,我们从 employees 表中选择数据,并对每一行将 @row_number 变量的值加 1。LIMIT 子句用于限制返回的行数,在本例中被设置为 5。
MySQL row_number - 为每个组添加行号
SELECT
@row_number:=CASE
WHEN @customer_no = customerNumber THEN @row_number + 1
ELSE 1
END AS num,
@customer_no:=customerNumber as CustomerNumber,
paymentDate,
amount
FROM
payments
ORDER BY customerNumber;
-- Dates of rows that close a run of three consecutive ids (id-2, id-1, id)
-- in which every row has more than 100 visits.
select t1.date
from Tourists as t1
-- explicit joins: a comma-separated FROM list cannot be followed by ON
inner join Tourists as t2
    on t1.id = (t2.id + 1)
inner join Tourists as t3
    on t2.id = (t3.id + 1)
where t1.visits > 100 and t2.visits > 100 and t3.visits > 100
MS SQL SERVER
CREATE TABLE salary (
gender varchar(255) not null ,
income int not null
)
INSERT INTO salary VALUES ('m', '400000');
INSERT INTO salary VALUES ('m', '30000');
INSERT INTO salary VALUES ('f', '20000');
INSERT INTO salary VALUES ('f', '20000');
update salary
set gender = replace('mf', gender, '');
select * from salary
select A.id,
((case when A.d1 = B.d1 then 1 else 0 end) +
(case when A.d2 = B.d2 then 1 else 0 end) +
...) as count_match
from A join B
on A.d1 = B.d1
having count_match >= 19
Case具有两种格式。简单Case函数和Case搜索函数。
简单case函数
CASE sex
WHEN '1' THEN '男'
WHEN '2' THEN '女'
ELSE '其他' END
case搜索函数
CASE WHEN sex = '1' THEN '男'
WHEN sex = '2' THEN '女'
ELSE '其他' END
CREATE TABLE goods_table (U VARCHAR(20),G VARCHAR(20),S INT);
INSERT INTO goods_table VALUES ('u0','g0',3);
INSERT INTO goods_table VALUES ('u0','g1',2);
INSERT INTO goods_table VALUES ('u0','g2',1);
INSERT INTO goods_table VALUES ('u1','g0',4);
INSERT INTO goods_table VALUES ('u1','g1',5);
INSERT INTO goods_table VALUES ('u1','g2',6);
INSERT INTO goods_table VALUES ('u2','g0',7);
INSERT INTO goods_table VALUES ('u2','g1',8);
INSERT INTO goods_table VALUES ('u2','g2',9);
select T.Ua, T.Ub,SUM(T.res) sum_prod
FROM(select a.U as Ua,a.G as Ga,a.S as Sa,b.U as Ub,b.G as Gb,b.S as Sb,(a.S*b.S)as res
from goods_table a,goods_table b
where a.U<b.U and a.G = b.G) T
GROUP BY T.Ua, T.Ub
1)统计教授多门课老师数量
select count(*) from (select count(*) from class
group by teacher having count(distinct course) > 1 )A
2)输出每位老师教授课程数统计
select teacher, count(distinct course) as count_course
from class
group by teacher
CREATE TABLE goods_table (ID INT, knight VARCHAR(20),vote_knight INT);
INSERT INTO goods_table VALUES (1,'A',3);
INSERT INTO goods_table VALUES (2,'B',5);
INSERT INTO goods_table VALUES (3,'C',1);
INSERT INTO goods_table VALUES (4,'B',2);
INSERT INTO goods_table VALUES (5,'C',1);
select knight,sum(vote_knight)as num from goods_table
group by knight
order by num desc limit 1;
报错:Unable to get host connection: Connections could not be acquired from the underlying database!
可能原因: 1.数据库用户名密码是否正确;2.连接数是否超过最大连接数。
set @sum1 = (select sum(frequency)+1 as sum1 from goods_table)
CREATE TABLE goods_table (ID INT, number INT,frequency INT);
INSERT INTO goods_table VALUES (1,5,2);
INSERT INTO goods_table VALUES (2,2,1);
INSERT INTO goods_table VALUES (3,3,1);
INSERT INTO goods_table VALUES (4,4,1);
INSERT INTO goods_table VALUES (5,1,1);
set @sum = (select sum(frequency)+1 as sum from goods_table);
set @index1 = 0;
set @last_index = 0;
select avg(t.number)
from
(select @last_index:=@index1 as last_index, @index1:=@index1 + frequency as index1,
frequency, number from
(select * from goods_table
group by goods_table.number
)a
)as t
where t.index1 in (floor(@sum/2),ceiling(@sum/2))
or (floor(@sum/2)>=t.last_index and ceiling(@sum/2)<t.index1);
CREATE TABLE info_table (ID INT, class INT,score INT);
INSERT INTO info_table VALUES (1,1,60);
INSERT INTO info_table VALUES (2,2,70);
INSERT INTO info_table VALUES (3,3,50);
INSERT INTO info_table VALUES (4,1,90);
INSERT INTO info_table VALUES (5,2,80);
INSERT INTO info_table VALUES (5,2,75);
INSERT INTO info_table VALUES (6,1,85);
INSERT INTO info_table VALUES (7,3,100);
INSERT INTO info_table VALUES (8,3,55);
INSERT INTO info_table VALUES (9,1,70);
INSERT INTO info_table VALUES (10,3,90);
-- Median score per class (MySQL user-variable technique): number the rows of
-- each class by ascending score, keep the middle row (odd count) or the two
-- middle rows (even count), and average them.
-- NOTE(review): relies on @row_number / @customer_no being evaluated row by
-- row in the listed order; MySQL does not guarantee this. On MySQL 8.0+ a
-- ROW_NUMBER() window function is the safer choice.
select c.class, avg(c.score) as median
from (
    select a.num, a.class, a.score, b.count
    from (
        select
            -- restart the counter whenever the class changes
            @row_number := case
                when @customer_no = s.class then @row_number + 1
                else 1
            end as num,
            @customer_no := s.class as class,
            s.ID,
            s.score
        from
            -- alias s added: MySQL rejects a derived table without an alias
            (select * from info_table order by class, score) s
        order by s.class, s.score
    ) a
    left join (select class, count(*) as count from info_table group by class) b
        on a.class = b.class
    -- even count: rows count/2 and count/2 + 1; odd count: row ceiling(count/2)
    having a.num = (case when b.count%2 = 0 then b.count/2 else ceiling(b.count/2) end)
        or a.num = (case when b.count%2 = 0 then b.count/2+1 else ceiling(b.count/2) end)
) c
group by c.class
写sql查询过去一个月付款用户量(提示 用户量需去重)最高的3天分别是哪几天。
-- The three calendar days of the past month with the most distinct paying users.
-- "table" is the placeholder table name used throughout these notes.
select date(pay_time) as pay_time, count(distinct user_id) as c
from table
-- restrict to the past month; the original had no time filter at all
where pay_time >= date_sub(curdate(), interval 1 month)
-- group by the full date: day(pay_time) is only the day-of-month and would
-- merge e.g. the 15th of two different months
group by date(pay_time)
order by c desc
limit 3
写sql查询昨天每个用户最后付款的订单ID及金额
整合包含用户在昨天最后付款信息的一张表并与原表相连(相连时根据用户id,最后付款时间与支付时间判别),在表中选取需要的订单ID及金额信息
-- For every user who paid yesterday: order id and amount of the order with
-- that user's latest payment time of the day.
-- "table" is the placeholder table name used throughout these notes.
select t2.order_id, t2.order_amount
from (
    -- latest payment time per user among yesterday's payments
    select user_id, max(pay_time) as mt
    from table
    -- WHERE must precede GROUP BY; MySQL DATEDIFF takes (later, earlier)
    where datediff(now(), pay_time) = 1
    group by user_id
) t1
-- every t1 row originates from the same table, so a match always exists
inner join table as t2
    on t1.user_id = t2.user_id and t1.mt = t2.pay_time
-- Left join a to b on user_id, replacing a NULL a.user_id by the string
-- 'hive' followed by a random number in the join key.
-- NOTE(review): presumably the Hive data-skew workaround that spreads NULL keys
-- over many reducers instead of sending them all to one; confirm.
select a.user_id, a.goods_id
from a
left outer join b
-- straight ASCII quotes: the typographic quotes around hive in the original
-- are not string delimiters
on (case when a.user_id is null then concat('hive', rand()) else a.user_id end) = b.user_id;
select * from table order by rand() limit 2000
-- Distinct visitors per visit_date over the last 30 days.
-- "table" is the placeholder table name used throughout these notes.
select visit_date, count(distinct user_id)  -- was misspelled "distince"
from table t1                               -- FROM clause was missing
-- MySQL DATEDIFF(later, earlier); the stray closing parenthesis was removed
where datediff(now(), t1.log_time) <= 30
group by visit_date
近30天连续访问7天以上的用户数量
select count(*)
from table t1, table t2, ..., table t7
where t1.visit_date = (t2.visit_date+1) and t2.visit_date = (t3.visit_date+1)
and ... and t6.visit_date = (t7.visit_date+1) and t1.user_id = t2.user_id and t2.user_id = t3.user_id and...and t6.user_id = t7.user_id
and DateDiff(dd,t1.log_time,getdate())<= 30)
create table logtable(
user_id INT NOT NULL,
log_time datetime NOT NULL);
insert into logtable values (1,'2019-06-01 10:10:01');
insert into logtable values (1,'2019-06-01 10:11:01');
insert into logtable values (1,'2019-06-01 10:12:01');
insert into logtable values (9,'2019-06-01 10:13:01');
insert into logtable values (9,'2019-06-01 10:14:01');
insert into logtable values (2,'2019-06-01 10:10:02');
insert into logtable values (2,'2019-06-01 10:11:01');
insert into logtable values (2,'2019-06-01 10:12:01');
insert into logtable values (3,'2019-06-02 10:10:01');
insert into logtable values (3,'2019-06-02 10:11:01');
insert into logtable values (4,'2019-06-03 10:10:01');
insert into logtable values (4,'2019-06-03 10:11:01');
insert into logtable values (4,'2019-06-03 10:12:01');
insert into logtable values (1,'2019-06-02 10:10:01');
insert into logtable values (1,'2019-06-02 10:11:01');
insert into logtable values (1,'2019-06-03 10:12:01');
insert into logtable values (9,'2019-06-02 10:13:01');
insert into logtable values (9,'2019-06-03 10:14:01');
insert into logtable values (2,'2019-06-03 10:10:02');
insert into logtable values (2,'2019-06-03 10:11:01');
insert into logtable values (2,'2019-06-03 10:12:01');
insert into logtable values (3,'2019-06-03 10:10:01');
insert into logtable values (3,'2019-06-05 10:11:01');
insert into logtable values (4,'2019-06-07 10:10:01');
insert into logtable values (4,'2019-06-05 10:11:01');
insert into logtable values (4,'2019-06-06 10:12:01');
-- New users per day: each user counts once, on the date of that user's
-- earliest log row.
select c.first_time as logtime, count(*)
from (
    -- first active date per user; '%Y-%m-%d' strings sort chronologically,
    -- so MIN over the formatted value is the earliest date
    select user_id, min(date_format(log_time, '%Y-%m-%d')) as first_time
    from logtable
    group by user_id
) c
-- Grouping by the selected column keeps the query valid under
-- ONLY_FULL_GROUP_BY; the original selected logtime while grouping by
-- first_time and built the same per-day list twice.
group by c.first_time
select coalesce(success_cnt, 1) from tableA
当success_cnt 为null值的时候,将返回1,否则将返回success_cnt的真实值。
select coalesce(success_cnt,period,1) from tableA
当success_cnt不为null,那么无论period是否为null,都将返回success_cnt的真实值(因为success_cnt是第一个参数),当success_cnt为null,而period不为null的时候,返回period的真实值。只有当success_cnt和period均为null的时候,将返回1。
-- Difference of 1 day (the year of the second argument was mistyped as 2015)
select TIMESTAMPDIFF(DAY, '2018-03-20 23:59:00', '2018-03-22 00:00:00');
-- Difference of 49 hours
select TIMESTAMPDIFF(HOUR, '2018-03-20 09:00:00', '2018-03-22 10:00:00');
-- Difference of 2940 minutes
select TIMESTAMPDIFF(MINUTE, '2018-03-20 09:00:00', '2018-03-22 10:00:00');
-- Difference of 176400 seconds
select TIMESTAMPDIFF(SECOND, '2018-03-20 09:00:00', '2018-03-22 10:00:00');
#查找入职员工时间排名倒数第三的员工所有信息
CREATE TABLE employees (
emp_no int(11) NOT NULL,
birth_date date NOT NULL,
first_name varchar(14) NOT NULL,
last_name varchar(16) NOT NULL,
gender char(1) NOT NULL,
hire_date date NOT NULL,
PRIMARY KEY (emp_no));
SELECT * FROM employees
WHERE hire_date =
(SELECT DISTINCT hire_date FROM employees ORDER BY hire_date DESC LIMIT 1 OFFSET 2)
Limit 与offset 一起使用的用法
举个例子,LIMIT 3 OFFSET 1, 这意味着,跳过第1条记录(即从第2条记录开始),返回接下来3条记录。即最终得到,原本的第2,3,4条记录。
#查找所有员工入职时候的薪水情况,给出emp_no以及salary, 并按照emp_no进行逆序
CREATE TABLE employees (
emp_no int(11) NOT NULL,
birth_date date NOT NULL,
first_name varchar(14) NOT NULL,
last_name varchar(16) NOT NULL,
gender char(1) NOT NULL,
hire_date date NOT NULL,
PRIMARY KEY (emp_no));
CREATE TABLE salaries (
emp_no int(11) NOT NULL,
salary int(11) NOT NULL,
from_date date NOT NULL,
to_date date NOT NULL,
PRIMARY KEY (emp_no,from_date));
思路
a. 通过 employees表的hire_date = salaries表的from_date,e.emp_no = s.emp_no 连接两张表;
b. 用ORDER BY 对emp_no进行排序。
方法一:INNER JOIN连接两张表
SELECT s.emp_no, s.salary
FROM employees AS e INNER JOIN salaries AS s
ON e.emp_no = s.emp_no AND e.hire_date = s.from_date
ORDER BY s.emp_no DESC
方法二:并列查询
SELECT s.emp_no, s.salary
FROM employees AS e, salaries AS s
WHERE e.emp_no = s.emp_no AND e.hire_date = s.from_date
ORDER BY s.emp_no DESC
INNER JOIN VS 并列查询
Q: 这两种方法有什么不同呢?
A: 两种写法在语义上等价:用FROM并列两张表、再在WHERE中写连接条件,就是隐式的内连接;INNER JOIN ... ON 是显式写法。主流数据库的优化器会把二者转换成相同的执行计划,因此效率上没有差别。推荐使用显式的INNER JOIN,把连接条件(ON)与过滤条件(WHERE)分开,可读性更好,也不容易漏写连接条件。本题从效果上看两个方法没区别。
#获取所有部门当前manager的当前薪水情况,给出dept_no, emp_no以及salary,当前表示to_date=‘9999-01-01’
CREATE TABLE dept_manager (
dept_no char(4) NOT NULL,
emp_no int(11) NOT NULL,
from_date date NOT NULL,
to_date date NOT NULL,
PRIMARY KEY (emp_no,dept_no));
CREATE TABLE salaries (
emp_no int(11) NOT NULL,
salary int(11) NOT NULL,
from_date date NOT NULL,
to_date date NOT NULL,
PRIMARY KEY (emp_no,from_date));
方法一:INNER JOIN
SELECT dm.dept_no, dm.emp_no, s.salary
FROM dept_manager AS dm INNER JOIN salaries AS s
ON dm.emp_no = s.emp_no AND dm.to_date = '9999-01-01' AND s.to_date = '9999-01-01'
方法二:并列查询
SELECT dm.dept_no, dm.emp_no, s.salary
FROM dept_manager AS dm, salaries AS s
WHERE dm.emp_no = s.emp_no AND dm.to_date = '9999-01-01' AND s.to_date = '9999-01-01'
获取所有非manager的员工emp_no
CREATE TABLE dept_manager (
dept_no char(4) NOT NULL,
emp_no int(11) NOT NULL,
from_date date NOT NULL,
to_date date NOT NULL,
PRIMARY KEY (emp_no,dept_no));
CREATE TABLE employees (
emp_no int(11) NOT NULL,
birth_date date NOT NULL,
first_name varchar(14) NOT NULL,
last_name varchar(16) NOT NULL,
gender char(1) NOT NULL,
hire_date date NOT NULL,
PRIMARY KEY (emp_no));
思路
思路一:employees 表 LEFT JOIN dept_manager 表,通过判断 dept_manager 对应为空的记录,可以筛选出 「非manager的员工」;
思路二:在表employees中排除dept_manager表中的记录,用NOT IN子查询。
方法一:IS NULL
SELECT e.emp_no
FROM employees AS e LEFT JOIN dept_manager AS dm ON e.emp_no = dm.emp_no
WHERE dm.dept_no IS NULL
方法二: NOT IN
SELECT emp_no FROM employees
WHERE emp_no NOT IN (SELECT emp_no FROM dept_manager)
select A.*
from
(select B.user_id,B.good_id,max(log_time) as recent_time
from A join B on A.user_id = B.user_id and A.log_time < B.pay_time
group by B.user_id,B.good_id)C
join A on A.user_id = C.user_id and A.good_id = C.good_id and A.log_time = C.recent_time
计算每个用户支付每个商品订单前浏览的前五条信息
-- For each paid (user_id, good_id) in B: the five most recent rows of A by
-- the same user that were logged before the payment time.
-- NOTE(review): A is matched on user_id only, as in the original; confirm
-- whether the views should also be restricted to the purchased good.
select v.user_id, v.good_id, v.log_time, v.num
from
(
    select
        B.user_id,
        B.good_id,
        A.log_time,  -- added so each returned row identifies the view it ranks
        -- partition by the same keys that are selected; the original used
        -- A.goods_id, a spelling no other query in these notes uses (good_id)
        row_number() over (partition by B.user_id, B.good_id order by A.log_time desc) as num
    from A
    inner join B
        on A.user_id = B.user_id
       and A.log_time < B.pay_time
) v  -- alias added: most engines reject a derived table without one
where v.num < 6
create table logtable2(
title VARCHAR NOT NULL,
user_id INT NOT NULL,
name varchar(12) NOT NULL);
insert into logtable2 values ('A',7,'a ');
insert into logtable2 values ('A',6,'b ');
insert into logtable2 values ('A',5,'c ');
insert into logtable2 values ('B',4,'c ');
insert into logtable2 values ('B',3,'e');
insert into logtable2 values ('B',2,'d ');
insert into logtable2 values ('B',1,'e');
SELECT TOP 50 PERCENT * FROM logtable2
create table OrderInfo(uid string,orderid bigint,oderamount double);
insert into OrderInfo(uid,orderid,oderamount)values('A1001',301,300.0);
insert into OrderInfo(uid,orderid,oderamount)values('A1002',523,720.0);
insert into OrderInfo(uid,orderid,oderamount)values('A1001',303,450.0);
create table Coupon(orderid bigint,coupon string, discount double);
insert into Coupon(orderid,coupon,discount)values(301,'A',5.0);
insert into Coupon(orderid,coupon,discount)values(301,'B',10.0);
insert into Coupon(orderid,coupon,discount)values(303,'A',10.0);
insert into Coupon(orderid,coupon,discount)values(523,'B',15.0);
-- Net amount per order: oderamount minus the discount of coupon 'A' and the
-- discount of coupon 'B'; a coupon the order does not carry contributes 0.
select
    o.uid,
    o.orderid,
    o.oderamount - coalesce(ca.discount, 0) - coalesce(cb.discount, 0) as realamount
from OrderInfo o
-- one left join per coupon type keeps orders that lack that coupon
left join Coupon ca
    on o.orderid = ca.orderid and ca.coupon = 'A'
left join Coupon cb
    on o.orderid = cb.orderid and cb.coupon = 'B'
order by 1,2