【SQL基础学习-01】

SQL学习基础与语法-01

1.between and

两种方式
1.between 在列值得某与某之间
select
device_id,
gender,
age,
university
from user_profile
WHERE
age between 20 and 23

2.and的来连接条件范围
SELECT
device_id,
gender,
age
from user_profile
WHERE
age >= 20 and age<=23

2.not in,in

SELECT
  device_id,
  gender,
  age,
  university
FROM
  user_profile
WHERE
  university NOT IN ('复旦大学')

3.空值过滤

解法一:
select device_id,gender,age,university
from user_profile
where age is not NULL;
解法二:
select device_id,gender,age,university
from user_profile
where age !='';

4.where in,not in

4.1not in用法

SELECT
  device_id,
  gender,
  age,
  university,
  gpa
FROM
  user_profile
WHERE
  university NOT IN ('浙江大学')

4.2where in用法

SELECT
  device_id,
  gender,
  age,
  university,
  gpa
FROM
  user_profile
WHERE
  university IN ('北京大学', '复旦大学', '山东大学')

5.通配符

一般形式为:
列名 [NOT ] LIKE
匹配串中可包含如下四种通配符:
_:匹配任意一个字符;
%:匹配0个或多个字符;
[ ]:匹配[ ]中的任意一个字符(若要比较的字符是连续的,则可以用连字符“-”表 达 );
[^ ]:不匹配[ ]中的任意一个字符。

# 1.查询姓张的学生
SELECT * FROM 学生表 WHERE 姓名 LIKE '张%'
# 2.查询姓张并且名字为3个字的
SELECT * FROM 学生表 WHERE 姓名 LIKE '张__'
# 3.如果把姓名列的类型改为nchar(20),在SQL Server 2012中执行没有结果。
# 原因是姓名列的类型是char(20),当姓名少于20个汉字时,系统在存储这些数据时自动在后边补空格,
# 空格作为一个字符,也参加LIKE的比较。可以用rtrim()去掉右空格。
# 4.查询学生表中姓‘张’、姓‘李’和姓‘刘’的学生的情况
SELECT * FROM 学生表 WHERE rtrim(姓名) LIKE '张__'
# 5.查询学生表中姓‘张’、姓‘李’和姓‘刘’的学生的情况。
SELECT * FROM 学生表 WHERE 姓名 LIKE '[张李刘]%'
# 6.查询学生表表中名字的第2个字为“小”或“大”的学生的姓名和学号。
SELECT 姓名,学号 FROM 学生表 WHERE 姓名 LIKE '_[小大]%'
# 7.查询学生表中所有不姓“刘”的学生
SELECT 姓名 FROM 学生 WHERE 姓名 NOT LIKE '刘%'
# 8.从学生表表中查询学号的最后一位不是2、3、5的学生信息。
SELECT * FROM 学生表 WHERE 学号 LIKE '%[^235]'
SELECT device_id,age,university FROM user_profile
WHERE university LIKE '%北京%'

6.limit用法

# LIMIT 子句可以被用于强制 SELECT 语句返回指定的记录数。
# LIMIT 接受一个或两个数字参数。参数必须是一个整数常量。
# 如果只给定一个参数,它表示返回最大的记录行数目。
# 如果给定两个参数,第一个参数指定第一个返回记录行的偏移量,第二个参数指定返回记录行的最大数目。
# 为了检索从某一个偏移量到记录集的结束所有的记录行,可以指定第二个参数为 -1。
# 初始记录行的偏移量是 0(而不是 1)。
# 1.检索记录行 [6-7]
SELECT * FROM table LIMIT 5,2
# 2.检索前 5 个记录行[3-7]5条数据
SELECT * FROM user_profile  LIMIT 5 OFFSET 2
# 3.检索记录行 11-last
SELECT * FROM table LIMIT 10,-1
# 4.检索前 5 个记录行
SELECT * FROM table LIMIT 5

7.去重distinct,group by

# 去重两种思路
# 第一种:使用distinct(关键词distinct用于返回唯一不同的值)
select distinct university from user_profile;
# 第二种:使用group by按照university进行分组
select university from user_profile group by university;

8.max(),min()

# 方法1
select
  max(gpa) as gpa
from
  user_profile
where
  university = '复旦大学';

# 方法2
select
  gpa
from
  user_profile
where
  university = '复旦大学'
order by
  gpa desc
limit
  1

9.avg(),round(),count()

# 表头重命名,用as语法
# 浮点数的平均值可能小数点位数很多,按照示例保存一位小数,用round函数
select 
  count(gender) as male_num,
  round(avg(gpa), 1) as avg_gpa
from user_profile where gender="male";

10.聚合函数having过滤

select
  university,
  avg(question_cnt) as avg_question_cnt,
  avg(answer_cnt) as avg_answer_cnt
from
  user_profile
group by
  university
having
  avg_question_cnt < 5 or avg_answer_cnt < 20

11.分组排序

select
  university,
  avg(question_cnt) as avg_question_cnt
from
  user_profile
group by
  university
order by
  avg_question_cnt

12.left join,right join,inner join

left join(左联接) ——返回包括左表中的所有记录和右表中联结字段相等的记录
right join(右联接)——返回包括右表中的所有记录和左表中联结字段相等的记录
inner join(等值连接)—— 只返回两个表中联结字段相等的行

# 1.子查询
select
  device_id,
  question_id,
  result
from
  question_practice_detail
where
  device_id IN (
    select
      device_id
    from
      user_profile
    where
      university = '浙江大学'
  )
#  2.多表连接 
select
  q.device_id,
  question_id,
  result
from
  question_practice_detail q
  inner join user_profile u on q.device_id = u.device_id
where
  university = '浙江大学'
  
  select
  university,
  count(question_id) / count(distinct qpd.device_id) as avg_answer_cnt
from
  question_practice_detail as qpd
  inner join user_profile as up on qpd.device_id = up.device_id
group by
  university

13.三张表关联(不推荐,性能不佳)

select
  university,
  difficult_level,
  round(
    count(qpd.question_id) / count(distinct qpd.device_id),
    4
  ) as avg_answer_cnt
from
  question_practice_detail as qpd
  left join user_profile as up on up.device_id = qpd.device_id
  left join question_detail as qd on qd.question_id = qpd.question_id
group by
  university,
  difficult_level
  # 第一段:select选择字段,含原生字段及构造字段,其中构造的过程中常用sum count round +-*/ 等函数及运算表达式
select
    university,
    difficult_level,
    round(count(qpd.question_id) / count(distinct qpd.device_id),4) as avg_answer_cnt

# 第二段:from选择表格,含合并表格操作
from question_practice_detail as qpd
    join user_profile as up
        on up.device_id=qpd.device_id
    join question_detail as qd
        on qd.question_id=qpd.question_id

# 第三段:数据操作段,含 分组/排序/筛选/取指定行数等操作
group by university,difficult_level;


14.union,union all(去重,不去重)

分别查看&结果不去重:所以直接使用两个条件的or是不行的,直接用union也不行,要用union all,分别去查满足条件1的和满足条件2的,然后合在一起不去重

# 结果不去重就用union all , 去重就用union
select 
  device_id,gender,age,gpa
from
  user_profile
where
  university = '山东大学'
union all
select
  device_id,gender,age,gpa
from
  user_profile
where
  gender = 'male'

15.case函数、if()函数

CASE函数
是一种多分支的函数,可以根据条件列表的值返回多个可能的结果表达式中的一个。
可用在任何允许使用表达式的地方,但不能单独作为一个语句执行。
分为:
简单CASE函数
搜索CASE函数
CASE 测试表达式
WHEN 简单表达式1 THEN 结果表达式1
WHEN 简单表达式2 THEN 结果表达式2 …
WHEN 简单表达式n THEN 结果表达式n
[ ELSE 结果表达式n+1 ]
END

  #case函数
  SELECT
    学号,
    课程号,
    CASE
      WHEN 成绩 >= 90 THEN '优'
      WHEN 成绩 BETWEEN 80 AND 89 THEN '良'
      WHEN 成绩 BETWEEN 70 AND 79 THEN '中'
      WHEN 成绩 BETWEEN 60 AND 69 THEN '及格'
      WHEN 成绩 < 60 THEN '不及格'
    END 成绩
  FROM
    成绩表
  WHERE
    课程号 = 'M01F011'

SELECT
    CASE
      WHEN age < 25 OR age IS NULL THEN '25岁以下'
      WHEN age >= 25 THEN '25岁及以上'
    END age_cut,
    COUNT(*) number
  FROM
    user_profile
  GROUP BY
    age_cut
#if()函数
SELECT
  IF(age >= 25, "25岁及以上", "25岁以下") AS age_cut,
  count(*) AS number
FROM
  user_profile
GROUP BY
  age_cut;

16.日期函数

DAYOFWEEK(date)
返回日期date的星期索引(1=星期天,2=星期一, ……7=星期六)。这些索引值对应于ODBC标准。

select DAYOFWEEK('1998-02-03')
-> 3

WEEKDAY(date)
返回date的星期索引(0=星期一,1=星期二, ……6= 星期天)。

mysql> select WEEKDAY('1997-10-04 22:23:00');
-> 5

DAYOFMONTH(date)
返回date的月份中日期,在131范围内。

mysql> select DAYOFMONTH('1998-02-03');
-> 3

DAYOFYEAR(date)
返回date在一年中的日数,1366范围内。

mysql> select DAYOFYEAR('1998-02-03');
-> 34

MONTH(date)
返回date的月份,范围112。

mysql> select MONTH('1998-02-03');
-> 2

DAYNAME(date)
返回date的星期名字。

mysql> select DAYNAME("1998-02-05");
-> 'Thursday'

MONTHNAME(date)
返回date的月份名字。

mysql> select MONTHNAME("1998-02-05");
-> 'February'

QUARTER(date)
返回date一年中的季度,范围14。

mysql> select QUARTER('98-04-01');
-> 2

WEEK(date)
对于星期天是一周的第一天的地方,有一个单个参数,返回date的周数,范围在052。

mysql> select WEEK('1998-02-20');
-> 7

WEEK(date,first)
2个参数形式WEEK()允许你指定星期是否开始于星期天或星期一。
如果第二个参数是0,星期从星期天开始,
如果第二个参数是1,从星期一开始。

mysql> select WEEK('1998-02-20',0);
-> 7

mysql> select WEEK('1998-02-20',1);
-> 8

YEAR(date)
返回date的年份,范围在10009999。

mysql> select YEAR('98-02-03');
-> 1998

HOUR(time)
返回time的小时,范围是023。

mysql> select HOUR('10:05:03');
-> 10

MINUTE(time)
返回time的分钟,范围是059。

mysql> select MINUTE('98-02-03 10:05:03');
-> 5

SECOND(time)
回来time的秒数,范围是059。

mysql> select SECOND('10:05:03');
-> 3

PERIOD_ADD(P,N)
增加N个月到阶段P(以格式YYMM或YYYYMM)。以格式YYYYMM返回值。注意阶段参数P不是日期值。

mysql> select PERIOD_ADD(9801,2);
-> 199803

PERIOD_DIFF(P1,P2)
返回在时期P1和P2之间月数,P1和P2应该以格式YYMM或YYYYMM。注意,时期参数P1和P2不是日期值。

mysql> select PERIOD_DIFF(9802,199703);
-> 11
select
  day(date) as day,
  count(question_id) as question_cnt
from
  question_practice_detail
where
  month(date) = 8
  and year(date) = 2021
group by
  date

16.lag()和lead()函数用法

<窗口函数> OVER ([PARTITION BY <用于分组的列>] ORDER BY <用于排序的列>)
lag和lead分别是向前向后的意思
参数有三个。expression:列名;offset:偏移量;default_value:超出记录窗口的默认值(默认为null,可以设置为0)

#lag和lead分别是向前向后的意思
#参数有三个。
#expression:列名;offset:偏移量;default_value:超出记录窗口的默认值(默认为null,可以设置为0)
select * from weather
#将温度向后退一行
SELECT
	id,
	date,
	temperature,
	LAG( temperature, 1, 0 ) OVER ( ORDER BY date ) AS temp 
FROM
	weather;
#查询出当天比前天温度高的id
SELECT
	id 
FROM
	(SELECT id, date, temperature, LAG( temperature, 1, 0) OVER ( ORDER BY date ) AS temp FROM weather) as tmp 
WHERE
	temperature > temp 
	AND temp != 0;
#将温度向前移一行
SELECT
	id,
	date,
	temperature,
	lead( temperature, 1, 0 ) OVER ( ORDER BY date ) AS temp 
FROM
	weather;
#查询出当天比后一天温度高的id
SELECT
	id 
FROM
	(SELECT id, date, temperature, lead( temperature, 1, 0) OVER ( ORDER BY date ) AS temp FROM weather) as tmp 
WHERE
	temperature > temp 
	AND temp != 0;

select count(date2) / count(date1) as avg_ret
from (
    select
        distinct qpd.device_id,
        qpd.date as date1,
        uniq_id_date.date as date2
    from question_practice_detail as qpd
    left join(
        select distinct device_id, date
        from question_practice_detail
    ) as uniq_id_date
    on qpd.device_id=uniq_id_date.device_id
        and date_add(qpd.date, interval 1 day)=uniq_id_date.date
) as id_last_next_date

select avg(if(datediff(date2, date1)=1, 1, 0)) as avg_ret
from (
    select
        distinct device_id,
        date as date1,
        lead(date) over (partition by device_id order by date) as date2
    from (
        select distinct device_id, date
        from question_practice_detail
    ) as uniq_id_date
) as id_last_next_date

17.group by分组函数,partition by分析函数

group by是分组函数,partition by是分析函数(然后像sum()等是聚合函数);
在执行顺序上,对于group by而言

from > where > group by > having > order by
对于partition by而言,在执行完select之后,在所得结果集之上进行partition。

在group by后的结果集上使用聚合函数,会作用在分组下的所有记录上。而如果在partition结果上聚合,千万注意聚合函数是逐条累计运行结果的!
group by将分组记录汇总成一条记录,具有去重效果
partition by显示所有数据,不会去重

#group by去重
SELECT
	id,
	date,
	sum( temperature ) 
FROM
	weather 
GROUP BY
	temperature 
	
#partition by不去重,组内排序PARTITION BY temperature ORDER BY date
SELECT
	id,
	date,
	temperature,
	sum( temperature ) over ( PARTITION BY temperature ORDER BY date ) 
FROM
	weather


select count(date2) / count(date1) as avg_ret
from (
    select
        distinct qpd.device_id,
        qpd.date as date1,
        uniq_id_date.date as date2
    from question_practice_detail as qpd
    left join(
        select distinct device_id, date
        from question_practice_detail
    ) as uniq_id_date
    on qpd.device_id=uniq_id_date.device_id
        and date_add(qpd.date, interval 1 day)=uniq_id_date.date
) as id_last_next_date

select avg(if(datediff(date2, date1)=1, 1, 0)) as avg_ret
from (
    select
        distinct device_id,
        date as date1,
        lead(date) over (partition by device_id order by date) as date2
    from (
        select distinct device_id, date
        from question_practice_detail
    ) as uniq_id_date
) as id_last_next_date

18.date_add()函数和datediff()函数

18.1DATE_ADD(date,INTERVAL expr unit)

// 得到当前时间增加1个小时的结果
select date_add(now(),interval 1 hour)

// 得到当前时间增加1天的结果
select date_add(now(),interval 1 day)

// 得到当前时间减少72个小时的结果
select date_add(now(),interval -72 hour)

// 还有一些更加高级的用法,比如增加一天一小时一分一秒
// 但是如果你不太喜欢这种用法,你仍然可以使用等同于该数值的second来进行加减
date_add(now(),interval '1 1:1:1' day_second)

18.2DATEDIFF(datepart,startdate,enddate)

select DATEDIFF(year,'2010-12-31','2019-01-01')
select DATEDIFF(Month,'2018-01-01','2019-01-01')
select DATEDIFF(Week,'2018-12-01','2018-12-31')
select DATEDIFF(day,'2018-12-01','2018-12-31')
select count(date2) / count(date1) as avg_ret
from (
    select
        distinct qpd.device_id,
        qpd.date as date1,
        uniq_id_date.date as date2
    from question_practice_detail as qpd
    left join(
        select distinct device_id, date
        from question_practice_detail
    ) as uniq_id_date
    on qpd.device_id=uniq_id_date.device_id
        and date_add(qpd.date, interval 1 day)=uniq_id_date.date
) as id_last_next_date

select avg(if(datediff(date2, date1)=1, 1, 0)) as avg_ret
from (
    select
        distinct device_id,
        date as date1,
        lead(date) over (partition by device_id order by date) as date2
    from (
        select distinct device_id, date
        from question_practice_detail
    ) as uniq_id_date
) as id_last_next_date

你可能感兴趣的:(笔记,sql,学习,mysql)