# MySQL version: 8.0.29
# Student table: one row per student, identified by sid.
# sid is declared as the primary key so that a duplicated student id is
# rejected instead of silently multiplying rows in every join on sid.
create table student
(
    sid   varchar(10) not null,
    sname varchar(32),
    saged datetime,
    ssex  varchar(10),
    primary key (sid)
) comment '学生表';
# Seed rows for the student table (one multi-row insert).
insert into student (sid, sname, saged, ssex)
values ('01', '赵雷', '1990-01-01', '男'),
       ('02', '钱电', '1990-12-21', '男'),
       ('03', '孙风', '1990-05-20', '男'),
       ('04', '李云', '1990-08-06', '男'),
       ('05', '周梅', '1991-12-01', '女'),
       ('06', '吴兰', '1992-03-01', '女'),
       ('07', '郑竹', '1989-07-01', '女'),
       ('08', '王菊', '1990-01-20', '女');
-- -------------------------------------------------------------
# Course table: one row per course; tid holds the id of a teacher row.
# cid is declared as the primary key so a duplicated course id is rejected
# instead of duplicating rows in joins on cid.
create table course
(
    cid   varchar(10) not null,
    cname varchar(32),
    tid   varchar(10),
    primary key (cid)
) comment '课程表';
# Seed rows for the course table.
insert into course (cid, cname, tid)
values ('01', '语文', '02'),
       ('02', '数学', '01'),
       ('03', '英语', '03');
-- --------------------------------------------------------
# Teacher table: one row per teacher.
# tid is declared as the primary key so a duplicated teacher id is rejected
# instead of duplicating rows in joins on tid.
create table teacher
(
    tid   varchar(10) not null,
    tname varchar(32),
    primary key (tid)
) comment '教师表';
# Seed rows for the teacher table.
insert into teacher (tid, tname)
values ('01', '张三'),
       ('02', '李四'),
       ('03', '王五');
-- --------------------------------------------------
# Score table: one row per (student, course) pair.
# - score is a plain int: the int(30) display width is deprecated in MySQL 8.0
#   and never affected the stored range.
# - (sid, cid) is the primary key so a student cannot hold two scores for the
#   same course.
create table sc
(
    sid   varchar(10) not null,
    cid   varchar(10) not null,
    score int,
    primary key (sid, cid)
) comment '成绩表';
# Seed rows for the score table.
# The table name is written in lower case to match the create statement:
# on servers with case-sensitive table names, "SC" does not resolve to "sc".
# The column list is explicit so the insert does not depend on column order.
insert into sc (sid, cid, score)
values ('01', '01', 80),
       ('01', '02', 90),
       ('01', '03', 99),
       ('02', '01', 70),
       ('02', '02', 60),
       ('02', '03', 80),
       ('03', '01', 80),
       ('03', '02', 80),
       ('03', '03', 80),
       ('04', '01', 50),
       ('04', '02', 30),
       ('04', '03', 20),
       ('05', '01', 76),
       ('05', '02', 87),
       ('06', '01', 31),
       ('06', '03', 34),
       ('07', '02', 89),
       ('07', '03', 98);
/**
 * Starts a local SparkSession and loads the course, sc, student and teacher
 * tables from MySQL over JDBC as DataFrames.
 *
 * The JDBC URL, user and password can be overridden with the environment
 * variables MYSQL_URL, MYSQL_USER and MYSQL_PASSWORD; when they are unset the
 * previous hard-coded values are used, so existing runs behave the same.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {
  val spark: SparkSession = SparkSession.builder().master("local[*]").appName("mysql").getOrCreate()
  try {
    // Brings the $"column" syntax and toDF conversions into scope.
    import spark.implicits._
    val driver = "com.mysql.cj.jdbc.Driver"
    val url = sys.env.getOrElse("MYSQL_URL", "jdbc:mysql://192.168.95.130:3306/mysql50")
    val user = sys.env.getOrElse("MYSQL_USER", "root")
    val password = sys.env.getOrElse("MYSQL_PASSWORD", "root")
    val prop = new Properties()
    prop.setProperty("driver", driver)
    prop.setProperty("user", user)
    prop.setProperty("password", password)
    val courseTable = "course"
    val scoreTable = "sc"
    val studentTable = "student"
    val teacherTable = "teacher"
    val courseDf: DataFrame = spark.read.jdbc(url, courseTable, prop)
    val scDf: DataFrame = spark.read.jdbc(url, scoreTable, prop)
    val studentDf: DataFrame = spark.read.jdbc(url, studentTable, prop)
    val teacherDf: DataFrame = spark.read.jdbc(url, teacherTable, prop)
    // The four DataFrames are local to this scope: any query that uses them
    // has to be placed here, before the session is stopped.
  } finally {
    // Release the local Spark resources even when a query throws.
    spark.stop()
  }
}
}
// Pair each student's score rows with each other (self-join on sid), keep the
// pairs where the course-01 score is higher than the course-02 score, then
// attach the student columns.
val q1Left = scDf.as("s1")
val q1Right = scDf.as("s2")
val q1HigherIn01 = q1Left
  .join(q1Right, Seq("sid"))
  .where("s1.cid=01 and s2.cid=02 and s1.score>s2.score")
q1HigherIn01.join(studentDf, Seq("sid")).show()
// Students whose course-01 score is lower than their course-02 score.
// The filter string was cut off after "s1.score" (no comparison and no closing
// quote), so the statement did not compile. The comparison is restored as "<",
// the mirror image of the preceding "s1.score>s2.score" query.
// NOTE(review): confirm that "<" is the intended comparison.
scDf.as("s1").join(scDf.as("s2"), "sid")
  .filter("s1.cid=01 and s2.cid=02 and s1.score<s2.score")
  .join(studentDf, "sid").show()
// Average score per student, joined to the student table and restricted to
// averages of at least 60.
val q3AvgBySid: DataFrame = scDf.as("s1").groupBy("sid").avg("score")
val frame: DataFrame = q3AvgBySid
  .join(studentDf.as("s2"), Seq("sid"))
  .where($"avg(score)" >= 60)
frame.show()
// Every student with their average score (left outer join, so students
// without any score row are kept), restricted to an average below 60 or no
// average at all.
val q4AvgBySid = scDf.as("s2").groupBy("sid").avg("score")
studentDf.as("s1")
  .join(q4AvgBySid, Seq("sid"), "left_outer")
  .where($"avg(score)".isNull || $"avg(score)" < 60)
  .show()
// Every student with the number of score rows and the sum of the numeric
// score columns; left outer joins keep students that have no score rows.
val q5CountBySid = scDf.groupBy("sid").count()
val q5SumBySid = scDf.groupBy("sid").sum()
studentDf
  .join(q5CountBySid, Seq("sid"), "left_outer")
  .join(q5SumBySid, Seq("sid"), "left_outer")
  .show()
// Number of teachers whose name starts with "李".
val q6SurnameLi = teacherDf.where($"tname".like("李%"))
val l: Long = q6SurnameLi.count()
println(l)
// Score rows enriched with course, teacher and student columns, restricted
// to the teacher named "张三".
val q7ScoresWithTeacher = scDf
  .join(courseDf, Seq("cid"))
  .join(teacherDf, Seq("tid"))
val value: Dataset[Row] = q7ScoresWithTeacher
  .join(studentDf, Seq("sid"))
  .where($"tname" === "张三")
value.show()
// Students with no score row in a course taught by "张三": the right outer
// join keeps every student, and tname stays null where nothing matched.
val q8ZhangCourses = teacherDf.filter("tname ='张三' ").join(courseDf, Seq("tid"))
scDf
  .join(q8ZhangCourses, Seq("cid"))
  .join(studentDf, Seq("sid"), "right_outer")
  .where($"tname".isNull)
  .show()
// Students that have a score row for course "01" and one for course "02".
val q9ScoresIn01 = scDf.where($"cid" === "01")
val q9ScoresIn02 = scDf.where($"cid" === "02")
studentDf
  .join(q9ScoresIn01, Seq("sid"))
  .join(q9ScoresIn02, Seq("sid"))
  .show()
// Students with a score row for course 1 but none for course 2: left-join
// the course-2 rows, keep the students where nothing matched, then require a
// course-1 row.
val q10Course2Scores = scDf.where("cid=2")
val q10Course1Scores = scDf.where("cid=1")
studentDf
  .join(q10Course2Scores, Seq("sid"), "leftouter")
  .filter("cid is null")
  .join(q10Course1Scores, Seq("sid"))
  .show()
// Students that do not have a score row for every course.
// Changes from the previous version:
// - the course total is read from courseDf instead of being hard-coded as 3;
// - score rows are counted on scDf before the join. Counting rows after a
//   left outer join gives a student without any score a count of 1, not 0.
// A null count means the student has no score rows at all.
val q11CourseTotal = courseDf.count()
val q11TakenBySid = scDf.groupBy("sid").count()
studentDf
  .join(q11TakenBySid, Seq("sid"), "left_outer")
  .where(s"count is null or count < $q11CourseTotal")
  .show()
// Students other than 01 that share at least one course with student 01:
// match every score row against the courses of student 01, keep the distinct
// student ids, drop 01 itself and attach the student columns.
val q12CoursesOf01 = scDf.filter("sid=01")
val q12Classmates = studentDf.join(scDf, Seq("sid")).as("s1")
  .join(q12CoursesOf01, Seq("cid"))
  .select("s1.sid")
  .distinct()
  .filter("sid!=01")
q12Classmates.join(studentDf, Seq("sid")).show()
// Students other than 01 whose set of courses is exactly the set of student 01.
// The previous version only checked that a student shares as many courses with
// 01 as 01 has, which also accepts students that take additional courses.
// Here both numbers must equal the course count of 01:
//   shared = courses the student has in common with 01
//   total  = all courses the student has a score row for
val q13RefCourses = scDf.where("sid=01").select("cid")
val q13RefCount = q13RefCourses.count()
val q13Shared = scDf.join(q13RefCourses, "cid")
  .groupBy("sid").count()
  .withColumnRenamed("count", "shared")
val q13Total = scDf.groupBy("sid").count()
  .withColumnRenamed("count", "total")
q13Shared.join(q13Total, "sid")
  .where(s"shared=$q13RefCount and total=$q13RefCount and sid!=01")
  .join(studentDf, "sid")
  .show()
// Names of the students that have no score row in any course taught by "张三".
// A left_anti join keeps exactly the students without a match. The previous
// version filtered on "a.sid is null" after a right outer join on Seq("sid"),
// which keeps a single sid column taken from the student side.
// NOTE(review): whether "a.sid" still resolves after that join appears to
// depend on the Spark version — the anti join does not rely on it.
val q14TaughtByZhangSan = scDf
  .join(courseDf, "cid")
  .join(teacherDf.where("tname='张三'"), "tid")
  .select("sid")
studentDf
  .join(q14TaughtByZhangSan, Seq("sid"), "left_anti")
  .select("sname")
  .show()
// Students with at least two scores below 60, shown with the average over
// all of their scores and their student columns.
val q15RepeatFailers = scDf.filter("score<60")
  .groupBy("sid").count()
  .filter("count>=2")
q15RepeatFailers
  .join(scDf, Seq("sid"))
  .groupBy("sid").avg("score")
  .join(studentDf, Seq("sid"))
  .show()
// Course-01 scores right-joined to all students, keeping scores below 60 and
// students without a course-01 row, sorted by score descending.
val q16Course01Scores = scDf.filter("cid=01")
q16Course01Scores
  .join(studentDf, Seq("sid"), "right_outer")
  .filter("score<60 or score is null")
  .sort($"score".desc)
  .show()
// Every score row together with the student's average score and student
// columns, sorted by that average from high to low.
val q17MeanBySid = scDf.groupBy("sid").avg("score")
scDf
  .join(q17MeanBySid, Seq("sid"), "left_outer")
  .join(studentDf, Seq("sid"))
  .sort($"avg(score)".desc)
  .show()
import org.apache.spark.sql.functions._
// Per-course statistics: highest, lowest and average score, number of scores,
// and the share of scores at or above 60 / 70 / 80 / 90.
//
// Changes from the previous version:
// - thresholds are inclusive (>=). With the strict ">" a score of exactly 60
//   was not counted as a pass.
// - everything is computed in one aggregation on the DataFrame instead of four
//   RDD passes that read the score through an untyped getAs and the course id
//   through the positional index x(1).
// Each rate counts all scores at or above its threshold, so the rates are
// cumulative, as before.
// NOTE(review): confirm that 70, 80 and 90 are meant as inclusive lower bounds.

// Share of the non-null scores in a group that reach the given threshold.
def shareAtLeast(threshold: Int) =
  sum(when(col("score") >= threshold, 1).otherwise(0)) / count("score")

val courseStats: DataFrame = scDf
  .groupBy("cid")
  .agg(
    max("score").as("maxscore"),
    min("score").as("minscore"),
    avg("score").as("avgscore"),
    count("score").as("num"),
    shareAtLeast(60).as("jigelv"),      // pass rate
    shareAtLeast(70).as("zhongdenglv"), // medium rate
    shareAtLeast(80).as("youlianglv"),  // good rate
    shareAtLeast(90).as("youxiulv")     // excellent rate
  )
courseStats.show()
// Score rows numbered within each course, highest score first.
val q19RankedWithinCourse = scDf.selectExpr(
  "*",
  "row_number() over(partition by cid order by score desc)"
)
q19RankedWithinCourse.show()
// Total score per student, numbered from the highest total downwards.
// The previous version added the total with a window function and then called
// dropDuplicates("sid"), which keeps one arbitrary score row per student: the
// cid and score columns shown next to the total were not meaningful. Grouping
// by sid returns only the student id and the total.
scDf.groupBy("sid").sum("score")
  .withColumnRenamed("sum(score)", "sumscore")
  .selectExpr("*", "row_number() over(order by sumscore desc)")
  .show()
// Average score per (teacher id, course id), highest average first.
val q21ScoresWithTeacher = scDf
  .join(courseDf, Seq("cid"))
  .join(teacherDf, Seq("tid"))
q21ScoresWithTeacher
  .groupBy("tid", "cid")
  .avg("score")
  .sort($"avg(score)".desc)
  .show()
// Rows numbered 2 and 3 within each course (ordered by score descending),
// with the student columns attached.
val q22Numbered = scDf.selectExpr(
  "*",
  "row_number() over(partition by cid order by score desc) num"
)
q22Numbered
  .filter("num between 2 and 3")
  .join(studentDf, Seq("sid"))
  .show()
// Score bands per course: 1 = below 60, 2 = 60 to 69, 3 = 70 to 84,
// 4 = 85 and above.
// The band is computed with a SQL CASE expression on the DataFrame. The
// previous version went through the RDD API, read the score with an untyped
// getAs and the course id with the positional index x(1), which breaks as soon
// as the column order of the table changes.
val fenduan = scDf.selectExpr(
  "cid",
  "case when score < 60 then 1 when score < 70 then 2 when score < 85 then 3 else 4 end as fenduan"
)
// Join the per-course total (f1) with the per-course, per-band count (f2) and
// derive the share of each band within its course.
fenduan.groupBy("cid").count.as("f1")
  .join(fenduan.groupBy("cid", "fenduan").count.as("f2"), "cid")
  .withColumn("percent", $"f2.count" / $"f1.count")
  .drop($"f1.count")
  .join(courseDf, "cid").show()
// Average score per student with its position in the ranking.
// The column name is quoted with backticks: in single quotes, 'avg(score)' is
// a string literal, so the previous version ordered by a constant and the row
// numbers were arbitrary. The needless s-interpolator is dropped as well.
// NOTE(review): ordering is descending so that number 1 is the best average —
// confirm this is the intended direction.
scDf.groupBy("sid")
  .avg("score")
  .selectExpr("*", "row_number() over(order by `avg(score)` desc)")
  .show()
// The three highest-numbered rows per course, ordered by score descending.
val q25Numbered = scDf.selectExpr(
  "*",
  "row_number() over(partition by cid order by score desc) num"
)
q25Numbered.filter("num<=3").show()
// Number of score rows per course.
val q26RowsPerCourse = scDf.groupBy("cid").count()
q26RowsPerCourse.show()
// Students with exactly two score rows, with their student columns.
val q26TwoCourses = scDf.groupBy("sid").count().filter("count=2")
q26TwoCourses.join(studentDf, Seq("sid")).show()
// Number of students per ssex value.
val q28BySex = studentDf.groupBy("ssex").count()
q28BySex.show()
// Students whose name contains "风".
val q28NameHasFeng = studentDf.filter("sname like '%风%'")
q28NameHasFeng.show()
// Names that occur more than once, with their number of occurrences.
val q28SharedNames = studentDf.groupBy("sname").count().filter("count>1")
q28SharedNames.show()
// Students whose saged falls in the year 1990.
val q28BornIn1990 = studentDf.filter("year(saged)=1990")
q28BornIn1990.show()
// Average score per course, highest average first; ties ordered by course id.
val q32AvgByCourse = scDf.groupBy("cid").avg("score")
q32AvgByCourse.sort($"avg(score)".desc, $"cid").show()
// Students whose average score is at least 85, with their student columns.
val q32HighAverages = scDf.groupBy("sid").avg("score").filter("avg(score)>=85")
q32HighAverages.join(studentDf, Seq("sid")).show()
// Scores below 60 in the course named "数学", with the student columns.
val q34Failing = scDf.filter("score<60")
q34Failing
  .join(courseDf, Seq("cid"))
  .filter("cname='数学'")
  .join(studentDf, Seq("sid"))
  .show()
// Every student with their score rows; students without scores are kept.
val q35StudentsWithScores = studentDf.join(scDf, Seq("sid"), "left_outer")
q35StudentsWithScores.show()
// Scores above 70 with the student and course columns attached.
val q35Above70 = scDf.filter("score>70")
q35Above70
  .join(studentDf, Seq("sid"))
  .join(courseDf, Seq("cid"))
  .show()
// Score rows below 60 (or without a score), with the student columns.
val q37Failing = scDf.filter("score<60 or score is null")
q37Failing.join(studentDf, Seq("sid")).show()
// Course-01 scores above 80, with the student columns.
val q37High01 = scDf.filter("cid=01 and score>80")
q37High01.join(studentDf, Seq("sid")).show()
// Number of score rows per course.
val q37RowsPerCourse = scDf.groupBy("cid").count()
q37RowsPerCourse.show()
// Highest-scoring row(s) among the courses taught by "张三", with student,
// course and teacher columns.
// studentDf was joined twice in the previous version; the second join only
// duplicated the sname, saged and ssex columns in the output, so it is removed.
scDf.join(studentDf, "sid")
  .join(courseDf, "cid")
  .join(teacherDf, "tid")
  .where("tname='张三'")
  // max(score) over() is the maximum across all remaining rows.
  .selectExpr("*", "max(score) over() max")
  .where("max=score").show()
// Pairs of score rows of the same student that have the same score in two
// different courses.
val q41Left = scDf.as("s1")
val q41Right = scDf.as("s2")
q41Left
  .join(q41Right, Seq("sid"))
  .filter("s1.score=s2.score and s1.cid!=s2.cid")
  .show()
// The first three rows per course when ordered by score descending.
val q42Ranked = scDf.selectExpr(
  "*",
  "row_number() over(partition by cid order by score desc)rank"
)
q42Ranked.filter("rank<=3").show()
// Courses with at least five score rows, ordered by that count from high to
// low and by course id for equal counts.
// Both sort keys go into one orderBy: a second orderBy call replaces the
// first, so the previous version ended up sorted by cid only.
scDf.groupBy("cid").count()
  .where("count>=5")
  .orderBy($"count".desc, $"cid".asc)
  .show()
// Student ids with at least two score rows.
val q44RowsPerStudent = scDf.groupBy("sid").count()
q44RowsPerStudent.filter("count>=2").show()
// Students that have a score row for every course.
// Score rows are counted on scDf directly. The previous version counted rows
// after a left outer join from studentDf, where a student without any score
// still produces one row and therefore a count of 1 — with a single course in
// courseDf such a student would have been reported as having taken them all.
val q45CourseTotal = courseDf.select("cid").count()
scDf.groupBy("sid").count()
  .where(s"count=$q45CourseTotal")
  .join(studentDf, "sid").show()
// Age of each student in completed years.
// Subtracting the years alone is one too high until the birthday has passed
// in the current year; counting whole months between saged and today and
// dividing by 12 only advances the age on the birthday itself.
studentDf.selectExpr("*", "floor(months_between(current_date(), saged) / 12) as age").show()
// Students whose birthday falls between today and the day before
// next_day(current_date(),'MON').
// The birthday is rebuilt in the current year from the month and day of saged
// (concat_ws of current year, MM and dd, cast to date); both sides of the
// comparison are converted with unix_timestamp.
// The lower bound is today, so a birthday earlier in the current week is not
// matched.
// NOTE(review): the rebuilt date always uses the current year, so a range that
// runs into January would not match January birthdays — confirm whether that
// matters here.
studentDf.where(
"unix_timestamp(cast(concat_ws('-',date_format(current_date(),'yyyy')," +
"date_format(saged,'MM'),date_format(saged,'dd'))as date),'yyyy-MM-dd')" +
" between unix_timestamp(current_date()) " +
"and unix_timestamp(date_sub(next_day(current_date(),'MON'),1),'yyyy-MM-dd')").show()
// Students whose birthday falls in next week, Monday through Sunday.
// The range now starts on next Monday. The previous lower bound was the day
// before next Monday, so that day was matched by this query although it still
// belongs to the current week.
// The dates are compared directly as dates instead of being converted with
// unix_timestamp first.
// NOTE(review): the birthday is rebuilt in the current year, so a week that
// runs into January would not match January birthdays — confirm whether that
// matters here.
val q48BirthdayThisYear =
  "cast(concat_ws('-', date_format(current_date(),'yyyy'), " +
    "date_format(saged,'MM'), date_format(saged,'dd')) as date)"
val q48NextMonday = "next_day(current_date(),'MON')"
studentDf
  .where(s"$q48BirthdayThisYear between $q48NextMonday and date_add($q48NextMonday, 6)")
  .show()
// Students whose saged month equals the current month.
val q49BornThisMonth = studentDf.filter("month(saged)=month(current_date)")
q49BornThisMonth.show()
// Students whose saged month equals next month.
// add_months rolls over the year end: in December "month(current_date)+1" is
// 13, which no month matches, so January birthdays were never returned.
studentDf.where("month(saged)=month(add_months(current_date(), 1))").show()