如果你使用的是SBT工程,请将以下依赖加入到build.sbt文件中:
1
|
libraryDependencies +=
"com.databricks"
%
"spark-csv_2.10"
%
"1.3.0"
|
1
2
3
4
5
|
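<dependency>
    <groupId>com.databricks</groupId>
    <artifactId>spark-csv_2.10</artifactId>
    <version>1.3.0</version>
</dependency>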
|
1
2
|
import
org.apache.spark.SparkConf
val conf =
new
SparkConf().setAppName(
"csvDataFrame"
).setMaster(
"local[2]"
)
|
1
|
val sc =
new
SparkContext(conf)
|
1
|
val sqlContext=
new
SQLContext(sc)
|
1
2
|
import
com.databricks.spark.csv._
val students=sqlContext.csvFile(filePath=
"StudentData.csv"
, useHeader=
true
, delimiter=
'|'
)
|
1
2
|
val options = Map(
"header"
->
"true"
,
"path"
->
"E:\\StudentData.csv"
)
val newStudents = sqlContext.read.options(options).format(
"com.databricks.spark.csv"
).load()
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
id|studentName|phone|email
1
|Burke|
1
-
300
-
746
-
8446
|ullamcorper.velit.in
@ametnullaDonec
.co.uk
2
|Kamal|
1
-
668
-
571
-
5046
|pede.Suspendisse
@interdumenim
.edu
3
|Olga|
1
-
956
-
311
-
1686
|Aenean.eget.metus
@dictumcursusNunc
.edu
4
|Belle|
1
-
246
-
894
-
6340
|vitae.aliquet.nec
@neque
.co.uk
5
|Trevor|
1
-
300
-
527
-
4967
|dapibus.id
@acturpisegestas
.net
6
|Laurel|
1
-
691
-
379
-
9921
|adipiscing
@consectetueripsum
.edu
7
|Sara|
1
-
608
-
140
-
1995
|Donec.nibh
@enimEtiamimperdiet
.edu
8
|Kaseem|
1
-
881
-
586
-
2689
|cursus.et.magna
@euismod
.org
9
|Lev|
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsumdolor
.com
10
|Maya|
1
-
271
-
683
-
2698
|accumsan.convallis
@ornarelectusjusto
.edu
11
|Emi|
1
-
467
-
270
-
1337
|est
@nunc
.com
12
|Caleb|
1
-
683
-
212
-
0896
|Suspendisse
@Quisque
.edu
13
|Florence|
1
-
603
-
575
-
2444
|sit.amet.dapibus
@lacusAliquamrutrum
.ca
14
|Anika|
1
-
856
-
828
-
7883
|euismod
@ligulaelit
.co.uk
15
|Tarik|
1
-
398
-
171
-
2268
|turpis
@felisorci
.com
16
|Amena|
1
-
878
-
250
-
3129
|lorem.luctus.ut
@scelerisque
.com
17
|Blossom|
1
-
154
-
406
-
9596
|Nunc.commodo.auctor
@eratSed
.co.uk
18
|Guy|
1
-
869
-
521
-
3230
|senectus.et.netus
@lectusrutrum
.com
19
|Malachi|
1
-
608
-
637
-
2772
|Proin.mi.Aliquam
@estarcu
.net
20
|Edward|
1
-
711
-
710
-
6552
|lectus
@aliquetlibero
.co.uk
|
1
|
/** Employee record used to build a DataFrame from a local collection.
  *
  * @param id   numeric employee identifier
  * @param name employee name
  */
final case class Employee(id: Int, name: String)
|
1
2
3
|
val conf =
new
SparkConf().setAppName(
"colRowDataFrame"
). setMaster(
"local[2]"
)
val sc =
new
SparkContext(conf)
val sqlContext =
new
SQLContext(sc)
|
1
|
val listOfEmployees = List(Employee(
1
,
"iteblog"
), Employee(
2
,
"Jason"
), Employee(
3
,
"Abhi"
))
|
1
2
3
4
5
|
val empFrame = sqlContext.createDataFrame(listOfEmployees)
empFrame.printSchema
root
|-- id: integer (nullable =
false
)
|-- name: string (nullable =
true
)
|
1
2
3
4
5
6
|
val empFrameWithRenamedColumns = sqlContext.createDataFrame(listOfEmployees).withColumnRenamed(
"id"
,
"empId"
)
empFrameWithRenamedColumns.printSchema
root
|-- empId: integer (nullable =
false
)
|-- name: string (nullable =
true
)
|
1
|
empFrameWithRenamedColumns.registerTempTable(
"employeeTable"
)
|
1
2
3
4
5
6
7
8
9
|
val sortedByNameEmployees = sqlContext.sql(
"select * from employeeTable order by name desc"
)
sortedByNameEmployees.show()
+-----+-------+
|empId| name|
+-----+-------+
|
1
|iteblog|
|
2
| Jason|
|
3
| Abhi|
+-----+-------+
|
1
|
def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame
|
1
2
3
4
5
6
7
8
9
10
11
12
|
// Build a DataFrame from a Seq of tuples; the columns get the default
// tuple field names _1 and _2, as the printed schema below shows.
val mobiles = sqlContext.createDataFrame(Seq((1, "Android"), (2, "iPhone")))
// Each statement must be on its own line; fused onto one line they do not parse.
mobiles.printSchema
mobiles.show()
root
|-- _1: integer (nullable =
false
)
|-- _2: string (nullable =
true
)
+---+-------+
| _1| _2|
+---+-------+
|
1
|Android|
|
2
| iPhone|
+---+-------+
|
1
2
3
4
5
6
|
students.printSchema
root
|-- id: string (nullable =
true
)
|-- studentName: string (nullable =
true
)
|-- phone: string (nullable =
true
)
|-- email: string (nullable =
true
)
|
如果DataFrame是通过上文的load方式创建的(没有指定分隔符“|”),整行表头会被当作一个列名,此时students.printSchema的输出如下:
1
2
|
root
|-- id|studentName|phone|email: string (nullable =
true
)
|
show函数和其他函数的不同之处在于:它不仅会显示需要打印的行,还会打印出表头信息,并且会直接输出到默认的输出流(控制台)。来看看怎么使用吧:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
students.show()
//打印出20行
+---+-----------+--------------+--------------------+
| id|studentName| phone| email|
+---+-----------+--------------+--------------------+
|
1
| Burke|
1
-
300
-
746
-
8446
|ullamcorper.velit...|
|
2
| Kamal|
1
-
668
-
571
-
5046
|pede.Suspendisse@...|
|
3
| Olga|
1
-
956
-
311
-
1686
|Aenean.eget.metus...|
|
4
| Belle|
1
-
246
-
894
-
6340
|vitae.aliquet.nec...|
|
5
| Trevor|
1
-
300
-
527
-
4967
|dapibus.id
@acturp
...|
|
6
| Laurel|
1
-
691
-
379
-
9921
|adipiscing
@consec
...|
|
7
| Sara|
1
-
608
-
140
-
1995
|Donec.nibh
@enimEt
...|
|
8
| Kaseem|
1
-
881
-
586
-
2689
|cursus.et.magna
@e
...|
|
9
| Lev|
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsu
...|
|
10
| Maya|
1
-
271
-
683
-
2698
|accumsan.convalli...|
|
11
| Emi|
1
-
467
-
270
-
1337
| est
@nunc
.com|
|
12
| Caleb|
1
-
683
-
212
-
0896
|Suspendisse
@Quisq
...|
|
13
| Florence|
1
-
603
-
575
-
2444
|sit.amet.dapibus@...|
|
14
| Anika|
1
-
856
-
828
-
7883
|euismod
@ligulaeli
...|
|
15
| Tarik|
1
-
398
-
171
-
2268
|turpis
@felisorci
.com|
|
16
| Amena|
1
-
878
-
250
-
3129
|lorem.luctus.ut
@s
...|
|
17
| Blossom|
1
-
154
-
406
-
9596
|Nunc.commodo.auct...|
|
18
| Guy|
1
-
869
-
521
-
3230
|senectus.et.netus...|
|
19
| Malachi|
1
-
608
-
637
-
2772
|Proin.mi.Aliquam@...|
|
20
| Edward|
1
-
711
-
710
-
6552
|lectus
@aliquetlib
...|
+---+-----------+--------------+--------------------+
only showing top
20
rows
students.show(
15
)
+---+-----------+--------------+--------------------+
| id|studentName| phone| email|
+---+-----------+--------------+--------------------+
|
1
| Burke|
1
-
300
-
746
-
8446
|ullamcorper.velit...|
|
2
| Kamal|
1
-
668
-
571
-
5046
|pede.Suspendisse@...|
|
3
| Olga|
1
-
956
-
311
-
1686
|Aenean.eget.metus...|
|
4
| Belle|
1
-
246
-
894
-
6340
|vitae.aliquet.nec...|
|
5
| Trevor|
1
-
300
-
527
-
4967
|dapibus.id
@acturp
...|
|
6
| Laurel|
1
-
691
-
379
-
9921
|adipiscing
@consec
...|
|
7
| Sara|
1
-
608
-
140
-
1995
|Donec.nibh
@enimEt
...|
|
8
| Kaseem|
1
-
881
-
586
-
2689
|cursus.et.magna
@e
...|
|
9
| Lev|
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsu
...|
|
10
| Maya|
1
-
271
-
683
-
2698
|accumsan.convalli...|
|
11
| Emi|
1
-
467
-
270
-
1337
| est
@nunc
.com|
|
12
| Caleb|
1
-
683
-
212
-
0896
|Suspendisse
@Quisq
...|
|
13
| Florence|
1
-
603
-
575
-
2444
|sit.amet.dapibus@...|
|
14
| Anika|
1
-
856
-
828
-
7883
|euismod
@ligulaeli
...|
|
15
| Tarik|
1
-
398
-
171
-
2268
|turpis
@felisorci
.com|
+---+-----------+--------------+--------------------+
only showing top
15
rows
students.show(
true
)
+---+-----------+--------------+--------------------+
| id|studentName| phone| email|
+---+-----------+--------------+--------------------+
|
1
| Burke|
1
-
300
-
746
-
8446
|ullamcorper.velit...|
|
2
| Kamal|
1
-
668
-
571
-
5046
|pede.Suspendisse@...|
|
3
| Olga|
1
-
956
-
311
-
1686
|Aenean.eget.metus...|
|
4
| Belle|
1
-
246
-
894
-
6340
|vitae.aliquet.nec...|
|
5
| Trevor|
1
-
300
-
527
-
4967
|dapibus.id
@acturp
...|
|
6
| Laurel|
1
-
691
-
379
-
9921
|adipiscing
@consec
...|
|
7
| Sara|
1
-
608
-
140
-
1995
|Donec.nibh
@enimEt
...|
|
8
| Kaseem|
1
-
881
-
586
-
2689
|cursus.et.magna
@e
...|
|
9
| Lev|
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsu
...|
|
10
| Maya|
1
-
271
-
683
-
2698
|accumsan.convalli...|
|
11
| Emi|
1
-
467
-
270
-
1337
| est
@nunc
.com|
|
12
| Caleb|
1
-
683
-
212
-
0896
|Suspendisse
@Quisq
...|
|
13
| Florence|
1
-
603
-
575
-
2444
|sit.amet.dapibus@...|
|
14
| Anika|
1
-
856
-
828
-
7883
|euismod
@ligulaeli
...|
|
15
| Tarik|
1
-
398
-
171
-
2268
|turpis
@felisorci
.com|
|
16
| Amena|
1
-
878
-
250
-
3129
|lorem.luctus.ut
@s
...|
|
17
| Blossom|
1
-
154
-
406
-
9596
|Nunc.commodo.auct...|
|
18
| Guy|
1
-
869
-
521
-
3230
|senectus.et.netus...|
|
19
| Malachi|
1
-
608
-
637
-
2772
|Proin.mi.Aliquam@...|
|
20
| Edward|
1
-
711
-
710
-
6552
|lectus
@aliquetlib
...|
+---+-----------+--------------+--------------------+
only showing top
20
rows
students.show(
false
)
+---+-----------+--------------+-----------------------------------------+
|id |studentName|phone |email |
+---+-----------+--------------+-----------------------------------------+
|
1
|Burke |
1
-
300
-
746
-
8446
|ullamcorper.velit.in
@ametnullaDonec
.co.uk|
|
2
|Kamal |
1
-
668
-
571
-
5046
|pede.Suspendisse
@interdumenim
.edu |
|
3
|Olga |
1
-
956
-
311
-
1686
|Aenean.eget.metus
@dictumcursusNunc
.edu |
|
4
|Belle |
1
-
246
-
894
-
6340
|vitae.aliquet.nec
@neque
.co.uk |
|
5
|Trevor |
1
-
300
-
527
-
4967
|dapibus.id
@acturpisegestas
.net |
|
6
|Laurel |
1
-
691
-
379
-
9921
|adipiscing
@consectetueripsum
.edu |
|
7
|Sara |
1
-
608
-
140
-
1995
|Donec.nibh
@enimEtiamimperdiet
.edu |
|
8
|Kaseem |
1
-
881
-
586
-
2689
|cursus.et.magna
@euismod
.org |
|
9
|Lev |
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsumdolor
.com |
|
10
|Maya |
1
-
271
-
683
-
2698
|accumsan.convallis
@ornarelectusjusto
.edu |
|
11
|Emi |
1
-
467
-
270
-
1337
|est
@nunc
.com |
|
12
|Caleb |
1
-
683
-
212
-
0896
|Suspendisse
@Quisque
.edu |
|
13
|Florence |
1
-
603
-
575
-
2444
|sit.amet.dapibus
@lacusAliquamrutrum
.ca |
|
14
|Anika |
1
-
856
-
828
-
7883
|euismod
@ligulaelit
.co.uk |
|
15
|Tarik |
1
-
398
-
171
-
2268
|turpis
@felisorci
.com |
|
16
|Amena |
1
-
878
-
250
-
3129
|lorem.luctus.ut
@scelerisque
.com |
|
17
|Blossom |
1
-
154
-
406
-
9596
|Nunc.commodo.auctor
@eratSed
.co.uk |
|
18
|Guy |
1
-
869
-
521
-
3230
|senectus.et.netus
@lectusrutrum
.com |
|
19
|Malachi |
1
-
608
-
637
-
2772
|Proin.mi.Aliquam
@estarcu
.net |
|
20
|Edward |
1
-
711
-
710
-
6552
|lectus
@aliquetlibero
.co.uk |
+---+-----------+--------------+-----------------------------------------+
only showing top
20
rows
students.show(
10
,
false
)
+---+-----------+--------------+-----------------------------------------+
|id |studentName|phone |email |
+---+-----------+--------------+-----------------------------------------+
|
1
|Burke |
1
-
300
-
746
-
8446
|ullamcorper.velit.in
@ametnullaDonec
.co.uk|
|
2
|Kamal |
1
-
668
-
571
-
5046
|pede.Suspendisse
@interdumenim
.edu |
|
3
|Olga |
1
-
956
-
311
-
1686
|Aenean.eget.metus
@dictumcursusNunc
.edu |
|
4
|Belle |
1
-
246
-
894
-
6340
|vitae.aliquet.nec
@neque
.co.uk |
|
5
|Trevor |
1
-
300
-
527
-
4967
|dapibus.id
@acturpisegestas
.net |
|
6
|Laurel |
1
-
691
-
379
-
9921
|adipiscing
@consectetueripsum
.edu |
|
7
|Sara |
1
-
608
-
140
-
1995
|Donec.nibh
@enimEtiamimperdiet
.edu |
|
8
|Kaseem |
1
-
881
-
586
-
2689
|cursus.et.magna
@euismod
.org |
|
9
|Lev |
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsumdolor
.com |
|
10
|Maya |
1
-
271
-
683
-
2698
|accumsan.convallis
@ornarelectusjusto
.edu |
+---+-----------+--------------+-----------------------------------------+
only showing top
10
rows
|
1
2
3
4
5
6
7
8
|
students.head(
5
).foreach(println)
[
1
,Burke,
1
-
300
-
746
-
8446
,ullamcorper.velit.in
@ametnullaDonec
.co.uk]
[
2
,Kamal,
1
-
668
-
571
-
5046
,pede.Suspendisse
@interdumenim
.edu]
[
3
,Olga,
1
-
956
-
311
-
1686
,Aenean.eget.metus
@dictumcursusNunc
.edu]
[
4
,Belle,
1
-
246
-
894
-
6340
,vitae.aliquet.nec
@neque
.co.uk]
[
5
,Trevor,
1
-
300
-
527
-
4967
,dapibus.id
@acturpisegestas
.net]
println(students.head())
[
1
,Burke,
1
-
300
-
746
-
8446
,ullamcorper.velit.in
@ametnullaDonec
.co.uk]
|
1
2
3
4
5
6
7
8
|
println(students.first())
[
1
,Burke,
1
-
300
-
746
-
8446
,ullamcorper.velit.in
@ametnullaDonec
.co.uk]
students.take(
5
).foreach(println)
[
1
,Burke,
1
-
300
-
746
-
8446
,ullamcorper.velit.in
@ametnullaDonec
.co.uk]
[
2
,Kamal,
1
-
668
-
571
-
5046
,pede.Suspendisse
@interdumenim
.edu]
[
3
,Olga,
1
-
956
-
311
-
1686
,Aenean.eget.metus
@dictumcursusNunc
.edu]
[
4
,Belle,
1
-
246
-
894
-
6340
,vitae.aliquet.nec
@neque
.co.uk]
[
5
,Trevor,
1
-
300
-
527
-
4967
,dapibus.id
@acturpisegestas
.net]
|
1
|
val emailDataFrame: DataFrame = students.select(
"email"
)
|
1
2
3
4
5
6
7
8
9
|
emailDataFrame.show(
3
)
+--------------------+
| email|
+--------------------+
|ullamcorper.velit...|
|pede.Suspendisse@...|
|Aenean.eget.metus...|
+--------------------+
only showing top
3
rows
|
1
2
3
4
5
6
7
8
9
10
11
12
|
val studentEmailDF = students.select(
"studentName"
,
"email"
)
studentEmailDF.show(
5
)
+-----------+--------------------+
|studentName| email|
+-----------+--------------------+
| Burke|ullamcorper.velit...|
| Kamal|pede.Suspendisse@...|
| Olga|Aenean.eget.metus...|
| Belle|vitae.aliquet.nec...|
| Trevor|dapibus.id
@acturp
...|
+-----------+--------------------+
only showing top
5
rows
|
1
2
3
4
|
val studentEmailDF = students.select(
"studentName"
,
"iteblog"
)
studentEmailDF.show(
5
)
Exception in thread
"main"
org.apache.spark.sql.AnalysisException: cannot resolve
'iteblog'
given input columns id, studentName, phone, email;
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
// Keep the rows whose id is greater than 5 and print the first 10 of them,
// which is what the 10-row output and its "only showing top 10 rows" footer show.
students.filter("id > 5").show(10)
+---+-----------+--------------+--------------------+
| id|studentName| phone| email|
+---+-----------+--------------+--------------------+
|
6
| Laurel|
1
-
691
-
379
-
9921
|adipiscing
@consec
...|
|
7
| Sara|
1
-
608
-
140
-
1995
|Donec.nibh
@enimEt
...|
|
8
| Kaseem|
1
-
881
-
586
-
2689
|cursus.et.magna
@e
...|
|
9
| Lev|
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsu
...|
|
10
| Maya|
1
-
271
-
683
-
2698
|accumsan.convalli...|
|
11
| Emi|
1
-
467
-
270
-
1337
| est
@nunc
.com|
|
12
| Caleb|
1
-
683
-
212
-
0896
|Suspendisse
@Quisq
...|
|
13
| Florence|
1
-
603
-
575
-
2444
|sit.amet.dapibus@...|
|
14
| Anika|
1
-
856
-
828
-
7883
|euismod
@ligulaeli
...|
|
15
| Tarik|
1
-
398
-
171
-
2268
|turpis
@felisorci
.com|
+---+-----------+--------------+--------------------+
only showing top
10
rows
students.filter(
"studentName =''"
).show(
7
)
+---+-----------+--------------+--------------------+
| id|studentName| phone| email|
+---+-----------+--------------+--------------------+
|
21
| |
1
-
598
-
439
-
7549
|consectetuer.adip...|
|
32
| |
1
-
184
-
895
-
9602
|accumsan.laoreet@...|
|
45
| |
1
-
245
-
752
-
0481
|Suspendisse.eleif...|
|
83
| |
1
-
858
-
810
-
2204
|sociis.natoque
@eu
...|
|
94
| |
1
-
443
-
410
-
7878
|Praesent.eu.nulla...|
+---+-----------+--------------+--------------------+
|
1
2
3
4
5
6
7
8
9
10
11
|
students.filter(
"studentName ='' OR studentName = 'NULL'"
).show(
7
)
+---+-----------+--------------+--------------------+
| id|studentName| phone| email|
+---+-----------+--------------+--------------------+
|
21
| |
1
-
598
-
439
-
7549
|consectetuer.adip...|
|
32
| |
1
-
184
-
895
-
9602
|accumsan.laoreet@...|
|
33
| NULL|
1
-
105
-
503
-
0141
|Donec
@Inmipede
.co.uk|
|
45
| |
1
-
245
-
752
-
0481
|Suspendisse.eleif...|
|
83
| |
1
-
858
-
810
-
2204
|sociis.natoque
@eu
...|
|
94
| |
1
-
443
-
410
-
7878
|Praesent.eu.nulla...|
+---+-----------+--------------+--------------------+
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
students.filter(
"SUBSTR(studentName,0,1) ='M'"
).show(
7
)
+---+-----------+--------------+--------------------+
| id|studentName| phone| email|
+---+-----------+--------------+--------------------+
|
10
| Maya|
1
-
271
-
683
-
2698
|accumsan.convalli...|
|
19
| Malachi|
1
-
608
-
637
-
2772
|Proin.mi.Aliquam@...|
|
24
| Marsden|
1
-
477
-
629
-
7528
|Donec.dignissim.m...|
|
37
| Maggy|
1
-
910
-
887
-
6777
|facilisi.Sed.nequ...|
|
61
| Maxine|
1
-
422
-
863
-
3041
|aliquet.molestie....|
|
77
| Maggy|
1
-
613
-
147
-
4380
| pellentesque
@mi
.net|
|
97
| Maxwell|
1
-
607
-
205
-
1273
|metus.In
@musAenea
...|
+---+-----------+--------------+--------------------+
only showing top
7
rows
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
students.sort(students(
"studentName"
).desc).show(
7
)
+---+-----------+--------------+--------------------+
| id|studentName| phone| email|
+---+-----------+--------------+--------------------+
|
50
| Yasir|
1
-
282
-
511
-
4445
|eget.odio.Aliquam...|
|
52
| Xena|
1
-
527
-
990
-
8606
|in.faucibus.orci@...|
|
86
| Xandra|
1
-
677
-
708
-
5691
|libero
@arcuVestib
...|
|
43
| Wynter|
1
-
440
-
544
-
1851
|amet.risus.Donec@...|
|
31
| Wallace|
1
-
144
-
220
-
8159
| lorem.lorem
@non
.net|
|
66
| Vance|
1
-
268
-
680
-
0857
|pellentesque
@netu
...|
|
41
| Tyrone|
1
-
907
-
383
-
5293
|non.bibendum.sed@...|
+---+-----------+--------------+--------------------+
only showing top
7
rows
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
students.sort(
"studentName"
,
"id"
).show(
10
)
+---+-----------+--------------+--------------------+
| id|studentName| phone| email|
+---+-----------+--------------+--------------------+
|
21
| |
1
-
598
-
439
-
7549
|consectetuer.adip...|
|
32
| |
1
-
184
-
895
-
9602
|accumsan.laoreet@...|
|
45
| |
1
-
245
-
752
-
0481
|Suspendisse.eleif...|
|
83
| |
1
-
858
-
810
-
2204
|sociis.natoque
@eu
...|
|
94
| |
1
-
443
-
410
-
7878
|Praesent.eu.nulla...|
|
91
| Abel|
1
-
530
-
527
-
7467
| urna
@veliteu
.edu|
|
69
| Aiko|
1
-
682
-
230
-
7013
|turpis.vitae.puru...|
|
47
| Alma|
1
-
747
-
382
-
6775
| nec.enim
@non
.org|
|
26
| Amela|
1
-
526
-
909
-
2605
| in
@vitaesodales
.edu|
|
16
| Amena|
1
-
878
-
250
-
3129
|lorem.luctus.ut
@s
...|
+---+-----------+--------------+--------------------+
only showing top
10
rows
|
1
|
students.sort(students(
"studentName"
).asc, students(
"id"
).asc).show(
10
)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
students.select(students(
"studentName"
).as(
"name"
), students(
"email"
)).show(
10
)
+--------+--------------------+
| name| email|
+--------+--------------------+
| Burke|ullamcorper.velit...|
| Kamal|pede.Suspendisse@...|
| Olga|Aenean.eget.metus...|
| Belle|vitae.aliquet.nec...|
| Trevor|dapibus.id
@acturp
...|
| Laurel|adipiscing
@consec
...|
| Sara|Donec.nibh
@enimEt
...|
| Kaseem|cursus.et.magna
@e
...|
| Lev|Vivamus.nisi
@ipsu
...|
| Maya|accumsan.convalli...|
+--------+--------------------+
only showing top
10
rows
|
1
|
students.registerTempTable(
"students"
)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
sqlContext.sql(
"select * from students where studentName!='' order by email desc"
).show(
7
)
+---+-----------+--------------+--------------------+
| id|studentName| phone| email|
+---+-----------+--------------+--------------------+
|
87
| Selma|
1
-
601
-
330
-
4409
|vulputate.velit
@p
...|
|
96
| Channing|
1
-
984
-
118
-
7533
|viverra.Donec.tem...|
|
4
| Belle|
1
-
246
-
894
-
6340
|vitae.aliquet.nec...|
|
78
| Finn|
1
-
213
-
781
-
6969
|vestibulum.massa@...|
|
53
| Kasper|
1
-
155
-
575
-
9346
|velit.eget
@pedeCu
...|
|
63
| Dylan|
1
-
417
-
943
-
8961
|vehicula.aliquet@...|
|
35
| Cadman|
1
-
443
-
642
-
5919
|ut.lacus
@adipisci
...|
+---+-----------+--------------+--------------------+
only showing top
7
rows
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
val students1 = sqlContext.csvFile(filePath =
"E:\\StudentPrep1.csv"
, useHeader =
true
, delimiter =
'|'
)
val students2 = sqlContext.csvFile(filePath =
"E:\\StudentPrep2.csv"
, useHeader =
true
, delimiter =
'|'
)
val studentsJoin = students1.join(students2, students1(
"id"
) === students2(
"id"
))
studentsJoin.show(studentsJoin.count.toInt)
+---+-----------+--------------+--------------------+---+------------------+--------------+--------------------+
| id|studentName| phone| email| id| studentName| phone| email|
+---+-----------+--------------+--------------------+---+------------------+--------------+--------------------+
|
1
| Burke|
1
-
300
-
746
-
8446
|ullamcorper.velit...|
1
|BurkeDifferentName|
1
-
300
-
746
-
8446
|ullamcorper.velit...|
|
2
| Kamal|
1
-
668
-
571
-
5046
|pede.Suspendisse@...|
2
|KamalDifferentName|
1
-
668
-
571
-
5046
|pede.Suspendisse@...|
|
3
| Olga|
1
-
956
-
311
-
1686
|Aenean.eget.metus...|
3
| Olga|
1
-
956
-
311
-
1686
|Aenean.eget.metus...|
|
4
| Belle|
1
-
246
-
894
-
6340
|vitae.aliquet.nec...|
4
|BelleDifferentName|
1
-
246
-
894
-
6340
|vitae.aliquet.nec...|
|
5
| Trevor|
1
-
300
-
527
-
4967
|dapibus.id
@acturp
...|
5
| Trevor|
1
-
300
-
527
-
4967
|dapibusDifferentE...|
|
6
| Laurel|
1
-
691
-
379
-
9921
|adipiscing
@consec
...|
6
|LaurelInvalidPhone|
000000000
|adipiscing
@consec
...|
|
7
| Sara|
1
-
608
-
140
-
1995
|Donec.nibh
@enimEt
...|
7
| Sara|
1
-
608
-
140
-
1995
|Donec.nibh
@enimEt
...|
|
8
| Kaseem|
1
-
881
-
586
-
2689
|cursus.et.magna
@e
...|
8
| Kaseem|
1
-
881
-
586
-
2689
|cursus.et.magna
@e
...|
|
9
| Lev|
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsu
...|
9
| Lev|
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsu
...|
|
10
| Maya|
1
-
271
-
683
-
2698
|accumsan.convalli...|
10
| Maya|
1
-
271
-
683
-
2698
|accumsan.convalli...|
+---+-----------+--------------+--------------------+---+------------------+--------------+--------------------+
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
val studentsRightOuterJoin = students1.join(students2, students1(
"id"
) === students2(
"id"
),
"right_outer"
)
studentsRightOuterJoin.show(studentsRightOuterJoin.count.toInt)
+----+-----------+--------------+--------------------+---+--------------------+--------------+--------------------+
| id|studentName| phone| email| id| studentName| phone| email|
+----+-----------+--------------+--------------------+---+--------------------+--------------+--------------------+
|
1
| Burke|
1
-
300
-
746
-
8446
|ullamcorper.velit...|
1
| BurkeDifferentName|
1
-
300
-
746
-
8446
|ullamcorper.velit...|
|
2
| Kamal|
1
-
668
-
571
-
5046
|pede.Suspendisse@...|
2
| KamalDifferentName|
1
-
668
-
571
-
5046
|pede.Suspendisse@...|
|
3
| Olga|
1
-
956
-
311
-
1686
|Aenean.eget.metus...|
3
| Olga|
1
-
956
-
311
-
1686
|Aenean.eget.metus...|
|
4
| Belle|
1
-
246
-
894
-
6340
|vitae.aliquet.nec...|
4
| BelleDifferentName|
1
-
246
-
894
-
6340
|vitae.aliquet.nec...|
|
5
| Trevor|
1
-
300
-
527
-
4967
|dapibus.id
@acturp
...|
5
| Trevor|
1
-
300
-
527
-
4967
|dapibusDifferentE...|
|
6
| Laurel|
1
-
691
-
379
-
9921
|adipiscing
@consec
...|
6
| LaurelInvalidPhone|
000000000
|adipiscing
@consec
...|
|
7
| Sara|
1
-
608
-
140
-
1995
|Donec.nibh
@enimEt
...|
7
| Sara|
1
-
608
-
140
-
1995
|Donec.nibh
@enimEt
...|
|
8
| Kaseem|
1
-
881
-
586
-
2689
|cursus.et.magna
@e
...|
8
| Kaseem|
1
-
881
-
586
-
2689
|cursus.et.magna
@e
...|
|
9
| Lev|
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsu
...|
9
| Lev|
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsu
...|
|
10
| Maya|
1
-
271
-
683
-
2698
|accumsan.convalli...|
10
| Maya|
1
-
271
-
683
-
2698
|accumsan.convalli...|
|
null
|
null
|
null
|
null
|
999
|LevUniqueToSecondRDD|
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsu
...|
+----+-----------+--------------+--------------------+---+--------------------+--------------+--------------------+
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
val studentsLeftOuterJoin = students1.join(students2, students1(
"id"
) === students2(
"id"
),
"left_outer"
)
studentsLeftOuterJoin.show(studentsLeftOuterJoin.count.toInt)
+---+-----------+--------------+--------------------+----+------------------+--------------+--------------------+
| id|studentName| phone| email| id| studentName| phone| email|
+---+-----------+--------------+--------------------+----+------------------+--------------+--------------------+
|
1
| Burke|
1
-
300
-
746
-
8446
|ullamcorper.velit...|
1
|BurkeDifferentName|
1
-
300
-
746
-
8446
|ullamcorper.velit...|
|
2
| Kamal|
1
-
668
-
571
-
5046
|pede.Suspendisse@...|
2
|KamalDifferentName|
1
-
668
-
571
-
5046
|pede.Suspendisse@...|
|
3
| Olga|
1
-
956
-
311
-
1686
|Aenean.eget.metus...|
3
| Olga|
1
-
956
-
311
-
1686
|Aenean.eget.metus...|
|
4
| Belle|
1
-
246
-
894
-
6340
|vitae.aliquet.nec...|
4
|BelleDifferentName|
1
-
246
-
894
-
6340
|vitae.aliquet.nec...|
|
5
| Trevor|
1
-
300
-
527
-
4967
|dapibus.id
@acturp
...|
5
| Trevor|
1
-
300
-
527
-
4967
|dapibusDifferentE...|
|
6
| Laurel|
1
-
691
-
379
-
9921
|adipiscing
@consec
...|
6
|LaurelInvalidPhone|
000000000
|adipiscing
@consec
...|
|
7
| Sara|
1
-
608
-
140
-
1995
|Donec.nibh
@enimEt
...|
7
| Sara|
1
-
608
-
140
-
1995
|Donec.nibh
@enimEt
...|
|
8
| Kaseem|
1
-
881
-
586
-
2689
|cursus.et.magna
@e
...|
8
| Kaseem|
1
-
881
-
586
-
2689
|cursus.et.magna
@e
...|
|
9
| Lev|
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsu
...|
9
| Lev|
1
-
916
-
367
-
5608
|Vivamus.nisi
@ipsu
...|
|
10
| Maya|
1
-
271
-
683
-
2698
|accumsan.convalli...|
10
| Maya|
1
-
271
-
683
-
2698
|accumsan.convalli...|
|
11
| iteblog|
999999
| iteblog
@iteblog
.com|
null
|
null
|
null
|
null
|
+---+-----------+--------------+--------------------+----+------------------+--------------+--------------------+
|
1
|
val saveOptions = Map(
"header"
->
"true"
,
"path"
->
"iteblog.csv"
)
|
1
|
val copyOfStudents = students.select(students(
"studentName"
).as(
"name"
), students(
"email"
))
|
1
|
copyOfStudents.write.format(
"com.databricks.spark.csv"
).mode(SaveMode.Overwrite).options(saveOptions).save()
|