Note: copy the MySQL driver JAR into spark/lib, copy hive-site.xml into the project's resources directory, and do not use hostnames when debugging remotely (use IP addresses instead).
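The hive-site.xml mentioned above is what tells the Spark driver where the Hive metastore lives. As a rough sketch only (the original post does not show this file), a minimal hive-site.xml placed under resources might look like the following; the thrift URI matches the commented-out setting in the code below, the JDBC properties are only needed by the metastore service itself, and the database name, user, and password are placeholders to replace with your own:

<configuration>
  <!-- remote metastore address; an IP is used instead of a hostname, per the note above -->
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://192.168.66.66:9083</value>
  </property>
  <!-- the settings below are read by the metastore service, which keeps its metadata in MySQL -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://192.168.66.66:3306/hive?createDatabaseIfNotExist=true</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
  </property>
</configuration>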
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.hive.HiveContext
import java.io.FileNotFoundException
import java.io.IOException

object HiveSelect {
  def main(args: Array[String]): Unit = {
    System.setProperty("hadoop.home.dir", "D:\\hadoop") // point at the local Hadoop installation (winutils)
    val conf = new SparkConf().setAppName("HiveApp").setMaster("spark://192.168.66.66:7077")
      .set("spark.executor.memory", "1g")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .setJars(Seq("D:\\workspace\\scala\\out\\scala.jar")) // ship the application jar to the remote Spark cluster
      //.set("hive.metastore.uris", "thrift://192.168.66.66:9083") // remote Hive metastore address
      //.set("spark.driver.extraClassPath", "D:\\json\\mysql-connector-java-5.1.39.jar")
    val sparkContext = new SparkContext(conf)
    try {
      val hiveContext = new HiveContext(sparkContext)
      hiveContext.sql("use siat") // switch to the target database
      hiveContext.sql("DROP TABLE IF EXISTS src") // drop the table if it already exists
      hiveContext.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING) " +
        "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'") // create the table
      hiveContext.sql("LOAD DATA LOCAL INPATH 'D:\\workspace\\scala\\src.txt' INTO TABLE src") // load the data
      hiveContext.sql("SELECT * FROM src").collect().foreach(println) // query and print every row
    } catch {
      // specific exceptions must be matched before the general ones, otherwise those cases are unreachable
      case e: FileNotFoundException => println("Missing file exception")
      case e: IOException => println("IO Exception")
      case e: ArithmeticException => println(e)
      case e: NumberFormatException => println(e)
      case e: IllegalArgumentException => println("illegal argument exception")
      case e: IllegalStateException => println("illegal state exception")
      case e: Exception => println(e)
      case e: Throwable => println("found an unknown exception: " + e)
    } finally {
      sparkContext.stop()
    }
  }
}
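For reference, the LOAD DATA statement expects src.txt to match the (key INT, value STRING) schema, with the two columns separated by a tab, since the table was created with FIELDS TERMINATED BY '\t'. A hypothetical two-line sample file (not from the original post) would be:

1	value1
2	value2

With such a file, the final SELECT prints one Row per line, e.g. [1,value1] and [2,value2].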
Appendix 1: Scala Spark API - http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package
org.apache.spark
org.apache.spark.api.java
org.apache.spark.api.java.function
org.apache.spark.broadcast
org.apache.spark.graphx
org.apache.spark.graphx.impl
org.apache.spark.graphx.lib
org.apache.spark.graphx.util
org.apache.spark.input
org.apache.spark.internal
org.apache.spark.internal.io
org.apache.spark.io
org.apache.spark.launcher
org.apache.spark.mapred
org.apache.spark.metrics.source
org.apache.spark.ml
org.apache.spark.ml.attribute
org.apache.spark.ml.classification
org.apache.spark.ml.clustering
org.apache.spark.ml.evaluation
org.apache.spark.ml.feature
org.apache.spark.ml.fpm
org.apache.spark.ml.linalg
org.apache.spark.ml.param
org.apache.spark.ml.recommendation
org.apache.spark.ml.regression
org.apache.spark.ml.source.libsvm
org.apache.spark.ml.stat
org.apache.spark.ml.stat.distribution
org.apache.spark.ml.tree
org.apache.spark.ml.tuning
org.apache.spark.ml.util
org.apache.spark.mllib
org.apache.spark.mllib.classification
org.apache.spark.mllib.clustering
org.apache.spark.mllib.evaluation
org.apache.spark.mllib.feature
org.apache.spark.mllib.fpm
org.apache.spark.mllib.linalg
org.apache.spark.mllib.linalg.distributed
org.apache.spark.mllib.optimization
org.apache.spark.mllib.pmml
org.apache.spark.mllib.random
org.apache.spark.mllib.rdd
org.apache.spark.mllib.recommendation
org.apache.spark.mllib.regression
org.apache.spark.mllib.stat
org.apache.spark.mllib.stat.distribution
org.apache.spark.mllib.stat.test
org.apache.spark.mllib.tree
org.apache.spark.mllib.tree.configuration
org.apache.spark.mllib.tree.impurity
org.apache.spark.mllib.tree.loss
org.apache.spark.mllib.tree.model
org.apache.spark.mllib.util
org.apache.spark.partial
org.apache.spark.rdd
org.apache.spark.scheduler
org.apache.spark.scheduler.cluster
org.apache.spark.security
org.apache.spark.serializer
org.apache.spark.sql
org.apache.spark.sql.api.java
org.apache.spark.sql.catalog
org.apache.spark.sql.expressions
org.apache.spark.sql.expressions.javalang
org.apache.spark.sql.expressions.scalalang
org.apache.spark.sql.hive
org.apache.spark.sql.hive.execution
org.apache.spark.sql.hive.orc
org.apache.spark.sql.jdbc
org.apache.spark.sql.sources
org.apache.spark.sql.streaming
org.apache.spark.sql.types
org.apache.spark.sql.util
org.apache.spark.status.api.v1
org.apache.spark.status.api.v1.streaming
org.apache.spark.storage
org.apache.spark.streaming
org.apache.spark.streaming.api.java
org.apache.spark.streaming.dstream
org.apache.spark.streaming.flume
org.apache.spark.streaming.kafka
org.apache.spark.streaming.kinesis
org.apache.spark.streaming.receiver
org.apache.spark.streaming.scheduler
org.apache.spark.streaming.scheduler.rate
org.apache.spark.streaming.util
org.apache.spark.ui.env
org.apache.spark.ui.exec
org.apache.spark.ui.jobs
org.apache.spark.ui.storage
org.apache.spark.util
org.apache.spark.util.random
org.apache.spark.util.sketch