Modify spark-submit.sh
In standalone cluster mode, the job runs with the following configuration. The script takes the main class as its first argument ($1); hive-site.xml is shipped with --files so that Spark talks to the real Hive metastore instead of spinning up a local one, and the MySQL JDBC driver is put on the driver classpath for the metastore connection:
/opt/spark/bin/spark-submit \
--class $1 \
--num-executors 1 \
--driver-memory 1g \
--executor-memory 1g \
--executor-cores 2 \
--files /opt/hive/conf/hive-site.xml \
--driver-class-path /opt/hive/lib/mysql-connector-java-5.1.39.jar \
/opt/jars/spark/spark-hive.jar
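If the metastore connection fails, one quick driver-side check is whether the MySQL connector actually made it onto the classpath; a minimal sketch (driver class name taken from the mysql-connector-java-5.1.x jar above):

// Sketch: verify the MySQL JDBC driver is visible to the driver JVM.
// If --driver-class-path was dropped, this throws ClassNotFoundException,
// and the Hive metastore connection would fail for the same reason.
Class.forName("com.mysql.jdbc.Driver")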
Scala version
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

/**
 * A Spark and Hive integration example.
 */
object SparkHiveOps extends App {
  val conf = new SparkConf().setAppName("SparkHiveOps")
  val sc = new SparkContext(conf)
  val hiveContext = new HiveContext(sc)

  // Query data already in Hive
  val df = hiveContext.table("word")
  df.show()

  /**
   * Write data into Hive.
   * teacher_info:  name, height
   * teacher_basic: name, age, married, children
   * The two tables are related through the name column.
   * Task: join teacher_info and teacher_basic and, for every teacher
   * with height > 180, produce name, age, height, married, children.
   */
  // Create the tables in Hive
  hiveContext.sql("DROP TABLE IF EXISTS teacher_basic")
  hiveContext.sql("CREATE TABLE teacher_basic(" +
    "name string, " +
    "age int, " +
    "married boolean, " +
    "children int) " +
    "row format delimited " +
    "fields terminated by ','")
  // Load data into teacher_basic
  hiveContext.sql("LOAD DATA LOCAL INPATH '/opt/data/spark/teacher_basic.txt' INTO TABLE teacher_basic")
  // Create the second table
  hiveContext.sql("DROP TABLE IF EXISTS teacher_info")
  hiveContext.sql("CREATE TABLE teacher_info(name string, height int) row format delimited fields terminated by ','")
  hiveContext.sql("LOAD DATA LOCAL INPATH '/opt/data/spark/teacher_info.txt' INTO TABLE teacher_info")
  // Join the two tables. Note that the WHERE filter on i.height makes
  // the LEFT JOIN behave like an inner join in practice.
  val joinDF = hiveContext.sql("select b.name, b.age, b.married, b.children, i.height from teacher_basic b left join teacher_info i on b.name = i.name where i.height > 180")
  hiveContext.sql("DROP TABLE IF EXISTS teacher")
  joinDF.show()
  joinDF.write.saveAsTable("teacher")
  sc.stop()
}
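The HiveContext API above is Spark 1.x. For reference, on Spark 2.x the same integration goes through a Hive-enabled SparkSession; a minimal sketch of the equivalent setup (same table names as above):

import org.apache.spark.sql.SparkSession

object SparkHiveOps2x extends App {
  // Spark 2.x: HiveContext is replaced by a SparkSession with Hive support
  val spark = SparkSession.builder()
    .appName("SparkHiveOps")
    .enableHiveSupport()
    .getOrCreate()

  // The same join as above, run through the session
  val joinDF = spark.sql(
    "select b.name, b.age, b.married, b.children, i.height " +
      "from teacher_basic b join teacher_info i on b.name = i.name " +
      "where i.height > 180")
  joinDF.write.saveAsTable("teacher")
  spark.stop()
}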
Java version
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.hive.HiveContext;

public class SparkHiveJava {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName(SparkHiveJava.class.getSimpleName());
        JavaSparkContext sc = new JavaSparkContext(conf);
        HiveContext hiveContext = new HiveContext(sc);
        // Query data already in Hive
        DataFrame df = hiveContext.table("word");
        df.show();
        /*
         * Write data into Hive.
         * teacher_info:  name, height
         * teacher_basic: name, age, married, children
         * The two tables are related through the name column.
         * Task: join teacher_info and teacher_basic and, for every teacher
         * with height > 180, produce name, age, height, married, children.
         */
        // Create the tables in Hive
        hiveContext.sql("DROP TABLE IF EXISTS teacher_basic");
        hiveContext.sql("CREATE TABLE teacher_basic(" +
                "name string, " +
                "age int, " +
                "married boolean, " +
                "children int) " +
                "row format delimited " +
                "fields terminated by ','");
        // Load data into teacher_basic
        hiveContext.sql("LOAD DATA LOCAL INPATH '/opt/data/spark/teacher_basic.txt' INTO TABLE teacher_basic");
        // Create the second table
        hiveContext.sql("DROP TABLE IF EXISTS teacher_info");
        hiveContext.sql("CREATE TABLE teacher_info(name string, height int) row format delimited fields terminated by ','");
        hiveContext.sql("LOAD DATA LOCAL INPATH '/opt/data/spark/teacher_info.txt' INTO TABLE teacher_info");
        // Join the two tables; as in the Scala version, the WHERE filter
        // on i.height makes the LEFT JOIN behave like an inner join.
        DataFrame joinDF = hiveContext.sql("select b.name, b.age, b.married, b.children, i.height from teacher_basic b left join teacher_info i on b.name = i.name where i.height > 180");
        hiveContext.sql("DROP TABLE IF EXISTS teacher");
        joinDF.show();
        joinDF.write().saveAsTable("teacher");
        sc.stop();
    }
}
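Both versions expect two comma-delimited text files under /opt/data/spark/. The original does not show the data, so these are hypothetical rows matching the declared schemas:

teacher_basic.txt (name,age,married,children):
zhangsan,30,true,1
lisi,28,false,0

teacher_info.txt (name,height):
zhangsan,185
lisi,175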
When the job reaches the metastore, its audit log records the table lookup:
INFO audit: ugi=kkk ip=unknown-ip-addr cmd=get_table : db=default tbl=word
Written this way, the job fails to run:
/opt/spark/bin/spark-submit \
--class $1 \
--master spark://master:7077 \
--num-executors 1 \
--driver-memory 1g \
--executor-memory 1g \
--executor-cores 2 \
/opt/jars/spark/spark-hive.jar
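The missing pieces are --files and --driver-class-path: without hive-site.xml on the classpath, Spark falls back to a local embedded metastore, where the word table simply does not exist. A quick sketch for checking which metastore the context actually sees (assumes hive.metastore.uris is set in your hive-site.xml):

// If hive-site.xml was picked up, this prints the remote metastore URI;
// if it prints the fallback value, Spark is using a local embedded metastore.
println(hiveContext.getConf("hive.metastore.uris", "<not set>"))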
In standalone cluster mode the configuration shown at the top, with --files for hive-site.xml and the MySQL connector on --driver-class-path, works. Trying the same settings on YARN in cluster mode:
/opt/spark/bin/spark-submit \
--class $1 \
--master yarn \
--deploy-mode cluster \
--num-executors 1 \
--driver-memory 1g \
--executor-memory 1g \
--executor-cores 2 \
--files /opt/hive/conf/hive-site.xml \
--driver-class-path /opt/hive/lib/mysql-connector-java-5.1.39.jar \
/opt/jars/spark/spark-hive.jar
For now, I can't get YARN cluster mode to work. A likely culprit: with --deploy-mode cluster the driver runs on a cluster node, so --driver-class-path points at a client-local path that may not exist there; shipping the connector with --jars instead is the usual workaround.
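Another option worth trying when hive-site.xml is not picked up in cluster mode is to point the context at the metastore explicitly in code; a sketch (thrift://master:9083 is an assumed metastore address, adjust to your setup):

// Hypothetical workaround: set the metastore URI programmatically instead of
// relying on hive-site.xml being on the driver's classpath.
hiveContext.setConf("hive.metastore.uris", "thrift://master:9083")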