spark读取hive和写入hive

1. 导入 Maven 依赖

<properties>
        <spark.version>2.1.1</spark.version>
        <scala.version>2.11.8</scala.version>
</properties>
<dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <!-- property reference must stay on one line; a line break inside ${...}
                 makes Maven fail to resolve the version -->
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <!-- spark-hive provides HiveContext / enableHiveSupport() -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
</dependencies>

2.读hive数据

// Create a SparkSession with Hive support enabled (required to query Hive tables)
val sparkSession: SparkSession = SparkSession.builder()
								.appName("MemberLogETL")
								.master("local[*]")
								.enableHiveSupport()
								.getOrCreate()
// Import implicit conversions (encoders, $"col" syntax)
import sparkSession.implicits._
// Switch to the target database
// (fixed: the original called `spark.sql`, but the session variable is `sparkSession`)
sparkSession.sql("use db_hive_edu")
// Read all rows from the user table
// (fixed: `Dataset<Row> data = ...` is Java syntax; in Scala use `val` — DataFrame is an alias for Dataset[Row])
val data: DataFrame = sparkSession.sql("select * from user")
// Print the first rows to stdout
data.show()
// Release the session's resources
sparkSession.close()

3.写数据到hive

// Normal write: throws an AnalysisException if the table already exists.
// (fixed: `.write` must be called on a DataFrame value such as `data`,
//  not on the `Dataset` class name)
data.write.saveAsTable("dwd_member")
// Overwrite write: replaces the existing data of the target table.
// insertInto matches columns by position, so the DataFrame's column order
// must match the table schema.
data.write.mode(SaveMode.Overwrite).insertInto("dwd_member")
注意:如果要连接外部的 Hive(而不是 Spark 自带的内嵌 Hive),需要把集群的 hive-site.xml 配置文件放到项目的 resources 目录(或 Spark 的 conf 目录)下。

你可能感兴趣的:(大数据,scala,hive,spark)