Spark 2.x: Reading and Writing Cassandra

An example of connecting Spark 2.x to Cassandra, writing Hive data into it, and reading it back.

import org.apache.spark.SparkConf
import org.apache.spark.sql.{SaveMode, SparkSession}

object SparkCassandraExample {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .set("spark.cassandra.connection.host", "xxx.xxx.x.xxx,xxx.xxx.x.xxx,xxx.xxx.x.xxx")
      .set("spark.cassandra.connection.port", "9042")
      .set("spark.cassandra.auth.username", "xxxxx")
      .set("spark.cassandra.auth.password", "xxxxx")
      .set("spark.sql.shuffle.partitions", "20")

    val spark = SparkSession.builder
      .appName("cassandra test")
      .master("local[*]")
      .config(conf)
      .enableHiveSupport()
      .getOrCreate()

    // Read data from Hive and write it to Cassandra
    val df = spark.sql("select * from cassandra_data.test")
    df.select("id", "name", "age", "birthday") // choose the columns to write; repeating .option("column", ...) would only keep the last value
      .write
      .format("org.apache.spark.sql.cassandra")
      .option("keyspace", "hive_to_cassandra")
      .option("table", "test")
      .option("spark.cassandra.output.consistency.level", "ONE") // or "ALL" for stronger write consistency
      .mode(SaveMode.Append)
      .save()

    // Read the data back from Cassandra
    val dfFromCass = spark.read
      .format("org.apache.spark.sql.cassandra")
      .option("keyspace", "hive_to_cassandra")
      .option("table", "test")
      .load()
    dfFromCass.show(5, false)

    spark.stop()
  }
}
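
The Append write above expects the target keyspace and table to already exist in Cassandra. If they do not, they can be created from the same job before the write. Below is a minimal sketch using the connector's CassandraConnector helper; it reuses the spark session from the example, and the replication settings and column types (int/text) are assumptions for illustration only.

import com.datastax.spark.connector.cql.CassandraConnector

// Minimal sketch: create the target keyspace/table before writing.
// Replication strategy and column types are assumed for illustration.
CassandraConnector(spark.sparkContext.getConf).withSessionDo { session =>
  session.execute(
    "CREATE KEYSPACE IF NOT EXISTS hive_to_cassandra " +
      "WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}")
  session.execute(
    "CREATE TABLE IF NOT EXISTS hive_to_cassandra.test " +
      "(id int PRIMARY KEY, name text, age int, birthday text)")
}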
  • Project deployment
    • Cassandra connector jars to download: spark-cassandra-connector_2.11-x.x.x.jar and jsr166e-x.x.x.jar (or see the build dependency sketch below)
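
Instead of downloading the jars by hand, the connector can also be declared as a build dependency so that sbt resolves it (and its transitive dependencies) automatically. A minimal build.sbt sketch is shown below; the x.x.x version is a placeholder and should match the Spark 2.x / Scala 2.11 build used here.

// build.sbt (sketch): pull in the Cassandra connector via sbt instead of manual jars.
// Replace x.x.x with the connector release matching your Spark/Scala version.
libraryDependencies += "com.datastax.spark" %% "spark-cassandra-connector" % "x.x.x"

When launching with spark-submit, manually downloaded jars can equally be passed on the command line via --jars.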
