SparkSQL读取Cassandra数据源

pom依赖

        <dependency>
            <groupId>com.datastax.spark</groupId>
            <artifactId>spark-cassandra-connector_2.11</artifactId>
            <version>2.0.0-M1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.1.1</version>
        </dependency>

Scala版

import com.datastax.spark.connector.CassandraRow
import com.datastax.spark.connector.rdd.CassandraRDD
import org.apache.spark.{SparkConf, SparkContext}
import com.datastax.spark.connector._


/** Example job: reads rows from the Cassandra table `test.words` and prints them.
  *
  * The filter passed to `where` is a CQL predicate string handed to the
  * connector, so the restriction on `word` and `count` is expressed in CQL
  * rather than as a Spark-side `filter`.
  */
object test {

  /** Entry point.
    *
    * Connects to the Cassandra node at 127.0.0.1, runs the scan on a Spark
    * context whose master is "local", and prints every matching row.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf(true)
      .set("spark.cassandra.connection.host", "127.0.0.1")
    val sc = new SparkContext("local", "test", conf)
    // Always stop the context, even when the scan fails, so the Spark
    // runtime it started is released.
    try {
      val rdd: CassandraRDD[CassandraRow] = sc
        .cassandraTable("test", "words")
        .where("word in ('foo', 'fo2') and count > 1 and count < 8")
      // println(row) prints row.toString, same as the explicit call.
      rdd.foreach(row => println(row))
    } finally {
      sc.stop()
    }
  }
}

Java版

https://github.com/datastax/spark-cassandra-connector/blob/master/doc/7_java_api.md

import com.datastax.spark.connector.japi.CassandraRow;
import com.datastax.spark.connector.japi.rdd.CassandraJavaRDD;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import static com.datastax.spark.connector.japi.CassandraJavaUtil.javaFunctions;

/**
 * Example job: reads rows from the Cassandra table {@code test.words} and
 * prints them, using the connector's Java API.
 */
public class test {
    /**
     * Entry point. Connects to the Cassandra node at 127.0.0.1, scans
     * {@code test.words} with a CQL predicate on {@code word} and
     * {@code count}, and prints every matching row.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf(true).set("spark.cassandra.connection.host", "127.0.0.1");
        SparkContext sc = new SparkContext("local", "test", conf);
        // Always stop the context, even when the scan fails, so the Spark
        // runtime it started is released.
        try {
            // Parameterized with the Java API row type instead of raw types,
            // which avoids unchecked warnings and keeps the row type visible.
            CassandraJavaRDD<CassandraRow> rdd = javaFunctions(sc).cassandraTable("test", "words");
            CassandraJavaRDD<CassandraRow> result =
                    rdd.where("word in ('foo', 'fo2') and count > 1 and count < 8");
            result.foreach(row -> System.out.println(row));
        } finally {
            sc.stop();
        }
    }
}

欢迎关注个人公众号:数据库漫游指南

SparkSQL读取Cassandra数据源_第1张图片

你可能感兴趣的:(SparkSQL)