【Spark】Writing Spark Programs in IDEA: Getting Started

  • Create a Maven project and add the required dependencies. The full pom.xml:


<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.itcast</groupId>
    <artifactId>SparkDemo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!-- Extra repositories: aliyun, cloudera, jboss -->
    <repositories>
        <repository>
            <id>aliyun</id>
            <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
        </repository>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
        <repository>
            <id>jboss</id>
            <url>http://repository.jboss.com/nexus/content/groups/public</url>
        </repository>
    </repositories>

    <properties>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <encoding>UTF-8</encoding>
        <scala.version>2.11.8</scala.version>
        <scala.compat.version>2.11</scala.compat.version>
        <hadoop.version>2.7.4</hadoop.version>
        <spark.version>2.2.0</spark.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive-thriftserver_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <!-- Spark Streaming / Structured Streaming with Kafka -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql-kafka-0-10_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>com.typesafe</groupId>
            <artifactId>config</artifactId>
            <version>1.3.3</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.38</version>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <testSourceDirectory>src/test/scala</testSourceDirectory>
        <plugins>
            <!-- Plugin that compiles the Java sources -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
            </plugin>
            <!-- Plugin that compiles the Scala sources -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                            <args>
                                <arg>-dependencyfile</arg>
                                <arg>${project.build.directory}/.scala_dependencies</arg>
                            </args>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.18.1</version>
                <configuration>
                    <useFile>false</useFile>
                    <disableXmlReport>true</disableXmlReport>
                    <includes>
                        <include>**/*Test.*</include>
                        <include>**/*Suite.*</include>
                    </includes>
                </configuration>
            </plugin>
            <!-- Shade plugin: builds the fat jar submitted to the cluster -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass></mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

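The build section compiles Scala sources from src/main/scala, so the snippets in the following sections are assumed to live inside a driver object's main method. A minimal skeleton is sketched below (illustrative only; the object name must match the --class value passed to spark-submit later):

import org.apache.spark.{SparkConf, SparkContext}

// Illustrative skeleton, not from the original post: the word-count logic shown
// in the sections below goes inside main().
object WorldCount {
  def main(args: Array[String]): Unit = {
    // ... word-count logic ...
  }
}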
 

  • Running locally

A word count implementation:

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

//1. Create the SparkContext, running locally with as many threads as cores
val config = new SparkConf().setAppName("worldCount").setMaster("local[*]")
val sc = new SparkContext(config)
sc.setLogLevel("WARN")

//2. Read the input file
val fileRDD: RDD[String] = sc.textFile("C:\\Users\\86132\\Desktop\\4.2号练习题\\words.txt")

//3. Process the data: split each line into words
val wordRDD: RDD[String] = fileRDD.flatMap(_.split(" "))

// Map each word to a count of 1
val wordAndOneRDD: RDD[(String, Int)] = wordRDD.map((_, 1))

// Sum the counts per word
val wordAndCount: RDD[(String, Int)] = wordAndOneRDD.reduceByKey(_ + _)

//4. Collect and print the results
val result: Array[(String, Int)] = wordAndCount.collect()
result.foreach(println)
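As a small optional extension (not in the original), the counts can be sorted in descending order before printing, and the SparkContext stopped once the job is done:

// Sort by count, highest first, then print; sortBy is a standard RDD operation.
val sorted: Array[(String, Int)] = wordAndCount
  .sortBy(_._2, ascending = false)
  .collect()
sorted.foreach(println)

// Release local resources when finished.
sc.stop()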

 

 

  • Running on a cluster

The same word count, written for cluster execution:

 

Code

 

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

//1. Create the SparkContext (no master set here; it is supplied by spark-submit)
val conf = new SparkConf().setAppName("worldCount")
val sc = new SparkContext(conf)
sc.setLogLevel("WARN")

//2. Read the input file from the path passed as the first argument
val fileRDD: RDD[String] = sc.textFile(args(0))

//3. Process the data: split each line into words
val dataRDD: RDD[String] = fileRDD.flatMap(_.split(" "))

// Map each word to a count of 1
val dataOneRDD: RDD[(String, Int)] = dataRDD.map((_, 1))

// Sum the counts per word
val wordCount: RDD[(String, Int)] = dataOneRDD.reduceByKey(_ + _)

//4. Write the result to the output path passed as the second argument
wordCount.saveAsTextFile(args(1))
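Because the input and output locations come from args(0) and args(1), a simple guard at the top of main fails fast with a clear message if the job is submitted without both paths. A minimal sketch (not in the original):

// Hypothetical argument check: expects <inputPath> <outputPath>.
if (args.length < 2) {
  System.err.println("Usage: WorldCount <inputPath> <outputPath>")
  System.exit(1)
}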

 

Submit the packaged jar to the Spark standalone HA cluster (both masters are listed so the client can locate the active one):

 

/export/servers/spark-2.2.0-bin-2.6.0-cdh5.14.0/bin/spark-submit \
--class WorldCount \
--master spark://node01:7077,node02:7077 \
--executor-memory 1g \
--total-executor-cores 2 \
/root/wc.jar \
hdfs://node01:8020/words.txt \
hdfs://node01:8020/cc

 

Submit the jar to a YARN cluster (cluster deploy mode):

/export/servers/spark-2.2.0-bin-2.6.0-cdh5.14.0/bin/spark-submit \
--class WorldCount  \
--master yarn \
--deploy-mode cluster \
--driver-memory 1g \
--executor-memory 1g \
--executor-cores 2 \
--queue default \
/root/wc.jar \
hdfs://node01:8020/words.txt \
hdfs://node01:8020/wordcount/output
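Once the job finishes, the output path contains the usual part-* files. One quick way to inspect them, assuming a spark-shell (where sc is predefined) with access to the same HDFS, is a sketch like:

// Read the job output back and print each (word, count) pair.
sc.textFile("hdfs://node01:8020/wordcount/output").collect().foreach(println)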

 

 

View the job in the YARN web UI:

http://node01:8088/cluster

(Figure 1: screenshot of the submitted job in the cluster web UI)

 
