spark摘要

mac下kafka

kafka详解
https://developer.51cto.com/art/202003/611798.html

// /usr/local/Cellar/kafka/2.4.1/
/usr/local/bin/zookeeper-server-start -daemon /usr/local/etc/kafka/zookeeper.properties  // 1.启动zookeeper (我这里的应该是kafka自带的) root账号下
/usr/local/bin/kafka-server-start /usr/local/etc/kafka/server.properties // 2. 在另一个终端下启动 ,这两个程序都可以nohup交到后台  root

kafka-topics --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic test1 // 3. 在启动一个终端(可非root) 创建topic partitions分区为3
kafka-topics --zookeeper localhost:2181 --list
kafka-console-producer --broker-list localhost:9092 --topic test1 // 4. kafka生产者客户端命令 (非root)
kafka-console-consumer --bootstrap-server localhost:9092 --topic test1 // 5. 消费者

// Spark Streaming: consume from Kafka via the legacy receiver-based API.
val config: SparkConf = new SparkConf().setMaster("local[*]").setAppName("SparkStreaming01_WordCount")

// Batch interval of 3 seconds; Seconds is a companion object, so no `new` is needed.
val streamingContext: StreamingContext = new StreamingContext(config, Seconds(3))

// Collect data from Kafka through ZooKeeper. The third argument ("test1") is the
// consumer group id; the Map entry ("test1" -> 3) maps topic name to the number of
// receiver threads used to consume it (here 3, matching the 3 partitions created above).
val kafkaDStream: ReceiverInputDStream[(String, String)] = KafkaUtils.createStream(streamingContext, "localhost:2181",
  "test1", Map("test1" -> 3))

JSON解析

condition.params.json

condition.params.json={startDate:"2019-11-01", \
  endDate:"2019-12-28", \
  startAge: "20", \
  endAge: "50", \
  professionals: "",  \
  city: "", \
  gender:"", \
  keywords:"", \
  categoryIds:"", \
  targetPageFlow:"1,2,3,4,5,6,7"}
import com.lh.utils.{JdbcUtil, PropertiesUtil}

// 1. Read the analysis-condition JSON string from the properties file
//    (key "condition.params.json", sample value shown above).
val conditionJson: String = properties.getProperty("condition.params.json")

// 2. Parse the string into a fastjson JSONObject.
val conditionObj: JSONObject = JSON.parseObject(conditionJson)
// 3. Extract the target page-flow sequence (e.g. "1,2,3,4,5,6,7").
val targetPageFlow: String = conditionObj.getString("targetPageFlow")
 <dependency>
     <groupId>com.alibaba</groupId>
     <artifactId>fastjson</artifactId>
     <version>1.2.47</version>
 </dependency>
各种资源

rdd,partition等概念 https://www.cnblogs.com/bonelee/p/11063453.html

你可能感兴趣的:(spark&scala)