Kafka in Detail
Reference: https://developer.51cto.com/art/202003/611798.html
// Homebrew install location: /usr/local/Cellar/kafka/2.4.1/
/usr/local/bin/zookeeper-server-start -daemon /usr/local/etc/kafka/zookeeper.properties // 1. Start ZooKeeper (here it is the one bundled with Kafka), under the root account
/usr/local/bin/kafka-server-start /usr/local/etc/kafka/server.properties // 2. Start the Kafka broker in another terminal; both processes can be sent to the background with nohup, as root
kafka-topics --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic test1 // 3. In yet another terminal (non-root is fine), create the topic with 3 partitions
kafka-topics --zookeeper localhost:2181 --list // List existing topics
kafka-console-producer --broker-list localhost:9092 --topic test1 // 4. Kafka console producer client (non-root)
kafka-console-consumer --bootstrap-server localhost:9092 --topic test1 // 5. Kafka console consumer
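Messages can also be produced programmatically. A minimal Scala sketch using the standard kafka-clients producer API, assuming the broker from step 2 is listening on localhost:9092 and the test1 topic from step 3 exists:

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object ProducerDemo {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    // Send a few test messages to the test1 topic; with no key they are
    // distributed across its 3 partitions by the default partitioner
    (1 to 5).foreach { i =>
      producer.send(new ProducerRecord[String, String]("test1", s"msg-$i"))
    }
    producer.close() // flushes pending records before shutting down
  }
}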
// Spark Streaming (receiver-based Kafka source from spark-streaming-kafka-0-8)
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.kafka.KafkaUtils

val config: SparkConf = new SparkConf().setMaster("local[*]").setAppName("SparkStreaming01_WordCount")
val streamingContext: StreamingContext = new StreamingContext(config, Seconds(3)) // 3-second batches; Seconds is a companion object, no `new` needed
// Collect data from Kafka: (context, ZooKeeper quorum, consumer group id, Map of topic -> receiver thread count)
val kafkaDStream: ReceiverInputDStream[(String, String)] = KafkaUtils.createStream(streamingContext, "localhost:2181",
  "test1", Map("test1" -> 3)) // the 3 is the number of consumer threads for the topic, not its partition count
The condition.params.json property (query conditions for the analysis, stored in the project's properties file):
condition.params.json={startDate:"2019-11-01", \
endDate:"2019-12-28", \
startAge: "20", \
endAge: "50", \
professionals: "", \
city: "", \
gender:"", \
keywords:"", \
categoryIds:"", \
targetPageFlow:"1,2,3,4,5,6,7"}
import java.util.Properties
import com.alibaba.fastjson.{JSON, JSONObject}
import com.lh.utils.{JdbcUtil, PropertiesUtil}
// 1. Load the properties file (PropertiesUtil is a project helper; the load signature here is assumed)
val properties: Properties = PropertiesUtil.load("config.properties")
// 2. Read the raw JSON string holding the query conditions
val conditionJson: String = properties.getProperty("condition.params.json")
val conditionObj: JSONObject = JSON.parseObject(conditionJson)
// 3. Extract the target page flow from it
val targetPageFlow: String = conditionObj.getString("targetPageFlow")
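A sketch of how targetPageFlow is typically used next in this kind of page-conversion analysis: split the flow into page ids, then zip adjacent ids into single-step jump pairs (my assumption about the next step, not shown in the note):

// "1,2,3,4,5,6,7" -> Array("1","2","3","4","5","6","7")
val pageIds: Array[String] = targetPageFlow.split(",")
// Adjacent pairs represent single-step jumps: 1-2, 2-3, ..., 6-7
val pageJumps: Array[String] = pageIds.zip(pageIds.tail).map { case (a, b) => s"$a-$b" }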
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.47</version>
</dependency>
Concepts such as RDD and partition: https://www.cnblogs.com/bonelee/p/11063453.html
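A quick illustration of RDD partitioning, runnable in spark-shell (a minimal sketch; `sc` is the SparkContext the shell provides):

// Create an RDD with an explicit partition count and inspect how elements are distributed
val rdd = sc.parallelize(1 to 10, 3)   // request 3 partitions
println(rdd.getNumPartitions)          // 3
println(rdd.glom().collect().map(_.mkString("[", ",", "]")).mkString(" "))
// e.g. [1,2,3] [4,5,6] [7,8,9,10]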