PV——统计值

代码如下：

package Data_text

import org.apache.flink.api.common.functions.AggregateFunction
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.WindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

import java.text.SimpleDateFormat
import java.time.format.DateTimeFormatter
import java.time.{Instant, ZoneId}

/**
 * One parsed record of the user-behavior CSV input.
 *
 * The fields are filled positionally from the comma-separated columns 0 to 4
 * of each input line.
 *
 * @param userId     value of column 0, parsed as a Long
 * @param itemId     value of column 1, parsed as a Long
 * @param categoryId value of column 2, parsed as an Int
 * @param behavior   value of column 3; the job keeps only records where this equals "pv"
 * @param timestamp  value of column 4; the job multiplies it by 1000 to obtain the
 *                   event time, so it is presumably epoch seconds (confirm against the data)
 */
case class add2(
  userId: Long,
  itemId: Long,
  categoryId: Int,
  behavior: String,
  timestamp: Long
)
/**
 * Result emitted for one time window.
 *
 * @param windowEnd end timestamp of the window, as reported by `TimeWindow.getEnd`
 * @param count     number of records counted in that window
 */
case class add3(
  windowEnd: Long,
  count: Long
)
/**
 * Flink streaming job that counts page-view ("pv") records per one-hour
 * event-time window and prints one `add3(windowEnd, count)` per window.
 *
 * Usage: an optional first command-line argument gives the path of the input
 * CSV file; when it is omitted the original default path is used.
 */
object pv_4 {

  /** Input file that is read when no path is passed on the command line. */
  private val DefaultInputPath = "E:\\idea项目\\untitled4\\src\\main\\resources\\UserBehavior.csv"

  /**
   * Incremental pre-aggregator: keeps a running count of the records that
   * were added to a window. The content of each record is ignored; every
   * record contributes exactly 1.
   */
  private class PvCountAgg extends AggregateFunction[(String, Int), Long, Long] {
    override def createAccumulator(): Long = 0L

    override def add(in: (String, Int), acc: Long): Long = acc + 1

    override def getResult(acc: Long): Long = acc

    override def merge(acc: Long, acc1: Long): Long = acc + acc1
  }

  /**
   * Wraps the pre-aggregated count of a window together with the window's
   * end timestamp into an `add3`.
   */
  private class PvWindowResult extends WindowFunction[Long, add3, String, TimeWindow] {
    override def apply(key: String, window: TimeWindow, input: Iterable[Long], out: Collector[add3]): Unit = {
      // When combined with a pre-aggregator, the iterable carries the single
      // aggregated value for the window, so only the first element is read.
      val count = input.iterator.next()
      out.collect(add3(window.getEnd, count))
    }
  }

  /**
   * Builds and runs the job.
   *
   * @param args optional; `args(0)` overrides the input file path
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    // Set up the environment: event-time semantics, a single parallel task.
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setParallelism(1)

    // Read the file line by line and parse each line into an add2 record.
    val events = env
      .readTextFile(inputPath)
      // Blank lines would make the numeric parsing below throw and fail the job.
      .filter(_.trim.nonEmpty)
      .map { line =>
        // Trim each field so stray whitespace around a value does not break toLong/toInt.
        val fields = line.split(",").map(_.trim)
        add2(
          userId = fields(0).toLong,
          itemId = fields(1).toLong,
          categoryId = fields(2).toInt,
          behavior = fields(3),
          timestamp = fields(4).toLong
        )
      }
      // Event time in milliseconds; assumes the CSV timestamp is in epoch
      // seconds and non-decreasing (TODO confirm against the data set).
      .assignAscendingTimestamps(_.timestamp * 1000L)

    // Keep only "pv" records, put them all under one constant key and count
    // them per one-hour window.
    val pvPerHour = events
      .filter(_.behavior == "pv")
      .map(_ => ("pv", 1))
      .keyBy(_._1)
      .timeWindow(Time.hours(1))
      .aggregate(new PvCountAgg, new PvWindowResult)

    pvPerHour.print()
    env.execute("")
  }

}

运行结果：

（图 1：PV 统计值的运行结果截图）

 

你可能感兴趣的:(java,scala,开发语言)