Let's start with some simple processing of tuples.
import org.apache.flink.api.java.aggregation.Aggregations
import org.apache.flink.api.scala._

object AggregateTest {

  def main(args: Array[String]): Unit = {
    // Flink batch execution environment
    val env = ExecutionEnvironment.getExecutionEnvironment

    val input: DataSet[(Int, String, Double)] = env.fromElements(
      (1, "hello", 4.0),
      (1, "hello", 5.0),
      (1, "hello", 5.0),
      (3, "world", 6.0),
      (3, "world", 6.0)
    )

    // Batch API: group the tuples by the String field (index 1)
    val output = input.groupBy(1)
      // Several aggregations on the same DataSet are chained with and()
      // Within each String group, sum the Int field (index 0)...
      .aggregate(Aggregations.SUM, 0)
      // ...and take the minimum of the Double field (index 2)
      .and(Aggregations.MIN, 2)

    output.print()
  }
}
Output:
(6,world,6.0)
(3,hello,4.0)
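For comparison, the Scala DataSet API also offers convenience methods on grouped data sets. The following is a minimal sketch of the same grouped job written that way, assuming sum(0) and andMin(2) behave as shorthands for the aggregate/and calls above:

import org.apache.flink.api.scala._

object AggregateShorthand {

  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment

    val input: DataSet[(Int, String, Double)] = env.fromElements(
      (1, "hello", 4.0),
      (1, "hello", 5.0),
      (1, "hello", 5.0),
      (3, "world", 6.0),
      (3, "world", 6.0)
    )

    // sum(0) ~ aggregate(Aggregations.SUM, 0); andMin(2) ~ and(Aggregations.MIN, 2)
    input.groupBy(1)
      .sum(0)
      .andMin(2)
      .print()
  }
}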
Test 1: keep the grouping, but only sum the Int field:
// sum the Int field (index 0) within each group
.aggregate(Aggregations.SUM, 0)
.print()
Output:
(6,world,6.0)
(3,hello,5.0)
Since the Double field is not aggregated here, it simply carries over a value from one of the records in each group, which is why the hello group shows 5.0 rather than its minimum 4.0.
Test 2: keep the grouping, but only take the minimum of the Double field:
// minimum of the Double field (index 2) within each group
.aggregate(Aggregations.MIN, 2)
.print()
Output:
(3,world,6.0)
(1,hello,4.0)
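A related point: Aggregations.MIN only guarantees the minimum in the aggregated field; the other fields of the emitted tuple are not necessarily taken from the same record. To select the complete record that holds the minimum, there is a minBy operator. The fragment below is a sketch that slots in place of the aggregate call above, assuming minBy is available on the grouped Scala DataSet as it is in the Java API:
// per group, return the whole tuple with the smallest Double field (index 2)
.minBy(2)
.print()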
Now the same job with the grouping removed, so the aggregations run over the entire DataSet:

import org.apache.flink.api.java.aggregation.Aggregations
import org.apache.flink.api.scala._

object AggregateTest {

  def main(args: Array[String]): Unit = {
    // Flink batch execution environment
    val env = ExecutionEnvironment.getExecutionEnvironment

    val input: DataSet[(Int, String, Double)] = env.fromElements(
      (1, "hello", 4.0),
      (1, "hello", 5.0),
      (1, "hello", 5.0),
      (3, "world", 6.0),
      (3, "world", 6.0)
    )

    val output = input
      // no groupBy: aggregate over the whole DataSet
      .aggregate(Aggregations.SUM, 0)
      .and(Aggregations.MIN, 2)

    output.print()
  }
}
Output:
(9,world,4.0)
With no grouping, the Int field is summed across all five records (1 + 1 + 1 + 3 + 3 = 9) and 4.0 is the overall minimum of the Double field; the String field again just carries a value from one of the records.
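If the sum/andMin shorthands from earlier also exist directly on an ungrouped DataSet (which is my assumption here), the same result can be produced with this fragment, reusing the input from the program above:
// shorthand for aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 2)
input
  .sum(0)
  .andMin(2)
  .print()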