Table API 是用于流处理和批处理的统一关系型 API。以下针对同一个需求,给出三种简单的不同实现方式。
需求:计算每个用户的订单总额。
import org.apache.flink.api.common.typeinfo.{TypeInformation, Types}
import org.apache.flink.table.api.scala._
import org.apache.flink.streaming.api.scala.{ StreamExecutionEnvironment}
import org.apache.flink.table.api.{EnvironmentSettings}
import org.apache.flink.table.descriptors.{FileSystem, OldCsv, Schema}
import org.apache.flink.types.Row
/**
 * Example 1: registers a CSV file as a table source through the connector
 * descriptor API and computes the revenue sum per (cID, cName) group.
 *
 * Both a Table API query and the equivalent SQL query are defined; only the
 * Table API result is converted to a stream and printed.
 */
object FlinkTableSQL {

  /**
   * Builds the table pipeline and runs the streaming job.
   *
   * @param args command-line arguments; not used by this example
   * @throws java.io.FileNotFoundException if `Orders.txt` is not on the classpath
   */
  def main(args: Array[String]): Unit = {
    // Streaming-mode table environment backed by the old planner.
    val fsSettings = EnvironmentSettings
      .newInstance()
      .useOldPlanner()
      .inStreamingMode()
      .build()
    val fsEnv = StreamExecutionEnvironment.getExecutionEnvironment
    val tEnv = StreamTableEnvironment.create(fsEnv, fsSettings)

    // The data file is resolved from the classpath. getResource returns null
    // when the resource is missing, so fail with a descriptive error instead
    // of a bare NullPointerException.
    val resourceName = "Orders.txt"
    val path = Option(this.getClass.getClassLoader.getResource(resourceName))
      .map(_.getPath)
      .getOrElse(
        throw new java.io.FileNotFoundException(
          s"Resource '$resourceName' was not found on the classpath"
        )
      )

    // Register the file as the table "Orders".
    tEnv
      .connect(new FileSystem().path(path))
      .withFormat(
        // Format of the data in the file
        new OldCsv()
          .field("cID", Types.STRING)
          .field("cName", Types.STRING)
          .field("cCountry", Types.STRING)
          .field("revenue", Types.DOUBLE)
          .lineDelimiter("\n")
      )
      .withSchema(
        // Column names and types of the table
        new Schema()
          .field("cID", Types.STRING)
          .field("cName", Types.STRING)
          .field("cCountry", Types.STRING)
          .field("revenue", Types.DOUBLE)
      )
      .inAppendMode()
      .registerTableSource("Orders")

    // ******************
    // 1. Query the table with the Table API
    // ******************
    val orders = tEnv.scan("Orders")
    // Scala symbols (a leading single quote) refer to the table's columns.
    val resultTable = orders
      .groupBy('cID, 'cName) // grouping
      .select('cID, 'cName, 'revenue.sum as 'revSum) // aggregation

    // ******************
    // 2. Query the table with SQL
    // ******************
    // Equivalent SQL formulation. It is only defined here: it is not converted
    // to a stream below, so only `resultTable` is printed.
    // stripMargin strips the leading whitespace and '|' from every line.
    val sqlResultTable = tEnv.sqlQuery(
      """
        |SELECT cID, cName, SUM(revenue) AS revSum
        |FROM Orders
        |GROUP BY cID, cName
        |""".stripMargin
    )

    // Print the result to the console as a retract stream of rows.
    implicit val tpe: TypeInformation[Row] =
      Types.ROW(Types.STRING, Types.STRING, Types.DOUBLE)
    resultTable.toRetractStream[Row].print()

    // Start the job.
    fsEnv.execute()
  }
}
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.table.api.{EnvironmentSettings, Table}
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.api.scala._
import org.apache.flink.streaming.api.scala._
/**
 * Example 2: reads order lines from a text file as a DataStream, converts the
 * stream into a Table, registers it as "orders" and computes the revenue sum
 * per (cID, cName) group with a SQL query.
 */
object FlinkTableSQL {

  /**
   * Builds the pipeline and runs the streaming job.
   *
   * @param args optional command-line arguments; when present, `args(0)` is
   *             used as the input path, otherwise `file:///E:Orders.txt`
   */
  def main(args: Array[String]): Unit = {
    // Streaming-mode table environment backed by the old planner.
    val fsSettings = EnvironmentSettings
      .newInstance()
      .useOldPlanner()
      .inStreamingMode()
      .build()
    val fsEnv = StreamExecutionEnvironment.getExecutionEnvironment
    val tEnv = StreamTableEnvironment.create(fsEnv, fsSettings)

    // The input location can be overridden from the command line; the default
    // keeps the original hard-coded path.
    val inputPath = args.headOption.getOrElse("file:///E:Orders.txt")

    // Read the file, parse each comma-separated line into an Orders record and
    // convert the stream into a table.
    val orders: Table = fsEnv
      .readTextFile(inputPath)
      .map(line => line.split(","))
      .map(o => Orders(o(0), o(1), o(2), o(3).toDouble))
      .toTable(tEnv, 'cID, 'cName, 'cCountry, 'revenue)

    // Register the table so that it can be referenced by name in SQL.
    tEnv.registerTable("orders", orders)

    // stripMargin strips the leading whitespace and '|' from every line.
    val resultTable = tEnv.sqlQuery(
      """
        |SELECT cID, cName, SUM(revenue) AS revSum
        |FROM orders
        |GROUP BY cID, cName
        |""".stripMargin
    )

    /*
    Table API alternative to the SQL query above:
    val resultTable = orders
      .groupBy('cID, 'cName) // grouping
      .select('cID, 'cName, 'revenue.sum as 'revSum) // aggregation
    */

    // Print the result to the console as a retract stream of tuples.
    tEnv.toRetractStream[(String, String, Double)](resultTable).print()

    // Start the job.
    fsEnv.execute()
  }
}
/**
 * Record type for one order line; a case class is required here.
 *
 * The field order matches the comma-separated column order that the examples
 * parse: cID, cName, cCountry, revenue.
 */
case class Orders(
  cID: String,
  cName: String,
  cCountry: String,
  revenue: Double
)
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.api.scala._
import org.apache.flink.streaming.api.scala._
/**
 * Example 3: reads order lines from a text file as a DataStream, registers the
 * stream directly as the table "orders" and computes the revenue sum per
 * (cID, cName) group with the Table API, using the Blink planner.
 */
object FlinkTableSQL {

  /**
   * Builds the pipeline and runs the streaming job.
   *
   * @param args optional command-line arguments; when present, `args(0)` is
   *             used as the input path, otherwise `file:///E:Orders.txt`
   */
  def main(args: Array[String]): Unit = {
    // Streaming-mode table environment backed by the Blink planner.
    val bsEnv = StreamExecutionEnvironment.getExecutionEnvironment
    val bsS = EnvironmentSettings
      .newInstance()
      .useBlinkPlanner()
      .inStreamingMode()
      .build()
    val bsTEnv = StreamTableEnvironment.create(bsEnv, bsS)

    // The input location can be overridden from the command line; the default
    // keeps the original hard-coded path.
    val inputPath = args.headOption.getOrElse("file:///E:Orders.txt")

    // Read the file and parse each comma-separated line into an Orders record.
    val orderStream: DataStream[Orders] = bsEnv
      .readTextFile(inputPath)
      .map(line => line.split(","))
      .map(o => Orders(o(0), o(1), o(2), o(3).toDouble))

    // Register the stream as the table "orders". The call is made only for its
    // registration side effect, so its result is not bound to a name.
    bsTEnv.registerDataStream("orders", orderStream, 'cID, 'cName, 'cCountry, 'revenue)

    val resultTable = bsTEnv
      .scan("orders")
      .groupBy('cID, 'cName) // grouping
      .select('cID, 'cName, 'revenue.sum as 'revSum) // aggregation

    // Print the result to the console as a retract stream of tuples.
    bsTEnv.toRetractStream[(String, String, Double)](resultTable).print()

    // Start the job.
    bsEnv.execute()
  }
}