Flink: DataStream Source and DataSet Source

The DataStream API is used to develop stream-processing applications.

//DataStream env entry point
//A streaming job must end with an execute() call
val env = StreamExecutionEnvironment.getExecutionEnvironment

The DataSet API is used to develop batch-processing applications.

//DataSet env entry point
//A batch job needs no trailing execute(): eager sinks such as print() trigger execution themselves
val env2 = ExecutionEnvironment.getExecutionEnvironment
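
To make the difference concrete, here is a minimal sketch (the object name EnvDemo is made up for illustration): the streaming pipeline only runs once execute() is called, while the batch print() runs eagerly on its own.

import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object EnvDemo {
  def main(args: Array[String]): Unit = {
    // streaming: the pipeline is only a plan until execute() is called
    val senv = StreamExecutionEnvironment.getExecutionEnvironment
    senv.fromCollection(1 to 3).print()
    senv.execute("EnvDemo")   // required, otherwise nothing runs

    // batch: print() is an eager sink and triggers execution by itself
    val benv = ExecutionEnvironment.getExecutionEnvironment
    benv.fromCollection(1 to 3).print()
  }
}

The full SourceApp listing below walks through several built-in sources.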
package com.ruozedata.flink

import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object SourceApp {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val env2 = ExecutionEnvironment.getExecutionEnvironment
    //fromCollection(env)        // read from a collection
    //textFile(env)              // read from a file
    //csvFile(env2)              // read a CSV file, keeping only the columns we want
    //csvFile2(env2)             // read a CSV file, improved to map rows into a case class
    //readRecursiveFiles(env2)   // recursively read the files under a directory
    readCompressionFile(env2)    // read a compressed file
  }

  //build a DataStream from a collection
  def fromCollection(env: StreamExecutionEnvironment) = {
    val text = env.fromCollection(1 to 10)
    text.print().setParallelism(1)   // parallelism 1 keeps the printed output in order

    env.execute("SourceApp")
  }

  //build a DataStream by reading a text file
  def textFile(env: StreamExecutionEnvironment) = {
    val text = env.readTextFile("D:\\Develop\\hadoopsource\\test.txt")
    // readTextFile also accepts a directory and reads the files directly under it
    // val text1 = env.readTextFile("D:\\Develop\\hadoopsource")
    text.print().setParallelism(1)

    env.execute("SourceApp")
  }

  //read a CSV file into a DataSet of tuples
  def csvFile(env: ExecutionEnvironment) = {
    val text = env.readCsvFile[(String, Int)]("D:\\Develop\\hadoopsource\\people.csv"
      , ignoreFirstLine = true          // skip the header line
      , includedFields = Array(0, 1))   // which columns to keep

    text.print()

    // no env.execute() needed: print() already triggers execution in the DataSet API
  }
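
  // For illustration only, a hypothetical people.csv that csvFile and csvFile2 assume:
  //   name,age,job
  //   zhangsan,25,teacher
  //   lisi,30,engineer
  // With includedFields = Array(0, 1), only the name and age columns are parsed.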

  //read a CSV file into a DataSet of case class instances
  def csvFile2(env: ExecutionEnvironment) = {
    case class Teacher(name: String, age: Int, job: String)
    val text = env.readCsvFile[Teacher]("D:\\Develop\\hadoopsource\\people.csv"
      , ignoreFirstLine = true                      // skip the header line
      , pojoFields = Array("name", "age", "job"))   // map the CSV columns to fields by name

    text.print()
  }

  //recursively read the files under a directory
  def readRecursiveFiles(env: ExecutionEnvironment) = {

    val configuration = new Configuration()
    configuration.setBoolean("recursive.file.enumeration", true)

    val lines = env.readTextFile("D:\\Develop\\hadoopsource\\flink")

    // withParameters hands the configuration down to the underlying input format
    lines.withParameters(configuration).print()
  }
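
  // For illustration, an assumed layout under D:\Develop\hadoopsource\flink:
  //   flink/a.txt        <- always read
  //   flink/sub/b.txt    <- only read when recursive.file.enumeration = true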

  //read a compressed file
  def readCompressionFile(env: ExecutionEnvironment) = {

    // Flink picks a decompressor by file extension (.deflate, .gz, .bz2 and .xz are
    // supported out of the box); compressed files are read as a whole, not split.
    // Note: for a .tar.gz only the gzip layer is undone, so the printed lines
    // still contain raw tar records.
    val lines = env.readTextFile("D:\\Develop\\hadoopsource\\apache-maven-3.3.9-bin.tar.gz")

    lines.print()
  }

}
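
Beyond collections and files, a classic DataStream source is a TCP socket. Below is a minimal sketch, assuming a local server such as nc -lk 9999 is running; the host, port and object name are illustrative:

import org.apache.flink.streaming.api.scala._

object SocketSourceApp {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // an unbounded stream of text lines read from a TCP socket
    val lines = env.socketTextStream("localhost", 9999)

    lines.print().setParallelism(1)

    env.execute("SocketSourceApp")
  }
}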
