基于calcite为CSV文件包装SQL接口

源码如下:

import java.sql.DriverManager
import java.util.Properties

import scala.collection.JavaConversions.asJavaIterable
import scala.collection.JavaConversions.mapAsJavaMap
import scala.collection.JavaConversions.seqAsJavaList
import scala.io.Source

import org.apache.calcite.DataContext
import org.apache.calcite.jdbc.CalciteConnection
import org.apache.calcite.linq4j.Linq4j
import org.apache.calcite.rel.`type`.RelDataTypeFactory
import org.apache.calcite.schema.ScannableTable
import org.apache.calcite.schema.Schema
import org.apache.calcite.schema.Statistics
import org.apache.calcite.schema.Table
import org.apache.calcite.schema.impl.AbstractSchema

/**
 * Demonstrates wrapping a CSV file with a SQL interface by means of Apache Calcite.
 *
 * A schema named `hr` containing a single table `emps` (columns `id`, `name`, `age`) is
 * registered on an in-process Calcite connection. The table's rows are read from
 * `/etc/emps.csv` every time the table is scanned.
 */
object CSV2SQL {

  /**
   * Registers the CSV-backed `hr.emps` table, runs
   * `select * from hr.emps where age>35` and prints every result row to stdout,
   * each column value followed by a tab.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    Class.forName("org.apache.calcite.jdbc.Driver")
    val info = new Properties()
    info.setProperty("lex", "JAVA")
    val connection = DriverManager.getConnection("jdbc:calcite:", info)

    // Nested try/finally blocks guarantee that the JDBC resources are released
    // even when schema registration or query execution throws.
    try {
      val calciteConnection = connection.asInstanceOf[CalciteConnection]
      val rootSchema = calciteConnection.getRootSchema()

      rootSchema.add("hr", new AbstractSchema() {
        // The schema contains a single table: emps
        override def getTableMap: java.util.Map[String, Table] =
          Map[String, Table]("emps" -> new Table with ScannableTable {
            // The emps table has 3 columns: id / name / age
            override def getRowType(typeFactory: RelDataTypeFactory) =
              typeFactory.createStructType(
                List(
                  typeFactory.createJavaType(classOf[Integer]),
                  typeFactory.createJavaType(classOf[String]),
                  typeFactory.createJavaType(classOf[Integer])),
                List("id", "name", "age"))

            // Full scan: reads and parses every record of the CSV file
            override def scan(root: DataContext) = {
              val source = Source.fromFile("/etc/emps.csv")
              // Materialize the rows eagerly so the file handle can be closed before
              // the rows are handed to Calcite (a lazy iterator would keep it open).
              val parsed: Iterable[Array[Object]] =
                try {
                  source.getLines()
                    .filter(_.trim.nonEmpty) // tolerate blank lines in the file
                    .map { line =>
                      val cols = line.split(",")
                      Array[Object](
                        Integer.valueOf(cols(0).trim),
                        cols(1).trim,
                        Integer.valueOf(cols(2).trim))
                    }
                    .toVector
                } finally {
                  source.close()
                }

              val rows: java.lang.Iterable[Array[Object]] = asJavaIterable(parsed)
              Linq4j.asEnumerable(rows)
            }

            // No statistics are available for this table
            override def getStatistic() = Statistics.UNKNOWN

            // This is a regular table
            override def getJdbcTableType() = Schema.TableType.TABLE
          })
      })

      val statement = calciteConnection.createStatement()
      try {
        val resultSet = statement.executeQuery("select * from hr.emps where age>35")
        try {
          // The column count is fixed for the whole result set; read it once.
          val columnCount = resultSet.getMetaData.getColumnCount
          while (resultSet.next()) {
            (1 to columnCount).foreach(i => print(s"${resultSet.getObject(i)}\t"))
            println("")
          }
        } finally {
          resultSet.close()
        }
      } finally {
        statement.close()
      }
    } finally {
      connection.close()
    }
  }
}

该例子从/etc/emps.csv文件中读取行,将每一行包装成Array[Object]。

/etc/emps.csv的内容如下:

1,bluejoe,39
2,alex,7
3,even,36

最终程序输出:

1   bluejoe 39  
3   even    36  

你可能感兴趣的:(大数据技术与系统)