简介
Apache Drill 是 Google Dremel 的开源实现,可以查询本地文件、HDFS、Amazon S3、HBase、MongoDB、Hive、RDBMS、Kafka、OpenTSDB 等数据源中的数据,并且一条 SQL 查询可以同时访问多个数据源。
节点规划
节点 | 角色
node1 | Drillbit
node2 | Drillbit
node3 | Drillbit
下载解压
#http://drill.apache.org
[root@node1 ~]# wget https://mirrors.tuna.tsinghua.edu.cn/apache/drill/drill-1.12.0/apache-drill-1.12.0.tar.gz
[root@node1 ~]# tar -zxvf apache-drill-1.12.0.tar.gz
配置
[root@node1 ~]# cd /root/apache-drill-1.12.0/conf
#drill-override.conf
drill.exec: {
cluster-id: "my-drill-cluster",
zk.connect: "node1:2181,node2:2181,node3:2181"
}
#drill-env.sh
export DRILL_MAX_DIRECT_MEMORY="2G"
export DRILL_HEAP="1G"
分发
[root@node1 ~]# scp -r apache-drill-1.12.0 root@node2:~
[root@node1 ~]# scp -r apache-drill-1.12.0 root@node3:~
启动
[root@node1 ~]# apache-drill-1.12.0/bin/drillbit.sh start
[root@node2 ~]# apache-drill-1.12.0/bin/drillbit.sh start
[root@node3 ~]# apache-drill-1.12.0/bin/drillbit.sh start
验证
1、WebUI
http://node1:8047
2、Drill命令行
[root@node2 ~]# /root/apache-drill-1.12.0/bin/sqlline -u jdbc:drill:zk=node3:2181
查询 Hive、HBase、HDFS、MySQL
查询Hive
Drill WebUI配置Hive Storage
{
"type": "hive",
"enabled": true,
"configProps": {
"hive.metastore.uris": "thrift://node2:9083",
"javax.jdo.option.ConnectionURL": "jdbc:mysql://node2/hive",
"hive.metastore.warehouse.dir": "/apps/hive/warehouse",
"fs.default.name": "hdfs://node1:8020",
"hive.metastore.sasl.enabled": "false"
}
}
Drill Cli查询Hive
0: jdbc:drill:zk=node3:2181> select `studentid`,`language`,`math`,`english` from hive.test limit 5;
查询HBase
Drill WebUI配置HBase Storage
{
"type": "hbase",
"config": {
"hbase.zookeeper.quorum": "node3,node2,node1",
"hbase.zookeeper.property.clientPort": "2181",
"zookeeper.znode.parent": "/hbase-unsecure"
},
"size.calculator.enabled": false,
"enabled": true
}
Drill Cli查询HBase
0: jdbc:drill:zk=node3:2181> use hbase;
0: jdbc:drill:zk=node3:2181> SELECT CONVERT_FROM(row_key, 'UTF8') AS rowkey,CONVERT_FROM(test.cf1.english, 'UTF8') AS english,CONVERT_FROM(test.cf1.math, 'UTF8') AS math FROM hbase.test;
查询HDFS
Drill WebUI配置HDFS Storage
{
"type": "file",
"enabled": true,
"connection": "hdfs://node1:8020/",
"config": null,
"workspaces": {
"root": {
"location": "/",
"writable": true,
"defaultInputFormat": null,
"allowAccessOutsideWorkspace": false
},
"json_files": {
"location": "/json_files",
"writable": false,
"defaultInputFormat": null,
"allowAccessOutsideWorkspace": false
}
},
"formats": {
"csv": {
"type": "text",
"extensions": [
"csv"
],
"delimiter": ","
},
"tsv": {
"type": "text",
"extensions": [
"tsv"
],
"delimiter": "\t"
},
"parquet": {
"type": "parquet"
},
"json": {
"type": "json",
"extensions": [
"json"
]
}
}
}
Drill Cli查询HDFS
0: jdbc:drill:zk=node3:2181> use hdfs.json_files;
0: jdbc:drill:zk=node3:2181> SELECT id,type,name,sales FROM `donuts.json` WHERE type='donut';
查询MySQL
Drill WebUI配置MySQL Storage
{
"type": "jdbc",
"driver": "com.mysql.jdbc.Driver",
"url": "jdbc:mysql://node2:3306",
"username": "root",
"password": "",
"enabled": true
}
Drill CLI查询MySQL
0: jdbc:drill:zk=node3:2181> select TBL_NAME,TBL_ID from mysql.hive.TBLS;