思路:
- 以上一篇文章中部署的Hive为基础部署Presto
- Presto集群包含Coordinator和Worker两类节点,节点类型通过容器环境变量设置
- 节点的node.properties配置文件中不设置node.id;节点挂掉后,由Kubernetes重新拉起一个新节点
[root@master-0 ~]# kubectl get nodes -o wide
NAME STATUS ROLES AGE VERSION EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
master-0 Ready master 14d v1.9.2+coreos.0 <none> CentOS Linux 7 (Core) 3.10.0-862.el7.x86_64 docker://1.13.1
worker-0 Ready <none> 14d v1.9.2+coreos.0 <none> CentOS Linux 7 (Core) 3.10.0-862.el7.x86_64 docker://1.13.1
worker-1 Ready <none> 14d v1.9.2+coreos.0 <none> CentOS Linux 7 (Core) 3.10.0-862.el7.x86_64 docker://1.13.1
[root@master-0 ~]# kubectl get svc -o wide
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
hadoop-dn-service ClusterIP None <none> 9000/TCP,50010/TCP,50075/TCP 19h app=hadoop-dn
hadoop-nn-service ClusterIP None <none> 9000/TCP,50070/TCP 19h app=hadoop-nn
hadoop-ui-service NodePort 10.233.21.71 <none> 8088:32295/TCP,50070:31127/TCP 19h app=hadoop-nn
hive-metadata-mysql-service NodePort 10.233.23.56 <none> 3306:31470/TCP 41m app=hive-metadata-mysql
hive-service NodePort 10.233.60.239 <none> 10000:30717/TCP,10002:30001/TCP,9083:32335/TCP 41m app=hive
kubernetes ClusterIP 10.233.0.1 <none> 443/TCP 14d <none>
Presto没有官方镜像,这里我基于CentOS 7.5和Presto 0.208制作了自己的镜像,Dockerfile如下:
# Presto 0.208 server image built on the private CentOS 7.5 base image,
# bundling JDK 8u151.
FROM 192.168.101.88:5000/base/centos:7.5.1804

# MAINTAINER is deprecated; the equivalent LABEL is used instead.
LABEL maintainer="leichen.china@gmail.com"

# ADD unpacks local tar archives into the destination directory.
ADD jdk-8u151-linux-x64.tar.gz /opt
ADD presto-server-0.208.tar.gz /opt

ENV PRESTO_HOME=/opt/presto-server-0.208 \
    JAVA_HOME=/opt/jdk1.8.0_151
# Separate instruction: JAVA_HOME set above is only visible to later instructions.
ENV PATH=$JAVA_HOME/bin:$PATH
构建镜像的命令:docker build -t 192.168.101.88:5000/dmcop2/presto-server:dm-0.208 .
# ConfigMap holding the container start-up script and the Presto configuration
# templates. The same ConfigMap is rendered for both node roles; the role is
# injected through the COORDINATOR_NODE environment variable.
apiVersion: v1
kind: ConfigMap
metadata:
  name: presto-config-cm
  labels:
    app: presto-coordinator
data:
  # Start-up script: copies the mounted templates into $PRESTO_HOME/etc,
  # substitutes the node role and then starts Presto in the foreground.
  bootstrap.sh: |-
    #!/bin/bash
    # Fail fast instead of starting Presto with a half-written configuration.
    set -euo pipefail

    : "${PRESTO_HOME:?PRESTO_HOME must be set}"
    : "${COORDINATOR_NODE:?COORDINATOR_NODE must be set to true or false}"

    cd /root/bootstrap
    mkdir -p "$PRESTO_HOME/etc/catalog"

    # The mounted templates are copied so that they can be edited in place.
    for name in node.properties jvm.config config.properties log.properties; do
      cat "./$name" > "$PRESTO_HOME/etc/$name"
    done

    # Replace every literal ${COORDINATOR_NODE} placeholder with the node role.
    sed -i 's/${COORDINATOR_NODE}/'"$COORDINATOR_NODE"'/g' "$PRESTO_HOME/etc/config.properties"

    # Catalog definitions come from a second volume mounted next to this one.
    for cfg in ../catalog/*; do
      [[ -f "$cfg" ]] || continue
      cat "$cfg" > "$PRESTO_HOME/etc/catalog/${cfg##*/}"
    done

    # exec replaces this shell with the launcher so that it receives the
    # signals sent to the script's process directly.
    exec "$PRESTO_HOME/bin/launcher" run --verbose
  # node.id is intentionally not set here.
  node.properties: |-
    node.environment=production
    node.data-dir=/var/presto/data
  jvm.config: |-
    -server
    -Xmx16G
    -XX:+UseG1GC
    -XX:G1HeapRegionSize=32M
    -XX:+UseGCOverheadLimit
    -XX:+ExplicitGCInvokesConcurrent
    -XX:+HeapDumpOnOutOfMemoryError
    -XX:+ExitOnOutOfMemoryError
  # ${COORDINATOR_NODE} is a placeholder substituted by bootstrap.sh. The
  # embedded discovery server follows the node role, so only the coordinator
  # (the node discovery.uri points at) runs it.
  config.properties: |-
    coordinator=${COORDINATOR_NODE}
    node-scheduler.include-coordinator=true
    http-server.http.port=8080
    query.max-memory=10GB
    query.max-memory-per-node=1GB
    query.max-total-memory-per-node=2GB
    discovery-server.enabled=${COORDINATOR_NODE}
    discovery.uri=http://presto-coordinator-service:8080
  log.properties: |-
    com.facebook.presto=INFO
说明:
1、启动脚本执行时,将配置文件覆盖到对应路径,然后根据环境变量COORDINATOR_NODE设置节点类型
2、配置文件config.properties中的discovery.uri设置为coordinator对应的serviceName
# Catalog definitions for Presto. Every key under `data` is one catalog
# properties file.
kind: ConfigMap
apiVersion: v1
metadata:
  labels:
    app: presto-coordinator
  name: presto-catalog-config-cm
data:
  # Hive connector, pointing at the metastore Thrift endpoint of hive-service.
  hive.properties: |-
    connector.name=hive-hadoop2
    hive.metastore.uri=thrift://hive-service:9083
说明:
1、配置hive.properties文件,指定Hive的ServiceName和metastore端口
2、文件将被挂载到POD内的Catalog目录
# Presto coordinator Deployment (single replica).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: presto-coordinator
spec:
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: presto-coordinator
  template:
    metadata:
      labels:
        app: presto-coordinator
    spec:
      containers:
        - name: presto-coordinator
          image: 192.168.101.88:5000/dmcop2/presto-server:dm-0.208
          # The script is passed to bash directly instead of being chmod-ed:
          # ConfigMap volumes may be mounted read-only, in which case chmod
          # fails and the container would never start.
          command: ["bash", "/root/bootstrap/bootstrap.sh"]
          ports:
            - name: http-coord
              containerPort: 8080
              protocol: TCP
          env:
            # Read by bootstrap.sh to render config.properties.
            - name: COORDINATOR_NODE
              value: "true"
          volumeMounts:
            - name: presto-config-volume
              mountPath: /root/bootstrap
            - name: presto-catalog-config-volume
              mountPath: /root/catalog
            - name: presto-data-volume
              mountPath: /var/presto/data
          # Ready once the HTTP endpoint /v1/cluster answers successfully.
          readinessProbe:
            initialDelaySeconds: 10
            periodSeconds: 5
            httpGet:
              path: /v1/cluster
              port: http-coord
      volumes:
        - name: presto-config-volume
          configMap:
            name: presto-config-cm
        - name: presto-catalog-config-volume
          configMap:
            name: presto-catalog-config-cm
        # Scratch space for node.data-dir; discarded together with the pod.
        - name: presto-data-volume
          emptyDir: {}
---
# NodePort Service in front of the coordinator pod; it is also the host name
# used in discovery.uri and by the worker probes.
apiVersion: v1
kind: Service
metadata:
  name: presto-coordinator-service
  labels:
    app: presto-coordinator
spec:
  type: NodePort
  selector:
    app: presto-coordinator
  ports:
    # targetPort refers to the named container port of the coordinator.
    - name: http-coord
      port: 8080
      targetPort: http-coord
---
# Presto worker Deployment; scale it with `kubectl scale` to add workers.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: presto-worker
spec:
  replicas: 2
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: presto-worker
  template:
    metadata:
      labels:
        app: presto-worker
    spec:
      initContainers:
        # Blocks worker start-up until the coordinator UI answers over HTTP.
        - name: wait-coordinator
          image: 192.168.101.88:5000/dmcop2/presto-server:dm-0.208
          command: ["bash", "-c", "until curl -sf http://presto-coordinator-service:8080/ui/; do echo 'waiting for coordinator started...'; sleep 2; done;"]
      containers:
        - name: presto-worker
          image: 192.168.101.88:5000/dmcop2/presto-server:dm-0.208
          # The script is passed to bash directly instead of being chmod-ed:
          # ConfigMap volumes may be mounted read-only, in which case chmod
          # fails and the container would never start.
          command: ["bash", "/root/bootstrap/bootstrap.sh"]
          ports:
            - name: http-coord
              containerPort: 8080
              protocol: TCP
          env:
            # Read by bootstrap.sh to render config.properties.
            - name: COORDINATOR_NODE
              value: "false"
          volumeMounts:
            - name: presto-config-volume
              mountPath: /root/bootstrap
            - name: presto-catalog-config-volume
              mountPath: /root/catalog
            - name: presto-data-volume
              mountPath: /var/presto/data
          # Ready once this pod's IP shows up in the coordinator's /v1/node
          # response. -F matches the address literally and -w as a whole word,
          # so 10.0.0.1 is not satisfied by 10.0.0.10; -q keeps the probe quiet.
          readinessProbe:
            initialDelaySeconds: 10
            periodSeconds: 5
            exec:
              command: ["bash", "-c", "curl -s http://presto-coordinator-service:8080/v1/node | tr ',' '\n' | grep -qFw -- \"$(hostname -i)\""]
      volumes:
        - name: presto-config-volume
          configMap:
            name: presto-config-cm
        - name: presto-catalog-config-volume
          configMap:
            name: presto-catalog-config-cm
        # Scratch space for node.data-dir; discarded together with the pod.
        - name: presto-data-volume
          emptyDir: {}
说明:
1、Coordinator和Worker分两个Deployment部署,但是使用同一个ConfigMap进行配置
2、启动脚本bootstrap.sh根据环境变量COORDINATOR_NODE动态修改配置文件
3、Coordinator通过HTTP访问/v1/cluster检查容器是否就绪;Worker则访问Coordinator服务的/v1/node,根据返回的节点列表中是否包含自身IP来判断容器是否就绪
4、使用Service NodePort对外提供访问
[root@master-0 presto]# kubectl get svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
hadoop-dn-service ClusterIP None <none> 9000/TCP,50010/TCP,50075/TCP 19h
hadoop-nn-service ClusterIP None <none> 9000/TCP,50070/TCP 19h
hadoop-ui-service NodePort 10.233.21.71 <none> 8088:32295/TCP,50070:31127/TCP 19h
hive-metadata-mysql-service NodePort 10.233.23.56 <none> 3306:31470/TCP 1h
hive-service NodePort 10.233.60.239 <none> 10000:30717/TCP,10002:30001/TCP,9083:32335/TCP 1h
kubernetes ClusterIP 10.233.0.1 <none> 443/TCP 14d
presto-coordinator-service NodePort 10.233.50.222 <none> 8080:30418/TCP 39s
[root@master-0 presto]# wget https://repo1.maven.org/maven2/com/facebook/presto/presto-cli/0.208/presto-cli-0.208-executable.jar
[root@master-0 presto]# chmod +x presto-cli-0.208-executable.jar
[root@master-0 presto]# ./presto-cli-0.208-executable.jar --server 192.168.112.240:30418 --catalog hive --schema default
presto:default> select * from abc;
a
---
1
(1 row)
Query 20180907_030117_00002_bmkxf, FINISHED, 1 node
Splits: 17 total, 17 done (100.00%)
0:03 [1 rows, 2B] [0 rows/s, 0B/s]
presto:default>
[root@master-0 presto]# kubectl get deployment
NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE
hive 1 1 1 1 1h
hive-metadata-mysql 1 1 1 1 1h
presto-coordinator 1 1 1 1 21m
presto-worker 2 2 2 2 21m
[root@master-0 presto]# kubectl scale deployment presto-worker --replicas=3
deployment "presto-worker" scaled
[root@master-0 presto]# kubectl get deployment
NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE
hive 1 1 1 1 1h
hive-metadata-mysql 1 1 1 1 1h
presto-coordinator 1 1 1 1 23m
presto-worker 3 3 3 3 23m