分布式集群搭建,管理cpu和gpu集群
系统:
master-》centos7
cpu-》centos7
gpu -》ubuntu
1、master 安装
systemctl stop firewalld && systemctl disable firewalld
rpm -Uvh http://repos.mesosphere.io/el/7/noarch/RPMS/mesosphere-el-repo-7-1.noarch.rpm
yum -y install mesos mesosphere-zookeeper marathon
nginx(用于域名)
vi /etc/yum.repos.d/nginx.repo
[nginx]
name=nginx repo
baseurl=http://nginx.org/packages/centos/$releasever/$basearch/
gpgcheck=0
enabled=1
consul_0.6.4_linux_amd64.zip
2、master配置
zookeeper:
master1: echo 1 > /var/lib/zookeeper/myid
master2: echo 2 > /var/lib/zookeeper/myid
master3: echo 3 > /var/lib/zookeeper/myid
vi /etc/zookeeper/conf/zoo.cfg #加入
server.1=192.168.1.110:2888:3888
server.2=192.168.1.111:2888:3888
server.3=192.168.1.112:2888:3888
mesos:
vi /etc/mesos/zk #加入
zk://192.168.1.110:2181,192.168.1.111:2181,192.168.1.112:2181/mesos
echo 2 > /etc/mesos-master/quorum(表示集群最少需要的master数量 = number/2+1)
master1: echo 192.168.1.110 | sudo tee /etc/mesos-master/hostname
master2: echo 192.168.1.111 | sudo tee /etc/mesos-master/hostname
master3: echo 192.168.1.112 | sudo tee /etc/mesos-master/hostname
echo "192.168.1.110" > /etc/mesos-master/hostname
echo "/var/lib/mesos" > /etc/mesos-master/work_dir
marathon:
mkdir -p /etc/marathon/conf/
echo 'my ip' > /etc/marathon/conf/hostname
echo 'gpu_resources,external_volumes' > /etc/marathon/conf/enable_features
consul:
start_consul.sh
#!/bin/bash
nohup /home/user/tools/consul/consul agent -server -bootstrap-expect 3 -data-dir /home/user/tools/consul/data -node trm02 -bind 172.18.128.132 -client 0.0.0.0 -ui-dir /home/user/tools/consul/dist -dc stg -pid-file=/home/user/tools/consul/pid >> /home/user/tools/consul/log/consul.log 2>&1 &
stop_consul.sh
#!/bin/bash
#/home/user/tools/consul/consul leave
kill -9 $(cat /home/user/tools/consul/pid)
rm /home/user/tools/consul/pid
使能:
systemctl start marathon
systemctl enable marathon
systemctl start mesos
systemctl enable mesos
systemctl start zookeeper
systemctl enable zookeeper
3、slave安装如master
mesos
echo "zk://192.168.1.110:2181,192.168.1.111:2181,192.168.1.112:2181/mesos" > /etc/mesos/zk
echo "docker,mesos" > containerizers
echo "file:///root/.docker/config.json" > docker_config
echo "10mins" > executor_registration_timeout
echo "" > hostname
echo "/home/user/tools/mesos/slave" > work_dir
如果是gpu的话需要增加配置
echo "cgroup/devices,gpu/nvidia" > /etc/mesos-slave/isolation
echo "type:tiangong-dev" > /etc/mesos-slave/attributes
echo "172.18.192.91" > /etc/mesos-slave/advertise
consul
start_consul.sh
#!/bin/bash
nohup /home/user/tools/consul/consul agent -data-dir /home/user/tools/consul/data -node stg2 -bind 172.18.128.32 -client 0.0.0.0 -ui-dir /home/user/tools/consul/dist -dc stg -pid-file=/home/user/tools/consul/pid >> /home/user/tools/consul/log/consul.log 2>&1 &