$ cat /etc/centos-release
CentOS Linux release 7.4.1708 (Core)
安装部署:
# yum -y install epel-release git curl sshpass python-pip
# useradd -m -d /home/tidb tidb
# passwd tidb
# visudo
tidb ALL=(ALL) NOPASSWD: ALL
生成 ssh key: 执行 su 命令从 root 用户切换到 tidb 用户下。
#ssh key
# su - tidb
$ ssh-keygen -t rsa
提示 Enter passphrase 时直接回车即可。执行成功后,ssh 私钥文件为 /home/tidb/.ssh/id_rsa, ssh 公钥文件为 /home/tidb/.ssh/id_rsa.pub
$ cd /home/tidb/tidb-ansible
$ sudo pip install -r ./requirements.txt
--ansible版本确认:
$ ansible --version
ansible 2.7.7
config file = /etc/ansible/ansible.cfg
configured module search path = [u'/home/tidb/.ansible/plugins/modules', u'/usr/share/ansible/plugins/modules']
ansible python module location = /usr/lib/python2.7/site-packages/ansible
executable location = /bin/ansible
python version = 2.7.5 (default, Aug 4 2017, 00:39:18) [GCC 4.8.5 20150623 (Red Hat 4.8.5-16)]
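下载 2.1 版本(注意:实际操作时需先以 tidb 用户在 /home/tidb 目录下执行本步骤克隆 tidb-ansible,然后才能执行前面的 cd /home/tidb/tidb-ansible 与 pip install 步骤):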
$ git clone -b release-2.1 https://github.com/pingcap/tidb-ansible.git
-- 编辑主机文件:
$ vim /home/tidb/tidb-ansible/hosts.ini
[servers]
172.16.4.171
172.16.4.172
172.16.4.173
172.16.4.174
172.16.4.175
[all:vars]
username = tidb
#ntp_server = pool.ntp.org
ntp_server = 172.16.4.168
-- 配置主机互信
执行以下命令,按提示输入部署目标机器 root 用户密码。该步骤将在部署目标机器上创建 tidb 用户,并配置 sudo 规则,配置中控机与部署目标机器之间的 ssh 互信。
$ cd /home/tidb/tidb-ansible
$ ansible-playbook -i hosts.ini create_users.yml -u root -k
SSH password:
PLAY [all] *******************************
TASK [create user] ***********************
ok: [172.16.4.173]
ok: [172.16.4.174]
ok: [172.16.4.175]
ok: [172.16.4.172]
ok: [172.16.4.171]
TASK [set authorized key] ****************
ok: [172.16.4.171]
ok: [172.16.4.172]
ok: [172.16.4.174]
ok: [172.16.4.173]
ok: [172.16.4.175]
TASK [update sudoers file] **************
ok: [172.16.4.171]
ok: [172.16.4.173]
ok: [172.16.4.172]
ok: [172.16.4.174]
ok: [172.16.4.175]
PLAY RECAP **************
172.16.4.171 : ok=3 changed=0 unreachable=0 failed=0
172.16.4.172 : ok=3 changed=0 unreachable=0 failed=0
172.16.4.173 : ok=3 changed=0 unreachable=0 failed=0
172.16.4.174 : ok=3 changed=0 unreachable=0 failed=0
172.16.4.175 : ok=3 changed=0 unreachable=0 failed=0
Congrats! All goes well. :-)
目标主机上部署NTP服务:
如果你的部署目标机器时间、时区设置一致,已开启 NTP 服务且在正常同步时间,此步骤可忽略。
$ ansible-playbook -i hosts.ini deploy_ntp.yml -u tidb -b
...
PLAY RECAP ***********************
172.16.4.171 : ok=6 changed=3 unreachable=0 failed=0
172.16.4.172 : ok=6 changed=3 unreachable=0 failed=0
172.16.4.173 : ok=6 changed=3 unreachable=0 failed=0
172.16.4.174 : ok=6 changed=3 unreachable=0 failed=0
172.16.4.175 : ok=6 changed=3 unreachable=0 failed=0
Congrats! All goes well. :-)
在部署目标机器上批量设置 CPU 频率调节器(governor)为 performance 模式:
$ ansible -i hosts.ini all -m shell -a "cpupower frequency-set --governor performance" -u tidb -b
各个目标主机采用 ext4 文件系统。
配置信息:
组件规划:
$ vim inventory.ini
## TiDB Cluster Part
[tidb_servers]
172.16.4.172
172.16.4.173
[tikv_servers]
172.16.4.173
172.16.4.174
172.16.4.175
[pd_servers]
172.16.4.173
172.16.4.174
172.16.4.175
[spark_master]
172.16.4.174
[spark_slaves]
172.16.4.175
[lightning_server]
172.16.4.174
172.16.4.175
[importer_server]
172.16.4.174
172.16.4.175
## Monitoring Part
# prometheus and pushgateway servers
[monitoring_servers]
172.16.4.171
[grafana_servers]
172.16.4.171
# node_exporter and blackbox_exporter servers
[monitored_servers]
172.16.4.171
172.16.4.172
172.16.4.173
172.16.4.174
172.16.4.175
[alertmanager_servers]
172.16.4.171
[kafka_exporter_servers]
172.16.4.171
## Binlog Part
[pump_servers]
pump1 ansible_host=172.16.4.173 deploy_dir=/data/tidb/pump
pump2 ansible_host=172.16.4.174 deploy_dir=/data/tidb/pump
pump3 ansible_host=172.16.4.175 deploy_dir=/data/tidb/pump
[drainer_servers]
172.16.4.173
172.16.4.174
172.16.4.175
## Group variables
[pd_servers:vars]
# location_labels = ["zone","rack","host"]
## Global variables
[all:vars]
deploy_dir = /data/tidb/tidb_deploy
## Connection
# ssh via normal user
ansible_user = tidb
cluster_name = pre-tidb
tidb_version = v2.1.4
# process supervision, [systemd, supervise]
process_supervision = systemd
timezone = Asia/Shanghai
enable_firewalld = False
# check NTP service
enable_ntpd = True
set_hostname = False
## binlog trigger
enable_binlog = True
# kafka cluster address for monitoring, example:
# kafka_addrs = "192.168.0.11:9092,192.168.0.12:9092,192.168.0.13:9092"
kafka_addrs = "172.16.4.173:9092,172.16.4.174:9092,172.16.4.175:9092"
# zookeeper address of kafka cluster for monitoring, example:
# zookeeper_addrs = "192.168.0.11:2181,192.168.0.12:2181,192.168.0.13:2181"
zookeeper_addrs = "172.16.4.173:2181,172.16.4.174:2181,172.16.4.175:2181"
# store slow query log into separate file
enable_slow_query_log = True
# enable TLS authentication in the TiDB cluster
enable_tls = False
# KV mode
deploy_without_tidb = False
# Optional: Set if you already have a alertmanager server.
# Format: alertmanager_host:alertmanager_port
alertmanager_target = ""
grafana_admin_user = "admin"
grafana_admin_password = "admin"
### Collect diagnosis
collect_log_recent_hours = 2
enable_bandwidth_limit = True
# default: 10Mb/s, unit: Kbit/s
collect_bandwidth_limit = 10000
注意:
若所有 server 返回 tidb 表示 ssh 互信配置成功:
$ ansible -i inventory.ini all -m shell -a 'whoami'
172.16.4.172 | CHANGED | rc=0 >>
tidb
172.16.4.173 | CHANGED | rc=0 >>
tidb
172.16.4.174 | CHANGED | rc=0 >>
tidb
172.16.4.171 | CHANGED | rc=0 >>
tidb
172.16.4.175 | CHANGED | rc=0 >>
tidb
pump1 | CHANGED | rc=0 >>
tidb
pump2 | CHANGED | rc=0 >>
tidb
pump3 | CHANGED | rc=0 >>
tidb
若所有 server 返回 root 表示 tidb 用户 sudo 免密码配置成功:
$ ansible -i inventory.ini all -m shell -a 'whoami' -b
172.16.4.171 | CHANGED | rc=0 >>
root
172.16.4.173 | CHANGED | rc=0 >>
root
172.16.4.172 | CHANGED | rc=0 >>
root
172.16.4.174 | CHANGED | rc=0 >>
root
172.16.4.175 | CHANGED | rc=0 >>
root
pump2 | CHANGED | rc=0 >>
root
pump1 | CHANGED | rc=0 >>
root
pump3 | CHANGED | rc=0 >>
root
执行 local_prepare.yml playbook,联网下载 TiDB binary 到中控机:
$ ansible-playbook local_prepare.yml
初始化系统环境,修改内核参数:
$ ansible-playbook bootstrap.yml
... 以下为执行 local_prepare.yml 时的软件下载输出信息 ...
TASK [local : download tidb binary] ************
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/tidb-v2.1.4-linux-amd64.tar.gz', u'version': u'v2.1.4', u'name': u'tidb'})
TASK [local : download common binary] *************
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/fio-3.8.tar.gz', u'checksum': u'sha256:15739abde7e74b59ac59df57f129b14fc5cd59e1e2eca2ce37b41f8c289c3d58', u'version': 3.8, u'name': u'fio'})
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/grafana_collector-latest-linux-amd64.tar.gz', u'version': u'latest', u'name': u'grafana_collector'})
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/kafka_exporter-1.1.0.linux-amd64.tar.gz', u'version': u'1.1.0', u'name': u'kafka_exporter'})
TASK [local : download diagnosis tools] **************
changed: [localhost] => (item={u'url': u'http://download.pingcap.org/tidb-insight-v0.2.5-1-g99b8fea.tar.gz', u'version': u'v0.2.5-1-g99b8fea', u'name': u'tidb-insight'})
TASK [local : download cfssl binary] *************
TASK [local : download cfssljson binary] **************
TASK [local : include_tasks] *************
included: /home/tidb/tidb-ansible/roles/local/tasks/binary_deployment.yml for localhost
TASK [local : download other binary] ************
TASK [local : download other binary under gfw] ************
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/prometheus-2.2.1.linux-amd64.tar.gz', u'version': u'2.2.1', u'name': u'prometheus'})
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/alertmanager-0.14.0.linux-amd64.tar.gz', u'version': u'0.14.0', u'name': u'alertmanager'})
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/node_exporter-0.15.2.linux-amd64.tar.gz', u'version': u'0.15.2', u'name': u'node_exporter'})
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/pushgateway-0.4.0.linux-amd64.tar.gz', u'version': u'0.4.0', u'name': u'pushgateway'})
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/grafana-4.6.3.linux-x64.tar.gz', u'version': u'4.6.3', u'name': u'grafana'})
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/blackbox_exporter-0.12.0.linux-amd64.tar.gz', u'version': u'0.12.0', u'name': u'blackbox_exporter'})
TASK [local : download TiSpark packages] *************
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/spark-2.3.2-bin-hadoop2.7.tgz', u'checksum': u'sha256:6246b20d95c7596a29fb26d5b50a3ae3163a35915bec6c515a8e183383bedc43', u'version': u'2.3.2', u'name': u'spark-2.3.2-bin-hadoop2.7.tgz'})
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/tispark-latest-linux-amd64.tar.gz', u'version': u'latest', u'name': u'tispark-latest.tar.gz'})
ok: [localhost] => (item={u'url': u'http://download.pingcap.org/tispark-sample-data.tar.gz', u'version': u'latest', u'name': u'tispark-sample-data.tar.gz'})
TASK [local : unarchive third party binary] *************
changed: [localhost] => (item={u'url': u'https://github.com/prometheus/prometheus/releases/download/v2.2.1/prometheus-2.2.1.linux-amd64.tar.gz', u'version': u'2.2.1', u'name': u'prometheus'})
changed: [localhost] => (item={u'url': u'https://github.com/prometheus/alertmanager/releases/download/v0.14.0/alertmanager-0.14.0.linux-amd64.tar.gz', u'version': u'0.14.0', u'name': u'alertmanager'})
changed: [localhost] => (item={u'url': u'https://github.com/prometheus/node_exporter/releases/download/v0.15.2/node_exporter-0.15.2.linux-amd64.tar.gz', u'version': u'0.15.2', u'name': u'node_exporter'})
changed: [localhost] => (item={u'url': u'https://github.com/prometheus/blackbox_exporter/releases/download/v0.12.0/blackbox_exporter-0.12.0.linux-amd64.tar.gz', u'version': u'0.12.0', u'name': u'blackbox_exporter'})
changed: [localhost] => (item={u'url': u'https://github.com/prometheus/pushgateway/releases/download/v0.4.0/pushgateway-0.4.0.linux-amd64.tar.gz', u'version': u'0.4.0', u'name': u'pushgateway'})
changed: [localhost] => (item={u'url': u'https://s3-us-west-2.amazonaws.com/grafana-releases/release/grafana-4.6.3.linux-x64.tar.gz', u'version': u'4.6.3', u'name': u'grafana'})
TASK [local : unarchive tispark] *************
changed: [localhost]
TASK [local : unarchive tispark-sample-data] *************
changed: [localhost]
TASK [local : cp monitoring binary] *************
changed: [localhost] => (item=alertmanager)
changed: [localhost] => (item=prometheus)
changed: [localhost] => (item=node_exporter)
changed: [localhost] => (item=pushgateway)
changed: [localhost] => (item=blackbox_exporter)
TASK [local : cp tispark] ***************************************************************************************************************************************************************************************************************
changed: [localhost]
TASK [local : cp tispark-sample-data] *************
changed: [localhost]
TASK [local : unarchive tidb binary] *************
changed: [localhost] => (item={u'url': u'http://download.pingcap.org/tidb-v2.1.4-linux-amd64.tar.gz', u'version': u'v2.1.4', u'name': u'tidb'})
TASK [local : unarchive common binary] *************
changed: [localhost] => (item={u'url': u'http://download.pingcap.org/fio-3.8.tar.gz', u'checksum': u'sha256:15739abde7e74b59ac59df57f129b14fc5cd59e1e2eca2ce37b41f8c289c3d58', u'version': 3.8, u'name': u'fio'})
changed: [localhost] => (item={u'url': u'http://download.pingcap.org/grafana_collector-latest-linux-amd64.tar.gz', u'version': u'latest', u'name': u'grafana_collector'})
changed: [localhost] => (item={u'url': u'http://download.pingcap.org/kafka_exporter-1.1.0.linux-amd64.tar.gz', u'version': u'1.1.0', u'name': u'kafka_exporter'})
TASK [local : cp tidb binary] *************
changed: [localhost] => (item={u'url': u'http://download.pingcap.org/tidb-v2.1.4-linux-amd64.tar.gz', u'version': u'v2.1.4', u'name': u'tidb'})
....
... 内存检查报错:
初始化系统环境,修改内核参数,此外还会检查内存和磁盘等信息:
若内存不足则会报错:fatal: [172.16.4.173]: FAILED! => {"changed": false, "msg": "This machine does not have sufficient RAM to run TiDB, at least 16000 MB."}
各个指标会检查是否达到最小的运行环境的数据。
若报错(仅限测试环境),则在 bootstrap.yml 中注释掉 check_system_optional 这一行:
$ vim bootstrap.yml
# This play book is intend for one pass execution
- name: initializing deployment target
hosts: localhost
gather_facts: false
roles:
- check_config_static
- name: check node config
hosts: all
gather_facts: false
become: true
roles:
- pre-ansible
- bootstrap
- name: check system
hosts: all
any_errors_fatal: true
roles:
- check_system_static
# - { role: check_system_optional, when: not dev_mode|default(false) }
- name: tikv_servers machine benchmark
hosts: tikv_servers
gather_facts: false
roles:
- { role: machine_benchmark, when: not dev_mode|default(false) }
- name: create ops scripts
hosts: localhost
connection: local
gather_facts: false
roles:
- ops
-- SSD 硬盘检查报错:
TASK [machine_benchmark : Preflight check - Does fio randread iops of tikv_data_dir disk meet requirement] ******************************************************************************************************************************
fatal: [172.16.4.173]: FAILED! => {"changed": false, "msg": "fio: randread iops of tikv_data_dir disk is too low: 64 < 40000, it is strongly recommended to use SSD disks for TiKV and PD, or there might be performance issues."}
fatal: [172.16.4.174]: FAILED! => {"changed": false, "msg": "fio: randread iops of tikv_data_dir disk is too low: 58 < 40000, it is strongly recommended to use SSD disks for TiKV and PD, or there might be performance issues."}
fatal: [172.16.4.175]: FAILED! => {"changed": false, "msg": "fio: randread iops of tikv_data_dir disk is too low: 391 < 40000, it is strongly recommended to use SSD disks for TiKV and PD, or there might be performance issues."}
若磁盘 IOPS 检查不达标(仅限测试环境),则在 bootstrap.yml 中注释掉 machine_benchmark 这一行:
$ vim bootstrap.yml
- name: tikv_servers machine benchmark
hosts: tikv_servers
gather_facts: false
roles:
# - { role: machine_benchmark, when: not dev_mode|default(false) }
$ ansible-playbook bootstrap.yml
...
PLAY RECAP ******************
172.16.4.171 : ok=28 changed=5 unreachable=0 failed=0
172.16.4.172 : ok=28 changed=5 unreachable=0 failed=0
172.16.4.173 : ok=28 changed=5 unreachable=0 failed=0
172.16.4.174 : ok=28 changed=5 unreachable=0 failed=0
172.16.4.175 : ok=28 changed=5 unreachable=0 failed=0
localhost : ok=3 changed=2 unreachable=0 failed=0
pump1 : ok=28 changed=5 unreachable=0 failed=0
pump2 : ok=28 changed=5 unreachable=0 failed=0
pump3 : ok=28 changed=5 unreachable=0 failed=0
Congrats! All goes well. :-)
部署 Grafana 的监控主机上需要先安装其依赖的字体软件包:
$ sudo yum -y install fontconfig open-sans-fonts
部署tidb:
$ ansible-playbook deploy.yml
...
PLAY RECAP ****************
172.16.4.171 : ok=114 changed=61 unreachable=0 failed=0
172.16.4.172 : ok=52 changed=22 unreachable=0 failed=0
172.16.4.173 : ok=96 changed=36 unreachable=0 failed=0
172.16.4.174 : ok=111 changed=43 unreachable=0 failed=0
172.16.4.175 : ok=112 changed=44 unreachable=0 failed=0
localhost : ok=1 changed=0 unreachable=0 failed=0
pump1 : ok=20 changed=9 unreachable=0 failed=0
pump2 : ok=20 changed=9 unreachable=0 failed=0
pump3 : ok=20 changed=9 unreachable=0 failed=0
Congrats! All goes well. :-)
-- 上面的步骤可简单记为:
初始化系统环境,修改内核参数:
$ ansible-playbook bootstrap.yml
部署tidb:
$ ansible-playbook deploy.yml
启动tidb:
$ ansible-playbook start.yml
tidb集群的关闭:
$ ansible-playbook stop.yml
部署 TiSpark 集群时,需要在 TiSpark 所在的主机上安装 JDK 8。
登录 TiDB 数据库:只能使用 MySQL 5.7 版本的客户端登录,使用 MySQL 8.0 版本的客户端则会报错。MySQL 5.7 的 yum 源配置及已安装的软件包如下:
# cat /etc/yum.repos.d/mysql-community.repo
[mysql57-community]
name=MySQL 5.7 Community Server
baseurl=http://repo.mysql.com/yum/mysql-5.7-community/el/7/$basearch/
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-mysql
# rpm -qa | grep -i mysql | grep -i 5.7
mysql-community-libs-5.7.24-1.el7.x86_64
mysql-community-devel-5.7.24-1.el7.x86_64
mysql-community-libs-compat-5.7.24-1.el7.x86_64
mysql-community-common-5.7.24-1.el7.x86_64
mysql-community-client-5.7.24-1.el7.x86_64
mysql-community-server-5.7.24-1.el7.x86_64
TiDB 的监控(Grafana)登录地址:
http://172.16.4.171:3000
登录的用户名和密码为admin/admin.