【ClickHouse系列】ClickHouse集群自动化搭建

为了测试方便,这里提供一个搭建 2 shard、2 replica 的 ClickHouse 集群的脚本。

1.构建ClickHouse基于CentOS7的镜像

官方已经有clickhouse-server的镜像为什么还要自己打一个?

因为官方镜像采用最小化安装,其中没有 vi、ifconfig 等命令,不方便调试和测试。

Dockerfile

# CentOS 7 based ClickHouse image that keeps common debugging tools
# (vim, tcpdump, lsof, net-tools, ...) available inside the container.
FROM centos:7.6.1810

# Debugging / administration tooling. wget is also needed by entrypoint.sh,
# which uses it to poll the server's /ping endpoint.
RUN yum -y install sudo vim tcpdump sysstat lsof wget net-tools \
    openssh openssh-server openssh-clients lrzsz rsyslog systemd systemd-libs

# Register the Altinity package repository, then install ClickHouse from it.
RUN curl -s https://packagecloud.io/install/repositories/altinity/clickhouse/script.rpm.sh | bash && yum install -y clickhouse-server clickhouse-client

# Generate the en_US.UTF-8 locale and set the container time zone to Asia/Shanghai.
RUN localedef -i en_US -f UTF-8 en_US.UTF-8 && ln -snf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' > /etc/timezone

# gosu is what entrypoint.sh uses to run the server as the clickhouse user.
ADD https://github.com/tianon/gosu/releases/download/1.10/gosu-amd64 /bin/gosu

COPY entrypoint.sh /entrypoint.sh

# Use the absolute path so this step does not depend on the working directory,
# and create the init-script directory that entrypoint.sh lists at startup.
RUN chmod +x /entrypoint.sh /bin/gosu && mkdir -p /docker-entrypoint-initdb.d

# ClickHouse default ports: native TCP (9000), HTTP (8123), inter-server (9009).
EXPOSE 9000 8123 9009
VOLUME /var/lib/clickhouse

# Read by entrypoint.sh to locate the server configuration.
ENV CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml

ENTRYPOINT ["/entrypoint.sh"]

entrypoint.sh

#!/bin/bash
#
# Entrypoint of the ClickHouse image.
#
#   1. Reads the directories the server uses from $CLICKHOUSE_CONFIG, creates
#      them and, unless CLICKHOUSE_DO_NOT_CHOWN=1, hands them to the
#      clickhouse user.
#   2. If /docker-entrypoint-initdb.d contains files, starts a temporary
#      server, runs every *.sh / *.sql / *.sql.gz file against it and stops
#      the server again.
#   3. With no arguments, or when the first argument begins with `--`,
#      replaces itself with clickhouse-server; otherwise executes the given
#      command.

# set some vars
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
USER="$(id -u clickhouse)"
GROUP="$(id -g clickhouse)"
INITDB_DIR="/docker-entrypoint-initdb.d"

# Print the value of config key $1. Prints nothing, and still succeeds, when
# the key cannot be read, so optional keys simply yield an empty string.
config_value() {
    clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key="$1" || true
}

# Print the directory part of path $1, or nothing when $1 is empty.
# `dirname ""` would print "." and "." must never reach `chown -R` below.
parent_dir() {
    if [ -n "$1" ]; then
        dirname -- "$1"
    fi
}

# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(config_value http_port)"

# get CH directories locations
DATA_DIR="$(config_value path)"
TMP_DIR="$(config_value tmp_path)"
USER_PATH="$(config_value user_files_path)"
LOG_PATH="$(config_value logger.log)"
LOG_DIR="$(parent_dir "$LOG_PATH")"
ERROR_LOG_PATH="$(config_value logger.errorlog)"
ERROR_LOG_DIR="$(parent_dir "$ERROR_LOG_PATH")"
FORMAT_SCHEMA_PATH="$(config_value format_schema_path)"

# Keep only the locations that are actually configured; passing "" to mkdir or
# chown just produces an error message for that argument.
ch_dirs=()
for dir in \
    "$DATA_DIR" \
    "$ERROR_LOG_DIR" \
    "$LOG_DIR" \
    "$TMP_DIR" \
    "$USER_PATH" \
    "$FORMAT_SCHEMA_PATH"; do
    if [ -n "$dir" ]; then
        ch_dirs+=("$dir")
    fi
done

if [ "${#ch_dirs[@]}" -gt 0 ]; then
    # ensure directories exist
    mkdir -p "${ch_dirs[@]}"

    if [ "$CLICKHOUSE_DO_NOT_CHOWN" != "1" ]; then
        # ensure proper directories permissions
        chown -R "$USER:$GROUP" "${ch_dirs[@]}"
    fi
fi

# Collect the init files with a glob instead of parsing `ls`; nullglob makes a
# missing or empty directory expand to an empty list rather than an error.
shopt -s nullglob
init_files=("$INITDB_DIR"/*)
shopt -u nullglob

if [ "${#init_files[@]}" -gt 0 ]; then
    gosu clickhouse /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" &
    pid="$!"

    # check if clickhouse is ready to accept connections
    # will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay)
    if ! wget --spider --quiet --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping"; then
        echo >&2 'ClickHouse init process failed.'
        exit 1
    fi
    clickhouseclient=(clickhouse-client --multiquery)
    echo
    for f in "${init_files[@]}"; do
        case "$f" in
            *.sh)
                # Executable scripts run as separate processes; the others are
                # sourced into this shell.
                if [ -x "$f" ]; then
                    echo "$0: running $f"
                    "$f"
                else
                    echo "$0: sourcing $f"
                    . "$f"
                fi
                ;;
            *.sql)    echo "$0: running $f"; "${clickhouseclient[@]}" < "$f"; echo ;;
            *.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
            *)        echo "$0: ignoring $f" ;;
        esac
        echo
    done

    # Stop the temporary server and wait for it to exit before the real start.
    if ! kill -s TERM "$pid" || ! wait "$pid"; then
        echo >&2 'Finishing of ClickHouse init process failed.'
        exit 1
    fi
fi

# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
    exec gosu clickhouse /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@"
fi

# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image
exec "$@"

2.生成集群每个节点的metrika.xml配置文件

gen_config.py

from xml.dom.minidom import Document

# Tag name of the cluster element created under <clickhouse_remote_servers>.
CLUSTER_NAME = "default"
# Port written into every <replica> entry.
INSTANCE_PORT = "9000"
# Port written into every ZooKeeper <node> entry.
ZK_PORT = "2181"


class XmlMaker:
    """Generate a metrika.xml template from two host listing files.

    Both input files hold one entry per line; the first whitespace-separated
    field of each line is used as the host written into the XML. A line is
    expected to have exactly two fields; lines that do not are reported with
    "Error" but still used.

    The generated document contains the cluster layout, the ZooKeeper nodes,
    the listen settings and a <macros> section whose values are the literal
    placeholders "@SHARD@" and "@REPLICA@".
    """

    # Number of consecutive host entries grouped into one <shard>.
    REPLICAS_PER_SHARD = 2

    def __init__(self, host_path, zk_host_path, xml_path):
        """Remember the input/output paths; nothing is read until read_*()."""
        self.host_path = host_path
        self.zk_host_path = zk_host_path
        self.xml_path = xml_path
        self.host_list = []
        self.host_zk_list = []

    def _read_entries(self, path):
        """Return the stripped, non-blank lines of ``path``.

        Prints "Error" for each line that does not consist of two fields.
        """
        entries = []
        # The context manager closes the file even if reading fails.
        with open(path, "r") as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                if not entry:
                    continue
                if len(entry.split()) != 2:
                    print("Error")
                entries.append(entry)
        return entries

    def _first_field(self, entry):
        """Return the first whitespace-separated field of ``entry`` ('' if none)."""
        fields = entry.split()
        return fields[0] if fields else ""

    def _add_element(self, doc, parent, tag, text=None):
        """Create <tag> under ``parent``, optionally with text content; return it."""
        element = doc.createElement(tag)
        if text is not None:
            element.appendChild(doc.createTextNode(text))
        parent.appendChild(element)
        return element

    def read_host(self):
        """Load the ClickHouse host entries from ``self.host_path``."""
        self.host_list = self._read_entries(self.host_path)

    def read_zk_host(self):
        """Load the ZooKeeper host entries from ``self.zk_host_path``."""
        self.host_zk_list = self._read_entries(self.zk_host_path)

    def make_xml(self):
        """Build the document and write it to ``self.xml_path`` as UTF-8.

        Raises:
            ValueError: if the number of host entries is not a multiple of
                REPLICAS_PER_SHARD (nothing is written in that case).
        """
        per_shard = self.REPLICAS_PER_SHARD
        if len(self.host_list) % per_shard != 0:
            raise ValueError(
                "expected a multiple of {0} hosts, got {1}".format(
                    per_shard, len(self.host_list)))

        doc = Document()
        yandex = self._add_element(doc, doc, "yandex")

        # Cluster layout: every REPLICAS_PER_SHARD consecutive hosts form a shard.
        remote_servers = self._add_element(doc, yandex, "clickhouse_remote_servers")
        cluster = self._add_element(doc, remote_servers, CLUSTER_NAME)
        for start in range(0, len(self.host_list), per_shard):
            shard = self._add_element(doc, cluster, "shard")
            self._add_element(doc, shard, "internal_replication", "true")
            for entry in self.host_list[start:start + per_shard]:
                replica = self._add_element(doc, shard, "replica")
                self._add_element(doc, replica, "host", self._first_field(entry))
                self._add_element(doc, replica, "port", INSTANCE_PORT)

        # One <node index="N"> per ZooKeeper entry, numbered from 1.
        zookeeper_servers = self._add_element(doc, yandex, "zookeeper-servers")
        for position, entry in enumerate(self.host_zk_list, 1):
            node = self._add_element(doc, zookeeper_servers, "node")
            node.setAttribute("index", str(position))
            self._add_element(doc, node, "host", self._first_field(entry))
            self._add_element(doc, node, "port", ZK_PORT)

        # listen_host
        self._add_element(doc, yandex, "listen_host", "::")
        self._add_element(doc, yandex, "listen_host", "0.0.0.0")
        self._add_element(doc, yandex, "listen_try", "1")

        # macros: placeholders meant to be substituted per node afterwards.
        macros = self._add_element(doc, yandex, "macros")
        self._add_element(doc, macros, "shard", "@SHARD@")
        self._add_element(doc, macros, "replica", "@REPLICA@")

        # toprettyxml() returns bytes when an encoding is given, so the file
        # must be opened in binary mode.
        with open(self.xml_path, 'wb') as f:
            f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))


if __name__ == "__main__":
    # Read the "host" and "zk_host" listings from the current directory,
    # write the generated XML to "metrika.xml", then echo both input paths.
    maker = XmlMaker(host_path="host", zk_host_path="zk_host", xml_path="metrika.xml")
    for step in (maker.read_host, maker.read_zk_host, maker.make_xml):
        step()
    for used_path in (maker.host_path, maker.zk_host_path):
        print(used_path)

3.集群搭建完整脚本

ch_auto.sh

为了方便测试 Replicated 表,搭建 2 shard、2 replica 的 ClickHouse 集群时会顺带启动一个 3 节点的 zk 集群,这一步用到 zk_auto.sh,详见:zk集群搭建

#!/bin/bash
#
# Bring up a 2-shard x 2-replica ClickHouse test cluster in Docker.
#
# Steps: start ZooKeeper via ./zk_auto.sh, (re)create the containers
# ch1..ch4, record container IPs in the files `host` and `zk_host`, let
# gen_config.py generate the config template, fill in the per-node macros and
# copy the result into each container as /etc/metrika.xml, then restart the
# containers so they load it.
#
# Must be run from the directory containing zk_auto.sh and gen_config.py.

image="xiedeyantu/clickhouse:1.0.0"
nodes=(ch1 ch2 ch3 ch4)
replicas_per_shard=2
# gen_config.py writes its output to metrika.xml; the file still contains the
# @SHARD@ / @REPLICA@ placeholders that are substituted below.
template="metrika.xml"
inspect_format='{{.NetworkSettings.IPAddress}} {{.Config.Hostname}}'

# Print a message to stderr and abort.
die() {
    printf '%s\n' "$*" >&2
    exit 1
}

./zk_auto.sh

# Remove containers left over from a previous run. On a first run the
# containers do not exist yet; docker reports that and the script moves on.
for node in "${nodes[@]}"; do
    docker stop "$node" && docker rm "$node"
done

for node in "${nodes[@]}"; do
    docker run -dit --name "$node" --hostname "$node" "$image" \
        || die "failed to start container $node"
done

# Inspect the ClickHouse containers by name so that `host` lists exactly
# ch1..ch4, in that order: gen_config.py groups consecutive lines into shards,
# and the per-node macros below rely on the same order.
docker inspect --format "$inspect_format" "${nodes[@]}" > host \
    || die "failed to inspect the ClickHouse containers"

# The ZooKeeper container names come from zk_auto.sh, so select them by name
# pattern from all containers.
mapfile -t all_containers < <(docker ps -a -q)
docker inspect --format "$inspect_format" "${all_containers[@]}" | grep zk > zk_host

python gen_config.py || die "gen_config.py failed"
[[ -f "$template" ]] || die "gen_config.py did not produce $template"

mapfile -t host_lines < host
(( ${#host_lines[@]} == ${#nodes[@]} )) \
    || die "expected ${#nodes[@]} lines in host, found ${#host_lines[@]}"

for i in "${!nodes[@]}"; do
    node="${nodes[$i]}"
    config="metrika$(( i + 1 )).xml"
    # Nodes 1-2 belong to shard 1, nodes 3-4 to shard 2.
    shard=$(( i / replicas_per_shard + 1 ))
    # First field of the host line: the container IP, used as the replica name.
    replica_ip="${host_lines[$i]%% *}"
    [[ -n "$replica_ip" ]] || die "no IP address found for $node"

    sed -e "s/@SHARD@/${shard}/g" -e "s/@REPLICA@/${replica_ip}/g" \
        "$template" > "$config" || die "failed to write $config"
    docker cp "$config" "${node}:/etc/metrika.xml" \
        || die "failed to copy $config into $node"
done

for node in "${nodes[@]}"; do
    docker restart "$node" || die "failed to restart $node"
done

你可能感兴趣的:(ClickHouse,Linux,Python)