为了测试方便,这里提供一个搭建2shard,2replica的ClickHouse集群的脚本
官方已经有clickhouse-server的镜像为什么还要自己打一个?
因为官方镜像是为了最小化安装,其中没有vi,ifconfig等等命令不方便调试、测试
# Dev/test ClickHouse image based on CentOS: the minimal official image lacks
# debugging tools (vi, ifconfig, tcpdump, ...), so this image adds them.
FROM centos:7.6.1810
# Fix: "penssh" was a typo for "openssh" in the original package list.
RUN yum -y install sudo vim tcpdump sysstat lsof wget net-tools \
openssh openssh-server openssh-clients lrzsz rsyslog systemd systemd-libs
# Install clickhouse-server/client from the Altinity package repository.
RUN curl -s https://packagecloud.io/install/repositories/altinity/clickhouse/script.rpm.sh | bash && yum install -y clickhouse-server clickhouse-client
# UTF-8 locale and Asia/Shanghai timezone for predictable log timestamps.
RUN localedef -i en_US -f UTF-8 en_US.UTF-8 && ln -snf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' > /etc/timezone
# gosu lets the entrypoint drop privileges to the clickhouse user.
ADD https://github.com/tianon/gosu/releases/download/1.10/gosu-amd64 /bin/gosu
COPY entrypoint.sh /entrypoint.sh
# Fix: use the absolute path; the original "entrypoint.sh" only worked because
# the default WORKDIR happens to be /.
RUN chmod +x /entrypoint.sh /bin/gosu
# 9000: native TCP, 8123: HTTP, 9009: interserver replication.
EXPOSE 9000 8123 9009
VOLUME /var/lib/clickhouse
ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml
ENTRYPOINT ["/entrypoint.sh"]
#!/bin/bash
# Entrypoint for the dev ClickHouse image: prepares data/log directories,
# runs any one-shot init scripts in /docker-entrypoint-initdb.d/, then execs
# clickhouse-server (or the user-supplied command) as the unprivileged
# clickhouse user via gosu.
# Fix vs. original: all variable expansions are quoted (SC2086), so paths
# containing spaces or glob characters no longer break the script.

CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
# Numeric uid/gid of the clickhouse account created by the rpm install.
USER="$(id -u clickhouse)"
GROUP="$(id -g clickhouse)"

# http_port is needed to check if clickhouse-server is ready for connections.
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)"

# Resolve the directories ClickHouse writes to from the server config.
# "|| true" keeps a missing key from aborting the script.
DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.log || true)"
LOG_DIR="$(dirname "$LOG_PATH" || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH" || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)"

# Ensure every directory ClickHouse writes to exists.
mkdir -p \
"$DATA_DIR" \
"$ERROR_LOG_DIR" \
"$LOG_DIR" \
"$TMP_DIR" \
"$USER_PATH" \
"$FORMAT_SCHEMA_PATH"

# Hand the directories to the clickhouse user unless explicitly disabled.
if [ "$CLICKHOUSE_DO_NOT_CHOWN" != "1" ]; then
chown -R "$USER:$GROUP" \
"$DATA_DIR" \
"$ERROR_LOG_DIR" \
"$LOG_DIR" \
"$TMP_DIR" \
"$USER_PATH" \
"$FORMAT_SCHEMA_PATH"
fi

# If there are init scripts, start a temporary server, run them, stop it.
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ]; then
gosu clickhouse /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" &
pid="$!"

# Ping via http_port until the server accepts connections
# (max 12 retries, with 1 sec delay).
if ! wget --spider --quiet --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi

clickhouseclient=( clickhouse-client --multiquery )
echo
for f in /docker-entrypoint-initdb.d/*; do
case "$f" in
*.sh)
# Executable scripts are run; non-executable ones are sourced.
if [ -x "$f" ]; then
echo "$0: running $f"
"$f"
else
echo "$0: sourcing $f"
. "$f"
fi
;;
*.sql) echo "$0: running $f"; cat "$f" | "${clickhouseclient[@]}" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
*) echo "$0: ignoring $f" ;;
esac
echo
done

# Shut the temporary init server down cleanly before the real start below.
if ! kill -s TERM "$pid" || ! wait "$pid"; then
echo >&2 'Finishing of ClickHouse init process failed.'
exit 1
fi
fi

# If no args were passed to `docker run`, or the first argument starts with
# `--`, treat argv as clickhouse-server arguments.
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
exec gosu clickhouse /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@"
fi

# Otherwise run the user's own command, e.g. a bash shell to explore the image.
exec "$@"
from xml.dom.minidom import Document
# Cluster element name generated under <clickhouse_remote_servers>.
CLUSTER_NAME = "default"
# ClickHouse native TCP port used for every replica entry.
INSTANCE_PORT = "9000"
# ZooKeeper client port used for every <node> entry.
ZK_PORT = "2181"

class XmlMaker:
    """Generate a ClickHouse metrika.xml for a 2-replica-per-shard cluster
    plus a ZooKeeper ensemble, from two "<ip> <hostname>" host-list files.

    Hosts are paired in file order: lines 0/1 form shard 1, lines 2/3 form
    shard 2, and so on.
    """

    def __init__(self, host_path, zk_host_path, xml_path):
        self.host_path = host_path        # path of the ClickHouse host list
        self.zk_host_path = zk_host_path  # path of the ZooKeeper host list
        self.xml_path = xml_path          # output config file path
        self.host_list = []               # raw lines read from host_path
        self.host_zk_list = []            # raw lines read from zk_host_path

    @staticmethod
    def _read_lines(path):
        # Read all lines; warn (without aborting, matching the original
        # behavior) about lines that are not exactly "<ip> <hostname>".
        # A context manager closes the handle the original leaked.
        with open(path, "r") as f:
            lines = f.readlines()
        for line in lines:
            if len(line.strip().split(" ")) != 2:
                print("Error")
        return lines

    def read_host(self):
        """Load the ClickHouse host list into self.host_list."""
        self.host_list = self._read_lines(self.host_path)

    def read_zk_host(self):
        """Load the ZooKeeper host list into self.host_zk_list."""
        self.host_zk_list = self._read_lines(self.zk_host_path)

    @staticmethod
    def _text_element(doc, tag, text):
        # Build <tag>text</tag>.
        element = doc.createElement(tag)
        element.appendChild(doc.createTextNode(text))
        return element

    def _append_replica(self, doc, shard, raw_line):
        # Append <replica><host>ip</host><port>...</port></replica>, taking
        # the ip as the first space-separated token of the host-file line.
        replica = doc.createElement("replica")
        shard.appendChild(replica)
        replica.appendChild(self._text_element(doc, "host", raw_line.split(' ', 1)[0]))
        replica.appendChild(self._text_element(doc, "port", INSTANCE_PORT))

    def make_xml(self):
        """Build the cluster configuration and write it to self.xml_path."""
        doc = Document()
        yandex = doc.createElement("yandex")
        doc.appendChild(yandex)

        # <clickhouse_remote_servers>/<CLUSTER_NAME>: one <shard> per pair
        # of consecutive hosts, each holding two replicas.  The loop replaces
        # the duplicated replica-building code of the original.
        remote_servers = doc.createElement("clickhouse_remote_servers")
        yandex.appendChild(remote_servers)
        cluster = doc.createElement(CLUSTER_NAME)
        remote_servers.appendChild(cluster)
        for index in range(0, len(self.host_list), 2):
            shard = doc.createElement("shard")
            cluster.appendChild(shard)
            # internal_replication=true: a write goes to one replica and is
            # replicated via ZooKeeper instead of being written twice.
            shard.appendChild(self._text_element(doc, "internal_replication", "true"))
            self._append_replica(doc, shard, self.host_list[index])
            self._append_replica(doc, shard, self.host_list[index + 1])

        # <zookeeper-servers>: one <node index="N"> per ZooKeeper host.  The
        # original hard-coded exactly three nodes; looping generalizes to any
        # ensemble size while producing identical output for three.
        zk_servers = doc.createElement("zookeeper-servers")
        yandex.appendChild(zk_servers)
        for position, raw_line in enumerate(self.host_zk_list, start=1):
            node = doc.createElement("node")
            node.setAttribute("index", str(position))
            zk_servers.appendChild(node)
            node.appendChild(self._text_element(doc, "host", raw_line.split(' ', 1)[0]))
            node.appendChild(self._text_element(doc, "port", ZK_PORT))

        # Listen on IPv6 and IPv4 wildcards; listen_try=1 keeps startup from
        # failing when one of the address families is unavailable.
        yandex.appendChild(self._text_element(doc, "listen_host", "::"))
        yandex.appendChild(self._text_element(doc, "listen_host", "0.0.0.0"))
        yandex.appendChild(self._text_element(doc, "listen_try", "1"))

        # Per-node macros: @SHARD@/@REPLICA@ are placeholders substituted per
        # container by the deployment script (sed).
        macros = doc.createElement("macros")
        yandex.appendChild(macros)
        macros.appendChild(self._text_element(doc, "shard", "@SHARD@"))
        macros.appendChild(self._text_element(doc, "replica", "@REPLICA@"))

        # Fix: toprettyxml(..., encoding='utf-8') returns bytes, so the file
        # must be opened in binary mode.  The original opened it in text mode,
        # which raises TypeError on Python 3.
        with open(self.xml_path, 'wb') as f:
            f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))
if __name__ == "__main__":
    # Generate metrika.xml from the "host" and "zk_host" files produced by
    # the deployment script, then echo the input paths that were used.
    maker = XmlMaker("host", "zk_host", "metrika.xml")
    maker.read_host()
    maker.read_zk_host()
    maker.make_xml()
    print(maker.host_path)
    print(maker.zk_host_path)
为了方便测试Replicated表,搭建2shard2replica的ClickHouse集群时也顺带启动一个3节点zk集群,使用到zk_auto.sh,详见:zk集群搭建
#!/bin/bash
# Tear down and rebuild a 2-shard x 2-replica ClickHouse test cluster
# (containers ch1..ch4).  A 3-node ZooKeeper ensemble is started first
# via zk_auto.sh.
./zk_auto.sh
# Remove leftovers from a previous run (these fail harmlessly on first run).
docker stop ch1 && docker rm ch1
docker stop ch2 && docker rm ch2
docker stop ch3 && docker rm ch3
docker stop ch4 && docker rm ch4
# Start four fresh ClickHouse containers from the custom image.
docker run -dit --name ch1 --hostname ch1 xiedeyantu/clickhouse:1.0.0
docker run -dit --name ch2 --hostname ch2 xiedeyantu/clickhouse:1.0.0
docker run -dit --name ch3 --hostname ch3 xiedeyantu/clickhouse:1.0.0
docker run -dit --name ch4 --hostname ch4 xiedeyantu/clickhouse:1.0.0
# Record "<ip> <hostname>" for the ClickHouse (ch*) and ZooKeeper (zk*)
# containers into the host files consumed by gen_config.py.
# NOTE(review): the inner "sudo docker ps" is inconsistent with the non-sudo
# docker invocations elsewhere -- confirm whether sudo is actually required.
docker inspect -f='{{.NetworkSettings.IPAddress}} {{.Config.Hostname}}' $(sudo docker ps -a -q)|grep ch > host
docker inspect -f='{{.NetworkSettings.IPAddress}} {{.Config.Hostname}}' $(sudo docker ps -a -q)|grep zk > zk_host
# Generate the cluster config template and make one copy per node.
# NOTE(review): the XmlMaker script shown earlier writes "metrika.xml", but
# the copies below read "metrika_temp.xml" -- confirm gen_config.py's actual
# output filename.
python gen_config.py
cp metrika_temp.xml metrika1.xml
cp metrika_temp.xml metrika2.xml
cp metrika_temp.xml metrika3.xml
cp metrika_temp.xml metrika4.xml
# Substitute per-node macros.  num[0] is a filler because i starts at 1, so
# containers 1..4 get shard ids 1,1,2,2; the replica id is the container IP
# (awk $1 of the "<ip> <hostname>" host file).
i=0
num=(0 1 1 2 2)
for hostname in $(cat host|awk '{print $1}')
do
((i++))
sed -i "s/@SHARD@/${num[$i]}/g" metrika$i.xml
sed -i "s/@REPLICA@/$hostname/g" metrika$i.xml
done
# Install each node's config and restart so ClickHouse picks it up.
docker cp metrika1.xml ch1:/etc/metrika.xml
docker cp metrika2.xml ch2:/etc/metrika.xml
docker cp metrika3.xml ch3:/etc/metrika.xml
docker cp metrika4.xml ch4:/etc/metrika.xml
docker restart ch1 && docker restart ch2 && docker restart ch3 && docker restart ch4