关于Databend部署-集群版

一、准备工作

  • 下载databend【根据各自的系统,下载对应的版本: uname -a】
    databend下载地址
  • 解压对应的压缩包
  • 查看当前系统是否支持avx/sse/avx2等指令集
# macos查看cpu支持的指令集
sysctl -a | grep machdep.cpu.features

# macos 查看cpu是否支持SSE 4.2
sysctl -a | grep machdep.cpu.features | grep SSE

# 查看支持的所有的指令集
sysctl machdep.cpu
  • 集群规划:3个meta实例+3个query实例
    注意不同实例的端口配置,确保集群唯一,不冲突

接下来就是开始配置:【注:当前采用的是单机多实例的方式部署,故数据存储采用的是fs】

二、配置

关于databend-meta配置,主要配置的内容:【 注意端口和raft_id的唯一性 】

# 指定日志的目录(注意: TOML本身不会展开"~", 如果程序未做处理, 建议改用绝对路径)
log_dir            = "~/tests/databend/logs/log1"
# meta的管理地址
admin_api_address  = "0.0.0.0:28101"
# 读写meta的rpc地址
grpc_api_address   = "0.0.0.0:28102"

#### 关于meta集群化的raft配置:注意不同的实例id是不同的
[raft_config]
id            = 1
raft_dir      = "~/tests/databend/metas/meta1"
raft_api_port = 28103

# Assign raft_{listen|advertise}_host in test config.
# This allows you to catch a bug in unit tests when something goes wrong in raft meta nodes communication. 
raft_listen_host = "0.0.0.0"
raft_advertise_host = "localhost"

# 集群模式配置一
# 首先第一个启动的meta节点采用single方式
# Start up mode: single node cluster
single        = true

# 集群模式配置二
# 当新增新的meta节点时配置
join                = ["0.0.0.0:28103"]
# Start up mode: join an existing cluster, so single must be false
single        = false
  • 样例配置:meta集群第一个node
log_dir            = "~/tests/databend/logs/log1"
admin_api_address  = "0.0.0.0:28101"
grpc_api_address   = "0.0.0.0:28102"

[raft_config]
id            = 1
raft_dir      = "~/tests/databend/metas/meta1"
raft_api_port = 28103

# Assign raft_{listen|advertise}_host in test config.
raft_listen_host = "0.0.0.0"
raft_advertise_host = "localhost"

# Start up mode: single node cluster
single        = true
  • 新增新节点时的配置,以此类推调整对应的配置文件join和single的配置
log_dir            = "~/tests/databend/logs/log2"
admin_api_address  = "0.0.0.0:28201"
grpc_api_address   = "0.0.0.0:28202"

[raft_config]
id            = 2
raft_dir      = "~/tests/databend/metas/meta2"
raft_api_port = 28203

# Assign raft_{listen|advertise}_host in test config.
# This allows you to catch a bug in unit tests when something goes wrong in raft meta nodes communication. 
raft_listen_host = "0.0.0.0"
raft_advertise_host = "localhost"

# 代表当前节点要加入已存在的meta集群,可以指定多个已存在的meta raft地址
join                = ["0.0.0.0:28103"]
# 此时对应的single需设置为false
# Start up mode: join an existing cluster (not a single node cluster)
single        = false

接下来就是关于databend-query配置
主要的配置项如下:

[query]
max_active_sessions = 256
wait_timeout_mills = 5000

# For flight rpc.
# 主要是进行shuffle时使用地址
flight_api_address = "0.0.0.0:19091"

# query节点的http地址
# Databend Query http address.
# For admin REST API.
admin_api_address = "0.0.0.0:18081"

# metrics的地址
# Databend Query metrics REST API.
metric_api_address = "0.0.0.0:17071"

# mysql client的访问地址(使用MySQL协议, 不是http)
# Databend Query MySQL Handler.
mysql_handler_host = "0.0.0.0"
mysql_handler_port = 3307

# clickhouse client访问的http地址
# Databend Query ClickHouse Handler.
clickhouse_http_handler_host = "0.0.0.0"
clickhouse_http_handler_port = 18125

# http handler地址(采用curl之类的方式)
# Databend Query HTTP Handler.
http_handler_host = "0.0.0.0"
http_handler_port = 18001

# 用来标识租户id和集群id
tenant_id = "test_tenant"
cluster_id = "test_cluster"

table_engine_memory_enabled = true
database_engine_github_enabled = true

table_cache_enabled = true
table_memory_cache_mb_size = 1024
table_disk_cache_root = "_cache"
table_disk_cache_mb_size = 10240

# 日志相关配置
[log]
level = "ERROR"
dir = "~/tests/databend/logs/log1"
query_enabled = true

# meta server相关配置:主要是endpoints
[meta]
endpoints = ["0.0.0.0:28102","0.0.0.0:28202","0.0.0.0:28302"]
embedded_dir = "~/tests/databend/metas/meta_embedded_1"
# address = "0.0.0.0:9191"
username = "root"
password = "root"
client_timeout_in_second = 60
auto_sync_interval = 60

# 采用的存储类型
# Storage config.
[storage]
# fs | s3 | azblob
type = "fs"

# 当使用fs时需要指定一个本地路径
# Set a local folder to store your data.
# Comment out this block if you're NOT using local file system as storage.
[storage.fs]
data_path = "~/tests/databend/datas/stateless_test_data"

配置时需要注意meta部分和storage部分
meta因为是cluster模式,需要指定endpoints; storage因为是使用的fs,需要确保最终的数据存放到一个共享目录或共享存储(也可以使用分布式文件存储系统),确保数据能够被所有的databend-query节点共享。

  • 样例配置如下: databend-query配置基本无差别( 注意端口的唯一性 )
[query]
max_active_sessions = 256
wait_timeout_mills = 5000

# For flight rpc.
flight_api_address = "0.0.0.0:19091"

# Databend Query http address.
# For admin REST API.
admin_api_address = "0.0.0.0:18081"

# Databend Query metrics REST API.
metric_api_address = "0.0.0.0:17071"

# Databend Query MySQL Handler.
mysql_handler_host = "0.0.0.0"
mysql_handler_port = 3307

# Databend Query ClickHouse Handler.
clickhouse_http_handler_host = "0.0.0.0"
clickhouse_http_handler_port = 18125

# Databend Query HTTP Handler.
http_handler_host = "0.0.0.0"
http_handler_port = 18001

tenant_id = "test_tenant"
cluster_id = "test_cluster"

table_engine_memory_enabled = true
database_engine_github_enabled = true

table_cache_enabled = true
table_memory_cache_mb_size = 1024
table_disk_cache_root = "_cache"
table_disk_cache_mb_size = 10240

[log]
level = "ERROR"
dir = "~/tests/databend/logs/log1"
query_enabled = true

[meta]
endpoints = ["0.0.0.0:28102","0.0.0.0:28202","0.0.0.0:28302"]
# To enable embedded meta-store, set address to "".
embedded_dir = "~/tests/databend/metas/meta_embedded_1"
# address = "0.0.0.0:9191"
username = "root"
password = "root"
client_timeout_in_second = 60
auto_sync_interval = 60

# Storage config.
[storage]
# fs | s3 | azblob
type = "fs"

# Set a local folder to store your data.
# Comment out this block if you're NOT using local file system as storage.
[storage.fs]
data_path = "~/tests/databend/datas/stateless_test_data"

三、启动集群

  • 首先要启动meta
# 以single方式启动第一个meta节点
./bin/databend-meta -c  configs/databend-meta-1.toml > logs/meta_1.log 2>&1 &
# 新增其他节点到已存在的集群
./bin/databend-meta -c  configs/databend-meta-2.toml > logs/meta_2.log 2>&1 &
./bin/databend-meta -c  configs/databend-meta-3.toml > logs/meta_3.log 2>&1 &

验证meta集群

curl -s http://127.0.0.1:28101/v1/cluster/nodes
或者
# 确认单个节点是否健康
curl -I http://127.0.0.1:28101/v1/health 
  • 启动databend-query节点
./bin/databend-query -c  configs/databend-query-1.toml > logs/query_1.log 2>&1 &

./bin/databend-query -c  configs/databend-query-2.toml > logs/query_2.log 2>&1 &

./bin/databend-query -c  configs/databend-query-3.toml > logs/query_3.log 2>&1 &

验证是否正常

 curl -I http://127.0.0.1:18081/v1/health

四、测试

1、官方性能测试用例:databend-performance
2、sql测试: databend-sql
3、mysql client测试用例

注意3个databend-query的端口不同

mysql -h 127.0.0.1 -P 3307 -u root

create database if not exists test_db;
use test_db;
create table if not exists tb_a(a int);
show create table tb_a;
insert into tb_a values(1), (2),(3),(4);
select * from tb_a;

五、部署过程中出现的问题

1、ERROR 1105 (HY000): Code: 3001, displayText = entity not found (object error:

* 大概率是databend-query数据存储路径不共享,并不是所有的实例都是可用的 *

2、新增meta实例后,启动没有成功,

一般是join部分配置存在问题, 可以将当前集群已存在的节点都添加到待添加的新meta节点的配置文件join项中;
或者是进行单机多实例部署时端口冲突;
或者当前meta节点的raft_id不是全局唯一;

3、启动query节点失败

注意meta配置的endpoints项是否正确;见上面的databend-query.toml模版

4、部署时多参考官方文档
databend-standalone部署
databend-manage
关于Databend部署-单机版(测试)

你可能感兴趣的:(关于Databend部署-集群版)