系统:centos 7.1及以上;Ubuntu 16.04及以上
软件:Java 1.8及以上;GCC 4.8.2及以上;
# --- Build/runtime prerequisites on all cluster nodes (ansible group "cluster") ---
# CentOS: compiler toolchain and pcre headers.
ansible cluster -m shell -a "yum -y install gcc gcc-c++ autoconf pcre pcre-devel make automake"
# Misc tools (wget, ab from httpd-tools, vim).
ansible cluster -m shell -a "yum -y install wget httpd-tools vim"
# Verify GCC is present and >= 4.8.2 on every node.
ansible cluster -m shell -a "gcc --version"
# Ubuntu equivalent (run on Ubuntu hosts instead of the yum lines above).
sudo apt install -y gcc
# Runtime-only kernel tuning recommended for Doris BE nodes:
# disable swapping preference and keep the default overcommit heuristic.
# (Persistent values are written to sysctl configuration further below.)
echo 0 > /proc/sys/vm/swappiness
echo 0 > /proc/sys/vm/overcommit_memory
# NOTE(fix): the original ran `sysctl -p` here. That reloads the (not yet
# modified) /etc/sysctl.conf and can immediately overwrite the two /proc
# writes above with stale values, so it has been removed from this step.
# Raise per-user resource limits required by Doris (process count, open
# files, locked memory). Takes effect for new login sessions.
# NOTE(fix): the limits.conf item name is "nproc"; the original wrote
# "noproc", which is not a valid item and was silently ignored by pam_limits.
cat << EOF >> /etc/security/limits.conf
* soft nproc 65535
* hard nproc 65535
* soft nofile 65535
* hard nofile 65535
* hard memlock unlimited
* soft memlock unlimited
EOF
# jdk — environment for Oracle JDK 1.8 (add to /etc/profile or ~/.bashrc).
export JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
# Legacy JDK8-style CLASSPATH (rt.jar/dt.jar/tools.jar); harmless on JDK 8,
# those jars do not exist on JDK 9+.
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
# Persist kernel tuning for Doris in a dedicated sysctl drop-in file.
# NOTE(fix): the original used `cat > /etc/sysctl.conf`, which TRUNCATES and
# replaces the system-wide sysctl configuration. A file under /etc/sysctl.d/
# keeps existing settings intact and is easy to remove later.
cat > /etc/sysctl.d/99-doris.conf << EOF
vm.max_map_count = 262144
net.ipv4.tcp_retries2 = 5
fs.file-max = 6553560
net.ipv4.tcp_max_tw_buckets = 6000
net.ipv4.tcp_sack = 1
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_rmem = 4096 87380 4194304
net.ipv4.tcp_wmem = 4096 16384 4194304
net.ipv4.tcp_max_syn_backlog = 16384
net.core.netdev_max_backlog = 32768
net.core.somaxconn = 32768
net.core.wmem_default = 8388608
net.core.rmem_default = 8388608
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_timestamps = 1
net.ipv4.tcp_fin_timeout = 20
net.ipv4.tcp_synack_retries = 2
net.ipv4.tcp_syn_retries = 2
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_mem = 94500000 915000000 927000000
net.ipv4.tcp_max_orphans = 3276800
net.ipv4.ip_local_port_range = 1024 65000
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1
kernel.numa_balancing = 0
kernel.shmmax = 68719476736
kernel.printk = 5
kernel.sysrq = 1
vm.overcommit_memory = 0
# Fix: set to 0 to match the runtime value written to /proc earlier in this
# runbook (the original wrote 1 here, contradicting that step).
vm.swappiness = 0
vm.dirty_background_ratio = 5
vm.dirty_ratio = 80
EOF
# --system loads /etc/sysctl.d/*.conf in addition to /etc/sysctl.conf
# (plain `sysctl -p` only reads /etc/sysctl.conf).
sysctl --system
查看是否支持avx2指令集,返回0,说明不支持
cat /proc/cpuinfo | grep avx2 | wc -l
根据JDK版本,CPU架构,是否支持avx2,下载对应的Doris安装包
下载地址:https://doris.apache.org/zh-CN/download
部署目录: /opt/module/doris/fe
日志目录: /data/doris/log/fe
元数据目录: /data/doris/data/meta
部署目录: /opt/module/doris/be
日志目录: /data/doris/log/be
数据存储目录: /data/doris/data/storage
部署目录: /opt/module/doris/hdfs_broker
# Create deployment, log and data directories on every node.
ansible cluster -m shell -a "mkdir -p /opt/module/doris/"
ansible cluster -m shell -a "mkdir -p /data/doris/log/{fe,be}"
# The ".SSD" suffix on the storage path matches the storage_root_path
# configured in be.conf below (path suffix declares the storage medium).
ansible cluster -m shell -a "mkdir -p /data/doris/data/{meta,storage.SSD}"
# Unpack the Doris binary distribution (run on the control node).
tar -xvJf apache-doris-1.2.6-bin-x64.tar.xz
chown -R root:root apache-doris-1.2.6-bin-x64
# fe.conf — JVM/environment section for the Doris FE.
# NOTE(fix): the original line read "AVA_HOME=..." (leading "J" lost), so
# the FE start script would not have picked up the intended JDK.
JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
# the output dir of stderr and stdout
LOG_DIR = /data/doris/log/fe
DATE = `date +%Y%m%d-%H%M%S`
# JVM options for JDK 8 (CMS collector, 8 GB heap, GC log per start date).
JAVA_OPTS="-Xmx8192m -XX:+UseMembar -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=7 -XX:+PrintGCDateStamps -XX:+PrintGCDetails -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSClassUnloadingEnabled -XX:-CMSParallelRemarkEnabled -XX:CMSInitiatingOccupancyFraction=80 -XX:SoftRefLRUPolicyMSPerMB=0 -Xloggc:/data/doris/log/fe/fe.gc.log.$DATE"
# For jdk 9+, this JAVA_OPTS will be used as default JVM options
JAVA_OPTS_FOR_JDK_9="-Xmx8192m -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=7 -XX:+CMSClassUnloadingEnabled -XX:-CMSParallelRemarkEnabled -XX:CMSInitiatingOccupancyFraction=80 -XX:SoftRefLRUPolicyMSPerMB=0 -Xlog:gc*:/data/doris/log/fe/fe.gc.log.$DATE:time"
##
## the lowercase properties are read by main program.
##
# INFO, WARN, ERROR, FATAL
sys_log_level = INFO
# store metadata, must be created before start FE.
# Default value is ${DORIS_HOME}/doris-meta
# meta_dir = ${DORIS_HOME}/doris-meta
meta_dir = /data/doris/data/meta
# Default dirs to put jdbc drivers,default value is ${DORIS_HOME}/jdbc_drivers
# jdbc_drivers_dir = ${DORIS_HOME}/jdbc_drivers
# Service ports: HTTP UI, thrift RPC, MySQL-protocol port, and the
# edit-log (metadata replication) port used by --helper / ADD FOLLOWER below.
http_port = 8030
rpc_port = 9020
query_port = 9030
edit_log_port = 9010
mysql_service_nio_enabled = true
# Choose one if there are more than one ip except loopback address.
# Note that there should at most one ip match this list.
# If no ip match this rule, will choose one randomly.
# use CIDR format, e.g. 10.10.10.0/24
# Default value is empty.
# priority_networks = 10.10.10.0/24;192.168.0.0/16
# NOTE(review): host-specific value (FE node 192.168.0.122) — adjust per node.
priority_networks = 192.168.0.122/24
# Advanced configurations
# log_roll_size_mb = 1024
sys_log_dir = /data/doris/log/fe
# sys_log_roll_num = 10
# sys_log_verbose_modules = org.apache.doris
audit_log_dir = /data/doris/log/fe
# audit_log_modules = slow_query, query
# audit_log_roll_num = 10
# meta_delay_toleration_second = 10
# qe_max_connection = 1024
# max_conn_per_user = 100
# qe_query_timeout_second = 300
# qe_slow_log_ms = 5000
# NOTE(review): SSD should match the ".SSD" storage path created on the BEs.
default_storage_medium = SSD
dynamic_partition_enable = true
# be.conf — JVM/environment section for the Doris BE.
JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
# Presumably the gperftools/pprof temp-output directory — verify against BE docs.
PPROF_TMPDIR="/data/doris/log/be"
CUR_DATE=`date +%Y%m%d-%H%M%S`
# Options for the JVM embedded in the BE (JNI / JDBC pools). NOTE(review):
# the "JDBC_MAX_IDEL_TIME" spelling matches the property name as shipped in
# Doris defaults — do not "correct" it without checking upstream.
JAVA_OPTS="-Xmx1024m -DlogPath=/data/doris/log/be/jni.log -Xloggc:/data/doris/log/be/be.gc.log.$CUR_DATE -Dsun.java.command=DorisBE -XX:-CriticalJNINatives -DJDBC_MIN_POOL=1 -DJDBC_MAX_POOL=100 -DJDBC_MAX_IDEL_TIME=300000 -DJDBC_MAX_WAIT_TIME=5000"
# For jdk 9+, this JAVA_OPTS will be used as default JVM options
JAVA_OPTS_FOR_JDK_9="-Xmx1024m -DlogPath=/data/doris/log/be/jni.log -Xlog:gc:/data/doris/log/be/be.gc.log.$CUR_DATE -Dsun.java.command=DorisBE -XX:-CriticalJNINatives -DJDBC_MIN_POOL=1 -DJDBC_MAX_POOL=100 -DJDBC_MAX_IDEL_TIME=300000 -DJDBC_MAX_WAIT_TIME=5000"
# since 1.2, the JAVA_HOME need to be set to run BE process.
# JAVA_HOME=/path/to/jdk/
# INFO, WARNING, ERROR, FATAL
sys_log_level = INFO
# ports for admin, web, heartbeat service
be_port = 9060
# NOTE(review): non-default web port (health-check URLs below use 18040).
webserver_port = 18040
heartbeat_service_port = 9050
brpc_port = 8060
# Choose one if there are more than one ip except loopback address.
# Note that there should at most one ip match this list.
# If no ip match this rule, will choose one randomly.
# use CIDR format, e.g. 10.10.10.0/24
# Default value is empty.
# priority_networks = 10.10.10.0/24;192.168.0.0/16
# NOTE(review): host-specific value — each BE node needs its own line here.
priority_networks = 192.168.0.129/24
# data root path, separate by ';'
# you can specify the storage medium of each root path, HDD or SSD
# you can add capacity limit at the end of each root path, separate by ','
# eg:
# storage_root_path = /home/disk1/doris.HDD,50;/home/disk2/doris.SSD,1;/home/disk2/doris
# /home/disk1/doris.HDD, capacity limit is 50GB, HDD;
# /home/disk2/doris.SSD, capacity limit is 1GB, SSD;
# /home/disk2/doris, capacity limit is disk capacity, HDD(default)
#
# you also can specify the properties by setting ':', separate by ','
# property 'medium' has a higher priority than the extension of path
#
# Default value is ${DORIS_HOME}/storage, you should create it by hand.
# storage_root_path = ${DORIS_HOME}/storage
storage_root_path = /data/doris/data/storage.SSD
# Default dirs to put jdbc drivers,default value is ${DORIS_HOME}/jdbc_drivers
# jdbc_drivers_dir = ${DORIS_HOME}/jdbc_drivers
# Advanced configurations
sys_log_dir = /data/doris/log/be
# sys_log_roll_mode = SIZE-MB-1024
# sys_log_roll_num = 10
# sys_log_verbose_modules = *
# log_buffer_level = -1
# palo_cgroups
# Faster load/ingest speed: more flush threads per store (default is 2).
flush_thread_num_per_store = 5
# Options for better query performance.
enable_storage_vectorization = true
enable_low_cardinality_optimize = true
enable_segcompaction = true
# Use the newer (vertical) compaction algorithm.
enable_vertical_compaction = true
# Distribute the unpacked FE/BE/broker/plugin directories to every node.
ansible cluster -m copy -a 'src=/opt/module/apache-doris-1.2.6-bin-x64/fe dest=/opt/module/doris/'
ansible cluster -m copy -a 'src=/opt/module/apache-doris-1.2.6-bin-x64/be dest=/opt/module/doris/'
ansible cluster -m copy -a 'src=/opt/module/apache-doris-1.2.6-bin-x64/extensions/apache_hdfs_broker dest=/opt/module/doris/'
ansible cluster -m copy -a 'src=/opt/module/apache-doris-1.2.6-bin-x64/extensions/audit_loader dest=/opt/module/doris/'
# Per-node be.conf values — NOTE(review): presumably one line per BE host
# (set in each node's own be.conf after distribution); verify placement.
priority_networks = 192.168.0.129/24
priority_networks = 192.168.0.137/24
# Download and install the MySQL 8.0.25 client (used to connect to the FE).
wget https://cdn.mysql.com/archives/mysql-8.0/mysql-community-common-8.0.25-1.el7.x86_64.rpm
wget https://cdn.mysql.com/archives/mysql-8.0/mysql-community-client-plugins-8.0.25-1.el7.x86_64.rpm
wget https://cdn.mysql.com/archives/mysql-8.0/mysql-community-libs-8.0.25-1.el7.x86_64.rpm
wget https://cdn.mysql.com/archives/mysql-8.0/mysql-community-client-8.0.25-1.el7.x86_64.rpm
# Install in dependency order.
rpm -ivh mysql-community-common-8.0.25-1.el7.x86_64.rpm
rpm -ivh mysql-community-client-plugins-8.0.25-1.el7.x86_64.rpm
rpm -ivh mysql-community-libs-8.0.25-1.el7.x86_64.rpm
rpm -ivh mysql-community-client-8.0.25-1.el7.x86_64.rpm
或
# Or install all at once — CAUTION: --nodeps --force skips dependency and
# conflict checks; only use when the rpms above are all present.
rpm -Uvh *.rpm --nodeps --force
# Start the FE as a background daemon (run from the FE deploy dir).
bin/start_fe.sh --daemon
注:8030是默认端口
# FE web UI (http_port configured above):
http://192.168.0.122:8030
# First login: root has no password yet.
mysql -h127.0.0.1 -P9030 -uroot
SET PASSWORD FOR 'root' = PASSWORD('Boshi*2023');
# Log out and log back in with the new password.
mysql -h192.168.0.122 -P9030 -uroot -pBoshi*2023
mysql-client 连接到 FE
# IsMaster、Join 和 Alive 三列均为true,则表示节点正常
SHOW PROC '/frontends';
通过URL来访问
http://fe_host:fe_http_port/api/bootstrap
fe_host FE节点ip
fe_http_port FE安装节点conf/fe.conf中配置的端口
http://192.168.0.122:8030/api/bootstrap
# Adding FOLLOWER FEs. On the very first start of a new follower, bootstrap
# it from the current leader with --helper (only needed once):
fe/bin/start_fe.sh --helper leader_fe_host:edit_log_port --daemon
fe/bin/start_fe.sh --helper xx.xx.xx.xx:9010 --daemon
# Connect to a running FE with mysql-client and register the follower:
ALTER SYSTEM ADD FOLLOWER "follower_host:edit_log_port";
ALTER SYSTEM ADD FOLLOWER "xx.xx.xx.xx:9010";
# Repeat for fe2 and fe3:
fe/bin/start_fe.sh --helper xx.xx.xx.xx:9010 --daemon
ALTER SYSTEM ADD FOLLOWER "xx.xx.xx.xx:9010";
ALTER SYSTEM ADD FOLLOWER "xx.xx.xx.xx:9010";
# Verify: IsMaster, Join and Alive should all be true.
SHOW PROC '/frontends';
# Adding an OBSERVER FE. On its very first start, bootstrap it from the
# current leader with --helper (only needed once):
fe/bin/start_fe.sh --helper leader_fe_host:edit_log_port --daemon
fe/bin/start_fe.sh --helper xx.xx.xx.xx:9010 --daemon
# Connect to a running FE with mysql-client and register the observer:
ALTER SYSTEM ADD OBSERVER "observer_host:edit_log_port";
# NOTE(fix): the original issued ADD FOLLOWER on the line below, which would
# register the node with the wrong role; an observer must be added with
# ADD OBSERVER.
ALTER SYSTEM ADD OBSERVER "xx.xx.xx.xx:9010";
# BE kernel prerequisites for the current session (mmap count, open files).
sysctl -w vm.max_map_count=2000000
ulimit -n 65536
# Start the BE as a background daemon (run from the BE deploy dir).
bin/start_be.sh --daemon
# Register the BEs from mysql-client connected to the FE; 9050 is the
# heartbeat_service_port configured in be.conf above.
ALTER SYSTEM ADD BACKEND "192.168.0.129:9050";
ALTER SYSTEM ADD BACKEND "192.168.0.137:9050";
# Remove a BE. NOTE(review): "DROPP" (double P) appears to be Doris's
# deliberate spelling to guard against accidental drops — verify against the
# Doris docs before "correcting" it.
ALTER SYSTEM DROPP BACKEND "hostname:9050";
ALTER SYSTEM DROPP BACKEND "192.168.0.129:9050";
ALTER SYSTEM DROPP BACKEND "192.168.0.137:9050";
mysql-client 连接到 FE
# The isAlive column should be true for every registered BE.
SHOW PROC '/backends' \G;
通过URL来访问
http://be_host:webserver_port/api/health
webserver_port BE安装节点conf/be.conf中配置的端口
http://192.168.0.129:18040/api/health
http://fe_host:fe_http_port
http://192.168.0.122:8030
-- Create an admin-role user for day-to-day use and verify its grants.
CREATE USER 'doris'@'%' IDENTIFIED BY 'Doris*2023' DEFAULT ROLE 'admin';
SHOW GRANTS FOR doris@'%';
# Log in as the new user.
mysql -h192.168.0.122 -P9030 -udoris -pDoris*2023
-- Create the layered warehouse databases (tmp/ods/dwd/dws/ads).
-- IF NOT EXISTS makes the script idempotent and safe to re-run.
CREATE DATABASE IF NOT EXISTS tmp;
CREATE DATABASE IF NOT EXISTS ods;
CREATE DATABASE IF NOT EXISTS dwd;
CREATE DATABASE IF NOT EXISTS dws;
CREATE DATABASE IF NOT EXISTS ads;
-- Department table (UNIQUE KEY model: one row per deptno, later inserts
-- replace earlier ones). IF NOT EXISTS keeps the script re-runnable.
CREATE TABLE IF NOT EXISTS tmp.dept
(
    `deptno` int,
    `dname` varchar(14),
    `loc` varchar(13)
)
UNIQUE KEY(deptno)
COMMENT "部门表"
DISTRIBUTED BY HASH (deptno) BUCKETS 8
PROPERTIES(
    "replication_num" = "1"
);
-- Seed data (classic Oracle demo departments).
insert into tmp.dept values (10,'accounting','new york'),(20,'research','dallas'),(30,'sales','chicago'),(40,'operations','boston');
-- Employee table (UNIQUE KEY model keyed on empno).
-- Fixes: IF NOT EXISTS for idempotent re-runs; sal/comm are monetary
-- values and were FLOAT — changed to exact DECIMAL(7,2) to avoid binary
-- floating-point rounding (all seed values are representable exactly).
CREATE TABLE IF NOT EXISTS tmp.emp
(
    `empno` int,
    `ename` varchar(10),
    `job` varchar(9),
    `mgr` int,
    `hiredate` date,
    `sal` decimal(7,2),
    `comm` decimal(7,2),
    `deptno` int
)
UNIQUE KEY(empno)
COMMENT "员工表"
DISTRIBUTED BY HASH (empno) BUCKETS 8
PROPERTIES(
    "replication_num" = "1"
);
-- Seed data (classic Oracle demo employees).
insert into tmp.emp values (7369,'smith','clerk',7902,'1980-12-17',800,null,20),
(7499,'allen','salesman',7698,'1981-02-20',1600,300,30),
(7521,'ward','salesman',7698,'1981-02-22',1250,500,30),
(7566,'jones','manager',7839,'1981-04-02',2975,null,20),
(7654,'martin','salesman',7698,'1981-09-28',1250,1400,30),
(7698,'blake','manager',7839,'1981-05-01',2850,null,30),
(7782,'clark','manager',7839,'1981-06-09',2450,null,10),
(7788,'scott','analyst',7566,'1987-07-13',3000,null,20),
(7839,'king','president',null,'1981-11-07',5000,null,10),
(7844,'turner','salesman',7698,'1981-09-08',1500,0,30),
(7876,'adams','clerk',7788,'1987-07-13',1100,null,20),
(7900,'james','clerk',7698,'1981-12-03',950,null,30),
(7902,'ford','analyst',7566,'1981-12-03',3000,null,20),
(7934,'miller','clerk',7782,'1982-01-23',1300,null,10);
-- Salary-grade lookup table (DUPLICATE KEY model: rows kept as inserted).
-- IF NOT EXISTS keeps the script re-runnable.
CREATE TABLE IF NOT EXISTS tmp.salgrade
(
    `grade` int,
    `losal` int,
    `hisal` int
)
DUPLICATE KEY(grade,losal,hisal)
COMMENT "工资水平"
DISTRIBUTED BY HASH(losal,hisal) BUCKETS 8
PROPERTIES (
    "replication_num" = "1"
);
-- Seed data: grade, lower salary bound, upper salary bound.
insert into tmp.salgrade values (1,700,1200),(2,1201,1400),(3,1401,2000),(4,2001,3000),(5,3001,9999);
-- User consumption/behavior fact table (AGGREGATE KEY model: rows sharing
-- the same key columns are merged using each value column's aggregate —
-- REPLACE keeps the latest visit, SUM accumulates cost, MAX/MIN track
-- dwell-time extremes).
CREATE TABLE IF NOT EXISTS tmp.user_info
(
`user_id` LARGEINT NOT NULL COMMENT "用户 id",
`date` DATE NOT NULL COMMENT "数据灌入日期时间",
`city` VARCHAR(20) COMMENT "用户所在城市",
`age` SMALLINT COMMENT "用户年龄",
`sex` TINYINT COMMENT "用户性别",
`last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间",
`cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费",
`max_dwell_time` INT MAX DEFAULT "0" COMMENT "用户最大停留时间",
`min_dwell_time` INT MIN DEFAULT "99999" COMMENT "用户最小停留时间"
)
ENGINE=olap
AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`)
COMMENT "用户消费和行为记录表"
-- Partitioning: none defined for this table.
-- Bucketing:
DISTRIBUTED BY HASH(`user_id`) BUCKETS 8
PROPERTIES (
"replication_num" = "1"
);
-- Seed data; note the two rows for (10000, 2017-10-01, ...) aggregate
-- into one row on load.
insert into tmp.user_info values
(10000,'2017-10-01','北京',20,0,'2017-10-01 06:00:00',20,10,10),
(10000,'2017-10-01','北京',20,0,'2017-10-01 07:00:00',15,2,2),
(10001,'2017-10-01','北京',30,1,'2017-10-01 17:05:45',2,22,22),
(10002,'2017-10-02','上海',20,1,'2017-10-02 12:59:12',200,5,5),
(10003,'2017-10-02','广州',32,0,'2017-10-02 11:20:00',30,11,11),
(10004,'2017-10-01','深圳',35,0,'2017-10-01 10:00:15',100,3,3),
(10004,'2017-10-03','深圳',35,0,'2017-10-03 10:20:22',11,6,6);
-- Order aggregation demo: rows sharing the same AGGREGATE KEY are rolled
-- up automatically; `total` and `pay` use SUM.
-- Result columns: order date, order id, user id, product id,
-- total item count, total amount paid.
-- IF NOT EXISTS added so the script can be re-run safely.
create table if not exists tmp.order_info
(
`date` date COMMENT "日期时间",
`oid` bigint COMMENT "订单id",
`userid` varchar(255) COMMENT "用户id",
`spid` varchar(255) COMMENT "商品id",
`total` int sum COMMENT "商品总数",
`pay` int sum COMMENT "支付总金额"
)
ENGINE=olap
AGGREGATE KEY(`date`,`oid`,`userid`,`spid`)
COMMENT "订单聚合表"
-- Range partitions: each partition holds one day (upper bound exclusive).
partition by range(`date`)
(
partition `p20221201` values less than ("2022-12-02"),
partition `p20221202` values less than ("2022-12-03"),
partition `p20221203` values less than ("2022-12-04")
)
DISTRIBUTED BY HASH(`userid`) BUCKETS 2
PROPERTIES (
"replication_num" = "1"
);
-- Insert data: order date, order id, user id, product id, item count, amount.
-- The two (2022-12-01, 1, u01, p01) rows aggregate into one row on load.
insert into tmp.order_info values
('2022-12-01',1,'u01','p01',2,20),
('2022-12-01',1,'u01','p02',1,10),
('2022-12-01',1,'u01','p01',1,10),
('2022-12-01',2,'u02','p03',2,40);