Centos7搭建SkyWalking监控单机及集群服务器

Skywalking最新发布版5.0.0 beta2的单机及集群安装说明

1. 单机部署

  1. 单机部署组件需求:
    1. H2或ElasticSearch单机,以下仅示例ElasticSearch安装及配置,便于拓展集群部署
  2. 官方说明:
    1. 单机模式默认使用本地H2数据库,不支持集群部署。主要用于:预览、功能测试、演示和低压力系统。
    2. 如果使用单机collector用于非演示环境,你可选择使用Elasticsearch作为存储实现。
  3. 版本需求:
    ElasticSearch: 5.x(建议使用5.x最高版本5.6.10)
  4. 第三方软件需求:
    JDK8+
  5. 被监控软件需求:
    JDK6+

1.1 ElasticSearch安装说明

#安装ElasticSearch索引数据库,提供数据存储及快速查询
wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-5.6.10.tar.gz

tar -xf elasticsearch-5.6.10.tar.gz
mv elasticsearch-5.6.10 elasticsearch

# Create the data and log directories used for SkyWalking's storage.
# The directory names must match path.data / path.logs configured below.
cd elasticsearch/
mkdir -p data logs

cd config/

# Configure the single Elasticsearch node; data and logs use absolute paths
cat >> elasticsearch.yml << 'EOF'
cluster.name: CollectorDBCluster
node.name: node-1
path.data: /home/mon/apps/elasticsearch/data
path.logs: /home/mon/apps/elasticsearch/logs
network.host: 0.0.0.0
thread_pool.bulk.queue_size: 1000
http.port: 9200
EOF

#查看日志
tail -100f /home/mon/apps/elasticsearch/logs/CollectorDBCluster.log

# Create the start/stop script and register it as a system service.
# The previous step left us in config/, so bin/ is a sibling directory.
cd ../bin/
# Quote the delimiter so $variables and $(commands) are written literally
# instead of being expanded while the file is generated; use '>' so that
# re-running this step does not append a second copy of the script.
cat > elasticsearch.sh << 'EOF'
#!/usr/bin/env bash
# Resolve the real location of this script (following symlinks, so it also
# works when invoked through a link such as /etc/init.d/elasticsearch) and
# make that directory the working directory.
SCRIPT_PATH="$(readlink -f -- "$0")"
cd "$(dirname -- "$SCRIPT_PATH")" || exit 1

# Directory that holds this script and the elasticsearch launcher
ES_HOME="$(pwd)"
EXEC_PATH=$ES_HOME
EXEC=elasticsearch
# Full path of the launcher that start() runs
DAEMON=$EXEC_PATH/$EXEC
# File in which start() records the PID of the launched process
PID_FILE=$ES_HOME/elasticsearch.pid
# Name used in messages and as the pattern for the process listing
ServiceName='Elasticsearch'

# Requested action: start | stop | restart | status
ACTION=${1:-}

# Colored output helpers: print $1 wrapped in an ANSI color sequence
# (red / green / yellow) followed by a reset and a newline.
echoRed() {
    printf '\e[0;31m%s\e[0m\n' "$1"
}
echoGreen() {
    printf '\e[0;32m%s\e[0m\n' "$1"
}
echoYellow() {
    printf '\e[0;33m%s\e[0m\n' "$1"
}

# Print the supported sub-commands (padded by blank lines) and exit with
# status 1.
usage() {
    local padding=$'\n\n\n'
    echo "$padding"
    echoRed "Usage: ${0} support command {start|stop|restart|status}"
    echo "$padding"
    exit 1
}

# List the processes whose `ps -ef` line mentions $ServiceName, excluding
# the grep itself and any line containing start/stop/status/restart (which
# filters out invocations of this control script).
# Globals: ServiceName (read)
# Outputs: a banner line followed by the matching process lines
psCheck() {
    echo "-----------------All instances in this machine--------------------"
    # Quote the pattern so a name containing spaces is not split into
    # extra grep arguments; '--' protects a pattern starting with '-'.
    echo "$(ps -ef | grep -- "${ServiceName}" | grep -E -v "grep|start|stop|status|restart")"
}

# Pause to give a freshly launched process time to boot.
# Arguments: $1 - seconds to wait (optional, default 10)
# Returns:   the exit status of sleep
await() {
    local seconds=${1:-10}
    # A single sleep replaces the former date-polling loop, which compared an
    # unset variable on its first pass and leaked `end`/`now` as globals.
    sleep "$seconds"
}

# 1. Validate the invocation: an action argument is required
[ $# -gt 0 ] || usage

# The launcher must exist and be executable. "$DAEMON" is quoted so that an
# empty value is reported instead of making the test silently pass.
if [ ! -x "$DAEMON" ]; then
    echo "ERROR: $DAEMON not found" >&2
    exit 1
fi


# Check whether a process with the given PID exists.
# Arguments: $1 - process id
# Returns:   exit status of `ps -p` (0 when the process exists)
isRunningPid() {
    ps -p "$1" >/dev/null 2>&1
}

# Check whether the process recorded in $PID_FILE is alive.
# Globals: PID_FILE (read)
# Returns: 1 when the PID file is missing, otherwise the exit status of
#          `ps -p` for the recorded PID
isRunning() {
    if [[ ! -f "$PID_FILE" ]]; then
        return 1
    fi
    local recorded_pid
    recorded_pid=$(<"$PID_FILE")
    ps -p "$recorded_pid" >/dev/null 2>&1
}

# Launch $DAEMON in the background, record its PID in $PID_FILE and wait
# until the process is seen running.
# Globals: ServiceName, DAEMON, PID_FILE (read); TIMEOUT (written)
# Outputs: progress and result messages on stdout
# Returns: 0 after a successful start; exits the script with 0 when the
#          service is already running and with 1 on start timeout
start() {
    echo "--------------Starting $ServiceName:"
    echo $'\n\n\n'

    # Nothing to do when the PID file already points at a live process
    if isRunning; then
        echoYellow "Result: Running, no need to start"
        echo $'\n\n\n'
        exit 0
    fi

    echo "Boot Command: nohup $DAEMON"
    echo $'\n\n\n'

    nohup "$DAEMON" >/dev/null 2>&1 &
    #$DAEMON -d

    # Capture the PID once: $! is then used for both disown and the PID file
    local daemon_pid=$!
    disown "$daemon_pid"
    echo "$daemon_pid" >"$PID_FILE"

    # Give the process time to boot before polling
    await

    # Poll once per second, giving up after 100 attempts
    TIMEOUT=100
    while ! isRunning; do
        if ((TIMEOUT-- == 0)); then
            echoRed "Result: Start timeout"
            echo $'\n\n\n'
            exit 1
        fi
        sleep 1
    done

    echoGreen "Result: Start success,Running (PID: $(<"$PID_FILE"))"
    echo $'\n\n\n'

    psCheck
}

# Stop the process recorded in $PID_FILE: send SIGTERM, wait up to about
# 30 seconds for it to exit, then escalate to SIGKILL.
# Globals: ServiceName, PID_FILE (read)
# Outputs: progress and result messages on stdout
# Returns: 0 (also when the service was not running)
stop() {
    echo "--------------Stopping $ServiceName:"
    echo $'\n\n\n'
    if [ ! -f "$PID_FILE" ]; then
        echoYellow "Result: Not running"
        echo $'\n\n\n'
        psCheck
        return 0
    fi

    local pid
    pid=$(<"$PID_FILE")

    # Stale PID file: it exists but is empty or names a process that is gone
    if [[ -z "$pid" ]] || ! isRunningPid "$pid"; then
        echoRed "Result: Not running (PID: $pid not found)"
        echo $'\n\n\n'
        psCheck
        rm -f "$PID_FILE"
        return 0
    fi

    kill "$pid" 2>/dev/null

    # Once the grace period is used up, send SIGKILL on every further pass so
    # a process that ignores SIGTERM cannot keep this loop running forever.
    local remaining=30
    while isRunning; do
        if ((remaining-- <= 0)); then
            kill -KILL "$pid" 2>/dev/null
        fi
        sleep 1
    done
    rm -f "$PID_FILE"
    echoGreen "Result: Stop success"
    echo $'\n\n\n'
}

# Report whether the process recorded in $PID_FILE is running.
# Globals: ServiceName, PID_FILE (read)
# Outputs: a result message followed by the process listing from psCheck
# Returns: 0 when running, 1 when the PID file is missing or the recorded
#          process does not exist
status() {
    echo "--------------Status $ServiceName:"
    echo $'\n\n\n'
    if [[ ! -f "$PID_FILE" ]]; then
        echoYellow "Result: Not running"
        echo $'\n\n\n'
        psCheck
        return 1
    fi

    # Quoted so a path with spaces or an empty PID file cannot change how
    # the commands below are parsed
    local pid
    pid=$(<"$PID_FILE")

    if isRunningPid "$pid"; then
        echoGreen "Result: Running (PID: $pid )"
        echo $'\n\n\n'
        psCheck
        return 0
    fi

    echoRed "Result: Not running (PID: $pid not found)"
    echo $'\n\n\n'
    psCheck
    return 1
}

# Dispatch the requested action and remember its exit status
rc=0
case "$ACTION" in
start)
    start || rc=$?
    ;;

stop)
    stop || rc=$?
    ;;

restart)
    stop
    start || rc=$?
    ;;

status)
    status || rc=$?
    ;;
*)
    usage
    ;;
esac

# Exit with the action's status (instead of an unconditional 0) so callers
# such as `service elasticsearch status` can tell when the service is down.
exit "$rc"
EOF

# Make the control script executable and link it where the `service` command
# looks for init scripts (/etc/init.d). The link target is produced by
# command substitution; single quotes would create a link whose target is
# the literal text "readlink -f ...".
chmod +x /home/mon/apps/elasticsearch/bin/elasticsearch.sh
ln -s "$(readlink -f /home/mon/apps/elasticsearch/bin/elasticsearch.sh)" /etc/init.d/elasticsearch

#直接后台启动
./elasticsearch -d

#启动
service elasticsearch start && service elasticsearch status

# Open the Elasticsearch HTTP (9200) and transport (9300) ports.
# One command per line: a backslash followed by a space escapes the space,
# not the newline, and makes the shell try to run a command named " ".
firewall-cmd --add-port=9200/tcp --permanent
firewall-cmd --add-port=9300/tcp --permanent
firewall-cmd --reload

1.1.1 URL查询

  • 数据索引查询: http://10.20.30.123:9200/_cat/indices
  • 集群节点信息: http://10.20.30.123:9200/_cat/health?v

1.2 安装Skywalking单机模式

#包括Collector收集器,Agent探针,WebUI响应界面
wget http://www-eu.apache.org/dist/incubator/skywalking/5.0.0-beta2/apache-skywalking-apm-incubating-5.0.0-beta2.tar.gz

tar -xf apache-skywalking-apm-incubating-5.0.0-beta2.tar.gz

mv apache-skywalking-apm-incubating skywalking
cd skywalking/bin

#启动Collector、WebUI
./startup.sh

# Enable the agent on the target application (the option is -javaagent:<path>, with a colon)
java -javaagent:/home/mon/apps/skywalking/agent/skywalking-agent.jar -jar order.jar
# Same, with extra JVM properties defining the namespace and the application code
java -javaagent:/home/mon/apps/skywalking/agent/skywalking-agent.jar -Dskywalking.agent.namespace=order -Dskywalking.agent.application_code=order -jar order.jar

1.2.1 SkyWalking监控界面URL: http://10.20.30.123:8080

1.3 部署探针

  1. 拷贝skywalking-agent目录到所需位置,探针包含整个目录,请不要改变目录结构
  2. 增加JVM启动参数,-javaagent:/path/to/skywalking-agent/skywalking-agent.jar。参数值为skywalking-agent.jar的绝对路径。

新目录结构如下:

+-- skywalking-agent
    +-- activations
         apm-toolkit-log4j-1.x-activation.jar
         apm-toolkit-log4j-2.x-activation.jar
         apm-toolkit-logback-1.x-activation.jar
         ...
    +-- config
         agent.config  
    +-- plugins
         apm-dubbo-plugin.jar
         apm-feign-default-http-9.x.jar
         apm-httpClient-4.x-plugin.jar
         .....
    skywalking-agent.jar
  • /config/agent.config包含探针所需配置,中文说明如下。
# 当前的应用编码,最终会显示在webui上。
# 建议同一个应用的多个实例使用相同的application_code。请使用英文
agent.application_code=Your_ApplicationName

# 每三秒采样的Trace数量
# 默认为负数,代表在保证不超过内存Buffer区的前提下,采集所有的Trace
# agent.sample_n_per_3_secs=-1

# 设置需要忽略的请求地址
# 默认配置如下
# agent.ignore_suffix=.jpg,.jpeg,.js,.css,.png,.bmp,.gif,.ico,.mp3,.mp4,.html,.svg

# 探针调试开关,如果设置为true,探针会将所有操作字节码的类输出到/debugging目录下
# skywalking团队可能在调试,需要此文件
# agent.is_open_debugging_class = true

# 对应Collector的config/application.yml配置文件中 agent_server/jetty/port 配置内容
# 例如:
# 单节点配置:collector.servers=127.0.0.1:10800
# 集群配置:collector.servers=10.2.45.126:10800,10.2.45.127:10800
collector.servers=127.0.0.1:10800

# 日志文件名称前缀
logging.file_name=skywalking-agent.log

# 日志文件最大大小
# 如果超过此大小,则会生成新文件。
# 默认为300M
logging.max_file_size=314572800

# 日志级别,默认为DEBUG。
logging.level=DEBUG
  • 启动被监控应用。

1.3.1 高级特性

  • 插件会被统一放置在plugins目录中,新的插件,也只需要在启动阶段,放在目录中,就自动生效。删除则失效。
  • 配置除了通过/config/agent.config文件外,可以通过环境变量和VM参数(-D)来进行设置
    • 参数的key = skywalking. + agent.config文件中的key
    • 优先级:系统环境变量 > VM参数(-D) > /config/agent.config中的配置
  • Log默认使用文件输出,输出到/logs目录中

1.3.2 Tomcat配置探针FAQ

  • Linux Tomcat 7, Tomcat 8
    修改tomcat/bin/catalina.sh,在首行加入如下信息
CATALINA_OPTS="$CATALINA_OPTS -javaagent:/path/to/skywalking-agent/skywalking-agent.jar"; export CATALINA_OPTS
  • Windows Tomcat 7, Tomcat 8
    修改tomcat/bin/catalina.bat,在首行加入如下信息
set "CATALINA_OPTS=-javaagent:/path/to/skywalking-agent/skywalking-agent.jar"
  • JAR 部署
    修改启动命令添加启动参数-javaagent
 java -javaagent:/path/to/skywalking-agent/skywalking-agent.jar -jar yourApp.jar

2. 集群部署

  1. 组件需求:
    1. Elasticsearch 5.x
    2. Zookeeper 3.4.10
  2. 其他需求同上单机部署

2.1 Elasticsearch集群部署

安装参考上面的单机部署,以下仅演示配置及集群部署

#安装ElasticSearch索引数据库,提供数据存储及快速查询
wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-5.6.10.tar.gz

tar -xf elasticsearch-5.6.10.tar.gz
mv elasticsearch-5.6.10 elasticsearch

# Create the data and log directories used for SkyWalking's storage.
# The directory names must match path.data / path.logs configured below.
cd elasticsearch/
mkdir -p data logs

cd config/

# Configure this Elasticsearch cluster node; data and logs use absolute paths.
# NOTE(review): node.name should presumably differ on each machine
# (node-1 / node-2 / node-3) - confirm and adjust after copying.
cat >> elasticsearch.yml << 'EOF'
cluster.name: CollectorDBCluster
node.name: node-1
path.data: /home/mon/apps/elasticsearch/data
path.logs: /home/mon/apps/elasticsearch/logs
network.host: 0.0.0.0
thread_pool.bulk.queue_size: 1000
http.port: 9200
discovery.zen.ping.unicast.hosts: ["10.20.30.123:9300", "10.20.30.124:9300","10.20.30.125:9300"]
discovery.zen.minimum_master_nodes: 2
EOF

# Copy the configured Elasticsearch to the other machines of the cluster.
# An absolute source path is used because the previous step left the shell
# inside elasticsearch/config.
# NOTE(review): the remote user was garbled in the source text; "mon" is
# assumed from the /home/mon install path - confirm.
scp -r /home/mon/apps/elasticsearch mon@10.20.30.124:/home/mon/apps
scp -r /home/mon/apps/elasticsearch mon@10.20.30.125:/home/mon/apps

#开启防火墙端口
firewall-cmd --zone=public --add-port=9200/tcp --permanent
firewall-cmd --zone=public --add-port=9300/tcp --permanent
firewall-cmd --reload

2.2 Zookeeper集群部署

#安装zookeeper注册发现中心
wget https://archive.apache.org/dist/zookeeper/zookeeper-3.4.10/zookeeper-3.4.10.tar.gz

tar -xf zookeeper-3.4.10.tar.gz
mv zookeeper-3.4.10 zookeeper
cd zookeeper/conf/

# Write the ZooKeeper ensemble configuration; dataDir uses an absolute path.
# NOTE(review): the heredoc body was lost in the source text. The values
# below are reconstructed from the hosts, ports (2181/2888/3888) and myid
# path used elsewhere in this guide - verify before use.
cat > zoo.cfg << 'EOF'
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/home/mon/apps/zookeeper/tmp
clientPort=2181
server.1=10.20.30.123:2888:3888
server.2=10.20.30.124:2888:3888
server.3=10.20.30.125:2888:3888
EOF

# Create the data directory and this node's id file (server.1)
mkdir -p /home/mon/apps/zookeeper/tmp
echo 1 > /home/mon/apps/zookeeper/tmp/myid

# Distribute the configured ZooKeeper to the other nodes
# NOTE(review): the remote user was garbled in the source text; "mon" is
# assumed from the /home/mon install path - confirm.
scp -r /home/mon/apps/zookeeper mon@10.20.30.124:/home/mon/apps
scp -r /home/mon/apps/zookeeper mon@10.20.30.125:/home/mon/apps
# NOTE: on the other two servers, myid must match that host's server.N entry
# server.2 (run on 10.20.30.124)
echo 2 > /home/mon/apps/zookeeper/tmp/myid
# server.3 (run on 10.20.30.125)
echo 3 > /home/mon/apps/zookeeper/tmp/myid

#开放防火墙端口
firewall-cmd --zone=public --add-port=2181/tcp --permanent
firewall-cmd --zone=public --add-port=2888/tcp --permanent
firewall-cmd --zone=public --add-port=3888/tcp --permanent
firewall-cmd --reload

#启动集群,按server.1、server.2、server.3的顺序分别启动Zookeeper服务
cd /home/mon/apps/zookeeper/bin
./zkServer.sh start

#停止集群,分别停止Zookeeper服务
./zkServer.sh stop

#查询Zookeeper节点状态
./zkServer.sh status

2.3 SkyWalking集群部署

#包括Collector收集器,Agent探针,WebUI响应界面
wget http://www-eu.apache.org/dist/incubator/skywalking/5.0.0-beta2/apache-skywalking-apm-incubating-5.0.0-beta2.tar.gz

tar -xf apache-skywalking-apm-incubating-5.0.0-beta2.tar.gz

mv apache-skywalking-apm-incubating skywalking

# Point the collector at the ZooKeeper and Elasticsearch clusters.
# sed edits config/application.yml directly: the former
# `find ... | grep admin | xargs sed` pipeline matched no path, leaving sed
# without an input file.
cd skywalking/config
sed -i \
    -e 's/#cluster:/cluster:/g' \
    -e 's/#  zookeeper:/  zookeeper:/g' \
    -e 's/#    hostPort: localhost:2181/    hostPort: 10.20.30.123:2181,10.20.30.124:2181,10.20.30.125:2181/g' \
    -e 's/#    sessionTimeout: 100000/    sessionTimeout: 100000/g' \
    -e 's/    host: localhost/    host: 0.0.0.0/g' \
    -e 's/    clusterNodes: localhost:9300/    clusterNodes: 10.20.30.123:9300,10.20.30.124:9300,10.20.30.125:9300/g' \
    application.yml

# Let the WebUI talk to every collector in the cluster
cd ../webapp
sed -i 's/    listOfServers: 127.0.0.1:10800/    listOfServers: 10.20.30.123:10800,10.20.30.124:10800,10.20.30.125:10800/g' webapp.yml

# Optional example: change the WebUI port (default 8080) in webapp.yml.
# If you change it, open the new port in the firewall instead of 8080.
sed -i 's/  port: 8080/  port: 8081/g' webapp.yml

#开放防火墙端口
#8080为WebUI组件使用的端口号,可在webapp.yml中修改
firewall-cmd --zone=public --add-port=8080/tcp --permanent
firewall-cmd --zone=public --add-port=10800/tcp --permanent
firewall-cmd --zone=public --add-port=11800/tcp --permanent
firewall-cmd --zone=public --add-port=12800/tcp --permanent
firewall-cmd --reload

#启动Collector、WebUI,在集群机器上分别启动
cd ../bin
./startup.sh

#也可分别启动Collector和WebUI
./collectorService.sh
./webappService.sh

2.4 Agent探针集群部署

# Point the agent at the collector cluster (the agent's settings live in agent.config)
cd ../agent/config
sed -i 's/# collector.direct_servers=www.skywalking.service.io/collector.direct_servers=10.20.30.123:11800,10.20.30.124:11800,10.20.30.125:11800/g' agent.config

# Distribute the agent directory from the machine where SkyWalking is installed
cd /home/mon/apps/skywalking
# NOTE(review): the scp target was garbled in the source text; replace
# APP_SERVER_IP with the host running the monitored application, and confirm
# the remote user ("mon" is assumed from the /home/mon install path).
scp -r agent mon@APP_SERVER_IP:/home/mon/apps/

#探针启动方式参考上面单机模式部署

你可能感兴趣的:(Centos7搭建SkyWalking监控单机及集群服务器)