【踩坑】Centos7 open-falcon监控总结(非docker部署)

一、背景

#

二、操作步骤

1.配置yum源
yum install epel-release -y
yum clean all
yum makecache
2.安装git
yum install git -y

[root@wushengtest-agent1 ~]# git version
git version 1.8.3.1
3.下载redis和mysql-server
yum install -y redis mysql-server

[root@wushengtest-agent2 ~]# systemctl start redis
[root@wushengtest-agent2 ~]# systemctl enable redis
Created symlink from /etc/systemd/system/multi-user.target.wants/redis.service to /usr/lib/systemd/system/redis.service.
[root@wushengtest-agent1 ~]# systemctl status redis
● redis.service - Redis persistent key-value database
   Loaded: loaded (/usr/lib/systemd/system/redis.service; enabled; vendor preset: disabled)
  Drop-In: /etc/systemd/system/redis.service.d
           └─limit.conf
   Active: active (running) since Sun 2018-09-02 12:40:34 CST; 10s ago
 Main PID: 1606 (redis-server)
   CGroup: /system.slice/redis.service
           └─1606 /usr/bin/redis-server 127.0.0.1:6379

Sep 02 12:40:34 wushengtest-agent1 systemd[1]: Starting Redis persistent key-value database...
Sep 02 12:40:34 wushengtest-agent1 systemd[1]: Started Redis persistent key-value database.

[root@wushengtest-agent2 ~]# systemctl start mysqld
[root@wushengtest-agent2 ~]# systemctl enables mysqld
Unknown operation 'enables'.
[root@wushengtest-agent2 ~]# systemctl enable mysqld
[root@wushengtest-agent2 ~]# systemctl status mysqld
● mysqld.service - MySQL Server
   Loaded: loaded (/usr/lib/systemd/system/mysqld.service; enabled; vendor preset: disabled)
   Active: active (running) since Thu 2018-08-30 16:45:03 CST; 2 days ago
     Docs: man:mysqld(8)
           http://dev.mysql.com/doc/refman/en/using-systemd.html
 Main PID: 1421 (mysqld)
   CGroup: /system.slice/mysqld.service
           └─1421 /usr/sbin/mysqld --daemonize --pid-file=/var/run/mysqld/mysqld.pid

Warning: Journal has been rotated since unit was started. Log output is incomplete or unavailable.
4.后端安装
mkdir -p /home/src/github.com/open-falcon/
cd /home/src/github.com/open-falcon/
git clone https://github.com/open-falcon/falcon-plus.git
5.初始化MySQL表结构(请确保redis和MySQL已启动)
[root@wushengtest-agent2 db_schema]# cd /home/src/github.com/open-falcon/ && git clone https://github.com/open-falcon/falcon-plus.git 
[root@wushengtest-agent2 db_schema]# cd /home/src/github.com/open-falcon/falcon-plus/scripts/mysql/db_schema/
[root@wushengtest-agent2 db_schema]# mysql -h 192.168.10.137 -u root -p < 1_uic-db-schema.sql
Enter password: 
[root@wushengtest-agent2 db_schema]# mysql -h 192.168.10.137 -u root -p < 2_portal-db-schema.sql
Enter password: 
[root@wushengtest-agent2 db_schema]# mysql -h 192.168.10.137 -u root -p < 3_dashboard-db-schema.sql
Enter password: 
[root@wushengtest-agent2 db_schema]# mysql -h 192.168.10.137 -u root -p < 4_graph-db-schema.sql
Enter password: 
[root@wushengtest-agent2 db_schema]# mysql -h 192.168.10.137 -u root -p < 5_alarms-db-schema.sql
Enter password: 
6.编译源码打包
[root@wushengtest-agent2 falcon-plus]# cd /home/src/github.com/open-falcon/falcon-plus
[root@wushengtest-agent2 falcon-plus]# go get github.com/open-falcon/rrdlite
[root@wushengtest-agent2 falcon-plus]# make all
go build -o bin/agent/falcon-agent ./modules/agent
go build -o bin/aggregator/falcon-aggregator ./modules/aggregator
go build -o bin/graph/falcon-graph ./modules/graph
go build -o bin/hbs/falcon-hbs ./modules/hbs
go build -o bin/judge/falcon-judge ./modules/judge
go build -o bin/nodata/falcon-nodata ./modules/nodata
go build -o bin/transfer/falcon-transfer ./modules/transfer
go build -o bin/gateway/falcon-gateway ./modules/gateway
go build -o bin/api/falcon-api ./modules/api
go build -o bin/alarm/falcon-alarm ./modules/alarm
go build -ldflags "-X main.GitCommit=`git rev-parse --short HEAD` -X main.Version=0.2.1" -o open-falcon
[root@wushengtest-agent2 falcon-plus]# make pack
go build -ldflags "-X main.GitCommit=`git rev-parse --short HEAD` -X main.Version=0.2.1" -o open-falcon
tar -C out -zcf open-falcon-v0.2.1.tar.gz .
#由以上可知,在该路径下生成了open-falcon-v0.2.1.tar.gz,也可以直接访问 https://github.com/open-falcon/falcon-plus/releases 下载编译好的压缩包
7.部署后端
[root@wushengtest-agent2 falcon-plus]# mkdir -p /home/work
[root@wushengtest-agent2 falcon-plus]# cd /home/src/github.com/open-falcon/falcon-plus
[root@wushengtest-agent2 falcon-plus]# tar -xzvf open-falcon-v0.2.1.tar.gz -C /home/work/

#修改配置
aggregator                          /home/work/aggregator/config/cfg.json
graph                               /home/work/graph/config/cfg.json
hbs                                 /home/work/hbs/config/cfg.json
nodata                              /home/work/nodata/config/cfg.json
api                                 /home/work/api/config/cfg.json
alarm                               /home/work/alarm/config/cfg.json

[root@wushengtest-agent2 work]# grep -Ilr 3306  ./
./aggregator/config/cfg.json
./graph/config/cfg.json
./hbs/config/cfg.json
./nodata/config/cfg.json
./api/config/cfg.json
./alarm/config/cfg.json

【采坑】Centos7 open-falcon监控总结(非docker部署)_第1张图片

其他几个配置文件与之相同
8.启动后端
[root@wushengtest-agent2 work]# ./open-falcon start
[falcon-graph] 84620
[falcon-hbs] 84630
[falcon-judge] 84637
[falcon-transfer] 84643
[falcon-nodata] 84649
[falcon-aggregator] 84657
[falcon-agent] 84665
[falcon-gateway] 84672
[falcon-api] 84678
[falcon-alarm] 84686
9.部署前端
[root@wushengtest-agent2 work]# mkdir -p /home/front/open-falcon
[root@wushengtest-agent2 work]# git clone https://github.com/open-falcon/dashboard.git
Cloning into 'dashboard'...
remote: Counting objects: 1093, done.
remote: Compressing objects: 100% (39/39), done.
remote: Total 1093 (delta 16), reused 25 (delta 8), pack-reused 1044
Receiving objects: 100% (1093/1093), 1.45 MiB | 327.00 KiB/s, done.
Resolving deltas: 100% (582/582), done.

[root@wushengtest-agent2 work]# yum install -y python-virtualenv python-devel openldap-devel mysql-devel
[root@wushengtest-agent2 work]# yum groupinstall "Development tools" -y

#安装dashboard
[root@wushengtest-agent2 open-falcon]# cd /home/front/open-falcon/dashboard/
[root@wushengtest-agent2 dashboard]# virtualenv ./env
New python executable in /home/front/open-falcon/dashboard/env/bin/python
Installing setuptools, pip, wheel...done.
[root@wushengtest-agent2 dashboard]# ls
babel.cfg  control  Dockerfile  entrypoint.sh  env  gunicorn.conf  i18n.md  LICENSE  NOTICE  pip_requirements.txt  README.md  rrd  screenshots  scripts  wsgi.py
[root@wushengtest-agent2 dashboard]# ./env/bin/pip install -r pip_requirements.txt

#pip_requirements.txt内容如下
Flask==0.10.1
Flask-Babel==0.9
Jinja2==2.7.2
Werkzeug==0.9.4
gunicorn==19.9.0
python-dateutil==2.2
requests==2.3.0
mysql-python
python-ldap
10.启动open-falcon
[root@wushengtest-agent2 falcon-plus]# cd /home/front/open-falcon/dashboard/
[root@wushengtest-agent2 dashboard]# bash control start
falcon-dashboard started..., pid=100002
[root@wushengtest-agent2 dashboard]# bash control status
falcon-dashboard now is running, pid=100002
11.通过浏览器,输入http://192.168.10.137:8081/auth/login 访问

【采坑】Centos7 open-falcon监控总结(非docker部署)_第2张图片

12.dashboard没有默认创建任何账号(包括管理账号),需要你通过页面注册账号。
想拥有管理全局的超级管理员账号,需要手动注册用户名为root的账号(第一个帐号名称为root的用户会被自动设置为超级管理员)。
超级管理员可以给普通用户分配权限管理。
小提示:任何能打开dashboard页面的人都可以注册账号,所以当给相关的人注册完账号后,需要去关闭注册账号功能。只需要去修改api组件的配置文件cfg.json,将signup_disable配置项修改为true,重启api即可。当需要给人开账号的时候,再将配置选项改回去,用完再关掉即可。
13.安装open-falcon agent
通过scp命令把open-falcon服务端/home/work/目录下的agent目录、open-falcon脚本以及public、plugins目录发送到需要监控的服务器上即可。(网上很多教程表示只需将agent目录和open-falcon脚本远程传输到相应服务器上即可,但是我复制过去并启动agent后,端口1988已监听,日志中也正常无报错,但是通过ip:端口访问显示404。经调试,将agent同级的public和plugins目录也复制过去后恢复正常。)然后将/home/work/agent/config/cfg.json中heartbeat的addr和transfer的addrs修改为服务端的ip:端口。
./open-falcon start agent
./falcon-agent --check
/home/work/agent/config/cfg.json
{
    "debug": true,   # 控制一些debug信息的输出,生产环境通常设置为false
    "hostname": "",  # agent采集了数据发给transfer,endpoint就设置为了hostname,默认通过`hostname`获取,如果配置中配置了hostname,就用配置中的
    "ip": "", # agent与hbs心跳的时候会把自己的ip地址发给hbs,agent会自动探测本机ip,如果不想让agent自动探测,可以手工修改该配置
    "plugin": {
        "enabled": false, # 默认不开启插件机制
        "dir": "./plugin",  # 把放置插件脚本的git repo clone到这个目录
        "git": "https://github.com/open-falcon/plugin.git", # 放置插件脚本的git repo地址
        "logs": "./logs" # 插件执行的log,如果插件执行有问题,可以去这个目录看log
    },
    "heartbeat": {
        "enabled": true,  # 此处enabled要设置为true
        "addr": "127.0.0.1:6030", # hbs的地址,端口是hbs的rpc端口
        "interval": 60, # 心跳周期,单位是秒
        "timeout": 1000 # 连接hbs的超时时间,单位是毫秒
    },
    "transfer": {
        "enabled": true, 
        "addrs": [
            "127.0.0.1:18433"
        ],  # transfer的地址,端口是transfer的rpc端口, 可以支持写多个transfer的地址,agent会保证HA
        "interval": 60, # 采集周期,单位是秒,即agent一分钟采集一次数据发给transfer
        "timeout": 1000 # 连接transfer的超时时间,单位是毫秒
    },
    "http": {
        "enabled": true,  # 是否要监听http端口
        "listen": ":1988",
        "backdoor": false
    },
    "collector": {
        "ifacePrefix": ["eth", "em"], # 默认配置只会采集网卡名称前缀是eth、em的网卡流量,配置为空就会采集所有的,lo的也会采集。可以从/proc/net/dev看到各个网卡的流量信息
        "mountPoint": []
    },
    "default_tags": {
    },
    "ignore": {  # 默认采集了200多个metric,可以通过ignore设置为不采集
        "cpu.busy": true,
        "df.bytes.free": true,
        "df.bytes.total": true,
        "df.bytes.used": true,
        "df.bytes.used.percent": true,
        "df.inodes.total": true,
        "df.inodes.free": true,
        "df.inodes.used": true,
        "df.inodes.used.percent": true,
        "mem.memtotal": true,
        "mem.memused": true,
        "mem.memused.percent": true,
        "mem.memfree": true,
        "mem.swaptotal": true,
        "mem.swapused": true,
        "mem.swapfree": true
    }
}

这里写图片描述

客户端目录展示

这里写图片描述

【采坑】Centos7 open-falcon监控总结(非docker部署)_第3张图片

14.访问客户端,查看数据。http://客户端ip:1988

【采坑】Centos7 open-falcon监控总结(非docker部署)_第4张图片

【采坑】Centos7 open-falcon监控总结(非docker部署)_第5张图片

三、问题总结

1.go: cannot find GOROOT directory: /usr/lib/go
解决方法:在/etc/profile中定义go的路径,不然之后的go get配置会报错
export GOROOT=/usr/lib/golang GOPATH=/home
PATH="$PATH":/usr/bin/mysql:$GOROOT/bin

source /etc/profile
2.Error: HTTPConnectionPool(host='127.0.0.1', port=8080): Max retries exceeded with url: /api/v1/user/create (Caused by <class 'socket.error'>: [Errno 111] Connection refused)
解决方法:检查./aggregator/config/cfg.json,./graph/config/cfg.json,./hbs/config/cfg.json,./nodata/config/cfg.json,./api/config/cfg.json,./alarm/config/cfg.json这几个文件中数据库账号密码的配置,原本应该是root:root(mysql密码)@xxx,我写成了root:@root,导致数据库连不上
3.未配置go命令的环境变量,导致make all报错
解决方法:同问题1中解决方法,配置go环境变量。
4.部署客户端后,修改cfg.json后日志中没有报错,访问ip:端口显示404.
解决方法:因为我把服务端的服务器也作为客户端监控,因此可以对比服务端中agent和客户端中agent的差异。将服务端与agent同级的plugins和public目录复制到客户端agent的同级目录下,重启客户端agent即可。

你可能感兴趣的:(技术总结)