Open-Falcon 监控

Posted by 小炒肉 on August 18, 2017

环境准备

官方文档 https://book.open-falcon.org/zh_0_2

安装 golang

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
wget https://storage.googleapis.com/golang/go1.6.4.linux-amd64.tar.gz

tar zxvf go1.6.4.linux-amd64.tar.gz

mv go /opt/local/

# 增加环境变量

vi /etc/profile


# Golang ENV
export GOROOT=/opt/local/go
export PATH=$PATH:$GOROOT/bin
export GOPATH=/opt/local/golang

# 使环境变量生效
source /etc/profile

go version
go version go1.6.4 linux/amd64

安装 Mysql 5.7

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# 初始化依赖

yum -y install cmake ncurses ncurses-devel bison bison-devel boost boost-devel


# 创建 mysql 用户以及相关目录

/usr/sbin/groupadd mysql
/usr/sbin/useradd -g mysql mysql
mkdir -p /opt/local/mysql/data
mkdir -p /opt/local/mysql/binlog
mkdir -p  /opt/local/mysql/logs
mkdir -p /opt/local/mysql/relaylog
mkdir -p /var/lib/mysql
mkdir -p /opt/local/mysql/etc


# 下载 源码包

wget ftp://ftp.mirrorservice.org/sites/ftp.mysql.com/Downloads/MySQL-5.7/mysql-5.7.19.tar.gz

tar zxvf mysql-5.7.19.tar.gz

cd mysql-5.7.19

cmake -DCMAKE_INSTALL_PREFIX="/opt/local/mysql" -DDEFAULT_CHARSET=utf8 \
-DMYSQL_DATADIR="/opt/local/mysql/data/" -DCMAKE_INSTALL_PREFIX="/opt/local/mysql" \
-DINSTALL_PLUGINDIR=plugin -DWITH_INNOBASE_STORAGE_ENGINE=1 -DDEFAULT_COLLATION=utf8_general_ci \
-DENABLED_LOCAL_INFILE=1 -DENABLED_PROFILING=1 -DWITH_ZLIB=system \
-DWITH_EXTRA_CHARSETS=none -DMYSQL_MAINTAINER_MODE=OFF -DEXTRA_CHARSETS=all \
-DWITH_PERFSCHEMA_STORAGE_ENGINE=1 -DWITH_MYISAM_STORAGE_ENGINE=1 \
-DDOWNLOAD_BOOST=1 -DWITH_BOOST=/usr/local/boost


make -j `cat /proc/cpuinfo | grep processor| wc -l`

make install


# 创建相关目录,授权

chmod +w /opt/local/mysql
chown -R mysql:mysql /opt/local/mysql
chmod +w /var/lib/mysql
chown -R mysql:mysql /var/lib/mysql
cp /opt/local/mysql/support-files/mysql.server  /etc/init.d/mysqld
chmod 755 /etc/init.d/mysqld
echo 'basedir=/opt/local/mysql/' >> /etc/init.d/mysqld
echo 'datadir=/opt/local/mysql/data' >>/etc/init.d/mysqld


# 创建关联

ln -s /opt/local/mysql/lib/mysql /usr/lib/mysql
ln -s /opt/local/mysql/include/mysql /usr/include/mysql
ln -s /opt/local/mysql/bin/mysql /usr/bin/mysql
ln -s /opt/local/mysql/bin/mysqldump /usr/bin/mysqldump
ln -s /opt/local/mysql/bin/myisamchk /usr/bin/myisamchk
ln -s /opt/local/mysql/bin/mysqld_safe /usr/bin/mysqld_safe
ln -s /tmp/mysql.sock /var/lib/mysql/mysql.sock


# 初始化数据库

vi /opt/local/mysql/etc/my.cnf

[client]
default-character-set=utf8mb4

[mysqld]
########basic settings########
server-id = 1
port = 3306
user = mysql
bind_address = 127.0.0.1
autocommit = 1
character_set_server=utf8mb4
collation-server=utf8mb4_unicode_ci
skip-character-set-client-handshake
init_connect='SET collation_connection = utf8mb4_unicode_ci'
init_connect='SET NAMES utf8mb4'
skip_name_resolve = 1
max_connections = 800
max_connect_errors = 1000
datadir = /opt/local/mysql/data
pid-file = /opt/local/mysql/mysql.pid
transaction_isolation = READ-COMMITTED
explicit_defaults_for_timestamp = 1
join_buffer_size = 134217728
tmp_table_size = 67108864
tmpdir = /tmp
max_allowed_packet = 16777216
sql_mode = "STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION,NO_ZERO_DATE,NO_ZERO_IN_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER"
interactive_timeout = 1800
wait_timeout = 1800
read_buffer_size = 16777216
read_rnd_buffer_size = 33554432
sort_buffer_size = 33554432
########log settings########
log_error = /opt/local/mysql/logs/mysqld.log
slow_query_log = 1
slow_query_log_file = /opt/local/mysql/logs/slow.log
log_queries_not_using_indexes = 1
log_slow_admin_statements = 1
log_slow_slave_statements = 1
log_throttle_queries_not_using_indexes = 10
expire_logs_days = 90
long_query_time = 2
min_examined_row_limit = 100
########replication settings########
master_info_repository = TABLE
relay_log_info_repository = TABLE
log_bin = /opt/local/mysql/binlog/mysql-bin
sync_binlog = 1
gtid_mode = on
enforce_gtid_consistency = 1
log_slave_updates
binlog_format = row
relay_log = /opt/local/mysql/relaylog/relay-bin
relay_log_recovery = 1
binlog_gtid_simple_recovery = 1
slave_skip_errors = ddl_exist_errors
########innodb settings########
innodb_page_size = 16384
innodb_buffer_pool_size = 8G
innodb_buffer_pool_instances = 8
innodb_buffer_pool_load_at_startup = 1
innodb_buffer_pool_dump_at_shutdown = 1
innodb_lru_scan_depth = 2000
innodb_lock_wait_timeout = 5
innodb_io_capacity = 4000
innodb_io_capacity_max = 8000
innodb_flush_method = O_DIRECT
innodb_file_format = Barracuda
innodb_file_format_max = Barracuda
innodb_log_group_home_dir = /opt/local/mysql/relaylog
innodb_undo_directory = /opt/local/mysql/binlog
innodb_undo_logs = 128
innodb_undo_tablespaces = 3
innodb_flush_neighbors = 1
innodb_log_file_size = 4G
innodb_log_buffer_size = 16777216
innodb_purge_threads = 4
innodb_large_prefix = 1
innodb_thread_concurrency = 64
innodb_print_all_deadlocks = 1
innodb_strict_mode = 1
innodb_sort_buffer_size = 67108864
############mysql 5.7 ##################
innodb_buffer_pool_dump_pct = 40
innodb_page_cleaners = 4
innodb_undo_log_truncate = 1
innodb_max_undo_log_size = 2G
innodb_purge_rseg_truncate_frequency = 128
binlog_gtid_simple_recovery=1
log_timestamps=system
transaction_write_set_extraction=MURMUR32
show_compatibility_56=on
########semi sync replication settings########
#plugin_dir=/opt/local/mysql/lib/plugin
#plugin_load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
#loose_rpl_semi_sync_master_enabled = 1
#loose_rpl_semi_sync_slave_enabled = 1
#loose_rpl_semi_sync_master_timeout = 5000



rm -rf /etc/my.cnf

cd /opt/local/mysql/bin/

/opt/local/mysql/bin/mysqld --initialize --user=mysql --basedir=/opt/local/mysql --datadir=/opt/local/mysql/data

# 查看 mysql 密码

cat /opt/local/mysql/logs/mysqld.log |grep password


# 启动 mysql

service mysqld start

chkconfig mysqld on

# 设置安全配置

/opt/local/mysql/bin/mysql_secure_installation -uroot -p


# 登录 mysql

mysql -uroot -p

安装 redis

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# 下载 redis

wget http://download.redis.io/releases/redis-3.2.10.tar.gz

tar zxvf redis-3.2.10.tar.gz 

cd redis-3.2.10

make

make install

cd  utils

./install_server.sh

...... 输入相关信息 ......

cd /etc/init.d/

mv redis_6379  redis


# 创建 目录

mkdir -p /opt/local/redis/{data,logs,conf}

cd /opt/local/redis/conf

vi redis.conf


bind 127.0.0.1
protected-mode yes
port 6379
tcp-backlog 2048
timeout 0
tcp-keepalive 300
daemonize yes
supervised no
pidfile /var/run/redis.pid
loglevel notice
logfile "/opt/local/redis/logs/redis.log"
maxmemory 10gb
databases 16
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename redis_dump.rdb
dir /opt/local/redis/data
slave-serve-stale-data yes
slave-read-only yes
repl-diskless-sync no
repl-diskless-sync-delay 5
repl-disable-tcp-nodelay no
slave-priority 100
appendonly no
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
aof-load-truncated yes
lua-time-limit 5000
slowlog-log-slower-than 10000
slowlog-max-len 128
latency-monitor-threshold 0
notify-keyspace-events ""
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-size -2
list-compress-depth 0
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
hll-sparse-max-bytes 3000
activerehashing yes
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
hz 10
aof-rewrite-incremental-fsync yes



# 启动 redis

chkconfig redis on

service redis start

配置 Open-Falcon

初始化环境

1
2
3
4
mkdir -p $GOPATH/src/github.com/open-falcon
cd $GOPATH/src/github.com/open-falcon
git clone https://github.com/open-falcon/falcon-plus.git

导入数据库

1
2
3
4
5
6
7
cd $GOPATH/src/github.com/open-falcon/falcon-plus/scripts/mysql/db_schema/
mysql -u root -p < 1_uic-db-schema.sql
mysql -u root -p < 2_portal-db-schema.sql
mysql -u root -p < 3_dashboard-db-schema.sql
mysql -u root -p < 4_graph-db-schema.sql
mysql -u root -p < 5_alarms-db-schema.sql

编译 程序

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
cd $GOPATH/src/github.com/open-falcon/falcon-plus/

# 编译所有的模块
make all


# 编译指定模块
make agent


# 打包
make pack


# 创建目录

mkdir /opt/local/open-falcon

mv open-falcon-v0.2.1.tar.gz /opt/local/open-falcon

cd /opt/local/open-falcon

tar zxvf open-falcon-v0.2.1.tar.gz 

[root@localhost open-falcon]# ls
agent  aggregator  alarm  api  gateway  graph  hbs  judge  nodata  open-falcon  plugins  public  transfer

配置 Transfer

transfer是数据转发服务。它接收agent上报的数据,然后按照哈希规则进行数据分片、并将分片后的数据分别push给graph&judge等组件。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
cd /opt/local/open-falcon/transfer/config

vi cfg.json

{
    "debug": true,
    "minStep": 30,
    "http": {
        "enabled": true,
        "listen": "0.0.0.0:6060"
    },
    "rpc": {
        "enabled": true,
        "listen": "0.0.0.0:8433"
    },
    "socket": {
        "enabled": false,
        "listen": "0.0.0.0:4444",
        "timeout": 3600
    },
    "judge": {
        "enabled": true,
        "batch": 200,
        "connTimeout": 1000,
        "callTimeout": 5000,
        "maxConns": 32,
        "maxIdle": 32,
        "replicas": 500,
        "cluster": {
            "judge-00" : "127.0.0.1:6080"
        }
    },
    "graph": {
        "enabled": true,
        "batch": 200,
        "connTimeout": 1000,
        "callTimeout": 5000,
        "maxConns": 32,
        "maxIdle": 32,
        "replicas": 500,
        "cluster": {
            "graph-00" : "127.0.0.1:6070"
        }
    },
    "tsdb": {
        "enabled": false,
        "batch": 200,
        "connTimeout": 1000,
        "callTimeout": 5000,
        "maxConns": 32,
        "maxIdle": 32,
        "retry": 3,
        "address": "127.0.0.1:8088"
    }
}


配置 Graph

graph是存储绘图数据的组件。graph组件 接收transfer组件推送上来的监控数据,同时处理api组件的查询请求、返回绘图数据。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# 创建数据 目录

mkdir -p /opt/data/6070


cd /opt/local/open-falcon/graph/config

vi cfg.json

{
    "debug": false,
    "http": {
        "enabled": true,
        "listen": "0.0.0.0:6071"
    },
    "rpc": {
        "enabled": true,
        "listen": "0.0.0.0:6070"
    },
    "rrd": {
        "storage": "/opt/data/6070"
    },
    "db": {
        "dsn": "root:password@tcp(127.0.0.1:3306)/graph?loc=Local&parseTime=true",
        "maxIdle": 4
    },
    "callTimeout": 5000,
    "migrate": {
            "enabled": false,
            "concurrency": 2,
            "replicas": 500,
            "cluster": {
                    "graph-00" : "127.0.0.1:6070"
            }
    }
}


配置 Api 组件

api组件,提供统一的restAPI操作接口。比如:api组件接收查询请求,根据一致性哈希算法去相应的graph实例查询不同metric的数据,然后汇总拿到的数据,最后统一返回给用户。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
cd /opt/local/open-falcon/api/config

vi cfg.json 
# 主要修改 "salt": "" 为加密字串

{
        "log_level": "debug",
        "db": {
                "falcon_portal": "root:password@tcp(127.0.0.1:3306)/falcon_portal?charset=utf8&parseTime=True&loc=Local",
                "graph": "root:password@tcp(127.0.0.1:3306)/graph?charset=utf8&parseTime=True&loc=Local",
                "uic": "root:password@tcp(127.0.0.1:3306)/uic?charset=utf8&parseTime=True&loc=Local",
                "dashboard": "root:password@tcp(127.0.0.1:3306)/dashboard?charset=utf8&parseTime=True&loc=Local",
                "alarms": "root:password@tcp(127.0.0.1:3306)/alarms?charset=utf8&parseTime=True&loc=Local",
                "db_bug": true
        },
        "graphs": {
                "cluster": {
                        "graph-00": "127.0.0.1:6070"
                },
                "max_conns": 100,
                "max_idle": 100,
                "conn_timeout": 1000,
                "call_timeout": 5000,
                "numberOfReplicas": 500
        },
        "metric_list_file": "./api/data/metric",
        "web_port": "0.0.0.0:8080",
        "access_control": true,
        "signup_disable": false,
        "salt": "pleaseinputwhichyouareusingnow",
        "skip_auth": false,
        "default_token": "default-token-used-in-server-side",
        "gen_doc": false,
        "gen_doc_path": "doc/module.html"
}


部署 Heartbeat 服务

心跳服务器,所有agent都会连到HBS,每分钟发一次心跳请求。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
cd /opt/local/open-falcon/hbs/config

vi cfg.json


{
    "debug": true,
    "database": "root:password@tcp(127.0.0.1:3306)/falcon_portal?loc=Local&parseTime=true",
    "hosts": "",
    "maxConns": 20,
    "maxIdle": 15,
    "listen": ":6030",
    "trustable": [""],
    "http": {
        "enabled": true,
        "listen": "0.0.0.0:6031"
    }
}


部署 Judge 服务

Judge用于告警判断,agent将数据push给Transfer,Transfer不但会转发给Graph组件来绘图,还会转发给Judge用于判断是否触发告警。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
cd /opt/local/open-falcon/judge/config

vi cfg.json

{
    "debug": true,
    "debugHost": "nil",
    "remain": 11,
    "http": {
        "enabled": true,
        "listen": "0.0.0.0:6081"
    },
    "rpc": {
        "enabled": true,
        "listen": "0.0.0.0:6080"
    },
    "hbs": {
        "servers": ["127.0.0.1:6030"],
        "timeout": 300,
        "interval": 60
    },
    "alarm": {
        "enabled": true,
        "minInterval": 300,
        "queuePattern": "event:p%v",
        "redis": {
            "dsn": "127.0.0.1:6379",
            "maxIdle": 5,
            "connTimeout": 5000,
            "readTimeout": 5000,
            "writeTimeout": 5000
        }
    }
}

部署一个 mail 服务

发送警告邮件需要部署一个 mail 服务 用于发送邮件, 这里边部署一个简单的 mail-provider 地址 https://github.com/zzlyzq/mail-provider

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# 下载 依赖模块

cd /opt/local/golang/src/github.com/open-falcon

git clone https://github.com/zzlyzq/mail-provider

cd /opt/local/golang/src/github.com/open-falcon/mail-provider

# 下载依赖
go get 

# 编译
./control build

# 打包
./control pack

mkdir /opt/local/open-falcon/mail-provider

mv falcon-mail-provider-0.0.1.tar.gz /opt/local/open-falcon/mail-provider

cd /opt/local/open-falcon/mail-provider

tar zxvf falcon-mail-provider-0.0.1.tar.gz


# 修改配置文件 里的 smtp 为自己的地址
# QQ邮箱,请开启 smtp 的功能,在QQ邮箱后台开启

vi cfg.json

{
    "debug": true,
    "http": {
        "listen": "0.0.0.0:4000",
        "token": ""
    },
    "smtp": {
        "addr": "smtp.qq.com:587",
        "username": "your-account@qq.com",
        "password": "123456",
        "from": "your-account@qq.com"
    }
}



# 运行程序

./control start

# 查看日志
./control tail



# 测试, 在测试时 token 暂时设置为空


curl http://127.0.0.1:4000/sender/mail -d "tos=user@example.com&subject=xx&content=yy"

部署一个 微信网关

微信网关 git https://github.com/Yanjunhui/chat

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
cd /opt/local/open-falcon

git clone https://github.com/Yanjunhui/chat

cd chat/

chmod +x control.sh

# 需要修改 配置文件

cat config.conf


#http 服务端口
[http]
port = 4567

#微信接口信息
[weixin]
CorpID = ww6424d33203e90e20
AgentId = 1000002
Secret = FoST_8RQSTjZwH_CN3aQW6UKksjCSI9mizFqD7HKhrw
EncodingAESKey = K2M3WMhRHIOH4I1Ww5jxpllGrgY01nvBjUgTvcJEEHX


# 启动
./control.sh start
./control.sh status



## 注意: 

要收到 im 报警信息,必须要在 个人用户里面 填写 微信相关资料

微信相关帐号是  登录微信公众号 --> 通讯录, 里面用户的 帐号

不是个人微信帐号,填写个人帐号,是收不到报警的。

部署 Alarm 服务

alarm模块是处理报警event的,judge产生的报警event写入redis,alarm从redis读取处理,并进行不同渠道的发送。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
cd /opt/local/open-falcon/alarm/config

vi cfg.json


{
    "log_level": "debug",
    "http": {
        "enabled": true,
        "listen": "0.0.0.0:9912"
    },
    "redis": {
        "addr": "127.0.0.1:6379",
        "maxIdle": 5,
        "highQueues": [
            "event:p0",
            "event:p1",
            "event:p2"
        ],
        "lowQueues": [
            "event:p3",
            "event:p4",
            "event:p5",
            "event:p6"
        ],
        "userIMQueue": "/queue/user/im",
        "userSmsQueue": "/queue/user/sms",
        "userMailQueue": "/queue/user/mail"
    },
    "api": {
        "im": "http://127.0.0.1:4567/send",
        "sms": "http://127.0.0.1:10086/sms",
        "mail": "http://127.0.0.1:4000/sender/mail",
        "dashboard": "http://127.0.0.1:8081",
        "plus_api":"http://127.0.0.1:8080",
        "plus_api_token": "default-token-used-in-server-side"
    },
    "falcon_portal": {
        "addr": "root:password@tcp(127.0.0.1:3306)/alarms?charset=utf8&loc=Asia%2FChongqing",
        "idle": 10,
        "max": 100
    },
    "worker": {
        "im": 10,
        "sms": 10,
        "mail": 50
    },
    "housekeeper": {
        "event_retention_days": 7,
        "event_delete_batch": 100
    }
}

配置 Nodata 服务

nodata用于检测监控数据的上报异常。nodata和实时报警judge模块协同工作,过程为: 配置了nodata的采集项超时未上报数据,nodata生成一条默认的模拟数据;用户配置相应的报警策略,收到mock数据就产生报警。采集项上报异常检测,作为judge模块的一个必要补充,能够使judge的实时报警功能更加可靠、完善。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
cd /opt/local/open-falcon/nodata/config

vi cfg.json 

{
    "debug": true,
    "http": {
        "enabled": true,
        "listen": "0.0.0.0:6090"
    },
    "plus_api":{
        "connectTimeout": 500,
        "requestTimeout": 2000,
        "addr": "http://127.0.0.1:8080",
        "token": "default-token-used-in-server-side"
    },
    "config": {
        "enabled": true,
        "dsn": "root:password@tcp(127.0.0.1:3306)/falcon_portal?loc=Local&parseTime=true&wait_timeout=604800",
        "maxIdle": 4
    },
    "collector":{
        "enabled": true,
        "batch": 200,
        "concurrent": 10
    },
    "sender":{
        "enabled": true,
        "connectTimeout": 500,
        "requestTimeout": 2000,
        "transferAddr": "127.0.0.1:6060",
        "batch": 500
    }
}


配置 Aggregator 服务

集群聚合模块。聚合某集群下的所有机器的某个指标的值,提供一种集群视角的监控体验。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
cd /opt/local/open-falcon/aggregator/config

vi cfg.json

{
    "debug": true,
    "http": {
        "enabled": false,
        "listen": "0.0.0.0:6055"
    },
    "database": {
        "addr": "root:password@tcp(127.0.0.1:3306)/falcon_portal?loc=Local&parseTime=true",
        "idle": 10,
        "ids": [1, -1],
        "interval": 55
    },
    "api": {
        "connect_timeout": 500,
        "request_timeout": 2000,
        "plus_api": "http://127.0.0.1:8080",
        "plus_api_token": "default-token-used-in-server-side",
        "push_api": "http://127.0.0.1:1988/v1/push"
    }
}


启动所有服务

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
cd /opt/local/open-falcon

./open-falcon start

./open-falcon check
        falcon-graph         UP           71646 
          falcon-hbs         UP           71658 
        falcon-judge         UP           71670 
     falcon-transfer         UP           71678 
       falcon-nodata         UP           71686 
   falcon-aggregator         UP           71695 
        falcon-agent         UP           71706 
      falcon-gateway         UP           71715 
          falcon-api         UP           71724 
        falcon-alarm         UP           71738 

配置 Agent

agent用于采集机器负载监控指标,比如cpu.idle、load.1min、disk.io.util等等,每隔60秒push给Transfer。agent与Transfer建立了长连接,数据发送速度比较快,agent提供了一个http接口/v1/push用于接收用户手工push的一些数据,然后通过长连接迅速转发给Transfer。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
cd /opt/local/open-falcon/agent/config

# Agent 配置文件


{
    "debug": true,
    "hostname": "",
    "ip": "",
    "plugin": {
        "enabled": false,
        "dir": "./plugin",
        "git": "https://github.com/open-falcon/plugin.git",
        "logs": "./logs"
    },
    "heartbeat": {
        "enabled": true,
        "addr": "127.0.0.1:6030",
        "interval": 60,
        "timeout": 1000
    },
    "transfer": {
        "enabled": true,
        "addrs": [
            "127.0.0.1:8433"
        ],
        "interval": 60,
        "timeout": 1000
    },
    "http": {
        "enabled": false,
        "listen": ":1988",
        "backdoor": false
    },
    "collector": {
        "ifacePrefix": ["eth", "em"],
        "mountPoint": []
    },
    "default_tags": {
    },
    "ignore": {
        "cpu.busy": true,
        "df.bytes.free": true,
        "df.bytes.total": true,
        "df.bytes.used": true,
        "df.bytes.used.percent": true,
        "df.inodes.total": true,
        "df.inodes.free": true,
        "df.inodes.used": true,
        "df.inodes.used.percent": true,
        "mem.memtotal": true,
        "mem.memused": true,
        "mem.memused.percent": true,
        "mem.memfree": true,
        "mem.swaptotal": true,
        "mem.swapused": true,
        "mem.swapfree": true
    }
}




1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# agent 脚本  添加 hostname = IP

#!/bin/bash
# Installs the pre-packaged Open-Falcon agent under /opt/local, rewrites the
# agent's "hostname" setting to this machine's IP address, then starts the
# agent and checks its status.

# -p keeps the script usable when /opt/local already exists.
mkdir -p /opt/local
# Stop here rather than unpack into whatever directory we happen to be in.
cd /opt/local || exit 1
wget http://172.16.1.100/agent.tar.gz
tar zxvf agent.tar.gz
rm -rf agent.tar.gz
# Read the IPv4 address of em1. Some ifconfig versions print "addr:1.2.3.4";
# sed strips that literal prefix, whereas `tr -d "addr:"` would delete every
# 'a', 'd', 'r' and ':' character individually.
IPADDR=$(ifconfig em1|grep inet|grep -v 127.0.0.1|grep -v inet6|awk '{print $2}'|sed 's/^addr://')
# Replace the whole "hostname" line in the agent config with the detected IP.
sed -i 's/\"hostname\"\:.*$/\"hostname\"\: \"'"$IPADDR"'\"\,/g' open-falcon/agent/config/cfg.json
cat open-falcon/agent/config/cfg.json
cd open-falcon || exit 1
./open-falcon start agent
./open-falcon check agent

配置前端 dashboard

初始化依赖

1
2
3
4
5
6
yum install -y python-virtualenv
yum install -y python-devel
yum install -y openldap-devel
yum install -y mysql-devel
yum groupinstall "Development tools"

安装配置 dashboard

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
cd /opt/local/open-falcon

git clone https://github.com/open-falcon/dashboard.git

cd dashboard

# 创建 python 独立运行环境

virtualenv ./env

# 安装 python 依赖模块

# 执行安装

./env/bin/pip install -r pip_requirements.txt -i http://mirrors.aliyun.com/pypi/simple/



# 修改配置

# 修改里面的 mysql 配置

rrd/config.py

ALARM_DB_PASS

启动 dashboard

1
2
3
4
5
6
7
8
9
10
11
12
13
# debug 模式

./env/bin/python wsgi.py


# 正常模式

bash control start


# 查看日志
bash control tail 

登录 WEB UI

1
2
3
4
5
6
http://172.16.1.100:8081/


注册  root 帐号  为 admin 帐号

监控报警

配置 报警名单

1
2
3
4
5
6
7
8
9
10
11
12
# 首先配置 用户组

dashboard --> Welcome root -->  Teams 

Add+  -- > Create Team

# 创建一个 ICT 组

名称: ICT
简介: 运维组
成员: root

配置 nodata 监控 agent

1
2
3
4
5
6
7
8
# 创建 HostGroup

dashboard --> HostGroups

添加一个名称 dev-server 的 HostGroups

点击 hosts -- > Add Host  添加  dev 相关服务器 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 创建 nodata
# 监控 client 的 agent: 如果 agent 掉了, 就无法上报数据, 所以直接配置模板是不行的,
# 必须配置 nodata, 在 agent 抓不到数据的时候补发值 -1。

dashboard --> nodata

Add nodata

name: nodata.agent

endpoint选择:   机器分组  ---  dev-server

metric: agent.alive

type: GAUGE

周期: 60

数据上报中断时,补发如下值: -1


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# 创建 策略配置

dashboard --> Templates

创建一个 名称为 agent-alive 的模板


# 模板基础信息:

name: agent-alive

模板策略列表:

metric: agent.alive   note: 无法连接agent

if [all(#3)] < 0 : alarm(); callback();

save 保存

# 模板报警配置:

之前设置的 ICT



# Save
1
2
3
4
5
6
7
8
# 绑定 模板

dashboard --> HostGroups

查找  dev-server   点击  templates

查找  agent-alive   选择 + Bind

1
2
3
4
5
6
7
8
9
10
11
# 测试

关闭一个 agent 的进程


等待60秒 查看 Alarm-Dashboard


等待收取邮件~


配置 系统监控指标

1
2
3
4
5
dashboard --> Templates

Add 添加一个 系统指标模块

监控指标如下:

sys.png-60.9kB

FAQ

修改 报警模板

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
cd $GOPATH/src/github.com/open-falcon/falcon-plus/modules/alarm/cron


# 报警内容:

cat builder.go



package cron

import (
        "fmt"

        "github.com/open-falcon/falcon-plus/common/model"
        "github.com/open-falcon/falcon-plus/common/utils"
        "github.com/open-falcon/falcon-plus/modules/alarm/g"
)

// BuildCommonSMSContent formats event as the single-line, bracketed text used
// for SMS content.
//
// The fields are written in this order: priority, status, endpoint, an empty
// "[]" slot, then note, func, metric, sorted pushed tags, left value, operator
// and right value, followed by the current step and the formatted time.
func BuildCommonSMSContent(event *model.Event) string {
        const layout = "[P%d][%s][%s][][%s %s %s %s %s%s%s][O%d %s]"

        // Values are gathered in the same order the layout consumes them.
        priority := event.Priority()
        status := event.Status
        endpoint := event.Endpoint
        note := event.Note()
        fn := event.Func()
        metric := event.Metric()
        tags := utils.SortedTags(event.PushedTags)
        left := utils.ReadableFloat(event.LeftValue)
        op := event.Operator()
        right := utils.ReadableFloat(event.RightValue())
        step := event.CurrentStep
        when := event.FormattedTime()

        return fmt.Sprintf(layout,
                priority, status, endpoint,
                note, fn, metric, tags, left, op, right,
                step, when,
        )
}

// BuildCommonIMContent formats event as the bracketed, Chinese-labelled text
// used for IM content.
//
// Only priority, status, endpoint, note and the formatted time are included.
// Func, metric, pushed tags, left/right values, operator and current step are
// deliberately left out of this message.
func BuildCommonIMContent(event *model.Event) string {
        const layout = "[报警级别: %d][报警状态: %s][报警Host: %s][报警内容: %s][报警时间: %s]"

        priority := event.Priority()
        status := event.Status
        endpoint := event.Endpoint
        note := event.Note()
        when := event.FormattedTime()

        return fmt.Sprintf(layout, priority, status, endpoint, note, when)
}

// BuildCommonMailContent formats event as the multi-line, Chinese-labelled
// text used for mail content, one "label: value" entry per CRLF-terminated
// line.
//
// The lines carry, in order: status, priority, endpoint, metric, sorted pushed
// tags, the expression (func, left value, operator, right value), note, the
// maximum and current step, the formatted time, and the link returned by
// g.Link for the event.
func BuildCommonMailContent(event *model.Event) string {
        const layout = "报警状态: %s\r\n报警级别: %d\r\n报警Host: %s\r\n报警事件: %s\r\n事件标签: %s\r\n报警表达式: %s: %s%s%s\r\n报警内容: %s\r\n最大报警次数: %d   当前报警次数: %d\r\n报警时间: %s\r\n报警模板: %s\r\n"

        // The link is resolved first; the rest is gathered in layout order.
        eventLink := g.Link(event)

        status := event.Status
        priority := event.Priority()
        endpoint := event.Endpoint
        metric := event.Metric()
        tags := utils.SortedTags(event.PushedTags)
        fn := event.Func()
        left := utils.ReadableFloat(event.LeftValue)
        op := event.Operator()
        right := utils.ReadableFloat(event.RightValue())
        note := event.Note()
        maxStep := event.MaxStep()
        step := event.CurrentStep
        when := event.FormattedTime()

        return fmt.Sprintf(layout,
                status, priority, endpoint, metric, tags,
                fn, left, op, right,
                note, maxStep, step, when, eventLink,
        )
}

// GenerateSmsContent returns the SMS text for event, as produced by
// BuildCommonSMSContent.
func GenerateSmsContent(event *model.Event) string {
        content := BuildCommonSMSContent(event)
        return content
}

// GenerateMailContent returns the mail text for event, as produced by
// BuildCommonMailContent.
func GenerateMailContent(event *model.Event) string {
        content := BuildCommonMailContent(event)
        return content
}

// GenerateIMContent returns the IM text for event, as produced by
// BuildCommonIMContent.
func GenerateIMContent(event *model.Event) string {
        content := BuildCommonIMContent(event)
        return content
}