跳到主要内容

第6章 应用:xxx企业NagX配置

6.1 Nagios端配置

6.1.1 基本配置

6.1.1.1 报警信息接收人配置

contacts.cfg文件

# Contact "guofs": a recipient of alert notifications (member of the "admins" contact group).
define contact {
contact_name guofs ; short name referenced by contactgroup "members"
use generic-contact ; inherit notification settings from the generic-contact template
alias JN_guofs ; descriptive name
email guofs@os-w.com ; e-mail address of this contact
}

# Contact group "admins": referenced by contact_groups in the Default-host and Default-service templates.
define contactgroup {

contactgroup_name admins
alias Nagios Administrators
members guofs ; contacts that belong to this group
}

/etc/mail.rc

# mailx settings used to send notification e-mails through an external SMTP server.
# Character sets to try, in order, for outgoing messages.
set sendcharsets=iso-8859-1,utf-8
# Sender address.
set from=jszc_dsd@os-w.com
# SMTP server as host:port.
set smtp=198.218.32.152:30025
# SMTP authentication credentials.
# NOTE(review): the password is stored in plain text - restrict read access to this file
# and use a dedicated mail account.
set smtp-auth-user=jszc_dsd@os-w.com
set smtp-auth-password=abc_123456

6.1.1.2 联系人对象模板

# Contact template: notification defaults inherited by every contact that sets
# "use generic-contact". Each inline comment must stay on the same line as its
# directive; a wrapped comment tail would be parsed as an (invalid) directive.
define contact {

    name                            generic-contact         ; The name of this contact template
    service_notification_period     24x7                    ; service notifications can be sent anytime
    host_notification_period        24x7                    ; host notifications can be sent anytime
    service_notification_options    w,u,c,r,f,s             ; send notifications for all service states, flapping events, and scheduled downtime events
    host_notification_options       d,u,r,f,s               ; send notifications for all host states, flapping events, and scheduled downtime events
    service_notification_commands   notify-service-by-email ; send service notifications via email
    host_notification_commands      notify-host-by-email    ; send host notifications via email
    host_notifications_enabled      1
    service_notifications_enabled   1
    register                        0                       ; DON'T REGISTER THIS DEFINITION - IT'S NOT A REAL CONTACT, JUST A TEMPLATE!
}

6.1.1.3 配置主机对象模板

  • 父对象
# Base host template: check, notification and retention defaults for every
# host template or host that sets "use generic-host".
# Intervals are expressed in time units; one unit is interval_length seconds
# (60 by default - confirm the value in nagios.cfg).
define host{
;base conf
name generic-host ; The name of this host template
;icon_image ico2/sr2.png
;statusmap_image ico2/sr2.png
;alias Host,Print,Network Device,etc
;check conf
active_checks_enabled 1
check_period 24x7 ; checked round the clock
check_interval 2 ; Actively check the host every 2 time units
retry_interval 1 ; Schedule host check retries at 1 time unit intervals
max_check_attempts 2 ; Check each host up to 2 times (max)
check_command check-host-alive ;Default command to check hosts
event_handler_enabled 1 ; Host event handler is enabled
;notifications conf
notifications_enabled 1 ; Host notifications are enabled
notification_period 24x7 ; Send host notifications at any time
notification_interval 30 ; Resend notifications every 30 time units
notification_options d,u,r
;contact conf
; No contacts are assigned here; the Default-host template sets contact_groups.
contacts null
contact_groups null
;other conf
flap_detection_enabled 1 ; Flap detection is enabled
;failure_prediction_enabled 1 ; Failure prediction is enabled
process_perf_data 1 ; Process performance data
retain_status_information 1 ; Retain status information across program restarts
retain_nonstatus_information 1 ; Retain non-status information across program restarts
; Link to the PNP4Nagios graph of the host; the class/rel text embedded in the
; value appears to enable the PNP4Nagios hover popup - keep the quotes as is.
action_url /pnp4nagios/index.php/graph?host=$HOSTNAME$&srv=_HOST_' class='tips' rel='/pnp4nagios/index.php/popup?host=$HOSTNAME$&srv=_HOST_
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
}
  • 子对象
# Child host template: inherits generic-host and overrides check timing,
# notification window, host group and contact group. The generated host
# definitions (host_list.cfg) use this template.
define host {

name Default-host ; The name of this host template
use generic-host ; This template inherits other values from the generic-host template
check_period 24x7 ; By default, Linux hosts are checked round the clock
check_interval 5 ; Actively check the host every 5 minutes
retry_interval 1 ; Schedule host check retries at 1 minute intervals
max_check_attempts 10 ; Check each Linux host 10 times (max)
check_command check-host-alive ; Default command to check Linux hosts
; NOTE(review): the "workhours" timeperiod is not defined in this section - confirm it exists.
notification_period workhours ; Linux admins hate to be woken up, so we only notify during the day
; Note that the notification_period variable is being overridden from
; the value that is inherited from the generic-host template!
notification_interval 120 ; Resend notifications every 2 hours
notification_options d,u,r ; Only send notifications for specific host states
hostgroups default_host_group
contact_groups admins ; Notifications get sent to the admins by default
register 0 ; DON'T REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
}

6.1.1.4 配置服务对象模板

  • 父对象
# Base service template: check, notification and retention defaults for every
# service template or service that sets "use generic-service".
# Intervals are expressed in time units; one unit is interval_length seconds
# (60 by default - confirm the value in nagios.cfg).
define service{
;generic conf
name generic-service ; The 'name' of this service template
;icon_image ico2/isp1.gif
;check conf
active_checks_enabled 1 ; Active service checks are enabled
passive_checks_enabled 1 ; Passive service checks are enabled/accepted
event_handler_enabled 1 ; Service event handler is enabled
check_period 24x7 ; The service can be checked at any time of the day
#normal_check_interval 2 ; Check the service every 30s under normal conditions
#retry_check_interval 1 ; Re-check the service every 30s until a hard state can be determined
max_check_attempts 2 ; Re-check the service up to 2 times in order to determine its final (hard) state
check_interval 10 ; Check the service every 10 minutes under normal conditions
retry_interval 2 ; Re-check the service every two minutes until a hard state can be determined
;notifications conf
notifications_enabled 1 ; Service notifications are enabled
notification_options w,u,c,r ; Send notifications about warning, unknown, critical, and recovery events
notification_interval 240 ; Re-notify about service problems every 240 time units (4 hours at 60s per unit)
notification_period 24x7 ; Notifications can be sent out at any time
;contact conf
; No contacts are assigned here; the Default-service template sets contact_groups.
contacts null
contact_groups null
;other conf
parallelize_check 1 ; Active service checks should be parallelized (disabling this can lead to major performance problems)
obsess_over_service 1 ; We should obsess over this service (if necessary)
check_freshness 0 ; Default is to NOT check service 'freshness'
flap_detection_enabled 1 ; Flap detection is enabled
#failure_prediction_enabled 1 ; Failure prediction is enabled
process_perf_data 1 ; Process performance data
retain_status_information 1 ; Retain status information across program restarts
retain_nonstatus_information 1 ; Retain non-status information across program restarts
is_volatile 0 ; The service is not volatile
; Link to the PNP4Nagios graph of the service; the class/rel text embedded in the
; value appears to enable the PNP4Nagios hover popup - keep the quotes as is.
action_url /pnp4nagios/index.php/graph?host=$HOSTNAME$&srv=$SERVICEDESC$' class='tips' rel='/pnp4nagios/index.php/popup?host=$HOSTNAME$&srv=$SERVICEDESC$
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
}
  • 子对象
# Child service template: inherits generic-service and overrides check timing,
# service group and contact group. The generated service definitions
# (service_list.cfg) use this template.
define service {

name Default-service ; The name of this service template
use generic-service ; Inherit default values from the generic-service definition
max_check_attempts 3 ; Re-check the service up to 3 times in order to determine its final (hard) state
check_interval 2 ; Check the service every 2 time units under normal conditions
retry_interval 1 ; Re-check the service every time unit until a hard state can be determined
servicegroups default_service_group
contact_groups admins ; Notifications get sent to the admins by default
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
}

6.1.1.5 组对象

  • 主机组
# Host group that the Default-host template places its hosts in (via "hostgroups").
define hostgroup{
hostgroup_name default_host_group
alias default host group
}
  • 服务组
# Service group that the Default-service template places its services in (via "servicegroups").
define servicegroup{
servicegroup_name default_service_group
alias default service group
}

6.1.1.6 命令对象

# check_load_linux: runs the check_snmp_load_linux plugin against the host address.
#   $ARG1$ -> -C (presumably the SNMP community string - confirm against the plugin)
define command{
command_name check_load_linux
command_line $USER1$/check_snmp_load_linux -H $HOSTADDRESS$ -C $ARG1$
}


# check_cpu_all: runs the check_snmp_cpu_win_linux plugin against the host address.
#   $ARG1$ -> -C (presumably the SNMP community string)
#   $ARG2$ -> -w, $ARG3$ -> -c (presumably warning / critical thresholds)
define command{
command_name check_cpu_all
command_line $USER1$/check_snmp_cpu_win_linux -H $HOSTADDRESS$ -C $ARG1$ -w $ARG2$ -c $ARG3$
}


# check_storage_by_string: runs the check_snmp_storage_by_string plugin against the host address.
#   $ARG1$ -> -C (presumably the SNMP community string)
#   $ARG2$ -> -s (presumably the string selecting the storage entry - confirm against the plugin)
#   $ARG3$ -> -w, $ARG4$ -> -c (presumably warning / critical thresholds)
define command{
command_name check_storage_by_string
command_line $USER1$/check_snmp_storage_by_string -H $HOSTADDRESS$ -C $ARG1$ -s $ARG2$ -w $ARG3$ -c $ARG4$
}


# check_ifrate_by_ip: runs the check_snmp_iftraffic_by_ip plugin against the host address.
#   $ARG1$ -> -C (presumably the SNMP community string)
#   $ARG2$ -> -w, $ARG3$ -> -c (presumably warning / critical thresholds)
define command{
command_name check_ifrate_by_ip
command_line $USER1$/check_snmp_iftraffic_by_ip -H $HOSTADDRESS$ -C $ARG1$ -w $ARG2$ -c $ARG3$
}

# check_ifrate_by_ifname: runs the check_snmp_iftraffic_by_ifname plugin against the host address.
#   $ARG1$ -> -C (presumably the SNMP community string)
#   $ARG2$ -> -r, $ARG3$ -> -s (meaning defined by the plugin - TODO confirm)
#   $ARG4$ -> -w, $ARG5$ -> -c (presumably warning / critical thresholds)
define command{
command_name check_ifrate_by_ifname
command_line $USER1$/check_snmp_iftraffic_by_ifname -H $HOSTADDRESS$ -C $ARG1$ -r $ARG2$ -s $ARG3$ -w $ARG4$ -c $ARG5$
}

# check_cpu_temp: queries the custom cpu_temp OID under .1.3.6.1.4.1.8888.88
# (published by the snmpd "extend" entry in section 6.1.3) with the check_snmp_perl plugin.
#   $ARG1$ -> -C (SNMP community string)
#   $ARG2$ -> -w, $ARG3$ -> -c (warning / critical thresholds)
define command{
command_name check_cpu_temp
command_line $USER1$/check_snmp_perl -H $HOSTADDRESS$ -C $ARG1$ -o .1.3.6.1.4.1.8888.88.3.1.2.8.99.112.117.95.116.101.109.112 -w $ARG2$ -c $ARG3$ -m 100
}

6.1.2 监控对象配置

6.1.2.1 主机对象

6.1.2.2 服务对象

Alt text

6.1.2.3 数据转换为nagios配置

  • 定义数据库及表
-- Hosts to monitor: one row per generated Nagios "define host" block.
DROP TABLE IF EXISTS host_list;
CREATE TABLE host_list (
    host_name  VARCHAR(255) NOT NULL DEFAULT '',   -- exported as host_name
    alias      VARCHAR(255) NOT NULL DEFAULT '',   -- exported as alias
    host_ip    VARCHAR(16)  NOT NULL DEFAULT '',   -- exported as address; join key to service_list
    check_host INT(255)              DEFAULT NULL, -- exported as active_checks_enabled
    host_gp    VARCHAR(255) NOT NULL DEFAULT '',   -- exported as hostgroups
    PRIMARY KEY (host_name)
);
-- Services to monitor: one row per generated Nagios "define service" block.
DROP TABLE IF EXISTS service_list;
CREATE TABLE service_list (
    host_ip          VARCHAR(16)  NOT NULL DEFAULT '',  -- matched against host_list.host_ip
    service_desc_str VARCHAR(255) NOT NULL DEFAULT '',  -- exported as service_description
    check_cmd        VARCHAR(255) NOT NULL DEFAULT '',  -- exported as check_command
    service_gp       VARCHAR(255) NOT NULL DEFAULT ''   -- exported as servicegroups
);
  • 将收集的数据写入数据库
  • 编写脚本转换成nagios格式
    脚本tp.py代码
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Export monitoring objects stored in MySQL as Nagios object configuration.

Reads the tables obj_group, host_list and service_list and writes:
    /tmp/gp.cfg            - one hostgroup and one servicegroup per obj_group row
    /tmp/host_list.cfg     - one host per host_list row
    /tmp/service_list.cfg  - one service per service_list row joined to its host
"""

import sys

import pymysql

# Python 2 only: make implicit str/unicode conversions use UTF-8 so that
# non-ASCII aliases can be written to the output files. Python 3 has no
# reload() builtin and does not need this.
try:
    reload(sys)
    sys.setdefaultencoding('utf8')
except NameError:
    pass


def write_definitions(path, definitions):
    """Write Nagios ``define`` blocks to *path*, replacing any existing file.

    definitions -- iterable of (object_type, directives) pairs, where
                   directives is a list of (directive_name, value) pairs.
    """
    # "with" guarantees the file is closed even if a write fails.
    with open(path, mode="w") as out:
        for object_type, directives in definitions:
            out.write("define %s{\n" % object_type)
            for directive, value in directives:
                out.write("\t%s\t%s\n" % (directive, value))
            out.write("}\n")


def export_groups(cursor):
    """Write /tmp/gp.cfg: a hostgroup and a '<name>-service' servicegroup per group row."""
    # The obj_group schema is not part of this document; the first column is
    # used as the group name and the second as its alias.
    cursor.execute("select * from obj_group")
    definitions = []
    for row in cursor.fetchall():
        definitions.append(("hostgroup", [
            ("hostgroup_name", row[0]),
            ("alias", row[1]),
        ]))
        definitions.append(("servicegroup", [
            ("servicegroup_name", "%s-service" % row[0]),
            ("alias", row[1]),
        ]))
    write_definitions("/tmp/gp.cfg", definitions)


def export_hosts(cursor):
    """Write /tmp/host_list.cfg: one host definition per host_list row."""
    # Columns are named explicitly so the output does not depend on the
    # physical column order of the table.
    cursor.execute("select host_name, alias, host_ip, check_host, host_gp from host_list")
    definitions = []
    for host_name, alias, host_ip, check_host, host_gp in cursor.fetchall():
        directives = [
            ("use", "Default-host"),
            ("host_name", host_name),
            ("alias", alias),
            ("address", host_ip),
        ]
        # check_host is nullable; writing the literal "None" would produce an
        # invalid directive value, so the directive is omitted and the value
        # inherited from the template applies instead.
        if check_host is not None:
            directives.append(("active_checks_enabled", check_host))
        directives.append(("hostgroups", host_gp))
        definitions.append(("host", directives))
    write_definitions("/tmp/host_list.cfg", definitions)


def export_services(cursor):
    """Write /tmp/service_list.cfg: one service per service_list row with a matching host."""
    cursor.execute(
        "select a.host_name, b.service_desc_str, b.check_cmd, b.service_gp "
        "from host_list as a join service_list as b on a.host_ip = b.host_ip"
    )
    definitions = []
    for host_name, description, check_cmd, service_gp in cursor.fetchall():
        definitions.append(("service", [
            ("use", "Default-service"),
            ("host_name", host_name),
            ("service_description", description),
            ("check_command", check_cmd),
            ("servicegroups", service_gp),
        ]))
    write_definitions("/tmp/service_list.cfg", definitions)


def main():
    """Connect to the database and generate the three configuration files."""
    # NOTE(review): credentials are hard-coded; move them to a protected
    # configuration file or environment variables before reusing this script.
    dbconn = pymysql.connect(
        host="192.168.80.202",
        database="gfs",
        user="root",
        password="mysql+888",
        port=3380,
        charset="utf8"
    )
    try:
        cursor = dbconn.cursor()
        try:
            # Only SELECT statements are issued, so no commit is required.
            export_groups(cursor)
            export_hosts(cursor)
            export_services(cursor)
        finally:
            cursor.close()
    finally:
        dbconn.close()


if __name__ == "__main__":
    main()

转换操作

# python tp.py
将生成如下3个文件
/tmp/gp.cfg
/tmp/host_list.cfg
/tmp/service_list.cfg

将这3个文件上传到服务器/usr/local/nagios/etc/objects/conf/目录

6.1.3 实例:CPU温度采集

  • 安装插件
# yum -y install lm_sensors lm_sensors-sensord

# chkconfig --list | grep sensor
lm_sensors 0:off 1:off 2:off 3:off 4:off 5:off 6:off
# chkconfig --add lm_sensors
# chkconfig lm_sensors on

# sensors-detect 配置,一路回车yes即可
# sensors
coretemp-isa-0000

Adapter: ISA adapter
Physical id 0: +30.0 C (high = +79.0 C, crit = +89.0 C)
Core 0: +24.0 C (high = +79.0 C, crit = +89.0 C)
Core 1: +24.0 C (high = +79.0 C, crit = +89.0 C)
Core 2: +24.0 C (high = +79.0 C, crit = +89.0 C)
Core 3: +25.0 C (high = +79.0 C, crit = +89.0 C)
Core 4: +25.0 C (high = +79.0 C, crit = +89.0 C)
Core 8: +24.0 C (high = +79.0 C, crit = +89.0 C)
Core 9: +24.0 C (high = +79.0 C, crit = +89.0 C)
Core 10: +25.0 C (high = +79.0 C, crit = +89.0 C)
Core 11: +24.0 C (high = +79.0 C, crit = +89.0 C)
Core 12: +25.0 C (high = +79.0 C, crit = +89.0 C)

# service lm_sensors start
Starting lm_sensors: loading module ipmi-si coretemp [ OK ]
  • 采集CPU温度(各个核心的平均温度)
# core_tmp=`sensors | grep Core | gawk '{print $3}' | grep -o [0-9][0-9] | awk '{i+=1;sum+=$1} END {print(sum/i)}'`
# echo $core_tmp
22.75
  • 定义脚本
# touch /etc/snmp/cpu_temp.sh
# chmod +x /etc/snmp/cpu_temp.sh
# cat /etc/snmp/cpu_temp.sh
#!/bin/bash
# Print the average temperature of all CPU cores reported by lm_sensors.
# Called by snmpd through an "extend" entry, so the only output must be the value.
#
# Pipeline: keep the "Core N:" lines, take the temperature field (e.g. +24.0),
# extract its first two consecutive digits, then average them.
# The grep pattern is quoted so the shell cannot expand it as a filename glob,
# and awk prints nothing (instead of dividing by zero) when no core line is found.
tmp=$(/usr/bin/sensors | grep Core | gawk '{print $3}' | grep -o '[0-9][0-9]' | awk '{i+=1;sum+=$1} END {if (i > 0) print(sum/i)}')
echo "$tmp"
exit 0
  • 自定义OID,配置snmpd.conf
# The two systemview entries are left disabled; the "mib2" view below is used instead.
#view    systemview    included   .1.3.6.1.2.1.1
#view systemview included .1.3.6.1.2.1.25.1.1
# View "mib2": subtrees exposed to SNMP clients. .1.3.6.1.4.1.8888 is the
# private subtree under which the custom cpu_temp OID is registered below.
# NOTE(review): .1.3.6.1 is a prefix of every other subtree listed here, so the
# following three entries look redundant - confirm before removing any of them.
view mib2 included .1.3.6.1 fc
view mib2 included .iso.org.dod.internet.mgmt.mib-2 fc
view mib2 included .1.3.6.1.4.1.2021 fc
view mib2 included .1.3.6.1.4.1.8888 fc

# Access rule for group notConfigGroup: view mib2 in the read position,
# "none" in the write and notify positions.
#access notConfigGroup "" any noauth exact systemview none none
access notConfigGroup "" any noauth exact mib2 none none

# Custom OID (the line below may be placed anywhere in the file):
# publishes the output of /etc/snmp/cpu_temp.sh under .1.3.6.1.4.1.8888.88 with the name cpu_temp.
extend .1.3.6.1.4.1.8888.88 cpu_temp /bin/sh /etc/snmp/cpu_temp.sh

重启snmpd服务:service snmpd restart

查看

# snmpwalk -v 2c 198.76.20.125 -c jnj_123456 -On .1.3.6.1.4.1.8888.88
.1.3.6.1.4.1.8888.88.1.0 = INTEGER: 1
.1.3.6.1.4.1.8888.88.2.1.2.8.99.112.117.95.116.101.109.112 = STRING: "/bin/sh"
.1.3.6.1.4.1.8888.88.2.1.3.8.99.112.117.95.116.101.109.112 = STRING: "/etc/snmp/cpu_temp.sh"
.1.3.6.1.4.1.8888.88.2.1.4.8.99.112.117.95.116.101.109.112 = ""
.1.3.6.1.4.1.8888.88.2.1.5.8.99.112.117.95.116.101.109.112 = INTEGER: 5
.1.3.6.1.4.1.8888.88.2.1.6.8.99.112.117.95.116.101.109.112 = INTEGER: 1
.1.3.6.1.4.1.8888.88.2.1.7.8.99.112.117.95.116.101.109.112 = INTEGER: 1
.1.3.6.1.4.1.8888.88.2.1.20.8.99.112.117.95.116.101.109.112 = INTEGER: 4
.1.3.6.1.4.1.8888.88.2.1.21.8.99.112.117.95.116.101.109.112 = INTEGER: 1
.1.3.6.1.4.1.8888.88.3.1.1.8.99.112.117.95.116.101.109.112 = STRING: "21.3"
.1.3.6.1.4.1.8888.88.3.1.2.8.99.112.117.95.116.101.109.112 = STRING: "21.3"
.1.3.6.1.4.1.8888.88.3.1.3.8.99.112.117.95.116.101.109.112 = INTEGER: 1
.1.3.6.1.4.1.8888.88.3.1.4.8.99.112.117.95.116.101.109.112 = INTEGER: 0
.1.3.6.1.4.1.8888.88.4.1.2.8.99.112.117.95.116.101.109.112.1 = STRING: "21.3"

# snmpwalk -v 2c 198.76.20.125 -c jnj_123456 -On .1.3.6.1.4.1.8888.88.3.1.2.8.99.112.117.95.116.101.109.112
.1.3.6.1.4.1.8888.88.3.1.2.8.99.112.117.95.116.101.109.112 = STRING: "21.225"
  • 定义nagios插件和命令
# 插件命令
/usr/local/nagios/libexec/check_snmp_perl_new -H 198.76.20.125 -C jnj_123456 -o .1.3.6.1.4.1.8888.88.3.1.2.8.99.112.117.95.116.101.109.112 -w 60 -c 80 -m 100

# 添加插件到nagios配置文件
# check_cpu_temp: queries the custom cpu_temp OID with the check_snmp_perl plugin.
#   $ARG1$ -> -C (SNMP community string)
#   $ARG2$ -> -w, $ARG3$ -> -c (warning / critical thresholds)
# NOTE(review): the same command is already listed in section 6.1.1.6 - make sure
# only one copy ends up in the loaded configuration.
define command{
command_name check_cpu_temp
command_line $USER1$/check_snmp_perl -H $HOSTADDRESS$ -C $ARG1$ -o .1.3.6.1.4.1.8888.88.3.1.2.8.99.112.117.95.116.101.109.112 -w $ARG2$ -c $ARG3$ -m 100
}


# CPU temperature service for host JN_SJDB2, based on the Default-service template.
define service{
use Default-service
host_name JN_SJDB2
service_description CPU-Temp
; Arguments after the command name, separated by "!": community string, warning threshold (50), critical threshold (70).
check_command check_cpu_temp!jnj_123456!50!70
; NOTE(review): presumably one of the "<group>-service" groups generated by tp.py - confirm it exists.
servicegroups DB-service
}
  • 效果查看 Alt text

6.2 Nagvis端配置

6.2.1 拓扑制作

例如: Alt text

6.2.2 拓扑图添加到nagvis

  • 上传图片 Alt text

Alt text

  • 添加地图 Alt text

Alt text

  • 添加地图图例 Alt text 选好背景图 Alt text

效果如下: Alt text

6.2.3 添加监控对象到图例

Alt text

Alt text 上述下拉条中,将有各类对象供选择。

6.3 效果展示

6.3.1 Nagios自带web

Alt text

6.3.2 Nagios主机组

Alt text

6.3.3 Nagios服务组

Alt text

6.3.4 Nagios报警邮件

Alt text

6.3.5 PNP4Nagios趋势图1

Alt text

6.3.6 PNP4Nagios趋势图2

Alt text

6.3.7 NagVis展示图

Alt text