這是用shell開發的nagios插件,根據Nagios Plugin Development Guidelines和Nagios Plugin API編寫,在前人的基礎上進行補充,支持官方標準的-V、-h、-w、-c、-t選項。插件主要思路是通過netstat命令獲取tcp的各個狀態連接,統計每個狀態數量,最后按照標準的nagios插件格式輸出檢測信息和性能信息。監控哪種狀態、連接數達到多少時報警,都可以通過腳本參數指定。
nagios監控服務器,通過nrpe在被監控端執行檢測腳本,并把檢測腳本運行的狀態返回值和輸出信息,返回給nagios監控服務器。其中狀態返回值0代表正常,1代表警告,2代表緊急,3代表未知。
說明:腳本支持這幾個參數
-V|--version 顯示腳本版本信息
-c|--critical threshold 指定critical的閾值,必須指定
-w|--warning threshold 指定warning的閾值,必須指定
-s|--status tcp status 指定tcp連接狀態,必須指定
-h|--help 獲取使用幫助
-t|--timeout time 指定腳本運行超時時間(秒),可選,默認為10秒
threshold格式
~代表負無窮大
從0開始可以省略start:即0:end
前面有start: 沒有指明結束值,則為無窮大
當數值落在起始值與結束值構成的范圍之外時報警(范圍本身包括這兩個端點值,即等于端點值時不報警)
如果范圍以@開頭,則判斷相反:數值落在起始值與結束值之間(包括這兩個值)時報警
10 小于0或大于10則警報
10: 在10到正無窮大之外警報,即小于10時警報
~:10 在負無窮大到10之外警報,即大于10時警報
10:20 在10到20之外警報,即小于10或大于20
@10:20 在10到20之間(包括10和20)警報
@10 在0到10之間(包括0和10)警報
這個腳本可能還有bug,歡迎大家修復。
#!/bin/bash
# Nagios plugin return codes: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.
# NOTE(review): the visible script exits with numeric literals rather than
# these names; they are kept for readability / callers outside this view.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4
# Fixed search path used to resolve the external commands the script runs
# (netstat, awk, grep, ps, sleep), independent of the caller's environment.
PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin
#######################################
# Print the plugin name and version.
# Outputs: one line on stdout
#######################################
print_revision() {
  printf '%s\n' "(nagios-plugins-netstat 0.5)"
}
#######################################
# Print the command-line synopsis (three lines) to stdout.
# The script name is taken from $0.
#######################################
usage() {
  # %b expands the embedded \n escapes the same way `echo -e` does.
  printf '%b\n' "Usage: $0 [-V|--version] [-h|--help] <-w|--warning warning threshold>\n<-c|--critical critical threshold> <-s|--status status>\n<-t|--timeout time>"
}
#######################################
# Parse and validate a Nagios-style threshold range.
#
# Accepted forms (each optionally prefixed with "@"):
#   N      -> 0..N
#   N:     -> N..infinity
#   N:M    -> N..M
#   ~:     -> -infinity..infinity
#   ~:M    -> -infinity..M
#
# Globals written:
#   mflag  1 when the range is prefixed with "@" (alert INSIDE the range),
#          0 otherwise (alert OUTSIDE the range)
#   range  the range string with any leading "@" removed
#   start  lower bound ("~" is stored as -999999)
#   end    upper bound (a missing end is stored as the largest 64-bit integer)
# Arguments:
#   $1 - range string
# Outputs:
#   "invalid range" on stdout when $1 does not match the accepted syntax
# Returns:
#   0 when the range is valid, 2 when it is malformed or start > end
#######################################
check_range() {
  local spec=$1
  local range_re='^@?((~:[0-9]*$)|([0-9]+:?[0-9]*$))'
  # Stand-in for "no upper bound". The previous value (65535) is a TCP port
  # limit, not a connection-count limit, and can be exceeded on busy hosts.
  local unbounded=9223372036854775807

  mflag=0
  if ! [[ $spec =~ $range_re ]]; then
    echo "invalid range"
    return 2
  fi

  if [[ $spec == @* ]]; then
    mflag=1
    range=${spec#@}
  else
    range=$spec
  fi

  if [[ $range == *:* ]]; then
    start=${range%%:*}
    end=${range#*:}
    # "~" means negative infinity. It used to be rejected right after being
    # translated, which made the documented "~:N" form unusable.
    if [[ $start == "~" ]]; then
      start=-999999
    fi
    if [[ -z $end ]]; then
      end=$unbounded
    fi
  else
    start=0
    end=$range
  fi

  # `[` (not `[[`/`((`) so that values with leading zeros stay decimal.
  if [ "$start" -gt "$end" ]; then
    return 2
  fi
  return 0
}
#######################################
# Parse the plugin's command-line arguments.
#
# Globals written:
#   versionflag, helpflag            set to 1 when -V / -h is given
#   warn_start, warn_end, warn_mflag         parsed -w range
#   critical_start, critical_end, critical_mflag   parsed -c range
#   status                           lower-case TCP state name from -s
#   timeout                          value of -t (positive integer)
#   wcount, ccount, scount           how many times -w / -c / -s were seen
#   (plus start, end, mflag via check_range)
# Arguments:
#   the script's positional parameters
# Returns:
#   0 when every argument was understood,
#   1 on no arguments, an unknown option, a missing value or a bad value
#######################################
select_arg() {
  if [ $# -eq 0 ]; then
    return 1
  fi

  local timeout_re='^[1-9][0-9]*$'
  wcount=0
  ccount=0
  scount=0

  while [ $# -gt 0 ]; do
    case "$1" in
      -V|--version)
        versionflag=1
        shift
        ;;
      -h|--help)
        helpflag=1
        shift
        ;;
      -w|--warning)
        [ $# -lt 2 ] && return 1
        check_range "$2" || return 1
        warn_start=$start
        warn_end=$end
        warn_mflag=$mflag
        shift 2
        wcount=$((wcount + 1))
        ;;
      -c|--critical)
        [ $# -lt 2 ] && return 1
        check_range "$2" || return 1
        critical_start=$start
        critical_end=$end
        critical_mflag=$mflag
        shift 2
        ccount=$((ccount + 1))
        ;;
      -s|--status)
        [ $# -lt 2 ] && return 1
        case "$2" in
          established|ESTABLISHED) status=established ;;
          time_wait|TIME_WAIT)     status=time_wait ;;
          syn_recv|SYN_RECV)       status=syn_recv ;;
          fin_wait1|FIN_WAIT1)     status=fin_wait1 ;;
          # Was "fin_wait1|FIN_WAIT2", so lower-case fin_wait2 was rejected.
          fin_wait2|FIN_WAIT2)     status=fin_wait2 ;;
          last_ack|LAST_ACK)       status=last_ack ;;
          close_wait|CLOSE_WAIT)   status=close_wait ;;
          *) return 1 ;;
        esac
        shift 2
        scount=$((scount + 1))
        ;;
      -t|--timeout)
        [ $# -lt 2 ] && return 1
        [[ $2 =~ $timeout_re ]] || return 1
        timeout=$2
        # Consume the option and its value; without this shift the loop
        # never advanced and the script hung whenever -t was used.
        shift 2
        ;;
      *)
        return 1
        ;;
    esac
  done
  return 0
}
# Print the count found for TCP state $2 in the "STATE COUNT" lines held in
# $1; prints 0 when the state does not appear.
_alarm_state_count() {
  printf '%s\n' "$1" | awk -v state="$2" '$1 == state { n = $2 } END { print n + 0 }'
}

# Succeed when value $1 does NOT raise an alert for the range $2..$3.
# $4 is the "@" flag: 0 = alert outside the range, 1 = alert inside the
# range (both endpoints belong to the range in either case).
_alarm_within_limits() {
  local value=$1 low=$2 high=$3 alert_inside=$4
  if [ "$alert_inside" -eq 1 ]; then
    [ "$value" -lt "$low" ] || [ "$value" -gt "$high" ]
  else
    [ "$value" -ge "$low" ] && [ "$value" -le "$high" ]
  fi
}

#######################################
# Count TCP connections per state with netstat, compare the selected state
# against the warning / critical ranges, print the result and exit.
#
# Globals read:
#   status                                  name of the state to check
#   warn_start, warn_end, warn_mflag        warning range
#   critical_start, critical_end, critical_mflag   critical range
# Globals written:
#   established, time_wait, syn_recv, fin_wait1, fin_wait2, last_ack,
#   close_wait, exitcode, serviceoutput
# Outputs:
#   status line followed by one "name count" entry per state on stdout
# Exits (does not return):
#   0 OK, 1 warning range violated, 2 critical range violated,
#   3 a range is violated and the checked count is 0
#######################################
alarm() {
  local connect current label

  # One "STATE COUNT" line per non-LISTEN TCP state.
  connect=$(netstat -ant | awk '/^tcp/ && !/LISTEN/{S[$NF]++}END{for(i in S) print i,S[i]}')

  # The summary is passed quoted so it keeps its line structure. Unquoted,
  # all lines collapsed into one and every state got the first state's count.
  established=$(_alarm_state_count "$connect" ESTABLISHED)
  time_wait=$(_alarm_state_count "$connect" TIME_WAIT)
  syn_recv=$(_alarm_state_count "$connect" SYN_RECV)
  fin_wait1=$(_alarm_state_count "$connect" FIN_WAIT1)
  fin_wait2=$(_alarm_state_count "$connect" FIN_WAIT2)
  last_ack=$(_alarm_state_count "$connect" LAST_ACK)
  close_wait=$(_alarm_state_count "$connect" CLOSE_WAIT)

  # $status holds the NAME of one of the variables above.
  current=${!status}

  # Later checks deliberately override earlier ones: critical beats warning.
  exitcode=0
  if ! _alarm_within_limits "$current" "$warn_start" "$warn_end" "$warn_mflag"; then
    exitcode=1
  fi
  if ! _alarm_within_limits "$current" "$critical_start" "$critical_end" "$critical_mflag"; then
    exitcode=2
  fi
  # NOTE(review): a violated threshold with a count of 0 is reported as
  # UNKNOWN rather than WARNING/CRITICAL. This is kept from the original,
  # presumably to flag a failed netstat; confirm that this is intended.
  if [ "$exitcode" -ne 0 ] && [ "$current" -le 0 ]; then
    exitcode=3
  fi

  case $exitcode in
    0) label=OK ;;
    1) label=Warning ;;
    2) label=Critical ;;
    3) label=Unknown ;;
  esac
  serviceoutput="$status $label - total:$current"

  # NOTE(review): the text after "|" is "name value;" rather than the
  # label=value form described in the Nagios guidelines; left unchanged so
  # existing consumers of this output keep working.
  printf '%s;| established %s;\ntime_wait %s;\nsyn_recv %s;\nfin_wait1 %s;\nfin_wait2 %s;\nlast_ack %s;\nclose_wait %s\n' \
    "$serviceoutput" "$established" "$time_wait" "$syn_recv" \
    "$fin_wait1" "$fin_wait2" "$last_ack" "$close_wait"
  exit "$exitcode"
}
# ---------------------------------------------------------------------------
# Main flow: parse arguments, handle -V / -h, then run the check in the
# background under a watchdog that enforces the timeout.
# ---------------------------------------------------------------------------

# Quoted "$@" keeps every argument intact (no word splitting or globbing).
if ! select_arg "$@"; then
  usage
  exit "$STATE_UNKNOWN"
fi

# -V takes precedence over -h; both exit successfully without checking.
if [[ -n $versionflag ]]; then
  if [ "$versionflag" -eq 1 ]; then
    print_revision
    exit "$STATE_OK"
  fi
elif [[ -n $helpflag ]] && [ "$helpflag" -eq 1 ]; then
  usage
  exit "$STATE_OK"
fi

# -c, -w and -s must each be given exactly once.
if [ "$ccount" -ne 1 ] || [ "$wcount" -ne 1 ] || [ "$scount" -ne 1 ]; then
  usage
  exit "$STATE_UNKNOWN"
fi

# Default timeout: 10 seconds.
timeout=${timeout:-10}

# Run the check in the background so it can be killed if it takes too long.
alarm &
commandpid=$!

# Watchdog: after $timeout seconds kill the check's children, then the check.
# (This was a single line broken in the middle of "$commandchild", which is
# a syntax error; it is written out as a proper multi-line subshell here.)
(
  sleep "$timeout"
  commandchild=$(ps -eo pid,ppid | awk -v parent="$commandpid" '$2 == parent { print $1 }')
  for b in $commandchild; do
    kill -9 "$b" &>/dev/null
  done
  kill -9 "$commandpid" &>/dev/null
) &
watchdog=$!

# Collect the check's exit status; anything outside 0..3 (for example a
# check killed by the watchdog) is reported as UNKNOWN.
wait "$commandpid" &>/dev/null
pexitcode=$?
[ "$pexitcode" -gt 3 ] && pexitcode=3

# The check is done: stop the watchdog and its pending sleep.
watchdogchild=$(ps -eo pid,ppid | awk -v parent="$watchdog" '$2 == parent { print $1 }')
for a in $watchdogchild; do
  kill -9 "$a" &>/dev/null
done
kill -9 "$watchdog" &>/dev/null
exit $pexitcode
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。