Nagios-Plugin für Oracle Grid-Infrastructure und Oracle-Restart

Veröffentlicht am Januar 19, 2013

Hallo,
neben dem Thema Virtualisierung beschäftige ich mich noch viel mit Oracle Grid-Infrastructure und Nagios.
Ich habe mir heute mal die Zeit genommen und ein entsprechendes Plugin geschrieben.

Getestet habe ich das Skript mit Oracle Restart 11.2.0.3 und Grid-Infrastructure 11.2.0.3. Es wird mindestens die Version 11.2.0.1 benötigt, da mit der Grid-Infrastructure die Syntax von crsctl grundlegend geändert wurde.

Folgende Punkte werden im Plugin berücksichtigt:

  • Support für Oracle Restart und Grid-Infrastructure
    wird automatisch erkannt
  • Anzahl Voting-Disks bei Grid-Infrastructure überwachen
    verlorene Voting-Disk führt zur CRITICAL-Meldung
    Überwachung der Voting-Disks nur bei Grid-Infrastructure
  • Resourcen im Status OFFLINE oder INTERMEDIATE die nicht im AUTO_START=never sind führen zum CRITICAL
  • ora.gsd und ora.ons werden bei der Statusauswertung ignoriert
  • sudo für Clusterbefehle erforderlich

Im Kopfbereich des Plugins befindet sich ein Beispiel für die sudo-Konfiguration

Parallel habe ich das Plugin auf http://exchange.nagios.org eingereicht, warte dort aber noch auf die Veröffentlichung.

Gruß
Thorsten

Update: 23.01.2012
Das Plugin steht nun auch auf Nagiosexchange zur Verfügung:
http://exchange.nagios.org/directory/Plugins/Clustering-and-High-2DAvailability/Check-Oracle-Grid-2DInfrastructure-or-Oracle-Restart/details

[bash]
#!/bin/bash
#
# $Id: check_gridinfra 634 2013-01-19 19:51:46Z tbr $
#
# Copyright 2013 (c) Thorsten Bruhns (tbruhns@gmx.de)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# This plugins searchs for Resources in crs in State OFFLINE or INTERMEDIATE
# Resources with STARTMODE=never are ignored. Same applies for ora.ons and ora.gds.
# The number of voting-disks is checked. 1, 3 or 5 votedisks are expected!
# The plugin is aware of Grid Infrastructure and Oracle Restart. The check for
# votedisks are skipped in an Oracle Restart Environment!
#
# This plugin need sudo to work with nrpe
# the nrpe-owner will use the following commands with sudo:

# sudo-User command
# CRS_OWNER $CRS_HOME/bin/crsctl
# CRS_OWNER $CRS_HOME/bin/orabase
# CRS_OWNER $CRS_HOME/bin/ocrcheck
# CRS_OWNER $CRS_HOME/bin/ocrconfig

# we do only a small check of running processes due to crsctl will
# return a hughe amount of errors when crsd isn't running
#
# example sudo-setup for nrpe:
#Defaults:nrpe !requiretty
#Cmnd_Alias CRSCTL = /u01/app/11.2.0.3/grid/bin/crsctl
#Cmnd_Alias ORABASE = /u01/app/11.2.0.3/grid/bin/orabase
#Cmnd_Alias OCRCHECK = /u01/app/11.2.0.3/grid/bin/ocrcheck
#Cmnd_Alias OCRCONFIG = /u01/app/11.2.0.3/grid/bin/ocrconfig
#User_Alias NRPE_ADMIN = nagios, nrpe
#NRPE_ADMIN ALL = (oracle) NOPASSWD: CRSCTL, ORABASE, OCRCONFIG, OCRCHECK

PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

PROGNAME=`basename $0`
PROGPATH=`echo $0 | sed -e 's,[/][^/][^/]*$,,'`
REVISION=$Version$

GI_Filter="(STATE = OFFLINE) AND (TYPE != ora.ons.type) AND (AUTO_START != never)"

check_sudo() {
sudo -l > /dev/null 2>&1
retcode=${?}
if [ ${retcode} -ne 0 ]
then
echo "CRITICAL - sudo not working for: sudo -l "
exit 2
fi
}

get_gi_type() {
# Do we have Oracle Restart or full Grid Infrastructure?
# Oracle Restart => OCR-Location is $ORACLE_HOME/cdata/localhost/local.ocr
${OCRCHECKCMD} -config | grep $ORACLE_HOME/cdata/localhost/local.ocr > /dev/null
retcode=${PIPESTATUS[1]}

if [ ${retcode} -eq 0 ]
then
# We have Oracle Restart
export ORACRS_TYPE=Restart
else
# We have a full Oracle Grid Infrastructure Environment!
export ORACRS_TYPE=GridInfra
fi
}

check_votedisks() {
# Check for votedisks only needed on real Grid-Infrastructure!
if [ ${ORACRS_TYPE} = 'GridInfra' ]
then
# we are on a real Grid-Infrastructure
# We need 1,3 or 5 voting discs
# Cluster won't work if we have less then number of votedisks / 2
# A cluster with 3 needed disks won't work with 1. the same aplies for 5.
# => Easy check possible, because 2 or 4 ONLINE-Votedisks is an error!
votecount=`${CRSCTLCMD} query css votedisk| cut -b5- | grep ^ONLINE | wc -l`
if [ ${votecount:-0} -eq 1 -o ${votecount:-0} -eq 3 -o ${votecount:-0} -eq 5 ]
then
# Votecount is ok
# => we can step to Resourcecheck of the cluster!
votedisksok=yes
else
echo "CRITICAL - Number of Votingdisks not ok. check votedisks with 'crsctl query css votedisk' Count "${votecount:-1}
exit 2
fi

fi
}

get_CRSOWNER() {
# who is clusterwareowner?
# User of running ASM-Instance is clusterware-Owner!
ORACRSOWNER=`ps -elf | grep asm_pmon_+ASM | grep -v " grep asm_pmon_+ASM" | cut -d" " -f3`
retcode=${PIPESTATUS[1]}
if [ ${ORACRSOWNER:-leer} = 'leer' ]
then
# We can't get the owner of ASM. mostly this is due to a down ASM-Instance
# Cluster won't work without ASM!
echo "CRITICAL - ASM is down or unknown CRSOWNER is unknown,"
exit 2
else
export ORACRSOWNER
fi

}

set_env()
{
check_sudo

ORATAB=/etc/oratab

# set environment from /etc/oratab
# 1st line with +ASM will be used for CRS_HOME
ORACLE_SID=`grep "^+ASM" ${ORATAB} |cut -d":" -f1`
if [ ${?} -ne 0 ]
then
echo "CRITICAL - ASM-Environment can't be found in oratab!"
exit 2
else
export ORACLE_SID

# getting ORACLE_HOME from oratab
ORACLE_HOME=`cat ${ORATAB} | grep "^"${ORACLE_SID} | cut -d":" -f2`
export ORACLE_HOME

get_CRSOWNER

# if we have a grid infrastructure or oracle restart we get the ORACLE_BASE with
# a executable from Oracle!
ORACLE_BASE=`sudo -n -u ${ORACRSOWNER} ${ORACLE_HOME}/bin/orabase`
export ORACLE_BASE

PATH=${PATH}:${ORACLE_HOME}/bin
export PATH
fi

CRSCTLCMD=$ORACLE_HOME/bin/crsctl
CRSCTLCMD="sudo -n -u "${ORACRSOWNER}" "${CRSCTLCMD}
OCRCHECKCMD="sudo -n -u "${ORACRSOWNER}" "$ORACLE_HOME/bin/ocrcheck

}

print_usage() {
echo "Usage: $PROGNAME"
}

print_help() {
echo ""
print_usage
echo ""
echo "This plugin checks a running Oracle Grid-Infrastructure for OFFLINE und INTERMEDIATE-Resources."
echo ""
exit 0
}

check_grid_infra() {
execcmd="$CRSCTLCMD stat res -t -w "
count=`$execcmd "((STATE = OFFLINE) OR (STATE = INTERMEDIATE)) AND (TYPE != ora.ons.type) AND (TYPE != ora.gsd.type) AND (AUTO_START != never)" | wc -l`
# retcode and cound=0 => all is ok!
retcode=${PIPESTATUS[0]}
if [ ${retcode:-1} -ne 0 ]
then
echo "Error while executing crsctl. Returencode="${retcode}
else
if [ ${count:-1} -eq 0 ]
then
# all resources are fine!
echo "OK - All Resources are ONLINE!"
exit 0
else
execcmd="$CRSCTLCMD stat res -w "
result=`$execcmd "((STATE = OFFLINE) OR (STATE = INTERMEDIATE)) AND (TYPE != ora.ons.type) AND (TYPE != ora.gsd.type) AND (AUTO_START != never)" | egrep "^NAME=|^LAST_SERVER=|^STATE="| cut -d"=" -f2- `
echo "CRITICAL - Problems with some Resources! Check with 'crsctl stat res -t'. Details: "$result
exit 2
fi
fi

}

set_env $*

case "$1" in
--help)
print_help
exit 0
;;
-h)
print_help
exit 0
;;
*)
get_gi_type
check_votedisks
check_grid_infra
;;
esac

[/bash]

Schreibe einen Kommentar

Deine E-Mail-Adresse wird nicht veröffentlicht. Erforderliche Felder sind mit * markiert.

© 2011-2019 SJT CONSULTING – Alle Rechte vorbehalten. | Datenschutz | Impressum