Files
VSC/Icinga2/Monitoring/003 - IPMI/MIB/check_ipmi.sh
T
claudio 368d6fafea Issue
Code backup
2026-05-10 16:59:01 +02:00

1047 lines
33 KiB
Bash

#!/bin/bash
# - VAR
# - Bash info
# Name of the script as invoked (used in the usage text). "$0" is quoted and
# preceded by "--" so a path containing spaces or starting with a dash reaches
# basename as one plain operand.
APPNAME=$(basename -- "$0")
NAME="Check IPMI"
AUTHOR="Kalarumeth"
VERSION="v0.2"
URL="https://github.com/Kalarumeth"
# - Default settings for connection
# Used by snmp(); overridable on the command line with -c / -h.
COMMUNITY="public"
HOST_NAME="localhost"
SNMPVERSION="2c"
# - State Variables
# Exit codes the check functions store in STATE and the script exits with.
STATE_OK=0
STATE_WARN=1
STATE_CRIT=2
STATE_UNK=3
STATE=$STATE_OK
# - Range Variables
# Warning / critical thresholds; overridable with -wa / -cr.
WA=60
CR=70
# - OID
# Load the OIDs queried by the IPMI checks into global OID_* variables.
IPMI.OIDS() {
  # Numeric prefixes shared by the OIDs below.
  local ucd_root="1.3.6.1.4.1.2021"
  local obj_root="1.3.6.1.4.1.21317.1.3.1"

  # - memTotal
  OID_memTotalReal="${ucd_root}.4.5"
  OID_memTotalFree="${ucd_root}.4.11"
  # - ssCpu
  OID_ssCpuSystem="${ucd_root}.11.10"
  # - snmperrError
  OID_snmperrErrorFlag="${ucd_root}.101.100"
  OID_snmperrErrorMessage="${ucd_root}.101.101"
  # - obj
  OID_objName="${obj_root}.13"
  OID_objValue="${obj_root}.2"
}
# Load the OIDs queried by the iDRAC checks into global OID_* variables.
IDRAC.OIDS() {
  # Numeric prefix shared by every OID below.
  local root=".1.3.6.1.4.1.674.10892.5"

  # - systemInfo
  OID_productType="${root}.4.300.10.1.9.1"
  OID_serviceTag="${root}.4.300.10.1.11.1"
  # - globalDevice
  OID_globalSystem="${root}.2.1.0"
  OID_systemLcd="${root}.2.2.0"
  OID_globalStorage="${root}.2.3.0"
  OID_systemPower="${root}.2.4.0"
  OID_systemPowerUpTime="${root}.2.5.0"
  # - namesDevice
  OID_namePowerUnit="${root}.4.600.12.1.8"
  OID_nameChassisIntrusion="${root}.4.300.70.1.8"
  OID_nameCoolingUnit="${root}.4.700.10.1.7"
  OID_nameDrive="${root}.5.1.20.130.4.1.2"
  # - statusDevice
  OID_statusPowerUnit="${root}.4.600.12.1.5"
  OID_statusChassisIntrusion="${root}.4.300.70.1.5"
  OID_statusCoolingUnit="${root}.4.700.10.1.8"
  OID_statusDrive="${root}.5.1.20.130.4.1.4"
  OID_statusPredictiveDrive="${root}.5.1.20.130.4.1.31"
  # - temperatureDevice
  OID_temperatureProbeStatus="${root}.4.700.20.1.5"
  OID_temperatureProbeReading="${root}.4.700.20.1.6"
  OID_temperatureProbeLocation="${root}.4.700.20.1.8"
}
# - MAIN CODE
# Ping every host listed in HOST_NAME once (1 s timeout).
# Prints "<host> is unreachable" and exits with STATE_UNK on the first host
# that does not answer; returns normally when all hosts answer.
Script.HostAlive() {
  local server
  # HOST_NAME is expanded unquoted so a space-separated list is iterated
  # host by host; each resulting word is then passed to ping quoted so it is
  # not split or globbed a second time.
  for server in $HOST_NAME; do
    if ! ping -c1 -W1 -q "$server" &>/dev/null; then
      printf "%s\n" "$server is unreachable"
      exit $STATE_UNK
    fi
  done
}
snmp() {
snmpwalk -v $SNMPVERSION -Oe -c $COMMUNITY $HOST_NAME $1
}
# - IPMI Health Check
# Entry point for "-ipmi <check>": run the selected check and exit with STATE.
# Arguments: $1 - check name (all | cpu | err | fan | hw | ram | temp).
# An unrecognised name prints the IPMI help and exits with STATE_UNK.
IPMI.Main() {
  local handler
  case "$1" in
    all)  handler=IPMI.Full ;;
    cpu)  handler=IPMI.Cpu ;;
    err)  handler=IPMI.Error ;;
    fan)  handler=IPMI.Fan ;;
    hw)   handler=IPMI.Hardware ;;
    ram)  handler=IPMI.Ram ;;
    temp) handler=IPMI.Temperature ;;
    *)
      echo "Unknown check!"
      Help.IPMI
      exit $STATE_UNK
      ;;
  esac
  # The handler updates STATE as a side effect.
  "$handler"
  exit $STATE
}
# Query the IPMI device over SNMP and store the parsed result in globals.
# Arguments: $1 - data set to fetch:
#   obj   -> name[] (quoted names) and value[] (integer part of each reading)
#   cpu   -> value
#   ram   -> RAM_ALL, RAM_FRE, RAM_USE (raw) and the derived RAM_ALLK,
#            RAM_FREK, RAM_USEK, RAM_PERC (free %), RAM_UPERC (used %),
#            RAM_P (integer part of the used %)
#   error -> flag, message
IPMI.GetData() {
  IPMI.OIDS
  case $1 in
    obj)
      raw_name=$(snmp "$OID_objName" | cut -d '"' -f 2)
      # Quoted here-string: one array element per line, whatever the bash
      # version does with unquoted here-strings.
      readarray -t name <<< "$raw_name"
      # Word splitting is intended here: one array element per reading.
      value=($(snmp "$OID_objValue" | cut -d '"' -f 2 | cut -d '.' -f 1))
      ;;
    cpu)
      value=$(snmp "$OID_ssCpuSystem" | cut -d ' ' -f 4)
      ;;
    ram)
      RAM_ALL=$(snmp "$OID_memTotalReal" | cut -d " " -f4)
      RAM_FRE=$(snmp "$OID_memTotalFree" | cut -d " " -f4)
      RAM_ALLK=$(echo "$RAM_ALL" | awk '{ kbyte = $1 /1024/1024; print kbyte }' | xargs printf "%.2f")
      RAM_FREK=$(echo "$RAM_FRE" | awk '{ kbyte = $1 /1024/1024; print kbyte }' | xargs printf "%.2f")
      RAM_PERC=$(echo "$RAM_FRE" "$RAM_ALL" | awk '{ ramp = $1 /$2 *100; print ramp }' | xargs printf "%.2f")
      RAM_UPERC=$(echo "$RAM_PERC" | awk '{ ramup = 100 - $1; print ramup }')
      RAM_P=$(echo "$RAM_UPERC" | cut -d "." -f1)
      RAM_USE=$(echo "$RAM_ALL" "$RAM_FRE" | awk '{ used = $1 -$2; print used }')
      RAM_USEK=$(echo "$RAM_USE" | awk '{ kbyte = $1 /1024/1024; print kbyte }' | xargs printf "%.2f")
      ;;
    error)
      flag=$(snmp "$OID_snmperrErrorFlag" | cut -d ' ' -f 4)
      # "-f 4-" keeps the whole message; "-f 4" kept only its first word.
      message=$(snmp "$OID_snmperrErrorMessage" | cut -d ' ' -f 4-)
      ;;
  esac
}
# Print one OK/CRIT line per hardware object reported by the device.
# Objects whose name contains "Intru" are healthy at 0 / 0x00, objects whose
# name contains "Status" are healthy at 1 / 0x01; every other object is
# ignored. Any CRIT line sets STATE to STATE_CRIT.
# Globals: NoHeader (read) - when 1 the summary header is not printed.
IPMI.Hardware() {
  IPMI.GetData obj
  if [[ $NoHeader != 1 ]]; then
    IPMI.Hardware.Header
  fi

  local idx label reading healthy healthy_hex
  for idx in "${!name[@]}"; do
    label=${name[idx]}
    reading=${value[idx]}
    # Pick the value that means "healthy" for this kind of object.
    case $label in
      *Intru*)  healthy=0; healthy_hex="0x00" ;;
      *Status*) healthy=1; healthy_hex="0x01" ;;
      *)        continue ;;
    esac
    if [[ $reading == "$healthy" || $reading == "$healthy_hex" ]]; then
      printf "%s\t%s\n" "OK" "$label"
    else
      printf "%s\t%s\n" "CRIT" "$label"
      STATE=$STATE_CRIT
    fi
  done
}
# Print the summary line and table header for the hardware check.
# Counts, in the global ErrorCounter, every "Intru" object that is not
# 0 / 0x00 and every "Status" object that is not 1 / 0x01.
# Globals: name[], value[] (read); ErrorCounter (written).
IPMI.Hardware.Header() {
  ErrorCounter=0
  local idx
  for idx in "${!name[@]}"; do
    case ${name[idx]} in
      *Intru*)
        [[ ${value[idx]} == 0 || ${value[idx]} == "0x00" ]] \
          || ErrorCounter=$((ErrorCounter + 1))
        ;;
      *Status*)
        [[ ${value[idx]} == 1 || ${value[idx]} == "0x01" ]] \
          || ErrorCounter=$((ErrorCounter + 1))
        ;;
    esac
  done

  if ((ErrorCounter == 0)); then
    printf "%s\t%s\n\n" "OK!" "All is Working"
  else
    printf "%s\t%s\n\n" "CRITICAL!" "$ErrorCounter Obj Not Working"
  fi
  printf "%s\t%s\n%s\n" "STATUS" "NAME" "======================"
}
# List every object whose name contains "FAN" with its reading.
# A reading of 0 is shown as N/A, anything else as "OK <name> <value> RPM".
# Globals: NoHeader (read) - when 1 the fixed header lines are not printed.
IPMI.Fan() {
  IPMI.GetData obj
  if [[ $NoHeader != 1 ]]; then
    printf "%s\t%s\n\n" "OK!" "All Fan is Working"
    printf "%s\t%s\t%s\n%s\n" "STATUS" "NAME" "VALUE" "========================="
  fi

  local idx rpm
  for idx in "${!name[@]}"; do
    [[ ${name[idx]} == *"FAN"* ]] || continue
    rpm=${value[idx]}
    if [[ $rpm == 0 ]]; then
      printf "%s\t%s\n" "N/A" "${name[idx]}"
    else
      printf "%s\t%s\t%s\n" "OK" "${name[idx]}" "$rpm RPM"
    fi
  done
}
# Print one line per temperature sensor (objects whose name contains "Temp").
#   reading <  WA        -> OK
#   WA <= reading <  CR  -> WARN
#   reading >= CR        -> CRIT
# Readings of 0 and non-integer readings are skipped.
# Globals: WA, CR, NoHeader (read); STATE (raised, never lowered).
IPMI.Temperature() {
  IPMI.GetData obj
  if [[ $NoHeader != 1 ]]; then
    IPMI.Temperature.Header
  fi

  local int_re='^-?(0|[1-9][0-9]*)$'
  local idx reading
  for idx in "${!name[@]}"; do
    [[ ${name[idx]} == *"Temp"* ]] || continue
    reading=${value[idx]}
    [[ $reading =~ $int_re ]] || continue
    if ((reading == 0)); then
      continue
    fi
    # Numeric comparison: inside [[ ]] the < and > operators compare strings,
    # which ranked "9" above "60" and "100" below it.
    if ((reading < WA)); then
      printf "%s\t%s\t%s\n" "OK" "${name[idx]}" "$reading °C"
    elif ((reading < CR)); then
      printf "%s\t%s\t%s\n" "WARN" "${name[idx]}" "$reading °C"
      # Only raise: a WARN sensor must not hide an earlier CRIT.
      if ((STATE < STATE_WARN)); then
        STATE=$STATE_WARN
      fi
    else
      printf "%s\t%s\t%s\n" "CRIT" "${name[idx]}" "$reading °C"
      if ((STATE < STATE_CRIT)); then
        STATE=$STATE_CRIT
      fi
    fi
  done
}
# Print the summary line and table header for the temperature check.
# Counts sensors (objects whose name contains "Temp") in the warning range
# (WA <= reading < CR) and in the critical range (reading >= CR).
# Non-integer readings are not counted.
# Globals: name[], value[], WA, CR (read);
#          WarningCounter, CriticalCounter (written).
IPMI.Temperature.Header() {
  WarningCounter=0
  CriticalCounter=0

  local int_re='^-?(0|[1-9][0-9]*)$'
  local idx reading total
  for idx in "${!name[@]}"; do
    [[ ${name[idx]} == *"Temp"* ]] || continue
    reading=${value[idx]}
    [[ $reading =~ $int_re ]] || continue
    # Numeric comparison; [[ a < b ]] would compare the readings as strings.
    if ((reading >= CR)); then
      CriticalCounter=$((CriticalCounter + 1))
    elif ((reading >= WA)); then
      WarningCounter=$((WarningCounter + 1))
    fi
  done

  total=$((WarningCounter + CriticalCounter))
  if ((total == 0)); then
    printf "%s\t%s\n\n" "OK!" "All Temperature is in range"
  elif ((CriticalCounter == 0)); then
    printf "%s\t%s\n\n" "WARNING!" "$total Temperature is out range"
  else
    printf "%s\t%s\n\n" "CRITICAL!" "$total Temperature is out range"
  fi
  printf "%s\t%s\t%s\t%s\n%s\n" "STATUS" "NAME" "" "VALUE" "=============================="
}
# Report the CPU load against the WA / CR thresholds.
#   load <  WA        -> OK
#   WA <= load <  CR  -> WARN
#   load >= CR        -> CRIT
# With NoHeader=1 the short tags (OK/WARN/CRIT) are printed, otherwise the
# long ones (OK!/WARNING!/CRITICAL!).
# Globals: value (set by IPMI.GetData), WA, CR, NoHeader (read);
#          STATE (raised, never lowered).
IPMI.Cpu() {
  IPMI.GetData cpu

  # Without a numeric reading nothing can be compared: report it rather than
  # failing inside the arithmetic and leaving STATE at OK.
  if [[ ! $value =~ ^[0-9]+$ ]]; then
    printf "%s\t%s\n" "UNKNOWN" "CPU used: no valid SNMP reading"
    if ((STATE == STATE_OK)); then
      STATE=$STATE_UNK
    fi
    return
  fi

  local level tag
  local -i load=$((10#$value))
  if ((load < WA)); then
    level=$STATE_OK
    tag="OK"
  elif ((load < CR)); then
    level=$STATE_WARN
    tag="WARN"
  else
    level=$STATE_CRIT
    tag="CRIT"
  fi

  if [[ $NoHeader != 1 ]]; then
    case $tag in
      OK)   tag="OK!" ;;
      WARN) tag="WARNING!" ;;
      CRIT) tag="CRITICAL!" ;;
    esac
  fi
  printf "%s\t%s\n" "$tag" "CPU used: $value%"

  # Only raise STATE: when run as part of "all", a WARN here must not
  # overwrite a CRIT recorded by an earlier check.
  if ((level > STATE)); then
    STATE=$level
  fi
}
# Report RAM usage against the WA / CR thresholds, using RAM_P (integer part
# of the used percentage).
#   RAM_P <  WA        -> OK
#   WA <= RAM_P <  CR  -> WARN
#   RAM_P >= CR        -> CRIT
# With NoHeader=1 one line with the short tag is printed, otherwise the long
# tag plus a second "RAM free" line.
# Globals: RAM_* (set by IPMI.GetData), WA, CR, NoHeader (read);
#          STATE (raised, never lowered).
IPMI.Ram() {
  IPMI.GetData ram

  # Without a numeric percentage nothing can be compared: report it rather
  # than failing inside the arithmetic and leaving STATE at OK.
  if [[ ! $RAM_P =~ ^[0-9]+$ ]]; then
    printf "%s\t%s\n" "UNKNOWN" "RAM used: no valid SNMP reading"
    if ((STATE == STATE_OK)); then
      STATE=$STATE_UNK
    fi
    return
  fi

  local level tag
  local -i used=$((10#$RAM_P))
  if ((used < WA)); then
    level=$STATE_OK
    tag="OK"
  elif ((used < CR)); then
    level=$STATE_WARN
    tag="WARN"
  else
    level=$STATE_CRIT
    tag="CRIT"
  fi

  if [[ $NoHeader != 1 ]]; then
    case $tag in
      OK)   tag="OK!" ;;
      WARN) tag="WARNING!" ;;
      CRIT) tag="CRITICAL!" ;;
    esac
    printf "%s\t%s\n" "$tag" "RAM used: $RAM_USEK / $RAM_ALLK GB ($RAM_UPERC%)"
    printf "%s\t%s\n" "" "RAM free: $RAM_FREK GB ($RAM_PERC%)"
  else
    printf "%s\t%s\n" "$tag" "RAM used: $RAM_USEK / $RAM_ALLK GB ($RAM_UPERC%)"
  fi

  # Only raise STATE: when run as part of "all", a WARN here must not
  # overwrite a CRIT recorded by an earlier check.
  if ((level > STATE)); then
    STATE=$level
  fi
}
# Report the SNMP error flag.
# A flag containing "0" prints "No Error Found"; anything else prints the
# flag and the message on two lines and sets STATE to STATE_CRIT.
# Globals: flag, message (set by IPMI.GetData), NoHeader (read); STATE.
IPMI.Error() {
  IPMI.GetData error

  # Long tags by default, short tags when the header is suppressed.
  local ok_tag="OK!" crit_tag="CRITICAL!"
  if [[ $NoHeader == 1 ]]; then
    ok_tag="OK"
    crit_tag="CRIT"
  fi

  if [[ $flag != *"0"* ]]; then
    # Four arguments against a two-field format: printf emits two lines.
    printf "%s\t%s\n" "$crit_tag" "Error Found: $flag" "Message" "$message"
    STATE=$STATE_CRIT
    return
  fi
  printf "%s\t%s\n" "$ok_tag" "No Error Found"
}
# Run every IPMI check in one go: overall summary, list of problems, then
# each individual check with its own header suppressed (NoHeader=1).
IPMI.Full() {
  IPMI.Full.Header
  IPMI.Full.Error
  printf "\n\n\n%s\n\n" "All Check Run"

  NoHeader=1
  local check
  for check in IPMI.Error IPMI.Cpu IPMI.Ram IPMI.Hardware IPMI.Temperature IPMI.Fan; do
    "$check"
  done
}
# Print the one-line overall summary for the "all" check.
# Counts problems across hardware objects, temperature sensors, the SNMP
# error flag, CPU load and RAM usage:
#   critical: "Intru" object not 0/0x00, "Status" object not 1/0x01,
#             error flag without "0", any reading >= CR
#   warning:  any reading with WA <= reading < CR
# Non-numeric readings are not counted.
# Globals: WA, CR (read); WarningCounter, CriticalCounter (written).
IPMI.Full.Header() {
  WarningCounter=0
  CriticalCounter=0

  local int_re='^-?(0|[1-9][0-9]*)$'
  local idx label reading total

  IPMI.GetData obj
  for idx in "${!name[@]}"; do
    label=${name[idx]}
    reading=${value[idx]}
    if [[ $label == *"Intru"* ]]; then
      if [[ $reading != 0 && $reading != "0x00" ]]; then
        CriticalCounter=$((CriticalCounter + 1))
      fi
    elif [[ $label == *"Status"* ]]; then
      if [[ $reading != 1 && $reading != "0x01" ]]; then
        CriticalCounter=$((CriticalCounter + 1))
      fi
    fi
    # Temperatures are compared numerically; [[ a < b ]] compares strings.
    if [[ $label == *"Temp"* && $reading =~ $int_re ]]; then
      if ((reading >= CR)); then
        CriticalCounter=$((CriticalCounter + 1))
      elif ((reading >= WA)); then
        WarningCounter=$((WarningCounter + 1))
      fi
    fi
  done

  IPMI.GetData error
  if [[ $flag != *"0"* ]]; then
    CriticalCounter=$((CriticalCounter + 1))
  fi

  IPMI.GetData cpu
  if [[ $value =~ ^[0-9]+$ ]]; then
    if ((10#$value >= CR)); then
      CriticalCounter=$((CriticalCounter + 1))
    elif ((10#$value >= WA)); then
      WarningCounter=$((WarningCounter + 1))
    fi
  fi

  IPMI.GetData ram
  if [[ $RAM_P =~ ^[0-9]+$ ]]; then
    if ((10#$RAM_P >= CR)); then
      CriticalCounter=$((CriticalCounter + 1))
    elif ((10#$RAM_P >= WA)); then
      WarningCounter=$((WarningCounter + 1))
    fi
  fi

  total=$((WarningCounter + CriticalCounter))
  if ((total == 0)); then
    printf "%s\t%s\n\n" "OK!" "All Check's in range"
  elif ((CriticalCounter == 0)); then
    printf "%s\t%s\n\n" "WARNING!" "Error: $total"
  else
    printf "%s\t%s\n" "CRITICAL!" "Critical Error: $CriticalCounter"
    if ((WarningCounter != 0)); then
      printf "%s\t%s\t%s\n" "" "" "Warning Error: $WarningCounter"
    fi
    printf "\n"
  fi
}
# Print only the problems found by the "all" check, one line each, in this
# order: SNMP error flag, CPU, RAM, hardware objects, temperature sensors.
# Thresholds: WA <= reading < CR -> WARN, reading >= CR -> CRIT.
# Non-numeric readings are skipped. STATE is not modified here.
IPMI.Full.Error() {
  local int_re='^-?(0|[1-9][0-9]*)$'
  local idx reading

  IPMI.GetData error
  if [[ $flag != *"0"* ]]; then
    printf "%s\t%s\n" "CRIT" "Error Found: $flag" "Message" "$message"
  fi

  IPMI.GetData cpu
  if [[ $value =~ ^[0-9]+$ ]]; then
    if ((10#$value >= CR)); then
      printf "%s\t%s\n" "CRIT" "CPU used: $value%"
    elif ((10#$value >= WA)); then
      printf "%s\t%s\n" "WARN" "CPU used: $value%"
    fi
  fi

  IPMI.GetData ram
  if [[ $RAM_P =~ ^[0-9]+$ ]]; then
    if ((10#$RAM_P >= CR)); then
      printf "%s\t%s\n" "CRIT" "RAM used: $RAM_USEK / $RAM_ALLK GB ($RAM_UPERC%)"
    elif ((10#$RAM_P >= WA)); then
      printf "%s\t%s\n" "WARN" "RAM used: $RAM_USEK / $RAM_ALLK GB ($RAM_UPERC%)"
    fi
  fi

  IPMI.GetData obj
  # Hardware objects first ...
  for idx in "${!name[@]}"; do
    reading=${value[idx]}
    if [[ ${name[idx]} == *"Intru"* ]]; then
      if [[ $reading != 0 && $reading != "0x00" ]]; then
        printf "%s\t%s\n" "CRIT" "${name[idx]}"
      fi
    elif [[ ${name[idx]} == *"Status"* ]]; then
      if [[ $reading != 1 && $reading != "0x01" ]]; then
        printf "%s\t%s\n" "CRIT" "${name[idx]}"
      fi
    fi
  done
  # ... then temperatures, compared numerically ([[ a < b ]] compares strings).
  for idx in "${!name[@]}"; do
    [[ ${name[idx]} == *"Temp"* ]] || continue
    reading=${value[idx]}
    [[ $reading =~ $int_re ]] || continue
    if ((reading >= CR)); then
      printf "%s\t%s\t%s\n" "CRIT" "${name[idx]}" "$reading °C"
    elif ((reading >= WA)); then
      printf "%s\t%s\t%s\n" "WARN" "${name[idx]}" "$reading °C"
    fi
  done
}
# - Dell Health Check
# Entry point for "-idrac <check>": run the selected check and exit with STATE.
# Arguments: $1 - check name (all | drive | glob | hw).
# An unrecognised name prints the IDRAC help and exits with STATE_UNK.
IDRAC.Main() {
  local handler
  case "$1" in
    all)   handler=IDRAC.Full ;;
    drive) handler=IDRAC.Drive ;;
    glob)  handler=IDRAC.GlobalDevice ;;
    hw)    handler=IDRAC.Hardware ;;
    *)
      echo "Unknown check!"
      Help.IDRAC
      exit $STATE_UNK
      ;;
  esac
  # The handler updates STATE as a side effect.
  "$handler"
  exit $STATE
}
# Query the iDRAC over SNMP and store the parsed result in globals.
# Arguments: $1 - data set to fetch:
#   drive    -> arrayNameDrive[], arrayStatusDrive[], arrayPredictDrive[]
#   global   -> globalSystem, systemLcd, globalStorage, systemPower,
#               systemPowerUpTime, productType, serviceTag
#   hardware -> systemPower, nameFan, statusFan, nameIntrusion,
#               statusIntrusion, arrayNamePS[], arrayStatusPS[],
#               arrayNameTemp[], arrayStatusTemp[], arrayValueTemp[]
# Names are the text between the first pair of double quotes on each line;
# statuses are the fourth space-separated field.
IDRAC.GetData() {
  IDRAC.OIDS
  case $1 in
    drive)
      # Word splitting is intended for the status arrays: one element per line.
      arrayStatusDrive=($(snmp "$OID_statusDrive" | cut -d ' ' -f 4))
      arrayPredictDrive=($(snmp "$OID_statusPredictiveDrive" | cut -d ' ' -f 4))
      rawNameDrive=$(snmp "$OID_nameDrive" | cut -d '"' -f 2)
      # Quoted here-string: names may contain spaces, one element per line.
      readarray -t arrayNameDrive <<< "$rawNameDrive"
      ;;
    global)
      globalSystem=$(snmp "$OID_globalSystem" | cut -d ' ' -f 4)
      systemLcd=$(snmp "$OID_systemLcd" | cut -d ' ' -f 4)
      globalStorage=$(snmp "$OID_globalStorage" | cut -d ' ' -f 4)
      systemPower=$(snmp "$OID_systemPower" | cut -d ' ' -f 4)
      systemPowerUpTime=$(snmp "$OID_systemPowerUpTime" | cut -d ' ' -f 4)
      productType=$(snmp "$OID_productType" | cut -d '"' -f 2)
      serviceTag=$(snmp "$OID_serviceTag" | cut -d '"' -f 2)
      ;;
    hardware)
      systemPower=$(snmp "$OID_systemPower" | cut -d ' ' -f 4)
      nameFan=$(snmp "$OID_nameCoolingUnit" | cut -d '"' -f 2)
      statusFan=$(snmp "$OID_statusCoolingUnit" | cut -d ' ' -f 4)
      nameIntrusion=$(snmp "$OID_nameChassisIntrusion" | cut -d '"' -f 2)
      statusIntrusion=$(snmp "$OID_statusChassisIntrusion" | cut -d ' ' -f 4)
      rawNamePS=$(snmp "$OID_namePowerUnit" | cut -d '"' -f 2)
      readarray -t arrayNamePS <<< "$rawNamePS"
      arrayStatusPS=($(snmp "$OID_statusPowerUnit" | cut -d ' ' -f 4))
      rawNameTemp=$(snmp "$OID_temperatureProbeLocation" | cut -d '"' -f 2)
      readarray -t arrayNameTemp <<< "$rawNameTemp"
      arrayStatusTemp=($(snmp "$OID_temperatureProbeStatus" | cut -d ' ' -f 4))
      # Each reading is divided by 10 before being stored.
      arrayValueTemp=($(snmp "$OID_temperatureProbeReading" | cut -d ' ' -f 4 | awk '{ value = $1 /10; print value }'))
      ;;
  esac
}
# Print (without newline) the text label for a numeric status code 1-6.
# Arguments: $1 - status code. Any other value prints nothing.
IDRAC.NormalState() {
  local -a labels=(
    [1]="other"
    [2]="unknow"
    [3]="ok"
    [4]="warning"
    [5]="critical"
    [6]="nonRecoverable"
  )
  if [[ $1 == [1-6] ]]; then
    printf "%s" "${labels[$1]}"
  fi
}
# Add one status code to the matching global counter.
# Arguments: $1 - status code:
#   1, 4 -> warningCounter
#   2    -> unknownCounter
#   3    -> okCounter
#   5, 6 -> criticalCounter
# Any other value is ignored. The former branch for arguments containing
# "array" was removed: it could never match a code and it reset the loop
# index "i" that callers use.
IDRAC.ErrorCounter.NormalState() {
  case $1 in
    1 | 4) warningCounter=$((warningCounter + 1)) ;;
    2) unknownCounter=$((unknownCounter + 1)) ;;
    3) okCounter=$((okCounter + 1)) ;;
    5 | 6) criticalCounter=$((criticalCounter + 1)) ;;
  esac
}
# Print (without newline) the text label for a numeric probe status 1-10.
# Arguments: $1 - probe status code. Any other value prints nothing.
IDRAC.ProbeState() {
  local -a labels=(
    [1]="other"
    [2]="unknow"
    [3]="ok"
    [4]="nonCriticalUpper"
    [5]="criticalUpper"
    [6]="nonRecoverableUpper"
    [7]="nonCriticalLower"
    [8]="criticalLower"
    [9]="nonRecoverableLower"
    [10]="failed"
  )
  if [[ $1 =~ ^([1-9]|10)$ ]]; then
    printf "%s" "${labels[$1]}"
  fi
}
# Add one probe status code to the matching global counter.
# Arguments: $1 - probe status code (labels as printed by IDRAC.ProbeState):
#   1 other, 4 nonCriticalUpper, 7 nonCriticalLower        -> warningCounter
#   2 unknow                                               -> unknownCounter
#   3 ok                                                   -> okCounter
#   5/8 critical*, 6/9 nonRecoverable*, 10 failed          -> criticalCounter
# nonRecoverable states are counted as critical, matching how
# IDRAC.ErrorCounter.NormalState treats nonRecoverable (code 6).
IDRAC.ErrorCounter.ProbeState() {
  case $1 in
    1 | 4 | 7) warningCounter=$((warningCounter + 1)) ;;
    2) unknownCounter=$((unknownCounter + 1)) ;;
    3) okCounter=$((okCounter + 1)) ;;
    5 | 6 | 8 | 9 | 10) criticalCounter=$((criticalCounter + 1)) ;;
  esac
}
# Print one line per drive: name, state and predictive-failure note.
# Globals: NoHeader (read) - when 1 the summary header is not printed;
#          i (written) - index read by the two per-drive helpers.
IDRAC.Drive() {
  IDRAC.GetData drive
  [[ $NoHeader == 1 ]] || IDRAC.Drive.Header

  # The helpers read the current drive through the shared index "i".
  for i in "${!arrayNameDrive[@]}"; do
    printf "%s\t" "${arrayNameDrive[$i]}"
    IDRAC.Drive.arrayStatus
    IDRAC.Drive.arrayPredict
  done
}
# Print the state label of drive number $i followed by a tab.
# Globals: i, arrayStatusDrive[] (read). Codes outside 1-9 print nothing.
IDRAC.Drive.arrayStatus() {
  local -a states=(
    [1]="unknown"
    [2]="ready"
    [3]="online"
    [4]="foreign"
    [5]="offline"
    [6]="blocked"
    [7]="failed"
    [8]="non-raid"
    [9]="removed"
  )
  local code=${arrayStatusDrive[$i]}
  if [[ $code == [1-9] ]]; then
    printf "%s\t" "${states[$code]}"
  fi
}
# Finish the line of drive number $i: append the predictive-failure note when
# the drive's predictive flag is 1, otherwise just end the line.
# Testing for "== 1" matches IDRAC.Drive.Header; the former "!= 0" also
# flagged drives whose flag was missing from the SNMP answer.
# Globals: i, arrayPredictDrive[] (read).
IDRAC.Drive.arrayPredict() {
  if [[ ${arrayPredictDrive[$i]} == 1 ]]; then
    printf "%s\n" "predictive failure - replace drive"
  else
    printf "\n"
  fi
}
# Print the drive summary and set STATE accordingly.
# Per-state counters are filled from arrayStatusDrive[]; a predictive flag
# of 1 in arrayPredictDrive[] adds a warning.
#   no warning, no critical -> OK,       STATE_OK
#   warnings only           -> WARNING,  STATE_WARN
#   any critical            -> CRITICAL, STATE_CRIT
# After the summary, one line is printed for EVERY non-empty problem state
# (the former elif chain showed only the first one).
IDRAC.Drive.Header() {
  unknownCounter=0
  okCounter=0
  warningCounter=0
  criticalCounter=0
  readyCounter=0
  onlineCounter=0
  foreignCounter=0
  offlineCounter=0
  blockedCounter=0
  failedCounter=0
  nonraidCounter=0
  removedCounter=0
  predictiveCounter=0

  local idx
  for idx in "${!arrayStatusDrive[@]}"; do
    case ${arrayStatusDrive[idx]} in
      1) unknownCounter=$((unknownCounter + 1)) ;;
      2)
        readyCounter=$((readyCounter + 1))
        okCounter=$((okCounter + 1))
        ;;
      3)
        onlineCounter=$((onlineCounter + 1))
        okCounter=$((okCounter + 1))
        ;;
      4)
        foreignCounter=$((foreignCounter + 1))
        warningCounter=$((warningCounter + 1))
        ;;
      5)
        offlineCounter=$((offlineCounter + 1))
        warningCounter=$((warningCounter + 1))
        ;;
      6)
        blockedCounter=$((blockedCounter + 1))
        warningCounter=$((warningCounter + 1))
        ;;
      7)
        failedCounter=$((failedCounter + 1))
        criticalCounter=$((criticalCounter + 1))
        ;;
      8)
        nonraidCounter=$((nonraidCounter + 1))
        okCounter=$((okCounter + 1))
        ;;
      9)
        removedCounter=$((removedCounter + 1))
        criticalCounter=$((criticalCounter + 1))
        ;;
    esac
    if [[ ${arrayPredictDrive[idx]} == 1 ]]; then
      predictiveCounter=$((predictiveCounter + 1))
      warningCounter=$((warningCounter + 1))
    fi
  done

  local issues=$((warningCounter + criticalCounter))
  if ((issues == 0)); then
    printf "%s\t%s\n\n" "OK!" "All Drives are working correctly"
    STATE=$STATE_OK
  elif ((criticalCounter == 0)); then
    printf "%s\t%s\n\n" "WARNING!" "$issues Drive issue."
    STATE=$STATE_WARN
  else
    printf "%s\t%s\n\n" "CRITICAL!" "$issues Drive issue."
    STATE=$STATE_CRIT
  fi

  # "counter variable:label" pairs, in the order they are reported.
  local pair counter_name label
  for pair in \
    "predictiveCounter:Predictive Failure" \
    "failedCounter:Failed" \
    "removedCounter:Removed" \
    "unknownCounter:Unknown" \
    "blockedCounter:Blocked" \
    "offlineCounter:Offline" \
    "foreignCounter:Foreign"; do
    counter_name=${pair%%:*}
    label=${pair#*:}
    if ((${!counter_name} != 0)); then
      printf "%s\t%s\n" "" "${!counter_name} in state $label"
    fi
  done
  printf "\n"
}
# Print the global system report: power state (plus uptime when the power
# code is 4), device, storage and LCD status, and - when a warning or
# critical counter is non-zero - the product type and service tag.
# Globals: NoHeader (read) - when 1 the summary header is not printed.
IDRAC.GlobalDevice() {
  IDRAC.GetData global
  [[ $NoHeader == 1 ]] || IDRAC.GlobalDevice.Header

  printf "%s\t" "System Power Status:"
  IDRAC.GlobalDevice.PowerState
  if [[ $systemPower == 4 ]]; then
    IDRAC.GlobalDevice.PowerUpTime
  fi

  printf "%s\t" "Device Status:"
  if IDRAC.NormalState $globalSystem; then
    printf "\n"
  fi
  printf "%s\t" "Storage Status:"
  if IDRAC.NormalState $globalStorage; then
    printf "\n"
  fi
  printf "%s\t" "LCD Status:"
  if IDRAC.NormalState $systemLcd; then
    printf "\n"
  fi

  if [[ $warningCounter != 0 || $criticalCounter != 0 ]]; then
    printf "%s\t%s\n" "Type: $productType" "ServiceTag: $serviceTag"
  fi
}
# Print the label for the power code in systemPower, followed by a newline.
# Codes outside 1-4 print nothing.
IDRAC.GlobalDevice.PowerState() {
  local -a power_names=(
    [1]="other"
    [2]="unknow"
    [3]="off"
    [4]="on"
  )
  if [[ $systemPower == [1-4] ]]; then
    printf "%s\n" "${power_names[$systemPower]}"
  fi
}
# Print systemPowerUpTime (treated as a number of seconds) as
# "<d>d <h>h <m>m <s>s". Day, hour and minute parts are shown only when
# greater than zero; the seconds part and the closing newline are always
# printed, so an uptime below one minute still produces a complete line.
# A non-numeric value prints "unknown".
IDRAC.GlobalDevice.PowerUpTime() {
  printf "%s\t" "System PowerUp Time:"
  if [[ ! $systemPowerUpTime =~ ^[0-9]+$ ]]; then
    printf "%s\n" "unknown"
    return
  fi

  local -i total=$((10#$systemPowerUpTime))
  local -i seconds=$((total % 60))
  local -i minutes=$((total / 60 % 60))
  local -i hours=$((total / 3600 % 24))
  local -i days=$((total / 86400))

  if ((days > 0)); then
    printf '%dd ' "$days"
  fi
  if ((hours > 0)); then
    printf '%dh ' "$hours"
  fi
  if ((minutes > 0)); then
    printf '%dm ' "$minutes"
  fi
  printf '%ds\n' "$seconds"
}
# Print the summary for the global check and set STATE.
# Counts the power code (1 warning, 2 unknown, 3 critical, 4 ok) and the
# device / storage / LCD status codes (1,4 warning; 2 unknown; 3 ok;
# 5,6 critical).
#   no warning, no critical -> OK line with product type and service tag
#   warnings only           -> WARNING,  STATE_WARN
#   any critical            -> CRITICAL, STATE_CRIT, followed by the warning
#                              and unknown counts when non-zero
# The critical branch used to require unknownCounter != 0 as well, so a
# critical state without unknown states was reported as nothing at all.
IDRAC.GlobalDevice.Header() {
  unknownCounter=0
  okCounter=0
  warningCounter=0
  criticalCounter=0

  case $systemPower in
    1) warningCounter=$((warningCounter + 1)) ;;
    2) unknownCounter=$((unknownCounter + 1)) ;;
    3) criticalCounter=$((criticalCounter + 1)) ;;
    4) okCounter=$((okCounter + 1)) ;;
  esac

  # Unquoted on purpose: an empty status simply yields no array element.
  arrayStatus=($globalSystem $globalStorage $systemLcd)
  local status
  for status in "${arrayStatus[@]}"; do
    case $status in
      1 | 4) warningCounter=$((warningCounter + 1)) ;;
      2) unknownCounter=$((unknownCounter + 1)) ;;
      3) okCounter=$((okCounter + 1)) ;;
      5 | 6) criticalCounter=$((criticalCounter + 1)) ;;
    esac
  done

  if ((warningCounter == 0 && criticalCounter == 0)); then
    printf "%s\t%s\t%s\n\n" "OK!" "Type: $productType" "ServiceTag: $serviceTag"
  elif ((criticalCounter == 0)); then
    STATE=$STATE_WARN
    printf "%s\t%s\n\n" "WARNING!" "$warningCounter Warning States"
  else
    STATE=$STATE_CRIT
    if [[ $systemPower == 3 ]]; then
      printf "%s\t%s\n" "CRITICAL!" "System is turned off"
      printf "\t\t%s\n" "$criticalCounter Critical States"
    else
      printf "%s\t%s\n" "CRITICAL!" "$criticalCounter Critical States"
    fi
    if ((warningCounter != 0)); then
      printf "\t\t%s\n" "$warningCounter Warning States"
    fi
    if ((unknownCounter != 0)); then
      printf "\t\t%s\n" "$unknownCounter Unknown States"
    fi
  fi
  printf "\n"
}
# Print the hardware report: fan, chassis intrusion, every temperature probe
# and every power supply, each with its status label.
# The number of tabs after a name depends on the kind of entry so the status
# column lines up.
# Globals: NoHeader (read) - when 1 the summary header is not printed.
IDRAC.Hardware() {
  IDRAC.GetData hardware
  [[ $NoHeader == 1 ]] || IDRAC.Hardware.Header

  printf "%s\t" "$nameFan"
  if IDRAC.NormalState $statusFan; then
    printf "\n"
  fi
  printf "%s\t\t" "$nameIntrusion"
  if IDRAC.NormalState $statusIntrusion; then
    printf "\n"
  fi

  local idx
  for idx in "${!arrayNameTemp[@]}"; do
    case ${arrayNameTemp[idx]} in
      *Inlet*)   printf "%s\t\t" "${arrayNameTemp[idx]}" ;;
      *Exhaust*) printf "%s\t" "${arrayNameTemp[idx]}" ;;
      *)         printf "%s\t\t\t" "${arrayNameTemp[idx]}" ;;
    esac
    if IDRAC.ProbeState ${arrayStatusTemp[idx]}; then
      printf "\n"
    fi
  done

  for idx in "${!arrayNamePS[@]}"; do
    printf "%s\t\t\t" "${arrayNamePS[idx]}"
    if IDRAC.NormalState ${arrayStatusPS[idx]}; then
      printf "\n"
    fi
  done
}
# Print the summary for the hardware check and set STATE.
# Power-supply, fan and intrusion codes are counted with
# IDRAC.ErrorCounter.NormalState; temperature probe codes are counted with
# IDRAC.ErrorCounter.ProbeState, because probes use the 1-10 code set printed
# by IDRAC.ProbeState rather than the 1-6 one.
#   no warning, no critical -> OK
#   warnings only           -> WARNING,  STATE_WARN
#   any critical            -> CRITICAL, STATE_CRIT
# The critical branch used to require unknownCounter != 0 as well, so a
# critical state without unknown states was not reported.
IDRAC.Hardware.Header() {
  unknownCounter=0
  okCounter=0
  warningCounter=0
  criticalCounter=0

  local status
  for status in "${arrayStatusPS[@]}"; do
    IDRAC.ErrorCounter.NormalState "$status"
  done
  for status in "${arrayStatusTemp[@]}"; do
    IDRAC.ErrorCounter.ProbeState "$status"
  done
  IDRAC.ErrorCounter.NormalState $statusFan
  IDRAC.ErrorCounter.NormalState $statusIntrusion

  if ((warningCounter == 0 && criticalCounter == 0)); then
    printf "%s\t%s\n\n" "OK!" "All Hardware are working correctly"
  elif ((criticalCounter == 0)); then
    printf "%s\t%s\n" "WARNING!" "$warningCounter Warning States"
    if ((unknownCounter != 0)); then
      printf "\t\t%s\n" "$unknownCounter Unknown States"
    fi
    STATE=$STATE_WARN
  else
    printf "%s\t%s\n" "CRITICAL!" "$criticalCounter Critical States"
    if ((warningCounter != 0)); then
      printf "\t\t%s\n" "$warningCounter Warning States"
    fi
    if ((unknownCounter != 0)); then
      printf "\t\t%s\n" "$unknownCounter Unknown States"
    fi
    STATE=$STATE_CRIT
  fi
  printf "\n"
}
# - Help
# Print the complete help text (title plus every help section, each preceded
# by a blank line) and exit with STATE_UNK.
Help.Main() {
  local section
  echo "Check IPMI Status"
  for section in Help.Usage Help.Option Help.IPMI Help.IDRAC Help.Info; do
    echo ''
    "$section"
  done
  echo ''
  exit $STATE_UNK
}
# Print the two-line usage text: a caption and the command-line template.
Help.Usage() {
  # One format applied to two arguments yields the two lines.
  printf "%s\n" \
    "Method to compose the execution string" \
    "./$APPNAME -c <SNMP community> -h <host> -wa <value> -cr <value> -ipmi <object>"
}
# Print the option reference, including the current default of each setting.
# The -ipmi list names exactly the checks IPMI.Main accepts (it used to show
# a non-existent "he" and omit "hw" and "ram").
Help.Option() {
  printf "%s\n" "OPTIONS:"
  printf "%s\t%s\t%s\n\t\t\t%s\n" "-c" "--community" "SNMP v2 community string with Read access." " Default is: $COMMUNITY."
  printf "%s\t%s\t\t%s\n\t\t\t%s\n" "-h" "--host" "Host name or IP address to check." " Default is: $HOST_NAME."
  printf "%s\t%s\t%s\n\t\t\t%s\n" "-wa" "--allert-wa" "Defines the threshold for Warning." " Default is: $WA."
  printf "%s\t%s\t%s\n\t\t\t%s\n" "-cr" "--allert-cr" "Defines the threshold for Critical." " Default is: $CR."
  printf "%s\t%s\t%s\n\t\t\t%s\n" "-ipmi" "" "Field for select element to check on IPMI Device" " { all | cpu | err | fan | hw | ram | temp }"
  printf "%s\t%s\t%s\n\t\t\t%s\n" "-idrac" "" "Field for select element to check on IDRAC Device" " { all | drive | glob | hw }"
  printf "%s\t%s\t\t%s\n" "-H" "--help" "Show script help"
  printf "%s\t%s\t%s\n" "-V" "--version" "Show script version"
}
# Print the list of IPMI checks with a one-line description each.
Help.IPMI() {
  printf "\n%s\n\n" "IPMI Check Function"
  printf "%s\t%s\n\n" "Check" "Description"
  # printf repeats the format for every name/description pair.
  printf "%s\t%s\n" \
    "all" "Full monitoring check with single output" \
    "cpu" "Monitoring CPU load" \
    "err" "Monitoring system error" \
    "fan" "Monitoring fan speed" \
    "ram" "Monitoring RAM load" \
    "hw" "Monitoring Hardware status" \
    "temp" "Monitoring temperature sensor"
}
# Print the list of IDRAC checks with a one-line description each.
Help.IDRAC() {
  printf "\n%s\n\n" "IDRAC Check Function"
  printf "%s\t%s\n\n" "Check" "Description"
  # printf repeats the format for every name/description pair.
  printf "%s\t%s\n" \
    "all" "[WiP] Full monitoring check with single output" \
    "drive" "Monitoring Drive unit" \
    "glob" "Monitoring Global system" \
    "hw" "Monitoring Hardware"
}
# Print script name and version on one line, author and URL on the next.
Help.Info() {
  printf "%s\t%s\t%s\n" "INFO:" "$NAME" "$VERSION"
  printf "%s\t%s\t%s\n" "" "$AUTHOR" "$URL"
}
# - COMMAND LINE ENCODER
# Parse the command line, then run the requested check.
# Settings (-h, -c, -wa, -cr) are applied wherever they appear: the check
# selected with -ipmi / -idrac runs only after every argument has been read
# (it used to start mid-parse, ignoring options that followed it).
# When both -ipmi and -idrac are given, the first one wins.
# Both "--community" (as documented in the help) and the historical spelling
# "--comunity" are accepted.
# An unknown argument prints the help and exits with STATE_UNK.
Script.ParseArgs() {
  local mode="" check=""

  while [[ -n $1 ]]; do
    case "$1" in
      --host | -h)
        HOST_NAME=$2
        shift
        ;;
      --community | --comunity | -c)
        COMMUNITY=$2
        shift
        ;;
      --allert-wa | -wa)
        WA=$2
        shift
        ;;
      --allert-cr | -cr)
        CR=$2
        shift
        ;;
      -ipmi | -idrac)
        if [[ -z $mode ]]; then
          mode=$1
          check=$2
        fi
        shift
        ;;
      --help | -H)
        Help.Main
        ;;
      --version | -V)
        Help.Info
        exit $STATE
        ;;
      *)
        echo "Unknown argument: $1"
        Help.Main
        # Help.Main exits by itself; kept as a safety net.
        exit $STATE_UNK
        ;;
    esac
    shift
  done

  # Both entry points exit with the resulting state.
  case $mode in
    -ipmi) IPMI.Main "$check" ;;
    -idrac) IDRAC.Main "$check" ;;
  esac
}
Script.ParseArgs "$@"