#!/bin/bash
###########
#   apcPdu.sh
#   retrieves statistics from APC gen 1 and 2 PDUs
#
#   Author:
#   Ian Perry
#
#   Last Modified:
#   2020-12-28 -- Creation
###########

# ---------------------------------------------------------------------------
# SNMP OIDs polled by this script
# ---------------------------------------------------------------------------
psu1StatusOID="1.3.6.1.4.1.318.1.1.12.4.1.1.0"
psu2StatusOID="1.3.6.1.4.1.318.1.1.12.4.1.2.0"
maxLoadOID="1.3.6.1.4.1.318.1.1.12.2.1.1.0"
overloadOID="1.3.6.1.4.1.318.1.1.12.2.3.1.1.3.1"
voltageOID="1.3.6.1.4.1.318.1.1.26.6.3.1.6"
voltageRatingOID="1.3.6.1.4.1.318.1.1.12.1.15.0"

# Amperage OIDs
pduLoadOID="1.3.6.1.4.1.318.1.1.12.2.3.1.1.2.1"
lowLoadMinOID="1.3.6.1.4.1.318.1.1.12.2.2.1.1.2"
nearOverMinOID="1.3.6.1.4.1.318.1.1.12.2.2.1.1.3"
overLoadMinOID="1.3.6.1.4.1.318.1.1.12.2.2.1.1.4"

# Wattage OIDs
powerOID="1.3.6.1.4.1.318.1.1.26.4.3.1.5"
lowPowerMinOID="1.3.6.1.4.1.318.1.1.26.4.1.1.7"
nearPowerMinOID="1.3.6.1.4.1.318.1.1.26.4.1.1.8"
overPowerMinOID="1.3.6.1.4.1.318.1.1.26.4.1.1.9"

# ---------------------------------------------------------------------------
# Readings. Each starts at a placeholder and is overwritten once polled.
# ---------------------------------------------------------------------------
# Power supply status: 1=OK, 2=Failed
psu1Status=0
psu2Status=0
# Max load in Amps
maxLoad=0
# Overload indicator: 1=OK, 2=Under, 3=Near-Over, 4=Over
overload=0
# Voltage in volts; -1 means "no reading"
voltage=-1
voltageRating=0
# Total load in 1/10 Amp
pduLoad=0
# Power in 1/100 kW
power=0

# ---------------------------------------------------------------------------
# Thresholds as configured on the PDU
# ---------------------------------------------------------------------------
# Low load setting in Amps
lowLoadMin=0
# Near-overload bottom limit (anything above it is in near-overload)
nearOverMin=0
# Overload threshold (anything above it is overloaded)
overLoadMin=0
# Power thresholds are in 1/10 kW, whereas the power reading itself is
# returned in 1/100 kW.
lowPowerMin=0
nearPowerMin=0
overPowerMin=0

# ---------------------------------------------------------------------------
# Check selection, operator overrides and output bookkeeping
# ---------------------------------------------------------------------------
# Check to run (default) and the list of checks that may be requested
check="all"
checkList="all load psuStatus overload voltage power"
# Lower-cased copy of the list, used for input validation
checkListLower=$(printf '%s\n' "$checkList" | tr '[:upper:]' '[:lower:]')

# Operator-supplied thresholds; 0 means "not supplied"
ampCrit=0
ampWarn=0
pwrCrit=0
pwrWarn=0
voltageTarget=0

# Message with warn/crit text, performance data, and the final output string
textVal=""
dataVal=""
outString=""
# Return status
returnStat=0

set -e

# parseOptions ARG...
#   Reads the command-line flags and stores their arguments in the script's
#   global settings:
#     -H hostname  -> HSTNAME        (host to poll)
#     -t test      -> check          (which check to run)
#     -c value     -> ampCrit        (amperage critical threshold)
#     -w value     -> ampWarn        (amperage warning threshold)
#     -r value     -> pwrCrit        (power critical threshold)
#     -a value     -> pwrWarn        (power warning threshold)
#     -v volts     -> voltageTarget  (target voltage)
#   An unknown flag, or a flag given without its argument, prints the usage
#   line to stderr and exits the script with status 3.
parseOptions() {
    # A local OPTIND makes the parser start at the first argument it is given.
    local opt OPTIND=1

    # The leading ':' selects getopts' silent mode: problems are reported via
    # $opt ('?' for an unknown flag, ':' for a missing argument) instead of
    # being printed by getopts itself. -r and -a must be listed here, otherwise
    # their case branches below can never be reached.
    while getopts ":H:t:c:w:r:a:v:" opt; do
        case $opt in
        H ) # Specify hostname
            HSTNAME="$OPTARG" # Hostname reserved keyword
            ;;
        t ) # Specify test, default all
            check="$OPTARG"
            ;;
        c ) # Amperage crit
            ampCrit="$OPTARG"
            ;;
        w ) # Amperage warn
            ampWarn="$OPTARG"
            ;;
        r ) # Power crit
            pwrCrit="$OPTARG"
            ;;
        a ) # Power warn
            pwrWarn="$OPTARG"
            ;;
        v ) # Specify voltage target. See voltage check for details.
            voltageTarget="$OPTARG"
            ;;
        * ) # '?' (unknown flag) or ':' (missing argument)
            echo "script usage: $(basename "$0") [-H hostname] [-t test] [-c ampCrit] [-w ampWarn] [-r pwrCrit ] [ -a pwrWarn ] [-v targetvoltage]" >&2
            exit 3
            ;;
        esac
    done
}

parseOptions "$@"

# Convert input check to lowercase for validation
check=$(echo "$check" | awk '{print tolower($0)}')

# A host is mandatory: every snmpwalk call below needs one. Report its absence
# as UNKNOWN (exit 3) instead of letting the first snmpwalk fail.
if [[ -z $HSTNAME ]]; then
    echo "UNKNOWN - No hostname specified. Use -H hostname."
    exit 3
fi

# If check is not valid, quit. The requested check must equal one entry of the
# list exactly; a substring such as "lo" or a pattern such as "." is rejected.
validCheck="no"
for candidate in $checkListLower; do  # intentionally unquoted: split the list into words
    if [[ $check == "$candidate" ]]; then
        validCheck="yes"
        break
    fi
done

if [[ $validCheck != "yes" ]]; then
    echo "Invalid check specified. Valid selection: $checkList"
    exit 3
fi

# Voltage calculation. If no target voltage is provided via the -v flag,
# the script attempts to read the voltage from the PDU. If the PDU can be
# read, the reading is classified into a target voltage as follows:
#
# >= 0 && < 164     = 120V
# >= 164 && <= 224  = 208V
# > 224 && < 300    = 240V
#
# If device doesn't have voltage monitoring (that is, result is either
# unchanged and left at -1, or set to -1) and no voltage is specified
# via the -v flag, all checks will return unknown status with a message
# to assign the PDU to a different hostgroup.

# Voltage Reading?
## Yes: Leave voltage as reading
## No:  Set voltage as -1

# Target voltage set?
## Yes: Keep target voltage
## No:  Nominal voltage rating?
### Yes: Rating -1?
#### Yes: Voltage -1?
##### Yes: Error.
##### No:  Set target to voltage
#### No: Grab target dependent on voltage
### No: Set target based on rating

# Voltage -1?
# Set to target


# Read the input voltage from the PDU. An empty reply is recorded as the
# sentinel -1 ("no reading").
voltage=$(snmpwalk -c public -v1 -Ovq "$HSTNAME" "$voltageOID")
if ! [[ $voltage ]] ; then
    voltage=-1
fi

# Work out the target (nominal) voltage.
if (($voltageTarget != 0)) ; then
    : # A target was supplied with -v; keep it.
else
    # No target supplied: ask the PDU for its nominal voltage rating.
    # An empty reply is treated like a reported rating of -1 (unavailable).
    voltageRating=$(snmpwalk -c public -v1 -Ovq "$HSTNAME" "$voltageRatingOID")
    if ! [[ $voltageRating ]] ; then
        voltageRating=-1
    fi

    if (($voltageRating == 230)) ; then
        # 230 gets bumped to 240.
        voltageTarget=240
    elif (($voltageRating != -1)) ; then
        # Any other usable rating becomes the target as is.
        voltageTarget=$voltageRating
    # No usable rating: classify the measured voltage instead.
    elif (($voltage >= 0 && $voltage < 164)) ; then
        voltageTarget=120
    elif (($voltage >= 164 && $voltage <= 224)) ; then
        voltageTarget=208
    elif (($voltage > 224 && $voltage < 300)) ; then
        voltageTarget=240
    else
        # Neither a rating nor a classifiable reading (this includes the -1
        # sentinel). UNKNOWN is exit status 3 for Nagios; 2 would be CRITICAL.
        echo "UNKNOWN - Unable to automatically determine voltage rating. Please place device in voltage assigned hostgroup."
        exit 3
    fi
fi

# If we end up with a voltage target but no voltage reading, use the target as
# the voltage. This also applies when the target came from -v, so later checks
# never calculate with the -1 sentinel.
if (($voltage == -1)) ; then
    voltage=$voltageTarget
fi


# returnStat is 0 for OK, 1 for warning, 2 for unknown, 3 for crit, different from Nagios return status.
# Functions that return OK do not set 0, as it's default. Functions that set 1 or 2 use a pseudo-max function
# to determine whether there's a higher status current. If there is, higher wins out.
# Any function that sets a critical status sets the critical status automatically without max function.


# Amperage check

# tenthsToAmps VALUE
#   Prints VALUE (a figure in 1/10 A) converted to amps with one decimal place.
tenthsToAmps() {
    echo "scale=1;$1 / 10.0" | bc -l
}

# Present load, reported in 1/10 amps
pduLoad=$(snmpwalk -c public -v1 -Ovq $HSTNAME $pduLoadOID)

# Thresholds that were not given on the command line are taken from the PDU
# configuration. This happens even when the load check is not selected, so
# that the power thresholds can be calculated from them if those are not set.
if (($ampCrit == 0)) ; then
    ampCrit=$(snmpwalk -c public -v1 -Ovq $HSTNAME $overLoadMinOID)
fi
if (($ampWarn == 0)) ; then
    ampWarn=$(snmpwalk -c public -v1 -Ovq $HSTNAME $nearOverMinOID)
fi

# The low load minimum always comes from the PDU.
lowLoadMin=$(snmpwalk -c public -v1 -Ovq $HSTNAME $lowLoadMinOID)

# Scale the thresholds from amps to 1/10 amp so they can be compared with the
# load reading without losing accuracy.
ampCrit=$(($ampCrit*10))
ampWarn=$(($ampWarn*10))
lowLoadMin=$(($lowLoadMin*10))

if [[ $check == "all" || $check == "load" ]]; then

    # Human-readable figures, formatted once and reused below
    loadAmps=$(tenthsToAmps "$pduLoad")
    lowAmps=$(tenthsToAmps "$lowLoadMin")
    warnAmps=$(tenthsToAmps "$ampWarn")
    critAmps=$(tenthsToAmps "$ampCrit")

    # Data for performance monitoring
    dataVal+="load_amps=${loadAmps};${warnAmps};${critAmps};"

    if (( $pduLoad < $lowLoadMin )) ; then
        # Below the low-load minimum: warning
        if (( $returnStat < 1 )) ; then
            returnStat=1
        fi
        textVal+="WARNING - PDU load is below minimum threshold. Current load ${loadAmps}A. Low threshold ${lowAmps}A.; "

    elif (( $pduLoad >= $lowLoadMin )) && (( $pduLoad < $ampWarn )) ; then
        # Between minimum and near-overload: OK (returnStat keeps its value)
        textVal+="OK - PDU load is in normal range. Current load ${loadAmps}A.; "

    elif (( $pduLoad >= $ampWarn )) && (( $pduLoad < $ampCrit )) ; then
        # Near overload: warning
        if (( $returnStat < 1 )) ; then
            returnStat=1
        fi
        textVal+="WARNING - PDU load is approaching overload. Current load ${loadAmps}A. Near-Overload threshold ${warnAmps}A.; "

    elif (( $pduLoad >= $ampCrit )) ; then
        # Overloaded: critical always wins
        returnStat=3
        textVal+="CRITICAL - PDU is overloaded. Current load ${loadAmps}A. Overload threshold ${critAmps}A.; "

    else
        # None of the comparisons held: unknown
        if (( $returnStat < 2 )) ; then
            returnStat=2
        fi
        textVal+="UNKNOWN - Script returned an invalid value for PDU load. Current load ${loadAmps}A.; "
    fi

fi

# PSU power supply self-status
if [[ $check == "all" || $check == "psustatus" ]]; then
    # Status values reported by the PDU:
    #  1 = OK
    #  2 = PSU failure
    psu1Status=$(snmpwalk -c public -v1 -Ovq $HSTNAME $psu1StatusOID)
    # Only switched PDUs have a second power supply. Metered PDUs will return 3.
    psu2Status=$(snmpwalk -c public -v1 -Ovq $HSTNAME $psu2StatusOID)

    # First power supply
    if (($psu1Status == 2)) ; then
        returnStat=3
        textVal+="CRITICAL - PDU power supply 1 failure.; "
    elif (($psu1Status == 1)) ; then
        textVal+="OK - PDU power supply 1 OK.; "
    else
        if (($returnStat < 2)) ; then
            returnStat=2
        fi
        textVal+="UNKNOWN - Script returned an invalid value for PDU power supply failure. Value returned: $psu1Status.; "
    fi

    # Second power supply, evaluated only when the PDU does not report 3
    if (($psu2Status != 3)) ; then
        if (($psu2Status == 2)) ; then
            returnStat=3
            textVal+="CRITICAL - PDU power supply 2 failure.; "
        elif (($psu2Status == 1)) ; then
            textVal+="OK - PDU power supply 2 OK.; "
        else
            if (($returnStat < 2)) ; then
                returnStat=2
            fi
            textVal+="UNKNOWN - Script returned an invalid value for PDU power supply 2. Value returned $psu2Status.; "
        fi
    fi
fi

# PDU overload self-status

# evaluateOverloadStatus
#   Interprets the PDU's self-reported load state held in the global
#   `overload`, appends the matching message to `textVal` and raises
#   `returnStat` when the state is worse than what is already recorded.
#     1 = OK
#     2 = Under minimum  -> WARNING
#     3 = Near overload  -> WARNING
#     4 = Overload       -> CRITICAL
#     anything else (including an empty reply) -> UNKNOWN
evaluateOverloadStatus() {
    case "$overload" in
        1)
            # No returnStat set as OK is default
            textVal+="OK - PDU self-status OK.; "
            ;;
        2)
            textVal+="WARNING - PDU self-status under minimum threshold.; "
            if (($returnStat < 1)) ; then
                returnStat=1
            fi
            ;;
        3)
            textVal+="WARNING - PDU self-status nearing overload.; "
            if (($returnStat < 1)) ; then
                returnStat=1
            fi
            ;;
        4)
            # Critical always wins, no max logic needed
            textVal+="CRITICAL - PDU self-status overloaded.; "
            returnStat=3
            ;;
        *)
            # Terminated with ".; " like every other message so that the text
            # of later checks does not run into this one.
            textVal+="UNKNOWN - Script returned unknown self-status for PDU. Self-status $overload.; "
            if (($returnStat < 2)) ; then
                returnStat=2
            fi
            ;;
    esac
}

if [[ $check == "all" ]] || [[ $check == "overload" ]]; then
    # Pull self-status from PDU
    overload=$(snmpwalk -c public -v1 -Ovq "$HSTNAME" "$overloadOID")
    evaluateOverloadStatus
fi

# Voltage

# scaleTargetVoltage PERMILLE
#   Prints voltageTarget * PERMILLE / 1000. scale=0 makes bc drop the
#   fractional part; that only takes effect on division, which is why the
#   factor is written as a fraction rather than a decimal.
scaleTargetVoltage() {
    echo "scale=0; $voltageTarget * $1/1000" | bc -l
}

if [[ $check == "all" || $check == "voltage" ]] ; then
    # Standards as per ANSI Service Entrance tolerance figures. Range A is used for warn, Range B for crit.
    # See http://git.indigex.com/indigex/technical-services/doc/wikis/Environmental-Monitoring/Utility-Power
    #
    # NOTE(review): the factors below are 88.0 % and 91.7 % for the low limits,
    # while earlier inline comments labelled them 91.7 % and 95.0 %. Confirm
    # which pair is intended.
    voltWarnLow=$(scaleTargetVoltage 917)   #  91.7 % of target
    voltWarnHigh=$(scaleTargetVoltage 1050) # 105.0 % of target
    voltCritLow=$(scaleTargetVoltage 880)   #  88.0 % of target
    voltCritHigh=$(scaleTargetVoltage 1058) # 105.8 % of target

    # Data for performance monitoring
    dataVal+="voltage=$voltage;$voltWarnLow:$voltWarnHigh;$voltCritLow:$voltCritHigh;"

    if (($voltage >= $voltWarnLow)) && (($voltage <= $voltWarnHigh)) ; then
        # Inside the warning band: OK (returnStat keeps its value)
        textVal+="OK - PDU input voltage in normal range. Voltage ${voltage}V; "

    elif (($voltage >= $voltCritHigh)) || (($voltage <= $voltCritLow)) ; then
        # At or beyond a critical limit: CRIT
        returnStat=3
        textVal+="CRITICAL - PDU input voltage outside critical range. Voltage ${voltage}V. High critical threshold ${voltCritHigh}V, low critical threshold ${voltCritLow}V.; "

    elif (($voltage >= $voltWarnHigh)) || (($voltage <= $voltWarnLow)) ; then
        # Outside the warning band but short of critical: WARN
        if (($returnStat < 1)) ; then
            returnStat=1
        fi
        textVal+="WARNING - PDU input voltage outside warning range. Voltage ${voltage}V. High warning range ${voltWarnHigh}V-${voltCritHigh}V. Low warning range ${voltCritLow}V-${voltWarnLow}V.; "

    else
        # None of the comparisons held: UNKNOWN
        if (($returnStat < 2)) ; then
            returnStat=2
        fi
        textVal+="UNKNOWN - Script returned invalid PDU input voltage. Voltage ${voltage}V.; "
    fi
fi

# Wattage

# hundredthsToKw VALUE
#   Prints VALUE (a figure in 1/100 kW) as kW with two decimal places.
hundredthsToKw() {
    echo "scale=2;$1 / 100.0" | bc -l
}

# hundredthsToWatts VALUE
#   Prints VALUE (a figure in 1/100 kW, i.e. units of 10 W) as whole watts.
hundredthsToWatts() {
    echo "scale=0;$1 * 10.0 / 1.0" | bc -l
}

# powerThresholdFromAmps TENTHS
#   Derives a power threshold in units of 100 W from an amperage threshold
#   given in 1/10 A: the value is brought back to amps, multiplied by the
#   global voltage to get watts, then divided by 100.
powerThresholdFromAmps() {
    echo "scale=0; ( $voltage * ( $1 / 10) ) / 100" | bc -l
}

if [[ $check == "all" || $check == "power" ]] ; then
    # Grab power (in 1/100 kW, i.e. units of 10 W)
    power=$(snmpwalk -c public -v1 -Ovq $HSTNAME $powerOID)

    # No usable reading: calculate power from voltage and amperage
    if [[ -z $power ]] || (($power == -1)) ; then
        power=$(echo "scale=0; ( $pduLoad * $voltage ) / 100.0" | bc -l)
    fi

    # Default crit/warn values are the values set in the PDU GUI; when the PDU
    # returns nothing usable they are derived from the amperage thresholds.
    if (($pwrCrit <= 0)) ; then
        pwrCrit=$(snmpwalk -c public -v1 -Ovq $HSTNAME $overPowerMinOID)
        if [[ -z $pwrCrit ]] || (($pwrCrit < 0)) ; then
            pwrCrit=$(powerThresholdFromAmps "$ampCrit")
        fi
    fi

    if (($pwrWarn <= 0)) ; then
        pwrWarn=$(snmpwalk -c public -v1 -Ovq $HSTNAME $nearPowerMinOID)
        if [[ -z $pwrWarn ]] || (($pwrWarn < 0)) ; then
            pwrWarn=$(powerThresholdFromAmps "$ampWarn")
        fi
    fi

    # Low threshold is always pulled from the PDU
    lowPowerMin=$(snmpwalk -c public -v1 -Ovq $HSTNAME $lowPowerMinOID)
    if [[ -z $lowPowerMin ]] ; then
        lowPowerMin=$(powerThresholdFromAmps "$lowLoadMin")
    fi

    # Thresholds are in 1/10 kW (units of 100 W). Multiply by 10 to express
    # them in 1/100 kW so they match the power reading.
    pwrCrit=$(($pwrCrit*10))
    pwrWarn=$(($pwrWarn*10))
    lowPowerMin=$(($lowPowerMin*10))

    # Human-readable figure for the messages below
    powerKw=$(hundredthsToKw "$power")

    # Data for performance monitoring
    dataVal+="power_w=$(hundredthsToWatts "$power");$(hundredthsToWatts "$pwrWarn");$(hundredthsToWatts "$pwrCrit");"

    if (( $power < $lowPowerMin )) ; then
        # Under min: WARN
        if (( $returnStat < 1 )) ; then
            returnStat=1
        fi
        textVal+="WARNING - PDU load is below minimum threshold. Current load ${powerKw}kW. Low threshold $(hundredthsToKw "$lowPowerMin")kW.; "

    elif (( $power >= $lowPowerMin )) && (( $power < $pwrWarn )) ; then
        # Above min, below near-over: OK (returnStat keeps its value)
        textVal+="OK - PDU load is in normal range. Current load ${powerKw}kW.; "

    elif (( $power >= $pwrWarn )) && (( $power < $pwrCrit )) ; then
        # Near over: WARN
        if (( $returnStat < 1 )) ; then
            returnStat=1
        fi
        textVal+="WARNING - PDU load is approaching overload. Current load ${powerKw}kW. Near-Overload threshold $(hundredthsToKw "$pwrWarn")kW.; "

    elif (( $power >= $pwrCrit )) ; then
        # Overloaded: CRIT
        returnStat=3
        textVal+="CRITICAL - PDU is overloaded. Current load ${powerKw}kW. Overload threshold $(hundredthsToKw "$pwrCrit")kW.; "

    else
        # None of the comparisons held: UNKNOWN
        if (( $returnStat < 2 )) ; then
            returnStat=2
        fi
        textVal+="UNKNOWN - Script returned an invalid value for PDU load. Current load ${powerKw}kW.; "
    fi
fi

# When every check was run, lead the output with a one-line overall verdict
# that reflects the worst status recorded in returnStat.
if [[ $check == *all* ]]; then
    case $returnStat in
        0) healthLine="OK -- Overall system health OK. --- " ;;
        1) healthLine="WARNING -- Overall system health has issues. --- " ;;
        2) healthLine="UNKNOWN -- Overall system health checks returned an invalid value on one or more checks. --- " ;;
        3) healthLine="CRITICAL -- Overall system health critical. Immediate attention required. --- " ;;
        *) healthLine="UNKNOWN -- Script return status invalid. Script return status $returnStat --- " ;;
    esac
    outString+=$healthLine
fi

# Add the result of individual tests
outString+=$textVal

# If there's a data value, add it on after the '|' separator.
if [[ $dataVal ]] ; then
    outString+="|$dataVal"
fi

# Translate the internal status into the plugin exit code.
# Reminder that 2 and 3 are switched from Nagios standard due to the
# arithmetic used above: internally 2 = unknown and 3 = critical, while
# the exit code is 3 for unknown and 2 for critical.
case $returnStat in
    0)
        exitCode=0
        ;;
    1)
        exitCode=1
        ;;
    2)
        exitCode=3
        ;;
    3)
        exitCode=2
        ;;
    *)
        echo "UNKNOWN -- Script returned a faulty return status. Return status $returnStat"
        exit 3
        ;;
esac

# Print the text quoted so that values returned by the device are never
# subjected to word splitting or pathname expansion. The single trailing
# separator space left by the last message is dropped.
printf '%s\n' "${outString% }"
exit "$exitCode"

