AlkantarClanX12

Your IP : 3.133.137.10


Current Path : /proc/self/root/lib64/nagios/plugins/nccustom/
Upload File :
Current File : //proc/self/root/lib64/nagios/plugins/nccustom/check_nc_cp_backup_process.sh

#!/usr/bin/bash

## This check measures the age of the pkgacct and rsync processes started by
## nc_cp_backup.sh and raises an alarm if one of them runs longer than expected
# Forked from nagios_hosting_check_backup-splited (check_cpbackup_process.sh) by Vladimir Kuprikov

# Print the help text (purpose, synopsis and option list) on stdout.
# The synopsis line shows the name this script was invoked under.
usage() {
    local self
    self="$(basename "${0}")"

    # One argument per output line; the final empty argument produces the
    # trailing blank line of the help text.
    printf '%s\n' \
        "    This script measures age of pkgacct and rsync processes which have nc_cp_backup.sh as parent " \
        "    and rises alarm if it takes longer than expected" \
        "" \
        "Usage: ${self} -c CTIME[D|H|M] -w WTIME[D|H|M]" \
        "" \
        "    -h Print this help" \
        "    -c CRITICAL status if backups are older than CTIME (default CTIME = 5 days) (default assumes Days)" \
        "    -w WARNING status if backups are older than WTIME (default WTIME = 3 days) (default assumes Days)" \
        ""
}

# ---- Static configuration and defaults ------------------------------------

# Command-line arguments flattened into a single string; consumed by the
# option parser further down.
PARAMS="${@}"

# Names and paths this check works with.
NCCPBACKUP="/root/bin/nc_cp_backup.sh"
PKGACCT="pkgacct"
RSYNC="rsync"
CPBACKUP_PAUSED="/backup/CPBACKUP_PAUSED"

# Exit codes returned to the monitoring system.
OK=0
WARN=1
CRIT=2
UNKN=3
RC="${UNKN}"   # result code; stays "unknown" until a verdict is reached

# Seconds per supported time unit.
declare -A UNITS=(
    [days]=86400
    [hours]=3600
    [minutes]=60
)

# PID -> runtime (seconds) of processes over the critical / warning threshold.
declare -A CPROC WPROC

# Default thresholds, stored in seconds, and the unit they are reported in.
UNIT="days"
WUNIT="${UNIT}"
CUNIT="${UNIT}"
WTIME=$(( 3 * 86400 ))  # 3 days
CTIME=$(( 5 * 86400 ))  # 5 days

#######################################
# Convert a threshold written as NUMBER[D|H|M] into seconds.
# A bare NUMBER (no unit suffix) is interpreted as days.
# Only whole, non-negative numbers are accepted; anything else terminates the
# script with the UNKNOWN exit code.
# Globals:
#   UNITS (read)  - seconds per unit, keyed by days/hours/minutes
#   UNKN  (read)  - exit code used when the argument cannot be parsed
#   TIME  (write) - the threshold in seconds
#   UNIT  (write) - unit name of the argument: days, hours or minutes
# Arguments:
#   $1 - threshold, e.g. 5, 5D, 12h, 90M
# Outputs:
#   An error message on stdout when $1 is not a valid threshold.
#######################################
ident_time () {
    local spec="${1}"
    local suffix="${spec: -1}"   # last character decides the unit
    local num

    unset UNIT TIME

    case "${suffix}" in
        [Dd])
        UNIT="days"
        num="${spec%?}"
        ;;

        [Hh])
        UNIT="hours"
        num="${spec%?}"
        ;;

        [Mm])
        UNIT="minutes"
        num="${spec%?}"
        ;;

        [0-9])
        # No suffix: the whole argument is the number, counted in days.
        UNIT="days"
        num="${spec}"
        ;;

        *)
        echo "Can not recognize time ${1}. Please use identifier [D|H|M]"
        exit "${UNKN}"
        ;;
    esac

    # One validation for every branch, so inputs such as "abc5" or "1,5"
    # are rejected instead of producing a non-integer threshold.
    if ! [[ "${num}" =~ ^[0-9]+$ ]] ; then
        echo "Can not recognize time ${1}."
        exit "${UNKN}"
    fi

    # 10# forces base 10 so a leading zero ("08D") is not read as octal.
    TIME="$(( 10#${num} * ${UNITS[${UNIT}]} ))" # Show time in seconds
}


# Parse the command line.
#   -w / -c  thresholds; ident_time converts the argument to seconds and
#            leaves the result in TIME and the unit name in UNIT
#   -h       print the help text
# The positional parameters are handed to getopts as-is ("$@"), so an
# argument is never re-split on whitespace or expanded as a glob pattern.
while getopts ":hw:c:" OPTS "${@}"; do
    case "${OPTS}" in
        h)
        usage
        exit "${WARN}"
        ;;

        w)
        ident_time "${OPTARG}"
        WTIME="${TIME}"
        WUNIT="${UNIT}"
        ;;

        c)
        ident_time "${OPTARG}"
        CTIME="${TIME}"
        CUNIT="${UNIT}"
        ;;

        :)
        # Silent getopts mode: OPTARG holds the option that lacks its argument.
        echo "Option -${OPTARG} requires an argument" >&2
        usage
        exit "${UNKN}"
        ;;

        \?)
        # Silent getopts mode: OPTARG holds the unrecognised option letter.
        echo "Unknown option -${OPTARG}" >&2
        usage
        exit "${UNKN}"
        ;;

    esac
done

# ---------------------------------------------------------------------------
# Main check
#
# We are looking for pkgacct or rsync processes whose parent is
# nc_cp_backup.sh and whose grandparent has the same command line as the
# parent. Expected process tree (task TO-6668):
#
#   \_ bash /root/bin/nc_cp_backup.sh -a
#       \_ bash /root/bin/nc_cp_backup.sh -a            <- grandparent
#       |   \_ bash /root/bin/nc_cp_backup.sh -a        <- parent
#       |       \_ pkgacct - husqolkv - av: 4           <- measured process
#       |       \_ tail -1
#       \_ bash /root/bin/nc_cp_backup.sh -a
#       |   \_ bash /root/bin/nc_cp_backup.sh -a
#       |       \_ pkgacct - axiaatmy - av: 4
#       |       \_ tail -1
#       \_ sleep 1
#
# An earlier version of this check measured the second-level
# nc_cp_backup.sh processes themselves instead of their pkgacct/rsync
# children.
# ---------------------------------------------------------------------------

# Print the command line of PID $1 with the NUL argument separators replaced
# by spaces. Prints nothing when /proc/<pid>/cmdline cannot be read, e.g.
# because the process exited after the ps snapshot was taken.
proc_cmdline() {
    { tr '\0' ' ' < "/proc/${1}/cmdline" ; } 2>/dev/null
}

# Print the parent PID of PID $1 from the "PPid:" line of /proc/<pid>/status.
# The status file is used instead of field 4 of /proc/<pid>/stat because the
# process name in stat may contain spaces, which shifts the field positions.
# Prints nothing when the file cannot be read.
proc_ppid() {
    { awk '/^PPid:/ { print $2; exit }' "/proc/${1}/status" ; } 2>/dev/null
}

# One snapshot of every process whose ps line mentions pkgacct or rsync:
#   <start time>|<parent PID>|<PID>|<command>
CPBACKUPPROC="$( ps ax -o lstart -o "|%P|%p|" -o command | awk -v pkgacct="${PKGACCT}" -v rsync="${RSYNC}" '
                 ((match($0,pkgacct) || match($0,rsync)) && ! match($0,"\\|awk")) {print $0}')"

if [[ -z "${CPBACKUPPROC}" ]] ; then
    echo "[OK] ${NCCPBACKUP} is not running"
    exit "${OK}"
fi

backupScript="${NCCPBACKUP##*/}"   # file name of the backup script
NOW="$(date +"%s")"
flagSubProcRunning=0

# IFS is changed for read only, so the rest of the script keeps the default.
# The fourth field (the command) is not needed and is discarded.
while IFS='|' read -r mySDATE myP2ID myPID _ ; do

    myPID="${myPID// /}"  # removing spaces
    myP2ID="${myP2ID// /}"  # removing spaces

    # Ignore lines that did not yield two numeric PIDs, and processes that
    # have exited since the snapshot.
    [[ "${myPID}" =~ ^[0-9]+$ && "${myP2ID}" =~ ^[0-9]+$ ]] || continue
    [[ -d "/proc/${myPID}" ]] || continue

    CMDLINE="$(proc_cmdline "${myPID}")"
    P2CMDLINE="$(proc_cmdline "${myP2ID}")"    # cmdline of our parent
    myP3ID="$(proc_ppid "${myP2ID}")"          # PID of the parent of our parent
    [[ "${myP3ID}" =~ ^[0-9]+$ ]] || continue
    P3CMDLINE="$(proc_cmdline "${myP3ID}")"    # cmdline of the parent of our parent

    # The parent must be the backup script and the grandparent must carry the
    # same command line. Requiring the script name also stops two empty
    # (unreadable) command lines from comparing equal.
    [[ "${P2CMDLINE}" == *"${backupScript}"* && "${P2CMDLINE}" == "${P3CMDLINE}" ]] || continue

    # The process itself must be pkgacct or rsync.
    [[ "${CMDLINE}" =~ ^${PKGACCT} || "${CMDLINE}" =~ ^${RSYNC} ]] || continue

    flagSubProcRunning=1   # nc_cp_backup is running and at least one backup sub-process is active

    STARTDATE="$(date -d "${mySDATE}" +"%s")"

    if ! [[ "${STARTDATE}" =~ ^[0-9]+$ ]] ; then    # make sure that STARTDATE is an integer
        echo "ERROR: can not get start date of process"
        exit "${UNKN}"
    fi

    RUNTIME="$(( NOW - STARTDATE ))"

    if (( RUNTIME >= CTIME )) ; then
        CPROC[${myPID}]="${RUNTIME}"
    elif (( RUNTIME >= WTIME )) ; then
        WPROC[${myPID}]="${RUNTIME}"
    fi
done <<<"${CPBACKUPPROC}"

# No backup sub-process was found: check whether the backup carries a PAUSED flag.
# WARNING: file /backup/CPBACKUP_PAUSED older than 8 hours
# CRITICAL: file /backup/CPBACKUP_PAUSED older than 12 hours
# NOTE(review): this test is only reached when the ps snapshot above contained
# at least one line mentioning pkgacct or rsync; with an empty snapshot the
# script has already reported OK. Confirm that this is intended.
if (( flagSubProcRunning == 0 )) && [[ -e "${CPBACKUP_PAUSED}" ]] ; then

    pausedSince="$(date +%s -r "${CPBACKUP_PAUSED}" 2>/dev/null)"

    # The flag file may disappear between the test above and the date call.
    if [[ "${pausedSince}" =~ ^[0-9]+$ ]] ; then
        pauseAge=$(( (NOW - pausedSince) / 3600 ))    # in hours

        if (( pauseAge >= 12 )) ; then
            echo "CRITICAL. CPBACKUP is PAUSED for more than 12 hours"
            exit "${CRIT}"
        elif (( pauseAge >= 8 )) ; then
            echo "WARNING. CPBACKUP is PAUSED for more than 8 hours"
            exit "${WARN}"
        fi
    fi
fi

# Verdict: critical beats warning, warning beats OK.
if (( ${#CPROC[@]} > 0 )) ; then
    STATUS="[CRITICAL]"
    STATUSTXT="${NCCPBACKUP} process(es): PID(s)={${!CPROC[@]}} exceeded running time of $(( ${CTIME} /${UNITS[${CUNIT}]} )) ${CUNIT}"
    RC="${CRIT}"
elif (( ${#WPROC[@]} > 0 )) ; then
    STATUS="[WARNING]"
    STATUSTXT="${NCCPBACKUP} process(es): PID(s)={${!WPROC[@]}} exceeded running time of $(( ${WTIME} /${UNITS[${WUNIT}]} )) ${WUNIT}"
    RC="${WARN}"
else
    STATUS="[OK]"
    STATUSTXT="${NCCPBACKUP} process is OK"
    RC="${OK}"
fi

echo "${STATUS} ${STATUSTXT}"
exit "${RC}"