Script to monitor the alert log of every instance on a server for errors and send an email if any are found.
I have been running some form of this script since the days of 8i and still find it useful. Customize it to suit your specific needs. Better to know there is an issue before someone tells you.
Add this to your crontab.
# Check Alert log for errors and email DBA
5,20,35,50 7-16 * * 0-7 /oracle/scripts/monitor/check_alert.sh
The check_alert.sh script
#!/bin/sh
# File-name: check_alert.sh
#-----------------------------------------------------
# Checks the Oracle alert log files for all databases listed in oratab.
#  1. Gets each database name (SID) and ORACLE_HOME from oratab.
#  2. Checks for ORA- errors in the alert log file.
#  3. Checks for "fail" errors in the alert log file.
#  4. Checks for Fatal NI connect errors (reported only when there are
#     more than NI_ERROR_THRESHOLD of them).
#  If error(s) are found then
#  5. Makes a time-stamped copy of the alert log file.
#  6. Cleans the alert log file (currently disabled, see check_instance).
#  7. Sends an e-mail with the results to the DBA.
#
# Requires a POSIX shell. Edit the settings below to suit the server.
#-----------------------------------------------------
ORACLE_BASE=/oracle/app/oracle
export ORACLE_BASE
TMPDIR=/tmp
export TMPDIR
ORATAB=/etc/oratab
export ORATAB
# Recipient of the error report.
DBA_EMAIL="your_email@company.com"
# Fatal NI connect errors are only reported above this many occurrences.
NI_ERROR_THRESHOLD=25

# Separator line used in the report.
l_rule="---------------------------------------------------------------------"

#------------------------------------------------------------------------
# log TEXT...
# Appends one line to the report of the instance being checked ($l_log).
#------------------------------------------------------------------------
log() {
  printf '%s\n' "$*" >> "$l_log"
}

#------------------------------------------------------------------------
# report_matches HEADING GREP_ARGS...
# Flags the current instance as having errors and appends HEADING plus
# every alert log line selected by `grep GREP_ARGS` to the report.
#------------------------------------------------------------------------
report_matches() {
  l_heading=$1
  shift
  # Separate this section from a previous error section, if there is one.
  if [ "$l_found" -eq 1 ]; then
    log "$l_rule"
  fi
  l_err=1
  l_found=1
  log "$l_heading"
  grep "$@" "$l_alertfile" >> "$l_log"
}

#------------------------------------------------------------------------
# check_instance SID ORACLE_HOME
# Builds the report for one instance, copies the alert log when errors
# were found in it, and mails the report when anything is wrong.
# Returns 1 (after a message on stderr) when the instance cannot be
# checked at all, 0 otherwise.
#------------------------------------------------------------------------
check_instance() {
  ORACLE_SID=$1
  ORACLE_HOME=$2
  export ORACLE_SID ORACLE_HOME
  SHLIB_PATH=$ORACLE_HOME/lib:/usr/lib
  LD_LIBRARY_PATH=$ORACLE_HOME/lib
  export SHLIB_PATH LD_LIBRARY_PATH

  # Every path below is derived from the SID, so nothing can be done
  # (not even writing the report) without it.
  if [ -z "$ORACLE_SID" ]; then
    printf '%s\n' "check_alert.sh: ORACLE_SID is not set, entry skipped" >&2
    return 1
  fi

  #----------------------------------------------------------------------
  # Initialization
  #----------------------------------------------------------------------
  l_err=0      # 1 = something must be mailed to the DBA
  l_found=0    # 1 = error lines were found in the alert log itself
  l_date=$(date '+%c')
  l_filedate=$(date '+%m%d%H%M')
  l_tracedir=${ORACLE_BASE}/diag/rdbms/${ORACLE_SID}/${ORACLE_SID}/trace
  l_log=${l_tracedir}/check_alert_${ORACLE_SID}.log
  l_alertfile=${l_tracedir}/alert_${ORACLE_SID}.log
  l_copy=${l_alertfile}.${l_filedate}

  if [ ! -d "$l_tracedir" ]; then
    printf '%s\n' "check_alert.sh: no trace directory for ${ORACLE_SID}: ${l_tracedir}" >&2
    return 1
  fi

  # Start a fresh report (truncates the one from the previous run).
  {
    printf '%s\n' "${l_date} *** log BEGIN ***"
    printf '%s\n' "$l_rule"
    printf '%s\n' "Script : ${0}"
    printf '%s\n' "Database : ${ORACLE_SID}"
    printf '%s\n' "Server : $(uname -n)"
    printf '%s\n' "Alert Log : ${l_alertfile}"
    printf '%s\n' "Copy To : ${l_copy}"
    printf '%s\n' "$l_rule"
  } > "$l_log"

  #----------------------------------------------------------------------
  # Verify the existence of the Oracle environment variables
  #----------------------------------------------------------------------
  if [ -z "$ORACLE_HOME" ]; then
    l_err=1
    log "ORACLE_HOME is not set"
    log ""
  fi

  #----------------------------------------------------------------------
  # Check the alert log file for any errors
  #----------------------------------------------------------------------
  if [ -f "$l_alertfile" ]; then
    if grep -q "ORA-" "$l_alertfile"; then
      report_matches "There is an ORA- error in the Oracle alert log file!" "ORA-"
    fi

    if grep -q -i "fail" "$l_alertfile"; then
      report_matches "There is a fail error in the Oracle alert log file!" -i "fail"
    fi

    # A few dropped client connections are normal; only report a burst.
    l_ni_count=$(grep -c -i "Fatal NI connect error" "$l_alertfile")
    if [ "${l_ni_count:-0}" -gt "$NI_ERROR_THRESHOLD" ]; then
      report_matches "There are more than ${NI_ERROR_THRESHOLD} Fatal NI connect errors in the Oracle alert log file!" -i "Fatal NI connect error"
      log " "
      log "Clients"
      grep -i "Client address: (ADDRESS=(PROTOCOL=tcp)(HOST=" "$l_alertfile" >> "$l_log"
      grep -i "Time: " "$l_alertfile" >> "$l_log"
    fi

    if [ "$l_found" -eq 0 ]; then
      log "There are NO errors in the alert log file"
    fi

    # Make a copy of the alert log file only if it is not empty and
    # errors were found in it.
    if [ "$l_found" -eq 1 ] && [ -s "$l_alertfile" ]; then
      cat "$l_alertfile" >> "$l_copy"
      # Cleaning the alert log is disabled; enable the next two lines to
      # start a fresh alert log after each copy.
      # rm ${l_alertfile}
      # touch ${l_alertfile}
    fi

    log "$l_rule"
    log "${l_date} *** log END ***"
  else
    log "Alert log file not found: ${l_alertfile}"
  fi

  #----------------------------------------------------------------------
  # Send errors to DBA
  #----------------------------------------------------------------------
  if [ "$l_err" -eq 1 ]; then
    mail -s "${ORACLE_SID} on $(uname -n) : ERRORS in alert_${ORACLE_SID}.log" \
      "$DBA_EMAIL" > /dev/null < "$l_log"
  fi
  return 0
}

#------------------------------------------------------------------------
# main
# Gets the Oracle instances from the oratab file and checks each of them.
# Blank lines, comment lines and the '*' (no specific instance) entry
# are skipped.
#------------------------------------------------------------------------
main() {
  if [ ! -r "$ORATAB" ]; then
    printf '%s\n' "check_alert.sh: cannot read oratab file: ${ORATAB}" >&2
    return 1
  fi

  # The loop reads from a redirection, not a pipe, so it is not run in a
  # subshell; the "|| [ -n ... ]" keeps a last line without a newline.
  while read -r LINE || [ -n "$LINE" ]; do
    case $LINE in
      '' | \#*) continue ;;   # blank line or comment-line in oratab
      *:*) ;;                 # SID:ORACLE_HOME[:...]
      *)
        printf '%s\n' "check_alert.sh: malformed oratab line skipped: ${LINE}" >&2
        continue
        ;;
    esac

    l_sid=${LINE%%:*}
    l_rest=${LINE#*:}
    l_home=${l_rest%%:*}

    if [ "$l_sid" = '*' ]; then
      continue
    fi

    check_instance "$l_sid" "$l_home"
  done < "$ORATAB"
  return 0
}

main "$@"
#-----------------------------------------------------
# End of script
No comments:
Post a Comment