Friday, March 5, 2010

I had to write a script to track down a sudden CPU spike that occurred every day but was not being captured in the sar logs or by other performance tools. I didn't want to change the default monitoring tools' collection interval. I wanted the exact process ID and various performance-related parameters at the moment of the spike.

The script below came in handy. I ran it for 24 hours and was able to capture the culprit with full details.

nohup ./psmon.sh 24 &

The script will create a log file named <hostname>.cpu.stat.<MMDD> in the current directory, where <hostname> is the output of `uname -n` and <MMDD> is the output of `date +%m%d`.



#!/bin/bash
# Collect Process information
#
# psmon.sh
#
# (c)Dhanesh
# usage : ./psmon.sh nn     nn is in hours (whole number)
#
# Every pass appends prstat / uptime / vmstat snapshots to
#   <hostname>.cpu.stat.<MMDD>      (in the current directory)
# and, when at least one process is at 30% CPU or more, also appends its
# ptree plus vmstat / iostat / netstat output. The loop repeats until
# nn hours have elapsed.
#
# Exit status: 0 after a completed run, 1 on bad usage or setup failure.
#
# NOTE(review): prstat and ptree look like Solaris tools; egrep and perl are
# kept exactly as the original used them so the script keeps working on the
# platform it was written for -- confirm before "modernising" them.

HOST=$(uname -n)
DATE=$(date +%m%d)
LOGFILE="${HOST}.cpu.stat.${DATE}"

# Pause between two collection passes, in seconds.
SLEEP_SECS=20

# Print the usage text to stderr.
usage() {
  echo " Usage : $0 nn " >&2
  echo " nn is in Hours " >&2
}

# Print the current time as seconds since the epoch.
now_epoch() {
  perl -e 'print time()'
}

# Copy stdin to stdout, prefixing every line with the local time.
stamp_lines() {
  perl -e 'while (<>) { print localtime() . ": $_"; }'
}

# Append the snapshots taken on every pass to the log file.
collect_baseline() {
  {
    prstat -Tc 5 2
    echo "$(date) : $(uptime)"
    vmstat 2 2 | stamp_lines
    prstat -t 1 1
    prstat -Z 1 1
  } >> "${LOGFILE}"
}

# Append the extra detail gathered when busy processes were found.
# Arguments: one or more PIDs, all handed to a single ptree call.
collect_hot_details() {
  {
    ptree "$@"
    vmstat 2 2 | stamp_lines
    iostat -xtc
    netstat -i | stamp_lines
  } >> "${LOGFILE}"
}

if [ "$#" -lt 1 ]; then
  usage
  exit 1
fi

# Only plain non-negative integers are accepted; anything else used to make
# expr fail and the loop silently ran a single pass.
case "$1" in
  '' | *[!0-9]*)
    echo "$0: hours must be a non-negative whole number, got '$1'" >&2
    usage
    exit 1
    ;;
esac

# Fail early instead of running for hours without being able to log.
if ! touch "${LOGFILE}"; then
  echo "$0: cannot write to log file ${LOGFILE}" >&2
  exit 1
fi

echo "Logfile: $LOGFILE"

#Time calculation
# 10# forces base 10 so that an argument such as "08" is not read as octal.
DURATION=$(( 10#$1 * 3600 ))
CTIME=$(now_epoch)
case "${CTIME}" in
  '' | *[!0-9]*)
    echo "$0: could not read the current time (is perl installed?)" >&2
    exit 1
    ;;
esac
ETIME=$(( CTIME + DURATION ))

echo "Process Monitor ${HOST} Start Time : $(date) "

while [ "${CTIME}" -le "${ETIME}" ]; do
  collect_baseline

  # Find Current time in seconds
  CTIME=$(now_epoch)

  # PIDs of the processes whose prstat line shows 30% .. 100%.
  HOT_PIDS=$(prstat -c 1 1 | egrep "[3-9][0-9]%|100%" | awk '{print $1}')
  if [ -n "${HOT_PIDS}" ]; then
    # Intentionally unquoted: several PIDs may come back, one per line, and
    # each must reach ptree as its own argument.
    collect_hot_details ${HOT_PIDS}
  fi

  # put collection Delay
  sleep "${SLEEP_SECS}"
done

echo "Process Monitor ${HOST} End Time : $(date) "