I had to write a script to track down a sudden CPU spike that occurred every day but was not being captured in the sar logs or by other performance tools. I didn't want to change the collection interval of the default monitoring tools. I wanted the exact process ID and various performance-related parameters at the moment of the spike.
The script below came in handy. I ran it for 24 hours and was able to capture the culprit with full details:
nohup ./psmon.sh 24 &
The script creates a file named `<hostname>.cpu.stat.<mmdd>` in the current directory, where `<mmdd>` is the output of `date +%m%d`.
#!/bin/bash
# Collect Process information
#
# psmon.sh
#
# (c)Dhanesh
# usage : ./psmon.sh nn      (nn is the run time in whole hours)
#
# Each cycle appends prstat, uptime and vmstat snapshots to
# <hostname>.cpu.stat.<mmdd> in the current directory. When a prstat line
# contains a percentage of 30% or more, the process tree of every such
# process plus extra vmstat / iostat / netstat output is appended as well.
# The loop then sleeps 20 seconds and repeats until nn hours have elapsed.
#
# Exit status: 0 after a completed run, 1 on a usage error or missing perl.

# Print the usage text on stderr and stop with a failure status.
usage() {
  echo " Usage : $0 nn " >&2
  echo " nn is in Hours " >&2
  exit 1
}

# Copy stdin to stdout, prefixing every line with the current local time.
stamp_lines() {
  perl -e 'while (<>) { print localtime() . ": $_"; }'
}

# Print the current time as seconds since the epoch.
# perl is kept from the original.
# NOTE(review): presumably the target's date(1) lacks %s - confirm before
# replacing this with `date +%s`.
now_epoch() {
  perl -e 'print time()'
}

[ "$#" -ge 1 ] || usage

# Accept only a non-negative whole number of hours; anything else used to
# make expr fail and silently turn the run into a single cycle.
case "$1" in
  '' | *[!0-9]*) usage ;;
esac

# Both the loop clock and the line timestamps depend on perl.
if ! command -v perl > /dev/null 2>&1; then
  echo "$0: perl is required but was not found in PATH" >&2
  exit 1
fi

host=$(uname -n)
logfile="${host}.cpu.stat.$(date +%m%d)"
echo "Logfile: $logfile"

# Time calculation, all in seconds.
# 10# forces base 10 so that an argument such as "08" is not read as octal.
run_secs=$(( 10#$1 * 3600 ))
now=$(now_epoch)
end_time=$(( now + run_secs ))

echo "Process Monitor ${host} Start Time : $(date) "

while [ "$now" -le "$end_time" ]; do
  # Baseline snapshot, taken every cycle.
  {
    prstat -Tc 5 2          # two samples, 5 seconds apart, with task summary
    echo "$(date) : $(uptime)"
    vmstat 2 2 | stamp_lines
    prstat -t 1 1           # per-user summary
    prstat -Z 1 1           # per-zone summary
  } >> "$logfile"

  # Refresh the clock that drives the loop condition.
  now=$(now_epoch)

  # First field (the PID column) of every prstat line that contains a
  # percentage of 30-99% or 100%.
  # NOTE(review): egrep is kept on purpose; the default grep on the target
  # host may not accept -E - confirm before modernising.
  hot_pids=$(prstat -c 1 1 | egrep "[3-9][0-9]%|100%" | awk '{print $1}')

  if [ -n "$hot_pids" ]; then
    {
      # Several PIDs may have matched; word-splitting is intentional so that
      # each one reaches ptree as its own argument.
      # shellcheck disable=SC2086
      ptree $hot_pids
      vmstat 2 2 | stamp_lines
      iostat -xtc
      netstat -i | stamp_lines
    } >> "$logfile"
  fi

  # Collection delay between cycles.
  sleep 20
done

echo "Process Monitor ${host} End Time : $(date) "