#!/bin/bash
# $Id: email_delay_tracking.sh 562 2006-05-29 12:04:53Z patpro $
#
# 1.1		2006-05-28		added zoom graph.
# 1.0        2006-05-22         version initiale : Patrick Proniewski
#
# Script designed to send emails at regular intervals, and to keep track of
# their delay until they come back.
# Works along with filtrage.awk to keep track of the delay added by each "hop"
# during the email delivery process.
#
# Usage: the script is launched by crontab every X seconds (X>100). It creates
# an email message on MY_SMTP via an ssh connection and logs the date in two
# files. Then it updates the data for expected emails and plots the data for
# received emails.
# When fetchmail gets an email from the pop server, it is handled by a .forward
# file. This file pushes the email through a pipe into the awk scripts.
#
# some settings 
# --- mail path -------------------------------------------------------------
# SMTP host the test email is created on (reached through ssh)
MY_SMTP="SMTP-FRONTAL"
# ssh private key used to create the email on MY_SMTP
MY_SSHKEY="/chemin/de/la/cle/emailmonitor"
# recipient of the test email (the pop account the email is retrieved from)
MY_ACCOUNT="emailmonitor@VOTRE-DOMAINE"

# --- working files (all relative to MY_ROOT) ---------------------------------
# working directory; the script creates it and cd's into it
MY_ROOT="/chemin/du/dossier/emaildelay"
# one record per email (expected or received)
MY_DATELIST="datelist"
# one record per email still expected
MY_DATEWAIT="datewait"
# gnuplot output name (graph_datelist currently writes hard-coded names instead)
MY_GRAPH="delais_courrier.png"
# flat text file that receives stderr (log lines and xtrace output)
MY_LOGFILE="emailmonitor.log"
# marker stored in MY_DATELIST for an email that has not come back yet
MY_FLAG="E"

# --- plotting ----------------------------------------------------------------
# width of the plotted window, in seconds (30 hours)
MY_RANGE=$((30 * 3600))
# offset from GMT applied to timestamps handed to gnuplot, in seconds (2 hours)
MY_TZ=$((2 * 3600))

# --- alarm levels ------------------------------------------------------------
# A record gets a label when a step delay exceeds its level (see update_datelist):
#   LV1 is compared with field 6 (label +MXAV)
#   LV2 is compared with field 5 (label +AV)
#   LV3 is compared with field 4 (label +MXMGW)
#   LV4 is compared with field 3 (label +MGWCO)
LV1=60
LV2=60
LV3=60
LV4=60

# Enter the MY_ROOT sandbox (created on demand); give up if it is unreachable.
mkdir -p "${MY_ROOT}"
if ! cd "${MY_ROOT}"; then
	exit 1
fi

# Redirect stderr into MY_LOGFILE, keeping the original stderr on file
# descriptor 6 so that it can be restored at the end of the script.
# Note: ">" truncates MY_LOGFILE every time the script starts.
exec 6>&2
exec 2>${MY_LOGFILE}
# Turn on command tracing; the trace is written to stderr, i.e. to MY_LOGFILE.
set -x

# From now on, every file path is relative to MY_ROOT

# File format:
# date delay_forecast delay_stepN ... delay_step2 delay_step1 label
#
# in MY_DATELIST delay_forecast=0 if the email has been fetched, if not delay_forecast=MY_FLAG
# in MY_DATEWAIT delay_forecast=NOW-date, in seconds
# label is null, except if we go beyond LV1..LVn

# Write one timestamped line to stderr (stderr is redirected to MY_LOGFILE by
# the top-level setup of this script).
# Arguments: $* - message words; they are joined with single spaces
# Outputs:   "<output of date> # <message>" followed by a newline, on stderr
# The line is emitted with a single printf so that the timestamp is neither
# word-split nor glob-expanded and the line is not written in two pieces.
function logrun() {
	printf '%s # %s\n' "$(date)" "$*" >&2
}

# Trigger the creation of a test email on MY_SMTP and register it as expected
# in MY_DATELIST and MY_DATEWAIT.
# Globals read:    MY_SSHKEY, MY_SMTP, MY_ACCOUNT, MY_FLAG, MY_DATELIST, MY_DATEWAIT
# Globals written: REMOTE_DATE (date printed by MY_SMTP), LOCAL_DATE (local date),
#                  both in seconds since the epoch
# Arguments: none
# Returns:   0 when both records were appended; non-zero when the remote date
#            is unusable (a message is logged) or a file could not be written
function init_mail() {
	# The remote shell starts the mail in the background and prints its own
	# clock; that remote timestamp is what identifies the email afterwards.
	REMOTE_DATE=$(ssh -i "${MY_SSHKEY}" "${MY_SMTP}" "echo test | mail ${MY_ACCOUNT} & date +%s")
	LOCAL_DATE=$(date +%s)
	: "${REMOTE_DATE:=0}"

	# Accept only a plain decimal number that is less than one hour behind the
	# local clock; anything else (empty output, banner, error text) is rejected.
	local epoch_re='^[1-9][0-9]*$'
	if [[ ${REMOTE_DATE} =~ ${epoch_re} ]] && [ "${REMOTE_DATE}" -gt $((LOCAL_DATE-3600)) ]; then
		echo "${REMOTE_DATE} ${MY_FLAG}" >> "${MY_DATELIST}" || return 1
		echo "${REMOTE_DATE} $((LOCAL_DATE-REMOTE_DATE)) 0 0 0 0 N" >> "${MY_DATEWAIT}"
	else
		logrun "init_mail failed, REMOTE_DATE doesn't look good (${REMOTE_DATE})"
		# Report the failure: without this the function would return logrun's
		# status (0) and the caller would log a success.
		return 1
	fi
}

# Record a received email: replace its "expected" record in MY_DATELIST by the
# measured delays (plus an alarm label) and drop it from MY_DATEWAIT.
# Globals read:    POOL (one record: date followed by the delay fields),
#                  LV1..LV4, MY_FLAG, MY_DATELIST, MY_DATEWAIT
# Globals written: STAMP (first field of POOL), DATA (record stored in MY_DATELIST)
# Arguments: none
# Returns:   0 when both files were updated, 1 otherwise
function update_datelist() {
	local stamp_re='^[0-9]{8,}$'
	local prefix rc status=0

	STAMP=$(printf '%s\n' "${POOL}" | awk '{print $1}')
	# An empty or malformed stamp would turn the patterns below into
	# match-everything expressions and wipe both lists, so refuse it.
	if [[ ! ${STAMP} =~ ${stamp_re} ]]; then
		logrun "update_datelist failed, STAMP doesn't look good (${STAMP})"
		return 1
	fi

	# Append one "+NAME" tag per step whose delay exceeds its alarm level.
	# The record is re-split by awk so that fields end up separated by single
	# spaces; no trailing blank is added when there is no tag.
	DATA=$(printf '%s\n' "${POOL}" | awk -v lv1="${LV1}" -v lv2="${LV2}" -v lv3="${LV3}" -v lv4="${LV4}" '
		{
			$1 = $1
			line = $0
			if ($6 > lv1) label = label "+MXAV"
			if ($5 > lv2) label = label "+AV"
			if ($4 > lv3) label = label "+MXMGW"
			if ($3 > lv4) label = label "+MGWCO"
		}
		END { print (label == "" ? line : line " " label) }')

	# attention: matching on the first 8 digits of the stamp makes it very
	# risky to use a delay of less than 100 seconds between two emails
	prefix=${STAMP:0:8}

	# MY_DATELIST: drop the flagged "expected" record, add the real one, re-sort.
	# grep exits with 1 when it selects no line (every line was filtered out);
	# that is a valid result here, only a status above 1 is an error.
	grep -Ev -- "^${prefix}.*${MY_FLAG}" "${MY_DATELIST}" > "${MY_DATELIST}.tmp"
	rc=$?
	if (( rc > 1 )); then
		status=1
	elif ! { printf '%s\n' "${DATA}" >> "${MY_DATELIST}.tmp" \
		&& sort -n "${MY_DATELIST}.tmp" > "${MY_DATELIST}"; }; then
		status=1
	fi
	rm -f "${MY_DATELIST}.tmp"

	# MY_DATEWAIT: we remove the POPed mail from the expected email list
	grep -Ev -- "^${prefix}" "${MY_DATEWAIT}" > "${MY_DATEWAIT}.tmp"
	rc=$?
	if (( rc > 1 )); then
		rm -f "${MY_DATEWAIT}.tmp"
		status=1
	elif ! mv -f "${MY_DATEWAIT}.tmp" "${MY_DATEWAIT}"; then
		status=1
	fi

	return "${status}"
}

# Refresh the waiting time of every email still expected: field 2 of each
# MY_DATEWAIT record becomes "now - field 1"; fields 3 to 6 are copied and any
# further field is dropped.
# Globals read:    MY_DATEWAIT
# Globals written: LOCAL_DATE (current date, seconds since the epoch)
# Arguments: none
# Returns:   0 when MY_DATEWAIT was rewritten, non-zero otherwise (the file is
#            then left untouched)
function update_expected() {
	LOCAL_DATE=$(date +%s)
	# Write to a fresh temporary file (">", not ">>"): a leftover .tmp from an
	# interrupted run must not be merged into the list. The list is replaced
	# only when awk succeeded.
	if awk -v now="${LOCAL_DATE}" \
		'{ print $1 " " (now - $1) " " $3 " " $4 " " $5 " " $6 }' \
		"${MY_DATEWAIT}" > "${MY_DATEWAIT}.tmp"; then
		mv -f "${MY_DATEWAIT}.tmp" "${MY_DATEWAIT}"
	else
		rm -f "${MY_DATEWAIT}.tmp"
		return 1
	fi
}

# Plot the delays stored in MY_DATELIST and MY_DATEWAIT with gnuplot.
# Two PNG files are written in the current directory: "emailmonitor.png"
# (window of MY_RANGE seconds) and "emailmonitor-zoom.png" (last 7200 seconds,
# vertical scale capped at 200). These names are hard-coded in the gnuplot
# script; MY_GRAPH is not used here.
# Globals read:    MY_DATELIST, MY_DATEWAIT, MY_RANGE, MY_TZ,
#                  MY_GNUPLOT (optional, defaults to /opt/local/bin/gnuplot)
# Globals written: LAST_DATE, FIRST_DATE, MAX_DELAY, MAX_DELAY_ZOOM
# Arguments: none
# Returns:   1 when MY_DATELIST holds no usable date (a message is logged),
#            otherwise the exit status of gnuplot
# NOTE(review): the first element of each plot comes from an embedded gawk
# command that outputs MAX_DELAY+20% for timestamps inside an evening/night
# window (around 22:00-07:00) and 0 otherwise. It calls substr() with a start
# index of 0, whose result may differ between awk implementations - confirm
# the exact bounds of that window.
function graph_datelist() {
	local gnuplot_bin="${MY_GNUPLOT:-/opt/local/bin/gnuplot}"
	local epoch_re='^[1-9][0-9]*$'

	# last date value of MY_DATELIST, used to compute xrange
	LAST_DATE=$(tail -n 1 "${MY_DATELIST}" | awk '{print $1}')
	# Without a numeric last date (empty or missing list) every range below
	# would be meaningless, so there is nothing to plot.
	if [[ ! ${LAST_DATE} =~ ${epoch_re} ]]; then
		logrun "graph_datelist failed, no usable date in ${MY_DATELIST} (${LAST_DATE})"
		return 1
	fi
	FIRST_DATE=$((LAST_DATE-MY_RANGE))

	# max value of MY_DATELIST in the MY_RANGE window, used to compute yrange
	MAX_DELAY=$(awk -v from="${FIRST_DATE}" '$1 >= from { print $3+$4+$5+$6 }' "${MY_DATELIST}" | sort -rn | head -1)
	# same thing for the 7200 seconds window of the zoomed graph
	MAX_DELAY_ZOOM=$(awk -v from="$((LAST_DATE-7200))" '$1 >= from { print $3+$4+$5+$6 }' "${MY_DATELIST}" | sort -rn | head -1)
	MAX_DELAY=${MAX_DELAY:-0}
	MAX_DELAY_ZOOM=${MAX_DELAY_ZOOM:-0}
	if [ "${MAX_DELAY_ZOOM}" -gt 200 ]; then
		MAX_DELAY_ZOOM=200
	fi
	# A maximum of 0 (e.g. only expected emails in the window) would produce
	# the empty range [0:0], which gnuplot cannot plot; keep at least [0:1].
	if [ "${MAX_DELAY}" -lt 1 ]; then
		MAX_DELAY=1
	fi
	if [ "${MAX_DELAY_ZOOM}" -lt 1 ]; then
		MAX_DELAY_ZOOM=1
	fi

	# The here-document is expanded by the shell first: \$ keeps a literal "$"
	# for gnuplot/gawk, and a trailing backslash joins the plot elements into
	# one gnuplot command line.
	"${gnuplot_bin}" <<EOF
set terminal png size 1024,600
set output "emailmonitor.png"
set format xy "%g"
set xdata time
set timefmt "%s"
set format x "%H"
set ylabel "délais total en secondes"
set yrange [0:$((MAX_DELAY+MAX_DELAY*5/100))]
set xrange [$((FIRST_DATE+MY_TZ)):$((LAST_DATE+MY_TZ))]
set xlabel "heure d'envoi (fenetre glissante de $((MY_RANGE/3600)) heures)"
set style data boxes
set style fill solid 0.70 noborder
set title "Délais aux différentes étapes de l'acheminement des courriers"
plot "< /opt/local/bin/gawk '{ if(substr(strftime(\"%H:%M\",\$1),0,4)>=\"22:0\" || substr(strftime(\"%H:%M\",\$1),0,4)<=\"07:0\") {print \$1\" $((MAX_DELAY+MAX_DELAY/5))\";} else {print \$1\" 0\";} }' ${MY_DATELIST}" using (\$1+$MY_TZ):2 lt 29 notitle fs solid 0.15, \
"< cat ${MY_DATELIST}" using (\$1+$MY_TZ):(\$3+\$4+\$5+\$6) t "gateway -> pop    " lt 36, \
"< cat ${MY_DATELIST}" using (\$1+$MY_TZ):(\$4+\$5+\$6) t "serv1 -> gateway" lt 1, \
"< cat ${MY_DATELIST}" using (\$1+$MY_TZ):(\$5+\$6) t "amavis -> serv1    " lt 73, \
"< cat ${MY_DATELIST}" using (\$1+$MY_TZ):6 t "serv1 -> amavis" lt 94, \
"< cat ${MY_DATEWAIT}" using (\$1+$MY_TZ):2:(50) t "en transit" lt 0

set terminal png size 800,600
set output "emailmonitor-zoom.png"
set format xy "%g"
set xdata time
set timefmt "%s"
set format x "%H:%M"
set ylabel "délais total en secondes"
set yrange [0:$((MAX_DELAY_ZOOM+MAX_DELAY_ZOOM*20/100))]
set xrange [$((LAST_DATE+MY_TZ-7200)):$((LAST_DATE+MY_TZ+350))]
set xlabel "heure d'envoi (fenetre glissante de $((7200/3600)) heures)"
set style data boxes
set style fill solid 0.70 noborder
set title "Délais aux différentes étapes de l'acheminement des courriers"
plot "< /opt/local/bin/gawk '{ if(substr(strftime(\"%H:%M\",\$1),0,4)>=\"22:0\" || substr(strftime(\"%H:%M\",\$1),0,4)<=\"07:0\") {print \$1\" $((MAX_DELAY+MAX_DELAY/5))\";} else {print \$1\" 0\";} }' ${MY_DATELIST}" using (\$1+$MY_TZ):2 lt 29 notitle fs solid 0.15, \
"< tail -2000 ${MY_DATELIST}" using (\$1+$MY_TZ):(\$3+\$4+\$5+\$6) t "gateway -> pop    " lt 36, \
"< tail -2000 ${MY_DATELIST}" using (\$1+$MY_TZ):(\$4+\$5+\$6) t "serv1 -> gateway" lt 1, \
"< tail -2000 ${MY_DATELIST}" using (\$1+$MY_TZ):(\$5+\$6) t "amavis -> serv1    " lt 73, \
"< tail -2000 ${MY_DATELIST}" using (\$1+$MY_TZ):6 t "serv1 -> amavis" lt 94, \
"< tail -2000 ${MY_DATEWAIT}" using (\$1+$MY_TZ):2:(200) t "en transit" lt 0

EOF

}

# Main flow.
# Without arguments the script sends a test mail ("mailing" run); with
# arguments it records the data handed over by the awk processing script
# ("tracking" run). Each kind of run advertises itself with a flag file and
# waits while the flag of the other kind is present, to avoid running both at
# the same time. The test and the touch are not atomic, so this is only a
# best-effort guard; the wait itself has no timeout.
if [ $# -eq 0 ]; then
	OWN_FLAG="MAILING_IN_PROGRESS"
	PEER_FLAG="TRACKING_IN_PROGRESS"
else
	OWN_FLAG="TRACKING_IN_PROGRESS"
	PEER_FLAG="MAILING_IN_PROGRESS"
fi

# If the flag of the other kind of run is present we just let the script sleep
while [ -f "${PEER_FLAG}" ]; do sleep 1; done

# Remove our own flag on every exit path, including HUP/INT/TERM: a flag left
# behind would make the other kind of run sleep forever. Only our own flag is
# removed, so that a run waiting or working on the other side keeps its flag.
trap 'rm -f "${OWN_FLAG}"' EXIT
trap 'exit 1' HUP INT TERM
touch "${OWN_FLAG}"

if [ $# -eq 0 ]; then
	# no args, so standard behaviour: we send a test mail
	init_mail && logrun "init_mail succeeded"
else
	# we have some args, so update behaviour: we update both lists with data
	# provided by the awk processing script
	POOL=$*
	update_datelist && logrun "update_datelist succeeded"
fi

update_expected && logrun "update_expected succeeded"
graph_datelist && logrun "graph_datelist succeeded"

rm -f "${OWN_FLAG}"

# disable debugging
set +o xtrace
# restore stderr from file descriptor 6 and close that descriptor
exec 2>&6 6>&-

# $Id: email_delay_tracking.sh 562 2006-05-29 12:04:53Z patpro $
