#!/bin/bash
#
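# Pipe logfiles to be analyzed into this program via stdin.
#
# Usage: haproxy_timespent.sh [timelimit [maxlimit]]
#   timelimit  threshold in seconds for the summed request time of a group
#              of requests (default 0)
#   maxlimit   threshold in seconds for the slowest single request of a group
#              (default 0)
#
# The report is printed to stdout and also written to ./haproxy_timespent.log.
#
# e.g. cat /var/log/haproxy.log | haproxy_timespent.sh 10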
#
# Optional thresholds in seconds; a missing or empty argument becomes 0.
#   $1 -> timelimit (checked against the summed request time of a group)
#   $2 -> maxlimit  (checked against the slowest request of a group)
# Both are exported for the analysis pipeline below.
export timelimit="${1:-0}"
export maxlimit="${2:-0}"

#######################################
# Summarise HAProxy HTTP log lines read from stdin.
#
# Pipeline stages:
#   grep  keeps only lines with a "[dd/Mon/yyyy:hh:mm:ss.mmm]" timestamp that
#         is followed by at least 8 blank-separated fields; this removes the
#         shorter tcplog lines.
#   sed   1st expression: collapses everything between /PerFact/ and /WebApp/
#         to plain /WebApp/ (acquisition noise).
#         2nd expression: rewrites the line to
#         "hour method url status time[ms] " (query strings are cut off).
#         Lines that cannot be rewritten are dropped (-n ... p) instead of
#         being handed to awk in their raw form.
#   sort  makes lines with the same "hour method url status" key adjacent.
#         LC_ALL=C forces byte order: locale collation may ignore punctuation
#         and would then interleave different keys, splitting groups.
#   awk   aggregates each group and prints
#         "hour method url status count sum min avg max" (times in whole
#         seconds, truncated).
#
# Arguments:
#   $1 - limit in seconds for the summed time of a group (default 0)
#   $2 - limit in seconds for the slowest request of a group (default 0)
#   A limit of 0 disables that criterion as long as the other limit is set;
#   a group is reported when it exceeds any enabled limit. With no limit set
#   every group with a non-zero time is reported.
# Outputs:
#   Writes the report to stdout.
#######################################
summarize_timespent() {
  local total_limit=${1:-0}
  local slowest_limit=${2:-0}

  grep -E '^.* \[...........:..:..:......\] [^ ]* [^ ]* [^ ]* [^ ]* [^ ]* [^ ]* [^ ]* [^ ]*' |
    sed -n \
      -e 'sX/PerFact/.*/WebApp/X/WebApp/X' \
      -e 's|^.* \[...........:\(..\):..:......\] [^ ]* [^ ]* [^/]*/[^/]*/[^/]*/[^/]*/\([^ ]*\) \([^ ]*\) .* "\([^ ]* [^? "]*\).*$|\1 \4 \3 \2 |p' |
    LC_ALL=C sort |
    timelimit="$total_limit" maxlimit="$slowest_limit" awk '
      BEGIN {
        # Limits arrive in seconds, log times are in milliseconds.
        limit    = ENVIRON["timelimit"] * 1000
        maxlimit = ENVIRON["maxlimit"] * 1000
        key = ""; count = 0; sum = 0; min = 0; max = 0
      }

      # True when the group collected so far has to be reported.
      function over_threshold() {
        if (limit <= 0 && maxlimit <= 0) {
          # No filtering requested: keep the plain comparison against 0.
          return (sum > limit || max > maxlimit)
        }
        # A limit of 0 means "criterion disabled"; otherwise the default
        # maxlimit of 0 would let nearly every group through.
        return ((limit > 0 && sum > limit) || (maxlimit > 0 && max > maxlimit))
      }

      # Print the statistics of the current group (nothing before the first line).
      function report() {
        if (count > 0 && over_threshold()) {
          print key " " count " " int(sum/1000) " " int(min/1000) " " int(sum/count/1000) " " int(max/1000)
        }
      }

      {
        group = $1 " " $2 " " $3 " " $4
        ms = $5 + 0          # force numeric comparison for min/max

        if (group != key) {
          report()
          key = group
          count = 1
          sum = ms; min = ms; max = ms
        } else {
          count++
          sum += ms
          if (ms < min) min = ms
          if (ms > max) max = ms
        }
      }

      END { report() }
    '
}

summarize_timespent "${timelimit:-0}" "${maxlimit:-0}" |
  tee haproxy_timespent.log
