#!/usr/bin/env python

import datetime
import optparse
import re
import time
import sys

# Shape of a recognised log line:
#   LEVEL [pid] "logname" object Mon DD HH:MM:SS message
# where the quoted logname part is optional.
log_re = re.compile(''.join([
    r'^([A-Z]+) +',                                # group 1: level name
    r'\[ *(\d+)\] +',                              # group 2: pid in brackets
    r'("([^"]+)" +)?',                             # groups 3/4: optional quoted logname
    r'([^ ]+) +',                                  # group 5: object
    r'([A-Z][a-z][a-z] \d\d \d\d:\d\d:\d\d) +',    # group 6: date, no year
    r'([^ ].*)$',                                  # group 7: message
]))

# Level names in numbering order; `levels` maps each name to its 1-based
# position in this tuple.
level_names = ('ERROR', 'WARN', 'INFO', 'DEBUG', 'LOG')
levels = dict(zip(level_names, range(1, len(level_names) + 1)))

# Copy of the most recent successfully parsed line; parse_line() uses it to
# fill in the fields of lines it cannot parse.
last_line = None
def parse_line(l):
    """Parse one raw log line into a dict.

    The returned dict has the keys 'level', 'pid', 'logname', 'object',
    'date' and 'message'.  For a recognised line, 'level' is the number
    from ``levels``, 'pid' is an int, 'logname' is None when the line has
    no quoted name, and 'date' is the value returned by time.mktime().

    A line that cannot be parsed (no regex match, unknown level name, bad
    date, ...) is returned with 'level' set to None and 'message' set to
    the raw line.  Its other fields are copied from the last successfully
    parsed line, or are placeholders (pid -1, the rest None) when no line
    has been parsed yet.
    """
    global last_line
    parsed = None
    try:
        m = log_re.match(l)
        if m is not None:
            level, pid, _quoted, logname, obj, stamp, message = m.groups()
            # The format has no year, so time.strptime falls back to its
            # default year.
            # NOTE(review): with that default (1900, not a leap year) a
            # "Feb 29" line fails to parse and is reported as
            # unrecognised -- confirm whether that matters here.
            when = time.mktime(time.strptime(stamp, '%b %d %H:%M:%S'))
            parsed = {'level': levels[level],
                      'pid': int(pid),
                      'logname': logname,
                      'object': obj,
                      'date': when,
                      'message': message}
    except Exception:
        # Deliberately best effort: any failure simply means "not a
        # recognised log line".
        parsed = None

    if parsed is not None:
        last_line = dict(parsed)
        return parsed

    # try to get sane values
    if last_line is not None:
        parsed = dict(last_line)
    else:
        parsed = {'pid': -1, 'logname': None,
                  'object': None, 'date': None}
    parsed['level'] = None
    parsed['message'] = l
    return parsed

class StatsAnalyzer(object):
    """Interface shared by the analyzers in this file.

    Every method raises NotImplementedError; subclasses are expected to
    override all three.
    """

    def __init__(self):
        """Set up per-analyzer state (must be overridden)."""
        raise NotImplementedError()

    def linein(self, line):
        """Consume one parsed line (must be overridden)."""
        raise NotImplementedError()

    def print_summary(self):
        """Print the collected results (must be overridden)."""
        raise NotImplementedError()

class BasicStatsAnalyzer(StatsAnalyzer):
    """Counts lines, lines per level, and the time span they cover."""

    def __init__(self):
        self.numlines = 0
        # One counter per numeric level 1..5 plus a None slot for lines
        # whose level is None (unrecognised lines).
        self.numlevel = dict([(level, 0) for level in range(1, 6)])
        self.numlevel[None] = 0
        self.mintime = None
        self.maxtime = None

    def linein(self, line):
        """Account for one parsed line (a dict with 'level' and 'date')."""
        self.numlines += 1
        self.numlevel[line['level']] += 1
        stamp = line['date']
        # Compare against None explicitly: a timestamp of 0 is a valid
        # value and must not be mistaken for "no date".
        if stamp is not None:
            # we know they are sorted chronologically
            if self.mintime is None:
                self.mintime = stamp
            self.maxtime = stamp

    def print_summary(self):
        """Print the time span, line count and per-level counts to stdout."""
        # Single-argument print(...) behaves the same as a statement on
        # Python 2 and as a function call on Python 3.
        print('')
        print('Basic statistical analysis')
        print('')
        if self.mintime is not None:
            fmt = "%b %d %H:%M:%S"
            print('Start time: %s'
                  % time.strftime(fmt, time.localtime(self.mintime)))
            print('Stop time: %s'
                  % time.strftime(fmt, time.localtime(self.maxtime)))
            dt = datetime.datetime.fromtimestamp
            print('Duration: %s' % str(dt(self.maxtime) - dt(self.mintime)))
        print('Number of lines: %s' % self.numlines)
        print('Level distribution:')
        for level_name in level_names:
            print('             %05s: %d'
                  % (level_name, self.numlevel[levels[level_name]]))
        print('      Unrecognized: %d' % self.numlevel[None])

class TracebackAnalyzer(StatsAnalyzer):
    """Groups runs of unrecognised lines into tracebacks and counts repeats.

    A traceback is a run of consecutive lines whose 'level' is None.  A
    line reading 'Twisted traceback:' inside such a run starts a new
    traceback.
    """

    def __init__(self):
        # Number of runs finished so far; used as the "first seen" ordinal.
        self.tb_count = 0
        # traceback text -> (times seen, ordinal at first sighting)
        self.tracebacks = {}
        # Lines of the run being collected, or None when outside a run.
        self.current_traceback = None

    def finish_traceback(self):
        """Record the run being collected and reset the collector."""
        tb = '\n'.join(self.current_traceback)
        if tb.rstrip():
            seen, first_seen = self.tracebacks.get(tb, (0, self.tb_count))
            self.tracebacks[tb] = (seen + 1, first_seen)
        self.current_traceback = None
        self.tb_count += 1

    def linein(self, line):
        """Consume one parsed line (a dict with 'level' and 'message')."""
        if line['level'] is None:
            text = line['message'].rstrip()
            if self.current_traceback is None:
                self.current_traceback = []
            elif text == 'Twisted traceback:':
                self.finish_traceback()
                self.current_traceback = []
            self.current_traceback.append(text)
        elif self.current_traceback is not None:
            self.finish_traceback()

    def print_summary(self):
        """Print a one-line summary and the full text of each traceback."""
        # A run still open when the input ends has no following log line
        # to close it; record it here so the final traceback is not lost.
        if self.current_traceback is not None:
            self.finish_traceback()

        print('')
        print('%d tracebacks found.' % len(self.tracebacks))
        print('Printing tracebacks in the order of when they were first seen.')
        print('')
        tbs = [(first_seen, tb, count)
               for tb, (count, first_seen) in self.tracebacks.items()]
        tbs.sort()

        def last_line(s):
            # Last non-blank line of the traceback text.
            stripped = [part.rstrip() for part in s.split('\n')]
            return [part for part in stripped if part][-1]

        print("Summary:")
        for first_seen, tb, count in tbs:
            print('%d times: %s' % (count, last_line(tb)))

        print('')
        print("Details:")
        for first_seen, tb, count in tbs:
            print('=' * 80)
            print('Seen %d times:' % count)
            print(tb)
            print('')

class HistogramAnalyzer(StatsAnalyzer):
    """Counts log lines per half-hour bucket and prints a star histogram.

    Buckets are aligned on :00 and :30.  A bucket contains the lines whose
    timestamp is at or after its start and before its end.
    """

    BUCKETSIZE = datetime.timedelta(minutes=30)

    def __init__(self):
        # One dict per bucket, oldest first.
        self.linehist = []
        # End of the newest bucket (time.mktime value); None until the
        # first dated line arrives.
        self.lasttime = None

    def make_bucket(self):
        """Return a fresh, empty bucket."""
        return {'numlines': 0}

    def first_bucket(self, t):
        """Open the first bucket so that it contains timestamp `t`."""
        last = datetime.datetime.fromtimestamp(t)
        last = last.replace(second=0, microsecond=0)
        # Seconds are dropped above, so hh:30:ss must already count as the
        # second half of the hour; otherwise the bucket would end at or
        # before `t`.
        if last.minute >= 30:
            last = last.replace(minute=0)
            last += datetime.timedelta(hours=1)
        else:
            last = last.replace(minute=30)
        self.linehist.append(self.make_bucket())
        self.lasttime = time.mktime(last.timetuple())

    def new_bucket(self):
        """Append one more bucket after the current newest one."""
        last = datetime.datetime.fromtimestamp(self.lasttime)
        last += self.BUCKETSIZE
        self.linehist.append(self.make_bucket())
        self.lasttime = time.mktime(last.timetuple())

    def linein(self, line):
        """Count one parsed line (a dict with 'date') in its bucket.

        Lines without a date are counted in the newest bucket, or dropped
        if no bucket exists yet.
        """
        t = line['date']
        if t is not None:
            if self.lasttime is None:
                self.first_bucket(t)
            else:
                # A line exactly on a boundary starts the next bucket,
                # matching the start-time labels printed in the summary.
                while t >= self.lasttime:
                    self.new_bucket()

        if self.linehist:
            self.linehist[-1]['numlines'] += 1

    def print_summary(self):
        """Print one row per bucket: start time, line count, star bar."""
        print('')
        print('Time distribution of log lines')
        print('')

        if not self.linehist:
            # No dated line was seen: there is nothing to draw and
            # self.lasttime is still None.
            return

        maxlines = max([b['numlines'] for b in self.linehist])
        # Lines per star; at least 1 so sparse logs do not divide by zero.
        starsize = max(1, maxlines // 40)

        t = datetime.datetime.fromtimestamp(self.lasttime)
        t -= len(self.linehist) * self.BUCKETSIZE
        for bucket in self.linehist:
            print('    %s: %04d %s'
                  % (t.strftime("%b %d %H:%M:%S"),
                     bucket['numlines'],
                     (bucket['numlines'] // starsize) * '*'))
            t += self.BUCKETSIZE

def analyze(f, basic_stats=False, histogram=False, tracebacks=False):
    """Run the selected analyzers over every line of `f` and print results.

    `f` is rewound with seek(0) and then iterated line by line; each line
    goes through parse_line() and is handed to every selected analyzer.
    Summaries are printed in the order basic stats, histogram, tracebacks.
    If no analyzer is selected, a hint is written to stderr and nothing is
    read.  Always returns None.
    """
    f.seek(0)

    analyzers = []
    if basic_stats:
        analyzers.append(BasicStatsAnalyzer())
    if histogram:
        analyzers.append(HistogramAnalyzer())
    if tracebacks:
        analyzers.append(TracebackAnalyzer())
    if not analyzers:
        # sys.stderr.write works on Python 2 and 3 alike, unlike the
        # `print >>stream` statement.
        sys.stderr.write("No analyzers specified, try %s --help\n"
                         % (sys.argv[0],))
        return

    for line in f:
        parsed = parse_line(line)
        for a in analyzers:
            a.linein(parsed)

    for a in analyzers:
        a.print_summary()

def main(args):
    """Command-line entry point.

    `args` is the full argument vector including the program name (as in
    sys.argv).  Returns 1 when the arguments are wrong or the log file
    cannot be opened, and None after a completed analysis.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] LOGFILE")
    parser.add_option('', '--basic-stats',
                      action="store_true", dest="basic_stats",
                      help="output some basic statistics on the file")
    parser.add_option('', '--histogram',
                      action="store_true", dest="histogram",
                      help="output a histogram of debug statements")
    parser.add_option('', '--tracebacks',
                      action="store_true", dest="tracebacks",
                      help="analyze the tracebacks found in the log")
    options, args = parser.parse_args(args)

    # After option parsing, args holds the program name and the log file.
    if len(args) != 2:
        sys.stderr.write("usage: %s LOGFILE\n" % (args[0],))
        return 1
    filename = args[1]
    try:
        log = open(filename, 'r')
    except IOError as e:
        sys.stderr.write("Error opening log file %s: %s\n"
                         % (filename, e))
        return 1

    # Close the file even if an analyzer raises.
    try:
        analyze(log,
                basic_stats=options.basic_stats,
                histogram=options.histogram,
                tracebacks=options.tracebacks)
    finally:
        log.close()

# Script entry point: pass the raw command line to main() and use its
# return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))
