#!/usr/bin/env python
#
# say.py  -  Create dict-like logbook entries and analyze them.
#
# $Revision: 1.3 $
#
# Copyright (C) 2013 Jan Jockusch <jan.jockusch@perfact.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#
# $Id: say.py,v 1.3 2015/08/28 16:50:05 perfact Exp $

# Functions used in Zope: syslog_ext, syslog_read

import dateutil.parser  # for parsing dates more easily
import logging
import sys
# For using the old syslog API
import syslog
# Python2/3 compatibility
import six
# For working with log files
import glob
import stat
import os
import time
import gzip
import datetime
import shlex
import locale
# safe_eval is used by syslog_read to turn the dict-like payload of a log
# line back into a Python object.
# NOTE(review): SECURITY - if the ast module cannot be imported, safe_eval
# silently becomes the built-in eval, which executes arbitrary expressions
# found in log lines. ast ships with every Python the rest of this file can
# run on (presumably 2.6+ - TODO confirm), so the fallback looks obsolete;
# consider removing it.
try:
    import ast
    safe_eval = ast.literal_eval
except ImportError:
    safe_eval = eval
# For displaying the call stack
import traceback
import uuid
# For safe_syscall
from .generic import safe_syscall, to_string


# Python 3 has no separate unicode type; alias it so code written against
# the Python 2 name keeps working.
if not six.PY2:
    unicode = str

# Module-wide logger used by journal_log(), say() and the profiler helpers.
journal_logger = logging.getLogger('Say')

# Pick the log handler once at import time:
# - with the systemd bindings installed, log to the journal and set
#   use_syslog to False;
# - otherwise attach a standard SysLogHandler and set use_syslog to True,
#   which makes syslog_ext() call the syslog module directly.
try:
    import systemd.journal  # For logging with systemd
    try:
        logging_handler = systemd.journal.JournalHandler()
    except AttributeError:
        # The handler class is not available under this name; try the
        # alternative name. (Original note: structure of module changed?
        # check debian-packages?)
        logging_handler = systemd.journal.JournaldLogHandler()
    use_syslog = False
except ImportError:
    import logging.handlers
    # Fall back to a standard syslog handler
    logging_handler = logging.handlers.SysLogHandler()
    use_syslog = True

# Let every level through and keep records away from the root logger, so
# that only the handler chosen above receives them.
journal_logger.setLevel(logging.DEBUG)
journal_logger.propagate = False
journal_logger.addHandler(logging_handler)


def journal_log(msg, level=4):
    '''Write msg to the journal logger.

    level is converted with int() and selects the logger method:
    0 critical, 1 error, 2 warning, 3 info, 4 and 5 debug. Any other
    number is logged as debug as well.
    '''
    # Position in the tuple == numeric level.
    emitters = (
        journal_logger.critical,
        journal_logger.error,
        journal_logger.warning,
        journal_logger.info,
        journal_logger.debug,
        journal_logger.debug,
    )
    index = int(level)
    if 0 <= index < len(emitters):
        emitter = emitters[index]
    else:
        emitter = journal_logger.debug
    emitter(msg)


def syslog_ext(message, priority=None,
               level=None):
    '''Send message to syslog or to the journal logger.

    message:  text to log; normalised with to_string() first.
    priority: syslog priority used when level is not given. Defaults to
              LOG_DEBUG | LOG_LOCAL0.
    level:    numeric level 0..5 (see journal_log). If given, it replaces
              priority with the matching syslog severity on LOG_LOCAL0.

    When use_syslog is set the message goes to syslog.syslog() with the
    resulting priority; otherwise it goes to journal_log() with level,
    where a missing level is treated as 3.
    '''
    # Ensure that the string is 8-bit in Python2, and a str in Python3
    message = to_string(message)

    if priority is None:
        priority = syslog.LOG_DEBUG | syslog.LOG_LOCAL0

    if level is None:
        level = 3
    else:
        # Position in the tuple == numeric level.
        severities = (
            syslog.LOG_CRIT,
            syslog.LOG_ERR,
            syslog.LOG_WARNING,
            syslog.LOG_INFO,
            syslog.LOG_DEBUG,
            syslog.LOG_DEBUG,
        )
        index = int(level)
        if 0 <= index < len(severities):
            severity = severities[index]
        else:
            severity = syslog.LOG_DEBUG
        priority = severity | syslog.LOG_LOCAL0

    if not use_syslog:
        journal_log(message, level=level)
        return
    syslog.syslog(priority, message)


def date_conv(date):
    '''Parse a date string into a UTC time tuple and epoch seconds.

    date: any string understood by dateutil.parser.parse.

    Returns a tuple (utc_timetuple, seconds) where seconds is a float
    number of seconds since the epoch including the microsecond part,
    comparable with time.time() and file modification times.

    A date carrying a UTC offset is converted from its UTC time tuple.
    A date without offset is interpreted as local time.
    '''
    # Local import keeps this block self-contained.
    import calendar

    d = dateutil.parser.parse(date)
    if d.utcoffset() is None:
        # Naive date: local wall-clock time. timetuple() leaves tm_isdst
        # at -1, so mktime() works out daylight saving time itself.
        whole_seconds = time.mktime(d.timetuple())
    else:
        # utctimetuple() is in UTC, so it must be converted with timegm();
        # mktime() would wrongly read it as local time.
        whole_seconds = calendar.timegm(d.utctimetuple())
    return (d.utctimetuple(),
            whole_seconds + d.microsecond/1000000.0)


def ts_to_date(ts):
    '''Format epoch seconds as local "YYYY-MM-DD HH:MM:SS.mmm".

    The fraction is cut (not rounded) to milliseconds.
    '''
    moment = datetime.datetime.fromtimestamp(ts)
    millis = moment.microsecond // 1000
    return '%s.%03d' % (moment.strftime('%Y-%m-%d %H:%M:%S'), millis)


def syslog_read(path, start, stop, grep='', logtype='syslog', limit=1000):
    '''Extract entries from log files inside a time window.

    path:    glob pattern selecting the log files (plain or ".gz").
    start:   begin of the window, a date string accepted by date_conv.
    stop:    end of the window, a date string accepted by date_conv.
    grep:    optional extended regular expression. If given, each file
             is pre-filtered with "zgrep -E".
    logtype: 'syslog' or 'apache'; selects how lines are parsed.
    limit:   maximum number of entries returned.

    Returns a list of dictionaries. Every entry has a 'seconds' key; if
    the line content could be parsed into a dictionary its keys are
    included, otherwise the raw content is stored under 'content'. A
    'datetime' key is added if the parsed content does not provide one.

    Lines that are blank or whose timestamp cannot be parsed are
    skipped. Raises ValueError if logtype is unknown.

    Side effect: the process locale is switched to 'C'.
    '''
    # Imported locally because only the grep pre-filter needs it.
    import subprocess

    if logtype not in ('apache', 'syslog'):
        raise ValueError('Unknown logtype: %r' % (logtype,))

    locale.setlocale(locale.LC_ALL, 'C')  # use default (C) locale

    # Window boundaries as seconds since the epoch, comparable with
    # file timestamps and parsed log lines.
    s_seconds = date_conv(start)[1]
    e_seconds = date_conv(stop)[1]

    # Apache example
    # 192.168.42.35 - - [16/Apr/2013:07:21:37 +0200] "GET / HTTP/1.1" 500 399
    # Zope example
    # 127.0.0.1 - admin [04/Apr/2013:22:59:39 +0200] \
    # GET /p_/ControlPanel_icon HTTP/1.1" 200 242
    # Syslog example
    # May 19 06:53:01 ema-devel2013 CRON[12059]: ...

    # Log lines need methods (1) get_seconds, (2) get_content
    if logtype == 'apache':
        def get_seconds(line):
            # The timestamp is the text between the first '[' and ']'.
            date_str = line.split(']', 1)[0].split('[', 1)[1]
            return date_conv(date_str)[1]

        def get_content(line):
            try:
                (ipaddr, identifier, userid, ts,
                 tzone, req, retcode, octets,
                 referer, agent, microsecs) = shlex.split(line)[:11]
                # The request field may be incomplete (e.g. "-"); such
                # lines are returned unparsed like other broken lines.
                method, url, protocol = req.split(None, 2)
                octets = int(octets) if octets != '-' else 0
            except ValueError:
                # Broken line. Return it as is.
                return line

            return {
                'ipaddr': ipaddr,
                'userid': userid if userid != '-' else None,
                'microsecs': microsecs,
                'method': method,
                'proto': protocol,
                'url': url,
                'retcode': retcode,
                # ts and tzone still carry the surrounding brackets.
                'datetime': (ts+' '+tzone)[1:-1],
                'octets': octets,
            }

    else:
        log_time_fmt = '%Y %b %d %H:%M:%S'
        log_length = 15

        def get_seconds(line):
            # Syslog lines carry no year. file_year (bound by the scan
            # loop below) is the modification year of the current file;
            # in the first file of a new year, November/December lines
            # still belong to the previous year.
            year = file_year
            if file_year_change and line[:3] in ('Nov', 'Dec'):
                year -= 1

            return time.mktime(time.strptime(
                str(year) + ' ' + line[:log_length], log_time_fmt
            ))

        def get_content(line):
            body = line[16:].strip()
            try:
                data = '{' + body.split(': {', 1)[1]
                return safe_eval(data)
            except Exception:
                # Not a dict-like payload: deliver the plain text.
                pass
            return body

    # List all candidates ordered by modification time
    candidates = sorted(
        (os.stat(filename).st_mtime, filename)
        for filename in glob.glob(path)
    )

    # Keep only files which may contain matches. The year change flag
    # is derived from the time-ordered list so that it compares each
    # file with its predecessor in time. This is imprecise close to
    # year changes. XXX
    files = []
    previous_year = None
    for mtime, filename in candidates:
        year = time.gmtime(mtime).tm_year
        year_change = previous_year is not None and previous_year != year
        previous_year = year

        # Last written before the window starts: no matches possible
        if mtime < s_seconds:
            continue
        files.append((filename, year, year_change))
        # The first file written after the window ends is the last
        # one which can contain matches
        if mtime > e_seconds:
            break

    emit = []
    # Scan each of the remaining files
    for filename, file_year, file_year_change in files:
        if len(emit) >= limit:
            break

        proc = None
        if grep:
            # Argument list instead of a shell string: the pattern and
            # the file name are passed on verbatim, no quoting needed.
            proc = subprocess.Popen(
                ['zgrep', '-E', grep, filename],
                stdout=subprocess.PIPE, universal_newlines=True)
            fh = proc.stdout
        elif filename.endswith('.gz'):
            # Text mode is required on Python 3 to read str lines.
            fh = gzip.open(filename, 'r' if six.PY2 else 'rt')
        else:
            fh = open(filename, 'r')

        try:
            for raw_line in fh:
                line = raw_line.strip()
                if not line:
                    # Blank lines do not end the file.
                    continue

                try:
                    secs = get_seconds(line)
                except (ValueError, IndexError, OverflowError):
                    # No usable timestamp on this line; skip it.
                    continue
                if secs < s_seconds:
                    continue
                if secs > e_seconds:
                    break

                cont = get_content(line)
                if isinstance(cont, dict):
                    # cont is already a dictionary? Add seconds.
                    cont['seconds'] = secs
                else:
                    # Else build a simple dictionary.
                    cont = {'seconds': secs, 'content': cont}

                # Add datetime component (if not already there),
                # preferring the millisecond time stamp 'ts' of the
                # entry over the time stamp of the log line.
                if 'datetime' not in cont:
                    millis = cont.get('ts', None)
                    stamp = millis * .001 if millis else secs
                    if stamp:
                        cont['datetime'] = ts_to_date(stamp)

                emit.append(cont)
                if len(emit) >= limit:
                    break
        finally:
            fh.close()
            if proc is not None:
                proc.wait()

    return emit


# Functions which display the call stack
def call_stack(frame=1, limit=3):
    '''Format the current call stack as a list of strings.

    frame: number of frames above call_stack at which the listing
           starts (0 is call_stack itself, 1 its caller).
    limit: maximum number of frames in the result.

    >>> call_stack(frame=0)[-1].find('return traceback.format_stack') != -1
    True
    '''
    start_frame = sys._getframe(frame)
    return traceback.format_stack(start_frame, limit=limit)


def get_zope_traceback(frame=1):
    '''Return a list of calls representing the current stack. This function
    differs from call_stack(frame, limit): It only returns the frames
    which carry a Zope traceback supplement.

    frame: start stack display n frames above get_zope_traceback.

    Return value:
    list of dictionaries, outermost call first, each of the form {
        'zope_obj': <ZODB object> Provides informations like type and path.
        'line':   <integer> line number, where the process inside this object
                  is called.
    }
    '''
    # Each frame that is of interest to us has a traceback supplement,
    # which is stored differently for each (zope) object type, e.g.
    # python script: f_globals
    # page template: f_locals
    tb_key = '__traceback_supplement__'
    ret = []

    # Walk the frame chain by hand: traceback.walk_stack is not available
    # on Python 2.
    current = sys._getframe(frame)
    while current is not None:
        tb_supplement = (
            current.f_locals.get(tb_key) or current.f_globals.get(tb_key)
        )

        if tb_supplement:
            # This is copied from the Zope Exception handler
            factory = tb_supplement[0]
            args = tb_supplement[1:]
            supplement = factory(*args)

            # The supplement is usually an object implementing
            # ITracebackSupplement. Prefer its line number if it provides
            # a valid one (!= -1), else fall back to the frame's own line.
            supplement_line_no = getattr(supplement, 'line', -1)
            if supplement_line_no == -1:
                supplement_line_no = current.f_lineno

            ret.append({
                'line': supplement_line_no,
                # The object referencing is inconsistent per each
                # supplement at the moment, so we reference it with one
                # key.
                'zope_obj': (
                    getattr(supplement, 'object', None)
                    or getattr(supplement, 'manageable_object', None)
                ),
            })

        current = current.f_back

    ret.reverse()
    return ret


def say(message, **kw):
    '''Emit a dict-like log entry through journal_log.

    The entry holds the message, the source line of the caller, a
    default level of 3, a fresh UUID and a millisecond time stamp.
    Keyword arguments are merged last and may override any of these,
    including 'level', which also selects the log level.
    '''
    entry = {
        'message': message,
        # frame=2 as seen from call_stack: the caller of say()
        'source': call_stack(frame=2, limit=1)[0],
        'level': 3,
        'id': str(uuid.uuid4()),
        'ts': int(time.time()*1000),
    }
    entry.update(kw)
    journal_log(str(entry), entry.get('level', 3))


# Profiling helpers
def prof_start(storage, label=None):
    '''Start a nestable profiling stopwatch.

    storage: object with get(name, default) and set(name, value); the
             timers live in it under '__profiler_millis_start'.
    label:   optional name under which the starting point is stored in
             addition.

    Without a label the starting point is pushed onto the '__stack'
    list of the timers. With a label it is only pushed if the stack is
    still empty.

    Returns the starting point in milliseconds since the epoch.
    '''
    started_ms = int(time.time()*1000)

    slot = '__profiler_millis_start'
    timers = storage.get(slot, {})
    timers.setdefault('__stack', [])

    if label:
        timers[label] = started_ms

    push = label is None or len(timers['__stack']) == 0
    if push:
        timers['__stack'].append(started_ms)

    storage.set(slot, timers)
    return started_ms


def prof_stop(storage, name='Std', profiler_ms=None, label=None,
              user=None, **kw):
    '''Nestable stop-watch, stopping part.

    Stops the profiling watch and writes a log record if the threshold
    has been passed.

    storage:     object with get(name, default) and set(name, value);
                 the timers live in it under '__profiler_millis_start'.
                 Its 'URL' entry is included in the record.
    name:        name recorded in the log entry.
    profiler_ms: threshold in milliseconds; shorter durations are not
                 logged. None means 0.
    label:       use the starting point stored for that label. If the
                 label is not found, the stack is used instead.
    user:        user recorded in the log entry, 'dummy' if None.
    kw:          extra keys for the log entry; the profiler's own keys
                 take precedence.

    Without a usable label, the latest starting point is taken from the
    stack. The earliest starting point remains on the stack, allowing
    more "stops" than "starts".

    Returns None.
    '''
    # TODO: Instead of using the system logger, this should report to the
    # upcoming sensor system!

    reqvar = '__profiler_millis_start'

    # Basic logging data
    url = storage.get('URL', None)
    if user is None:
        user = 'dummy'

    timers = storage.get(reqvar, {})
    stack = timers.get('__stack', [])

    if not stack:
        # Stop without any start? Not good...
        journal_log(str({
            'name': name,
            'msg': 'Profiler has received more stops than starts!',
            'user': user,
            'url': url
        }), level=3)
        return

    millis_start = None
    source = 'unknown'
    if label is not None:
        millis_start = timers.get(label, None)
        source = 'label'

    if millis_start is None:
        if len(stack) == 1:
            # Only read the earliest starting point. Popping it would
            # empty the stored list in place and lose it for later stops.
            millis_start = stack[0]
            source = 'first'
        else:
            millis_start = stack.pop()
            source = 'pop %d' % len(stack)
            storage.set(reqvar, timers)

    if profiler_ms is None:
        profiler_ms = 0
    millis_stop = int(time.time()*1000)
    duration = millis_stop - millis_start

    if duration < profiler_ms:
        # Not triggered? Bail out.
        return

    msg = {'msg': 'Profiler triggered.', 'limit': profiler_ms,
           'duration': duration, 'name': name, 'user': user,
           'timer_srcs': source, 'url': url}
    out = kw
    out.update(msg)
    journal_log(str(out))


# Journal reader

def journal_read(matches=None, seconds_past=30,
                 from_secs=None, upto_secs=None):
    '''Read entries from the systemd journal.

    matches:      field/value pairs passed to the reader's add_match().
                  Defaults to the unit 'measure.service'.
    seconds_past: if set, seek to that many seconds before now within
                  the current boot before reading.
    from_secs, upto_secs: accepted but not used.

    Returns a list with one dictionary per journal entry.
    '''
    if matches is None:
        matches = {
            '_SYSTEMD_UNIT': 'measure.service',
        }

    reader = systemd.journal.Reader()
    # reader.log_level(systemd.journal.LOG_INFO)
    reader.add_match(**matches)

    if seconds_past:
        seek_from = time.time() - seconds_past
        reader.this_boot()  # needed to disambiguate seek_realtime()
        reader.seek_realtime(int(seek_from)*1000000)

    return [dict(entry) for entry in reader]


def journal_remote_read(host='localhost', since='', until='', match=''):
    '''Preliminary remote reader.
    Output format is still volatile.

    Runs "journalctl --output json" through safe_syscall, via ssh if
    host is not 'localhost'. since and until are passed as --since and
    --until if set, match is appended as a plain argument if set.

    Returns the tuple (retcode, output) delivered by safe_syscall.
    '''
    prefix = ['ssh', host] if host != 'localhost' else []
    cmd = prefix + ['journalctl', '--output', 'json']

    for option, value in (('--since', since), ('--until', until)):
        if value:
            cmd.extend([option, value])
    if match:
        cmd.append(match)

    retcode, output = safe_syscall(cmd)
    return retcode, output


# Run as a script: print the messages of the default journal selection.
# Does nothing when logging goes to syslog instead of the journal.
if __name__ == '__main__' and not use_syslog:
    for entry in journal_read():
        print(entry['MESSAGE'])


def test_sayget():
    '''Manual smoke test: print the log entries of the last five minutes.

    Reads the apache access logs and the perfactema logs below /var/log
    and finally prints the current call stack.
    '''
    from pprint import pprint
    # Testing code. Move this into sensible tests...
    stamp_fmt = '%Y-%m-%d %H:%M:%S %z'
    start = time.strftime(stamp_fmt, time.gmtime(time.time() - 300))
    stop = time.strftime(stamp_fmt, time.gmtime(time.time()))

    apache_entries = syslog_read('/var/log/apache2/access.log*', start, stop,
                                 logtype='apache', limit=1000)
    for entry in apache_entries:
        pprint([entry['seconds'], entry])
    print("")

    syslog_entries = syslog_read('/var/log/perfactema*', start, stop, grep='')
    for entry in syslog_entries:
        print(entry['seconds'], entry)

    print(call_stack())
