#!/usr/bin/env python
# schrodinger.application.licensing.lmgrd_log_parser

import collections
import csv
import logging
import optparse
import os.path
import re
import sys
from datetime import date
from datetime import datetime
from datetime import timedelta

from schrodinger.utils import csv_unicode

# configure logging
log = logging.getLogger("logparser")
log.addHandler(logging.NullHandler())

event = collections.namedtuple('event', "time feature tokens user host")
parser = None


class UsageError(Exception):

    def __init__(self, msg):
        self.msg = msg

    def __str__(self):
        return self.msg

# Logfile-parsing code
def parse_logfile(logfile):
    """
    Parse an lmgrd logfile and return a list of checkin/checkout events.

    Checkin/checkout events are reported as named tuples of type 'event'.
    (See the parse_events() header doc for a description.)
    """
    with open(logfile, 'r') as fd:
        events = parse_events(fd)
    return events

def parse_events(loglines):
    """
    Read the lines of an lmgrd log file, and return a list of
    checkin/checkout events.

    Loglines is an iterable that provides the lines of the log file.

    Checkin/checkout events are reported as named tuples of type 'event',
    which have the elements (time, feature, tokens, user, host). The
    'tokens' element is a signed integer; positive token counts represent
    checkouts and negative counts represent checkins. The time is a
    datetime. The 'feature', 'host', and 'user' elements are strings.

    Checkin/checkout events include only the time for the event; the date
    needs to be determined separately. The date is reported in the log file
    at startup and periodically on TIMESTAMP lines. TIMESTAMP lines are not
    written at midnight, for some reason, and so transitions from one day
    to the next need to be inferred from the hour of the checkin/checkout
    time.

    NOTE: If the lmgrd log doesn't include the startup output, events
    logged before the first TIMESTAMP line will be ignored.

    Sample log file checkout/in and timestamp lines are shown below::

        23:06:31 (lmgrd) FLEXnet Licensing (v11.10.0.0 build 95001 x64_mac10) started on Simon-Gaos-MacBook-Pro.local (Mac) (10/30/2012)
        13:20:41 (SCHROD) OUT: "MMLIBS" saunders@ancalagon
        13:20:41 (SCHROD) IN: "MMLIBS" saunders@ancalagon
        9:10:40 (lmgrd) TIMESTAMP 10/31/2012
        9:10:40 (SCHROD) TIMESTAMP 10/31/2012
        9:42:58 (SCHROD) OUT: "SUITE_22JAN2013" saunders@Simon-Gaos-MacBook-Pro.local (4 licenses)
        9:42:58 (SCHROD) OUT: "GLIDE_SP_DOCKING" saunders@Simon-Gaos-MacBook-Pro.local (4 licenses)
        9:42:58 (SCHROD) IN: "SUITE_22JAN2013" saunders@Simon-Gaos-MacBook-Pro.local (4 licenses)
        9:42:58 (SCHROD) IN: "GLIDE_SP_DOCKING" saunders@Simon-Gaos-MacBook-Pro.local (4 licenses)
        21:37:00 (lmgrd) TIMESTAMP 10/31/2012
        11:46:36 (SCHROD) UPGRADE: "PRIMEX_MAIN" golubkov@lori.schrodinger.com (1->8 licenses)
    """
    start_regex = re.compile(
        r"""
        ^\s*(\d+):(\d\d):(\d\d)\s+    # timestamp [HH:MM:SS]
        \((lmgrd)\)\s                 # (lmgrd)
        FLEXnet\sLicensing            # FLEXnet Licensing
        .*started.*                   # started
        \((\d+)/(\d+)/(\d+)\)$        # date [MM/DD/YYYY]
        """, re.X)
    time_regex = re.compile(
        r"""
        ^\s*(\d+):(\d\d):(\d\d)\s+    # timestamp [HH:MM:SS]
        \((\w+)\)\s                   # source [(lmgrd) or (SCHROD)]
        TIMESTAMP\s                   # TIMESTAMP
        (\d+)/(\d+)/(\d+)             # date [MM/DD/YYYY]
        """, re.X)
    lic_regex = re.compile(
        r"""
        ^\s*(\d+):(\d\d):(\d\d)\s+    # timestamp [HH:MM:SS]
        \((?:lmgrd|SCHROD)\)\s+       # source [(lmgrd) or (SCHROD)]
        (IN|OUT|UPGRADE):\s+          # action [IN, OUT, or UPGRADE]
        "([^"]+)"\s+                  # license feature name ["MMLIBS"]
        (\S[^@]*)@(\S+)               # user [user@host]
        (?:\s+\((\d+)(?:->(\d+))?\slicenses\))?  # license count [(4 licenses), (1->8 licenses)]
        """, re.X)

    curdate = None
    lasthour = 0
    first_time = None
    discarded = 0
    events = []
    for line in loglines:
        # Skip comments
        if line.startswith('#'):
            continue
        match = lic_regex.match(line)
        if match:
            (thour, tmin, tsec, action, feature, user, host, n1,
             n2) = match.groups()
            if action == "UPGRADE":
                numtokens = int(n2) - int(n1)
            elif n1:
                numtokens = int(n1)
            else:
                numtokens = 1
            if action == "IN":
                numtokens = -numtokens
            curhour, curmin, cursec = int(thour), int(tmin), int(tsec)
            if curhour < lasthour:
                curdate = curdate + timedelta(1)
            try:
                timestamp = datetime(curdate.year, curdate.month,
                                     curdate.day, curhour, curmin, cursec)
                events.append(event(timestamp, feature, numtokens, user,
                                    host))
                # don't record lasthour until we have a usable curdate
                lasthour = curhour
                if first_time is None:
                    first_time = timestamp
            except AttributeError:
                if curdate is None:
                    discarded += 1
                else:
                    raise
        else:
            match = time_regex.match(line)
            if not match:
                match = start_regex.match(line)
            if match:
                (thour, tmin, tsec, source, mon, day, year) = match.groups()
                curdate = date(int(year), int(mon), int(day))
                lasthour = int(thour)
                continue
    if discarded > 0:
        log.debug(
            "\nWARNING: Discarded %d events logged before first TIMESTAMP @ %s"
            % (discarded, str(first_time)))
    return events

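# Example (an illustrative sketch, not part of the module): feeding
# parse_events() two lines modeled on the sample log in its docstring
# yields one checkout event, dated from the preceding TIMESTAMP line:
#
#     lines = [
#         '9:10:40 (SCHROD) TIMESTAMP 10/31/2012',
#         '13:20:41 (SCHROD) OUT: "MMLIBS" saunders@ancalagon',
#     ]
#     parse_events(lines)
#     # -> [event(time=datetime(2012, 10, 31, 13, 20, 41),
#     #           feature='MMLIBS', tokens=1,
#     #           user='saunders', host='ancalagon')]
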
# Helper functions for usage workup
def period_start(event_time, period):
    """
    Returns the start time for the reporting interval in which the given
    event time lies. The event time is a datetime; the period (the length
    of a reporting interval) is a timedelta.

    Note: If the period is less than a day, there must be an integral
    number of reporting intervals per day. If it's greater than a day, the
    period must be an integral number of days. These assumptions aren't
    verified here. If the period is zero, the event_time is returned.
    """
    d = event_time.date()
    if period.days == 0:
        if period.seconds == 0:
            return event_time
        # Round down to the start of the current intra-day interval.
        day_start = datetime(d.year, d.month, d.day)
        day_elapsed = (event_time - day_start).seconds
        return day_start + timedelta(
            0, period.seconds * (day_elapsed // period.seconds))
    else:
        # Round down to the start of the current multi-day interval,
        # counting whole periods from the start of the year.
        year_start = date(d.year, 1, 1)
        year_days = (d - year_start).days
        start_day = year_start + timedelta(
            (year_days // period.days) * period.days, 0)
        return datetime(start_day.year, start_day.month, start_day.day)

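# Worked example (illustrative only): with a 6-hour period, an event at
# 2014-07-01 14:35 falls in the interval starting at 12:00, because
# 52500 elapsed seconds // 21600 = 2 whole periods since midnight:
#
#     period_start(datetime(2014, 7, 1, 14, 35), timedelta(hours=6))
#     # -> datetime(2014, 7, 1, 12, 0)
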
def format_usage_row(row, features):
    """
    Return a string containing a summary of the data in one row of the
    usage table. The first element is a timestamp (datetime); the
    remaining elements are the usage for each feature, in the interval
    ending at that time. This is just used for debug output.
    """
    return ("%20s " % row[0]) + "".join(
        [" %.1f:%s" % (u, f) for (u, f) in zip(row[1:], features) if u > 0])

# Token-usage workup
def maximum_usage(events,
                  features,
                  period,
                  start=None,
                  stop=None,
                  user="",
                  host="",
                  init=[]):  # noqa: M511
    """
    Take the list of checkin/checkout events and features, and return a
    table with the maximum number of tokens checked out at a time for each
    feature over each time period.

    The result is a list of rows (lists). Each row represents a single
    period, with the end-time for that period followed by the maximum
    number of tokens checked out for each feature (in the same order as
    the input 'features' list.)
    """
    result = []
    tokens = {}
    maxtokens = {}
    for f in features:
        if f in init:
            tokens[f] = init[f]
        else:
            tokens[f] = 0
        maxtokens[f] = 0
    if period > timedelta(0, 0):
        periodend = start + period
    else:
        periodend = stop
    log.debug("%20s %16s %8s %12s %-8s" %
              ("Time", "Feature", "Tokens", "User", "Host"))
    for ev in events:
        while periodend < ev.time:
            # Generate output row for this period.
            row = [periodend] + [maxtokens[f] for f in features]
            result.append(row)
            log.debug(format_usage_row(row, features))
            for f in features:
                maxtokens[f] = tokens[f]
            if periodend < stop:
                periodend += period
            else:
                return result
        if (user == "" or ev.user == user) and (host == "" or
                                                host == ev.host):
            log.debug("%20s %16s %8d %12s %-8s" % ev)
            tokens[ev.feature] += ev.tokens
            if ev.time >= start and maxtokens[ev.feature] < tokens[ev.feature]:
                maxtokens[ev.feature] = tokens[ev.feature]
    # Handle intervals following the final event
    while periodend <= stop:
        row = [periodend] + [maxtokens[f] for f in features]
        result.append(row)
        log.debug(format_usage_row(row, features))
        for f in features:
            maxtokens[f] = tokens[f]
        if periodend == stop:
            break
        periodend += period
    return result

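# Example (a minimal sketch; "lmgrd.log" is a hypothetical path): report
# the per-hour peak checkout count for each feature:
#
#     events = parse_logfile("lmgrd.log")
#     features = sorted(set(ev.feature for ev in events))
#     hour = timedelta(hours=1)
#     start = period_start(events[0].time, hour)
#     stop = period_start(events[-1].time, hour) + hour
#     peaks = maximum_usage(events, features, hour, start=start, stop=stop)
#     # Each row: [period_end, max_tokens_for_features[0], ...]
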
def time_weighted_usage(events,
                        features,
                        period,
                        start=None,
                        stop=None,
                        user="",
                        host="",
                        init=[]):  # noqa: M511
    """
    Take the list of checkin/checkout events and features, and return a
    table with the total usage of each feature in token-hours for each
    time period.

    Events can be filtered by user and host, and by the start and stop
    times for the analysis. The number of tokens checked out for each
    feature at the beginning of the log can be specified using the init
    parameter (a dict, mapping feature name to token count.)

    The result is a list of rows (lists). Each row represents a single
    period, with the end-time for that period followed by the usage for
    each feature (in the same order as the input 'features' list) in
    token-hours.
    """
    result = []
    tokens = {}
    usage = {}
    lasttime = {}
    for f in features:
        if f in init:
            tokens[f] = init[f]
        else:
            tokens[f] = 0
        usage[f] = 0
        lasttime[f] = start
    if period > timedelta(0, 0):
        periodend = start + period
    else:
        periodend = stop
    # log.debug("%20s %16s %8s %12s %-8s" % (
    #     "Time", "Feature", "Tokens", "User", "Host"))
    # Loop over checkin/checkout events
    for ev in events:
        while periodend < ev.time:
            # Generate output row for this period.
            for f in features:
                elapsed = (periodend - lasttime[f]).total_seconds()
                usage[f] += elapsed * tokens[f]
            row = [periodend] + [usage[f] / 3600.0 for f in features]
            result.append(row)
            # log.debug(format_usage_row(row, features))
            for f in features:
                usage[f] = 0
                lasttime[f] = periodend
            if periodend < stop:
                periodend += period
            else:
                return result
        # Handle this event
        if (user == "" or ev.user == user) and (host == "" or
                                                host == ev.host):
            # log.debug("%20s %16s %8d %12s %-8s" % ev)  # event data
            if ev.time >= start:
                elapsed = (ev.time - lasttime[ev.feature]).total_seconds()
                usage[ev.feature] += elapsed * tokens[ev.feature]
                lasttime[ev.feature] = ev.time
            tokens[ev.feature] += ev.tokens
            if tokens[ev.feature] < 0:
                log.info("!! %d %s tokens checked out!" %
                         (tokens[ev.feature], ev.feature))
    # Handle intervals following the final event
    while periodend <= stop:
        for f in features:
            elapsed = (periodend - lasttime[f]).total_seconds()
            usage[f] += elapsed * tokens[f]
        row = [periodend] + [usage[f] / 3600.0 for f in features]
        result.append(row)
        # log.debug(format_usage_row(row, features))
        if periodend == stop:
            break
        for f in features:
            usage[f] = 0
            lasttime[f] = periodend
        periodend += period
    return result

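# Worked example of the token-hours arithmetic (illustrative only): a
# feature held at 4 tokens for 90 minutes accumulates
# 4 * 5400 s = 21600 token-seconds, reported as 21600 / 3600.0 = 6.0
# token-hours in the row for the enclosing period.
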
def initial_tokens(events, features, user="", host=""):
    """
    Take the list of checkin/checkout events and features, and return
    dicts reporting the apparent number of checked-out tokens for each
    feature at the point where the log starts, and the final number of
    tokens still checked out at the point where the log ends. Ordinarily,
    the latter should be zero.

    If the checkout records are incomplete, there may be more tokens
    checked in than checked out. The dict returned by this function will
    be used to make sure the total number of tokens checked out for each
    feature will never be reported to be negative.
    """
    tokens = {}
    mintokens = {}
    log.debug("\nDetermine the minimum token usage over the entire record...")
    log.debug("Host: %s" % host)
    log.debug("User: %s" % user)
    for f in features:
        tokens[f] = 0
        mintokens[f] = 0
    # log.debug("%20s %16s %8s %12s %-8s" % (
    #     "Time", "Feature", "Tokens", "User", "Host"))
    for ev in events:
        if (user == "" or ev.user == user) and (host == "" or
                                                host == ev.host):
            # log.debug("%20s %16s %8d %12s %-8s" % ev)
            tokens[ev.feature] += ev.tokens
            if tokens[ev.feature] < mintokens[ev.feature]:
                mintokens[ev.feature] = tokens[ev.feature]
    init = {}
    final = {}
    for k, v in mintokens.items():
        init[k] = -v
        final[k] = init[k] + tokens[k]
    return (init, final)

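# Example (illustrative only): if the log opens with an IN of 4
# "GLIDE_SP_DOCKING" tokens that were checked out before logging began,
# the running count dips to -4, so initial_tokens() reports
# init == {'GLIDE_SP_DOCKING': 4, ...}; passing that dict as the 'init'
# argument of the usage functions keeps reported counts non-negative.
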
def write_events_csv(fname, events):
    """
    Write the given list of checkin/checkout events to a CSV file.
    """
    with csv_unicode.writer_open(fname) as fd:
        writer = csv.writer(fd)
        writer.writerow(("Time", "Feature", "Tokens", "User", "Host"))
        for ev in events:
            writer.writerow(ev)

def write_usage_csv(fname, usage, features):
    """
    Write the given timeline of token usage to a CSV file.
    """
    with csv_unicode.writer_open(fname) as fd:
        writer = csv.writer(fd)
        writer.writerow(["Time"] + features)
        for row in usage:
            writer.writerow(row)

def write_overall_usage_csv(fname, usage, features):
    """
    Write the given overall token usage to a CSV file, with each feature's
    usage on a separate row.
    """
    with csv_unicode.writer_open(fname) as fd:
        writer = csv.writer(fd)
        writer.writerow(["Feature", "Usage"])
        writer.writerows(list(zip(features, usage[0][1:])))

def report_usage(usage, features):
    """
    Write the token usage timeline to the console. (This is mainly for
    debugging.)
    """
    log.info("")
    for row in usage:
        line = ("%20s " % row[0]) + "".join(
            [" %d:%s" % (u, f) for (u, f) in zip(row[1:], features) if u != 0])
        if len(line) > 32:
            log.info(line)

def report_total_usage(total, features):
    """
    Write the given total usage data for each feature, one per line.
    """
    log.info("\nTotal token usage:")
    log.info("%-28s %12s" % ("feature", "token-hours"))
    for i in range(len(features)):
        log.info("%-28s %12.1f" % (features[i], total[i]))

def calculate_total_usage(usage, features):
    """
    Calculate the total usage for each feature by summing down the list of
    per-period usage data. A list of the totals is returned, with the
    total usage for the nth feature in the nth position.
    """
    total = [0.0 for f in features]
    for row in usage:
        for i in range(len(features)):
            total[i] += row[i + 1]
    return total

def parse_args(args=None):
    """
    Parse the commandline arguments and return an optparse Values object
    containing the option values.
    """
    global parser
    prolog = "Usage: %prog [<options>] <logfile>"
    epilog = """
Parse an lmgrd log file and write a CSV file reporting the usage of
Schrodinger license tokens. By default, the total time-weighted usage
for each license feature is reported, for all licenses checked out over
the entire log file. To calculate the license usage over a specific time
period, use the "--start" and "--stop" options to specify the beginning
and end of the time period. Use the "--user" or "--host" options to
report only checkouts for a specific user or on a specific host. Use
'--period <interval>' to get the time-dependent token usage for each
time interval of the specified length; for example, use "--period 1h" to
get hourly token usage.
"""
    parser = optparse.OptionParser(usage=prolog, epilog=epilog)
    parser.add_option("-o",
                      "--out",
                      dest="outfile",
                      default="out.csv",
                      help="CSV output file (default is 'out.csv')")
    parser.add_option("--events",
                      dest="events",
                      action="store_true",
                      help="Report checkin/checkout events, rather than "
                      "usage.")
    parser.add_option(
        "--maxusage",
        dest="maxusage",
        action="store_true",
        help="Report maximum token usage for each feature, over each period.")
    parser.add_option(
        "--period",
        dest="period",
        default="0",
        help="Report usage totals over each period (eg, '1h', '5m') or '0' "
        "to report overall usage. (default is 0)")
    parser.add_option(
        "--start",
        dest="start",
        default=None,
        help="Ignore events earlier than this time "
        "(eg '2014-07-01' or '2014-06-08 16:00')")
    parser.add_option(
        "--stop",
        dest="stop",
        default=None,
        help="Ignore events after this time "
        "(eg '2014-07-08' or '2014-06-09 04:00')")
    parser.add_option("--user",
                      dest="user",
                      default="",
                      help="Display only data for a specific user.")
    parser.add_option("--host",
                      dest="host",
                      default="",
                      help="Display only data for a specific host.")
    parser.add_option("--listfeatures",
                      action="store_true",
                      help="List names of all features checked in or out.")
    parser.add_option("--listusers",
                      action="store_true",
                      help="List names of all users found in the log.")
    parser.add_option("--listhosts",
                      action="store_true",
                      help="List names of all hosts found in the log.")
    parser.add_option("-d",
                      "--debug",
                      dest="debug",
                      action="store_true",
                      help="produce debug output")
    parser.add_option("-v",
                      "--verbose",
                      dest="verbose",
                      action="store_true",
                      help="display more details")
    (opt, args) = parser.parse_args(args)
    if len(args) == 0:
        raise UsageError("No logfile was specified")
    opt.infile = args[0]
    try:
        opt.period = parse_timedelta(opt.period, units="s")
        if opt.start:
            opt.start = parse_datetime(opt.start)
        if opt.stop:
            opt.stop = parse_datetime(opt.stop)
    except ValueError as exc:
        raise UsageError(str(exc))
    if opt.debug:
        opt.verbose = True
    return opt

def parse_timedelta(text, units="s"):
    """
    Parse the given string as a timedelta. Units may be specified
    explicitly using one of the symbols "s", "m", "h", "d" (lowercase
    only). If no units are specified in the text, the default units are
    assumed.
    """
    orig_text = text
    text = text.strip()
    text = text.replace(" ", "")
    if re.match(r"^\d+$", text):
        text += units
    if not re.match(r"^(\d+[smhd])+$", text):
        raise ValueError("Cannot parse time interval '%s'" % orig_text)
    result = timedelta(0, 0)
    while len(text) > 0:
        m = re.match(r"^(\d+)([smhd])(.*)$", text)
        (val, unit, text) = m.groups()
        if unit == "m":
            result += timedelta(0, int(val) * 60)
        elif unit == "h":
            result += timedelta(0, int(val) * 3600)
        elif unit == "d":
            result += timedelta(int(val), 0)
        else:
            result += timedelta(0, int(val))
    return result

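# Example parses (illustrative only; all forms accepted by the regexes
# above):
#
#     parse_timedelta("90")      # -> timedelta(seconds=90), default units
#     parse_timedelta("1h30m")   # -> timedelta(seconds=5400)
#     parse_timedelta("7d")      # -> timedelta(days=7)
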
def parse_datetime(text):
    """
    Parse the given string as a datetime.
    """
    formats = (
        "%Y-%m-%d %H:%M",
        "%y%m%d %H:%M",
        "%Y-%m-%d",
        "%y%m%d",
    )
    for fmt in formats:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            pass
    raise ValueError("Cannot parse datetime '%s'" % text)

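# Example parses (illustrative only), matching the formats listed above:
#
#     parse_datetime("2014-07-01")        # -> datetime(2014, 7, 1, 0, 0)
#     parse_datetime("2014-06-08 16:00")  # -> datetime(2014, 6, 8, 16, 0)
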
def main(args=None):
    """
    Main function
    """
    log.addHandler(logging.StreamHandler(sys.stdout))
    log.setLevel(logging.INFO)

    # Parse the commandline args
    opt = parse_args(args)
    if opt.debug:
        log.setLevel(logging.DEBUG)

    # Check input file
    if not os.path.isfile(opt.infile):
        log.error("Error: Bad log file '%s'" % opt.infile)
        return 1
    if opt.verbose:
        log.info("Input log file: %s" % opt.infile)

    # Parse log file
    events = parse_logfile(opt.infile)
    if opt.user:
        events = [ev for ev in events if ev.user == opt.user]
    if opt.host:
        events = [ev for ev in events if ev.host == opt.host]
    if not events:
        # Avoid an IndexError below when no events match.
        log.error("Error: No checkin/checkout events found in '%s'" %
                  opt.infile)
        return 1
    features = sorted(set(ev.feature for ev in events))
    users = sorted(set(ev.user for ev in events))
    hosts = sorted(set(ev.host for ev in events))
    first_time = events[0].time
    last_time = events[-1].time
    if opt.start is None:
        opt.start = period_start(first_time, opt.period)
    if opt.stop is None:
        if opt.period > timedelta(0, 0):
            opt.stop = period_start(last_time, opt.period) + opt.period
        else:
            opt.stop = last_time
    log.info("\nParsed %d checkin/checkout events" % len(events))
    log.info(" between %s" % first_time)
    log.info(" and %s" % last_time)
    log.info(" %d different features were checked out" % len(features))
    log.info(" by %d different users on %d hosts" % (len(users), len(hosts)))
    if opt.listusers:
        log.info("\nUsers:")
        for u in users:
            log.info(u)
    if opt.listhosts:
        log.info("\nHosts:")
        for h in hosts:
            log.info(h)
    if opt.listfeatures:
        log.info("\nFeatures:")
        for f in features:
            log.info(f)

    if opt.events:
        write_events_csv(opt.outfile, events)
        log.info("\nCheckin/checkout events written to %s" % opt.outfile)
        return 0

    init, final = initial_tokens(events,
                                 features,
                                 user=opt.user,
                                 host=opt.host)
    line = "\ninitial usage: %s" % "".join(
        [" %d:%s" % (v, k) for (k, v) in init.items() if v > 0])
    log.info(line)
    line = "final usage: %s" % "".join(
        [" %d:%s" % (v, k) for (k, v) in final.items() if v > 0])
    log.info(line)

    if opt.maxusage:
        if opt.period > timedelta(0, 0):
            log.info("\nCalculate the maximum token usage over each period.")
        else:
            log.info("\nCalculate the maximum token usage for each feature.")
    else:
        if opt.period > timedelta(0, 0):
            log.info(
                "\nCalculate time-weighted token usage (token-hours) over "
                "each period.")
        else:
            log.info(
                "\nCalculate total time-weighted token usage (token-hours) "
                "for each feature.")
    log.info(" Start time: %s" % str(opt.start))
    log.info(" Stop time: %s" % str(opt.stop))
    if opt.period > timedelta(0, 0):
        log.info(" Period: %s" % str(opt.period))
    if opt.host:
        log.info(" Host: %s" % opt.host)
    if opt.user:
        log.info(" User: %s" % opt.user)

    if opt.maxusage:
        usage = maximum_usage(events,
                              features,
                              period=opt.period,
                              start=opt.start,
                              stop=opt.stop,
                              user=opt.user,
                              host=opt.host,
                              init=init)
    else:
        usage = time_weighted_usage(events,
                                    features,
                                    period=opt.period,
                                    start=opt.start,
                                    stop=opt.stop,
                                    user=opt.user,
                                    host=opt.host,
                                    init=init)
    if opt.debug:
        report_usage(usage, features)
    if not opt.maxusage:
        if opt.period > timedelta(0, 0):
            total = calculate_total_usage(usage, features)
            report_total_usage(total, features)
        else:
            report_total_usage(usage[0][1:], features)
    if opt.period == timedelta(0, 0):
        write_overall_usage_csv(opt.outfile, usage, features)
    else:
        write_usage_csv(opt.outfile, usage, features)
    log.info("\nUsage data written to %s" % opt.outfile)
    return 0

if __name__ == "__main__":
    try:
        exitcode = main(sys.argv[1:])
    except UsageError as exc:
        exitcode = 1
        print("ERROR:", exc)
        parser.print_help()
    sys.exit(exitcode)