#!/usr/bin/env python
# schrodinger.application.licensing.lmgrd_log_parser

import collections
import csv
import logging
import optparse
import os.path
import re
import sys
from datetime import date
from datetime import datetime
from datetime import timedelta

from schrodinger.utils import csv_unicode

# configure logging
log = logging.getLogger("logparser")
log.addHandler(logging.NullHandler())

event = collections.namedtuple('event', "time feature tokens user host")
parser = None


class UsageError(Exception):

    def __init__(self, msg):
        self.msg = msg

    def __str__(self):
        return self.msg

# Logfile-parsing code
def parse_logfile(logfile):
    """
    Parse an lmgrd logfile and return a list of checkin/checkout events.

    Checkin/checkout events are reported as named tuples of type 'event'.
    (See the parse_events() header doc for a description.)
    """
    with open(logfile, 'r') as fd:
        events = parse_events(fd)
    return events

def parse_events(loglines):
    """
    Read the lines of an lmgrd log file, and return a list of
    checkin/checkout events.

    Loglines is an iterable that provides the lines of the log file.

    Checkin/checkout events are reported as named tuples of type 'event',
    which have the elements (time, feature, tokens, user, host). The
    'tokens' element is a signed integer; positive token counts represent
    checkouts and negative counts represent checkins. The time is a
    datetime. The 'feature', 'host', and 'user' elements are strings.

    Checkin/checkout events include only the time for the event; the date
    needs to be determined separately. The date is reported in the log file
    at startup and periodically on TIMESTAMP lines. TIMESTAMP lines are not
    written at midnight, for some reason, and so transitions from one day
    to the next need to be inferred from the hour of the checkin/checkout
    time.

    NOTE: If the lmgrd log doesn't include the startup output, events
    logged before the first TIMESTAMP line will be ignored.

    Sample log file checkout/in and timestamp lines are shown below::

        23:06:31 (lmgrd) FLEXnet Licensing (v11.10.0.0 build 95001 x64_mac10) started on Simon-Gaos-MacBook-Pro.local (Mac) (10/30/2012)
        13:20:41 (SCHROD) OUT: "MMLIBS" saunders@ancalagon
        13:20:41 (SCHROD) IN: "MMLIBS" saunders@ancalagon
        9:10:40 (lmgrd) TIMESTAMP 10/31/2012
        9:10:40 (SCHROD) TIMESTAMP 10/31/2012
        9:42:58 (SCHROD) OUT: "SUITE_22JAN2013" saunders@Simon-Gaos-MacBook-Pro.local (4 licenses)
        9:42:58 (SCHROD) OUT: "GLIDE_SP_DOCKING" saunders@Simon-Gaos-MacBook-Pro.local (4 licenses)
        9:42:58 (SCHROD) IN: "SUITE_22JAN2013" saunders@Simon-Gaos-MacBook-Pro.local (4 licenses)
        9:42:58 (SCHROD) IN: "GLIDE_SP_DOCKING" saunders@Simon-Gaos-MacBook-Pro.local (4 licenses)
        21:37:00 (lmgrd) TIMESTAMP 10/31/2012
        11:46:36 (SCHROD) UPGRADE: "PRIMEX_MAIN" golubkov@lori.schrodinger.com (1->8 licenses)
    """
    start_regex = re.compile(
        r"""
        ^\s*(\d+):(\d\d):(\d\d)\s+    # timestamp [HH:MM:SS]
        \((lmgrd)\)\s                 # (lmgrd)
        FLEXnet\sLicensing            # FLEXnet Licensing
        .*started.*                   # started
        \((\d+)/(\d+)/(\d+)\)$        # date [MM/DD/YYYY]
        """, re.X)
    time_regex = re.compile(
        r"""
        ^\s*(\d+):(\d\d):(\d\d)\s+    # timestamp [HH:MM:SS]
        \((\w+)\)\s                   # source [(lmgrd) or (SCHROD)]
        TIMESTAMP\s                   # TIMESTAMP
        (\d+)/(\d+)/(\d+)             # date [MM/DD/YYYY]
        """, re.X)
    lic_regex = re.compile(
        r"""
        ^\s*(\d+):(\d\d):(\d\d)\s+    # timestamp [HH:MM:SS]
        \((?:lmgrd|SCHROD)\)\s+       # source [(lmgrd) or (SCHROD)]
        (IN|OUT|UPGRADE):\s+          # action [IN, OUT, or UPGRADE]
        "([^"]+)"\s+                  # license feature name ["MMLIBS"]
        (\S[^@]*)@(\S+)               # user [user@host]
        (?:\s+\((\d+)(?:->(\d+))?\slicenses\))?  # license count [(4 licenses), (1->8 licenses)]
        """, re.X)

    curdate = None
    lasthour = 0
    first_time = None
    discarded = 0
    events = []
    for line in loglines:
        # Skip comments
        if line.startswith('#'):
            continue
        match = lic_regex.match(line)
        if match:
            (thour, tmin, tsec, action, feature, user, host, n1,
             n2) = match.groups()
            if action == "UPGRADE":
                numtokens = int(n2) - int(n1)
            elif n1:
                numtokens = int(n1)
            else:
                numtokens = 1
            if action == "IN":
                numtokens = -numtokens
            curhour, curmin, cursec = int(thour), int(tmin), int(tsec)
            if curhour < lasthour:
                curdate = curdate + timedelta(1)
            try:
                timestamp = datetime(curdate.year, curdate.month,
                                     curdate.day, curhour, curmin, cursec)
                events.append(event(timestamp, feature, numtokens, user,
                                    host))
                # don't record lasthour until we have a usable curdate
                lasthour = curhour
                if first_time is None:
                    first_time = timestamp
            except AttributeError:
                if curdate is None:
                    discarded += 1
                else:
                    raise
        else:
            match = time_regex.match(line)
            if not match:
                match = start_regex.match(line)
            if match:
                (thour, tmin, tsec, source, mon, day, year) = match.groups()
                curdate = date(int(year), int(mon), int(day))
                lasthour = int(thour)
                continue
    if discarded > 0:
        log.debug(
            "\nWARNING: Discarded %d events logged before first TIMESTAMP @ %s"
            % (discarded, str(first_time)))
    return events

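# Example (an illustrative sketch, not part of the module): feeding
# parse_events() two lines modeled on the sample log in its docstring
# yields one checkout event, dated from the preceding TIMESTAMP line:
#
#     lines = [
#         '9:10:40 (SCHROD) TIMESTAMP 10/31/2012',
#         '13:20:41 (SCHROD) OUT: "MMLIBS" saunders@ancalagon',
#     ]
#     parse_events(lines)
#     # -> [event(time=datetime(2012, 10, 31, 13, 20, 41),
#     #           feature='MMLIBS', tokens=1,
#     #           user='saunders', host='ancalagon')]
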
# Helper functions for usage workup
def period_start(event_time, period):
    """
    Returns the start time for the reporting interval in which the given
    event time lies. The event time is a datetime; the period (the length
    of a reporting interval) is a timedelta.

    Note: If the period is less than a day, there must be an integral
    number of reporting intervals per day. If it's greater than a day, the
    period must be an integral number of days. These assumptions aren't
    verified here. If the period is zero, the event_time is returned.
    """
    d = event_time.date()
    if period.days == 0:
        if period.seconds == 0:
            return event_time
        # Round down to the start of the current intra-day interval.
        day_start = datetime(d.year, d.month, d.day)
        day_elapsed = (event_time - day_start).seconds
        return day_start + timedelta(
            0, period.seconds * (day_elapsed // period.seconds))
    else:
        # Round down to the start of the current multi-day interval,
        # counting whole periods from the start of the year.
        year_start = date(d.year, 1, 1)
        year_days = (d - year_start).days
        start_day = year_start + timedelta(
            (year_days // period.days) * period.days, 0)
        return datetime(start_day.year, start_day.month, start_day.day)

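# Worked example (illustrative only): with a 6-hour period, an event at
# 2014-07-01 14:35 falls in the interval starting at 12:00, because
# 52500 elapsed seconds // 21600 = 2 whole periods since midnight:
#
#     period_start(datetime(2014, 7, 1, 14, 35), timedelta(hours=6))
#     # -> datetime(2014, 7, 1, 12, 0)
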
def format_usage_row(row, features):
    """
    Return a string containing a summary of the data in one row of the
    usage table. The first element is a timestamp (datetime); the
    remaining elements are the usage for each feature, in the interval
    ending at that time. This is just used for debug output.
    """
    return ("%20s " % row[0]) + "".join(
        [" %.1f:%s" % (u, f) for (u, f) in zip(row[1:], features) if u > 0])

# Token-usage workup
def maximum_usage(events,
                  features,
                  period,
                  start=None,
                  stop=None,
                  user="",
                  host="",
                  init=[]):  # noqa: M511
    """
    Take the list of checkin/checkout events and features, and return a
    table with the maximum number of tokens checked out at a time for each
    feature over each time period.

    The result is a list of rows (lists). Each row represents a single
    period, with the end-time for that period followed by the maximum
    number of tokens checked out for each feature (in the same order as
    the input 'features' list.)
    """
    result = []
    tokens = {}
    maxtokens = {}
    for f in features:
        if f in init:
            tokens[f] = init[f]
        else:
            tokens[f] = 0
        maxtokens[f] = 0
    if period > timedelta(0, 0):
        periodend = start + period
    else:
        periodend = stop
    log.debug("%20s %16s %8s %12s %-8s" %
              ("Time", "Feature", "Tokens", "User", "Host"))
    for ev in events:
        while periodend < ev.time:
            # Generate output row for this period.
            row = [periodend] + [maxtokens[f] for f in features]
            result.append(row)
            log.debug(format_usage_row(row, features))
            for f in features:
                maxtokens[f] = tokens[f]
            if periodend < stop:
                periodend += period
            else:
                return result
        if (user == "" or ev.user == user) and (host == "" or
                                                host == ev.host):
            log.debug("%20s %16s %8d %12s %-8s" % ev)
            tokens[ev.feature] += ev.tokens
            if ev.time >= start and maxtokens[ev.feature] < tokens[ev.feature]:
                maxtokens[ev.feature] = tokens[ev.feature]
    # Handle intervals following the final event
    while periodend <= stop:
        row = [periodend] + [maxtokens[f] for f in features]
        result.append(row)
        log.debug(format_usage_row(row, features))
        for f in features:
            maxtokens[f] = tokens[f]
        if periodend == stop:
            break
        periodend += period
    return result

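# Example (a minimal sketch; "lmgrd.log" is a hypothetical path): report
# the per-hour peak checkout count for each feature:
#
#     events = parse_logfile("lmgrd.log")
#     features = sorted(set(ev.feature for ev in events))
#     hour = timedelta(hours=1)
#     start = period_start(events[0].time, hour)
#     stop = period_start(events[-1].time, hour) + hour
#     peaks = maximum_usage(events, features, hour, start=start, stop=stop)
#     # Each row: [period_end, max_tokens_for_features[0], ...]
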
def time_weighted_usage(events,
                        features,
                        period,
                        start=None,
                        stop=None,
                        user="",
                        host="",
                        init=[]):  # noqa: M511
    """
    Take the list of checkin/checkout events and features, and return a
    table with the total usage of each feature in token-hours for each
    time period.

    Events can be filtered by user and host, and by the start and stop
    times for the analysis. The number of tokens checked out for each
    feature at the beginning of the log can be specified using the init
    parameter (a dict, mapping feature name to token count.)

    The result is a list of rows (lists). Each row represents a single
    period, with the end-time for that period followed by the usage for
    each feature (in the same order as the input 'features' list) in
    token-hours.
    """
    result = []
    tokens = {}
    usage = {}
    lasttime = {}
    for f in features:
        if f in init:
            tokens[f] = init[f]
        else:
            tokens[f] = 0
        usage[f] = 0
        lasttime[f] = start
    if period > timedelta(0, 0):
        periodend = start + period
    else:
        periodend = stop
    # log.debug("%20s %16s %8s %12s %-8s" % (
    #     "Time", "Feature", "Tokens", "User", "Host"))
    # Loop over checkin/checkout events
    for ev in events:
        while periodend < ev.time:
            # Generate output row for this period.
            for f in features:
                elapsed = (periodend - lasttime[f]).total_seconds()
                usage[f] += elapsed * tokens[f]
            row = [periodend] + [usage[f] / 3600.0 for f in features]
            result.append(row)
            # log.debug(format_usage_row(row, features))
            for f in features:
                usage[f] = 0
                lasttime[f] = periodend
            if periodend < stop:
                periodend += period
            else:
                return result
        # Handle this event
        if (user == "" or ev.user == user) and (host == "" or
                                                host == ev.host):
            # log.debug("%20s %16s %8d %12s %-8s" % ev)  # event data
            if ev.time >= start:
                elapsed = (ev.time - lasttime[ev.feature]).total_seconds()
                usage[ev.feature] += elapsed * tokens[ev.feature]
                lasttime[ev.feature] = ev.time
            tokens[ev.feature] += ev.tokens
            if tokens[ev.feature] < 0:
                log.info("!! %d %s tokens checked out!" %
                         (tokens[ev.feature], ev.feature))
    # Handle intervals following the final event
    while periodend <= stop:
        for f in features:
            elapsed = (periodend - lasttime[f]).total_seconds()
            usage[f] += elapsed * tokens[f]
        row = [periodend] + [usage[f] / 3600.0 for f in features]
        result.append(row)
        # log.debug(format_usage_row(row, features))
        if periodend == stop:
            break
        for f in features:
            usage[f] = 0
            lasttime[f] = periodend
        periodend += period
    return result

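# Worked example of the token-hours arithmetic (illustrative only): a
# feature held at 4 tokens for 90 minutes accumulates
# 4 * 5400 s = 21600 token-seconds, reported as 21600 / 3600.0 = 6.0
# token-hours in the row for the enclosing period.
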
def initial_tokens(events, features, user="", host=""):
    """
    Take the list of checkin/checkout events and features, and return
    dicts reporting the apparent number of checked-out tokens for each
    feature at the point where the log starts, and the final number of
    tokens still checked out at the point where the log ends. Ordinarily,
    the latter should be zero.

    If the checkout records are incomplete, there may be more tokens
    checked in than checked out. The dict returned by this function will
    be used to make sure the total number of tokens checked out for each
    feature will never be reported to be negative.
    """
    tokens = {}
    mintokens = {}
    log.debug("\nDetermine the minimum token usage over the entire record...")
    log.debug("Host: %s" % host)
    log.debug("User: %s" % user)
    for f in features:
        tokens[f] = 0
        mintokens[f] = 0
    # log.debug("%20s %16s %8s %12s %-8s" % (
    #     "Time", "Feature", "Tokens", "User", "Host"))
    for ev in events:
        if (user == "" or ev.user == user) and (host == "" or
                                                host == ev.host):
            # log.debug("%20s %16s %8d %12s %-8s" % ev)
            tokens[ev.feature] += ev.tokens
            if tokens[ev.feature] < mintokens[ev.feature]:
                mintokens[ev.feature] = tokens[ev.feature]
    init = {}
    final = {}
    for k, v in mintokens.items():
        init[k] = -v
        final[k] = init[k] + tokens[k]
    return (init, final)

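# Example (illustrative only): if the log opens with an IN of 4
# "GLIDE_SP_DOCKING" tokens that were checked out before logging began,
# the running count dips to -4, so initial_tokens() reports
# init == {'GLIDE_SP_DOCKING': 4, ...}; passing that dict as the 'init'
# argument of the usage functions keeps reported counts non-negative.
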
def write_events_csv(fname, events):
    """
    Write the given list of checkin/checkout events to a CSV file.
    """
    with csv_unicode.writer_open(fname) as fd:
        writer = csv.writer(fd)
        writer.writerow(("Time", "Feature", "Tokens", "User", "Host"))
        for ev in events:
            writer.writerow(ev)

def write_usage_csv(fname, usage, features):
    """
    Write the given timeline of token usage to a CSV file.
    """
    with csv_unicode.writer_open(fname) as fd:
        writer = csv.writer(fd)
        writer.writerow(["Time"] + features)
        for row in usage:
            writer.writerow(row)

def write_overall_usage_csv(fname, usage, features):
    """
    Write the given overall token usage to a CSV file, with each feature's
    usage on a separate row.
    """
    with csv_unicode.writer_open(fname) as fd:
        writer = csv.writer(fd)
        writer.writerow(["Feature", "Usage"])
        writer.writerows(list(zip(features, usage[0][1:])))

def report_usage(usage, features):
    """
    Write the token usage timeline to the console. (This is mainly for
    debugging.)
    """
    log.info("")
    for row in usage:
        line = ("%20s " % row[0]) + "".join(
            [" %d:%s" % (u, f) for (u, f) in zip(row[1:], features) if u != 0])
        if len(line) > 32:
            log.info(line)

def report_total_usage(total, features):
    """
    Write the given total usage data for each feature, one per line.
    """
    log.info("\nTotal token usage:")
    log.info("%-28s %12s" % ("feature", "token-hours"))
    for i in range(len(features)):
        log.info("%-28s %12.1f" % (features[i], total[i]))

def calculate_total_usage(usage, features):
    """
    Calculate the total usage for each feature by summing down the list of
    per-period usage data. A list of the totals is returned, with the
    total usage for the nth feature in the nth position.
    """
    total = [0.0 for f in features]
    for row in usage:
        for i in range(len(features)):
            total[i] += row[i + 1]
    return total

def parse_args(args=None):
    """
    Parse the commandline arguments and return an optparse Values object
    containing the option values.
    """
    global parser
    prolog = "Usage: %prog [<options>] <logfile>"
    epilog = """
Parse an lmgrd log file and write a CSV file reporting the usage of
Schrodinger license tokens. By default, the total time-weighted usage
for each license feature is reported, for all licenses checked out over
the entire log file. To calculate the license usage over a specific time
period, use the "--start" and "--stop" options to specify the beginning
and end of the time period. Use the "--user" or "--host" options to
report only checkouts for a specific user or on a specific host. Use
'--period <interval>' to get the time-dependent token usage for each
time interval of the specified length; for example, use "--period 1h" to
get hourly token usage.
"""
    parser = optparse.OptionParser(usage=prolog, epilog=epilog)
    parser.add_option("-o",
                      "--out",
                      dest="outfile",
                      default="out.csv",
                      help="CSV output file (default is 'out.csv')")
    parser.add_option("--events",
                      dest="events",
                      action="store_true",
                      help="Report checkin/checkout events, rather than "
                      "usage.")
    parser.add_option(
        "--maxusage",
        dest="maxusage",
        action="store_true",
        help="Report maximum token usage for each feature, over each period.")
    parser.add_option(
        "--period",
        dest="period",
        default="0",
        help="Report usage totals over each period (eg, '1h', '5m') or '0' "
        "to report overall usage. (default is 0)")
    parser.add_option(
        "--start",
        dest="start",
        default=None,
        help="Ignore events earlier than this time "
        "(eg '2014-07-01' or '2014-06-08 16:00')")
    parser.add_option(
        "--stop",
        dest="stop",
        default=None,
        help="Ignore events after this time "
        "(eg '2014-07-08' or '2014-06-09 04:00')")
    parser.add_option("--user",
                      dest="user",
                      default="",
                      help="Display only data for a specific user.")
    parser.add_option("--host",
                      dest="host",
                      default="",
                      help="Display only data for a specific host.")
    parser.add_option("--listfeatures",
                      action="store_true",
                      help="List names of all features checked in or out.")
    parser.add_option("--listusers",
                      action="store_true",
                      help="List names of all users found in the log.")
    parser.add_option("--listhosts",
                      action="store_true",
                      help="List names of all hosts found in the log.")
    parser.add_option("-d",
                      "--debug",
                      dest="debug",
                      action="store_true",
                      help="produce debug output")
    parser.add_option("-v",
                      "--verbose",
                      dest="verbose",
                      action="store_true",
                      help="display more details")
    (opt, args) = parser.parse_args(args)
    if len(args) == 0:
        raise UsageError("No logfile was specified")
    opt.infile = args[0]
    try:
        opt.period = parse_timedelta(opt.period, units="s")
        if opt.start:
            opt.start = parse_datetime(opt.start)
        if opt.stop:
            opt.stop = parse_datetime(opt.stop)
    except ValueError as exc:
        raise UsageError(str(exc))
    if opt.debug:
        opt.verbose = True
    return opt

def parse_timedelta(text, units="s"):
    """
    Parse the given string as a timedelta. Units may be specified
    explicitly using one of the symbols "s", "m", "h", "d" (lowercase
    only). If no units are specified in the text, the default units are
    assumed.
    """
    orig_text = text
    text = text.strip()
    text = text.replace(" ", "")
    if re.match(r"^\d+$", text):
        text += units
    if not re.match(r"^(\d+[smhd])+$", text):
        raise ValueError("Cannot parse time interval '%s'" % orig_text)
    result = timedelta(0, 0)
    while len(text) > 0:
        m = re.match(r"^(\d+)([smhd])(.*)$", text)
        (val, unit, text) = m.groups()
        if unit == "m":
            result += timedelta(0, int(val) * 60)
        elif unit == "h":
            result += timedelta(0, int(val) * 3600)
        elif unit == "d":
            result += timedelta(int(val), 0)
        else:
            result += timedelta(0, int(val))
    return result

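# Example parses (illustrative only; all forms accepted by the regexes
# above):
#
#     parse_timedelta("90")      # -> timedelta(seconds=90), default units
#     parse_timedelta("1h30m")   # -> timedelta(seconds=5400)
#     parse_timedelta("7d")      # -> timedelta(days=7)
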
def parse_datetime(text):
    """
    Parse the given string as a datetime.
    """
    formats = (
        "%Y-%m-%d %H:%M",
        "%y%m%d %H:%M",
        "%Y-%m-%d",
        "%y%m%d",
    )
    for fmt in formats:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            pass
    raise ValueError("Cannot parse datetime '%s'" % text)

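# Example parses (illustrative only), matching the formats listed above:
#
#     parse_datetime("2014-07-01")        # -> datetime(2014, 7, 1, 0, 0)
#     parse_datetime("2014-06-08 16:00")  # -> datetime(2014, 6, 8, 16, 0)
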
def main(args=None):
    """
    Main function
    """
    log.addHandler(logging.StreamHandler(sys.stdout))
    log.setLevel(logging.INFO)

    # Parse the commandline args
    opt = parse_args(args)
    if opt.debug:
        log.setLevel(logging.DEBUG)

    # Check input file
    if not os.path.isfile(opt.infile):
        log.error("Error: Bad log file '%s'" % opt.infile)
        return 1
    if opt.verbose:
        log.info("Input log file: %s" % opt.infile)

    # Parse log file
    events = parse_logfile(opt.infile)
    if opt.user:
        events = [ev for ev in events if ev.user == opt.user]
    if opt.host:
        events = [ev for ev in events if ev.host == opt.host]
    if not events:
        # Avoid an IndexError below when no events match.
        log.error("Error: No checkin/checkout events found in '%s'" %
                  opt.infile)
        return 1
    features = sorted(set(ev.feature for ev in events))
    users = sorted(set(ev.user for ev in events))
    hosts = sorted(set(ev.host for ev in events))
    first_time = events[0].time
    last_time = events[-1].time
    if opt.start is None:
        opt.start = period_start(first_time, opt.period)
    if opt.stop is None:
        if opt.period > timedelta(0, 0):
            opt.stop = period_start(last_time, opt.period) + opt.period
        else:
            opt.stop = last_time
    log.info("\nParsed %d checkin/checkout events" % len(events))
    log.info(" between %s" % first_time)
    log.info(" and %s" % last_time)
    log.info(" %d different features were checked out" % len(features))
    log.info(" by %d different users on %d hosts" % (len(users), len(hosts)))
    if opt.listusers:
        log.info("\nUsers:")
        for u in users:
            log.info(u)
    if opt.listhosts:
        log.info("\nHosts:")
        for h in hosts:
            log.info(h)
    if opt.listfeatures:
        log.info("\nFeatures:")
        for f in features:
            log.info(f)

    if opt.events:
        write_events_csv(opt.outfile, events)
        log.info("\nCheckin/checkout events written to %s" % opt.outfile)
        return 0

    init, final = initial_tokens(events,
                                 features,
                                 user=opt.user,
                                 host=opt.host)
    line = "\ninitial usage: %s" % "".join(
        [" %d:%s" % (v, k) for (k, v) in init.items() if v > 0])
    log.info(line)
    line = "final usage: %s" % "".join(
        [" %d:%s" % (v, k) for (k, v) in final.items() if v > 0])
    log.info(line)

    if opt.maxusage:
        if opt.period > timedelta(0, 0):
            log.info("\nCalculate the maximum token usage over each period.")
        else:
            log.info("\nCalculate the maximum token usage for each feature.")
    else:
        if opt.period > timedelta(0, 0):
            log.info(
                "\nCalculate time-weighted token usage (token-hours) over "
                "each period.")
        else:
            log.info(
                "\nCalculate total time-weighted token usage (token-hours) "
                "for each feature.")
    log.info(" Start time: %s" % str(opt.start))
    log.info(" Stop time: %s" % str(opt.stop))
    if opt.period > timedelta(0, 0):
        log.info(" Period: %s" % str(opt.period))
    if opt.host:
        log.info(" Host: %s" % opt.host)
    if opt.user:
        log.info(" User: %s" % opt.user)

    if opt.maxusage:
        usage = maximum_usage(events,
                              features,
                              period=opt.period,
                              start=opt.start,
                              stop=opt.stop,
                              user=opt.user,
                              host=opt.host,
                              init=init)
    else:
        usage = time_weighted_usage(events,
                                    features,
                                    period=opt.period,
                                    start=opt.start,
                                    stop=opt.stop,
                                    user=opt.user,
                                    host=opt.host,
                                    init=init)
    if opt.debug:
        report_usage(usage, features)
    if not opt.maxusage:
        if opt.period > timedelta(0, 0):
            total = calculate_total_usage(usage, features)
            report_total_usage(total, features)
        else:
            report_total_usage(usage[0][1:], features)
    if opt.period == timedelta(0, 0):
        write_overall_usage_csv(opt.outfile, usage, features)
    else:
        write_usage_csv(opt.outfile, usage, features)
    log.info("\nUsage data written to %s" % opt.outfile)
    return 0

if __name__ == "__main__":
    try:
        exitcode = main(sys.argv[1:])
    except UsageError as exc:
        exitcode = 1
        print("ERROR:", exc)
        parser.print_help()
    sys.exit(exitcode)