"""
Provides the `jaguarDiff` workup method for use with jaguar and qsite jobs.
It wraps the qsite/jaguar output class 'diff' function so that it produces
results which are meaningful to STU.  Can also be used from the command line.

$Revision 0.1 $

@copyright: (c) Schrodinger, LLC. All rights reserved
"""

from math import floor
from math import log10
from past.utils import old_div

# Packages --------------------------------------------------------------------
import numpy

from import IncompleteOutput
from schrodinger.application.qsite.output import QSiteOutput as QSOut

from . import failures

#contributors Dan Wandschneider
_version = "$Revision 0.1 $"

#Globals ----------------------------------------------------------------------
minPrecision = 2e-6
minPrecisionPercent = 5.0  #indeed means 5%,

#see the "job duration" check in jaguarDiff for the time difference tolerance.
#see getMemDiffs for the memory difference tolerance.
#strFormat =  "%-25s %20s %20s\n"

def jaguarDiff(testFile, refFile, *options):
    """
    Compare every property available in the Jaguar/QSite out file against a
    reference.

    Usage::

        # file.out      - Path of the file to be compared
        # reference.out - Path of the reference file
        outcome_workup = jaguarDiff('file.out', 'reference.out', 'tol=1.0',
                                    'log=workup.log', 'config=config.txt',
                                    'value=1e-4')

    Optional parameters:

    * tol=value - A multiplier by which to loosen ALL tolerances
    * log=file - Path to an optional output log.  If it is not provided,
      the full summary is carried in the raised AssertionError instead.
    * config=file - A configuration file formatted as follows:
      any text after a '#' character is a comment, and is ignored;
      all other text is in key/value pairs, one per line.
      Possible keys are: log, tol, and any value that appears in the
      output file, i.e. ``value = 1e-4`` overrides the tolerance for
      'value' to 1e-4
    * timing=boolean - if true, jobs can 'fail' based on timing alone.
      Default is false.

    All other arguments are taken to be tolerance overrides for specific
    values.

    :param testFile: Path of the output file to be checked
    :type testFile: str
    :param refFile: Path of the reference output file
    :type refFile: str
    :param options: ``name=value`` strings as described above
    :return: True when no difference exceeds its tolerance
    :rtype: bool
    :raise failures.WorkupFailure: if the test job failed while the
        reference job succeeded
    :raise AssertionError: if any difference was found
    :raise ValueError: if an option is not of the form ``name=value``
    """
    tolOverrides = {}
    for arg in options:
        # Split on the first '=' only so that values (e.g. file paths) may
        # themselves contain '='.
        aName, separator, aValue = arg.partition("=")
        if not separator:
            raise ValueError("Expected a 'name=value' option, got %r" % arg)
        optionTest(aName, aValue, tolOverrides)
    if "config" in tolOverrides:
        parseConfigFile(tolOverrides)
        tolOverrides.pop("config")

    test = QSOut(testFile)
    ref = QSOut(refFile)

    # Did the reference pass?
    ref_passed = ref.status == QSOut.OK

    # Fast fail if only the test failed.
    if test.status != QSOut.OK and ref_passed:
        msg = '%s failed with a FATAL ERROR' % testFile
        if hasattr(test, 'fatal_errorno'):
            msg += ' (%s)' % test.fatal_errorno
        msg += ': ' + test.fatal_error
        raise failures.WorkupFailure(msg)

    # use the jaguar/qsite diff function.
    diffs = test.diff(ref, factor=tolOverrides.get("tol", 1.0))
    outString = []

    # Additional tests:
    # memory usage by subjob
    diffs.extend(getMemDiffs(testFile, refFile, tolOverrides))

    # atom RMSD:
    try:
        rmsd = getRMSD(test.getStructures()[-1], ref.getStructures()[-1])
    except IncompleteOutput:
        # An incomplete structure is only tolerated when the reference
        # itself did not finish successfully.
        if ref_passed:
            raise
    else:
        if rmsd > tolOverrides.get('rmsd', minPrecision * 10):
            msg = "{:<25} {:>20}\n".format(' rmsd', n2s(rmsd))
            outString.append(msg)

    for prop, o_value, ref_value in diffs:
        # modifies outString to hold new results!
        printline(prop, o_value, ref_value, outString, tolOverrides)

    # job duration.
    testTime, refTime = None, None
    try:
        refTime = ref.getDuration().total_seconds()
    except AttributeError:
        if ref_passed:
            raise
    else:
        testTime = test.getDuration().total_seconds()
        percentTol = old_div(
            tolOverrides.get('duration%', minPrecisionPercent * 7), 100)
        # time = 0.0 is possible
        try:
            percentDiff = old_div(abs(testTime - refTime), refTime)
        except ZeroDivisionError:
            # Force the relative check to trip; the absolute check below
            # still has to agree before anything is reported.
            percentDiff = percentTol + 0.1
        if (percentDiff > percentTol and
                abs(testTime - refTime) > tolOverrides.get('duration', 60.0)):
            testTimeStr = str(int(testTime)) + "s"
            refTimeStr = str(int(refTime)) + "s"
            # The original appended this entry to `diffs` after that list
            # had already been consumed, so it was never reported.  Report
            # it alongside other differences, or on its own only when the
            # documented `timing` option is enabled.
            if outString or tolOverrides.get("timing"):
                printline("duration", testTimeStr, refTimeStr, outString,
                          tolOverrides)

    if outString:
        testJ, refJ = getJagVersion(testFile), getJagVersion(refFile)
        message = ("Values shown for " + testFile + "\nDiff = %s - %s\n" %
                   (testFile, refFile))
        if testJ and refJ:
            test_time_expr = ref_time_expr = ''
            # refTime is always set whenever testTime is (see above).
            if testTime is not None:
                test_time_expr = "Job duration(s): %.2f" % testTime
                ref_time_expr = "Job duration(s): %.2f" % refTime
            message += ("{} Jaguar version: {}{}\n".format(
                testFile, testJ, test_time_expr))
            message += ("{} Jaguar version: {}{}\n".format(
                refFile, refJ, ref_time_expr))
        message += "\n"
        outString.insert(0, message)
        outString.append("\n")
        if "log" in tolOverrides:
            with open(tolOverrides["log"], "w") as logFile:
                logFile.writelines(outString)
            # -2: the header message and the trailing newline are not
            # differences.
            msg = " {} differences found. Printing summary to {}".format(
                len(outString) - 2, tolOverrides["log"])
            raise AssertionError(msg)
        else:
            raise AssertionError(''.join(outString))
    return True
def get_tol_override(tolOverrides, prop):
    """
    Look up the tolerance override for a property name.

    An exact key match wins; otherwise each key is tried as an fnmatch
    pattern (so ``*`` acts as a wildcard) and the first match, in dictionary
    order, is used.  When nothing matches, the module-wide `minPrecision`
    is returned.

    :param tolOverrides: tolerance overrides keyed by property name/pattern
    :type tolOverrides: dict
    :param prop: the property name to look up
    :type prop: str
    :return: the override value, or `minPrecision` if there is none
    """
    import fnmatch

    # The exact-match test comes first because fnmatch gives '[' and ']' a
    # special meaning, so names containing them would not match themselves.
    if prop in tolOverrides:
        return tolOverrides[prop]

    for pattern, tolerance in tolOverrides.items():
        if fnmatch.fnmatch(prop, pattern):
            return tolerance

    return minPrecision
def printline(prop, o_value, ref_value, logArray, tolOverrides):
    """
    Compares the difference between two properties to a tolerance.  If the
    difference is greater than the tolerance, appends a string explaining
    this to an output array.  This function recurses to examine differences
    within arrays, dictionaries and objects.

    :param prop: The name of the property
    :type prop: str
    :param o_value: The 'output value' to be compared
    :param ref_value: The reference value to be compared
    :param logArray: An array of strings, each of which describes a
        difference.  Results are appended.
    :type logArray: list
    :param tolOverrides: tolerance overrides for specific property names.
        An override of 'ignore' (any case) skips the property entirely.
    :type tolOverrides: dict
    """
    import operator

    def append_line(s1, s2, s3, logArray):
        # Convert up front: lists, dicts and plain objects reject the ">20"
        # format spec with a TypeError, while str() of numbers and strings
        # gives the same text that the format spec would have produced.
        s2, s3 = str(s2), str(s3)
        if len(s2) > 20 or len(s3) > 20:
            message = f"\n{s1}: \n {s2}\n {s3}\n\n"
        else:
            message = f"{s1:<25} {s2:>20} {s3:>20}\n"
        logArray.append(message)

    def lookup(mapping, key):
        # A key present on one side only is reported as "missing" by the
        # recursive call instead of raising KeyError here.
        try:
            return mapping[key]
        except KeyError:
            return None

    def fetch(obj, name):
        # attrgetter resolves plain and dotted attribute names without
        # eval(); a missing attribute is reported as "missing".
        try:
            return operator.attrgetter(name)(obj)
        except AttributeError:
            return None

    s1 = " " + prop
    override = get_tol_override(tolOverrides, prop.strip())
    try:
        if override.lower() == 'ignore':
            return
    except AttributeError:
        # numeric override: nothing to skip
        pass

    if o_value is None and ref_value is None:
        return
    if o_value is None:
        append_line(s1, "is missing from new file", "", logArray)
        return
    if ref_value is None:
        append_line(s1, "is missing from reference", "", logArray)
        return

    # Strings must match exactly
    if isinstance(o_value, str):
        if o_value != ref_value:
            append_line(s1, o_value, "%s" % ref_value, logArray)
        return

    # Dictionaries are compared recursively
    # by looking at the differences between matching elements
    if hasattr(o_value, "keys"):
        if not hasattr(ref_value, "keys"):
            append_line(s1, str(len(o_value)) + " values", ref_value,
                        logArray)
            return
        # Tentative header line; removed again below if nothing under it
        # turned out to differ.
        logArray.append(s1 + "\n")
        lenNow = len(logArray)
        # Sorted so that the report order is reproducible between runs.
        for key in sorted(set(o_value) | set(ref_value), key=str):
            printline(f"{s1}.{key}", lookup(o_value, key),
                      lookup(ref_value, key), logArray, tolOverrides)
        if len(logArray) == lenNow:
            logArray.pop()
        return

    # Arrays are compared recursively
    # by looking at the differences between matching elements
    if hasattr(o_value, "__getitem__"):
        # numpy floats have __getitem__ but it is not actually usable, so
        # probe with len() before treating the value as a sequence.
        try:
            o_len = len(o_value)
        except TypeError:
            o_len = None
        if o_len is not None:
            if not hasattr(ref_value, "__getitem__"):
                append_line(s1, str(o_len) + " values", ref_value, logArray)
                return
            if o_len != len(ref_value):
                append_line(s1, str(o_len) + " values",
                            str(len(ref_value)) + " values", logArray)
                return
            logArray.append(s1 + "\n")
            lenNow = len(logArray)
            for i in range(o_len):
                printline("%s[%d]" % (s1, i), o_value[i], ref_value[i],
                          logArray, tolOverrides)
            if len(logArray) == lenNow:
                logArray.pop()
            return

    # Other Iterables are compared recursively
    # by looking at the differences between matching elements
    if hasattr(o_value, "__iter__"):
        if not hasattr(ref_value, "__iter__"):
            append_line(s1, str(len(o_value)) + " values", ref_value,
                        logArray)
            return
        if len(o_value) != len(ref_value):
            append_line(s1, str(len(o_value)) + " values",
                        str(len(ref_value)) + " values", logArray)
            return
        logArray.append(s1 + "\n")
        lenNow = len(logArray)
        for i, (o_item, ref_item) in enumerate(zip(o_value, ref_value)):
            printline("%s[%d]" % (s1, i), o_item, ref_item, logArray,
                      tolOverrides)
        if len(logArray) == lenNow:
            logArray.pop()
        return

    if hasattr(o_value, "_attrs"):
        # Property lists are compared recursively
        # by looking at the differences between matching elements
        logArray.append(s1 + "\n")
        lenNow = len(logArray)
        for name in o_value._attrs:
            printline(f"{s1}.{name}", fetch(o_value, name),
                      fetch(ref_value, name), logArray, tolOverrides)
        if len(logArray) == lenNow:
            logArray.pop()
        return

    # objects that dont have _attrs but do have data
    if hasattr(o_value, "__dict__"):
        o_attrs = o_value.__dict__
        if not hasattr(ref_value, "__dict__"):
            # Plain objects usually have no len(); count attributes instead.
            try:
                count = len(o_value)
            except TypeError:
                count = len(o_attrs)
            append_line(s1, str(count) + " values", ref_value, logArray)
            return
        r_attrs = ref_value.__dict__
        # make sure we have all the keys, test-side order first
        all_keys = list(o_attrs) + [k for k in r_attrs if k not in o_attrs]
        for k in all_keys:
            printline(f"{s1}.{k}", o_attrs.get(k), r_attrs.get(k), logArray,
                      tolOverrides)
        return

    # This property can be subtracted.  Is the difference greater than
    # the tolerance?
    if hasattr(o_value, "__sub__"):
        diff = o_value - ref_value
        if hasattr(diff, "__abs__"):
            if not abs(diff) < override:
                append_line(s1, "value=%f" % o_value, "diff=%f" % diff,
                            logArray)
            return
        try:
            diff_number = float(str(diff))
            if abs(diff_number) < override:
                return
            s2 = "value=%f" % float(str(o_value))
            s3 = "diff=%f" % diff_number
        except (TypeError, ValueError):
            # Not expressible as a number: fall through to the last resort.
            pass
        else:
            append_line(s1, s2, s3, logArray)
            return
    # Next to last resort: Are the two objects equal?
    elif hasattr(o_value, "__eq__"):
        # `not ==` rather than `!=` because __ne__ may not be defined
        if not o_value == ref_value:
            append_line(s1, n2s(o_value, 4), "ref=%s" % n2s(ref_value),
                        logArray)
        return

    # if I havent returned yet then I guess I don't know what to do
    # I'll just report the two values
    append_line(s1, o_value, "ref=%s" % ref_value, logArray)
def getJagVersion(fileName):
    """
    Gets the jaguar version from a jaguar/qsite output file.

    The first line containing "jaguar version" or "qsite version" (case
    insensitive) is used.  Commas, spaces and '|' characters are dropped and
    the text after "version" is split on "release", giving
    ``<version>.<release>``.

    :param fileName: Path of the output file
    :type fileName: str
    :return: ``<version>.<release>``; just ``<version>`` if the line carries
        no release field; None if the file has no version line
    :rtype: str or None
    """
    with open(fileName) as fh:
        for line in fh:
            line = line.lower()
            if "jaguar version" not in line and "qsite version" not in line:
                continue
            text = line.replace(",", "").split("version")[-1]
            text = text.replace(" ", "").replace("|", "")
            parts = text.split("release")
            if len(parts) < 2:
                # No release field: return the bare version rather than
                # failing with an IndexError.
                return parts[0].strip()
            return (parts[0] + "." + parts[1]).strip()
    return None
def optionTest(aName, aValue, tolOverrides):
    """
    Parses input arguments.  Can be used on either command line or config
    file.

    A '%' in the value marks a percent tolerance: the '%' is removed from
    the value and appended to the name (``Memory=5%`` is stored under
    ``Memory%``).  File-path options (log, logfile, config) are exempt so
    that paths containing '%' are kept intact.

    Note that ``tol``/``tol%``/``minPrecision``/``minPrecisionPercent``
    modify the module-level `minPrecision` and `minPrecisionPercent`.

    :param aName: Name of the parameter
    :type aName: str
    :param aValue: Parameter's value
    :type aValue: str
    :param tolOverrides: Tolerances.  Results are appended.
    :type tolOverrides: dict
    :return: the same `tolOverrides` dictionary
    :rtype: dict
    :raise ValueError: if a tolerance value is neither a number nor 'ignore'
    """
    global minPrecisionPercent
    global minPrecision

    if "%" in aValue and aName not in ("log", "logfile", "config"):
        aName = aName + "%"
        aValue = aValue.replace("%", "")

    if aName in ("tol", "tolerance"):
        factor = float(aValue)
        minPrecision *= factor
        tolOverrides["tol"] = factor
    elif aName in ("tol%", "tolp"):
        minPrecisionPercent *= float(aValue)
    elif aName == "minPrecision":
        minPrecision = float(aValue)
    elif aName in ("minPrecisionPercent", "minPrecision%"):
        minPrecisionPercent = float(aValue)
    elif aName in ("log", "logfile"):
        tolOverrides["log"] = aValue
    elif aName == "config":
        tolOverrides["config"] = aValue
    elif aName == "timing":
        # Any non-empty text other than the usual "off" spellings enables
        # timing; previously "0", "no" and "off" were treated as true.
        disabled = ("false", "0", "no", "off", "")
        tolOverrides["timing"] = aValue.strip().lower() not in disabled
    else:
        try:
            tolOverrides[aName] = float(aValue)
        except ValueError:
            if aValue.lower() == 'ignore':
                tolOverrides[aName] = aValue
            else:
                raise
    return tolOverrides
def parseConfigFile(tolOverrides):
    """
    Parses a config file for use in this script.  Config file follows this
    example format::

        log = workup.log          #Everything following a pound is a comment
        tol 2.0                   #equals signs are optional
        energy_2_electron = 1e-2  #mostly, the config file is a place to
                                  #put multiple tolerance overrides.

    Lines that do not hold at least a name and a value, or whose value
    cannot be interpreted by `optionTest`, are skipped.

    :param tolOverrides: A dictionary containing any known options, mostly
        tolerance overrides.  Its "config" entry names the file to read;
        parsed options are added to it.
    :type tolOverrides: dict
    :return: the same `tolOverrides` dictionary
    :rtype: dict
    """
    with open(tolOverrides["config"]) as config:
        for line in config:
            # Drop the comment part and treat '=' as plain whitespace.
            tokens = line.split("#")[0].replace("=", " ").split()
            if len(tokens) < 2:
                continue
            try:
                optionTest(tokens[0], tokens[1], tolOverrides)
            except ValueError:
                # Parsing is best-effort: a malformed value is skipped, but
                # unrelated errors are no longer swallowed.
                pass
    return tolOverrides
def n2s(number, precision=3):
    """
    Format a number as a short string for the difference report.

    * magnitude above 1e5 or below 1e-5 (including zero): ``%.2e`` notation
    * magnitude above 1000: integer notation
    * otherwise: rounded to `precision` significant digits

    Anything that cannot be handled as a number is returned as ``str()`` of
    the value.

    :param number: the value to format
    :param precision: number of significant digits kept for mid-range values
    :type precision: int
    :return: the formatted value
    :rtype: str
    """
    try:
        number = float(str(number))
        magnitude = abs(number)
        if magnitude > 1e5 or magnitude < 1e-5:
            return "%.2e" % number
        if magnitude > 1000:
            return "%d" % number
        # Decimal places needed to keep `precision` significant digits.
        digits = precision - 1 - int(floor(log10(magnitude)))
        return "%s" % round(number, digits)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric input, NaN, or an unusable precision.
        return str(number)
def getRMSD(test, ref):
    """
    Finds the RMSD between the atoms in two structure objects.

    The squared coordinate differences are summed over every component and
    divided by the number of atoms in `test` before the square root is
    taken.

    :param test: structure object providing `getXYZ()` and `atom`
    :param ref: reference structure object providing `getXYZ()`
    :return: the root-mean-square deviation
    :rtype: numpy.float64
    """
    delta = (numpy.asarray(test.getXYZ(), dtype=float) -
             numpy.asarray(ref.getXYZ(), dtype=float))
    # True division: `old_div` floor-divides when the sum is integer-typed,
    # which truncated the mean square for integer coordinates.
    mean_square = numpy.sum(delta * delta) / len(test.atom)
    return numpy.sqrt(mean_square)
def get_mems_from_file(filename):
    """
    Scan a Jaguar outfile for memory used in each subprogram.

    The value on the most recent "Peak memory" line (second-to-last token)
    is attributed to the program named by the last token of the next
    "end of program" line.  When a program appears more than once, the
    largest value is kept.

    :param filename: file name
    :type filename: str
    :return: Memory used for each subprogram
    :rtype: dictionary
    """
    peak_by_program = {}
    pending_peak = 0
    with open(filename) as handle:
        for text in handle:
            if "Peak memory" in text:
                pending_peak = float(text.split()[-2])
            if "end of program" not in text:
                continue
            program = text.split()[-1]
            is_new = program not in peak_by_program
            if is_new or pending_peak > peak_by_program[program]:
                peak_by_program[program] = pending_peak
            # The pending value belongs to the program that just ended.
            pending_peak = 0
    return peak_by_program
def getMemDiffs(test, ref, tolOverrides):
    """
    Finds the difference in memory usage (for each subprogram) between the
    two runs.  This diff is skipped unless a tolerance is manually given.

    :param test: Name of the test output file
    :type test: str
    :param ref: Name of the reference output file
    :type ref: str
    :param tolOverrides: Tolerances for specific tests.  This function is
        interested in Memory% which provides a minimum tolerance as a
        percent difference.  A missing or non-numeric (e.g. 'ignore')
        entry skips the comparison.
    :type tolOverrides: dict
    :return: Memory differences of each executable type between the two
        files, as (name, test value, difference) tuples sorted by program
        name
    :rtype: list
    """
    percent = tolOverrides.get('Memory%')
    if percent is None or isinstance(percent, str):
        return []
    percentTol = percent / 100.0

    testMem = get_mems_from_file(test)
    refMem = get_mems_from_file(ref)

    memDiffs = []
    # Only programs present in both files can be compared; sorted so the
    # report order is reproducible between runs.
    for key in sorted(set(testMem) & set(refMem)):
        used, expected = testMem[key], refMem[key]
        # only a meaningful comparison if memory usage was recorded in
        # both files
        if used == 0 or expected == 0:
            continue
        if abs(used - expected) / expected > percentTol:
            memDiffs.append(("Program " + key, "%dMb" % used,
                             "diff = %dMb" % (used - expected)))
    return memDiffs
if __name__ == "__main__":
    # CLI tool to test and rerun diff comparisons using the jaguar/qsite
    # "diff" function.
    #
    # usage: $SCHRODINGER/run file.out reference.out
    try:

        def printHelp():
            """Print the usage message, one line per entry."""
            help_lines = (
                " Finds differences between two Jaguar/QSite output files.",
                " Usage:",
                " $SCHRODINGER/run file.out reference.out [tol=1.0 log=workup.log config=config.txt value=1e-4]",
                "file.out - Path of the file to be compared",
                "reference.out - Path of the reference file",
                " Optional parameters:",
                "tol=x - A multipler to by which loosen ALL tolerances",
                "log - Path to an optional output log. if it is not provided, the output is written to the console.",
                "config - A configuration file format as follows",
                " Any text after a '#' character is a comment, and is ignored",
                " all other text is in key/value pairs, one per line. ",
                " possible keys are: log, tol, and any value that appears in the output file",
                " i.e.",
                " value = 1e-4",
                " overrides the tolerance for 'value' to 1e-4",
                "All other arguments are taken to be tolerance overrides for specific values",
            )
            for help_line in help_lines:
                print(help_line)

        from sys import argv

        # Both file arguments are required; anything less shows the help.
        wants_help = "-h" in argv or len(argv) < 3
        if wants_help:
            printHelp()
        else:
            jaguarDiff(*argv[1:])
    except KeyboardInterrupt as E:
        print("User exited. Analysis not finished: ", str(E))