# Source code for schrodinger.test.stu.outcomes.jaguar

"""
Provides the `jaguarDiff` workup method for use with jaguar and qsite jobs.
It wraps the qsite/jaguar output class 'diff' function so that it produces
results which are meaningful to STU.  Can also be used from the command line.

$Revision 0.1 $

@copyright: (c) Schrodinger, LLC. All rights reserved
"""

from math import floor
from math import log10
from past.utils import old_div

# Packages --------------------------------------------------------------------
import numpy

from schrodinger.application.jaguar.results import IncompleteOutput
from schrodinger.application.qsite.output import QSiteOutput as QSOut

from . import failures

#contributors Dan Wandschneider
_version = "$Revision 0.1 $"

#Globals ----------------------------------------------------------------------
minPrecision = 2e-6
minPrecisionPercent = 5.0  #indeed means 5%,

#see jaguarDiff() for the time (duration) difference tolerance.
#see getMemDiffs() for the memory difference tolerance.
#strFormat =  "%-25s %20s %20s\n"


def jaguarDiff(testFile, refFile, *options):
    """
    Compare every property available in the Jaguar/QSite out file against a
    reference.

    Usage::

        # file.out - Path of the file to be compared
        # reference.out - Path of the reference file
        outcome_workup = jaguarDiff('file.out', 'reference.out', 'tol=1.0',
                                    'log=workup.log', 'config=config.txt',
                                    'value=1e-4')

    Optional parameters:

    * tol=value - A multipler to by which loosen ALL tolerances
    * log=file - Path to an optional output log. If it is not provided, the
      output is written to the console.
    * config=file - A configuration file format as follows: Any text after a
      '#' character is a comment, and is ignored all other text is in
      key/value pairs, one per line. possible keys are: log, tol, and any
      value that appears in the output file i.e. value = 1e-4 overrides the
      tolerance for 'value' to 1e-4
    * timing=boolean - if true, jobs can 'fail' based on timing alone.
      Default is false.

    All other arguments are taken to be tolerance overrides for specific
    values

    :raises failures.WorkupFailure: if the test job died while the reference
        job succeeded
    :raises AssertionError: if any property differs beyond its tolerance
    :return: True when no differences are found
    """
    tolOverrides = {}
    for arg in options:
        aName, aValue = arg.split("=")
        optionTest(aName, aValue, tolOverrides)
    if "config" in tolOverrides:
        parseConfigFile(tolOverrides)
        tolOverrides.pop("config")

    test = QSOut(testFile)
    ref = QSOut(refFile)

    # Did the reference pass?
    ref_passed = ref.status == QSOut.OK

    # Fast fail if only the test failed.
    if test.status != QSOut.OK and ref_passed:
        msg = '%s failed with a FATAL ERROR' % testFile
        if hasattr(test, 'fatal_errorno'):
            msg += ' (%s)' % test.fatal_errorno
        msg += ': ' + test.fatal_error
        raise failures.WorkupFailure(msg)

    #use the jaguar/qsite diff function.
    diffs = test.diff(ref, factor=tolOverrides.get("tol", 1.0))
    outString = []

    #Additional tests:
    #memory usage by subjob
    diffs.extend(getMemDiffs(testFile, refFile, tolOverrides))

    #atom RMSD:
    try:
        rmsd = getRMSD(test.getStructures()[-1], ref.getStructures()[-1])
    except IncompleteOutput:
        # A reference without structures is only fatal if it claims success.
        if ref_passed:
            raise
    else:
        if rmsd > tolOverrides.get('rmsd', minPrecision * 10):
            msg = "{:<25} {:>20}\n".format(' rmsd', n2s(rmsd))
            outString.append(msg)

    for prop, o_value, ref_value in diffs:
        #modifies outString to hold new results!
        printline(prop, o_value, ref_value, outString, tolOverrides)

    #job duration.
    testTime, refTime = None, None
    try:
        refTime = ref.getDuration().total_seconds()
    except AttributeError:
        # No duration in the reference: only fatal if the reference passed.
        if ref_passed:
            raise
    else:
        testTime = test.getDuration().total_seconds()
        # time = 0.0 is possible
        try:
            percentDiff = abs(testTime - refTime) / refTime
        except ZeroDivisionError:
            # Force the percent check to trip; the absolute check still gates.
            percentDiff = (
                tolOverrides.get('duration%', minPrecisionPercent * 7) / 100 +
                0.1)
        if (percentDiff >
                tolOverrides.get('duration%', minPrecisionPercent * 7) / 100
                and abs(testTime - refTime) > tolOverrides.get(
                    'duration', 60.0)):
            # BUG FIX: this line used to be appended to `diffs` *after* the
            # printline loop had already consumed `diffs`, so duration
            # differences were silently dropped.  Report it directly.  It can
            # cause a failure on its own only when the (documented but
            # previously unused) timing option is enabled.
            if outString or tolOverrides.get("timing", False):
                outString.append("{:<25} {:>20} {:>20}\n".format(
                    " duration",
                    str(int(testTime)) + "s",
                    str(int(refTime)) + "s"))

    if outString:
        testJ, refJ = getJagVersion(testFile), getJagVersion(refFile)
        message = ("Values shown for " + testFile + "\nDiff = %s - %s\n" %
                   (testFile, refFile))
        if testJ and refJ:
            time_expr = ''
            if testTime is not None:
                time_expr = "Job duration(s): %.2f" % testTime
            message += ("{} Jaguar version: {}{}\n".format(
                testFile, testJ, time_expr))
            # testTime and refTime are always set together (see above), so
            # this guard also guarantees refTime is not None.
            if testTime is not None:
                time_expr = "Job duration(s): %.2f" % refTime
            message += ("{} Jaguar version: {}{}\n".format(
                refFile, refJ, time_expr))
        message += "\n"
        outString.insert(0, message)
        outString.append("\n")
        if "log" in tolOverrides:
            with open(tolOverrides["log"], "w") as logFile:
                for line in outString:
                    logFile.write(line)
            # -2: the header inserted above and the trailing blank line are
            # not differences.
            msg = " {} differences found. Printing summary to {}".format(
                len(outString) - 2, tolOverrides["log"])
            raise AssertionError(msg)
        else:
            raise AssertionError(''.join(outString))
    return True
def get_tol_override(tolOverrides, prop):
    """
    Search for a tolerance override for ``prop`` -- override keys may
    contain ``*`` wildcards.  Falls back to the module-wide minimum
    precision when nothing matches.
    """
    import fnmatch

    # An exact key wins outright.  This check is required because fnmatch
    # gives '[]' special (character-class) meaning, so a property name
    # containing brackets would never glob-match itself.
    if prop in tolOverrides:
        return tolOverrides[prop]
    # Otherwise the first glob pattern that matches decides.
    for pattern, tolerance in tolOverrides.items():
        if fnmatch.fnmatch(prop, pattern):
            return tolerance
    return minPrecision
def printline(prop, o_value, ref_value, logArray, tolOverrides):
    """
    Compares the difference between two properties to a tolerance. If the
    difference is greater than the tolerance, appends a string explaning
    this to an output array.

    This function recurses to examine differences within arrays and
    dictionaries.

    :param prop: The name of the property
    :param o_value: The 'output value' to be compared
    :param ref_value: The reference value to be compared
    :param logArray: An array of strings, each of which describes a
        difference. Results are appended.
    :param tolOverrides: tolerance overrides for specific property names.
    :type tolOverrides: dict
    """

    def append_line(s1, s2, s3, logArray):
        # Long values get a multi-line record; short ones a fixed-width row.
        if len(str(s2)) > 20 or len(str(s3)) > 20:
            message = f"\n{s1}: \n {s2}\n {s3}\n\n"
        else:
            message = f"{s1:<25} {s2:>20} {s3:>20}\n"
        logArray.append(message)
        return

    s1 = " " + prop
    s2 = ""
    s3 = ""
    override = get_tol_override(tolOverrides, prop.strip())
    # A string override of 'ignore' suppresses this property entirely;
    # numeric overrides have no .lower() and fall through.
    try:
        if override.lower() == 'ignore':
            return
    except AttributeError:
        pass
    if o_value is None and ref_value is None:
        return
    if o_value is None:
        s2 = "is missing from new file"
        append_line(s1, s2, s3, logArray)
        return
    if ref_value is None:
        s2 = "is missing from reference"
        append_line(s1, s2, s3, logArray)
        return
    #Strings must match exactly
    if isinstance(o_value, str):
        if o_value != ref_value:
            s2 = o_value
            s3 = "%s" % ref_value
            append_line(s1, s2, s3, logArray)
            return
        else:
            return
    #Dictionaries are compared recursively
    #by looking at the differences between matching elements
    if hasattr(o_value, "keys"):
        if not hasattr(ref_value, "keys"):
            s2 = str(len(o_value)) + " values"
            s3 = ref_value
            append_line(s1, s2, s3, logArray)
            return
        else:
            logArray.append(s1 + "\n")
            lenNow = len(logArray)
            s1 += "."
            for a in set(list(o_value) + list(ref_value)):
                # BUG FIX: indexing both dicts raised KeyError for a key
                # present in only one of them; report it as missing instead.
                s2 = o_value[a] if a in o_value else None
                s3 = ref_value[a] if a in ref_value else None
                printline(s1 + a, s2, s3, logArray, tolOverrides)
            # Drop the header if no child produced a difference.
            if len(logArray) == lenNow:
                logArray.pop()
            return
    #Arrays are compared recursively
    #by looking at the differences between matching elements
    if hasattr(o_value, "__getitem__"):
        # numpy floats have __getitem__ but its not actually callable -- lame
        try:
            o_len = len(o_value)
            ok = True
        except TypeError:
            ok = False
        if ok:
            if not hasattr(ref_value, "__getitem__"):
                s2 = str(len(o_value)) + " values"
                s3 = ref_value
                append_line(s1, s2, s3, logArray)
                return
            elif len(o_value) != len(ref_value):
                s2 = str(len(o_value)) + " values"
                s3 = str(len(ref_value)) + " values"
                append_line(s1, s2, s3, logArray)
                return
            else:
                logArray.append(s1 + "\n")
                lenNow = len(logArray)
                for i in range(len(o_value)):
                    s1a = "%s[%d]" % (s1, i)
                    printline(s1a, o_value[i], ref_value[i], logArray,
                              tolOverrides)
                if len(logArray) == lenNow:
                    logArray.pop()
                return
    #Other Iterables are compared recursively
    #by looking at the differences between matching elements
    if hasattr(o_value, "__iter__"):
        if not hasattr(ref_value, "__iter__"):
            s2 = str(len(o_value)) + " values"
            s3 = ref_value
            append_line(s1, s2, s3, logArray)
            return
        elif len(o_value) != len(ref_value):
            s2 = str(len(o_value)) + " values"
            s3 = str(len(ref_value)) + " values"
            append_line(s1, s2, s3, logArray)
            return
        else:
            logArray.append(s1 + "\n")
            lenNow = len(logArray)
            i = 0
            for o_valueI, ref_valueI in zip(o_value, ref_value):
                s1a = "%s[%d]" % (s1, i)
                printline(s1a, o_valueI, ref_valueI, logArray, tolOverrides)
                i += 1
            if len(logArray) == lenNow:
                logArray.pop()
            return
    if hasattr(o_value, "_attrs"):
        #Property lists are compared recursively
        #by looking at the differences between matching elements
        logArray.append(s1 + "\n")
        lenNow = len(logArray)
        s1 += "."
        for a in o_value._attrs:
            # BUG FIX: was eval("o_value." + a) / eval("ref_value." + a) --
            # getattr gives the same result without executing attribute
            # names as code.
            s2 = getattr(o_value, a)
            s3 = getattr(ref_value, a)
            printline(s1 + a, s2, s3, logArray, tolOverrides)
        if len(logArray) == lenNow:
            logArray.pop()
        return
    # objects that dont have _attrs but do have data
    if hasattr(o_value, "__dict__"):
        if not hasattr(ref_value, "__dict__"):
            s2 = str(len(o_value)) + " values"
            s3 = ref_value
            append_line(s1, s2, s3, logArray)
            return
        else:
            o_keys = list(o_value.__dict__)
            r_keys = list(ref_value.__dict__)
            # make sure we have all the keys
            all_keys = list(o_keys)
            for k in r_keys:
                if k not in all_keys:
                    all_keys.append(k)
            for k in all_keys:
                s1a = f"{s1}.{k}"
                printline(s1a, o_value.__dict__.get(k, None),
                          ref_value.__dict__.get(k, None), logArray,
                          tolOverrides)
            return
    #This property can be subtracted. Is the difference greater than
    #the tolerance?
    if hasattr(o_value, "__sub__"):
        diff = o_value - ref_value
        if hasattr(diff, "__abs__"):
            if abs(diff) < override:
                return
            else:
                s2 = "value=%f" % o_value
                s3 = "diff=%f" % diff
                append_line(s1, s2, s3, logArray)
                return
        else:
            # Last-ditch numeric comparison via string conversion; narrowed
            # from a bare except -- only conversion/format errors fall
            # through to the generic report below.
            try:
                if (abs(float(str(diff)))) < override:
                    return
                else:
                    s2 = "value=%f" % float(str(o_value))
                    s3 = "diff=%f" % float(str(diff))
                    append_line(s1, s2, s3, logArray)
                    return
            except (ValueError, TypeError):
                pass
    #Next to last resort: Are the two objects equal?
    elif hasattr(o_value, "__eq__"):
        if not hasattr(ref_value, "__eq__"):
            s2 = str(len(o_value)) + " values"
            s3 = ref_value
            append_line(s1, s2, s3, logArray)
            return
        else:
            # not required because __ne__ may not be defined
            if not o_value == ref_value:
                s2 = n2s(o_value, 4)
                s3 = "ref=%s" % n2s(ref_value)
                append_line(s1, s2, s3, logArray)
                return
            else:
                return
    # if I havent returned yet then I guess I don't know what to do
    # I'll just report the two values
    s2 = o_value
    s3 = "ref=%s" % ref_value
    append_line(s1, s2, s3, logArray)
def getJagVersion(fileName):
    """Gets the jaguar version from a jaguar/qsite output file"""
    with open(fileName) as fh:
        for raw in fh:
            text = raw.lower()
            if "jaguar version" not in text and "qsite version" not in text:
                continue
            # Keep only what follows "version", then squeeze out noise.
            text = text.replace(",", "").split("version")[-1]
            text = text.replace(" ", "").replace("|", "")
            # "<version>release<release>" -> "<version>.<release>"
            parts = text.split("release")
            return (parts[0] + "." + parts[1]).strip()
def optionTest(aName, aValue, tolOverrides):
    """
    Parses input arguments. Can be used on either command line or config
    file.

    :param aName: Name of the parameter
    :type aName: str

    :param aValue: Parameter's value
    :type aValue: str

    :param tolOverrides: Tolerances. Results are appended.
    :type tolOverrides: dict
    """
    global minPrecisionPercent
    global minPrecision

    # A trailing '%' on the value marks a percentage tolerance; move the
    # marker onto the key and keep the bare number.
    if "%" in aValue:
        aName += "%"
        aValue = aValue.replace("%", "")

    if aName in ("tol", "tolerance"):
        minPrecision *= float(aValue)
        tolOverrides["tol"] = float(aValue)
    elif aName in ("tol%", "tolp"):
        minPrecisionPercent *= float(aValue)
    elif aName == "minPrecision":
        minPrecision = float(aValue)
    elif aName in ("minPrecisionPercent", "minPrecision%"):
        minPrecisionPercent = float(aValue)
    elif aName in ("log", "logfile"):
        tolOverrides["log"] = aValue
    elif aName == "config":
        tolOverrides["config"] = aValue
    elif aName == "timing":
        # Only the literal string "false" (any case) turns timing off.
        tolOverrides["timing"] = (False if aValue.lower() == "false" else
                                  bool(aValue))
    else:
        # Anything else is a per-property tolerance override: a float, or
        # the literal 'ignore' to suppress that property.
        try:
            tolOverrides[aName] = float(aValue)
        except ValueError:
            if aValue.lower() != 'ignore':
                raise
            tolOverrides[aName] = aValue
    return tolOverrides
def parseConfigFile(tolOverrides):
    """
    Parses a config file for use in this script.

    Config file follows this example format::

        log = workup.log  #Everything following a pound is a comment
        tol 2.0  #equals signs are optional
        energy_2_electron = 1e-2  #mostly, the config file is a place to
                                  #put multiple tolerance overrides.

    :param tolOverrides: A dictionary containing any known options, mostly
        tolerance overrides. The file to read is taken from
        tolOverrides["config"]. Results are added in place.
    :return: the same tolOverrides dictionary
    """
    with open(tolOverrides["config"]) as config:
        for line in config:
            # Strip comments; BUG FIX: the old blank-line check compared the
            # raw line (which still held "\n" and was always truthy).
            line = line.split("#")[0]
            if not line.strip():
                continue
            line = line.replace("=", " ")
            # Narrowed from a bare except: a ValueError covers both a
            # malformed line (unpacking fewer than two tokens) and a value
            # optionTest cannot interpret; anything else should surface.
            try:
                name, value = line.split()[:2]
                optionTest(name, value, tolOverrides)
            except ValueError:
                pass
    return tolOverrides
def n2s(number, precision=3):
    """
    Format a number compactly for the diff report ("some hackish formatting
    stuff").

    Very large/small magnitudes use scientific notation, mid-range values
    over 1000 print as integers, and everything else is rounded to
    ``precision`` significant figures. Non-numeric input is returned as its
    plain string form.

    :param number: value to format (anything str()-able)
    :param precision: significant figures for mid-range values
    :rtype: str
    """
    # Narrowed from a bare except: ValueError covers non-numeric strings
    # (and floor(nan)), TypeError covers objects float() rejects.
    try:
        number = float(str(number))
        if abs(number) > 1e5 or abs(number) < 1e-5:
            return "%.2e" % number
        elif abs(number) > 1000:
            return "%d" % number
        else:
            # Round to `precision` significant figures via the exponent.
            return "%s" % round(number,
                                -int(floor(log10(abs(number)))) + precision -
                                1)
    except (ValueError, TypeError):
        return str(number)
def getRMSD(test, ref):
    """
    Finds the RMSD between the atoms in two structure objects.

    Note: this sums squared deviations over all 3N Cartesian components but
    divides by the number of atoms N (kept as-is for backward
    compatibility).

    :param test: structure with getXYZ() and an `atom` sequence
    :param ref: reference structure of the same size
    :return: the RMSD as a float
    """
    # old_div was unnecessary here: the numpy sum is a float, so true
    # division is what always happened.
    delta = test.getXYZ() - ref.getXYZ()
    return numpy.sqrt(numpy.sum(delta * delta) / len(test.atom))
def get_mems_from_file(filename):
    """
    Scan a Jaguar outfile for memory used in each subprogram.

    :param filename: file name
    :type filename: str

    :return: Memory used for each subprogram
    :rtype: dictionary
    """
    peak_by_program = {}
    pending_mb = 0
    with open(filename) as fh:
        for line in fh:
            if "Peak memory" in line:
                # The MB figure is the second-to-last whitespace token.
                pending_mb = float(line.split()[-2])
            if "end of program" in line:
                program = line.split()[-1]
                # Keep the largest figure seen for each subprogram and
                # reset the buffer for the next section.
                if program in peak_by_program:
                    peak_by_program[program] = max(peak_by_program[program],
                                                   pending_mb)
                else:
                    peak_by_program[program] = pending_mb
                pending_mb = 0
    return peak_by_program
def getMemDiffs(test, ref, tolOverrides):
    """
    Finds the difference in memory usage (for each subprogram) between the
    two runs. This diff is skipped unless a tolerance is manually given.

    :param test: Name of the test output file
    :type test: str

    :param ref: Name of the reference output file
    :type ref: str

    :param tolOverrides: Tolerances for specific tests. This function is
        interested in Memory% which provides a minimum tolerance as a
        percent difference.
    :type tolOverrides: dict

    :return: Memory differences of each executable type between the two
        files
    :rtype: list
    """
    # Memory comparison is opt-in: without a Memory% override do nothing.
    if 'Memory%' not in tolOverrides:
        return []
    # old_div removed -- both operands are floats, so this was always true
    # division.
    percentTol = tolOverrides['Memory%'] / 100.0
    testMem = get_mems_from_file(test)
    refMem = get_mems_from_file(ref)
    memDiffs = []
    # Only programs recorded in BOTH files give a meaningful comparison
    # (was: iterate the union and skip non-shared keys).
    for key in set(testMem) & set(refMem):
        # ...and only if memory usage was actually recorded (non-zero) in
        # both files.
        if testMem[key] != 0 and refMem[key] != 0:
            if abs(testMem[key] - refMem[key]) / refMem[key] > percentTol:
                memDiffs.append(("Program " + key, "%dMb" % testMem[key],
                                 "diff = %dMb" %
                                 (testMem[key] - refMem[key])))
    return memDiffs
if __name__ == "__main__":
    """
    CLI tool to test and rerun diff comparisons using the jaguar/qsite
    "diff" function.

    usage: $SCHRODINGER/run jaguarDiff.py file.out reference.out
    """
    try:

        def printHelp():
            """prints a usage message."""
            print(" Finds differences between two Jaguar/QSite output files.")
            print(" Usage:")
            print(
                " $SCHRODINGER/run jaguarDiff.py file.out reference.out [tol=1.0 log=workup.log config=config.txt value=1e-4]"
            )
            print("file.out - Path of the file to be compared")
            print("reference.out - Path of the reference file")
            print(" Optional parameters:")
            print(
                "tol=x - A multipler to by which loosen ALL tolerances"
            )
            print(
                "log - Path to an optional output log. if it is not provided, the output is written to the console."
            )
            print(
                "config - A configuration file format as follows"
            )
            print(
                " Any text after a '#' character is a comment, and is ignored")
            print(" all other text is in key/value pairs, one per line. ")
            print(
                " possible keys are: log, tol, and any value that appears in the output file"
            )
            print(" i.e.")
            print(" value = 1e-4")
            print(" overrides the tolerance for 'value' to 1e-4")
            print(
                "All other arguments are taken to be tolerance overrides for specific values"
            )

        from sys import argv

        # Show usage on -h or when fewer than two file arguments are given;
        # otherwise forward everything after the script name to jaguarDiff.
        if "-h" in argv or len(argv) < 3:
            printHelp()
        else:
            jaguarDiff(*argv[1:])
    except KeyboardInterrupt as E:
        # Ctrl-C during a long diff: report and exit without a traceback.
        print("User exited. Analysis not finished: ", str(E))