Source code for schrodinger.test.stu.outcomes.custom.glide

import glob

from schrodinger import adapter
from schrodinger import structure
from schrodinger.application.glide import glide
from schrodinger.infra import mm
from schrodinger.structutils.measure import measure_bond_angle
from schrodinger.structutils.rmsd import calculate_in_place_rmsd
from schrodinger.test.stu.outcomes.failures import WorkupFailure

try:
    from schrodinger.application.scisol.packages.wscore import \
        scoring_report as wscore_scoring_report
except ImportError:
    pass


def _get_property(ct, propname, filename, index):
    """
    Thin wrapper to get a property from a CT, returning an error message
    that will be usable by users of STU if the property doesn't exist.
    (filename and index are only used for constructing the error message.)
    """
    try:
        return ct.property[propname]
    except KeyError:
        raise WorkupFailure("Structure #%d in '%s' does not have "
                            "property '%s'" % (index, filename, propname))


[docs]def property_is_consistent(filename, propname, tolerance=0.0):
    """
    Check that the value of a numeric property for all structures in a file are
    within 'tolerance' of the value for the first structure in the file.
    """
    reader = structure.StructureReader(filename)
    try:
        refct = next(reader)
    except StopIteration:
        raise WorkupFailure("File '%s' has no structures" % filename)
    refval = _get_property(refct, propname, filename, 1)
    for index, ct in enumerate(reader, 2):
        val = _get_property(ct, propname, filename, index)
        if abs(val - refval) > tolerance:
            raise WorkupFailure(
                "Property %s is different for structure #%d "
                "in file '%s': abs(%g-%g) > %g" %
                (propname, index, filename, val, refval, tolerance))
    return True


[docs]def properties_do_not_exist(filename, *properties):
    """
    Check that none of the structures in the specified file have any of the
    specified properties.
    """
    for index, ct in enumerate(structure.StructureReader(filename), 1):
        for property in properties:
            if property in ct.property:
                raise WorkupFailure("CT %d in '%s' has property '%s'" %
                                    (index, filename, property))
    return True


def _compare_prop(ct1, ct2, prop, tol=1e-6):
    v1 = ct1.property[prop]
    v2 = ct2.property[prop]
    if abs(v1 - v2) > tol:
        raise WorkupFailure(f"Inconsistent '{prop}': {v1} != {v2}")


[docs]def score_input_pose_test(dock_raw, sip_raw):
    """
    Compare the second pose from the docking raw file with the first pose from
    the score-in-place raw file. The GlideScore and coordinates should be
    essentially identical, and the docked pose should have the
    b_i_glide_inputpose property.
    """
    ct1 = structure.Structure.read(dock_raw, index=2)
    ct2 = structure.Structure.read(sip_raw)
    if not ct1.property['b_i_glide_inputpose']:
        raise WorkupFailure("Docked pose #2 lacks inputpose property")
    _compare_prop(ct1, ct2, 'r_i_glide_gscore')
    atomlist1 = list(range(1, ct1.atom_total + 1))
    atomlist2 = list(range(1, ct2.atom_total + 1))
    rmsd = calculate_in_place_rmsd(ct1, atomlist1, ct2, atomlist2)
    if rmsd > 0.001:
        raise WorkupFailure("Bad RMSD: %f" % (rmsd))
    return True


[docs]def check_constraint(filename, ctype, label, atom, index):
    """
    Check that a grid file has the expected constraint by type, label, atom,
    and index. The index is 1-based. The atom argument is ignored if <= 0.
    """
    constraints = glide.read_constraints(filename)
    if index > len(constraints):
        raise WorkupFailure("Not enough constraints: %d > %d" %
                            (index, len(constraints)))
    constraint = constraints[index - 1]
    if constraint.type() != ctype:
        raise WorkupFailure("Wrong constraint type for constraint %d: "
                            "%s != %s" % (index, constraint.type(), ctype))
    if str(constraint) != label:
        raise WorkupFailure("Wrong constraint label for constraint %d: "
                            "%s != %s" % (index, str(constraint), label))
    if atom > 0 and constraint.atom() != atom:
        raise WorkupFailure("Wrong atom number for constraint %d: "
                            "%s != %s" % (index, constraint.atom(), atom))
    return True


[docs]def check_dihedral(filename, smarts, expected_value, tolerance):
    """
    Measure the dihedrals matching 'smarts' on the first structure in
    'filename', and make sure that they are within 'tolerance' of
    'expected_value'. All values are in degrees. It is considered a failure for
    the pattern not to match the structure, or for the pattern not to match
    four atoms.
    """
    ct = structure.Structure.read(filename)
    matches = adapter.evaluate_smarts(ct, smarts)
    if not matches:
        raise WorkupFailure("SMARTS pattern '%s' does not match structure in"
                            " '%s'" % (smarts, filename))
    for atoms in matches:
        if len(atoms) != 4:
            raise WorkupFailure("SMARTS pattern '%s' does not match 4 atoms" %
                                (smarts))
        dih = ct.measure(*atoms)
        delta = (dih - expected_value) % 360
        if delta > 180.0:
            delta = 360.0 - delta
        if delta > tolerance:
            raise WorkupFailure("Dihedral %d %d %d %d = %f is not within"
                                " %f deg of expected value %f" %
                                (atoms[0], atoms[1], atoms[2], atoms[3], dih,
                                 tolerance, expected_value))
    return True


# The first three atoms from each SMARTS pattern below define an angle. For a
# given angle, only the allowed_angles from the FIRST matching pattern are
# used.
#
# This is a simplistic model, since we are only looking for extreme
# distortions. We don't try to guess the hybridization of the atom, but
# allow angles that would be reasonable for any of sp, sp2, sp3.
ALLOWED_ANGLES = [
    # (SMARTS, [allowed angles])
    ('*1**1', [60.0]),
    ('*1***1', [90.0]),
    # 100.0 below to be more lenient and to account for sulfur
    ('*~[#6,#7,#8,#16,#17]~*', [100.0, 120.0, 180.0]),
]


def _find_distortions(ct, tolerance=15.0, max_distortions=None):
    """
    Return a list of distortions given a structure. A distortion is an angle
    that's different from one of the ideal angles by more than "tolerance" in
    degrees. Returns a list of tuples (atoms, angle, distortion), where atoms
    is a tuple of three ints.
    """
    distortions = []
    seen = set()
    for smarts, allowed_angles in ALLOWED_ANGLES:
        for matching_atom_indexes in adapter.evaluate_smarts(ct, smarts):
            angle_atom_indexes = tuple(matching_atom_indexes[:3])
            if angle_atom_indexes in seen:
                continue
            seen.add(angle_atom_indexes)
            atoms = [ct.atom[a] for a in angle_atom_indexes]
            angle = measure_bond_angle(*atoms)
            min_diff = min(abs(angle - a) for a in allowed_angles)
            if min_diff > tolerance:
                distortions.append((angle_atom_indexes, angle, min_diff))
                if (max_distortions is not None and
                        len(distortions) >= max_distortions):
                    break
    return distortions


[docs]def check_libfile_for_required_constraints(basename, nreqcons):
    """
    Checks to see that all of the returned poses satisfy the minimum number of
    constraints. Should no poses be reported, this test will pass. We are
    only trying to catch returned poses lacking the required number
    of constraints here.
    """

    filenames = [
        f for f in glob.glob("%s*" % basename) if "_raw" in f or "_lib" in f
    ]

    if len(filenames) == 0:
        raise WorkupFailure("No lib/raw files found in current directory")
    elif len(filenames) > 1:
        raise WorkupFailure("More than one lib/raw file found in current"
                            " directory")

    filename = filenames[0]

    cons_labels = None
    for index, st in enumerate(structure.StructureReader(filename), 1):
        if cons_labels is None:
            cons_labels = [
                key for key in list(st.property)
                if key.startswith("b_glide_cons")
            ]
        cons_met = 0
        for cons_label in cons_labels:
            if st.property[cons_label]:
                cons_met += 1

        if cons_met < nreqcons:
            raise WorkupFailure("Pose %s only satisfied %s constraint(s)" %
                                (index, cons_met))

    return True


[docs]def check_for_distorted_angles(filename, tolerance=15.0, first=1, last=None):
    """
    Passes if all the structures in a file are "clean", meaning that no bond
    angles are more than 'tolerance' degrees away from their expected values.
    """
    messages = []
    for index, ct in enumerate(structure.StructureReader(filename, index=first),
                               first):
        if last and index > last:
            break
        distortions = _find_distortions(ct, tolerance, max_distortions=1)
        if distortions:
            atoms, angle, diff = distortions[0]
            message = "structure %d is distorted; first distortion: " \
                "atoms=%s, angle=%.1f, diff=%.1f" % (index, atoms, angle, diff)
            messages.append(message)
    if messages:
        raise WorkupFailure("Found distorted angles in '%s'\n%s" %
                            (filename, "\n".join(messages)))
    return True


def _check_xp_components(index, urh, tol=1e-6):
    """
    Make sure that the r_glide_XP properties present in the current block
    add up to r_glide_XP_GScore.
    """
    score = 0.0
    for key in mm.m2io_get_data_names(urh, mm.M2IO_REAL_TYPES):
        val = mm.m2io_get_real(urh, [key]).pop()
        if key == 'r_glide_XP_GScore':
            expected_score = val
        elif key.startswith('r_glide_XP'):
            score += val
    if abs(score - expected_score) > tol:
        raise WorkupFailure("XP Score mismatch for pose %d: %f != %f" %
                            (index, score, expected_score))


[docs]def check_xp_components(filename):
    """
    Given a lib file generated by an XP job with descriptors, make sure that
    the sum of all the scoring components equals the total score.
    """
    for index, st in enumerate(structure.StructureReader(filename), 1):
        urh = mm.mmct_ct_m2io_get_unrequested_handle(st.handle)
        _check_xp_components(index, urh)
        mm.m2io_goto_next_block(urh, 'm_glide_XPvisualizer')
        _check_xp_components(index, urh)
        mm.m2io_leave_block(urh)
    return True


[docs]def check_wscore_components(filename):
    """
    Given a lib file generated by a Glide docking job with WScore mode active,
    make sure that the sum of all the scoring components equals the total score.
    """
    for index, st in enumerate(structure.StructureReader(filename), 1):
        expected_score = st.property["r_ws_wscore"]
        if expected_score == 10000.0:
            # We don't expect the components to add up for "bad" poses
            return True
        urh = mm.mmct_ct_m2io_get_unrequested_handle(st.handle)
        mm.m2io_goto_next_block(urh, 'm_wsviz_data')
        _check_wscore_components(index, urh, expected_score)
        mm.m2io_leave_block(urh)
    return True


def _check_wscore_components(index, urh, expected_score, tol=1e-6):
    """
    Make sure that the r_glide_XP properties present in the current block
    add up to r_glide_XP_GScore.
    """
    score = 0.0
    for key in mm.m2io_get_data_names(urh, mm.M2IO_REAL_TYPES):
        val = mm.m2io_get_real(urh, [key]).pop()
        if key.startswith('r_wsviz'):
            score += val
    if abs(score - expected_score) > tol:
        raise WorkupFailure("WScore mismatch for pose %d: %f != %f" %
                            (index, score, expected_score))


[docs]def check_poses_for_specific_scoring_term(filename,
                                          expected_term,
                                          expected_value=None,
                                          min_abs_expected_value=None,
                                          expected_i_lig_atom=None,
                                          expected_i_prot_atom=None):
    """
    Make sure that a particular scoring term has a non-zero value for each of the ct's
    in the input filename. Note, we will skip any cts which have the Glide receptor
    property, "b_glide_receptor" set to True.

    If the term is expected to have a specific value, the expected_value optional
    argument can be used.

    If the term is expected to have a minimum value, min_abs_expected_value can be
    supplied. The absolute value is used so that this can refer to a reward
    which may be negative, or a penalty which may be positive. This is a less
    than or equal comparison.

    If the term is expected to only occur on a specific ligand
    or protein atom, then expected_i_lig_atom or expected_i_prot_atom can be
    set. If both are supplied, then both are used.

    expected_i_lig_atom and expected_i_prot_atom can be an int
    or a list of int if the penalty applies to multiple atoms.

    To pass a list of ints in a stu workup, enclose the list in quotes.
    For example: expected_i_lig_atom="[10, 11, 14, 16]"
    """

    if isinstance(expected_i_lig_atom, str):
        expected_i_lig_atom = eval(expected_i_lig_atom)

    if isinstance(expected_i_prot_atom, str):
        expected_i_prot_atom = eval(expected_i_prot_atom)

    reporter = wscore_scoring_report.WScoreSipReport()
    wscore_sip_poses = list(reporter.processInputPoses(filename))
    if len(wscore_sip_poses) != 1:
        error = "Expected only a single pose in pv_file. Found {}".\
                format(len(wscore_sip_poses))
        raise RuntimeError(error)

    wscore_sip_pose = wscore_sip_poses[0]
    PenaltyWScoreSipTerm = wscore_scoring_report.PenaltyWScoreSipTerm
    LigandStrainWScoreSipTerm = wscore_scoring_report.LigandStrainWScoreSipTerm
    RewardWScoreSipTerm = wscore_scoring_report.RewardWScoreSipTerm

    penalty_terms = wscore_sip_pose.termsByType(PenaltyWScoreSipTerm)
    strain_terms = wscore_sip_pose.termsByType(LigandStrainWScoreSipTerm)
    reward_terms = wscore_sip_pose.termsByType(RewardWScoreSipTerm)

    wscore_terms = []
    for term_lists in [penalty_terms, strain_terms, reward_terms]:
        wscore_terms.extend(term_lists)

    _check_poses_for_specific_scoring_term(wscore_terms, expected_term,
                                           expected_value,
                                           min_abs_expected_value,
                                           expected_i_lig_atom,
                                           expected_i_prot_atom)

    # Return True if all comparisons successful (no exceptions raised)
    return True


def _check_poses_for_specific_scoring_term(wscore_terms,
                                           expected_term,
                                           expected_value=None,
                                           min_abs_expected_value=None,
                                           expected_i_lig_atom=None,
                                           expected_i_prot_atom=None):
    """
    Chack for a non-zero value of a particular scoring term for a given ct.
    If a specific value is sought, the expected_value optional
    argument can be used.

    If the penalty is expected to only occur on a specific ligand
    or protein atom, then ligand_iatom or protein_iatom can be set.
    If both are set, then both are used.

    """

    matching_terms = [
        term for term in wscore_terms if term.title == expected_term
    ]

    found_non_zero_term = False
    for term in matching_terms:
        if term.value:
            found_non_zero_term = True
            break

    if not found_non_zero_term:
        raise WorkupFailure("Pose did not have a non-zero value for "
                            "%s " % (expected_term))

    if expected_value:
        found_matching_value = False
        for term in matching_terms:
            if term.value and term.value == expected_value:
                found_matching_value = True
                break
        if not found_matching_value:
            raise WorkupFailure("Pose did not have a value of %f for "
                                "%s " % (expected_value, expected_term))

    if min_abs_expected_value:
        found_matching_min_abs_value = False
        for term in matching_terms:
            if term.value and abs(term.value) >= min_abs_expected_value:
                found_matching_min_abs_value = True
                break
        if not found_matching_min_abs_value:
            raise WorkupFailure("Pose did not have a min abs value of %f for "
                                "%s " % (min_abs_expected_value, expected_term))

    if not expected_i_lig_atom and not expected_i_prot_atom:
        return

    for kind, expected_i_atom in zip(
        ['lig', 'prot'], [expected_i_lig_atom, expected_i_prot_atom]):
        if not expected_i_atom:
            continue

        # Always convert expected_i_atom to a list
        # even if it is a single element list
        expected_i_atom_type = type(expected_i_atom)
        if expected_i_atom_type is int:
            expected_i_atom = [expected_i_atom]
        elif expected_i_atom_type is list:
            pass  # Nothing to do
        else:
            error = f"Unknown expected_i_{kind}_atom type: {expected_i_atom_type}"
            raise RuntimeError(error)

        expected_i_atom = set(expected_i_atom)

        observed_i_atom = set()

        # Collect the list of all affected atoms involved with this term
        for term in matching_terms:
            attribute_name = f"{kind}_atoms"
            term_atoms = getattr(term, attribute_name)
            observed_i_atom.update(term_atoms)

        # Might be better to just check that expected_i_atom
        # is in observed_i_atom but have not yet
        # encountered a case that requires this.
        # This might be if there are multiple penalties
        # of the same kind but only interested in a specific one.
        if not expected_i_atom.issubset(observed_i_atom):
            error = "pose did not have {} on {} atom(s) {} ".format(
                expected_term, kind, expected_i_atom)
            error += f"\nExpected atoms: {expected_i_atom}"
            error += f"\nObserved atoms: {observed_i_atom}"
            raise WorkupFailure(error)


[docs]def measure_smarts(filename, smarts, expected_value, tolerance):
    """
    Measure the distance/angle/dihedral matching `smarts` on each structure in
    `filename`, and make sure that they are within `tolerance` of
    `expected_value`. The SMARTS pattern must match 2, 3, or 4 atoms for
    measuring distance, angle, or dihedrals, respectively.
    """
    with structure.StructureReader(filename) as reader:
        for i, st in enumerate(reader, 1):
            matches = adapter.evaluate_smarts(st, smarts)
            for match in matches:
                x = st.measure(*match)
                if len(match) == 4:
                    delta = (x - expected_value) % 360
                    if delta > 180.0:
                        delta = 360.0 - delta
                else:
                    delta = abs(x - expected_value)
                if delta > tolerance:
                    msg = (f'Measurement for atoms {match} = {x:.2f} '
                           f'of {filename}:{i} not within {expected_value} '
                           f'+/- {tolerance}')
                    raise WorkupFailure(msg)