Source code for schrodinger.test.stu.outcomes.custom.glide

import glob

from schrodinger import adapter
from schrodinger import structure
from schrodinger.application.glide import glide
from schrodinger.infra import mm
from schrodinger.structutils.measure import measure_bond_angle
from schrodinger.structutils.rmsd import calculate_in_place_rmsd
from schrodinger.test.stu.outcomes.failures import WorkupFailure

try:
    from schrodinger.application.scisol.packages.wscore import \
        scoring_report as wscore_scoring_report
except ImportError:
    pass


def _get_property(ct, propname, filename, index):
    """
    Thin wrapper to get a property from a CT, returning an error message
    that will be usable by users of STU if the property doesn't exist.
    (filename and index are only used for constructing the error message.)
    """
    try:
        return ct.property[propname]
    except KeyError:
        raise WorkupFailure("Structure #%d in '%s' does not have "
                            "property '%s'" % (index, filename, propname))


[docs]def property_is_consistent(filename, propname, tolerance=0.0): """ Check that the value of a numeric property for all structures in a file are within 'tolerance' of the value for the first structure in the file. """ reader = structure.StructureReader(filename) try: refct = next(reader) except StopIteration: raise WorkupFailure("File '%s' has no structures" % filename) refval = _get_property(refct, propname, filename, 1) for index, ct in enumerate(reader, 2): val = _get_property(ct, propname, filename, index) if abs(val - refval) > tolerance: raise WorkupFailure( "Property %s is different for structure #%d " "in file '%s': abs(%g-%g) > %g" % (propname, index, filename, val, refval, tolerance)) return True
[docs]def properties_do_not_exist(filename, *properties): """ Check that none of the structures in the specified file have any of the specified properties. """ for index, ct in enumerate(structure.StructureReader(filename), 1): for property in properties: if property in ct.property: raise WorkupFailure("CT %d in '%s' has property '%s'" % (index, filename, property)) return True
def _compare_prop(ct1, ct2, prop, tol=1e-6): v1 = ct1.property[prop] v2 = ct2.property[prop] if abs(v1 - v2) > tol: raise WorkupFailure(f"Inconsistent '{prop}': {v1} != {v2}")
[docs]def score_input_pose_test(dock_raw, sip_raw): """ Compare the second pose from the docking raw file with the first pose from the score-in-place raw file. The GlideScore and coordinates should be essentially identical, and the docked pose should have the b_i_glide_inputpose property. """ ct1 = structure.Structure.read(dock_raw, index=2) ct2 = structure.Structure.read(sip_raw) if not ct1.property['b_i_glide_inputpose']: raise WorkupFailure("Docked pose #2 lacks inputpose property") _compare_prop(ct1, ct2, 'r_i_glide_gscore') atomlist1 = list(range(1, ct1.atom_total + 1)) atomlist2 = list(range(1, ct2.atom_total + 1)) rmsd = calculate_in_place_rmsd(ct1, atomlist1, ct2, atomlist2) if rmsd > 0.001: raise WorkupFailure("Bad RMSD: %f" % (rmsd)) return True
[docs]def check_constraint(filename, ctype, label, atom, index): """ Check that a grid file has the expected constraint by type, label, atom, and index. The index is 1-based. The atom argument is ignored if <= 0. """ constraints = glide.read_constraints(filename) if index > len(constraints): raise WorkupFailure("Not enough constraints: %d > %d" % (index, len(constraints))) constraint = constraints[index - 1] if constraint.type() != ctype: raise WorkupFailure("Wrong constraint type for constraint %d: " "%s != %s" % (index, constraint.type(), ctype)) if str(constraint) != label: raise WorkupFailure("Wrong constraint label for constraint %d: " "%s != %s" % (index, str(constraint), label)) if atom > 0 and constraint.atom() != atom: raise WorkupFailure("Wrong atom number for constraint %d: " "%s != %s" % (index, constraint.atom(), atom)) return True
[docs]def check_dihedral(filename, smarts, expected_value, tolerance): """ Measure the dihedrals matching 'smarts' on the first structure in 'filename', and make sure that they are within 'tolerance' of 'expected_value'. All values are in degrees. It is considered a failure for the pattern not to match the structure, or for the pattern not to match four atoms. """ ct = structure.Structure.read(filename) matches = adapter.evaluate_smarts(ct, smarts) if not matches: raise WorkupFailure("SMARTS pattern '%s' does not match structure in" " '%s'" % (smarts, filename)) for atoms in matches: if len(atoms) != 4: raise WorkupFailure("SMARTS pattern '%s' does not match 4 atoms" % (smarts)) dih = ct.measure(*atoms) delta = (dih - expected_value) % 360 if delta > 180.0: delta = 360.0 - delta if delta > tolerance: raise WorkupFailure("Dihedral %d %d %d %d = %f is not within" " %f deg of expected value %f" % (atoms[0], atoms[1], atoms[2], atoms[3], dih, tolerance, expected_value)) return True
# The first three atoms from each SMARTS pattern below define an angle. For a # given angle, only the allowed_angles from the FIRST matching pattern are # used. # # This is a simplistic model, since we are only looking for extreme # distortions. We don't try to guess the hybridization of the atom, but # allow angles that would be reasonable for any of sp, sp2, sp3. ALLOWED_ANGLES = [ # (SMARTS, [allowed angles]) ('*1**1', [60.0]), ('*1***1', [90.0]), # 100.0 below to be more lenient and to account for sulfur ('*~[#6,#7,#8,#16,#17]~*', [100.0, 120.0, 180.0]), ] def _find_distortions(ct, tolerance=15.0, max_distortions=None): """ Return a list of distortions given a structure. A distortion is an angle that's different from one of the ideal angles by more than "tolerance" in degrees. Returns a list of tuples (atoms, angle, distortion), where atoms is a tuple of three ints. """ distortions = [] seen = set() for smarts, allowed_angles in ALLOWED_ANGLES: for matching_atom_indexes in adapter.evaluate_smarts(ct, smarts): angle_atom_indexes = tuple(matching_atom_indexes[:3]) if angle_atom_indexes in seen: continue seen.add(angle_atom_indexes) atoms = [ct.atom[a] for a in angle_atom_indexes] angle = measure_bond_angle(*atoms) min_diff = min(abs(angle - a) for a in allowed_angles) if min_diff > tolerance: distortions.append((angle_atom_indexes, angle, min_diff)) if (max_distortions is not None and len(distortions) >= max_distortions): break return distortions
[docs]def check_libfile_for_required_constraints(basename, nreqcons): """ Checks to see that all of the returned poses satisfy the minimum number of constraints. Should no poses be reported, this test will pass. We are only trying to catch returned poses lacking the required number of constraints here. """ filenames = [ f for f in glob.glob("%s*" % basename) if "_raw" in f or "_lib" in f ] if len(filenames) == 0: raise WorkupFailure("No lib/raw files found in current directory") elif len(filenames) > 1: raise WorkupFailure("More than one lib/raw file found in current" " directory") filename = filenames[0] cons_labels = None for index, st in enumerate(structure.StructureReader(filename), 1): if cons_labels is None: cons_labels = [ key for key in list(st.property) if key.startswith("b_glide_cons") ] cons_met = 0 for cons_label in cons_labels: if st.property[cons_label]: cons_met += 1 if cons_met < nreqcons: raise WorkupFailure("Pose %s only satisfied %s constraint(s)" % (index, cons_met)) return True
[docs]def check_for_distorted_angles(filename, tolerance=15.0, first=1, last=None): """ Passes if all the structures in a file are "clean", meaning that no bond angles are more than 'tolerance' degrees away from their expected values. """ messages = [] for index, ct in enumerate(structure.StructureReader(filename, index=first), first): if last and index > last: break distortions = _find_distortions(ct, tolerance, max_distortions=1) if distortions: atoms, angle, diff = distortions[0] message = "structure %d is distorted; first distortion: " \ "atoms=%s, angle=%.1f, diff=%.1f" % (index, atoms, angle, diff) messages.append(message) if messages: raise WorkupFailure("Found distorted angles in '%s'\n%s" % (filename, "\n".join(messages))) return True
def _check_xp_components(index, urh, tol=1e-6): """ Make sure that the r_glide_XP properties present in the current block add up to r_glide_XP_GScore. """ score = 0.0 for key in mm.m2io_get_data_names(urh, mm.M2IO_REAL_TYPES): val = mm.m2io_get_real(urh, [key]).pop() if key == 'r_glide_XP_GScore': expected_score = val elif key.startswith('r_glide_XP'): score += val if abs(score - expected_score) > tol: raise WorkupFailure("XP Score mismatch for pose %d: %f != %f" % (index, score, expected_score))
[docs]def check_xp_components(filename): """ Given a lib file generated by an XP job with descriptors, make sure that the sum of all the scoring components equals the total score. """ for index, st in enumerate(structure.StructureReader(filename), 1): urh = mm.mmct_ct_m2io_get_unrequested_handle(st.handle) _check_xp_components(index, urh) mm.m2io_goto_next_block(urh, 'm_glide_XPvisualizer') _check_xp_components(index, urh) mm.m2io_leave_block(urh) return True
[docs]def check_wscore_components(filename): """ Given a lib file generated by a Glide docking job with WScore mode active, make sure that the sum of all the scoring components equals the total score. """ for index, st in enumerate(structure.StructureReader(filename), 1): expected_score = st.property["r_ws_wscore"] if expected_score == 10000.0: # We don't expect the components to add up for "bad" poses return True urh = mm.mmct_ct_m2io_get_unrequested_handle(st.handle) mm.m2io_goto_next_block(urh, 'm_wsviz_data') _check_wscore_components(index, urh, expected_score) mm.m2io_leave_block(urh) return True
def _check_wscore_components(index, urh, expected_score, tol=1e-6): """ Make sure that the r_glide_XP properties present in the current block add up to r_glide_XP_GScore. """ score = 0.0 for key in mm.m2io_get_data_names(urh, mm.M2IO_REAL_TYPES): val = mm.m2io_get_real(urh, [key]).pop() if key.startswith('r_wsviz'): score += val if abs(score - expected_score) > tol: raise WorkupFailure("WScore mismatch for pose %d: %f != %f" % (index, score, expected_score))
[docs]def check_poses_for_specific_scoring_term(filename, expected_term, expected_value=None, min_abs_expected_value=None, expected_i_lig_atom=None, expected_i_prot_atom=None): """ Make sure that a particular scoring term has a non-zero value for each of the ct's in the input filename. Note, we will skip any cts which have the Glide receptor property, "b_glide_receptor" set to True. If the term is expected to have a specific value, the expected_value optional argument can be used. If the term is expected to have a minimum value, min_abs_expected_value can be supplied. The absolute value is used so that this can refer to a reward which may be negative, or a penalty which may be positive. This is a less than or equal comparison. If the term is expected to only occur on a specific ligand or protein atom, then expected_i_lig_atom or expected_i_prot_atom can be set. If both are supplied, then both are used. expected_i_lig_atom and expected_i_prot_atom can be an int or a list of int if the penalty applies to multiple atoms. To pass a list of ints in a stu workup, enclose the list in quotes. For example: expected_i_lig_atom="[10, 11, 14, 16]" """ if isinstance(expected_i_lig_atom, str): expected_i_lig_atom = eval(expected_i_lig_atom) if isinstance(expected_i_prot_atom, str): expected_i_prot_atom = eval(expected_i_prot_atom) reporter = wscore_scoring_report.WScoreSipReport() wscore_sip_poses = list(reporter.processInputPoses(filename)) if len(wscore_sip_poses) != 1: error = "Expected only a single pose in pv_file. Found {}".\ format(len(wscore_sip_poses)) raise RuntimeError(error) wscore_sip_pose = wscore_sip_poses[0] PenaltyWScoreSipTerm = wscore_scoring_report.PenaltyWScoreSipTerm LigandStrainWScoreSipTerm = wscore_scoring_report.LigandStrainWScoreSipTerm RewardWScoreSipTerm = wscore_scoring_report.RewardWScoreSipTerm penalty_terms = wscore_sip_pose.termsByType(PenaltyWScoreSipTerm) strain_terms = wscore_sip_pose.termsByType(LigandStrainWScoreSipTerm) reward_terms = wscore_sip_pose.termsByType(RewardWScoreSipTerm) wscore_terms = [] for term_lists in [penalty_terms, strain_terms, reward_terms]: wscore_terms.extend(term_lists) _check_poses_for_specific_scoring_term(wscore_terms, expected_term, expected_value, min_abs_expected_value, expected_i_lig_atom, expected_i_prot_atom) # Return True if all comparisons successful (no exceptions raised) return True
def _check_poses_for_specific_scoring_term(wscore_terms, expected_term, expected_value=None, min_abs_expected_value=None, expected_i_lig_atom=None, expected_i_prot_atom=None): """ Chack for a non-zero value of a particular scoring term for a given ct. If a specific value is sought, the expected_value optional argument can be used. If the penalty is expected to only occur on a specific ligand or protein atom, then ligand_iatom or protein_iatom can be set. If both are set, then both are used. """ matching_terms = [ term for term in wscore_terms if term.title == expected_term ] found_non_zero_term = False for term in matching_terms: if term.value: found_non_zero_term = True break if not found_non_zero_term: raise WorkupFailure("Pose did not have a non-zero value for " "%s " % (expected_term)) if expected_value: found_matching_value = False for term in matching_terms: if term.value and term.value == expected_value: found_matching_value = True break if not found_matching_value: raise WorkupFailure("Pose did not have a value of %f for " "%s " % (expected_value, expected_term)) if min_abs_expected_value: found_matching_min_abs_value = False for term in matching_terms: if term.value and abs(term.value) >= min_abs_expected_value: found_matching_min_abs_value = True break if not found_matching_min_abs_value: raise WorkupFailure("Pose did not have a min abs value of %f for " "%s " % (min_abs_expected_value, expected_term)) if not expected_i_lig_atom and not expected_i_prot_atom: return for kind, expected_i_atom in zip( ['lig', 'prot'], [expected_i_lig_atom, expected_i_prot_atom]): if not expected_i_atom: continue # Always convert expected_i_atom to a list # even if it is a single element list expected_i_atom_type = type(expected_i_atom) if expected_i_atom_type is int: expected_i_atom = [expected_i_atom] elif expected_i_atom_type is list: pass # Nothing to do else: error = f"Unknown expected_i_{kind}_atom type: {expected_i_atom_type}" raise RuntimeError(error) expected_i_atom = set(expected_i_atom) observed_i_atom = set() # Collect the list of all affected atoms involved with this term for term in matching_terms: attribute_name = f"{kind}_atoms" term_atoms = getattr(term, attribute_name) observed_i_atom.update(term_atoms) # Might be better to just check that expected_i_atom # is in observed_i_atom but have not yet # encountered a case that requires this. # This might be if there are multiple penalties # of the same kind but only interested in a specific one. if not expected_i_atom.issubset(observed_i_atom): error = "pose did not have {} on {} atom(s) {} ".format( expected_term, kind, expected_i_atom) error += f"\nExpected atoms: {expected_i_atom}" error += f"\nObserved atoms: {observed_i_atom}" raise WorkupFailure(error)
[docs]def measure_smarts(filename, smarts, expected_value, tolerance): """ Measure the distance/angle/dihedral matching `smarts` on each structure in `filename`, and make sure that they are within `tolerance` of `expected_value`. The SMARTS pattern must match 2, 3, or 4 atoms for measuring distance, angle, or dihedrals, respectively. """ with structure.StructureReader(filename) as reader: for i, st in enumerate(reader, 1): matches = adapter.evaluate_smarts(st, smarts) for match in matches: x = st.measure(*match) if len(match) == 4: delta = (x - expected_value) % 360 if delta > 180.0: delta = 360.0 - delta else: delta = abs(x - expected_value) if delta > tolerance: msg = (f'Measurement for atoms {match} = {x:.2f} ' f'of {filename}:{i} not within {expected_value} ' f'+/- {tolerance}') raise WorkupFailure(msg)