Source code for schrodinger.test.stu.outcomes.prime_workups

import operator
import re
from past.utils import old_div

import numpy as np

from schrodinger import structure
from schrodinger.protein.analysis import Report as ProteinReport
from schrodinger.structutils import analyze
from schrodinger.structutils import measure
from schrodinger.structutils.interactions import hbond
from schrodinger.test import structurecheck


[docs]def partial_structure_reader(filename, frames_to_check=None):
    """
    Iterator that returns the specified frames of the structures in
    filename {structure} By default return all frames, but if frames_to_check
    { list, set or  anything with an "__in__" method" of integers} is set
    then the only frames for which their index (starting with 0) are in
    frames to check are returned.  Note that even if a list is passed with
    integers out of order, the frames will always be yielded in the
    order that they are in the file.
    """
    if isinstance(frames_to_check, str):
        frames_to_check = eval(frames_to_check)
    if (type(frames_to_check) == int):
        frames_to_check = [frames_to_check]
    for ict, ct in enumerate(structure.StructureReader(filename)):
        if (frames_to_check is not None and ict not in frames_to_check):
            continue
        yield ct


[docs]def convert_plop_name_to_asl(input_str):
    """
    Convert a string specifying a residue or atom into an asl expresion
    Input should be in the form <chain>:<resnum><inscode> or
    <chain>:<resnum><inscore>:<pdb atom name>

    <chain> is the chain id or a _ or no character if space
    <resnum> is the residue number (possibly negative)
    <inscode> is the pdb insertion code
    <pdb atom name> The 4 letter pdb atom name. Either spaces or _ can be used
    for padding to get to 4 characters. Examples A:12 :23A B:-1 _:12

    The output is a ASL expression stored as a string that corresponds
    to the same atom(s) in the input.
    """
    atom_match = re.search(r"(.{0,1}):(-{0,1}\d+)([^:]{0,1}):(.+)", input_str)
    res_match = re.search(r"(.{0,1}):(-{0,1}\d+)([^:]{0,1})", input_str)
    if (atom_match):
        chain = atom_match.group(1)
        resnum = eval(atom_match.group(2))
        inscode = atom_match.group(3)
        pdbatom = atom_match.group(4)
        pdbatom = pdbatom.replace("_", " ")
    elif (res_match):
        chain = res_match.group(1)
        resnum = eval(res_match.group(2))
        inscode = res_match.group(3)
        pdbatom = None
    else:
        raise TypeError("Could not convert input: %s" % input_str)
    if (chain == "_" or chain == ""):
        chain = " "
    if (inscode == ""):
        inscode = " "
    if (pdbatom is None):
        return '(chain.name "%s" AND res.num %d AND res.inscode "%s")' % (
            chain, resnum, inscode)
    else:
        return ('(chain.name "%s" AND res.num %d AND res.inscode "%s" AND ' +
                'atom.ptype "%s")') % (chain, resnum, inscode, pdbatom)


[docs]def get_moved_residues(ct, ref_ct, tolerance):
    """
    For a given ct {structure.Stucture} and ref_ct {structure.Structure}
    with the same number of atoms in the same order, return a sorted list
    with all the residue names (__str__() evaluation of their associated
    structure._Residue class) that contains atoms that moved more than
    a tolerance {float} in Angstroms.
    """
    if (len(ct.atom) != len(ref_ct.atom)):
        raise RuntimeError('Input structures must have the same ' +
                           'number of atoms')
    residues_moved = set()
    for atom1, atom2 in zip(ct.atom, ref_ct.atom):
        if (measure.measure_distance(atom1, atom2) > tolerance):
            residues_moved.add(str(atom1.getResidue()))
    return sorted(list(residues_moved))


[docs]def get_phi_psi(ct):
    """
    Return a dictionary with the key being the name of each residues and
    the value being a 2 member list containing the PHI and PSI angles
    of that residue
    """
    output = {}
    rep = ProteinReport(ct, ['BACKBONE DIHEDRALS']).data[0]
    for point in rep.points:
        if (point.descriptor == '-'):
            continue
        vec = [i for i in point.values]
        resnum = point.descriptor.split()[1]
        resid = point.descriptor.split(':')[0] + ':' + resnum
        output[resid] = vec
    return output


[docs]def get_modified_backbone_angles(ct, ref_ct, tolerance):
    """
    For a given ct {structure.Stucture} and ref_ct {structure.Structure}
    with the same number of atoms in the same order, return a sorted list
    with all the residue names (__str__() evaluation of their associated
    structure._Residue class) that contains atoms that moved either phi
    or psi angles more than tolerance {float} in degrees
    """
    phi_psi = get_phi_psi(ct)
    ref_phi_psi = get_phi_psi(ref_ct)
    changed = set()
    for res in ref_phi_psi:
        try:
            delta_phi = min(abs(phi_psi[res][0] - ref_phi_psi[res][0]),
                            abs(phi_psi[res][0] - ref_phi_psi[res][0] - 360),
                            abs(phi_psi[res][0] - ref_phi_psi[res][0] + 360))
            delta_psi = min(abs(phi_psi[res][1] - ref_phi_psi[res][1]),
                            abs(phi_psi[res][1] - ref_phi_psi[res][1] + 360),
                            abs(phi_psi[res][1] - ref_phi_psi[res][1] - 360))
        except TypeError:
            continue
        if (delta_phi > tolerance or delta_psi > tolerance):
            changed.add(res)
    return list(changed)


[docs]def check_moved_residues(filename,
                         reference_fn,
                         moved_residues,
                         tolerance=0.01,
                         check_backbone_dihedrals=False,
                         frames_to_check=None):
    """
    STU workup that will check to see if a list of moved residues { string
    the residue names (__str__() evaluation of the structure._Residue class)
    separated by spaces} corresponds to the list of all residue that have an
    an atom that moved more than tolerance {float} between any of the
    frames_to_check in filename (see partial_structure_reader) and the first
    structure frame in reference_fn.  Returns True if the lists match, False
    if they dont
    if check_bacbone_dihedrals{logical} is True then check use the residues
    that either their phi or psi angle moved by more than tolerance degrees
    are used instead of distance in cartesian space
    """
    ref_ct = structure.Structure.read(reference_fn)
    moved_residues_list = moved_residues.split(" ")
    errors = []

    for ct in partial_structure_reader(filename, frames_to_check):
        if (check_backbone_dihedrals):
            this_moved_residues_list = get_modified_backbone_angles(
                ct, ref_ct, tolerance)
        else:
            this_moved_residues_list = get_moved_residues(ct, ref_ct, tolerance)

        should_have_moved = set(moved_residues_list) - \
                            set(this_moved_residues_list)
        should_not_have_moved = set(this_moved_residues_list) - \
                                set(moved_residues_list)
        if (should_have_moved):
            errors.append("ERROR: %s Residues did not moved %s" %
                          (ct.title, " ".join(list(should_have_moved))))
        if (should_not_have_moved):
            errors.append("ERROR: %s Residues moved %s" %
                          (ct.title, " ".join(list(should_not_have_moved))))

    if errors:
        raise AssertionError('\n'.join(errors))
    return True


[docs]def check_protein_health(filename,
                         frames_to_check=None,
                         steric_delta=0.3,
                         bond_length_deviation=0.15,
                         protein_health_kwargs=None):
    """
    Use the featurs in structurecheck to identify potential problems in
    the structure.  By default this will find any instances where
    there is a steric clash of more than 30% of the vdW radius (set
    by steric delta {float}) or any bond length deviations of more than
    0.15 angstroms (set by bond_length_deviation {float})
    The number of allowable clashes, bond length deviations and other
    structual properties can be set by passing a dictionary into
    protein_health_kwargs {dict} as decirbed in the help for
    structurecheck.ProteinReportCheck.
    frames_to_check is descirbed above
    Note that protein_health_kwargs needs to have a dictionary passed to it
    to be useful as a STU test.  Currently STU does not allow this.
    """
    if (protein_health_kwargs is None):
        protein_health_kwargs = {'STERIC CLASHES': 0, 'BOND LENGTHS': 0}
    for ct in partial_structure_reader(filename, frames_to_check):
        prc = structurecheck.ProteinReportCheck(
            ct,
            steric_delta=steric_delta,
            bond_length_deviation=bond_length_deviation)
        prc.assertProteinHealth(ct.title, protein_health_kwargs)
    return True


[docs]def measure_distance(ct1, name1, ct2, name2):
    """
    For a given structure ct1 {structure.Structure} and the Plop residue or
    atom name name1 {string, described convert_plop_name_to_asl}
    and a second ct2 {structure.Structure}, name2 {string} pair following the
    same convention, return the minimum distance between the two.  If
    either name matches no values in the structure then None is returned.
    """
    # This could be either an atom name or a residue name
    list1 = analyze.evaluate_asl(ct1, convert_plop_name_to_asl(name1))
    list2 = analyze.evaluate_asl(ct2, convert_plop_name_to_asl(name2))
    min_dist = None
    for iatom1 in list1:
        for iatom2 in list2:
            one_dist = measure.measure_distance(ct1.atom[iatom1],
                                                ct2.atom[iatom2])
            if (min_dist is None or one_dist < min_dist):
                min_dist = one_dist
    return min_dist


[docs]def check_distance(filename,
                   name1,
                   name2,
                   distance,
                   tolerance,
                   frames_to_check=None):
    """
    STU workup
    For the specified structure frames in a filename {string} return True
    if the minimum distance between the atom or residue name1 {string} is
    distance {float} +- tolerance{float} Angstroms.  Otherwise return false
    (see partial_structrue_reader for description of frames_to check)
    """
    errors = []
    for ct in partial_structure_reader(filename, frames_to_check):
        one_dist = measure_distance(ct, name1, ct, name2)
        if (abs(one_dist - distance) > tolerance):
            msg = "ERROR: %s Distance " % ct.title + \
                "{}-{}({:f} A) != {:f} +/- {:f} A".format(name1, name2, one_dist,
                    distance, tolerance)
            errors.append(msg)
    if errors:
        raise AssertionError('\n'.join(errors))
    return True


[docs]def check_torsion(filename,
                  name1,
                  name2,
                  name3,
                  name4,
                  value,
                  tolerance=10,
                  frames_to_check=None):
    """
    For the specified structure frames in a filename {string} return True if
    the torsion between atoms with names name1-4 {string} is value {float} +-
    tolerance{float} degrees. Otherwise return false (see
    partial_structure_reader for description of frames_to check) The format for
    name is <chain>:<residue number>:<pdb atom name> where a _ is replaced with
    a space (note that the <pdb atom name> should be 4 characters and padded
    with _ for spaces ie A:12:_CA_.
    """
    errors = []
    for ct in partial_structure_reader(filename, frames_to_check):
        iatoms = []
        for name in name1, name2, name3, name4:
            alist = analyze.evaluate_asl(ct, convert_plop_name_to_asl(name))
            if len(alist) != 1:
                raise RuntimeError("%s in %s should match exactly 1 atom(%d)" %
                                   (name, filename, len(alist)))
            iatoms.append(alist[0])
        one_tor = ct.measure(*iatoms)
        diff = min(abs(one_tor - value), abs(one_tor - 360 - value),
                   abs(one_tor + 360 - value))
        if diff > tolerance:
            output = False
            msg = "ERROR: %s Torsion " % ct.title + \
                "%s(%d)-%s(%d)-%s(%d)-%s(%d)(%f deg) != %f +/- %f deg" % (
                    name1, iatoms[0], name2, iatoms[1], name3, iatoms[2],
                    name4, iatoms[3], one_tor, value, tolerance)
            errors.append(msg)
    if errors:
        raise AssertionError('\n'.join(errors))
    return True


[docs]def check_for_residue_list(filename, residues, frames_to_check=None):
    """
    For the specified structure frames in a filename {string} return True if
    the residues match the residues listed in residues {string, space seperated
    list of residue names as given by the __str__ operator of
    structure._Residue} then return True. (see partial_structrue_reader for
    description of frames_to check)
    """
    errors = []
    input_residues = set(residues.split())
    for ct in partial_structure_reader(filename, frames_to_check):
        ct_residues = {str(res) for res in ct.residue}
        for input_res in input_residues:
            if (input_res not in ct_residues):
                msg = f"{ct.title} has missing residue {input_res}"
                errors.append(msg)
        for ct_res in ct_residues:
            if (ct_res not in input_residues):
                msg = f"{ct.title} has extra residue {ct_res}"
                errors.append(msg)
    if errors:
        raise AssertionError('\n'.join(errors))
    return True


[docs]def check_structure_counts(filename,
                           atoms=None,
                           residues=None,
                           molecules=None,
                           frames_to_check=None):
    """
    STU workup
    For the specified structure frames in a filename {string} return True
    if all contain the specified numbers of atoms{integer},
    residues{integer}, and molecules{integer},  If any of these are not set
    then that test is skipped.  Otherwise return False.
    (see partial_structrue_reader for description of frames_to check)
    """
    errors = []
    for ct in partial_structure_reader(filename, frames_to_check):
        if (atoms is not None and len(ct.atom) != atoms):
            msg = "%s # Atoms %d != %d" % (ct.title, len(ct.atom), atoms)
            errors.append(msg)
        if (residues is not None and len(ct.residue) != residues):
            msg = "%s # Residues %d != %d" % (ct.title, len(
                ct.residue), residues)
            errors.append(msg)
        if (molecules is not None and len(ct.molecule) != molecules):
            msg = "%s # Molecules %d != %d" % (ct.title, len(
                ct.molecule), molecules)
            errors.append(msg)
    if errors:
        raise AssertionError('\n'.join(errors))
    return True


[docs]def determine_truth_table(filename,
                          property,
                          reference_property,
                          cutoff=None,
                          reverse=False,
                          frames_to_check=None):
    """
    Return a truth table as a dictionary with keys tp, tn, fp, fn & "no
    prediction" with the number of frames where the prediction is a True
    Positive, True Negative, False Positive, False Negative, or no prediction
    was provided.

    For an input filename{string} containing one or more structures readable by
    StructureReader.

    property{string} the property name of predicted value stored as ct-level
    property filename

    referecne property{string} the property name of true value stored as
    ct-level property filename

    By default the value in property will be converted to a boolean, however
    the cutoff {variable that supports >=} is defined then a predicted value of
    True will given whenever the value is greater than this cutoff.

    If reverse is True then the predicted value will be reversed (this makes
    the most sense when cutoff is also used)

    frame_to_check is passed to partial_structure_reader
    """
    truth_table = {'tp': 0, 'tn': 0, 'fp': 0, 'fn': 0, 'no prediction': 0}
    for ct in partial_structure_reader(filename, frames_to_check):
        if (reference_property not in ct.property):
            continue
        reference = bool(ct.property[reference_property])
        if (property not in ct.property):
            truth_table['no prediction'] += 1
            continue
        if (cutoff is None):
            pred = bool(ct.property[property])
        else:
            pred = (ct.property[property] >= cutoff)
        if (reverse):
            pred = not pred
        if (pred and reference):
            truth_table['tp'] += 1
        elif (not pred and not reference):
            truth_table['tn'] += 1
        elif (pred and not reference):
            truth_table['fp'] += 1
        elif (not pred and reference):
            truth_table['fn'] += 1
    return truth_table


[docs]def check_prime_accuracy(min_accuracy,
                         filename,
                         property,
                         reference_property,
                         cutoff=None,
                         reverse=False,
                         frames_to_check=None):
    """
    STU Workup that will check if the accuracy of a predictors is more than
    some cutoff value and raise an assertion error otherwise usage:

    :param min_accuracy: 0.0 - 1.0
    :type min_accuracy: float

    :param filename: path of file readable by StructureReader
    :type filename: str

    :param property: property name of predicted value stored as ct-level
                     property in the structures within filename
    :type property: str

    :param reference_property: property name of the reference value stored as
                               ct-level property filename
    :type reference_property: str

    :param cutoff:  variable that supports >= on property
    :type cutoff: int or float

    :type reverse: bool

    :param frame_to_check: indicies of frames within filename to process
    :type frames_to_check: str

    By default the value in property will be converted to a boolean, however
    the cutoff {variable that supports >=} is defined then a predicted value of
    True will given whenever the value is greater than this cutoff.

    If reverse is True then the predicted value will be reversed (this makes
    the most sense when cutoff is also used).
    """
    truth_table = determine_truth_table(filename,
                                        property,
                                        reference_property,
                                        cutoff=cutoff,
                                        reverse=reverse,
                                        frames_to_check=frames_to_check)
    try:
        accuracy = old_div(float(truth_table['tp'] + truth_table['tn']), \
            (truth_table['tp'] + truth_table['tn'] +
             truth_table['fp'] + truth_table['fn']))
    except ZeroDivisionError:
        raise AssertionError("No predictions to check")
    if (accuracy < min_accuracy):
        raise AssertionError(
            "Accuracy %5.3f < %5.3f TP %d TN %d FP %d FN %d" %
            (accuracy, min_accuracy, truth_table['tp'], truth_table['tn'],
             truth_table['fp'], truth_table['fn']))
    return True


[docs]def check_F1_score(min_F1,
                   filename,
                   property,
                   reference_property,
                   cutoff=None,
                   reverse=False,
                   frames_to_check=None):
    """
    STU Workup that will check if the F1 Score of a predictors is more
    than some cutoff value and raise an assertion error otherwise

    :param min_f1: 0.0 -1.0
    :type min_f1: float

    :param filename: path of file readable by StructureReader
    :type filename: str

    :param property: property name of predicted value stored as ct-level
                     property in the structures within filename
    :type property: str

    :param reference_property: property name of the reference value stored as
                               ct-level property filename
    :type reference_property: str

    :param cufoff: cutoff variable that supports >= on property
    :type cutoff: int or float

    :param reverse: reverse
    :type reverse: bool

    :param frame_to_check: indicies of frames within filename to process
    :type frames_to_check: str

    By default the value in property will be converted to a boolean, however
    the cutoff {variable that supports >=} is defined then a predicted value of
    True will given whenever the value is greater than this cutoff.

    If reverse is True then the predicted value will be reversed (this makes
    the most sense when cutoff is also used)
    """
    truth_table = determine_truth_table(filename,
                                        property,
                                        reference_property,
                                        cutoff=cutoff,
                                        reverse=reverse,
                                        frames_to_check=frames_to_check)
    try:
        prec = 1.0 * truth_table['tp'] / (truth_table['tp'] + truth_table['fp'])
        rec = 1.0 * truth_table['tp'] / (truth_table['tp'] + truth_table['fn'])
        F1 = 2.0 * prec * rec / (prec + rec)
    except ZeroDivisionError:
        F1 = 0.0
    if (F1 < min_F1):
        raise AssertionError("F1Score %5.3f < %5.3f TP %d TN %d FP %d FN %d" %
                             (F1, min_F1, truth_table['tp'], truth_table['tn'],
                              truth_table['fp'], truth_table['fn']))
    return True


[docs]def check_ct_property(filename,
                      property_name,
                      value,
                      tolerance="=",
                      frames_to_check=None):
    """
    STU Workup
    For a specified structure in a filename {string} return True
    if the property named  property_name{string} is equal to
    value {any type}
    If tolerance is ">", "<", ">=", "<=" or "in" than use that operation
    instead of equals.  Otherwise is tolerance is provided and not
    in the above list than return True iff all frames are within
    tolerance of value.
    """
    errors = []
    operator_map = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '==': operator.eq,
        '!=': operator.ne,
        'in': operator.contains
    }
    for ct in partial_structure_reader(filename, frames_to_check):
        try:
            prop_value = ct.property[property_name]
        except KeyError:
            msg = "Entry {} does not have property {}".format(
                ct.title, property_name)
            raise AssertionError(msg)
        if tolerance in operator_map:
            if (not operator_map[tolerance](prop_value, value)):
                msg = "Entry {} property {} ({}) is NOT {} {}".format(
                    ct.title, property_name, prop_value, tolerance, value)
                errors.append(msg)
        else:
            if (abs(prop_value - value) > tolerance):
                msg = "Entry {} property {} ({}) is NOT == {} +/- {}".format(
                    ct.title, property_name, prop_value, value, tolerance)
                errors.append(msg)
    if errors:
        raise AssertionError('\n'.join(errors))
    return True


[docs]def check_ct_property_unique(filename,
                             property_name,
                             tolerance=0.00001,
                             frames_to_check=None):
    """
    Raise an AssertionError if there are entries with duplicated values of a
    given ct-level property.

    :param filename: Maestro formated structure file to read
    :type filename: str

    :param property_name: ct-level property name. This should start with `r_`
        or `i_`
    :type tolerance: float
    :param tolerance: Values within this tolerance are considered equal
    :type frames_to_check: list of int
    :param frames_to_check: check these frames in filename as described
        in partial structure reader
    """
    values = []
    for ct in partial_structure_reader(filename, frames_to_check):
        if (len(values) > 0):
            closest = np.min(
                np.abs(np.array(values) - ct.property[property_name]))
            if (closest < tolerance):
                raise AssertionError(
                    "Entry {} in {}".format(ct.title, filename) +
                    " property {}:{:f} is not unique".format(
                        property_name, ct.property[property_name]))
        values.append(ct.property[property_name])
    return True


[docs]def check_hbond(filename,
                name1,
                name2,
                frames_to_check=None,
                distance_max=2.5,
                donor_angle=120,
                acceptor_angle=90):
    """
    Return True if the atom pair specified by `name1` and `name2` is a hydrogen
    bond in every frame in a structure file.

    For example, `check_hbond('some_file.maegz', 'A:1:H', 'A:4:O')`

    :type frames_to_check: list(int)
    :param frames_to_check: Only cts in `filename` for which their index
                            (starting with 0) are in `frames_to_check` are
                            returned.

    For example, `check_hbond('some_file.maegz', 'A:1:H', 'A:4:O', "[0, 6, 2]")`
    will check_hbonds in cts zero, six, and two. All selected cts must have the
    defined hbond for this check to return True

    If this argument is not set, then all cts in `filename` are checked.

    distance_max, donor_angleh, acceptor_angle: See description in
    schrodinger.structutils.interactions.hbond.match_hbond
    """

    for ct in partial_structure_reader(filename, frames_to_check):
        list1 = analyze.evaluate_asl(ct, convert_plop_name_to_asl(name1))
        list2 = analyze.evaluate_asl(ct, convert_plop_name_to_asl(name2))
        if len(list1) != 1:
            error = "name {} in ct {} in filename {} matches more than one atom"
            error = error.format(name1, ct.title, filename)
            raise RuntimeError(error)
        if len(list2) != 1:
            error = "name {} in ct {} in filename {} matches more than one atom"
            error = error.format(name2, ct.title, filename)
            raise RuntimeError(error)

        atom1 = ct.atom[list1[0]]
        atom2 = ct.atom[list2[0]]
        hbond_found = hbond.match_hbond(atom1, atom2, distance_max, donor_angle,
                                        acceptor_angle)
        if not hbond_found:
            raise AssertionError('{} is not h-bonded to {} in structure {}'.\
                    format(name1, name2, ct.title))
    return True


[docs]def check_position(filename,
                   name,
                   target_x,
                   target_y,
                   target_z,
                   tolerance,
                   frames_to_check=None):
    """
    For the specified structures in a filename `string` return True if the
    specified residue or atom identified by `name` is within `tolerance` of
    the coordinate at `target_x, target_y, target_z`.

    If a residue is given, then any atom within `tolerance` of the target
    coordinates will return True.

    The distance must be less than or equal to the `tolerance`.

    For example::

        check_position('some_file.maegz', 'A:1', 2.38, 0.91, 3.79, 1.0)

    :param frames_to_check: list(int)
    :type frames_to_check: Only cts in `filename` for which their index
                           (starting with 0) are in frames_to_check are
                           returned.

    For example::

        check_position('some_file.maegz', 'A:1', 2.38, 0.91, 3.79, 1.0,
                       "[0, 6, 2]")

    will check the position of residue A:1 in cts zero, six, and two. All
    selected cts must individually return True for check_position to return
    True.

    If this argument is not set, then all cts in `filename` are checked.
    """

    tol_squared = tolerance**2
    target_vector = np.array([target_x, target_y, target_z])
    for ct in partial_structure_reader(filename, frames_to_check):
        iatoms = analyze.evaluate_asl(ct, convert_plop_name_to_asl(name))
        if len(iatoms) == 0:
            error = "name {} in ct {} in filename {} matches nothing"
            error = error.format(name, ct.title, filename)
            raise RuntimeError(error)

        atom_within_tol = False
        for iatom in iatoms:
            atom = ct.atom[iatom]
            delta_vec = target_vector - atom.xyz
            dist_squared = np.dot(delta_vec, delta_vec)
            if dist_squared <= tol_squared:
                atom_within_tol = True
                break

        if not atom_within_tol:
            # Example full message:
            #  No atom in A:2:O was found within 2.00 A of
            #  (2.38, 0.91, 3.79) across all cts in dummyfile.maegz
            msg = 'No atom in {} was found within {:.2f} A of '.\
                    format(name, tolerance)
            msg += '({:.2f}, {:.2f}, {:.2f}) '.format(*target_vector)
            if frames_to_check:
                msg += f'in cts {frames_to_check} '
            else:
                msg += 'across all cts '
            msg += f'in {filename}'
            raise AssertionError(msg)

    return True


[docs]def check_sequence(filename, chain, sequence):
    """
    Check that the first structure in a file has a given 1-letter sequence
    inputs:

    :param filename: Maestro formatted file to check
    :type filename: str
    :param chain: Chain name to check
    :type chain: str (string of length 1 character)
    :param sequence: protein sequence to check for. The sequence given here
                     must match that in the structure file exactly to pass. Any
                     chain breaks (no matter the length in sequence space (ie
                     A:2 - A:3 or A:2 to A:103)) are represented by a "-"
    :type sequence: str of upper-case single character protein codes
    :return: True is returned if this criteria is met, An AssertionError is
             raised if this criteria is not met.
    :rtype: bool
    """
    ct = structure.Structure.read(filename)
    ct_sequence = ""
    for one_chain in ct.chain:
        if one_chain.name != chain:
            continue
        last_res = None
        for res in structure.get_residues_by_connectivity(one_chain):
            # Skip if this is an isolated residue
            if len(res.atom[1].getMolecule().residue) == 1:
                continue
            # Add a "-" if there is a chain break
            if last_res is not None and not last_res.isConnectedToResidue(res):
                ct_sequence += "-"
            ct_sequence += res.getCode()
            last_res = res
    if sequence != ct_sequence:
        raise AssertionError("Provided Sequence: %s\nSequence in File : %s" %
                             (sequence, ct_sequence))
    return True