Source code for schrodinger.protein.analysis

"""
A class for diagnosing and reporting common structural problems of protein
complexes.

Usage:

instance = Report(ct)
instance.write_to_stdout()

Copyright Schrodinger, LLC. All rights reserved.

"""

import future.utils
import math
import re
import sys
from past.utils import old_div

import numpy

import schrodinger.infra.mm as mm
from schrodinger import structure
from schrodinger.infra.mmbitset import Bitset
from schrodinger.protein.analysis_params import backbone_dihedrals
from schrodinger.protein.analysis_params import ideal_bond_angles
from schrodinger.protein.analysis_params import ideal_bond_lengths
from schrodinger.protein.analysis_params import improper_torsions
from schrodinger.protein.analysis_params import planar_groups
from schrodinger.protein.analysis_params import pxp_vdwr
from schrodinger.protein.analysis_params import sidechain_dihedral_atomnames
from schrodinger.protein.analysis_params import sidechain_dihedrals
from schrodinger.structutils import analyze

VDWR_MMAT = 0
VDWR_MMAT_H = 0
VDWR_PXP = 1

use_gfactors = False  #: Use gfactors instead of deviations.
kT = 0.5925  #: Value of kT at room temperature.

DISALLOWED = " disallowed"
DASH = "-"


[docs]class Report:
    """
    A class to calculate properties of proteins.

    To use this class in a script to compute a set of data::

        reporter = Report(struct, ['SIDECHAIN DIHEDRALS', 'GAMMA BFACTORS'])
        dihedrals = reporter.get_set('SIDECHAIN DIHEDRALS')
        for point in dihedrals.points:
            resnum = point.descriptor.split()[1]
            resid = point.descriptor.split(':')[0] + ':' + resnum
            for val in point.values:
                try:
                    true_value = float(val)
                except ValueError:
                    # Some angles will have '-' if they aren't defined for
                    # this residue type
                    continue
                do_something_with_chi_angle(true_value)
    """

[docs]    def __init__(self, ct, sets_to_run=('ALL',)):
        """
        Create a Report instance.

        :type ct: `Structure<schrodinger.structure.Structure>`
        :param ct: The structure this Report operates on

        :type sets_to_run: list
        :param sets_to_run: Either ['ALL'] (default) or one or more of valid
                            sets.

        Valid sets to compute:

        - 'STERIC CLASHES'
        - 'PRIMEX STERIC CLASHES'
        - 'BOND LENGTHS'
        - 'BOND ANGLES'
        - 'BACKBONE DIHEDRALS'
        - 'SIDECHAIN DIHEDRALS'
        - 'GFACTOR SUMMARY'
        - 'BFACTORS'
        - 'GAMMA BFACTORS'
        - 'PEPTIDE PLANARITY'
        - 'SIDECHAIN PLANARITY'
        - 'IMPROPER TORSIONS'
        - 'CHIRALITY'
        - 'MISSING ATOMS'
        """

        self.data = []
        self.table_names = []
        self.table_headers = {}
        self.data_sets_to_run = []

        self.data_set_labels = {
            'STERIC CLASHES': self.steric_clash_data_set,
            'PRIMEX STERIC CLASHES': self.primex_steric_clash_data_set,
            'BOND LENGTHS': self.bond_length_data_set,
            'BOND ANGLES': self.bond_angle_data_set,
            'BACKBONE DIHEDRALS': self.backbone_dihedral_data_set,
            'SIDECHAIN DIHEDRALS': self.sidechain_dihedral_data_set,
            'GFACTOR SUMMARY': self.Gfactor_summary_data_set,
            'BFACTORS': self.Bfactor_data_set,
            'GAMMA BFACTORS': self.gamma_Bfactor_data_set,
            'PEPTIDE PLANARITY': self.peptide_planarity_data_set,
            'SIDECHAIN PLANARITY': self.sidechain_planarity_data_set,
            'IMPROPER TORSIONS': self.improper_torsion_data_set,
            'CHIRALITY': self.chirality_data_set,
            'MISSING ATOMS': self.missing_atoms_data_set
        }

        if 'ALL' in sets_to_run:
            sets_to_run = [
                'STERIC CLASHES',  # 'PRIMEX STERIC CLASHES', Too slow
                'BOND LENGTHS',
                'BOND ANGLES',
                'BACKBONE DIHEDRALS',
                'SIDECHAIN DIHEDRALS',
                'GFACTOR SUMMARY',
                'BFACTORS',
                'GAMMA BFACTORS',
                'PEPTIDE PLANARITY',
                'SIDECHAIN PLANARITY',
                'IMPROPER TORSIONS',
                'CHIRALITY',
                'MISSING ATOMS'
            ]

        need_xtal_mates = set(['STERIC CLASHES', 'PRIMEX STERIC CLASHES'])

        for label in sets_to_run:
            if label in list(self.data_set_labels):
                self.data_sets_to_run.append(
                    (label, self.data_set_labels[label]))

        try:
            if need_xtal_mates.intersection(sets_to_run):
                mates = analyze.generate_crystal_mates(ct, radius=4.0)
            else:
                mates = []
        except:
            self.setup_protein(ct)
        else:
            # generate_crystal_mates can return 0 mates if something goes wrong
            if len(mates) > 0:
                for atom in mates[0].atom:
                    atom.property['i_pa_xtalatom'] = 0
                for imate in range(1, len(mates)):
                    for atom in mates[imate].atom:
                        atom.property['i_pa_xtalatom'] = 1
                    mates[0].extend(mates[imate])
                atoms_to_delete = analyze.evaluate_asl(
                    mates[0], "not (fillres within 4 (atom.i_pa_xtalatom 0 ) )")
                mates[0].deleteAtoms(atoms_to_delete)
                self.setup_protein(mates[0])
            else:
                self.setup_protein(ct)
        self.working_protein.noxtal_ct = ct

        for set_to_run in self.data_sets_to_run:
            set_label = set_to_run[0]
            set_type = set_to_run[1]
            curr_set = set_type(set_label)
            curr_set.analyze(self)
            self.data.append(curr_set)
            self.table_names.append(curr_set.title)
            self.table_headers[curr_set.title] = [
                field for field in curr_set.fields
            ]

    ####################################################################################
    ### Basic Data Structure
    ####################################################################################

[docs]    class data_set:
        """
        Base class for all data sets.

        :ivar label: the name of this data set
        :vartype label: str

        :ivar title: the name of this data set when printing out the data
        :vartype title: str

        :ivar fields: first item is the name of the `data_point` descriptor
            property, remaining items are the names of the items in the
            `data_point` values property
        :vartype fields: list[str]

        :ivar points: list of `data_point` objects
        :vartype points: list(data_point)

        :ivar summary: overall summary of the data set for the entire protein
        :vartype summary: str

        :ivar bad_points: a subset of problematic points for filtering in
            table and bubble plot
        :vartype bad_points: list(data_point)

        :ivar count: the number of violations, in most cases the length of
            `data_points`, but for 'global' or non-residue specific properties
            it may be just 0 (for no issues) or 1 (e.g., X-Ray check)
        :vartype count: int

        :ivar score: raw quality score, which has higher priority
        :vartype score: float

        :ivar bubble_scale: normalized scale
        :vartype bubble_scale: float

        :ivar color: bubble color used as a pylab color argument
        :vartype color: str

        :ivar area: bubble area
        :vartype area: float
        """

[docs]        def __init__(self, label):
            self.label = label
            self.title = ""
            self.fields = []
            self.points = []
            self.summary = 'N/A'

            # the following is for bubble plot
            self.bad_points = []
            self.count = 0
            self.score = 1.234
            self.bubble_scale = 0.0
            self.color = ''
            self.area = 0.0

[docs]        class data_point:
            """
            Class that holds the data for each point in a data set

            :ivar descriptor: label for this point - typically the user friendly
                names of the atom or residues involved
            :vartype descriptor: str

            :ivar values: the values at this point -  varies by subclass
            :vartype values: list(float or str)

            :ivar atoms: the atoms involved in this point
            :vartype atoms: list(int)
            """

[docs]            def __init__(
                    self,
                    descriptor="",
                    values=[],  # noqa: M511
                    atoms=[]):  # noqa: M511

                self.descriptor = descriptor
                self.values = values
                self.atoms = atoms

[docs]        def add_point(self, descriptor="", values=[], atoms=[]):  # noqa: M511
            """
            Add a new point to the points property

            :type descriptor: str
            :param descriptor: Label for this point - typically the
                user-friendly names of the atom or residues involved

            :type values: list
            :param values: The values at this point - varies by subclass

            :type atoms: list
            :param atoms: The atoms involved in this point
            """

            self.points.append(self.data_point(descriptor, values, atoms))
            return

[docs]        def report_data_points(self):
            """
            Return all data points for this set in a list

            :rtype: list
            :return: list of data for each point in self.points, each item is a
                list whose first item is the point.descriptor and remaining items
                are the point.values items.
            """

            info = []
            for point in self.points:
                info.append([point.descriptor] + point.values)
            return info

[docs]        def analyze(self, parent):
            """
            Must be subclassed, this implementation does nothing

            :type parent: `Report` object
            :param parent: The Report object that this is for
            """

            return

[docs]        def report(self):
            """
            Must be subclassed, this implementation does nothing
            """

            return

    ####################################################################################################################
    ### Look for steric clashes
    ####################################################################################################################

[docs]    class steric_clash_data_set(data_set):
        """
        Class to compute and hold data for Steric Clashes.

        Data point descriptor is atoms involved, values are "Distance", "Min
        Allowed", "Delta".

        Summary is N/A

        See parent class `data_set` for additional documenation
        """

[docs]        def __init__(self, *args, **kwargs):
            Report.data_set.__init__(self, *args, **kwargs)
            self.parent = None
            self.min_hbond_ratio = 0.75
            self.clash_threshold = 0.85

        # Are two atoms within three bonds?
[docs]        def within_three_bonds(self, protein, iatom, target_atom):
            """
            Method to determine whether two atoms are within three bonds of each
            other.

            :type protein: Report.local_protein
            :param parent: The Report object that this is for

            :type iatom: int
            :param iatom: atom number of first atom

            :type target_atom: int
            :param target_atom: atom number of the second atom

            :rtype: bool
            :return: True if atoms are within 3 bonds of each other, False if
                not
            """

            iatom_atoms = []
            num_iatom_bonds = mm.mmct_atom_get_bond_total(protein.ct, iatom)
            for iatom_bond in range(1, num_iatom_bonds + 1):
                jatom = mm.mmct_atom_get_bond_atom(protein.ct, iatom,
                                                   iatom_bond)
                if jatom == target_atom:
                    return True
                else:
                    num_jatom_bonds = mm.mmct_atom_get_bond_total(
                        protein.ct, jatom)
                    for jatom_bond in range(1, num_jatom_bonds + 1):
                        katom = mm.mmct_atom_get_bond_atom(
                            protein.ct, jatom, jatom_bond)
                        if katom == target_atom:
                            return True
                        else:
                            num_katom_bonds = mm.mmct_atom_get_bond_total(
                                protein.ct, katom)
                            for katom_bond in range(1, num_katom_bonds + 1):
                                latom = mm.mmct_atom_get_bond_atom(
                                    protein.ct, katom, katom_bond)
                                if latom == target_atom:
                                    return True
            return False

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: l{report} object
            :param parent: the report object that this is for
            """

            self.title = "Steric Clashes"
            self.fields = [
                "        Atoms           ", "Distance", "Min Allowed", "Delta"
            ]
            self.tooltip = ("Ratio of the interatomic distances to the sum of "
                            "the atomic radii")

            self.run_analysis(parent.working_protein)

[docs]        def run_analysis(self, protein):
            """
            Iterate over the atom pairs and check record clashes
            :param protein: the protein
            :type protein: Report.local_protein
            """

            # Get all interactions efficiently within 3.5 angstrom using a
            # distance cell.
            atoms = protein.xtal_atoms
            xyz = [[atom.x, atom.y, atom.z] for atom in protein.xtal_atoms]
            dcell = mm.mmct_create_distance_cell_xyz2(xyz, 3.5)
            for n, atom in enumerate(atoms):
                clash_list = mm.mmct_query_distance_cell(
                    dcell, atom.x, atom.y, atom.z)
                size = mm.mmlist_get_size(clash_list)
                indices = [mm.mmlist_get(clash_list, i) for i in range(size)]
                indices.sort()
                for index in filter(lambda x: x > n, indices):
                    self.analyze_pair(protein, atom, atoms[index])
            mm.mmct_delete_distance_cell(dcell)

[docs]        def analyze_pair(self, protein, atom1, atom2):
            """
            Test a pair of atoms and record data if they clash. Order does not
            matter.
            :param protein: The protein
            :type protein: Report.local_protein
            :param atom1: One atom of the pair
            :type atom1: Report.local_atom
            :param atom2: The second atom of the pair
            :type atom2: Report.local_atom
            """
            if atom1.pdbname[-1] == '*' and atom2.pdbname[-1] == '*':
                return
            distance = protein.ct.measure(atom1.index, atom2.index)

            reference_distance = atom1.radius + atom2.radius
            clash = old_div(distance, reference_distance)

            # Analyze potential clashes
            if clash > self.clash_threshold:
                return

            # Make sure it's not an H-bond.
            if self.check_hbond(protein, atom1, atom2, clash, distance):
                return

            # This is an expensive check, so only do it if everything else has passed.
            if not self.within_three_bonds(protein, atom1.index, atom2.index):
                clash_name = atom1.residue_name + ":" + atom1.pdbname + " - " \
                           + atom2.residue_name + ":" + atom2.pdbname
                # Assume atom indices in the normal CT are the same as in the CT with images (should be safe).
                view_atoms = []
                for atom in [atom1, atom2]:
                    if atom.pdbname[-1] != '*':
                        view_atoms.append(atom.index)
                self.add_point(clash_name, [
                    distance, reference_distance, reference_distance - distance
                ], view_atoms)

[docs]        def check_hbond(self,
                        protein,
                        atom1,
                        atom2,
                        clash=None,
                        distance=None,
                        require_hydrogen=True):
            """
            Test and atom pair to see if they can be considered a hydrogen bond.
            The presence of a H-bond makes permissible atom proximity that would
            normally be considered a clash. Atom order does not matter.

            :param protein: The protein
            :type protein: Report.local_protein
            :param atom1: atom 1
            :type atom1: Report.local_atom
            :param atom2: atom 2
            :type atom2: Report.local_atom
            :param clash: Pre-computed clash ratio
            :type clash: float or None
            :param distance: Pre-computed distance
            :type distance: float or None
            :param require_hydrogen: Whether an intervening hydrogen must be
                found to qualify as an H-bond.
            :type require_hydrogen: bool
            """
            # Only consider non-carbon heavy atoms
            if atom1.atomic_number in [1, 6] or atom2.atomic_number in [1, 6]:
                return False

            if distance is None:
                distance = protein.ct.measure(atom1.index, atom2.index)
            if clash is None:
                reference_distance = atom1.radius + atom2.radius
                clash = old_div(distance, reference_distance)

            if clash > 1:
                return False
            if clash < self.min_hbond_ratio:
                return False
            if not require_hydrogen:
                return True
            if self.find_hbond_hydrogen(protein, atom1, atom2, distance):
                return True
            if self.find_hbond_hydrogen(protein, atom2, atom1, distance):
                return True
            return False

[docs]        def find_hbond_hydrogen(self, protein, donor, acceptor, distance=None):
            """
            Locate a hydrogen that is bound to the donor that is closer to the
            acceptor than the donor is.
            :param protein: the protein
            :type protein: Report.local_protein
            :param donor: donor atom
            :type donor: Report.local_atom
            :param acceptor: acceptor atom
            :type acceptor: Report.local_atom
            :param distance: pre-computed distance between donor and acceptor
            :type distance: float or None
            """
            if distance is None:
                distance = protein.ct.measure(donor.index, acceptor.index)

            for bonded_atom in protein.ct.atom[donor.index].bonded_atoms:
                if bonded_atom.atomic_number == 1:
                    h_dist = protein.ct.measure(bonded_atom, acceptor.index)
                    if h_dist < distance:
                        return bonded_atom
            return None

    ####################################################################################################################
    ### Look for steric clashes - PrimeX version
    ####################################################################################################################

[docs]    class primex_steric_clash_data_set(steric_clash_data_set):
        """
        A subclass of steric_clash_data_set that computes clashes using a
        different set of criteria used by PrimeX Polish.
        """

[docs]        def __init__(self, *args, **kwargs):
            Report.steric_clash_data_set.__init__(self, *args, **kwargs)
            self.min_hbond_ratio = 0.1
            self.clash_threshold = 0.8

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: l{report} object
            :param parent: the report object that this is for
            """

            self.title = "PrimeX Steric Clashes"
            self.fields = [
                "        Atoms           ", "Distance", "Min Allowed", "Delta",
                "Severe"
            ]
            self.tooltip = ("Ratio of the interatomic distances to the sum of "
                            "the atomic radii - PrimeX version")

            ct = parent.working_protein.ct
            primex_working_protein = parent.make_local_protein(
                ct, True, VDWR_PXP)

            self.run_analysis(primex_working_protein)

[docs]        def check_hbond(self, protein, atom1, atom2, clash=None, distance=None):
            """
            Runs a number of quick checks first before calling the super class'
            method. See the super class for more information.
            """

            # Check for H-bonding directly via H
            if (atom1.atomic_number == 1 and atom2.atomic_number not in [1, 6]
                    or atom1.atomic_number not in [1, 6] and
                    atom2.atomic_number == 1):
                if clash > self.min_hbond_ratio:
                    return True

            # N and O never clash
            if (atom1.atomic_number == 7 and atom2.atomic_number == 8 or
                    atom1.atomic_number == 8 and atom2.atomic_number == 7):
                return True

            # Non-backbone N never clashes with N
            # Non-backbone O never clashes with O
            a1backbone = (len(atom1.pdbname.split()[0]) == 1)
            a2backbone = (len(atom2.pdbname.split()[0]) == 1)
            if (atom1.atomic_number == atom2.atomic_number == 7 or
                    atom1.atomic_number == atom2.atomic_number == 8):
                if not a1backbone or not a2backbone:
                    return True

            # Run the regular h-bond test
            super = Report.steric_clash_data_set
            return super.check_hbond(self, protein, atom1, atom2, clash,
                                     distance, False)

[docs]        def add_point(self, descriptor, values, atoms):
            """
            Makes changes to how the output data is recorded relative to the
            super class.
            """
            distance = values[0]
            reference_distance = values[1]
            clash = old_div(distance, reference_distance)
            if clash <= 0.7:
                severe = 'Severe'
            else:
                severe = ''
            values.append(severe)

            super = Report.steric_clash_data_set
            super.add_point(self, descriptor, values, atoms)

    ####################################################################################################################
    ### Measure bond lengths
    ####################################################################################################################

[docs]    class bond_length_data_set(data_set):
        """
        Class to compute and hold data for Bond Length Deviations

        Data point descriptor is atoms involved, values are "Deviation"

        Summary is the RMS the deviations.

        See parent class `data_set` for additional documenation
        """

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: `report` object
            :param parent: the report object that this is for
            """

            self.title = "Bond Length Deviations"
            self.tooltip = ("Deviation from the ideal value derived from Engh "
                            "and Huber")
            if use_gfactors:
                self.fields = ["Bond", "G-Factor"]
            else:
                self.fields = ["Bond", "Deviation"]
            self.summary = 0.0

            num_total = 0

            for atom in parent.working_protein.atoms:
                pdbres = mm.mmct_atom_get_pdbres(parent.working_protein.ct,
                                                 atom.index)
                if pdbres not in list(ideal_bond_lengths):
                    continue
                if atom.pdbname in list(ideal_bond_lengths[pdbres]):
                    num_bond = mm.mmct_atom_get_bond_total(
                        parent.working_protein.ct, atom.index)
                    for ibond in range(1, num_bond + 1):
                        bound_atom = mm.mmct_atom_get_bond_atom(
                            parent.working_protein.ct, atom.index, ibond)
                        bound_atom_pdbname = mm.mmct_atom_get_pdbname(
                            parent.working_protein.ct, bound_atom)
                        if bound_atom_pdbname in list(
                                ideal_bond_lengths[pdbres][atom.pdbname]):
                            bond_name = atom.residue_name + ":" + atom.pdbname + " - " + bound_atom_pdbname
                            bond_length = parent.working_protein.ct.measure(
                                atom.index, bound_atom)
                            deviation = abs(
                                bond_length - ideal_bond_lengths[pdbres][
                                    atom.pdbname][bound_atom_pdbname][1])
                            if use_gfactors:
                                coefficient = ideal_bond_lengths[pdbres][
                                    atom.pdbname][bound_atom_pdbname][0]
                                gfactor = -coefficient * deviation**2 / kT
                                self.add_point(bond_name, [gfactor],
                                               [atom.index, bound_atom])
                            else:
                                self.add_point(bond_name, [deviation],
                                               [atom.index, bound_atom])
                                self.summary += deviation**2
                                num_total += 1
            if num_total > 0:
                self.summary = math.sqrt(old_div(self.summary,
                                                 float(num_total)))

            return

    ####################################################################################################################
    ### Measure bond angles
    ####################################################################################################################

[docs]    class bond_angle_data_set(data_set):
        """
        Class to compute and hold data for Bond Angle Deviations

        Data point descriptor is atoms involved, values are "Deviation"

        Summary is the RMS of the deviations.

        See parent class `data_set` for additional documenation
        """

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: `report` object
            :param parent: the report object that this is for
            """

            self.title = "Bond Angle Deviations"
            self.tooltip = ("Deviation from the ideal value derived from Engh "
                            "and Huber")
            if use_gfactors:
                self.fields = ["Angle", "G-Factor"]
            else:
                self.fields = ["Angle", "Deviation"]
            self.summary = 0.0

            num_total = 0

            for atom in parent.working_protein.atoms:
                pdbres = mm.mmct_atom_get_pdbres(parent.working_protein.ct,
                                                 atom.index)
                if pdbres not in list(ideal_bond_lengths):
                    continue
                if atom.pdbname in list(ideal_bond_angles[pdbres]):
                    num_ibond = mm.mmct_atom_get_bond_total(
                        parent.working_protein.ct, atom.index)
                    for ibond in range(1, num_ibond + 1):
                        bound_atom_1 = mm.mmct_atom_get_bond_atom(
                            parent.working_protein.ct, atom.index, ibond)
                        bound_atom_1_pdbname = mm.mmct_atom_get_pdbname(
                            parent.working_protein.ct, bound_atom_1)
                        if bound_atom_1_pdbname in list(
                                ideal_bond_angles[pdbres][atom.pdbname]):
                            num_jbond = mm.mmct_atom_get_bond_total(
                                parent.working_protein.ct, bound_atom_1)
                            for jbond in range(1, num_jbond + 1):
                                bound_atom_2 = mm.mmct_atom_get_bond_atom(
                                    parent.working_protein.ct, bound_atom_1,
                                    jbond)
                                bound_atom_2_pdbname = mm.mmct_atom_get_pdbname(
                                    parent.working_protein.ct, bound_atom_2)
                                if bound_atom_2_pdbname in list(
                                        ideal_bond_angles[pdbres][atom.pdbname]
                                    [bound_atom_1_pdbname]):
                                    angle_name = atom.residue_name + ":" + atom.pdbname + " - " + bound_atom_1_pdbname + " - " + bound_atom_2_pdbname
                                    bond_angle = parent.working_protein.ct.measure(
                                        atom.index, bound_atom_1, bound_atom_2)
                                    deviation = abs(
                                        bond_angle - ideal_bond_angles[pdbres][
                                            atom.pdbname][bound_atom_1_pdbname]
                                        [bound_atom_2_pdbname][1])
                                    if use_gfactors:
                                        coefficient = ideal_bond_angles[pdbres][
                                            atom.pdbname][bound_atom_1_pdbname][
                                                bound_atom_2_pdbname][0]
                                        deviation = numpy.pi * deviation / 180.0
                                        gfactor = -coefficient * deviation**2 / kT
                                        self.add_point(angle_name, [gfactor], [
                                            bound_atom_1, atom.index,
                                            bound_atom_2
                                        ])
                                    else:
                                        self.add_point(
                                            angle_name, [deviation], [
                                                bound_atom_1, atom.index,
                                                bound_atom_2
                                            ])
                                        self.summary += deviation**2
                                        num_total += 1
            if num_total > 0:
                self.summary = math.sqrt(old_div(self.summary,
                                                 float(num_total)))

            return

    ####################################################################################################################
    ### Backbone dihedrals
    ####################################################################################################################

[docs]    class backbone_dihedral_data_set(data_set):
        """
        Class to compute and hold data for Backbone Dihedrals

        Data point descriptor is the residue involved, values are "Phi", "Psi",
        "G-Factor"

        Summary is N/A

        See parent class `data_set` for additional documenation
        """

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: `report` object
            :param parent: the report object that this is for
            """

            self.title = "Backbone Dihedrals"
            self.tooltip = (
                "Log-probability (G-Factor) for backbone dihedrals, "
                "along with dihedral angles")
            self.fields = ["Residue", "Phi", "Psi", "G-Factor"]

            residues = parent.working_protein.residues
            if not residues:
                return
            ct = parent.working_protein.ct
            residue = residues[0]
            default_values = ['-', '-', '-']
            indices = residue.get_atom_indices()
            self.add_point(residue.name, default_values, indices)

            pattern = re.compile("-180.0")
            for iresidue in range(1, len(residues) - 1):
                # Get previous, current, and next residue and their backbone
                # atoms
                presidue = residues[iresidue - 1]
                residue = residues[iresidue]
                nresidue = residues[iresidue + 1]
                pC = presidue.backbone[" C  "]
                N = residue.backbone[" N  "]
                CA = residue.backbone[" CA "]
                C = residue.backbone[" C  "]
                nN = nresidue.backbone[" N  "]
                indices = residue.get_backbone_indices()
                # Check if atoms exist
                if not (pC and N and CA and C and nN):
                    self.add_point(residue.name, default_values, indices)
                    continue
                # Check if residues are connected
                connected = (ct.measure(pC.index, N.index) < 3.0 and
                             ct.measure(C.index, nN.index) < 3.0)
                if not connected:
                    self.add_point(residue.name, default_values, indices)
                    continue
                phi = ct.measure(pC.index, N.index, CA.index, C.index)
                psi = ct.measure(N.index, CA.index, C.index, nN.index)
                dihedral_key = str(round(phi, -1)) + ',' + str(round(psi, -1))
                dihedral_key = re.sub(pattern, "180.0", dihedral_key)
                pdbres = mm.mmct_atom_get_pdbres(ct, CA.index)
                if pdbres not in list(backbone_dihedrals):
                    pdbres = "GEN "
                phi = round(phi, 2)
                psi = round(psi, 2)
                dihedrals = backbone_dihedrals[pdbres]
                if dihedral_key in dihedrals:
                    gfactor = math.log(dihedrals[dihedral_key])
                    values = [phi, psi, gfactor]
                else:
                    values = [phi, psi, DISALLOWED]
                self.add_point(residue.name, values, indices)
            # Add last residue point
            if len(residues) > 2:
                residue = residues[-1]
                indices = residue.get_backbone_indices()
                self.add_point(residue.name, default_values, indices)

    ####################################################################################################################
    ### Sidechain dihedrals
    ####################################################################################################################

[docs]    class sidechain_dihedral_data_set(data_set):
        """
        Class to compute and hold data for Sidechain Dihedrals

        Data point descriptor is the residue involved, values are "Chi1",
        "Chi2", "G-Factor"

        Summary is N/A

        See parent class `data_set` for additional documenation
        """

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: `report` object
            :param parent: the report object that this is for
            """

            self.title = "Sidechain Dihedrals"
            self.tooltip = (
                "Log-probability (G-Factor) for sidechain dihedrals, "
                "along with dihedral angles")
            self.fields = ["Residue", "Chi1", "Chi2", "G-Factor"]

            for residue in parent.working_protein.residues:
                residue_dihedrals = []
                pdbres = mm.mmct_atom_get_pdbres(parent.working_protein.ct,
                                                 residue.atoms[0].index)
                # If no sidechain atoms, pick an appropriate backbone one.
                target_atoms = []
                if len(residue.sidechain) > 0:
                    target_atoms = residue.get_sidechain_indices()
                else:
                    target_atoms = residue.get_backbone_indices()
                if pdbres in sidechain_dihedrals:
                    for dihedral in sidechain_dihedral_atomnames[pdbres]:
                        dihedral_atoms = []
                        for atomname in dihedral:
                            for atom in residue.atoms:
                                if atom.pdbname == atomname:
                                    dihedral_atoms.append(atom.index)
                        if len(dihedral_atoms) == 4:
                            dihedral_value = parent.working_protein.ct.measure(
                                dihedral_atoms[0], dihedral_atoms[1],
                                dihedral_atoms[2], dihedral_atoms[3])
                            residue_dihedrals.append(dihedral_value)
                    if len(residue_dihedrals) == len(
                            sidechain_dihedral_atomnames[pdbres]):
                        # Round to the nearest 10 degrees
                        #dihedral_list = ",".join([ str(round(dihedral,1)) for dihedral in residue_dihedrals ])
                        residue_dihedrals = [
                            round(dihedral, 2) for dihedral in residue_dihedrals
                        ]
                        dihedral_key = ",".join([
                            str(round(dihedral, -1))
                            for dihedral in residue_dihedrals
                        ])
                        pattern = re.compile("-180.0")
                        dihedral_key = re.sub(pattern, "180.0", dihedral_key)
                        if len(residue_dihedrals) == 1:
                            residue_dihedrals.append(DASH)
                        if dihedral_key in sidechain_dihedrals[pdbres]:
                            self.add_point(
                                residue.name, residue_dihedrals + [
                                    math.log(sidechain_dihedrals[pdbres]
                                             [dihedral_key])
                                ], target_atoms)
                        else:
                            self.add_point(residue.name,
                                           residue_dihedrals + [DISALLOWED],
                                           target_atoms)
                    else:
                        self.add_point(residue.name, [DASH, DASH, DASH],
                                       target_atoms)
                else:
                    self.add_point(residue.name, [DASH, DASH, DASH],
                                   target_atoms)

            return

    ####################################################################################################################
    ### Summary of G-factors
    ####################################################################################################################

[docs]    class Gfactor_summary_data_set(data_set):
        """
        Class to compute and hold data for G-factor summaries of the Backbone
        and Sidechain dihedrals.

        Data point descriptor is the residue involved, values are "Backbone",
        "Sidechain", "Total"

        Summary is N/A

        See parent class `data_set` for additional documenation
        """

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: `report` object
            :param parent: the report object that this is for
            """

            self.title = "G-factor Summary"
            self.tooltip = (
                "Log-probability (G-Factor) for backbone dihedrals, "
                "sidechain dihedrals, and the sum of the two")
            self.fields = ["Residue", "Backbone", "Sidechain", "Total"]

            atoms = {}
            for set in parent.data:
                if set.title == "Backbone Dihedrals":
                    backbone_data = set.report_data_points()
                    for point in set.points:
                        atoms[point.descriptor] = point.atoms
                elif set.title == "Sidechain Dihedrals":
                    sidechain_data = set.report_data_points()

            for iline in range(len(backbone_data)):
                if backbone_data[iline][3] == DISALLOWED or sidechain_data[
                        iline][3] == DISALLOWED:
                    total = DISALLOWED
                elif sidechain_data[iline][3] == DASH:
                    total = backbone_data[iline][3]
                elif backbone_data[iline][3] == DASH:
                    total = sidechain_data[iline][3]
                else:
                    total = backbone_data[iline][3] + sidechain_data[iline][3]
                self.add_point(
                    backbone_data[iline][0],
                    [backbone_data[iline][3], sidechain_data[iline][3], total],
                    atoms[backbone_data[iline][0]])

            return

    ####################################################################################################################
    ### Calculate B-factor information
    ####################################################################################################################

[docs]    class Bfactor_data_set(data_set):
        """
        Class to compute and hold data for B-Factors

        Data point descriptor is the residue involved, values are "Backbone",
        "BBStdDev", "Sidechain", "SCStdDev"

        Summary is the average B-Factor for backbone and sidechain atoms

        See parent class `data_set` for additional documenation
        """

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: `report` object
            :param parent: the report object that this is for
            """

            self.title = "Average B-factors"
            self.tooltip = ("Average temperature factors for the backbone "
                            "and sidechain of each residue")
            self.fields = [
                "Residue", "Backbone", "BBStdDev", "Sidechain", "SCStdDev"
            ]
            self.summary = 0.0

            num_total = 0
            for residue in parent.working_protein.residues:

                back_n_atoms = 0
                side_n_atoms = 0

                avg_back_Bfactor = 0.0
                avg_side_Bfactor = 0.0
                for atom in future.utils.listvalues(residue.backbone):
                    if atom is None:
                        continue
                    if atom.atomic_number == 1:
                        continue
                    avg_back_Bfactor += atom.temperature_factor
                    self.summary += atom.temperature_factor
                    back_n_atoms += 1
                    num_total += 1
                for atom in future.utils.listvalues(residue.sidechain):
                    if atom.atomic_number == 1:
                        continue
                    avg_side_Bfactor += atom.temperature_factor
                    self.summary += atom.temperature_factor
                    side_n_atoms += 1
                    num_total += 1
                if back_n_atoms > 0:
                    avg_back_Bfactor /= back_n_atoms
                if side_n_atoms > 0:
                    avg_side_Bfactor /= side_n_atoms

                stddev_back_Bfactor = 0.0
                stddev_side_Bfactor = 0.0
                for atom in future.utils.listvalues(residue.backbone):
                    if atom is None:
                        continue
                    if atom.atomic_number == 1:
                        continue
                    stddev_back_Bfactor += (atom.temperature_factor -
                                            avg_back_Bfactor)**2
                for atom in future.utils.listvalues(residue.sidechain):
                    if atom.atomic_number == 1:
                        continue
                    stddev_side_Bfactor += (atom.temperature_factor -
                                            avg_side_Bfactor)**2
                if back_n_atoms > 0:
                    stddev_back_Bfactor /= back_n_atoms
                if side_n_atoms > 0:
                    stddev_side_Bfactor /= side_n_atoms

                self.add_point(residue.name, [
                    avg_back_Bfactor, stddev_back_Bfactor, avg_side_Bfactor,
                    stddev_side_Bfactor
                ], residue.get_atom_indices())

            if num_total > 0:
                self.summary = old_div(self.summary, float(num_total))

            return

    ####################################################################################################################
    ### Gamma atom B-factor
    ####################################################################################################################

[docs]    class gamma_Bfactor_data_set(data_set):
        """
        Class to compute and hold data for B-Factors of sidechain gamma atoms

        Data point descriptor is the residue involved, value is "B-Factor"

        Summary is the average B-Factor for gamma atoms

        See parent class `data_set` for additional documenation
        """

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: `report` object
            :param parent: the report object that this is for
            """

            self.title = "Gamma Atom B-Factor"
            self.tooltip = ("Temperature-factor for the gamma atom in each "
                            "residue")
            self.fields = ["Residue", "B-Factor"]
            self.summary = 0.0

            num_total = 0

            for residue in parent.working_protein.residues:
                for atom in future.utils.listvalues(residue.sidechain):
                    if atom.pdbname[2] == "G":
                        self.add_point(residue.name + ":" + atom.pdbname,
                                       [atom.temperature_factor], [atom.index])
                        self.summary += atom.temperature_factor
                        num_total += 1

            if num_total > 0:
                self.summary = old_div(self.summary, float(num_total))

            return

    ####################################################################################################################
    ### Calculate peptide bond planarity
    ####################################################################################################################

[docs]    class peptide_planarity_data_set(data_set):
        """
        Class to compute and hold data for Peptide Planarity

        Data point descriptor is the atoms involved, value is "Dihedral Angle"

        Summary is the average absolute planarity

        See parent class `data_set` for additional documenation
        """

        MAX_PEPTIDE_BOND_LENGTH = 3.0

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: `report` object
            :param parent: the report object that this is for
            """

            self.title = "Peptide Planarity"
            self.tooltip = ("RMSD of the atoms in the peptide linkage from "
                            "the plane that minimizes the RMSD")
            self.fields = ["Peptide Bond", "Dihedral Angle"]
            self.summary = 0.0

            num_total = 0

            ct = parent.working_protein.ct
            residues = parent.working_protein.residues
            residue_total = len(residues)
            for iresidue in range(residue_total - 1):
                # Get backbone atoms of current and neighboring residues
                residue = residues[iresidue]
                CA_atom = residue.backbone[" CA "]
                C_atom = residue.backbone[" C  "]
                nresidue = residues[iresidue + 1]
                nN_atom = nresidue.backbone[" N  "]
                nCA_atom = nresidue.backbone[" CA "]
                atoms = [CA_atom, C_atom, nN_atom, nCA_atom]
                if any(atom is None for atom in atoms):
                    continue
                distance = ct.measure(C_atom.index, nN_atom.index)
                if distance >= self.MAX_PEPTIDE_BOND_LENGTH:
                    continue
                dihedral = ct.measure(CA_atom.index, C_atom.index,
                                      nN_atom.index, nCA_atom.index)
                descriptor = f"{residue.name:>10s} - {nresidue.name:<10s}"
                values = [abs(dihedral)]
                atoms = [C_atom.index, nN_atom.index]
                self.add_point(descriptor, values, atoms)
                self.summary += abs(dihedral)
                num_total += 1

            if num_total > 0:
                self.summary = old_div(self.summary, float(num_total))

            return

    ####################################################################################################################
    ### Calculate the planarity of appropriate side chain groups
    ####################################################################################################################

[docs]    class sidechain_planarity_data_set(data_set):
        """
        Class to compute and hold RMS data for planarity of sidechains

        Data point descriptor is the residue involved, value is "RMSD From
        Planarity"

        Summary is the average RMSD deviation from planarity of sidechains

        See parent class `data_set` for additional documenation
        """

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: `report` object
            :param parent: the report object that this is for
            """

            self.title = "Sidechain Planarity"
            self.tooltip = ("RMSD of nominally planar sidechain groups from "
                            "the plane that minimizes the RMSD")
            self.fields = ["Residue", "RMSD From Planarity"]
            self.summary = 0.0

            # Conventions:
            #    ax + by + cz + d = 0
            # OR z = mx + ny + b
            #
            # a = [ x1 y1 1 ]
            #     [ x2 y2 1 ]
            #     [ x3 y3 1 ]
            # b = [ z1 ]
            #     [ z2 ]
            #     [ z3 ]
            #
            # a * solution = b
            #
            # solution = [ m ]
            #            [ n ]
            #            [ b ]
            #
            # distance = |ax0 + by0 + cz0 + d| / sqrt( a^2 + b^2 + c^2 )
            #          = |mx0 + ny0 - z0 + b| / sqrt( m^2 + n^2 + 1 )

            num_total = 0
            for residue in parent.working_protein.residues:
                pdbres = mm.mmct_atom_get_pdbres(parent.working_protein.ct,
                                                 residue.atoms[0].index)
                if pdbres in planar_groups:
                    atoms = []
                    atom_indices = []
                    for atom in future.utils.listvalues(residue.sidechain):
                        if atom.pdbname in planar_groups[pdbres]:
                            atoms.append([atom.x, atom.y, atom.z])
                            atom_indices.append(atom.index)
                    if len(atoms) > 3:
                        # Try this with three different choices, in case the ring is parallel to an axis.
                        best_msd = 1000.0
                        # z = ax + by + c
                        A = numpy.array([[atom[0], atom[1], 1] for atom in atoms
                                        ])
                        b = numpy.array([atom[2] for atom in atoms])
                        solution, _, _, _ = numpy.linalg.lstsq(A, b, rcond=None)
                        msd = 0.0
                        for atom in atoms:
                            msd += old_div(
                                (solution[0] * atom[0] + solution[1] * atom[1] -
                                 atom[2] + solution[2])**2,
                                (solution[0]**2 + solution[1]**2 + 1))
                        if msd < best_msd:
                            best_msd = msd
                        # y = ax + bz + c
                        A = numpy.array([[atom[0], atom[2], 1] for atom in atoms
                                        ])
                        b = numpy.array([atom[1] for atom in atoms])
                        solution, _, _, _ = numpy.linalg.lstsq(A, b, rcond=None)
                        msd = 0.0
                        for atom in atoms:
                            msd += old_div(
                                (solution[0] * atom[0] + solution[1] * atom[2] -
                                 atom[1] + solution[2])**2,
                                (solution[0]**2 + solution[1]**2 + 1))
                        if msd < best_msd:
                            best_msd = msd
                        # x = az + by + c
                        A = numpy.array([[atom[2], atom[1], 1] for atom in atoms
                                        ])
                        b = numpy.array([atom[0] for atom in atoms])
                        solution, _, _, _ = numpy.linalg.lstsq(A, b, rcond=None)
                        msd = 0.0
                        for atom in atoms:
                            msd += old_div(
                                (solution[0] * atom[2] + solution[1] * atom[1] -
                                 atom[0] + solution[2])**2,
                                (solution[0]**2 + solution[1]**2 + 1))
                        if msd < best_msd:
                            best_msd = msd
                        # Now wrap it up.
                        best_msd /= float(len(atoms))
                        rmsd = math.sqrt(best_msd)
                        self.summary += rmsd
                        num_total += 1
                        self.add_point(residue.name, [rmsd], atom_indices)

            if num_total > 0:
                self.summary = old_div(self.summary, float(num_total))

            return

    ####################################################################################################################
    ### Improper Torsions
    ####################################################################################################################

[docs]    class improper_torsion_data_set(data_set):
        """
        Class to compute and hold RMS data for improper torsions

        Data point descriptor is the residue involved, value is "RMS Deviation"

        Summary is the average RMSD deviation for improper torsions

        See parent class `data_set` for additional documenation
        """

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: `report` object
            :param parent: the report object that this is for
            """

            self.title = "Improper Torsions"
            self.tooltip = ("RMSD of the improper torsion for atoms in "
                            "nominally planar sidechains")
            self.fields = ["Residue", "RMS Deviation"]
            self.summary = 0.0

            num_total = 0

            for residue in parent.working_protein.residues:
                residue_dihedrals = []
                pdbres = mm.mmct_atom_get_pdbres(parent.working_protein.ct,
                                                 residue.atoms[0].index)
                if pdbres in improper_torsions:
                    rms_deviation = 0.0
                    num_found = 0
                    for dihedral in improper_torsions[pdbres]:
                        dihedral_atoms = []
                        for atomname in dihedral:
                            for atom in residue.atoms:
                                if atom.pdbname == atomname:
                                    dihedral_atoms.append(atom.index)
                        if len(dihedral_atoms) == 4:
                            dihedral_value = parent.working_protein.ct.measure(
                                dihedral_atoms[0], dihedral_atoms[1],
                                dihedral_atoms[2], dihedral_atoms[3])
                            rms_deviation += (180.0 - abs(dihedral_value))**2
                            self.summary += (180.0 - abs(dihedral_value))
                            num_found += 1
                            num_total += 1
                    if num_found > 0:
                        rms_deviation = math.sqrt(
                            old_div(rms_deviation, float(num_found)))
                        self.add_point(residue.name, [rms_deviation],
                                       residue.get_sidechain_indices())
                    else:
                        self.add_point(residue.name, [DASH],
                                       residue.get_sidechain_indices())
                else:
                    self.add_point(residue.name, [DASH],
                                   residue.get_sidechain_indices())

            if num_total > 0:
                self.summary = old_div(self.summary, float(num_total))

            return

    ####################################################################################################################
    ### Calculate the CA stereochemistry
    ####################################################################################################################

[docs]    class chirality_data_set(data_set):
        """
        Class to compute and hold C-alpha chirality data

        Data point descriptor is the residue involved, value is C-alpha
        "Chirality"

        Summary is N/A

        See parent class `data_set` for additional documenation
        """

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: `report` object
            :param parent: the report object that this is for
            """

            self.title = "C-alpha Stereochemistry"
            self.tooltip = ("L/D chirality of the C-alpha atoms")
            self.fields = ["Residue", "Chirality"]

            for residue in parent.working_protein.residues:
                if " CB " in list(residue.sidechain):
                    try:
                        CA_N = [
                            residue.backbone[" N  "].x -
                            residue.backbone[" CA "].x,
                            residue.backbone[" N  "].y -
                            residue.backbone[" CA "].y,
                            residue.backbone[" N  "].z -
                            residue.backbone[" CA "].z
                        ]
                        CA_C = [
                            residue.backbone[" C  "].x -
                            residue.backbone[" CA "].x,
                            residue.backbone[" C  "].y -
                            residue.backbone[" CA "].y,
                            residue.backbone[" C  "].z -
                            residue.backbone[" CA "].z
                        ]
                        CA_CB = [
                            residue.sidechain[" CB "].x -
                            residue.backbone[" CA "].x,
                            residue.sidechain[" CB "].y -
                            residue.backbone[" CA "].y,
                            residue.sidechain[" CB "].z -
                            residue.backbone[" CA "].z
                        ]
                        backbone_cross = [
                            CA_N[1] * CA_C[2] - CA_N[2] * CA_C[1],
                            CA_N[2] * CA_C[0] - CA_N[0] * CA_C[2],
                            CA_N[0] * CA_C[1] - CA_N[1] * CA_C[0]
                        ]
                        dot_product = backbone_cross[0] * CA_CB[
                            0] + backbone_cross[1] * CA_CB[1] + backbone_cross[
                                2] * CA_CB[2]
                        if dot_product > 0.0:
                            self.add_point(residue.name, ["L"],
                                           [residue.backbone[" CA "].index])
                        else:
                            self.add_point(residue.name, ["D"],
                                           [residue.backbone[" CA "].index])
                    except:
                        pass

            return

    ####################################################################################################################
    ### Find residues with missing atoms
    ####################################################################################################################

[docs]    class missing_atoms_data_set(data_set):
        """
        Class to compute and hold information on missing atoms

        Data point descriptor is the residue involved, value is nothing

        Summary is N/A

        See parent class `data_set` for additional documenation
        """

[docs]        def analyze(self, parent):
            """
            compute and store the data for this class

            :type parent: `report` object
            :param parent: the report object that this is for
            """

            self.title = "Missing Atoms"
            self.tooltip = ("Residues for which atoms are missing from the "
                            "structure")
            self.fields = ["Residue"]

            resnames = []
            resatoms = []

            iter_res = structure.get_residues_unsorted(
                parent.working_protein.noxtal_ct)
            for residue in iter_res:
                resnames.append(parent.get_residue_name(residue.atom[1]))
                resatoms.append([int(atom) for atom in residue.atom])

            mm.mmpdb_initialize(mm.error_handler)
            bs = Bitset(
                mm.mmpdb_set_incomplete_res_bs(
                    parent.working_protein.noxtal_ct))
            mm.mmpdb_terminate()

            for i in bs:
                self.add_point(resnames[i - 1],
                               values=[],
                               atoms=resatoms[i - 1])

            return

    ####################################################################################
    ### For setting up the protein
    ####################################################################################

    # Store atom information locally, for speed.
[docs]    class local_atom:
        """
        Private class used to store atom information locally for speed
        """

[docs]        def __init__(self):
            self.index = 0
            self.atomic_number = 0
            self.atom_type = 0
            self.radius = 0.0
            self.pdbname = ""
            self.x = 0.0
            self.y = 0.0
            self.z = 0.0
            self.temperature_factor = 0.0
            return

    # Group atoms into residues, for ease in cycling over them.
[docs]    class local_residue:
        """
        Private class used to store residue information locally for convenience
        """

[docs]        def __init__(self):
            self.name = ""
            self.backbone = {" N  ": None, \
                             " CA ": None, \
                             " C  ": None, \
                             " O  ": None, \
                             " CH3": None}
            self.sidechain = {}
            self.atoms = []
            return

[docs]        def get_backbone_indices(self):
            backbone_atoms = []
            for atom in future.utils.listvalues(self.backbone):
                if atom is not None:
                    backbone_atoms.append(atom.index)
            return backbone_atoms

[docs]        def get_sidechain_indices(self):
            return [
                atom.index for atom in future.utils.listvalues(self.sidechain)
            ]

[docs]        def get_atom_indices(self):
            return [atom.index for atom in self.atoms]

    # The local copy of the entire structure.
[docs]    class local_protein:
        """
        Private class used to store protein information locally for convenience
        """

[docs]        def __init__(self):
            self.atoms = []
            self.xtal_atoms = []
            self.residues = []
            self.ct = None
            self.noxtal_ct = None
            return

[docs]    def get_vdw_radius(self, atom, vdwr_mode=VDWR_MMAT_H):
        if vdwr_mode == VDWR_MMAT:
            return mm.mmat_get_vdw_radius(atom.atom_type)
        if vdwr_mode == VDWR_MMAT_H:
            if atom.atomic_number == 1:
                return 1.1
            else:
                return mm.mmat_get_vdw_radius(atom.atom_type)
        if vdwr_mode == VDWR_PXP:
            return pxp_vdwr[atom.atomic_number]
        raise ValueError('Unknown value for Van der Waals radius mode: %d' %
                         vdwr_mode)

[docs]    def setup_protein(self, ct):
        """
        An internal method used to set up the protein for Report calculations.
        This method should be considered a private method of the Report class
        and need not be explicitly called by the user/calling script.

        :type ct: `Structure<schrodinger.structure.Structure>`
        :param ct: The structure this method operates on
        """
        self.working_protein = self.make_local_protein(ct)

[docs]    def make_local_protein(self,
                           ct,
                           keep_hydrogens=False,
                           vdwr_mode=VDWR_MMAT_H):
        """

        :param ct: The structure this method operates on
        :type ct: `Structure<schrodinger.structure.Structure>`
        :param keep_hydrogens: Whether to include hydrogens in the local protein
        :type keep_hydrogens: bool
        :param vdwr_mode: which VDWR source should be used
        :type vdwr_mode: int
        """
        working_protein = self.local_protein()
        working_protein.ct = ct

        for iatom in range(1, mm.mmct_ct_get_atom_total(ct) + 1):
            if 'i_pa_xtalatom' in ct.atom[iatom].property and ct.atom[
                    iatom].property['i_pa_xtalatom'] == 1:
                xtal = True
            else:
                xtal = False
            iatom_name = mm.mmct_atom_get_pdbname(ct, iatom)
            iatom_residue = mm.mmct_atom_get_pdbres(ct, iatom)
            if iatom_name == " CA " or (iatom_name == " CH3" and
                                        iatom_residue == "ACE "):
                residue_name = self.get_residue_name(iatom, working_protein)
                residue_atoms = ct.getResidueAtoms(iatom)
                new_residue = self.local_residue()
                new_residue.name = residue_name
                for residue_atom in residue_atoms:
                    # Not interested in hydrogens.
                    if not keep_hydrogens and residue_atom.atomic_number == 1:
                        continue
                    new_atom = self.local_atom()
                    new_atom.index = int(residue_atom)
                    new_atom.atomic_number = mm.mmct_atom_get_atomic_number(
                        ct, residue_atom)
                    new_atom.atom_type = mm.mmct_atom_get_type(ct, residue_atom)
                    new_atom.radius = self.get_vdw_radius(new_atom, vdwr_mode)
                    if xtal:
                        new_atom.pdbname = mm.mmct_atom_get_pdbname(
                            ct, residue_atom) + ' *'
                    else:
                        new_atom.pdbname = mm.mmct_atom_get_pdbname(
                            ct, residue_atom)
                    new_atom.residue_name = residue_name
                    new_atom.x = mm.mmct_atom_get_x(ct, residue_atom)
                    new_atom.y = mm.mmct_atom_get_y(ct, residue_atom)
                    new_atom.z = mm.mmct_atom_get_z(ct, residue_atom)
                    try:
                        new_atom.temperature_factor = mm.mmct_atom_property_get_real(
                            ct, 'r_m_pdb_tfactor', int(residue_atom))
                    except:
                        new_atom.temperature_factor = 0.0
                    if new_atom.pdbname in list(new_residue.backbone):
                        new_residue.backbone[new_atom.pdbname] = new_atom
                    else:
                        new_residue.sidechain[new_atom.pdbname] = new_atom
                    new_residue.atoms.append(new_atom)
                    if not xtal:  # This list should not contain symmetry atoms.
                        working_protein.atoms.append(new_atom)
                    # This list should contain all atoms, normal and symmetry.
                    working_protein.xtal_atoms.append(new_atom)
                if not xtal:  # Only keep the residue if it's a non-xtal one.
                    working_protein.residues.append(new_residue)
        return working_protein

    # Utility to piece together a comprehensive residue name.
[docs]    def get_residue_name(self, iatom, protein=None):
        """
        Create a residue name for the atom iatom.

        :type iatom: int
        :param iatom: the atom index to build a residue name for

        :rtype: str
        :return: A residue name of the form:
                Chain:PDB_residue_codeResidue_numberInsertion_code
                where PDB_residue_code is a 4 character field and Residue_number is
                a 3 character field
        """
        if protein is None:
            protein = self.working_protein

        (resnum, inscode) = mm.mmct_atom_get_resnum(protein.ct, iatom)
        if inscode == " ":
            inscode = ""
        chain = mm.mmct_atom_get_chain(protein.ct, iatom)
        if chain == " ":
            chain = "_"
        pdbres = mm.mmct_atom_get_pdbres(protein.ct, iatom)
        residue = "%s:%4s%3d%s" % (chain, pdbres, resnum, inscode)
        return residue

[docs]    def get_set(self, set_label):
        """
        Return a single set of data that was specified using the sets_to_run
        parameter when the class object was created.

        :type set_label: str
        :param set_label: One of the sets specified at Report object creation
            time using the sets_to_run parameter.

        Valid sets are:

        - 'STERIC CLASHES'
        - 'BOND LENGTHS'
        - 'BOND ANGLES'
        - 'BACKBONE DIHEDRALS'
        - 'SIDECHAIN DIHEDRALS'
        - 'GFACTOR SUMMARY'
        - 'BFACTORS'
        - 'GAMMA BFACTORS'
        - 'PEPTIDE PLANARITY'
        - 'SIDECHAIN PLANARITY'
        - 'IMPROPER TORSIONS'
        - 'CHIRALITY'
        - 'MISSING ATOMS'

        If the requested set was not specified at the Report object creation
        time, None will be returned.
        """

        for set in self.data:
            if set.label == set_label:
                return set
        return None

[docs]    def write_to_stdout(self):
        """
        Write each of the computed sets of data to the terminal
        """

        for set in self.data:

            print("%-40s" % set.title)

            print(("%-30s" % set.fields[0]),
                  end=' ')  # CHANGED to align columns
            for header in set.fields[1:]:
                print(("%-10s" % header), end=' ')
            print()

            for point in set.points:
                print(("%-30s" % point.descriptor), end=' ')
                for value in point.values:
                    try:
                        float(value)
                    except:
                        print(("%10s" % str(value)), end=' ')
                    else:
                        print(("%8.2f" % value), end=' ')
                print()

            print()

        return


if __name__ == '__main__':

    inputfile_name = sys.argv[1]
    ct = structure.StructureReader.read(inputfile_name)

    prosane = Report(ct, sets_to_run=['ALL'])
    prosane.write_to_stdout()

    print()
    print('Summaries:')
    print()
    for set in prosane.data:
        print('  ' + set.title + ' : ' + str(set.summary))
    print()
    print('Getting by label: ')
    print('  ' + str(prosane.get_set('IMPROPER TORSIONS').summary))
    print()