Source code for schrodinger.pipeline.stages.rmsd
"""
A stage for calculating RMSDs for structures in two sets.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Matvey Adzhigirey
import sys
from schrodinger import structure
from schrodinger.pipeline import pipeio
from schrodinger.pipeline import stage
from schrodinger.structutils import rmsd
[docs]class RmsdStage(stage.Stage):
"""
Stage for calculating RMSDs for structures in two sets.
First set - original structures
Second set - new conformations
The keywords specific to this stage are...
UNIQUEFIELD_REF - Property in the reference set used to match
structures for comparison. If unset, it defaults
to the UNIQUEFIELD property. The RMSD is
calculated for pairs in which the UNIQUEFIELD_REF
value in the reference ligand matches the
UNIQUEFIELD value in the modified ligand.
UNIQUEFIELD - Property in the modified set used to match
structures for comparison.
RMSDFIELD - Property to save RMSDs into in the output file
RENUMBER_STRUCTURES - Should be False if atom numbering is the same
between same structure in two input sets
IGNORE_STATES - If True, ignore hydrogens and treat all bonds
as single order.
The stage takes two input structure file sets and generates one
output structure file set.
"""
[docs] def __init__(self, *args, **kwargs):
"""
See class docstring.
"""
specs = """
UNIQUEFIELD_REF = string(default=None)
UNIQUEFIELD = string(default="s_m_title")
RMSDFIELD = string(default="r_vsw_rmsd")
RENUMBER_STRUCTURES = boolean(default=False)
IGNORE_STATES = boolean(default=False)
"""
stage.Stage.__init__(self, specs=specs, *args, **kwargs)
self.addExpectedInput(1, "structures", True)
self.addExpectedInput(2, "structures", False)
self.addExpectedOutput(1, "structures", True)
# Property for keeping track of where we were when the program crashed
self.status = "NOT STARTED"
[docs] def operate(self):
"""
Algorithm:
Compare each structure in the original (first) ligands set to every
structure in the modified (second) set. If their corresponding
UNIQUEFIELD_* properties are the same, calculate RMS for the structure,
save it to RMSDFIELD, and write out the structure to the output file.
"""
# FIXME: USE outname = self.getOutputName(1)
uniquefield_ref = self['UNIQUEFIELD_REF']
uniquefield = self['UNIQUEFIELD']
if not uniquefield_ref:
uniquefield_ref = uniquefield
rmsdfield = self['RMSDFIELD']
renumber_structures = self['RENUMBER_STRUCTURES']
ignore_states = self['IGNORE_STATES']
reference_ligands = self.getInput(1).getFiles()
modified_ligands = self.getInput(2).getFiles()
# Read the modified structures (second set) into memory:
print("Reading second input set (modified structures)...")
modified_sts = []
st_num = 0
for filename in modified_ligands:
for modst in structure.StructureReader(filename):
st_num += 1
if st_num % 1000 == 0:
sys.stdout.write(".")
sys.stdout.flush()
modified_sts.append(modst)
print('\nNumber of modified structures:', len(modified_sts))
outfile = self.genOutputFileName(1, extension='.mae')
st_num = 0
num_out_sts = 0
self.info("Calculating RMSDs...")
self.info(" Reference property = '%s'" % uniquefield_ref)
self.info(" Modified ligand property = '%s'" % uniquefield)
self.info("")
self.info("%-30s %12s %12s %12s" %
("Identifier", "Ref. st. #", "Mod. st. #", "RMSD"))
self.info("-" * 69)
writer = structure.StructureWriter(outfile)
for ligfile in reference_ligands:
for refst in structure.StructureReader(ligfile):
st_num += 1
if st_num % 1000 == 0:
sys.stdout.write(".")
sys.stdout.flush()
# Determine if there is a MODIFIED ligand with same
# UNIQUEFIELD:
ref_id = refst.property.get(uniquefield_ref)
# This reference ligand has no UNIQUEFIELD property
if ref_id is None:
msg = "Reference ligand #%i has no propety %s" % (
st_num, uniquefield_ref)
raise RuntimeError(msg)
for i, modst in enumerate(modified_sts):
mod_id = modst.property.get(uniquefield)
# This reference ligand has no UNIQUEFIELD property
if mod_id is None:
msg = "Modified ligand #%i has no propety %s" % (
i + 1, uniquefield)
raise RuntimeError(msg)
if mod_id != ref_id:
continue # Go to the next modified ligand
conf_rmsd = rmsd.ConformerRmsd(refst, modst)
if ignore_states:
# Set the following option so ConformerRmsd will work with
# varying tautomers and ionization states.
conf_rmsd.use_heavy_atom_graph = True
if renumber_structures:
conf_rmsd.renumber_structures = True
rmsdinst = conf_rmsd.calculate()
self.info("%-30s %12i %12i %12.4f" %
(ref_id, st_num, i + 1, rmsdinst))
modst.property[rmsdfield] = rmsdinst
writer.append(modst)
num_out_sts += 1
writer.close()
msg = "\nNumber of structures for which RMSD was calculated: %i" % num_out_sts
msg += "\nOutput file: %s" % outfile
self.info(msg)
sys.stdout.flush()
self.setOutput(1, pipeio.Structures([outfile]))
return
# EOF