Source code for schrodinger.pipeline.stages.rmsd

"""
A stage for calculating RMSDs for structures in two sets.

Copyright Schrodinger, LLC. All rights reserved.

"""

# Contributors: Matvey Adzhigirey

import sys

from schrodinger import structure
from schrodinger.pipeline import pipeio
from schrodinger.pipeline import stage
from schrodinger.structutils import rmsd


[docs]class RmsdStage(stage.Stage): """ Stage for calculating RMSDs for structures in two sets. First set - original structures Second set - new conformations The keywords specific to this stage are... UNIQUEFIELD_REF - Property in the reference set used to match structures for comparison. If unset, it defaults to the UNIQUEFIELD property. The RMSD is calculated for pairs in which the UNIQUEFIELD_REF value in the reference ligand matches the UNIQUEFIELD value in the modified ligand. UNIQUEFIELD - Property in the modified set used to match structures for comparison. RMSDFIELD - Property to save RMSDs into in the output file RENUMBER_STRUCTURES - Should be False if atom numbering is the same between same structure in two input sets IGNORE_STATES - If True, ignore hydrogens and treat all bonds as single order. The stage takes two input structure file sets and generates one output structure file set. """
[docs] def __init__(self, *args, **kwargs): """ See class docstring. """ specs = """ UNIQUEFIELD_REF = string(default=None) UNIQUEFIELD = string(default="s_m_title") RMSDFIELD = string(default="r_vsw_rmsd") RENUMBER_STRUCTURES = boolean(default=False) IGNORE_STATES = boolean(default=False) """ stage.Stage.__init__(self, specs=specs, *args, **kwargs) self.addExpectedInput(1, "structures", True) self.addExpectedInput(2, "structures", False) self.addExpectedOutput(1, "structures", True) # Property for keeping track of where we were when the program crashed self.status = "NOT STARTED"
[docs] def operate(self): """ Algorithm: Compare each structure in the original (first) ligands set to every structure in the modified (second) set. If their corresponding UNIQUEFIELD_* properties are the same, calculate RMS for the structure, save it to RMSDFIELD, and write out the structure to the output file. """ # FIXME: USE outname = self.getOutputName(1) uniquefield_ref = self['UNIQUEFIELD_REF'] uniquefield = self['UNIQUEFIELD'] if not uniquefield_ref: uniquefield_ref = uniquefield rmsdfield = self['RMSDFIELD'] renumber_structures = self['RENUMBER_STRUCTURES'] ignore_states = self['IGNORE_STATES'] reference_ligands = self.getInput(1).getFiles() modified_ligands = self.getInput(2).getFiles() # Read the modified structures (second set) into memory: print("Reading second input set (modified structures)...") modified_sts = [] st_num = 0 for filename in modified_ligands: for modst in structure.StructureReader(filename): st_num += 1 if st_num % 1000 == 0: sys.stdout.write(".") sys.stdout.flush() modified_sts.append(modst) print('\nNumber of modified structures:', len(modified_sts)) outfile = self.genOutputFileName(1, extension='.mae') st_num = 0 num_out_sts = 0 self.info("Calculating RMSDs...") self.info(" Reference property = '%s'" % uniquefield_ref) self.info(" Modified ligand property = '%s'" % uniquefield) self.info("") self.info("%-30s %12s %12s %12s" % ("Identifier", "Ref. st. #", "Mod. st. #", "RMSD")) self.info("-" * 69) writer = structure.StructureWriter(outfile) for ligfile in reference_ligands: for refst in structure.StructureReader(ligfile): st_num += 1 if st_num % 1000 == 0: sys.stdout.write(".") sys.stdout.flush() # Determine if there is a MODIFIED ligand with same # UNIQUEFIELD: ref_id = refst.property.get(uniquefield_ref) # This reference ligand has no UNIQUEFIELD property if ref_id is None: msg = "Reference ligand #%i has no propety %s" % ( st_num, uniquefield_ref) raise RuntimeError(msg) for i, modst in enumerate(modified_sts): mod_id = modst.property.get(uniquefield) # This reference ligand has no UNIQUEFIELD property if mod_id is None: msg = "Modified ligand #%i has no propety %s" % ( i + 1, uniquefield) raise RuntimeError(msg) if mod_id != ref_id: continue # Go to the next modified ligand conf_rmsd = rmsd.ConformerRmsd(refst, modst) if ignore_states: # Set the following option so ConformerRmsd will work with # varying tautomers and ionization states. conf_rmsd.use_heavy_atom_graph = True if renumber_structures: conf_rmsd.renumber_structures = True rmsdinst = conf_rmsd.calculate() self.info("%-30s %12i %12i %12.4f" % (ref_id, st_num, i + 1, rmsdinst)) modst.property[rmsdfield] = rmsdinst writer.append(modst) num_out_sts += 1 writer.close() msg = "\nNumber of structures for which RMSD was calculated: %i" % num_out_sts msg += "\nOutput file: %s" % outfile self.info(msg) sys.stdout.flush() self.setOutput(1, pipeio.Structures([outfile])) return
# EOF