Source code for schrodinger.application.prime.wrapper

"""
Classes to assist in preparing prime input files
"""
import os

from schrodinger import structure
from schrodinger.application.inputconfig import InputConfig
from schrodinger.structutils import analyze

ALL_LIGANDS = "all"
NO_LIGANDS = "none"


[docs]class PrimeConfig(InputConfig): """ Class for reading and writing simplified Prime homology modeling input files. """ specs = [ "JOB_TYPE = string(default='MODEL')", "DIR_NAME = string(default='.')", "QUERY_OFFSET = integer(default=0)", "TAILS = integer(default=0)", "SIDE_OPT = string(default='false')", "MINIMIZE = string(default='true')", "BUILD_INSERTIONS = string(default='true')", "MAX_INSERTION_SIZE = integer(default=20)", "BUILD_TRANSITIONS = string(default='true')", "BUILD_DELETIONS = string(default='true')", "KEEP_ROTAMERS = string(default='true')", "KNOWLEDGE_BASED = string(default='true')", "NUM_OUTPUT_STRUCT = integer(default=1)", "TEMPLATE_RESIDUE_NUMBERS = string(default='false')" ]
[docs] def __init__(self, kw=None): """ Accepts one argument which is either a path or a keyword dictionary. """ InputConfig.__init__(self, kw, self.specs) self.validateValues(preserve_errors=False, copy=True)
[docs] def setAlignment(self, alignment, ligands=(ALL_LIGANDS,), include_water=False, template_number=1): """ :type alignment: `BasePrimeAlignment` :param alignment: Alignment. :type ligands: tuple of str :param ligands: Names of ligands to be included in the model. :type include_water: bool :param include_water: Whether to include water molecules in the built model. :type template_number: int :param template_number: Template number """ template = alignment.template short_name = basename_no_ext(template.name) full_template_name = short_name + '_' + template.chain self['TEMPLATE_NAME'] = full_template_name self['%s_NUMBER' % full_template_name] = template_number self['%s_STRUCT_FILE' % full_template_name] = template.file_name self['%s_ALIGN_FILE' % full_template_name] = '%s.aln' % alignment.name # Save template ligands. # Currently, this assumes every ligand consists of a single residue. index = 0 if ligands and NO_LIGANDS not in ligands: for lig in template.ligands: if ALL_LIGANDS in ligands or lig.pdbres in ligands: het_name = '%s_HETERO_%d' % (full_template_name, index) lig_id = 'LIG%s%c:%d' % (lig.pdbres, template.chain, lig.st.atom[1].resnum) self[het_name] = lig_id index += 1 # Save water molecules. if include_water: for water in template.waters: het_name = '%s_HETERO_%d' % (full_template_name, index) atom = water.atom[1] lig_id = 'LIG%s%c:%d' % (atom.pdbres, template.chain, atom.resnum) self[het_name] = lig_id index += 1 self.validateValues(preserve_errors=False, copy=False)
[docs] def writeConfig(self, filename): """ Writes a simplified input file to filename. This input file needs to be run via `$SCHRODINGER/prime`. :type filename: str :param filename: Name of output file. """ if not filename.endswith('.inp'): raise ValueError("Prime input file should end with *.inp") self.writeInputFile(filename, ignore_none=True, yesno=False) return filename
def _quote(self, value, multiline=True): """ Overwrite 'InputConfig._quote' to not quote any values """ if str(value).startswith('LIG'): return value[3:] return value
[docs]class ConsensusConfig(InputConfig): """ Class for reading and writing simplified Prime consensus modeling input files. """ specs = [ "ALIGN_FILE = string()" "SC_OPT = string(default='yes')" ]
[docs] def __init__(self, kw=None): """ Accepts one argument which is either a path or a keyword dictionary. """ InputConfig.__init__(self, kw, self.specs)
[docs] def setAlignment(self, alignment): """ """ self['ALIGN_FILE'] = alignment.name + '.aln' self['SC_OPT'] = 'yes' self.validateValues(preserve_errors=False, copy=False)
[docs] def writeConfig(self, filename): """ Writes a simplified input file to filename. This input file needs to be run via `$SCHRODINGER/consensus_homology`. :type filename: str :param filename: Name of output file. """ if not filename.endswith('.inp'): raise ValueError( "Consensus homology input file should end with *.inp") self.writeInputFile(filename, ignore_none=True, yesno=False) return filename
class _WorkingDirectoryMixin: def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._working_directory = None def setWorkingDirectory(self, wd): """ Set directory used to write files :param wd: Path of the working directory :type wd: str """ self._working_directory = wd def prependWorkingDirectory(self, path): """ If it's set, prepend the working directory to the given path. :param path: Path to modify :type path: str :return: Modified path :rtype: str """ if self._working_directory is not None: path = os.path.join(self._working_directory, path) return path
[docs]class PrimeTemplate(_WorkingDirectoryMixin): """ This class encapsulates Prime template structure. A template includes a single chain. """ WATER_NAMES = {'HOH', 'WAT'} FIND_LIGANDS_KWARGS = { 'allow_ion_only_molecules': True, 'min_heavy_atom_count': 1, 'excluded_residue_names': WATER_NAMES, }
[docs] def __init__(self, pdb_id=''): super().__init__() self._st = None self._ligands = [] self._waters = [] self._file_name = pdb_id + '.pdb' self.sequence = '' self.chain = '' self.name = pdb_id
@property def file_name(self): return self._file_name @property def st(self): return self._st @st.setter def st(self, st): self._st = st self.chain = st.atom[1].chain.strip() self._file_name = ''.join( [basename_no_ext(self.name), '_', self.chain, '.pdb']) @property def ligands(self): """ Returns a list of ligands found in the template structure. :rtype: list of `schrodinger.structuils.analyze.Ligand` objects :return: List of ligands in current template structure. """ if not self._ligands: self._ligands = analyze.find_ligands(self._st, **self.FIND_LIGANDS_KWARGS) return self._ligands @property def waters(self): if not self._waters: self._waters = self._getWaters() return self._waters def _getWaters(self): """ Returns a list of `schrodinger.structure._Molecule` objects corresponding to water molecules present in the template structure. :rtype: list of `schrodinger.structure._Molecule` :return: List of water molecules in the template. """ waters = [] for mol in self._st.molecule: if mol.atom[1].pdbres[:3] in self.WATER_NAMES: waters.append(mol) return waters
[docs] def writePDB(self): """ Writes template structure to PDB file. :rtype: bool :return: True on successful write. """ file_name = self.prependWorkingDirectory(self._file_name) try: self._st.write(file_name, format=structure.PDB) except IOError: print('Cannot write template structure ', file_name) return False return True
[docs]class BasePrimeAlignment(_WorkingDirectoryMixin): """ Encapsulates pairwise query-template alignment. Allows writing alignment files in Prime format. """
[docs] def __init__(self, name, query_seq): """ :type name: str :param name: Alignment name. :type query_seq: str :param query_seq: Text of prealigned query sequence. """ super().__init__() self._name = name self._query_sequence = query_seq self._query_sequence = convert_gaps_to_dots(self.query_sequence) self._query_sequence = self.query_sequence.upper() self._template = None self._template_sequence = ''
@property def name(self): return self._name @property def query_sequence(self): return self._query_sequence @property def template(self): return self._template @property def template_sequence(self): return self._template_sequence
[docs] def setTemplate(self, template, sequence=None): """ Adds a pre-aligned template sequence to the alignment. :type template: `PrimeTemplate` :param template: Template. :type sequence: str :param sequence: Template sequence aligned to query (i.e. including gaps). """ self._template = template self._template_sequence = sequence if sequence else template.sequence
[docs] def setWorkingDirectory(self, wd): """ Propagate change in working directory to template """ super().setWorkingDirectory(wd) if self._template is not None: self._template.setWorkingDirectory(wd)
[docs] def getAlnFilename(self): """ Get the filename of the aln written by `writeInputForPrime`. """ return self.prependWorkingDirectory(f"{self.name}.aln")
[docs] def writeInputForPrime(self): """ Writes Prime alignment to external file. """ file_name = self.getAlnFilename() try: out_file = open(file_name, "w") except IOError: return False out_file.write("ProbeAA:" + convert_gaps_to_dots(self._query_sequence) + "\n") out_file.write("Fold AA:" + convert_gaps_to_dots(self._template_sequence) + "\n") out_file.close() self._template.writePDB() return True
[docs] def writeInputForConsensus(self, file_name, append=False): """ Writes out FASTA alignment for consensus homology modeling. The alignment needs to include chain name and secondary structure definition for each template sequence. :type append: bool :param append: Whether to append to existing file. :type file_name: str :param file_name: Name of output file. """ file_name = self.prependWorkingDirectory(file_name) try: if append: out_file = open(file_name, 'a') else: out_file = open(file_name, 'w') out_file.write(">Query\n") out_file.write(self.query_sequence.replace('.', '~') + "\n") except IOError: return False chain_id = self._template.st.atom[1].chain name = '>' + basename_no_ext(self._template.name) + '_' + chain_id # write ssa string fasta_name = name + ' ssa,\n' out_file.write(fasta_name) ssa = convert_gaps_to_tildes(self.getSSA(self._template.st, chain_id)).replace(' ', '-') out_file.write(ssa + '\n') # write sequence string fasta_name = name + ' Chain,\n' out_file.write(fasta_name) out_file.write(convert_gaps_to_tildes(self._template_sequence) + '\n') # write template structure self._template.writePDB() out_file.close() return True
[docs] def getSSA(self, st, chain_id=''): """ Gets a secondary structure annotation string for structure st chain chain_id. :type st: `schrodinger.structure.Structure` :param st: Input structure. :type chain_id: str :param chain_id: Chain ID to extract SSA from. """ secondary_types = { structure.SS_NONE: ' ', structure.SS_HELIX: 'H', structure.SS_STRAND: 'E', structure.SS_LOOP: ' ', structure.SS_TURN: ' ' } ssa = [] for chain in st.chain: if chain.name == chain_id: prev_res_id = (None, None, None) for atom in chain.atom: res_id = (atom.resnum, atom.chain, atom.inscode) if res_id != prev_res_id: ssa.append(secondary_types[atom.secondary_structure]) prev_res_id = res_id return ''.join(ssa)
[docs]def basename_no_ext(file_name): """ Returns file basename without extension. """ return os.path.splitext(os.path.basename(file_name))[0]
[docs]def convert_gaps_to_dots(sequence): """ Replaces gap symbols with dots to make a sequence compatible with Prime. :type sequence: str :param sequence: Input gapped sequence. :rtype: str :return: Converted sequence. """ return sequence.replace(' ', '.').replace('-', '.').replace('~', '.')
[docs]def convert_gaps_to_tildes(sequence): """ Replaces gap symbols with tildes to make a sequence compatible with consensus modeling. :type sequence: str :param sequence: Input gapped sequence. :rtype: str :return: Converted sequence. """ return sequence.replace('.', '~').replace('-', '~')