Source code for schrodinger.protein.tasks.sta

import copy

from schrodinger.models import parameters
from schrodinger.protein import alignment
from schrodinger.protein import sequence
from schrodinger.tasks import tasks


class STATask(tasks.SubprocessCmdTask):
    """
    Task that runs the ``sta`` executable as a subprocess on a query
    sequence and a reference structure, then reads the aligned sequences
    back from the ``<task name>.raw`` file.

    :ivar output: After a successful run, a two-item list
        ``[aligned_ref_seq, aligned_query_seq]`` of strings assembled from
        the raw output file.
    """
    output: list

    class Input(parameters.CompoundParam):
        """
        :ivar query_seq: The sequence to use as the STA query sequence
        :ivar ref_seq: The sequence to use as the STA reference structure
        :ivar protein_family: Value passed to the ``-protein_family``
            command line option; the option is left out when this is None
        :ivar constraints: Pairs of (query_seq, ref_seq) residues to
            constrain
        """
        query_seq: sequence.ProteinSequence = None
        ref_seq: sequence.ProteinSequence = None
        protein_family: str = None
        constraints: list

    #########################
    # TASK METHODS
    #########################

    def _getQueryFile(self):
        """
        :return: Task filename of the query sequence file
            (``<task name>.seq``)
        """
        query_basename = self.name + '.seq'
        return self.getTaskFilename(query_basename)

    def _getTemplateFile(self):
        """
        :return: Task filename of the template structure file
            (``<task name>-template.pdb``)
        """
        template_basename = self.name + '-template.pdb'
        return self.getTaskFilename(template_basename)

    @tasks.preprocessor
    def _writeInput(self):
        """
        Write the two input files read by the command built in `makeCmd`:
        the gap-free query sequence and the reference structure.
        """
        fasta_path = self._getQueryFile()
        # Strip gaps from a deep copy so the caller's query sequence is
        # left untouched.
        ungapped_query = copy.deepcopy(self.input.query_seq)
        ungapped_query.removeAllGaps()
        alignment.ProteinAlignment([ungapped_query]).toFastaFile(fasta_path)

        pdb_path = self._getTemplateFile()
        self.input.ref_seq.getStructure().write(pdb_path)

    def makeCmd(self):
        """
        @overrides: tasks.AbstractCmdTask

        :return: The ``sta`` command line as a list of arguments
        :rtype: list
        """
        cmd = [
            'sta',
            '-NOJOBID',
            '-template_pdb',
            self._getTemplateFile(),
            '-template_chain_id',
            self.input.ref_seq.structure_chain,
            self._getQueryFile(),
        ]
        family = self.input.protein_family
        if family is not None:
            cmd += ['-protein_family', family]
        # One "-pair <query>_<ref>" option per constrained residue pair
        for constraint in self.input.constraints:
            cmd += ['-pair', self._formatConstraint(*constraint)]
        return cmd

    @staticmethod
    def _formatConstraint(query_res, structured_res):
        """
        Format a constraint for STA.

        A constraint is written as the 1-based index of the query residue
        and the 1-based index of the structured residue, joined by an
        underscore. e.g. a query residue at index 1 and a structured
        residue at index 0 is represented as 2_1.

        :param query_res: The query sequence residue to constrain
        :type query_res: residue.Residue

        :param structured_res: The structured sequence residue to constrain
        :type structured_res: residue.Residue

        :return: The formatted constraint
        :rtype: str
        """
        # ``idx_in_seq`` is shifted by one because the backend takes
        # 1-based residue indexes
        ref_number = structured_res.idx_in_seq + 1
        query_number = query_res.idx_in_seq + 1
        return "{}_{}".format(query_number, ref_number)

    @tasks.postprocessor
    def _readOutputFile(self):
        """
        Read ``<task name>.raw`` and store the aligned sequences in
        `output` as ``[aligned_ref_seq, aligned_query_seq]``.

        Query pieces are the second whitespace-separated token of lines
        starting with ``ProbeAA:``; reference pieces are the third token of
        lines starting with ``Fold AA:`` (the tag itself splits into two
        tokens). Pieces are concatenated in file order.

        :raise RuntimeError: If either kind of line is missing from the file
        """
        raw_path = self.getTaskFilename(self.name + '.raw')
        query_chunks = []
        ref_chunks = []
        with open(raw_path) as raw_file:
            for row in raw_file:
                if row.startswith('ProbeAA:'):
                    query_chunks.append(row.split()[1])
                    continue
                if row.startswith('Fold AA:'):
                    ref_chunks.append(row.split()[2])
        if not (ref_chunks and query_chunks):
            raise RuntimeError("No output")
        self.output = ["".join(ref_chunks), "".join(query_chunks)]

    def getGaps(self):
        """
        Locate the gap characters (``.``) in the aligned sequences stored
        in `output`.

        :return: A tuple ``(ref_gaps, query_gaps)`` of lists holding the
            0-based positions of the gap characters in the aligned
            reference and aligned query sequences respectively
        :rtype: tuple(list, list)

        :raise RuntimeError: If `output` is empty
        """
        if not self.output:
            raise RuntimeError("Cannot get gaps without output")

        def gap_columns(aligned_row):
            return [
                col for col, symbol in enumerate(aligned_row) if symbol == "."
            ]

        ref_row, query_row = self.output
        return (gap_columns(ref_row), gap_columns(query_row))