Source code for schrodinger.protein.tasks.structure_alignment

# Runs SKA structure alignment jobs
import logging
from collections import namedtuple

import schrodinger
from schrodinger.application import ska
from schrodinger.utils import log

TMP_ORIG_ATOM_IDX_PROP = "i_msv_orig_atom_idx"

SEPARATOR = "_"

logger = log.get_output_logger(__file__)
if schrodinger.in_dev_env():
    logger.setLevel(logging.DEBUG)

StructAlignResult = namedtuple(
    "StructAlignResult",
    ["ref_string", "other_string", "ref_sse", "other_sse", "psd", "rmsd"])


[docs]def get_unique_seq_names(sequences): """ Create unique names for the given sequences for use with structure alignment The original sequence name can be recreated using `get_orig_seq_name`. :param sequences: Protein sequences :type sequences: list[sequence.ProteinSequence] :return: Generator of tuples of unique sequence names and sequences :rtype: collections.abc.Iterable[tuple(str, sequence.ProteinSequence)]] """ for index, seq in enumerate(sequences): name = f'{index}{SEPARATOR}{seq.name}' yield name, seq
[docs]def get_orig_seq_name(prefixed_name): """ Given a name output by `get_unique_seq_names`, return the original sequence name """ return prefixed_name.split(SEPARATOR, 1)[1]
[docs]def runStructureAlignment( ref, others, keywords=None, ref_asl=None, other_asl=None, ): """ Runs protein pairwise structure alignment using "ska" backend. :param ref: Reference name and structure :type ref: tuple(str, structure.Structure) :param others: Non-reference (mobile) names and structures :type others: list[tuple(str, structure.Structure)] :param keywords: Keywords to pass to ska :type keywords: dict :param ref_asl: ASL for reference structure :type ref_asl: str :param other_asl: ASL for other structures :type other_asl: str :return: Alignment results keyed by non-reference structure names :rtype: dict(str, StructAlignResult) """ if not others: return {} if keywords is None: keywords = {} try: all_results = ska.pairwise_align_ct(ref, others, keywords=keywords, log=logger, save_props=True, std_res=True, asl=ref_asl, asl_mobile=other_asl) except RuntimeError: # See log for error info return {} ref_name, _ = ref seq_results = dict() for result in all_results: if not result.align: # No results to process continue ref_string = result.align.pop(ref_name) if len(result.align) != 1: raise ValueError("Expected only one non-ref item in result align " f"dict, got {len(result.align)}.") other_name, other_string = result.align.popitem() struct_aln_result = StructAlignResult(ref_string, other_string, result.sse[ref_name], result.sse[other_name], result.psd, result.rmsd) seq_results[other_name] = struct_aln_result return seq_results