Source code for schrodinger.application.msv.gui.validate_align

"""
Helper methods for running align commands from the GUI
"""
import contextlib
import itertools
from functools import partial

import decorator

from schrodinger.application.msv import command
from schrodinger.application.msv.gui import dialogs
from schrodinger.application.msv.gui.viewconstants import SeqAlnMode
from schrodinger.protein.align import CantAlignException
from schrodinger.ui.qt import messagebox
from schrodinger.ui.qt.utils import wrap_qt_tag

# Rich-text confirmation shown before aligning only the selected residue
# blocks. The positional field ({0}) receives the "<li>...</li>" items
# describing each block, joined by newlines.
ALN_SELECTION_TEXT = """
<div>
Residues are selected. Alignment will be limited to the residues within the
specified index blocks:
<ul>
{0}
</ul>
To align all residues in the sequences, choose Cancel and clear the residue
selection. To continue with the limited alignment, choose OK.
</div>
"""

# Rich-text prompt shown when some columns are only partially anchored.
# Fields: {0} is the comma-separated list of affected columns, {clear_text}
# and {yes_text} are the labels of the corresponding dialog buttons.
# The closing </div> keeps the markup well-formed, matching ALN_SELECTION_TEXT.
PARTIAL_ANCHORS_TEXT = """
<div>
Multiple alignment requires only fully-anchored columns. The following columns
are partially anchored:
<ul>
<li>{0}</li>
</ul>
To clear anchors and align all residues in the sequence, choose {clear_text}.
To anchor all residues in these columns and continue with anchored alignment,
choose {yes_text}.
</div>
"""

# Prompt shown when homology-modeling alternate regions must be cleared before
# aligning. {yes_text} is the label of the confirming button.
ALTERNATE_REGIONS_TEXT = wrap_qt_tag("""
Alternate regions for chimeric homology modeling exist. Alignment requires
these regions to be cleared. To align, choose {yes_text}.
""")

# Warning shown when a structured sequence replaces a structureless reference.
# The positional field is filled with either "selected " or an empty string.
USE_STRUC_SEQ_AS_REF_WARNING = wrap_qt_tag("""
The reference sequence has no structure. The first {}
sequence in the list with structure will be used instead.
""")

# Validation error messages. Those containing an "{undo_desc}" field are
# formatted with the name of the align command before being displayed.
NO_REF_STRUCTURE_ERR = wrap_qt_tag("""
Need a structured reference sequence and at
least 1 structured sequence to align to it.""")

TOO_FEW_STRUCTURES_ERR = wrap_qt_tag("""
At least two sequences with structure are needed to run {undo_desc}.""")

STRUCTURE_BLOCK_ERR = wrap_qt_tag("""
{undo_desc} cannot run with anchored or selected residues.
Please unanchor/deselect residues and try again.""")

SEL_SEQS_ANCHOR_ERR = wrap_qt_tag("""
{undo_desc} does not currently support aligning with both selected sequences and
anchored residues. Please remove anchors or deselect sequences and try again."""
                                 )

ALL_SEL_ANCHORED_ERR = wrap_qt_tag("""
Cannot align because all selected columns are anchored. Please unanchor
residues or select other residues and try again.""")

SEL_ANCHOR_ERR = wrap_qt_tag("""
{undo_desc} does not currently support aligning with residues selected to the
left of anchors. Please deselect residues or remove anchors and try again.""")

# Prompt shown when inter-chain anchors must be removed before chain starts
# can be aligned.
INTERCHAIN_ANCHORS_TEXT = wrap_qt_tag("""
Cannot align due to interchain anchors. Do you wish to clear these anchors and
continue?
""")


def alignment_precheck(aln):
    """
    Check that the alignment holds enough content for an align command.

    :param aln: Alignment to check. It must support `len()` and iteration
        over sequences, each of which provides `residues()`.

    :return: Whether we can align, and a message explaining why not (an empty
        string when aligning is possible)
    :rtype: tuple(bool, str)
    """
    if len(aln) < 2:
        return False, "Two sequences are required to align"
    # Flatten the per-sequence residue iterables before testing them. Passing
    # the generator of iterables straight to `chain` would make `any` test the
    # iterables themselves, and an iterator object is truthy even when it
    # yields nothing.
    residues = itertools.chain.from_iterable(seq.residues() for seq in aln)
    # Test for the existence of a residue rather than its truthiness
    if not any(True for _ in residues):
        return False, "Sequences need residues to be aligned"
    return True, ""
class _AlignmentSelectedSeqs(contextlib.AbstractContextManager):
    """
    Temporarily remove non-selected seqs from an alignment.

    Construction updates the sequence selection and may prompt the user (see
    `user_canceled`); the sequences themselves are only removed on
    `__enter__` and are restored on `__exit__`.

    Note: the implementation is not robust to changes in the number or ordering
    of alignment seqs (the position of removed seqs is cached by index)
    """

    def __init__(self, aln, pairwise=False, filter_unstructured=False):
        """
        :param aln: The alignment to operate on

        :param pairwise: Whether only two seqs are kept if none are selected
        :type pairwise: bool

        :param filter_unstructured: Whether to filter unstructured seqs
        :type filter_unstructured: bool
        """
        self._orig_selected_res = None
        self.aln = aln
        self.orig_ref_seq = aln.getReferenceSeq()
        if filter_unstructured:
            filter_func = lambda seq: seq.hasStructure()
        else:
            filter_func = lambda seq: True
        seqs_matching_ref_type = aln.getSeqsMatchingRefType()
        idxs_to_keep = [
            i for i, s in enumerate(aln)
            if filter_func(s) and s in seqs_matching_ref_type
        ]
        # The first sequence that passes filtering acts as the (possibly
        # temporary) reference sequence
        self.ref_seq_idx = idxs_to_keep[0]

        # We need special handling for interchain disulfide bonds in any
        # sequences that we temporarily remove from the alignment. Removing a
        # sequence from the alignment will automatically delete interchain
        # disulfides, so this class manually removes and then restores these
        # bonds. This ensures that the bonds will properly survive both the
        # alignment and an undo of the alignment.
        self._known_interchain_disulfides = set()
        self._pred_interchain_disulfides = set()
        if self.ref_seq_idx != 0:
            # record interchain disulfides in the reference sequence, since
            # we'll be removing it to set a temporary reference sequence
            self._known_interchain_disulfides.update(
                bond for bond in self.aln[0].disulfide_bonds
                if bond.is_inter_sequence)
            self._pred_interchain_disulfides.update(
                bond for bond in self.aln[0].pred_disulfide_bonds
                if bond.is_inter_sequence)

        self._updateSeqSelection(pairwise=pairwise, idxs_to_keep=idxs_to_keep)

        # Store map of seqs to remove on enter and restore on exit
        self._seqs_to_remove_map = self._getSeqsToRemove(idxs_to_keep)
        if self._seqs_to_remove_map:
            seqs_to_remove = self._seqs_to_remove_map.values()
            self._known_interchain_disulfides.update(
                bond for seq in seqs_to_remove for bond in seq.disulfide_bonds
                if bond.is_inter_sequence)
            self._pred_interchain_disulfides.update(
                bond for seq in seqs_to_remove
                for bond in seq.pred_disulfide_bonds if bond.is_inter_sequence)

        # If the reference seq needs to be removed, prompt the user
        self.user_canceled = self._promptToReplaceStructurelessRefSeq()

    def _updateSeqSelection(self, pairwise, idxs_to_keep):
        """
        Update sequence selection state

        :param pairwise: Whether only two seqs are kept if none are selected
        :param idxs_to_keep: Sequence indices that passed filtering
        """
        aln = self.aln
        seq_sel_model = aln.seq_selection_model
        # Store Alignment Sets to align before changing selection
        aln_sets, loose_seqs = get_aln_sets_and_seqs_to_align(aln)
        seq_sel_model.setSelectionState({self.orig_ref_seq}, False)
        ref_seq = aln[self.ref_seq_idx]
        seq_sel_model.setSelectionState({ref_seq}, True)
        sel_seqs = seq_sel_model.getSelection()
        # If no non-reference seqs are selected, select the first one
        if pairwise and len(sel_seqs) == 1:
            idxs_to_keep = set(idxs_to_keep)
            for idx, seq in enumerate(aln):
                if idx not in idxs_to_keep or idx == self.ref_seq_idx:
                    continue
                seq_sel_model.setSelectionState({seq}, True)
                break
        for aln_set in aln_sets:
            seq_sel_model.setSelectionState(aln_set, True)
        if loose_seqs:
            seq_sel_model.setSelectionState(loose_seqs, True)

    def _getSeqsToRemove(self, idxs_to_keep):
        """
        Create mapping between indexes and seqs to remove/restore

        :param idxs_to_keep: Sequence indices that passed filtering
        :return: Mapping of original sequence index to sequence
        :rtype: dict(int, sequence.Sequence)
        """
        aln = self.aln
        # Remove non-selected seqs if some (not all) non-ref seqs are selected
        if partial_seq_selection(aln, ref_seq=aln[self.ref_seq_idx]):
            sel_indices = set(aln.seq_selection_model.getSelectionIndices())
            idxs_to_keep = sel_indices.intersection(idxs_to_keep)
        # Compute dict of indices and sequences to remove and restore
        all_idxs = set(range(len(aln)))
        idxs_to_remove = all_idxs.difference(idxs_to_keep)
        # Index 0 (the original reference) is handled separately in
        # __enter__/__exit__, so it never goes in this map
        idxs_to_remove.discard(0)
        return {idx: aln[idx] for idx in idxs_to_remove}

    def _promptToReplaceStructurelessRefSeq(self):
        """
        Ask for confirmation to replace the structureless reference sequence
        with a structured sequence

        :return: True if the reference seq needs to be replaced and the user
            cancels, False otherwise
        """
        # This method depends on `self.ref_seq_idx` and
        # `self._seqs_to_remove_map` being up to date
        if self.ref_seq_idx == 0:
            return False
        if len(self._seqs_to_remove_map) > 1:
            # Removing more than just reference
            sel_text = 'selected '
        else:
            sel_text = ''
        msg = USE_STRUC_SEQ_AS_REF_WARNING.format(sel_text)
        save_response_key = 'replace_structureless_ref_seq_for_alignment'
        response = messagebox.QuestionMessageBox(
            text=msg,
            save_response_key=save_response_key,
            yes_text='OK',
            no_text=None,
            add_cancel_btn=True).exec()
        user_canceled = not response
        return user_canceled

    def __enter__(self):
        """
        Remove non-selected seqs
        """
        # Manually remove bonds so that they're properly restored on undo
        for bond in (self._known_interchain_disulfides |
                     self._pred_interchain_disulfides):
            self.aln.removeDisulfideBond(bond)
        if self.ref_seq_idx != 0:
            # We are temporarily removing a structureless ref seq
            self.aln.setReferenceSeq(self.aln[self.ref_seq_idx])
            self.aln.removeSeq(self.orig_ref_seq)
        if self._seqs_to_remove_map:
            seqs_to_remove = self._seqs_to_remove_map.values()
            old_aln_sets, old_set_id, old_no_set = self.aln._getCurrentAlnSets(
                seqs_to_remove)
            # Bind the current Alignment Set state so __exit__ can restore it
            self._restoreAlnSets = partial(self.aln._restoreAlnSets,
                                           old_aln_sets, old_set_id,
                                           old_no_set)
            # Remember which residues of the removed seqs were selected so
            # the selection can be re-applied on exit
            orig_selected_res = set()
            selected_res = self.aln.res_selection_model.getSelection()
            for seq in seqs_to_remove:
                orig_selected_res.update(selected_res.intersection(seq))
            self._orig_selected_res = orig_selected_res
            self.aln.removeSeqs(seqs_to_remove)

    def __exit__(self, *exc):
        """
        Restore non-selected seqs
        """
        tmp_ref_seq = None
        if self.ref_seq_idx != 0:
            # Put the original reference back and pull out the temporary one
            # so it can be re-inserted at its original index below
            tmp_ref_seq = self.aln.getReferenceSeq()
            self.aln.addSeq(self.orig_ref_seq)
            self.aln.setReferenceSeq(self.orig_ref_seq)
            self.aln.removeSeq(tmp_ref_seq)
        to_restore = list(self._seqs_to_remove_map.items())
        if tmp_ref_seq is not None:
            to_restore.append((self.ref_seq_idx, tmp_ref_seq))
        # Re-insert in ascending index order so cached indices stay valid
        for index, seq in sorted(to_restore):
            self.aln.addSeq(seq, index)
        if self._seqs_to_remove_map:
            if self._orig_selected_res:
                self.aln.res_selection_model.setSelectionState(
                    self._orig_selected_res, True)
            self._restoreAlnSets()
        # Restore the bonds that we removed in __enter__. We don't use
        # _restoreInvalidatedBonds here because we need to use the undoable
        # version of addDisulfideBond.
        for res1, res2 in self._known_interchain_disulfides:
            self.aln.addDisulfideBond(res1, res2)
        for res1, res2 in self._pred_interchain_disulfides:
            self.aln.addDisulfideBond(res1, res2, known=False)


class _AlignStack(contextlib.ExitStack):
    """
    Custom context manager to be used when aligning alignments. It
    automatically creates a single, compressed undo command for all changes
    made to the alignment within the context.

    If a `CantAlignException` or `UserCanceledException` is raised in the
    context or the `cantAlign` method is called, the undo command will be
    undone and discarded.
    """

    def __init__(self, widget, undo_desc, *args, **kwargs):
        """
        :param widget: must have attributes `undo_stack` and `warning()`
        :type widget: QtWidgets.QWidget

        :param undo_desc: Description for undo command
        :type undo_desc: str
        """
        super().__init__(*args, **kwargs)
        self.widget = widget
        self.undo_desc = undo_desc

    def __enter__(self):
        """
        Open the context manager stack and begin an undo command.
        """
        super().__enter__()
        self.enter_context(
            command.compress_command(self.widget.undo_stack, self.undo_desc))
        return self

    def __exit__(self, exc_type, exc, tb):
        """
        Close the context manager stack, suppressing `CantAlignException`
        and `UserCanceledException`.

        :return: True if the exception was handled here; otherwise whatever
            the underlying `ExitStack` reports, so that suppression requested
            by an entered context manager is honored.
        """
        if isinstance(exc, CantAlignException):
            msg = f"Problem running {self.undo_desc}:\n{exc}"
            return self.cantAlign(msg=msg)
        elif isinstance(exc, UserCanceledException):
            return self.cantAlign()
        # Propagate the stack's own verdict instead of discarding it
        return super().__exit__(exc_type, exc, tb)

    def cantAlign(self, msg=None):
        """
        Close the context manager stack, revert the undo command, and show a
        warning.

        :param msg: If not None, the message will be passed to widget.warning()
        :type msg: str or None

        :return: Always True
        :rtype: bool
        """
        self.close()
        if msg:
            self.widget.warning(msg)
        command.revert_command(self.widget.undo_stack)
        return True  # Allow __exit__ to suppress specific exceptions
class UserCanceledException(Exception):
    """
    Signals that the user chose to cancel the alignment when asked to confirm
    it in a message box.
    """
    # Deliberately empty: the exception type itself is the whole message
def partial_seq_selection(aln, *, ref_seq=None):
    """
    Tell whether the selected sequences form a non-empty, strict subset of the
    non-reference sequences.

    :param aln: Alignment whose sequence selection is inspected
    :type aln: gui_alignment.GuiProteinAlignment

    :param ref_seq: Sequence to treat as the reference. If None, the
        alignment's reference sequence is used.
    :type ref_seq: sequence.ProteinSequence

    :rtype: bool
    """
    reference = aln.getReferenceSeq() if ref_seq is None else ref_seq
    selected_others = aln.seq_selection_model.getSelection() - {reference}
    n_selected = len(selected_others)
    if n_selected == 0:
        return False
    # Every sequence except the reference counts as "non-reference"
    return n_selected < len(aln) - 1
def get_contiguous_columns(aln):
    """
    Yield the residue pairs that bound each run of contiguous columns
    containing at least one selected residue.

    :type aln: gui_alignment.GuiProteinAlignment

    :return: [start, end] residues of contiguous columns with selected res
    :rtype: iterable(tuple(Residue, Residue))
    """
    selection = aln.res_selection_model.getSelection()
    column_blocks = aln.elementsToContiguousColumns(selection)
    yield from column_blocks
def get_contiguous_multi_columns(aln):
    """
    Yield the bounding residues of selected column blocks that are at least
    two columns wide.

    :type aln: gui_alignment.GuiProteinAlignment

    :return: [start, end] residues of contiguous columns with selected res
        where end.idx_in_seq > start.idx_in_seq
    :rtype: iterable(tuple(Residue, Residue))
    """
    yield from ((first, last)
                for (first, last) in get_contiguous_columns(aln)
                if first.idx_in_seq < last.idx_in_seq)
def all_sel_multi_cols_anchored(aln):
    """
    Tell whether every column belonging to a multi-column selected block is
    an anchored column.

    False is returned when there are no anchors or no multi-column selected
    blocks.

    :type aln: gui_alignment.GuiProteinAlignment
    :rtype: bool
    """
    anchors = aln.getAnchoredResidues()
    if len(anchors) == 0:
        return False
    anchored_cols = {res.idx_in_seq for res in anchors}
    sel_blocks = list(get_contiguous_multi_columns(aln))
    selected_cols = {
        col for lo, hi in _get_indices_for_res_blocks(sel_blocks)
        for col in range(lo, hi)
    }
    return bool(selected_cols) and selected_cols.issubset(anchored_cols)
def sel_multi_cols_left_of_anchors(aln):
    """
    Tell whether the first multi-column selected block starts at or to the
    left of the right-most anchor.

    False is returned when there are no anchors, no residue selection, or no
    multi-column selected block.

    :type aln: gui_alignment.GuiProteinAlignment
    :rtype: bool
    """
    anchors = aln.getAnchoredResidues()
    if not (len(anchors) and aln.res_selection_model.hasSelection()):
        return False
    # Only the first block matters, so stop after one iteration
    for first_res, _last_res in get_contiguous_multi_columns(aln):
        leftmost_selected = first_res.idx_in_seq
        rightmost_anchor = max(res.idx_in_seq for res in anchors)
        return leftmost_selected <= rightmost_anchor
    return False
def _validate_align(aln,
                    structure=False,
                    split_by_anchors=False,
                    split_res_blocks=True):
    """
    Decide whether an align command may run on the alignment in its current
    selection/anchor state.

    :type aln: gui_alignment.GuiProteinAlignment

    :param structure: Whether the aligner requires structures
    :type structure: bool

    :param split_by_anchors: Whether the aligner needs to split the alignment
        by anchored columns and align blocks between anchors
    :type split_by_anchors: bool

    :param split_res_blocks: Whether the aligner needs to split the alignment
        into selected residue blocks and align only them
    :type split_res_blocks: bool

    :return: Whether align can run and a message if validation fails. The
        message should contain an "{undo_desc}" format field
    :rtype: tuple(bool, str)
    """
    selection_model = aln.seq_selection_model

    sets_ok, sets_msg = validate_aln_sets(aln)
    if sets_ok is False:
        return (sets_ok, sets_msg)

    if structure:
        res_selected = (split_res_blocks and
                        aln.res_selection_model.hasSelection())
        if res_selected or len(aln.getAnchoredResidues()):
            return (False, STRUCTURE_BLOCK_ERR)
        reference = aln.getReferenceSeq()
        candidates = selection_model.getSelection()
        candidates.discard(reference)
        if not candidates:
            # Nothing but the reference is selected: consider all other seqs
            candidates = aln[1:]
        structured = [seq for seq in candidates if seq.hasStructure()]
        n_structured = len(structured)
        if not reference.hasStructure() and n_structured <= 1:
            return (False, NO_REF_STRUCTURE_ERR)
        if n_structured == 0:
            return (False, TOO_FEW_STRUCTURES_ERR)

    if (split_by_anchors and partial_seq_selection(aln) and
            len(aln.getAnchoredResidues())):
        return (False, SEL_SEQS_ANCHOR_ERR)

    remaining_checks = (
        (all_sel_multi_cols_anchored, ALL_SEL_ANCHORED_ERR),
        (sel_multi_cols_left_of_anchors, SEL_ANCHOR_ERR),
    )
    for check, error in remaining_checks:
        if check(aln):
            return (False, error)
    return (True, "")


def _validate_composite_residues(dialog_parent, aln):
    """
    When the alignment has chimeric homology modeling alternate residues, ask
    the user to confirm clearing them and clear them on confirmation.

    :param dialog_parent: Widget to use to parent dialogs
    :type dialog_parent: QtWidgets.QWidget

    :param aln: Alignment
    :type aln: gui_alignment.GuiProteinAlignment

    :return: Whether align can run.
    :rtype: bool
    """
    composite_res = aln.homology_composite_residues
    if not composite_res:
        return True
    yes_text = "Clear Alternates"
    prompt = messagebox.QuestionMessageBox(
        parent=dialog_parent,
        text=ALTERNATE_REGIONS_TEXT.format(yes_text=yes_text),
        yes_text=yes_text,
        no_text=None,
        add_cancel_btn=True,
    )
    if not prompt.exec():
        return False
    aln.updateHomologyCompositeResidues(to_remove=composite_res, to_add=())
    return True


def _validate_hidden_seqs(dialog_parent, aln):
    """
    Block aligning (after showing a dialog) when the alignment has hidden
    seqs.

    :param dialog_parent: Widget to use to parent dialogs
    :type dialog_parent: QtWidgets.QWidget

    :param aln: Alignment
    :type aln: gui_alignment.GuiProteinAlignment

    :return: Whether align can run.
    :rtype: bool
    """
    return not dialogs.prompt_for_hidden_seqs(dialog_parent, aln)
def validate_aln_sets(aln, *, aln_sets=None, loose_seqs=None):
    """
    Check whether the Alignment Sets (and loose sequences) that would take
    part in the alignment form a supported combination.

    :param aln: The alignment
    :param aln_sets: Alignment Sets to align. Looked up from the alignment
        when both this and `loose_seqs` are None.
    :param loose_seqs: Sequences outside of any Alignment Set to align.

    :return: Whether aligning may proceed, and an error message (with an
        "{undo_desc}" format field) when it may not
    :rtype: tuple(bool, str)
    """
    if aln_sets is None and loose_seqs is None:
        aln_sets, loose_seqs = get_aln_sets_and_seqs_to_align(aln)
    if not (aln_sets or loose_seqs):
        return (True, "")

    set_count = len(aln_sets)
    if set_count > 2:
        problem = ("Can't align more than two Alignment Sets with "
                   "{undo_desc}")
    elif set_count == 2 and loose_seqs:
        problem = ("Can't align two Alignment Sets and loose sequences "
                   "with {undo_desc}")
    else:
        reference = aln.getReferenceSeq()
        if aln_sets and not any(reference in aln_set for aln_set in aln_sets):
            problem = ("Reference seq must be in one of the Alignment Sets for "
                       "{undo_desc}")
        else:
            problem = None

    if problem is None:
        return (True, "")
    return (False, problem)
def get_aln_set_align_mode(aln, *, aln_sets=None, loose_seqs=None):
    """
    Pick the alignment mode implied by the Alignment Sets to align.

    :param aln: The alignment
    :param aln_sets: Alignment Sets to align. Looked up from the alignment
        when both this and `loose_seqs` are None.
    :param loose_seqs: Sequences outside of any Alignment Set to align.

    :return: The SeqAlnMode to align the Aln Sets or whether aligning can
        proceed if the Aln Sets are invalid
    :rtype: SeqAlnMode or bool
    """
    if aln_sets is None and loose_seqs is None:
        aln_sets, loose_seqs = get_aln_sets_and_seqs_to_align(aln)
    is_valid, _msg = validate_aln_sets(aln,
                                       aln_sets=aln_sets,
                                       loose_seqs=loose_seqs)
    if not is_valid:
        return False

    counts = (len(aln_sets), len(loose_seqs))
    if counts[0] == 0:
        # No Alignment Sets involved: nothing special to do
        return True
    if counts == (1, 0):
        return SeqAlnMode.Multiple
    if counts in ((2, 0), (1, 1)):
        return SeqAlnMode.Profile
    return False
def get_aln_sets_and_seqs_to_align(aln):
    """
    Collect the Alignment Sets taking part in the alignment, together with
    the loose sequences that must be aligned to them.

    :param aln: The alignment

    :return: A list of Alignment Sets and a list of loose sequences to align
        to the Alignment Sets
    :rtype: tuple(list, list)
    """
    sets_found = []
    loose_found = []
    if not aln.alnSets():
        return (sets_found, loose_found)

    reference = aln.getReferenceSeq()
    candidates = aln.getSelectedSequences()  # In sequence order
    only_ref_or_nothing = not candidates or candidates == [reference]
    if only_ref_or_nothing:
        candidates = aln
    # The reference sequence always takes part, selected or not
    if candidates[0] != reference:
        candidates.insert(0, reference)

    seen_set_ids = set()
    for seq in candidates:
        owner = aln.alnSetForSeq(seq)
        if owner is None:
            loose_found.append(seq)
            continue
        set_id = owner.set_id
        if set_id in seen_set_ids:
            continue
        seen_set_ids.add(set_id)
        sets_found.append(owner)

    if not sets_found:
        # With no Alignment Set involved, the loose seqs are aligned normally
        loose_found.clear()
    return (sets_found, loose_found)
def get_residue_map_to_superimpose(aln, seqs_to_align, selected_only=False):
    """
    Build the per-entry lists of aligned residues to use for superimposing.

    A column is used when every sequence in `seqs_to_align` has an element
    with structure there. Only columns up to the length of the shortest
    sequence are considered.

    :param aln: The alignment; only consulted when `selected_only` is True
    :param seqs_to_align: The sequences whose residues are collected
    :param selected_only: Whether to only use aligned residues that are in
        columns with at least one residue selected.

    :return: Map of residues keyed by entry ID
    :rtype: dict(int, list[protein.residue.Residue])
    """
    shared_len = min(len(seq) for seq in seqs_to_align)
    usable_cols = [
        col for col in range(shared_len)
        if all(seq[col].hasStructure() for seq in seqs_to_align)
    ]
    if selected_only:
        # Restrict to columns where at least one residue is selected
        selected_cols = set()
        for first_res, last_res in get_contiguous_columns(aln):
            selected_cols.update(
                range(first_res.idx_in_seq, last_res.idx_in_seq + 1))
        usable_cols = [col for col in usable_cols if col in selected_cols]

    residues_by_entry = {}
    for seq in seqs_to_align:
        entry_residues = residues_by_entry.setdefault(seq.entry_id, [])
        entry_residues.extend(seq[col] for col in usable_cols)
    return residues_by_entry
def _get_partially_anchored_indices(aln):
    """
    Get indices of columns where some but not all seqs are anchored

    :param aln: The alignment

    :return: 0-based column indices, in column order
    :rtype: iterable(int)
    """
    anchored_residues = aln.getAnchoredResiduesWithRef()
    if not len(anchored_residues):
        # This is a generator function, so a bare return simply ends the
        # iteration without yielding anything
        return
    for col_idx, column_res in enumerate(aln.columns(omit_gaps=True)):
        n_anchored_col_res = len(anchored_residues.intersection(column_res))
        if 0 < n_anchored_col_res < len(column_res):
            yield col_idx


def _get_indices_for_res_blocks(blocks):
    """
    Convert (start, end) residue tuples to (start_idx, end_idx) slice indices.
    The end index is exclusive, i.e. one past the end residue's index.

    :type blocks: iterable(tuple(Residue, Residue))
    :rtype: iterable(tuple(int, int))
    """
    for (start, end) in blocks:
        yield start.idx_in_seq, end.idx_in_seq + 1


def _get_slices_to_align(dialog_parent,
                         aln,
                         split_by_anchors=False,
                         split_res_blocks=True,
                         split_by_chain=False):
    """
    Get residues marking contiguous blocks that should be aligned separately.
    The method may show dialogs to prompt the user to continue. Note that this
    method will remove any all-gap columns in the alignment. It may also remove
    anchors or add additional anchors (after prompting the user for
    confirmation) and it may align chain breaks in a combined-chain alignment.

    :param dialog_parent: Widget to use to parent dialogs
    :type dialog_parent: QtWidgets.QWidget

    :param aln: Alignment
    :type aln: gui_alignment._ProteinAlignment

    :param split_by_anchors: Whether the aligner needs to split the alignment
        by anchored columns and align blocks between anchors
    :type split_by_anchors: bool

    :param split_res_blocks: Whether the aligner needs to split the alignment
        into selected residue blocks and align only them
    :type split_res_blocks: bool

    :param split_by_chain: Whether the sequences in the alignment should be
        split based on chain breaks. Only possible in combined-chain mode.
    :type split_by_chain: bool

    :return: A tuple containing:

        - The list of blocks to align. Each block is given as a tuple of the
          top-left and top-right residues. (I.e., both residues will be from
          the reference sequence unless the reference sequence is too short.)
        - Whether we will need to correct chain breaks after the alignment is
          run.
    :rtype: tuple(list(tuple(Residue, Residue)), bool)

    :raise UserCanceledException: If the user cancels the alignment.
    """
    aln.minimizeAlignment()
    block_residues = []
    if split_res_blocks:
        selected_block_residues = list(get_contiguous_multi_columns(aln))
        if selected_block_residues:
            # A residue selection takes precedence over anchors and chains
            blocks = _parse_selected_blocks(dialog_parent, aln,
                                            selected_block_residues)
            return blocks, False

    chain_start_indices = None
    past_ref_index = None
    num_chains = 1
    need_chain_break_correction = False
    if split_by_chain:
        num_chains = max(len(seq.chains) for seq in aln)
        if num_chains > 1:
            chain_start_indices, past_ref_index = _align_chain_starts(
                dialog_parent, aln)
            need_chain_break_correction = True

    if split_by_anchors:
        partial_anchored_idxs = list(_get_partially_anchored_indices(aln))
        if len(partial_anchored_idxs):
            _fix_partial_anchors(partial_anchored_idxs, dialog_parent, aln)
        # Fetch anchors only after any partial anchors have been resolved
        anchored_res = aln.getAnchoredResidues()
    else:
        anchored_res = []

    if anchored_res or num_chains > 1:
        block_residues = list(
            aln.elementsToContiguousColumns(
                anchored_res,
                invert=True,
                additional_breaks=chain_start_indices,
                last_col=past_ref_index))
    return block_residues, need_chain_break_correction


def _parse_selected_blocks(dialog_parent, aln, selected_block_residues):
    """
    Assuming that the alignment contains selected residues, determine what
    blocks should be aligned.

    :param dialog_parent: The widget to use to parent dialogs
    :type dialog_parent: QtWidgets.QWidget

    :param aln: The alignment
    :type aln: gui_alignment._ProteinAlignment

    :param selected_block_residues: The top-left and bottom-right residues of
        contiguous blocks of selected residues.
    :type selected_block_residues: list[tuple(Residue, Residue)]

    :return: List of blocks. Each block is given as a tuple of the top-left and
        top-right residues. An empty list is returned when the only block
        spans the whole alignment.
    :rtype: list(tuple(Residue, Residue))

    :raise UserCanceledException: If the user cancels the alignment.
    """
    if len(selected_block_residues) == 1:
        # Don't align in blocks if the only block covers the entire aln
        start, end = selected_block_residues[0]
        if start.idx_in_seq == 0 and end.idx_in_seq == aln.num_columns - 1:
            return []

    block_indices = list(_get_indices_for_res_blocks(selected_block_residues))
    # Displayed indices are 1-based and inclusive; `end` is an exclusive
    # 0-based index and therefore already equals the 1-based inclusive end
    block_text = (f"<li>Block {idx + 1}: Indices {start + 1} - {end}</li>"
                  for idx, (start, end) in enumerate(block_indices))
    text = ALN_SELECTION_TEXT.format("\n".join(block_text))
    save_response_key = "align_with_selected_residues"
    msg_box = messagebox.QuestionMessageBox(parent=dialog_parent,
                                            text=text,
                                            yes_text="OK",
                                            no_text=None,
                                            add_cancel_btn=True,
                                            save_response_key=save_response_key)

    # TODO MSV-1651: When highlighting is correctly implemented:
    # Spec: "selection will be temporarily replaced by highlight on
    # to-be-aligned residues (only)"

    # Select contiguous blocks
    to_select = set()
    for (start_idx, end_idx) in block_indices:
        for seq in aln:
            residues = seq[start_idx:end_idx]
            to_select.update(residues)
    sel_model = aln.res_selection_model
    with sel_model.suspendSelection():
        sel_model.setSelectionState(to_select, True)
        response = msg_box.exec()
    if response is True:  # OK clicked
        return selected_block_residues
    else:  # Cancel clicked
        raise UserCanceledException


def _align_chain_starts(dialog_parent, aln):
    """
    Align chain starting positions for a combined-chain alignment (e.g. make
    sure that the start of the N-th chain occurs in the same column for all
    sequences). If any inter-chain anchors are present, the user will be
    prompted to remove them.

    :param dialog_parent: The widget to use to parent dialogs
    :type dialog_parent: QtWidgets.QWidget

    :param aln: The alignment
    :type aln: gui_alignment.GuiCombinedChainProteinAlignment

    :return: The values returned by `aln.alignChainStarts()`, shifted left to
        account for the all-gap columns removed by re-minimizing the alignment

    :raise UserCanceledException: If the user cancels the alignment.
    """
    inter_chain_anchors = aln.getInterChainAnchors()
    if inter_chain_anchors:
        msg_box = messagebox.QuestionMessageBox(parent=dialog_parent,
                                                text=INTERCHAIN_ANCHORS_TEXT,
                                                yes_text="OK",
                                                no_text=None,
                                                add_cancel_btn=True)
        response = msg_box.exec()
        if response:
            aln.removeAnchors(inter_chain_anchors)
        else:
            raise UserCanceledException
    chain_start_indices, past_ref_index = aln.alignChainStarts()
    # need to re-minimize and adjust chain_start_indices and past_ref_index
    # to account for removed columns
    gap_indices = [
        i for (i, col) in enumerate(aln.columns(omit_gaps=True)) if not col
    ]
    aln.minimizeAlignment()
    for i, chain_start_index in enumerate(chain_start_indices):
        chain_start_index -= sum(
            1 for gap_index in gap_indices if gap_index < chain_start_index)
        chain_start_indices[i] = chain_start_index
    if past_ref_index is not None:
        past_ref_index -= sum(
            1 for gap_index in gap_indices if gap_index < past_ref_index)
    return chain_start_indices, past_ref_index


def _fix_partial_anchors(partial_anchored_idxs, dialog_parent, aln):
    """
    Prompt the user about whether they want to fully-anchor or fully-unanchor
    columns where only some of the residues are anchored.

    :param partial_anchored_idxs: 0-based column indices where some but not
        all of the residues are anchored. Must not be empty.
    :type partial_anchored_idxs: list[int]

    :param dialog_parent: The widget to use to parent dialogs
    :type dialog_parent: QtWidgets.QWidget

    :param aln: The alignment
    :type aln: gui_alignment.GuiCombinedChainProteinAlignment

    :raise UserCanceledException: If the user cancels the alignment.
    """
    clear_text = "Clear anchors"
    yes_text = "Fully anchor columns"
    # Show 1-based column numbers to the user, consistent with the 1-based
    # indices shown in the selected-block prompt
    partial_columns = ", ".join(str(i + 1) for i in partial_anchored_idxs)
    text = PARTIAL_ANCHORS_TEXT.format(partial_columns,
                                       clear_text=clear_text,
                                       yes_text=yes_text)
    msg_box = messagebox.QuestionMessageBox(parent=dialog_parent,
                                            text=text,
                                            yes_text=yes_text,
                                            more_btns_list=[clear_text],
                                            no_text=None,
                                            add_cancel_btn=True)
    response = msg_box.exec()
    if response is True:  # OK clicked
        # Indexing still uses the original 0-based column indices
        res_to_anchor = [aln[0][idx] for idx in partial_anchored_idxs]
        aln.anchorResidues(res_to_anchor)
    elif response == clear_text:
        aln.clearAnchors()
    elif response is None:  # Cancel clicked
        raise UserCanceledException


def _correct_chain_breaks(aln):
    """
    Fix any chain breaks that became unaligned during the alignment process.
    During the alignment, any gaps that should be added to the end of a chain
    will instead be added to the start of the next chain since the sequence API
    doesn't provide a way to distinguish between adding a gap after residue N
    versus adding a gap before residue N+1. To fix this, we simply shift chain
    breaks to the right along gaps until all chain breaks line up correctly.

    :param aln: The alignment
    :type aln: gui_alignment.GuiCombinedChainProteinAlignment
    """
    # Cumulative chain lengths give, per sequence, where each following chain
    # currently starts
    chain_starts_by_seq = [
        itertools.accumulate(len(chain) for chain in seq.chains) for seq in aln
    ]
    chain_starts_by_chain_num = itertools.zip_longest(*chain_starts_by_seq,
                                                      fillvalue=0)
    # The right-most start across all sequences is the target for every seq
    chain_starts = list(map(max, chain_starts_by_chain_num))
    num_gaps = []
    for seq in aln:
        cur_chain_start = 0
        num_gaps.append([])
        for chain, correct_start in zip(seq.chains[:-1], chain_starts):
            cur_chain_start += len(chain)
            num_gaps[-1].append(correct_start - cur_chain_start)
    aln.adjustChainStarts(num_gaps)
def align_command(undo_desc,
                  pairwise=False,
                  structure=False,
                  split_by_anchors=False,
                  split_res_blocks=True,
                  can_align_sets=False,
                  superimpose_param=None):
    """
    Decorator for methods that align the alignment. Within the decorated
    function, non-selected sequences will be removed from the alignment (this
    is skipped for pairwise aligners). The method's class must define
    `getAlignment()`, `undo_stack`, and `warning()`.

    If the decorated method raises `CantAlignException`, this will show a
    warning and roll back the undo stack.

    When the alignment has to be run in blocks, the decorated method is called
    once per block with `_start` and `_end` keyword arguments holding the
    block's slice indices.

    Example usage::

        @align_command(undo_desc="Pairwise alignment", pairwise=True)
        def runPairwiseAlignment(self):

    :param undo_desc: Name for the compressed undo command
    :type undo_desc: str

    :param pairwise: Whether the method is a pairwise aligner
    :type pairwise: bool

    :param structure: Whether the aligner requires structures
    :type structure: bool

    :param split_by_anchors: Whether the aligner needs to split the alignment
        by anchored columns and align blocks between anchors
    :type split_by_anchors: bool

    :param split_res_blocks: Whether the aligner needs to split the alignment
        into selected residue blocks and align only them
    :type split_res_blocks: bool

    :param can_align_sets: Whether the aligner can align Alignment Sets
    :type can_align_sets: bool

    :param superimpose_param: Abstract param for the setting of whether the
        structures should be superimposed after alignment. Must start with
        gui_models.AlignSettingsModel.
    :type superimpose_param: parameters.Param or NoneType
    """

    @decorator.decorator
    def dec(func, self, *args, **kwargs):
        aln = self.getAlignment()
        alignment_supported, msg = alignment_precheck(aln)
        if not alignment_supported:
            self.warning(msg)
            return
        has_aln_sets = bool(aln.alnSets())
        if has_aln_sets and not can_align_sets:
            self.warning("Can only align Alignment Sets with Multiple and "
                         "Profile alignment")
            return
        # These checks may show dialogs; a falsy result means the user
        # declined or aligning is blocked
        composite_ok = _validate_composite_residues(self, aln)
        if not composite_ok:
            return
        hidden_ok = _validate_hidden_seqs(self, aln)
        if not hidden_ok:
            return

        with contextlib.ExitStack() as stack:
            # Select sequences based on align_only_selected_seqs
            stack.enter_context(_update_seq_selection(self))

            can_align, msg = _validate_align(aln,
                                             structure,
                                             split_by_anchors=split_by_anchors,
                                             split_res_blocks=split_res_blocks)
            if not can_align:
                self.warning(msg.format(undo_desc=undo_desc))
                return

            # Handle sequence selection
            # Constructing the manager may prompt the user; it is created
            # before the undo command is opened so that canceling leaves the
            # undo stack untouched
            seq_cm = _AlignmentSelectedSeqs(aln, pairwise, structure)
            if seq_cm.user_canceled:
                return

            stack.enter_context(_AlignStack(widget=self, undo_desc=undo_desc))
            if not pairwise:
                stack.enter_context(seq_cm)

            # Handle residue selection and anchoring
            split_by_chain = not (self.model.split_chain_view or structure)
            res_blocks_to_align, need_chain_break_correction = \
                _get_slices_to_align(
                    self, aln, split_by_anchors=split_by_anchors,
                    split_res_blocks=split_res_blocks,
                    split_by_chain=split_by_chain)
            if res_blocks_to_align:
                slices = _get_indices_for_res_blocks(res_blocks_to_align)
                # Drop any caller-supplied bounds; they are replaced per block
                kwargs.pop('_start', None)
                kwargs.pop('_end', None)
                for (start, end) in slices:
                    func(self, *args, _start=start, _end=end, **kwargs)
            else:
                func(self, *args, **kwargs)
            if need_chain_break_correction:
                _correct_chain_breaks(aln)
            # NOTE(review): the nesting of the remaining statements was
            # inferred (the available source had lost its indentation);
            # confirm that they belong inside this `with` block.
            if superimpose_param is not None:
                model = self.model.options.align_settings
                do_superimpose = superimpose_param.getParamValue(model)
                if do_superimpose:
                    self.superimposeStructures()
            return True

    return dec
@contextlib.contextmanager
def _update_seq_selection(widget):
    """
    Context manager that temporarily adjusts the sequence selection used for
    aligning.

    If the widget's model.options.align_settings.align_only_selected_seqs is
    False, every sequence matching the reference type is selected. Otherwise
    only the currently selected sequences that match the reference type are
    selected. The change is made while the selection model's selection is
    suspended.

    :type widget: msv_widget.AbstractMsvWidget
    """
    aln = widget.getAlignment()
    same_type_seqs = aln.getSeqsMatchingRefType()
    settings = widget.model.options.align_settings
    if not settings.align_only_selected_seqs:
        to_select = same_type_seqs
    else:
        currently_selected = aln.seq_selection_model.getSelection()
        to_select = [seq for seq in currently_selected if seq in same_type_seqs]
    with aln.seq_selection_model.suspendSelection():
        aln.seq_selection_model.setSelectionState(to_select, True)
        yield