Source code for schrodinger.application.livedesign.entry_types

import base64
import copy

import inflect
import functools

from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.structutils import analyze
from schrodinger.ui.qt.appframework2 import validation

from . import data_classes

# Singular nouns used as `node_str` / `target_str` on the entry type classes
# below and interpolated into validation and summary messages.
LIGAND = 'ligand'
RECEPTOR = 'receptor'

# Order values passed to `validation.validator(...)` for each validation
# method defined in this module.
# NOTE(review): presumably validators run in ascending order of these values;
# confirm against the `validation` module.
NUM_ENTRIES_VAL_ORDER = 100
NUM_TARGETS_VAL_ORDER = 200
NUM_NODES_VAL_ORDER = 300
NUM_ATOMS_VAL_ORDER = 400
LIGANDS_VAL_ORDER = 500

# Atom-level property used in an ASL expression by `CovalentDockingComplexes`
# to select the ligand atoms of a covalently docked complex.
CDOCK_LIG_ATOM_PROP = 'i_cdock_ligand_atom'
# Structure-level properties read by `CovalentDockingComplexes` to rebuild the
# original input ligand: the base64-encoded compressed structure bytes, and
# the compressed and decompressed sizes handed to the mmct decompressor.
CDOCK_BYTES = 's_cdock_compressed_input_ligand'
CDOCK_COMP_SIZE = 'i_cdock_compressed_input_size'
CDOCK_DECOMP_SIZE = 'i_cdock_decompressed_input_size'
# The properties above as a group; they are not copied onto the rebuilt ligand
# and are deleted from the structures extracted from the complex.
CDOCK_LIG_DATA_NAMES = [CDOCK_BYTES, CDOCK_COMP_SIZE, CDOCK_DECOMP_SIZE]

# Suffix appended to validation messages about missing ligands, hinting at how
# to work around a ligand that is not being detected.
LIG_DETECT_ERR_MSG = ('\n\nIf you believe this is detected in error, please'
                      ' confirm the ligand is being properly perceived as a'
                      ' ligand in Maestro.\n\nTip: export and re-import the'
                      ' ligand as a .sdf file.')

#===============================================================================
# Entry Type Classes
#===============================================================================


class BaseEntryData(validation.ValidationMixin):
    """
    Abstract container that stores and validates the structures of a
    ligand-based system ahead of an export to LiveDesign.

    Concrete subclasses must implement `_createRLMap` and must also provide a
    `getNumNodes()` method, which `checkNumEntries` calls but which is not
    defined on this base class.

    :cvar name: name of the entry type
    :vartype name: str
    :cvar description: description of the entry type
    :vartype description: str
    :cvar min_nodes: minimum number of nodes; not read by this base class
    :vartype min_nodes: int
    :cvar min_structures: minimum value of (nodes + targets) accepted by
        `checkNumEntries`
    :vartype min_structures: int
    :cvar node_str: singular word for a node; not read by this base class
    :vartype node_str: str
    :cvar target_str: singular word for a target; not read by this base class
    :vartype target_str: str
    :cvar entry_singular: singular word for an entry; not read by this base
        class
    :vartype entry_singular: str
    """

    name = ''
    description = ''
    min_nodes = 1
    min_structures = 1
    node_str = ''
    target_str = ''
    entry_singular = 'entry'

    def __init__(self, structure_map):
        """
        :param structure_map: a dictionary mapping unique strings to
            structures from some system
        :type structure_map: dict[str, structure.Structure]
        """
        self._rl_map = self._createRLMap(structure_map)

    def _createRLMap(self, structure_map):
        """
        Arrange the supplied structures into a receptor-ligand map.

        Abstract: concrete subclasses must override this method.

        :param structure_map: a dictionary mapping unique strings to
            structures from some system
        :type structure_map: dict[str, structure.Structure]

        :return: a receptor ligand map
        :rtype: data_classes.ReceptorLigandMap
        """
        raise NotImplementedError

    def getNumTargets(self):
        """
        :return: the number of unique targets in the system
        :rtype: int
        """
        unique_receptors = set(self._rl_map.receptors)
        return len(unique_receptors)

    def getRLMap(self):
        """
        :return: a (shallow) copy of the stored receptor ligand map
        :rtype: data_classes.ReceptorLigandMap
        """
        return copy.copy(self._rl_map)

    @validation.validator(NUM_ENTRIES_VAL_ORDER)
    def checkNumEntries(self):
        """
        Validate that the nodes and targets together reach `min_structures`.

        :return: `True` on success, otherwise `(False, message)`
        :rtype: bool or tuple(bool, str)
        """
        num_structures = self.getNumNodes() + self.getNumTargets()
        if num_structures >= self.min_structures:
            return True
        template = 'You must specify at least {number} {variable_str}.'
        return False, format_numbered_message(template, self.min_structures,
                                              'structure')

    def _getSummaryMessages(self):
        """
        Supply the individual messages that make up the entry data summary.
        Meant to be overridden by a subclass; the base class has none.

        :return: summary messages to format
        :rtype: tuple(str)
        """
        return ()

    def getSummary(self):
        """
        Summarize the receptor and ligand data stored in this object.

        :return: a summary of the data stored on this object
        :rtype: str
        """
        messages = self._getSummaryMessages()
        return self.formatSummaryMessage(*messages)

    def formatSummaryMessage(self, *msgs):
        """
        Build a summary string of the form 'msg1 (msg2, msg3...)'. With no
        messages the result is an empty string; with a single message the
        parenthesized part is omitted.

        :param msgs: messages to separate
        :type msgs: str

        :return: Formatted string
        :rtype: str
        """
        if not msgs:
            return ''
        headline, *details = msgs
        summary = f'{headline}'
        if details:
            summary += f' ({", ".join(details)})'
        return summary
class IndividualStructureBase(BaseEntryData):
    """
    Entry data for which every input structure is either one target or one
    node structure (as opposed to a receptor-ligand complex).
    """

    def _createRLMap(self, structure_map):
        """
        Build the receptor-ligand map for non-complexed inputs. The supplied
        structures are expected to hold at most one receptor plus any number
        of ligands associated with it.

        A structure counts as a receptor when the ASL expression `protein`
        matches at least one of its atoms; every other structure is stored as
        a ligand.

        :param structure_map: a dictionary mapping unique strings to
            structures from some system
        :type structure_map: dict[str, structure.Structure]

        :return: a receptor ligand map
        :rtype: data_classes.ReceptorLigandMap
        """
        rl_map = data_classes.ReceptorLigandMap()
        primary_receptor = None
        surplus_receptors = {}

        for key, st in structure_map.items():
            if not analyze.evaluate_asl(st, 'protein'):
                ligand_group = data_classes.ReceptorLigandGroup(ligand=st)
                rl_map[key].append(ligand_group)
                continue
            if primary_receptor is None:
                primary_receptor = st
            else:
                surplus_receptors[key] = st

        # Every ligand group shares the first receptor that was encountered.
        # NOTE(review): if no ligand group was created above, the first
        # receptor is not stored anywhere in the map - confirm this is
        # intended.
        for ligand_group in rl_map.rl_groups:
            ligand_group.receptor = primary_receptor

        # Add any additional receptors to receptor-ligand map as new groups;
        # expect these to cause this class to fail validation
        for key, st in surplus_receptors.items():
            receptor_group = data_classes.ReceptorLigandGroup(receptor=st)
            rl_map[key].append(receptor_group)

        return rl_map

    def getNumNodes(self):
        """
        :return: the number of unique nodes (unique ligand SMILES) in this
            system
        :rtype: int
        """
        unique_ligands = get_ligands(self._rl_map.ligands, unique_smiles=True)
        return len(unique_ligands)

    def getNumLigands(self):
        """
        :return: the number of total ligands in this system
        :rtype: int
        """
        all_ligands = get_ligands(self._rl_map.ligands)
        return len(all_ligands)

    @validation.validator(NUM_NODES_VAL_ORDER)
    def checkNumNodes(self):
        """
        Validate that no structure holds more than one ligand and that at
        least `min_nodes` nodes are present.

        :return: `True` on success, otherwise `(False, message)`
        :rtype: bool or tuple(bool, str)
        """
        for st in self._rl_map.structures:
            if len(analyze.find_ligands(st)) > 1:
                return False, (f'Entry {st.title} contains more than one'
                               f' {self.node_str}.')

        if self.getNumNodes() >= self.min_nodes:
            return True

        template = 'You must specify at least {number} {variable_str}.'
        msg = format_numbered_message(template, self.min_nodes, self.node_str)
        if self.node_str == LIGAND:
            # Ligands may simply have gone undetected; append the hint.
            msg += LIG_DETECT_ERR_MSG
        return False, msg
class TargetlessBase(IndividualStructureBase):
    """
    Base class for data types that do not have a target structure.
    Subclasses must redefine `max_num_atoms`.

    :cvar max_num_atoms: the maximum number of atoms per structure
    :vartype max_num_atoms: int
    """

    max_num_atoms = 0

    @validation.validator(NUM_ATOMS_VAL_ORDER)
    def checkNumAtoms(self):
        """
        Validate that no stored structure exceeds `max_num_atoms` atoms. The
        first offending structure is the one named in the message.

        :return: `True` on success, otherwise `(False, message)`
        :rtype: bool or tuple(bool, str)
        """
        too_large = next((st for st in self._rl_map.structures
                          if len(st.atom) > self.max_num_atoms), None)
        if too_large is None:
            return True
        return False, (f'Entry {too_large.title} contains more than'
                       f' {self.max_num_atoms} atoms.')
class OrganometallicCompounds(TargetlessBase):
    """
    Entry data for organometallic compounds, one compound per entry. Every
    receptor-ligand group in the map counts as one node.
    """

    name = 'Organometallic Compounds'
    description = ('Upload organometallic compounds. Each compound must be its'
                   ' own entry.')
    max_num_atoms = 700
    node_str = 'organometallic compound'

    def getNumNodes(self):
        """
        :return: the number of unique nodes in this system
        :rtype: int
        """
        return self._rl_map.num_rl_groups

    def _getSummaryMessages(self):
        """
        :return: the total-entry message followed by the compound-count
            message
        :rtype: tuple(str, str)
        """
        entries_msg = format_numbered_message(
            '{number} total {variable_str}',
            self.getNumNodes(),
            self.entry_singular,
        )
        compounds_msg = format_numbered_message(
            '{number} {variable_str}',
            self.getNumNodes(),
            'compound',
        )
        return entries_msg, compounds_msg
class Ligands(TargetlessBase):
    """
    Entry data for standalone ligands, one ligand per entry. The atom limit
    per structure is taken from `mm.LigandParameters().max_atom_count`.
    """

    name = 'Ligands'
    description = ('Upload ligands. Each ligand must be in its own entry.'
                   ' Non-ligand structures will be ignored.')
    max_num_atoms = mm.LigandParameters().max_atom_count
    node_str = LIGAND

    def _getSummaryMessages(self):
        """
        :return: the total-ligand message followed by the unique-ligand
            message
        :rtype: tuple(str, str)
        """
        total_msg = format_numbered_message(
            '{number} total {variable_str}',
            self.getNumLigands(),
            self.entry_singular,
        )
        unique_msg = format_numbered_message(
            '{number} unique {variable_str}',
            self.getNumNodes(),
            self.node_str,
        )
        return total_msg, unique_msg
class DockedPoses(IndividualStructureBase):
    """
    Entry data for docked poses: one receptor in its own entry plus any
    number of docked ligands, each in a separate entry.
    """

    name = 'Docked Poses'
    description = ('Upload docked poses based on the PV format. Selection'
                   ' should include one receptor, in its own entry, and docked'
                   ' ligands, each in a separate entry.')
    node_str = LIGAND
    target_str = RECEPTOR

    @validation.validator(NUM_NODES_VAL_ORDER)
    def checkNumNodes(self):
        """
        Override this method to catch case where a single protein-ligand
        complex is supplied but the "Docked Poses" option is selected. Produces
        a helpful error in this case.

        Make sure this code is called before the superclass checkNumNodes()
        so that this error is produced rather than a less specific error.

        :return: `True` on success, otherwise `(False, message)`
        :rtype: bool or tuple(bool, str)
        """
        if self.getNumNodes() == 0:
            # The map may hold no receptor at all; supply a default so that an
            # exhausted iterator does not raise StopIteration, and fall
            # through to the generic superclass check in that case.
            protein_ct = next(iter(self._rl_map.receptors), None)
            if protein_ct is not None and get_ligands([protein_ct],
                                                      unique_smiles=True):
                # The only structure supplied is a protein-ligand complex
                msg = ('This panel does not support trying to export a protein-'
                       'ligand complex as a docked pose.' + LIG_DETECT_ERR_MSG)
                return False, msg
        return super().checkNumNodes()

    @validation.validator(NUM_TARGETS_VAL_ORDER)
    def checkTargets(self):
        """
        Validate that exactly one receptor is present.

        :return: `True` on success, otherwise `(False, message)`
        :rtype: bool or tuple(bool, str)
        """
        if self.getNumTargets() == 0:
            return False, 'No receptor structure was found.'
        if self.getNumTargets() > 1:
            return False, 'Please specify only one receptor.'
        return True

    def _getSummaryMessages(self):
        """
        :return: the total-entry, receptor-count and ligand-count messages,
            in that order
        :rtype: tuple(str, str, str)
        """
        base_msg = '{number} {variable_str}'
        lig_msg = format_numbered_message(base_msg=base_msg,
                                          number=self.getNumNodes(),
                                          singular_str=self.node_str)
        rec_msg = format_numbered_message(base_msg=base_msg,
                                          number=self.getNumTargets(),
                                          singular_str=self.target_str)
        tot_base_msg = '{number} total {variable_str}'
        tot_msg = format_numbered_message(base_msg=tot_base_msg,
                                          number=self._rl_map.num_rl_groups,
                                          singular_str=self.entry_singular)
        return tot_msg, rec_msg, lig_msg
class Complexes(BaseEntryData):
    """
    Entry data for non-covalently bound receptor-ligand complexes. Each
    detected ligand of each entry becomes its own receptor-ligand group.
    """

    name = 'Receptor-Ligand Complexes'
    description = ('Upload receptor-ligand complexes. Selection should include'
                   ' at least one entry, where each entry includes a receptor'
                   ' and at least one ligand.')
    node_str = LIGAND
    target_str = RECEPTOR

    def _createRLMap(self, structure_map):
        """
        Organize a receptor ligand map. Each structure should be a non-
        covalently bound receptor-ligand complex containing one receptor and at
        least one ligand.

        :param structure_map: a dictionary mapping unique strings to
            structures from some system
        :type structure_map: dict[str, structure.Structure]

        :return: a receptor ligand map
        :rtype: data_classes.ReceptorLigandMap
        """
        rl_map = data_classes.ReceptorLigandMap()
        for key, complex_st in structure_map.items():
            # NOTE(review): a structure in which `separate_complex` finds no
            # ligand yields no pairs, so nothing is stored for its key -
            # confirm that skipping such entries is intended.
            for receptor, ligand in zip(*separate_complex(complex_st)):
                pair = data_classes.ReceptorLigandGroup(receptor=receptor,
                                                        ligand=ligand)
                rl_map[key].append(pair)
        return rl_map

    def getNumNodes(self):
        """
        :return: the number of unique nodes in this system
        :rtype: int
        """
        return self._rl_map.num_rl_groups

    @validation.validator(NUM_TARGETS_VAL_ORDER)
    def checkTargets(self):
        """
        Validate that every receptor-ligand group has a receptor.

        :return: `True` on success, otherwise `(False, message)`
        :rtype: bool or tuple(bool, str)
        """
        if all(group.receptor for group in self._rl_map.rl_groups):
            return True
        return False, f'There must be one {self.target_str} per entry.'

    @validation.validator(LIGANDS_VAL_ORDER)
    def checkLigands(self):
        """
        Validate that every receptor-ligand group has a ligand.

        :return: `True` on success, otherwise `(False, message)`
        :rtype: bool or tuple(bool, str)
        """
        if all(group.ligand for group in self._rl_map.rl_groups):
            return True
        return False, f'There must be at least one {self.node_str} per entry.'

    def _getSummaryMessages(self):
        """
        :return: the total-entry message followed by the complex-count message
        :rtype: tuple(str, str)
        """
        total_msg = format_numbered_message(
            '{number} total {variable_str}',
            self._rl_map.num_rl_groups,
            self.entry_singular,
        )
        complexes_msg = format_numbered_message(
            '{number} receptor-ligand {variable_str}',
            self.getNumNodes(),
            'complex',
        )
        return total_msg, complexes_msg
class CovalentDockingComplexes(Complexes):
    """
    Entry data for covalently docked complexes. The ligand uploaded as the
    primary ligand is the original input ligand, rebuilt from data stored in
    properties of each complex structure.
    """

    name = 'Covalent Docking Complexes'
    description = ('Upload covalent docking structures. Selection should'
                   ' include outputs from the covalent docking panel.'
                   ' Uploaded ligands will be the original (input) structures'
                   ' prior to covalent docking processing.')

    def _createRLMap(self, structure_map):
        """
        Create a receptor ligand map. Each structure should be a covalently-
        bound receptor-ligand complex. Furthermore, because we expect each
        structure to be output from covalent_docking.py in psp-src, they should
        each have a structure property containing the ligand structure prior to
        docking with the receptor.

        Structures lacking any of the `CDOCK_LIG_DATA_NAMES` properties are
        skipped and contribute nothing to the map.

        :param structure_map: a dictionary mapping unique strings to
            structures from some system
        :type structure_map: dict[str, structure.Structure]

        :return: a receptor ligand map
        :rtype: data_classes.ReceptorLigandMap
        """
        rl_map = data_classes.ReceptorLigandMap()

        # We are concerned with the original ligands, which are compressed and
        # stored as properties of the complex structures (see
        # covalent_docking.py in psp-src)
        for key, complex_st in structure_map.items():
            # Re-build original ligand structure
            encoded_lig = complex_st.property.get(CDOCK_BYTES)
            comp_size = complex_st.property.get(CDOCK_COMP_SIZE)
            decomp_size = complex_st.property.get(CDOCK_DECOMP_SIZE)
            required = (encoded_lig, comp_size, decomp_size)
            if any(value is None for value in required):
                continue

            compressed_lig = base64.b64decode(encoded_lig.encode('ASCII'))
            ct_handle = mm.mmct_ct_from_compressed_bytes(
                compressed_lig, comp_size, decomp_size)
            orig_lig_st = structure.Structure(ct_handle)

            # Extract receptor structure (and attached residue) from complex
            # structure: the flagged ligand atoms, extended by the ASL below,
            # form the modified ligand; every remaining atom forms the
            # receptor.
            asl = f'fillres withinbonds 1 (atom.{CDOCK_LIG_ATOM_PROP} 1)'
            lig_atom_idcs = analyze.evaluate_asl(complex_st, asl)
            rec_atom_idcs = set(
                complex_st.getAtomIndices()).difference(lig_atom_idcs)
            mod_lig_st = complex_st.extract(lig_atom_idcs, True)
            receptor_st = complex_st.extract(rec_atom_idcs, True)

            # Copy properties from complex to ligand, remove certain
            # properties from receptor
            for data_name in complex_st.property:
                if data_name in CDOCK_LIG_DATA_NAMES:
                    continue
                orig_lig_st.property[data_name] = complex_st.property.get(
                    data_name)
            for data_name in CDOCK_LIG_DATA_NAMES:
                for extracted_st in (mod_lig_st, receptor_st):
                    del extracted_st.property[data_name]

            # The primary ligand for the LiveDesign upload is the non-complexed
            # ligand structure; the alternate (3D data) ligand is the complexed
            # structure
            rl_group = data_classes.ReceptorLigandGroup(
                receptor=receptor_st,
                ligand=orig_lig_st,
                alt_ligand=mod_lig_st,
            )
            rl_map[key].append(rl_group)

        return rl_map
#=============================================================================== # Utility functions #===============================================================================
def separate_complex(complex_st):
    """
    Split a receptor-ligand complex into parallel lists of 'receptor' and
    ligand structures, one pair per ligand found by `analyze.find_ligands`.

    For each ligand, the matching receptor is a copy of the complex with that
    ligand's atoms deleted. If nothing remains after the deletion, the
    receptor entry is `None`.

    :param complex_st: a ligand-receptor complex
    :type complex_st: structure.Structure

    :return: a receptor list and corresponding ligand list
    :rtype: tuple(list(structure.Structure), list(structure.Structure))
    """
    receptor_sts = []
    ligand_sts = []
    for found_ligand in analyze.find_ligands(complex_st):
        remainder = complex_st.copy()
        remainder.deleteAtoms(found_ligand.atom_indexes)
        has_atoms = len(remainder.atom) != 0
        receptor_sts.append(remainder if has_atoms else None)
        ligand_sts.append(found_ligand.st.copy())
    return receptor_sts, ligand_sts
def get_ligands(sts, unique_smiles=False):
    """
    Get a list of ligand smiles represented in sts

    The structures are serialized to strings so that the ligand search in
    `_find_ligands` can be cached. The returned list is always a new object,
    so callers may modify it without affecting the cached result.

    :param sts: the structures to analyze
    :type sts: iterable(structure.Structure)

    :param unique_smiles: whether to only count sts with unique SMILES; when
        set, duplicates are dropped and the first-seen order is kept
    :type unique_smiles: bool

    :return: list of smiles
    :rtype: list(str)
    """
    st_strs = tuple(structure.write_ct_to_string(st) for st in sts)
    cached_smiles = _find_ligands(st_strs)
    if unique_smiles:
        # dict.fromkeys de-duplicates while keeping a deterministic order,
        # unlike a round trip through set().
        return list(dict.fromkeys(cached_smiles))
    # Copy so that the list held by the lru_cache is never handed out.
    return list(cached_smiles)
@functools.lru_cache
def _find_ligands(st_strs):
    """
    Find ligands from the given structures. Use string structures for caching
    purposes: the argument is the cache key, so it must be hashable.

    Only the first structure read from each string is analyzed.

    :param st_strs: the structures to analyze in string form
    :type st_strs: tuple(str)

    :return: list of smiles
    :rtype: list(str)
    """
    # Parse every string before searching any structure for ligands.
    parsed_sts = []
    for st_str in st_strs:
        reader = structure.StructureReader.fromString(st_str)
        parsed_sts.append(list(reader)[0])

    smiles = []
    for st in parsed_sts:
        for lig in analyze.find_ligands(st):
            smiles.append(lig.unique_smiles)
    return smiles
def format_numbered_message(base_msg, number, singular_str, plural_str=None):
    """
    Fill a message template with a number and the matching singular or plural
    form of a word.

    Example input:
        base_msg = 'I have {number} {variable_str}.'
        n = 2
        singular_str = egg

    Example output:
        'I have 2 eggs.'

    :param base_msg: the message to be formatted. Should contain two new-style
        formatting replacement points, `{number}` and `{variable_str}`, for the
        number and numbered word that should be added to the message
    :type base_msg: str

    :param number: the number to be added to the message
    :type number: int

    :param singular_str: the singular word for the object being described
    :type singular_str: str

    :param plural_str: optionally, the plural word for the object being
        described; by default, inflect will be used to pluralize
    :type plural_str: str or NoneType

    :return: the formatted message
    :rtype: str
    """
    if number == 1:
        noun = singular_str
    elif plural_str:
        noun = plural_str
    else:
        # No explicit plural was supplied; let inflect derive one.
        noun = inflect.engine().plural(singular_str)
    return base_msg.format(number=number, variable_str=noun)