import base64
import copy
import inflect
from schrodinger.infra import mm
from schrodinger.structure import structure
from schrodinger.structutils import analyze
from schrodinger.ui.qt.appframework2 import validation
from . import data_classes
# Singular nouns assigned to the `node_str` / `target_str` class attributes of
# the entry types below and interpolated into their messages.
LIGAND = 'ligand'
RECEPTOR = 'receptor'
# Order values handed to the `validation.validator` decorator on the check
# methods below (presumably lower values are evaluated first -- confirm
# against the `validation` module).
NUM_ENTRIES_VAL_ORDER = 100
NUM_TARGETS_VAL_ORDER = 200
NUM_NODES_VAL_ORDER = 300
NUM_ATOMS_VAL_ORDER = 400
LIGANDS_VAL_ORDER = 500
# Atom-level property used in an ASL expression to select the ligand atoms of
# a covalent docking complex.
CDOCK_LIG_ATOM_PROP = 'i_cdock_ligand_atom'
# Structure-level properties of a covalent docking complex from which the
# original input ligand is rebuilt: the base64 text of the compressed ligand
# and the compressed / decompressed sizes.
CDOCK_BYTES = 's_cdock_compressed_input_ligand'
CDOCK_COMP_SIZE = 'i_cdock_compressed_input_size'
CDOCK_DECOMP_SIZE = 'i_cdock_decompressed_input_size'
# These properties are not copied onto the rebuilt ligand and are deleted from
# the extracted receptor and complexed-ligand structures.
CDOCK_LIG_DATA_NAMES = [CDOCK_BYTES, CDOCK_COMP_SIZE, CDOCK_DECOMP_SIZE]
# Suffix appended to validation messages that may stem from a ligand not being
# detected as a ligand.
LIG_DETECT_ERR_MSG = ('\n\nIf you believe this is detected in error, please'
' confirm the ligand is being properly perceived as a'
' ligand in Maestro.\n\nTip: export and re-import the'
' ligand as a .sdf file.')
#===============================================================================
# Entry Type Classes
#===============================================================================
class BaseEntryData(validation.ValidationMixin):
    """
    Abstract class for storing and validating data for various formats of
    ligand-based systems in anticipation of export to LiveDesign.

    Concrete subclasses must implement `_createRLMap()` and `getNumNodes()`.

    :cvar name: name of this entry type
    :vartype name: str
    :cvar description: description of the input this entry type expects
    :vartype description: str
    :cvar min_nodes: minimum number of nodes; read by subclass node validators
    :vartype min_nodes: int
    :cvar min_structures: minimum combined number of nodes and targets needed
        to pass `checkNumEntries()`
    :vartype min_structures: int
    :cvar node_str: singular noun for a node, used in messages
    :vartype node_str: str
    :cvar target_str: singular noun for a target, used in messages
    :vartype target_str: str
    :cvar entry_singular: singular noun for an entry, used in summaries
    :vartype entry_singular: str
    """

    name = ''
    description = ''
    min_nodes = 1
    min_structures = 1
    node_str = ''
    target_str = ''
    entry_singular = 'entry'

    def __init__(self, structure_map):
        """
        :param structure_map: a dictionary mapping unique strings to structures
            from some system
        :type structure_map: dict[str, structure.Structure]
        """
        self._rl_map = self._createRLMap(structure_map)

    def _createRLMap(self, structure_map):
        """
        Organize the input structures into a receptor-ligand map. Must be
        implemented in concrete subclasses.

        :param structure_map: a dictionary mapping unique strings to structures
            from some system
        :type structure_map: dict[str, structure.Structure]
        :return: a receptor ligand map
        :rtype: data_classes.ReceptorLigandMap
        """
        raise NotImplementedError

    def getNumNodes(self):
        """
        Return the number of nodes in the system. Must be implemented in
        concrete subclasses; declared here because `checkNumEntries()` depends
        on it.

        :return: the number of nodes in the system
        :rtype: int
        """
        raise NotImplementedError

    def getNumTargets(self):
        """
        :return: the number of unique targets in the system
        :rtype: int
        """
        return len(set(self._rl_map.receptors))

    def getRLMap(self):
        """
        :return: a copy of the stored receptor ligand map
        :rtype: data_classes.ReceptorLigandMap
        """
        return copy.copy(self._rl_map)

    @validation.validator(NUM_ENTRIES_VAL_ORDER)
    def checkNumEntries(self):
        """
        Fail if the combined count of nodes and targets is below
        `min_structures`.
        """
        if self.getNumNodes() + self.getNumTargets() < self.min_structures:
            base_msg = 'You must specify at least {number} {variable_str}.'
            msg = format_numbered_message(base_msg, self.min_structures,
                                          'structure')
            return False, msg
        return True

    def _getSummaryMessages(self):
        """
        Returns individual messages to be used in the entry data summary.
        Meant to be overridden by a subclass.

        :return: summary messages to format
        :rtype: tuple(str)
        """
        return tuple()

    def getSummary(self):
        """
        Return a summary of the receptor and ligand data stored in this object.

        :return: a summary of the data stored on this object
        :rtype: str
        """
        return self.formatSummaryMessage(*self._getSummaryMessages())

    def formatSummaryMessage(self, *msgs):
        """
        Returns a formatted summary string.

        Takes the form of 'msg1 (msg2, msg3...)'. With no messages the result
        is an empty string; with one message there is no parenthesized part.

        :param msgs: messages to separate
        :type msgs: str
        :return: Formatted string
        :rtype: str
        """
        if not msgs:
            return ''
        summary_msg = f'{msgs[0]}'
        if len(msgs) >= 2:
            summary_msg += f' ({", ".join(msgs[1:])})'
        return summary_msg
class IndividualStructureBase(BaseEntryData):
    """
    Entry data for inputs in which every structure of the receptor-ligand map
    is either one target or one node structure.
    """

    def _createRLMap(self, structure_map):
        """
        Build the receptor-ligand map for non-complexed inputs, which should
        hold at most one receptor plus any number of ligands tied to it.

        :param structure_map: a dictionary mapping unique strings to structures
            from some system
        :type structure_map: dict[str, structure.Structure]
        :return: a receptor ligand map
        :rtype: data_classes.ReceptorLigandMap
        """
        rl_map = data_classes.ReceptorLigandMap()
        primary_receptor = None
        surplus_receptors = {}

        for key, st in structure_map.items():
            if not analyze.evaluate_asl(st, 'protein'):
                # Anything without protein atoms is stored as a ligand group
                group = data_classes.ReceptorLigandGroup(ligand=st)
                rl_map[key].append(group)
                continue
            if primary_receptor is None:
                primary_receptor = st
            else:
                surplus_receptors[key] = st

        # Every ligand group shares the first receptor encountered (or None
        # if no receptor was supplied)
        for group in rl_map.rl_groups:
            group.receptor = primary_receptor

        # Receptors beyond the first become ligand-less groups of their own;
        # these are expected to make this class fail validation
        for key, st in surplus_receptors.items():
            group = data_classes.ReceptorLigandGroup(receptor=st)
            rl_map[key].append(group)

        return rl_map

    def getNumNodes(self):
        """
        :return: the number of unique nodes in this system
        :rtype: int
        """
        unique_ligands = get_ligands(self._rl_map.ligands, unique_smiles=True)
        return len(unique_ligands)

    def getNumLigands(self):
        """
        :return: the number of total ligands in this system
        :rtype: int
        """
        all_ligands = get_ligands(self._rl_map.ligands)
        return len(all_ligands)

    @validation.validator(NUM_NODES_VAL_ORDER)
    def checkNumNodes(self):
        """
        Fail if any stored structure holds more than one ligand, or if the
        number of unique nodes is below `min_nodes`.
        """
        for st in self._rl_map.structures:
            num_found = len(analyze.find_ligands(st))
            if num_found > 1:
                return False, (f'Entry {st.title} contains more than one '
                               f'{self.node_str}.')

        if self.getNumNodes() >= self.min_nodes:
            return True

        msg = format_numbered_message(
            'You must specify at least {number} {variable_str}.',
            self.min_nodes, self.node_str)
        if self.node_str == LIGAND:
            # Ligands may be missing because they were not detected as such
            msg += LIG_DETECT_ERR_MSG
        return False, msg
class TargetlessBase(IndividualStructureBase):
    """
    Base class for entry types that have no target structure. Subclasses must
    redefine `max_num_atoms`.

    :cvar max_num_atoms: the maximum number of atoms per structure
    :vartype max_num_atoms: int
    """

    max_num_atoms = 0

    @validation.validator(NUM_ATOMS_VAL_ORDER)
    def checkNumAtoms(self):
        """
        Fail on the first stored structure whose atom count exceeds
        `max_num_atoms`.
        """
        oversized = next((st for st in self._rl_map.structures
                          if len(st.atom) > self.max_num_atoms), None)
        if oversized is None:
            return True
        msg = (f'Entry {oversized.title} contains more than '
               f'{self.max_num_atoms} atoms.')
        return False, msg
class Ligands(TargetlessBase):
    """
    Entry type for ligand-only input, one ligand per entry.
    """

    name = 'Ligands'
    description = ('Upload ligands. Each ligand must be in its own entry.'
                   ' Non-ligand structures will be ignored.')
    max_num_atoms = mm.LigandParameters().max_atom_count
    node_str = LIGAND

    def _getSummaryMessages(self):
        """
        :return: the total-ligand message followed by the unique-ligand
            message
        :rtype: tuple(str)
        """
        total_msg = format_numbered_message(
            base_msg='{number} total {variable_str}',
            number=self.getNumLigands(),
            singular_str=self.entry_singular)
        unique_msg = format_numbered_message(
            base_msg='{number} unique {variable_str}',
            number=self.getNumNodes(),
            singular_str=self.node_str)
        return total_msg, unique_msg
class DockedPoses(IndividualStructureBase):
    """
    Entry type for one receptor plus docked ligand poses, each supplied as a
    separate structure.
    """

    name = 'Docked Poses'
    description = ('Upload docked poses based on the PV format. Selection'
                   ' should include one receptor, in its own entry, and docked'
                   ' ligands, each in a separate entry.')
    node_str = LIGAND
    target_str = RECEPTOR

    @validation.validator(NUM_NODES_VAL_ORDER)
    def checkNumNodes(self):
        """
        Override this method to catch case where a single protein-ligand complex
        is supplied but the "Docked Poses" option is selected. Produces a
        helpful error in this case. Make sure this code is called before the
        superclass checkNumNodes() so that this error is produced rather than
        a less specific error.

        If no nodes were found and no receptor is stored either, the check
        falls through to the superclass validator instead of raising.
        """
        if self.getNumNodes() == 0:
            # A bare `next()` raises StopIteration when there is no receptor;
            # supply a default so the validator reports a failure instead
            protein_ct = next(iter(self._rl_map.receptors), None)
            if protein_ct is not None and get_ligands([protein_ct],
                                                      unique_smiles=True):
                # The only structure supplied is a protein-ligand complex
                msg = ('This panel does not support trying to export a protein-'
                       'ligand complex as a docked pose.' + LIG_DETECT_ERR_MSG)
                return False, msg
        return super().checkNumNodes()

    @validation.validator(NUM_TARGETS_VAL_ORDER)
    def checkTargets(self):
        """
        Fail unless exactly one unique receptor is stored.
        """
        if self.getNumTargets() == 0:
            return False, 'No receptor structure was found.'
        if self.getNumTargets() > 1:
            return False, 'Please specify only one receptor.'
        return True

    def _getSummaryMessages(self):
        """
        :return: the total-entry message, then the receptor count message,
            then the ligand count message
        :rtype: tuple(str)
        """
        base_msg = '{number} {variable_str}'
        lig_msg = format_numbered_message(base_msg=base_msg,
                                          number=self.getNumNodes(),
                                          singular_str=self.node_str)
        rec_msg = format_numbered_message(base_msg=base_msg,
                                          number=self.getNumTargets(),
                                          singular_str=self.target_str)
        tot_base_msg = '{number} total {variable_str}'
        tot_msg = format_numbered_message(base_msg=tot_base_msg,
                                          number=self._rl_map.num_rl_groups,
                                          singular_str=self.entry_singular)
        return tot_msg, rec_msg, lig_msg
class Complexes(BaseEntryData):
    """
    Entry type for receptor-ligand complexes, where each input structure is
    split into (receptor, ligand) pairs.
    """

    name = 'Receptor-Ligand Complexes'
    description = ('Upload receptor-ligand complexes. Selection should include'
                   ' at least one entry, where each entry includes a receptor'
                   ' and at least one ligand.')
    node_str = LIGAND
    target_str = RECEPTOR

    def _createRLMap(self, structure_map):
        """
        Build the receptor ligand map. Every structure should be a
        non-covalently bound receptor-ligand complex holding one receptor and
        at least one ligand; one group is stored per ligand found.

        :param structure_map: a dictionary mapping unique strings to structures
            from some system
        :type structure_map: dict[str, structure.Structure]
        :return: a receptor ligand map
        :rtype: data_classes.ReceptorLigandMap
        """
        rl_map = data_classes.ReceptorLigandMap()
        for key, st in structure_map.items():
            # NOTE(review): a structure with no detected ligand yields no
            # pairs here, so no group is stored for it -- confirm that
            # dropping such entries is intended.
            for receptor, ligand in zip(*separate_complex(st)):
                group = data_classes.ReceptorLigandGroup(receptor=receptor,
                                                         ligand=ligand)
                rl_map[key].append(group)
        return rl_map

    def getNumNodes(self):
        """
        :return: the number of receptor-ligand groups in this system
        :rtype: int
        """
        return self._rl_map.num_rl_groups

    @validation.validator(NUM_TARGETS_VAL_ORDER)
    def checkTargets(self):
        """
        Fail if any stored group lacks a receptor.
        """
        if any(not group.receptor for group in self._rl_map.rl_groups):
            return False, f'There must be one {self.target_str} per entry.'
        return True

    @validation.validator(LIGANDS_VAL_ORDER)
    def checkLigands(self):
        """
        Fail if any stored group lacks a ligand.
        """
        if any(not group.ligand for group in self._rl_map.rl_groups):
            return False, (f'There must be at least one {self.node_str} per'
                           f' entry.')
        return True

    def _getSummaryMessages(self):
        """
        :return: the total-entry message followed by the receptor-ligand
            complex count message
        :rtype: tuple(str)
        """
        total_msg = format_numbered_message(
            base_msg='{number} total {variable_str}',
            number=self._rl_map.num_rl_groups,
            singular_str=self.entry_singular)
        rec_lig_msg = format_numbered_message(
            base_msg='{number} receptor-ligand {variable_str}',
            number=self.getNumNodes(),
            singular_str='complex')
        return total_msg, rec_lig_msg
class CovalentDockingComplexes(Complexes):
    """
    Entry type for covalent docking output, where the uploaded ligand is the
    original input ligand stored on each complex structure.
    """

    name = 'Covalent Docking Complexes'
    description = ('Upload covalent docking structures. Selection should'
                   ' include outputs from the covalent docking panel.'
                   ' Uploaded ligands will be the original (input) structures'
                   ' prior to covalent docking processing.')

    def _createRLMap(self, structure_map):
        """
        Build the receptor ligand map from covalently-bound receptor-ligand
        complexes.

        Each structure is expected to be output from covalent_docking.py in
        psp-src and so to carry structure properties holding the ligand as it
        was before docking with the receptor. Structures missing any of those
        properties are skipped.

        :param structure_map: a dictionary mapping unique strings to structures
            from some system
        :type structure_map: dict[str, structure.Structure]
        :return: a receptor ligand map
        :rtype: data_classes.ReceptorLigandMap
        """
        rl_map = data_classes.ReceptorLigandMap()
        # Ligand atoms are flagged by an atom property; the selection is
        # expanded by one bond and filled out to whole residues
        lig_asl = f'fillres withinbonds 1 (atom.{CDOCK_LIG_ATOM_PROP} 1)'

        for key, complex_st in structure_map.items():
            # The original ligand is compressed and stored as properties of
            # the complex structure (see covalent_docking.py in psp-src)
            encoded_lig = complex_st.property.get(CDOCK_BYTES)
            comp_size = complex_st.property.get(CDOCK_COMP_SIZE)
            decomp_size = complex_st.property.get(CDOCK_DECOMP_SIZE)
            if any(value is None
                   for value in (encoded_lig, comp_size, decomp_size)):
                continue

            # Rebuild the original ligand structure
            compressed = base64.b64decode(encoded_lig.encode('ASCII'))
            handle = mm.mmct_ct_from_compressed_bytes(compressed, comp_size,
                                                      decomp_size)
            orig_lig_st = structure.Structure(handle)

            # Split the complex into the docked ligand (with its attached
            # residue) and everything else
            lig_atom_idcs = analyze.evaluate_asl(complex_st, lig_asl)
            rec_atom_idcs = set(complex_st.getAtomIndices()) - set(lig_atom_idcs)
            mod_lig_st = complex_st.extract(lig_atom_idcs, True)
            receptor_st = complex_st.extract(rec_atom_idcs, True)

            # Give the rebuilt ligand every property of the complex except
            # the compressed-ligand bookkeeping
            for data_name in complex_st.property:
                if data_name in CDOCK_LIG_DATA_NAMES:
                    continue
                orig_lig_st.property[data_name] = complex_st.property.get(
                    data_name)

            # Strip the bookkeeping properties from both extracted structures
            for data_name in CDOCK_LIG_DATA_NAMES:
                del mod_lig_st.property[data_name]
                del receptor_st.property[data_name]

            # The primary ligand for the LiveDesign upload is the
            # non-complexed ligand; the alternate (3D data) ligand is the
            # complexed structure
            group = data_classes.ReceptorLigandGroup(receptor=receptor_st,
                                                     ligand=orig_lig_st,
                                                     alt_ligand=mod_lig_st)
            rl_map[key].append(group)
        return rl_map
#===============================================================================
# Utility functions
#===============================================================================
def separate_complex(complex_st):
    """
    Split a receptor-ligand complex into parallel lists of 'receptor' and
    ligand structures, one pair per ligand found. Each 'receptor' is a copy of
    the complex with that ligand's atoms deleted, or None if no atoms remain.

    :param complex_st: a ligand-receptor complex
    :type complex_st: structure.Structure
    :return: a receptor list and corresponding ligand list
    :rtype: tuple(list(structure.Structure), list(structure.Structure))
    """
    receptor_sts = []
    ligand_sts = []
    for lig in analyze.find_ligands(complex_st):
        remainder = complex_st.copy()
        remainder.deleteAtoms(lig.atom_indexes)
        # A complex consisting solely of the ligand leaves no receptor
        receptor_sts.append(remainder if len(remainder.atom) else None)
        ligand_sts.append(lig.st.copy())
    return receptor_sts, ligand_sts
def get_ligands(sts, unique_smiles=False):
    """
    Return the unique SMILES strings of the ligands found in `sts`.

    :param sts: the structures to analyze
    :type sts: iterable(structure.Structure)
    :param unique_smiles: whether to drop repeated SMILES; when set, the
        first-seen order of the SMILES is preserved
    :type unique_smiles: bool
    :return: list of smiles
    :rtype: list(str)
    """
    ligand_list = [
        lig.unique_smiles for st in sts for lig in analyze.find_ligands(st)
    ]
    if unique_smiles:
        # dict keys keep insertion order, so the result is deterministic,
        # unlike a round trip through `set`
        ligand_list = list(dict.fromkeys(ligand_list))
    return ligand_list