Source code for schrodinger.application.bioluminate.antibody.utils

"""
Useful classes and functions for working with Antibodies

Copyright (c) Schrodinger, LLC. All rights reserved
"""
# Contributors: Joshua Williams, David Giesen

#- Imports -------------------------------------------------------------------

import os.path
from collections import OrderedDict

from schrodinger.application.bioluminate.actions import icons  # noqa: F401
from schrodinger.application.bioluminate.ssv import *  # noqa F403
from schrodinger.job.util import hunt
from schrodinger.Qt import QtWidgets
from schrodinger.structutils.analyze import evaluate_asl
from schrodinger.ui.sequencealignment import sequence as msv_sequence
from schrodinger.ui.sequencealignment.globals import ANNOTATION_RESNUM
from schrodinger.utils import preferences as prefs

try:
    from schrodinger.application.prime.packages import \
        PrimeStructureDatabase as psd
    from schrodinger.application.prime.packages import antibody as psp_antibody
    from schrodinger.application.prime.packages import \
        search_antibody as psp_search
except ImportError:
    psd = None
    psp_search = None
    psp_antibody = None

try:
    from schrodinger.maestro import maestro
except:
    maestro = None

#- Globals -------------------------------------------------------------------

# Preference group that the module-level `preferences` handle is opened on
GENERAL_PREF_GROUP = 'antibody_general'
# Preference keys used by the get_*/set_* functions in this module
PREF_ACTIVE_DATABASES = 'active_databases'
PREF_ALL_DATABASES = 'all_databases'
PREF_AB_SCHEME = 'antibody_scheme'
# Numbering scheme used as the default value for the scheme preference
DEFAULT_SCHEME = 'Chothia'

# Module-level preferences handle; the group is entered once, at import time
preferences = prefs.Preferences(prefs.SCRIPTS)
preferences.beginGroup(GENERAL_PREF_GROUP, toplevel=True)

# The token used to split and join multiple database paths stored in a single
# preference string
DATABASE_TOKEN = '|'

#- Functions -----------------------------------------------------------------


def get_bio_database():
    """
    Return the path of the default antibody database.

    The bioluminate ``data`` directory is located with `hunt`; the database
    file is ``antibody/antibody.db`` beneath it
    (i.e. bioluminate-vxxx/data/antibody).

    :rtype: str
    :return: Path to the default antibody database file
    """
    data_dir = hunt('bioluminate', dir='data')
    db_path = os.path.join(data_dir, 'antibody', 'antibody.db')
    return db_path
def set_active_databases(databases):
    """
    Store the database paths that should be opened by default on panel
    initialization.

    :type databases: list of str
    :param databases: Database paths; they are joined with `DATABASE_TOKEN`
        and saved as a single preference string
    """
    preferences.set(PREF_ACTIVE_DATABASES, DATABASE_TOKEN.join(databases))
def set_all_databases(databases):
    """
    Store the database paths under the "all_databases" preference.

    :type databases: list of str
    :param databases: Database paths; they are joined with `DATABASE_TOKEN`
        and saved as a single preference string
    """
    preferences.set(PREF_ALL_DATABASES, DATABASE_TOKEN.join(databases))
def set_antibody_scheme(scheme):
    """
    Save the antibody numbering scheme in the preferences.

    :type scheme: str
    :param scheme: Numbering scheme to store under `PREF_AB_SCHEME`
    """
    preferences.set(PREF_AB_SCHEME, scheme)
def get_all_databases(default=None):
    """
    Return every database path stored under the "all_databases" preference.

    :param default: Value handed to the preferences lookup as its default
    :type default: str or None

    :rtype: list of str
    :return: The stored string split on `DATABASE_TOKEN`, or an empty list
        when the stored (or default) value is empty
    """
    stored = preferences.get(PREF_ALL_DATABASES, default=default)
    return stored.split(DATABASE_TOKEN) if stored else []
def get_active_databases(default=None):
    """
    Return the database paths a user has selected as active.

    :param default: Value handed to the preferences lookup as its default
    :type default: str or None

    :rtype: list of str
    :return: Active database paths that currently exist on disk
    """
    stored = preferences.get(PREF_ACTIVE_DATABASES, default=default)
    candidates = stored.split(DATABASE_TOKEN) if stored else []
    # Only paths that still exist are reported (PANEL-1961)
    return [path for path in candidates if os.path.exists(path)]
def get_antibody_scheme(default=DEFAULT_SCHEME):
    """
    Return the antibody numbering scheme stored in the preferences.

    :param default: Numbering scheme handed to the preferences lookup as its
        default
    :type default: str

    :return: AB numbering scheme
    :rtype: str
    """
    return preferences.get(PREF_AB_SCHEME, default=default)
def make_scheme_cb(parent=None):
    """
    Create the antibody numbering scheme combo box.

    The combo box is filled with `psp_antibody.supported_schemes` and the item
    matching the scheme stored in the preferences is made current.

    :param parent: Optional parent passed to the `QComboBox` constructor

    :return: The populated combo box
    :rtype: QtWidgets.QComboBox
    """
    scheme_cb = QtWidgets.QComboBox(parent)
    scheme_cb.addItems(psp_antibody.supported_schemes)
    idx = scheme_cb.findText(get_antibody_scheme())
    if idx < 0:
        # findText() returns -1 when the stored preference is not one of the
        # supported schemes (e.g. a stale value); try the module default.
        idx = scheme_cb.findText(DEFAULT_SCHEME)
    # setCurrentIndex(-1) would leave the combo box without any selection, so
    # only change the current item when a real match was found.
    if idx >= 0:
        scheme_cb.setCurrentIndex(idx)
    return scheme_cb
def get_databases():
    """
    Return the databases known from the user's preferences together with
    their active state.

    Only databases whose path exists are reported. When none qualify, the
    default bio database is returned as the single, active entry.

    :rtype: list of (str, bool)
    :return: (database path, is active) tuples
    """
    default_db = get_bio_database()
    known_paths = get_all_databases(default=default_db)
    active_paths = get_active_databases(default=default_db)
    found = [(path, path in active_paths)
             for path in known_paths
             if os.path.exists(path)]
    return found if found else [(default_db, True)]
def open_databases(database_paths=None):
    """
    Return a list of `open databases<PrimeStructureDB>`.

    :type database_paths: list of str or None
    :param database_paths: Paths of the databases to open. When empty or not
        supplied, the active databases from the preferences are used instead.

    :rtype: list
    :return: One `PrimeStructureDB` object per path that exists on disk
    """
    default_db = get_bio_database()
    if database_paths:
        paths = database_paths
    else:
        paths = get_active_databases(default=default_db)
    return [psd.PrimeStructureDB(path)
            for path in paths
            if os.path.exists(path)]
def search_antibody(light_seq,
                    heavy_seq,
                    databases,
                    criteria=None,
                    viewer=None,
                    scheme=DEFAULT_SCHEME):
    """
    Search the databases for antibodies matching the supplied light and heavy
    sequences and, optionally, load the hits into a sequence viewer.

    :type light_seq: str
    :param light_seq: The text representation of a sequence such as obtained
        from the `schrodinger.ui.sequencealignment.sequence.Sequence.text`
        method. This should be for the light variable region

    :type heavy_seq: str
    :param heavy_seq: The text representation of a sequence such as obtained
        from the `schrodinger.ui.sequencealignment.sequence.Sequence.text`
        method. This should be for the heavy variable region

    :type databases: list of
        `schrodinger.application.prime.packages.PrimeStructureDatabase.PrimeStructureDB`
    :param databases: The databases to search (passed on as ``db_list``)

    :type criteria: list
    :param criteria: list of String, Numerical and Boolean
        `schrodinger.application.bioluminate.propfilter.Criterion` objects
        used to limit the antibodies searched in the database

    :type viewer:
        `schrodinger.application.bioluminate.ssv.viewer.SimplifiedSequenceViewer`
    :param viewer: The SimplifiedSequenceViewer to place results in. When not
        given, the results are returned without touching Maestro or a viewer.

    :type scheme: str
    :param scheme: db numbering scheme

    :rtype: list
    :return: List of results, each item is a `FrameworkTemplate` object
    """
    # Each hit starts with (score, light_alignment, heavy_alignment); the
    # remaining three fields are not used here.
    results = psp_search.antibody_homology_search_full(light_seq,
                                                       heavy_seq,
                                                       criteria=criteria,
                                                       db_list=databases,
                                                       scheme=scheme)
    results_fr = [
        FrameworkTemplate(score, light_align, heavy_align)
        for score, light_align, heavy_align, _t, _c, _seq in results
    ]
    if not viewer:
        return results_fr

    # Reset the record of the most recent import
    viewer.last_sequences_imported = []

    # Snapshot of the sequences present before importing, for comparison later
    current_seqs = list(viewer.sequence_group.sequences)

    # Load both regions of every hit into the SSV via the project table
    for template in results_fr:
        for region in ('Light', 'Heavy'):
            pt_row = template.importRegion(region)
            previously_selected = list(
                maestro.project_table_get().selected_rows)
            maestro.command('entryselectonlyrow %s' % pt_row.entry_id)
            viewer.importFromMaestro('selected')
            # Mark the rows that were selected before the import as selected
            for row in previously_selected:
                row.is_selected = True

    if results:
        viewer.updateView()
        if viewer.cb_contents_changed:
            viewer.cb_contents_changed()
        viewer.sequencesImported.emit(viewer.MANUAL_SEQUENCE)

    # Everything that was not present before (rulers excluded) counts as
    # newly imported
    viewer.last_sequences_imported.extend(
        seq for seq in viewer.sequence_group.sequences
        if seq not in current_seqs and not seq.isRuler())

    viewer.addAnnotation(ANNOTATION_RESNUM)
    viewer.updateView()
    return results_fr
def calc_cdr_shifts(cdr_regions, scheme):
    """
    Convert a dict of CDR region boundaries to a parameter dict of shifts.

    :type cdr_regions: dict
    :param cdr_regions: Custom CDR region numbering (if any); maps a CDR name
        to its (start, end) pair

    :type scheme: str
    :param scheme: Numbering scheme passed on to
        `psp_antibody.calc_cdrshift`

    :rtype: dict
    :return: Dictionary of parameters to pass to write_bld_input().
    """
    params = {}
    for cdr_name, (start, end) in cdr_regions.items():
        params.update({
            'CDRSHIFT_%s_LEFT' % cdr_name:
                psp_antibody.calc_cdrshift(scheme, cdr_name, "left", start),
            'CDRSHIFT_%s_RIGHT' % cdr_name:
                psp_antibody.calc_cdrshift(scheme, cdr_name, "right", end),
        })
    return params
def get_align_ct(align):
    """
    Return the structure belonging to an alignment object.

    :param align: antibody alignment object
    :type align: schrodinger.application.prime.packages.antibody.AntibodyAlignment

    :return: structure
    :rtype: structure.Structure
    """
    # Only the first two items of dbentry are needed; any others are ignored
    db_object, row_index, *_rest = align.dbentry
    structure_ct = db_object.getCt(row_index)
    return structure_ct
def get_align_data(align, name, default_value=None):
    """
    Look up a property in the database row of an alignment object.

    :param align: antibody alignment object
    :type align: schrodinger.application.prime.packages.antibody.AntibodyAlignment

    :param name: name of property that we are retrieving data from.
    :type name: str

    :param default_value: optional default value with type appropriate for
        the given data name type
    :type default_value: str or float or int or bool or None

    :return: data value, or `default_value` when the property is missing or
        the alignment has no database object
    :rtype: str or int or float or bool
    """
    db_object, row_index, *_rest = align.dbentry
    if not db_object:
        return default_value
    row_data = db_object.getDBRow(row_index).data
    return row_data.get(name, default_value)
def get_align_seq_data(align, name, default_value=None):
    """
    Look up a property in the sequence entry of an alignment object.

    This data is precompiled, so a different call is made compared to
    get_align_data.

    :param align: antibody alignment object
    :type align: schrodinger.application.prime.packages.antibody.AntibodyAlignment

    :param name: name of property that we are retrieving data from.
    :type name: str

    :param default_value: optional default value with type appropriate for
        the given data name type
    :type default_value: str or float or int or bool or None

    :return: data value, or `default_value` when the property is missing or
        the alignment has no database object
    :rtype: str or int or float or bool
    """
    db_object, row_index, *_rest = align.dbentry
    if not db_object:
        return default_value
    seq_entry = db_object.getSeqEntry(row_index)
    return seq_entry.get(name, default_value)
#- Classes -------------------------------------------------------------------
class FrameworkTemplate(object):
    """
    A single antibody search hit: the search score together with the Light
    and Heavy variable region alignments, keyed by region name.
    """

    def __init__(self, score, light_alignment, heavy_alignment):
        """
        :type score: float
        :param score: Search score for these alignments against the query

        :type light_alignment:
            `schrodinger.application.prime.packages.antibody.AntibodyAlignment`
        :param light_alignment: The alignment object describing the Light
            Variable region alignment

        :type heavy_alignment:
            `schrodinger.application.prime.packages.antibody.AntibodyAlignment`
        :param heavy_alignment: The alignment object describing the Heavy
            Variable region alignment
        """
        self.score = score
        # Ordered so that labels always list Light before Heavy
        self.alignments = OrderedDict()
        self.alignments['Light'] = light_alignment
        self.alignments['Heavy'] = heavy_alignment

    # Note - AntibodyAlignment properties:
    # align.title: Title of the template object for the alignment
    # align.score: Similarity score for the alignment
    # align.sequences: list of [query, template], items are aligned text
    #                  sequences with '.' representing missing residues
    # align.clean_seqs: list of [query, template], items are text
    #                   sequences. The index of a residue in this list
    #                   is the residue number of that residue.
    # align.cdrs: list of [query, template], items are 3 tuples (x, y)
    #             denoting the residue number at the start and end
    #             of the X1-X3 regions
    # align.maps: list of [query, template], items are a list of residue
    #             number of the residue at that index in
    #             align.sequences. -1 indicates missing. This
    #             maps the index of a residue in sequences to the
    #             index of that residue in clean_seqs.

    def __str__(self):
        return self.createLabel()

    def createLabel(self, maxlen=15):
        """
        :type maxlen: int
        :param maxlen: The maximum length for the name of each region template

        :rtype: str
        :return: A label describing this FrameworkTemplate
        """
        label = 'Score: %.2f' % self.score
        for rtype, align in self.alignments.items():
            title = align.title
            if len(title) > maxlen:
                # Cut out the middle part, but leave the chain letter
                title = title[:maxlen - 5] + '...' + title[-2:]
            label = label + ' ' + rtype + ': ' + title
        return label

    def createSequence(self, region):
        """
        :type region: str
        :param region: One of
                        - Light
                        - Heavy

        :rtype: `schrodinger.ui.sequencealignment.sequence.Sequence`
        :return: The Sequence object for this region - suitable for import
            into the SSV.
        """
        align = self.alignments[region]
        seq = msv_sequence.Sequence()
        seq.name = align.title
        seq.short_name = '( %s )' % region + align.title
        # Index 1 of clean_seqs is the template sequence ([query, template])
        seq.appendResidues(align.clean_seqs[1])
        seq.sanitize()
        return seq

    def getFilename(self, region):
        """
        :type region: str
        :param region: One of
                        - Light
                        - Heavy

        :rtype: str
        :return: Path built from the database directory and the ``_filename``
            value stored in the database row of this region's alignment.
        """
        align = self.alignments[region]
        # The previous implementation read `entry._DB._directory` without ever
        # defining `entry`, so every call raised NameError. The row is now
        # resolved from align.dbentry the same way get_align_data() does.
        # NOTE(review): assumes the object returned by getDBRow() carries the
        # `_DB` attribute the old code expected on `entry` - confirm.
        abobj, idx, *_ = align.dbentry
        entry = abobj.getDBRow(idx)
        _filename = entry.data.get('_filename')
        return os.path.join(entry._DB._directory, _filename)

    def importRegion(self, region):
        """
        Extract one chain of this region's template structure and import it
        into the Maestro project table.

        :type region: str
        :param region: One of
                        - Light
                        - Heavy

        :return: The value returned by the project table's `importStructure`
            for the extracted chain (its ``entry_id`` is used by
            `search_antibody`).
        """
        align = self.alignments[region]
        full_ct = get_align_ct(align)
        # NOTE(review): the Heavy chain property is read for both the Light
        # and the Heavy region - confirm this is intended for the per-region
        # sequence entry.
        chain = get_align_seq_data(align, 's_bioluminateReadOnly_Heavy_Chain')
        atom_indices = evaluate_asl(full_ct, '(chain.name %s)' % chain)
        seq_st = full_ct.extract(atom_indices, copy_props=True)
        pt = maestro.project_table_get()
        pt_row = pt.importStructure(seq_st, name=align.title)
        return pt_row