Source code for schrodinger.application.bioluminate.antibody.utils

"""
Useful classes and functions for working with Antibodies

Copyright (c) Schrodinger, LLC. All rights reserved
"""
# Contributors: Joshua Williams, David Giesen

#- Imports -------------------------------------------------------------------

import os.path
from collections import OrderedDict

from schrodinger.application.bioluminate.actions import icons  # noqa: F401
from schrodinger.application.bioluminate.ssv import *  # noqa F403
from schrodinger.job.util import hunt
from schrodinger.Qt import QtWidgets
from schrodinger.structutils.analyze import evaluate_asl
from schrodinger.ui.sequencealignment import sequence as msv_sequence
from schrodinger.ui.sequencealignment.globals import ANNOTATION_RESNUM
from schrodinger.utils import preferences as prefs

try:
    from schrodinger.application.prime.packages import \
        PrimeStructureDatabase as psd
    from schrodinger.application.prime.packages import antibody as psp_antibody
    from schrodinger.application.prime.packages import \
        search_antibody as psp_search
except ImportError:
    psd = None
    psp_search = None
    psp_antibody = None

try:
    from schrodinger.maestro import maestro
except Exception:
    # Best-effort import: fall back to None whenever the maestro module
    # cannot be loaded. `Exception` (rather than a bare `except`) keeps
    # KeyboardInterrupt/SystemExit from being silently swallowed.
    maestro = None

#- Globals -------------------------------------------------------------------

# Name of the preference group used for the antibody settings below
GENERAL_PREF_GROUP = 'antibody_general'
# Preference keys read/written by the get_*/set_* functions in this module
PREF_ACTIVE_DATABASES = 'active_databases'
PREF_ALL_DATABASES = 'all_databases'
PREF_AB_SCHEME = 'antibody_scheme'
# Numbering scheme used as the default when none is supplied
DEFAULT_SCHEME = 'Chothia'

# Module-wide preferences handle shared by every function in this module.
# NOTE(review): beginGroup presumably scopes all later get/set calls to
# GENERAL_PREF_GROUP - confirm against schrodinger.utils.preferences.
preferences = prefs.Preferences(prefs.SCRIPTS)
preferences.beginGroup(GENERAL_PREF_GROUP, toplevel=True)

# The token used to split and join multiple databases
DATABASE_TOKEN = '|'

#- Functions -----------------------------------------------------------------


def get_bio_database():
    """
    Return the path of the default antibody database located in
    bioluminate-vxxx/data/antibody.

    :rtype: str
    :return: Path to `antibody/antibody.db` below the BioLuminate data
        directory. The path is only assembled here; its existence is not
        checked.
    """
    data_dir = hunt('bioluminate', dir='data')
    db_path = os.path.join(data_dir, 'antibody', 'antibody.db')
    return db_path


def set_active_databases(databases):
    """
    Store the given database paths as the active databases, i.e. the
    default databases to open on panel initialization.

    :type databases: iterable of str
    :param databases: Database paths. They are joined with
        `DATABASE_TOKEN` and saved as a single preference string.
    """
    joined_paths = DATABASE_TOKEN.join(databases)
    preferences.set(PREF_ACTIVE_DATABASES, joined_paths)


def set_all_databases(databases):
    """
    Store the given database paths under "all_databases" in the
    preferences.

    :type databases: iterable of str
    :param databases: Database paths. They are joined with
        `DATABASE_TOKEN` and saved as a single preference string.
    """
    joined_paths = DATABASE_TOKEN.join(databases)
    preferences.set(PREF_ALL_DATABASES, joined_paths)


def set_antibody_scheme(scheme):
    """
    Save the antibody numbering scheme in the preferences.

    :type scheme: str
    :param scheme: Numbering scheme to store under the `PREF_AB_SCHEME`
        preference key
    """
    preferences.set(PREF_AB_SCHEME, scheme)


def get_all_databases(default=None):
    """
    Return every database path the user has imported, as stored in the
    preferences.

    :type default: str or None
    :param default: Value handed to the preference lookup as its default

    :rtype: list
    :return: The stored string split on `DATABASE_TOKEN`, or an empty list
        when the lookup yields an empty/false value
    """
    stored = preferences.get(PREF_ALL_DATABASES, default=default)
    if not stored:
        return []
    return stored.split(DATABASE_TOKEN)


def get_active_databases(default=None):
    """
    Return the database paths the user has marked as active, restricted to
    paths that exist on disk.

    :type default: str or None
    :param default: Value handed to the preference lookup as its default

    :rtype: list
    :return: Existing paths from the active-databases preference; empty
        when nothing is stored or none of the stored paths exist
    """
    stored = preferences.get(PREF_ACTIVE_DATABASES, default=default)
    candidates = stored.split(DATABASE_TOKEN) if stored else []

    # Keep only databases where a valid path exists (PANEL-1961)
    return [path for path in candidates if os.path.exists(path)]


def get_antibody_scheme(default=DEFAULT_SCHEME):
    """
    Look up the antibody numbering scheme stored in the preferences.

    :param default: scheme handed to the preference lookup as its default
    :type default: str

    :return: AB numbering scheme
    :rtype: str
    """
    return preferences.get(PREF_AB_SCHEME, default=default)


def make_scheme_cb(parent=None):
    """
    Create and return an antibody numbering scheme combo box whose current
    item is the scheme stored in the preferences.

    :type parent: QtWidgets.QWidget or None
    :param parent: Optional parent widget passed to the combo box

    :rtype: QtWidgets.QComboBox
    :return: Combo box populated with `psp_antibody.supported_schemes`
    """

    scheme_cb = QtWidgets.QComboBox(parent)
    scheme_cb.addItems(psp_antibody.supported_schemes)
    idx = scheme_cb.findText(get_antibody_scheme())
    # findText() returns -1 when the stored scheme is not among the supported
    # ones; passing that to setCurrentIndex() would clear the selection and
    # leave an empty combo box, so only apply a valid index and otherwise
    # keep the selection Qt made when the items were added.
    if idx >= 0:
        scheme_cb.setCurrentIndex(idx)
    return scheme_cb


def get_databases():
    """
    Return the databases known from the user's preferences together with
    their active state. When no stored database exists on disk, the default
    bio database is returned as the single, active entry.

    :rtype: list
    :return: List of `(path, active)` tuples, where `active` is a bool
    """
    fallback_db = get_bio_database()

    known_paths = get_all_databases(default=fallback_db)
    active_paths = get_active_databases(default=fallback_db)

    # Only report databases whose file is actually present
    entries = [(path, path in active_paths)
               for path in known_paths
               if os.path.exists(path)]
    if not entries:
        entries.append((fallback_db, True))

    return entries


def open_databases(database_paths=None):
    """
    Return a list of `open databases<PrimeStructureDB>`. When
    database_paths is not supplied (or is empty), the paths are taken from
    the active databases in the preferences.

    :type database_paths: list of str or None
    :param database_paths: Paths of the databases to open

    :rtype: list
    :return: One `psd.PrimeStructureDB` object per path that exists on
        disk; non-existing paths are skipped
    """
    fallback_db = get_bio_database()

    paths = database_paths
    if not paths:
        paths = get_active_databases(default=fallback_db)

    return [psd.PrimeStructureDB(path)
            for path in paths
            if os.path.exists(path)]


def search_antibody(light_seq,
                    heavy_seq,
                    databases,
                    criteria=None,
                    viewer=None,
                    scheme=DEFAULT_SCHEME):
    """
    Search the databases for antibodies matching the supplied light and
    heavy sequences and, when a viewer is given, load the hits into it.

    :type light_seq: str
    :param light_seq: The text representation of a sequence such as obtained
        from the `schrodinger.ui.sequencealignment.sequence.Sequence.text` method.
        This should be for the light variable region

    :type heavy_seq: str
    :param heavy_seq: The text representation of a sequence such as obtained
        from the `schrodinger.ui.sequencealignment.sequence.Sequence.text` method.
        This should be for the heavy variable region

    :type databases: list
    :param databases: The databases to search; forwarded as `db_list` to the
        homology search (presumably
        `schrodinger.application.prime.packages.PrimeStructureDatabase.PrimeStructureDB`
        objects - confirm against the search API)

    :type criteria: list
    :param criteria: list of String, Numerical and Boolean
        `schrodinger.application.bioluminate.propfilter.Criterion` objects obtained
        from the `schrodinger.application.bioluminate.propfilter` for limiting the
        antibodies searched in the database

    :type viewer:
        `schrodinger.application.bioluminate.ssv.viewer.SimplifiedSequenceViewer`
    :param viewer: The SimplifiedSequenceViewer viewer to place results in.
        When omitted, the results are returned without touching Maestro or
        any viewer.

    :type scheme: str
    :param scheme: db numbering scheme

    :rtype: list
    :return: List of results, each item is a `FrameworkTemplate` object
    """

    # Every hit is a 6-item record; only the leading score, light alignment
    # and heavy alignment are used to build the templates.
    hits = psp_search.antibody_homology_search_full(light_seq,
                                                    heavy_seq,
                                                    criteria=criteria,
                                                    db_list=databases,
                                                    scheme=scheme)
    templates = [
        FrameworkTemplate(score, light_align, heavy_align)
        for score, light_align, heavy_align, _t, _c, _seq in hits
    ]

    if not viewer:
        return templates

    # Forget the sequences recorded by the previous import
    viewer.last_sequences_imported = []

    # Snapshot of the sequences present before anything is imported
    known_seqs = list(viewer.sequence_group.sequences)

    # Bring both regions of every template into the viewer via Maestro
    for template in templates:
        for region in ('Light', 'Heavy'):
            pt_row = template.importRegion(region)
            project_table = maestro.project_table_get()
            previously_selected = list(project_table.selected_rows)
            maestro.command('entryselectonlyrow %s' % pt_row.entry_id)
            viewer.importFromMaestro('selected')
            # Re-select the rows that were selected before the import
            for row in previously_selected:
                row.is_selected = True

    if hits:
        viewer.updateView()
        if viewer.cb_contents_changed:
            viewer.cb_contents_changed()
        viewer.sequencesImported.emit(viewer.MANUAL_SEQUENCE)

    # Everything that was not in the snapshot (rulers excluded) is new
    for seq in viewer.sequence_group.sequences:
        if seq in known_seqs or seq.isRuler():
            continue
        viewer.last_sequences_imported.append(seq)

    viewer.addAnnotation(ANNOTATION_RESNUM)

    viewer.updateView()

    return templates


def calc_cdr_shifts(cdr_regions, scheme):
    """
    Convert a dict of CDR region boundaries to a parameter dict of shifts.

    :type cdr_regions: dict
    :param cdr_regions: Custom CDR region numbering (if any); maps each CDR
        name to a `(start, end)` pair

    :type scheme: str
    :param scheme: Numbering scheme passed on to
        `psp_antibody.calc_cdrshift`

    :rtype: dict
    :return: Dictionary of parameters to pass to write_bld_input(), holding
        a `CDRSHIFT_<cdr>_LEFT` and a `CDRSHIFT_<cdr>_RIGHT` entry per CDR
    """
    shift_params = {}
    for cdr_name, (first, last) in cdr_regions.items():
        shift_params['CDRSHIFT_%s_LEFT' % cdr_name] = \
            psp_antibody.calc_cdrshift(scheme, cdr_name, "left", first)
        shift_params['CDRSHIFT_%s_RIGHT' % cdr_name] = \
            psp_antibody.calc_cdrshift(scheme, cdr_name, "right", last)
    return shift_params


def get_align_ct(align):
    """
    Fetch the structure that belongs to an alignment object.

    :param align: antibody alignment object
    :type align: schrodinger.application.prime.packages.antibody.AntibodyAlignment

    :return: structure obtained from the alignment's database object
    :rtype: structure.Structure
    """
    # Only the first two items of dbentry are needed; the rest is ignored
    db_object, entry_index, *_unused = align.dbentry
    structure_ct = db_object.getCt(entry_index)
    return structure_ct


def get_align_data(align, name, default_value=None):
    """
    Look up a named data value on the database row of an alignment object.

    :param align: antibody alignment object
    :type align: schrodinger.application.prime.packages.antibody.AntibodyAlignment

    :param name: name of property that we are retrieving data from.
    :type name: str

    :param default_value: optional default value with type appropriate for the given
        data name type
    :type default_value: str or float or int or bool or None

    :return: data value, or `default_value` when the alignment has no
        database object or the row has no such property
    :rtype: str or int or float or bool
    """
    db_object, entry_index, *_unused = align.dbentry
    if not db_object:
        return default_value
    row_data = db_object.getDBRow(entry_index).data
    return row_data.get(name, default_value)


def get_align_seq_data(align, name, default_value=None):
    """
    Look up a named sequence data value for an alignment object. This data
    is precompiled, so it is read from the sequence entry rather than from
    the database row used by get_align_data.

    :param align: antibody alignment object
    :type align: schrodinger.application.prime.packages.antibody.AntibodyAlignment

    :param name: name of property that we are retrieving data from.
    :type name: str

    :param default_value: optional default value with type appropriate for the given
        data name type
    :type default_value: str or float or int or bool or None

    :return: data value, or `default_value` when the alignment has no
        database object or the sequence entry has no such property
    :rtype: str or int or float or bool
    """
    db_object, entry_index, *_unused = align.dbentry
    if not db_object:
        return default_value
    seq_entry = db_object.getSeqEntry(entry_index)
    return seq_entry.get(name, default_value)


#- Classes -------------------------------------------------------------------


class FrameworkTemplate(object):
    """
    A scored pair of Light/Heavy variable region alignments, stored in
    `self.alignments` keyed by region name ('Light', 'Heavy').
    """

    # Note - AntibodyAlignment properties:
    #
    # align.title: Title of the template object for the alignment
    # align.score: Similarity score for the alignment
    # align.sequences: list of [query, template], items are aligned text
    #                  sequences with '.' representing missing residues
    # align.clean_seqs: list of [query, template], items are text
    #                   sequences.  The index of a residue in this list
    #                   is the residue number of that residue.
    # align.cdrs: list of [query, template], items are three (x, y) tuples
    #                   denoting the residue number at the start and end
    #                   of the X1-X3 regions
    # align.maps: list of [query, template], items are a list of residue
    #                   number of the residue at that index in
    #                   align.sequences.  -1 indicates missing.  This
    #                   maps the index of a residue in sequences to the
    #                   index of that residue in clean_seqs.

    def __init__(self, score, light_alignment, heavy_alignment):
        """
        :type score: float
        :param score: Search score for these alignments against the query

        :type light_alignment: `schrodinger.application.prime.packages.antibody.AntibodyAlignment`
        :param light_alignment: The alignment object describing the Light
            Variable region alignment

        :type heavy_alignment: `schrodinger.application.prime.packages.antibody.AntibodyAlignment`
        :param heavy_alignment: The alignment object describing the Heavy
            Variable region alignment
        """

        self.score = score

        # Ordered so that 'Light' is always reported before 'Heavy'
        self.alignments = OrderedDict()
        self.alignments['Light'] = light_alignment
        self.alignments['Heavy'] = heavy_alignment

    def __str__(self):
        return self.createLabel()

    def createLabel(self, maxlen=15):
        """
        :type maxlen: int
        :param maxlen: The maximum length for the name of each region template

        :rtype: str
        :return: A label describing this FrameworkTemplate: the score
            followed by one `<region>: <title>` item per alignment
        """

        parts = ['Score: %.2f' % self.score]
        for rtype, align in self.alignments.items():
            title = align.title
            if len(title) > maxlen:
                # Cut out the middle part, but leave the chain letter
                title = title[:maxlen - 5] + '...' + title[-2:]
            parts.append(rtype + ': ' + title)
        return '   '.join(parts)

    def createSequence(self, region):
        """
        :type region: str
        :param region: One of
                - Light
                - Heavy

        :rtype: `schrodinger.ui.sequencealignment.sequence.Sequence`
        :return: The Sequence object for this region - suitable for import into
            the SSV.
        """

        align = self.alignments[region]
        seq = msv_sequence.Sequence()
        seq.name = align.title
        seq.short_name = '( %s )' % region + align.title
        # clean_seqs is [query, template]; index 1 is the template sequence
        seq.appendResidues(align.clean_seqs[1])
        seq.sanitize()

        return seq

    def getFilename(self, region):
        """
        :type region: str
        :param region: One of
                - Light
                - Heavy

        :rtype: str
        :return: Path of the template file for this region, i.e. the
            `_filename` value of the alignment's database row joined onto
            the directory of the database the row belongs to.

        :raise ValueError: if the alignment has no database object
        """
        align = self.alignments[region]
        abobj, idx, *_ = align.dbentry
        if not abobj:
            raise ValueError('No database entry is available for the %s '
                             'alignment' % region)
        # The previous code read `entry._DB._directory` without ever defining
        # `entry`, so this method always raised NameError.
        # NOTE(review): the row returned by getDBRow() is assumed to be that
        # `entry` (it is where `_filename` is read from) - confirm that the
        # row exposes `_DB._directory`.
        entry = abobj.getDBRow(idx)
        _filename = entry.data.get('_filename')
        filename = os.path.join(entry._DB._directory, _filename)

        return filename

    def importRegion(self, region):
        """
        Extract the chain of this region's template structure and import it
        into the Maestro project table. Needs the module-level `maestro` to
        be available.

        :type region: str
        :param region: One of
                - Light
                - Heavy

        :return: The project table row returned by `importStructure` for
            the imported chain structure, named after the alignment title.
        """
        align = self.alignments[region]
        full_ct = get_align_ct(align)
        # NOTE(review): the Heavy_Chain property is read for both the Light
        # and the Heavy region - confirm this is intended.
        chain = get_align_seq_data(align, 's_bioluminateReadOnly_Heavy_Chain')
        atom_indices = evaluate_asl(full_ct, '(chain.name %s)' % chain)

        seq_st = full_ct.extract(atom_indices, copy_props=True)

        pt = maestro.project_table_get()
        pt_row = pt.importStructure(seq_st, name=align.title)

        return pt_row