# Source code for schrodinger.application.livedesign.export_tasks

import csv
import enum
import os
import typing
import zipfile
from collections import defaultdict
from contextlib import contextmanager

import requests.exceptions
from more_itertools import chunked

from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.infra import mmproj
from schrodinger.models import parameters
from schrodinger.project import project
from schrodinger.Qt import QtCore
from schrodinger.structutils.transform import get_centroid
from schrodinger.tasks import tasks

from . import constants
from . import entry_types
from . import export_models
from . import ld_utils
from . import login
from . import upload_utils

# Number of structures (or 3D export items) grouped into one export batch
BATCH_SIZE = 50
# Local alias for the structure property name that stores the compound ID
PROPNAME_COMPOUND_ID = constants.PROPNAME_COMPOUND_ID
# Placeholder property dictionary handed to the export call when a task has
# not collected any property dictionaries of its own
DUMMY_PROP_DICT = export_models.make_prop_dict(name='Dummy prop dict')


class LDError(Exception):
    """
    Raised when an interaction with the LiveDesign server fails during an
    export task.
    """
    pass
FORMAT_PARAMS = { 'delimiter': '\t', 'lineterminator': '\n', 'quoting': csv.QUOTE_NONE } # Mapping file TSV strings TSV_CORP_ID = 'Corporate ID' TSV_MODEL_NAME = 'Model Name' TSV_TARGET_KEY = 'Target Key' TSV_LIGAND_KEY = 'Ligand Key' TSV_HEADER = [TSV_CORP_ID, TSV_MODEL_NAME, TSV_TARGET_KEY, TSV_LIGAND_KEY] TSV_ENTITY_ID = 'entity_id' TSV_MODEL_NAME87 = 'model_name' TSV_LIG_PATH = 'ligand_zip_file_path' TSV_REC_PATH = 'target_zip_file_path' TSV_LIG_NAME = 'ligand_name' TSV_REC_NAME = 'target_name' TSV_HEADER87 = [ PROPNAME_COMPOUND_ID, TSV_ENTITY_ID, TSV_MODEL_NAME87, TSV_LIG_PATH, TSV_REC_PATH, TSV_LIG_NAME, TSV_REC_NAME ] DUMMY_CORP_ID = 'dummy_corp_id' # LiveDesign export JSON properties RESPONSE = 'import_responses' LIVE_REPORT_URL = 'live_report_url' SUCCESS = 'success' CORPORATE_ID = 'corporate_id' ADD_VIEW_TSV = 'additional_view_information.txt'
class TaskType(enum.Enum):
    """
    The kind of structure export subtask to launch: 2D or 3D data.
    """
    two_d = '2D'
    three_d = '3D'
class ExportType(enum.Enum):
    """
    The export format passed to the LiveDesign export call. The string form of
    a member is its value.
    """
    sdf = 'SDF'
    maestro = 'MAESTRO'
    maestro_sdf = 'MAESTRO_SDF'

    def __str__(self):
        # Stringify as the raw value rather than "ExportType.<name>"
        return self.value
class LDExportTaskMixin:
    """
    Mixin with LiveDesign helpers shared by the export tasks.

    The host class is expected to provide the `name` and `input` attributes
    read by the methods below.
    """

    @contextmanager
    def handleLDExceptions(self):
        """
        A context manager for handling exceptions that occur when interacting
        with LD. Catches various exceptions and re-raises them as `LDError`
        with additional explanatory text. The original exception is attached
        as the cause of the `LDError`.

        :raise LDError: if any exceptions occur in the context
        """
        try:
            yield
        except requests.exceptions.ConnectionError as exc:
            msg = ('Maestro was unable to connect to the LiveDesign server due'
                   ' to a connection error.')
            raise LDError(f'Task {self.name} failed: {msg}') from exc
        except requests.exceptions.Timeout as exc:
            msg = 'Attempt to connect to the LiveDesign server timed out.'
            raise LDError(f'Task {self.name} failed: {msg}') from exc
        except Exception as exc:
            # An exception with an empty message must still be reported;
            # fall back to its type name so the failure is never swallowed
            msg = str(exc) or type(exc).__name__
            raise LDError(f'Task {self.name} failed: {msg}') from exc

    def _getProjectID(self):
        """
        :raise LDError: if the `LDClient` call fails

        :return: the project ID for this export, if found
        :rtype: int or NoneType
        """
        proj_name = self.input.ld_destination.proj_name
        with self.handleLDExceptions():
            proj_id = self.input.ld_client.get_project_id_by_name(proj_name)
        # The lookup result is compared against an empty list, which is
        # treated as "no such project"
        return None if proj_id == [] else proj_id
class MasterExportTask(LDExportTaskMixin, tasks.BlockingFunctionTask):
    """
    Primary LD export task. Responsible for

    1. Preparing data for export
    2. Launching subtasks that perform export processes
    3. Communicating with the LD Export panel

    :ivar exportFailed: a signal containing an error message describing export
        failures
    :vartype exportFailed: QtCore.pyqtSignal
    """
    input: export_models.TaskInput
    exportFailed = QtCore.pyqtSignal(str)
    num_subtasks = parameters.NonParamAttribute()

    class Output(parameters.CompoundParam):
        lr_url: str = None
        result_urls: typing.Set[str]
        num_success: int
        num_failure: int
        unexported_items: typing.List[export_models.ThreeDExportItem]
        corp_ids: typing.List[str]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Counter used to give each launched subtask a unique name suffix
        self.num_subtasks = 0

    @tasks.preprocessor(order=constants.ORDER_POST_SUMMARY)
    def _createLiveReport(self):
        """
        If necessary, create a new LiveReport.

        :raise LDError: if the LiveReport cannot be created
        """
        ld_dest = self.input.ld_destination
        if ld_dest.lr_id != '':
            # A LiveReport has already been selected
            return
        with self.handleLDExceptions():
            live_report = create_live_report(self.input.ld_client,
                                             self.input.ld_models,
                                             ld_dest.proj_name,
                                             ld_dest.lr_name)
            ld_dest.lr_id = live_report.id

    @tasks.preprocessor(order=constants.ORDER_POST_SUMMARY)
    def _updateStructureProperties(self):
        """
        Modify structure properties as necessary for export.
        """
        # The API flavor does not change during the loop, so query it once
        new_export = use_new_export(self.input.ld_client)
        for idx, st in enumerate(self.input.structures_for_2d_export):
            # Structures cannot be exported to LiveDesign without titles
            st.title = st.title or ' '
            # In order to display titles in LiveDesign, they must be stored on
            # a particular property
            st.property[constants.PROPNAME_SD_TITLE] = st.title

            if not new_export:
                continue

            # For the new export API, we must also define a unique compound
            # ID for each structure
            compound_id = str(idx)
            st.property[PROPNAME_COMPOUND_ID] = compound_id

            # If the structure that is being written to the SDF file (the
            # "2D" structure) is not identical to the structure being
            # written to the .mae file (the "3D" structure), then the
            # same compound ID must be applied to that structure separately
            for item in self.input.three_d_export_items:
                if item.key == st:
                    item.ligand.property[PROPNAME_COMPOUND_ID] = compound_id

    @tasks.preprocessor(order=constants.ORDER_POST_SUMMARY)
    def _removeRedundantCompounds(self):
        """
        For exports using the old API, structures meant for 3D export should
        not also be exported as 2D compounds.

        Duplicates are dropped and the remaining structures keep their
        original relative order.
        """
        if use_new_export(self.input.ld_client):
            return
        inp = self.input
        ligands_3d = {item.ligand for item in inp.three_d_export_items}
        # `dict.fromkeys()` de-duplicates while preserving insertion order,
        # unlike a round trip through `set`
        inp.structures_for_2d_export = [
            st for st in dict.fromkeys(inp.structures_for_2d_export)
            if st not in ligands_3d
        ]

    @tasks.preprocessor(order=constants.ORDER_POST_SUMMARY)
    def _createDummyReceptor(self):
        """
        If necessary, add a dummy receptor structure to 3D export items.

        The dummy structure contains a single, distant atom.
        """
        if use_new_export(self.input.ld_client):
            return
        items = self.input.three_d_export_items
        if not any(item.receptor is None for item in items):
            return

        # Create a dummy receptor structure
        dummy_rec = structure.create_new_structure(num_atoms=1)
        dummy_rec.title = "Dummy receptor"

        # We don't want the dummy receptor to be seen by the user in the LD 3D
        # viewer, so keep it far away from the ligands
        atom = next(iter(dummy_rec.atom))
        atom.xyz = list(get_centroid(items[0].ligand) + 1000)[:3]
        for item in items:
            item.receptor = item.receptor or dummy_rec

    def mainFunction(self):
        """
        Create and launch export subtasks.

        :raise RuntimeError: if a subtask fails, or if some 3D items still
            have no export key after both 3D export passes
        """
        if use_new_export(self.input.ld_client):
            # Run a single export task for all data
            self._runStructureExportTask(TaskType.three_d)
        else:
            # If necessary, export 2D data
            if self.input.structures_for_2d_export:
                self._runStructureExportTask(TaskType.two_d)

            # If necessary, separately export 3D data
            _3d_items = self.input.three_d_export_items
            if _3d_items:
                task = self._runStructureExportTask(TaskType.three_d)
                _3d_items = task.output.three_d_export_items

            # If necessary, export more 3D data that relies on the original 3D
            # export to be categorized properly
            if _3d_items:
                task = self._runStructureExportTask(
                    TaskType.three_d, three_d_export_items=_3d_items)
                _3d_items = task.output.three_d_export_items

            if _3d_items:
                num_items = len(_3d_items)
                msg = (f'Unable to identify export key for {num_items}'
                       ' structures.')
                self.exportFailed.emit(msg)
                raise RuntimeError(msg)

        if self.input.ffc_export_specs:
            # If necessary, export FFC data
            task = AttachmentExportTask()
            task.input.setValue(self.input)
            task.input.corp_ids = self.output.corp_ids
            self._runExportTask(task)

    def _runExportTask(self, task):
        """
        Run the supplied export task, then process the result.

        :param task: an export task
        :type task: tasks.ThreadFunctionTask

        :raise RuntimeError: if the task raised an exception or reported any
            failed exports
        """
        task_dir = self.getTaskDir()
        task.specifyTaskDir(task_dir)
        # Give each subtask a unique name by appending a running counter
        task.name += str(self.num_subtasks)
        self.num_subtasks += 1
        task.start()
        task.wait()  # TODO PANEL-18317

        self.output.num_success += task.output.num_success
        self.output.num_failure += task.output.num_failure
        if isinstance(task, BaseStructureExportTask):
            self.output.unexported_items.extend(task.output.unexported_items)

        error_msg = None
        if task.failure_info:
            # If an exception was raised during the task
            error_msg = str(task.failure_info.exception)
        elif task.output.num_failure > 0:
            # If the export failed without raising an exception
            error_msg = (f'Task "{task.name}" failed to export all structures'
                         ' to LD.')
        if error_msg:
            self.exportFailed.emit(error_msg)
            raise RuntimeError(error_msg)

    def _runStructureExportTask(self, task_type, three_d_export_items=None):
        """
        Create the appropriate export task, run it, and collect the results.

        :param task_type: the type of export task to return
        :type task_type: TaskType

        :param three_d_export_items: optionally, a list of 3D export items to
            assign to the task input
        :type three_d_export_items: list[export_models.ThreeDExportItem] or
            NoneType

        :return: a structure export task object
        :rtype: BaseStructureExportTask
        """
        if use_new_export(self.input.ld_client):
            task_class = ExportTask87
        else:
            task_class = EXPORT_TASK_MAP[task_type]
        task = task_class()
        task.input.setValue(self.input)
        task.input.export_3d = task_type == TaskType.three_d
        if three_d_export_items:
            task.input.three_d_export_items = three_d_export_items
        self._runExportTask(task)

        output = self.output
        output.lr_url = task.output.lr_url
        output.result_urls |= task.output.result_urls
        output.corp_ids.extend(task.output.corp_ids)
        return task
class BaseStructureExportTask(LDExportTaskMixin, tasks.ThreadFunctionTask):
    """
    Abstract task for exporting structure data to LiveDesign.
    """
    file_batches: typing.List[export_models.FileBatch]
    input: export_models.TaskInput
    _map_file_header: typing.List[str]
    _map_file_base_name: str
    prop_dicts = parameters.NonParamAttribute()

    class Output(parameters.CompoundParam):
        lr_url: str = None
        result_urls: typing.Set[str]
        num_success: int
        num_failure: int
        three_d_export_items: typing.List[export_models.ThreeDExportItem]
        unexported_items: typing.List[export_models.ThreeDExportItem]
        corp_ids: typing.List[str]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # One entry per exported batch, in export order
        self._results = []
        self.prop_dicts = []

    @tasks.preprocessor(order=constants.ORDER_PROP_DICT)
    def _addPropDicts(self):
        """
        Update task property dictionaries used to specify exported structure
        property data.
        """
        for prop_spec in self.input.property_export_specs:
            prop_spec.addDataToExportTask(self)

    def mainFunction(self):
        """
        Perform the LiveDesign export operation for each batch of structures.

        :raise LDError: if any call to the LiveDesign client fails
        """
        if not self.file_batches:
            # Nothing to export; skip the per-export lookups below
            return

        # These values are identical for every batch, so look them up once
        ld_client = self.input.ld_client
        ld_dest = self.input.ld_destination
        compound_source = self._getCompoundSource()
        prop_dicts = self.prop_dicts or [DUMMY_PROP_DICT]

        for file_batch in self.file_batches:
            if file_batch.three_d_file_path:
                if use_new_export(ld_client):
                    export_type = ExportType.maestro_sdf
                else:
                    export_type = ExportType.maestro
            else:
                export_type = ExportType.sdf

            with self.handleLDExceptions():
                task_id = export_to_ld(
                    ld_client=ld_client,
                    project_name=ld_dest.proj_name,
                    lr_name=ld_dest.lr_name,
                    lr_id=ld_dest.lr_id,
                    prop_dicts=prop_dicts,
                    publish_data=self.input.publish_data,
                    compound_source=compound_source,
                    export_type=export_type,
                    map_file_path=file_batch.map_file_path,
                    sdf_file_path=file_batch.sdf_file_path,
                    three_d_file_path=file_batch.three_d_file_path)
                result_url = ld_client.wait_and_get_result_url(task_id,
                                                               timeout=3600)
                self.output.result_urls.add(result_url)
                result = ld_client.get_task_result(result_url)
                self._results.append(result)

    @tasks.postprocessor(order=constants.ORDER_COLLECT_RESULTS)
    def _collectExportResults(self):
        """
        Collect results from completed export processes.
        """
        lr_url = None
        output = self.output
        for result in self._results:
            if not result:
                # If something went wrong, `result` may be an empty string
                output.num_failure += 1
                continue
            lr_url = lr_url or result[LIVE_REPORT_URL]
            for compound_data in result[RESPONSE]:
                if compound_data[SUCCESS]:
                    output.num_success += 1
                else:
                    output.num_failure += 1
        output.lr_url = output.lr_url or lr_url

    def get3DExportItems(self):
        """
        :return: a list of 3D export items associated with this task
        :rtype: list[export_models.ThreeDExportItem]
        """
        return list(self.input.three_d_export_items)

    def _getCompoundSource(self):
        """
        Return the appropriate compound source attribute given the type of
        maestro data we are exporting to LD.

        For LD versions 8.6+, the `compound_source` argument must be passed to
        `LDClient.start_export_assay_and_pose_data()` as:

        - For DRUG_DISCOVERY mode:
            - None for exporting all compounds
        - For MATERIAL_SCIENCE mode:
            - 'pri' for exporting regular compounds
            - 'non_pri' for organometallic compounds.

        :return: the appropriate compound source argument.
        :rtype: str or None
        """
        entry_type_name = self.input.entry_type_name
        compound_source = None
        ld_mode = login.get_LD_mode(self.input.ld_client)
        if ld_mode == login.LDMode.MATERIALS_SCIENCE:
            compound_source = constants.CompoundSource.pri
            if entry_type_name == entry_types.OrganometallicCompounds.name:
                compound_source = constants.CompoundSource.non_pri
        return compound_source

    def _createMapFile(self, three_d_items, batch_idx, *map_row_args):
        """
        Create a LiveDesign export mapping file for the specified data.

        :param three_d_items: a list of 3D data for export
        :type three_d_items: list[export_models.ThreeDExportItem]

        :param batch_idx: the index of the export batch that this map file is
            for
        :type batch_idx: int

        :param `*map_row_args`: additional arguments to pass to
            `_getMapRows()`, if necessary

        :return: the name of the map file
        :rtype: str
        """
        map_file_name = self._getFilePath(batch_idx,
                                          base_name=self._map_file_base_name,
                                          ext='tsv')
        header = self._map_file_header
        with open(map_file_name, 'w', newline='', encoding='utf-8') as fh:
            writer = csv.DictWriter(fh, **FORMAT_PARAMS, fieldnames=header)
            writer.writeheader()
            for item_3d in three_d_items:
                for row in self._getMapRows(item_3d, *map_row_args):
                    writer.writerow(row)
        return map_file_name

    def _getMapRows(self, item_3d, *args):
        """
        Return mapping file row dictionaries for the specified 3D export item.

        Should be overridden in subclasses that wish to add data rows to
        mapping files.

        :param item_3d: a 3D export item
        :type item_3d: export_models.ThreeDExportItem

        :return: a list of mapping file row dictionaries for this item
        :rtype: list[dict[str, str]]
        """
        raise NotImplementedError

    def _getFilePath(self, batch_idx, base_name=None, ext=None):
        """
        Construct a standardized file name inside the task directory.

        :param batch_idx: the batch index for this file
        :type batch_idx: int

        :param base_name: optionally, a suffix appended to the file stem
        :type base_name: str or NoneType

        :param ext: optionally, the extension for this file
        :type ext: str or NoneType

        :return: a standardized file name
        :rtype: str
        """
        path = f'maestro_export_{batch_idx:02d}_{self.name}'
        if base_name is not None:
            path += f'_{base_name}'
        if ext is not None:
            path += f'.{ext}'
        task_dir = self.getTaskDir()
        return os.path.join(task_dir, path)

    @tasks.postprocessor(order=constants.ORDER_ASSIGN_CORP_IDS)
    def _collectCorpIDs(self):
        """
        Add the corporate ID of each exported item to the output model.
        """
        corp_ids = self.output.corp_ids
        for result in self._results:
            if not result:
                # A failed export may yield an empty string rather than a
                # response dictionary; there is nothing to collect from it
                continue
            for compound_data in result[RESPONSE]:
                if compound_data:
                    corp_ids.append(compound_data[CORPORATE_ID])
class BaseStructureExportTask86(BaseStructureExportTask):
    """
    Abstract structure export task for older (v8.6-) versions of LiveDesign.
    """
    structure_index_map = parameters.NonParamAttribute()
    compound_batches = parameters.NonParamAttribute()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Maps each exported structure to its index within its batch
        self.structure_index_map = {}
        # The structures exported in each batch, in batch order
        self.compound_batches = []

    @tasks.postprocessor(order=constants.ORDER_ASSIGN_CORP_IDS)
    def _assignCorporateIDs(self):
        """
        Assign corporate IDs from the recently exported 2D compounds to
        associated 3D compounds.

        Take corporate IDs assigned to 2D compounds after being uploaded to
        LiveDesign and apply them to corresponding 3D structures. This is
        only done if:

        1. There are 3D structures to upload, and
        2. The user did not manually assign corporate IDs to them already
        """
        compound_3d_item_map = defaultdict(list)
        for item in self.get3DExportItems():
            if isinstance(item.key, structure.Structure):
                # Keep track of 3D export items keyed to structures
                compound_3d_item_map[item.key].append(item)

        if not compound_3d_item_map:
            # Return early if none of the 3D structures were keyed by compound
            return

        for batch_idx, result in enumerate(self._results):
            if not result:
                # A failed export may yield an empty string rather than a
                # response dictionary; no corporate IDs are available for it
                continue

            # Responses are returned in the order that the compounds were
            # exported, so key the corporate IDs by the result index
            idx_corp_id_map = {
                compound_idx: compound_data[CORPORATE_ID]
                for compound_idx, compound_data in enumerate(result[RESPONSE])
                if compound_data
            }
            for st in self.compound_batches[batch_idx]:
                compound_idx = self.structure_index_map.get(st)
                if compound_idx is None:
                    continue
                corp_id = idx_corp_id_map.get(compound_idx)
                if corp_id is None:
                    continue
                for item in compound_3d_item_map.get(st, ()):
                    item.setItemKey(corp_id)
class Export2DTask(BaseStructureExportTask86):
    """
    Exports 2D structure data to older (v8.6-) versions of LiveDesign.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._map_file_base_name = '2Dmap'
        self._map_file_header = TSV_HEADER

    @tasks.preprocessor(order=constants.ORDER_GEN_FILE)
    def generateExportFiles(self):
        """
        Write the files needed for each export batch and register them in
        `file_batches`.

        Every batch gets an SDF file holding its 2D structures plus a map file
        that contains only the header row (it is required even when empty).
        """
        batches = list(chunked(self.input.structures_for_2d_export,
                               BATCH_SIZE))
        self.compound_batches = batches
        for idx, batch in enumerate(batches):
            # No 3D items are passed, so the map file has a header only
            map_path = self._createMapFile([], idx)
            sdf_path = self._createSDFile(batch, idx)

            batch_files = export_models.FileBatch()
            batch_files.map_file_path = map_path
            batch_files.sdf_file_path = sdf_path
            self.file_batches.append(batch_files)

    def _createSDFile(self, compounds, batch_idx):
        """
        Write the given structures to an SDF file for export to LiveDesign,
        recording each structure's position within the batch.

        :param compounds: the structures to be exported to LiveDesign
        :type compounds: list[structure.Structure]

        :param batch_idx: the index of the export batch this file belongs to
        :type batch_idx: int

        :return: the name of the SDF file
        :rtype: str
        """
        # Remember where each structure sits in the batch
        self.structure_index_map.update(
            {st: position for position, st in enumerate(compounds)})

        sdf_path = self._getFilePath(batch_idx, ext='sdf')
        with structure.StructureWriter(sdf_path) as sd_writer:
            sd_writer.extend(compounds)
        return sdf_path
class Export3DTask(BaseStructureExportTask86):
    """
    Structure export task for 3D data for older (v8.6-) versions of
    LiveDesign.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._map_file_header = TSV_HEADER
        self._map_file_base_name = '3Dmap'

    @tasks.preprocessor(order=constants.ORDER_GEN_FILE)
    def generateExportFiles(self):
        """
        Create files necessary for the LiveDesign export and add them to the
        `file_batches` parameter.

        This includes a 3D structure file and a corresponding map file.
        """
        # Do not export 3D items for which the key/corporate ID is another
        # structure; they will have to wait until those other structures have
        # been exported and been associated with a corporate ID value
        items_to_export, other_items = [], []
        for item in self.get3DExportItems():
            if isinstance(item.key, structure.Structure):
                other_items.append(item)
            else:
                items_to_export.append(item)
        self.output.three_d_export_items = other_items

        item_batches = chunked(items_to_export, BATCH_SIZE)
        for batch_idx, item_batch in enumerate(item_batches):
            three_d_file_path, st_eid_map = self._createPrjZipFile(
                item_batch, batch_idx)
            map_file_path = self._createMapFile(item_batch, batch_idx,
                                                st_eid_map)
            file_batch = export_models.FileBatch()
            file_batch.map_file_path = map_file_path
            file_batch.three_d_file_path = three_d_file_path
            self.file_batches.append(file_batch)

    def _createPrjZipFile(self, export_items, batch_idx):
        """
        Write supplied 3D structures to a .prjzip file.

        :param export_items: a list of 3D data for export
        :type export_items: list[export_models.ThreeDExportItem]

        :param batch_idx: the index of the export batch to which these
            structures belong
        :type batch_idx: int

        :return: a tuple containing the .prjzip file path and a dictionary
            mapping structures to their entry IDs in the .prjzip
        :rtype: tuple[str, dict[structure.Structure, str]]
        """
        structures = []

        # Add all receptors first
        for item in export_items:
            rec = item.receptor
            if rec and rec not in structures:
                structures.append(rec)

        # Add all 3D ligands
        # NOTE(review): `structures` only holds receptors at this point, so
        # this check does not de-duplicate ligands shared by several export
        # items - confirm whether that is intended
        lig_idx = 0
        ligands = []
        for item in export_items:
            lig = item.ligand
            if lig and lig not in structures:
                ligands.append(lig)
                self.structure_index_map[lig] = lig_idx
                lig_idx += 1
        structures += ligands
        self.compound_batches.append(ligands)

        st_eid_map = {}
        if mm.mmtable_refcount() == 0:
            mm.mmtable_initialize(mm.MMERR_DEFAULT_HANDLER)
        prj_path = self._getFilePath(batch_idx, ext='prj')
        ph = mmproj.mmproj_project_new(prj_path)
        proj = project.Project(project_handle=ph)
        try:
            for st in structures:
                st_eid_map[st] = proj.importStructure(st).entry_id
        finally:
            # Always release the project, even if an import fails
            proj.close()

        # The LiveDesign conversion script expects this file in the project
        # directory
        add_view_path = os.path.join(prj_path, ADD_VIEW_TSV)
        with open(add_view_path, 'w'):
            pass

        task_dir = self.getTaskDir()
        zip_file_name = project.zip_project(prj_path, task_dir)
        return zip_file_name, st_eid_map

    def _getMapRows(self, item_3d, st_eid_map):
        """
        Return mapping file row dictionaries for the specified 3D export item.

        :param item_3d: a 3D export item
        :type item_3d: export_models.ThreeDExportItem

        :param st_eid_map: a dictionary mapping structures being exported to
            the entry IDs for those structures in the .prjzip file being sent
            to LiveDesign
        :type st_eid_map: dict

        :return: a list of mapping file row dictionaries for this item, one
            per 3D spec
        :rtype: list[dict[str, str]]
        """
        corp_id = item_3d.getLigandCorpID()
        rec_eid = st_eid_map.get(item_3d.receptor)
        lig_eid = st_eid_map.get(item_3d.ligand)
        return [{
            TSV_CORP_ID: corp_id,
            TSV_MODEL_NAME: spec.ld_model,
            TSV_TARGET_KEY: rec_eid,
            TSV_LIGAND_KEY: lig_eid
        } for spec in item_3d.three_d_specs]
class ExportTask87(BaseStructureExportTask):
    """
    Export structure task for newer (v8.7+) versions of LiveDesign.

    :cvar EMPTY_PATH_TUPLE: a tuple of `None` values used as the default
        result when a structure has no entry in `st_path_map`
    :vartype EMPTY_PATH_TUPLE: tuple[NoneType, NoneType]

    :ivar st_path_map: a dictionary mapping structures to a tuple of
        (absolute path, relative path) where they have been stored as .mae
        files
    :vartype st_path_map: dict[structure.Structure, tuple[str, str] or
        tuple[None, None]]
    """
    EMPTY_PATH_TUPLE = (None, None)
    st_path_map = parameters.NonParamAttribute()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.st_path_map = {}
        self._map_file_header = TSV_HEADER87
        self._map_file_base_name = '3Dmap'

    @tasks.preprocessor(order=constants.ORDER_GEN_FILE)
    def generateExportFiles(self):
        """
        Create files necessary for the LiveDesign export and add them to the
        `file_batches` parameter.

        This includes a SDF file (2D), a zipped directory of MAE files (3D),
        and a TSV file (mapping) that describes the relationship between the
        two.
        """
        combined_batches = self._getCombinedBatches()
        for batch_idx, combined_batch in enumerate(combined_batches):
            compound_batch, item_batch = combined_batch
            sdf_file_path = self._createSDFile(compound_batch, batch_idx)
            # The zip must be written before the map file: the map rows read
            # the paths that `_createMaeZip()` records in `st_path_map`
            zip_file_path = self._createMaeZip(item_batch, batch_idx)
            map_file_path = self._createMapFile(item_batch, batch_idx)

            file_batch = export_models.FileBatch()
            file_batch.map_file_path = map_file_path
            file_batch.sdf_file_path = sdf_file_path
            file_batch.three_d_file_path = zip_file_path
            self.file_batches.append(file_batch)

    def _getCombinedBatches(self):
        """
        Return a list of batches for simultaneous 2D and 3D export.

        A batch is closed as soon as it holds `BATCH_SIZE` compounds or 3D
        items. All items belonging to one compound stay in the same batch, so
        the 3D part of a batch may exceed `BATCH_SIZE`.

        :return: a list of tuples, where each tuple contains

            1. A list of compounds for 2D export, and
            2. A corresponding list of 3D export items for 3D export
        :rtype: List[Tuple[List[structure.Structure],
            List[export_models.ThreeDExportItem]]]
        """
        combined_batches = []
        compound_batch, item_batch = [], []
        items = self.get3DExportItems()
        for st in self.input.structures_for_2d_export:
            compound_batch.append(st)

            # The 3D batch should contain corresponding structures:
            # structures meant for 3D export that are either identical
            # to one of the 2D structures being exported, or which are
            # keyed by those structures (for when the 3D structure does
            # not match the 2D compound, such as for covalent docking
            # systems)
            compound_items = [
                item for item in items if st in [item.ligand, item.key]
            ]
            item_batch += compound_items
            for item in compound_items:
                items.remove(item)

            # `>=` so that a batch is flushed once it reaches BATCH_SIZE
            # rather than one element later
            if max(len(compound_batch), len(item_batch)) >= BATCH_SIZE:
                combined_batches.append((compound_batch, item_batch))
                compound_batch, item_batch = [], []

        if compound_batch or item_batch:
            combined_batches.append((compound_batch, item_batch))

        # If any 3D items were not included in any batch, something went
        # wrong. Make sure it is recorded so it does not become a silent
        # failure.
        self.output.unexported_items.extend(items)
        return combined_batches

    def _createSDFile(self, compounds, batch_idx):
        """
        Create the SDF file for export to LiveDesign.

        :param compounds: a list of structures to be exported to LiveDesign
        :type compounds: list[structure.Structure]

        :param batch_idx: the index of the export batch that this file is for
        :type batch_idx: int

        :return: the name of the SDF file
        :rtype: str
        """
        sdf_file_name = self._getFilePath(batch_idx, ext='sdf')
        with structure.StructureWriter(sdf_file_name) as writer:
            writer.extend(compounds)
        return sdf_file_name

    def _createMaeZip(self, export_items, batch_idx):
        """
        Write supplied 3D structures to a zip archive of .mae files.

        :param export_items: a list of 3D data for export
        :type export_items: list[export_models.ThreeDExportItem]

        :param batch_idx: the index of the export batch to which these
            structures belong
        :type batch_idx: int

        :return: the .zip file path
        :rtype: str
        """
        # Receptors first, then ligands, each structure at most once
        structures = []
        for item in export_items:
            rec = item.receptor
            if rec and rec not in structures:
                structures.append(rec)
        for item in export_items:
            lig = item.ligand
            if lig and lig not in structures:
                structures.append(lig)

        zip_file_path = self._getFilePath(batch_idx, ext='zip')
        with zipfile.ZipFile(zip_file_path, 'w') as zip_handle:
            for st_idx, st in enumerate(structures):
                path_tuple = self.st_path_map.get(st)
                if path_tuple:
                    # This structure has already been written to file, so use
                    # the existing files
                    mae_file_path, rel_path = path_tuple
                else:
                    # Write this structure to a new file
                    mae_file_path = self._getFilePath(batch_idx,
                                                      base_name=f'st{st_idx}',
                                                      ext='mae')
                    st.write(mae_file_path)
                    # Store the archive member under its path relative to the
                    # directory shared with the zip file
                    com_path = os.path.commonpath(
                        [zip_file_path, mae_file_path])
                    rel_path = os.path.relpath(mae_file_path, com_path)
                    self.st_path_map[st] = (mae_file_path, rel_path)
                zip_handle.write(mae_file_path, arcname=rel_path)
        return zip_file_path

    def _getMapRows(self, item_3d):
        """
        Return mapping file row dictionaries for the specified 3D export item.

        :param item_3d: a 3D export item
        :type item_3d: export_models.ThreeDExportItem

        :return: a list of mapping file row dictionaries for this item, one
            per 3D spec
        :rtype: list[dict[str, str]]
        """
        compound_id = item_3d.getLigandCompoundID()
        corp_id = item_3d.getLigandCorpID()
        _, rec_path = self.st_path_map.get(item_3d.receptor,
                                           self.EMPTY_PATH_TUPLE)
        _, lig_path = self.st_path_map.get(item_3d.ligand,
                                           self.EMPTY_PATH_TUPLE)

        custom_text = self.input.pose_name_custom_text
        propname = self.input.pose_name_propname
        lig_name = None
        if custom_text or propname:
            # Fall back to an empty prefix so that a missing custom text does
            # not break the concatenation below
            lig_name = custom_text or ''
            if propname:
                data_name = propname.dataName()
                lig_name += str(item_3d.ligand.property.get(data_name, ''))
        rec_name = item_3d.receptor.title if item_3d.receptor else None

        return [{
            PROPNAME_COMPOUND_ID: compound_id,
            TSV_ENTITY_ID: corp_id,
            TSV_MODEL_NAME87: spec.ld_model,
            TSV_REC_PATH: rec_path,
            TSV_LIG_PATH: lig_path,
            TSV_REC_NAME: rec_name,
            TSV_LIG_NAME: lig_name
        } for spec in item_3d.three_d_specs]
[docs]class AttachmentExportTask(LDExportTaskMixin, tasks.ThreadFunctionTask): """ Export task for FFC attachment data. :ivar attachment_data_map: a dictionary mapping the column name for an attachment to a data class storing other information about that attachment :vartype attachment_data_map: dict[str, export_models.AttachmentData] """ input: export_models.AttachmentTaskInput output: export_models.AttachmentTaskOutput attachment_data_map = parameters.NonParamAttribute()
def __init__(self, *args, **kwargs):
    """
    Initialize the task with an empty attachment data map.
    """
    super().__init__(*args, **kwargs)
    # Filled in later; read by `mainFunction()` and `_getAttachmentIDMap()`
    self.attachment_data_map = {}
@tasks.preprocessor(order=constants.ORDER_PROP_DICT)
def _addFFCData(self):
    """
    Let each FFC export specification on the task input add its data to
    this task.
    """
    for ffc_spec in self.input.ffc_export_specs:
        ffc_spec.addDataToExportTask(self)
def mainFunction(self):
    """
    Upload the attachment files, then write their IDs into the matching
    freeform column of the LiveReport.

    Each column counts once towards `num_success` or `num_failure` on the
    task output.
    """
    output = self.output
    for column_name, ids in self._getAttachmentIDMap().items():
        description = self.attachment_data_map[column_name].description
        column_id = self._getFreeformColumn(column_name, description)
        if self._addValuesToFreeformColumn(ids, column_id):
            output.num_success += 1
        else:
            output.num_failure += 1
def _getAttachmentIDMap(self): """ Get a map relating column names to their given attachment IDs based on the specifications given in the Attachment Data Map. Uploads attachments to LiveDesign. The list of attachment IDs will mirror the order of the attachment items. The list will exhaust all of the structures of the 1st item, then the 2nd, and so on. :return: Map relating column names to attachment IDs :rtype: dict(str, list(str)) """ ld_client = self.input.ld_client proj_id = self._getProjectID() attachment_id_map = {} with self.handleLDExceptions(): for col_name, att_data in self.attachment_data_map.items(): attachment_ids = [] for attachment_item in att_data.attachment_items: attachment_id = upload_utils.upload_ld_attachment( attachment_item.file_path, proj_id, ld_client, remote_file_name=attachment_item.remote_file_name, file_type=attachment_item.file_type) # Associate the same attachment ID with every structure for _ in attachment_item.row_structures: attachment_ids.append(attachment_id) attachment_id_map[col_name] = attachment_ids return attachment_id_map def _getFreeformColumn(self, column_name, description): """ Obtain an existing or create and add a new freeform column to the live report associated with this export process. :param column_type: the type of free form column, see constants defined in `FreeformColumn` the ldclient models. :type column_type: str :param description: the description :type description: str :return: the column ID of the new column, if available :rtype: str or None """ ld_client = self.input.ld_client ld_models = self.input.ld_models published = self.input.publish_data lr_id = self.input.ld_destination.lr_id proj_id = self._getProjectID() with self.handleLDExceptions(): # Keep the deprecated `freeform_columns()` project ID argument # to support older versions of LDClient. 
ffcs = ld_client.freeform_columns(proj_id) for ffc in ffcs: if published and ffc.project_id != str(proj_id): # If the column was published, compare the project ID continue elif not published and ffc.live_report_id != lr_id: # If the column was not published, compare the live # report ID continue elif ffc.published == published and ffc.name == column_name: # A FFC with our specifications already exists, so use # that one rather than adding a new one. column = ffc break else: # No column with our specifications exists, so create a new # one. column_model = ld_models.FreeformColumn( column_name, description, published=published, project_id=proj_id, live_report_id=lr_id, type=ld_models.FreeformColumn.COLUMN_ATTACHMENT) column = ld_client.create_freeform_column(column_model) ld_client.add_columns(lr_id, [column.id]) return column.id def _addValuesToFreeformColumn(self, values, column_id): """ Add values to the `column_id` free form column of the live report associated with this export process. :param values: the values to add :type values: list(object) :param column_id: the column ID of the attachment column :type column_id: str :return: whether the export is successful :rtype: bool """ lr_id = self.input.ld_destination.lr_id proj_id = self._getProjectID() ld_client = self.input.ld_client ld_models = self.input.ld_models published = self.input.publish_data observations = set() for value, corporate_id in zip(values, self.input.corp_ids): obs = ld_models.Observation(proj_id, corporate_id, column_id, value, live_report_id=lr_id, published=published) observations.add(obs) if not observations: msg = 'No observations prepared for export.' self.setErrorStatus(self.name, msg) return False with self.handleLDExceptions(): ld_client.add_freeform_column_values(observations) return True @tasks.postprocessor(order=constants.ORDER_COLLECT_RESULTS) def _collectExportResults(self): """ Keep track of the status of finished export processes. 
""" if self.status == tasks.Status.FAILED: self.output.num_failure += 1 @tasks.postprocessor() def _removeLocalFiles(self): for ffc_spec in self.input.ffc_export_specs: ffc_spec.removeLocalFiles()
# Maps each `TaskType` member to the export task class used for that kind of
# export.
EXPORT_TASK_MAP = {
    TaskType.two_d: Export2DTask,
    TaskType.three_d: Export3DTask
} # yapf: disable
def export_to_ld(ld_client,
                 project_name,
                 lr_name,
                 lr_id,
                 prop_dicts,
                 publish_data,
                 compound_source,
                 export_type,
                 map_file_path,
                 sdf_file_path=None,
                 three_d_file_path=None):
    """
    Send the supplied data to LiveDesign through the export endpoint that
    matches the connected server (see `use_new_export`).

    :param ld_client: LD client session
    :type ld_client: ldclient.LDClient

    :param project_name: the name of the LiveDesign project
    :type project_name: str

    :param lr_name: the name of the LiveReport
    :type lr_name: str

    :param lr_id: the ID of the LiveReport
    :type lr_id: int

    :param prop_dicts: property arguments that specify which structure
        properties should be converted into LiveDesign columns
    :type prop_dicts: dict(str, str)

    :param publish_data: whether the exported data should be published
        globally for all LiveDesign users
    :type publish_data: bool

    :param compound_source: the compound source argument required for
        certain LD versions; omitted from the request when falsy
    :type compound_source: str or None

    :param export_type: the type of export to perform
    :type export_type: ExportType

    :param map_file_path: the path of the mapping file
    :type map_file_path: str

    :param sdf_file_path: optionally, the path of an SDF file
    :type sdf_file_path: str or NoneType

    :param three_d_file_path: optionally, the path of a 3D file
    :type three_d_file_path: str or NoneType

    :return: the value returned by the selected LDClient export method
        (a task ID)
    :rtype: int
    """
    # `corporate_id_column` names the structure property that LiveDesign
    # reads corporate IDs from when matching existing compounds. SDF exports
    # need the property data name (s_m_Corporate_ID). MAESTRO exports take
    # the display name and prepend "s_m_" themselves, which would be wrong
    # when the corporate ID comes from a non-Maestro column, so a single
    # 'Corporate ID' property is used for them instead. See SS-24830.
    if export_type == ExportType.maestro:
        corporate_id_prop = 'Corporate ID'
    else:
        corporate_id_prop = constants.PROPNAME_CORP_ID

    export_kwargs = {
        'project': project_name,
        'mapping_file_name': map_file_path,
        'corporate_id_column': corporate_id_prop,
        'live_report_name': lr_name,
        'published': publish_data,
        'properties': prop_dicts,
        'export_type': str(export_type),
        'live_report_id': lr_id,
    }

    # Only sent when a compound source was actually supplied
    if compound_source:
        export_kwargs['compound_source'] = compound_source

    if use_new_export(ld_client):
        # Newer API: the SDF, 3D and mapping files are each checksummed
        sdf_sha1 = ld_utils.get_sha1(sdf_file_path)
        three_d_sha1 = ld_utils.get_sha1(three_d_file_path)
        map_file_sha1 = ld_utils.get_sha1(map_file_path)
        export_kwargs['sdf_file_name'] = sdf_file_path
        export_kwargs['sdf_file_sha1'] = sdf_sha1
        export_kwargs['mapping_file_sha1'] = map_file_sha1
        export_kwargs['three_d_file_name'] = three_d_file_path
        export_kwargs['three_d_file_sha1'] = three_d_sha1
        export_method = ld_client.load_assay_and_pose_data
    else:
        # Older API: one data file, the SDF path winning over the 3D path
        legacy_data_path = sdf_file_path or three_d_file_path
        legacy_sha1 = ld_utils.get_sha1(legacy_data_path)
        export_kwargs['data_file_name'] = legacy_data_path
        export_kwargs['sha1'] = legacy_sha1
        export_method = ld_client.start_export_assay_and_pose_data

    return export_method(**export_kwargs)
def create_live_report(ld_client, ld_models, proj_name, title):
    """
    Build a LiveReport model for the named project and create it through
    the LiveDesign client.

    :param ld_client: the LiveDesign client instance
    :type ld_client: client.LDClient

    :param ld_models: the livedesign models module
    :type ld_models: module

    :param proj_name: the name of the project to which the LiveReport
        should be added
    :type proj_name: str

    :param title: the desired LiveReport title
    :type title: str

    :return: whatever `ld_client.create_live_report` returns for the new
        LiveReport
    :rtype: models.LiveReport
    """
    # The project is resolved by name first; the model needs its ID.
    owning_project_id = ld_client.get_project_id_by_name(proj_name)
    pending_report = ld_models.LiveReport(
        title=title,
        project_id=owning_project_id,
    )
    return ld_client.create_live_report(pending_report)
def use_new_export(ld_client):
    """
    Report whether the connected LiveDesign server should be driven through
    the new (LD v8.7+) export API.

    :param ld_client: the LiveDesign client instance
    :type ld_client: client.LDClient

    :return: `True` if the server version is at least
        `login.LD_VERSION_NEW_EXPORT`
    :rtype: bool
    """
    server_version = login.get_LD_version(ld_client)
    return server_version >= login.LD_VERSION_NEW_EXPORT