# Source code for schrodinger.pipeline.stages.canvas

# -*- coding: utf-8 -*-
"""
This module contains stages for running Canvas calculations.

Copyright Schrodinger, LLC. All rights reserved.
"""

# Contributors: Matvey Adzhigirey

import os

import schrodinger.application.canvas.fingerprint as canvas_fp
import schrodinger.application.canvas.similarity as canvas_sim
from schrodinger import structure
from schrodinger.infra import canvas
from schrodinger.pipeline import pipeio
from schrodinger.pipeline import stage
from schrodinger.structure import StructureReader
from schrodinger.utils import log

FINGERPRINT_TYPES = canvas_fp.CanvasFingerprintGenerator.FINGERPRINT_TYPES
ATOM_TYPING_SCHEMES = canvas_fp.CanvasFingerprintGenerator.ATOM_TYPING_SCHEMES
SIMILARITY_METRICS = canvas_sim.CanvasFingerprintSimilarity.SIMILARITY_METRICS

CANVAS_2D_SCORE_PROP = "r_canvas_FP_Similarity"


class Canvas2DSimilarityStage(stage.Stage):
    """
    This stage calculates a similarity score for each input ligand. The
    similarity score is based on Canvas 2D fingerprints, and the reference
    ligand is the "query" structure.

    This stage is used by Data Fusion workflow (data_fusion_backend.py).

    STAGE: Canvas 2D FingerPrints stage

    Input 1: Query structure
    Input 2: Ligand structures to screen

    Output 1: Resulting structures with similarity scores

    Keywords (declared in the stage specs):

    - FINGERPRINT_TYPE: one of FINGERPRINT_TYPES
    - SIMILARITY_METRIC: one of SIMILARITY_METRICS
    - ATOM_TYPING_SCHEME: integer from 1 to len(ATOM_TYPING_SCHEMES)
    """

    def __init__(self, *args, **kwargs):
        """
        Creates the stage instance, and passes the <args> and <kwargs> to
        the stage.Stage's constructor.
        """
        # The options() lists are built from the string form of the
        # module-level sequences with the enclosing brackets stripped.
        metric_options_str = str(SIMILARITY_METRICS)[1:-1]
        fp_types_str = str(FINGERPRINT_TYPES)[1:-1]
        num_schemes = len(ATOM_TYPING_SCHEMES)

        specs = """
        FINGERPRINT_TYPE = options(%s)
        SIMILARITY_METRIC = options(%s)
        ATOM_TYPING_SCHEME = integer(min=1, max=%i)
        """ % (fp_types_str, metric_options_str, num_schemes)

        stage.Stage.__init__(self, *args, specs=specs, **kwargs)

        # Input pin #1 is the query structure:
        self.addExpectedInput(1, "structures", True)
        # Input pin #2 are the input ligands:
        self.addExpectedInput(2, "structures", True)
        # Output pin #1 are the result ligands with scores:
        self.addExpectedOutput(1, "structures", True)

    def _getSingleInputFile(self, pin, label):
        """
        Return the path of the one structure file attached to an input pin.

        :param pin: Number of the input pin to read.
        :param label: Lower-case description of the input ("query" or
            "screening"), used only to build the error messages.
        :return: Path to the single file of that input.
        :raises RuntimeError: If the pin does not hold exactly one file, or
            if that file does not exist on disk.
        """
        files = self.getInput(pin).getFiles()
        if len(files) != 1:
            raise RuntimeError(
                "This stage requires exactly one %s structure file" % label)

        path = files[0]
        if not os.path.isfile(path):
            raise RuntimeError("%s structure file is missing: %s" %
                               (label.capitalize(), path))
        return path

    def operate(self):
        """
        The only overridden & required method in this class.
        Called by the Pipeline to run this stage's main code.

        Generates a fingerprint for the query structure and for every
        structure in the screening file, stores the similarity between the
        two in the CANVAS_2D_SCORE_PROP property of each screened structure,
        and writes the annotated structures to the file set as output #1.

        :raises RuntimeError: If either input does not consist of exactly
            one existing structure file.
        """
        # Both inputs are validated up front, before any output is created,
        # so a missing screening file is reported the same way as a missing
        # query file.
        query_file = self._getSingleInputFile(1, "query")
        screen_file = self._getSingleInputFile(2, "screening")

        fp_type = self["FINGERPRINT_TYPE"]
        sim_metric = self["SIMILARITY_METRIC"]
        typing_scheme = self["ATOM_TYPING_SCHEME"]

        lic = canvas.ChmLicenseShared()
        if not lic.isValid():
            msg = "A Canvas license is required to run this workflow"
            self.exit(msg)

        # A logger is required to be passed to the Canvas classes:
        logger = log.get_output_logger("data_fusion:canvas:")
        log.default_logging_config()
        logger.setLevel(log.WARNING)

        fp_gen = canvas_fp.CanvasFingerprintGeneratorCLI(logger=logger)
        fp_gen.setType(fp_type)
        fp_gen.setAtomBondTyping(typing_scheme)

        fp_sim = canvas_sim.CanvasFingerprintSimilarityCLI(logger=logger)
        # The literal string "None" leaves the similarity object's metric
        # untouched.
        if sim_metric != "None":
            fp_sim.setMetric(sim_metric)

        query_st = structure.Structure.read(query_file)
        query_fp = fp_gen.generate(query_st)
        print("Query fingerprint: %s" % query_fp)

        hits_file = self.genOutputFileName(1, extension='.maegz')
        num_written = 0
        with structure.StructureWriter(hits_file) as writer:
            with StructureReader(screen_file) as reader:
                for st in reader:
                    fp = fp_gen.generate(st)
                    print("Structure %i fingerprint: %s" %
                          (num_written + 1, fp))
                    try:
                        sim = fp_sim.calculateSimilarity(query_fp, fp)
                    except canvas_sim.CanvasSimilarityNotImplemented as err:
                        # This similarity metric is not supported
                        self.exit(str(err))
                    print(" Similarity: %s" % sim)
                    st.property[CANVAS_2D_SCORE_PROP] = sim
                    writer.append(st)
                    num_written += 1

        if num_written == 0:
            self.info("No hits were generated; returning empty output.")
        else:
            self.info("\nNumber of hits generated: %i" % num_written)

        # Set the output #1 of this stage to the hits file:
        self.setOutput(1, pipeio.Structures([hits_file], num_written))