Source code for schrodinger.application.steps.scorers

from rdkit import Chem

from schrodinger import stepper
from schrodinger.application.pathfinder import filtering
from schrodinger.models import parameters

from . import utils
from .basesteps import MolReduceStep
from .dataclasses import ScoredMol
from .dataclasses import ScorerMixin

# ligand_ml is treated as an optional dependency: if importing it fails,
# Smasher is set to None instead of making this module fail to import.
try:
    from ligand_ml.smasher import Smasher
except Exception:
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit still propagate, while any failure
    # raised during the import itself is still tolerated as before.
    Smasher = None

# Positive infinity as a float.
INF = float('inf')


class PropertyScorer(ScorerMixin, MolReduceStep):
    """
    Score each molecule with the value of one of its properties.

    The `property` setting is the descriptor name of the property to use as
    the score, e.g., 'MVCorrMW' or 'r_rdkit_MolWt'.
    """

    class Settings(parameters.CompoundParam):
        # Descriptor name of the property whose value becomes the score.
        property: str

    def validateSettings(self):
        """
        Check that the configured property can be computed.

        The descriptor is computed on a small probe molecule; a `KeyError`
        from that computation is reported as an unknown property.

        :return: a list holding one `stepper.SettingsError` when the property
            is not known, otherwise an empty list.
        """
        probe = Chem.MolFromSmiles('C')
        prop_name = self.settings.property
        try:
            filtering.add_descriptors(probe, [prop_name], refs=[])
        except KeyError:
            message = f'"{prop_name}" is not a known property'
            return [stepper.SettingsError(self, message)]
        return []

    def reduceFunction(self, inputs):
        """
        Yield one `ScoredMol` per input molecule.

        For each molecule the configured descriptor is computed with
        `filtering.add_descriptors`, then read back from the molecule with
        `GetProp` and converted to a float to be used as the score.

        :param inputs: iterable of molecules to score.
        """
        for mol in inputs:
            descriptor = self.settings.property
            filtering.add_descriptors(mol, [descriptor], refs=[])
            score = float(mol.GetProp(descriptor))
            yield ScoredMol(mol=mol, score=score)
class LigandMLScorer(ScorerMixin, MolReduceStep):
    """
    Yield scored molecules where the score is the value predicted by the
    model.

    Settings:

    - `ml_file`: required; the path to the ML qzip file.
    - `validate_model`: when True (the default), `validateSettings` also
      checks the file with `utils.validate_smasher_file`.
    """

    class Settings(parameters.CompoundParam):
        ml_file: stepper.StepperFile = None
        validate_model: bool = True

    def validateSettings(self, what='ml_file'):
        """
        Validate the model file setting.

        :param what: name of the file setting passed to
            `utils.validate_file`.
        :return: the issues reported by `utils.validate_file` if there are
            any; otherwise, when `validate_model` is set, a list with one
            `stepper.SettingsError` if `utils.validate_smasher_file` reports
            an issue; otherwise an empty list.
        """
        if issues := utils.validate_file(self, what, required=True):
            return issues
        if not self.settings.validate_model:
            return []
        # NOTE(review): the model check always reads `ml_file`, even when
        # `what` names a different setting -- confirm this is intended.
        if issue := utils.validate_smasher_file(self.settings.ml_file):
            return [stepper.SettingsError(self, issue)]
        return []

    def reduceFunction(self, inputs):
        """
        Yield one `ScoredMol` per input molecule, scored by the model.

        :param inputs: iterable of molecules to score.
        :raises ImportError: if `Smasher` is None because ligand_ml could
            not be imported.
        """
        # Smasher is None when the optional ligand_ml import failed. Fail
        # with an explicit message instead of an AttributeError on NoneType.
        if Smasher is None:
            raise ImportError(
                'LigandMLScorer requires the "ligand_ml" package, which '
                'could not be imported')
        # For performance reasons, this step combines all inputs into a list
        # for the score prediction, but yields every scored molecule one by
        # one.
        mols = list(inputs)
        with Smasher.load(self.settings.ml_file) as model:
            results = model.predict_on_mols(mols)
            # Results are consumed while the model is still open, so this
            # also works if predictions are produced lazily.
            for result, mol in zip(results, mols):
                yield ScoredMol(mol=mol, score=result[0])