Source code for schrodinger.application.desmond.new_fep_edge_data

import contextlib
import math
import re
from collections import namedtuple
from itertools import combinations
from itertools import zip_longest
from typing import Dict
from typing import Iterator
from typing import List
from typing import Optional
from typing import Tuple
from typing import Union
from functools import cached_property

import numpy
from scipy.interpolate import interp1d

from schrodinger import structure
from schrodinger.application.desmond import arkdb
from schrodinger.application.desmond import cms
from schrodinger.application.desmond import constants
from schrodinger.application.desmond import struc
from schrodinger.application.desmond import task
from schrodinger.application.desmond.constants import FEP_MAPPING
from schrodinger.application.desmond.constants import FEP_TYPES
from schrodinger.application.desmond.measurement import Measurement
from schrodinger.structure import Structure

# User friendly leg names: (complex, solvent, vacuum)
# FIXME: Do all the names make sense?
_LEG_NAMES = {
    FEP_TYPES.PROTEIN_STABILITY: ('Protein', 'Frag. Solvent', None),
    FEP_TYPES.PROTEIN_SELECTIVITY:
        ('ProteinA-ProteinB Complex', 'ProteinA', None),
    FEP_TYPES.COVALENT_LIGAND: ('Complex', 'Frag. Solvent', None),
    FEP_TYPES.SMALL_MOLECULE: ('Complex', 'Solvent', 'Vacuum'),
    FEP_TYPES.METALLOPROTEIN: ('Complex', 'Solvent', 'Vacuum'),
    FEP_TYPES.LIGAND_SELECTIVITY: ('Holo', 'Apo', None),
}

# F-string variables for defining keys to data in `arkdb.ArkDb`.
_ONE_ = "Keywords[i].ResultLambda{fep_lambda}.Keywords[i]"
_ALL_ = "Keywords[i].ResultLambda{fep_lambda}.Keywords[*]"

FepLegs = namedtuple("FepLegs", "complex solvent vacuum")
FepLambdas = namedtuple("FepLambdas", "ref mut")


[docs]class BlameSidError(Exception): # Sorry, Sid. :D pass
# Some utilities def _mean_stdev_samples(samples): """ Returns the mean and the standard deviation of samples and samples itself as a numpy array. """ samples = numpy.asarray(samples) return numpy.mean(samples), numpy.std(samples), samples def _mean_stdev_counts(samples): """ Creates a list of new samples by counting the number of elements in each elements of the original `samples`, and then call `_mean_stdev_samples` on the new samples. """ return _mean_stdev_samples(tuple(map(len, samples)))
[docs]def compare_mean_stdev_samples(a, b): """ This is a utility function for unittesting. Tests if `a` and `b` are the same. Both arguments should be results from `_mean_stdev_samples`. """ assert math.isclose(a[0], b[0], rel_tol=1E-6, abs_tol=1E-6) assert math.isclose(a[1], b[1], rel_tol=1E-6, abs_tol=1E-6) assert numpy.allclose(a[2], b[2], rtol=1E-6, atol=1E-6, equal_nan=True)
[docs]def compare_mean_stdev_samples_2(a, b): """ This is a utility function for unittesting. Tests if `a` and `b` are the same. But arguments should be a pair of results from `_mean_stdev_samples`. """ compare_mean_stdev_samples(a[0], b[0]) compare_mean_stdev_samples(a[1], b[1])
[docs]def compare_mean_stdev_samples_4(a, b): """ This is a utility function for unittesting. Tests if `a` and `b` are the same. But arguments should be a pair of results for both lambda states, and each pair contains two results from `_mean_stdev_samples` (usually corresponding to the complex and the solvent legs) """ complex_results, solvent_results = zip(a, b) compare_mean_stdev_samples_2(*complex_results) compare_mean_stdev_samples_2(*solvent_results)
# Decorators
[docs]def arg_premise(key_pattern: str): """ This decorator is to pass the `val` of the `task.Premise` object via the corresponding argument to the function. See its usage below. The `task.Premise` object is created with the given `key_pattern`, which will be evaluated to the real key for getting the datum from the database. :param key_pattern: This argument can contain f-string variables, e.g., `"Keywords[i].ResultLambda{fep_lambda}.Keywords[i].ProtLigInter.LigWatResult"`, where the `{fep_lambda}` part is a f-string variable. Currently, only `{fep_lambda}` is supported. Note this argument's value itself is a simple string (not a f-string). It will be converted to a f-string and evaluated on the fly by the `on_*` decorators (see the docstrings there for detail), where all supported f-string variables must be defined. """ def decorator(func): def wrapper(*args): premise = task.Premise(key_pattern) return func(*args, premise) return wrapper return decorator
[docs]def arg_option(key_pattern: str, default=None): """ Similar to `arg_premise`, except that the type is `task.Option` instead of `task.Premise`. :param key_pattern: See the docstring of `arg_premise`. :param default: The default value of the datum if the key is not in the database. """ def decorator(func): def wrapper(*args): option = task.Option(key_pattern, default) return func(*args, option) return wrapper return decorator
class _AidOffset: """ A class to instruct the `on_*` decorator (see below) to pass in the atom ID offset for the corresponding FEP leg and the lambda state. """ pass
[docs]def arg_aid_offset(func): """ This decorator is to pass the value of `aid_offset`, which depends on the specific leg, via the corresponding argument to the function. See its usage below. """ def wrapper(*arg): return func(*arg, _AidOffset) return wrapper
def _on_leg_lambda(func, fepdata, aid_offset, ana_db, fep_lambda, *args): """ Calls `func` with proper arguments. :type fepdata: FepData :type aid_offset: int :type ana_db: arkdb.ArkDb :type fep_lambda: 0 | 1 `*args` defines the arguments to call `func` with. Some elements of `args` are "pointer" kind of values: They point to where to get the actual values, but themselves are not the values. These kinds of arguments are of the following types: - _AidOffset - Actual value is the AID offset of the current leg. - task.Premise - Actual value is from `ana_db`. - task.Option - Actual value is from `ana_db`, or the defaul value if the datum is missing there. This function will get all values of the arguments and call `func` with them. """ # Evaluates key-patterns in `Premise` and `Option` objects to keys, # and then gets the keyed data from `ana_db`. func_args = [] for a in args: if isinstance(a, task.Premise): a = ana_db.get(eval(f"f'{a.key}'"), BlameSidError) elif isinstance(a, task.Option): a = ana_db.get(eval(f"f'{a.key}'"), a.val) elif a is _AidOffset: a = aid_offset func_args.append(a) # Finally, calls `func` with all the arguments prepared for it. return func(fepdata, *func_args) def _on_leg(func, fepdata, aid_offset, ana_db, *args): """ Calls `_on_leg_lambda` with both FEP lambda states. See docstring of `_on_leg_lambda` for detail. :type fepdata: FepData :type aid_offset: Tuple[int, int] :type ana_db: arkdb.ArkDb :rtype: 2-tuple :return: The results returned by `func` for FEP lambda states 0 and 1, respectively. """ result0 = _on_leg_lambda(func, fepdata, aid_offset[0], ana_db, 0, *args) result1 = _on_leg_lambda(func, fepdata, aid_offset[1], ana_db, 1, *args) return result0, result1
[docs]def on_complex_lambda_0(func): """ Calls `func` with complex leg and FEP lambda state 0. """ def wrapper(fepdata, *args): return _on_leg_lambda(func, fepdata, fepdata._aid_offset.complex[0], fepdata._ana.complex, 0, *args) return wrapper
[docs]def on_complex(func): """ Calls `func` with complex leg. The FEP lambda state is irrelevant. """ return on_complex_lambda_0(func)
[docs]def on_complex_lambdas(func): """ Calls `func` with complex leg and both FEP lambda states 0 and 1. :rtype: 2-tuple :return: The results returned by `func` for FEP lambda states 0 and 1, respectively. """ def wrapper(fepdata, *args): return _on_leg(func, fepdata, fepdata._aid_offset.complex, fepdata._ana.complex, *args) return wrapper
[docs]def on_complex_solvent_lambda_0(func): """ Calls `func` with both complex and solvent legs and with FEP lambda state 0. :rtype: 2-tuple :return: The two elements are for the complex and solvent legs, respectively. """ def wrapper(fepdata, *args): ana_c, ana_s, _ = fepdata._ana aid_offset_c, aid_offset_s, _ = fepdata._aid_offset result_c = _on_leg_lambda(func, fepdata, aid_offset_c, ana_c, 0, *args) result_s = _on_leg_lambda(func, fepdata, aid_offset_s, ana_s, 0, *args) return result_c, result_s return wrapper
[docs]def on_complex_solvent(func): """ Calls `func` with both complex and solvent legs. `func` does NOT require a specification of the FEP lambda state. In most cases, the decorated property will return a 2-tuple. The elements correspond to the complex and the solvent legs. In these cases, the lambda states are irrelevant. In some other cases, the property will return four values for the following leg/lambda-state combinations: complex/lambda0, complex/lambda1, solvent/lambda0, and solvent/lambda1; in these case, the values will be rearranged into the `lambda0(complex, solvent), lambda1(complex, solvent)` format to be consistent with properties decorated by `on_complex_solvent_lambdas`. :rtype: 2-tuple """ def wrapper(fepdata, *args): ana_c, ana_s, _ = fepdata._ana aid_offset_c, aid_offset_s, _ = fepdata._aid_offset result_c = _on_leg_lambda(func, fepdata, aid_offset_c, ana_c, 0, *args) result_s = _on_leg_lambda(func, fepdata, aid_offset_s, ana_s, 0, *args) if isinstance(result_c, FepLambdas): # If the results are `FepLambdas` instances, we rearrange them into the # `lambda0(complex, solvent), lambda1(complex, solvent)` format to be # consistent with the `on_complex_solvent_lambdas` decorator. return (result_c.ref, result_s.ref), (result_c.mut, result_s.mut) return result_c, result_s return wrapper
[docs]def on_complex_solvent_lambdas(func): """ Calls `func` with both complex and solvent legs and with both FEP lambda states 0 and 1. :rtype: Tuple[2-tuple, 2-tuple] :return: The two elements are for FEP lambda states 0 and 1. Each element is a 2-tuple of the results by `func` for the complex and solvent legs, respectively. """ def wrapper(fepdata, *args): ana_c, ana_s, _ = fepdata._ana aid_offset_c, aid_offset_s, _ = fepdata._aid_offset result_c = _on_leg(func, fepdata, aid_offset_c, ana_c, *args) result_s = _on_leg(func, fepdata, aid_offset_s, ana_s, *args) return tuple(zip(result_c, result_s)) return wrapper
# Data classes
[docs]class ResData(namedtuple("ResData", "molnum chain name number commonname")): """ A class to store the molecule number, chain, name, and number of a residue """ _NAME_TO_COMMONNAME = { 'HIE': 'HIS', 'HID': 'HIS', 'HIP': 'HIS', 'CYX': 'CYS', 'CYM': 'CYS', 'LYN': 'LYS', 'ARN': 'ARG', 'ASH': 'ASP', 'GLH': 'GLU' }
[docs] @staticmethod def make(res: structure._Residue) -> "ResData": molnum = res.molecule_number molnum = res.atom[1].property.get(constants.ORIGINAL_MOLECULE_NUMBER, molnum) resname = res.pdbres.strip() return ResData(molnum, res.chain.strip() or '_', resname, res.resnum, ResData._NAME_TO_COMMONNAME.get(resname, resname))
@property def fullname(self) -> str: """ Return a string formatted as "<chain>:<resname>_<resnum>". """ return "%s:%s_%d" % (self.chain, self.commonname, self.number) @property def name_num(self) -> str: """ Return a string formatted as "<resname>_<resnum>". """ return "%s_%d" % (self.commonname, self.number)
# FIXME: Put the `TorsionPlot` class in a proper module? Putting it here will # make a dependency on `matplotlib` (which may in turns depend on some # X-Window backend). # # class TorsionPlot: # BAR_THEME_FEP_SOLVENT = { # "density" : 1, # "histtype" : "bar", # "edgecolor" : "grey", # "facecolor" : "white", # "alpha" : 0.9, # "hatch" : "\\\\" # } # BAR_THEME_FEP_COMPLEX = {**THEME_FEP_SOLVENT, # "facecolor" : "grey", # "alpha" : 0.75, # "hatch" : "//" # } # def __init__(self, for_print=False): # self._for_print = for_print # self._bins = [-180 + x * 10 for x in range(37)] # self._num_yticks = 3 # self._axes = matplotlib.axes.Axes() # self._axes2 = self._axes.twinx() # self._axes2.set_xlim(-200, 200) # self._axes2.set_xticks([-180, -90, 0, 90, 180]) # self._axes2.xaxis.set_ticks_position("bottom") # self._axes2.get_xaxis().set_visible(True) # self._axes2.yaxis.set_ticks_position("right") # self._axes.set_yticklabels([]) # self._axes.get_yaxis().set_visible(False) # self._axes.get_xaxis().set_visible(False) # def plot(self, # pot: TorsionPotential, # bar_theme: dict, # lin_theme: dict=None) -> matplotlib.Axes: # if pot.energy_profile is None: # return None # self._axes.hist(pot.energy_profile, self._bins, **bar_theme) # pot_max = 1.05 * pot.max # pot_ticks = numpy.linspace(0, pot_max, self._num_yticks) # if len(set(pot.energy_profile)) == 1: # self._axes2.set_ylim(-1, 1) # self._axes2.set_yticklabels(["", "", 0.0, "", ""]) # else: # self._axes2.set_ylim(0, pot_max) # self._axes2.set_yticks(pot_ticks) # self._axes2.set_yticklabels(["%.2f" % x for x in pot_ticks]) # for tick in self._axes2.get_yticklabels(): # tick.set_color('#0C2C84') # lin_theme = lin_theme or {} # self._axes2.plot(self._bins, pot.energy_profile, '-', color='#0C2C84', # **lin_theme) # if self._for_print: # for tick in self._axes.get_xticklabels(): # tick.set_fontsize(7) # change_plot_colors(self._axes2, spines=False) # change_plot_colors(self._axes) # for tick in self._axes2.get_yticklabels(): # tick.set_fontsize(6) # tick.set_color('#0C2C84') # return self._axes
[docs]class TorsionPotential:
[docs] def __init__(self, raw_data: List[float], a1, a2, a3, a4): self._profile = numpy.array(raw_data) self._profile -= min(self._profile) self._aids = (a1, a2, a3, a4)
def __str__(self): return "Torsion_" + "-".join(map(str, self._aids)) @property def energy_profile(self) -> numpy.ndarray: """ Returns energy profile that's offset to zero. """ return self._profile @property def max(self) -> float: """ Returns the maximum value of the energy profile. """ return max(self._profile) @property def aids(self): return self._aids
[docs]class FepData: """ A data scraper class to extract and reorganize from FEP simulations and raw SID analysis results. """ # Notes for developers: # - All nontrivial properties are cached by default. If you want to delete # the cached value, simply use `del` like `del a.atom_mapping`. # - Defines the data that a property/function needs from the database using # the `arg_*` decorators: `arg_premise`, `arg_option`, `arg_aidoffset`. # There are plenty of examples below, and the usage of these decorators # should be self-explaining enough. Also see docstrings of `task.Premise` # and `task.Option` if you are not familiar with these classes. The use of # these classes makes the data dependency much more explict. The # decorators save a lot of biolerplate code in checking data # availabilities, logging errors, and iterating over FEP legs and lambda # states. # - The order of the decorators matters. On the top is usually # `@cached_property` if you are defining a property, [of course, it's not # required if it's a simple method], followed by `@arg_*`, where the order # of the decorators should match that of the arguments of the function: # higher ones match those more to the left. If you have other arguments # than the correspondences to the `@arg_*` decorators, they should be the # leftmost arguments. The bottom decorator should be one of `@on_*`'s. # - To get a sense of typical data structure in SID analysis results: # Keywords = [ # {FEPSimulation = { # <key-value-pairs> # } # } # {LigandInfo = { # <key-value-pairs> # } # } # {LigandInfo = { # Another `LigandInfo` field # <key-value-pairs> # } # } # {ResultLambda0 = { # Keywords = [ # Can ResultLambda0's value directly be a list so # # that we can git rid of this `Keywords` map? # {RMSD = { # <key-value-pairs> # } # } # {ProtLigInter = { # LigWatResult = [ # # Column #1: Frame index # # Column #2: Fragment ID # # Column #3: Atom index of the water molecule's oxygen # # Column #4: Distance between oxygen and COM of fragment # [[0 L-FRAG_3 913 2.64203 ] # [0 L-FRAG_1 932 5.7398 ] # ] # # [[1 L-FRAG_1 243 4.89517 ] # [1 L-FRAG_3 460 4.42454 ] # [1 L-FRAG_1 771 5.72302 ] # ] # # This list is a time series, each to a frame in trajectory. # ] # } # ] # } # {ResultLambda1 = { # # Similar to ResultLambda0 # } # ]
[docs] def __init__(self, fep_type: str, complex_model: cms.Cms, complex_analysis_results: arkdb.ArkDb, solvent_analysis_results: arkdb.ArkDb, vacuum_analysis_results: Optional[arkdb.ArkDb] = None): self._fep_type = fep_type self._complex_model = complex_model component = struc.component_structures(self._complex_model) self._receptor = component.receptor self._ref = component.fep_ref self._mut = component.fep_mut self._ana = FepLegs(complex_analysis_results, solvent_analysis_results, vacuum_analysis_results) # Assumptions and non-assumptions: # 1. We assume the ordering of the `fep_ref` and the `fep_mut` component # structures in the CMS file is always the same in all legs. IOW, if # `fep_ref` is before `fep_mut` in the complex leg, that should be # the case also in the solvent and the vacuum legs. # 2. In the solvent leg, the `fep_ref` and the `fep_mut` component # structures are the first components in the CMS file. # 3. We do NOT assume the exact ordering of the two component # structures. IOW, `fep_ref` may be before or after `fep_mut`. comp_sts = complex_model.comp_ct i_ref_st = comp_sts.index(self._ref) i_mut_st = comp_sts.index(self._mut) complex_aid_offset = ( sum(comp_sts[i].atom_total for i in range(i_ref_st)), sum(comp_sts[i].atom_total for i in range(i_mut_st))) solvent_aid_offset = ( (0, self._ref.atom_total) if i_ref_st < i_mut_st else \ (self._mut.atom_total, 0) ) # For each leg, the aid_offset is always (for-lambda-0, for-lambda-1). self._aid_offset = FepLegs(complex_aid_offset, solvent_aid_offset, solvent_aid_offset)
@property def fep_type(self): return self._fep_type @property def leg_names(self): # FIXME: This used to be two properties: `leg1_name` (solvent) and # `leg2_name` (complex). return _LEG_NAMES[self.fep_type] @property def complex_model(self): return self._complex_model @property def receptor(self): return self._receptor @property def ref(self): return self._ref @property def mut(self): return self._mut @cached_property def atom_mapping(self) -> List[Tuple[int, int]]: """ Returns the AID (atom ID) pairs of the mapped atoms. Each element is a pair of AIDs, where the first one is of the mutant and the second of the reference. The AIDs are local with respect to the alchemical structures. """ try: return list((atom.property[FEP_MAPPING], atom.index) for atom in self.ref.atom) except KeyError: return list((atom.index, atom.property[FEP_MAPPING]) for atom in self.mut.atom) @cached_property @arg_premise(f"{_ONE_}.ProtLigInter.LigWatResult") @on_complex_lambdas def ligand_wateraround_counts(self, dat): """ Returns number of water molecules around the ligand molecule. """ return _mean_stdev_counts(dat) @cached_property @arg_premise(f"{_ONE_}.ProtLigInter.DictLigandFragment") @arg_aid_offset @on_complex_lambdas def ligand_fragments(self, frags: List[Tuple[str, List[int]]], aid_offset) \ -> Dict[str, List[int]]: """ Example `frags` :: DictLigandFragment = [ [L-FRAG_0 [4760 4761 4762 4763 4764 4765 4766 4767 6267 6268 ]] [L-FRAG_1 [4768 6278 ]]] FIXME: Doc on what the str and list of int are about? """ # Converts `frags` to a `dict` object: `Dict[str, List[int]]`. frags = dict(frags) for aids in frags.values(): for i, aid in enumerate(aids): aids[i] -= aid_offset return frags @arg_option(f"{_ALL_}.Torsion", ()) @arg_aid_offset @on_complex_solvent_lambdas def _get_torsion_potentials(self, torsions, aid_offset): result = [] for e in torsions: with contextlib.suppress(KeyError): a1 = e["a1"] - aid_offset a2 = e["a2"] - aid_offset a3 = e["a3"] - aid_offset a4 = e["a4"] - aid_offset profile = e["RBPotential"] result.append(TorsionPotential(profile, a1, a2, a3, a4)) return result @cached_property def ligand_torsions(self) -> \ (Iterator[Tuple[TorsionPotential, TorsionPotential]], Iterator[Tuple[TorsionPotential, TorsionPotential]]): """ Returns two iterators for lambda states 0 and 1, respectively. Each iterator is over a list of 2-tuples of torsion potentials respectively for the complex and solvent legs. """ (complex0, solvent0), (complex1, solvent1) = \ self._get_torsion_potentials() # For ligand-selectivity FEP, `solvent0` and `solvent1` are empty. if (self.fep_type != FEP_TYPES.LIGAND_SELECTIVITY and (len(solvent0) != len(complex0) or len(solvent1) != len(complex1))): raise BlameSidError( "Numbers of rotatable torsions are different " "in SID analyses of complex and solvent legs. Lambda0: " "%d vs %d; lambda1: %d vs %d" % (len(complex0), len(solvent0), len(complex1), len(solvent1))) return zip_longest(complex0, solvent0), zip_longest(complex1, solvent1) @cached_property @arg_option(f"{_ALL_}.Torsion", ()) @on_complex_solvent_lambdas def ligand_torsion_strain(self, torsions): """ Calculates and returns a time series of strains for each torsion. """ angles = list(range(-180, 181, 10)) torsion_potentials = [] for t in torsions: torsion_timeseries = t["Result"] potential_profile = t.get("RBPotential", None) if potential_profile is None: torsion_potentials.append([0.0] * len(torsion_timeseries)) continue # FIXME: We assume `angles` and `potential_profile` match # each other. ene_func = interp1d(angles, potential_profile, kind="cubic") energies = [ene_func(e) for e in torsion_timeseries] torsion_potentials.append(energies) # `torsion_potentials` is a list of time series of torsion potentials # for all torsions. We then sum up all torsions' potentials for each # time point to get a time series of total torsion potentials. ts_total_torsion_potential = numpy.array(torsion_potentials).sum(axis=0) return _mean_stdev_samples(ts_total_torsion_potential) @cached_property @arg_premise(f"{_ONE_}.LigandHBonds.Result") @on_complex_solvent_lambdas def ligand_intrahbond_counts(self, lig_hbonds): return _mean_stdev_counts(lig_hbonds) @cached_property @arg_premise(f"{_ONE_}.SA_Surface_Area.Result") @on_complex_solvent_lambdas def ligand_sasa(self, sasa): return _mean_stdev_samples(sasa) @cached_property @arg_premise(f"{_ONE_}.Molecular_Surface_Area.Result") @on_complex_solvent_lambdas def ligand_molecular_sa(self, molsa): return _mean_stdev_samples(molsa) @cached_property @arg_premise(f"{_ONE_}.Polar_Surface_Area.Result") @on_complex_solvent_lambdas def ligand_polar_sa(self, polar_sa): return _mean_stdev_samples(polar_sa) @cached_property @arg_premise(f"{_ONE_}.Rad_Gyration.Result") @on_complex_solvent_lambdas def ligand_gyration_radius(self, gyration): return _mean_stdev_samples(gyration) @cached_property @arg_premise(f"{_ALL_}.RMSD") @on_complex_solvent_lambdas def ligand_rmsd(self, rmsds): rmsd = arkdb.expect_single_datum(rmsds, BlameSidError("RMSD"), SelectionType="Ligand", FitBy=arkdb.DSC.NO_VALUE) return _mean_stdev_samples(rmsd["Result"]) @cached_property @arg_premise(f"{_ALL_}.RMSD") @on_complex_lambdas def ligand_rmsd_wrt_protein(self, rmsds): rmsd = arkdb.expect_single_datum(rmsds, BlameSidError("RMSD"), SelectionType="Ligand", FitBy=arkdb.DSC.ANY_VALUE)["Result"] return _mean_stdev_samples(rmsd) @cached_property @arg_premise(f"{_ONE_}.SecondaryStructure.Result") @on_complex_lambdas def protein_secondary_structure(self, ss: List[str]): """ :param ss: The value of `ss` is like this:: ["0.1.2.0.0.0", "0.1.1.0.0.0", "0.1.2.1.0.0",] Each element in the list corresponds to a trajectory frame. The numbers are the secondary-structure codes:: "-1: NONE; 0: LOOP; 1: HELIX; 2: STRAND; 3:TURN" :return: (helix-runs, strand-runs) """ ss = numpy.asarray(ss) def find_runs(ts, ss_code): CUTOFF = 0.7 average = numpy.mean(ts == ss_code, axis=0) ss = average >= CUTOFF # We convert the boolean sequence to a binary-digit string and then # we can employ regular expression to find the spans. bs = "".join("%d" % e for e in ss) # Finds all runs of '1' char. We allow gaps of a single '0'. return [m.span() for m in re.finditer(r"1+0?1+", bs)] return find_runs(ss, 1), find_runs(ss, 2) @cached_property def receptor_residue_sequence(self) -> List[ResData]: """ Return the residue sequence from N- to C-end. This property assumes that there is only one receptor structure. """ assert isinstance(self.receptor, Structure) return [ ResData.make(r) for r in structure.get_residues_by_connectivity(self.receptor) ] @cached_property @arg_premise(f"{_ALL_}.RMSF") @on_complex_lambda_0 def receptor_residue_b_factor(self, rmsfs) -> List[float]: """ Returns per-residue B factors for the receptor molecule. """ # FIXME: Should this be for both lambda states? But the old code only # returns the result for lambda 0. # After discussing with Dima, I decided to keep the code as is. # The assumption is that the B-factor will always be taken from # the experimental results, instead of being calculated on the # fly, and as such the result is indpendent on the simulations. # We will revisit this code if the assumption breaks. rmsf = arkdb.expect_single_datum(rmsfs, BlameSidError("RMSF"), SelectionType="Backbone") b_factor = rmsf and rmsf.get("BFactorResult") if not b_factor: # FIXME: Calculating the B-factor on the fly is essentially to # get the average B-factor values over the evenly weighted # atomic B-factors for each residue. The atomic B-factors are # obtained from the atom property 'r_m_pdb_tfactor'. raise NotImplementedError( "Calculating residue B-factors on the fly is not implemented") @cached_property @arg_premise(f"{_ALL_}.RMSD") @on_complex_lambdas def receptor_backbone_rmsd(self, rmsds): return arkdb.expect_single_datum(rmsds, BlameSidError("RMSD"), SelectionType="Backbone")["Result"] @cached_property @arg_premise(f"{_ALL_}.RMSF") @on_complex_lambdas def receptor_backbone_rmsf(self, rmsfs): rmsf = arkdb.expect_single_datum(rmsfs, BlameSidError("RMSF"), SelectionType="Backbone") if 'ProteinResidues' in rmsf: return rmsf['Result'] # 'ProteinResidues' is always expected. The following is to calculate # RMSF on the fly, for backward compatibility. aids = rmsf["AtomIndices"] aid_set = set(aids) atomic_rmsf = rmsf["Result"] residue_rmsf = [] for res in structure.get_residues_by_connectivity(self.receptor): res_aids = set(map(int, res.atom)) & aid_set indices = (aids.index(e) for e in res_aids) residue_atomic_rmsf = list(atomic_rmsf[i] for i in indices) if residue_atomic_rmsf: residue_rmsf.append(numpy.mean(residue_atomic_rmsf)) else: residue_rmsf.append(0) return residue_rmsf @staticmethod def _remove_atom_tag(res_tag: str) -> str: """ Given a residue tag of the form "<chain>:<resname>_<resnum>[:<atom-tag>]", returns a substring with the ":<atom-tag>" part (if exists) removed. """ # If the atom part exists, it must have a leading ":" at least 5 chars # away from the beginning of the string. index_of_second_colon = res_tag.find(":", 5) return (res_tag + "_")[:index_of_second_colon] @cached_property @arg_option(f"{_ONE_}.ProtLigInter.DictProteinHbond") @arg_option(f"{_ONE_}.ProtLigInter.DictProteinHydrophobic") @arg_option(f"{_ONE_}.ProtLigInter.DictProteinIonic") @arg_option(f"{_ONE_}.ProtLigInter.DictProteinPi") @arg_option(f"{_ONE_}.ProtLigInter.DictProteinWaterBridge") @on_complex_lambdas def residues_around_ligand(self, *args): """ Returns a list of residues around the ligand molecule. Each residue is a string of the form: "<chain>:<resname>_<resnum>", and the list is sorted by the chain and the resnum. """ # FIXME: This property used to be called # `receptor_residues_interaction_ligand1` and # `receptor_residues_interaction_ligand2`. # Each element of `args` is either `None` or a list of lists. Each # element list is either empty or having two elements # `[res_tag, <atom-indices>]`. We take out all `res_tag` and put them # into a set. Note that a `res_tag` *may* have an extra atom part, e.g., # the ":O" part in "H:GLY_219:O", which should be removed. res_tags = next(zip(*sum(filter(None, args), []))) res_tags = set(map(self._remove_atom_tag, res_tags)) # Example value: set(["H:ILE_174", "H:TRP_60", ...]) # Sorts by (<chain>, <resnum>, <resname>), and returns the result. res_tags = ((re.split(":|_", tag) + [tag]) for tag in res_tags) return [ e[-1] for e in sorted(res_tags, key=lambda x: (x[0], int(x[2]), x[1])) ] @cached_property def residues_around_ligand_both_lambda_states(self): """ Returns a list of residues around the ligand molecule in both lambda states. Eech residue is a string of the form: "<chain>:<resname>_<resnum>", and the list is sorted by the chain and the resnum. """ # FIXME: This property used to be called `cpx_sid_protein_residues`. res_tags = set(sum(self.residues_around_ligand, [])) # Sorts by (<chain>, <resnum>, <resname>), and returns the result. return [ e[-1] for e in sorted((re.split(":|_", tag) + [tag] for tag in res_tags), key=lambda x: (x[0], int(x[2]), x[1])) ] @cached_property @arg_option(f"{_ONE_}.ProtLigInter.HBondResult") @arg_option(f"{_ONE_}.ProtLigInter.HydrophobicResult") @arg_option(f"{_ONE_}.ProtLigInter.PiCatResult") @arg_option(f"{_ONE_}.ProtLigInter.PiPiResult") @arg_option(f"{_ONE_}.ProtLigInter.PolarResult") @arg_option(f"{_ONE_}.ProtLigInter.WaterBridgeResult") @on_complex_lambdas def similarity_matrix_by_protein_ligand_interactions(self, *args): # FIXME: This property used to be called # `pl_interaction_similarity_matrix0` and # `pl_interaction_similarity_matrix1`. # Each element of `args` is either `None` or a list of lists, where each # element list is either empty or a list of lists. For example: # # HBondResult = [ # [[0 "H:SER_214:O" a-b "L-FRAG_2:HN2" ] # [0 "H:GLY_216:O" a-b "L-FRAG_3:HN1" ] # [0 "H:GLY_216:H" d-b "L-FRAG_1:O32" ] # ] # [[1 "H:GLY_216:O" a-b "L-FRAG_3:HN1" ] # [1 "H:GLY_216:H" d-b "L-FRAG_1:O32" ] # ] # ... # ] # HydrophobicResult = [ # [[0 "H:TYR_60" L-FRAG_0 ] # [0 "H:ILE_174" L-FRAG_1 ] # ] # [] # [] # [[3 "H:TRP_60" L-FRAG_0 ] # ] # ... # ] # # So each innermost list is basically # [frame-index, residue-tags, <other-stuff>], where the frame-index is # zero based. args = list(filter(None, args)) if not args: raise BlameSidError( "No protein-ligand interaction data in " "the complex-leg SID file to calculate the similarity matrix") # FIXME: The following code for calculating similarity matrix is also # used by `process_fep_traj` module. Refactor it out? # Creates a "contact" matrix: # rows = frames # columns = residues # value = True if residue interacts with the ligand, or False if not. residues = self.residues_around_ligand_both_lambda_states num_frames = len(args[0]) num_residues = len(residues) contact_matrix = numpy.zeros([num_frames, num_residues], dtype=bool) for contacts_time_series in args: for i_frame, contacts in enumerate(contacts_time_series): for contact in contacts: res_tag = self._remove_atom_tag(contact[1]) i_residue = residues.index(res_tag) contact_matrix[i_frame, i_residue] = True # Tanimoto similarity similarity_matrix = numpy.eye(num_frames) for i, j in combinations(range(num_frames), 2): contact_i = contact_matrix[i] contact_j = contact_matrix[j] num_total_contacts = sum(contact_i | contact_j) if num_total_contacts: num_common_contacts = sum(contact_i & contact_j) similarity_matrix[i, j] = similarity_matrix[j, i] = \ num_common_contacts / float(num_total_contacts) return similarity_matrix @cached_property @arg_premise("Keywords[i].ResultLambda{fep_lambda}.TrajectoryInterval_ps") @on_complex_solvent_lambda_0 def trajectory_intervals(self, traj_interval_in_ps: float) -> float: """ Returns the trajectory intervals in ns for the corresponding leg. If the interval was not recorded in the database, `None` is returned. """ # FIXME: This property used to be called # `cpx_sid_trajectory_interval_ns` and # `sol_sid_trajectory_interval_ns`. return traj_interval_in_ps / 1000.0 @cached_property @arg_premise("Keywords[i].ResultLambda{fep_lambda}.TrajectoryNumFrames") @on_complex_solvent_lambda_0 def num_trajectory_frames(self, num_frames: int) -> int: """ Returns the number of trajectory frames for the corresponding leg. If the data was not recorded in the database, `None` is returned. """ # FIXME: This property used to be called # `cpx_sid_number_of_frames` and # `sol_sid_number_of_frames`. return num_frames @cached_property def trajectory_timelines(self) -> Tuple[numpy.ndarray, numpy.ndarray]: """ Returns two 1-D numpy arrays containing the trajectory-frames' times for the complex and the solvent legs, respectively. CAVEAT: This relies on a not-super-safe assumption: The trajectory frames are always recorded with a start at 0 and a uniform interval. """ # FIXME: This property used to be called # `cpx_sid_snashot_times_ps` and `cpx_sid_snashot_times_ps`, where # the `_ps` part is apparently a misnomer because the code really # returns times in ns. interval_c, interval_s = self.trajectory_intervals num_frames_c, num_frames_s = self.num_trajectory_frames # yapf: disable return (numpy.linspace(0, interval_c * num_frames_c, num=num_frames_c, endpoint=False), numpy.linspace(0, interval_s * num_frames_s, num=num_frames_s, endpoint=False)) # yapf: enable @staticmethod def _dg_time_function_impl(dg: List[float], err: List[float], start_time: float, end_time: float) -> Tuple[float, Measurement]: """ Returns a time series for the given dG data. """ time = numpy.linspace(start_time, end_time, len(dg), endpoint=False) return [(t, Measurement(d, e)) for t, d, e in zip(time, dg, err)] @cached_property @arg_premise("Keywords[i].dGSliding.dG") @arg_premise("Keywords[i].dGSliding.dG_error") @arg_premise("Keywords[i].dGSliding.StartTime_ns") @arg_premise("Keywords[i].dGSliding.EndTime_ns") @on_complex_solvent def dg_sliding_time_function(self, dg: List[float], err: List[float], start_time: float, end_time: float) -> Tuple[float, Measurement]: """ Returns the sliding dG as a function of time. """ # FIXME: This used to be called: # ((cpx_delta_g_sliding, cpx_delta_g_sliding_err), # (sol_delta_g_sliding, sol_delta_g_sliding_err)) return self._dg_time_function_impl(dg, err, start_time, end_time) @cached_property @arg_premise("Keywords[i].dGReverse.dG") @arg_premise("Keywords[i].dGReverse.dG_error") @arg_premise("Keywords[i].dGReverse.StartTime_ns") @arg_premise("Keywords[i].dGReverse.EndTime_ns") @on_complex_solvent def dg_reverse_time_function(self, dg: List[float], err: List[float], start_time: float, end_time: float) -> Tuple[float, Measurement]: """ Returns the reverse dG as a function of time. """ # FIXME: This used to be called: # ((cpx_delta_g_reverse, cpx_delta_g_reverse_err), # (sol_delta_g_reverse, sol_delta_g_reverse_err)) return self._dg_time_function_impl(dg, err, start_time, end_time) @cached_property @arg_premise("Keywords[i].dGForward.dG") @arg_premise("Keywords[i].dGForward.dG_error") @arg_premise("Keywords[i].dGForward.StartTime_ns") @arg_premise("Keywords[i].dGForward.EndTime_ns") @on_complex_solvent def dg_forward_time_function(self, dg: List[float], err: List[float], start_time: float, end_time: float) -> Tuple[float, Measurement]: """ Returns the forward dG as a function of time. """ # FIXME: This used to be called: # ((cpx_delta_g_forward, cpx_delta_g_forward_err), # (sol_delta_g_forward, sol_delta_g_forward_err)) return self._dg_time_function_impl(dg, err, start_time, end_time) @cached_property @arg_premise("Keywords[i].dGForward.Bennett.dG") @arg_premise("Keywords[i].dGForward.Bennett.Bootstrap_std") @arg_premise("Keywords[i].dGForward.Bennett.Analytical_std") @on_complex_solvent def dg(self, dg: float, bootstrap_err: float, analytical_err: float) \ -> Tuple[float, float, float]: """ Returns the dG, the boostrap error, and the analytical error. """ # FIXME: This used to be called: # ((cpx_delta_g_forward_dg, cpx_delta_g_forward_bootstrap_std, # cpx_delta_g_forward_analytical_std), # (sol_delta_g_forward_dg, sol_delta_g_forward_bootstrap_std, # sol_delta_g_forward_analytical_std)) return dg, bootstrap_err, analytical_err @cached_property def ddg(self) -> Measurement: """ Returns the ddG = dG_complex - dG_solvent. """ # FIXME: Old property name: delta_delta_g # FIXME: The old code has some logic for dealing with corrected solvent # dG, which is passed in as an `__init__` argument. Unsure what # the best way is in the new code. dg_c, dg_s = self.dg return Measurement(dg_c[0], dg_c[1]) - Measurement(dg_s[0], dg_s[1]) @cached_property @arg_premise("Keywords[i].dGForward.Bennett.dF_Per_Replica") @on_complex_solvent def df_per_window(self, df: List[List[float]]) \ -> List[Tuple[float, float, float]]: """ Returns the dF's and the corresponding boostrap and analytical errors for all neighboring lambda windows. """ # FIXME: This used to be called: # cpx_delta_g_forward_df_per_replica, and # sol_delta_g_forward_df_per_replica, where the `_replica` part is # a misnomer -- it should really be `_window`. return [tuple(e) for e in df] @cached_property @arg_premise("Keywords[i].ProteinInfo.FormalCharge") @on_complex def receptor_charge(self, charge: Union[int, float]) -> float: return float(charge) @cached_property @arg_premise("Keywords[i].ProteinInfo.NumberHeavyAtoms") @on_complex def receptor_num_heavy_atoms(self, num: int) -> int: # FIXME: Old property name: receptor_total_heavy return num @cached_property @arg_premise("Keywords[i].ProteinInfo.NumberAtoms") @on_complex def receptor_num_atoms(self, num: int) -> int: # FIXME: Old property name: receptor_total_atom return num @property def receptor_title(self) -> str: return self.receptor.title.strip() or "N/A" @cached_property @arg_premise("Keywords[i].ProteinInfo.ChainsResidues") @arg_premise("Keywords[i].ProteinInfo.ChainsNames") @on_complex def receptor_num_residues_per_chain(self, num_residues_per_chains: List[int], chain_names: List[str]) \ -> Dict[str, int]: """ Returns a `dict`: key = chain name, value = number of residues in chain """ # FIXME: Old property names: # receptor_total_residues_in_chains, receptor_chain_names return dict(zip(chain_names, num_residues_per_chains)) @cached_property @arg_premise("Keywords[i].ProteinInfo.NumberResidues") @on_complex def receptor_num_residues(self, num_residues: int) -> int: """ Returns the total number of residues in the receptor. """ # FIXME: Why get this from database instead of the receptor structure? # FIXME: Old property name: receptor_total_residues return num_residues @cached_property @arg_premise("Keywords[*].LigandInfo.NumberRotatableBonds") @on_complex def ligand_num_rotatable_bonds(self, nums: List[int]) -> Tuple[int, int]: """ Returns the numbers of rotatable bonds for the reference and the mutant structures, respectively. """ # FIXME: Old property names: # ligand1_total_rot_bonds, ligand2_total_rot_bonds # Notes: # - The data are available in both the solvent and the complex legs' # analysis databases. The values should be the same. # - We expect two numbers, and the ordering matters: The first # corresponds to the `ref` structure, and the second the `mut`. assert 2 == len(nums) return tuple(nums) @cached_property @arg_premise("Keywords[*].LigandInfo.NumberFragments") @on_complex def ligand_num_fragments(self, nums: List[int]) -> Tuple[int, int]: """ Returns the numbers of fragments of the reference and the mutant structures, respectively. """ # FIXME: Old property names: # ligand1_total_fragments, ligand2_total_fragments assert 2 == len(nums) return tuple(nums) @cached_property @arg_premise("Keywords[*].LigandInfo.MolecularFormula") @on_complex def ligand_molecular_formula(self, molecular_formulas: List[str]) \ -> Tuple[str, str]: """ Returns the molecular formulas of the reference and the mutant structures, respectively. """ # FIXME: Old property names: # ligand1_mol_formula, ligand2_mol_formula assert 2 == len(molecular_formulas) return tuple(molecular_formulas) @cached_property @arg_premise("Keywords[*].LigandInfo.Charge") @on_complex def ligand_charge(self, charges: List[Union[int, float]]) \ -> Tuple[float, float]: """ Returns the total charges of the reference and the mutant structures, respectively. """ # FIXME: Old property names: # ligand1_charge, ligand2_charge # FIXME: Why get this from database instead of `ref` and `mut`? assert 2 == len(charges) return float(charges[0]), float(charges[1]) @cached_property @arg_option("Keywords[*].LigandInfo.AlchemicalSolvent") @on_complex def alchemical_solvent(self, alchem_solvent): # FIXME: Old property names: # ligand1_alchemical_mols, ligand2_alchemical_mols # FIXME: What type is the element of `alchem_solvent`? if alchem_solvent: assert 2 == len(alchem_solvent) return tuple(alchem_solvent) return None @cached_property @arg_option("Keywords[*].LigandInfo.AlchemicalSolventAtoms") @on_complex def num_atoms_alchemical_solvent(self, num_atoms: Optional[List[int]]) \ -> Tuple[int, int]: """ Returns the numbers of atoms of the achemical solvent molecules in the reference and the mutant structures, respectively. """ # FIXME: Old property names: # ligand1_alchemical_atom_total, ligand2_alchemical_atom_total if num_atoms: assert 2 == len(num_atoms) return tuple(num_atoms) return (0, 0) @cached_property @arg_premise("Keywords[*].LigandInfo.AtomicMass") @on_complex def ligand_molecular_mass(self, molecular_masses: List[float]) \ -> Tuple[float, float]: """ Returns the molecular masses of the reference and the mutant structures, respectively. """ # FIXME: Old property names: # ligand1_atomic_mass, ligand2_atomic_mass # Why call molecular mass/weight "*_atomic_mass"? # FIXME: Shall we exclude the contribution of achemical solvent? assert 2 == len(molecular_masses) return tuple(molecular_masses) @cached_property @arg_premise("Keywords[*].LigandInfo.NumberHotAtoms") @on_complex def ligand_num_hotatoms(self, num_hotatoms: List[int]) -> Tuple[int, int]: """ Returns the numbers of hot atoms of the reference and the mutant structures, respectively. """ # FIXME: Old property names: # ligand1_total_hot, ligand2_total_hot assert 2 == len(num_hotatoms) return tuple(num_hotatoms) # FIXME: We will tentatively skip the following properties in the old code: # ligand1_total_heavy, ligand2_total_heavy, ligand1_total_atoms, # ligand2_total_atoms. These can be easily obtained from `ref` and # `mut` structures. @cached_property @arg_premise("Keywords[*].LigandInfo.ASL") @on_complex_solvent def ligand_asl(self, asls: Tuple[str, str]) -> FepLambdas: """ Returns two ASL expression strings, each of which selects the ligand atoms in the simulation system of the corresponding leg. The first string selects the ligand molecule in the lambda state 0, and the second in the lambda state 1. """ # FIXME: Old property names: # ligand1_cpx_asl, ligand2_cpx_asl # ligand1_sol_asl, ligand2_sol_asl # - We expect two strings, and the ordering matters: The first # corresponds to the `ref` structure, and the second the `mut`. assert len(asls) == 2 return FepLambdas(*asls) @cached_property @arg_premise("Keywords[*].LigandInfo.PDBResidueName") @on_complex def ligand_pdb_resname(self, resnames: Tuple[str, str]) -> Tuple[str, str]: """ Returns the residue names of the ligand molecule in the PDB file for the lambda states 0 and 1. """ # FIXME: Old property names: # ligand1_pdb_name, ligand2_pdb_name assert len(resnames) == 2 return resnames @cached_property @arg_premise("Keywords[*].LigandInfo.SMILES") @on_complex def ligand_smiles(self, smiles: Tuple[str, str]) -> Tuple[str, str]: """ Returns the SMILES of the ligand molecule in both the lambda states 0 and 1. """ # FIXME: Old property names: # ligand1_smiles, ligand2_smiles assert len(smiles) == 2 return smiles @cached_property @arg_premise("Keywords[i].FEPSimulation.JobName") @on_complex def edge_id(self, jobname: str) -> str: """ Extracts the edge ID from the jobname, and return it as a string. """ # FIXME: Old property name: short_hash # An example of `jobname`'s value: # "Thrombin_17-2_9b2c328_ac3478d_complex_10" # where the edge ID is the "9b2c328_ac3478d" substring. # The string pattern of jobname is assumed to be: # "<main-jobname>_<edge-id>_<leg-name>_<stage-index>", which the # old and the current code is based on. This is not bulletproof, but # it's very simple and seems working fine so far. edge_id = jobname.split('_')[-4:-2] return "_".join(edge_id) @cached_property @arg_premise("Keywords[i].FEPSimulation.JobName") @on_complex def node_ids(self, jobname: str) -> Tuple[str, str]: """ Returns the node IDs of the ref and the mut structures in the FMP graph. """ # FIXME: Old property name: ligand1_hash, ligand2_hash # See the comment within `edge_id`. # FIXME: Here, we have to assume the first hexcode corresponds to `ref`, # and the second corresponds to `mut`. To remove this assumption, # we have to fix the upstream logic to store the data directly. return tuple(jobname.split('_')[-4:-2]) @cached_property @arg_premise("Keywords[i].FEPSimulation.JobName") @on_complex def jobname(self, jobname: str) -> str: """ Returns the name of the FEP+ workflow job. """ # See the comment within `edge_id`. return "_".join(jobname.split('_')[0:-4]) @cached_property @arg_premise("Keywords[i].Replica") @on_complex_solvent def num_replicas(self, replicas: List) -> int: """ Returns the number of replicas in the simulation of the corresponding leg. """ # FIXME: sol_total_replicas, cpx_total_replicas return len(replicas) @cached_property @arg_premise("Keywords[i].FEPSimulation.NumberWaters") @on_complex_solvent def num_water_molecules(self, num: int) -> int: """ Returns the number of water molecules in the simulation of the corresponding leg. """ # FIXME: Old property names: sol_total_waters, cpx_total_waters return num @cached_property @arg_premise("Keywords[i].FEPSimulation.TotalAtoms") @on_complex_solvent def num_atoms(self, num: int) -> int: """ Returns the number of atoms in the simulation of the corresponding leg. """ # FIXME: Old property names: cpx_total_atoms, sol_total_atoms return num @cached_property @arg_premise("Keywords[i].FEPSimulation.TotalSimulationNs") @on_complex_solvent def sim_time(self, t: float) -> float: """ Returns the simulation time (in nanoseconds) of the corresponding leg. """ # FIXME: Old property names: sol_sim_time, cpx_sim_time return float(t) @cached_property @arg_premise("Keywords[i].FEPSimulation.TemperatureK") @on_complex_solvent def sim_temperature(self, temperature: float) -> float: """ Returns the simulation temperature (in Kelvins) of the corresponding leg. """ # FIXME: Old property names: sol_temperature, cpx_temperature return float(temperature) @cached_property @arg_premise("Keywords[i].FEPSimulation.ForceFields") @on_complex_solvent def sim_ff(self, ff: str) -> str: """ Returns the name of the force field used in the simulation. """ # FIXME: Old property names: sol_ff, cpx_ff return ff @cached_property @arg_premise("Keywords[i].FEPSimulation.Ensemble") @on_complex_solvent def sim_ensemble(self, ensemble: str) -> str: """ Returns the ensemble type of the simulation. """ # FIXME: Old property names: sol_ensemble, cpx_ensemble return ensemble @cached_property # FIXME: Didn't find "Keywords[i].FEPSimulation.FormalCharge" in the test # data, but found "Keywords[1].ProteinInfo.FormalCharge". But per # the old code the key should be the former. Tentatively make the # data an option, as opposed to a premise. # Confirm with Dima. @arg_option("Keywords[i].FEPSimulation.FormalCharge", 0) @on_complex_solvent def sim_charge(self, charge: float) -> float: """ Returns the total charge of the simulation system. """ # FIXME: Old property names: sol_charge, cpx_charge return float(charge) @cached_property @arg_option("Keywords[i].FEPSimulation.SaltMolecules") @on_complex_solvent def num_salt_atoms(self, num: Optional[int]) -> Optional[int]: """ Returns the total charge of the simulation system. """ # FIXME: Old property names: sol_salt_molecules, cpx_salt_molecules return num @cached_property @arg_option("Keywords[i].FEPSimulation.SaltConcentration") @on_complex_solvent def salt_concentration(self, c: Optional[float]) -> Optional[float]: """ Returns the concentration of salt. If the information is unavailable, returns `None`. """ # FIXME: Old property names: cpx_salt_info, sol_salt_info # FIXME: What is the unit of the concentration? Dima return c