Source code for schrodinger.test.stu.outcomes.correlate_to_reference_file
import numpy as np
import scipy.stats.stats
from schrodinger import structure
def read_prop_by_title(fn, prop_name):
    """
    Collect one ct-level property from a structure file, keyed by ct title.

    Structures that do not carry `prop_name` are skipped.

    :param fn: Path of the structure file to read
    :type fn: str
    :param prop_name: Name of the ct-level property to read
    :type prop_name: str
    :return: Mapping of ct title to the value of `prop_name` for that ct
    :rtype: dict
    :raise RuntimeError: If two structures carrying `prop_name` share a title
    """
    values_by_title = {}
    for st in structure.StructureReader(fn):
        # Structures without the requested property contribute nothing.
        if prop_name not in st.property:
            continue
        title = st.title
        # Titles are the lookup key, so a repeat would be ambiguous.
        if title in values_by_title:
            raise RuntimeError('Titles must be unique in %s (%s)' %
                               (fn, title))
        values_by_title[title] = st.property[prop_name]
    return values_by_title
def correlate_to_reference_file_workup(file_name, file_property, ref_file_name,
                                       ref_file_property, min_correl):
    r"""
    Determine the correlation between ct-level real properties from one
    file to a reference. CT's are matched by their titles.

    Every title read from the reference file must also be present in
    `file_name`; titles that only occur in `file_name` are ignored.

    :param file_name: Path to the file to check, readable by structurereader
    :type file_name: str
    :param file_property: Name of the ct-level property in the file_name to
                          use. This should start with r\_ or i\_
    :type file_property: str
    :param ref_file_name: Path to the reference file, readable by
                          structurereader
    :type ref_file_name: str
    :param ref_file_property: Name of the ct-level property in ref_file_name to
                              use. This should start with a r\_ or i\_
    :type ref_file_property: str
    :param min_correl: Correlation (R, not R-squared) should be less than this
                       value if this is a negative value (more negative
                       correlation) and more than this value if this is a
                       positive value (more positive correlation). A value
                       of exactly 0 performs no check.
    :type min_correl: float
    :return: True when the correlation satisfies `min_correl`
    :rtype: bool
    :raise RuntimeError: If a title from the reference file has no data in
                         `file_name`
    :raise AssertionError: If the correlation does not reach `min_correl`,
                           or is NaN while `min_correl` is non-zero
    """
    # Match by title
    file_dict = read_prop_by_title(file_name, file_property)
    ref_dict = read_prop_by_title(ref_file_name, ref_file_property)

    # Build the two value lists in the same (reference) title order
    ref_data = []
    file_data = []
    for title, ref_value in ref_dict.items():
        if title not in file_dict:
            raise RuntimeError("Cannot find data for %s(%s) in %s" %
                               (title, file_property, file_name))
        ref_data.append(ref_value)
        file_data.append(file_dict[title])

    # Use the public scipy.stats entry point; the scipy.stats.stats
    # namespace is a deprecated private alias.
    results = scipy.stats.linregress(np.array(file_data), np.array(ref_data))
    correl = results[2]  # third element of the result is the R value

    report = f"Correlation is {correl:5.3f} -- Target is {min_correl:5.3f}"
    # The tests are phrased as "not (passes)" so that a NaN correlation,
    # for which every ordering comparison is False, is reported as a
    # failure instead of slipping through.
    if min_correl < 0 and not correl <= min_correl:
        raise AssertionError(report)
    if min_correl > 0 and not correl >= min_correl:
        raise AssertionError(report)
    return True