Source code for schrodinger.test.stu.outcomes.correlate_workup
import numpy
import scipy.stats.stats
from schrodinger import structure
def correlate_workup(reference_name, calculated_name, min_correl, *filenames):
    """
    Check that two CT-level properties are sufficiently correlated.

    Usage::

        $SCHRODINGER/run correlate_workup.py <ct-level property 1> <ct-level property 2> <minimum correlation (R)> <file(s)>

    :param reference_name: First property for the correlation. It does not
        have to be present in every structure; structures without it are
        skipped, so it should be used for experimental data.
    :param calculated_name: Second property for the correlation. It must be
        present in every structure that carries ``reference_name``, so it
        should be used for calculated data.
    :param min_correl: Required correlation (R, not R-squared). If negative,
        the measured R must be less than this value (more negative
        correlation); otherwise the measured R must be greater than this
        value (more positive correlation). Anything accepted by ``float()``
        is allowed, so a threshold given as a string also works.
    :param filenames: One or more Maestro-format structure files.

    :return: True if the correlation satisfies ``min_correl``.

    :raise AssertionError: If ``calculated_name`` is missing from any
        structure that has ``reference_name``, if fewer than three data
        points were collected, or if the correlation does not satisfy
        ``min_correl``.
    """
    exp_data = []
    calc_data = []
    missing_data = []
    for fn in filenames:
        # Structures are reported 1-based within their file.
        for ict, ct in enumerate(structure.StructureReader(fn), start=1):
            if reference_name not in ct.property:
                # No reference value: the structure is not part of the fit.
                continue
            exp_data.append(ct.property[reference_name])
            try:
                calc_data.append(ct.property[calculated_name])
            except KeyError:
                # Collect every offender so a single failure lists them all.
                missing_data.append(f"{fn}:{ict}")

    if missing_data:
        msg = "Missing property {} in the following outputs {}".format(
            calculated_name, " ".join(missing_data))
        raise AssertionError(msg)

    if len(exp_data) < 3:
        raise AssertionError("Not enough data to calculate correlation")

    # Coerce so that a threshold passed as text (e.g. from a command line)
    # is compared numerically instead of raising TypeError.
    threshold = float(min_correl)

    # Use the public scipy.stats entry point; the scipy.stats.stats
    # namespace is deprecated.
    r = scipy.stats.linregress(numpy.array(exp_data),
                               numpy.array(calc_data)).rvalue

    # The sign of the threshold selects the direction of the comparison.
    if threshold < 0:
        passed = r < threshold
        bound = "below"
    else:
        passed = r > threshold
        bound = "above"

    if passed:
        return True
    raise AssertionError(
        f"Correlation {r} is not {bound} the required threshold {threshold}")