Source code for schrodinger.test.stu.outcomes.correlate_workup

import numpy
import scipy.stats
import scipy.stats.stats

from schrodinger import structure


def correlate_workup(reference_name, calculated_name, min_correl, *filenames):
    """
    Check that two CT-level properties are sufficiently correlated.

    Usage::

        $SCHRODINGER/run correlate_workup.py <ct-level property 1>
            <ct-level property 2> <minimum correlation (R)> <file(s)>

    Every structure read from every file is inspected. Structures that do
    not carry ``reference_name`` are skipped; each remaining structure
    contributes one (reference, calculated) pair to a linear regression,
    with the reference values as x and the calculated values as y.

    :param reference_name: First property for the correlation. It does not
        have to be present in every frame (frames without it are skipped),
        so it should be used for experimental data.
    :param calculated_name: Second property for the correlation. It must be
        present in every frame that has ``reference_name``, so it should be
        used for calculated data.
    :param min_correl: Threshold for the correlation coefficient (R, not
        R-squared). If negative, R must be strictly less than this value
        (more negative correlation); otherwise R must be strictly greater
        than this value (more positive correlation).
    :param filenames: One or more structure files, each opened with
        ``structure.StructureReader`` (described as Maestro-format files in
        the original usage text).

    :return: True when the correlation satisfies the threshold.

    :raise AssertionError: If a frame that has ``reference_name`` lacks
        ``calculated_name``, if fewer than three data points were collected,
        or if the correlation does not satisfy ``min_correl``.
    """
    exp_data = []
    calc_data = []
    missing_data = []
    for fn in filenames:
        # Frames are reported 1-based in the "file:index" error labels.
        for frame_no, ct in enumerate(structure.StructureReader(fn), start=1):
            if reference_name not in ct.property:
                # No reference value for this frame: nothing to correlate.
                continue
            exp_data.append(ct.property[reference_name])
            try:
                calc_data.append(ct.property[calculated_name])
            except KeyError:
                # Keep scanning so that every offending frame is reported
                # in a single error message.
                missing_data.append(f"{fn}:{frame_no}")

    if missing_data:
        msg = "Missing property {} in the following outputs {}".format(
            calculated_name, " ".join(missing_data))
        raise AssertionError(msg)

    if len(exp_data) < 3:
        msg = "Not enough data to calculate correlation"
        raise AssertionError(msg)

    # Use the public scipy.stats entry point; scipy.stats.stats is a
    # deprecated private namespace.
    results = scipy.stats.linregress(numpy.array(exp_data),
                                     numpy.array(calc_data))
    r = results.rvalue

    # The sign of the threshold selects the direction of the comparison.
    if min_correl < 0:
        passed = r < min_correl
    else:
        passed = r > min_correl
    if not passed:
        msg = f"Correlation {r}"
        raise AssertionError(msg)
    return True