Source code for schrodinger.project.pandasutils
"""
A module which contains functions to convert between Schrodinger project data
and a Pandas data frame.
"""
import collections
import enum
import re
import typing
import pandas as pd
from rdkit.Chem import PandasTools
from schrodinger import project
from schrodinger.structutils.smiles import SmilesGenerator
class WhichRows(enum.Enum):
    """Choice of which project rows are converted to the data frame."""

    SELECTED = 1
    ALL = 2


class WhichColumns(enum.Enum):
    """Choice of which project columns are converted to the data frame."""

    VISIBLE = 1
    ALL = 2
def get_data_frame_from_project(pt: project.Project,
                                which_rows: WhichRows = WhichRows.ALL,
                                which_columns: WhichColumns = WhichColumns.ALL,
                                prop_filter: typing.Optional[str] = None,
                                with_rdkit: bool = False,
                                with_smiles: bool = False) -> pd.DataFrame:
    """
    Return a Pandas frame given a Schrodinger project object (as might be
    returned from maestro.get_project_table()).

    :param pt: Project (already open via Maestro or standalone) to convert
    :param which_rows: Which rows from the project are to be converted (all or
        selected)
    :param which_columns: Which columns from the project are to be converted
        (all or visible)
    :param prop_filter: A regular expression which, if defined, will restrict
        the properties to datanames which match this expression. Matching is
        done with ``re.match``, i.e. anchored at the start of the dataname.
    :param with_rdkit: A flag which indicates if RdKit MOL objects should be
        added (as an "RDKit Mol" column). The molecules are built from a
        "smiles" column, so that column is present as well when this flag is
        set.
    :param with_smiles: A flag which indicates if a "smiles" column holding
        the SMILES string generated for each row's structure should be added
    :return: A Pandas dataframe populated with data from the project
    """
    if which_columns is WhichColumns.ALL:
        prop_names = pt.getPropertyNames()
    else:
        prop_names = pt.getVisiblePropertyNames()

    if prop_filter:
        # Compile once rather than resolving the pattern for every property.
        pattern = re.compile(prop_filter)
        prop_names = [p for p in prop_names if pattern.match(p)]

    need_smiles = with_rdkit or with_smiles

    # Seed every column up front so that a project (or selection) with no rows
    # still yields a frame with the expected columns. Without this the
    # "smiles" column would be missing for an empty row set and the RDKit
    # step below would fail with a KeyError.
    data_dict = {p: [] for p in prop_names}
    if need_smiles:
        data_dict.setdefault("smiles", [])
        # Only build the generator when SMILES strings are actually required.
        sg = SmilesGenerator()

    rows = pt.all_rows if which_rows is WhichRows.ALL else pt.selected_rows
    for row in rows:
        for p in prop_names:
            data_dict[p].append(row.property[p])
        if need_smiles:
            data_dict["smiles"].append(sg.getSmiles(row.getStructure()))

    df = pd.DataFrame(data_dict)
    if with_rdkit:
        # Adds the "RDKit Mol" column by parsing the "smiles" column in place.
        PandasTools.AddMoleculeColumnToFrame(df, "smiles", "RDKit Mol")
    return df