Source code for schrodinger.test.performance.reporter

"""
Basic client-side components for performance testing.  Typical clients should
only need to use the `Test` class.

@copyright: (c) Schrodinger, LLC All rights reserved.

"""

import json
import numbers
import os
import platform
import re
import typing
from past.utils import old_div
from typing import Optional

import psutil
import requests

import schrodinger.job.util
from schrodinger.infra import mm
from schrodinger.test.stu import client as stu_client
from schrodinger.test.stu import common
from schrodinger.utils import fileutils
from schrodinger.utils import mmutil
from schrodinger.utils import sysinfo

from schrodinger.test.performance import client
from schrodinger.test.performance import exceptions

# Number of bytes in one megabyte (1024 * 1024), kept as a float.
# system_information() divides psutil byte totals by this value.
MB = 1024.0 * 1024.0
# Build type labels accepted by install_information().
BUILD_TYPES = ('OB', 'NB', 'CB', 'Dev')

# Alias of common.BASE_URL.
# @deprecated 2021-10-26
HOST = common.BASE_URL

### PUBLIC API:
##############


class Test:
    """
    A performance test. `name` and `product` must uniquely specify a test.
    `product` is required to match an existing product name in the database.
    New tests require descriptions when uploaded. The descriptions of existing
    tests are not changed by result upload.
    The baseClient must be an instance of PerformanceTestClient.

    Individual results are added with addResult(). All results are uploaded to
    the database when report() is called.

    Instantiate with `scival` set to `True` if you are working with scival
    performance tests.

    Typical pattern::

        test = create_performance_test_reporter(
            name="distribution_size",
            product="shared components",
            description="Determine the size of the SCHRODINGER distribution and report it to the performance database.",
            scival=False,
            upload=True,
        )
        # Result with a metric name and value
        test.addResult('file count', 200000)
        # Result with a metric name, value, and units
        test.addResult('size', 20000, 'MB')
        test.report()

    """

    def __init__(
        self,
        name,
        product,
        description=None,
        scival=False,
        upload=True,
        baseClient=None,
    ):
        """
        Validate the arguments and, when uploading, look up or create the
        test on the server.

        :param name: Non-empty name of the test.
        :param product: Non-empty name of the product being tested.
        :param description: (optional) description of the test; forwarded to
            `baseClient.get_or_create_test`.
        :param scival: Whether this is a scival test; forwarded to
            `baseClient.get_or_create_test`.
        :param upload: When False, no username lookup or client call is made
            and `report()` will not upload anything.
        :param baseClient: Client used to talk to the server. When it is
            falsy and `upload` is true, a `client.PerformanceTestClient` is
            created for the current STU username.

        :raises TypeError: if `name` or `product` is empty or not a string,
            if `description` is truthy but not a string, or if `scival` is
            not a bool.
        """
        # NOTE: despite the wording of this message, baseClient is optional.
        if not name or not product:
            raise TypeError('name, product and baseClient are required')
        if not isinstance(name, str):
            raise TypeError('Name must be a string')
        if not isinstance(product, str):
            raise TypeError('Product name must be a string')
        if description and not isinstance(description, str):
            raise TypeError('Description must be a string')
        if not isinstance(scival, bool):
            raise TypeError('scival must be a boolean')

        # Define every attribute up front so that instances created with
        # upload=False expose the same attributes as uploading ones.
        self.results = []
        self.baseClient = baseClient
        self.username = None
        self.test = None
        if not upload:
            # Nothing will be sent to the server, so neither the STU username
            # nor a client is needed.
            return

        username = stu_client.get_stu_username()
        # WARNING: we prefer that you use `create_performance_test_reporter`, rather than access this constructor directly
        if not baseClient:
            baseClient = client.PerformanceTestClient(username)

        test = baseClient.get_or_create_test(name,
                                             description,
                                             product,
                                             scival=scival)
        self.baseClient = baseClient
        self.username = username
        self.test = test

    def addResult(self, name: str, value: float, units: Optional[str] = None):
        """
        Add a result to the current test. Results are not uploaded until
        report() is called.

        :param name: Name of the metric being reported
        :param value: Current value of the metric
        :param units: (optional) units of the value.

        :raises TypeError: if `validate_types` rejects any of the arguments.
        """
        # Validate data types before attempting upload to the server.
        validate_types(name, value, units)

        metric = dict(name=name, units=units)
        result = dict(metric=metric, value=value)
        self.results.append(result)

    def report(self, build_id=None, buildtype=None, mmshare=None, release=None):
        """
        Once all results have been added to the test, report them to the
        database.

        The host, system and build records are looked up or created through
        `self.baseClient`, then all accumulated results are posted in a single
        request. Nothing is uploaded when the instance has no server-side test
        (e.g. it was created with `upload=False`).

        :param build_id: forwarded to `install_information`.
        :param buildtype: forwarded to `install_information`.
        :param mmshare: forwarded to `install_information`.
        :param release: forwarded to `install_information`.

        :raises ValueError: if no results have been added.
        """
        if not self.results:
            raise ValueError("No results to report")
        if not self.test:
            return

        host_data = host_information()
        host_uri = self.baseClient.get_or_create(api_url('host'), host_data)

        # The system record refers to its host by numeric ID, not by URI.
        system_data = system_information(resource_id(host_uri))
        system_uri = self.baseClient.get_or_create(api_url('system'),
                                                   system_data)

        build_data = install_information(build_id,
                                         buildtype,
                                         mmshare=mmshare,
                                         release=release)
        build_uri = self.baseClient.get_or_create(api_url('build'), build_data)
        post_data = dict(test=self.test,
                         system=system_uri,
                         build=build_uri,
                         metrics=self.results)

        post_data = json.dumps(post_data)
        self.baseClient.post(performance_api_url('result'), data=post_data)


def validate_types(name, value, units=None):
    """
    Check the types of a metric before it is uploaded to the server.

    :param name: metric name; must be a string.
    :param value: metric value; must be a `numbers.Number`.
    :param units: (optional) units; only checked when truthy, in which case
        it must be a string.

    :raises TypeError: on the first argument that fails its check.
    """
    name_is_text = isinstance(name, str)
    if not name_is_text:
        raise TypeError(
            f'Names of metrics values must be strings (found {name})')

    value_is_numeric = isinstance(value, numbers.Number)
    if not value_is_numeric:
        raise TypeError(
            f'Result values must be numeric (found {value!r} for {name})')

    # Falsy units (None, '', 0, ...) are treated as "no units given".
    if units:
        if not isinstance(units, str):
            raise TypeError(
                f'Units must be strings (found {units!r} for {name})')


def create_performance_test_reporter(name: str, product: str, description: str,
                                     scival: bool, upload: bool) -> Test:
    """
    Factory for `Test` instances backed by a new PerformanceTestClient. This
    is the preferred way to obtain a `Test`, rather than calling its
    constructor directly.

    The client is created for the current STU username.

    :param name: name of the test
    :param product: product being tested
    :param description: description of the test
    :param scival: is the test a scival (scientific validation) test
    :param upload: should the results be uploaded to the STU service
    :return: the configured `Test`
    """
    stu_user = stu_client.get_stu_username()
    return Test(
        name=name,
        product=product,
        description=description,
        scival=scival,
        upload=upload,
        baseClient=client.PerformanceTestClient(stu_user),
    )


### PRIVATE/SUPPORT code
###
### Everything below here is intended to support the public API above
#####################################################################


# @deprecated
def get_or_create_test(name,
                       description,
                       product_name,
                       username=None,
                       scival=False):
    """
    Get or create a single test from the performance database.

    Setting `scival` to `True` will add the 'scival' tag when creating a new test.

    :param name: name of the test.
    :param description: description of the test; only required when the test
        does not exist yet and has to be created.
    :param product_name: name of an existing product on the server.
    :param username: user to authenticate as; defaults to the current STU
        username.
    :param scival: whether to tag a newly created test with 'scival'.

    :return: resource URI of the test. For a newly created test this is the
        response's `location` header with `common.BASE_URL` removed.

    :raises exceptions.BadResponse: if the product does not exist, or the
        server answers the product lookup with 401.
    :raises requests.exceptions.HTTPError: for any other failed request.
    :raises ValueError: if the test must be created and `description` is
        empty.
    """
    if username is None:
        username = stu_client.get_stu_username()
    auth = stu_client.ApiKeyAuth(username)

    product_url = api_url('product')
    params = dict(name=product_name)
    response = requests.get(product_url, params=params, auth=auth)

    no_product_msg = ('No product named "{}". See the list of product names '
                      'at {}/products. File a JIRA case in SHARED if you need '
                      'to add a product.'.format(product_name, common.BASE_URL))
    if response.status_code == 404:
        raise exceptions.BadResponse(no_product_msg)
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_error:
        if response.status_code == 401:
            # Keep the original HTTP error attached as the cause.
            raise exceptions.BadResponse(
                f'{http_error}, please verify that the appropriate'
                f' user is making this request: {username=}') from http_error
        raise
    data = response.json()
    if not data['objects']:
        raise exceptions.BadResponse(no_product_msg)
    product = data['objects'][0]['resource_uri']
    product_id = resource_id(product)

    test_url = performance_api_url('test')
    test_dict = dict(name=name, product=product_id)
    # Get an existing test:
    response = requests.get(test_url, params=test_dict, auth=auth)
    # Fail on an HTTP error here rather than on a confusing JSON/KeyError
    # while reading the body of an error response.
    response.raise_for_status()
    objects = response.json()['objects']
    if objects:
        return objects[0]['resource_uri']

    # Create a new test:
    if not description:
        raise ValueError("Description is required when uploading a new test.")

    test_dict['description'] = description
    if scival:
        test_dict['tags'] = ['scival']
    response = requests.post(test_url,
                             data=json.dumps(test_dict),
                             headers={'content-type': 'application/json'},
                             auth=auth)
    response.raise_for_status()
    location = response.headers['location']
    return location.replace(common.BASE_URL, '')


def api_url(resource_name, item_id=None, host=None):
    """
    Build the core-server API address for a resource.

    :param resource_name: name of the resource, e.g. 'host'.
    :param item_id: (optional) ID of a single item; appended as a trailing
        path component when not None.
    :param host: (optional) base URL; `common.BASE_URL` is used when falsy.
    :return: URL of the form `<host>/api/v1/<resource_name>/[<item_id>/]`.
    """
    base = host or common.BASE_URL
    item_part = '' if item_id is None else str(item_id) + '/'
    return base + '/api/v1/' + resource_name + '/' + item_part


def performance_api_url(resource_name, item_id=None, host=None):
    """
    Build an API address under the server's '/performance' prefix.

    :param resource_name: name of the resource, e.g. 'result'.
    :param item_id: (optional) ID of a single item.
    :param host: (optional) base URL; `common.BASE_URL` is used when falsy.
    :return: the `api_url` result for `<host>/performance`.
    """
    performance_root = (host or common.BASE_URL) + '/performance'
    return api_url(resource_name, item_id, performance_root)


def resource_id(uri):
    """
    Get the resource's ID number from a uri.

    :param uri: resource URI ending in a run of digits, optionally followed
        by a single '/'.
    :return: the trailing digits, as a string.
    :raises ValueError: if `uri` does not end in digits.
    """
    match = re.search(r'(\d+)/?$', uri)
    if match is None:
        # Without this guard the failure is an AttributeError on None that
        # does not mention the offending URI.
        raise ValueError(f'No numeric resource ID at the end of URI {uri!r}')
    return match.group(1)


def get_or_create(url, auth, params):
    """
    Get or create a resource matching the parameters.

    The resource is first looked up with a GET using `params` as the query.
    If nothing matches, `params` is POSTed as JSON to create it.

    :param url: API address of the resource collection.
    :param auth: authentication object passed to `requests`.
    :param params: dict of fields identifying (and, if needed, defining) the
        resource.
    :return: resource URI of the first match, or for a new resource the
        response's `location` header with `common.BASE_URL` removed.
    :raises requests.exceptions.HTTPError: if either request fails.
    """
    response = requests.get(url, params=params, auth=auth)
    # Fail on an HTTP error here rather than on a confusing JSON/KeyError
    # while reading the body of an error response.
    response.raise_for_status()
    objects = response.json()['objects']
    if objects:
        return objects[0]['resource_uri']

    response = requests.post(url,
                             data=json.dumps(params),
                             headers={'content-type': 'application/json'},
                             auth=auth)
    response.raise_for_status()
    location = response.headers['location']
    return location.replace(common.BASE_URL, '')


def system_information(host):
    """
    System information required to report results.

    :param host: identifier of the host record this system belongs to; it is
        stored unchanged under the 'host' key.
    :return: dict with keys `host`, `processor_count`, `memory`, `home_size`
        and `scratch_size`. The last three are psutil totals divided by `MB`
        and truncated to int.
    """
    processor_count = psutil.cpu_count()
    # MB is a float, so plain `/` is true division here, which is what the
    # Python 2 compatibility shim `old_div` resolved to.
    memory = int(psutil.virtual_memory().total / MB)
    home = fileutils.get_directory_path(fileutils.HOME)
    home_size = int(psutil.disk_usage(home).total / MB)
    scratch = fileutils.get_directory_path(fileutils.TEMP)
    scratch_size = int(psutil.disk_usage(scratch).total / MB)

    return dict(host=host,
                processor_count=processor_count,
                memory=memory,
                home_size=home_size,
                scratch_size=scratch_size)


def host_information():
    """
    Collect the description of the current host for the performance server.

    The platform is the last path component of the `MMSHARE_EXEC` environment
    variable, which therefore has to be set.

    :return: dict with keys `name`, `platform`, `os_version`, `jobserver`
        and `processor`.
    """
    node_name = platform.node()
    platform_name = os.path.basename(os.getenv('MMSHARE_EXEC'))
    os_name = sysinfo.get_osname()
    cpu_name = sysinfo.get_cpu()
    uses_jobserver = mmutil.feature_flag_is_enabled(mmutil.JOB_SERVER)
    return {
        'name': node_name,
        'platform': platform_name,
        'os_version': os_name,
        'jobserver': uses_jobserver,
        'processor': cpu_name,
    }


# @deprecated
def post_system(auth: schrodinger.test.stu.client.ApiKeyAuth, baseClient):
    """
    Post the current host's system information to the performance test server.

    The host and system records are each looked up or created twice: once
    through the module-level `get_or_create` using `auth`, and once through
    `baseClient.get_or_create`. Only the URIs from the module-level calls are
    used.

    :param auth: authentication object for the module-level `get_or_create`.
    :param baseClient: client whose `get_or_create(url, data)` is also called.
    :return: URI for the system, as returned by the module-level
        `get_or_create`.
    """
    host_payload = host_information()
    host_uri = get_or_create(api_url('host'), auth, host_payload)
    baseClient.get_or_create(api_url('host'), host_payload)

    # The system record refers to its host by numeric ID, not by URI.
    system_payload = system_information(resource_id(host_uri))
    system_uri = get_or_create(api_url('system'), auth, system_payload)
    baseClient.get_or_create(api_url('system'), system_payload)
    return system_uri


def install_information(build_id=None,
                        buildtype=None,
                        mmshare=None,
                        release=None):
    """
    Execution environment information required to report results.

    Any value that is not supplied is filled in: `mmshare` and `release` from
    the `mm` library, `build_id` and `buildtype` from
    `guess_build_type_and_id`. Values supplied by the caller are never
    replaced.

    :param build_id: (optional) build ID.
    :param buildtype: (optional) build type; must end up being one of
        `BUILD_TYPES`.
    :param mmshare: (optional) mmshare version.
    :param release: (optional) release name; may only be given together with
        `mmshare`.

    :return: dict with keys `mmshare`, `release`, `build_id` and `buildtype`.

    :raises ValueError: if `release` is given without `mmshare`, or if the
        resulting build type is not in `BUILD_TYPES`.
    """
    if not mmshare:
        if release:
            raise ValueError('mmshare is required if release is provided')
        mmshare = mm.mmfile_get_product_version("mmshare")
    if not release:
        release = mm.mmfile_get_release_name()
    if not build_id or not buildtype:
        guessed_type, guessed_id = guess_build_type_and_id(mmshare, buildtype)
        # Only fill in what the caller left out, so that an explicit build_id
        # is kept even when buildtype has to be guessed.
        buildtype = buildtype or guessed_type
        build_id = build_id or guessed_id

    if buildtype not in BUILD_TYPES:
        raise ValueError('Build type must be one of {}. Found "{}".'.format(
            ', '.join(BUILD_TYPES), buildtype))

    return dict(mmshare=mmshare,
                release=release,
                build_id=build_id,
                buildtype=buildtype)


def guess_build_type_and_id(mmshare: int,
                            buildtype=None) -> typing.Tuple[str, str]:
    """
    Work out default values for the buildtype and build_id.

    A truthy `buildtype` argument is returned unchanged; otherwise the
    environment variable SCHRODINGER_BUILDTYPE is used, falling back to 'NB'
    when it is not set. The build ID is read from SCHRODINGER_BUILD_ID; when
    that is unset or empty it is 'build-' followed by the mmshare version
    with its first two characters removed.

    :param mmshare: mmshare version number, e.g. 54053
    :type mmshare: int
    :param buildtype: NB or OB
    :type buildtype: str

    :return: Tuple of buildtype and build ID, e.g. 'NB', 'build-075'
    :rtype: tuple
    """
    environment = os.environ
    if buildtype:
        chosen_type = buildtype
    else:
        chosen_type = environment.get('SCHRODINGER_BUILDTYPE', 'NB')

    chosen_id = environment.get('SCHRODINGER_BUILD_ID', None)
    if not chosen_id:
        version_tail = str(mmshare)[2:]
        chosen_id = 'build-' + version_tail
    return chosen_type, chosen_id