Source code for schrodinger.application.desmond.autopartition

"""
A module to provide multiset partition, combination and processor topology
decomposition.

Copyright Schrodinger, LLC. All rights reserved.
"""

# Contributors: Teng Lin

import copy
from math import sqrt
from past.utils import old_div

import numpy


def _next_multiset_comb(m, state):
    """
    @todo: convert it to a generator.
    """
    finished = False
    changed = False
    k = len(state)
    n = len(m)
    i = k - 1
    if i >= 0:
        while not finished and not changed:

            if state[i] < m[i + n - k]:
                j = 0
                while m[j] <= state[i]:
                    j += 1
                state[i] = m[j]
                if i < k - 1:
                    l = i + 1  # noqa: E741
                    j = j + 1
                    while l < k:
                        state[l] = m[j]
                        l += 1  # noqa: E741
                        j += 1
                changed = True

            if i == 0:
                finished = True
            else:
                finished = False
            i -= 1
    return changed


def _multicomb(m, k):
    """
    :type m: list of integer
    :param m: multiset.

    :type k: int
    :param k: the total number of element that will show up in the combination.

    :return the combination in an integer number array.

    Code::

        for i in _multicomb([0, 0, 0, 1, 1, 2, 2, 2, 2, 2], 7):
            print i

    The above statement will generate below combinations::

        [0, 0, 0, 1, 1, 2, 2]
        [0, 0, 0, 1, 2, 2, 2]
        [0, 0, 0, 2, 2, 2, 2]
        [0, 0, 1, 1, 2, 2, 2]
        [0, 0, 1, 2, 2, 2, 2]
        [0, 0, 2, 2, 2, 2, 2]
        [0, 1, 1, 2, 2, 2, 2]
        [0, 1, 2, 2, 2, 2, 2]
        [1, 1, 2, 2, 2, 2, 2]

    @todo: provide a mapping function
    """
    # state will be changed in _next_multiset_comb
    # therefore its copy is yeild in the generator
    state = copy.copy(m[:k])
    yield state[:]
    while _next_multiset_comb(m, state):
        yield state[:]


def _list2array(m, n):
    c = numpy.zeros(n, dtype='int')
    for i in m:
        c[i] += 1
    return c


def _array2list(m):
    t = []
    for i, j in enumerate(m):
        t.extend([i] * j)
    return t


[docs]def all_multicomb(m):
    """
    Return all combinations for multiset m

    :type m: list of integer
    :param m: the array contains the maximum number of individual unique
        element in the multiset.
    """

    lst = _array2list(m)
    for i in range(0, len(lst) + 1):
        for q in _multicomb(lst, i):
            yield _list2array(q, len(m))


[docs]def get_next_partition(factors):
    """
    A partition generator

    :type factors: list of integer
    :param factors: a list of prime numbers
    :return: a three element tuple that represent the partition

    @todo:
        this algorithm is still not very efficient due to the generation
        of duplicate combination. A vector partition function with restricted
        growth order will be a better option.

    """
    # count and separate unique prime numbers
    d = {}
    for e in factors:
        if e not in d:
            d[e] = 0
        d[e] += 1
    items = list(d.items())
    items.sort()

    f = numpy.array([k for k, v in items], dtype='int')
    m = numpy.array([v for k, v in items], dtype='int')

    s = set()
    all_prod = numpy.prod(f**m)

    for s1 in all_multicomb(m):

        p1 = numpy.prod(f**s1)
        s2_s3 = m - s1

        for s2 in all_multicomb(s2_s3):
            p2 = numpy.prod(f**s2)
            p3 = all_prod / p1 / p2
            p = [p1, p2, p3]
            p.sort()
            p = tuple(p)

            # do not yield duplicate combination
            if p not in s:
                s.add(p)
                yield p


[docs]def next_prime(n):
    """
    A prmie number generator that generates prime number less than or equal to n
    :type n: int
    :param n:

    :return prime number
    """
    yield 2
    primes = []
    for m in range(3, n + 1, 2):
        if all(m % p for p in primes):
            primes.append(m)
            yield m


[docs]def get_prime_factors(n):
    """
    A function to generate all factorizations of a given integer n.
    :type n: int
    :param n: an integer to be factorized

    :return a list of prime factors.
    """
    factors = []
    for prime in next_prime(int(sqrt(n)) + 1):
        if prime is None or sqrt(n) + 1 < prime:
            break
        while not n % prime:
            n /= prime
            factors.append(prime)
    if n != 1:
        factors.append(n)
    return factors


[docs]def auto_partition(dims, nproc):
    """
    A function to calculate best cpu partition based on dimension and number of processor.
    The object function to optimize:

    :type dims: list of integer
    :param dims: simulation box

    :type nproc: int
    :param nproc: number of cores

    :return an three element integer array that represents the best partition.
    """

    def calc_g(n1, n2, n3, d1, d2, d3):
        """
        calculate object function
        """
        return (old_div(n2, n1) - old_div(d2, d1))**2 + (old_div(n3, n1) - old_div(d3, d1))**2 + (old_div(n3, n2) - old_div(d3, d2))**2 + \
               (old_div(n1, n2) - old_div(d1, d2))**2 + (old_div(n1, n3) - old_div(d1, d3))**2 + (old_div(n2, n3) - old_div(d2, d3))**2

    if nproc == 1:
        return [1, 1, 1]

    # calculate factors of nproc
    factors = get_prime_factors(nproc)

    dims = numpy.array(dims, dtype='int')
    # calculate normalized dimension
    normalized_dims = old_div(dims, min(dims))
    order = numpy.array(normalized_dims).argsort()

    # sort dimensions in ascending order
    normalized_dims.sort()
    d1, d2, d3 = normalized_dims

    partition_dict = {}
    for n1, n2, n3 in get_next_partition(factors):
        if (n1, n2, n3) not in partition_dict:
            partition_dict[(n1, n2, n3)] = calc_g(float(n1), float(n2),
                                                  float(n3), d1, d2, d3)

    # locate partition with minimum object function
    g = [v for v in partition_dict.values()]
    i = numpy.array(g).argmin()
    min_g = g[i]
    p = [k for k in partition_dict.keys()][i]
    partition = numpy.zeros(len(p), dtype='int')

    for i, o in enumerate(order):
        partition[o] = p[i]

    return partition


if __name__ == '__main__':

    factors = get_prime_factors(2 * 3**2 * 5**3)
    for n1, n2, n3 in get_next_partition(factors):
        print(n1, n2, n3)