# Source code for schrodinger.infra.epydoc_to_sphinx

"""
Code for converting epytext docstrings so they can be used in Sphinx.  This
module may be used as either a Sphinx plugin to allow sphinx-build to parse
epytext, or as a command line program to convert files from epytext to
reST.
"""

import argparse
import glob
import os
import re

# A list of Epytext fields to convert to Sphinx fields.  Taken from
# (and listed in the same order as) http://epydoc.sourceforge.net/fields.html
FIELDS = [
    "param", "type", "return", "rtype", "keyword", "raise", "ivar", "cvar",
    "var", "see", "note", "attention", "bug", "warning", "deprecated"
]
# Regex alternation ("param|type|return|...") matching any recognized field
# name; interpolated into the patterns used throughout this module.
ANY_FIELD = "|".join(FIELDS)


def setup(app):
    """
    Register `process_docstring` as an autodoc handler so that every
    docstring gets converted from epytext to reST.  Sphinx calls this
    function automatically when the module is loaded as an extension.

    :param app: The currently running Sphinx instance.
    :type app: sphinx.Sphinx

    :return: Extension metadata for Sphinx.
    :rtype: dict
    """
    app.connect('autodoc-process-docstring', process_docstring)
    # The conversion keeps no global state, so parallel source reads are safe.
    return {"parallel_read_safe": True}
def process_docstring(app, what, name, obj, options, lines):
    """
    autodoc-process-docstring handler: rewrite an epytext docstring as
    Sphinx reST, modifying `lines` in place.

    :param lines: Every line of the docstring being processed.  Updated in
        place with the converted text.
    :type lines: list(str)

    The remaining parameters are required by the autodoc-process-docstring
    event signature but are unused here.
    """
    _process_docstring(lines)
def _process_docstring(lines):
    """
    Convert the given docstring from Epytext to Sphinx reST format.

    :param lines: A list of all lines in the docstring to convert.  This list
        will be modified in place.
    :type lines: list(str)
    """
    _indent_field_continuations(lines)
    docstring = "\n".join(lines)
    # Convert L{...} and C{...} to backticks
    docstring = re.sub(r"(?:L|C){(.*?)}", r"`\1`", docstring, flags=re.DOTALL)
    # Remove any U{...} markup for URLs, since sphinx recognizes URLs
    # automatically without any special markup. We require that URLs start
    # with "protocol://".
    docstring = re.sub(r"U{(\w+://.*?)}", r"\1", docstring, flags=re.DOTALL)
    # Convert I{...} to asterisks (italics)
    docstring = re.sub(r"I{(.*?)}", r"*\1*", docstring, flags=re.DOTALL)
    # Convert B{...} to double asterisks (bold)
    docstring = re.sub(r"B{(.*?)}", r"**\1**", docstring, flags=re.DOTALL)
    # Convert M{...} to math role backticks (mathematical expressions)
    docstring = re.sub(r"M{(.*?)}", r":math:`\1`", docstring, flags=re.DOTALL)
    # Convert all fields from @ to colons
    docstring = re.sub(r"@(%s)" % ANY_FIELD, r":\1", docstring)
    lines[:] = docstring.split("\n")


def _indent_field_continuations(lines):
    """
    Indent any lines that continue a field started on a prior line.

    :param lines: A list of all lines in the docstring to convert.  This list
        will be modified in place.
    :type lines: list

    In the following docstring::

        '''
        @param arg: This is a very long docstring. It's so long that it
        continues on a second line.
        '''

    Epydoc recognizes that both sentences are part of the description of
    C{arg}.  However, in this docstring::

        '''
        :param arg: This is a very long docstring. It's so long that it
        continues on a second line.
        '''

    Sphinx doesn't consider "continues on a second line" to be part of the
    description of `arg`.  In order for Sphinx to recognize multi-line
    fields, any continuation lines need to be indented::

        '''
        :param arg: This is a very long docstring. It's so long that it
            continues on a second line.
        '''

    This function adds this indentation so that Sphinx will properly
    recognize multi-line fields.  This function adds indentation only where
    it's required, so it will add indentation to::

        '''
        @param arg: This is a very long docstring. It's so long that it
        continues on a second line.
        '''

    and::

        '''
        @param arg: This docstring has a bulleted list in it:
        - List item 1
        - List item 2
        '''

    but not::

        '''
        @param arg: This is a very long docstring. It's so long that it
              continues on a second line, but the second line has already
              been indented.  So has the third line.
        '''
    """
    inside_field = False
    field_indent = ""
    for idx, line in enumerate(lines):
        if inside_field:
            # A continuation line carries the field's own indentation plus
            # at most four extra spaces (or a tab) before non-field text.
            continuation_re = field_indent + r"(?:\t|(?:\ {0,4}))[^\s@]"
            if re.match(continuation_re, line):
                # This line continues a field started on a previous line and
                # needs additional indentation.
                # NOTE(review): whitespace in the original source was
                # mangled; a four-space indent is presumed here — confirm
                # against the upstream file.
                lines[idx] = "    " + line
            else:
                inside_field = False
        if not inside_field:
            start_match = re.match(r"([\ \t]*)@(?:%s)" % ANY_FIELD, line)
            if start_match:
                # This line starts a new field (it begins with an @ sign and
                # a member of FIELDS).  Remember its leading whitespace so we
                # can tell whether the following lines are indented at least
                # this much.
                inside_field = True
                field_indent = start_match.group(1)
def convert_file(filename, all_triple_quoted_strings=False):
    """
    Convert the specified file from epytext to reST, rewriting it in place.

    :param filename: The name of the file to convert.
    :type filename: str

    :param all_triple_quoted_strings: Whether to convert all comments
        enclosed within triple quotes or only proper doc strings
    :type all_triple_quoted_strings: bool
    """
    # Python source defaults to UTF-8 (PEP 3120); relying on the locale's
    # default encoding can misdecode or fail to round-trip the file on
    # platforms where that default differs (e.g. cp1252 on Windows).
    with open(filename, encoding="utf-8") as handle:
        file_text = handle.read()
    file_text = _convert_file_contents(file_text, all_triple_quoted_strings)
    with open(filename, "wt", encoding="utf-8") as handle:
        handle.write(file_text)
def _convert_file_contents(file_text, all_triple_quoted_strings=False):
    """Convert the specified Python source code from epytext to reST.

    :param file_text: The source code to convert.
    :type file_text: str

    :param all_triple_quoted_strings: Whether to convert all comments
        enclosed within triple quotes or only proper doc strings
    :type all_triple_quoted_strings: bool

    :return: The converted source code.
    :rtype: str
    """
    if all_triple_quoted_strings:
        # Simply convert all comments enclosed within triple quotes.
        return re.sub(
            r"""
            (?P<pre>                      # everything before the docstring
                (?P<quote>"{3}|'{3})\n?)  # opening triple quote
            (?P<docstring>.+?)            # the docstring itself
            (?P<post>                     # everything after the docstring
                [\ \t]*(?P=quote))        # closing triple quote
            """, _replace_docstring, file_text,
            flags=re.DOTALL | re.VERBOSE)

    # Convert the module docstring.
    file_text = re.sub(
        r"""
        (?P<pre>                          # everything before the docstring
            ^(?:[\ \t]*(?:\#[^\n]*)?\n)*  # blank or comment lines (we
                                          # intentionally match entire lines
                                          # at a time to avoid catastrophic
                                          # backtracking)
            (?P<quote>"{3}|'{3})\n?)      # opening triple quote
        (?P<docstring>.+?)                # the docstring itself
        (?P<post>                         # everything after the docstring
            [\ \t]*(?P=quote))            # closing triple quote
        """, _replace_docstring, file_text,
        flags=re.DOTALL | re.VERBOSE)

    # Convert class or function docstrings.
    file_text = re.sub(
        r"""
        (?P<pre>                          # everything before the docstring
            (?:def|class)\ +\w*\ *        # the function or class name
            (?:\(.*?\))?\ *:              # the arguments or inheritance list
                                          # (optional because of old-style
                                          # classes)
            (?:[\ \t]*(?:\#[^\n]*)?\n)*   # blank or comment lines (matching
                                          # entire lines at a time to avoid
                                          # catastrophic backtracking)
            \s*(?P<quote>"{3}|'{3})\n?)   # opening triple quote
        (?P<docstring>.+?)                # the docstring itself
        (?P<post>                         # everything after the docstring
            [\ \t]*(?P=quote))            # closing triple quote
        """, _replace_docstring, file_text,
        flags=re.DOTALL | re.VERBOSE)

    # Convert docstrings that are explicitly specified using __doc__ or _doc
    # assignment.  (We assume that the docstring uses triple quotes.)
    file_text = re.sub(
        r"""
        (?P<pre>                          # everything before the docstring
            \b(__doc__|_doc)\s*=\s*       # __doc__ assignment
            (?P<quote>"{3}|'{3})\n?)      # opening triple quote
        (?P<docstring>.+?)                # the docstring itself
        (?P<post>                         # everything after the docstring
            [\ \t]*(?P=quote))            # closing triple quote
        """, _replace_docstring, file_text,
        flags=re.DOTALL | re.VERBOSE)
    return file_text


def _replace_docstring(match):
    """
    Convert the matched docstring from epytext to reST.

    :param match: A regular expression match.  This match must contain three
        named groups:

        pre
            All matching text before the docstring including the opening
            triple quotes.
        docstring
            The docstring to be converted.
        post
            All matching text after the docstring including the closing
            triple quotes.
    :type match: re.MatchObject

    :return: A string to replace the match with.
    :rtype: str
    """
    body_lines = match.group("docstring").split("\n")
    _process_docstring(body_lines)
    converted = "\n".join(body_lines)
    return match.group("pre") + converted + match.group("post")


def _walk_paths(paths, exclusions):
    """
    Generate all filenames referenced by the command-line arguments.

    :param paths: A list of paths to yield.  For any directories, will yield
        all .py or wscript (waf Python files) files under that directory.
    :type paths: list

    :param exclusions: A list of files to exclude.  Any file name (excluding
        path) that exactly matches an element of this list will be skipped.
    :type exclusions: list
    """
    for cur_path in paths:
        for expanded in glob.iglob(cur_path):
            if os.path.isdir(expanded):
                for dirpath, _dirnames, dirfiles in os.walk(expanded):
                    for base_name in dirfiles:
                        # wscript files are waf Python files
                        is_waf_python = (base_name == "wscript"
                                         or base_name.endswith(".py"))
                        if is_waf_python and base_name not in exclusions:
                            yield os.path.join(dirpath, base_name)
            elif os.path.isfile(expanded):
                yield expanded
            else:
                raise RuntimeError("Invalid path: %s" % expanded)
def main():
    """
    Command-line entry point: convert the requested files and directories
    from epytext to reST in place.
    """
    parser = argparse.ArgumentParser(prog="epydoc_to_sphinx")
    parser.add_argument("--exclude", "-x", action="append", default=[],
                        help="Filenames to exclude from conversion")
    parser.add_argument("--verbose", "-v", action="store_true",
                        help="Print out filenames as files are converted.")
    parser.add_argument("paths", nargs="*", default=["."],
                        help="Files or directories to convert")
    parser.add_argument("--all-triple-quoted-strings", "-s",
                        action='store_true',
                        help="Run on all triple quoted blocks of text")
    args = parser.parse_args()
    for filename in _walk_paths(args.paths, args.exclude):
        if args.verbose:
            print(filename)
        convert_file(
            filename,
            all_triple_quoted_strings=args.all_triple_quoted_strings)


if __name__ == "__main__":
    main()