linux/Documentation/sphinx/kernel_include.py

#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707

"""
    kernel-include
    ~~~~~~~~~~~~~~

    Implementation of the ``kernel-include`` reST-directive.

    :copyright:  Copyright (C) 2016  Markus Heiser
    :license:    GPL Version 2, June 1991 see linux/COPYING for details.

    The ``kernel-include`` reST-directive is a replacement for the ``include``
    directive. The ``kernel-include`` directive expands environment variables in
    the path name and allows including files from arbitrary locations.

    .. hint::

      Including files from arbitrary locations (e.g. from ``/etc``) is a
      security risk for builders. This is why the ``include`` directive from
      docutils *prohibits* pathnames pointing to locations *above* the
      filesystem tree where the reST document with the include directive is
      placed.

    Substrings of the form $name or ${name} are replaced by the value of
    environment variable name. Malformed variable names and references to
    non-existing variables are left unchanged.

    This extension extends the ``include`` directive, adding some extra
    options:

    1. :generate-cross-refs:

        If present, instead of reading the file, it calls ParseDataStructs()
        class, which converts C data structures into cross-references to
        be linked to ReST files containing a more comprehensive documentation;

    2. :exception-file:

        Used together with :generate-cross-refs:

        Points to a file containing rules to ignore C data structs or to
        use a different reference name, optionally using a different
        reference type.

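    3. :warn-broken:

        Used together with :generate-cross-refs:

        Warn when an auto-generated cross reference points to a target that
        doesn't exist.

    4. :toc:

        Used together with :generate-cross-refs:

        Output a TOC table containing the cross references instead of the
        C source code with cross-references.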

"""

# ==============================================================================
# imports
# ==============================================================================

import os.path
import re
import sys

from docutils import io, nodes, statemachine
from docutils.statemachine import ViewList
from docutils.utils.error_reporting import SafeString, ErrorString
from docutils.parsers.rst import directives
from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
from docutils.parsers.rst.directives.misc import Include

from sphinx.util import logging

# Absolute path of the kernel source tree. The "srctree" environment variable
# is mandatory: if it is unset, importing this module raises KeyError.
srctree = os.path.abspath(os.environ["srctree"])
# Put <srctree>/tools/docs/lib at the front of the module search path before
# importing parse_data_structs (presumably that is where the module lives).
sys.path.insert(0, os.path.join(srctree, "tools/docs/lib"))

from parse_data_structs import ParseDataStructs

# Extension version, returned to Sphinx by setup().
__version__ = "1.0"
logger = logging.getLogger(__name__)

# Matches a role reference enclosed in backslashes, such as
# "\ :ref:`text <target>`\". Groups: role name (ref, c:type or c:func),
# the text before an optional "<", and the optional target inside "<...>".
RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
# Matches any text enclosed in single backquotes; group 1 is the inner text.
RE_SIMPLE_REF = re.compile(r'`([^`]+)`')


# ==============================================================================
class KernelInclude(Include):
    """KernelInclude (``kernel-include``) directive.

    Subclass of the docutils ``Include`` directive. On top of the inherited
    options it accepts ``generate-cross-refs``, ``warn-broken``, ``toc`` and
    ``exception-file``, and it expands environment variables in the file
    name given as the directive argument.
    """

    # Add extra options
    option_spec = Include.option_spec.copy()

    option_spec.update({
        'generate-cross-refs': directives.flag,
        'warn-broken': directives.flag,
        'toc': directives.flag,
        'exception-file': directives.unchanged,
    })

    def read_rawtext(self, path, encoding):
        """
        Read and process file content with error handling.

        The file is recorded as a dependency of the document and then read
        through ``docutils.io.FileInput``.

        :param path: name of the file to read.
        :param encoding: encoding passed to ``FileInput``.
        :returns: the file content, as returned by ``FileInput.read()``.
        :raises: the directive's ``severe`` error if the path can't be
                 encoded, the file can't be opened or can't be decoded.
        """
        try:
            self.state.document.settings.record_dependencies.add(path)
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=self.state.document.settings.input_encoding_error_handler)
        except UnicodeEncodeError:
            raise self.severe('Problems with directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' % SafeString(path))
        except IOError as error:
            raise self.severe('Problems with directive path:\n%s.' % ErrorString(error))

        try:
            return include_file.read()
        except UnicodeError as error:
            raise self.severe('Problem with directive:\n%s' % ErrorString(error))

    def xref_text(self, env, path, tab_width):
        """
        Read and add contents from a C file parsed to have cross references.

        There are two types of supported output here:
        - A C source code with cross-references;
        - a TOC table containing cross references.

        The generated text is inserted into the state machine input, so it
        gets parsed as reST.

        :param env: Sphinx build environment; ``path`` is added to its
                    ``_xref_files`` set when ``warn-broken`` is given.
        :param path: name of the C file to parse.
        :param tab_width: tab width used when splitting the text in lines.
        :returns: an empty list, as no node is produced directly.
        """
        parser = ParseDataStructs()
        parser.parse_file(path)

        if 'exception-file' in self.options:
            # The exception file name is relative to the directory of the
            # reST file that contains the directive
            source_dir = os.path.dirname(os.path.abspath(
                self.state_machine.input_lines.source(
                    self.lineno - self.state_machine.input_offset - 1)))
            exceptions_file = os.path.join(source_dir, self.options['exception-file'])
            parser.process_exceptions(exceptions_file)

        # Store references on a symbol dict to be used at check time
        if 'warn-broken' in self.options:
            env._xref_files.add(path)

        if "toc" in self.options:
            rawtext = parser.gen_toc()
        else:
            # Apply start-line/end-line/start-after/end-before to the
            # generated text and keep the result. The directive header is
            # prepended only afterwards, so the range options can't strip it.
            rawtext = ".. parsed-literal::\n\n" + self.apply_range(parser.gen_output())

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        # Append line numbers data

        startline = self.options.get('start-line', None)

        result = ViewList()
        if startline and startline > 0:
            offset = startline - 1
        else:
            offset = 0

        for ln, line in enumerate(include_lines, start=offset):
            result.append(line, path, ln)

        self.state_machine.insert_input(result, path)

        return []

    def apply_range(self, rawtext):
        """
        Restrict ``rawtext`` to the range selected by the directive options.

        ``start-line`` and ``end-line`` are applied first, as a slice over
        the lines of the text. ``start-after`` and ``end-before`` are then
        applied, in this order, to what is left.

        :param rawtext: text to be trimmed.
        :returns: the trimmed text; unchanged if no range option is set.
        :raises: the directive's ``severe`` error if the ``start-after`` or
                 ``end-before`` text is not found.
        """
        # Get to-be-included content
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            if startline or (endline is not None):
                lines = rawtext.splitlines()
                rawtext = '\n'.join(lines[startline:endline])
        except UnicodeError as error:
            raise self.severe(f'Problem with "{self.name}" directive:\n'
                              + io.error_string(error))
        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get("start-after", None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[after_index + len(after_text) :]
        before_text = self.options.get("end-before", None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[:before_index]

        return rawtext

    def literal(self, path, tab_width, rawtext):
        """
        Output a literal block.

        :param path: name of the included file, stored as the node source.
        :param tab_width: tabs are expanded to this width, unless negative.
        :param rawtext: text to be placed inside the literal block.
        :returns: a list with a single ``literal_block`` node.
        :raises: the directive's ``error`` if ``number-lines`` has a
                 non-integer value.
        """

        # Convert tabs to spaces, if `tab_width` is positive.
        if tab_width >= 0:
            text = rawtext.expandtabs(tab_width)
        else:
            text = rawtext
        literal_block = nodes.literal_block(rawtext, source=path,
                                            classes=self.options.get("class", []))
        literal_block.line = 1
        self.add_name(literal_block)
        if "number-lines" in self.options:
            try:
                startline = int(self.options["number-lines"] or 1)
            except ValueError:
                raise self.error(":number-lines: with non-integer start value")
            # The last line number depends on how many lines are included,
            # so split the text the same way code() does.
            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)
            endline = startline + len(include_lines)
            if text.endswith("\n"):
                text = text[:-1]
            tokens = NumberLines([([], text)], startline, endline)
            for classes, value in tokens:
                if classes:
                    literal_block += nodes.inline(value, value,
                                                  classes=classes)
                else:
                    literal_block += nodes.Text(value, value)
        else:
            literal_block += nodes.Text(text, text)
        return [literal_block]

    def code(self, path, tab_width, rawtext):
        """
        Output a code block.

        The work is delegated to the docutils ``CodeBlock`` directive, which
        receives the value of the ``code`` option as its argument. That
        option is removed from ``self.options`` and ``source`` is set there.

        :param path: name of the included file, stored as ``source`` option.
        :param tab_width: tab width used when splitting the text in lines.
        :param rawtext: text to be placed inside the code block.
        :returns: whatever ``CodeBlock.run()`` returns.
        """

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        self.options["source"] = path
        codeblock = CodeBlock(self.name,
                              [self.options.pop("code")],  # arguments
                              self.options,
                              include_lines,
                              self.lineno,
                              self.content_offset,
                              self.block_text,
                              self.state,
                              self.state_machine)
        return codeblock.run()

    def run(self):
        """
        Include a file as part of the content of this reST file.

        Environment variables in the directive argument are expanded before
        the path is resolved. Depending on the options, the output is the
        cross-referenced text (``generate-cross-refs``), a code block
        (``code``) or, otherwise, a literal block.

        :returns: a list of nodes, possibly empty.
        :raises: the directive's ``severe`` error for paths starting with
                 ``/etc``, and its ``warning`` if file insertion is disabled.
        """
        env = self.state.document.settings.env
        path = os.path.realpath(os.path.expandvars(self.arguments[0]))

        # to get a bit security back, prohibit /etc:
        if path.startswith(os.sep + "etc"):
            raise self.severe('Problems with "%s" directive, prohibited path: %s' %
                              (self.name, path))

        self.arguments[0] = path

        env.note_dependency(os.path.abspath(path))

        # HINT: I had to copy&paste the whole Include.run method. I'am not happy
        # with this, but due to security reasons, the Include.run method does
        # not allow absolute or relative pathnames pointing to locations *above*
        # the filesystem tree where the reST document is placed.

        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        source = self.state_machine.input_lines.source(self.lineno -
                                                       self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))
        path = directives.path(self.arguments[0])
        if path.startswith("<") and path.endswith(">"):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(source_dir, path))

        # HINT: this is the only line I had to change / commented out:
        # path = utils.relative_path(None, path)

        encoding = self.options.get("encoding",
                                    self.state.document.settings.input_encoding)
        tab_width = self.options.get("tab-width",
                                     self.state.document.settings.tab_width)

        # Get optional arguments to related to cross-references generation
        if "generate-cross-refs" in self.options:
            return self.xref_text(env, path, tab_width)

        rawtext = self.read_rawtext(path, encoding)
        rawtext = self.apply_range(rawtext)

        if "code" in self.options:
            return self.code(path, tab_width, rawtext)

        return self.literal(path, tab_width, rawtext)

# ==============================================================================

# (source file, message) pairs already warned about, to avoid duplicates
reported = set()

def check_missing_refs(app, env, node, contnode):
    """
    Warn about broken references inside files with generated cross references.

    Only nodes whose source is listed in ``env._xref_files`` are checked, and
    each (source, message) pair is reported a single time. The function
    always returns None, so it never resolves the reference by itself.
    """
    origin = node.source
    if not origin:
        return None

    try:
        tracked_files = env._xref_files
    except AttributeError:
        logger.critical("FATAL: _xref_files not initialized!")
        raise

    # Only show missing references for kernel-include reference-parsed files
    if origin not in tracked_files:
        return None

    target = node.get('reftarget', '')
    domain = node.get('refdomain', 'std')
    reftype = node.get('reftype', '')

    msg = f"can't link to: {domain}:{reftype}:: {target}"

    # Don't duplicate warnings
    key = (origin, msg)
    if key not in reported:
        reported.add(key)
        logger.warning(msg, location=node, type='ref', subtype='missing')

    return None

def merge_xref_info(app, env, docnames, other):
    """
    Merge the ``_xref_files`` set of another environment into ``env``.

    As each process modifies its own env._xref_files, they need to be merged
    back. Nothing is done if ``other`` has no ``_xref_files`` attribute.
    """
    try:
        other_files = other._xref_files
    except AttributeError:
        return
    env._xref_files.update(other_files)

def init_xref_docs(app, env, docnames):
    """Start with an empty set of files having generated cross references"""
    build_env = app.env
    build_env._xref_files = set()

# ==============================================================================

def setup(app):
    """Register the directive and the event handlers of this extension"""

    early_hooks = (
        ("env-before-read-docs", init_xref_docs),
        ("env-merge-info", merge_xref_info),
    )
    for event, handler in early_hooks:
        app.connect(event, handler)

    app.add_directive("kernel-include", KernelInclude)
    app.connect("missing-reference", check_missing_refs)

    # Extension metadata returned to Sphinx
    metadata = {"version": __version__}
    metadata["parallel_read_safe"] = True
    metadata["parallel_write_safe"] = True
    return metadata