Source code for src.checks.interfaces_existence_of_documentation_infrastructure

"""
This module contains the interfaces and common functionality used by the
Existence of Documentation Infrastructure check.
"""
from __future__ import annotations

import logging
import re
import string
from pathlib import Path
from typing import (
    Any,
    Dict,
    Iterable,
    List,
    NamedTuple,
    Optional,
    Tuple,
    Literal,
)
from urllib.parse import urlparse

import yaml
from git.repo import Repo
from gitlab.v4.objects import Project
from yaml.parser import ParserError
from yaml.scanner import ScannerError

from src.interfaces import Named
from src.utils import file_list

# Module-level logger, named after this module's import path.
logger: logging.Logger = logging.getLogger(__name__)


class DocumentationTypeInterface(Named):
    """
    Abstracts over the different kinds of documentation that a project might
    have.

    The business logic for finding and scoring documentation is in the
    implementing classes; this interface is used by the main check class to
    compute the final score. The class also contains some helpers for common
    operations.
    """

    #: matches file names that end in a plain text extension
    #: (``.md``, ``.rst`` or ``.txt``); used by ``_text_file_filter``
    TEXT_FILE_REGEX: re.Pattern[str] = re.compile(r"\.(md|rst|txt)$")

    #: used to find markdown links to documentation
    LINK_PATTERN: re.Pattern[str] = re.compile(
        r'\[([^(\]\[]*?[dD]oc[su][^(\]\[]*?)\]\((http[^("]*?)\)',
        re.IGNORECASE,
    )

    #: returned by methods that collect links to documentation
    PubbliccodeymlDocLink = NamedTuple(
        "PubbliccodeymlDocLink", [("type", str), ("url", str)]
    )
    ScrapedDocLink = NamedTuple(
        "ScrapedDocLink", [("file", str), ("preview", str), ("url", str)]
    )

    #: translation table for ``str.translate`` that deletes every character
    #: listed in ``string.whitespace``
    RM_WHITESPACE_MAP: Dict[int, Literal[None]] = {
        ord(c): None for c in string.whitespace
    }

    def __init__(self, repo: Repo, api: Project) -> None:
        """
        Store the handles that the helpers of this class operate on.

        :param repo: local clone of the project under inspection
        :param api: GitLab API object of the same project
        """
        self.repo: Repo = repo
        self.api: Project = api

    def _is_external_url(self, url: Optional[str]) -> bool:
        """
        Checks if a link points to a target outside of OpenCoDE.

        :param url: url to decide
        :return: True iff the url does not point to OpenCoDE; False if `url`
            is None
        """
        if url is None:
            return False
        # Host names are case-insensitive, so normalise before matching.
        # NOTE: a URL without a network location (e.g. a relative path) has
        # an empty netloc and is therefore reported as external.
        domain = urlparse(url).netloc.lower()
        return "opencode" not in domain

    def _docs_in_publiccodeyml(
        self, only_external: bool = False, only_internal: bool = False
    ) -> List[DocumentationTypeInterface.PubbliccodeymlDocLink]:
        """
        Checks if the `publiccode.yaml` exists, and if it does, whether it
        contains links to documentation. Optionally returns only links that
        point back to OpenCoDE, or only links that point to an URL outside of
        OpenCoDE.

        :param only_external: keep only links for which
            `_is_external_url` is True
        :param only_internal: keep only links for which
            `_is_external_url` is False
        :return: Tuples of (documentation type, link target) for all doc
            links that were found.
        :raises ValueError: if both `only_external` and `only_internal` are
            set
        """
        if only_external and only_internal:
            raise ValueError(
                "Specify either only_internal or only_external, not both"
            )

        ret: List[DocumentationTypeInterface.PubbliccodeymlDocLink] = []

        pcy: Optional[Dict[str, Any]] = self._get_publiccodeyml()
        if not pcy:
            return ret

        desc: Optional[Dict[str, Any]] = pcy.get("description")
        if not desc:
            return ret
        if not isinstance(desc, dict):
            # A scalar or list here would otherwise crash on `.items()`.
            logger.info(
                f"publiccode.yml of {self.api.name_with_namespace} "
                f"has invalid format: {desc=}"
            )
            return ret

        for lang, lang_desc in desc.items():
            logger.debug(f"Project description in {lang} is {lang_desc}")
            if not isinstance(lang_desc, dict):
                logger.info(
                    f"publiccode.yml of {self.api.name_with_namespace} "
                    f"has invalid format: {lang_desc=}"
                )
                continue

            docs: Optional[str] = lang_desc.get("documentation")
            if docs:
                logger.info(f"Found user documentation: {docs}")
                ret.append(self.PubbliccodeymlDocLink("user", docs))

            api_docs: Optional[str] = lang_desc.get("apiDocumentation")
            if api_docs:
                logger.info(f"Found api documentation: {api_docs}")
                ret.append(self.PubbliccodeymlDocLink("api", api_docs))

        if only_external:
            ret = [
                doc_link
                for doc_link in ret
                if self._is_external_url(doc_link.url)
            ]
        elif only_internal:
            ret = [
                doc_link
                for doc_link in ret
                if not self._is_external_url(doc_link.url)
            ]
        return ret

    def _amount(self, files: Iterable[Path]) -> int:
        """
        :param files: files whose content should be measured
        :return: Returns total number of non-whitespace characters in
            `files`. Files that can not be decoded are logged and skipped.
        """
        ret: int = 0
        for file in files:
            try:
                ret += len(self._remove_whitespace(file.read_text()))
            except UnicodeDecodeError as err:
                logger.error(f"Can not decode content of {file.name}: {err}")
                continue
        return ret

    @classmethod
    def _text_file_filter(cls, file_name: str) -> bool:
        """
        Predicate over file names based on `TEXT_FILE_REGEX`.

        :param file_name: name or path of the file to test
        :return: False if `file_name` ends in one of the plain text
            extensions, True otherwise
        """
        return cls.TEXT_FILE_REGEX.search(file_name) is None

    def _get_publiccodeyml(self) -> Optional[Dict[str, Any]]:
        """
        Try to find and parse the projects publiccode.yaml.

        `publiccode.yml` is tried first, `publiccode.yaml` is the fallback.

        :return: a mapping that contains the parsed file, or None if no file
            was found, the file is empty, it is not valid YAML, or its
            top-level element is not a mapping
        """
        root = Path(str(self.repo.working_tree_dir))

        content: Optional[str] = None
        for file_name in ("publiccode.yml", "publiccode.yaml"):
            try:
                content = (root / file_name).read_text()
            except FileNotFoundError:
                logger.info(f"Project has no {file_name}")
                continue
            break

        if not content:
            return None

        try:
            parsed = yaml.safe_load(content)
        except yaml.YAMLError as err:
            # YAMLError is the common base of ParserError, ScannerError and
            # the other errors the loader can raise.
            logger.info(f"Project has invalid publiccode.yml: {err}")
            return None

        if not isinstance(parsed, dict):
            # Callers use `.get()` on the result, so anything but a mapping
            # is treated like an invalid file.
            logger.info(
                "Project has invalid publiccode.yml: "
                "top-level element is not a mapping"
            )
            return None
        return parsed

    def _remove_whitespace(self, s: str) -> str:
        """
        :param s: string to strip
        :return: input string with all whitespace characters removed
        """
        return s.translate(self.RM_WHITESPACE_MAP)

    def delta(self) -> Tuple[float, int]:
        """
        Restriction of the `delta` map to the documentation type represented
        by the implementor and the repository specified during the
        construction of this instance.

        :return: confidence into the result, and amount of documentation
            detected
        :raises NotImplementedError: always; implementing classes are
            expected to override this method
        """
        raise NotImplementedError()