#!/usr/bin/env python3 """Build the Python docs for various branches and various languages. Without any arguments builds docs for all active versions and languages. Environment variables for: - `SENTRY_DSN` (Error reporting) - `FASTLY_SERVICE_ID` / `FASTLY_TOKEN` (CDN purges) - `PYTHON_DOCS_ENABLE_ANALYTICS` (Enable Plausible for online docs) are read from the site configuration path for your platform (/etc/xdg/docsbuild-scripts on linux) if available, and can be overriden by writing a file to the user config dir for your platform ($HOME/.config/docsbuild-scripts on linux). The contents of the file is parsed as toml: ```toml [env] SENTRY_DSN = "https://0a0a0a0a0a0a0a0a0a0a0a@sentry.io/69420" FASTLY_SERVICE_ID = "deadbeefdeadbeefdead" FASTLY_TOKEN = "secureme!" PYTHON_DOCS_ENABLE_ANALYTICS = "1" ``` Languages are stored in `config.toml` while versions are discovered from the devguide. -q selects "quick build", which means to build only HTML. Translations are fetched from GitHub repositories according to PEP 545. `--languages` allows selecting translations, like `--languages en` to just build the English documents. This script was originally created by Georg Brandl in March 2010. Modified by Benjamin Peterson to do CDN cache invalidation. Modified by Julien Palard to build translations. """ from __future__ import annotations import argparse import concurrent.futures import dataclasses import datetime as dt import filecmp import json import logging import logging.handlers import os import re import shlex import shutil import stat import subprocess import sys import venv from bisect import bisect_left as bisect from contextlib import contextmanager, suppress from pathlib import Path from string import Template from time import perf_counter, sleep from urllib.parse import urljoin import jinja2 import platformdirs import tomlkit import urllib3 import zc.lockfile TYPE_CHECKING = False if TYPE_CHECKING: from collections.abc import Collection, Iterator, Sequence, Set from typing import Literal try: from os import EX_OK from os import EX_SOFTWARE as EX_FAILURE except ImportError: EX_OK, EX_FAILURE = 0, 1 try: import sentry_sdk except ImportError: sentry_sdk = None else: sentry_sdk.init() HERE = Path(__file__).resolve().parent @dataclasses.dataclass(frozen=True, slots=True) class Versions: _seq: Sequence[Version] def __iter__(self) -> Iterator[Version]: return iter(self._seq) def __reversed__(self) -> Iterator[Version]: return reversed(self._seq) @classmethod def from_json(cls, data: dict) -> Versions: """Load versions from the devguide's JSON representation.""" permitted = ", ".join(sorted(Version.STATUSES | Version.SYNONYMS.keys())) versions = [] for name, release in data.items(): branch = release["branch"] status = release["status"] status = Version.SYNONYMS.get(status, status) if status not in Version.STATUSES: msg = ( f"Saw invalid version status {status!r}, " f"expected to be one of {permitted}." ) raise ValueError(msg) versions.append(Version(name=name, status=status, branch_or_tag=branch)) return cls(sorted(versions, key=Version.as_tuple)) def filter(self, branches: Sequence[str] = ()) -> Sequence[Version]: """Filter the given versions. If *branches* is given, only *versions* matching *branches* are returned. Else all live versions are returned (this means no EOL and no security-fixes branches). """ if branches: branches = frozenset(branches) return [v for v in self if {v.name, v.branch_or_tag} & branches] return [v for v in self if v.status not in {"EOL", "security-fixes"}] @property def current_stable(self) -> Version: """Find the current stable CPython version.""" return max((v for v in self if v.status == "stable"), key=Version.as_tuple) @property def current_dev(self) -> Version: """Find the current CPython version in development.""" return max(self, key=Version.as_tuple) @dataclasses.dataclass(frozen=True, kw_only=True, slots=True) class Version: """Represents a CPython version and its documentation build dependencies.""" name: str status: Literal["EOL", "security-fixes", "stable", "pre-release", "in development"] branch_or_tag: str STATUSES = {"EOL", "security-fixes", "stable", "pre-release", "in development"} # Those synonyms map branch status vocabulary found in the devguide # with our vocabulary. SYNONYMS = { "feature": "in development", "bugfix": "stable", "security": "security-fixes", "end-of-life": "EOL", "prerelease": "pre-release", } def __eq__(self, other: Version) -> bool: return self.name == other.name @property def requirements(self) -> list[str]: """Generate the right requirements for this version. Since CPython 3.8 a Doc/requirements.txt file can be used. In case the Doc/requirements.txt is absent or wrong (a sub-dependency broke), use this function to override it. See https://github.com/python/cpython/issues/91294 See https://github.com/python/cpython/issues/91483 """ dependencies = [ "-rrequirements.txt", "jieba", # To improve zh search. "PyStemmer~=2.2.0", # To improve performance for word stemming. ] if self.as_tuple() >= (3, 11): return dependencies if self.as_tuple() >= (3, 8): # Restore the imghdr module for Python 3.8-3.10. return dependencies + ["standard-imghdr"] # Requirements/constraints for Python 3.7 and older, pre-requirements.txt reqs = [ "alabaster<0.7.12", "blurb<1.2", "docutils<=0.17.1", "jieba", "jinja2<3.1", "python-docs-theme<=2023.3.1", "sphinxcontrib-applehelp<=1.0.2", "sphinxcontrib-devhelp<=1.0.2", "sphinxcontrib-htmlhelp<=2.0", "sphinxcontrib-jsmath<=1.0.1", "sphinxcontrib-qthelp<=1.0.3", "sphinxcontrib-serializinghtml<=1.1.5", "standard-imghdr", ] if self.name in {"3.7", "3.6", "2.7"}: return reqs + ["sphinx==2.3.1"] if self.name == "3.5": return reqs + ["sphinx==1.8.4", "standard-pipes"] raise ValueError("unreachable") @property def changefreq(self) -> str: """Estimate this version change frequency, for the sitemap.""" return {"EOL": "never", "security-fixes": "yearly"}.get(self.status, "daily") def as_tuple(self) -> tuple[int, ...]: """This version name as tuple, for easy comparisons.""" return version_to_tuple(self.name) @property def url(self) -> str: """The doc URL of this version in production.""" return f"https://docs.python.org/{self.name}/" @property def title(self) -> str: """The title of this version's doc, for the sidebar.""" return f"Python {self.name} ({self.status})" @property def picker_label(self) -> str: """Forge the label of a version picker.""" if self.status == "in development": return f"dev ({self.name})" if self.status == "pre-release": return f"pre ({self.name})" return self.name @dataclasses.dataclass(frozen=True, slots=True) class Languages: _seq: Sequence[Language] def __iter__(self) -> Iterator[Language]: return iter(self._seq) def __reversed__(self) -> Iterator[Language]: return reversed(self._seq) @classmethod def from_json(cls, defaults: dict, languages: dict) -> Languages: default_translated_name = defaults.get("translated_name", "") default_in_prod = defaults.get("in_prod", True) default_sphinxopts = defaults.get("sphinxopts", []) default_html_only = defaults.get("html_only", False) langs = [ Language( iso639_tag=iso639_tag, name=section["name"], translated_name=section.get("translated_name", default_translated_name), in_prod=section.get("in_prod", default_in_prod), sphinxopts=section.get("sphinxopts", default_sphinxopts), html_only=section.get("html_only", default_html_only), ) for iso639_tag, section in languages.items() ] return cls(langs) def filter(self, language_tags: Sequence[str] = ()) -> Sequence[Language]: """Filter a sequence of languages according to --languages.""" if language_tags: language_tags = frozenset(language_tags) return [l for l in self if l.tag in language_tags] # NoQA: E741 return list(self) @dataclasses.dataclass(order=True, frozen=True, kw_only=True) class Language: iso639_tag: str name: str translated_name: str in_prod: bool sphinxopts: Sequence[str] html_only: bool = False @property def tag(self) -> str: return self.iso639_tag.replace("_", "-").lower() @property def switcher_label(self) -> str: if self.translated_name: return f"{self.name} | {self.translated_name}" return self.name @dataclasses.dataclass(frozen=True, kw_only=True, slots=True) class BuildMetadata: _version: Version _language: Language @property def sphinxopts(self) -> Sequence[str]: return self._language.sphinxopts @property def iso639_tag(self) -> str: return self._language.iso639_tag @property def html_only(self) -> bool: return self._language.html_only or not self._language.in_prod @property def url(self): """The URL of this version in production.""" if self.is_translation: return f"https://docs.python.org/{self.version}/{self.language}/" return f"https://docs.python.org/{self.version}/" @property def branch_or_tag(self) -> str: return self._version.branch_or_tag @property def status(self) -> str: return self._version.status @property def is_eol(self) -> bool: return self._version.status == "EOL" @property def dependencies(self) -> list[str]: return self._version.requirements @property def version(self): return self._version.name @property def version_tuple(self): return self._version.as_tuple() @property def language(self): return self._language.tag @property def is_translation(self): return self.language != "en" @property def slug(self) -> str: return f"{self.language}/{self.version}" @property def venv_name(self) -> str: return f"venv-{self.version}" @property def locale_repo_url(self) -> str: return f"https://github.com/python/python-docs-{self.language}.git" def run( cmd: Sequence[str | Path], cwd: Path | None = None ) -> subprocess.CompletedProcess: """Like subprocess.run, with logging before and after the command execution.""" cmd = list(map(str, cmd)) cmdstring = shlex.join(cmd) logging.debug("Run: '%s'", cmdstring) result = subprocess.run( cmd, cwd=cwd, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, encoding="utf-8", errors="backslashreplace", check=False, ) if result.returncode: # Log last 20 lines, those are likely the interesting ones. logging.error( "Run: '%s' KO:\n%s", cmdstring, "\n".join(f" {line}" for line in result.stdout.split("\n")[-20:]), ) result.check_returncode() return result def run_with_logging(cmd: Sequence[str | Path], cwd: Path | None = None) -> None: """Like subprocess.check_call, with logging before the command execution.""" cmd = list(map(str, cmd)) logging.debug("Run: '%s'", shlex.join(cmd)) with subprocess.Popen( cmd, cwd=cwd, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, encoding="utf-8", ) as p: try: for line in p.stdout or (): logging.debug(">>>> %s", line.rstrip()) except: p.kill() raise if return_code := p.poll(): raise subprocess.CalledProcessError(return_code, cmd[0]) def changed_files(left: Path, right: Path) -> int: """Compute the number of different files in the two directory trees.""" def traverse(dircmp_result: filecmp.dircmp) -> int: changed = len(dircmp_result.diff_files) changed += sum(map(traverse, dircmp_result.subdirs.values())) return changed return traverse(filecmp.dircmp(left, right)) @dataclasses.dataclass class Repository: """Git repository abstraction for our specific needs.""" remote: str directory: Path def run(self, *args: str) -> subprocess.CompletedProcess: """Run git command in the clone repository.""" return run(("git", "-C", self.directory) + args) def get_ref(self, pattern: str) -> str: """Return the reference of a given tag or branch.""" try: # Maybe it's a branch return self.run("show-ref", "-s", f"origin/{pattern}").stdout.strip() except subprocess.CalledProcessError: # Maybe it's a tag return self.run("show-ref", "-s", f"tags/{pattern}").stdout.strip() def fetch(self) -> subprocess.CompletedProcess: """Try (and retry) to run git fetch.""" try: return self.run("fetch") except subprocess.CalledProcessError as err: logging.error("'git fetch' failed (%s), retrying...", err.stderr) sleep(5) return self.run("fetch") def switch(self, branch_or_tag: str) -> None: """Reset and cleans the repository to the given branch or tag.""" self.run("reset", "--hard", self.get_ref(branch_or_tag), "--") self.run("clean", "-dfqx") def clone(self) -> bool: """Maybe clone the repository, if not already cloned.""" if (self.directory / ".git").is_dir(): return False # Already cloned logging.info("Cloning %s into %s", self.remote, self.directory) self.directory.mkdir(mode=0o775, parents=True, exist_ok=True) run(("git", "clone", self.remote, self.directory)) return True def update(self) -> None: self.clone() or self.fetch() def version_to_tuple(version: str) -> tuple[int, ...]: """Transform a version string to a tuple, for easy comparisons.""" return tuple(int(part) for part in version.split(".")) def tuple_to_version(version_tuple: tuple[int, ...]) -> str: """Reverse version_to_tuple.""" return ".".join(str(part) for part in version_tuple) def locate_nearest_version( available_versions: Collection[str], target_version: str ) -> str: """Look for the nearest version of target_version in available_versions. Versions are to be given as tuples, like (3, 7) for 3.7. >>> locate_nearest_version(["2.7", "3.6", "3.7", "3.8"], "3.9") '3.8' >>> locate_nearest_version(["2.7", "3.6", "3.7", "3.8"], "3.5") '3.6' >>> locate_nearest_version(["2.7", "3.6", "3.7", "3.8"], "2.6") '2.7' >>> locate_nearest_version(["2.7", "3.6", "3.7", "3.8"], "3.10") '3.8' >>> locate_nearest_version(["2.7", "3.6", "3.7", "3.8"], "3.7") '3.7' """ available_versions_tuples = sorted(map(version_to_tuple, set(available_versions))) target_version_tuple = version_to_tuple(target_version) try: found = available_versions_tuples[ bisect(available_versions_tuples, target_version_tuple) ] except IndexError: found = available_versions_tuples[-1] return tuple_to_version(found) @contextmanager def edit(file: Path): """Context manager to edit a file "in place", use it as: with edit("/etc/hosts") as (i, o): for line in i: o.write(line.replace("localhoat", "localhost")) """ temporary = file.with_name(file.name + ".tmp") with suppress(FileNotFoundError): temporary.unlink() with open(file, encoding="UTF-8") as input_file: with open(temporary, "w", encoding="UTF-8") as output_file: yield input_file, output_file temporary.rename(file) def setup_switchers(script_content: bytes, html_root: Path) -> None: """Setup cross-links between CPython versions: - Cross-link various languages in a language switcher - Cross-link various versions in a version switcher """ switchers_path = html_root / "_static" / "switchers.js" switchers_path.write_bytes(script_content) for file in html_root.glob("**/*.html"): depth = len(file.relative_to(html_root).parts) - 1 src = f"{'../' * depth}_static/switchers.js" script = f' \n' with edit(file) as (ifile, ofile): for line in ifile: if line == script: continue if line == " \n": ofile.write(script) ofile.write(line) def head(text: str, lines: int = 10) -> str: """Return the first *lines* lines from the given text.""" return "\n".join(text.split("\n")[:lines]) def version_info() -> None: """Handler for --version.""" try: platex_version = head( subprocess.check_output(("platex", "--version"), text=True), lines=3, ) except FileNotFoundError: platex_version = "Not installed." try: xelatex_version = head( subprocess.check_output(("xelatex", "--version"), text=True), lines=2, ) except FileNotFoundError: xelatex_version = "Not installed." print( f""" # platex {platex_version} # xelatex {xelatex_version} """ ) @dataclasses.dataclass class DocBuilder: """Builder for a CPython version and a language.""" build_meta: BuildMetadata cpython_repo: Repository docs_by_version_content: bytes switchers_content: bytes build_root: Path www_root: Path select_output: Literal["no-html", "only-html", "only-html-en"] | None quick: bool group: str log_directory: Path skip_cache_invalidation: bool theme: str @property def html_only(self) -> bool: return ( self.select_output in {"only-html", "only-html-en"} or self.quick or self.build_meta.html_only ) @property def includes_html(self) -> bool: """Does the build we are running include HTML output?""" return self.select_output != "no-html" def run(self, http: urllib3.PoolManager, force_build: bool) -> bool | None: """Build and publish a Python doc, for a language, and a version.""" start_time = perf_counter() start_timestamp = dt.datetime.now(tz=dt.UTC).replace(microsecond=0) logging.info("Running.") try: if self.build_meta.html_only and not self.includes_html: logging.info("Skipping non-HTML build (language is HTML-only).") return None # skipped self.cpython_repo.switch(self.build_meta.branch_or_tag) if self.build_meta.is_translation: self.clone_translation() if trigger_reason := self.should_rebuild(force_build): self.build_venv() self.build() self.copy_build_to_webroot(http) self.save_state( build_start=start_timestamp, build_duration=perf_counter() - start_time, trigger=trigger_reason, ) else: return None # skipped except Exception as err: logging.exception("Badly handled exception, human, please help.") if sentry_sdk: sentry_sdk.capture_exception(err) return False return True @property def locale_dir(self) -> Path: return self.build_root / self.build_meta.version / "locale" @property def checkout(self) -> Path: """Path to CPython git clone.""" return self.build_root / _checkout_name(self.select_output) def clone_translation(self) -> None: self.translation_repo.update() self.translation_repo.switch(self.translation_branch) @property def translation_repo(self) -> Repository: """See PEP 545 for translations repository naming convention.""" locale_clone_dir = self.locale_dir / self.build_meta.iso639_tag / "LC_MESSAGES" return Repository(self.build_meta.locale_repo_url, locale_clone_dir) @property def translation_branch(self) -> str: """Some CPython versions may be untranslated, being either too old or too new. This function looks for remote branches on the given repo, and returns the name of the nearest existing branch. It could be enhanced to also search for tags. """ remote_branches = self.translation_repo.run("branch", "-r").stdout branches = re.findall(r"/([0-9]+\.[0-9]+)$", remote_branches, re.M) return locate_nearest_version(branches, self.build_meta.version) def build(self) -> None: """Build this version/language doc.""" logging.info("Build start.") start_time = perf_counter() sphinxopts = list(self.build_meta.sphinxopts) if self.build_meta.is_translation: sphinxopts.extend(( f"-D locale_dirs={self.locale_dir}", f"-D language={self.build_meta.iso639_tag}", "-D gettext_compact=0", "-D translation_progress_classes=1", )) if self.build_meta.is_eol: sphinxopts.append("-D html_context.outdated=1") if self.build_meta.status in ("in development", "pre-release"): maketarget = "autobuild-dev" else: maketarget = "autobuild-stable" if self.html_only: maketarget += "-html" logging.info("Running make %s", maketarget) python = self.venv / "bin" / "python" sphinxbuild = self.venv / "bin" / "sphinx-build" blurb = self.venv / "bin" / "blurb" if self.includes_html: site_url = self.build_meta.url # Define a tag to enable opengraph socialcards previews # (used in Doc/conf.py and requires matplotlib) sphinxopts += ( "-t create-social-cards", f"-D ogp_site_url={site_url}", ) if self.build_meta.version_tuple < (3, 8): # Disable CPython switchers, we handle them now: text = (self.checkout / "Doc" / "Makefile").read_text(encoding="utf-8") text = text.replace(" -A switchers=1", "") (self.checkout / "Doc" / "Makefile").write_text(text, encoding="utf-8") self.setup_indexsidebar() run_with_logging(( "make", "-C", self.checkout / "Doc", f"PYTHON={python}", f"SPHINXBUILD={sphinxbuild}", f"BLURB={blurb}", f"VENVDIR={self.venv}", f"SPHINXOPTS={' '.join(sphinxopts)}", "SPHINXERRORHANDLING=", maketarget, )) self.log_directory.mkdir(parents=True, exist_ok=True) chgrp(self.log_directory, group=self.group, recursive=True) if self.includes_html: setup_switchers( self.switchers_content, self.checkout / "Doc" / "build" / "html" ) logging.info("Build done (%s).", format_seconds(perf_counter() - start_time)) def build_venv(self) -> None: """Build a venv for the specific Python version. So we can reuse them from builds to builds, while they contain different Sphinx versions. """ requirements = list(self.build_meta.dependencies) if self.includes_html: # opengraph previews requirements.append("matplotlib>=3") venv_path = self.build_root / self.build_meta.venv_name venv.create(venv_path, symlinks=os.name != "nt", with_pip=True) run( ( venv_path / "bin" / "python", "-m", "pip", "install", "--upgrade", "--upgrade-strategy=eager", self.theme, *requirements, ), cwd=self.checkout / "Doc", ) run((venv_path / "bin" / "python", "-m", "pip", "freeze", "--all")) self.venv = venv_path def setup_indexsidebar(self) -> None: """Copy indexsidebar.html for Sphinx.""" tmpl_src = HERE / "templates" tmpl_dst = self.checkout / "Doc" / "tools" / "templates" dbv_path = tmpl_dst / "_docs_by_version.html" shutil.copy(tmpl_src / "indexsidebar.html", tmpl_dst / "indexsidebar.html") if not self.build_meta.is_eol: dbv_path.write_bytes(self.docs_by_version_content) else: shutil.copy(tmpl_src / "_docs_by_version.html", dbv_path) def copy_build_to_webroot(self, http: urllib3.PoolManager) -> None: """Copy a given build to the appropriate webroot with appropriate rights.""" logging.info("Publishing start.") start_time = perf_counter() self.www_root.mkdir(parents=True, exist_ok=True) if not self.build_meta.is_translation: target = self.www_root / self.build_meta.version else: language_dir = self.www_root / self.build_meta.language language_dir.mkdir(parents=True, exist_ok=True) chgrp(language_dir, group=self.group, recursive=True) language_dir.chmod(0o775) target = language_dir / self.build_meta.version target.mkdir(parents=True, exist_ok=True) try: target.chmod(0o775) except PermissionError as err: logging.warning("Can't change mod of %s: %s", target, str(err)) chgrp(target, group=self.group, recursive=True) changed = 0 if self.includes_html: # Copy built HTML files to webroot (default /srv/docs.python.org) changed += changed_files(self.checkout / "Doc" / "build" / "html", target) logging.info("Copying HTML files to %s", target) chgrp( self.checkout / "Doc" / "build" / "html/", group=self.group, recursive=True, ) chmod_make_readable(self.checkout / "Doc" / "build" / "html") run(( "rsync", "-a", "--delete-delay", "--filter", "P archives/", str(self.checkout / "Doc" / "build" / "html") + "/", target, )) dist_dir = self.checkout / "Doc" / "dist" if dist_dir.is_dir(): # Copy archive files to /archives/ logging.debug("Copying dist files.") chgrp(dist_dir, group=self.group, recursive=True) chmod_make_readable(dist_dir) archives_dir = target / "archives" archives_dir.mkdir(parents=True, exist_ok=True) archives_dir.chmod( archives_dir.stat().st_mode | stat.S_IROTH | stat.S_IXOTH ) chgrp(archives_dir, group=self.group) changed += 1 for dist_file in dist_dir.iterdir(): shutil.copy2(dist_file, archives_dir / dist_file.name) changed += 1 logging.info("%s files changed", changed) if changed and not self.skip_cache_invalidation: purge_surrogate_key(http, self.build_meta.slug) logging.info( "Publishing done (%s).", format_seconds(perf_counter() - start_time) ) def should_rebuild(self, force: bool) -> str | Literal[False]: state = self.load_state() if not state: logging.info("Should rebuild: no previous state found.") return "no previous state" cpython_sha = self.cpython_repo.run("rev-parse", "HEAD").stdout.strip() if self.build_meta.is_translation: translation_sha = self.translation_repo.run( "rev-parse", "HEAD" ).stdout.strip() if translation_sha != state["translation_sha"]: logging.info( "Should rebuild: new translations (from %s to %s)", state["translation_sha"], translation_sha, ) return "new translations" if cpython_sha != state["cpython_sha"]: diff = self.cpython_repo.run( "diff", "--name-only", state["cpython_sha"], cpython_sha ).stdout if "Doc/" in diff or "Misc/NEWS.d/" in diff: logging.info( "Should rebuild: Doc/ has changed (from %s to %s)", state["cpython_sha"], cpython_sha, ) return "Doc/ has changed" if force: logging.info("Should rebuild: forced.") return "forced" logging.info("Nothing changed, no rebuild needed.") return False def load_state(self) -> dict: if self.select_output is not None: state_file = self.build_root / f"state-{self.select_output}.toml" else: state_file = self.build_root / "state.toml" try: return tomlkit.loads(state_file.read_text(encoding="UTF-8"))[ f"/{self.build_meta.slug}/" ] except (KeyError, FileNotFoundError): return {} def save_state( self, build_start: dt.datetime, build_duration: float, trigger: str ) -> None: """Save current CPython sha1 and current translation sha1. Using this we can deduce if a rebuild is needed or not. """ if self.select_output is not None: state_file = self.build_root / f"state-{self.select_output}.toml" else: state_file = self.build_root / "state.toml" try: states = tomlkit.parse(state_file.read_text(encoding="UTF-8")) except FileNotFoundError: states = tomlkit.document() key = f"/{self.build_meta.slug}/" state = { "last_build_start": build_start, "last_build_duration": round(build_duration, 0), "triggered_by": trigger, "cpython_sha": self.cpython_repo.run("rev-parse", "HEAD").stdout.strip(), } if self.build_meta.is_translation: state["translation_sha"] = self.translation_repo.run( "rev-parse", "HEAD" ).stdout.strip() states[key] = state state_file.write_text(tomlkit.dumps(states), encoding="UTF-8") table = tomlkit.inline_table() table |= state logging.info("Saved new rebuild state for %s: %s", key, table.as_string()) def chgrp( path: Path, /, group: int | str | None, *, recursive: bool = False, follow_symlinks: bool = True, ) -> None: if sys.platform == "win32": return from grp import getgrnam try: try: group_id = int(group) except ValueError: group_id = getgrnam(group)[2] except (LookupError, TypeError, ValueError): return try: os.chown(path, -1, group_id, follow_symlinks=follow_symlinks) if recursive: for p in path.rglob("*"): os.chown(p, -1, group_id, follow_symlinks=follow_symlinks) except OSError as err: logging.warning("Can't change group of %s: %s", path, str(err)) def chmod_make_readable(path: Path, /, mode: int = stat.S_IROTH) -> None: if not path.is_dir(): raise ValueError path.chmod(path.stat().st_mode | stat.S_IROTH | stat.S_IXOTH) # o+rx for p in path.rglob("*"): if p.is_dir(): p.chmod(p.stat().st_mode | stat.S_IROTH | stat.S_IXOTH) # o+rx else: p.chmod(p.stat().st_mode | stat.S_IROTH) # o+r def format_seconds(seconds: float) -> str: hours, remainder = divmod(seconds, 3600) minutes, seconds = divmod(remainder, 60) hours, minutes, seconds = int(hours), int(minutes), round(seconds) match (hours, minutes, seconds): case 0, 0, s: return f"{s}s" case 0, m, s: return f"{m}m {s}s" case h, m, s: return f"{h}h {m}m {s}s" raise ValueError("unreachable") def _checkout_name(select_output: str | None) -> str: if select_output is not None: return f"cpython-{select_output}" return "cpython" def main() -> int: """Script entry point.""" args = parse_args() setup_logging(args.log_directory, args.select_output) load_environment_variables() if args.select_output is None: return build_docs_with_lock(args, "build_docs.lock") if args.select_output == "no-html": return build_docs_with_lock(args, "build_docs_archives.lock") if args.select_output == "only-html": return build_docs_with_lock(args, "build_docs_html.lock") if args.select_output == "only-html-en": return build_docs_with_lock(args, "build_docs_html_en.lock") return EX_FAILURE def parse_args() -> argparse.Namespace: """Parse command-line arguments.""" parser = argparse.ArgumentParser( description="Runs a build of the Python docs for various branches.", allow_abbrev=False, ) parser.suggest_on_error = True parser.add_argument( "--select-output", choices=("no-html", "only-html", "only-html-en"), help="Choose what outputs to build.", ) parser.add_argument( "-q", "--quick", action="store_true", help="Run a quick build (only HTML files).", ) parser.add_argument( "-b", "--branches", nargs="*", metavar="3.12", help="Versions to build (defaults to all maintained branches).", ) parser.add_argument( "-r", "--build-root", type=Path, help="Path to a directory containing a checkout per branch.", default=Path("/srv/docsbuild"), ) parser.add_argument( "-w", "--www-root", type=Path, help="Path where generated files will be copied.", default=Path("/srv/docs.python.org"), ) parser.add_argument( "--force", action="store_true", help="Always build the chosen languages and versions, " "regardless of existing state.", ) parser.add_argument( "--skip-cache-invalidation", help="Skip Fastly cache invalidation.", action="store_true", ) parser.add_argument( "--group", help="Group files on targets and www-root file should get.", default="docs", ) parser.add_argument( "--log-directory", type=Path, help="Directory used to store logs.", default=Path("/var/log/docsbuild/"), ) parser.add_argument( "--languages", nargs="*", help="Language translation, as a PEP 545 language tag like" " 'fr' or 'pt-br'. " "Builds all available languages by default.", metavar="fr", ) parser.add_argument( "--version", action="store_true", help="Get build_docs and dependencies version info", ) parser.add_argument( "--theme", default="python-docs-theme", help="Python package to use for python-docs-theme: Useful to test branches:" " --theme git+https://github.com/obulat/python-docs-theme@master", ) args = parser.parse_args() if args.version: version_info() sys.exit(0) del args.version if args.log_directory: args.log_directory = args.log_directory.resolve() if args.build_root: args.build_root = args.build_root.resolve() if args.www_root: args.www_root = args.www_root.resolve() return args def setup_logging(log_directory: Path, select_output: str | None) -> None: """Setup logging to stderr if run by a human, or to a file if run from a cron.""" log_format = "%(asctime)s %(levelname)s: %(message)s" if sys.stderr.isatty() or "CI" in os.environ: logging.basicConfig(format=log_format, stream=sys.stderr) else: log_directory.mkdir(parents=True, exist_ok=True) if select_output is None: filename = log_directory / "docsbuild.log" else: filename = log_directory / f"docsbuild-{select_output}.log" handler = logging.handlers.WatchedFileHandler(filename) handler.setFormatter(logging.Formatter(log_format)) logging.getLogger().addHandler(handler) logging.getLogger().setLevel(logging.DEBUG) def load_environment_variables() -> None: dbs_user_config = platformdirs.user_config_path("docsbuild-scripts") dbs_site_config = platformdirs.site_config_path("docsbuild-scripts") if dbs_user_config.is_file(): env_conf_file = dbs_user_config elif dbs_site_config.is_file(): env_conf_file = dbs_site_config else: logging.info( "No environment variables configured. Configure in %s or %s.", dbs_site_config, dbs_user_config, ) return logging.info("Reading environment variables from %s.", env_conf_file) if env_conf_file == dbs_site_config: logging.info("You can override settings in %s.", dbs_user_config) elif dbs_site_config.is_file(): logging.info("Overriding %s.", dbs_site_config) env_config = env_conf_file.read_text(encoding="utf-8") for key, value in tomlkit.parse(env_config).get("env", {}).items(): logging.debug("Setting %s in environment.", key) os.environ[key] = value def build_docs_with_lock(args: argparse.Namespace, lockfile_name: str) -> int: try: lock = zc.lockfile.LockFile(HERE / lockfile_name) except zc.lockfile.LockError: logging.info("Another builder is running... dying...") return EX_FAILURE try: return build_docs(args) finally: lock.close() def build_docs(args: argparse.Namespace) -> int: """Build all docs (each language and each version).""" logging.info("Full build start.") start_time = perf_counter() http = urllib3.PoolManager() versions = parse_versions_from_devguide(http) languages = parse_languages_from_config() # Reverse languages but not versions, because we take version-language # pairs from the end of the list, effectively reversing it. # This runs languages in config.toml order and versions newest first. todo = [ BuildMetadata(_version=version, _language=language) for version in versions.filter(args.branches) for language in reversed(languages.filter(args.languages)) ] del args.branches del args.languages force_build = args.force del args.force docs_by_version_content = render_docs_by_version(versions).encode() switchers_content = render_switchers(versions, languages) build_succeeded = set() any_build_failed = False cpython_repo = Repository( "https://github.com/python/cpython.git", args.build_root / _checkout_name(args.select_output), ) while todo: build_props = todo.pop() logging.root.handlers[0].setFormatter( logging.Formatter( f"%(asctime)s %(levelname)s {build_props.slug}: %(message)s" ) ) if sentry_sdk: scope = sentry_sdk.get_isolation_scope() scope.set_tag("version", build_props.version) scope.set_tag("language", build_props.language) cpython_repo.update() builder = DocBuilder( build_props, cpython_repo, docs_by_version_content, switchers_content, **vars(args), ) built_successfully = builder.run(http, force_build=force_build) if built_successfully: build_succeeded.add(build_props.slug) elif built_successfully is not None: any_build_failed = True logging.root.handlers[0].setFormatter( logging.Formatter("%(asctime)s %(levelname)s: %(message)s") ) build_sitemap(versions, languages, args.www_root, args.group) build_404(args.www_root, args.group) copy_robots_txt( args.www_root, args.group, args.skip_cache_invalidation, http, ) make_symlinks( args.www_root, args.group, versions, languages, build_succeeded, args.skip_cache_invalidation, http, ) if build_succeeded: # Only check canonicals if at least one version was built. proofread_canonicals(args.www_root, args.skip_cache_invalidation, http) logging.info("Full build done (%s).", format_seconds(perf_counter() - start_time)) return EX_FAILURE if any_build_failed else EX_OK def parse_versions_from_devguide(http: urllib3.PoolManager) -> Versions: releases = http.request( "GET", "https://raw.githubusercontent.com/" "python/devguide/main/include/release-cycle.json", timeout=30, ).json() return Versions.from_json(releases) def parse_languages_from_config() -> Languages: """Read config.toml to discover languages to build.""" config = tomlkit.parse((HERE / "config.toml").read_text(encoding="UTF-8")) return Languages.from_json(config["defaults"], config["languages"]) def render_docs_by_version(versions: Versions) -> str: """Generate content for _docs_by_version.html.""" links = [f'
  • {v.title}
  • ' for v in reversed(versions)] return "\n".join(links) def render_switchers(versions: Versions, languages: Languages) -> bytes: language_pairs = sorted((l.tag, l.switcher_label) for l in languages if l.in_prod) # NoQA: E741 version_pairs = [(v.name, v.picker_label) for v in reversed(versions)] switchers_template_file = HERE / "templates" / "switchers.js" template = Template(switchers_template_file.read_text(encoding="UTF-8")) rendered_template = template.safe_substitute( LANGUAGES=json.dumps(language_pairs), VERSIONS=json.dumps(version_pairs), ) return rendered_template.encode("UTF-8") def build_sitemap( versions: Versions, languages: Languages, www_root: Path, group: str ) -> None: """Build a sitemap with all live versions and translations.""" if not www_root.exists(): logging.info("Skipping sitemap generation (www root does not even exist).") return logging.info("Starting sitemap generation...") template_path = HERE / "templates" / "sitemap.xml" template = jinja2.Template(template_path.read_text(encoding="UTF-8")) rendered_template = template.render(languages=languages, versions=versions) sitemap_path = www_root / "sitemap.xml" sitemap_path.write_text(rendered_template + "\n", encoding="UTF-8") sitemap_path.chmod(0o664) chgrp(sitemap_path, group=group) def build_404(www_root: Path, group: str) -> None: """Build a nice 404 error page to display in case PDFs are not built yet.""" if not www_root.exists(): logging.info("Skipping 404 page generation (www root does not even exist).") return logging.info("Copying 404 page...") not_found_file = www_root / "404.html" shutil.copyfile(HERE / "templates" / "404.html", not_found_file) not_found_file.chmod(0o664) chgrp(not_found_file, group=group) def copy_robots_txt( www_root: Path, group: str, skip_cache_invalidation: bool, http: urllib3.PoolManager, ) -> None: """Copy robots.txt to www_root.""" if not www_root.exists(): logging.info("Skipping copying robots.txt (www root does not even exist).") return logging.info("Copying robots.txt...") template_path = HERE / "templates" / "robots.txt" robots_path = www_root / "robots.txt" shutil.copyfile(template_path, robots_path) robots_path.chmod(0o775) chgrp(robots_path, group=group) if not skip_cache_invalidation: purge(http, "robots.txt") def make_symlinks( www_root: Path, group: str, versions: Versions, languages: Languages, successful_builds: Set[str], skip_cache_invalidation: bool, http: urllib3.PoolManager, ) -> None: """Maintains the /2/, /3/, and /dev/ symlinks for each language. Like: - /2/ → /2.7/ - /3/ → /3.12/ - /dev/ → /3.14/ - /fr/3/ → /fr/3.12/ - /es/dev/ → /es/3.14/ """ logging.info("Creating major and development version symlinks...") for symlink_name, symlink_target in ( ("3", versions.current_stable.name), ("2", "2.7"), ("dev", versions.current_dev.name), ): for language in languages: if f"{language.tag}/{symlink_target}" in successful_builds: symlink( www_root, language.tag, symlink_target, symlink_name, group, skip_cache_invalidation, http, ) def symlink( www_root: Path, language_tag: str, directory: str, name: str, group: str, skip_cache_invalidation: bool, http: urllib3.PoolManager, ) -> None: """Used by major_symlinks and dev_symlink to maintain symlinks.""" msg = "Creating symlink from /%s/ to /%s/" if language_tag == "en": # English is rooted on /, no /en/ path = www_root logging.debug(msg, name, directory) else: path = www_root / language_tag logging.debug(msg, f"{language_tag}/{name}", f"{language_tag}/{directory}") link = path / name directory_path = path / directory if not directory_path.exists(): return # No touching link, dest doc not built yet. if not link.exists() or os.readlink(link) != directory: # Link does not exist or points to the wrong target. link.unlink(missing_ok=True) link.symlink_to(directory) chgrp(link, group=group, follow_symlinks=False) if not skip_cache_invalidation: surrogate_key = f"{language_tag}/{name}" purge_surrogate_key(http, surrogate_key) def proofread_canonicals( www_root: Path, skip_cache_invalidation: bool, http: urllib3.PoolManager ) -> None: """In www_root we check that all canonical links point to existing contents. It can happen that a canonical is "broken": - /3.11/whatsnew/3.11.html typically would link to /3/whatsnew/3.11.html, which may not exist yet. """ logging.info("Checking canonical links...") worker_count = (os.cpu_count() or 1) + 2 with concurrent.futures.ThreadPoolExecutor(worker_count) as executor: futures = { executor.submit(_check_canonical_rel, file, www_root) for file in www_root.glob("**/*.html") } paths_to_purge = { res.relative_to(www_root) # strip the leading /srv/docs.python.org for fut in concurrent.futures.as_completed(futures) if (res := fut.result()) is not None } if not skip_cache_invalidation: purge(http, *paths_to_purge) # Python 3.12 onwards doesn't use self-closing tags for _canonical_re = re.compile( b"""""" ) def _check_canonical_rel(file: Path, www_root: Path) -> Path | None: # Check for a canonical relation link in the HTML. # If one exists, ensure that the target exists # or otherwise remove the canonical link element. html = file.read_bytes() canonical = _canonical_re.search(html) if canonical is None: return None target = canonical[1].decode(encoding="UTF-8", errors="surrogateescape") if (www_root / target).exists(): return None logging.info("Removing broken canonical from %s to %s", file, target) start, end = canonical.span() file.write_bytes(html[:start] + html[end:]) return file def purge(http: urllib3.PoolManager, *paths: Path | str) -> None: """Remove one or many paths from docs.python.org's CDN. To be used when a file changes, so the CDN fetches the new one. """ base = "https://docs.python.org/" for path in paths: url = urljoin(base, str(path)) logging.debug("Purging %s from CDN", url) http.request("PURGE", url, timeout=30) def purge_surrogate_key(http: urllib3.PoolManager, surrogate_key: str) -> None: """Remove paths from docs.python.org's CDN. All paths matching the given 'Surrogate-Key' will be removed. This is set by the Nginx server for every language-version pair. To be used when a directory changes, so the CDN fetches the new one. https://www.fastly.com/documentation/reference/api/purging/#purge-tag """ unset = "__UNSET__" service_id = os.environ.get("FASTLY_SERVICE_ID", unset) fastly_key = os.environ.get("FASTLY_TOKEN", unset) if service_id == unset or fastly_key == unset: logging.info("CDN secrets not set, skipping Surrogate-Key purge") return logging.info("Purging Surrogate-Key '%s' from CDN", surrogate_key) http.request( "POST", f"https://api.fastly.com/service/{service_id}/purge/{surrogate_key}", headers={"Fastly-Key": fastly_key}, timeout=30, ) if __name__ == "__main__": raise SystemExit(main())