# Copyright 2020 The TensorStore Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Builds the documentation using sphinx."""

import argparse
import contextlib
import getpass
import glob
import os
import pathlib
import platform
import re
import shutil
import sys
import tempfile
from typing import List
import urllib.parse
import zipfile

DOCS_ROOT = 'docs'
THIRD_PARTY_ROOT = 'third_party'
CPP_ROOT = 'tensorstore'


class WorkspaceDict(dict):
  """Dictionary type used to evaluate workspace.bzl files as Python.

  The Bazel functions referenced by workspace.bzl are stubbed out just
  enough that exec()ing a workspace.bzl file records the arguments passed
  to `maybe(third_party_http_archive, ...)`.
  """

  def __init__(self):
    dict.__setitem__(self, 'native', self)
    self.maybe_args = {}

  def __setitem__(self, key, val):
    # Never allow the stub methods defined below to be shadowed by
    # definitions from the evaluated workspace.bzl.
    if not hasattr(self, key):
      dict.__setitem__(self, key, val)

  def __getitem__(self, key):
    if hasattr(self, key):
      return getattr(self, key)
    if dict.__contains__(self, key):
      return dict.__getitem__(self, key)
    return self._unimplemented

  def _unimplemented(self, *args, **kwargs):
    pass

  def glob(self, *args, **kwargs):
    # NOTE: Non-trivial uses of glob() in BUILD files will need attention.
    return []

  def select(self, arg_dict):
    return []

  def load(self, *args):
    pass

  def package_name(self, **kwargs):
    return ''

  def third_party_http_archive(self):
    # Only ever passed to maybe(); never actually invoked.
    pass

  def maybe(self, fn, **kwargs):
    self.maybe_args = kwargs

  def get_args(self) -> dict:
    """Evaluates repo() and returns the captured maybe() kwargs."""
    self['repo']()
    return self.maybe_args

  def mirror_url(self, url: str) -> List[str]:
    self.mirror_url_args = url
    return [url]


def _get_third_party_http_archive_args(workspace_text: str) -> dict:
  if not workspace_text:
    return {}
  d = WorkspaceDict()
  exec(workspace_text, d)
  return d.get_args()


def _remove_mirror(url: str) -> str:
  for prefix in [
      'https://mirror.bazel.build/',
      'https://storage.googleapis.com/tensorstore-bazel-mirror/',
      'https://storage.googleapis.com/grpc-bazel-mirror/',
  ]:
    if url.startswith(prefix):
      return 'https://' + url[len(prefix) :]
  return url


def _write_third_party_libraries_summary(runfiles_dir: str, output_path: str):
  """Generate the third_party_libraries.rst file."""
  with open(output_path, 'w') as f:
    f.write("""
.. list-table:: Required third-party libraries
   :header-rows: 1

   * - Identifier
     - Bundled library
     - Version
""")
    third_party_libs = []
    for dep in (pathlib.Path(runfiles_dir) / THIRD_PARTY_ROOT).iterdir():
      if not dep.is_dir():
        continue
      workspace_bzl_file = dep / 'workspace.bzl'
      if not workspace_bzl_file.exists():
        continue
      identifier = dep.name
      system_lib_supported = (dep / 'system.BUILD.bazel').exists()
      if not system_lib_supported:
        continue
      args = _get_third_party_http_archive_args(
          workspace_bzl_file.read_text(encoding='utf-8')
      )
      if not args:
        raise ValueError(f'Failed to evaluate {workspace_bzl_file}')
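      # At this point `args` holds the keyword arguments that repo() in
      # workspace.bzl passed to maybe(third_party_http_archive, ...).  A
      # hypothetical workspace.bzl (an illustrative sketch only, not a real
      # dependency) would define roughly:
      #
      #   def repo():
      #     maybe(
      #         third_party_http_archive,
      #         name = "net_example",
      #         urls = mirror_url("https://example.org/example-1.2.3.tar.gz"),
      #         strip_prefix = "example-1.2.3",
      #         system_build_file = Label(
      #             "//third_party:net_example/system.BUILD.bazel"),
      #     )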
      if 'system_build_file' not in args:
        # Maybe log this; there's a system.BUILD.bazel, but it's not added
        # to the repo() method in workspace.bzl.
        continue
      if 'urls' not in args:
        raise ValueError(f'Failed to find urls in {workspace_bzl_file}')
      name = None
      version = None
      homepage = None
      for url in args['urls']:
        url = _remove_mirror(url)
        m = re.search(r'([^/]+)-([^-]*)(\.zip|\.tar|\.tgz|\.tar\.gz)$', url)
        if m is not None:
          name = m.group(1)
          version = m.group(2)
        parsed_url = urllib.parse.urlparse(url)
        if parsed_url.netloc in ('github.com', 'sourceware.org'):
          m = re.match('https://[^/]*/[^/]*/[^/]*/', url)
          if m is not None:
            homepage = m.group(0)
        else:
          homepage = parsed_url.scheme + '://' + parsed_url.netloc
      if 'strip_prefix' in args:
        m = re.search('(.*)-([^-]*)$', args['strip_prefix'])
        if m is None:
          raise ValueError(
              'Failed to determine version from strip_prefix in'
              f' {workspace_bzl_file}'
          )
        name = m.group(1)
        version = m.group(2)[:12]
      # Override doc metadata.
      name = args.get('doc_name', name)
      version = args.get('doc_version', version)
      homepage = args.get('doc_homepage', homepage)
      if not name or not homepage or not version:
        raise ValueError(
            'Failed to determine full dependency information in'
            f' {workspace_bzl_file}; Found {name}, {homepage}, {version}'
        )
      third_party_libs.append((identifier, name, homepage, version))

    third_party_libs.sort(key=lambda x: x[1])

    for identifier, name, homepage, version in third_party_libs:
      f.write('   * - ``%s``\n' % (identifier,))
      f.write('     - `%s <%s>`_\n' % (name, homepage))
      f.write('     - %s\n' % (version,))


@contextlib.contextmanager
def _prepare_source_tree(runfiles_dir: str, excluded: List[str]):
  with tempfile.TemporaryDirectory() as temp_src_dir:
    _write_third_party_libraries_summary(
        runfiles_dir=runfiles_dir,
        output_path=os.path.join(temp_src_dir, 'third_party_libraries.rst'),
    )
    abs_docs_root = os.path.join(runfiles_dir, DOCS_ROOT)
    # Exclude theme and extension directories from temporary directory since
    # they are not needed and slow down file globbing.
    excluded_paths = frozenset([
        os.path.join(abs_docs_root, 'tensorstore_sphinx_ext'),
    ])

    def create_symlinks(source_dir, target_dir):
      for name in os.listdir(source_dir):
        source_path = os.path.join(source_dir, name)
        if source_path in excluded_paths:
          continue
        target_path = os.path.join(target_dir, name)
        if os.path.isdir(source_path):
          os.makedirs(target_path, exist_ok=True)
          create_symlinks(source_path, target_path)
          continue
        if os.path.exists(target_path):
          # Remove target path if it already exists from a previous run.
          os.remove(target_path)
        os.symlink(os.path.abspath(source_path), target_path)
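    # The call below mirrors docs/ into temp_src_dir via symlinks, while the
    # generated third_party_libraries.rst written above remains a regular
    # file.  A sketch of the resulting layout (file names illustrative):
    #
    #   temp_src_dir/
    #     conf.py -> <runfiles>/docs/conf.py
    #     index.rst -> <runfiles>/docs/index.rst
    #     third_party_libraries.rst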
    create_symlinks(os.path.join(runfiles_dir, DOCS_ROOT), temp_src_dir)
    source_cpp_root = os.path.abspath(os.path.join(runfiles_dir, CPP_ROOT))
    for name in ['driver', 'kvstore']:
      os.symlink(
          os.path.join(source_cpp_root, name), os.path.join(temp_src_dir, name)
      )

    # Prepare the sphinx cache, if provided.
    cache_env_key = 'SPHINX_IMMATERIAL_EXTERNAL_RESOURCE_CACHE_DIR'
    if cache_env_key not in os.environ:
      cache_dir = os.path.join(
          runfiles_dir, DOCS_ROOT, 'cached_external_resources', 'data'
      )
      cache_zip = os.path.join(
          runfiles_dir, DOCS_ROOT, 'cached_external_resources', 'data.zip'
      )
      if os.path.exists(cache_dir):
        os.environ[cache_env_key] = cache_dir
      elif os.path.exists(cache_zip):
        zip_path = os.path.join(
            temp_src_dir, 'sphinx_external_resource_cache_dir'
        )
        os.makedirs(zip_path, exist_ok=True)
        zipfile.ZipFile(cache_zip).extractall(zip_path)
        os.environ[cache_env_key] = zip_path

    for excluded_glob in excluded:
      if excluded_glob.startswith('/'):
        excluded_glob = excluded_glob[1:]
      matching_paths = glob.glob(
          os.path.join(temp_src_dir, excluded_glob), recursive=True
      )
      # Process deeper matches first so that children are removed before any
      # containing directories.
      matching_paths.reverse()
      for matching_path in matching_paths:
        if os.path.islink(matching_path):
          os.remove(matching_path)
        else:
          shutil.rmtree(matching_path)
    yield temp_src_dir


def run(args: argparse.Namespace, unknown: List[str]):
  # Ensure tensorstore sphinx extensions can be imported as absolute modules.
  sys.path.insert(0, os.path.abspath(DOCS_ROOT))
  runfiles_dir = os.getcwd()
  sphinx_args = [
      # Always write all files (incremental mode not used).
      '-a',
      # Don't look for saved environment (since a temporary directory is
      # used).
      '-E',
      # Show full tracebacks for errors.
      '-T',
  ]
  if args.sphinx_help:
    sphinx_args.append('--help')
  if args.pdb_on_error:
    sphinx_args.append('-P')
  elif not args.profile:
    # Use the system number of CPU cores as the number of threads to use, by
    # default.
    num_cpus_str = 'auto'
    # Allow this limit to be overridden based on the username.  This is
    # useful for CI builds run on shared machines where not all CPU cores
    # are available to be used.
    special_cpu_limits = {}
    for term in os.environ.get(
        'TENSORSTORE_SPECIAL_CPU_USER_LIMITS', ''
    ).split(' '):
      term = term.strip()
      if not term:
        continue
      parts = term.split('=', 2)
      assert len(parts) == 2
      special_cpu_limits[parts[0]] = parts[1]
    if special_cpu_limits:
      try:
        username = getpass.getuser()
        if username in special_cpu_limits:
          num_cpus_str = special_cpu_limits[username]
          print(
              f'Using special CPU limit of {num_cpus_str} due to username of'
              f' {username}'
          )
      except Exception as e:
        # Ignore failure to determine username (but report it, since a
        # special limit was requested).
        print('Failed to determine current username: %s' % (e,))
    sphinx_args += ['-j', num_cpus_str]
  output_dir = os.path.join(
      os.getenv('BUILD_WORKING_DIRECTORY', os.getcwd()), args.output
  )
  os.makedirs(output_dir, exist_ok=True)
  with _prepare_source_tree(
      runfiles_dir, excluded=args.exclude
  ) as temp_src_dir:
    # Use a separate temporary directory for the doctrees, since we don't
    # want them mixed into the output directory.
    with tempfile.TemporaryDirectory() as doctree_dir:
      sphinx_args += ['-d', doctree_dir]
      sphinx_args += [temp_src_dir, output_dir]
      sphinx_args += unknown
      import sphinx.cmd.build

      result = sphinx.cmd.build.main(sphinx_args)
      if result != 0:
        sys.exit(result)
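      # The invocation above is roughly equivalent to running, e.g. (a
      # sketch; the source and doctree paths are temporary directories):
      #
      #   sphinx-build -a -E -T -j auto \
      #       -d <doctree_dir> <temp_src_dir> <output_dir>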
      # Delete buildinfo file.
      buildinfo_path = os.path.join(output_dir, '.buildinfo')
      if os.path.exists(buildinfo_path):
        os.remove(buildinfo_path)
      print('Output written to: %s' % (os.path.abspath(output_dir),))
      if not args.profile:
        sys.exit(result)


_WINDOWS_UNC_PREFIX = '\\\\?\\'


def _strip_windows_unc_path_prefix(p: str) -> str:
  if p.startswith(_WINDOWS_UNC_PREFIX):
    p = p[len(_WINDOWS_UNC_PREFIX) :]
  return p


def main():
  if platform.system() == 'Windows':
    # Bazel uses UNC `\\?\` paths to specify Python import directories on
    # Windows, and jinja2 does not correctly handle UNC paths
    # (https://github.com/pallets/jinja/issues/1675).  As a workaround,
    # convert UNC paths to regular paths.
    sys.path[:] = [_strip_windows_unc_path_prefix(p) for p in sys.path]
  ap = argparse.ArgumentParser()
  default_output = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', None)
  ap.add_argument(
      '--output',
      '-o',
      help='Output directory',
      default=default_output,
      required=default_output is None,
  )
  ap.add_argument(
      '-P',
      dest='pdb_on_error',
      action='store_true',
      help='Run pdb on exception',
  )
  ap.add_argument(
      '--sphinx-help',
      action='store_true',
      help='Show sphinx build command-line help',
  )
  ap.add_argument('--pdb', action='store_true', help='Run under pdb')
  ap.add_argument(
      '--profile',
      type=str,
      help='Write performance profile to the specified file.',
  )
  ap.add_argument(
      '--exclude',
      action='append',
      default=[],
      help='Glob pattern of sources to exclude',
  )
  args, unknown = ap.parse_known_args()

  def do_run():
    run(args, unknown)

  if args.pdb:
    import pdb

    pdb.runcall(do_run)
  elif args.profile:
    import cProfile

    cProfile.runctx(
        'do_run()', globals=globals(), locals=locals(), filename=args.profile
    )
  else:
    do_run()


if __name__ == '__main__':
  main()
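# Example invocations (a sketch; the output path is illustrative, and the
# flags come from the argparse definitions above):
#
#   python build_docs.py --output /tmp/tensorstore-docs
#   python build_docs.py -o /tmp/tensorstore-docs --exclude '/python/**'
#
# Any flags not recognized above are forwarded verbatim to sphinx-build;
# pass --sphinx-help to list them.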