Add script to build release documentation
The script builds a snapshot of the documentation, based on the state in the release branch.
Next, it rewrites all links to point to the versioned documentation for that release.
Finally, it creates an archive of all documentation files.
Closes #15686.
PiperOrigin-RevId: 460684222
Change-Id: Ic466e9e72cc862079f0551703fc2a16701888a2e
diff --git a/scripts/docs/BUILD b/scripts/docs/BUILD
new file mode 100644
index 0000000..74bdaad
--- /dev/null
+++ b/scripts/docs/BUILD
@@ -0,0 +1,81 @@
+load("//tools/python:private/defs.bzl", "py_binary", "py_library")
+
+BUILD_SCM_REV_CMD = "$$(sed -n 's/^BUILD_SCM_REVISION //p' bazel-out/volatile-status.txt)"
+
+# Requires --config=docs
+genrule(
+ name = "gen_release_docs",
+ srcs = [
+ ":new_toc.yaml",
+ "//site/en:docs",
+ "//src/main/java/com/google/devtools/build/lib:reference-docs.zip",
+ ],
+ outs = [
+ "release_docs.zip",
+ ],
+ cmd = "$(location :create_release_docs)" +
+ " --version=" + BUILD_SCM_REV_CMD +
+ " --toc_path=$(location :new_toc.yaml)" +
+ " --narrative_docs_path=$(location //site/en:docs)" +
+ " --reference_docs_path=$(location //src/main/java/com/google/devtools/build/lib:reference-docs.zip)" +
+ " --output_path=$(OUTS)",
+ stamp = 1,
+ tools = [
+ ":create_release_docs",
+ ],
+)
+
+# Requires --config=docs
+genrule(
+ name = "gen_new_toc",
+ srcs = [
+ "//site/en:versions/_toc.yaml",
+ ],
+ outs = ["new_toc.yaml"],
+ cmd = "$(location //src/main/java/com/google/devtools/build/docgen/release:toc_updater)" +
+ " -i $(location //site/en:versions/_toc.yaml)" +
+ " -o $(OUTS)" +
+ " -v " + BUILD_SCM_REV_CMD,
+ stamp = 1,
+ tools = [
+ "//src/main/java/com/google/devtools/build/docgen/release:toc_updater",
+ ],
+)
+
+py_binary(
+ name = "create_release_docs",
+ srcs = ["create_release_docs.py"],
+ deps = [
+ ":rewriter",
+ "//third_party/py/abseil",
+ ],
+)
+
+py_library(
+ name = "rewriter",
+ srcs = ["rewriter.py"],
+)
+
+py_test(
+ name = "rewriter_test",
+ srcs = ["rewriter_test.py"],
+ data = [":testdata"],
+ deps = [
+ ":rewriter",
+ "//third_party/py/abseil",
+ ],
+)
+
+filegroup(
+ name = "srcs",
+ srcs = glob(["**"]),
+ visibility = ["//scripts:__pkg__"],
+)
+
+filegroup(
+ name = "testdata",
+ srcs = glob(["testdata/**"]),
+ visibility = [
+ ":__pkg__",
+ ],
+)
diff --git a/scripts/docs/README.md b/scripts/docs/README.md
new file mode 100644
index 0000000..73e2517
--- /dev/null
+++ b/scripts/docs/README.md
@@ -0,0 +1,21 @@
+# Bazel Release Documentation
+
+This directory contains scripts to build the versioned documentation for a new Bazel release.
+
+## Build release documentation
+
+You can build the release documentation by running this command from within a release branch:
+
+```
+bazel build //scripts/docs:gen_release_docs --config=docs
+```
+
+This is only necessary for testing, though. There is a separate pipeline that handles this task for actual Bazel releases.
+
+## Test scripts
+
+You can test some of the scripts by running the following command:
+
+```
+bazel test --test_output=streamed //scripts/docs:rewriter_test
+```
diff --git a/scripts/docs/create_release_docs.py b/scripts/docs/create_release_docs.py
new file mode 100644
index 0000000..3aa1c23
--- /dev/null
+++ b/scripts/docs/create_release_docs.py
@@ -0,0 +1,193 @@
+# Lint as: python3
+# pylint: disable=g-direct-third-party-import
+# Copyright 2022 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A tool for building the documentation for a Bazel release."""
+import os
+import shutil
+import sys
+import tarfile
+import tempfile
+import zipfile
+
+from absl import app
+from absl import flags
+
+from scripts.docs import rewriter
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("version", None, "Name of the Bazel release.")
+flags.DEFINE_string(
+ "toc_path",
+ None,
+ "Path to the _toc.yaml file that contains the table of contents for the versions menu.",
+)
+flags.DEFINE_string(
+ "narrative_docs_path",
+ None,
+ "Path of the archive (zip or tar) that contains the narrative documentation.",
+)
+flags.DEFINE_string(
+ "reference_docs_path",
+ None,
+ "Path of the archive (zip or tar) that contains the reference documentation.",
+)
+flags.DEFINE_string(
+ "output_path", None,
+ "Location where the zip'ed documentation should be written to.")
+
+_ARCHIVE_FUNCTIONS = {".tar": tarfile.open, ".zip": zipfile.ZipFile}
+
+
+def validate_flag(name):
+  """Ensures that a flag is set, and returns its value.
+
+  This function exits with status 1 if the flag is unset or empty.
+
+  Args:
+    name: Name of the flag.
+
+  Returns:
+    The value of the flag, if set.
+  """
+  value = getattr(FLAGS, name, None)
+  if value:
+    return value
+
+  print("Missing --{} flag.".format(name), file=sys.stderr)
+  sys.exit(1)  # Not the bare exit(): that helper is only injected by `site`.
+
+
+def create_docs_tree(version, toc_path, narrative_docs_path,
+                     reference_docs_path):
+  """Assembles the documentation tree for one Bazel release in a temp dir.
+
+  Args:
+    version: Version of this Bazel release.
+    toc_path: Absolute path to the _toc.yaml file that lists the most recent
+      Bazel versions.
+    narrative_docs_path: Absolute path of an archive that contains the narrative
+      documentation (can be .zip or .tar).
+    reference_docs_path: Absolute path of an archive that contains the reference
+      documentation (can be .zip or .tar).
+
+  Returns:
+    A (root, toc) tuple: the absolute path of the root of the directory tree
+    and the absolute path of the _toc.yaml copy placed inside that tree.
+  """
+  tree_root = tempfile.mkdtemp()
+  versions_root = os.path.join(tree_root, "versions")
+  final_toc = os.path.join(versions_root, "_toc.yaml")
+  docs_dest = os.path.join(versions_root, version)
+
+  # Layout: <root>/versions/_toc.yaml next to <root>/versions/<version>/,
+  # which receives the contents of both archives.
+  os.makedirs(versions_root)
+  shutil.copyfile(toc_path, final_toc)
+
+  os.makedirs(docs_dest)
+  for archive_path in (narrative_docs_path, reference_docs_path):
+    try_extract(archive_path, docs_dest)
+
+  return tree_root, final_toc
+
+
+def try_extract(archive_path, output_dir):
+  """Tries to extract the given archive into the given directory.
+
+  The archive type is chosen by file extension via _ARCHIVE_FUNCTIONS.
+
+  Args:
+    archive_path: Absolute path of an archive that should be extracted. Can be
+      .tar or .zip.
+    output_dir: Absolute path of the directory into which the archive should be
+      extracted.
+
+  Raises:
+    ValueError: If the archive has an unsupported file type.
+  """
+  _, ext = os.path.splitext(archive_path)
+  open_func = _ARCHIVE_FUNCTIONS.get(ext)
+  if not open_func:
+    raise ValueError("File {}: Invalid file extension '{}'. Allowed: {}".format(
+        archive_path, ext, ", ".join(_ARCHIVE_FUNCTIONS)))
+
+  with open_func(archive_path, "r") as archive:
+    archive.extractall(output_dir)
+
+
+def build_archive(version, root_dir, toc_path, output_path):
+  """Builds a documentation archive for the given Bazel release.
+
+  This function reads all documentation files from the tree rooted in root_dir,
+  fixes all links so that they point at versioned files, then builds a zip
+  archive of all files. Entry names are paths relative to root_dir.
+
+  Args:
+    version: Version of the Bazel release whose documentation is being built.
+    root_dir: Absolute path of the directory that contains the documentation
+      tree.
+    toc_path: Absolute path of the _toc.yaml file; it is stored unmodified.
+    output_path: Absolute path where the archive should be written to.
+  """
+  with zipfile.ZipFile(output_path, "w") as archive:
+    for root, _, files in os.walk(root_dir):
+      for f in files:
+        src = os.path.join(root, f)
+        dest = os.path.relpath(src, root_dir)
+        # Only rewritable file types get new links; the rest is copied as-is.
+        if src != toc_path and rewriter.can_rewrite(src):
+          archive.writestr(dest, get_versioned_content(src, version))
+        else:
+          archive.write(src, dest)
+
+
+def get_versioned_content(path, version):
+  """Reads a UTF-8 documentation file and returns it with versioned links.
+
+  Args:
+    path: Absolute path of the file that should be rewritten.
+    version: Version of the Bazel release whose documentation is being built.
+
+  Returns:
+    The content of the given file, with rewritten links.
+  """
+  with open(path, "rt", encoding="utf-8") as doc_file:
+    original = doc_file.read()
+
+  return rewriter.rewrite_links(path, original, version)
+
+
+def main(unused_argv):
+  """Builds the release documentation archive described by the flags."""
+
+  # Resolve every required flag first; validate_flag() exits the process on
+  # the first one that is missing.
+  version = validate_flag("version")
+  output_path = validate_flag("output_path")
+  toc_source = validate_flag("toc_path")
+  narrative_archive = validate_flag("narrative_docs_path")
+  reference_archive = validate_flag("reference_docs_path")
+
+  root_dir, toc_path = create_docs_tree(version, toc_source,
+                                        narrative_archive, reference_archive)
+
+  build_archive(
+      version, root_dir, toc_path, output_path)
+
+
+if __name__ == "__main__":
+  # app.run() parses sys.argv into FLAGS itself before calling main().
+  app.run(main)
diff --git a/scripts/docs/get_workspace_status.sh b/scripts/docs/get_workspace_status.sh
new file mode 100755
index 0000000..9c8fbcb
--- /dev/null
+++ b/scripts/docs/get_workspace_status.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Copyright 2022 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+RELEASE_NAME=$(source scripts/release/common.sh; get_full_release_name)
+# No release name: report the abbreviated ref of HEAD, prefixed with UNSAFE_.
+if [[ -z "$RELEASE_NAME" ]]; then
+ echo BUILD_SCM_REVISION UNSAFE_"$(git rev-parse --abbrev-ref HEAD)"
+else
+ echo "BUILD_SCM_REVISION $RELEASE_NAME"
+fi
diff --git a/scripts/docs/rewriter.py b/scripts/docs/rewriter.py
new file mode 100644
index 0000000..7675e53
--- /dev/null
+++ b/scripts/docs/rewriter.py
@@ -0,0 +1,122 @@
+# Lint as: python3
+# pylint: disable=g-direct-third-party-import
+# Copyright 2022 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module for fixing links in Bazel release docs."""
+import os
+import re
+
+_BASE_URL = "https://bazel.build"
+
+# We need to use regular expressions here since HTML can be embedded in
+# Markdown and Yaml, thus breaking XML parsers. The base URL is escaped so its
+# "." stays literal, and "(?!/)" leaves protocol-relative "//host" links alone.
+_HTML_LINK_PATTERN = re.compile(
+    r"((href|src)\s*=\s*[\"']({})?)/(?!/)".format(re.escape(_BASE_URL)))
+
+
+def _fix_html_links(content, version):
+ return _HTML_LINK_PATTERN.sub(r"\1/versions/{}/".format(version), content)
+
+
+def _fix_html_metadata(content, version):
+ return content.replace("value=\"/_book.yaml\"",
+ "value=\"/versions/{}/_book.yaml\"".format(version))
+
+
+_MD_LINK_OR_IMAGE_PATTERN = re.compile(  # "(?!/)": skip "//host" links.
+    r"(\!?\[.*?\]\(({})?)(/(?!/).*?)\)".format(re.escape(_BASE_URL)))
+
+
+def _fix_md_links_and_images(content, version):
+ return _MD_LINK_OR_IMAGE_PATTERN.sub(r"\1/versions/{}\3)".format(version),
+ content)
+
+
+_MD_METADATA_PATTERN = re.compile(r"^(Book: )(/.+)$", re.MULTILINE)
+
+
+def _fix_md_metadata(content, version):
+ return _MD_METADATA_PATTERN.sub(r"\1/versions/{}\2".format(version), content)
+
+
+_YAML_PATH_PATTERN = re.compile(r"((book_|image_)?path: ['\"]?)(/.*?)(['\"]?)$",
+ re.MULTILINE)
+
+_YAML_IGNORE_LIST = frozenset(
+ ["/", "/_project.yaml", "/versions/", "/versions/_toc.yaml"])
+
+
+def _fix_yaml_paths(content, version):
+  """Prefixes absolute YAML paths with /versions/<version>, barring ignored."""
+  def versioned(match):
+    path = match.group(3)
+    if path in _YAML_IGNORE_LIST:
+      return match.group(0)  # Listed paths are left exactly as written.
+    return "{}/versions/{}{}{}".format(
+        match.group(1), version, path, match.group(4))
+
+  return _YAML_PATH_PATTERN.sub(versioned, content)
+
+
+_PURE_HTML_FIXES = [_fix_html_links, _fix_html_metadata]
+_PURE_MD_FIXES = [_fix_md_links_and_images, _fix_md_metadata]
+_PURE_YAML_FIXES = [_fix_yaml_paths]
+
+_FIXES = {
+ ".html": _PURE_HTML_FIXES,
+ ".md": _PURE_MD_FIXES + _PURE_HTML_FIXES,
+ ".yaml": _PURE_YAML_FIXES + _PURE_HTML_FIXES,
+}
+
+
+def _get_fixes(path):
+ _, ext = os.path.splitext(path)
+ return _FIXES.get(ext)
+
+
+def can_rewrite(path):
+  """Tells whether this module has link fixes for the file's extension.
+
+  Args:
+    path: Path of the file in question.
+
+  Returns:
+    True if the file can/should be rewritten.
+  """
+  return True if _get_fixes(path) else False
+
+
+def rewrite_links(path, content, version):
+  """Applies every link fix registered for the file's type to its content.
+
+  Args:
+    path: Absolute path of the file to be rewritten.
+    content: Content of said file, as text.
+    version: Version of the Bazel release that is being built.
+
+  Returns:
+    The rewritten content of the file, as text. Equal to `content`
+    if no links had to be rewritten.
+  """
+  applicable = _get_fixes(path)
+  if not applicable:
+    raise ValueError(
+        "Cannot rewrite {} due to unsupported file type.".format(path))
+
+  # Each fix receives the output of the previous one, in registration order.
+  for apply_fix in applicable:
+    content = apply_fix(content, version)
+
+  return content
diff --git a/scripts/docs/rewriter_test.py b/scripts/docs/rewriter_test.py
new file mode 100644
index 0000000..731eeeb
--- /dev/null
+++ b/scripts/docs/rewriter_test.py
@@ -0,0 +1,60 @@
+# Copyright 2022 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import unittest
+from absl.testing import parameterized
+from scripts.docs import rewriter
+
+
+class CanRewriteTest(parameterized.TestCase):
+
+ @parameterized.parameters(("/file/doc.md", True), ("/path/_book.yaml", True),
+ ("http://www.bazel.build/foo.html", True),
+ ("/dir/test.txt", False),
+ ("/images/aspects.svg", False))
+ def testCanRewrite(self, path, expected_can_rewrite):
+ self.assertEqual(rewriter.can_rewrite(path), expected_can_rewrite)
+
+
+def read_data_file(basename, in_or_out_fragment):
+  testdata_dir = os.path.join(
+      os.getenv("TEST_SRCDIR"), "io_bazel/scripts/docs/testdata")
+  path = os.path.join(testdata_dir, in_or_out_fragment, basename)
+  with open(path, "rt", encoding="utf-8") as f:
+    return path, f.read()
+
+
+class RewriteLinksTest(parameterized.TestCase):
+
+ @parameterized.parameters(("_book.yaml"), ("doc.md"),
+ ("markdown_with_html.md"), ("site.html"),
+ ("yaml_with_html.yaml"))
+ def testRewrite(self, basename):
+ input_path, content = read_data_file(basename, "input")
+ _, version = read_data_file("VERSION", "input")
+
+ actual = rewriter.rewrite_links(input_path, content, version)
+
+ _, expected = read_data_file(basename, "expected_output")
+
+ self.assertEqual(actual, expected)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/scripts/docs/testdata/expected_output/_book.yaml b/scripts/docs/testdata/expected_output/_book.yaml
new file mode 100644
index 0000000..4860928
--- /dev/null
+++ b/scripts/docs/testdata/expected_output/_book.yaml
@@ -0,0 +1,35 @@
+upper_tabs:
+- name: Versions
+ menu:
+ - column:
+ - links:
+ - include: /versions/_toc.yaml
+ - column:
+ - links:
+ - label: "Nightly"
+ path: /
+ - label: "More…"
+ path: /versions/
+- name: Getting started
+ heading: Getting started with Bazel
+ lower_tabs:
+ other:
+ - name: Why Bazel?
+ contents:
+ - title: Overview
+ path: /versions/6.6.6/start/
+ - title: Intro to Bazel
+ path: /versions/6.6.6/start/bazel-intro
+ - title: Bazel vision
+ path: /versions/6.6.6/start/bazel-vision
+ - title: Get started
+ path: /versions/6.6.6/start/getting-started
+ - title: FAQ
+ path: /versions/6.6.6/faq
+ - name: Releases & Versions
+ contents:
+ - heading: Versions
+ - title: Choose a version
+ path: /versions/
+ - title: Update versions with Bazelisk
+ path: /versions/6.6.6/versions/updating-bazel
diff --git a/scripts/docs/testdata/expected_output/doc.md b/scripts/docs/testdata/expected_output/doc.md
new file mode 100644
index 0000000..fcee50a
--- /dev/null
+++ b/scripts/docs/testdata/expected_output/doc.md
@@ -0,0 +1,14 @@
+Project: /_project.yaml
+Book: /versions/6.6.6/_book.yaml
+
+# Configurations
+
+A build setting is a single piece of [configuration](/versions/6.6.6/rules/rules#configurations) information.
+
+Like all rules, build setting rules have [implementation functions](https://bazel.build/versions/6.6.6/rules/rules#implementation-function).
+
+In Starlark, transitions are defined much like rules, with a defining
+`transition()` [function](lib/transition#transition) and an implementation function.
+
+See [Accessing attributes with transitions](#accessing-attributes-with-transitions)
+for how to read these keys.
diff --git a/scripts/docs/testdata/expected_output/markdown_with_html.md b/scripts/docs/testdata/expected_output/markdown_with_html.md
new file mode 100644
index 0000000..4a21336
--- /dev/null
+++ b/scripts/docs/testdata/expected_output/markdown_with_html.md
@@ -0,0 +1,14 @@
+Project: /_project.yaml
+Book: /versions/6.6.6/_book.yaml
+
+Lorem ipsum [short link](/versions/6.6.6/foo/bar). Or rather a [long link](https://bazel.build/versions/6.6.6/foo/bar)?
+
+
+
+**Figure 1.** Scalability graph.
+
+Please ignore this [relative link](relative/link).
+
+This might be a <a href="/versions/6.6.6/foo/bar">test</a>,
+
+<img src="https://bazel.build/versions/6.6.6/images/test.jpg">
diff --git a/scripts/docs/testdata/expected_output/site.html b/scripts/docs/testdata/expected_output/site.html
new file mode 100644
index 0000000..5c26313
--- /dev/null
+++ b/scripts/docs/testdata/expected_output/site.html
@@ -0,0 +1,20 @@
+<html devsite>
+<head>
+ <meta name="project_path" value="/_project.yaml">
+ <meta name="book_path" value="/versions/6.6.6/_book.yaml">
+</head>
+<body>
+
+<h1 class="page-title">Command-Line Reference</h1>
+
+See the <a href="/versions/6.6.6/docs/build#specifying-build-targets">User's Guide</a> for the
+target patterns syntax.
+<a href="https://bazel.build/versions/6.6.6/docs/build#specifying-build-targets">This</a> is the same link, btw.
+
+Please ignore the <a href="relative/link">relative link</a> and external links such as <a href="https://github.com/bazelbuild/bazel">GitHub</a>.
+
+<img alt="wow" src = "/versions/6.6.6/images/great.png"/>
+<img src="https://bazel.build/versions/6.6.6/images/greater.jpg" alt=""/>
+
+</body>
+</html>
diff --git a/scripts/docs/testdata/expected_output/yaml_with_html.yaml b/scripts/docs/testdata/expected_output/yaml_with_html.yaml
new file mode 100644
index 0000000..4dad62b
--- /dev/null
+++ b/scripts/docs/testdata/expected_output/yaml_with_html.yaml
@@ -0,0 +1,17 @@
+book_path: /versions/6.6.6/_book.yaml
+project_path: /_project.yaml
+title: Reference
+landing_page:
+ nav: left
+ rows:
+ - background: BAZEL_THEME
+ options:
+ - hero
+ - description-50
+ - no-image-background
+ - padding-large
+ items:
+ - image_path: "/versions/6.6.6/images/placeholder_960.png"
+ heading: Reference
+ description: >
+ Look up commands, queries, and terminology necessary to working with <a href="/versions/6.6.6/guide">Bazel</a>.
\ No newline at end of file
diff --git a/scripts/docs/testdata/input/VERSION b/scripts/docs/testdata/input/VERSION
new file mode 100644
index 0000000..b0c4289
--- /dev/null
+++ b/scripts/docs/testdata/input/VERSION
@@ -0,0 +1 @@
+6.6.6
\ No newline at end of file
diff --git a/scripts/docs/testdata/input/_book.yaml b/scripts/docs/testdata/input/_book.yaml
new file mode 100644
index 0000000..5153515
--- /dev/null
+++ b/scripts/docs/testdata/input/_book.yaml
@@ -0,0 +1,35 @@
+upper_tabs:
+- name: Versions
+ menu:
+ - column:
+ - links:
+ - include: /versions/_toc.yaml
+ - column:
+ - links:
+ - label: "Nightly"
+ path: /
+ - label: "More…"
+ path: /versions/
+- name: Getting started
+ heading: Getting started with Bazel
+ lower_tabs:
+ other:
+ - name: Why Bazel?
+ contents:
+ - title: Overview
+ path: /start/
+ - title: Intro to Bazel
+ path: /start/bazel-intro
+ - title: Bazel vision
+ path: /start/bazel-vision
+ - title: Get started
+ path: /start/getting-started
+ - title: FAQ
+ path: /faq
+ - name: Releases & Versions
+ contents:
+ - heading: Versions
+ - title: Choose a version
+ path: /versions/
+ - title: Update versions with Bazelisk
+ path: /versions/updating-bazel
diff --git a/scripts/docs/testdata/input/doc.md b/scripts/docs/testdata/input/doc.md
new file mode 100644
index 0000000..e86b57e
--- /dev/null
+++ b/scripts/docs/testdata/input/doc.md
@@ -0,0 +1,14 @@
+Project: /_project.yaml
+Book: /_book.yaml
+
+# Configurations
+
+A build setting is a single piece of [configuration](/rules/rules#configurations) information.
+
+Like all rules, build setting rules have [implementation functions](https://bazel.build/rules/rules#implementation-function).
+
+In Starlark, transitions are defined much like rules, with a defining
+`transition()` [function](lib/transition#transition) and an implementation function.
+
+See [Accessing attributes with transitions](#accessing-attributes-with-transitions)
+for how to read these keys.
diff --git a/scripts/docs/testdata/input/markdown_with_html.md b/scripts/docs/testdata/input/markdown_with_html.md
new file mode 100644
index 0000000..7cf8c0f
--- /dev/null
+++ b/scripts/docs/testdata/input/markdown_with_html.md
@@ -0,0 +1,14 @@
+Project: /_project.yaml
+Book: /_book.yaml
+
+Lorem ipsum [short link](/foo/bar). Or rather a [long link](https://bazel.build/foo/bar)?
+
+
+
+**Figure 1.** Scalability graph.
+
+Please ignore this [relative link](relative/link).
+
+This might be a <a href="/foo/bar">test</a>,
+
+<img src="https://bazel.build/images/test.jpg">
diff --git a/scripts/docs/testdata/input/site.html b/scripts/docs/testdata/input/site.html
new file mode 100644
index 0000000..de4c3a2
--- /dev/null
+++ b/scripts/docs/testdata/input/site.html
@@ -0,0 +1,20 @@
+<html devsite>
+<head>
+ <meta name="project_path" value="/_project.yaml">
+ <meta name="book_path" value="/_book.yaml">
+</head>
+<body>
+
+<h1 class="page-title">Command-Line Reference</h1>
+
+See the <a href="/docs/build#specifying-build-targets">User's Guide</a> for the
+target patterns syntax.
+<a href="https://bazel.build/docs/build#specifying-build-targets">This</a> is the same link, btw.
+
+Please ignore the <a href="relative/link">relative link</a> and external links such as <a href="https://github.com/bazelbuild/bazel">GitHub</a>.
+
+<img alt="wow" src = "/images/great.png"/>
+<img src="https://bazel.build/images/greater.jpg" alt=""/>
+
+</body>
+</html>
diff --git a/scripts/docs/testdata/input/yaml_with_html.yaml b/scripts/docs/testdata/input/yaml_with_html.yaml
new file mode 100644
index 0000000..e222d8e
--- /dev/null
+++ b/scripts/docs/testdata/input/yaml_with_html.yaml
@@ -0,0 +1,17 @@
+book_path: /_book.yaml
+project_path: /_project.yaml
+title: Reference
+landing_page:
+ nav: left
+ rows:
+ - background: BAZEL_THEME
+ options:
+ - hero
+ - description-50
+ - no-image-background
+ - padding-large
+ items:
+ - image_path: "/images/placeholder_960.png"
+ heading: Reference
+ description: >
+ Look up commands, queries, and terminology necessary to working with <a href="/guide">Bazel</a>.
\ No newline at end of file