# Lint as: python3
# pylint: disable=g-direct-third-party-import
# Copyright 2022 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for fixing links in Bazel release docs."""
17import os
18import re
19
20_BASE_URL = "https://bazel.build"
21
22# We need to use regular expressions here since HTML can be embedded in
23# Markdown and Yaml, thus breaking XML parsers. Moreover, our use case is
24# simple, so regex should work (tm).
25_HTML_LINK_PATTERN = re.compile(
26 r"((href|src)\s*=\s*[\"']({})?)/".format(_BASE_URL))
27
28
Fabian Meumertzheim25627072024-06-18 05:58:09 -070029def _fix_html_links(content, rel_path, version):
30 del rel_path # unused
fweikert69895ba2022-07-13 04:56:04 -070031 return _HTML_LINK_PATTERN.sub(r"\1/versions/{}/".format(version), content)
32
33
Fabian Meumertzheim25627072024-06-18 05:58:09 -070034def _fix_html_metadata(content, rel_path, version):
35 del rel_path # unused
fweikert69895ba2022-07-13 04:56:04 -070036 return content.replace("value=\"/_book.yaml\"",
37 "value=\"/versions/{}/_book.yaml\"".format(version))
38
39
Fabian Meumertzheim25627072024-06-18 05:58:09 -070040def _set_header_vars(content, rel_path, version):
41 return content.replace(
42 """{% include "_buttons.html" %}""",
43 f"""{{% dynamic setvar version "{version}" %}}
44{{% dynamic setvar original_path "/{os.path.splitext(rel_path)[0]}" %}}
45{{% include "_buttons.html" %}}""",
46 )
47
48
# Matches Markdown links `[text](/path)` and images `![alt](/path)` whose
# target is a site-absolute path, optionally prefixed with _BASE_URL.
# Group 1 is everything in front of the path, group 3 the path itself.
# The base URL is escaped so its dots only match literal dots, and the
# negative lookahead leaves protocol-relative targets ("//host/...") alone.
_MD_LINK_OR_IMAGE_PATTERN = re.compile(
    r"(\!?\[.*?\]\(({})?)(/(?!/).*?)\)".format(re.escape(_BASE_URL)))


def _fix_md_links_and_images(content, rel_path, version):
    """Points site-absolute Markdown links and images at the versioned docs.

    Args:
        content: Markdown text.
        rel_path: Unused; accepted so all fixes share one signature.
        version: Version that is inserted as `/versions/<version>`.

    Returns:
        `content` with `/versions/<version>` inserted in front of the path
        of every matching link or image target.
    """
    del rel_path  # unused

    def versioned(match):
        # A callable replacement inserts the version literally instead of
        # interpreting it as part of a regex replacement template.
        return "{}/versions/{}{})".format(
            match.group(1), version, match.group(3))

    return _MD_LINK_OR_IMAGE_PATTERN.sub(versioned, content)
57
58
59_MD_METADATA_PATTERN = re.compile(r"^(Book: )(/.+)$", re.MULTILINE)
60
61
Fabian Meumertzheim25627072024-06-18 05:58:09 -070062def _fix_md_metadata(content, rel_path, version):
63 del rel_path # unused
fweikert69895ba2022-07-13 04:56:04 -070064 return _MD_METADATA_PATTERN.sub(r"\1/versions/{}\2".format(version), content)
65
66
Googler468c0562023-05-30 02:31:21 -070067_YAML_PATH_PATTERN = re.compile(
68 r"(((book_|image_)?path|include): ['\"]?)(/.*?)(['\"]?)$", re.MULTILINE
69)
fweikert69895ba2022-07-13 04:56:04 -070070
71_YAML_IGNORE_LIST = frozenset(
72 ["/", "/_project.yaml", "/versions/", "/versions/_toc.yaml"])
73
74
Fabian Meumertzheim25627072024-06-18 05:58:09 -070075def _fix_yaml_paths(content, rel_path, version):
76 del rel_path # unused
fweikert69895ba2022-07-13 04:56:04 -070077 def sub(m):
Googler468c0562023-05-30 02:31:21 -070078 prefix, path, suffix = m.group(1, 4, 5)
fweikert69895ba2022-07-13 04:56:04 -070079 if path in _YAML_IGNORE_LIST:
80 return m.group(0)
81
82 return "{}/versions/{}{}{}".format(prefix, version, path, suffix)
83
84 return _YAML_PATH_PATTERN.sub(sub, content)
85
86
# Fixes grouped by the syntax they operate on. Every fix is called as
# fix(content, rel_path, version) and returns the rewritten content.
_PURE_HTML_FIXES = [_fix_html_links, _fix_html_metadata]
_PURE_MD_FIXES = [_fix_md_links_and_images, _fix_md_metadata, _set_header_vars]
_PURE_YAML_FIXES = [_fix_yaml_paths]

# Maps a file extension to the ordered list of fixes for that file type.
# Markdown and Yaml files can embed HTML, so the HTML fixes are applied to
# them as well, after the format-specific fixes.
_FIXES = {
    ".html": _PURE_HTML_FIXES,
    ".md": _PURE_MD_FIXES + _PURE_HTML_FIXES,
    ".yaml": _PURE_YAML_FIXES + _PURE_HTML_FIXES,
}
96
97
def _get_fixes(path):
    """Returns the fixes registered for the extension of `path`.

    Args:
        path: Path of the file in question.

    Returns:
        The list of fixes from `_FIXES` for the file's extension, or None
        if no fixes are registered for that extension.
    """
    extension = os.path.splitext(path)[1]
    return _FIXES.get(extension)
101
102
def can_rewrite(path):
    """Returns whether links in this file can/should be rewritten.

    Args:
        path: Path of the file in question.

    Returns:
        True if the file can/should be rewritten, i.e. if at least one fix
        is registered for its extension.
    """
    fixes = _get_fixes(path)
    return bool(fixes)
113
114
def rewrite_links(path, content, rel_path, version):
    """Rewrites links in the given file to point to versioned docs.

    Args:
        path: Absolute path of the file to be rewritten.
        content: Content of said file, as text.
        rel_path: Relative path of the file to be rewritten.
        version: Version of the Bazel release that is being built.

    Returns:
        The rewritten content of the file, as text. Equal to `content`
        if no links had to be rewritten.

    Raises:
        ValueError: If no fixes are registered for the file's extension.
    """
    fixes = _get_fixes(path)
    if not fixes:
        raise ValueError(
            "Cannot rewrite {} due to unsupported file type.".format(path))

    # Apply the fixes in their registered order, each one operating on the
    # output of the previous one.
    for fix in fixes:
        content = fix(content, rel_path, version)

    return content