blob: 7675e53f9240715008fcd17dbe0ea983cd1329ee [file] [log] [blame]
fweikert69895ba2022-07-13 04:56:04 -07001# Lint as: python3
2# pylint: disable=g-direct-third-party-import
3# Copyright 2022 The Bazel Authors. All rights reserved.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""Module for fixing links in Bazel release docs."""
17import os
18import re
19
20_BASE_URL = "https://bazel.build"
21
22# We need to use regular expressions here since HTML can be embedded in
23# Markdown and Yaml, thus breaking XML parsers. Moreover, our use case is
24# simple, so regex should work (tm).
25_HTML_LINK_PATTERN = re.compile(
26 r"((href|src)\s*=\s*[\"']({})?)/".format(_BASE_URL))
27
28
29def _fix_html_links(content, version):
30 return _HTML_LINK_PATTERN.sub(r"\1/versions/{}/".format(version), content)
31
32
33def _fix_html_metadata(content, version):
34 return content.replace("value=\"/_book.yaml\"",
35 "value=\"/versions/{}/_book.yaml\"".format(version))
36
37
# Matches Markdown links "[text](/path)" and images "![alt](/path)" whose
# target is site-absolute, optionally prefixed with the base URL.
# Group 1 is everything up to the path (including the optional base URL),
# group 3 is the path itself.
# The base URL is escaped so that its dots only match literal dots. The
# negative lookahead skips protocol-relative targets such as
# "//example.com/x": they point to another host and must not receive a
# version prefix.
_MD_LINK_OR_IMAGE_PATTERN = re.compile(
    r"(\!?\[.*?\]\(({})?)(/(?!/).*?)\)".format(re.escape(_BASE_URL)))


def _fix_md_links_and_images(content, version):
  """Points site-absolute Markdown links and images to the versioned docs.

  Relative targets, targets on other hosts and protocol-relative targets
  ("//host/path") are left untouched.

  Args:
    content: Markdown text.
    version: Version of the Bazel release that is being built.

  Returns:
    `content` with "/versions/<version>" inserted in front of the path of
    every matching link or image target.
  """
  return _MD_LINK_OR_IMAGE_PATTERN.sub(r"\1/versions/{}\3)".format(version),
                                       content)
45
46
47_MD_METADATA_PATTERN = re.compile(r"^(Book: )(/.+)$", re.MULTILINE)
48
49
50def _fix_md_metadata(content, version):
51 return _MD_METADATA_PATTERN.sub(r"\1/versions/{}\2".format(version), content)
52
53
54_YAML_PATH_PATTERN = re.compile(r"((book_|image_)?path: ['\"]?)(/.*?)(['\"]?)$",
55 re.MULTILINE)
56
57_YAML_IGNORE_LIST = frozenset(
58 ["/", "/_project.yaml", "/versions/", "/versions/_toc.yaml"])
59
60
61def _fix_yaml_paths(content, version):
62
63 def sub(m):
64 prefix, path, suffix = m.group(1, 3, 4)
65 if path in _YAML_IGNORE_LIST:
66 return m.group(0)
67
68 return "{}/versions/{}{}{}".format(prefix, version, path, suffix)
69
70 return _YAML_PATH_PATTERN.sub(sub, content)
71
72
# Fixes grouped by the kind of markup they operate on.
_PURE_HTML_FIXES = [_fix_html_links, _fix_html_metadata]
_PURE_MD_FIXES = [_fix_md_links_and_images, _fix_md_metadata]
_PURE_YAML_FIXES = [_fix_yaml_paths]

# Maps a file extension to the fixes that are applied, in list order, to files
# with that extension. Markdown and Yaml files get their own fixes first,
# followed by the HTML fixes.
_FIXES = {
    ".html": _PURE_HTML_FIXES,
    ".md": [*_PURE_MD_FIXES, *_PURE_HTML_FIXES],
    ".yaml": [*_PURE_YAML_FIXES, *_PURE_HTML_FIXES],
}
82
83
def _get_fixes(path):
  """Returns the fixes registered for the extension of `path`, or None."""
  extension = os.path.splitext(path)[1]
  return _FIXES.get(extension)
87
88
def can_rewrite(path):
  """Tells whether links in the given file can/should be rewritten.

  Args:
    path: Path of the file in question.

  Returns:
    True if fixes are registered for the extension of `path`, False
    otherwise.
  """
  fixes = _get_fixes(path)
  return bool(fixes)
99
100
def rewrite_links(path, content, version):
  """Makes the links in the given file point to the versioned docs.

  Args:
    path: Absolute path of the file to be rewritten. Its extension selects
      the fixes that are applied.
    content: Content of said file, as text.
    version: Version of the Bazel release that is being built.

  Returns:
    The rewritten content of the file, as text. Equal to `content`
    if no links had to be rewritten.

  Raises:
    ValueError: If no fixes are registered for the extension of `path`.
  """
  applicable_fixes = _get_fixes(path)
  if not applicable_fixes:
    message = "Cannot rewrite {} due to unsupported file type.".format(path)
    raise ValueError(message)

  # Each fix receives the output of the previous one.
  for fix in applicable_fixes:
    content = fix(content, version)

  return content
122 return new_content