blob: a7f2aa7e0d8d107c1c83f83aee003ff2ff8a666a [file] [log] [blame]
fweikert69895ba2022-07-13 04:56:04 -07001# Lint as: python3
2# pylint: disable=g-direct-third-party-import
3# Copyright 2022 The Bazel Authors. All rights reserved.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""Module for fixing links in Bazel release docs."""
17import os
18import re
19
_BASE_URL = "https://bazel.build"

# We need to use regular expressions here since HTML can be embedded in
# Markdown and Yaml, thus breaking XML parsers. Moreover, our use case is
# simple, so regex should work (tm).
#
# Matches the start of an `href`/`src` attribute whose value is an absolute
# path, optionally preceded by _BASE_URL. Group 1 captures everything up to
# (but excluding) the first slash of the path. The base URL goes through
# re.escape() so that its dots only match literal dots, not any character.
_HTML_LINK_PATTERN = re.compile(
    r"((href|src)\s*=\s*[\"']({})?)/".format(re.escape(_BASE_URL)))
27
28
def _fix_html_links(content, version):
  """Prefixes absolute href/src paths in `content` with /versions/<version>/."""
  versioned_root = r"\1/versions/{}/".format(version)
  return _HTML_LINK_PATTERN.sub(versioned_root, content)
31
32
def _fix_html_metadata(content, version):
  """Points every `value="/_book.yaml"` attribute at the versioned book file."""
  unversioned = 'value="/_book.yaml"'
  versioned = 'value="/versions/{}/_book.yaml"'.format(version)
  return content.replace(unversioned, versioned)
36
37
# Matches Markdown links (`[text](target)`) and images (`![alt](target)`)
# whose target is an absolute path, optionally preceded by _BASE_URL. Group 1
# is everything before the path and group 3 is the path itself. The base URL
# goes through re.escape() so that its dots only match literal dots.
_MD_LINK_OR_IMAGE_PATTERN = re.compile(
    r"(\!?\[.*?\]\(({})?)(/.*?)\)".format(re.escape(_BASE_URL)))
40
41
def _fix_md_links_and_images(content, version):
  """Versions the absolute targets of Markdown links and images."""
  replacement = r"\1/versions/{}\3)".format(version)
  return _MD_LINK_OR_IMAGE_PATTERN.sub(replacement, content)
45
46
# Matches a `Book: /...` metadata line; group 1 is the key, group 2 the path.
_MD_METADATA_PATTERN = re.compile(
    r"^(Book: )(/.+)$",
    flags=re.MULTILINE,
)
48
49
def _fix_md_metadata(content, version):
  """Versions the path of `Book: /...` metadata lines in Markdown content."""
  replacement = r"\1/versions/{}\2".format(version)
  return _MD_METADATA_PATTERN.sub(replacement, content)
52
53
# Matches `path:`, `book_path:`, `image_path:` and `include:` entries whose
# value is an absolute path, optionally quoted, at the end of a line.
# Group 1 is the key plus opening quote, group 4 the path and group 5 the
# closing quote (empty if there is none).
_YAML_PATH_PATTERN = re.compile(
    r"(((book_|image_)?path|include): ['\"]?)"  # Key and opening quote.
    r"(/.*?)"  # The absolute path itself.
    r"(['\"]?)$",  # Optional closing quote, then end of line.
    re.MULTILINE,
)

# Paths that are left untouched when YAML paths are versioned.
_YAML_IGNORE_LIST = frozenset((
    "/",
    "/_project.yaml",
    "/versions/",
    "/versions/_toc.yaml",
))
60
61
def _fix_yaml_paths(content, version):
  """Inserts /versions/<version> before absolute paths in YAML content.

  Paths listed in _YAML_IGNORE_LIST are left exactly as they are.
  """

  def add_version(match):
    lead, target, tail = match.group(1, 4, 5)
    if target not in _YAML_IGNORE_LIST:
      return lead + "/versions/{}".format(version) + target + tail

    # Ignored paths keep their original text.
    return match.group(0)

  return _YAML_PATH_PATTERN.sub(add_version, content)
72
73
# Fixes that target a single kind of content.
_PURE_HTML_FIXES = [_fix_html_links, _fix_html_metadata]
_PURE_MD_FIXES = [_fix_md_links_and_images, _fix_md_metadata]
_PURE_YAML_FIXES = [_fix_yaml_paths]

# Maps a file extension to the fixes applied to such files, in order.
# Markdown and YAML can embed HTML, so they get the HTML fixes as well.
_FIXES = {
    ".html": _PURE_HTML_FIXES,
    ".md": [*_PURE_MD_FIXES, *_PURE_HTML_FIXES],
    ".yaml": [*_PURE_YAML_FIXES, *_PURE_HTML_FIXES],
}
83
84
def _get_fixes(path):
  """Returns the fixes registered for the extension of `path`, or None."""
  extension = os.path.splitext(path)[1]
  return _FIXES.get(extension)
88
89
def can_rewrite(path):
  """Tells whether links in the given file can/should be rewritten.

  Args:
    path: Path of the file in question.

  Returns:
    True if there is at least one fix registered for the file's extension,
    False otherwise.
  """
  applicable_fixes = _get_fixes(path)
  if applicable_fixes:
    return True
  return False
100
101
def rewrite_links(path, content, version):
  """Makes the links in the given file point to versioned docs.

  Args:
    path: Absolute path of the file to be rewritten. Its extension selects
      the fixes that are applied.
    content: Content of said file, as text.
    version: Version of the Bazel release that is being built.

  Returns:
    The rewritten content of the file, as text. Equal to `content`
    if no links had to be rewritten.

  Raises:
    ValueError: If no fixes are registered for the file's extension.
  """
  applicable_fixes = _get_fixes(path)
  if applicable_fixes:
    rewritten = content
    # Each fix receives the output of the previous one.
    for apply_fix in applicable_fixes:
      rewritten = apply_fix(rewritten, version)
    return rewritten

  raise ValueError(
      "Cannot rewrite {} due to unsupported file type.".format(path))
123 return new_content