blob: 0388897027c5b93e4e74475744836fb6fed4830a [file] [log] [blame]
// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.docgen;
import com.google.common.collect.ImmutableSet;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A utility class to check the generated documentation for unbalanced HTML tags.
 */
public class DocCheckerUtils {

  // TODO(bazel-team): remove elements from this list and clean up the tested documentations.
  /** Lower-case names of the tags that are exempt from the balance check. */
  private static final ImmutableSet<String> UNCHECKED_HTML_TAGS = ImmutableSet.<String>of(
      "br", "li", "ul", "p");

  /**
   * Matches one tag. Group 1 is the tag name, prefixed with '/' for a closing tag; group 2 is
   * everything between the name and the terminating '>'.
   */
  private static final Pattern TAG_PATTERN = Pattern.compile(
      "<([/]?[a-z0-9_]+)"
          + "([^>]*)"
          + ">",
      Pattern.CASE_INSENSITIVE);

  /** Matches one HTML comment. DOTALL is required so comments spanning several lines match. */
  private static final Pattern COMMENT_PATTERN = Pattern.compile(
      "<!--.*?-->",
      Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

  /**
   * Returns the first unmatched html tag of src or null if no such tag exists.
   * Note that this check is not performed on br, ul, li and p tags, nor on self closing
   * tags. HTML comments are removed before checking, so tags inside comments are ignored.
   * The method also prints some help to stderr in case an unmatched tag is found.
   *
   * @param src the HTML text to check
   * @return the name of the offending tag (without '/'), or null if all checked tags match
   */
  public static String getFirstUnclosedTagAndPrintHelp(String src) {
    return getFirstUnclosedTag(src, true);
  }

  /**
   * Same check as {@link #getFirstUnclosedTagAndPrintHelp} but without printing anything.
   *
   * @param src the HTML text to check
   * @return the name of the offending tag (without '/'), or null if all checked tags match
   */
  static String getFirstUnclosedTag(String src) {
    return getFirstUnclosedTag(src, false);
  }

  // TODO(bazel-team): run this on the Skylark docs too.
  /**
   * Scans src for tags and tracks the open ones on a stack.
   *
   * <p>Returns the innermost open tag when a closing tag does not match it, the closing tag
   * itself when nothing is open, the innermost tag still open at the end of the input, or
   * null when every checked tag is balanced. Tag names are compared case-insensitively and
   * returned in the case they were written in.
   */
  private static String getFirstUnclosedTag(String src, boolean printHelp) {
    src = COMMENT_PATTERN.matcher(src).replaceAll("");
    Matcher tagMatcher = TAG_PATTERN.matcher(src);
    Deque<String> tagStack = new ArrayDeque<>();
    while (tagMatcher.find()) {
      String rawTag = tagMatcher.group(1);
      String rest = tagMatcher.group(2);
      boolean closing = rawTag.startsWith("/");
      // Only a closing tag carries a leading '/'; never strip a letter off an opening tag.
      String tag = closing ? rawTag.substring(1) : rawTag;

      // Ignoring self closing tags and unchecked tags.
      if (rest.endsWith("/") || UNCHECKED_HTML_TAGS.contains(tag.toLowerCase(Locale.ROOT))) {
        continue;
      }
      if (!closing) {
        // Starting tag.
        tagStack.addLast(tag);
        continue;
      }

      // Closing tag: it has to match the most recently opened tag.
      String lastTag = tagStack.pollLast();
      if (lastTag == null) {
        // Nothing is open, so the closing tag itself is the unmatched one.
        if (printHelp) {
          reportProblem(
              "Closing tag without a matching opening tag: " + tag,
              tagStack, src, tagMatcher.start());
        }
        return tag;
      }
      if (!lastTag.equalsIgnoreCase(tag)) {
        if (printHelp) {
          reportProblem(
              "Unclosed tag: " + lastTag + "\n"
                  + "Trying to close with: " + tag,
              tagStack, src, tagMatcher.start());
        }
        return lastTag;
      }
    }

    // Anything still open at the end of the input was never closed.
    String unclosed = tagStack.peekLast();
    if (unclosed != null && printHelp) {
      reportProblem(
          "Unclosed tag at the end of the input: " + unclosed, tagStack, src, src.length());
    }
    return unclosed;
  }

  /** Prints the message, the open tag stack and up to 200 characters of src preceding end. */
  private static void reportProblem(String message, Deque<String> tagStack, String src, int end) {
    System.err.println(
        message + "\n"
            + "Stack of open tags: " + tagStack + "\n"
            + "Last 200 characters:\n"
            + src.substring(Math.max(end - 200, 0), end));
  }
}