// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.docgen;

import com.google.common.collect.ImmutableSet;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

| /** |
| * A utility class to check the generated documentations. |
| */ |
| public class DocCheckerUtils { |
| |
| // TODO(bazel-team): remove elements from this list and clean up the tested documentations. |
| private static final ImmutableSet<String> UNCHECKED_HTML_TAGS = ImmutableSet.<String>of( |
| "br", "li", "ul", "p"); |
| |
| private static final Pattern TAG_PATTERN = Pattern.compile( |
| "<([/]?[a-z0-9_]+)" |
| + "([^>]*)" |
| + ">", |
| Pattern.CASE_INSENSITIVE); |
| |
| private static final Pattern COMMENT_PATTERN = Pattern.compile( |
| "<!--.*?-->", |
| Pattern.CASE_INSENSITIVE); |
| |
| /** |
| * Returns the first unmatched html tag of srcs or null if no such tag exists. |
| * Note that this check is not performed on br, ul, li and p tags. The method also |
| * prints some help in case an unmatched tag is found. The check is performed |
| * inside comments too. |
| */ |
| public static String getFirstUnclosedTagAndPrintHelp(String src) { |
| return getFirstUnclosedTag(src, true); |
| } |
| |
| static String getFirstUnclosedTag(String src) { |
| return getFirstUnclosedTag(src, false); |
| } |
| |
| // TODO(bazel-team): run this on the Skylark docs too. |
| private static String getFirstUnclosedTag(String src, boolean printHelp) { |
| Matcher commentMatcher = COMMENT_PATTERN.matcher(src); |
| src = commentMatcher.replaceAll(""); |
| Matcher tagMatcher = TAG_PATTERN.matcher(src); |
| Deque<String> tagStack = new ArrayDeque<>(); |
| while (tagMatcher.find()) { |
| String tag = tagMatcher.group(1); |
| String rest = tagMatcher.group(2); |
| String strippedTag = tag.substring(1); |
| |
| // Ignoring self closing tags. |
| if (!rest.endsWith("/") |
| // Ignoring unchecked tags. |
| && !UNCHECKED_HTML_TAGS.contains(tag) && !UNCHECKED_HTML_TAGS.contains(strippedTag)) { |
| if (tag.startsWith("/")) { |
| // Closing tag. Removing '/' from the beginning. |
| tag = strippedTag; |
| String lastTag = tagStack.removeLast(); |
| if (!lastTag.equals(tag)) { |
| if (printHelp) { |
| System.err.println( |
| "Unclosed tag: " + lastTag + "\n" |
| + "Trying to close with: " + tag + "\n" |
| + "Stack of open tags: " + tagStack + "\n" |
| + "Last 200 characters:\n" |
| + src.substring(Math.max(tagMatcher.start() - 200, 0), tagMatcher.start())); |
| } |
| return lastTag; |
| } |
| } else { |
| // Starting tag. |
| tagStack.addLast(tag); |
| } |
| } |
| } |
| return null; |
| } |
| } |