blob: ba3ae189be929e859835cb93e2759cf1ec1eaf55 [file] [log] [blame]
// Copyright 2016 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.util;
import javax.annotation.Nullable;
/**
* Class that provides functions to do spell checking, i.e. detect typos
* and make suggestions.
*/
public final class SpellChecker {
/**
* Computes the edit distance between two strings. The edit distance is
* the minimum number of insertions, deletions and replacements to
* transform a string into the other string.
*
* maxEditDistance is the maximum distance the function can return. If
* it would be greater, the function returns -1. It is useful for
* speeding up the computations.
*/
public static int editDistance(String s1, String s2, int maxEditDistance) {
// This is the Levenshtein distance, as described here:
// http://en.wikipedia.org/wiki/Levenshtein_distance
//
// We don't need to keep the full matrix. To update a cell, we only
// need top-left, top, and left values. Using a single array is
// sufficient. Top value is still in row[j] from the last iteration.
// Top-left value is stored in 'previous'. Left value is row[j - 1].
if (s1.equals(s2)) {
return 0;
}
// Short-circuit based on string length.
if (Math.abs(s1.length() - s2.length()) > maxEditDistance) {
return -1;
}
int[] row = new int[s2.length() + 1];
for (int i = 0; i <= s2.length(); i++) {
row[i] = i;
}
for (int i = 1; i <= s1.length(); i++) {
row[0] = i;
int bestInTheRow = row[0];
int previous = i - 1;
for (int j = 1; j <= s2.length(); j++) {
int old = row[j];
row[j] = Math.min(
previous + (s1.charAt(i - 1) == s2.charAt(j - 1) ? 0 : 1),
1 + Math.min(row[j - 1], row[j]));
previous = old;
bestInTheRow = Math.min(bestInTheRow, row[j]);
}
if (bestInTheRow > maxEditDistance) {
return -1;
}
}
int result = row[s2.length()];
return result <= maxEditDistance ? result : -1;
}
/**
* Find in words which string is the most similar to input (according to
* the edit distance, ignoring case) - or null if no string is similar
* enough. In case of equality, the first one in words wins.
*/
@Nullable
public static String suggest(String input, Iterable<String> words) {
String best = null;
// Heuristic: the expected number of typos depends on the length of the word.
int bestDistance = Math.min(5, (input.length() + 1) / 2);
input = input.toLowerCase();
for (String candidate : words) {
int d = editDistance(input, candidate.toLowerCase(), bestDistance);
if (d >= 0 && d < bestDistance) {
bestDistance = d;
best = candidate;
}
}
return best;
}
/**
* Return a string to be used at the end of an error message. It is either an empty string, or a
* spelling suggestion, e.g. " (did you mean 'x'?)".
*/
public static String didYouMean(String input, Iterable<String> words) {
String suggestion = suggest(input, words);
if (suggestion == null) {
return "";
} else {
return " (did you mean '" + suggestion + "'?)";
}
}
}