rustdoc: compute maximum Levenshtein distance based on the query

The heuristic is pretty close to the name resolver.

Fixes #103357
This commit is contained in:
Michael Howell 2022-10-28 21:54:09 -07:00
parent 5ce39f42bd
commit e09e6df787
3 changed files with 84 additions and 70 deletions

View file

@ -112,7 +112,6 @@ function levenshtein(s1, s2) {
}
function initSearch(rawSearchIndex) {
const MAX_LEV_DISTANCE = 3;
const MAX_RESULTS = 200;
const NO_TYPE_FILTER = -1;
/**
@ -897,13 +896,13 @@ function initSearch(rawSearchIndex) {
* @param {QueryElement} elem - The element from the parsed query.
* @param {integer} defaultLev - This is the value to return in case there are no generics.
*
* @return {integer} - Returns the best match (if any) or `MAX_LEV_DISTANCE + 1`.
* @return {integer} - Returns the best match (if any) or `maxLevDistance + 1`.
*/
function checkGenerics(row, elem, defaultLev) {
function checkGenerics(row, elem, defaultLev, maxLevDistance) {
if (row.generics.length === 0) {
return elem.generics.length === 0 ? defaultLev : MAX_LEV_DISTANCE + 1;
return elem.generics.length === 0 ? defaultLev : maxLevDistance + 1;
} else if (row.generics.length > 0 && row.generics[0].name === null) {
return checkGenerics(row.generics[0], elem, defaultLev);
return checkGenerics(row.generics[0], elem, defaultLev, maxLevDistance);
}
// The names match, but we need to be sure that all generics kinda
// match as well.
@ -914,8 +913,8 @@ function initSearch(rawSearchIndex) {
elem_name = entry.name;
if (elem_name === "") {
// Pure generic, needs to check into it.
if (checkGenerics(entry, elem, MAX_LEV_DISTANCE + 1) !== 0) {
return MAX_LEV_DISTANCE + 1;
if (checkGenerics(entry, elem, maxLevDistance + 1, maxLevDistance) !== 0) {
return maxLevDistance + 1;
}
continue;
}
@ -942,7 +941,7 @@ function initSearch(rawSearchIndex) {
}
}
if (match === null) {
return MAX_LEV_DISTANCE + 1;
return maxLevDistance + 1;
}
elems[match] -= 1;
if (elems[match] === 0) {
@ -951,7 +950,7 @@ function initSearch(rawSearchIndex) {
}
return 0;
}
return MAX_LEV_DISTANCE + 1;
return maxLevDistance + 1;
}
/**
@ -963,10 +962,10 @@ function initSearch(rawSearchIndex) {
*
* @return {integer} - Returns a Levenshtein distance to the best match.
*/
function checkIfInGenerics(row, elem) {
let lev = MAX_LEV_DISTANCE + 1;
function checkIfInGenerics(row, elem, maxLevDistance) {
let lev = maxLevDistance + 1;
for (const entry of row.generics) {
lev = Math.min(checkType(entry, elem, true), lev);
lev = Math.min(checkType(entry, elem, true, maxLevDistance), lev);
if (lev === 0) {
break;
}
@ -983,15 +982,15 @@ function initSearch(rawSearchIndex) {
* @param {boolean} literalSearch
*
* @return {integer} - Returns a Levenshtein distance to the best match. If there is
* no match, returns `MAX_LEV_DISTANCE + 1`.
* no match, returns `maxLevDistance + 1`.
*/
function checkType(row, elem, literalSearch) {
function checkType(row, elem, literalSearch, maxLevDistance) {
if (row.name === null) {
// This is a pure "generic" search, no need to run other checks.
if (row.generics.length > 0) {
return checkIfInGenerics(row, elem);
return checkIfInGenerics(row, elem, maxLevDistance);
}
return MAX_LEV_DISTANCE + 1;
return maxLevDistance + 1;
}
let lev = levenshtein(row.name, elem.name);
@ -1005,9 +1004,9 @@ function initSearch(rawSearchIndex) {
return 0;
}
}
return MAX_LEV_DISTANCE + 1;
return maxLevDistance + 1;
} else if (elem.generics.length > 0) {
return checkGenerics(row, elem, MAX_LEV_DISTANCE + 1);
return checkGenerics(row, elem, maxLevDistance + 1, maxLevDistance);
}
return 0;
} else if (row.generics.length > 0) {
@ -1017,22 +1016,20 @@ function initSearch(rawSearchIndex) {
}
// The name didn't match so we now check if the type we're looking for is inside
// the generics!
lev = checkIfInGenerics(row, elem);
// Now whatever happens, the returned distance is "less good" so we should mark
// it as such, and so we add 0.5 to the distance to make it "less good".
return lev + 0.5;
} else if (lev > MAX_LEV_DISTANCE) {
lev = Math.min(lev, checkIfInGenerics(row, elem, maxLevDistance));
return lev;
} else if (lev > maxLevDistance) {
// So our item's name doesn't match at all and has generics.
//
// Maybe it's present in a sub generic? For example "f<A<B<C>>>()", if we're
// looking for "B<C>", we'll need to go down.
return checkIfInGenerics(row, elem);
return checkIfInGenerics(row, elem, maxLevDistance);
} else {
// At this point, the name kinda match and we have generics to check, so
// let's go!
const tmp_lev = checkGenerics(row, elem, lev);
if (tmp_lev > MAX_LEV_DISTANCE) {
return MAX_LEV_DISTANCE + 1;
const tmp_lev = checkGenerics(row, elem, lev, maxLevDistance);
if (tmp_lev > maxLevDistance) {
return maxLevDistance + 1;
}
// We compute the median value of both checks and return it.
return (tmp_lev + lev) / 2;
@ -1040,7 +1037,7 @@ function initSearch(rawSearchIndex) {
} else if (elem.generics.length > 0) {
// In this case, we were expecting generics but there isn't so we simply reject this
// one.
return MAX_LEV_DISTANCE + 1;
return maxLevDistance + 1;
}
// No generics on our query or on the target type so we can return without doing
// anything else.
@ -1055,23 +1052,26 @@ function initSearch(rawSearchIndex) {
* @param {integer} typeFilter
*
* @return {integer} - Returns a Levenshtein distance to the best match. If there is no
* match, returns `MAX_LEV_DISTANCE + 1`.
* match, returns `maxLevDistance + 1`.
*/
function findArg(row, elem, typeFilter) {
let lev = MAX_LEV_DISTANCE + 1;
function findArg(row, elem, typeFilter, maxLevDistance) {
let lev = maxLevDistance + 1;
if (row && row.type && row.type.inputs && row.type.inputs.length > 0) {
for (const input of row.type.inputs) {
if (!typePassesFilter(typeFilter, input.ty)) {
continue;
}
lev = Math.min(lev, checkType(input, elem, parsedQuery.literalSearch));
lev = Math.min(
lev,
checkType(input, elem, parsedQuery.literalSearch, maxLevDistance)
);
if (lev === 0) {
return 0;
}
}
}
return parsedQuery.literalSearch ? MAX_LEV_DISTANCE + 1 : lev;
return parsedQuery.literalSearch ? maxLevDistance + 1 : lev;
}
/**
@ -1082,10 +1082,10 @@ function initSearch(rawSearchIndex) {
* @param {integer} typeFilter
*
* @return {integer} - Returns a Levenshtein distance to the best match. If there is no
* match, returns `MAX_LEV_DISTANCE + 1`.
* match, returns `maxLevDistance + 1`.
*/
function checkReturned(row, elem, typeFilter) {
let lev = MAX_LEV_DISTANCE + 1;
function checkReturned(row, elem, typeFilter, maxLevDistance) {
let lev = maxLevDistance + 1;
if (row && row.type && row.type.output.length > 0) {
const ret = row.type.output;
@ -1093,20 +1093,23 @@ function initSearch(rawSearchIndex) {
if (!typePassesFilter(typeFilter, ret_ty.ty)) {
continue;
}
lev = Math.min(lev, checkType(ret_ty, elem, parsedQuery.literalSearch));
lev = Math.min(
lev,
checkType(ret_ty, elem, parsedQuery.literalSearch, maxLevDistance)
);
if (lev === 0) {
return 0;
}
}
}
return parsedQuery.literalSearch ? MAX_LEV_DISTANCE + 1 : lev;
return parsedQuery.literalSearch ? maxLevDistance + 1 : lev;
}
function checkPath(contains, ty) {
function checkPath(contains, ty, maxLevDistance) {
if (contains.length === 0) {
return 0;
}
let ret_lev = MAX_LEV_DISTANCE + 1;
let ret_lev = maxLevDistance + 1;
const path = ty.path.split("::");
if (ty.parent && ty.parent.name) {
@ -1116,7 +1119,7 @@ function initSearch(rawSearchIndex) {
const length = path.length;
const clength = contains.length;
if (clength > length) {
return MAX_LEV_DISTANCE + 1;
return maxLevDistance + 1;
}
for (let i = 0; i < length; ++i) {
if (i + clength > length) {
@ -1126,7 +1129,7 @@ function initSearch(rawSearchIndex) {
let aborted = false;
for (let x = 0; x < clength; ++x) {
const lev = levenshtein(path[i + x], contains[x]);
if (lev > MAX_LEV_DISTANCE) {
if (lev > maxLevDistance) {
aborted = true;
break;
}
@ -1231,7 +1234,7 @@ function initSearch(rawSearchIndex) {
* following condition:
*
* * If it is a "literal search" (`parsedQuery.literalSearch`), then `lev` must be 0.
* * If it is not a "literal search", `lev` must be <= `MAX_LEV_DISTANCE`.
* * If it is not a "literal search", `lev` must be <= `maxLevDistance`.
*
* The `results` map contains information which will be used to sort the search results:
*
@ -1249,8 +1252,8 @@ function initSearch(rawSearchIndex) {
* @param {integer} lev
* @param {integer} path_lev
*/
function addIntoResults(results, fullId, id, index, lev, path_lev) {
const inBounds = lev <= MAX_LEV_DISTANCE || index !== -1;
function addIntoResults(results, fullId, id, index, lev, path_lev, maxLevDistance) {
const inBounds = lev <= maxLevDistance || index !== -1;
if (lev === 0 || (!parsedQuery.literalSearch && inBounds)) {
if (results[fullId] !== undefined) {
const result = results[fullId];
@ -1289,7 +1292,8 @@ function initSearch(rawSearchIndex) {
elem,
results_others,
results_in_args,
results_returned
results_returned,
maxLevDistance
) {
if (!row || (filterCrates !== null && row.crate !== filterCrates)) {
return;
@ -1298,13 +1302,13 @@ function initSearch(rawSearchIndex) {
const fullId = row.id;
const searchWord = searchWords[pos];
const in_args = findArg(row, elem, parsedQuery.typeFilter);
const returned = checkReturned(row, elem, parsedQuery.typeFilter);
const in_args = findArg(row, elem, parsedQuery.typeFilter, maxLevDistance);
const returned = checkReturned(row, elem, parsedQuery.typeFilter, maxLevDistance);
// path_lev is 0 because no parent path information is currently stored
// in the search index
addIntoResults(results_in_args, fullId, pos, -1, in_args, 0);
addIntoResults(results_returned, fullId, pos, -1, returned, 0);
addIntoResults(results_in_args, fullId, pos, -1, in_args, 0, maxLevDistance);
addIntoResults(results_returned, fullId, pos, -1, returned, 0, maxLevDistance);
if (!typePassesFilter(parsedQuery.typeFilter, row.ty)) {
return;
@ -1328,16 +1332,16 @@ function initSearch(rawSearchIndex) {
// No need to check anything else if it's a "pure" generics search.
if (elem.name.length === 0) {
if (row.type !== null) {
lev = checkGenerics(row.type, elem, MAX_LEV_DISTANCE + 1);
lev = checkGenerics(row.type, elem, maxLevDistance + 1, maxLevDistance);
// path_lev is 0 because we know it's empty
addIntoResults(results_others, fullId, pos, index, lev, 0);
addIntoResults(results_others, fullId, pos, index, lev, 0, maxLevDistance);
}
return;
}
if (elem.fullPath.length > 1) {
path_lev = checkPath(elem.pathWithoutLast, row);
if (path_lev > MAX_LEV_DISTANCE) {
path_lev = checkPath(elem.pathWithoutLast, row, maxLevDistance);
if (path_lev > maxLevDistance) {
return;
}
}
@ -1351,11 +1355,11 @@ function initSearch(rawSearchIndex) {
lev = levenshtein(searchWord, elem.pathLast);
if (index === -1 && lev + path_lev > MAX_LEV_DISTANCE) {
if (index === -1 && lev + path_lev > maxLevDistance) {
return;
}
addIntoResults(results_others, fullId, pos, index, lev, path_lev);
addIntoResults(results_others, fullId, pos, index, lev, path_lev, maxLevDistance);
}
/**
@ -1367,7 +1371,7 @@ function initSearch(rawSearchIndex) {
* @param {integer} pos - Position in the `searchIndex`.
* @param {Object} results
*/
function handleArgs(row, pos, results) {
function handleArgs(row, pos, results, maxLevDistance) {
if (!row || (filterCrates !== null && row.crate !== filterCrates)) {
return;
}
@ -1379,7 +1383,7 @@ function initSearch(rawSearchIndex) {
function checkArgs(elems, callback) {
for (const elem of elems) {
// There is more than one parameter to the query so all checks should be "exact"
const lev = callback(row, elem, NO_TYPE_FILTER);
const lev = callback(row, elem, NO_TYPE_FILTER, maxLevDistance);
if (lev <= 1) {
nbLev += 1;
totalLev += lev;
@ -1400,12 +1404,21 @@ function initSearch(rawSearchIndex) {
return;
}
const lev = Math.round(totalLev / nbLev);
addIntoResults(results, row.id, pos, 0, lev, 0);
addIntoResults(results, row.id, pos, 0, lev, 0, maxLevDistance);
}
function innerRunQuery() {
let elem, i, nSearchWords, in_returned, row;
let queryLen = 0;
for (const elem of parsedQuery.elems) {
queryLen += elem.name.length;
}
for (const elem of parsedQuery.returned) {
queryLen += elem.name.length;
}
const maxLevDistance = Math.floor(queryLen / 3);
if (parsedQuery.foundElems === 1) {
if (parsedQuery.elems.length === 1) {
elem = parsedQuery.elems[0];
@ -1418,7 +1431,8 @@ function initSearch(rawSearchIndex) {
elem,
results_others,
results_in_args,
results_returned
results_returned,
maxLevDistance
);
}
} else if (parsedQuery.returned.length === 1) {
@ -1426,13 +1440,18 @@ function initSearch(rawSearchIndex) {
elem = parsedQuery.returned[0];
for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) {
row = searchIndex[i];
in_returned = checkReturned(row, elem, parsedQuery.typeFilter);
addIntoResults(results_others, row.id, i, -1, in_returned);
in_returned = checkReturned(
row,
elem,
parsedQuery.typeFilter,
maxLevDistance
);
addIntoResults(results_others, row.id, i, -1, in_returned, maxLevDistance);
}
}
} else if (parsedQuery.foundElems > 0) {
for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) {
handleArgs(searchIndex[i], i, results_others);
handleArgs(searchIndex[i], i, results_others, maxLevDistance);
}
}
}
@ -1470,7 +1489,7 @@ function initSearch(rawSearchIndex) {
*
* @return {boolean} - Whether the result is valid or not
*/
function validateResult(name, path, keys, parent) {
function validateResult(name, path, keys, parent, maxLevDistance) {
if (!keys || !keys.length) {
return true;
}
@ -1485,7 +1504,7 @@ function initSearch(rawSearchIndex) {
(parent !== undefined && parent.name !== undefined &&
parent.name.toLowerCase().indexOf(key) > -1) ||
// lastly check to see if the name was a levenshtein match
levenshtein(name, key) <= MAX_LEV_DISTANCE)) {
levenshtein(name, key) <= maxLevDistance)) {
return false;
}
}

View file

@ -10,8 +10,5 @@ const EXPECTED = {
{ 'path': 'std', 'name': 'eprint' },
{ 'path': 'std', 'name': 'eprintln' },
{ 'path': 'std::pin', 'name': 'pin' },
{ 'path': 'std::future', 'name': 'join' },
{ 'path': 'std', 'name': 'line' },
{ 'path': 'std', 'name': 'write' },
],
};

View file

@ -1,5 +1,3 @@
// exact-check
const QUERY = [
'StructItem',
'StructFieldItem',