rustdoc: compute maximum Levenshtein distance based on the query
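The heuristic is close to the one used by the name resolver: the maximum allowed distance is one third of the combined length of the query's element names, rounded down. Fixes #103357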
This commit is contained in:
parent
5ce39f42bd
commit
e09e6df787
3 changed files with 84 additions and 70 deletions
|
@ -112,7 +112,6 @@ function levenshtein(s1, s2) {
|
|||
}
|
||||
|
||||
function initSearch(rawSearchIndex) {
|
||||
const MAX_LEV_DISTANCE = 3;
|
||||
const MAX_RESULTS = 200;
|
||||
const NO_TYPE_FILTER = -1;
|
||||
/**
|
||||
|
@ -897,13 +896,13 @@ function initSearch(rawSearchIndex) {
|
|||
* @param {QueryElement} elem - The element from the parsed query.
|
||||
* @param {integer} defaultLev - This is the value to return in case there are no generics.
|
||||
*
|
||||
* @return {integer} - Returns the best match (if any) or `MAX_LEV_DISTANCE + 1`.
|
||||
* @return {integer} - Returns the best match (if any) or `maxLevDistance + 1`.
|
||||
*/
|
||||
function checkGenerics(row, elem, defaultLev) {
|
||||
function checkGenerics(row, elem, defaultLev, maxLevDistance) {
|
||||
if (row.generics.length === 0) {
|
||||
return elem.generics.length === 0 ? defaultLev : MAX_LEV_DISTANCE + 1;
|
||||
return elem.generics.length === 0 ? defaultLev : maxLevDistance + 1;
|
||||
} else if (row.generics.length > 0 && row.generics[0].name === null) {
|
||||
return checkGenerics(row.generics[0], elem, defaultLev);
|
||||
return checkGenerics(row.generics[0], elem, defaultLev, maxLevDistance);
|
||||
}
|
||||
// The names match, but we need to be sure that all generics kinda
|
||||
// match as well.
|
||||
|
@ -914,8 +913,8 @@ function initSearch(rawSearchIndex) {
|
|||
elem_name = entry.name;
|
||||
if (elem_name === "") {
|
||||
// Pure generic, needs to check into it.
|
||||
if (checkGenerics(entry, elem, MAX_LEV_DISTANCE + 1) !== 0) {
|
||||
return MAX_LEV_DISTANCE + 1;
|
||||
if (checkGenerics(entry, elem, maxLevDistance + 1, maxLevDistance) !== 0) {
|
||||
return maxLevDistance + 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
@ -942,7 +941,7 @@ function initSearch(rawSearchIndex) {
|
|||
}
|
||||
}
|
||||
if (match === null) {
|
||||
return MAX_LEV_DISTANCE + 1;
|
||||
return maxLevDistance + 1;
|
||||
}
|
||||
elems[match] -= 1;
|
||||
if (elems[match] === 0) {
|
||||
|
@ -951,7 +950,7 @@ function initSearch(rawSearchIndex) {
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
return MAX_LEV_DISTANCE + 1;
|
||||
return maxLevDistance + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -963,10 +962,10 @@ function initSearch(rawSearchIndex) {
|
|||
*
|
||||
* @return {integer} - Returns a Levenshtein distance to the best match.
|
||||
*/
|
||||
function checkIfInGenerics(row, elem) {
|
||||
let lev = MAX_LEV_DISTANCE + 1;
|
||||
function checkIfInGenerics(row, elem, maxLevDistance) {
|
||||
let lev = maxLevDistance + 1;
|
||||
for (const entry of row.generics) {
|
||||
lev = Math.min(checkType(entry, elem, true), lev);
|
||||
lev = Math.min(checkType(entry, elem, true, maxLevDistance), lev);
|
||||
if (lev === 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -983,15 +982,15 @@ function initSearch(rawSearchIndex) {
|
|||
* @param {boolean} literalSearch
|
||||
*
|
||||
* @return {integer} - Returns a Levenshtein distance to the best match. If there is
|
||||
* no match, returns `MAX_LEV_DISTANCE + 1`.
|
||||
* no match, returns `maxLevDistance + 1`.
|
||||
*/
|
||||
function checkType(row, elem, literalSearch) {
|
||||
function checkType(row, elem, literalSearch, maxLevDistance) {
|
||||
if (row.name === null) {
|
||||
// This is a pure "generic" search, no need to run other checks.
|
||||
if (row.generics.length > 0) {
|
||||
return checkIfInGenerics(row, elem);
|
||||
return checkIfInGenerics(row, elem, maxLevDistance);
|
||||
}
|
||||
return MAX_LEV_DISTANCE + 1;
|
||||
return maxLevDistance + 1;
|
||||
}
|
||||
|
||||
let lev = levenshtein(row.name, elem.name);
|
||||
|
@ -1005,9 +1004,9 @@ function initSearch(rawSearchIndex) {
|
|||
return 0;
|
||||
}
|
||||
}
|
||||
return MAX_LEV_DISTANCE + 1;
|
||||
return maxLevDistance + 1;
|
||||
} else if (elem.generics.length > 0) {
|
||||
return checkGenerics(row, elem, MAX_LEV_DISTANCE + 1);
|
||||
return checkGenerics(row, elem, maxLevDistance + 1, maxLevDistance);
|
||||
}
|
||||
return 0;
|
||||
} else if (row.generics.length > 0) {
|
||||
|
@ -1017,22 +1016,20 @@ function initSearch(rawSearchIndex) {
|
|||
}
|
||||
// The name didn't match so we now check if the type we're looking for is inside
|
||||
// the generics!
|
||||
lev = checkIfInGenerics(row, elem);
|
||||
// Now whatever happens, the returned distance is "less good" so we should mark
|
||||
// it as such, and so we add 0.5 to the distance to make it "less good".
|
||||
return lev + 0.5;
|
||||
} else if (lev > MAX_LEV_DISTANCE) {
|
||||
lev = Math.min(lev, checkIfInGenerics(row, elem, maxLevDistance));
|
||||
return lev;
|
||||
} else if (lev > maxLevDistance) {
|
||||
// So our item's name doesn't match at all and has generics.
|
||||
//
|
||||
// Maybe it's present in a sub generic? For example "f<A<B<C>>>()", if we're
|
||||
// looking for "B<C>", we'll need to go down.
|
||||
return checkIfInGenerics(row, elem);
|
||||
return checkIfInGenerics(row, elem, maxLevDistance);
|
||||
} else {
|
||||
// At this point, the name kinda match and we have generics to check, so
|
||||
// let's go!
|
||||
const tmp_lev = checkGenerics(row, elem, lev);
|
||||
if (tmp_lev > MAX_LEV_DISTANCE) {
|
||||
return MAX_LEV_DISTANCE + 1;
|
||||
const tmp_lev = checkGenerics(row, elem, lev, maxLevDistance);
|
||||
if (tmp_lev > maxLevDistance) {
|
||||
return maxLevDistance + 1;
|
||||
}
|
||||
// We compute the median value of both checks and return it.
|
||||
return (tmp_lev + lev) / 2;
|
||||
|
@ -1040,7 +1037,7 @@ function initSearch(rawSearchIndex) {
|
|||
} else if (elem.generics.length > 0) {
|
||||
// In this case, we were expecting generics but there isn't so we simply reject this
|
||||
// one.
|
||||
return MAX_LEV_DISTANCE + 1;
|
||||
return maxLevDistance + 1;
|
||||
}
|
||||
// No generics on our query or on the target type so we can return without doing
|
||||
// anything else.
|
||||
|
@ -1055,23 +1052,26 @@ function initSearch(rawSearchIndex) {
|
|||
* @param {integer} typeFilter
|
||||
*
|
||||
* @return {integer} - Returns a Levenshtein distance to the best match. If there is no
|
||||
* match, returns `MAX_LEV_DISTANCE + 1`.
|
||||
* match, returns `maxLevDistance + 1`.
|
||||
*/
|
||||
function findArg(row, elem, typeFilter) {
|
||||
let lev = MAX_LEV_DISTANCE + 1;
|
||||
function findArg(row, elem, typeFilter, maxLevDistance) {
|
||||
let lev = maxLevDistance + 1;
|
||||
|
||||
if (row && row.type && row.type.inputs && row.type.inputs.length > 0) {
|
||||
for (const input of row.type.inputs) {
|
||||
if (!typePassesFilter(typeFilter, input.ty)) {
|
||||
continue;
|
||||
}
|
||||
lev = Math.min(lev, checkType(input, elem, parsedQuery.literalSearch));
|
||||
lev = Math.min(
|
||||
lev,
|
||||
checkType(input, elem, parsedQuery.literalSearch, maxLevDistance)
|
||||
);
|
||||
if (lev === 0) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return parsedQuery.literalSearch ? MAX_LEV_DISTANCE + 1 : lev;
|
||||
return parsedQuery.literalSearch ? maxLevDistance + 1 : lev;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1082,10 +1082,10 @@ function initSearch(rawSearchIndex) {
|
|||
* @param {integer} typeFilter
|
||||
*
|
||||
* @return {integer} - Returns a Levenshtein distance to the best match. If there is no
|
||||
* match, returns `MAX_LEV_DISTANCE + 1`.
|
||||
* match, returns `maxLevDistance + 1`.
|
||||
*/
|
||||
function checkReturned(row, elem, typeFilter) {
|
||||
let lev = MAX_LEV_DISTANCE + 1;
|
||||
function checkReturned(row, elem, typeFilter, maxLevDistance) {
|
||||
let lev = maxLevDistance + 1;
|
||||
|
||||
if (row && row.type && row.type.output.length > 0) {
|
||||
const ret = row.type.output;
|
||||
|
@ -1093,20 +1093,23 @@ function initSearch(rawSearchIndex) {
|
|||
if (!typePassesFilter(typeFilter, ret_ty.ty)) {
|
||||
continue;
|
||||
}
|
||||
lev = Math.min(lev, checkType(ret_ty, elem, parsedQuery.literalSearch));
|
||||
lev = Math.min(
|
||||
lev,
|
||||
checkType(ret_ty, elem, parsedQuery.literalSearch, maxLevDistance)
|
||||
);
|
||||
if (lev === 0) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return parsedQuery.literalSearch ? MAX_LEV_DISTANCE + 1 : lev;
|
||||
return parsedQuery.literalSearch ? maxLevDistance + 1 : lev;
|
||||
}
|
||||
|
||||
function checkPath(contains, ty) {
|
||||
function checkPath(contains, ty, maxLevDistance) {
|
||||
if (contains.length === 0) {
|
||||
return 0;
|
||||
}
|
||||
let ret_lev = MAX_LEV_DISTANCE + 1;
|
||||
let ret_lev = maxLevDistance + 1;
|
||||
const path = ty.path.split("::");
|
||||
|
||||
if (ty.parent && ty.parent.name) {
|
||||
|
@ -1116,7 +1119,7 @@ function initSearch(rawSearchIndex) {
|
|||
const length = path.length;
|
||||
const clength = contains.length;
|
||||
if (clength > length) {
|
||||
return MAX_LEV_DISTANCE + 1;
|
||||
return maxLevDistance + 1;
|
||||
}
|
||||
for (let i = 0; i < length; ++i) {
|
||||
if (i + clength > length) {
|
||||
|
@ -1126,7 +1129,7 @@ function initSearch(rawSearchIndex) {
|
|||
let aborted = false;
|
||||
for (let x = 0; x < clength; ++x) {
|
||||
const lev = levenshtein(path[i + x], contains[x]);
|
||||
if (lev > MAX_LEV_DISTANCE) {
|
||||
if (lev > maxLevDistance) {
|
||||
aborted = true;
|
||||
break;
|
||||
}
|
||||
|
@ -1231,7 +1234,7 @@ function initSearch(rawSearchIndex) {
|
|||
* following condition:
|
||||
*
|
||||
* * If it is a "literal search" (`parsedQuery.literalSearch`), then `lev` must be 0.
|
||||
* * If it is not a "literal search", `lev` must be <= `MAX_LEV_DISTANCE`.
|
||||
* * If it is not a "literal search", `lev` must be <= `maxLevDistance`.
|
||||
*
|
||||
* The `results` map contains information which will be used to sort the search results:
|
||||
*
|
||||
|
@ -1249,8 +1252,8 @@ function initSearch(rawSearchIndex) {
|
|||
* @param {integer} lev
|
||||
* @param {integer} path_lev
|
||||
*/
|
||||
function addIntoResults(results, fullId, id, index, lev, path_lev) {
|
||||
const inBounds = lev <= MAX_LEV_DISTANCE || index !== -1;
|
||||
function addIntoResults(results, fullId, id, index, lev, path_lev, maxLevDistance) {
|
||||
const inBounds = lev <= maxLevDistance || index !== -1;
|
||||
if (lev === 0 || (!parsedQuery.literalSearch && inBounds)) {
|
||||
if (results[fullId] !== undefined) {
|
||||
const result = results[fullId];
|
||||
|
@ -1289,7 +1292,8 @@ function initSearch(rawSearchIndex) {
|
|||
elem,
|
||||
results_others,
|
||||
results_in_args,
|
||||
results_returned
|
||||
results_returned,
|
||||
maxLevDistance
|
||||
) {
|
||||
if (!row || (filterCrates !== null && row.crate !== filterCrates)) {
|
||||
return;
|
||||
|
@ -1298,13 +1302,13 @@ function initSearch(rawSearchIndex) {
|
|||
const fullId = row.id;
|
||||
const searchWord = searchWords[pos];
|
||||
|
||||
const in_args = findArg(row, elem, parsedQuery.typeFilter);
|
||||
const returned = checkReturned(row, elem, parsedQuery.typeFilter);
|
||||
const in_args = findArg(row, elem, parsedQuery.typeFilter, maxLevDistance);
|
||||
const returned = checkReturned(row, elem, parsedQuery.typeFilter, maxLevDistance);
|
||||
|
||||
// path_lev is 0 because no parent path information is currently stored
|
||||
// in the search index
|
||||
addIntoResults(results_in_args, fullId, pos, -1, in_args, 0);
|
||||
addIntoResults(results_returned, fullId, pos, -1, returned, 0);
|
||||
addIntoResults(results_in_args, fullId, pos, -1, in_args, 0, maxLevDistance);
|
||||
addIntoResults(results_returned, fullId, pos, -1, returned, 0, maxLevDistance);
|
||||
|
||||
if (!typePassesFilter(parsedQuery.typeFilter, row.ty)) {
|
||||
return;
|
||||
|
@ -1328,16 +1332,16 @@ function initSearch(rawSearchIndex) {
|
|||
// No need to check anything else if it's a "pure" generics search.
|
||||
if (elem.name.length === 0) {
|
||||
if (row.type !== null) {
|
||||
lev = checkGenerics(row.type, elem, MAX_LEV_DISTANCE + 1);
|
||||
lev = checkGenerics(row.type, elem, maxLevDistance + 1, maxLevDistance);
|
||||
// path_lev is 0 because we know it's empty
|
||||
addIntoResults(results_others, fullId, pos, index, lev, 0);
|
||||
addIntoResults(results_others, fullId, pos, index, lev, 0, maxLevDistance);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (elem.fullPath.length > 1) {
|
||||
path_lev = checkPath(elem.pathWithoutLast, row);
|
||||
if (path_lev > MAX_LEV_DISTANCE) {
|
||||
path_lev = checkPath(elem.pathWithoutLast, row, maxLevDistance);
|
||||
if (path_lev > maxLevDistance) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1351,11 +1355,11 @@ function initSearch(rawSearchIndex) {
|
|||
|
||||
lev = levenshtein(searchWord, elem.pathLast);
|
||||
|
||||
if (index === -1 && lev + path_lev > MAX_LEV_DISTANCE) {
|
||||
if (index === -1 && lev + path_lev > maxLevDistance) {
|
||||
return;
|
||||
}
|
||||
|
||||
addIntoResults(results_others, fullId, pos, index, lev, path_lev);
|
||||
addIntoResults(results_others, fullId, pos, index, lev, path_lev, maxLevDistance);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1367,7 +1371,7 @@ function initSearch(rawSearchIndex) {
|
|||
* @param {integer} pos - Position in the `searchIndex`.
|
||||
* @param {Object} results
|
||||
*/
|
||||
function handleArgs(row, pos, results) {
|
||||
function handleArgs(row, pos, results, maxLevDistance) {
|
||||
if (!row || (filterCrates !== null && row.crate !== filterCrates)) {
|
||||
return;
|
||||
}
|
||||
|
@ -1379,7 +1383,7 @@ function initSearch(rawSearchIndex) {
|
|||
function checkArgs(elems, callback) {
|
||||
for (const elem of elems) {
|
||||
// There is more than one parameter to the query so all checks should be "exact"
|
||||
const lev = callback(row, elem, NO_TYPE_FILTER);
|
||||
const lev = callback(row, elem, NO_TYPE_FILTER, maxLevDistance);
|
||||
if (lev <= 1) {
|
||||
nbLev += 1;
|
||||
totalLev += lev;
|
||||
|
@ -1400,12 +1404,21 @@ function initSearch(rawSearchIndex) {
|
|||
return;
|
||||
}
|
||||
const lev = Math.round(totalLev / nbLev);
|
||||
addIntoResults(results, row.id, pos, 0, lev, 0);
|
||||
addIntoResults(results, row.id, pos, 0, lev, 0, maxLevDistance);
|
||||
}
|
||||
|
||||
function innerRunQuery() {
|
||||
let elem, i, nSearchWords, in_returned, row;
|
||||
|
||||
let queryLen = 0;
|
||||
for (const elem of parsedQuery.elems) {
|
||||
queryLen += elem.name.length;
|
||||
}
|
||||
for (const elem of parsedQuery.returned) {
|
||||
queryLen += elem.name.length;
|
||||
}
|
||||
const maxLevDistance = Math.floor(queryLen / 3);
|
||||
|
||||
if (parsedQuery.foundElems === 1) {
|
||||
if (parsedQuery.elems.length === 1) {
|
||||
elem = parsedQuery.elems[0];
|
||||
|
@ -1418,7 +1431,8 @@ function initSearch(rawSearchIndex) {
|
|||
elem,
|
||||
results_others,
|
||||
results_in_args,
|
||||
results_returned
|
||||
results_returned,
|
||||
maxLevDistance
|
||||
);
|
||||
}
|
||||
} else if (parsedQuery.returned.length === 1) {
|
||||
|
@ -1426,13 +1440,18 @@ function initSearch(rawSearchIndex) {
|
|||
elem = parsedQuery.returned[0];
|
||||
for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) {
|
||||
row = searchIndex[i];
|
||||
in_returned = checkReturned(row, elem, parsedQuery.typeFilter);
|
||||
addIntoResults(results_others, row.id, i, -1, in_returned);
|
||||
in_returned = checkReturned(
|
||||
row,
|
||||
elem,
|
||||
parsedQuery.typeFilter,
|
||||
maxLevDistance
|
||||
);
|
||||
addIntoResults(results_others, row.id, i, -1, in_returned, maxLevDistance);
|
||||
}
|
||||
}
|
||||
} else if (parsedQuery.foundElems > 0) {
|
||||
for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) {
|
||||
handleArgs(searchIndex[i], i, results_others);
|
||||
handleArgs(searchIndex[i], i, results_others, maxLevDistance);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1470,7 +1489,7 @@ function initSearch(rawSearchIndex) {
|
|||
*
|
||||
* @return {boolean} - Whether the result is valid or not
|
||||
*/
|
||||
function validateResult(name, path, keys, parent) {
|
||||
function validateResult(name, path, keys, parent, maxLevDistance) {
|
||||
if (!keys || !keys.length) {
|
||||
return true;
|
||||
}
|
||||
|
@ -1485,7 +1504,7 @@ function initSearch(rawSearchIndex) {
|
|||
(parent !== undefined && parent.name !== undefined &&
|
||||
parent.name.toLowerCase().indexOf(key) > -1) ||
|
||||
// lastly check to see if the name was a levenshtein match
|
||||
levenshtein(name, key) <= MAX_LEV_DISTANCE)) {
|
||||
levenshtein(name, key) <= maxLevDistance)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,8 +10,5 @@ const EXPECTED = {
|
|||
{ 'path': 'std', 'name': 'eprint' },
|
||||
{ 'path': 'std', 'name': 'eprintln' },
|
||||
{ 'path': 'std::pin', 'name': 'pin' },
|
||||
{ 'path': 'std::future', 'name': 'join' },
|
||||
{ 'path': 'std', 'name': 'line' },
|
||||
{ 'path': 'std', 'name': 'write' },
|
||||
],
|
||||
};
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
// exact-check
|
||||
|
||||
const QUERY = [
|
||||
'StructItem',
|
||||
'StructFieldItem',
|
||||
|
|
Loading…
Add table
Reference in a new issue