Auto merge of #112233 - notriddle:notriddle/search-unify, r=GuillaumeGomez

rustdoc-search: clean up type unification and "unboxing"

This PR redesigns parameter matching, return matching, and generics matching to use a single function that compares two lists of types.

It also makes the algorithms more consistent, so the "unboxing" behavior where `Vec<i32>` is considered a match for `i32` works inside generics, and not just at the top level.
This commit is contained in:
bors 2023-06-15 03:04:46 +00:00
commit 314c39d2ea
7 changed files with 366 additions and 175 deletions

View file

@ -53,7 +53,7 @@ let ParsedQuery;
* parent: (Object|null|undefined),
* path: string,
* ty: (Number|null|number),
* type: (Array<?>|null)
* type: FunctionSearchType?
* }}
*/
let Row;
@ -135,7 +135,7 @@ let RawFunctionType;
/**
* @typedef {{
* inputs: Array<FunctionType>,
* outputs: Array<FunctionType>,
* output: Array<FunctionType>,
* }}
*/
let FunctionSearchType;

View file

@ -1216,113 +1216,182 @@ function initSearch(rawSearchIndex) {
}
/**
* This function checks if the object (`row`) generics match the given type (`elem`)
* generics. If there are no generics on `row`, `defaultDistance` is returned.
* This function checks generics in search query `queryElem` can all be found in the
* search index (`fnType`),
*
* @param {Row} row - The object to check.
* @param {QueryElement} elem - The element from the parsed query.
* @param {FunctionType} fnType - The object to check.
* @param {QueryElement} queryElem - The element from the parsed query.
*
* @return {boolean} - Returns true if a match, false otherwise.
* @return {boolean} - Returns true if a match, false otherwise.
*/
function checkGenerics(row, elem) {
if (row.generics.length === 0 || elem.generics.length === 0) {
return false;
}
// This function is called if the names match, but we need to make
// sure that all generics match as well.
//
function checkGenerics(fnType, queryElem) {
return unifyFunctionTypes(fnType.generics, queryElem.generics);
}
/**
* This function checks if a list of search query `queryElems` can all be found in the
* search index (`fnTypes`).
*
* @param {Array<FunctionType>} fnTypes - The objects to check.
* @param {Array<QueryElement>} queryElems - The elements from the parsed query.
*
* @return {boolean} - Returns true if a match, false otherwise.
*/
function unifyFunctionTypes(fnTypes, queryElems) {
// This search engine implements order-agnostic unification. There
// should be no missing duplicates (generics have "bag semantics"),
// and the row is allowed to have extras.
if (elem.generics.length > 0 && row.generics.length >= elem.generics.length) {
const elems = new Map();
const addEntryToElems = function addEntryToElems(entry) {
if (entry.id === -1) {
// Pure generic, needs to check into it.
for (const inner_entry of entry.generics) {
addEntryToElems(inner_entry);
}
return;
}
let currentEntryElems;
if (elems.has(entry.id)) {
currentEntryElems = elems.get(entry.id);
} else {
currentEntryElems = [];
elems.set(entry.id, currentEntryElems);
}
currentEntryElems.push(entry);
};
for (const entry of row.generics) {
addEntryToElems(entry);
}
// We need to find the type that matches the most to remove it in order
// to move forward.
const handleGeneric = generic => {
if (!elems.has(generic.id)) {
return false;
}
const matchElems = elems.get(generic.id);
const matchIdx = matchElems.findIndex(tmp_elem => {
if (generic.generics.length > 0 && !checkGenerics(tmp_elem, generic)) {
return false;
}
return typePassesFilter(generic.typeFilter, tmp_elem.ty);
});
if (matchIdx === -1) {
return false;
}
matchElems.splice(matchIdx, 1);
if (matchElems.length === 0) {
elems.delete(generic.id);
}
return true;
};
// To do the right thing with type filters, we first process generics
// that have them, removing matching ones from the "bag," then do the
// ones with no type filter, which can match any entry regardless of its
// own type.
for (const generic of elem.generics) {
if (generic.typeFilter === TY_PRIMITIVE &&
generic.id === typeNameIdOfArrayOrSlice) {
const genericArray = {
id: typeNameIdOfArray,
typeFilter: TY_PRIMITIVE,
generics: generic.generics,
};
const genericSlice = {
id: typeNameIdOfSlice,
typeFilter: TY_PRIMITIVE,
generics: generic.generics,
};
if (!handleGeneric(genericArray) && !handleGeneric(genericSlice)) {
return false;
}
} else if (generic.typeFilter !== -1 && !handleGeneric(generic)) {
return false;
}
}
for (const generic of elem.generics) {
if (generic.typeFilter === -1 && !handleGeneric(generic)) {
return false;
}
}
if (queryElems.length === 0) {
return true;
}
return false;
if (!fnTypes || fnTypes.length === 0) {
return false;
}
/**
* @type Map<integer, QueryElement[]>
*/
const queryElemSet = new Map();
const addQueryElemToQueryElemSet = function addQueryElemToQueryElemSet(queryElem) {
let currentQueryElemList;
if (queryElemSet.has(queryElem.id)) {
currentQueryElemList = queryElemSet.get(queryElem.id);
} else {
currentQueryElemList = [];
queryElemSet.set(queryElem.id, currentQueryElemList);
}
currentQueryElemList.push(queryElem);
};
for (const queryElem of queryElems) {
addQueryElemToQueryElemSet(queryElem);
}
/**
* @type Map<integer, FunctionType[]>
*/
const fnTypeSet = new Map();
const addFnTypeToFnTypeSet = function addFnTypeToFnTypeSet(fnType) {
// Pure generic, or an item that's not matched by any query elems.
// Try [unboxing] it.
//
// [unboxing]:
// http://ndmitchell.com/downloads/slides-hoogle_fast_type_searching-09_aug_2008.pdf
const queryContainsArrayOrSliceElem = queryElemSet.has(typeNameIdOfArrayOrSlice);
if (fnType.id === -1 || !(
queryElemSet.has(fnType.id) ||
(fnType.id === typeNameIdOfSlice && queryContainsArrayOrSliceElem) ||
(fnType.id === typeNameIdOfArray && queryContainsArrayOrSliceElem)
)) {
for (const innerFnType of fnType.generics) {
addFnTypeToFnTypeSet(innerFnType);
}
return;
}
let currentQueryElemList = queryElemSet.get(fnType.id) || [];
let matchIdx = currentQueryElemList.findIndex(queryElem => {
return typePassesFilter(queryElem.typeFilter, fnType.ty) &&
checkGenerics(fnType, queryElem);
});
if (matchIdx === -1 &&
(fnType.id === typeNameIdOfSlice || fnType.id === typeNameIdOfArray) &&
queryContainsArrayOrSliceElem
) {
currentQueryElemList = queryElemSet.get(typeNameIdOfArrayOrSlice) || [];
matchIdx = currentQueryElemList.findIndex(queryElem => {
return typePassesFilter(queryElem.typeFilter, fnType.ty) &&
checkGenerics(fnType, queryElem);
});
}
// None of the query elems match the function type.
// Try [unboxing] it.
if (matchIdx === -1) {
for (const innerFnType of fnType.generics) {
addFnTypeToFnTypeSet(innerFnType);
}
return;
}
let currentFnTypeList;
if (fnTypeSet.has(fnType.id)) {
currentFnTypeList = fnTypeSet.get(fnType.id);
} else {
currentFnTypeList = [];
fnTypeSet.set(fnType.id, currentFnTypeList);
}
currentFnTypeList.push(fnType);
};
for (const fnType of fnTypes) {
addFnTypeToFnTypeSet(fnType);
}
const doHandleQueryElemList = (currentFnTypeList, queryElemList) => {
if (queryElemList.length === 0) {
return true;
}
// Multiple items in one list might match multiple items in another.
// Since an item with fewer generics can match an item with more, we
// need to check all combinations for a potential match.
const queryElem = queryElemList.pop();
const l = currentFnTypeList.length;
for (let i = 0; i < l; i += 1) {
const fnType = currentFnTypeList[i];
if (!typePassesFilter(queryElem.typeFilter, fnType.ty)) {
continue;
}
if (queryElem.generics.length === 0 || checkGenerics(fnType, queryElem)) {
currentFnTypeList.splice(i, 1);
const result = doHandleQueryElemList(currentFnTypeList, queryElemList);
if (result) {
return true;
}
currentFnTypeList.splice(i, 0, fnType);
}
}
return false;
};
const handleQueryElemList = (id, queryElemList) => {
if (!fnTypeSet.has(id)) {
if (id === typeNameIdOfArrayOrSlice) {
return handleQueryElemList(typeNameIdOfSlice, queryElemList) ||
handleQueryElemList(typeNameIdOfArray, queryElemList);
}
return false;
}
const currentFnTypeList = fnTypeSet.get(id);
if (currentFnTypeList.length < queryElemList.length) {
// It's not possible for all the query elems to find a match.
return false;
}
const result = doHandleQueryElemList(currentFnTypeList, queryElemList);
if (result) {
// Found a solution.
// Any items that weren't used for it can be unboxed, and might form
// part of the solution for another item.
for (const innerFnType of currentFnTypeList) {
addFnTypeToFnTypeSet(innerFnType);
}
fnTypeSet.delete(id);
}
return result;
};
let queryElemSetSize = -1;
while (queryElemSetSize !== queryElemSet.size) {
queryElemSetSize = queryElemSet.size;
for (const [id, queryElemList] of queryElemSet) {
if (handleQueryElemList(id, queryElemList)) {
queryElemSet.delete(id);
}
}
}
return queryElemSetSize === 0;
}
/**
* This function checks if the object (`row`) matches the given type (`elem`) and its
* generics (if any).
*
* @param {Row} row
* @param {Array<FunctionType>} list
* @param {QueryElement} elem - The element from the parsed query.
*
* @return {boolean} - Returns true if found, false otherwise.
*/
function checkIfInGenerics(row, elem) {
for (const entry of row.generics) {
function checkIfInList(list, elem) {
for (const entry of list) {
if (checkType(entry, elem)) {
return true;
}
@ -1342,7 +1411,7 @@ function initSearch(rawSearchIndex) {
function checkType(row, elem) {
if (row.id === -1) {
// This is a pure "generic" search, no need to run other checks.
return row.generics.length > 0 ? checkIfInGenerics(row, elem) : false;
return row.generics.length > 0 ? checkIfInList(row.generics, elem) : false;
}
const matchesExact = row.id === elem.id;
@ -1360,59 +1429,7 @@ function initSearch(rawSearchIndex) {
// If the current item does not match, try [unboxing] the generic.
// [unboxing]:
// https://ndmitchell.com/downloads/slides-hoogle_fast_type_searching-09_aug_2008.pdf
return checkIfInGenerics(row, elem);
}
/**
* This function checks if the object (`row`) has an argument with the given type (`elem`).
*
* @param {Row} row
* @param {QueryElement} elem - The element from the parsed query.
* @param {Array<integer>} skipPositions - Do not return one of these positions.
*
* @return {integer} - Returns the position of the match, or -1 if none.
*/
function findArg(row, elem, skipPositions) {
if (row && row.type && row.type.inputs && row.type.inputs.length > 0) {
let i = 0;
for (const input of row.type.inputs) {
if (skipPositions.indexOf(i) !== -1) {
i += 1;
continue;
}
if (checkType(input, elem)) {
return i;
}
i += 1;
}
}
return -1;
}
/**
* This function checks if the object (`row`) returns the given type (`elem`).
*
* @param {Row} row
* @param {QueryElement} elem - The element from the parsed query.
* @param {Array<integer>} skipPositions - Do not return one of these positions.
*
* @return {integer} - Returns the position of the matching item, or -1 if none.
*/
function checkReturned(row, elem, skipPositions) {
if (row && row.type && row.type.output.length > 0) {
let i = 0;
for (const ret_ty of row.type.output) {
if (skipPositions.indexOf(i) !== -1) {
i += 1;
continue;
}
if (checkType(ret_ty, elem)) {
return i;
}
i += 1;
}
}
return -1;
return checkIfInList(row.generics, elem);
}
function checkPath(contains, ty, maxEditDistance) {
@ -1613,14 +1630,14 @@ function initSearch(rawSearchIndex) {
const fullId = row.id;
const searchWord = searchWords[pos];
const in_args = findArg(row, elem, []);
if (in_args !== -1) {
const in_args = row.type && row.type.inputs && checkIfInList(row.type.inputs, elem);
if (in_args) {
// path_dist is 0 because no parent path information is currently stored
// in the search index
addIntoResults(results_in_args, fullId, pos, -1, 0, 0, maxEditDistance);
}
const returned = checkReturned(row, elem, []);
if (returned !== -1) {
const returned = row.type && row.type.output && checkIfInList(row.type.output, elem);
if (returned) {
addIntoResults(results_returned, fullId, pos, -1, 0, 0, maxEditDistance);
}
@ -1676,32 +1693,15 @@ function initSearch(rawSearchIndex) {
* @param {Object} results
*/
function handleArgs(row, pos, results) {
if (!row || (filterCrates !== null && row.crate !== filterCrates)) {
if (!row || (filterCrates !== null && row.crate !== filterCrates) || !row.type) {
return;
}
// If the result is too "bad", we return false and it ends this search.
function checkArgs(elems, callback) {
const skipPositions = [];
for (const elem of elems) {
// There is more than one parameter to the query so all checks should be "exact"
const position = callback(
row,
elem,
skipPositions
);
if (position !== -1) {
skipPositions.push(position);
} else {
return false;
}
}
return true;
}
if (!checkArgs(parsedQuery.elems, findArg)) {
if (!unifyFunctionTypes(row.type.inputs, parsedQuery.elems)) {
return;
}
if (!checkArgs(parsedQuery.returned, checkReturned)) {
if (!unifyFunctionTypes(row.type.output, parsedQuery.returned)) {
return;
}
@ -1788,12 +1788,9 @@ function initSearch(rawSearchIndex) {
elem = parsedQuery.returned[0];
for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) {
row = searchIndex[i];
in_returned = checkReturned(
row,
elem,
[]
);
if (in_returned !== -1) {
in_returned = row.type &&
unifyFunctionTypes(row.type.output, parsedQuery.returned);
if (in_returned) {
addIntoResults(
results_others,
row.id,

View file

@ -0,0 +1,13 @@
// ignore-order
const EXPECTED = [
{
'query': 'bufread -> result<u8>',
'others': [
{ 'path': 'std::io::Split', 'name': 'next' },
{ 'path': 'std::boxed::Box', 'name': 'fill_buf' },
{ 'path': 'std::io::Chain', 'name': 'fill_buf' },
{ 'path': 'std::io::Take', 'name': 'fill_buf' },
],
},
];

View file

@ -0,0 +1,91 @@
// ignore-order
// exact-check
// Make sure that results are order-agnostic, even when there's search items that only differ
// by generics.
const EXPECTED = [
{
'query': 'Wrap',
'in_args': [
{ 'path': 'generics_match_ambiguity', 'name': 'bar' },
{ 'path': 'generics_match_ambiguity', 'name': 'foo' },
],
},
{
'query': 'Wrap<i32>',
'in_args': [
{ 'path': 'generics_match_ambiguity', 'name': 'bar' },
{ 'path': 'generics_match_ambiguity', 'name': 'foo' },
],
},
{
'query': 'Wrap<i32>, Wrap<i32, u32>',
'others': [
{ 'path': 'generics_match_ambiguity', 'name': 'bar' },
{ 'path': 'generics_match_ambiguity', 'name': 'foo' },
],
},
{
'query': 'Wrap<i32, u32>, Wrap<i32>',
'others': [
{ 'path': 'generics_match_ambiguity', 'name': 'bar' },
{ 'path': 'generics_match_ambiguity', 'name': 'foo' },
],
},
{
'query': 'W3<i32>, W3<i32, u32>',
'others': [
{ 'path': 'generics_match_ambiguity', 'name': 'baaa' },
{ 'path': 'generics_match_ambiguity', 'name': 'baab' },
{ 'path': 'generics_match_ambiguity', 'name': 'baac' },
{ 'path': 'generics_match_ambiguity', 'name': 'baad' },
{ 'path': 'generics_match_ambiguity', 'name': 'baae' },
{ 'path': 'generics_match_ambiguity', 'name': 'baaf' },
{ 'path': 'generics_match_ambiguity', 'name': 'baag' },
{ 'path': 'generics_match_ambiguity', 'name': 'baah' },
],
},
{
'query': 'W3<i32, u32>, W3<i32>',
'others': [
{ 'path': 'generics_match_ambiguity', 'name': 'baaa' },
{ 'path': 'generics_match_ambiguity', 'name': 'baab' },
{ 'path': 'generics_match_ambiguity', 'name': 'baac' },
{ 'path': 'generics_match_ambiguity', 'name': 'baad' },
{ 'path': 'generics_match_ambiguity', 'name': 'baae' },
{ 'path': 'generics_match_ambiguity', 'name': 'baaf' },
{ 'path': 'generics_match_ambiguity', 'name': 'baag' },
{ 'path': 'generics_match_ambiguity', 'name': 'baah' },
],
},
{
'query': 'W2<i32>, W2<i32, u32>',
'others': [
{ 'path': 'generics_match_ambiguity', 'name': 'baag' },
{ 'path': 'generics_match_ambiguity', 'name': 'baah' },
],
},
{
'query': 'W2<i32, u32>, W2<i32>',
'others': [
{ 'path': 'generics_match_ambiguity', 'name': 'baag' },
{ 'path': 'generics_match_ambiguity', 'name': 'baah' },
],
},
{
'query': 'W2<i32>, W3<i32, u32>',
'others': [
{ 'path': 'generics_match_ambiguity', 'name': 'baac' },
{ 'path': 'generics_match_ambiguity', 'name': 'baaf' },
{ 'path': 'generics_match_ambiguity', 'name': 'baag' },
],
},
{
'query': 'W2<i32>, W2<i32>',
'others': [
{ 'path': 'generics_match_ambiguity', 'name': 'baag' },
{ 'path': 'generics_match_ambiguity', 'name': 'baah' },
],
},
];

View file

@ -0,0 +1,17 @@
pub struct Wrap<T, U = ()>(pub T, pub U);
pub fn foo(a: Wrap<i32>, b: Wrap<i32, u32>) {}
pub fn bar(a: Wrap<i32, u32>, b: Wrap<i32>) {}
pub struct W2<T>(pub T);
pub struct W3<T, U = ()>(pub T, pub U);
pub fn baaa(a: W3<i32>, b: W3<i32, u32>) {}
pub fn baab(a: W3<i32, u32>, b: W3<i32>) {}
pub fn baac(a: W2<W3<i32>>, b: W3<i32, u32>) {}
pub fn baad(a: W2<W3<i32, u32>>, b: W3<i32>) {}
pub fn baae(a: W3<i32>, b: W2<W3<i32, u32>>) {}
pub fn baaf(a: W3<i32, u32>, b: W2<W3<i32>>) {}
pub fn baag(a: W2<W3<i32>>, b: W2<W3<i32, u32>>) {}
pub fn baah(a: W2<W3<i32, u32>>, b: W2<W3<i32>>) {}
//

View file

@ -0,0 +1,68 @@
// exact-check
const EXPECTED = [
{
'query': '-> Result<Object, bool>',
'others': [
{ 'path': 'nested_unboxed', 'name': 'something' },
],
},
{
'query': '-> Result<Object<i32, u32>, bool>',
'others': [
{ 'path': 'nested_unboxed', 'name': 'something' },
],
},
{
'query': '-> Object, bool',
'others': [
{ 'path': 'nested_unboxed', 'name': 'something' },
],
},
{
'query': '-> Object<i32, u32>, bool',
'others': [
{ 'path': 'nested_unboxed', 'name': 'something' },
],
},
{
'query': '-> i32, u32, bool',
'others': [
{ 'path': 'nested_unboxed', 'name': 'something' },
],
},
{
'query': '-> Result<i32, u32, bool>',
'others': [
{ 'path': 'nested_unboxed', 'name': 'something' },
],
},
{
'query': '-> Result<Object<i32>, bool>',
'others': [
{ 'path': 'nested_unboxed', 'name': 'something' },
],
},
{
'query': '-> Result<Object<u32>, bool>',
'others': [
{ 'path': 'nested_unboxed', 'name': 'something' },
],
},
{
'query': '-> Result<Object<i32>, u32, bool>',
'others': [],
},
{
'query': '-> Result<i32, Object<u32>, bool>',
'others': [],
},
{
'query': '-> Result<i32, u32, Object<bool>>',
'others': [],
},
{
'query': '-> Result<Object<i32>, Object<u32>, bool>',
'others': [],
},
];

View file

@ -0,0 +1,5 @@
pub struct Object<T, U>(T, U);
pub fn something() -> Result<Object<i32, u32>, bool> {
loop {}
}