rustdoc-search: count path edits with separate edit limit

Since the two are counted separately elsewhere, they should get
their own limits, too. The biggest problem with combining them
is that paths are loosely checked by not requiring every component
to match, which means that if they are short and matched loosely,
they can easily find "drunk typist" matches that make no sense,
like this old result:

    std::collections::btree_map::itermut matching slice::itermut
    maxEditDistance = ("slice::itermut".length) / 3 = 14 / 3 = 4
    editDistance("std", "slice") = 4
    editDistance("itermut", "itermut") = 0
        4 + 0 <= 4 PASS

Of course, `slice::itermut` should not match stuff from btreemap.
`slice` should not match `std`.

The new result counts them separately:

    maxPathEditDistance = "slice".length / 3 = 5 / 3 = 1
    maxEditDistance = "itermut".length / 3 = 7 / 3 = 2
    editDistance("std", "slice") = 4
        4 <= 1 FAIL

Effectively, this makes path queries less "typo-resistant".
It's not zero, but it means `vec` won't match the `v1` prelude.

Queries without parent paths are unchanged.
This commit is contained in:
Michael Howell 2023-12-26 16:15:51 -07:00
parent a75fed74b6
commit 0ea58e2346
10 changed files with 152 additions and 44 deletions

View file

@ -1805,11 +1805,20 @@ function initSearch(rawSearchIndex) {
return unifyFunctionTypes([row], [elem], whereClause, mgens);
}
function checkPath(contains, ty, maxEditDistance) {
/**
* Compute an "edit distance" that ignores missing path elements.
* @param {string[]} contains search query path
* @param {Row} ty indexed item
* @returns {null|number} edit distance
*/
function checkPath(contains, ty) {
if (contains.length === 0) {
return 0;
}
let ret_dist = maxEditDistance + 1;
const maxPathEditDistance = Math.floor(
contains.reduce((acc, next) => acc + next.length, 0) / 3
);
let ret_dist = maxPathEditDistance + 1;
const path = ty.path.split("::");
if (ty.parent && ty.parent.name) {
@ -1821,15 +1830,23 @@ function initSearch(rawSearchIndex) {
pathiter: for (let i = length - clength; i >= 0; i -= 1) {
let dist_total = 0;
for (let x = 0; x < clength; ++x) {
const dist = editDistance(path[i + x], contains[x], maxEditDistance);
if (dist > maxEditDistance) {
continue pathiter;
const [p, c] = [path[i + x], contains[x]];
if (Math.floor((p.length - c.length) / 3) <= maxPathEditDistance &&
p.indexOf(c) !== -1
) {
// discount distance on substring match
dist_total += Math.floor((p.length - c.length) / 3);
} else {
const dist = editDistance(p, c, maxPathEditDistance);
if (dist > maxPathEditDistance) {
continue pathiter;
}
dist_total += dist;
}
dist_total += dist;
}
ret_dist = Math.min(ret_dist, Math.round(dist_total / clength));
}
return ret_dist;
return ret_dist > maxPathEditDistance ? null : ret_dist;
}
function typePassesFilter(filter, type) {
@ -2030,8 +2047,8 @@ function initSearch(rawSearchIndex) {
}
if (elem.fullPath.length > 1) {
path_dist = checkPath(elem.pathWithoutLast, row, maxEditDistance);
if (path_dist > maxEditDistance) {
path_dist = checkPath(elem.pathWithoutLast, row);
if (path_dist === null) {
return;
}
}
@ -2045,7 +2062,7 @@ function initSearch(rawSearchIndex) {
const dist = editDistance(row.normalizedName, elem.normalizedPathLast, maxEditDistance);
if (index === -1 && dist + path_dist > maxEditDistance) {
if (index === -1 && dist > maxEditDistance) {
return;
}
@ -2100,13 +2117,9 @@ function initSearch(rawSearchIndex) {
}
function innerRunQuery() {
let queryLen = 0;
for (const elem of parsedQuery.elems) {
queryLen += elem.name.length;
}
for (const elem of parsedQuery.returned) {
queryLen += elem.name.length;
}
const queryLen =
parsedQuery.elems.reduce((acc, next) => acc + next.pathLast.length, 0) +
parsedQuery.returned.reduce((acc, next) => acc + next.pathLast.length, 0);
const maxEditDistance = Math.floor(queryLen / 3);
/**

View file

@ -7,7 +7,6 @@ const EXPECTED = {
// Validate that type alias methods get the correct path.
{ 'path': 'std::os::fd::AsRawFd', 'name': 'as_raw_fd' },
{ 'path': 'std::os::fd::AsRawFd', 'name': 'as_raw_fd' },
{ 'path': 'std::os::linux::process::PidFd', 'name': 'as_raw_fd' },
{ 'path': 'std::os::fd::RawFd', 'name': 'as_raw_fd' },
],
};

View file

@ -0,0 +1,42 @@
// exact-check
const FILTER_CRATE = "std";
const EXPECTED = [
{
query: 'vec::intoiterator',
others: [
// trait std::iter::IntoIterator is not the first result
{ 'path': 'std::vec', 'name': 'IntoIter' },
{ 'path': 'std::vec::Vec', 'name': 'into_iter' },
{ 'path': 'std::vec::Drain', 'name': 'into_iter' },
{ 'path': 'std::vec::IntoIter', 'name': 'into_iter' },
{ 'path': 'std::vec::ExtractIf', 'name': 'into_iter' },
{ 'path': 'std::vec::Splice', 'name': 'into_iter' },
{ 'path': 'std::collections::VecDeque', 'name': 'into_iter' },
],
},
{
query: 'vec::iter',
others: [
// std::net::ToSocketAttrs::iter should not show up here
{ 'path': 'std::vec', 'name': 'IntoIter' },
{ 'path': 'std::vec::Vec', 'name': 'from_iter' },
{ 'path': 'std::vec::Vec', 'name': 'into_iter' },
{ 'path': 'std::vec::Drain', 'name': 'into_iter' },
{ 'path': 'std::vec::IntoIter', 'name': 'into_iter' },
{ 'path': 'std::vec::ExtractIf', 'name': 'into_iter' },
{ 'path': 'std::vec::Splice', 'name': 'into_iter' },
{ 'path': 'std::collections::VecDeque', 'name': 'iter' },
{ 'path': 'std::collections::VecDeque', 'name': 'iter_mut' },
{ 'path': 'std::collections::VecDeque', 'name': 'from_iter' },
{ 'path': 'std::collections::VecDeque', 'name': 'into_iter' },
],
},
{
query: 'slice::itermut',
others: [
// std::collections::btree_map::itermut should not show up here
{ 'path': 'std::slice', 'name': 'IterMut' },
{ 'path': 'std::slice', 'name': 'iter_mut' },
],
},
];

View file

@ -1,11 +1,20 @@
const EXPECTED = {
query: 'hashset::insert',
others: [
// ensure hashset::insert comes first
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_with' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_owned' },
{ 'path': 'std::collections::hash_map::HashMap', 'name': 'insert' },
],
};
const EXPECTED = [
{
query: 'hashset::insert',
others: [
// ensure hashset::insert comes first
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_with' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_owned' },
],
},
{
query: 'hash::insert',
others: [
// ensure hashset/hashmap::insert come first
{ 'path': 'std::collections::hash_map::HashMap', 'name': 'insert' },
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' },
],
},
];

View file

@ -3,6 +3,5 @@ const EXPECTED = {
'others': [
{ 'path': 'exact_match::Si', 'name': 'pc' },
{ 'path': 'exact_match::Psi', 'name': 'pc' },
{ 'path': 'exact_match::Si', 'name': 'pa' },
],
};

View file

@ -1,7 +1,15 @@
const EXPECTED = {
'query': 'ig::pc',
'others': [
{ 'path': 'module_substring::Sig', 'name': 'pc' },
{ 'path': 'module_substring::Si', 'name': 'pc' },
],
};
const EXPECTED = [
{
'query': 'ig::pc',
'others': [
{ 'path': 'module_substring::Sig', 'name': 'pc' },
],
},
{
'query': 'si::pc',
'others': [
{ 'path': 'module_substring::Si', 'name': 'pc' },
{ 'path': 'module_substring::Sig', 'name': 'pc' },
],
},
];

View file

@ -0,0 +1,35 @@
// exact-check
const EXPECTED = [
{
'query': 'xxxxxxxxxxx::hocuspocusprestidigitation',
// do not match abracadabra::hocuspocusprestidigitation
'others': [],
},
{
// exact match
'query': 'abracadabra::hocuspocusprestidigitation',
'others': [
{ 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
],
},
{
// swap br/rb; that's edit distance 2, where maxPathEditDistance = 3 (11 / 3)
'query': 'arbacadarba::hocuspocusprestidigitation',
'others': [
{ 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
],
},
{
// truncate 5 chars, where maxEditDistance = 7 (21 / 3)
'query': 'abracadarba::hocusprestidigitation',
'others': [
{ 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
],
},
{
// truncate 9 chars, where maxEditDistance = 5 (17 / 3)
'query': 'abracadarba::hprestidigitation',
'others': [],
},
];

View file

@ -0,0 +1,3 @@
#![crate_name="abracadabra"]
pub struct HocusPocusPrestidigitation;

View file

@ -1,13 +1,13 @@
// exact-check
const EXPECTED = {
'query': 'b::ccccccc',
'query': 'bbbbbb::ccccccc',
'others': [
// `ccccccc` is an exact match for all three of these.
// However `b` is a closer match for `bb` than for any
// of the others, so it ought to go first.
{ 'path': 'path_ordering::bb', 'name': 'Ccccccc' },
{ 'path': 'path_ordering::aa', 'name': 'Ccccccc' },
{ 'path': 'path_ordering::dd', 'name': 'Ccccccc' },
{ 'path': 'path_ordering::bbbbbb', 'name': 'Ccccccc' },
{ 'path': 'path_ordering::abbbbb', 'name': 'Ccccccc' },
{ 'path': 'path_ordering::dbbbbb', 'name': 'Ccccccc' },
],
};

View file

@ -1,9 +1,9 @@
pub mod dd {
pub mod dbbbbb {
pub struct Ccccccc;
}
pub mod aa {
pub mod abbbbb {
pub struct Ccccccc;
}
pub mod bb {
pub mod bbbbbb {
pub struct Ccccccc;
}