rustdoc-search: remove parallel searchWords array

Keeping a separate `searchWords` array might have made sense if the algorithm
could use it to skip looking at `searchIndex`, but since it always
does a substring check on both the stock word and the `normalizedName`,
the parallel array doesn't seem to help performance anyway.
This commit is contained in:
Michael Howell 2023-12-15 13:28:43 -07:00
parent e6707df0de
commit 6b69ebcae0
4 changed files with 48 additions and 56 deletions

View file

@ -499,6 +499,7 @@ function initSearch(rawSearchIndex) {
fullPath: ["never"], fullPath: ["never"],
pathWithoutLast: [], pathWithoutLast: [],
pathLast: "never", pathLast: "never",
normalizedPathLast: "never",
generics: [], generics: [],
bindings: new Map(), bindings: new Map(),
typeFilter: "primitive", typeFilter: "primitive",
@ -537,12 +538,14 @@ function initSearch(rawSearchIndex) {
const bindingName = parserState.isInBinding; const bindingName = parserState.isInBinding;
parserState.isInBinding = null; parserState.isInBinding = null;
const bindings = new Map(); const bindings = new Map();
const pathLast = pathSegments[pathSegments.length - 1];
return { return {
name: name.trim(), name: name.trim(),
id: null, id: null,
fullPath: pathSegments, fullPath: pathSegments,
pathWithoutLast: pathSegments.slice(0, pathSegments.length - 1), pathWithoutLast: pathSegments.slice(0, pathSegments.length - 1),
pathLast: pathSegments[pathSegments.length - 1], pathLast,
normalizedPathLast: pathLast.replace(/_/g, ""),
generics: generics.filter(gen => { generics: generics.filter(gen => {
// Syntactically, bindings are parsed as generics, // Syntactically, bindings are parsed as generics,
// but the query engine treats them differently. // but the query engine treats them differently.
@ -689,6 +692,7 @@ function initSearch(rawSearchIndex) {
fullPath: ["[]"], fullPath: ["[]"],
pathWithoutLast: [], pathWithoutLast: [],
pathLast: "[]", pathLast: "[]",
normalizedPathLast: "[]",
generics, generics,
typeFilter: "primitive", typeFilter: "primitive",
bindingName: isInBinding, bindingName: isInBinding,
@ -1168,13 +1172,12 @@ function initSearch(rawSearchIndex) {
* Executes the parsed query and builds a {ResultsTable}. * Executes the parsed query and builds a {ResultsTable}.
* *
* @param {ParsedQuery} parsedQuery - The parsed user query * @param {ParsedQuery} parsedQuery - The parsed user query
* @param {Object} searchWords - The list of search words to query against
* @param {Object} [filterCrates] - Crate to search in if defined * @param {Object} [filterCrates] - Crate to search in if defined
* @param {Object} [currentCrate] - Current crate, to rank results from this crate higher * @param {Object} [currentCrate] - Current crate, to rank results from this crate higher
* *
* @return {ResultsTable} * @return {ResultsTable}
*/ */
function execQuery(parsedQuery, searchWords, filterCrates, currentCrate) { function execQuery(parsedQuery, filterCrates, currentCrate) {
const results_others = new Map(), results_in_args = new Map(), const results_others = new Map(), results_in_args = new Map(),
results_returned = new Map(); results_returned = new Map();
@ -1232,8 +1235,8 @@ function initSearch(rawSearchIndex) {
const userQuery = parsedQuery.userQuery; const userQuery = parsedQuery.userQuery;
const result_list = []; const result_list = [];
for (const result of results.values()) { for (const result of results.values()) {
result.word = searchWords[result.id]; result.item = searchIndex[result.id];
result.item = searchIndex[result.id] || {}; result.word = searchIndex[result.id].word;
result_list.push(result); result_list.push(result);
} }
@ -1928,7 +1931,7 @@ function initSearch(rawSearchIndex) {
* The `results` map contains information which will be used to sort the search results: * The `results` map contains information which will be used to sort the search results:
* *
* * `fullId` is a `string`` used as the key of the object we use for the `results` map. * * `fullId` is a `string`` used as the key of the object we use for the `results` map.
* * `id` is the index in both `searchWords` and `searchIndex` arrays for this element. * * `id` is the index in the `searchIndex` array for this element.
* * `index` is an `integer`` used to sort by the position of the word in the item's name. * * `index` is an `integer`` used to sort by the position of the word in the item's name.
* * `dist` is the main metric used to sort the search results. * * `dist` is the main metric used to sort the search results.
* * `path_dist` is zero if a single-component search query is used, otherwise it's the * * `path_dist` is zero if a single-component search query is used, otherwise it's the
@ -1986,9 +1989,8 @@ function initSearch(rawSearchIndex) {
if (!row || (filterCrates !== null && row.crate !== filterCrates)) { if (!row || (filterCrates !== null && row.crate !== filterCrates)) {
return; return;
} }
let index = -1, path_dist = 0; let path_dist = 0;
const fullId = row.id; const fullId = row.id;
const searchWord = searchWords[pos];
// fpDist is a minimum possible type distance, where "type distance" is the number of // fpDist is a minimum possible type distance, where "type distance" is the number of
// atoms in the function not present in the query // atoms in the function not present in the query
@ -2021,19 +2023,10 @@ function initSearch(rawSearchIndex) {
return; return;
} }
const row_index = row.normalizedName.indexOf(elem.pathLast); let index = row.word.indexOf(elem.pathLast);
const word_index = searchWord.indexOf(elem.pathLast); const normalizedIndex = row.normalizedName.indexOf(elem.pathLast);
if (index === -1 || (index > normalizedIndex && normalizedIndex !== -1)) {
// lower indexes are "better" matches index = normalizedIndex;
// rank based on the "best" match
if (row_index === -1) {
index = word_index;
} else if (word_index === -1) {
index = row_index;
} else if (word_index < row_index) {
index = word_index;
} else {
index = row_index;
} }
if (elem.fullPath.length > 1) { if (elem.fullPath.length > 1) {
@ -2044,13 +2037,13 @@ function initSearch(rawSearchIndex) {
} }
if (parsedQuery.literalSearch) { if (parsedQuery.literalSearch) {
if (searchWord === elem.name) { if (row.word === elem.pathLast) {
addIntoResults(results_others, fullId, pos, index, 0, path_dist); addIntoResults(results_others, fullId, pos, index, 0, path_dist);
} }
return; return;
} }
const dist = editDistance(searchWord, elem.pathLast, maxEditDistance); const dist = editDistance(row.normalizedName, elem.normalizedPathLast, maxEditDistance);
if (index === -1 && dist + path_dist > maxEditDistance) { if (index === -1 && dist + path_dist > maxEditDistance) {
return; return;
@ -2135,15 +2128,15 @@ function initSearch(rawSearchIndex) {
* @param {boolean} isAssocType * @param {boolean} isAssocType
*/ */
function convertNameToId(elem, isAssocType) { function convertNameToId(elem, isAssocType) {
if (typeNameIdMap.has(elem.pathLast) && if (typeNameIdMap.has(elem.normalizedPathLast) &&
(isAssocType || !typeNameIdMap.get(elem.pathLast).assocOnly)) { (isAssocType || !typeNameIdMap.get(elem.normalizedPathLast).assocOnly)) {
elem.id = typeNameIdMap.get(elem.pathLast).id; elem.id = typeNameIdMap.get(elem.normalizedPathLast).id;
} else if (!parsedQuery.literalSearch) { } else if (!parsedQuery.literalSearch) {
let match = null; let match = null;
let matchDist = maxEditDistance + 1; let matchDist = maxEditDistance + 1;
let matchName = ""; let matchName = "";
for (const [name, {id, assocOnly}] of typeNameIdMap) { for (const [name, {id, assocOnly}] of typeNameIdMap) {
const dist = editDistance(name, elem.pathLast, maxEditDistance); const dist = editDistance(name, elem.normalizedPathLast, maxEditDistance);
if (dist <= matchDist && dist <= maxEditDistance && if (dist <= matchDist && dist <= maxEditDistance &&
(isAssocType || !assocOnly)) { (isAssocType || !assocOnly)) {
if (dist === matchDist && matchName > name) { if (dist === matchDist && matchName > name) {
@ -2236,7 +2229,7 @@ function initSearch(rawSearchIndex) {
if (parsedQuery.foundElems === 1 && parsedQuery.returned.length === 0) { if (parsedQuery.foundElems === 1 && parsedQuery.returned.length === 0) {
if (parsedQuery.elems.length === 1) { if (parsedQuery.elems.length === 1) {
const elem = parsedQuery.elems[0]; const elem = parsedQuery.elems[0];
for (let i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) { for (let i = 0, nSearchIndex = searchIndex.length; i < nSearchIndex; ++i) {
// It means we want to check for this element everywhere (in names, args and // It means we want to check for this element everywhere (in names, args and
// returned). // returned).
handleSingleArg( handleSingleArg(
@ -2267,7 +2260,7 @@ function initSearch(rawSearchIndex) {
}; };
parsedQuery.elems.sort(sortQ); parsedQuery.elems.sort(sortQ);
parsedQuery.returned.sort(sortQ); parsedQuery.returned.sort(sortQ);
for (let i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) { for (let i = 0, nSearchIndex = searchIndex.length; i < nSearchIndex; ++i) {
handleArgs(searchIndex[i], i, results_others); handleArgs(searchIndex[i], i, results_others);
} }
} }
@ -2651,7 +2644,7 @@ ${item.displayPath}<span class="${type}">${name}</span>\
updateSearchHistory(buildUrl(query.original, filterCrates)); updateSearchHistory(buildUrl(query.original, filterCrates));
showResults( showResults(
execQuery(query, searchWords, filterCrates, window.currentCrate), execQuery(query, filterCrates, window.currentCrate),
params.go_to_first, params.go_to_first,
filterCrates); filterCrates);
} }
@ -2920,12 +2913,6 @@ ${item.displayPath}<span class="${type}">${name}</span>\
function buildIndex(rawSearchIndex) { function buildIndex(rawSearchIndex) {
searchIndex = []; searchIndex = [];
/**
* List of normalized search words (ASCII lowercased, and undescores removed).
*
* @type {Array<string>}
*/
const searchWords = [];
typeNameIdMap = new Map(); typeNameIdMap = new Map();
const charA = "A".charCodeAt(0); const charA = "A".charCodeAt(0);
let currentIndex = 0; let currentIndex = 0;
@ -3004,7 +2991,6 @@ ${item.displayPath}<span class="${type}">${name}</span>\
* }} * }}
*/ */
for (const [crate, crateCorpus] of rawSearchIndex) { for (const [crate, crateCorpus] of rawSearchIndex) {
searchWords.push(crate);
// This object should have exactly the same set of fields as the "row" // This object should have exactly the same set of fields as the "row"
// object defined below. Your JavaScript runtime will thank you. // object defined below. Your JavaScript runtime will thank you.
// https://mathiasbynens.be/notes/shapes-ics // https://mathiasbynens.be/notes/shapes-ics
@ -3017,6 +3003,7 @@ ${item.displayPath}<span class="${type}">${name}</span>\
parent: undefined, parent: undefined,
type: null, type: null,
id: id, id: id,
word: crate,
normalizedName: crate.indexOf("_") === -1 ? crate : crate.replace(/_/g, ""), normalizedName: crate.indexOf("_") === -1 ? crate : crate.replace(/_/g, ""),
deprecated: null, deprecated: null,
implDisambiguator: null, implDisambiguator: null,
@ -3084,12 +3071,9 @@ ${item.displayPath}<span class="${type}">${name}</span>\
len = itemTypes.length; len = itemTypes.length;
for (let i = 0; i < len; ++i) { for (let i = 0; i < len; ++i) {
let word = ""; let word = "";
// This object should have exactly the same set of fields as the "crateRow"
// object defined above.
if (typeof itemNames[i] === "string") { if (typeof itemNames[i] === "string") {
word = itemNames[i].toLowerCase(); word = itemNames[i].toLowerCase();
} }
searchWords.push(word);
const path = itemPaths.has(i) ? itemPaths.get(i) : lastPath; const path = itemPaths.has(i) ? itemPaths.get(i) : lastPath;
let type = null; let type = null;
if (itemFunctionSearchTypes[i] !== 0) { if (itemFunctionSearchTypes[i] !== 0) {
@ -3113,6 +3097,8 @@ ${item.displayPath}<span class="${type}">${name}</span>\
} }
} }
} }
// This object should have exactly the same set of fields as the "crateRow"
// object defined above.
const row = { const row = {
crate: crate, crate: crate,
ty: itemTypes.charCodeAt(i) - charA, ty: itemTypes.charCodeAt(i) - charA,
@ -3122,6 +3108,7 @@ ${item.displayPath}<span class="${type}">${name}</span>\
parent: itemParentIdxs[i] > 0 ? paths[itemParentIdxs[i] - 1] : undefined, parent: itemParentIdxs[i] > 0 ? paths[itemParentIdxs[i] - 1] : undefined,
type, type,
id: id, id: id,
word,
normalizedName: word.indexOf("_") === -1 ? word : word.replace(/_/g, ""), normalizedName: word.indexOf("_") === -1 ? word : word.replace(/_/g, ""),
deprecated: deprecatedItems.has(i), deprecated: deprecatedItems.has(i),
implDisambiguator: implDisambiguator.has(i) ? implDisambiguator.get(i) : null, implDisambiguator: implDisambiguator.has(i) ? implDisambiguator.get(i) : null,
@ -3153,7 +3140,6 @@ ${item.displayPath}<span class="${type}">${name}</span>\
} }
currentIndex += itemTypes.length; currentIndex += itemTypes.length;
} }
return searchWords;
} }
/** /**
@ -3332,10 +3318,7 @@ ${item.displayPath}<span class="${type}">${name}</span>\
search(true); search(true);
} }
/** buildIndex(rawSearchIndex);
* @type {Array<string>}
*/
const searchWords = buildIndex(rawSearchIndex);
if (typeof window !== "undefined") { if (typeof window !== "undefined") {
registerSearchEvents(); registerSearchEvents();
// If there's a search term in the URL, execute the search now. // If there's a search term in the URL, execute the search now.
@ -3349,7 +3332,6 @@ ${item.displayPath}<span class="${type}">${name}</span>\
exports.execQuery = execQuery; exports.execQuery = execQuery;
exports.parseQuery = parseQuery; exports.parseQuery = parseQuery;
} }
return searchWords;
} }
if (typeof window !== "undefined") { if (typeof window !== "undefined") {

View file

@ -396,16 +396,16 @@ function loadSearchJS(doc_folder, resource_suffix) {
const staticFiles = path.join(doc_folder, "static.files"); const staticFiles = path.join(doc_folder, "static.files");
const searchJs = fs.readdirSync(staticFiles).find(f => f.match(/search.*\.js$/)); const searchJs = fs.readdirSync(staticFiles).find(f => f.match(/search.*\.js$/));
const searchModule = require(path.join(staticFiles, searchJs)); const searchModule = require(path.join(staticFiles, searchJs));
const searchWords = searchModule.initSearch(searchIndex.searchIndex); searchModule.initSearch(searchIndex.searchIndex);
return { return {
doSearch: function(queryStr, filterCrate, currentCrate) { doSearch: function(queryStr, filterCrate, currentCrate) {
return searchModule.execQuery(searchModule.parseQuery(queryStr), searchWords, return searchModule.execQuery(searchModule.parseQuery(queryStr),
filterCrate, currentCrate); filterCrate, currentCrate);
}, },
getCorrections: function(queryStr, filterCrate, currentCrate) { getCorrections: function(queryStr, filterCrate, currentCrate) {
const parsedQuery = searchModule.parseQuery(queryStr); const parsedQuery = searchModule.parseQuery(queryStr);
searchModule.execQuery(parsedQuery, searchWords, filterCrate, currentCrate); searchModule.execQuery(parsedQuery, filterCrate, currentCrate);
return parsedQuery.correction; return parsedQuery.correction;
}, },
parseQuery: searchModule.parseQuery, parseQuery: searchModule.parseQuery,

View file

@ -1,7 +1,15 @@
const EXPECTED = { const EXPECTED = [
'query': 'waker_from', {
'others': [ 'query': 'waker_from',
{ 'path': 'substring::SuperWaker', 'name': 'local_waker_from_nonlocal' }, 'others': [
{ 'path': 'substring::SuperWakerTask', 'name': 'local_waker_from_nonlocal' }, { 'path': 'substring::SuperWaker', 'name': 'local_waker_from_nonlocal' },
], { 'path': 'substring::SuperWakerTask', 'name': 'local_waker_from_nonlocal' },
}; ],
},
{
'query': 'my',
'others': [
{ 'path': 'substring', 'name': 'm_y_substringmatching' },
],
},
];

View file

@ -19,3 +19,5 @@ impl SuperWakerTask {
pub fn waker_non_local() {} pub fn waker_non_local() {}
pub fn from_non_local() {} pub fn from_non_local() {}
} }
pub fn m_y_substringmatching() {}