Rollup merge of #119468 - notriddle:notriddle/compression, r=GuillaumeGomez

rustdoc-search: tighter encoding for f index Depends on https://github.com/rust-lang/rust/pull/119457 Two optimizations for the function signature search: * Instead of using JSON arrays, like `[1,20]`, it uses VLQ hex with no commas, like `[aAd]`. * This also adds backrefs: if you have more than one function with exactly the same signature, it'll not only store it once, it'll *decode* it once, and store in the typeIdMap only once. Based partially on discussions on zulip: https://rust-lang.zulipchat.com/#narrow/stream/266220-t-rustdoc/topic/search.20index.20size Performance ----------- https://notriddle.com/rustdoc-html-demo-8/compression-perf-v2/index.html ### memory/time profiler output (for more details, consult the above link) <table> <thead><tr><th>benchmark<th>before<th>after</tr></thead> <tbody> <tr><th>arti<td> ``` user: 002.789 s sys: 000.390 s wall: 002.096 s child_RSS_high: 440796 KiB group_mem_high: 414924 KiB ``` </td><td> ``` user: 002.295 s sys: 000.278 s wall: 001.738 s child_RSS_high: 314588 KiB group_mem_high: 285220 KiB ``` </td></tr><tr><th>cortex-m<td> ``` user: 000.127 s sys: 000.030 s wall: 000.134 s child_RSS_high: 60264 KiB group_mem_high: 23824 KiB ``` </td><td> ``` user: 000.136 s sys: 000.038 s wall: 000.137 s child_RSS_high: 59204 KiB group_mem_high: 22712 KiB ``` </td></tr><tr><th>sqlx<td> ``` user: 000.887 s sys: 000.118 s wall: 000.592 s child_RSS_high: 190408 KiB group_mem_high: 157804 KiB ``` </td><td> ``` user: 000.798 s sys: 000.101 s wall: 000.525 s child_RSS_high: 159292 KiB group_mem_high: 126292 KiB ``` </td></tr><tr><th>stm32f4<td> ``` user: 013.884 s sys: 005.399 s wall: 013.149 s child_RSS_high: 1942244 KiB group_mem_high: 1954916 KiB ``` </td><td> ``` user: 006.128 s sys: 003.297 s wall: 007.994 s child_RSS_high: 1038108 KiB group_mem_high: 1023900 KiB ``` </td></tr><tr><th>ripgrep<td> ``` user: 000.441 s sys: 000.063 s wall: 000.264 s child_RSS_high: 109180 KiB group_mem_high: 74272 KiB ``` </td><td> ``` user: 000.408 s sys: 000.044 s wall: 000.238 s child_RSS_high: 101488 KiB group_mem_high: 66000 KiB ``` </td></tr></tbody></table> Size change ----------- standard library without gzip: ```console $ du -bs search-index-old.js search-index-new.js 4976370 search-index-old.js 4404391 search-index-new.js ``` ((4976370-4404391)/4404391)*100% = 12.9% with gzip: ```console $ du -hs search-index-old.js.gz search-index-new.js.gz 520K search-index-old.js.gz 504K search-index-new.js.gz $ du -bs search-index-old.js.gz search-index-new.js.gz 522092 search-index-old.js.gz 507654 search-index-new.js.gz ``` ((522092-507654)/507654)*100% = 2.8% Benchmarks are similarly shrunk. Without gzip: ```console $ du -hs tmp/{arti,cortex-m,sqlx,stm32f4,ripgrep}/toolchain_{old,new}/doc/search-index.js 10555067 tmp/arti/toolchain_old/doc/search-index.js 8921236 tmp/arti/toolchain_new/doc/search-index.js 77018 tmp/cortex-m/toolchain_old/doc/search-index.js 66676 tmp/cortex-m/toolchain_new/doc/search-index.js 2876330 tmp/sqlx/toolchain_old/doc/search-index.js 2436812 tmp/sqlx/toolchain_new/doc/search-index.js 63632890 tmp/stm32f4/toolchain_old/doc/search-index.js 52337438 tmp/stm32f4/toolchain_new/doc/search-index.js 631150 tmp/ripgrep/toolchain_old/doc/search-index.js 541646 tmp/ripgrep/toolchain_new/doc/search-index.js ``` With gzip: ```console $ du -bs tmp/{arti,cortex-m,sqlx,stm32f4,ripgrep}/toolchain_{old,new}/doc/search-index.js.gz 1618852 tmp/arti/toolchain_old/doc/search-index.js.gz 1582007 tmp/arti/toolchain_new/doc/search-index.js.gz 16109 tmp/cortex-m/toolchain_old/doc/search-index.js.gz 15831 tmp/cortex-m/toolchain_new/doc/search-index.js.gz 422257 tmp/sqlx/toolchain_old/doc/search-index.js.gz 411507 tmp/sqlx/toolchain_new/doc/search-index.js.gz 4454761 tmp/stm32f4/toolchain_old/doc/search-index.js.gz 4334924 tmp/stm32f4/toolchain_new/doc/search-index.js.gz 98312 tmp/ripgrep/toolchain_old/doc/search-index.js.gz 96864 tmp/ripgrep/toolchain_new/doc/search-index.js.gz $ du -hs tmp/{arti,cortex-m,sqlx,stm32f4,ripgrep}/toolchain_{old,new}/doc/search-index.j s.gz 1.6M tmp/arti/toolchain_old/doc/search-index.js.gz 1.6M tmp/arti/toolchain_new/doc/search-index.js.gz 24K tmp/cortex-m/toolchain_old/doc/search-index.js.gz 24K tmp/cortex-m/toolchain_new/doc/search-index.js.gz 424K tmp/sqlx/toolchain_old/doc/search-index.js.gz 412K tmp/sqlx/toolchain_new/doc/search-index.js.gz 4.3M tmp/stm32f4/toolchain_old/doc/search-index.js.gz 4.2M tmp/stm32f4/toolchain_new/doc/search-index.js.gz 108K tmp/ripgrep/toolchain_old/doc/search-index.js.gz 104K tmp/ripgrep/toolchain_new/doc/search-index.js.gz ```
2024-01-05 23:41:42 -05:00 · 2024-01-05 23:41:42 -05:00 · b3f307434e
commit b3f307434e
parent 9585ebc269 004bfc5eb2
4 changed files with 252 additions and 141 deletions
--- a/src/librustdoc/html/render/mod.rs
+++ b/src/librustdoc/html/render/mod.rs
@ -58,7 +58,7 @@ use rustc_span::{
    symbol::{sym, Symbol},
    BytePos, FileName, RealFileName,
 };
-use serde::ser::{SerializeMap, SerializeSeq};
+use serde::ser::SerializeMap;
 use serde::{Serialize, Serializer};

 use crate::clean::{self, ItemId, RenderedLink, SelfTy};
@ -123,44 +123,58 @@ pub(crate) struct IndexItem {
 }

 /// A type used for the search index.
-#[derive(Debug)]
+#[derive(Debug, Eq, PartialEq)]
 pub(crate) struct RenderType {
    id: Option<RenderTypeId>,
    generics: Option<Vec<RenderType>>,
    bindings: Option<Vec<(RenderTypeId, Vec<RenderType>)>>,
 }

-impl Serialize for RenderType {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        let id = match &self.id {
+impl RenderType {
+    // Types are rendered as lists of lists, because that's pretty compact.
+    // The contents of the lists are always integers in self-terminating hex
+    // form, handled by `RenderTypeId::write_to_string`, so no commas are
+    // needed to separate the items.
+    pub fn write_to_string(&self, string: &mut String) {
+        fn write_optional_id(id: Option<RenderTypeId>, string: &mut String) {
            // 0 is a sentinel, everything else is one-indexed
-            None => 0,
-            // concrete type
-            Some(RenderTypeId::Index(idx)) if *idx >= 0 => idx + 1,
-            // generic type parameter
-            Some(RenderTypeId::Index(idx)) => *idx,
-            _ => panic!("must convert render types to indexes before serializing"),
-        };
-        if self.generics.is_some() || self.bindings.is_some() {
-            let mut seq = serializer.serialize_seq(None)?;
-            seq.serialize_element(&id)?;
-            seq.serialize_element(self.generics.as_ref().map(Vec::as_slice).unwrap_or_default())?;
-            if self.bindings.is_some() {
-                seq.serialize_element(
-                    self.bindings.as_ref().map(Vec::as_slice).unwrap_or_default(),
-                )?;
+            match id {
+                Some(id) => id.write_to_string(string),
+                None => string.push('`'),
            }
-            seq.end()
+        }
+        // Either just the type id, or `{type, generics, bindings?}`
+        // where generics is a list of types,
+        // and bindings is a list of `{id, typelist}` pairs.
+        if self.generics.is_some() || self.bindings.is_some() {
+            string.push('{');
+            write_optional_id(self.id, string);
+            string.push('{');
+            for generic in &self.generics.as_ref().map(Vec::as_slice).unwrap_or_default()[..] {
+                generic.write_to_string(string);
+            }
+            string.push('}');
+            if self.bindings.is_some() {
+                string.push('{');
+                for binding in &self.bindings.as_ref().map(Vec::as_slice).unwrap_or_default()[..] {
+                    string.push('{');
+                    binding.0.write_to_string(string);
+                    string.push('{');
+                    for constraint in &binding.1[..] {
+                        constraint.write_to_string(string);
+                    }
+                    string.push_str("}}");
+                }
+                string.push('}');
+            }
+            string.push('}');
        } else {
-            id.serialize(serializer)
+            write_optional_id(self.id, string);
        }
    }
 }

-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub(crate) enum RenderTypeId {
    DefId(DefId),
    Primitive(clean::PrimitiveType),
@ -168,70 +182,122 @@ pub(crate) enum RenderTypeId {
    Index(isize),
 }

-impl Serialize for RenderTypeId {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        let id = match &self {
+impl RenderTypeId {
+    pub fn write_to_string(&self, string: &mut String) {
+        // (sign, value)
+        let (sign, id): (bool, u32) = match &self {
            // 0 is a sentinel, everything else is one-indexed
            // concrete type
-            RenderTypeId::Index(idx) if *idx >= 0 => idx + 1,
+            RenderTypeId::Index(idx) if *idx >= 0 => (false, (idx + 1isize).try_into().unwrap()),
            // generic type parameter
-            RenderTypeId::Index(idx) => *idx,
+            RenderTypeId::Index(idx) => (true, (-*idx).try_into().unwrap()),
            _ => panic!("must convert render types to indexes before serializing"),
        };
-        id.serialize(serializer)
+        // zig-zag encoding
+        let value: u32 = (id << 1) | (if sign { 1 } else { 0 });
+        // Self-terminating hex use capital letters for everything but the
+        // least significant digit, which is lowercase. For example, decimal 17
+        // would be `` Aa `` if zig-zag encoding weren't used.
+        //
+        // Zig-zag encoding, however, stores the sign bit as the last bit.
+        // This means, in the last hexit, 1 is actually `c`, -1 is `b`
+        // (`a` is the imaginary -0), and, because all the bits are shifted
+        // by one, `` A` `` is actually 8 and `` Aa `` is -8.
+        //
+        // https://rust-lang.github.io/rustc-dev-guide/rustdoc-internals/search.html
+        // describes the encoding in more detail.
+        let mut shift: u32 = 28;
+        let mut mask: u32 = 0xF0_00_00_00;
+        while shift < 32 {
+            let hexit = (value & mask) >> shift;
+            if hexit != 0 || shift == 0 {
+                let hex =
+                    char::try_from(if shift == 0 { '`' } else { '@' } as u32 + hexit).unwrap();
+                string.push(hex);
+            }
+            shift = shift.wrapping_sub(4);
+            mask = mask >> 4;
+        }
    }
 }

 /// Full type of functions/methods in the search index.
-#[derive(Debug)]
+#[derive(Debug, Eq, PartialEq)]
 pub(crate) struct IndexItemFunctionType {
    inputs: Vec<RenderType>,
    output: Vec<RenderType>,
    where_clause: Vec<Vec<RenderType>>,
 }

-impl Serialize for IndexItemFunctionType {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        // If we couldn't figure out a type, just write `0`.
+impl IndexItemFunctionType {
+    pub fn write_to_string<'a>(
+        &'a self,
+        string: &mut String,
+        backref_queue: &mut VecDeque<&'a IndexItemFunctionType>,
+    ) {
+        assert!(backref_queue.len() <= 16);
+        // If we couldn't figure out a type, just write 0,
+        // which is encoded as `` ` `` (see RenderTypeId::write_to_string).
        let has_missing = self
            .inputs
            .iter()
            .chain(self.output.iter())
            .any(|i| i.id.is_none() && i.generics.is_none());
        if has_missing {
-            0.serialize(serializer)
+            string.push('`');
+        } else if let Some(idx) = backref_queue.iter().position(|other| *other == self) {
+            // The backref queue has 16 items, so backrefs use
+            // a single hexit, disjoint from the ones used for numbers.
+            string.push(
+                char::try_from('0' as u32 + u32::try_from(idx).unwrap())
+                    .expect("last possible value is '?'"),
+            );
        } else {
-            let mut seq = serializer.serialize_seq(None)?;
+            backref_queue.push_front(self);
+            if backref_queue.len() > 16 {
+                backref_queue.pop_back();
+            }
+            string.push('{');
            match &self.inputs[..] {
                [one] if one.generics.is_none() && one.bindings.is_none() => {
-                    seq.serialize_element(one)?
+                    one.write_to_string(string);
+                }
+                _ => {
+                    string.push('{');
+                    for item in &self.inputs[..] {
+                        item.write_to_string(string);
+                    }
+                    string.push('}');
                }
-                _ => seq.serialize_element(&self.inputs)?,
            }
            match &self.output[..] {
                [] if self.where_clause.is_empty() => {}
                [one] if one.generics.is_none() && one.bindings.is_none() => {
-                    seq.serialize_element(one)?
+                    one.write_to_string(string);
+                }
+                _ => {
+                    string.push('{');
+                    for item in &self.output[..] {
+                        item.write_to_string(string);
+                    }
+                    string.push('}');
                }
-                _ => seq.serialize_element(&self.output)?,
            }
            for constraint in &self.where_clause {
                if let [one] = &constraint[..]
                    && one.generics.is_none()
                    && one.bindings.is_none()
                {
-                    seq.serialize_element(one)?;
+                    one.write_to_string(string);
                } else {
-                    seq.serialize_element(constraint)?;
+                    string.push('{');
+                    for item in &constraint[..] {
+                        item.write_to_string(string);
+                    }
+                    string.push('}');
                }
            }
-            seq.end()
+            string.push('}');
        }
    }
 }
--- a/src/librustdoc/html/render/search_index.rs
+++ b/src/librustdoc/html/render/search_index.rs
@ -1,5 +1,5 @@
 use std::collections::hash_map::Entry;
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, VecDeque};

 use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
 use rustc_middle::ty::TyCtxt;
@ -409,9 +409,11 @@ pub(crate) fn build_index<'tcx>(
            let mut full_paths = Vec::with_capacity(self.items.len());
            let mut descriptions = Vec::with_capacity(self.items.len());
            let mut parents = Vec::with_capacity(self.items.len());
-            let mut functions = Vec::with_capacity(self.items.len());
+            let mut functions = String::with_capacity(self.items.len());
            let mut deprecated = Vec::with_capacity(self.items.len());

+            let mut backref_queue = VecDeque::new();
+
            for (index, item) in self.items.iter().enumerate() {
                let n = item.ty as u8;
                let c = char::try_from(n + b'A').expect("item types must fit in ASCII");
@ -434,27 +436,10 @@ pub(crate) fn build_index<'tcx>(
                    full_paths.push((index, &item.path));
                }

-                // Fake option to get `0` out as a sentinel instead of `null`.
-                // We want to use `0` because it's three less bytes.
-                enum FunctionOption<'a> {
-                    Function(&'a IndexItemFunctionType),
-                    None,
+                match &item.search_type {
+                    Some(ty) => ty.write_to_string(&mut functions, &mut backref_queue),
+                    None => functions.push('`'),
                }
-                impl<'a> Serialize for FunctionOption<'a> {
-                    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-                    where
-                        S: Serializer,
-                    {
-                        match self {
-                            FunctionOption::None => 0.serialize(serializer),
-                            FunctionOption::Function(ty) => ty.serialize(serializer),
-                        }
-                    }
-                }
-                functions.push(match &item.search_type {
-                    Some(ty) => FunctionOption::Function(ty),
-                    None => FunctionOption::None,
-                });

                if item.deprecation.is_some() {
                    deprecated.push(index);
--- a/src/librustdoc/html/static/js/externs.js
+++ b/src/librustdoc/html/static/js/externs.js
@ -200,3 +200,59 @@ let FunctionSearchType;
 * }}
 */
 let FunctionType;
+
+/**
+ * The raw search data for a given crate. `n`, `t`, `d`, `i`, and `f`
+ * are arrays with the same length. `q`, `a`, and `c` use a sparse
+ * representation for compactness.
+ *
+ * `n[i]` contains the name of an item.
+ *
+ * `t[i]` contains the type of that item
+ * (as a string of characters that represent an offset in `itemTypes`).
+ *
+ * `d[i]` contains the description of that item.
+ *
+ * `q` contains the full paths of the items. For compactness, it is a set of
+ * (index, path) pairs used to create a map. If a given index `i` is
+ * not present, this indicates "same as the last index present".
+ *
+ * `i[i]` contains an item's parent, usually a module. For compactness,
+ * it is a set of indexes into the `p` array.
+ *
+ * `f` contains function signatures, or `0` if the item isn't a function.
+ * More information on how they're encoded can be found in rustc-dev-guide
+ *
+ * Functions are themselves encoded as arrays. The first item is a list of
+ * types representing the function's inputs, and the second list item is a list
+ * of types representing the function's output. Tuples are flattened.
+ * Types are also represented as arrays; the first item is an index into the `p`
+ * array, while the second is a list of types representing any generic parameters.
+ *
+ * b[i] contains an item's impl disambiguator. This is only present if an item
+ * is defined in an impl block and, the impl block's type has more than one associated
+ * item with the same name.
+ *
+ * `a` defines aliases with an Array of pairs: [name, offset], where `offset`
+ * points into the n/t/d/q/i/f arrays.
+ *
+ * `doc` contains the description of the crate.
+ *
+ * `p` is a list of path/type pairs. It is used for parents and function parameters.
+ *
+ * `c` is an array of item indices that are deprecated.
+ * @typedef {{
+ *   doc: string,
+ *   a: Object,
+ *   n: Array<string>,
+ *   t: String,
+ *   d: Array<string>,
+ *   q: Array<[Number, string]>,
+ *   i: Array<Number>,
+ *   f: string,
+ *   p: Array<Object>,
+ *   b: Array<[Number, String]>,
+ *   c: Array<Number>
+ * }}
+ */
+let RawSearchIndexCrate;
--- a/src/librustdoc/html/static/js/search.js
+++ b/src/librustdoc/html/static/js/search.js
@ -2767,19 +2767,65 @@ ${item.displayPath}<span class="${type}">${name}</span>\
     * The raw function search type format is generated using serde in
     * librustdoc/html/render/mod.rs: impl Serialize for IndexItemFunctionType
     *
-     * @param {RawFunctionSearchType} functionSearchType
+     * @param {{
+     *  string: string,
+     *  offset: number,
+     *  backrefQueue: FunctionSearchType[]
+     * }} itemFunctionDecoder
     * @param {Array<{name: string, ty: number}>} lowercasePaths
     * @param {Map<string, integer>}
     *
     * @return {null|FunctionSearchType}
     */
-    function buildFunctionSearchType(functionSearchType, lowercasePaths) {
-        const INPUTS_DATA = 0;
-        const OUTPUT_DATA = 1;
-        // `0` is used as a sentinel because it's fewer bytes than `null`
-        if (functionSearchType === 0) {
+    function buildFunctionSearchType(itemFunctionDecoder, lowercasePaths) {
+        const c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset);
+        itemFunctionDecoder.offset += 1;
+        const [zero, ua, la, ob, cb] = ["0", "@", "`", "{", "}"].map(c => c.charCodeAt(0));
+        // `` ` `` is used as a sentinel because it's fewer bytes than `null`, and decodes to zero
+        // `0` is a backref
+        if (c === la) {
            return null;
        }
+        // sixteen characters after "0" are backref
+        if (c >= zero && c < ua) {
+            return itemFunctionDecoder.backrefQueue[c - zero];
+        }
+        if (c !== ob) {
+            throw ["Unexpected ", c, " in function: expected ", "{", "; this is a bug"];
+        }
+        // call after consuming `{`
+        function decodeList() {
+            let c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset);
+            const ret = [];
+            while (c !== cb) {
+                ret.push(decode());
+                c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset);
+            }
+            itemFunctionDecoder.offset += 1; // eat cb
+            return ret;
+        }
+        // consumes and returns a list or integer
+        function decode() {
+            let n = 0;
+            let c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset);
+            if (c === ob) {
+                itemFunctionDecoder.offset += 1;
+                return decodeList();
+            }
+            while (c < la) {
+                n = (n << 4) | (c & 0xF);
+                itemFunctionDecoder.offset += 1;
+                c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset);
+            }
+            // last character >= la
+            n = (n << 4) | (c & 0xF);
+            const [sign, value] = [n & 1, n >> 1];
+            itemFunctionDecoder.offset += 1;
+            return sign ? -value : value;
+        }
+        const functionSearchType = decodeList();
+        const INPUTS_DATA = 0;
+        const OUTPUT_DATA = 1;
        let inputs, output;
        if (typeof functionSearchType[INPUTS_DATA] === "number") {
            inputs = [buildItemSearchType(functionSearchType[INPUTS_DATA], lowercasePaths)];
@ -2808,9 +2854,14 @@ ${item.displayPath}<span class="${type}">${name}</span>\
                ? [buildItemSearchType(functionSearchType[i], lowercasePaths)]
                : buildItemSearchTypeAll(functionSearchType[i], lowercasePaths));
        }
-        return {
+        const ret = {
            inputs, output, where_clause,
        };
+        itemFunctionDecoder.backrefQueue.unshift(ret);
+        if (itemFunctionDecoder.backrefQueue.length > 16) {
+            itemFunctionDecoder.backrefQueue.pop();
+        }
+        return ret;
    }

    /**
@ -2924,6 +2975,11 @@ ${item.displayPath}<span class="${type}">${name}</span>\
        return functionTypeFingerprint[(fullId * 4) + 3];
    }

+    /**
+     * Convert raw search index into in-memory search index.
+     *
+     * @param {[string, RawSearchIndexCrate][]} rawSearchIndex
+     */
    function buildIndex(rawSearchIndex) {
        searchIndex = [];
        typeNameIdMap = new Map();
@ -2950,59 +3006,7 @@ ${item.displayPath}<span class="${type}">${name}</span>\
        // This loop actually generates the search item indexes, including
        // normalized names, type signature objects and fingerprints, and aliases.
        id = 0;
-        /**
-         * The raw search data for a given crate. `n`, `t`, `d`, `i`, and `f`
-         * are arrays with the same length. `q`, `a`, and `c` use a sparse
-         * representation for compactness.
-         *
-         * `n[i]` contains the name of an item.
-         *
-         * `t[i]` contains the type of that item
-         * (as a string of characters that represent an offset in `itemTypes`).
-         *
-         * `d[i]` contains the description of that item.
-         *
-         * `q` contains the full paths of the items. For compactness, it is a set of
-         * (index, path) pairs used to create a map. If a given index `i` is
-         * not present, this indicates "same as the last index present".
-         *
-         * `i[i]` contains an item's parent, usually a module. For compactness,
-         * it is a set of indexes into the `p` array.
-         *
-         * `f[i]` contains function signatures, or `0` if the item isn't a function.
-         * Functions are themselves encoded as arrays. The first item is a list of
-         * types representing the function's inputs, and the second list item is a list
-         * of types representing the function's output. Tuples are flattened.
-         * Types are also represented as arrays; the first item is an index into the `p`
-         * array, while the second is a list of types representing any generic parameters.
-         *
-         * b[i] contains an item's impl disambiguator. This is only present if an item
-         * is defined in an impl block and, the impl block's type has more than one associated
-         * item with the same name.
-         *
-         * `a` defines aliases with an Array of pairs: [name, offset], where `offset`
-         * points into the n/t/d/q/i/f arrays.
-         *
-         * `doc` contains the description of the crate.
-         *
-         * `p` is a list of path/type pairs. It is used for parents and function parameters.
-         *
-         * `c` is an array of item indices that are deprecated.
-         *
-         * @type {{
-         *   doc: string,
-         *   a: Object,
-         *   n: Array<string>,
-         *   t: String,
-         *   d: Array<string>,
-         *   q: Array<[Number, string]>,
-         *   i: Array<Number>,
-         *   f: Array<RawFunctionSearchType>,
-         *   p: Array<Object>,
-         *   b: Array<[Number, String]>,
-         *   c: Array<Number>
-         * }}
-         */
+
        for (const [crate, crateCorpus] of rawSearchIndex) {
            // This object should have exactly the same set of fields as the "row"
            // object defined below. Your JavaScript runtime will thank you.
@ -3039,8 +3043,12 @@ ${item.displayPath}<span class="${type}">${name}</span>\
            const itemDescs = crateCorpus.d;
            // an array of (Number) the parent path index + 1 to `paths`, or 0 if none
            const itemParentIdxs = crateCorpus.i;
-            // an array of (Object | null) the type of the function, if any
-            const itemFunctionSearchTypes = crateCorpus.f;
+            // a string representing the list of function types
+            const itemFunctionDecoder = {
+                string: crateCorpus.f,
+                offset: 0,
+                backrefQueue: [],
+            };
            // an array of (Number) indices for the deprecated items
            const deprecatedItems = new Set(crateCorpus.c);
            // an array of (Number) indices for the deprecated items
@ -3088,12 +3096,8 @@ ${item.displayPath}<span class="${type}">${name}</span>\
                    word = itemNames[i].toLowerCase();
                }
                const path = itemPaths.has(i) ? itemPaths.get(i) : lastPath;
-                let type = null;
-                if (itemFunctionSearchTypes[i] !== 0) {
-                    type = buildFunctionSearchType(
-                        itemFunctionSearchTypes[i],
-                        lowercasePaths
-                    );
+                const type = buildFunctionSearchType(itemFunctionDecoder, lowercasePaths);
+                if (type !== null) {
                    if (type) {
                        const fp = functionTypeFingerprint.subarray(id * 4, (id + 1) * 4);
                        const fps = new Set();