// os-rust/tests/rustdoc-js/non-english-identifier.js
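// PARSED exercises the search-query parser on non-English (mostly Chinese)
// identifiers: each entry pairs a raw query string with the parsed elements
// it is expected to produce. Note that parsed names are lowercased in
// fullPath/pathLast, and typeFilter: -1 means no item-type filter was given.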
const PARSED = [
{
query: '中文',
elems: [{
name: "中文",
fullPath: ["中文"],
pathWithoutLast: [],
pathLast: "中文",
generics: [],
typeFilter: -1,
}],
returned: [],
foundElems: 1,
userQuery: "中文",
error: null,
},
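// A mixed ASCII/CJK name: matching is case-insensitive, and
// normalizedPathLast additionally drops underscores
// ("_0mixed中英文" -> "0mixed中英文").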
{
query: '_0Mixed中英文',
elems: [{
name: "_0Mixed中英文",
fullPath: ["_0mixed中英文"],
pathWithoutLast: [],
pathLast: "_0mixed中英文",
normalizedPathLast: "0mixed中英文",
generics: [],
typeFilter: -1,
}],
foundElems: 1,
userQuery: "_0Mixed中英文",
returned: [],
error: null,
},
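// A `::`-qualified path: every segment before the last one lands in
// pathWithoutLast.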
{
query: 'my_crate::中文API',
elems: [{
name: "my_crate::中文API",
fullPath: ["my_crate", "中文api"],
pathWithoutLast: ["my_crate"],
pathLast: "中文api",
generics: [],
typeFilter: -1,
}],
foundElems: 1,
userQuery: "my_crate::中文API",
returned: [],
error: null,
},
{
query: '类型A,类型B<约束C>->返回类型<关联类型=路径::约束D>',
elems: [{
name: "类型A",
fullPath: ["类型a"],
pathWithoutLast: [],
pathLast: "类型a",
generics: [],
}, {
name: "类型B",
fullPath: ["类型b"],
pathWithoutLast: [],
pathLast: "类型b",
generics: [{
name: "约束C",
fullPath: ["约束c"],
pathWithoutLast: [],
pathLast: "约束c",
generics: [],
}],
}],
foundElems: 3,
totalElems: 5,
literalSearch: true,
userQuery: "类型A,类型B<约束C>->返回类型<关联类型=路径::约束D>",
returned: [{
name: "返回类型",
fullPath: ["返回类型"],
pathWithoutLast: [],
pathLast: "返回类型",
generics: [],
bindings: [["关联类型", [{
name: "路径::约束D",
fullPath: ["路径", "约束d"],
pathWithoutLast: ["路径"],
pathLast: "约束d",
generics: [],
}]]],
}],
error: null,
},
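// The trailing `!` restricts the search to macros, which is reflected in
// the typeFilter on the parsed element.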
{
query: 'my_crate 中文宏!',
elems: [{
name: "my_crate 中文宏",
fullPath: ["my_crate", "中文宏"],
pathWithoutLast: ["my_crate"],
pathLast: "中文宏",
generics: [],
typeFilter: 16,
}],
foundElems: 1,
userQuery: "my_crate 中文宏!",
returned: [],
error: null,
},
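// "非法符号" ("invalid symbol") followed by an em-dash: `—` is not a valid
// identifier character, so parsing stops with an error.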
{
query: '非法符号——',
elems: [],
foundElems: 0,
userQuery: "非法符号——",
returned: [],
error: "Unexpected `—` after `号` (not a valid identifier)",
}
]
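// EXPECTED runs the same kinds of queries end to end and checks the results
// returned against the fixture crate (by rustdoc-js convention, presumably
// the non-english-identifier.rs file that sits next to this test).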
const EXPECTED = [
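// "加法" ("addition") is a doc alias for both the `add` macro and the
// `add` function, and is also the literal name of a trait.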
{
query: '加法',
others: [
{
name: "add",
path: "non_english_identifier",
is_alias: true,
alias: "加法",
href: "../non_english_identifier/macro.add.html"
},
{
name: "add",
path: "non_english_identifier",
is_alias: true,
alias: "加法",
href: "../non_english_identifier/fn.add.html"
},
{
name: "加法",
path: "non_english_identifier",
href: "../non_english_identifier/trait.加法.html",
desc: "Add"
}],
in_args: [{
name: "加上",
path: "non_english_identifier::加法",
href: "../non_english_identifier/trait.加法.html#tymethod.加上",
}],
returned: [],
},
{ // Levenshtein and substring checking only kick in at three characters
query: '加法宏',
others: [
{
name: "中文名称的加法宏",
path: "non_english_identifier",
href: "../non_english_identifier/macro.中文名称的加法宏.html",
}],
in_args: [],
returned: [],
},
{ // Levenshtein and substring checking only kick in at three characters
query: '加法A',
others: [
{
name: "中文名称的加法API",
path: "non_english_identifier",
href: "../non_english_identifier/fn.中文名称的加法API.html",
}],
in_args: [],
returned: [],
},
{ // Extensive type-based search is still buggy, experimental, and a work in progress.
query: '可迭代->可选',
others: [{
name: "总计",
path: "non_english_identifier",
href: "../non_english_identifier/fn.总计.html",
desc: "“sum”"
}],
in_args: [],
returned: [],
},
];