Update the list of confusable characters
Also reorder and space the list to make it clearer for future updates and to come closer to the original list. Thanks @est31 for the instructions. Fixes #43629. r? @est31
This commit is contained in:
parent
a9c24fd579
commit
4e2ddcb879
1 changed files with 125 additions and 19 deletions
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
|
||||
// Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
|
@ -9,15 +9,16 @@
|
|||
// except according to those terms.
|
||||
|
||||
// Characters and their corresponding confusables were collected from
|
||||
// http://www.unicode.org/Public/security/revision-06/confusables.txt
|
||||
// http://www.unicode.org/Public/security/10.0.0/confusables.txt
|
||||
|
||||
use syntax_pos::{Span, NO_EXPANSION};
|
||||
use errors::DiagnosticBuilder;
|
||||
use super::StringReader;
|
||||
|
||||
const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
|
||||
(' ', "No-Break Space", ' '),
|
||||
(' ', "Ogham Space Mark", ' '),
|
||||
('
', "Line Separator", ' '),
|
||||
('
', "Paragraph Separator", ' '),
|
||||
(' ', "Ogham Space mark", ' '),
|
||||
(' ', "En Quad", ' '),
|
||||
(' ', "Em Quad", ' '),
|
||||
(' ', "En Space", ' '),
|
||||
|
@ -25,39 +26,63 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
|
|||
(' ', "Three-Per-Em Space", ' '),
|
||||
(' ', "Four-Per-Em Space", ' '),
|
||||
(' ', "Six-Per-Em Space", ' '),
|
||||
(' ', "Figure Space", ' '),
|
||||
(' ', "Punctuation Space", ' '),
|
||||
(' ', "Thin Space", ' '),
|
||||
(' ', "Hair Space", ' '),
|
||||
(' ', "Narrow No-Break Space", ' '),
|
||||
(' ', "Medium Mathematical Space", ' '),
|
||||
(' ', "No-Break Space", ' '),
|
||||
(' ', "Figure Space", ' '),
|
||||
(' ', "Narrow No-Break Space", ' '),
|
||||
(' ', "Ideographic Space", ' '),
|
||||
|
||||
('ߺ', "Nko Lajanyalan", '_'),
|
||||
('﹍', "Dashed Low Line", '_'),
|
||||
('﹎', "Centreline Low Line", '_'),
|
||||
('﹏', "Wavy Low Line", '_'),
|
||||
('_', "Fullwidth Low Line", '-'),
|
||||
|
||||
('‐', "Hyphen", '-'),
|
||||
('‑', "Non-Breaking Hyphen", '-'),
|
||||
('‒', "Figure Dash", '-'),
|
||||
('–', "En Dash", '-'),
|
||||
('—', "Em Dash", '-'),
|
||||
('﹘', "Small Em Dash", '-'),
|
||||
('۔', "Arabic Full Stop", '-'),
|
||||
('⁃', "Hyphen Bullet", '-'),
|
||||
('˗', "Modifier Letter Minus Sign", '-'),
|
||||
('−', "Minus Sign", '-'),
|
||||
('➖', "Heavy Minus Sign", '-'),
|
||||
('Ⲻ', "Coptic Letter Dialect-P Ni", '-'),
|
||||
('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
|
||||
('-', "Fullwidth Hyphen-Minus", '-'),
|
||||
('―', "Horizontal Bar", '-'),
|
||||
('─', "Box Drawings Light Horizontal", '-'),
|
||||
('━', "Box Drawings Heavy Horizontal", '-'),
|
||||
('㇐', "CJK Stroke H", '-'),
|
||||
('ꟷ', "Latin Epigraphic Letter Dideways", '-'),
|
||||
('ᅳ', "Hangul Jungseong Eu", '-'),
|
||||
('ㅡ', "Hangul Letter Eu", '-'),
|
||||
('一', "CJK Unified Ideograph-4E00", '-'),
|
||||
('⼀', "Kangxi Radical One", '-'),
|
||||
|
||||
('؍', "Arabic Date Separator", ','),
|
||||
('٫', "Arabic Decimal Separator", ','),
|
||||
('‚', "Single Low-9 Quotation Mark", ','),
|
||||
('¸', "Cedilla", ','),
|
||||
('ꓹ', "Lisu Letter Tone Na Po", ','),
|
||||
(',', "Fullwidth Comma", ','),
|
||||
|
||||
(';', "Greek Question Mark", ';'),
|
||||
(';', "Fullwidth Semicolon", ';'),
|
||||
('︔', "Presentation Form For Vertical Semicolon", ';'),
|
||||
|
||||
('ः', "Devanagari Sign Visarga", ':'),
|
||||
('ઃ', "Gujarati Sign Visarga", ':'),
|
||||
(':', "Fullwidth Colon", ':'),
|
||||
('։', "Armenian Full Stop", ':'),
|
||||
('܃', "Syriac Supralinear Colon", ':'),
|
||||
('܄', "Syriac Sublinear Colon", ':'),
|
||||
('᛬', "Runic Multiple Ponctuation", ':'),
|
||||
('︰', "Presentation Form For Vertical Two Dot Leader", ':'),
|
||||
('᠃', "Mongolian Full Stop", ':'),
|
||||
('᠉', "Mongolian Manchu Full Stop", ':'),
|
||||
|
@ -68,25 +93,48 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
|
|||
('∶', "Ratio", ':'),
|
||||
('ː', "Modifier Letter Triangular Colon", ':'),
|
||||
('ꓽ', "Lisu Letter Tone Mya Jeu", ':'),
|
||||
('︓', "Presentation Form For Vertical Colon", ':'),
|
||||
|
||||
('!', "Fullwidth Exclamation Mark", '!'),
|
||||
('ǃ', "Latin Letter Retroflex Click", '!'),
|
||||
('ⵑ', "Tifinagh Letter Tuareg Yang", '!'),
|
||||
('︕', "Presentation Form For Vertical Exclamation Mark", '!'),
|
||||
|
||||
('ʔ', "Latin Letter Glottal Stop", '?'),
|
||||
('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
|
||||
('ॽ', "Devanagari Letter Glottal Stop", '?'),
|
||||
('Ꭾ', "Cherokee Letter He", '?'),
|
||||
('ꛫ', "Bamum Letter Ntuu", '?'),
|
||||
('?', "Fullwidth Question Mark", '?'),
|
||||
('︖', "Presentation Form For Vertical Question Mark", '?'),
|
||||
|
||||
('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
|
||||
('․', "One Dot Leader", '.'),
|
||||
('۔', "Arabic Full Stop", '.'),
|
||||
('܁', "Syriac Supralinear Full Stop", '.'),
|
||||
('܂', "Syriac Sublinear Full Stop", '.'),
|
||||
('꘎', "Vai Full Stop", '.'),
|
||||
('𐩐', "Kharoshthi Punctuation Dot", '.'),
|
||||
('·', "Middle Dot", '.'),
|
||||
('٠', "Arabic-Indic Digit Zero", '.'),
|
||||
('۰', "Extended Arabic-Indic Digit Zero", '.'),
|
||||
('ꓸ', "Lisu Letter Tone Mya Ti", '.'),
|
||||
('。', "Ideographic Full Stop", '.'),
|
||||
('·', "Middle Dot", '.'),
|
||||
('・', "Katakana Middle Dot", '.'),
|
||||
('・', "Halfwidth Katakana Middle Dot", '.'),
|
||||
('᛫', "Runic Single Punctuation", '.'),
|
||||
('·', "Greek Ano Teleia", '.'),
|
||||
('⸱', "Word Separator Middle Dot", '.'),
|
||||
('𐄁', "Aegean Word Separator Dot", '.'),
|
||||
('•', "Bullet", '.'),
|
||||
('‧', "Hyphenation Point", '.'),
|
||||
('∙', "Bullet Operator", '.'),
|
||||
('⋅', "Dot Operator", '.'),
|
||||
('ꞏ', "Latin Letter Sinological Dot", '.'),
|
||||
('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
|
||||
('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
|
||||
('.', "Fullwidth Full Stop", '.'),
|
||||
('。', "Ideographic Full Stop", '.'),
|
||||
('︒', "Presentation Form For Vertical Ideographic Full Stop", '.'),
|
||||
|
||||
('՝', "Armenian Comma", '\''),
|
||||
(''', "Fullwidth Apostrophe", '\''),
|
||||
('‘', "Left Single Quotation Mark", '\''),
|
||||
|
@ -96,8 +144,10 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
|
|||
('‵', "Reversed Prime", '\''),
|
||||
('՚', "Armenian Apostrophe", '\''),
|
||||
('׳', "Hebrew Punctuation Geresh", '\''),
|
||||
('`', "Greek Accent", '\''),
|
||||
('`', "Greek Varia", '\''),
|
||||
('`', "Fullwidth Grave Accent", '\''),
|
||||
('´', "Acute Accent", '\''),
|
||||
('΄', "Greek Tonos", '\''),
|
||||
('´', "Greek Oxia", '\''),
|
||||
('᾽', "Greek Koronis", '\''),
|
||||
|
@ -105,6 +155,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
|
|||
('῾', "Greek Dasia", '\''),
|
||||
('ʹ', "Modifier Letter Prime", '\''),
|
||||
('ʹ', "Greek Numeral Sign", '\''),
|
||||
('ˈ', "Modifier Letter Vertical Line", '\''),
|
||||
('ˊ', "Modifier Letter Acute Accent", '\''),
|
||||
('ˋ', "Modifier Letter Grave Accent", '\''),
|
||||
('˴', "Modifier Letter Middle Grave Accent", '\''),
|
||||
|
@ -116,6 +167,12 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
|
|||
('י', "Hebrew Letter Yod", '\''),
|
||||
('ߴ', "Nko High Tone Apostrophe", '\''),
|
||||
('ߵ', "Nko Low Tone Apostrophe", '\''),
|
||||
('ᑊ', "Canadian Syllabics West-Cree P", '\''),
|
||||
('ᛌ', "Runic Letter Short-Twig-Sol S", '\''),
|
||||
('𖽑', "Miao Sign Aspiration", '\''),
|
||||
('𖽒', "Miao Sign Reformed Voicing", '\''),
|
||||
|
||||
('᳓', "Vedic Sign Nihshvasa", '"'),
|
||||
('"', "Fullwidth Quotation Mark", '"'),
|
||||
('“', "Left Double Quotation Mark", '"'),
|
||||
('”', "Right Double Quotation Mark", '"'),
|
||||
|
@ -132,12 +189,15 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
|
|||
('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
|
||||
('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
|
||||
('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
|
||||
|
||||
('(', "Fullwidth Left Parenthesis", '('),
|
||||
('❨', "Medium Left Parenthesis Ornament", '('),
|
||||
('﴾', "Ornate Left Parenthesis", '('),
|
||||
('(', "Fullwidth Left Parenthesis", '('),
|
||||
|
||||
(')', "Fullwidth Right Parenthesis", ')'),
|
||||
('❩', "Medium Right Parenthesis Ornament", ')'),
|
||||
('﴿', "Ornate Right Parenthesis", ')'),
|
||||
(')', "Fullwidth Right Parenthesis", ')'),
|
||||
|
||||
('[', "Fullwidth Left Square Bracket", '['),
|
||||
('❲', "Light Left Tortoise Shell Bracket Ornament", '['),
|
||||
('「', "Left Corner Bracket", '['),
|
||||
|
@ -147,6 +207,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
|
|||
('〖', "Left White Lenticular Bracket", '['),
|
||||
('〘', "Left White Tortoise Shell Bracket", '['),
|
||||
('〚', "Left White Square Bracket", '['),
|
||||
|
||||
(']', "Fullwidth Right Square Bracket", ']'),
|
||||
('❳', "Light Right Tortoise Shell Bracket Ornament", ']'),
|
||||
('」', "Right Corner Bracket", ']'),
|
||||
|
@ -156,11 +217,20 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
|
|||
('〗', "Right White Lenticular Bracket", ']'),
|
||||
('〙', "Right White Tortoise Shell Bracket", ']'),
|
||||
('〛', "Right White Square Bracket", ']'),
|
||||
|
||||
('❴', "Medium Left Curly Bracket Ornament", '{'),
|
||||
('𝄔', "Musical Symbol Brace", '{'),
|
||||
('{', "Fullwidth Left Curly Bracket", '{'),
|
||||
|
||||
('❵', "Medium Right Curly Bracket Ornament", '}'),
|
||||
('}', "Fullwidth Right Curly Bracket", '}'),
|
||||
|
||||
('⁎', "Low Asterisk", '*'),
|
||||
('٭', "Arabic Five Pointed Star", '*'),
|
||||
('∗', "Asterisk Operator", '*'),
|
||||
('𐌟', "Old Italic Letter Ess", '*'),
|
||||
('*', "Fullwidth Asterisk", '*'),
|
||||
|
||||
('᜵', "Philippine Single Punctuation", '/'),
|
||||
('⁁', "Caret Insertion Point", '/'),
|
||||
('∕', "Division Slash", '/'),
|
||||
|
@ -168,37 +238,73 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
|
|||
('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'),
|
||||
('⟋', "Mathematical Rising Diagonal", '/'),
|
||||
('⧸', "Big Solidus", '/'),
|
||||
('㇓', "Cjk Stroke Sp", '/'),
|
||||
('𝈺', "Greek Instrumental Notation Symbol-47", '/'),
|
||||
('㇓', "CJK Stroke Sp", '/'),
|
||||
('〳', "Vertical Kana Repeat Mark Upper Half", '/'),
|
||||
('丿', "Cjk Unified Ideograph-4E3F", '/'),
|
||||
('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'),
|
||||
('ノ', "Katakana Letter No", '/'),
|
||||
('丿', "CJK Unified Ideograph-4E3F", '/'),
|
||||
('⼃', "Kangxi Radical Slash", '/'),
|
||||
('/', "Fullwidth Solidus", '/'),
|
||||
|
||||
('\', "Fullwidth Reverse Solidus", '\\'),
|
||||
('﹨', "Small Reverse Solidus", '\\'),
|
||||
('∖', "Set Minus", '\\'),
|
||||
('⟍', "Mathematical Falling Diagonal", '\\'),
|
||||
('⧵', "Reverse Solidus Operator", '\\'),
|
||||
('⧹', "Big Reverse Solidus", '\\'),
|
||||
('⧹', "Greek Vocal Notation Symbol-16", '\\'),
|
||||
('⧹', "Greek Instrumental Symbol-48", '\\'),
|
||||
('㇔', "CJK Stroke D", '\\'),
|
||||
('丶', "CJK Unified Ideograph-4E36", '\\'),
|
||||
('⼂', "Kangxi Radical Dot", '\\'),
|
||||
('、', "Ideographic Comma", '\\'),
|
||||
('ヽ', "Katakana Iteration Mark", '\\'),
|
||||
('㇔', "Cjk Stroke D", '\\'),
|
||||
('丶', "Cjk Unified Ideograph-4E36", '\\'),
|
||||
('⼂', "Kangxi Radical Dot", '\\'),
|
||||
|
||||
('ꝸ', "Latin Small Letter Um", '&'),
|
||||
('&', "Fullwidth Ampersand", '&'),
|
||||
|
||||
('᛭', "Runic Cros Punctuation", '+'),
|
||||
('➕', "Heavy Plus Sign", '+'),
|
||||
('𐊛', "Lycian Letter H", '+'),
|
||||
('﬩', "Hebrew Letter Alternative Plus Sign", '+'),
|
||||
('+', "Fullwidth Plus Sign", '+'),
|
||||
|
||||
('‹', "Single Left-Pointing Angle Quotation Mark", '<'),
|
||||
('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
|
||||
('˂', "Modifier Letter Left Arrowhead", '<'),
|
||||
('𝈶', "Greek Instrumental Symbol-40", '<'),
|
||||
('ᐸ', "Canadian Syllabics Pa", '<'),
|
||||
('ᚲ', "Runic Letter Kauna", '<'),
|
||||
('❬', "Medium Left-Pointing Angle Bracket Ornament", '<'),
|
||||
('⟨', "Mathematical Left Angle Bracket", '<'),
|
||||
('〈', "Left-Pointing Angle Bracket", '<'),
|
||||
('〈', "Left Angle Bracket", '<'),
|
||||
('㇛', "CJK Stroke Pd", '<'),
|
||||
('く', "Hiragana Letter Ku", '<'),
|
||||
('𡿨', "CJK Unified Ideograph-21FE8", '<'),
|
||||
('《', "Left Double Angle Bracket", '<'),
|
||||
('<', "Fullwidth Less-Than Sign", '<'),
|
||||
|
||||
('᐀', "Canadian Syllabics Hyphen", '='),
|
||||
('⹀', "Double Hyphen", '='),
|
||||
('゠', "Katakana-Hiragana Double Hyphen", '='),
|
||||
('꓿', "Lisu Punctuation Full Stop", '='),
|
||||
('=', "Fullwidth Equals Sign", '='),
|
||||
|
||||
('›', "Single Right-Pointing Angle Quotation Mark", '>'),
|
||||
('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
|
||||
('˃', "Modifier Letter Right Arrowhead", '>'),
|
||||
('𝈷', "Greek Instrumental Symbol-42", '>'),
|
||||
('ᐳ', "Canadian Syllabics Po", '>'),
|
||||
('𖼿', "Miao Letter Archaic Zza", '>'),
|
||||
('❭', "Medium Right-Pointing Angle Bracket Ornament", '>'),
|
||||
('⟩', "Mathematical Right Angle Bracket", '>'),
|
||||
('〉', "Right-Pointing Angle Bracket", '>'),
|
||||
('〉', "Right Angle Bracket", '>'),
|
||||
('》', "Right Double Angle Bracket", '>'),
|
||||
('Ⲻ', "Coptic Capital Letter Dialect-P Ni", '-'),
|
||||
('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
|
||||
('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), ];
|
||||
('>', "Fullwidth Greater-Than Sign", '>'), ];
|
||||
|
||||
|
||||
const ASCII_ARRAY: &'static [(char, &'static str)] = &[
|
||||
(' ', "Space"),
|
||||
|
|
Loading…
Add table
Reference in a new issue