Auto merge of #37855 - tbu-:pr_fix_debug_str, r=alexcrichton

Fix `fmt::Debug` for strings, e.g. for Chinese characters

The problem occurred due to lines like

```
3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
```

in `UnicodeData.txt`, which the script previously interpreted as two
characters, although it represents the whole range.

Fixes #34318.
This commit is contained in:
bors 2016-11-20 03:13:58 -06:00 committed by GitHub
commit 224f2cec9c
3 changed files with 277 additions and 106 deletions

View file

@ -11,11 +11,16 @@
# except according to those terms.
# This script uses the following Unicode tables:
# - Categories.txt
# - UnicodeData.txt
from collections import namedtuple
import csv
import os
import subprocess
NUM_CODEPOINTS=0x110000
def to_ranges(iter):
current = None
for i in iter:
@ -28,10 +33,10 @@ def to_ranges(iter):
if current is not None:
yield tuple(current)
def get_escaped(dictionary):
for i in range(0x110000):
if dictionary.get(i, "Cn") in "Cc Cf Cs Co Cn Zl Zp Zs".split() and i != ord(' '):
yield i
def get_escaped(codepoints):
    """Yield the numeric value of every codepoint that has to be escaped.

    `codepoints` is an iterable of objects exposing `value` (the integer
    codepoint) and `class_` (its category string, or a falsy value when no
    category is known, which is treated as "Cn").

    A codepoint is reported when its category is one of
    Cc, Cf, Cs, Co, Cn, Zl, Zp or Zs -- with the single exception of the
    ASCII space character, which is never reported.
    """
    escaped_classes = "Cc Cf Cs Co Cn Zl Zp Zs".split()
    space = ord(' ')
    for c in codepoints:
        category = c.class_ or "Cn"
        if category not in escaped_classes:
            continue
        # The plain space is in Zs but must stay unescaped.
        if c.value == space:
            continue
        yield c.value
def get_file(f):
try:
@ -40,10 +45,41 @@ def get_file(f):
subprocess.run(["curl", "-O", f], check=True)
return open(os.path.basename(f))
def main():
file = get_file("http://www.unicode.org/notes/tn36/Categories.txt")
Codepoint = namedtuple('Codepoint', 'value class_')
dictionary = {int(line.split()[0], 16): line.split()[1] for line in file}
def get_codepoints(f):
    """Yield a `Codepoint` for every value in `range(NUM_CODEPOINTS)`.

    `f` is an iterable of lines in `UnicodeData.txt` format: fields are
    separated by `;`, field 0 is the codepoint in hexadecimal, field 1 is
    the name and field 2 is the class (e.g. `Lo`).

    Rows are expected in ascending codepoint order. Two kinds of gaps
    between consecutive rows are filled in:

    * A row whose name ends in `First>` opens a range that must be closed
      by the very next row, whose name must end in `Last>`. Every codepoint
      strictly between the two gets the class of the `First>` row.
    * Any other gap, as well as everything after the final row, is yielded
      with a class of `None`.

    Raises `ValueError` if a `First>` row is not immediately followed by a
    `Last>` row (including when the input ends right after `First>`).
    """
    r = csv.reader(f, delimiter=";")
    # Start one below zero so that codepoint 0 is still emitted (with class
    # None) when the input is empty or does not begin at row 0000.
    prev_codepoint = -1
    class_first = None
    for row in r:
        if not row:
            # Blank line: nothing to parse.
            continue
        codepoint = int(row[0], 16)
        name = row[1]
        class_ = row[2]

        if class_first is not None:
            if not name.endswith("Last>"):
                raise ValueError("Missing Last after First")

        # Fill the gap since the previous row. Inside a First/Last pair the
        # gap inherits the range's class; otherwise class_first is None.
        for c in range(prev_codepoint + 1, codepoint):
            yield Codepoint(c, class_first)

        class_first = None
        if name.endswith("First>"):
            class_first = class_

        yield Codepoint(codepoint, class_)
        prev_codepoint = codepoint

    if class_first is not None:
        raise ValueError("Missing Last after First")

    # Everything past the last listed row has no class.
    for c in range(prev_codepoint + 1, NUM_CODEPOINTS):
        yield Codepoint(c, None)
def main():
file = get_file("http://www.unicode.org/Public/UNIDATA/UnicodeData.txt")
codepoints = get_codepoints(file)
CUTOFF=0x10000
singletons0 = []
@ -52,7 +88,7 @@ def main():
normal1 = []
extra = []
for a, b in to_ranges(get_escaped(dictionary)):
for a, b in to_ranges(get_escaped(codepoints)):
if a > 2 * CUTOFF:
extra.append((a, b - a))
elif a == b - 1:

View file

@ -11,6 +11,8 @@
// NOTE: The following code was generated by "src/etc/char_private.py",
// do not edit directly!
use slice::SliceExt;
fn check(x: u16, singletons: &[u16], normal: &[u16]) -> bool {
for &s in singletons {
if x == s {
@ -42,7 +44,16 @@ pub fn is_printable(x: char) -> bool {
} else if x < 0x20000 {
check(lower, SINGLETONS1, NORMAL1)
} else {
if 0x20000 <= x && x < 0x2f800 {
if 0x2a6d7 <= x && x < 0x2a700 {
return false;
}
if 0x2b735 <= x && x < 0x2b740 {
return false;
}
if 0x2b81e <= x && x < 0x2b820 {
return false;
}
if 0x2cea2 <= x && x < 0x2f800 {
return false;
}
if 0x2fa1e <= x && x < 0xe0100 {
@ -62,10 +73,13 @@ const SINGLETONS0: &'static [u16] = &[
0x38b,
0x38d,
0x3a2,
0x530,
0x557,
0x558,
0x560,
0x588,
0x58b,
0x58c,
0x590,
0x61c,
0x61d,
@ -79,10 +93,8 @@ const SINGLETONS0: &'static [u16] = &[
0x83f,
0x85c,
0x85d,
0x8a1,
0x8ff,
0x978,
0x980,
0x8b5,
0x8e2,
0x984,
0x98d,
0x98e,
@ -154,14 +166,11 @@ const SINGLETONS0: &'static [u16] = &[
0xc0d,
0xc11,
0xc29,
0xc34,
0xc45,
0xc49,
0xc57,
0xc64,
0xc65,
0xc80,
0xc81,
0xc84,
0xc8d,
0xc91,
@ -193,6 +202,8 @@ const SINGLETONS0: &'static [u16] = &[
0xdbf,
0xdd5,
0xdd7,
0xdf0,
0xdf1,
0xe83,
0xe85,
0xe86,
@ -245,6 +256,10 @@ const SINGLETONS0: &'static [u16] = &[
0x1317,
0x135b,
0x135c,
0x13f6,
0x13f7,
0x13fe,
0x13ff,
0x1680,
0x170d,
0x176d,
@ -253,6 +268,7 @@ const SINGLETONS0: &'static [u16] = &[
0x17df,
0x180e,
0x180f,
0x191f,
0x196e,
0x196f,
0x1a1c,
@ -260,6 +276,9 @@ const SINGLETONS0: &'static [u16] = &[
0x1a5f,
0x1a7d,
0x1a7e,
0x1aae,
0x1aaf,
0x1cf7,
0x1f16,
0x1f17,
0x1f1e,
@ -285,7 +304,12 @@ const SINGLETONS0: &'static [u16] = &[
0x2072,
0x2073,
0x208f,
0x2700,
0x23ff,
0x2b74,
0x2b75,
0x2b96,
0x2b97,
0x2bc9,
0x2c2f,
0x2c5f,
0x2d26,
@ -306,8 +330,11 @@ const SINGLETONS0: &'static [u16] = &[
0x318f,
0x321f,
0x32ff,
0xa78f,
0xa7af,
0xa8fe,
0xa8ff,
0xa9ce,
0xa9ff,
0xaa4e,
0xaa4f,
0xaa5a,
@ -317,6 +344,7 @@ const SINGLETONS0: &'static [u16] = &[
0xab0f,
0xab10,
0xab27,
0xab2f,
0xabee,
0xabef,
0xfa6e,
@ -350,7 +378,7 @@ const SINGLETONS1: &'static [u16] = &[
0x3e,
0x4e,
0x4f,
0x31f,
0x18f,
0x39e,
0x49e,
0x49f,
@ -361,6 +389,9 @@ const SINGLETONS1: &'static [u16] = &[
0x83d,
0x83e,
0x856,
0x8f3,
0x9d0,
0x9d1,
0xa04,
0xa14,
0xa18,
@ -368,6 +399,49 @@ const SINGLETONS1: &'static [u16] = &[
0xb57,
0x10bd,
0x1135,
0x11ce,
0x11cf,
0x11e0,
0x1212,
0x1287,
0x1289,
0x128e,
0x129e,
0x1304,
0x130d,
0x130e,
0x1311,
0x1312,
0x1329,
0x1331,
0x1334,
0x133a,
0x133b,
0x1345,
0x1346,
0x1349,
0x134a,
0x134e,
0x134f,
0x1364,
0x1365,
0x145a,
0x145c,
0x15b6,
0x15b7,
0x1c09,
0x1c37,
0x1c90,
0x1c91,
0x1ca8,
0x246f,
0x6a5f,
0x6aee,
0x6aef,
0x6b5a,
0x6b62,
0xbc9a,
0xbc9b,
0xd127,
0xd128,
0xd455,
@ -395,6 +469,14 @@ const SINGLETONS1: &'static [u16] = &[
0xd6a7,
0xd7cc,
0xd7cd,
0xdaa0,
0xe007,
0xe019,
0xe01a,
0xe022,
0xe025,
0xe8c5,
0xe8c6,
0xee04,
0xee20,
0xee23,
@ -429,31 +511,25 @@ const SINGLETONS1: &'static [u16] = &[
0xeeaa,
0xf0af,
0xf0b0,
0xf0bf,
0xf0c0,
0xf0d0,
0xf12f,
0xf336,
0xf3c5,
0xf43f,
0xf441,
0xf4f8,
0xf53e,
0xf53f,
0xf91f,
0xf931,
0xf932,
0xf93f,
];
const NORMAL0: &'static [u16] = &[
0x0, 0x20,
0x7f, 0x22,
0x37f, 0x5,
0x528, 0x9,
0x58b, 0x4,
0x380, 0x4,
0x5c8, 0x8,
0x5eb, 0x5,
0x5f5, 0x11,
0x7b2, 0xe,
0x7fb, 0x5,
0x85f, 0x41,
0x8ad, 0x37,
0x8be, 0x16,
0x9b3, 0x3,
0x9cf, 0x8,
0x9d8, 0x4,
@ -465,7 +541,8 @@ const NORMAL0: &'static [u16] = &[
0xa5f, 0x7,
0xa76, 0xb,
0xad1, 0xf,
0xaf2, 0xf,
0xaf2, 0x7,
0xafa, 0x7,
0xb4e, 0x8,
0xb58, 0x4,
0xb78, 0xa,
@ -478,21 +555,19 @@ const NORMAL0: &'static [u16] = &[
0xbc3, 0x3,
0xbd1, 0x6,
0xbd8, 0xe,
0xbfb, 0x6,
0xbfb, 0x5,
0xc3a, 0x3,
0xc4e, 0x7,
0xc5a, 0x6,
0xc5b, 0x5,
0xc70, 0x8,
0xcce, 0x7,
0xcd7, 0x7,
0xcf3, 0xf,
0xd4f, 0x8,
0xd58, 0x8,
0xd76, 0x3,
0xcf3, 0xe,
0xd50, 0x4,
0xd97, 0x3,
0xdc7, 0x3,
0xdcb, 0x4,
0xde0, 0x12,
0xde0, 0x6,
0xdf5, 0xc,
0xe3b, 0x4,
0xe5c, 0x25,
@ -503,9 +578,8 @@ const NORMAL0: &'static [u16] = &[
0x10c8, 0x5,
0x137d, 0x3,
0x139a, 0x6,
0x13f5, 0xb,
0x169d, 0x3,
0x16f1, 0xf,
0x16f9, 0x7,
0x1715, 0xb,
0x1737, 0x9,
0x1754, 0xc,
@ -516,7 +590,6 @@ const NORMAL0: &'static [u16] = &[
0x1878, 0x8,
0x18ab, 0x5,
0x18f6, 0xa,
0x191d, 0x3,
0x192c, 0x4,
0x193c, 0x4,
0x1941, 0x3,
@ -526,34 +599,34 @@ const NORMAL0: &'static [u16] = &[
0x19db, 0x3,
0x1a8a, 0x6,
0x1a9a, 0x6,
0x1aae, 0x52,
0x1abf, 0x41,
0x1b4c, 0x4,
0x1b7d, 0x3,
0x1bf4, 0x8,
0x1c38, 0x3,
0x1c4a, 0x3,
0x1c80, 0x40,
0x1c89, 0x37,
0x1cc8, 0x8,
0x1cf7, 0x9,
0x1de7, 0x15,
0x1cfa, 0x6,
0x1df6, 0x5,
0x1fff, 0x11,
0x2028, 0x8,
0x205f, 0x11,
0x209d, 0x3,
0x20ba, 0x16,
0x20bf, 0x11,
0x20f1, 0xf,
0x218a, 0x6,
0x23f4, 0xc,
0x218c, 0x4,
0x2427, 0x19,
0x244b, 0x15,
0x2b4d, 0x3,
0x2b5a, 0xa6,
0x2bba, 0x3,
0x2bd2, 0x1a,
0x2bf0, 0x10,
0x2cf4, 0x5,
0x2d28, 0x5,
0x2d68, 0x7,
0x2d71, 0xe,
0x2d97, 0x9,
0x2e3c, 0x44,
0x2e45, 0x3b,
0x2ef4, 0xc,
0x2fd6, 0x1a,
0x2ffc, 0x5,
@ -561,32 +634,28 @@ const NORMAL0: &'static [u16] = &[
0x312e, 0x3,
0x31bb, 0x5,
0x31e4, 0xc,
0x3400, 0x19c0,
0x4e00, 0x5200,
0x4db6, 0xa,
0x9fd6, 0x2a,
0xa48d, 0x3,
0xa4c7, 0x9,
0xa62c, 0x14,
0xa698, 0x7,
0xa6f8, 0x8,
0xa794, 0xc,
0xa7ab, 0x4d,
0xa7b8, 0x3f,
0xa82c, 0x4,
0xa83a, 0x6,
0xa878, 0x8,
0xa8c5, 0x9,
0xa8c6, 0x8,
0xa8da, 0x6,
0xa8fc, 0x4,
0xa954, 0xb,
0xa97d, 0x3,
0xa9da, 0x4,
0xa9e0, 0x20,
0xaa37, 0x9,
0xaa7c, 0x4,
0xaac3, 0x18,
0xaaf7, 0xa,
0xab17, 0x9,
0xab2f, 0x91,
0xabfa, 0x2bb6,
0xab66, 0xa,
0xabfa, 0x6,
0xd7a4, 0xc,
0xd7c7, 0x4,
0xd7fc, 0x2104,
0xfada, 0x26,
@ -596,7 +665,6 @@ const NORMAL0: &'static [u16] = &[
0xfd40, 0x10,
0xfdc8, 0x28,
0xfe1a, 0x6,
0xfe27, 0x9,
0xfe6c, 0x4,
0xfefd, 0x4,
0xffbf, 0x3,
@ -608,61 +676,123 @@ const NORMAL1: &'static [u16] = &[
0xfb, 0x5,
0x103, 0x4,
0x134, 0x3,
0x18b, 0x5,
0x19c, 0x34,
0x19c, 0x4,
0x1a1, 0x2f,
0x1fe, 0x82,
0x29d, 0x3,
0x2d1, 0x2f,
0x2d1, 0xf,
0x2fc, 0x4,
0x324, 0xc,
0x34b, 0x35,
0x34b, 0x5,
0x37b, 0x5,
0x3c4, 0x4,
0x3d6, 0x2a,
0x4aa, 0x356,
0x4aa, 0x6,
0x4d4, 0x4,
0x4fc, 0x4,
0x528, 0x8,
0x564, 0xb,
0x570, 0x90,
0x737, 0x9,
0x756, 0xa,
0x768, 0x98,
0x839, 0x3,
0x860, 0xa0,
0x89f, 0x8,
0x8b0, 0x30,
0x8f6, 0x5,
0x91c, 0x3,
0x93a, 0x5,
0x940, 0x40,
0x9b8, 0x6,
0x9c0, 0x40,
0x9b8, 0x4,
0xa07, 0x5,
0xa34, 0x4,
0xa3b, 0x4,
0xa48, 0x8,
0xa59, 0x7,
0xa80, 0x80,
0xaa0, 0x20,
0xae7, 0x4,
0xaf7, 0x9,
0xb36, 0x3,
0xb73, 0x5,
0xb80, 0x80,
0xc49, 0x217,
0xb92, 0x7,
0xb9d, 0xc,
0xbb0, 0x50,
0xc49, 0x37,
0xcb3, 0xd,
0xcf3, 0x7,
0xd00, 0x160,
0xe7f, 0x181,
0x104e, 0x4,
0x1070, 0x10,
0x1070, 0xf,
0x10c2, 0xe,
0x10e9, 0x7,
0x10fa, 0x6,
0x1144, 0x3c,
0x11c9, 0x7,
0x11da, 0x4a6,
0x1144, 0xc,
0x1177, 0x9,
0x11f5, 0xb,
0x123f, 0x41,
0x12aa, 0x6,
0x12eb, 0x5,
0x12fa, 0x6,
0x1351, 0x6,
0x1358, 0x5,
0x136d, 0x3,
0x1375, 0x8b,
0x145e, 0x22,
0x14c8, 0x8,
0x14da, 0xa6,
0x15de, 0x22,
0x1645, 0xb,
0x165a, 0x6,
0x166d, 0x13,
0x16b8, 0x8,
0x16ca, 0x936,
0x236f, 0x91,
0x2463, 0xd,
0x2474, 0xb8c,
0x342f, 0x33d1,
0x6a39, 0x4c7,
0x16ca, 0x36,
0x171a, 0x3,
0x172c, 0x4,
0x1740, 0x160,
0x18f3, 0xc,
0x1900, 0x1c0,
0x1af9, 0x107,
0x1c46, 0xa,
0x1c6d, 0x3,
0x1cb7, 0x349,
0x239a, 0x66,
0x2475, 0xb,
0x2544, 0xabc,
0x342f, 0xfd1,
0x4647, 0x21b9,
0x6a39, 0x7,
0x6a6a, 0x4,
0x6a70, 0x60,
0x6af6, 0xa,
0x6b46, 0xa,
0x6b78, 0x5,
0x6b90, 0x370,
0x6f45, 0xb,
0x6f7f, 0x10,
0x6fa0, 0x4060,
0xb002, 0x1ffe,
0x6fa0, 0x40,
0x6fe1, 0x1f,
0x87ed, 0x13,
0x8af3, 0x250d,
0xb002, 0xbfe,
0xbc6b, 0x5,
0xbc7d, 0x3,
0xbc89, 0x7,
0xbca0, 0x1360,
0xd0f6, 0xa,
0xd173, 0x8,
0xd1de, 0x22,
0xd1e9, 0x17,
0xd246, 0xba,
0xd357, 0x9,
0xd372, 0x8e,
0xd547, 0x3,
0xd800, 0x1600,
0xda8c, 0xf,
0xdab0, 0x550,
0xe02b, 0x7d5,
0xe8d7, 0x29,
0xe94b, 0x5,
0xe95a, 0x4,
0xe960, 0x4a0,
0xee3c, 0x6,
0xee43, 0x4,
0xee9c, 0x5,
@ -670,24 +800,27 @@ const NORMAL1: &'static [u16] = &[
0xeef2, 0x10e,
0xf02c, 0x4,
0xf094, 0xc,
0xf0e0, 0x20,
0xf10b, 0x5,
0xf0f6, 0xa,
0xf10d, 0x3,
0xf16c, 0x4,
0xf19b, 0x4b,
0xf1ad, 0x39,
0xf203, 0xd,
0xf23b, 0x5,
0xf23c, 0x4,
0xf249, 0x7,
0xf252, 0xae,
0xf321, 0xf,
0xf37d, 0x3,
0xf394, 0xc,
0xf3cb, 0x15,
0xf3f1, 0xf,
0xf4fd, 0x3,
0xf544, 0xc,
0xf568, 0x93,
0xf641, 0x4,
0xf650, 0x30,
0xf6c6, 0x3a,
0xf774, 0x88c,
0xf6d3, 0xd,
0xf6ed, 0x3,
0xf6f7, 0x9,
0xf774, 0xc,
0xf7d5, 0x2b,
0xf80c, 0x4,
0xf848, 0x8,
0xf85a, 0x6,
0xf888, 0x8,
0xf8ae, 0x62,
0xf928, 0x8,
0xf94c, 0x4,
0xf95f, 0x21,
0xf992, 0x2e,
0xf9c1, 0x63f,
];

View file

@ -162,6 +162,8 @@ fn test_escape_debug() {
assert_eq!(s, "~");
let s = string('é');
assert_eq!(s, "é");
let s = string('文');
assert_eq!(s, "文");
let s = string('\x00');
assert_eq!(s, "\\u{0}");
let s = string('\x1f');