Support Unicode in string literals.
This commit is contained in:
parent
b0baa3d06e
commit
61f642f6f8
8 changed files with 52 additions and 51 deletions
6
Cargo.lock
generated
6
Cargo.lock
generated
|
@ -8,6 +8,7 @@ dependencies = [
|
|||
"strings 0.0.1 (git+https://github.com/nrc/strings.rs.git)",
|
||||
"term 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"toml 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -87,6 +88,11 @@ dependencies = [
|
|||
"rustc-serialize 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.2.2"
|
||||
|
|
|
@ -15,6 +15,8 @@ git = "https://github.com/nrc/strings.rs.git"
|
|||
[dependencies]
|
||||
toml = "0.1.20"
|
||||
rustc-serialize = "0.3.14"
|
||||
unicode-segmentation = "0.1.2"
|
||||
regex = "0.1.41"
|
||||
|
||||
[dev-dependencies]
|
||||
diff = "0.1.0"
|
||||
|
|
10
src/expr.rs
10
src/expr.rs
|
@ -28,8 +28,8 @@ impl Rewrite for ast::Expr {
|
|||
match self.node {
|
||||
ast::Expr_::ExprLit(ref l) => {
|
||||
match l.node {
|
||||
ast::Lit_::LitStr(ref is, ast::StrStyle::CookedStr) => {
|
||||
rewrite_string_lit(context, &is, l.span, width, offset)
|
||||
ast::Lit_::LitStr(_, ast::StrStyle::CookedStr) => {
|
||||
rewrite_string_lit(context, l.span, width, offset)
|
||||
}
|
||||
_ => Some(context.snippet(self.span)),
|
||||
}
|
||||
|
@ -823,7 +823,6 @@ fn rewrite_pat_expr(context: &RewriteContext,
|
|||
}
|
||||
|
||||
fn rewrite_string_lit(context: &RewriteContext,
|
||||
s: &str,
|
||||
span: Span,
|
||||
width: usize,
|
||||
offset: usize)
|
||||
|
@ -842,7 +841,10 @@ fn rewrite_string_lit(context: &RewriteContext,
|
|||
trim_end: false,
|
||||
};
|
||||
|
||||
Some(rewrite_string(&s.escape_default(), &fmt))
|
||||
let string_lit = context.snippet(span);
|
||||
let str_lit = &string_lit[1..string_lit.len() - 1]; // Remove the quote characters.
|
||||
|
||||
Some(rewrite_string(str_lit, &fmt))
|
||||
}
|
||||
|
||||
fn rewrite_call(context: &RewriteContext,
|
||||
|
|
|
@ -9,8 +9,6 @@
|
|||
// except according to those terms.
|
||||
|
||||
#![feature(rustc_private)]
|
||||
#![feature(str_escape)]
|
||||
#![feature(str_char)]
|
||||
#![feature(custom_attribute)]
|
||||
#![allow(unused_attributes)]
|
||||
|
||||
|
@ -30,6 +28,9 @@ extern crate rustc_serialize;
|
|||
|
||||
extern crate strings;
|
||||
|
||||
extern crate unicode_segmentation;
|
||||
extern crate regex;
|
||||
|
||||
use rustc::session::Session;
|
||||
use rustc::session::config as rustc_config;
|
||||
use rustc::session::config::Input;
|
||||
|
|
|
@ -10,7 +10,12 @@
|
|||
|
||||
// Format string literals.
|
||||
|
||||
use utils::{make_indent, next_char, prev_char, round_up_to_power_of_two};
|
||||
|
||||
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
use regex::Regex;
|
||||
|
||||
use utils::{make_indent, round_up_to_power_of_two};
|
||||
|
||||
use MIN_STRING;
|
||||
|
||||
|
@ -26,8 +31,12 @@ pub struct StringFormat<'a> {
|
|||
|
||||
// TODO: simplify this!
|
||||
pub fn rewrite_string<'a>(s: &str, fmt: &StringFormat<'a>) -> String {
|
||||
// FIXME I bet this stomps unicode escapes in the source string
|
||||
// TODO if lo.col > IDEAL - 10, start a new line (need cur indent for that)
|
||||
// Strip line breaks.
|
||||
let re = Regex::new(r"(\\[:space:]+)").unwrap();
|
||||
let stripped_str = re.replace_all(s, "");
|
||||
|
||||
let graphemes = UnicodeSegmentation::graphemes(&*stripped_str, false).collect::<Vec<&str>>();
|
||||
|
||||
let indent = make_indent(fmt.offset);
|
||||
let indent = &indent;
|
||||
|
@ -39,41 +48,36 @@ pub fn rewrite_string<'a>(s: &str, fmt: &StringFormat<'a>) -> String {
|
|||
let ender_length = fmt.line_end.len();
|
||||
let max_chars = fmt.width.checked_sub(fmt.opener.len()).unwrap_or(0)
|
||||
.checked_sub(ender_length).unwrap_or(1);
|
||||
|
||||
loop {
|
||||
let mut cur_end = cur_start + max_chars;
|
||||
|
||||
if cur_end >= s.len() {
|
||||
result.push_str(&s[cur_start..]);
|
||||
if cur_end >= graphemes.len() {
|
||||
let line = &graphemes[cur_start..].join("");
|
||||
result.push_str(line);
|
||||
break;
|
||||
}
|
||||
|
||||
// Make sure we're on a char boundary.
|
||||
cur_end = next_char(&s, cur_end);
|
||||
|
||||
// Push cur_end left until we reach whitespace.
|
||||
while !s.char_at(cur_end - 1).is_whitespace() {
|
||||
cur_end = prev_char(&s, cur_end);
|
||||
|
||||
while !(graphemes[cur_end - 1].trim().len() == 0) {
|
||||
cur_end -= 1;
|
||||
if cur_end - cur_start < MIN_STRING {
|
||||
// We can't break at whitespace, fall back to splitting
|
||||
// anywhere that doesn't break an escape sequence.
|
||||
cur_end = next_char(&s, cur_start + max_chars);
|
||||
while s.char_at(prev_char(&s, cur_end)) == '\\' {
|
||||
cur_end = prev_char(&s, cur_end);
|
||||
cur_end = cur_start + max_chars;
|
||||
while graphemes[cur_end - 1] == "\\" {
|
||||
cur_end -= 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Make sure there is no whitespace to the right of the break.
|
||||
while cur_end < s.len() && s.char_at(cur_end).is_whitespace() {
|
||||
cur_end = next_char(&s, cur_end + 1);
|
||||
while cur_end < s.len() && graphemes[cur_end].trim().len() == 0 {
|
||||
cur_end += 1;
|
||||
}
|
||||
|
||||
let raw_line = graphemes[cur_start..cur_end].join("");
|
||||
let line: &str = if fmt.trim_end {
|
||||
&s[cur_start..cur_end].trim_right_matches(char::is_whitespace)
|
||||
&(raw_line.trim())
|
||||
} else {
|
||||
&s[cur_start..cur_end]
|
||||
&raw_line
|
||||
};
|
||||
|
||||
result.push_str(line);
|
||||
|
|
25
src/utils.rs
25
src/utils.rs
|
@ -32,31 +32,6 @@ pub fn span_after(original: Span, needle: &str, codemap: &CodeMap) -> BytePos {
|
|||
original.lo + BytePos(snippet.find_uncommented(needle).unwrap() as u32 + 1)
|
||||
}
|
||||
|
||||
/// Returns a byte index into `s` that lies on a char boundary before `i`.
///
/// When `i` is 0 the result is 0. Otherwise the index is stepped back by one
/// byte and then moved further back, one byte at a time, until
/// `s.is_char_boundary` reports a boundary.
#[inline]
|
||||
pub fn prev_char(s: &str, mut i: usize) -> usize {
|
||||
// Nothing precedes index 0; returning early also avoids underflow in `i -= 1`.
if i == 0 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Always move at least one byte back, even if `i` was already a boundary.
i -= 1;
|
||||
// Keep walking back while the index is not a char boundary.
while !s.is_char_boundary(i) {
|
||||
i -= 1;
|
||||
}
|
||||
i
|
||||
}
|
||||
|
||||
/// Returns the first byte index at or after `i` that lies on a char boundary of `s`.
///
/// Indices at or beyond `s.len()` are clamped to `s.len()`. An `i` that is
/// already on a boundary is returned unchanged.
#[inline]
|
||||
pub fn next_char(s: &str, mut i: usize) -> usize {
|
||||
// Clamp out-of-range indices to the end of the string.
if i >= s.len() {
|
||||
return s.len();
|
||||
}
|
||||
|
||||
// Advance one byte at a time until the index is a char boundary.
while !s.is_char_boundary(i) {
|
||||
i += 1;
|
||||
}
|
||||
i
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn make_indent(width: usize) -> String {
|
||||
let mut indent = String::with_capacity(width);
|
||||
|
|
|
@ -25,5 +25,10 @@ formatting"#;
|
|||
let xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx =
|
||||
funktion("yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy");
|
||||
|
||||
let unicode = "a̐éö̲\r\n";
|
||||
let unicode2 = "Löwe 老虎 Léopard";
|
||||
let unicode3 = "中华Việt Nam";
|
||||
let unicode4 = "☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃";
|
||||
|
||||
"stuff"
|
||||
}
|
||||
|
|
|
@ -30,5 +30,11 @@ formatting"#;
|
|||
yyyyyyyyyyyyyyyyyyyyy\
|
||||
yyyyyyyyyy");
|
||||
|
||||
let unicode = "a̐éö̲\r\n";
|
||||
let unicode2 = "Löwe 老虎 Léopard";
|
||||
let unicode3 = "中华Việt Nam";
|
||||
let unicode4 = "☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃\
|
||||
☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃";
|
||||
|
||||
"stuff"
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue