Refactor cook_lexer_literal.

It deals with eight cases: ints, floats, and the six quoted types
(char, byte, string, byte string, raw string, and raw byte string).
The int and float cases return early, while the six quoted cases fall
through to shared code at the end of the function, which makes the
function hard to read.

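This commit avoids the early returns by moving the shared tail code into a new `cook_quoted` helper (replacing `validate_literal_escape`), so that every match arm directly produces the function's result. It also renames the `suffix_start` parameter to `end`.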
This commit is contained in:
Nicholas Nethercote 2022-11-04 09:19:34 +11:00
parent a21c045897
commit d963686f5a

View file

@@ -363,55 +363,55 @@ impl<'a> StringReader<'a> {
fn cook_lexer_literal( fn cook_lexer_literal(
&self, &self,
start: BytePos, start: BytePos,
suffix_start: BytePos, end: BytePos,
kind: rustc_lexer::LiteralKind, kind: rustc_lexer::LiteralKind,
) -> (token::LitKind, Symbol) { ) -> (token::LitKind, Symbol) {
// prefix means `"` or `br"` or `r###"`, ... match kind {
let (lit_kind, mode, prefix_len, postfix_len) = match kind {
rustc_lexer::LiteralKind::Char { terminated } => { rustc_lexer::LiteralKind::Char { terminated } => {
if !terminated { if !terminated {
self.sess.span_diagnostic.span_fatal_with_code( self.sess.span_diagnostic.span_fatal_with_code(
self.mk_sp(start, suffix_start), self.mk_sp(start, end),
"unterminated character literal", "unterminated character literal",
error_code!(E0762), error_code!(E0762),
) )
} }
(token::Char, Mode::Char, 1, 1) // ' ' self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' '
} }
rustc_lexer::LiteralKind::Byte { terminated } => { rustc_lexer::LiteralKind::Byte { terminated } => {
if !terminated { if !terminated {
self.sess.span_diagnostic.span_fatal_with_code( self.sess.span_diagnostic.span_fatal_with_code(
self.mk_sp(start + BytePos(1), suffix_start), self.mk_sp(start + BytePos(1), end),
"unterminated byte constant", "unterminated byte constant",
error_code!(E0763), error_code!(E0763),
) )
} }
(token::Byte, Mode::Byte, 2, 1) // b' ' self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
} }
rustc_lexer::LiteralKind::Str { terminated } => { rustc_lexer::LiteralKind::Str { terminated } => {
if !terminated { if !terminated {
self.sess.span_diagnostic.span_fatal_with_code( self.sess.span_diagnostic.span_fatal_with_code(
self.mk_sp(start, suffix_start), self.mk_sp(start, end),
"unterminated double quote string", "unterminated double quote string",
error_code!(E0765), error_code!(E0765),
) )
} }
(token::Str, Mode::Str, 1, 1) // " " self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " "
} }
rustc_lexer::LiteralKind::ByteStr { terminated } => { rustc_lexer::LiteralKind::ByteStr { terminated } => {
if !terminated { if !terminated {
self.sess.span_diagnostic.span_fatal_with_code( self.sess.span_diagnostic.span_fatal_with_code(
self.mk_sp(start + BytePos(1), suffix_start), self.mk_sp(start + BytePos(1), end),
"unterminated double quote byte string", "unterminated double quote byte string",
error_code!(E0766), error_code!(E0766),
) )
} }
(token::ByteStr, Mode::ByteStr, 2, 1) // b" " self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
} }
rustc_lexer::LiteralKind::RawStr { n_hashes } => { rustc_lexer::LiteralKind::RawStr { n_hashes } => {
if let Some(n_hashes) = n_hashes { if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes); let n = u32::from(n_hashes);
(token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "## let kind = token::StrRaw(n_hashes);
self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
} else { } else {
self.report_raw_str_error(start, 1); self.report_raw_str_error(start, 1);
} }
@@ -419,56 +419,47 @@ impl<'a> StringReader<'a> {
rustc_lexer::LiteralKind::RawByteStr { n_hashes } => { rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
if let Some(n_hashes) = n_hashes { if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes); let n = u32::from(n_hashes);
(token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "## let kind = token::ByteStrRaw(n_hashes);
self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
} else { } else {
self.report_raw_str_error(start, 2); self.report_raw_str_error(start, 2);
} }
} }
rustc_lexer::LiteralKind::Int { base, empty_int } => { rustc_lexer::LiteralKind::Int { base, empty_int } => {
return if empty_int { if empty_int {
self.sess self.sess
.span_diagnostic .span_diagnostic
.struct_span_err_with_code( .struct_span_err_with_code(
self.mk_sp(start, suffix_start), self.mk_sp(start, end),
"no valid digits found for number", "no valid digits found for number",
error_code!(E0768), error_code!(E0768),
) )
.emit(); .emit();
(token::Integer, sym::integer(0)) (token::Integer, sym::integer(0))
} else { } else {
self.validate_int_literal(base, start, suffix_start); self.validate_int_literal(base, start, end);
(token::Integer, self.symbol_from_to(start, suffix_start)) (token::Integer, self.symbol_from_to(start, end))
}; }
} }
rustc_lexer::LiteralKind::Float { base, empty_exponent } => { rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
if empty_exponent { if empty_exponent {
self.err_span_(start, self.pos, "expected at least one digit in exponent"); self.err_span_(start, self.pos, "expected at least one digit in exponent");
} }
match base { match base {
Base::Hexadecimal => self.err_span_( Base::Hexadecimal => {
start, self.err_span_(start, end, "hexadecimal float literal is not supported")
suffix_start, }
"hexadecimal float literal is not supported",
),
Base::Octal => { Base::Octal => {
self.err_span_(start, suffix_start, "octal float literal is not supported") self.err_span_(start, end, "octal float literal is not supported")
} }
Base::Binary => { Base::Binary => {
self.err_span_(start, suffix_start, "binary float literal is not supported") self.err_span_(start, end, "binary float literal is not supported")
} }
_ => (), _ => {}
} }
(token::Float, self.symbol_from_to(start, end))
let id = self.symbol_from_to(start, suffix_start);
return (token::Float, id);
} }
}; }
let content_start = start + BytePos(prefix_len);
let content_end = suffix_start - BytePos(postfix_len);
let id = self.symbol_from_to(content_start, content_end);
self.validate_literal_escape(mode, content_start, content_end, prefix_len, postfix_len);
(lit_kind, id)
} }
#[inline] #[inline]
@@ -659,20 +650,22 @@ impl<'a> StringReader<'a> {
) )
} }
fn validate_literal_escape( fn cook_quoted(
&self, &self,
kind: token::LitKind,
mode: Mode, mode: Mode,
content_start: BytePos, start: BytePos,
content_end: BytePos, end: BytePos,
prefix_len: u32, prefix_len: u32,
postfix_len: u32, postfix_len: u32,
) { ) -> (token::LitKind, Symbol) {
let content_start = start + BytePos(prefix_len);
let content_end = end - BytePos(postfix_len);
let lit_content = self.str_from_to(content_start, content_end); let lit_content = self.str_from_to(content_start, content_end);
unescape::unescape_literal(lit_content, mode, &mut |range, result| { unescape::unescape_literal(lit_content, mode, &mut |range, result| {
// Here we only check for errors. The actual unescaping is done later. // Here we only check for errors. The actual unescaping is done later.
if let Err(err) = result { if let Err(err) = result {
let span_with_quotes = self let span_with_quotes = self.mk_sp(start, end);
.mk_sp(content_start - BytePos(prefix_len), content_end + BytePos(postfix_len));
let (start, end) = (range.start as u32, range.end as u32); let (start, end) = (range.start as u32, range.end as u32);
let lo = content_start + BytePos(start); let lo = content_start + BytePos(start);
let hi = lo + BytePos(end - start); let hi = lo + BytePos(end - start);
@@ -688,6 +681,7 @@ impl<'a> StringReader<'a> {
); );
} }
}); });
(kind, Symbol::intern(lit_content))
} }
fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) {