initial step towards implementing C string literals
This commit is contained in:
parent
7b99493492
commit
8ff3903643
17 changed files with 312 additions and 82 deletions
|
@ -1814,6 +1814,8 @@ pub enum LitKind {
|
|||
/// A byte string (`b"foo"`). Not stored as a symbol because it might be
|
||||
/// non-utf8, and symbols only allow utf8 strings.
|
||||
ByteStr(Lrc<[u8]>, StrStyle),
|
||||
/// A C String (`c"foo"`).
|
||||
CStr(Lrc<[u8]>, StrStyle),
|
||||
/// A byte char (`b'f'`).
|
||||
Byte(u8),
|
||||
/// A character literal (`'a'`).
|
||||
|
@ -1868,6 +1870,7 @@ impl LitKind {
|
|||
// unsuffixed variants
|
||||
LitKind::Str(..)
|
||||
| LitKind::ByteStr(..)
|
||||
| LitKind::CStr(..)
|
||||
| LitKind::Byte(..)
|
||||
| LitKind::Char(..)
|
||||
| LitKind::Int(_, LitIntType::Unsuffixed)
|
||||
|
|
|
@ -74,6 +74,8 @@ pub enum LitKind {
|
|||
StrRaw(u8), // raw string delimited by `n` hash symbols
|
||||
ByteStr,
|
||||
ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
|
||||
CStr,
|
||||
CStrRaw(u8),
|
||||
Err,
|
||||
}
|
||||
|
||||
|
@ -141,6 +143,10 @@ impl fmt::Display for Lit {
|
|||
delim = "#".repeat(n as usize),
|
||||
string = symbol
|
||||
)?,
|
||||
CStr => write!(f, "c\"{symbol}\"")?,
|
||||
CStrRaw(n) => {
|
||||
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
|
||||
}
|
||||
Integer | Float | Bool | Err => write!(f, "{symbol}")?,
|
||||
}
|
||||
|
||||
|
@ -170,6 +176,7 @@ impl LitKind {
|
|||
Float => "float",
|
||||
Str | StrRaw(..) => "string",
|
||||
ByteStr | ByteStrRaw(..) => "byte string",
|
||||
CStr | CStrRaw(..) => "C string",
|
||||
Err => "error",
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,7 +2,10 @@
|
|||
|
||||
use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
|
||||
use crate::token::{self, Token};
|
||||
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
|
||||
use rustc_lexer::unescape::{
|
||||
byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
|
||||
Mode,
|
||||
};
|
||||
use rustc_span::symbol::{kw, sym, Symbol};
|
||||
use rustc_span::Span;
|
||||
use std::{ascii, fmt, str};
|
||||
|
@ -158,6 +161,52 @@ impl LitKind {
|
|||
|
||||
LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
|
||||
}
|
||||
token::CStr => {
|
||||
let s = symbol.as_str();
|
||||
let mut buf = Vec::with_capacity(s.len());
|
||||
let mut error = Ok(());
|
||||
unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
|
||||
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
|
||||
error = Err(LitError::NulInCStr(span));
|
||||
}
|
||||
Ok(CStrUnit::Byte(b)) => buf.push(b),
|
||||
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
|
||||
Ok(CStrUnit::Char(c)) => {
|
||||
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
|
||||
}
|
||||
Err(err) => {
|
||||
if err.is_fatal() {
|
||||
error = Err(LitError::LexerError);
|
||||
}
|
||||
}
|
||||
});
|
||||
error?;
|
||||
buf.push(b'\0');
|
||||
LitKind::CStr(buf.into(), StrStyle::Cooked)
|
||||
}
|
||||
token::CStrRaw(n) => {
|
||||
let s = symbol.as_str();
|
||||
let mut buf = Vec::with_capacity(s.len());
|
||||
let mut error = Ok(());
|
||||
unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
|
||||
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
|
||||
error = Err(LitError::NulInCStr(span));
|
||||
}
|
||||
Ok(CStrUnit::Byte(b)) => buf.push(b),
|
||||
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
|
||||
Ok(CStrUnit::Char(c)) => {
|
||||
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
|
||||
}
|
||||
Err(err) => {
|
||||
if err.is_fatal() {
|
||||
error = Err(LitError::LexerError);
|
||||
}
|
||||
}
|
||||
});
|
||||
error?;
|
||||
buf.push(b'\0');
|
||||
LitKind::CStr(buf.into(), StrStyle::Raw(n))
|
||||
}
|
||||
token::Err => LitKind::Err,
|
||||
})
|
||||
}
|
||||
|
@ -191,6 +240,8 @@ impl fmt::Display for LitKind {
|
|||
string = symbol
|
||||
)?;
|
||||
}
|
||||
// TODO need to reescape
|
||||
LitKind::CStr(..) => todo!(),
|
||||
LitKind::Int(n, ty) => {
|
||||
write!(f, "{n}")?;
|
||||
match ty {
|
||||
|
@ -237,6 +288,8 @@ impl MetaItemLit {
|
|||
LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
|
||||
LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
|
||||
LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
|
||||
LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr,
|
||||
LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n),
|
||||
LitKind::Byte(_) => token::Byte,
|
||||
LitKind::Char(_) => token::Char,
|
||||
LitKind::Int(..) => token::Integer,
|
||||
|
|
|
@ -210,6 +210,8 @@ pub fn literal_to_string(lit: token::Lit) -> String {
|
|||
token::ByteStrRaw(n) => {
|
||||
format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol)
|
||||
}
|
||||
// TODO
|
||||
token::CStr | token::CStrRaw(_) => todo!(),
|
||||
token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(),
|
||||
};
|
||||
|
||||
|
|
|
@ -32,6 +32,10 @@ pub fn expand_concat(
|
|||
Ok(ast::LitKind::Bool(b)) => {
|
||||
accumulator.push_str(&b.to_string());
|
||||
}
|
||||
Ok(ast::LitKind::CStr(..)) => {
|
||||
cx.span_err(e.span, "cannot concatenate a C string literal");
|
||||
has_errors = true;
|
||||
}
|
||||
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
|
||||
cx.emit_err(errors::ConcatBytestr { span: e.span });
|
||||
has_errors = true;
|
||||
|
|
|
@ -18,6 +18,10 @@ fn invalid_type_err(
|
|||
};
|
||||
let snippet = cx.sess.source_map().span_to_snippet(span).ok();
|
||||
match ast::LitKind::from_token_lit(token_lit) {
|
||||
Ok(ast::LitKind::CStr(_, _)) => {
|
||||
// TODO
|
||||
cx.span_err(span, "cannot concatenate C string litearls");
|
||||
}
|
||||
Ok(ast::LitKind::Char(_)) => {
|
||||
let sugg =
|
||||
snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet });
|
||||
|
|
|
@ -61,6 +61,8 @@ impl FromInternal<token::LitKind> for LitKind {
|
|||
token::StrRaw(n) => LitKind::StrRaw(n),
|
||||
token::ByteStr => LitKind::ByteStr,
|
||||
token::ByteStrRaw(n) => LitKind::ByteStrRaw(n),
|
||||
// TODO
|
||||
token::CStr | token::CStrRaw(_) => todo!(),
|
||||
token::Err => LitKind::Err,
|
||||
token::Bool => unreachable!(),
|
||||
}
|
||||
|
@ -436,6 +438,8 @@ impl server::FreeFunctions for Rustc<'_, '_> {
|
|||
| token::LitKind::StrRaw(_)
|
||||
| token::LitKind::ByteStr
|
||||
| token::LitKind::ByteStrRaw(_)
|
||||
| token::LitKind::CStr
|
||||
| token::LitKind::CStrRaw(_)
|
||||
| token::LitKind::Err => return Err(()),
|
||||
token::LitKind::Integer | token::LitKind::Float => {}
|
||||
}
|
||||
|
|
|
@ -332,6 +332,7 @@ language_item_table! {
|
|||
RangeTo, sym::RangeTo, range_to_struct, Target::Struct, GenericRequirement::None;
|
||||
|
||||
String, sym::String, string, Target::Struct, GenericRequirement::None;
|
||||
CStr, sym::CStr, c_str, Target::Struct, GenericRequirement::None;
|
||||
}
|
||||
|
||||
pub enum GenericRequirement {
|
||||
|
|
|
@ -1300,6 +1300,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
|
|||
opt_ty.unwrap_or_else(|| self.next_float_var())
|
||||
}
|
||||
ast::LitKind::Bool(_) => tcx.types.bool,
|
||||
ast::LitKind::CStr(_, _) => tcx.mk_imm_ref(
|
||||
tcx.lifetimes.re_static,
|
||||
tcx.type_of(tcx.require_lang_item(hir::LangItem::CStr, Some(lit.span)))
|
||||
.skip_binder(),
|
||||
),
|
||||
ast::LitKind::Err => tcx.ty_error_misc(),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -186,12 +186,16 @@ pub enum LiteralKind {
|
|||
Str { terminated: bool },
|
||||
/// "b"abc"", "b"abc"
|
||||
ByteStr { terminated: bool },
|
||||
/// `c"abc"`, `c"abc`
|
||||
CStr { terminated: bool },
|
||||
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
|
||||
/// an invalid literal.
|
||||
RawStr { n_hashes: Option<u8> },
|
||||
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
|
||||
/// indicates an invalid literal.
|
||||
RawByteStr { n_hashes: Option<u8> },
|
||||
/// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` is invalid.
|
||||
RawCStr { n_hashes: Option<u8> },
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
|
@ -391,6 +395,32 @@ impl Cursor<'_> {
|
|||
_ => self.ident_or_unknown_prefix(),
|
||||
},
|
||||
|
||||
// TODO deduplicate this code
|
||||
// c-string literal, raw c-string literal or identifier.
|
||||
'c' => match (self.first(), self.second()) {
|
||||
('"', _) => {
|
||||
self.bump();
|
||||
let terminated = self.double_quoted_string();
|
||||
let suffix_start = self.pos_within_token();
|
||||
if terminated {
|
||||
self.eat_literal_suffix();
|
||||
}
|
||||
let kind = CStr { terminated };
|
||||
Literal { kind, suffix_start }
|
||||
}
|
||||
('r', '"') | ('r', '#') => {
|
||||
self.bump();
|
||||
let res = self.raw_double_quoted_string(2);
|
||||
let suffix_start = self.pos_within_token();
|
||||
if res.is_ok() {
|
||||
self.eat_literal_suffix();
|
||||
}
|
||||
let kind = RawCStr { n_hashes: res.ok() };
|
||||
Literal { kind, suffix_start }
|
||||
}
|
||||
_ => self.ident_or_unknown_prefix(),
|
||||
},
|
||||
|
||||
// Identifier (this should be checked after other variant that can
|
||||
// start as identifier).
|
||||
c if is_id_start(c) => self.ident_or_unknown_prefix(),
|
||||
|
|
|
@ -90,6 +90,39 @@ where
|
|||
Mode::RawStr | Mode::RawByteStr => {
|
||||
unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback)
|
||||
}
|
||||
Mode::CStr | Mode::RawCStr => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub enum CStrUnit {
|
||||
Byte(u8),
|
||||
Char(char),
|
||||
}
|
||||
|
||||
impl From<u8> for CStrUnit {
|
||||
fn from(value: u8) -> Self {
|
||||
CStrUnit::Byte(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<char> for CStrUnit {
|
||||
fn from(value: char) -> Self {
|
||||
CStrUnit::Char(value)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unescape_c_string<F>(src: &str, mode: Mode, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, Result<CStrUnit, EscapeError>),
|
||||
{
|
||||
if mode == Mode::RawCStr {
|
||||
unescape_raw_str_or_raw_byte_str(
|
||||
src,
|
||||
mode.characters_should_be_ascii(),
|
||||
&mut |r, result| callback(r, result.map(CStrUnit::Char)),
|
||||
);
|
||||
} else {
|
||||
unescape_str_common(src, mode, callback);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -114,19 +147,26 @@ pub enum Mode {
|
|||
ByteStr,
|
||||
RawStr,
|
||||
RawByteStr,
|
||||
CStr,
|
||||
RawCStr,
|
||||
}
|
||||
|
||||
impl Mode {
|
||||
pub fn in_double_quotes(self) -> bool {
|
||||
match self {
|
||||
Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => true,
|
||||
Mode::Str
|
||||
| Mode::ByteStr
|
||||
| Mode::RawStr
|
||||
| Mode::RawByteStr
|
||||
| Mode::CStr
|
||||
| Mode::RawCStr => true,
|
||||
Mode::Char | Mode::Byte => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_byte(self) -> bool {
|
||||
match self {
|
||||
Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
|
||||
Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => true,
|
||||
Mode::Char | Mode::Str | Mode::RawStr => false,
|
||||
}
|
||||
}
|
||||
|
@ -163,64 +203,65 @@ fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError
|
|||
value as char
|
||||
}
|
||||
|
||||
'u' => {
|
||||
// We've parsed '\u', now we have to parse '{..}'.
|
||||
|
||||
if chars.next() != Some('{') {
|
||||
return Err(EscapeError::NoBraceInUnicodeEscape);
|
||||
}
|
||||
|
||||
// First character must be a hexadecimal digit.
|
||||
let mut n_digits = 1;
|
||||
let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
|
||||
'_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
|
||||
'}' => return Err(EscapeError::EmptyUnicodeEscape),
|
||||
c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
|
||||
};
|
||||
|
||||
// First character is valid, now parse the rest of the number
|
||||
// and closing brace.
|
||||
loop {
|
||||
match chars.next() {
|
||||
None => return Err(EscapeError::UnclosedUnicodeEscape),
|
||||
Some('_') => continue,
|
||||
Some('}') => {
|
||||
if n_digits > 6 {
|
||||
return Err(EscapeError::OverlongUnicodeEscape);
|
||||
}
|
||||
|
||||
// Incorrect syntax has higher priority for error reporting
|
||||
// than unallowed value for a literal.
|
||||
if is_byte {
|
||||
return Err(EscapeError::UnicodeEscapeInByte);
|
||||
}
|
||||
|
||||
break std::char::from_u32(value).ok_or_else(|| {
|
||||
if value > 0x10FFFF {
|
||||
EscapeError::OutOfRangeUnicodeEscape
|
||||
} else {
|
||||
EscapeError::LoneSurrogateUnicodeEscape
|
||||
}
|
||||
})?;
|
||||
}
|
||||
Some(c) => {
|
||||
let digit: u32 =
|
||||
c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
|
||||
n_digits += 1;
|
||||
if n_digits > 6 {
|
||||
// Stop updating value since we're sure that it's incorrect already.
|
||||
continue;
|
||||
}
|
||||
value = value * 16 + digit;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
'u' => scan_unicode(chars, is_byte)?,
|
||||
_ => return Err(EscapeError::InvalidEscape),
|
||||
};
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
|
||||
// We've parsed '\u', now we have to parse '{..}'.
|
||||
|
||||
if chars.next() != Some('{') {
|
||||
return Err(EscapeError::NoBraceInUnicodeEscape);
|
||||
}
|
||||
|
||||
// First character must be a hexadecimal digit.
|
||||
let mut n_digits = 1;
|
||||
let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
|
||||
'_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
|
||||
'}' => return Err(EscapeError::EmptyUnicodeEscape),
|
||||
c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
|
||||
};
|
||||
|
||||
// First character is valid, now parse the rest of the number
|
||||
// and closing brace.
|
||||
loop {
|
||||
match chars.next() {
|
||||
None => return Err(EscapeError::UnclosedUnicodeEscape),
|
||||
Some('_') => continue,
|
||||
Some('}') => {
|
||||
if n_digits > 6 {
|
||||
return Err(EscapeError::OverlongUnicodeEscape);
|
||||
}
|
||||
|
||||
// Incorrect syntax has higher priority for error reporting
|
||||
// than unallowed value for a literal.
|
||||
if is_byte {
|
||||
return Err(EscapeError::UnicodeEscapeInByte);
|
||||
}
|
||||
|
||||
break std::char::from_u32(value).ok_or_else(|| {
|
||||
if value > 0x10FFFF {
|
||||
EscapeError::OutOfRangeUnicodeEscape
|
||||
} else {
|
||||
EscapeError::LoneSurrogateUnicodeEscape
|
||||
}
|
||||
});
|
||||
}
|
||||
Some(c) => {
|
||||
let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
|
||||
n_digits += 1;
|
||||
if n_digits > 6 {
|
||||
// Stop updating value since we're sure that it's incorrect already.
|
||||
continue;
|
||||
}
|
||||
value = value * 16 + digit;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> {
|
||||
if is_byte && !c.is_ascii() {
|
||||
|
@ -266,7 +307,9 @@ where
|
|||
// if unescaped '\' character is followed by '\n'.
|
||||
// For details see [Rust language reference]
|
||||
// (https://doc.rust-lang.org/reference/tokens.html#string-literals).
|
||||
skip_ascii_whitespace(&mut chars, start, callback);
|
||||
skip_ascii_whitespace(&mut chars, start, &mut |range, err| {
|
||||
callback(range, Err(err))
|
||||
});
|
||||
continue;
|
||||
}
|
||||
_ => scan_escape(&mut chars, is_byte),
|
||||
|
@ -281,32 +324,32 @@ where
|
|||
let end = src.len() - chars.as_str().len();
|
||||
callback(start..end, res);
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
||||
{
|
||||
let tail = chars.as_str();
|
||||
let first_non_space = tail
|
||||
.bytes()
|
||||
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
|
||||
.unwrap_or(tail.len());
|
||||
if tail[1..first_non_space].contains('\n') {
|
||||
// The +1 accounts for the escaping slash.
|
||||
let end = start + first_non_space + 1;
|
||||
callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning));
|
||||
}
|
||||
let tail = &tail[first_non_space..];
|
||||
if let Some(c) = tail.chars().nth(0) {
|
||||
if c.is_whitespace() {
|
||||
// For error reporting, we would like the span to contain the character that was not
|
||||
// skipped. The +1 is necessary to account for the leading \ that started the escape.
|
||||
let end = start + first_non_space + c.len_utf8() + 1;
|
||||
callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning));
|
||||
}
|
||||
}
|
||||
*chars = tail.chars();
|
||||
fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, EscapeError),
|
||||
{
|
||||
let tail = chars.as_str();
|
||||
let first_non_space = tail
|
||||
.bytes()
|
||||
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
|
||||
.unwrap_or(tail.len());
|
||||
if tail[1..first_non_space].contains('\n') {
|
||||
// The +1 accounts for the escaping slash.
|
||||
let end = start + first_non_space + 1;
|
||||
callback(start..end, EscapeError::MultipleSkippedLinesWarning);
|
||||
}
|
||||
let tail = &tail[first_non_space..];
|
||||
if let Some(c) = tail.chars().nth(0) {
|
||||
if c.is_whitespace() {
|
||||
// For error reporting, we would like the span to contain the character that was not
|
||||
// skipped. The +1 is necessary to account for the leading \ that started the escape.
|
||||
let end = start + first_non_space + c.len_utf8() + 1;
|
||||
callback(start..end, EscapeError::UnskippedWhitespaceWarning);
|
||||
}
|
||||
}
|
||||
*chars = tail.chars();
|
||||
}
|
||||
|
||||
/// Takes a contents of a string literal (without quotes) and produces a
|
||||
|
|
|
@ -415,6 +415,16 @@ impl<'a> StringReader<'a> {
|
|||
}
|
||||
self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
|
||||
}
|
||||
rustc_lexer::LiteralKind::CStr { terminated } => {
|
||||
if !terminated {
|
||||
self.sess.span_diagnostic.span_fatal_with_code(
|
||||
self.mk_sp(start + BytePos(1), end),
|
||||
"unterminated C string",
|
||||
error_code!(E0767),
|
||||
)
|
||||
}
|
||||
self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
|
||||
}
|
||||
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
|
||||
if let Some(n_hashes) = n_hashes {
|
||||
let n = u32::from(n_hashes);
|
||||
|
@ -433,6 +443,15 @@ impl<'a> StringReader<'a> {
|
|||
self.report_raw_str_error(start, 2);
|
||||
}
|
||||
}
|
||||
rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
|
||||
if let Some(n_hashes) = n_hashes {
|
||||
let n = u32::from(n_hashes);
|
||||
let kind = token::CStrRaw(n_hashes);
|
||||
self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
|
||||
} else {
|
||||
self.report_raw_str_error(start, 2);
|
||||
}
|
||||
}
|
||||
rustc_lexer::LiteralKind::Int { base, empty_int } => {
|
||||
if empty_int {
|
||||
let span = self.mk_sp(start, end);
|
||||
|
@ -692,6 +711,51 @@ impl<'a> StringReader<'a> {
|
|||
(token::Err, self.symbol_from_to(start, end))
|
||||
}
|
||||
}
|
||||
|
||||
fn cook_c_string(
|
||||
&self,
|
||||
kind: token::LitKind,
|
||||
mode: Mode,
|
||||
start: BytePos,
|
||||
end: BytePos,
|
||||
prefix_len: u32,
|
||||
postfix_len: u32,
|
||||
) -> (token::LitKind, Symbol) {
|
||||
let mut has_fatal_err = false;
|
||||
let content_start = start + BytePos(prefix_len);
|
||||
let content_end = end - BytePos(postfix_len);
|
||||
let lit_content = self.str_from_to(content_start, content_end);
|
||||
unescape::unescape_c_string(lit_content, mode, &mut |range, result| {
|
||||
// Here we only check for errors. The actual unescaping is done later.
|
||||
if let Err(err) = result {
|
||||
let span_with_quotes = self.mk_sp(start, end);
|
||||
let (start, end) = (range.start as u32, range.end as u32);
|
||||
let lo = content_start + BytePos(start);
|
||||
let hi = lo + BytePos(end - start);
|
||||
let span = self.mk_sp(lo, hi);
|
||||
if err.is_fatal() {
|
||||
has_fatal_err = true;
|
||||
}
|
||||
emit_unescape_error(
|
||||
&self.sess.span_diagnostic,
|
||||
lit_content,
|
||||
span_with_quotes,
|
||||
span,
|
||||
mode,
|
||||
range,
|
||||
err,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
// We normally exclude the quotes for the symbol, but for errors we
|
||||
// include it because it results in clearer error messages.
|
||||
if !has_fatal_err {
|
||||
(kind, Symbol::intern(lit_content))
|
||||
} else {
|
||||
(token::Err, self.symbol_from_to(start, end))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn nfc_normalize(string: &str) -> Symbol {
|
||||
|
|
|
@ -82,6 +82,7 @@ use crate::str;
|
|||
#[cfg_attr(not(test), rustc_diagnostic_item = "CStr")]
|
||||
#[stable(feature = "core_c_str", since = "1.64.0")]
|
||||
#[rustc_has_incoherent_inherent_impls]
|
||||
#[cfg_attr(not(bootstrap), lang = "CStr")]
|
||||
// FIXME:
|
||||
// `fn from` in `impl From<&CStr> for Box<CStr>` current implementation relies
|
||||
// on `CStr` being layout-compatible with `[u8]`.
|
||||
|
|
|
@ -811,7 +811,9 @@ impl<'src> Classifier<'src> {
|
|||
| LiteralKind::Str { .. }
|
||||
| LiteralKind::ByteStr { .. }
|
||||
| LiteralKind::RawStr { .. }
|
||||
| LiteralKind::RawByteStr { .. } => Class::String,
|
||||
| LiteralKind::RawByteStr { .. }
|
||||
| LiteralKind::CStr { .. }
|
||||
| LiteralKind::RawCStr { .. } => Class::String,
|
||||
// Number literals.
|
||||
LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
|
||||
},
|
||||
|
|
|
@ -284,6 +284,7 @@ impl<'a> NormalizedPat<'a> {
|
|||
LitKind::Str(sym, _) => Self::LitStr(sym),
|
||||
LitKind::ByteStr(ref bytes, _) => Self::LitBytes(bytes),
|
||||
LitKind::Byte(val) => Self::LitInt(val.into()),
|
||||
LitKind::CStr(ref bytes, _) => Self::LitBytes(bytes),
|
||||
LitKind::Char(val) => Self::LitInt(val.into()),
|
||||
LitKind::Int(val, _) => Self::LitInt(val),
|
||||
LitKind::Bool(val) => Self::LitBool(val),
|
||||
|
|
|
@ -304,6 +304,11 @@ impl<'a, 'tcx> PrintVisitor<'a, 'tcx> {
|
|||
kind!("ByteStr(ref {vec})");
|
||||
chain!(self, "let [{:?}] = **{vec}", vec.value);
|
||||
},
|
||||
LitKind::CStr(ref vec, _) => {
|
||||
bind!(self, vec);
|
||||
kind!("CStr(ref {vec})");
|
||||
chain!(self, "let [{:?}] = **{vec}", vec.value);
|
||||
}
|
||||
LitKind::Str(s, _) => {
|
||||
bind!(self, s);
|
||||
kind!("Str({s}, _)");
|
||||
|
|
|
@ -211,6 +211,7 @@ pub fn lit_to_mir_constant(lit: &LitKind, ty: Option<Ty<'_>>) -> Constant {
|
|||
LitKind::Str(ref is, _) => Constant::Str(is.to_string()),
|
||||
LitKind::Byte(b) => Constant::Int(u128::from(b)),
|
||||
LitKind::ByteStr(ref s, _) => Constant::Binary(Lrc::clone(s)),
|
||||
LitKind::CStr(ref s, _) => Constant::Binary(Lrc::clone(s)),
|
||||
LitKind::Char(c) => Constant::Char(c),
|
||||
LitKind::Int(n, _) => Constant::Int(n),
|
||||
LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty {
|
||||
|
|
Loading…
Add table
Reference in a new issue