initial step towards implementing C string literals

2023-03-05 15:03:22 +00:00 · 2023-03-05 15:03:22 +00:00 · 8ff3903643
commit 8ff3903643
parent 7b99493492
17 changed files with 312 additions and 82 deletions
--- a/compiler/rustc_ast/src/ast.rs
+++ b/compiler/rustc_ast/src/ast.rs
@ -1814,6 +1814,8 @@ pub enum LitKind {
    /// A byte string (`b"foo"`). Not stored as a symbol because it might be
    /// non-utf8, and symbols only allow utf8 strings.
    ByteStr(Lrc<[u8]>, StrStyle),
+    /// A C String (`c"foo"`).
+    CStr(Lrc<[u8]>, StrStyle),
    /// A byte char (`b'f'`).
    Byte(u8),
    /// A character literal (`'a'`).
@ -1868,6 +1870,7 @@ impl LitKind {
            // unsuffixed variants
            LitKind::Str(..)
            | LitKind::ByteStr(..)
+            | LitKind::CStr(..)
            | LitKind::Byte(..)
            | LitKind::Char(..)
            | LitKind::Int(_, LitIntType::Unsuffixed)
--- a/compiler/rustc_ast/src/token.rs
+++ b/compiler/rustc_ast/src/token.rs
@ -74,6 +74,8 @@ pub enum LitKind {
    StrRaw(u8), // raw string delimited by `n` hash symbols
    ByteStr,
    ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
+    CStr,
+    CStrRaw(u8),
    Err,
 }

@ -141,6 +143,10 @@ impl fmt::Display for Lit {
                delim = "#".repeat(n as usize),
                string = symbol
            )?,
+            CStr => write!(f, "c\"{symbol}\"")?,
+            CStrRaw(n) => {
+                write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
+            }
            Integer | Float | Bool | Err => write!(f, "{symbol}")?,
        }

@ -170,6 +176,7 @@ impl LitKind {
            Float => "float",
            Str | StrRaw(..) => "string",
            ByteStr | ByteStrRaw(..) => "byte string",
+            CStr | CStrRaw(..) => "C string",
            Err => "error",
        }
    }
--- a/compiler/rustc_ast/src/util/literal.rs
+++ b/compiler/rustc_ast/src/util/literal.rs
@ -2,7 +2,10 @@

 use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
 use crate::token::{self, Token};
-use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
+use rustc_lexer::unescape::{
+    byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
+    Mode,
+};
 use rustc_span::symbol::{kw, sym, Symbol};
 use rustc_span::Span;
 use std::{ascii, fmt, str};
@ -158,6 +161,52 @@ impl LitKind {

                LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
            }
+            token::CStr => {
+                let s = symbol.as_str();
+                let mut buf = Vec::with_capacity(s.len());
+                let mut error = Ok(());
+                unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
+                    Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
+                        error = Err(LitError::NulInCStr(span));
+                    }
+                    Ok(CStrUnit::Byte(b)) => buf.push(b),
+                    Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
+                    Ok(CStrUnit::Char(c)) => {
+                        buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
+                    }
+                    Err(err) => {
+                        if err.is_fatal() {
+                            error = Err(LitError::LexerError);
+                        }
+                    }
+                });
+                error?;
+                buf.push(b'\0');
+                LitKind::CStr(buf.into(), StrStyle::Cooked)
+            }
+            token::CStrRaw(n) => {
+                let s = symbol.as_str();
+                let mut buf = Vec::with_capacity(s.len());
+                let mut error = Ok(());
+                unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
+                    Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
+                        error = Err(LitError::NulInCStr(span));
+                    }
+                    Ok(CStrUnit::Byte(b)) => buf.push(b),
+                    Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
+                    Ok(CStrUnit::Char(c)) => {
+                        buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
+                    }
+                    Err(err) => {
+                        if err.is_fatal() {
+                            error = Err(LitError::LexerError);
+                        }
+                    }
+                });
+                error?;
+                buf.push(b'\0');
+                LitKind::CStr(buf.into(), StrStyle::Raw(n))
+            }
            token::Err => LitKind::Err,
        })
    }
@ -191,6 +240,8 @@ impl fmt::Display for LitKind {
                    string = symbol
                )?;
            }
+            // TODO need to reescape
+            LitKind::CStr(..) => todo!(),
            LitKind::Int(n, ty) => {
                write!(f, "{n}")?;
                match ty {
@ -237,6 +288,8 @@ impl MetaItemLit {
            LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
            LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
            LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
+            LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr,
+            LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n),
            LitKind::Byte(_) => token::Byte,
            LitKind::Char(_) => token::Char,
            LitKind::Int(..) => token::Integer,
--- a/compiler/rustc_ast_pretty/src/pprust/state.rs
+++ b/compiler/rustc_ast_pretty/src/pprust/state.rs
@ -210,6 +210,8 @@ pub fn literal_to_string(lit: token::Lit) -> String {
        token::ByteStrRaw(n) => {
            format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol)
        }
+        // TODO
+        token::CStr | token::CStrRaw(_) => todo!(),
        token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(),
    };

--- a/compiler/rustc_builtin_macros/src/concat.rs
+++ b/compiler/rustc_builtin_macros/src/concat.rs
@ -32,6 +32,10 @@ pub fn expand_concat(
                Ok(ast::LitKind::Bool(b)) => {
                    accumulator.push_str(&b.to_string());
                }
+                Ok(ast::LitKind::CStr(..)) => {
+                    cx.span_err(e.span, "cannot concatenate a C string literal");
+                    has_errors = true;
+                }
                Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
                    cx.emit_err(errors::ConcatBytestr { span: e.span });
                    has_errors = true;
--- a/compiler/rustc_builtin_macros/src/concat_bytes.rs
+++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs
@ -18,6 +18,10 @@ fn invalid_type_err(
    };
    let snippet = cx.sess.source_map().span_to_snippet(span).ok();
    match ast::LitKind::from_token_lit(token_lit) {
+        Ok(ast::LitKind::CStr(_, _)) => {
+            // TODO
+            cx.span_err(span, "cannot concatenate C string litearls");
+        }
        Ok(ast::LitKind::Char(_)) => {
            let sugg =
                snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet });
--- a/compiler/rustc_expand/src/proc_macro_server.rs
+++ b/compiler/rustc_expand/src/proc_macro_server.rs
@ -61,6 +61,8 @@ impl FromInternal<token::LitKind> for LitKind {
            token::StrRaw(n) => LitKind::StrRaw(n),
            token::ByteStr => LitKind::ByteStr,
            token::ByteStrRaw(n) => LitKind::ByteStrRaw(n),
+            // TODO
+            token::CStr | token::CStrRaw(_) => todo!(),
            token::Err => LitKind::Err,
            token::Bool => unreachable!(),
        }
@ -436,6 +438,8 @@ impl server::FreeFunctions for Rustc<'_, '_> {
                | token::LitKind::StrRaw(_)
                | token::LitKind::ByteStr
                | token::LitKind::ByteStrRaw(_)
+                | token::LitKind::CStr
+                | token::LitKind::CStrRaw(_)
                | token::LitKind::Err => return Err(()),
                token::LitKind::Integer | token::LitKind::Float => {}
            }
--- a/compiler/rustc_hir/src/lang_items.rs
+++ b/compiler/rustc_hir/src/lang_items.rs
@ -332,6 +332,7 @@ language_item_table! {
    RangeTo,                 sym::RangeTo,             range_to_struct,            Target::Struct,         GenericRequirement::None;

    String,                  sym::String,              string,                     Target::Struct,         GenericRequirement::None;
+    CStr,                    sym::CStr,                c_str,                      Target::Struct,         GenericRequirement::None;
 }

 pub enum GenericRequirement {
--- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
+++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
@ -1300,6 +1300,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
                opt_ty.unwrap_or_else(|| self.next_float_var())
            }
            ast::LitKind::Bool(_) => tcx.types.bool,
+            ast::LitKind::CStr(_, _) => tcx.mk_imm_ref(
+                tcx.lifetimes.re_static,
+                tcx.type_of(tcx.require_lang_item(hir::LangItem::CStr, Some(lit.span)))
+                    .skip_binder(),
+            ),
            ast::LitKind::Err => tcx.ty_error_misc(),
        }
    }
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@ -186,12 +186,16 @@ pub enum LiteralKind {
    Str { terminated: bool },
    /// "b"abc"", "b"abc"
    ByteStr { terminated: bool },
+    /// `c"abc"`, `c"abc`
+    CStr { terminated: bool },
    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
    /// an invalid literal.
    RawStr { n_hashes: Option<u8> },
    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
    /// indicates an invalid literal.
    RawByteStr { n_hashes: Option<u8> },
+    /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` is invalid.
+    RawCStr { n_hashes: Option<u8> },
 }

 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
@ -391,6 +395,32 @@ impl Cursor<'_> {
                _ => self.ident_or_unknown_prefix(),
            },

+            // TODO deduplicate this code
+            // c-string literal, raw c-string literal or identifier.
+            'c' => match (self.first(), self.second()) {
+                ('"', _) => {
+                    self.bump();
+                    let terminated = self.double_quoted_string();
+                    let suffix_start = self.pos_within_token();
+                    if terminated {
+                        self.eat_literal_suffix();
+                    }
+                    let kind = CStr { terminated };
+                    Literal { kind, suffix_start }
+                }
+                ('r', '"') | ('r', '#') => {
+                    self.bump();
+                    let res = self.raw_double_quoted_string(2);
+                    let suffix_start = self.pos_within_token();
+                    if res.is_ok() {
+                        self.eat_literal_suffix();
+                    }
+                    let kind = RawCStr { n_hashes: res.ok() };
+                    Literal { kind, suffix_start }
+                }
+                _ => self.ident_or_unknown_prefix(),
+            },
+
            // Identifier (this should be checked after other variant that can
            // start as identifier).
            c if is_id_start(c) => self.ident_or_unknown_prefix(),
--- a/compiler/rustc_lexer/src/unescape.rs
+++ b/compiler/rustc_lexer/src/unescape.rs
@ -90,6 +90,39 @@ where
        Mode::RawStr | Mode::RawByteStr => {
            unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback)
        }
+        Mode::CStr | Mode::RawCStr => unreachable!(),
+    }
+}
+
+pub enum CStrUnit {
+    Byte(u8),
+    Char(char),
+}
+
+impl From<u8> for CStrUnit {
+    fn from(value: u8) -> Self {
+        CStrUnit::Byte(value)
+    }
+}
+
+impl From<char> for CStrUnit {
+    fn from(value: char) -> Self {
+        CStrUnit::Char(value)
+    }
+}
+
+pub fn unescape_c_string<F>(src: &str, mode: Mode, callback: &mut F)
+where
+    F: FnMut(Range<usize>, Result<CStrUnit, EscapeError>),
+{
+    if mode == Mode::RawCStr {
+        unescape_raw_str_or_raw_byte_str(
+            src,
+            mode.characters_should_be_ascii(),
+            &mut |r, result| callback(r, result.map(CStrUnit::Char)),
+        );
+    } else {
+        unescape_str_common(src, mode, callback);
    }
 }

@ -114,19 +147,26 @@ pub enum Mode {
    ByteStr,
    RawStr,
    RawByteStr,
+    CStr,
+    RawCStr,
 }

 impl Mode {
    pub fn in_double_quotes(self) -> bool {
        match self {
-            Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => true,
+            Mode::Str
+            | Mode::ByteStr
+            | Mode::RawStr
+            | Mode::RawByteStr
+            | Mode::CStr
+            | Mode::RawCStr => true,
            Mode::Char | Mode::Byte => false,
        }
    }

    pub fn is_byte(self) -> bool {
        match self {
-            Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
+            Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => true,
            Mode::Char | Mode::Str | Mode::RawStr => false,
        }
    }
@ -163,64 +203,65 @@ fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError
            value as char
        }

-        'u' => {
-            // We've parsed '\u', now we have to parse '{..}'.
-
-            if chars.next() != Some('{') {
-                return Err(EscapeError::NoBraceInUnicodeEscape);
-            }
-
-            // First character must be a hexadecimal digit.
-            let mut n_digits = 1;
-            let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
-                '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
-                '}' => return Err(EscapeError::EmptyUnicodeEscape),
-                c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
-            };
-
-            // First character is valid, now parse the rest of the number
-            // and closing brace.
-            loop {
-                match chars.next() {
-                    None => return Err(EscapeError::UnclosedUnicodeEscape),
-                    Some('_') => continue,
-                    Some('}') => {
-                        if n_digits > 6 {
-                            return Err(EscapeError::OverlongUnicodeEscape);
-                        }
-
-                        // Incorrect syntax has higher priority for error reporting
-                        // than unallowed value for a literal.
-                        if is_byte {
-                            return Err(EscapeError::UnicodeEscapeInByte);
-                        }
-
-                        break std::char::from_u32(value).ok_or_else(|| {
-                            if value > 0x10FFFF {
-                                EscapeError::OutOfRangeUnicodeEscape
-                            } else {
-                                EscapeError::LoneSurrogateUnicodeEscape
-                            }
-                        })?;
-                    }
-                    Some(c) => {
-                        let digit: u32 =
-                            c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
-                        n_digits += 1;
-                        if n_digits > 6 {
-                            // Stop updating value since we're sure that it's incorrect already.
-                            continue;
-                        }
-                        value = value * 16 + digit;
-                    }
-                };
-            }
-        }
+        'u' => scan_unicode(chars, is_byte)?,
        _ => return Err(EscapeError::InvalidEscape),
    };
    Ok(res)
 }

+fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
+    // We've parsed '\u', now we have to parse '{..}'.
+
+    if chars.next() != Some('{') {
+        return Err(EscapeError::NoBraceInUnicodeEscape);
+    }
+
+    // First character must be a hexadecimal digit.
+    let mut n_digits = 1;
+    let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
+        '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
+        '}' => return Err(EscapeError::EmptyUnicodeEscape),
+        c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
+    };
+
+    // First character is valid, now parse the rest of the number
+    // and closing brace.
+    loop {
+        match chars.next() {
+            None => return Err(EscapeError::UnclosedUnicodeEscape),
+            Some('_') => continue,
+            Some('}') => {
+                if n_digits > 6 {
+                    return Err(EscapeError::OverlongUnicodeEscape);
+                }
+
+                // Incorrect syntax has higher priority for error reporting
+                // than unallowed value for a literal.
+                if is_byte {
+                    return Err(EscapeError::UnicodeEscapeInByte);
+                }
+
+                break std::char::from_u32(value).ok_or_else(|| {
+                    if value > 0x10FFFF {
+                        EscapeError::OutOfRangeUnicodeEscape
+                    } else {
+                        EscapeError::LoneSurrogateUnicodeEscape
+                    }
+                });
+            }
+            Some(c) => {
+                let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
+                n_digits += 1;
+                if n_digits > 6 {
+                    // Stop updating value since we're sure that it's incorrect already.
+                    continue;
+                }
+                value = value * 16 + digit;
+            }
+        };
+    }
+}
+
 #[inline]
 fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> {
    if is_byte && !c.is_ascii() {
@ -266,7 +307,9 @@ where
                        // if unescaped '\' character is followed by '\n'.
                        // For details see [Rust language reference]
                        // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
-                        skip_ascii_whitespace(&mut chars, start, callback);
+                        skip_ascii_whitespace(&mut chars, start, &mut |range, err| {
+                            callback(range, Err(err))
+                        });
                        continue;
                    }
                    _ => scan_escape(&mut chars, is_byte),
@ -281,32 +324,32 @@ where
        let end = src.len() - chars.as_str().len();
        callback(start..end, res);
    }
+}

-    fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
-    where
-        F: FnMut(Range<usize>, Result<char, EscapeError>),
-    {
-        let tail = chars.as_str();
-        let first_non_space = tail
-            .bytes()
-            .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
-            .unwrap_or(tail.len());
-        if tail[1..first_non_space].contains('\n') {
-            // The +1 accounts for the escaping slash.
-            let end = start + first_non_space + 1;
-            callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning));
-        }
-        let tail = &tail[first_non_space..];
-        if let Some(c) = tail.chars().nth(0) {
-            if c.is_whitespace() {
-                // For error reporting, we would like the span to contain the character that was not
-                // skipped. The +1 is necessary to account for the leading \ that started the escape.
-                let end = start + first_non_space + c.len_utf8() + 1;
-                callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning));
-            }
-        }
-        *chars = tail.chars();
+fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
+where
+    F: FnMut(Range<usize>, EscapeError),
+{
+    let tail = chars.as_str();
+    let first_non_space = tail
+        .bytes()
+        .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
+        .unwrap_or(tail.len());
+    if tail[1..first_non_space].contains('\n') {
+        // The +1 accounts for the escaping slash.
+        let end = start + first_non_space + 1;
+        callback(start..end, EscapeError::MultipleSkippedLinesWarning);
    }
+    let tail = &tail[first_non_space..];
+    if let Some(c) = tail.chars().nth(0) {
+        if c.is_whitespace() {
+            // For error reporting, we would like the span to contain the character that was not
+            // skipped. The +1 is necessary to account for the leading \ that started the escape.
+            let end = start + first_non_space + c.len_utf8() + 1;
+            callback(start..end, EscapeError::UnskippedWhitespaceWarning);
+        }
+    }
+    *chars = tail.chars();
 }

 /// Takes a contents of a string literal (without quotes) and produces a
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@ -415,6 +415,16 @@ impl<'a> StringReader<'a> {
                }
                self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
            }
+            rustc_lexer::LiteralKind::CStr { terminated } => {
+                if !terminated {
+                    self.sess.span_diagnostic.span_fatal_with_code(
+                        self.mk_sp(start + BytePos(1), end),
+                        "unterminated C string",
+                        error_code!(E0767),
+                    )
+                }
+                self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
+            }
            rustc_lexer::LiteralKind::RawStr { n_hashes } => {
                if let Some(n_hashes) = n_hashes {
                    let n = u32::from(n_hashes);
@ -433,6 +443,15 @@ impl<'a> StringReader<'a> {
                    self.report_raw_str_error(start, 2);
                }
            }
+            rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
+                if let Some(n_hashes) = n_hashes {
+                    let n = u32::from(n_hashes);
+                    let kind = token::CStrRaw(n_hashes);
+                    self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
+                } else {
+                    self.report_raw_str_error(start, 2);
+                }
+            }
            rustc_lexer::LiteralKind::Int { base, empty_int } => {
                if empty_int {
                    let span = self.mk_sp(start, end);
@ -692,6 +711,51 @@ impl<'a> StringReader<'a> {
            (token::Err, self.symbol_from_to(start, end))
        }
    }
+
+    fn cook_c_string(
+        &self,
+        kind: token::LitKind,
+        mode: Mode,
+        start: BytePos,
+        end: BytePos,
+        prefix_len: u32,
+        postfix_len: u32,
+    ) -> (token::LitKind, Symbol) {
+        let mut has_fatal_err = false;
+        let content_start = start + BytePos(prefix_len);
+        let content_end = end - BytePos(postfix_len);
+        let lit_content = self.str_from_to(content_start, content_end);
+        unescape::unescape_c_string(lit_content, mode, &mut |range, result| {
+            // Here we only check for errors. The actual unescaping is done later.
+            if let Err(err) = result {
+                let span_with_quotes = self.mk_sp(start, end);
+                let (start, end) = (range.start as u32, range.end as u32);
+                let lo = content_start + BytePos(start);
+                let hi = lo + BytePos(end - start);
+                let span = self.mk_sp(lo, hi);
+                if err.is_fatal() {
+                    has_fatal_err = true;
+                }
+                emit_unescape_error(
+                    &self.sess.span_diagnostic,
+                    lit_content,
+                    span_with_quotes,
+                    span,
+                    mode,
+                    range,
+                    err,
+                );
+            }
+        });
+
+        // We normally exclude the quotes for the symbol, but for errors we
+        // include it because it results in clearer error messages.
+        if !has_fatal_err {
+            (kind, Symbol::intern(lit_content))
+        } else {
+            (token::Err, self.symbol_from_to(start, end))
+        }
+    }
 }

 pub fn nfc_normalize(string: &str) -> Symbol {
--- a/library/core/src/ffi/c_str.rs
+++ b/library/core/src/ffi/c_str.rs
@ -82,6 +82,7 @@ use crate::str;
 #[cfg_attr(not(test), rustc_diagnostic_item = "CStr")]
 #[stable(feature = "core_c_str", since = "1.64.0")]
 #[rustc_has_incoherent_inherent_impls]
+#[cfg_attr(not(bootstrap), lang = "CStr")]
 // FIXME:
 // `fn from` in `impl From<&CStr> for Box<CStr>` current implementation relies
 // on `CStr` being layout-compatible with `[u8]`.
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@ -811,7 +811,9 @@ impl<'src> Classifier<'src> {
                | LiteralKind::Str { .. }
                | LiteralKind::ByteStr { .. }
                | LiteralKind::RawStr { .. }
-                | LiteralKind::RawByteStr { .. } => Class::String,
+                | LiteralKind::RawByteStr { .. }
+                | LiteralKind::CStr { .. }
+                | LiteralKind::RawCStr { .. } => Class::String,
                // Number literals.
                LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
            },
--- a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs
+++ b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs
@ -284,6 +284,7 @@ impl<'a> NormalizedPat<'a> {
                    LitKind::Str(sym, _) => Self::LitStr(sym),
                    LitKind::ByteStr(ref bytes, _) => Self::LitBytes(bytes),
                    LitKind::Byte(val) => Self::LitInt(val.into()),
+                    LitKind::CStr(ref bytes, _) => Self::LitBytes(bytes),
                    LitKind::Char(val) => Self::LitInt(val.into()),
                    LitKind::Int(val, _) => Self::LitInt(val),
                    LitKind::Bool(val) => Self::LitBool(val),
--- a/src/tools/clippy/clippy_lints/src/utils/author.rs
+++ b/src/tools/clippy/clippy_lints/src/utils/author.rs
@ -304,6 +304,11 @@ impl<'a, 'tcx> PrintVisitor<'a, 'tcx> {
                kind!("ByteStr(ref {vec})");
                chain!(self, "let [{:?}] = **{vec}", vec.value);
            },
+            LitKind::CStr(ref vec, _) => {
+                bind!(self, vec);
+                kind!("CStr(ref {vec})");
+                chain!(self, "let [{:?}] = **{vec}", vec.value);
+            }
            LitKind::Str(s, _) => {
                bind!(self, s);
                kind!("Str({s}, _)");
--- a/src/tools/clippy/clippy_utils/src/consts.rs
+++ b/src/tools/clippy/clippy_utils/src/consts.rs
@ -211,6 +211,7 @@ pub fn lit_to_mir_constant(lit: &LitKind, ty: Option<Ty<'_>>) -> Constant {
        LitKind::Str(ref is, _) => Constant::Str(is.to_string()),
        LitKind::Byte(b) => Constant::Int(u128::from(b)),
        LitKind::ByteStr(ref s, _) => Constant::Binary(Lrc::clone(s)),
+        LitKind::CStr(ref s, _) => Constant::Binary(Lrc::clone(s)),
        LitKind::Char(c) => Constant::Char(c),
        LitKind::Int(n, _) => Constant::Int(n),
        LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty {