Rollup merge of #126994 - Alexendoo:explain-markdown, r=tgross35

Support lists and stylings in more places for `rustc --explain`

Adds support for `*foo*` emphasis, for stylings that do not immediately follow whitespace (e.g. ``(`Foo`)``), and for lists that start with whitespace:

```md
* previously supported
```
```md
 * now also supported
 ```

These are fairly common in the existing error docs. Some before/after examples:

### E0460

![image](https://github.com/rust-lang/rust/assets/1830331/4d0dc5dd-b71f-48b1-97ae-9f7199e952ed)
![image](https://github.com/rust-lang/rust/assets/1830331/4bbcb1e4-99ba-4d0d-b338-fe19d96a5eb1)

### E0059

![image](https://github.com/rust-lang/rust/assets/1830331/8457f69a-3126-4777-aa4a-953f7b29f59b)
![image](https://github.com/rust-lang/rust/assets/1830331/ac2189f8-512e-4b3b-886d-6c4a619d17f2)
This commit is contained in:
Matthias Krüger 2024-07-23 19:42:35 +02:00 committed by GitHub
commit 8e206c0387
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 96 additions and 36 deletions

View file

@ -70,4 +70,4 @@ fn spawn<F: Future + Send + 'static>(future: F) {
Similarly to closures, `async` blocks are not executed immediately and may
capture closed-over data by reference. For more information, see
https://rust-lang.github.io/async-book/03_async_await/01_chapter.html.
<https://rust-lang.github.io/async-book/03_async_await/01_chapter.html>.

View file

@ -20,7 +20,7 @@ where
The `DispatchFromDyn` trait currently can only be implemented for
builtin pointer types and structs that are newtype wrappers around them
— that is, the struct must have only one field (except for`PhantomData`),
— that is, the struct must have only one field (except for `PhantomData`),
and that field must itself implement `DispatchFromDyn`.
```

View file

@ -15,6 +15,7 @@
#![feature(box_patterns)]
#![feature(error_reporter)]
#![feature(extract_if)]
#![feature(if_let_guard)]
#![feature(let_chains)]
#![feature(negative_impls)]
#![feature(never_type)]

View file

@ -10,15 +10,15 @@ const CBK: &[u8] = b"```";
const CIL: &[u8] = b"`";
const CMT_E: &[u8] = b"-->";
const CMT_S: &[u8] = b"<!--";
const EMP: &[u8] = b"_";
const EMP_U: &[u8] = b"_";
const EMP_A: &[u8] = b"*";
const HDG: &[u8] = b"#";
const LNK_CHARS: &str = "$-_.+!*'()/&?=:%";
const LNK_E: &[u8] = b"]";
const LNK_S: &[u8] = b"[";
const STG: &[u8] = b"**";
const STG_U: &[u8] = b"__";
const STG_A: &[u8] = b"**";
const STK: &[u8] = b"~~";
const UL1: &[u8] = b"* ";
const UL2: &[u8] = b"- ";
/// Pattern replacements
const REPLACEMENTS: &[(&str, &str)] = &[
@ -100,22 +100,29 @@ fn parse_recursive<'a>(buf: &'a [u8], ctx: Context) -> MdStream<'_> {
};
let res: ParseResult<'_> = match (top_blk, prev) {
(_, Newline | Whitespace) if loop_buf.starts_with(CMT_S) => {
_ if loop_buf.starts_with(CMT_S) => {
parse_simple_pat(loop_buf, CMT_S, CMT_E, Po::TrimNoEsc, MdTree::Comment)
}
(true, Newline) if loop_buf.starts_with(CBK) => Some(parse_codeblock(loop_buf)),
(_, Newline | Whitespace) if loop_buf.starts_with(CIL) => parse_codeinline(loop_buf),
_ if loop_buf.starts_with(CIL) => parse_codeinline(loop_buf),
(true, Newline | Whitespace) if loop_buf.starts_with(HDG) => parse_heading(loop_buf),
(true, Newline) if loop_buf.starts_with(BRK) => {
Some((MdTree::HorizontalRule, parse_to_newline(loop_buf).1))
}
(_, Newline | Whitespace) if loop_buf.starts_with(EMP) => {
parse_simple_pat(loop_buf, EMP, EMP, Po::None, MdTree::Emphasis)
(_, Newline) if unordered_list_start(loop_buf) => Some(parse_unordered_li(loop_buf)),
(_, Newline | Whitespace) if loop_buf.starts_with(STG_U) => {
parse_simple_pat(loop_buf, STG_U, STG_U, Po::None, MdTree::Strong)
}
(_, Newline | Whitespace) if loop_buf.starts_with(STG) => {
parse_simple_pat(loop_buf, STG, STG, Po::None, MdTree::Strong)
_ if loop_buf.starts_with(STG_A) => {
parse_simple_pat(loop_buf, STG_A, STG_A, Po::None, MdTree::Strong)
}
(_, Newline | Whitespace) if loop_buf.starts_with(STK) => {
(_, Newline | Whitespace) if loop_buf.starts_with(EMP_U) => {
parse_simple_pat(loop_buf, EMP_U, EMP_U, Po::None, MdTree::Emphasis)
}
_ if loop_buf.starts_with(EMP_A) => {
parse_simple_pat(loop_buf, EMP_A, EMP_A, Po::None, MdTree::Emphasis)
}
_ if loop_buf.starts_with(STK) => {
parse_simple_pat(loop_buf, STK, STK, Po::None, MdTree::Strikethrough)
}
(_, Newline | Whitespace) if loop_buf.starts_with(ANC_S) => {
@ -130,11 +137,8 @@ fn parse_recursive<'a>(buf: &'a [u8], ctx: Context) -> MdStream<'_> {
_ => None,
}
}
(_, Newline) if (loop_buf.starts_with(UL1) || loop_buf.starts_with(UL2)) => {
Some(parse_unordered_li(loop_buf))
}
(_, Newline) if ord_list_start(loop_buf).is_some() => Some(parse_ordered_li(loop_buf)),
(_, Newline | Whitespace) if loop_buf.starts_with(LNK_S) => {
_ if loop_buf.starts_with(LNK_S) => {
parse_any_link(loop_buf, top_blk && prev == Prev::Newline)
}
(_, Escape | _) => None,
@ -251,7 +255,6 @@ fn parse_heading(buf: &[u8]) -> ParseResult<'_> {
/// Bulleted list
fn parse_unordered_li(buf: &[u8]) -> Parsed<'_> {
debug_assert!(buf.starts_with(b"* ") || buf.starts_with(b"- "));
let (txt, rest) = get_indented_section(&buf[2..]);
let ctx = Context { top_block: false, prev: Prev::Whitespace };
let stream = parse_recursive(trim_ascii_start(txt), ctx);
@ -267,25 +270,28 @@ fn parse_ordered_li(buf: &[u8]) -> Parsed<'_> {
(MdTree::OrderedListItem(num, stream), rest)
}
/// Split `buf` at the end of its leading indented section and return
/// `(section, rest)`, i.e. `(&buf[..end], &buf[end..])`.
///
/// In the line-based scan the first line always belongs to the section.
/// Following lines are appended until a non-empty line either starts an
/// unordered list item (see `unordered_list_start`) or does not begin with
/// ASCII whitespace; empty lines do not end the section.
// NOTE(review): this listing shows a byte-window scan (`buf.windows(2)`) and a
// line-based scan (`buf.split`) back to back with unbalanced braces — it looks
// like removed and added diff lines interleaved; confirm against the real file.
fn get_indented_section(buf: &[u8]) -> (&[u8], &[u8]) {
let mut end = buf.len();
for (idx, window) in buf.windows(2).enumerate() {
let &[ch, next_ch] = window else { unreachable!("always 2 elements") };
if idx >= buf.len().saturating_sub(2) && next_ch == b'\n' {
// End of stream
end = buf.len().saturating_sub(1);
break;
} else if ch == b'\n' && (!next_ch.is_ascii_whitespace() || next_ch == b'\n') {
end = idx;
break;
// Line-based scan: start with the length of the first line (0 for empty input).
let mut lines = buf.split(|&byte| byte == b'\n');
let mut end = lines.next().map_or(0, |line| line.len());
for line in lines {
// Empty lines have no first byte and fall through to be included below.
if let Some(first) = line.first() {
// A new list item or an unindented line ends the section.
if unordered_list_start(line) || !first.is_ascii_whitespace() {
break;
}
}
// `+ 1` accounts for the `\n` separator that `split` removed.
end += line.len() + 1;
}
(&buf[..end], &buf[end..])
}
/// Report whether `buf` opens an unordered list item: zero or more leading
/// space characters, then a `*` or `-` bullet immediately followed by a space.
///
/// Only the space byte counts as indentation here; tabs and other whitespace
/// before the bullet make this return `false`.
fn unordered_list_start(mut buf: &[u8]) -> bool {
    // Drop the run of leading spaces so the bullet check sees the marker first.
    let indent = buf.iter().take_while(|&&byte| byte == b' ').count();
    buf = &buf[indent..];
    buf.starts_with(b"* ") || buf.starts_with(b"- ")
}
/// Verify a valid ordered list start (e.g. `1.`) and parse it. Returns the
/// parsed number and offset of character after the dot.
fn ord_list_start(buf: &[u8]) -> Option<(u16, usize)> {

View file

@ -4,13 +4,13 @@ use ParseOpt as PO;
/// Exercises `parse_simple_pat` with `**` as both the opening and the closing
/// delimiter, wrapping the matched text in `MdTree::Strong`.
#[test]
fn test_parse_simple() {
let buf = "**abcd** rest";
let (t, r) = parse_simple_pat(buf.as_bytes(), STG, STG, PO::None, MdTree::Strong).unwrap();
let (t, r) = parse_simple_pat(buf.as_bytes(), b"**", b"**", PO::None, MdTree::Strong).unwrap();
// The delimited text becomes the `Strong` payload; the bytes after the
// closing delimiter come back as the remainder.
assert_eq!(t, MdTree::Strong("abcd"));
assert_eq!(r, b" rest");
// Escaping should fail: with a backslash before the closing `**`, no match
// is returned.
let buf = r"**abcd\** rest";
let res = parse_simple_pat(buf.as_bytes(), STG, STG, PO::None, MdTree::Strong);
let res = parse_simple_pat(buf.as_bytes(), b"**", b"**", PO::None, MdTree::Strong);
assert!(res.is_none());
}
@ -141,12 +141,12 @@ fn test_indented_section() {
assert_eq!(str::from_utf8(r).unwrap(), "\nnot ind");
let (txt, rest) = get_indented_section(IND2.as_bytes());
assert_eq!(str::from_utf8(txt).unwrap(), "test end of stream\n 1\n 2");
assert_eq!(str::from_utf8(rest).unwrap(), "\n");
assert_eq!(str::from_utf8(txt).unwrap(), "test end of stream\n 1\n 2\n");
assert_eq!(str::from_utf8(rest).unwrap(), "");
let (txt, rest) = get_indented_section(IND3.as_bytes());
assert_eq!(str::from_utf8(txt).unwrap(), "test empty lines\n 1\n 2");
assert_eq!(str::from_utf8(rest).unwrap(), "\n\nnot ind");
assert_eq!(str::from_utf8(txt).unwrap(), "test empty lines\n 1\n 2\n");
assert_eq!(str::from_utf8(rest).unwrap(), "\nnot ind");
}
const HBT: &str = r"# Heading
@ -310,3 +310,56 @@ fn test_code_at_start() {
let res = entrypoint(CODE_STARTLINE);
assert_eq!(res, expected);
}
/// Inline code must be recognised when it directly follows a non-whitespace
/// character, here an opening parenthesis.
#[test]
fn test_code_in_parens() {
    let res = entrypoint("(`Foo`)");
    // The parentheses stay plain text on either side of the code span.
    let expected = vec![
        MdTree::PlainText("("),
        MdTree::CodeInline("Foo"),
        MdTree::PlainText(")"),
    ]
    .into();
    assert_eq!(res, expected);
}
const LIST_WITH_SPACE: &str = "
para
* l1
* l2
";
/// `LIST_WITH_SPACE` must parse to a paragraph followed by two unordered list
/// items separated by a line break.
#[test]
fn test_list_with_space() {
    let res = entrypoint(LIST_WITH_SPACE);

    // Each bullet holds a single plain-text child.
    let first_item = MdTree::UnorderedListItem(vec![MdTree::PlainText("l1")].into());
    let second_item = MdTree::UnorderedListItem(vec![MdTree::PlainText("l2")].into());

    let expected = vec![
        MdTree::PlainText("para"),
        MdTree::ParagraphBreak,
        first_item,
        MdTree::LineBreak,
        second_item,
    ]
    .into();
    assert_eq!(res, expected);
}
const SNAKE_CASE: &str = "
foo*bar*
foo**bar**
foo_bar_
foo__bar__
";
/// Asterisk delimiters style text in the middle of a word, while underscore
/// delimiters in the same position are left as plain text so that
/// snake_case-like identifiers are not mangled.
#[test]
fn test_snake_case() {
    let res = entrypoint(SNAKE_CASE);

    let expected = vec![
        // `foo*bar*`: emphasis applies mid-word.
        MdTree::PlainText("foo"),
        MdTree::Emphasis("bar"),
        MdTree::PlainText(" "),
        // `foo**bar**`: strong applies mid-word.
        MdTree::PlainText("foo"),
        MdTree::Strong("bar"),
        MdTree::PlainText(" "),
        // Underscore forms are passed through verbatim.
        MdTree::PlainText("foo_bar_"),
        MdTree::PlainText(" "),
        MdTree::PlainText("foo__bar__"),
    ]
    .into();
    assert_eq!(res, expected);
}