From 294c3dda881ae65d528ee0380b7628deaf33ae96 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 29 Jun 2024 11:24:36 -0700 Subject: [PATCH] rustdoc: add usable lint for pulldown-cmark-0.11 parsing changes --- Cargo.lock | 12 ++ src/librustdoc/Cargo.toml | 1 + src/librustdoc/lint.rs | 9 ++ src/librustdoc/passes/lint.rs | 2 + .../passes/lint/unportable_markdown.rs | 152 ++++++++++++++++++ tests/rustdoc-ui/unportable-markdown.rs | 63 ++++++++ tests/rustdoc-ui/unportable-markdown.stderr | 39 +++++ 7 files changed, 278 insertions(+) create mode 100644 src/librustdoc/passes/lint/unportable_markdown.rs create mode 100644 tests/rustdoc-ui/unportable-markdown.rs create mode 100644 tests/rustdoc-ui/unportable-markdown.stderr diff --git a/Cargo.lock b/Cargo.lock index 3af90a252ae..96cef907084 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3121,6 +3121,17 @@ dependencies = [ "cc", ] +[[package]] +name = "pulldown-cmark" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" +dependencies = [ + "bitflags 2.5.0", + "memchr", + "unicase", +] + [[package]] name = "pulldown-cmark" version = "0.10.3" @@ -4890,6 +4901,7 @@ dependencies = [ "indexmap", "itertools", "minifier", + "pulldown-cmark 0.9.6", "regex", "rustdoc-json-types", "serde", diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index 31222f213d8..51fb126cb34 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -13,6 +13,7 @@ base64 = "0.21.7" itertools = "0.12" indexmap = "2" minifier = "0.3.0" +pulldown-cmark-old = { version = "0.9.6", package = "pulldown-cmark", default-features = false } regex = "1" rustdoc-json-types = { path = "../rustdoc-json-types" } serde_json = "1.0" diff --git a/src/librustdoc/lint.rs b/src/librustdoc/lint.rs index dd2bb47e592..8eaca70eaff 100644 --- a/src/librustdoc/lint.rs +++ b/src/librustdoc/lint.rs @@ -196,6 +196,14 @@ declare_rustdoc_lint! { "detects redundant explicit links in doc comments" } +declare_rustdoc_lint! { + /// This compatibility lint checks for Markdown syntax that works in the old engine but not + /// the new one. + UNPORTABLE_MARKDOWN, + Warn, + "detects markdown that is interpreted differently in different parser" +} + pub(crate) static RUSTDOC_LINTS: Lazy> = Lazy::new(|| { vec![ BROKEN_INTRA_DOC_LINKS, @@ -209,6 +217,7 @@ pub(crate) static RUSTDOC_LINTS: Lazy> = Lazy::new(|| { MISSING_CRATE_LEVEL_DOCS, UNESCAPED_BACKTICKS, REDUNDANT_EXPLICIT_LINKS, + UNPORTABLE_MARKDOWN, ] }); diff --git a/src/librustdoc/passes/lint.rs b/src/librustdoc/passes/lint.rs index c6d5b7bd346..bc804a340bf 100644 --- a/src/librustdoc/passes/lint.rs +++ b/src/librustdoc/passes/lint.rs @@ -6,6 +6,7 @@ mod check_code_block_syntax; mod html_tags; mod redundant_explicit_links; mod unescaped_backticks; +mod unportable_markdown; use super::Pass; use crate::clean::*; @@ -31,6 +32,7 @@ impl<'a, 'tcx> DocVisitor for Linter<'a, 'tcx> { html_tags::visit_item(self.cx, item); unescaped_backticks::visit_item(self.cx, item); redundant_explicit_links::visit_item(self.cx, item); + unportable_markdown::visit_item(self.cx, item); self.visit_item_recur(item) } diff --git a/src/librustdoc/passes/lint/unportable_markdown.rs b/src/librustdoc/passes/lint/unportable_markdown.rs new file mode 100644 index 00000000000..5f185377634 --- /dev/null +++ b/src/librustdoc/passes/lint/unportable_markdown.rs @@ -0,0 +1,152 @@ +//! Detects specific markdown syntax that's different between pulldown-cmark +//! 0.9 and 0.11. +//! +//! This is a mitigation for old parser bugs that affected some +//! real crates' docs. The old parser claimed to comply with CommonMark, +//! but it did not. These warnings will eventually be removed, +//! though some of them may become Clippy lints. +//! +//! +//! +//! + +use crate::clean::Item; +use crate::core::DocContext; +use pulldown_cmark as cmarkn; +use pulldown_cmark_old as cmarko; +use rustc_lint_defs::Applicability; +use rustc_resolve::rustdoc::source_span_for_markdown_range; +use std::collections::{BTreeMap, BTreeSet}; + +pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { + let tcx = cx.tcx; + let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else { + // If non-local, no need to check anything. + return; + }; + + let dox = item.doc_value(); + if dox.is_empty() { + return; + } + + // P1: unintended strikethrough was fixed by requiring single-tildes to flank + // the same way underscores do, so nothing is done here + + // P2: block quotes without following space parsed wrong + // + // This is the set of starting points for block quotes with no space after + // the `>`. It is populated by the new parser, and if the old parser fails to + // clear it out, it'll produce a warning. + let mut spaceless_block_quotes = BTreeSet::new(); + + // P3: missing footnote references + // + // This is populated by listening for FootnoteReference from + // the new parser and old parser. + let mut missing_footnote_references = BTreeMap::new(); + let mut found_footnote_references = BTreeSet::new(); + + // populate problem cases from new parser + { + pub fn main_body_opts_new() -> cmarkn::Options { + cmarkn::Options::ENABLE_TABLES + | cmarkn::Options::ENABLE_FOOTNOTES + | cmarkn::Options::ENABLE_STRIKETHROUGH + | cmarkn::Options::ENABLE_TASKLISTS + | cmarkn::Options::ENABLE_SMART_PUNCTUATION + } + let mut parser_new = cmarkn::Parser::new_ext(&dox, main_body_opts_new()).into_offset_iter(); + while let Some((event, span)) = parser_new.next() { + if let cmarkn::Event::Start(cmarkn::Tag::BlockQuote(_)) = event { + if !dox[span.clone()].starts_with("> ") { + spaceless_block_quotes.insert(span.start); + } + } + if let cmarkn::Event::FootnoteReference(_) = event { + found_footnote_references.insert(span.start + 1); + } + } + } + + // remove cases where they don't actually differ + { + pub fn main_body_opts_old() -> cmarko::Options { + cmarko::Options::ENABLE_TABLES + | cmarko::Options::ENABLE_FOOTNOTES + | cmarko::Options::ENABLE_STRIKETHROUGH + | cmarko::Options::ENABLE_TASKLISTS + | cmarko::Options::ENABLE_SMART_PUNCTUATION + } + let mut parser_old = cmarko::Parser::new_ext(&dox, main_body_opts_old()).into_offset_iter(); + while let Some((event, span)) = parser_old.next() { + if let cmarko::Event::Start(cmarko::Tag::BlockQuote) = event { + if !dox[span.clone()].starts_with("> ") { + spaceless_block_quotes.remove(&span.start); + } + } + if let cmarko::Event::FootnoteReference(_) = event { + if !found_footnote_references.contains(&(span.start + 1)) { + missing_footnote_references.insert(span.start + 1, span); + } + } + } + } + + for start in spaceless_block_quotes { + let (span, precise) = + source_span_for_markdown_range(tcx, &dox, &(start..start + 1), &item.attrs.doc_strings) + .map(|span| (span, true)) + .unwrap_or_else(|| (item.attr_span(tcx), false)); + + tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, |lint| { + lint.primary_message("unportable markdown"); + lint.help(format!("confusing block quote with no space after the `>` marker")); + if precise { + lint.span_suggestion( + span.shrink_to_hi(), + "if the quote is intended, add a space", + " ", + Applicability::MaybeIncorrect, + ); + lint.span_suggestion( + span.shrink_to_lo(), + "if it should not be a quote, escape it", + "\\", + Applicability::MaybeIncorrect, + ); + } + }); + } + for (_caret, span) in missing_footnote_references { + let (ref_span, precise) = + source_span_for_markdown_range(tcx, &dox, &span, &item.attrs.doc_strings) + .map(|span| (span, true)) + .unwrap_or_else(|| (item.attr_span(tcx), false)); + + tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, ref_span, |lint| { + lint.primary_message("unportable markdown"); + if precise { + lint.span_suggestion( + ref_span.shrink_to_lo(), + "if it should not be a footnote, escape it", + "\\", + Applicability::MaybeIncorrect, + ); + } + if dox.as_bytes().get(span.end) == Some(&b'[') { + lint.help("confusing footnote reference and link"); + if precise { + lint.span_suggestion( + ref_span.shrink_to_hi(), + "if the footnote is intended, add a space", + " ", + Applicability::MaybeIncorrect, + ); + } else { + lint.help("there should be a space between the link and the footnote"); + } + } + }); + } +} diff --git a/tests/rustdoc-ui/unportable-markdown.rs b/tests/rustdoc-ui/unportable-markdown.rs new file mode 100644 index 00000000000..8035e680f3c --- /dev/null +++ b/tests/rustdoc-ui/unportable-markdown.rs @@ -0,0 +1,63 @@ +// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929 +// +// A series of test cases for CommonMark corner cases that pulldown-cmark 0.11 fixes. +// +// This version of the lint is targeted at two especially-common cases where docs got broken. +// Other differences in parsing should not warn. +#![allow(rustdoc::broken_intra_doc_links)] +#![deny(rustdoc::unportable_markdown)] + +/// +/// +/// Test footnote [^foot]. +/// +/// [^foot]: This is nested within the footnote now, but didn't used to be. +/// +/// This is a multi-paragraph footnote. +pub struct GfmFootnotes; + +/// +/// +/// test [^foo][^bar] +//~^ ERROR unportable markdown +/// +/// [^foo]: test +/// [^bar]: test2 +pub struct FootnoteSmashedName; + +/// +/// +/// - _t +/// # test +/// t_ +pub struct NestingCornerCase; + +/// +/// +/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~ +pub struct Emphasis1; + +/// +/// +/// | +/// | +pub struct NotEnoughTable; + +/// +/// +/// foo +/// >bar +//~^ ERROR unportable markdown +pub struct BlockQuoteNoSpace; + +/// Negative test. +/// +/// foo +/// > bar +pub struct BlockQuoteSpace; + +/// Negative test. +/// +/// >bar +/// baz +pub struct BlockQuoteNoSpaceStart; diff --git a/tests/rustdoc-ui/unportable-markdown.stderr b/tests/rustdoc-ui/unportable-markdown.stderr new file mode 100644 index 00000000000..b524aca25ae --- /dev/null +++ b/tests/rustdoc-ui/unportable-markdown.stderr @@ -0,0 +1,39 @@ +error: unportable markdown + --> $DIR/unportable-markdown.rs:21:10 + | +LL | /// test [^foo][^bar] + | ^^^^^^ + | + = help: confusing footnote reference and link +note: the lint level is defined here + --> $DIR/unportable-markdown.rs:8:9 + | +LL | #![deny(rustdoc::unportable_markdown)] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +help: if it should not be a footnote, escape it + | +LL | /// test \[^foo][^bar] + | + +help: if the footnote is intended, add a space + | +LL | /// test [^foo] [^bar] + | + + +error: unportable markdown + --> $DIR/unportable-markdown.rs:49:5 + | +LL | /// >bar + | ^ + | + = help: confusing block quote with no space after the `>` marker +help: if the quote is intended, add a space + | +LL | /// > bar + | + +help: if it should not be a quote, escape it + | +LL | /// \>bar + | + + +error: aborting due to 2 previous errors +