From 15fbe618a14ddde520561c4cf1b85d4e4c9005f8 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 29 Jun 2024 09:34:52 -0700 Subject: [PATCH] rustdoc: update to pulldown-cmark 0.11 --- Cargo.lock | 33 ++++-- compiler/rustc_resolve/Cargo.toml | 2 +- compiler/rustc_resolve/src/rustdoc.rs | 14 ++- src/librustdoc/html/markdown.rs | 111 +++++++++--------- src/librustdoc/passes/lint/bare_urls.rs | 4 +- src/librustdoc/passes/lint/html_tags.rs | 6 +- .../passes/lint/redundant_explicit_links.rs | 16 ++- src/tools/tidy/src/deps.rs | 1 + 8 files changed, 100 insertions(+), 87 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 94d70a020a4..3af90a252ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3121,17 +3121,6 @@ dependencies = [ "cc", ] -[[package]] -name = "pulldown-cmark" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" -dependencies = [ - "bitflags 2.5.0", - "memchr", - "unicase", -] - [[package]] name = "pulldown-cmark" version = "0.10.3" @@ -3141,7 +3130,19 @@ dependencies = [ "bitflags 2.5.0", "getopts", "memchr", - "pulldown-cmark-escape", + "pulldown-cmark-escape 0.10.1", + "unicase", +] + +[[package]] +name = "pulldown-cmark" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8746739f11d39ce5ad5c2520a9b75285310dbfe78c541ccf832d38615765aec0" +dependencies = [ + "bitflags 2.5.0", + "memchr", + "pulldown-cmark-escape 0.11.0", "unicase", ] @@ -3151,6 +3152,12 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd348ff538bc9caeda7ee8cad2d1d48236a1f443c1fa3913c6a02fe0043b1dd3" +[[package]] +name = "pulldown-cmark-escape" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" + [[package]] name = "pulldown-cmark-to-cmark" version = "13.0.0" @@ -4604,7 +4611,7 @@ name = "rustc_resolve" version = "0.0.0" dependencies = [ "bitflags 2.5.0", - "pulldown-cmark 0.9.6", + "pulldown-cmark 0.11.0", "rustc_arena", "rustc_ast", "rustc_ast_pretty", diff --git a/compiler/rustc_resolve/Cargo.toml b/compiler/rustc_resolve/Cargo.toml index b6ae54010c2..b71853b871d 100644 --- a/compiler/rustc_resolve/Cargo.toml +++ b/compiler/rustc_resolve/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] # tidy-alphabetical-start bitflags = "2.4.1" -pulldown-cmark = { version = "0.9.6", default-features = false } +pulldown-cmark = { version = "0.11", features = ["html"], default-features = false } rustc_arena = { path = "../rustc_arena" } rustc_ast = { path = "../rustc_ast" } rustc_ast_pretty = { path = "../rustc_ast_pretty" } diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs index 66b4981eb55..59460815321 100644 --- a/compiler/rustc_resolve/src/rustdoc.rs +++ b/compiler/rustc_resolve/src/rustdoc.rs @@ -1,4 +1,6 @@ -use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag}; +use pulldown_cmark::{ + BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag, +}; use rustc_ast as ast; use rustc_ast::util::comments::beautify_doc_string; use rustc_data_structures::fx::FxHashMap; @@ -427,7 +429,9 @@ fn parse_links<'md>(doc: &'md str) -> Vec> { while let Some(event) = event_iter.next() { match event { - Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => { + Event::Start(Tag::Link { link_type, dest_url, title: _, id: _ }) + if may_be_doc_link(link_type) => + { if matches!( link_type, LinkType::Inline @@ -441,7 +445,7 @@ fn parse_links<'md>(doc: &'md str) -> Vec> { } } - links.push(preprocess_link(&dest)); + links.push(preprocess_link(&dest_url)); } _ => {} } @@ -451,8 +455,8 @@ fn parse_links<'md>(doc: &'md str) -> Vec> { } /// Collects additional data of link. -fn collect_link_data<'input, 'callback>( - event_iter: &mut Parser<'input, 'callback>, +fn collect_link_data<'input, F: BrokenLinkCallback<'input>>( + event_iter: &mut Parser<'input, F>, ) -> Option> { let mut display_text: Option = None; let mut append_text = |text: CowStr<'_>| { diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index bae929c64ea..a7f0df5afa9 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -54,7 +54,8 @@ use crate::html::render::small_url_encode; use crate::html::toc::TocBuilder; use pulldown_cmark::{ - html, BrokenLink, CodeBlockKind, CowStr, Event, LinkType, OffsetIter, Options, Parser, Tag, + html, BrokenLink, BrokenLinkCallback, CodeBlockKind, CowStr, Event, LinkType, OffsetIter, + Options, Parser, Tag, TagEnd, }; #[cfg(test)] @@ -230,7 +231,7 @@ impl<'a, I: Iterator>> Iterator for CodeBlocks<'_, 'a, I> { let mut original_text = String::new(); for event in &mut self.inner { match event { - Event::End(Tag::CodeBlock(..)) => break, + Event::End(TagEnd::CodeBlock) => break, Event::Text(ref s) => { original_text.push_str(s); } @@ -359,16 +360,17 @@ impl<'a, I: Iterator>> Iterator for LinkReplacer<'a, I> { match &mut event { // This is a shortcut link that was resolved by the broken_link_callback: `[fn@f]` // Remove any disambiguator. - Some(Event::Start(Tag::Link( + Some(Event::Start(Tag::Link { // [fn@f] or [fn@f][] - LinkType::ShortcutUnknown | LinkType::CollapsedUnknown, - dest, + link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown, + dest_url, title, - ))) => { - debug!("saw start of shortcut link to {dest} with title {title}"); + .. + })) => { + debug!("saw start of shortcut link to {dest_url} with title {title}"); // If this is a shortcut link, it was resolved by the broken_link_callback. // So the URL will already be updated properly. - let link = self.links.iter().find(|&link| *link.href == **dest); + let link = self.links.iter().find(|&link| *link.href == **dest_url); // Since this is an external iterator, we can't replace the inner text just yet. // Store that we saw a link so we know to replace it later. if let Some(link) = link { @@ -381,16 +383,9 @@ impl<'a, I: Iterator>> Iterator for LinkReplacer<'a, I> { } } // Now that we're done with the shortcut link, don't replace any more text. - Some(Event::End(Tag::Link( - LinkType::ShortcutUnknown | LinkType::CollapsedUnknown, - dest, - _, - ))) => { - debug!("saw end of shortcut link to {dest}"); - if self.links.iter().any(|link| *link.href == **dest) { - assert!(self.shortcut_link.is_some(), "saw closing link without opening tag"); - self.shortcut_link = None; - } + Some(Event::End(TagEnd::Link)) if self.shortcut_link.is_some() => { + debug!("saw end of shortcut link"); + self.shortcut_link = None; } // Handle backticks in inline code blocks, but only if we're in the middle of a shortcut link. // [`fn@f`] @@ -433,9 +428,11 @@ impl<'a, I: Iterator>> Iterator for LinkReplacer<'a, I> { } // If this is a link, but not a shortcut link, // replace the URL, since the broken_link_callback was not called. - Some(Event::Start(Tag::Link(_, dest, title))) => { - if let Some(link) = self.links.iter().find(|&link| *link.original_text == **dest) { - *dest = CowStr::Borrowed(link.href.as_ref()); + Some(Event::Start(Tag::Link { dest_url, title, .. })) => { + if let Some(link) = + self.links.iter().find(|&link| *link.original_text == **dest_url) + { + *dest_url = CowStr::Borrowed(link.href.as_ref()); if title.is_empty() && !link.tooltip.is_empty() { *title = CowStr::Borrowed(link.tooltip.as_ref()); } @@ -477,9 +474,9 @@ impl<'a, I: Iterator>> Iterator for TableWrapper<'a, I> { self.stored_events.push_back(Event::Start(Tag::Table(t))); Event::Html(CowStr::Borrowed("
")) } - Event::End(Tag::Table(t)) => { + Event::End(TagEnd::Table) => { self.stored_events.push_back(Event::Html(CowStr::Borrowed("
"))); - Event::End(Tag::Table(t)) + Event::End(TagEnd::Table) } e => e, }) @@ -519,11 +516,11 @@ impl<'a, 'b, 'ids, I: Iterator>> Iterator } let event = self.inner.next(); - if let Some((Event::Start(Tag::Heading(level, _, _)), _)) = event { + if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event { let mut id = String::new(); for event in &mut self.inner { match &event.0 { - Event::End(Tag::Heading(..)) => break, + Event::End(TagEnd::Heading(_)) => break, Event::Text(text) | Event::Code(text) => { id.extend(text.chars().filter_map(slugify)); self.buf.push_back(event); @@ -566,27 +563,27 @@ impl<'a, I: Iterator>> SummaryLine<'a, I> { } } -fn check_if_allowed_tag(t: &Tag<'_>) -> bool { +fn check_if_allowed_tag(t: &TagEnd) -> bool { matches!( t, - Tag::Paragraph - | Tag::Emphasis - | Tag::Strong - | Tag::Strikethrough - | Tag::Link(..) - | Tag::BlockQuote + TagEnd::Paragraph + | TagEnd::Emphasis + | TagEnd::Strong + | TagEnd::Strikethrough + | TagEnd::Link + | TagEnd::BlockQuote ) } -fn is_forbidden_tag(t: &Tag<'_>) -> bool { +fn is_forbidden_tag(t: &TagEnd) -> bool { matches!( t, - Tag::CodeBlock(_) - | Tag::Table(_) - | Tag::TableHead - | Tag::TableRow - | Tag::TableCell - | Tag::FootnoteDefinition(_) + TagEnd::CodeBlock + | TagEnd::Table + | TagEnd::TableHead + | TagEnd::TableRow + | TagEnd::TableCell + | TagEnd::FootnoteDefinition ) } @@ -604,12 +601,12 @@ impl<'a, I: Iterator>> Iterator for SummaryLine<'a, I> { let mut is_start = true; let is_allowed_tag = match event { Event::Start(ref c) => { - if is_forbidden_tag(c) { + if is_forbidden_tag(&c.to_end()) { self.skipped_tags += 1; return None; } self.depth += 1; - check_if_allowed_tag(c) + check_if_allowed_tag(&c.to_end()) } Event::End(ref c) => { if is_forbidden_tag(c) { @@ -633,7 +630,7 @@ impl<'a, I: Iterator>> Iterator for SummaryLine<'a, I> { if is_start { Some(Event::Start(Tag::Paragraph)) } else { - Some(Event::End(Tag::Paragraph)) + Some(Event::End(TagEnd::Paragraph)) } } else { Some(event) @@ -679,7 +676,7 @@ impl<'a, I: Iterator>> Iterator for Footnotes<'a, I> { Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => { let mut content = Vec::new(); for (event, _) in &mut self.inner { - if let Event::End(Tag::FootnoteDefinition(..)) = event { + if let Event::End(TagEnd::FootnoteDefinition) = event { break; } content.push(event); @@ -696,7 +693,7 @@ impl<'a, I: Iterator>> Iterator for Footnotes<'a, I> { for (mut content, id) in v { write!(ret, "
  • ").unwrap(); let mut is_paragraph = false; - if let Some(&Event::End(Tag::Paragraph)) = content.last() { + if let Some(&Event::End(TagEnd::Paragraph)) = content.last() { content.pop(); is_paragraph = true; } @@ -806,7 +803,7 @@ pub(crate) fn find_codes( tests.visit_test(text, block_info, line); prev_offset = offset.start; } - Event::Start(Tag::Heading(level, _, _)) => { + Event::Start(Tag::Heading { level, .. }) => { register_header = Some(level as u32); } Event::Text(ref s) if register_header.is_some() => { @@ -1432,7 +1429,7 @@ impl MarkdownItemInfo<'_> { // Treat inline HTML as plain text. let p = p.map(|event| match event.0 { - Event::Html(text) => (Event::Text(text), event.1), + Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1), _ => event, }); @@ -1442,7 +1439,7 @@ impl MarkdownItemInfo<'_> { let p = Footnotes::new(p); let p = TableWrapper::new(p.map(|(ev, _)| ev)); let p = p.filter(|event| { - !matches!(event, Event::Start(Tag::Paragraph) | Event::End(Tag::Paragraph)) + !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph)) }); html::push_html(&mut s, p); @@ -1472,7 +1469,7 @@ impl MarkdownSummaryLine<'_> { let mut s = String::new(); let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| { - !matches!(event, Event::Start(Tag::Paragraph) | Event::End(Tag::Paragraph)) + !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph)) }); html::push_html(&mut s, without_paragraphs); @@ -1544,8 +1541,8 @@ fn markdown_summary_with_limit( _ => {} }, Event::End(tag) => match tag { - Tag::Emphasis | Tag::Strong => buf.close_tag(), - Tag::Paragraph | Tag::Heading(..) => return ControlFlow::Break(()), + TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(), + TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()), _ => {} }, Event::HardBreak | Event::SoftBreak => buf.push(" ")?, @@ -1605,8 +1602,8 @@ pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> Strin } Event::HardBreak | Event::SoftBreak => s.push(' '), Event::Start(Tag::CodeBlock(..)) => break, - Event::End(Tag::Paragraph) => break, - Event::End(Tag::Heading(..)) => break, + Event::End(TagEnd::Paragraph) => break, + Event::End(TagEnd::Heading(..)) => break, _ => (), } } @@ -1765,7 +1762,7 @@ pub(crate) fn markdown_links<'md, R>( while let Some((event, span)) = event_iter.next() { match event { - Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => { + Event::Start(Tag::Link { link_type, dest_url, .. }) if may_be_doc_link(link_type) => { let range = match link_type { // Link is pulled from the link itself. LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => { @@ -1775,7 +1772,7 @@ pub(crate) fn markdown_links<'md, R>( LinkType::Inline => span_for_offset_backward(span, b'(', b')'), // Link is pulled from elsewhere in the document. LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => { - span_for_link(&dest, span) + span_for_link(&dest_url, span) } LinkType::Autolink | LinkType::Email => unreachable!(), }; @@ -1795,7 +1792,7 @@ pub(crate) fn markdown_links<'md, R>( if let Some(link) = preprocess_link(MarkdownLink { kind: link_type, - link: dest.into_string(), + link: dest_url.into_string(), display_text, range, }) { @@ -1810,8 +1807,8 @@ pub(crate) fn markdown_links<'md, R>( } /// Collects additional data of link. -fn collect_link_data<'input, 'callback>( - event_iter: &mut OffsetIter<'input, 'callback>, +fn collect_link_data<'input, F: BrokenLinkCallback<'input>>( + event_iter: &mut OffsetIter<'input, F>, ) -> Option { let mut display_text: Option = None; let mut append_text = |text: CowStr<'_>| { diff --git a/src/librustdoc/passes/lint/bare_urls.rs b/src/librustdoc/passes/lint/bare_urls.rs index 8f68f6ff476..4b2d3092837 100644 --- a/src/librustdoc/passes/lint/bare_urls.rs +++ b/src/librustdoc/passes/lint/bare_urls.rs @@ -42,11 +42,11 @@ pub(super) fn visit_item(cx: &DocContext<'_>, item: &Item) { match event { Event::Text(s) => find_raw_urls(cx, &s, range, &report_diag), // We don't want to check the text inside code blocks or links. - Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link(..))) => { + Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link { .. })) => { while let Some((event, _)) = p.next() { match event { Event::End(end) - if mem::discriminant(&end) == mem::discriminant(&tag) => + if mem::discriminant(&end) == mem::discriminant(&tag.to_end()) => { break; } diff --git a/src/librustdoc/passes/lint/html_tags.rs b/src/librustdoc/passes/lint/html_tags.rs index a0064a90112..87dfa5d5389 100644 --- a/src/librustdoc/passes/lint/html_tags.rs +++ b/src/librustdoc/passes/lint/html_tags.rs @@ -4,7 +4,7 @@ use crate::clean::*; use crate::core::DocContext; use crate::html::markdown::main_body_opts; -use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag}; +use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag, TagEnd}; use rustc_resolve::rustdoc::source_span_for_markdown_range; use std::iter::Peekable; @@ -140,10 +140,10 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { for (event, range) in p { match event { Event::Start(Tag::CodeBlock(_)) => in_code_block = true, - Event::Html(text) if !in_code_block => { + Event::Html(text) | Event::InlineHtml(text) if !in_code_block => { extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag) } - Event::End(Tag::CodeBlock(_)) => in_code_block = false, + Event::End(TagEnd::CodeBlock) => in_code_block = false, _ => {} } } diff --git a/src/librustdoc/passes/lint/redundant_explicit_links.rs b/src/librustdoc/passes/lint/redundant_explicit_links.rs index 7ab974046b9..b36b41c9f2d 100644 --- a/src/librustdoc/passes/lint/redundant_explicit_links.rs +++ b/src/librustdoc/passes/lint/redundant_explicit_links.rs @@ -1,6 +1,8 @@ use std::ops::Range; -use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, OffsetIter, Parser, Tag}; +use pulldown_cmark::{ + BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, OffsetIter, Parser, Tag, +}; use rustc_ast::NodeId; use rustc_errors::SuggestionStyle; use rustc_hir::def::{DefKind, DocLinkResMap, Namespace, Res}; @@ -95,7 +97,7 @@ fn check_redundant_explicit_link<'md>( while let Some((event, link_range)) = offset_iter.next() { match event { - Event::Start(Tag::Link(link_type, dest, _)) => { + Event::Start(Tag::Link { link_type, dest_url, .. }) => { let link_data = collect_link_data(&mut offset_iter); if let Some(resolvable_link) = link_data.resolvable_link.as_ref() { @@ -108,7 +110,7 @@ fn check_redundant_explicit_link<'md>( } } - let explicit_link = dest.to_string(); + let explicit_link = dest_url.to_string(); let display_link = link_data.resolvable_link.clone()?; if explicit_link.ends_with(&display_link) || display_link.ends_with(&explicit_link) @@ -122,7 +124,7 @@ fn check_redundant_explicit_link<'md>( doc, resolutions, link_range, - dest.to_string(), + dest_url.to_string(), link_data, if link_type == LinkType::Inline { (b'(', b')') @@ -139,7 +141,7 @@ fn check_redundant_explicit_link<'md>( doc, resolutions, link_range, - &dest, + &dest_url, link_data, ); } @@ -259,7 +261,9 @@ fn find_resolution(resolutions: &DocLinkResMap, path: &str) -> Option) -> LinkData { +fn collect_link_data<'input, F: BrokenLinkCallback<'input>>( + offset_iter: &mut OffsetIter<'input, F>, +) -> LinkData { let mut resolvable_link = None; let mut resolvable_link_range = None; let mut display_link = String::new(); diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index aa119819aaa..82fa43f581f 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -335,6 +335,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "proc-macro2", "psm", "pulldown-cmark", + "pulldown-cmark-escape", "punycode", "quote", "r-efi",