Rollup merge of #120729 - ehuss:update-mdbook, r=Mark-Simulacrum
Update mdbook to 0.4.37

This updates mdbook to 0.4.37. Changelog: https://github.com/rust-lang/mdBook/blob/master/CHANGELOG.md#mdbook-0437

The primary change is the update to pulldown-cmark, which brings a large number of markdown parsing changes. There shouldn't be any significant changes to the rendering of any of the books (I have posted some PRs to fix minor issues in the books that were affected).
This commit is contained in:
commit
9bbd146e86
6 changed files with 338 additions and 222 deletions
97
Cargo.lock
97
Cargo.lock
|
@ -147,7 +147,21 @@ dependencies = [
|
|||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"anstyle-wincon 2.1.0",
|
||||
"colorchoice",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon 3.0.2",
|
||||
"colorchoice",
|
||||
"utf8parse",
|
||||
]
|
||||
|
@ -186,6 +200,16 @@ dependencies = [
|
|||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "3.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.75"
|
||||
|
@ -520,7 +544,7 @@ version = "4.4.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5179bb514e4d7c2051749d8fcefa2ed6d06a9f4e6d69faf3805f5d80b8cf8d56"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstream 0.5.0",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim",
|
||||
|
@ -558,7 +582,7 @@ checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b"
|
|||
name = "clippy"
|
||||
version = "0.1.78"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstream 0.5.0",
|
||||
"clippy_config",
|
||||
"clippy_lints",
|
||||
"clippy_utils",
|
||||
|
@ -1234,6 +1258,16 @@ dependencies = [
|
|||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_filter"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea"
|
||||
dependencies = [
|
||||
"log",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.10.0"
|
||||
|
@ -1247,6 +1281,19 @@ dependencies = [
|
|||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05e7cf40684ae96ade6232ed84582f40ce0a66efcd43a5117aef610534f8e0b8"
|
||||
dependencies = [
|
||||
"anstream 0.6.11",
|
||||
"anstyle",
|
||||
"env_filter",
|
||||
"humantime",
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.0"
|
||||
|
@ -1638,9 +1685,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "handlebars"
|
||||
version = "4.3.7"
|
||||
version = "5.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83c3372087601b532857d332f5957cbae686da52bb7810bf038c3e3c3cc2fa0d"
|
||||
checksum = "ab283476b99e66691dee3f1640fea91487a8d81f50fb5ecc75538f8f8879a1e4"
|
||||
dependencies = [
|
||||
"log",
|
||||
"pest",
|
||||
|
@ -2227,6 +2274,7 @@ dependencies = [
|
|||
name = "linkchecker"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"html5ever",
|
||||
"once_cell",
|
||||
"regex",
|
||||
]
|
||||
|
@ -2335,9 +2383,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "mdbook"
|
||||
version = "0.4.36"
|
||||
version = "0.4.37"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "80992cb0e05f22cc052c99f8e883f1593b891014b96a8b4637fd274d7030c85e"
|
||||
checksum = "0c33564061c3c640bed5ace7d6a2a1b65f2c64257d1ac930c15e94ed0fb561d3"
|
||||
dependencies = [
|
||||
"ammonia",
|
||||
"anyhow",
|
||||
|
@ -2345,14 +2393,13 @@ dependencies = [
|
|||
"clap",
|
||||
"clap_complete",
|
||||
"elasticlunr-rs",
|
||||
"env_logger",
|
||||
"env_logger 0.11.1",
|
||||
"handlebars",
|
||||
"log",
|
||||
"memchr",
|
||||
"once_cell",
|
||||
"opener",
|
||||
"pathdiff",
|
||||
"pulldown-cmark",
|
||||
"pulldown-cmark 0.10.0",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
@ -2471,7 +2518,7 @@ dependencies = [
|
|||
"aes",
|
||||
"colored",
|
||||
"ctrlc",
|
||||
"env_logger",
|
||||
"env_logger 0.10.0",
|
||||
"getrandom",
|
||||
"jemalloc-sys",
|
||||
"lazy_static",
|
||||
|
@ -2689,7 +2736,7 @@ dependencies = [
|
|||
"camino",
|
||||
"clap",
|
||||
"derive_builder",
|
||||
"env_logger",
|
||||
"env_logger 0.10.0",
|
||||
"fs_extra",
|
||||
"glob",
|
||||
"humansize",
|
||||
|
@ -3012,6 +3059,24 @@ dependencies = [
|
|||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pulldown-cmark"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dce76ce678ffc8e5675b22aa1405de0b7037e2fdf8913fea40d1926c6fe1e6e7"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"memchr",
|
||||
"pulldown-cmark-escape",
|
||||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pulldown-cmark-escape"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5d8f9aa0e3cbcfaf8bf00300004ee3b72f74770f9cbac93f6928771f613276b"
|
||||
|
||||
[[package]]
|
||||
name = "punycode"
|
||||
version = "0.4.1"
|
||||
|
@ -3271,7 +3336,7 @@ name = "rustbook"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"env_logger",
|
||||
"env_logger 0.10.0",
|
||||
"mdbook",
|
||||
]
|
||||
|
||||
|
@ -4427,7 +4492,7 @@ name = "rustc_resolve"
|
|||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"pulldown-cmark",
|
||||
"pulldown-cmark 0.9.6",
|
||||
"rustc_arena",
|
||||
"rustc_ast",
|
||||
"rustc_ast_pretty",
|
||||
|
@ -4971,9 +5036,9 @@ checksum = "45bb67a18fa91266cc7807181f62f9178a6873bfad7dc788c42e6430db40184f"
|
|||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.1.0"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
|
|
|
@ -10,3 +10,4 @@ path = "main.rs"
|
|||
[dependencies]
|
||||
regex = "1"
|
||||
once_cell = "1"
|
||||
html5ever = "0.26.0"
|
||||
|
|
|
@ -14,6 +14,12 @@
|
|||
//! A few exceptions are allowed as there's known bugs in rustdoc, but this
|
||||
//! should catch the majority of "broken link" cases.
|
||||
|
||||
use html5ever::tendril::ByteTendril;
|
||||
use html5ever::tokenizer::{
|
||||
BufferQueue, TagToken, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use std::cell::RefCell;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::env;
|
||||
|
@ -23,9 +29,6 @@ use std::path::{Component, Path, PathBuf};
|
|||
use std::rc::Rc;
|
||||
use std::time::Instant;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
|
||||
// Add linkcheck exceptions here
|
||||
// If at all possible you should use intra-doc links to avoid linkcheck issues. These
|
||||
// are cases where that does not work
|
||||
|
@ -182,163 +185,10 @@ impl Checker {
|
|||
}
|
||||
};
|
||||
|
||||
// Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
|
||||
with_attrs_in_source(&source, " href", |url, i, base| {
|
||||
// Ignore external URLs
|
||||
if url.starts_with("http:")
|
||||
|| url.starts_with("https:")
|
||||
|| url.starts_with("javascript:")
|
||||
|| url.starts_with("ftp:")
|
||||
|| url.starts_with("irc:")
|
||||
|| url.starts_with("data:")
|
||||
|| url.starts_with("mailto:")
|
||||
{
|
||||
report.links_ignored_external += 1;
|
||||
return;
|
||||
}
|
||||
report.links_checked += 1;
|
||||
let (url, fragment) = match url.split_once('#') {
|
||||
None => (url, None),
|
||||
Some((url, fragment)) => (url, Some(fragment)),
|
||||
};
|
||||
// NB: the `splitn` always succeeds, even if the delimiter is not present.
|
||||
let url = url.splitn(2, '?').next().unwrap();
|
||||
|
||||
// Once we've plucked out the URL, parse it using our base url and
|
||||
// then try to extract a file path.
|
||||
let mut path = file.to_path_buf();
|
||||
if !base.is_empty() || !url.is_empty() {
|
||||
path.pop();
|
||||
for part in Path::new(base).join(url).components() {
|
||||
match part {
|
||||
Component::Prefix(_) | Component::RootDir => {
|
||||
// Avoid absolute paths as they make the docs not
|
||||
// relocatable by making assumptions on where the docs
|
||||
// are hosted relative to the site root.
|
||||
report.errors += 1;
|
||||
println!(
|
||||
"{}:{}: absolute path - {}",
|
||||
pretty_path,
|
||||
i + 1,
|
||||
Path::new(base).join(url).display()
|
||||
);
|
||||
return;
|
||||
}
|
||||
Component::CurDir => {}
|
||||
Component::ParentDir => {
|
||||
path.pop();
|
||||
}
|
||||
Component::Normal(s) => {
|
||||
path.push(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let (target_pretty_path, target_entry) = self.load_file(&path, report);
|
||||
let (target_source, target_ids) = match target_entry {
|
||||
FileEntry::Missing => {
|
||||
if is_exception(file, &target_pretty_path) {
|
||||
report.links_ignored_exception += 1;
|
||||
} else {
|
||||
report.errors += 1;
|
||||
println!(
|
||||
"{}:{}: broken link - `{}`",
|
||||
pretty_path,
|
||||
i + 1,
|
||||
target_pretty_path
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
FileEntry::Dir => {
|
||||
// Links to directories show as directory listings when viewing
|
||||
// the docs offline so it's best to avoid them.
|
||||
report.errors += 1;
|
||||
println!(
|
||||
"{}:{}: directory link to `{}` \
|
||||
(directory links should use index.html instead)",
|
||||
pretty_path,
|
||||
i + 1,
|
||||
target_pretty_path
|
||||
);
|
||||
return;
|
||||
}
|
||||
FileEntry::OtherFile => return,
|
||||
FileEntry::Redirect { target } => {
|
||||
let t = target.clone();
|
||||
let (target, redir_entry) = self.load_file(&t, report);
|
||||
match redir_entry {
|
||||
FileEntry::Missing => {
|
||||
report.errors += 1;
|
||||
println!(
|
||||
"{}:{}: broken redirect from `{}` to `{}`",
|
||||
pretty_path,
|
||||
i + 1,
|
||||
target_pretty_path,
|
||||
target
|
||||
);
|
||||
return;
|
||||
}
|
||||
FileEntry::Redirect { target } => {
|
||||
// Redirect to a redirect, this link checker
|
||||
// currently doesn't support this, since it would
|
||||
// require cycle checking, etc.
|
||||
report.errors += 1;
|
||||
println!(
|
||||
"{}:{}: redirect from `{}` to `{}` \
|
||||
which is also a redirect (not supported)",
|
||||
pretty_path,
|
||||
i + 1,
|
||||
target_pretty_path,
|
||||
target.display()
|
||||
);
|
||||
return;
|
||||
}
|
||||
FileEntry::Dir => {
|
||||
report.errors += 1;
|
||||
println!(
|
||||
"{}:{}: redirect from `{}` to `{}` \
|
||||
which is a directory \
|
||||
(directory links should use index.html instead)",
|
||||
pretty_path,
|
||||
i + 1,
|
||||
target_pretty_path,
|
||||
target
|
||||
);
|
||||
return;
|
||||
}
|
||||
FileEntry::OtherFile => return,
|
||||
FileEntry::HtmlFile { source, ids } => (source, ids),
|
||||
}
|
||||
}
|
||||
FileEntry::HtmlFile { source, ids } => (source, ids),
|
||||
};
|
||||
|
||||
// Alright, if we've found an HTML file for the target link. If
|
||||
// this is a fragment link, also check that the `id` exists.
|
||||
if let Some(ref fragment) = fragment {
|
||||
// Fragments like `#1-6` are most likely line numbers to be
|
||||
// interpreted by javascript, so we're ignoring these
|
||||
if fragment.splitn(2, '-').all(|f| f.chars().all(|c| c.is_numeric())) {
|
||||
return;
|
||||
}
|
||||
|
||||
parse_ids(&mut target_ids.borrow_mut(), &pretty_path, target_source, report);
|
||||
|
||||
if target_ids.borrow().contains(*fragment) {
|
||||
return;
|
||||
}
|
||||
|
||||
if is_exception(file, &format!("#{}", fragment)) {
|
||||
report.links_ignored_exception += 1;
|
||||
} else {
|
||||
report.errors += 1;
|
||||
print!("{}:{}: broken link fragment ", pretty_path, i + 1);
|
||||
println!("`#{}` pointing to `{}`", fragment, target_pretty_path);
|
||||
};
|
||||
}
|
||||
});
|
||||
let (base, urls) = get_urls(&source);
|
||||
for (i, url) in urls {
|
||||
self.check_url(file, &pretty_path, report, &base, i, &url);
|
||||
}
|
||||
|
||||
self.check_intra_doc_links(file, &pretty_path, &source, report);
|
||||
|
||||
|
@ -350,6 +200,159 @@ impl Checker {
|
|||
}
|
||||
}
|
||||
|
||||
fn check_url(
|
||||
&mut self,
|
||||
file: &Path,
|
||||
pretty_path: &str,
|
||||
report: &mut Report,
|
||||
base: &Option<String>,
|
||||
i: u64,
|
||||
url: &str,
|
||||
) {
|
||||
// Ignore external URLs
|
||||
if url.starts_with("http:")
|
||||
|| url.starts_with("https:")
|
||||
|| url.starts_with("javascript:")
|
||||
|| url.starts_with("ftp:")
|
||||
|| url.starts_with("irc:")
|
||||
|| url.starts_with("data:")
|
||||
|| url.starts_with("mailto:")
|
||||
{
|
||||
report.links_ignored_external += 1;
|
||||
return;
|
||||
}
|
||||
report.links_checked += 1;
|
||||
let (url, fragment) = match url.split_once('#') {
|
||||
None => (url, None),
|
||||
Some((url, fragment)) => (url, Some(fragment)),
|
||||
};
|
||||
// NB: the `splitn` always succeeds, even if the delimiter is not present.
|
||||
let url = url.splitn(2, '?').next().unwrap();
|
||||
|
||||
// Once we've plucked out the URL, parse it using our base url and
|
||||
// then try to extract a file path.
|
||||
let mut path = file.to_path_buf();
|
||||
if base.is_some() || !url.is_empty() {
|
||||
let base = base.as_deref().unwrap_or("");
|
||||
path.pop();
|
||||
for part in Path::new(base).join(url).components() {
|
||||
match part {
|
||||
Component::Prefix(_) | Component::RootDir => {
|
||||
// Avoid absolute paths as they make the docs not
|
||||
// relocatable by making assumptions on where the docs
|
||||
// are hosted relative to the site root.
|
||||
report.errors += 1;
|
||||
println!(
|
||||
"{}:{}: absolute path - {}",
|
||||
pretty_path,
|
||||
i,
|
||||
Path::new(base).join(url).display()
|
||||
);
|
||||
return;
|
||||
}
|
||||
Component::CurDir => {}
|
||||
Component::ParentDir => {
|
||||
path.pop();
|
||||
}
|
||||
Component::Normal(s) => {
|
||||
path.push(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let (target_pretty_path, target_entry) = self.load_file(&path, report);
|
||||
let (target_source, target_ids) = match target_entry {
|
||||
FileEntry::Missing => {
|
||||
if is_exception(file, &target_pretty_path) {
|
||||
report.links_ignored_exception += 1;
|
||||
} else {
|
||||
report.errors += 1;
|
||||
println!("{}:{}: broken link - `{}`", pretty_path, i, target_pretty_path);
|
||||
}
|
||||
return;
|
||||
}
|
||||
FileEntry::Dir => {
|
||||
// Links to directories show as directory listings when viewing
|
||||
// the docs offline so it's best to avoid them.
|
||||
report.errors += 1;
|
||||
println!(
|
||||
"{}:{}: directory link to `{}` \
|
||||
(directory links should use index.html instead)",
|
||||
pretty_path, i, target_pretty_path
|
||||
);
|
||||
return;
|
||||
}
|
||||
FileEntry::OtherFile => return,
|
||||
FileEntry::Redirect { target } => {
|
||||
let t = target.clone();
|
||||
let (target, redir_entry) = self.load_file(&t, report);
|
||||
match redir_entry {
|
||||
FileEntry::Missing => {
|
||||
report.errors += 1;
|
||||
println!(
|
||||
"{}:{}: broken redirect from `{}` to `{}`",
|
||||
pretty_path, i, target_pretty_path, target
|
||||
);
|
||||
return;
|
||||
}
|
||||
FileEntry::Redirect { target } => {
|
||||
// Redirect to a redirect, this link checker
|
||||
// currently doesn't support this, since it would
|
||||
// require cycle checking, etc.
|
||||
report.errors += 1;
|
||||
println!(
|
||||
"{}:{}: redirect from `{}` to `{}` \
|
||||
which is also a redirect (not supported)",
|
||||
pretty_path,
|
||||
i,
|
||||
target_pretty_path,
|
||||
target.display()
|
||||
);
|
||||
return;
|
||||
}
|
||||
FileEntry::Dir => {
|
||||
report.errors += 1;
|
||||
println!(
|
||||
"{}:{}: redirect from `{}` to `{}` \
|
||||
which is a directory \
|
||||
(directory links should use index.html instead)",
|
||||
pretty_path, i, target_pretty_path, target
|
||||
);
|
||||
return;
|
||||
}
|
||||
FileEntry::OtherFile => return,
|
||||
FileEntry::HtmlFile { source, ids } => (source, ids),
|
||||
}
|
||||
}
|
||||
FileEntry::HtmlFile { source, ids } => (source, ids),
|
||||
};
|
||||
|
||||
// Alright, if we've found an HTML file for the target link. If
|
||||
// this is a fragment link, also check that the `id` exists.
|
||||
if let Some(ref fragment) = fragment {
|
||||
// Fragments like `#1-6` are most likely line numbers to be
|
||||
// interpreted by javascript, so we're ignoring these
|
||||
if fragment.splitn(2, '-').all(|f| f.chars().all(|c| c.is_numeric())) {
|
||||
return;
|
||||
}
|
||||
|
||||
parse_ids(&mut target_ids.borrow_mut(), &pretty_path, target_source, report);
|
||||
|
||||
if target_ids.borrow().contains(*fragment) {
|
||||
return;
|
||||
}
|
||||
|
||||
if is_exception(file, &format!("#{}", fragment)) {
|
||||
report.links_ignored_exception += 1;
|
||||
} else {
|
||||
report.errors += 1;
|
||||
print!("{}:{}: broken link fragment ", pretty_path, i);
|
||||
println!("`#{}` pointing to `{}`", fragment, target_pretty_path);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fn check_intra_doc_links(
|
||||
&mut self,
|
||||
file: &Path,
|
||||
|
@ -496,59 +499,93 @@ fn maybe_redirect(source: &str) -> Option<String> {
|
|||
find_redirect(REDIRECT_RUSTDOC).or_else(|| find_redirect(REDIRECT_MDBOOK))
|
||||
}
|
||||
|
||||
fn with_attrs_in_source<F: FnMut(&str, usize, &str)>(source: &str, attr: &str, mut f: F) {
|
||||
let mut base = "";
|
||||
for (i, mut line) in source.lines().enumerate() {
|
||||
while let Some(j) = line.find(attr) {
|
||||
let rest = &line[j + attr.len()..];
|
||||
// The base tag should always be the first link in the document so
|
||||
// we can get away with using one pass.
|
||||
let is_base = line[..j].ends_with("<base");
|
||||
line = rest;
|
||||
let pos_equals = match rest.find('=') {
|
||||
Some(i) => i,
|
||||
None => continue,
|
||||
};
|
||||
if rest[..pos_equals].trim_start_matches(' ') != "" {
|
||||
continue;
|
||||
}
|
||||
fn parse_html<Sink: TokenSink>(source: &str, sink: Sink) -> Sink {
|
||||
let tendril: ByteTendril = source.as_bytes().into();
|
||||
let mut input = BufferQueue::new();
|
||||
input.push_back(tendril.try_reinterpret().unwrap());
|
||||
|
||||
let rest = &rest[pos_equals + 1..];
|
||||
let mut tok = Tokenizer::new(sink, TokenizerOpts::default());
|
||||
let _ = tok.feed(&mut input);
|
||||
assert!(input.is_empty());
|
||||
tok.end();
|
||||
tok.sink
|
||||
}
|
||||
|
||||
let pos_quote = match rest.find(&['"', '\''][..]) {
|
||||
Some(i) => i,
|
||||
None => continue,
|
||||
};
|
||||
let quote_delim = rest.as_bytes()[pos_quote] as char;
|
||||
#[derive(Default)]
|
||||
struct AttrCollector {
|
||||
attr_name: &'static [u8],
|
||||
base: Option<String>,
|
||||
found_attrs: Vec<(u64, String)>,
|
||||
/// Tracks whether or not it is inside a <script> tag.
|
||||
///
|
||||
/// A lot of our sources have JSON script tags which have HTML embedded
|
||||
/// within, but that cannot be parsed or processed correctly (since it is
|
||||
/// JSON, not HTML). I think the sink is supposed to return
|
||||
/// `TokenSinkResult::Script(…)` (and then maybe switch parser?), but I
|
||||
/// don't fully understand the best way to use that, and this seems good
|
||||
/// enough for now.
|
||||
in_script: bool,
|
||||
}
|
||||
|
||||
if rest[..pos_quote].trim_start_matches(' ') != "" {
|
||||
continue;
|
||||
impl TokenSink for AttrCollector {
|
||||
type Handle = ();
|
||||
|
||||
fn process_token(&mut self, token: Token, line_number: u64) -> TokenSinkResult<()> {
|
||||
match token {
|
||||
TagToken(tag) => {
|
||||
let tag_name = tag.name.as_bytes();
|
||||
if tag_name == b"base" {
|
||||
if let Some(href) =
|
||||
tag.attrs.iter().find(|attr| attr.name.local.as_bytes() == b"href")
|
||||
{
|
||||
self.base = Some(href.value.to_string());
|
||||
}
|
||||
return TokenSinkResult::Continue;
|
||||
} else if tag_name == b"script" {
|
||||
self.in_script = !self.in_script;
|
||||
}
|
||||
if self.in_script {
|
||||
return TokenSinkResult::Continue;
|
||||
}
|
||||
for attr in tag.attrs.iter() {
|
||||
let name = attr.name.local.as_bytes();
|
||||
if name == self.attr_name {
|
||||
let url = attr.value.to_string();
|
||||
self.found_attrs.push((line_number, url));
|
||||
}
|
||||
}
|
||||
}
|
||||
let rest = &rest[pos_quote + 1..];
|
||||
let url = match rest.find(quote_delim) {
|
||||
Some(i) => &rest[..i],
|
||||
None => continue,
|
||||
};
|
||||
if is_base {
|
||||
base = url;
|
||||
continue;
|
||||
}
|
||||
f(url, i, base)
|
||||
// Note: ParseError is pretty noisy. It seems html5ever does not
|
||||
// particularly like some kinds of HTML comments.
|
||||
_ => {}
|
||||
}
|
||||
TokenSinkResult::Continue
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieves href="..." attributes from HTML elements.
|
||||
fn get_urls(source: &str) -> (Option<String>, Vec<(u64, String)>) {
|
||||
let collector = AttrCollector { attr_name: b"href", ..AttrCollector::default() };
|
||||
let sink = parse_html(source, collector);
|
||||
(sink.base, sink.found_attrs)
|
||||
}
|
||||
|
||||
/// Retrieves id="..." attributes from HTML elements.
|
||||
fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut Report) {
|
||||
if ids.is_empty() {
|
||||
with_attrs_in_source(source, " id", |fragment, i, _| {
|
||||
let frag = fragment.trim_start_matches('#').to_owned();
|
||||
let encoded = small_url_encode(&frag);
|
||||
if !ids.insert(frag) {
|
||||
report.errors += 1;
|
||||
println!("{}:{}: id is not unique: `{}`", file, i, fragment);
|
||||
}
|
||||
// Just in case, we also add the encoded id.
|
||||
ids.insert(encoded);
|
||||
});
|
||||
if !ids.is_empty() {
|
||||
// ids have already been parsed
|
||||
return;
|
||||
}
|
||||
|
||||
let collector = AttrCollector { attr_name: b"id", ..AttrCollector::default() };
|
||||
let sink = parse_html(source, collector);
|
||||
for (line_number, id) in sink.found_attrs {
|
||||
let encoded = small_url_encode(&id);
|
||||
if let Some(id) = ids.replace(id) {
|
||||
report.errors += 1;
|
||||
println!("{}:{}: id is not unique: `{}`", file, line_number, id);
|
||||
}
|
||||
// Just in case, we also add the encoded id.
|
||||
ids.insert(encoded);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
<html>
|
||||
<body>
|
||||
[<code>std::ffi::CString</code>]
|
||||
</body>
|
||||
</html>
|
|
@ -111,3 +111,11 @@ fn redirect_loop() {
|
|||
which is also a redirect (not supported)",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn broken_intra_doc_link() {
|
||||
broken_test(
|
||||
"broken_intra_doc_link",
|
||||
"foo.html:3: broken intra-doc link - [<code>std::ffi::CString</code>]",
|
||||
);
|
||||
}
|
||||
|
|
|
@ -9,6 +9,6 @@ clap = "4.0.32"
|
|||
env_logger = "0.10"
|
||||
|
||||
[dependencies.mdbook]
|
||||
version = "0.4.28"
|
||||
version = "0.4.37"
|
||||
default-features = false
|
||||
features = ["search"]
|
||||
|
|
Loading…
Add table
Reference in a new issue