Add tool src/tools/coverage-dump
for use by some new coverage tests
This commit is contained in:
parent
04374cd742
commit
1367104cb2
11 changed files with 562 additions and 1 deletions
18
Cargo.lock
18
Cargo.lock
|
@ -722,6 +722,18 @@ version = "0.8.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
|
||||
|
||||
[[package]]
|
||||
name = "coverage-dump"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"leb128",
|
||||
"md-5",
|
||||
"miniz_oxide",
|
||||
"regex",
|
||||
"rustc-demangle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "coverage_test_macros"
|
||||
version = "0.0.0"
|
||||
|
@ -2041,6 +2053,12 @@ version = "1.3.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||
|
||||
[[package]]
|
||||
name = "leb128"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67"
|
||||
|
||||
[[package]]
|
||||
name = "levenshtein"
|
||||
version = "1.0.5"
|
||||
|
|
|
@ -43,6 +43,7 @@ members = [
|
|||
"src/tools/generate-windows-sys",
|
||||
"src/tools/rustdoc-gui-test",
|
||||
"src/tools/opt-dist",
|
||||
"src/tools/coverage-dump",
|
||||
]
|
||||
|
||||
exclude = [
|
||||
|
|
|
@ -703,7 +703,8 @@ impl<'a> Builder<'a> {
|
|||
llvm::Lld,
|
||||
llvm::CrtBeginEnd,
|
||||
tool::RustdocGUITest,
|
||||
tool::OptimizedDist
|
||||
tool::OptimizedDist,
|
||||
tool::CoverageDump,
|
||||
),
|
||||
Kind::Check | Kind::Clippy | Kind::Fix => describe!(
|
||||
check::Std,
|
||||
|
|
|
@ -306,6 +306,7 @@ bootstrap_tool!(
|
|||
GenerateWindowsSys, "src/tools/generate-windows-sys", "generate-windows-sys";
|
||||
RustdocGUITest, "src/tools/rustdoc-gui-test", "rustdoc-gui-test", is_unstable_tool = true, allow_features = "test";
|
||||
OptimizedDist, "src/tools/opt-dist", "opt-dist";
|
||||
CoverageDump, "src/tools/coverage-dump", "coverage-dump";
|
||||
);
|
||||
|
||||
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
|
||||
|
|
14
src/tools/coverage-dump/Cargo.toml
Normal file
14
src/tools/coverage-dump/Cargo.toml
Normal file
|
@ -0,0 +1,14 @@
|
|||
[package]
|
||||
name = "coverage-dump"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.71"
|
||||
leb128 = "0.2.5"
|
||||
md5 = { package = "md-5" , version = "0.10.5" }
|
||||
miniz_oxide = "0.7.1"
|
||||
regex = "1.8.4"
|
||||
rustc-demangle = "0.1.23"
|
8
src/tools/coverage-dump/README.md
Normal file
8
src/tools/coverage-dump/README.md
Normal file
|
@ -0,0 +1,8 @@
|
|||
This tool extracts coverage mapping information from an LLVM IR assembly file
|
||||
(`.ll`), and prints it in a more human-readable form that can be used for
|
||||
snapshot tests.
|
||||
|
||||
The output format is mostly arbitrary, so it's OK to change the output as long
|
||||
as any affected tests are also re-blessed. However, the output should be
|
||||
consistent across different executions on different platforms, so avoid
|
||||
printing any information that is platform-specific or non-deterministic.
|
296
src/tools/coverage-dump/src/covfun.rs
Normal file
296
src/tools/coverage-dump/src/covfun.rs
Normal file
|
@ -0,0 +1,296 @@
|
|||
use crate::parser::{unescape_llvm_string_contents, Parser};
|
||||
use anyhow::{anyhow, Context};
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{self, Debug, Write as _};
|
||||
use std::sync::OnceLock;
|
||||
|
||||
pub(crate) fn dump_covfun_mappings(
|
||||
llvm_ir: &str,
|
||||
function_names: &HashMap<u64, String>,
|
||||
) -> anyhow::Result<()> {
|
||||
// Extract function coverage entries from the LLVM IR assembly, and associate
|
||||
// each entry with its (demangled) name.
|
||||
let mut covfun_entries = llvm_ir
|
||||
.lines()
|
||||
.filter_map(covfun_line_data)
|
||||
.map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data))
|
||||
.collect::<Vec<_>>();
|
||||
covfun_entries.sort_by(|a, b| {
|
||||
// Sort entries primarily by name, to help make the order consistent
|
||||
// across platforms and relatively insensitive to changes.
|
||||
// (Sadly we can't use `sort_by_key` because we would need to return references.)
|
||||
Ord::cmp(&a.0, &b.0)
|
||||
.then_with(|| Ord::cmp(&a.1.is_used, &b.1.is_used))
|
||||
.then_with(|| Ord::cmp(a.1.payload.as_slice(), b.1.payload.as_slice()))
|
||||
});
|
||||
|
||||
for (name, line_data) in &covfun_entries {
|
||||
let name = name.unwrap_or("(unknown)");
|
||||
let unused = if line_data.is_used { "" } else { " (unused)" };
|
||||
println!("Function name: {name}{unused}");
|
||||
|
||||
let payload: &[u8] = &line_data.payload;
|
||||
println!("Raw bytes ({len}): 0x{payload:02x?}", len = payload.len());
|
||||
|
||||
let mut parser = Parser::new(payload);
|
||||
|
||||
let num_files = parser.read_uleb128_u32()?;
|
||||
println!("Number of files: {num_files}");
|
||||
|
||||
for i in 0..num_files {
|
||||
let global_file_id = parser.read_uleb128_u32()?;
|
||||
println!("- file {i} => global file {global_file_id}");
|
||||
}
|
||||
|
||||
let num_expressions = parser.read_uleb128_u32()?;
|
||||
println!("Number of expressions: {num_expressions}");
|
||||
|
||||
let mut expression_resolver = ExpressionResolver::new();
|
||||
for i in 0..num_expressions {
|
||||
let lhs = parser.read_simple_term()?;
|
||||
let rhs = parser.read_simple_term()?;
|
||||
println!("- expression {i} operands: lhs = {lhs:?}, rhs = {rhs:?}");
|
||||
expression_resolver.push_operands(lhs, rhs);
|
||||
}
|
||||
|
||||
for i in 0..num_files {
|
||||
let num_mappings = parser.read_uleb128_u32()?;
|
||||
println!("Number of file {i} mappings: {num_mappings}");
|
||||
|
||||
for _ in 0..num_mappings {
|
||||
let (kind, region) = parser.read_mapping_kind_and_region()?;
|
||||
println!("- {kind:?} at {region:?}");
|
||||
|
||||
match kind {
|
||||
// Also print expression mappings in resolved form.
|
||||
MappingKind::Code(term @ CovTerm::Expression { .. })
|
||||
| MappingKind::Gap(term @ CovTerm::Expression { .. }) => {
|
||||
println!(" = {}", expression_resolver.format_term(term));
|
||||
}
|
||||
// If the mapping is a branch region, print both of its arms
|
||||
// in resolved form (even if they aren't expressions).
|
||||
MappingKind::Branch { r#true, r#false } => {
|
||||
println!(" true = {}", expression_resolver.format_term(r#true));
|
||||
println!(" false = {}", expression_resolver.format_term(r#false));
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parser.ensure_empty()?;
|
||||
println!();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct CovfunLineData {
|
||||
name_hash: u64,
|
||||
is_used: bool,
|
||||
payload: Vec<u8>,
|
||||
}
|
||||
|
||||
/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
|
||||
/// entry, and if so extracts relevant data in a `CovfunLineData`.
|
||||
fn covfun_line_data(line: &str) -> Option<CovfunLineData> {
|
||||
let re = {
|
||||
// We cheat a little bit and match variable names `@__covrec_[HASH]u`
|
||||
// rather than the section name, because the section name is harder to
|
||||
// extract and differs across Linux/Windows/macOS. We also extract the
|
||||
// symbol name hash from the variable name rather than the data, since
|
||||
// it's easier and both should match.
|
||||
static RE: OnceLock<Regex> = OnceLock::new();
|
||||
RE.get_or_init(|| {
|
||||
Regex::new(
|
||||
r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#,
|
||||
)
|
||||
.unwrap()
|
||||
})
|
||||
};
|
||||
|
||||
let captures = re.captures(line)?;
|
||||
let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap();
|
||||
let is_used = captures.name("is_used").is_some();
|
||||
let payload = unescape_llvm_string_contents(&captures["payload"]);
|
||||
|
||||
Some(CovfunLineData { name_hash, is_used, payload })
|
||||
}
|
||||
|
||||
// Extra parser methods only needed when parsing `covfun` payloads.
|
||||
impl<'a> Parser<'a> {
|
||||
fn read_simple_term(&mut self) -> anyhow::Result<CovTerm> {
|
||||
let raw_term = self.read_uleb128_u32()?;
|
||||
CovTerm::decode(raw_term).context("decoding term")
|
||||
}
|
||||
|
||||
fn read_mapping_kind_and_region(&mut self) -> anyhow::Result<(MappingKind, MappingRegion)> {
|
||||
let mut kind = self.read_raw_mapping_kind()?;
|
||||
let mut region = self.read_raw_mapping_region()?;
|
||||
|
||||
const HIGH_BIT: u32 = 1u32 << 31;
|
||||
if region.end_column & HIGH_BIT != 0 {
|
||||
region.end_column &= !HIGH_BIT;
|
||||
kind = match kind {
|
||||
MappingKind::Code(term) => MappingKind::Gap(term),
|
||||
// LLVM's coverage mapping reader will actually handle this
|
||||
// case without complaint, but the result is almost certainly
|
||||
// a meaningless implementation artifact.
|
||||
_ => return Err(anyhow!("unexpected base kind for gap region: {kind:?}")),
|
||||
}
|
||||
}
|
||||
|
||||
Ok((kind, region))
|
||||
}
|
||||
|
||||
fn read_raw_mapping_kind(&mut self) -> anyhow::Result<MappingKind> {
|
||||
let raw_mapping_kind = self.read_uleb128_u32()?;
|
||||
if let Some(term) = CovTerm::decode(raw_mapping_kind) {
|
||||
return Ok(MappingKind::Code(term));
|
||||
}
|
||||
|
||||
assert_eq!(raw_mapping_kind & 0b11, 0);
|
||||
assert_ne!(raw_mapping_kind, 0);
|
||||
|
||||
let (high, is_expansion) = (raw_mapping_kind >> 3, raw_mapping_kind & 0b100 != 0);
|
||||
if is_expansion {
|
||||
Ok(MappingKind::Expansion(high))
|
||||
} else {
|
||||
match high {
|
||||
0 => unreachable!("zero kind should have already been handled as a code mapping"),
|
||||
2 => Ok(MappingKind::Skip),
|
||||
4 => {
|
||||
let r#true = self.read_simple_term()?;
|
||||
let r#false = self.read_simple_term()?;
|
||||
Ok(MappingKind::Branch { r#true, r#false })
|
||||
}
|
||||
_ => Err(anyhow!("unknown mapping kind: {raw_mapping_kind:#x}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_raw_mapping_region(&mut self) -> anyhow::Result<MappingRegion> {
|
||||
let start_line_offset = self.read_uleb128_u32()?;
|
||||
let start_column = self.read_uleb128_u32()?;
|
||||
let end_line_offset = self.read_uleb128_u32()?;
|
||||
let end_column = self.read_uleb128_u32()?;
|
||||
Ok(MappingRegion { start_line_offset, start_column, end_line_offset, end_column })
|
||||
}
|
||||
}
|
||||
|
||||
/// Enum that can hold a constant zero value, the ID of a physical coverage
/// counter, or the ID (and operation) of a coverage-counter expression.
///
/// Terms are used as the operands of coverage-counter expressions, as the arms
/// of branch mappings, and as the value of code/gap mappings.
#[derive(Clone, Copy, Debug)]
pub(crate) enum CovTerm {
    Zero,
    Counter(u32),
    Expression(u32, Op),
}

/// Operator (addition or subtraction) used by an expression.
#[derive(Clone, Copy, Debug)]
pub(crate) enum Op {
    Sub,
    Add,
}

impl CovTerm {
    /// Decodes a raw term value: the low two bits are a tag, and the
    /// remaining high bits are the counter/expression ID.
    ///
    /// Returns `None` when the tag is `0b00` but the high bits are non-zero.
    pub(crate) fn decode(input: u32) -> Option<Self> {
        let id = input >> 2;
        match input & 0b11 {
            0b01 => Some(Self::Counter(id)),
            0b10 => Some(Self::Expression(id, Op::Sub)),
            0b11 => Some(Self::Expression(id, Op::Add)),
            // Only the `0b00` tag remains.
            //
            // When reading expression operands or branch arms, the LLVM coverage
            // mapping reader will always interpret a `0b00` tag as a zero
            // term, even when the high bits are non-zero.
            // We treat that case as failure instead, so that this code can be
            // shared by the full mapping-kind reader as well.
            _ => (id == 0).then_some(Self::Zero),
        }
    }
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum MappingKind {
|
||||
Code(CovTerm),
|
||||
Gap(CovTerm),
|
||||
Expansion(u32),
|
||||
Skip,
|
||||
// Using raw identifiers here makes the dump output a little bit nicer
|
||||
// (via the derived Debug), at the expense of making this tool's source
|
||||
// code a little bit uglier.
|
||||
Branch { r#true: CovTerm, r#false: CovTerm },
|
||||
}
|
||||
|
||||
/// Source region covered by a mapping, stored in the same relative form that
/// is used by the encoded payload.
struct MappingRegion {
    /// Offset of this region's start line, relative to the *start line* of
    /// the *previous mapping* (or 0). Line numbers are 1-based.
    start_line_offset: u32,
    /// This region's start column, absolute and 1-based.
    start_column: u32,
    /// Offset of this region's end line, relative to *this mapping's*
    /// start line. Line numbers are 1-based.
    end_line_offset: u32,
    /// This region's end column, absolute, 1-based, and exclusive.
    ///
    /// If the highest bit is set, that bit is cleared and the associated
    /// mapping becomes a gap region mapping.
    end_column: u32,
}

impl Debug for MappingRegion {
    /// Formats the region as `(prev + L, C) to (start + L, C)`, making the
    /// relative nature of the line numbers explicit.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { start_line_offset, start_column, end_line_offset, end_column } = self;
        write!(
            f,
            "(prev + {start_line_offset}, {start_column}) to (start + {end_line_offset}, {end_column})"
        )
    }
}
|
||||
|
||||
/// Helper type that prints expressions in a "resolved" form, so that
|
||||
/// developers reading the dump don't need to resolve expressions by hand.
|
||||
struct ExpressionResolver {
|
||||
operands: Vec<(CovTerm, CovTerm)>,
|
||||
}
|
||||
|
||||
impl ExpressionResolver {
|
||||
fn new() -> Self {
|
||||
Self { operands: Vec::new() }
|
||||
}
|
||||
|
||||
fn push_operands(&mut self, lhs: CovTerm, rhs: CovTerm) {
|
||||
self.operands.push((lhs, rhs));
|
||||
}
|
||||
|
||||
fn format_term(&self, term: CovTerm) -> String {
|
||||
let mut output = String::new();
|
||||
self.write_term(&mut output, term);
|
||||
output
|
||||
}
|
||||
|
||||
fn write_term(&self, output: &mut String, term: CovTerm) {
|
||||
match term {
|
||||
CovTerm::Zero => output.push_str("Zero"),
|
||||
CovTerm::Counter(id) => write!(output, "c{id}").unwrap(),
|
||||
CovTerm::Expression(id, op) => {
|
||||
let (lhs, rhs) = self.operands[id as usize];
|
||||
let op = match op {
|
||||
Op::Sub => "-",
|
||||
Op::Add => "+",
|
||||
};
|
||||
|
||||
output.push('(');
|
||||
self.write_term(output, lhs);
|
||||
write!(output, " {op} ").unwrap();
|
||||
self.write_term(output, rhs);
|
||||
output.push(')');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
17
src/tools/coverage-dump/src/main.rs
Normal file
17
src/tools/coverage-dump/src/main.rs
Normal file
|
@ -0,0 +1,17 @@
|
|||
mod covfun;
|
||||
mod parser;
|
||||
mod prf_names;
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
use anyhow::Context as _;
|
||||
|
||||
let args = std::env::args().collect::<Vec<_>>();
|
||||
|
||||
let llvm_ir_path = args.get(1).context("LLVM IR file not specified")?;
|
||||
let llvm_ir = std::fs::read_to_string(llvm_ir_path).context("couldn't read LLVM IR file")?;
|
||||
|
||||
let function_names = crate::prf_names::make_function_names_table(&llvm_ir)?;
|
||||
crate::covfun::dump_covfun_mappings(&llvm_ir, &function_names)?;
|
||||
|
||||
Ok(())
|
||||
}
|
80
src/tools/coverage-dump/src/parser.rs
Normal file
80
src/tools/coverage-dump/src/parser.rs
Normal file
|
@ -0,0 +1,80 @@
|
|||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use anyhow::ensure;
|
||||
use regex::bytes;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
/// Given the raw contents of a string literal in LLVM IR assembly, decodes any
|
||||
/// backslash escapes and returns a vector containing the resulting byte string.
|
||||
pub(crate) fn unescape_llvm_string_contents(contents: &str) -> Vec<u8> {
|
||||
let escape_re = {
|
||||
static RE: OnceLock<bytes::Regex> = OnceLock::new();
|
||||
// LLVM IR supports two string escapes: `\\` and `\xx`.
|
||||
RE.get_or_init(|| bytes::Regex::new(r"\\\\|\\([0-9A-Za-z]{2})").unwrap())
|
||||
};
|
||||
|
||||
fn u8_from_hex_digits(digits: &[u8]) -> u8 {
|
||||
// We know that the input contains exactly 2 hex digits, so these calls
|
||||
// should never fail.
|
||||
assert_eq!(digits.len(), 2);
|
||||
let digits = std::str::from_utf8(digits).unwrap();
|
||||
u8::from_str_radix(digits, 16).unwrap()
|
||||
}
|
||||
|
||||
escape_re
|
||||
.replace_all(contents.as_bytes(), |captures: &bytes::Captures<'_>| {
|
||||
let byte = match captures.get(1) {
|
||||
None => b'\\',
|
||||
Some(hex_digits) => u8_from_hex_digits(hex_digits.as_bytes()),
|
||||
};
|
||||
[byte]
|
||||
})
|
||||
.into_owned()
|
||||
}
|
||||
|
||||
pub(crate) struct Parser<'a> {
|
||||
rest: &'a [u8],
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub(crate) fn new(input: &'a [u8]) -> Self {
|
||||
Self { rest: input }
|
||||
}
|
||||
|
||||
pub(crate) fn ensure_empty(self) -> anyhow::Result<()> {
|
||||
ensure!(self.rest.is_empty(), "unparsed bytes: 0x{:02x?}", self.rest);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn read_n_bytes(&mut self, n: usize) -> anyhow::Result<&'a [u8]> {
|
||||
ensure!(n <= self.rest.len());
|
||||
|
||||
let (bytes, rest) = self.rest.split_at(n);
|
||||
self.rest = rest;
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
pub(crate) fn read_uleb128_u32(&mut self) -> anyhow::Result<u32> {
|
||||
self.read_uleb128_u64_and_convert()
|
||||
}
|
||||
|
||||
pub(crate) fn read_uleb128_usize(&mut self) -> anyhow::Result<usize> {
|
||||
self.read_uleb128_u64_and_convert()
|
||||
}
|
||||
|
||||
fn read_uleb128_u64_and_convert<T>(&mut self) -> anyhow::Result<T>
|
||||
where
|
||||
T: TryFrom<u64> + 'static,
|
||||
T::Error: std::error::Error + Send + Sync,
|
||||
{
|
||||
let mut temp_rest = self.rest;
|
||||
let raw_value: u64 = leb128::read::unsigned(&mut temp_rest)?;
|
||||
let converted_value = T::try_from(raw_value)?;
|
||||
|
||||
// Only update `self.rest` if the above steps succeeded, so that the
|
||||
// parser position can be used for error reporting if desired.
|
||||
self.rest = temp_rest;
|
||||
Ok(converted_value)
|
||||
}
|
||||
}
|
38
src/tools/coverage-dump/src/parser/tests.rs
Normal file
38
src/tools/coverage-dump/src/parser/tests.rs
Normal file
|
@ -0,0 +1,38 @@
|
|||
use super::unescape_llvm_string_contents;
|
||||
|
||||
// WARNING: These tests don't necessarily run in CI, and were mainly used to
|
||||
// help track down problems when originally developing this tool.
|
||||
// (The tool is still tested indirectly by snapshot tests that rely on it.)
|
||||
|
||||
// Tests for `unescape_llvm_string_contents`:
|
||||
|
||||
/// An empty literal unescapes to an empty byte string.
#[test]
fn unescape_empty() {
    let expected: &[u8] = &[];
    assert_eq!(unescape_llvm_string_contents(""), expected);
}
|
||||
|
||||
/// Text without any backslashes is returned byte-for-byte.
#[test]
fn unescape_noop() {
    let input = "The quick brown fox jumps over the lazy dog.";
    let unescaped = unescape_llvm_string_contents(input);
    assert_eq!(unescaped, input.as_bytes());
}
|
||||
|
||||
/// Each `\\` escape collapses to a single backslash.
#[test]
fn unescape_backslash() {
    let input = r"\\Hello\\world\\";
    let expected: &[u8] = br"\Hello\world\";
    assert_eq!(unescape_llvm_string_contents(input), expected);
}
|
||||
|
||||
/// Two-digit hex escapes are decoded, regardless of digit case.
#[test]
fn unescape_hex() {
    let input = r"\01\02\03\04\0a\0b\0C\0D\fd\fE\FF";
    let expected: &[u8] = b"\x01\x02\x03\x04\x0a\x0b\x0c\x0d\xfd\xfe\xff";
    assert_eq!(unescape_llvm_string_contents(input), expected);
}
|
||||
|
||||
/// A `\\` escape takes priority over the digits that follow it, and `\5c`
/// decodes to a backslash.
#[test]
fn unescape_mixed() {
    let input = r"\\01.\5c\5c";
    let expected: &[u8] = &[b'\\', b'0', b'1', b'.', b'\\', b'\\'];
    assert_eq!(unescape_llvm_string_contents(input), expected);
}
|
87
src/tools/coverage-dump/src/prf_names.rs
Normal file
87
src/tools/coverage-dump/src/prf_names.rs
Normal file
|
@ -0,0 +1,87 @@
|
|||
use crate::parser::{unescape_llvm_string_contents, Parser};
|
||||
use anyhow::{anyhow, ensure};
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
/// Scans through the contents of an LLVM IR assembly file to find `__llvm_prf_names`
|
||||
/// entries, decodes them, and creates a table that maps name hash values to
|
||||
/// (demangled) function names.
|
||||
pub(crate) fn make_function_names_table(llvm_ir: &str) -> anyhow::Result<HashMap<u64, String>> {
|
||||
fn prf_names_payload(line: &str) -> Option<&str> {
|
||||
let re = {
|
||||
// We cheat a little bit and match the variable name `@__llvm_prf_nm`
|
||||
// rather than the section name, because the section name is harder
|
||||
// to extract and differs across Linux/Windows/macOS.
|
||||
static RE: OnceLock<Regex> = OnceLock::new();
|
||||
RE.get_or_init(|| {
|
||||
Regex::new(r#"^@__llvm_prf_nm =.*\[[0-9]+ x i8\] c"([^"]*)".*$"#).unwrap()
|
||||
})
|
||||
};
|
||||
|
||||
let payload = re.captures(line)?.get(1).unwrap().as_str();
|
||||
Some(payload)
|
||||
}
|
||||
|
||||
/// LLVM's profiler/coverage metadata often uses an MD5 hash truncated to
|
||||
/// 64 bits as a way to associate data stored in different tables/sections.
|
||||
fn truncated_md5(bytes: &[u8]) -> u64 {
|
||||
use md5::{Digest, Md5};
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(bytes);
|
||||
let hash: [u8; 8] = hasher.finalize().as_slice()[..8].try_into().unwrap();
|
||||
// The truncated hash is explicitly little-endian, regardless of host
|
||||
// or target platform. (See `MD5Result::low` in LLVM's `MD5.h`.)
|
||||
u64::from_le_bytes(hash)
|
||||
}
|
||||
|
||||
fn demangle_if_able(symbol_name_bytes: &[u8]) -> anyhow::Result<String> {
|
||||
// In practice, raw symbol names should always be ASCII.
|
||||
let symbol_name_str = std::str::from_utf8(symbol_name_bytes)?;
|
||||
match rustc_demangle::try_demangle(symbol_name_str) {
|
||||
Ok(d) => Ok(format!("{d:#}")),
|
||||
// If demangling failed, don't treat it as an error. This lets us
|
||||
// run the dump tool against non-Rust coverage maps produced by
|
||||
// `clang`, for testing purposes.
|
||||
Err(_) => Ok(format!("(couldn't demangle) {symbol_name_str}")),
|
||||
}
|
||||
}
|
||||
|
||||
let mut map = HashMap::new();
|
||||
|
||||
for payload in llvm_ir.lines().filter_map(prf_names_payload).map(unescape_llvm_string_contents)
|
||||
{
|
||||
let mut parser = Parser::new(&payload);
|
||||
let uncompressed_len = parser.read_uleb128_usize()?;
|
||||
let compressed_len = parser.read_uleb128_usize()?;
|
||||
|
||||
let uncompressed_bytes_vec;
|
||||
let uncompressed_bytes: &[u8] = if compressed_len == 0 {
|
||||
// The symbol name bytes are uncompressed, so read them directly.
|
||||
parser.read_n_bytes(uncompressed_len)?
|
||||
} else {
|
||||
// The symbol name bytes are compressed, so read and decompress them.
|
||||
let compressed_bytes = parser.read_n_bytes(compressed_len)?;
|
||||
|
||||
uncompressed_bytes_vec = miniz_oxide::inflate::decompress_to_vec_zlib_with_limit(
|
||||
compressed_bytes,
|
||||
uncompressed_len,
|
||||
)
|
||||
.map_err(|e| anyhow!("{e:?}"))?;
|
||||
ensure!(uncompressed_bytes_vec.len() == uncompressed_len);
|
||||
|
||||
&uncompressed_bytes_vec
|
||||
};
|
||||
|
||||
// Symbol names in the payload are separated by `0x01` bytes.
|
||||
for raw_name in uncompressed_bytes.split(|&b| b == 0x01) {
|
||||
let hash = truncated_md5(raw_name);
|
||||
let demangled = demangle_if_able(raw_name)?;
|
||||
map.insert(hash, demangled);
|
||||
}
|
||||
|
||||
parser.ensure_empty()?;
|
||||
}
|
||||
|
||||
Ok(map)
|
||||
}
|
Loading…
Add table
Reference in a new issue