Implement confusable_idents lint.

This commit is contained in:
Charles Lew 2020-04-25 09:38:31 +08:00
parent 53d3bc02ed
commit c05961c2db
9 changed files with 192 additions and 48 deletions

View file

@ -74,7 +74,7 @@ name = "arena"
version = "0.0.0"
dependencies = [
"rustc_data_structures",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -492,7 +492,7 @@ dependencies = [
"regex-syntax",
"semver",
"serde",
"smallvec 1.0.0",
"smallvec 1.4.0",
"toml",
"unicode-normalization",
"url 2.1.0",
@ -2428,7 +2428,7 @@ dependencies = [
"cloudabi",
"libc",
"redox_syscall",
"smallvec 1.0.0",
"smallvec 1.4.0",
"winapi 0.3.8",
]
@ -3151,7 +3151,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81dfcfbb0ddfd533abf8c076e3b49d1e5042d1962526a12ce2c66d514b24cca3"
dependencies = [
"rustc-ap-rustc_data_structures",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3174,7 +3174,7 @@ dependencies = [
"rustc-ap-rustc_span",
"rustc-ap-serialize",
"scoped-tls",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3223,7 +3223,7 @@ dependencies = [
"rustc-ap-rustc_session",
"rustc-ap-rustc_span",
"rustc-ap-serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3249,7 +3249,7 @@ dependencies = [
"rustc-hash",
"rustc-rayon",
"rustc-rayon-core",
"smallvec 1.0.0",
"smallvec 1.4.0",
"stable_deref_trait",
"winapi 0.3.8",
]
@ -3291,7 +3291,7 @@ dependencies = [
"rustc-ap-rustc_session",
"rustc-ap-rustc_span",
"rustc-ap-serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3318,7 +3318,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32220c3e6cdf226f38e4474b747dca15f3106bb680c74f10b299af3f6cdb1663"
dependencies = [
"rustc-ap-serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3358,7 +3358,7 @@ dependencies = [
"rustc-ap-rustc_lexer",
"rustc-ap-rustc_session",
"rustc-ap-rustc_span",
"smallvec 1.0.0",
"smallvec 1.4.0",
"unicode-normalization",
]
@ -3423,7 +3423,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "756e8f526ec7906e132188bf25e3c10a6ee42ab77294ecb3b3602647f0508eef"
dependencies = [
"indexmap",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3507,7 +3507,7 @@ dependencies = [
"serde",
"serde_json",
"smallvec 0.6.10",
"smallvec 1.0.0",
"smallvec 1.4.0",
"syn 0.15.35",
"url 2.1.0",
"winapi 0.3.8",
@ -3518,7 +3518,7 @@ name = "rustc_apfloat"
version = "0.0.0"
dependencies = [
"bitflags",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3533,7 +3533,7 @@ dependencies = [
"rustc_span",
"scoped-tls",
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3551,7 +3551,7 @@ dependencies = [
"rustc_session",
"rustc_span",
"rustc_target",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3612,7 +3612,7 @@ dependencies = [
"rustc_session",
"rustc_span",
"rustc_target",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3641,7 +3641,7 @@ dependencies = [
"rustc_span",
"rustc_target",
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3694,7 +3694,7 @@ dependencies = [
"rustc-rayon-core",
"rustc_index",
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
"stable_deref_trait",
"winapi 0.3.8",
]
@ -3768,7 +3768,7 @@ dependencies = [
"rustc_session",
"rustc_span",
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3797,7 +3797,7 @@ dependencies = [
"rustc_span",
"rustc_target",
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3833,7 +3833,7 @@ name = "rustc_index"
version = "0.0.0"
dependencies = [
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3853,7 +3853,7 @@ dependencies = [
"rustc_span",
"rustc_target",
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -3895,7 +3895,7 @@ dependencies = [
"rustc_ty",
"rustc_typeck",
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
"tempfile",
"winapi 0.3.8",
]
@ -3968,7 +3968,7 @@ dependencies = [
"rustc_span",
"rustc_target",
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
"stable_deref_trait",
"winapi 0.3.8",
]
@ -4000,7 +4000,7 @@ dependencies = [
"rustc_target",
"scoped-tls",
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -4029,7 +4029,7 @@ dependencies = [
"rustc_target",
"rustc_trait_selection",
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -4053,7 +4053,7 @@ dependencies = [
"rustc_target",
"rustc_trait_selection",
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -4134,7 +4134,7 @@ dependencies = [
"rustc_index",
"rustc_span",
"serialize",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -4157,7 +4157,7 @@ dependencies = [
"rustc_middle",
"rustc_session",
"rustc_span",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -4272,7 +4272,7 @@ dependencies = [
"rustc_session",
"rustc_span",
"rustc_target",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -4287,7 +4287,7 @@ dependencies = [
"rustc_middle",
"rustc_span",
"rustc_trait_selection",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -4324,7 +4324,7 @@ dependencies = [
"rustc_span",
"rustc_target",
"rustc_trait_selection",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -4568,7 +4568,7 @@ name = "serialize"
version = "0.0.0"
dependencies = [
"indexmap",
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -4635,9 +4635,9 @@ checksum = "ab606a9c5e214920bb66c458cd7be8ef094f813f20fe77a54cc7dbfff220d4b7"
[[package]]
name = "smallvec"
version = "1.0.0"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ecf3b85f68e8abaa7555aa5abdb1153079387e60b718283d732f03897fcfc86"
checksum = "c7cb5678e1615754284ec264d9bb5b4c27d2018577fd90ac0ceb578591ed5ee4"
[[package]]
name = "socket2"
@ -5359,11 +5359,11 @@ dependencies = [
[[package]]
name = "unicode-normalization"
version = "0.1.11"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf"
checksum = "5479532badd04e128284890390c1e876ef7a993d0570b3597ae43dfa1d59afa4"
dependencies = [
"smallvec 1.0.0",
"smallvec 1.4.0",
]
[[package]]
@ -5374,10 +5374,11 @@ checksum = "5b2c5c29e805da6817f5af6a627d65adb045cebf05cccd5a3493d6109454391c"
[[package]]
name = "unicode-security"
version = "0.0.2"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c49d35967fa037b881acc34ef717c38c4b5560eba10e3685271b3f530bb19634"
checksum = "a5f9011bbed9c13372bc8df618b55a38138445199caf3b61d432c6859c36dee0"
dependencies = [
"unicode-normalization",
"unicode-script",
]

View file

@ -141,10 +141,10 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
SingleQuote => op!('\''),
Ident(name, false) if name == kw::DollarCrate => tt!(Ident::dollar_crate()),
Ident(name, is_raw) => tt!(Ident::new(name, is_raw)),
Ident(name, is_raw) => tt!(Ident::new(sess, name, is_raw)),
Lifetime(name) => {
let ident = ast::Ident::new(name, span).without_first_quote();
stack.push(tt!(Ident::new(ident.name, false)));
stack.push(tt!(Ident::new(sess, ident.name, false)));
tt!(Punct::new('\'', true))
}
Literal(lit) => tt!(Literal { lit }),
@ -322,7 +322,7 @@ impl Ident {
false
}
}
fn new(sym: Symbol, is_raw: bool, span: Span) -> Ident {
fn new(sess: &ParseSess, sym: Symbol, is_raw: bool, span: Span) -> Ident {
let sym = nfc_normalize(&sym.as_str());
let string = sym.as_str();
if !Self::is_valid(&string) {
@ -331,6 +331,7 @@ impl Ident {
if is_raw && !sym.can_be_raw() {
panic!("`{}` cannot be a raw identifier", string);
}
sess.symbol_gallery.insert(sym, span);
Ident { sym, is_raw, span }
}
fn dollar_crate(span: Span) -> Ident {
@ -495,7 +496,7 @@ impl server::Punct for Rustc<'_> {
impl server::Ident for Rustc<'_> {
fn new(&mut self, string: &str, span: Self::Span, is_raw: bool) -> Self::Ident {
Ident::new(Symbol::intern(string), is_raw, span)
Ident::new(self.sess, Symbol::intern(string), is_raw, span)
}
fn span(&mut self, ident: Self::Ident) -> Self::Span {
ident.span

View file

@ -10,7 +10,7 @@ path = "lib.rs"
[dependencies]
log = "0.4"
unicode-security = "0.0.2"
unicode-security = "0.0.3"
rustc_middle = { path = "../librustc_middle" }
rustc_ast_pretty = { path = "../librustc_ast_pretty" }
rustc_attr = { path = "../librustc_attr" }

View file

@ -388,6 +388,11 @@ impl<'s> LintLevelsBuilder<'s> {
self.cur = push.prev;
}
/// Returns the level and the source of that level for `lint`.
///
/// The lookup is delegated to `self.sets`, evaluated at the builder's
/// current position (`self.cur`) with this builder's session.
pub fn lint_level(&self, lint: &'static Lint) -> (Level, LintSource) {
    let (level, source) = self.sets.get_lint_level(lint, self.cur, None, self.sess);
    (level, source)
}
/// Used to emit a lint-related diagnostic based on the current state of
/// this lint context.
pub fn struct_lint(
@ -396,7 +401,7 @@ impl<'s> LintLevelsBuilder<'s> {
span: Option<MultiSpan>,
decorate: impl for<'a> FnOnce(LintDiagnosticBuilder<'a>),
) {
let (level, src) = self.sets.get_lint_level(lint, self.cur, None, self.sess);
let (level, src) = self.lint_level(lint);
struct_lint_level(self.sess, lint, level, src, span, decorate)
}

View file

@ -1,5 +1,7 @@
use crate::{EarlyContext, EarlyLintPass, LintContext};
use rustc_ast::ast;
use rustc_data_structures::fx::FxHashMap;
use rustc_span::symbol::SymbolStr;
declare_lint! {
pub NON_ASCII_IDENTS,
@ -13,9 +15,101 @@ declare_lint! {
"detects uncommon Unicode codepoints in identifiers"
}
declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS]);
// Lint for pairs of identifiers that are visually confusable with each other.
// It is declared with the `Allow` level for now.
// FIXME: Change this to warn.
declare_lint! {
pub CONFUSABLE_IDENTS,
Allow,
"detects visually confusable pairs between identifiers"
}
declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]);
use std::hash::{Hash, Hasher};
use std::ops::Deref;
/// A string that is either an interned symbol string or a separately owned
/// boxed string. Both variants dereference to `str`, and hashing and equality
/// are defined on that `str` content rather than on the variant.
enum CowBoxSymStr {
/// The text is the interned symbol string itself.
Interned(SymbolStr),
/// The text is held in its own heap allocation.
Owned(Box<str>),
}
impl Deref for CowBoxSymStr {
type Target = str;
fn deref(&self) -> &str {
match self {
CowBoxSymStr::Interned(interned) => interned,
CowBoxSymStr::Owned(ref owned) => owned,
}
}
}
/// Hashes the underlying text only, so the variant holding the text does not
/// influence the hash.
impl Hash for CowBoxSymStr {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        // `self` coerces to `&str` through the `Deref` impl.
        <str as Hash>::hash(self, state)
    }
}
/// Two values are equal when their text is equal, regardless of which
/// variant holds it.
impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
    #[inline]
    fn eq(&self, other: &CowBoxSymStr) -> bool {
        let lhs: &str = self;
        let rhs: &str = other;
        lhs == rhs
    }
}

impl Eq for CowBoxSymStr {}
/// Computes the confusable-detection skeleton of `symbol_str`.
///
/// `buffer` is scratch space: it is cleared and then filled with the
/// skeleton. If the skeleton equals the symbol's own text, the symbol string
/// is returned as `Interned` and `buffer` keeps its contents and allocation.
/// Otherwise the buffer's contents are moved into the returned `Owned` value
/// and `buffer` is left as a fresh empty `String`.
fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr {
    use unicode_security::confusable_detection::skeleton;

    buffer.clear();
    buffer.extend(skeleton(&symbol_str));

    if symbol_str != *buffer {
        // Hand the filled allocation to the result; the caller's buffer
        // becomes a new empty string.
        let skeleton_text = std::mem::take(buffer);
        return CowBoxSymStr::Owned(skeleton_text.into_boxed_str());
    }
    CowBoxSymStr::Interned(symbol_str)
}
impl EarlyLintPass for NonAsciiIdents {
/// Crate-level pass for `CONFUSABLE_IDENTS`.
///
/// Takes every symbol recorded in the parse session's symbol gallery, sorts
/// them by their string, and groups them by skeleton. The first symbol seen
/// for a skeleton is remembered; every later symbol with the same skeleton
/// is reported at its own span, with a label pointing at the remembered one.
fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
    use rustc_session::lint::Level;
    use std::collections::hash_map::Entry;

    // Nothing to do when the lint is allowed at this point.
    let (level, _) = cx.builder.lint_level(CONFUSABLE_IDENTS);
    if level == Level::Allow {
        return;
    }

    // Copy the gallery out so the lock is released before any lint is emitted.
    let gallery = cx.sess.parse_sess.symbol_gallery.symbols.lock();
    let mut occurrences: Vec<_> =
        gallery.iter().map(|(symbol, span)| (symbol.as_str(), *span)).collect();
    drop(gallery);

    // Sort by symbol string so the processing order does not depend on the
    // hash map's iteration order.
    occurrences.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0));

    let mut first_by_skeleton =
        FxHashMap::with_capacity_and_hasher(occurrences.len(), Default::default());
    let mut scratch = String::new();
    for (symbol_str, sp) in occurrences {
        match first_by_skeleton.entry(calc_skeleton(symbol_str.clone(), &mut scratch)) {
            Entry::Vacant(slot) => {
                slot.insert((symbol_str, sp));
            }
            Entry::Occupied(slot) => {
                let (existing_symbolstr, existing_span) = slot.get();
                cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
                    let message = format!(
                        "identifier pair considered confusable between `{}` and `{}`",
                        existing_symbolstr, symbol_str
                    );
                    lint.build(&message)
                        .span_label(
                            *existing_span,
                            "this is where the previous identifier occurred",
                        )
                        .emit();
                });
            }
        }
    }
}
fn check_ident(&mut self, cx: &EarlyContext<'_>, ident: ast::Ident) {
use unicode_security::GeneralSecurityProfile;
let name_str = ident.name.as_str();

View file

@ -222,8 +222,9 @@ impl<'a> StringReader<'a> {
ident_start = ident_start + BytePos(2);
}
let sym = nfc_normalize(self.str_from(ident_start));
let span = self.mk_sp(start, self.pos);
self.sess.symbol_gallery.insert(sym, span);
if is_raw_ident {
let span = self.mk_sp(start, self.pos);
if !sym.can_be_raw() {
self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
}

View file

@ -60,6 +60,20 @@ impl GatedSpans {
}
}
/// Records the symbols that have been seen, together with where each one
/// first appeared.
#[derive(Default)]
pub struct SymbolGallery {
/// All symbols that have occurred, each mapped to the span of its first occurrence.
pub symbols: Lock<FxHashMap<Symbol, Span>>,
}
impl SymbolGallery {
    /// Records `symbol` with `span` in the gallery.
    ///
    /// Only the first occurrence is kept: if the symbol is already present,
    /// the stored span is left untouched and the new one is ignored.
    pub fn insert(&self, symbol: Symbol, span: Span) {
        let mut recorded = self.symbols.lock();
        recorded.entry(symbol).or_insert(span);
    }
}
/// Construct a diagnostic for a language feature error due to the given `span`.
/// The `feature`'s `Symbol` is the one you used in `active.rs` and `rustc_span::symbols`.
pub fn feature_err<'a>(
@ -118,6 +132,7 @@ pub struct ParseSess {
pub ambiguous_block_expr_parse: Lock<FxHashMap<Span, Span>>,
pub injected_crate_name: Once<Symbol>,
pub gated_spans: GatedSpans,
pub symbol_gallery: SymbolGallery,
/// The parser has reached `Eof` due to an unclosed brace. Used to silence unnecessary errors.
pub reached_eof: Lock<bool>,
}
@ -143,6 +158,7 @@ impl ParseSess {
ambiguous_block_expr_parse: Lock::new(FxHashMap::default()),
injected_crate_name: Once::new(),
gated_spans: GatedSpans::default(),
symbol_gallery: SymbolGallery::default(),
reached_eof: Lock::new(false),
}
}

View file

@ -0,0 +1,9 @@
#![feature(non_ascii_idents)]
#![deny(confusable_idents)]
#![allow(uncommon_codepoints, non_upper_case_globals)]
const ｓ: usize = 42; //~ ERROR identifier pair considered confusable
fn main() {
let s = "rust"; // plain ASCII `s`; the expected diagnostic labels this as the previous identifier
}

View file

@ -0,0 +1,17 @@
error: identifier pair considered confusable between `s` and `ｓ`
--> $DIR/lint-confusable-idents.rs:5:7
|
LL | const : usize = 42;
| ^^
...
LL | let s = "rust";
| - this is where the previous identifier occurred
|
note: the lint level is defined here
--> $DIR/lint-confusable-idents.rs:2:9
|
LL | #![deny(confusable_idents)]
| ^^^^^^^^^^^^^^^^^
error: aborting due to previous error