Rollup merge of #128864 - jieyouxu:funnicode, r=Urgau

Use `SourceMap::end_point` instead of `- BytePos(1)` in arg removal suggestion

Previously, we tried to remove extra arg commas when providing extra arg removal suggestions. One of
the edge cases is having to account for an arg that has a closing delimiter `)` following it.
However, the previous suggestion code assumed that the delimiter is in fact exactly the 1-byte `)`
character. This assumption was proven incorrect, because we recover from Unicode-confusable
delimiters in the parser, which means that the ending delimiter could be a multi-byte codepoint
that looks *like* a `)`. Subtracing 1 byte could land us in the middle of a codepoint, triggering a
codepoint boundary assertion.

This is fixed by using `SourceMap::end_point` which properly accounts for codepoint boundaries.

Fixes #128717.

cc ````@fmease```` and #128790
This commit is contained in:
Matthias Krüger 2024-08-09 18:24:59 +02:00 committed by GitHub
commit 9eb77ac3e0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 63 additions and 3 deletions

View file

@ -22,7 +22,7 @@ use rustc_middle::ty::{self, IsSuggestable, Ty, TyCtxt};
use rustc_middle::{bug, span_bug};
use rustc_session::Session;
use rustc_span::symbol::{kw, Ident};
use rustc_span::{sym, BytePos, Span, DUMMY_SP};
use rustc_span::{sym, Span, DUMMY_SP};
use rustc_trait_selection::error_reporting::infer::{FailureCode, ObligationCauseExt};
use rustc_trait_selection::infer::InferCtxtExt;
use rustc_trait_selection::traits::{self, ObligationCauseCode, SelectionContext};
@ -1140,8 +1140,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
.get(arg_idx + 1)
.map(|&(_, sp)| sp)
.unwrap_or_else(|| {
// Subtract one to move before `)`
call_expr.span.with_lo(call_expr.span.hi() - BytePos(1))
// Try to move before `)`. Note that `)` here is not necessarily
// the latin right paren, it could be a Unicode-confusable that
// looks like a `)`, so we must not use `- BytePos(1)`
// manipulations here.
self.tcx().sess.source_map().end_point(call_expr.span)
});
// Include next comma

View file

@ -0,0 +1,19 @@
//! Previously, we tried to remove extra arg commas when providing extra arg removal suggestions.
//! One of the edge cases is having to account for an arg that has a closing delimiter `)`
//! following it. However, the previous suggestion code assumed that the delimiter is in fact
//! exactly the 1-byte `)` character. This assumption was proven incorrect, because we recover
//! from Unicode-confusable delimiters in the parser, which means that the ending delimiter could be
//! a multi-byte codepoint that looks *like* a `)`. Subtracing 1 byte could land us in the middle of
//! a codepoint, triggering a codepoint boundary assertion.
//!
//! issue: rust-lang/rust#128717
fn main() {
// The following example has been modified from #128717 to remove irrelevant Unicode as they do
// not otherwise partake in the right delimiter calculation causing the codepoint boundary
// assertion.
main(rahh;
//~^ ERROR unknown start of token
//~| ERROR this function takes 0 arguments but 1 argument was supplied
//~| ERROR cannot find value `rahh` in this scope
}

View file

@ -0,0 +1,38 @@
error: unknown start of token: \u{ff09}
--> $DIR/suggest-arg-comma-delete-ice.rs:15:14
|
LL | main(rahh;
| ^^
|
help: Unicode character '' (Fullwidth Right Parenthesis) looks like ')' (Right Parenthesis), but it is not
|
LL | main(rahh);
| ~
error[E0425]: cannot find value `rahh` in this scope
--> $DIR/suggest-arg-comma-delete-ice.rs:15:10
|
LL | main(rahh;
| ^^^^ not found in this scope
error[E0061]: this function takes 0 arguments but 1 argument was supplied
--> $DIR/suggest-arg-comma-delete-ice.rs:15:5
|
LL | main(rahh;
| ^^^^ ---- unexpected argument
|
note: function defined here
--> $DIR/suggest-arg-comma-delete-ice.rs:11:4
|
LL | fn main() {
| ^^^^
help: remove the extra argument
|
LL - main(rahh;
LL + main(;
|
error: aborting due to 3 previous errors
Some errors have detailed explanations: E0061, E0425.
For more information about an error, try `rustc --explain E0061`.