Rollup merge of #95407 - xfix:inline-u8-is_utf8_char_boundary, r=scottmcm

Inline u8::is_utf8_char_boundary

Since Rust beta, Rust is incapable of inlining this function in the following example function.

```rust
pub fn safe_substr_to(s: &str, mut length: usize) -> &str {
    loop {
        if let Some(s) = s.get(..length) {
            return s;
        }
        length -= 1;
    }
}
```

When compiled with beta or nightly compiler on Godbolt with `-C opt-level=3` flag it prints the following assembly.

```asm
example::safe_substr_to:
        push    r15
        push    r14
        push    r12
        push    rbx
        push    rax
        mov     r14, rdi
        test    rdx, rdx
        je      .LBB0_8
        mov     rbx, rdx
        mov     r15, rsi
        mov     r12, qword ptr [rip + core::num::<impl u8>::is_utf8_char_boundary@GOTPCREL]
        jmp     .LBB0_4
.LBB0_2:
        je      .LBB0_9
.LBB0_3:
        add     rbx, -1
        je      .LBB0_8
.LBB0_4:
        cmp     rbx, r15
        jae     .LBB0_2
        movzx   edi, byte ptr [r14 + rbx]
        call    r12
        test    al, al
        je      .LBB0_3
        mov     r15, rbx
        jmp     .LBB0_9
.LBB0_8:
        xor     r15d, r15d
.LBB0_9:
        mov     rax, r14
        mov     rdx, r15
        add     rsp, 8
        pop     rbx
        pop     r12
        pop     r14
        pop     r15
        ret
```

`qword ptr [rip + core::num::<impl u8>::is_utf8_char_boundary@GOTPCREL]` is not inlined. `-C remark=all` outputs the following message:

```
note: /rustc/7bccde19767082c7865a12902fa614ed4f8fed73/library/core/src/str/mod.rs:214:25: inline: _ZN4core3num20_$LT$impl$u20$u8$GT$21is_utf8_char_boundary17hace9f12f5ba07a7fE will not be inlined into _ZN4core3str21_$LT$impl$u20$str$GT$16is_char_boundary17hf2587e9a6b8c5e43E because its definition is unavailable
```

Stable compiler outputs more reasonable code:

```asm
example::safe_substr_to:
        mov     rcx, rdx
        mov     rax, rdi
        test    rdx, rdx
        je      .LBB0_9
        mov     rdx, rsi
        jmp     .LBB0_4
.LBB0_2:
        cmp     rdx, rcx
        je      .LBB0_7
.LBB0_3:
        add     rcx, -1
        je      .LBB0_9
.LBB0_4:
        cmp     rcx, rdx
        jae     .LBB0_2
        cmp     byte ptr [rax + rcx], -64
        jl      .LBB0_3
        mov     rdx, rcx
.LBB0_7:
        ret
.LBB0_9:
        xor     edx, edx
        ret
```
This commit is contained in:
Dylan DPC 2022-03-28 20:41:53 +02:00 committed by GitHub
commit 1f33cd1827
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -809,6 +809,7 @@ impl u8 {
ascii::escape_default(self)
}
#[inline]
pub(crate) const fn is_utf8_char_boundary(self) -> bool {
// This is bit magic equivalent to: b < 128 || b >= 192
(self as i8) >= -0x40