Rollup merge of #84751 - Soveu:is_char_boundary_opt, r=Amanieu
str::is_char_boundary - slight optimization
The current `str::is_char_boundary` implementation emits slightly more instructions than necessary, because it includes a separate branch for `index == s.len()`:
```rust
/// Reports whether `index` lies on a UTF-8 character boundary of `s`.
///
/// Returns `true` when `index` is `0` or `s.len()`, `false` when `index` is
/// greater than `s.len()`, and otherwise `true` exactly when the byte at
/// `index` is not a UTF-8 continuation byte.
pub fn is_char_boundary(s: &str, index: usize) -> bool {
// Both ends of the string are always boundaries. The `index == s.len()` test
// here is separate from the bounds check that `get` performs below.
if index == 0 || index == s.len() {
return true;
}
match s.as_bytes().get(index) {
// `get` returns `None` only for `index >= s.len()`; equality was handled
// above, so here `index` is strictly past the end.
None => false,
// Reinterpreted as `i8`, continuation bytes 0x80..=0xBF are -128..=-65,
// so this is equivalent to `b < 128 || b >= 192`.
Some(&b) => (b as i8) >= -0x40,
}
}
```
Moving the `index == s.len()` check into the `None` arm lets it merge with the `index < s.len()` bounds check already performed by `s.as_bytes().get(index)`:
```rust
/// Reports whether `index` lies on a UTF-8 character boundary of `s`.
///
/// Returns the same results as `is_char_boundary`: `true` when `index` is `0`
/// or `s.len()`, `false` when `index` is greater than `s.len()`, and otherwise
/// `true` exactly when the byte at `index` is not a UTF-8 continuation byte.
/// The end-of-string case is handled in the `None` arm instead of up front.
pub fn is_char_boundary2(s: &str, index: usize) -> bool {
// The start of the string is always a boundary.
if index == 0 {
return true;
}
match s.as_bytes().get(index) {
// `get` returns `None` for any `index >= s.len()`; only the exact end of
// the string counts as a boundary.
// For some reason, LLVM likes this comparison here more
None => index == s.len(),
// This is bit magic equivalent to: b < 128 || b >= 192
// (i.e. `b` is not a continuation byte in 0x80..=0xBF).
Some(&b) => (b as i8) >= -0x40,
}
}
```
This one has better codegen on every platform, except powerpc
<details><summary>x86 codegen</summary>
<p>
```nasm
example::is_char_boundary:
mov al, 1
test rdx, rdx
je .LBB0_5
cmp rsi, rdx
je .LBB0_5
cmp rsi, rdx
jbe .LBB0_3
cmp byte ptr [rdi + rdx], -65
setg al
.LBB0_5:
ret
.LBB0_3:
xor eax, eax
ret
example::is_char_boundary2:
test rdx, rdx
je .LBB1_1
cmp rsi, rdx
jbe .LBB1_4
cmp byte ptr [rdi + rdx], -65
setg al
ret
.LBB1_1: ; technically this branch is the same as LBB1_4
mov al, 1
ret
.LBB1_4:
sete al
ret
```
</p>
</details>
<details><summary>aarch64 codegen</summary>
<p>
```as
example::is_char_boundary:
mov x8, x0
mov w0, #1
cbz x2, .LBB0_4
cmp x1, x2
b.eq .LBB0_4
b.ls .LBB0_5
ldrsb w8, [x8, x2]
cmn w8, #65
cset w0, gt
.LBB0_4:
ret
.LBB0_5:
mov w0, wzr
ret
example::is_char_boundary2:
cbz x2, .LBB1_3
cmp x1, x2
b.ls .LBB1_4
ldrsb w8, [x0, x2]
cmn w8, #65
cset w0, gt
ret
.LBB1_3:
mov w0, #1
ret
.LBB1_4:
cset w0, eq
ret
```
</p>
</details>
<details><summary>riscv64gc codegen</summary>
<p>
```as
example::is_char_boundary:
seqz a3, a2
xor a4, a1, a2
seqz a4, a4
or a4, a4, a3
addi a3, zero, 1
bnez a4, .LBB0_3
bgeu a2, a1, .LBB0_4
add a0, a0, a2
lb a0, 0(a0)
addi a1, zero, -65
slt a3, a1, a0
.LBB0_3:
mv a0, a3
ret
.LBB0_4:
mv a0, zero
ret
example::is_char_boundary2:
beqz a2, .LBB1_3
bgeu a2, a1, .LBB1_4
add a0, a0, a2
lb a0, 0(a0)
addi a1, zero, -65
slt a0, a1, a0
ret
.LBB1_3:
addi a0, zero, 1
ret
.LBB1_4:
xor a0, a1, a2
seqz a0, a0
ret
```
</p>
</details>
[Link to godbolt](https://godbolt.org/z/K8avEz8Gr)
`@rustbot` label: A-codegen