Auto merge of #108717 - TDecki:dec2flt-inline, r=thomcc

Add inlining annotations in `dec2flt`. Currently, the combination of `dec2flt` being generic and the `FromStr` implementaions containing inline anttributes causes massive amounts of assembly to be generated whenever these implementation are used. In addition, the assembly has calls to function which ought to be inlined, but they are not (even when using lto). This Pr fixes this.
2023-03-21 04:55:02 +00:00 · 2023-03-21 04:55:02 +00:00 · 84c47b8279
commit 84c47b8279
parent 3ff4d56650 db2c6e0385
5 changed files with 18 additions and 1 deletions
--- a/library/core/src/num/dec2flt/common.rs
+++ b/library/core/src/num/dec2flt/common.rs
@ -192,6 +192,7 @@ pub struct BiasedFp {
 }

 impl BiasedFp {
+    #[inline]
    pub const fn zero_pow2(e: i32) -> Self {
        Self { f: 0, e }
    }
--- a/library/core/src/num/dec2flt/float.rs
+++ b/library/core/src/num/dec2flt/float.rs
@ -118,11 +118,13 @@ impl RawFloat for f32 {
    const SMALLEST_POWER_OF_TEN: i32 = -65;
    const LARGEST_POWER_OF_TEN: i32 = 38;

+    #[inline]
    fn from_u64(v: u64) -> Self {
        debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
        v as _
    }

+    #[inline]
    fn from_u64_bits(v: u64) -> Self {
        f32::from_bits((v & 0xFFFFFFFF) as u32)
    }
@ -169,11 +171,13 @@ impl RawFloat for f64 {
    const SMALLEST_POWER_OF_TEN: i32 = -342;
    const LARGEST_POWER_OF_TEN: i32 = 308;

+    #[inline]
    fn from_u64(v: u64) -> Self {
        debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
        v as _
    }

+    #[inline]
    fn from_u64_bits(v: u64) -> Self {
        f64::from_bits(v)
    }
--- a/library/core/src/num/dec2flt/lemire.rs
+++ b/library/core/src/num/dec2flt/lemire.rs
@ -118,10 +118,12 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
 /// This uses a pre-computed integer approximation for
 /// log2(10), where 217706 / 2^16 is accurate for the
 /// entire range of non-finite decimal exponents.
+#[inline]
 fn power(q: i32) -> i32 {
    (q.wrapping_mul(152_170 + 65536) >> 16) + 63
 }

+#[inline]
 fn full_multiplication(a: u64, b: u64) -> (u64, u64) {
    let r = (a as u128) * (b as u128);
    (r as u64, (r >> 64) as u64)
--- a/library/core/src/num/dec2flt/mod.rs
+++ b/library/core/src/num/dec2flt/mod.rs
@ -147,7 +147,13 @@ macro_rules! from_str_float_impl {
            /// representable floating-point number to the number represented
            /// by `src` (following the same rules for rounding as for the
            /// results of primitive operations).
-            #[inline]
+            // We add the `#[inline(never)]` attribute, since its content will
+            // be filled with that of `dec2flt`, which has #[inline(always)].
+            // Since `dec2flt` is generic, a normal inline attribute on this function
+            // with `dec2flt` having no attributes results in heavily repeated
+            // generation of `dec2flt`, despite the fact only a maximum of 2
+            // possible instances can ever exist. Adding #[inline(never)] avoids this.
+            #[inline(never)]
            fn from_str(src: &str) -> Result<Self, ParseFloatError> {
                dec2flt(src)
            }
@ -202,12 +208,14 @@ impl fmt::Display for ParseFloatError {
    }
 }

+#[inline]
 pub(super) fn pfe_empty() -> ParseFloatError {
    ParseFloatError { kind: FloatErrorKind::Empty }
 }

 // Used in unit tests, keep public.
 // This is much better than making FloatErrorKind and ParseFloatError::kind public.
+#[inline]
 pub fn pfe_invalid() -> ParseFloatError {
    ParseFloatError { kind: FloatErrorKind::Invalid }
 }
@ -220,6 +228,7 @@ fn biased_fp_to_float<T: RawFloat>(x: BiasedFp) -> T {
 }

 /// Converts a decimal string into a floating point number.
+#[inline(always)] // Will be inlined into a function with `#[inline(never)]`, see above
 pub fn dec2flt<F: RawFloat>(s: &str) -> Result<F, ParseFloatError> {
    let mut s = s.as_bytes();
    let c = if let Some(&c) = s.first() {
--- a/library/core/src/num/dec2flt/number.rs
+++ b/library/core/src/num/dec2flt/number.rs
@ -33,6 +33,7 @@ pub struct Number {

 impl Number {
    /// Detect if the float can be accurately reconstructed from native floats.
+    #[inline]
    fn is_fast_path<F: RawFloat>(&self) -> bool {
        F::MIN_EXPONENT_FAST_PATH <= self.exponent
            && self.exponent <= F::MAX_EXPONENT_DISGUISED_FAST_PATH