From 5a0078520e80579307124aca567a70e195637682 Mon Sep 17 00:00:00 2001
From: Nathan Froyd <froydnj@gmail.com>
Date: Thu, 13 Apr 2017 21:33:24 -0400
Subject: [PATCH 1/4] num: add minimal benchmarks for full floating-point
 formatting

We have benchmarks for the floating-point formatting algorithms
themselves, but not for the surrounding machinery like Formatter and
translating to the flt2dec::Part slices.
---
 src/libcore/benches/num/flt2dec/mod.rs | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
diff --git a/src/libcore/benches/num/flt2dec/mod.rs b/src/libcore/benches/num/flt2dec/mod.rs
index 1de2bf4921f..7f3b98a1c76 100644
--- a/src/libcore/benches/num/flt2dec/mod.rs
+++ b/src/libcore/benches/num/flt2dec/mod.rs
@@ -13,6 +13,10 @@ mod strategy {
     mod grisu;
 }
 
+use std::f64;
+use std::io::Write;
+use std::vec::Vec;
+use test::Bencher;
 use core::num::flt2dec::{decode, DecodableFloat, FullDecoded, Decoded};
 use core::num::flt2dec::MAX_SIG_DIGITS;
 
@@ -22,3 +26,23 @@ pub fn decode_finite<T: DecodableFloat>(v: T) -> Decoded {
         full_decoded => panic!("expected finite, got {:?} instead", full_decoded)
     }
 }
+
+#[bench]
+fn bench_small_shortest(b: &mut Bencher) {
+    let mut buf = Vec::with_capacity(20);
+
+    b.iter(|| {
+        buf.clear();
+        write!(&mut buf, "{}", 3.1415926f64).unwrap()
+    });
+}
+
+#[bench]
+fn bench_big_shortest(b: &mut Bencher) {
+    let mut buf = Vec::with_capacity(300);
+
+    b.iter(|| {
+        buf.clear();
+        write!(&mut buf, "{}", f64::MAX).unwrap()
+    });
+}

From a21f61685d4aa2bd93152eaf53c469b29a3a9bee Mon Sep 17 00:00:00 2001
From: Nathan Froyd <froydnj@gmail.com>
Date: Thu, 13 Apr 2017 15:49:37 -0400
Subject: [PATCH 2/4] fmt: reduce the stack space required by float formatting

For the two major entry points for float formatting, we split the exact
and shortest cases into separate functions.  We mark the exact-case
functions as #[inline(never)] so they won't bloat stack space in their
callers unnecessarily.  The shortest-case functions are marked the same
way for similar reasons.

Fixes #41234.
---
 src/libcore/fmt/float.rs | 82 ++++++++++++++++++++++++++++++++--------
 1 file changed, 66 insertions(+), 16 deletions(-)

diff --git a/src/libcore/fmt/float.rs b/src/libcore/fmt/float.rs
index 50248fabfcd..60c291ef845 100644
--- a/src/libcore/fmt/float.rs
+++ b/src/libcore/fmt/float.rs
@@ -11,6 +11,35 @@
 use fmt::{Formatter, Result, LowerExp, UpperExp, Display, Debug};
 use num::flt2dec;
 
+// Don't inline this so callers don't use the stack space this function
+// requires unless they have to.
+#[inline(never)]
+fn float_to_decimal_common_exact<T>(fmt: &mut Formatter, num: &T,
+                                    sign: flt2dec::Sign, precision: usize) -> Result
+    where T: flt2dec::DecodableFloat
+{
+    let mut buf = [0; 1024]; // enough for f32 and f64
+    let mut parts = [flt2dec::Part::Zero(0); 16];
+    let formatted = flt2dec::to_exact_fixed_str(flt2dec::strategy::grisu::format_exact,
+                                                *num, sign, precision,
+                                                false, &mut buf, &mut parts);
+    fmt.pad_formatted_parts(&formatted)
+}
+
+// Don't inline this so callers that call both this and the above won't wind
+// up using the combined stack space of both functions in some cases.
+#[inline(never)]
+fn float_to_decimal_common_shortest<T>(fmt: &mut Formatter,
+                                       num: &T, sign: flt2dec::Sign) -> Result
+    where T: flt2dec::DecodableFloat
+{
+    let mut buf = [0; flt2dec::MAX_SIG_DIGITS]; // enough for f32 and f64
+    let mut parts = [flt2dec::Part::Zero(0); 16];
+    let formatted = flt2dec::to_shortest_str(flt2dec::strategy::grisu::format_shortest,
+                                             *num, sign, 0, false, &mut buf, &mut parts);
+    fmt.pad_formatted_parts(&formatted)
+}
+
 // Common code of floating point Debug and Display.
 fn float_to_decimal_common<T>(fmt: &mut Formatter, num: &T, negative_zero: bool) -> Result
     where T: flt2dec::DecodableFloat
@@ -23,15 +52,41 @@ fn float_to_decimal_common<T>(fmt: &mut Formatter, num: &T, negative_zero: bool)
         (true,  true)  => flt2dec::Sign::MinusPlusRaw,
     };
 
+    if let Some(precision) = fmt.precision {
+        float_to_decimal_common_exact(fmt, num, sign, precision)
+    } else {
+        float_to_decimal_common_shortest(fmt, num, sign)
+    }
+}
+
+// Don't inline this so callers don't use the stack space this function
+// requires unless they have to.
+#[inline(never)]
+fn float_to_exponential_common_exact<T>(fmt: &mut Formatter, num: &T,
+                                        sign: flt2dec::Sign, precision: usize,
+                                        upper: bool) -> Result
+    where T: flt2dec::DecodableFloat
+{
     let mut buf = [0; 1024]; // enough for f32 and f64
     let mut parts = [flt2dec::Part::Zero(0); 16];
-    let formatted = if let Some(precision) = fmt.precision {
-        flt2dec::to_exact_fixed_str(flt2dec::strategy::grisu::format_exact, *num, sign,
-                                    precision, false, &mut buf, &mut parts)
-    } else {
-        flt2dec::to_shortest_str(flt2dec::strategy::grisu::format_shortest, *num, sign,
-                                 0, false, &mut buf, &mut parts)
-    };
+    let formatted = flt2dec::to_exact_exp_str(flt2dec::strategy::grisu::format_exact,
+                                              *num, sign, precision,
+                                              upper, &mut buf, &mut parts);
+    fmt.pad_formatted_parts(&formatted)
+}
+
+// Don't inline this so callers that call both this and the above won't wind
+// up using the combined stack space of both functions in some cases.
+#[inline(never)]
+fn float_to_exponential_common_shortest<T>(fmt: &mut Formatter,
+                                           num: &T, sign: flt2dec::Sign,
+                                           upper: bool) -> Result
+    where T: flt2dec::DecodableFloat
+{
+    let mut buf = [0; flt2dec::MAX_SIG_DIGITS]; // enough for f32 and f64
+    let mut parts = [flt2dec::Part::Zero(0); 16];
+    let formatted = flt2dec::to_shortest_exp_str(flt2dec::strategy::grisu::format_shortest, *num,
+                                                 sign, (0, 0), upper, &mut buf, &mut parts);
     fmt.pad_formatted_parts(&formatted)
 }
 
@@ -45,17 +100,12 @@ fn float_to_exponential_common<T>(fmt: &mut Formatter, num: &T, upper: bool) ->
         true  => flt2dec::Sign::MinusPlus,
     };
 
-    let mut buf = [0; 1024]; // enough for f32 and f64
-    let mut parts = [flt2dec::Part::Zero(0); 16];
-    let formatted = if let Some(precision) = fmt.precision {
+    if let Some(precision) = fmt.precision {
         // 1 integral digit + `precision` fractional digits = `precision + 1` total digits
-        flt2dec::to_exact_exp_str(flt2dec::strategy::grisu::format_exact, *num, sign,
-                                  precision + 1, upper, &mut buf, &mut parts)
+        float_to_exponential_common_exact(fmt, num, sign, precision + 1, upper)
     } else {
-        flt2dec::to_shortest_exp_str(flt2dec::strategy::grisu::format_shortest, *num, sign,
-                                     (0, 0), upper, &mut buf, &mut parts)
-    };
-    fmt.pad_formatted_parts(&formatted)
+        float_to_exponential_common_shortest(fmt, num, sign, upper)
+    }
 }
 
 macro_rules! floating {

From 2499d819d41b87d9b562377479e4a81f74bd49f1 Mon Sep 17 00:00:00 2001
From: Nathan Froyd <froydnj@gmail.com>
Date: Thu, 13 Apr 2017 16:24:30 -0400
Subject: [PATCH 3/4] fmt: use the minimum parts array size

The comments for flt2dec::to_shortest_str say that we only need a slice
of length 5 for the parts array.  Initializing a 16-part array is just
wasted effort and wasted stack space.  Other functions in the flt2dec
module have similar comments, so we adjust the parts arrays passed to
those functions accordingly.
---
 src/libcore/fmt/float.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/libcore/fmt/float.rs b/src/libcore/fmt/float.rs
index 60c291ef845..4252807fb9f 100644
--- a/src/libcore/fmt/float.rs
+++ b/src/libcore/fmt/float.rs
@@ -19,7 +19,7 @@ fn float_to_decimal_common_exact<T>(fmt: &mut Formatter, num: &T,
     where T: flt2dec::DecodableFloat
 {
     let mut buf = [0; 1024]; // enough for f32 and f64
-    let mut parts = [flt2dec::Part::Zero(0); 16];
+    let mut parts = [flt2dec::Part::Zero(0); 5];
     let formatted = flt2dec::to_exact_fixed_str(flt2dec::strategy::grisu::format_exact,
                                                 *num, sign, precision,
                                                 false, &mut buf, &mut parts);
@@ -34,7 +34,7 @@ fn float_to_decimal_common_shortest<T>(fmt: &mut Formatter,
     where T: flt2dec::DecodableFloat
 {
     let mut buf = [0; flt2dec::MAX_SIG_DIGITS]; // enough for f32 and f64
-    let mut parts = [flt2dec::Part::Zero(0); 16];
+    let mut parts = [flt2dec::Part::Zero(0); 5];
     let formatted = flt2dec::to_shortest_str(flt2dec::strategy::grisu::format_shortest,
                                              *num, sign, 0, false, &mut buf, &mut parts);
     fmt.pad_formatted_parts(&formatted)
@@ -68,7 +68,7 @@ fn float_to_exponential_common_exact<T>(fmt: &mut Formatter, num: &T,
     where T: flt2dec::DecodableFloat
 {
     let mut buf = [0; 1024]; // enough for f32 and f64
-    let mut parts = [flt2dec::Part::Zero(0); 16];
+    let mut parts = [flt2dec::Part::Zero(0); 7];
     let formatted = flt2dec::to_exact_exp_str(flt2dec::strategy::grisu::format_exact,
                                               *num, sign, precision,
                                               upper, &mut buf, &mut parts);
@@ -84,7 +84,7 @@ fn float_to_exponential_common_shortest<T>(fmt: &mut Formatter,
     where T: flt2dec::DecodableFloat
 {
     let mut buf = [0; flt2dec::MAX_SIG_DIGITS]; // enough for f32 and f64
-    let mut parts = [flt2dec::Part::Zero(0); 16];
+    let mut parts = [flt2dec::Part::Zero(0); 7];
     let formatted = flt2dec::to_shortest_exp_str(flt2dec::strategy::grisu::format_shortest, *num,
                                                  sign, (0, 0), upper, &mut buf, &mut parts);
     fmt.pad_formatted_parts(&formatted)

From b2c3102e625f94a084e357ceb976fc98b42b79bf Mon Sep 17 00:00:00 2001
From: Nathan Froyd <froydnj@gmail.com>
Date: Fri, 14 Apr 2017 16:25:49 -0400
Subject: [PATCH 4/4] fmt: use mem::uninitialized for float formatting buffers

Spending time to initialize these is just wasted work, as we'll
overwrite them soon anyway.

Fixes #41259.
---
 src/libcore/fmt/float.rs | 56 ++++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/src/libcore/fmt/float.rs b/src/libcore/fmt/float.rs
index 4252807fb9f..87def375b20 100644
--- a/src/libcore/fmt/float.rs
+++ b/src/libcore/fmt/float.rs
@@ -9,6 +9,7 @@
 // except according to those terms.
 
 use fmt::{Formatter, Result, LowerExp, UpperExp, Display, Debug};
+use mem;
 use num::flt2dec;
 
 // Don't inline this so callers don't use the stack space this function
@@ -18,12 +19,14 @@ fn float_to_decimal_common_exact<T>(fmt: &mut Formatter, num: &T,
                                     sign: flt2dec::Sign, precision: usize) -> Result
     where T: flt2dec::DecodableFloat
 {
-    let mut buf = [0; 1024]; // enough for f32 and f64
-    let mut parts = [flt2dec::Part::Zero(0); 5];
-    let formatted = flt2dec::to_exact_fixed_str(flt2dec::strategy::grisu::format_exact,
-                                                *num, sign, precision,
-                                                false, &mut buf, &mut parts);
-    fmt.pad_formatted_parts(&formatted)
+    unsafe {
+        let mut buf: [u8; 1024] = mem::uninitialized(); // enough for f32 and f64
+        let mut parts: [flt2dec::Part; 5] = mem::uninitialized();
+        let formatted = flt2dec::to_exact_fixed_str(flt2dec::strategy::grisu::format_exact,
+                                                    *num, sign, precision,
+                                                    false, &mut buf, &mut parts);
+        fmt.pad_formatted_parts(&formatted)
+    }
 }
 
 // Don't inline this so callers that call both this and the above won't wind
@@ -33,11 +36,14 @@ fn float_to_decimal_common_shortest<T>(fmt: &mut Formatter,
                                        num: &T, sign: flt2dec::Sign) -> Result
     where T: flt2dec::DecodableFloat
 {
-    let mut buf = [0; flt2dec::MAX_SIG_DIGITS]; // enough for f32 and f64
-    let mut parts = [flt2dec::Part::Zero(0); 5];
-    let formatted = flt2dec::to_shortest_str(flt2dec::strategy::grisu::format_shortest,
-                                             *num, sign, 0, false, &mut buf, &mut parts);
-    fmt.pad_formatted_parts(&formatted)
+    unsafe {
+        // enough for f32 and f64
+        let mut buf: [u8; flt2dec::MAX_SIG_DIGITS] = mem::uninitialized();
+        let mut parts: [flt2dec::Part; 5] = mem::uninitialized();
+        let formatted = flt2dec::to_shortest_str(flt2dec::strategy::grisu::format_shortest,
+                                                 *num, sign, 0, false, &mut buf, &mut parts);
+        fmt.pad_formatted_parts(&formatted)
+    }
 }
 
 // Common code of floating point Debug and Display.
@@ -67,12 +73,14 @@ fn float_to_exponential_common_exact<T>(fmt: &mut Formatter, num: &T,
                                         upper: bool) -> Result
     where T: flt2dec::DecodableFloat
 {
-    let mut buf = [0; 1024]; // enough for f32 and f64
-    let mut parts = [flt2dec::Part::Zero(0); 7];
-    let formatted = flt2dec::to_exact_exp_str(flt2dec::strategy::grisu::format_exact,
-                                              *num, sign, precision,
-                                              upper, &mut buf, &mut parts);
-    fmt.pad_formatted_parts(&formatted)
+    unsafe {
+        let mut buf: [u8; 1024] = mem::uninitialized(); // enough for f32 and f64
+        let mut parts: [flt2dec::Part; 7] = mem::uninitialized();
+        let formatted = flt2dec::to_exact_exp_str(flt2dec::strategy::grisu::format_exact,
+                                                  *num, sign, precision,
+                                                  upper, &mut buf, &mut parts);
+        fmt.pad_formatted_parts(&formatted)
+    }
 }
 
 // Don't inline this so callers that call both this and the above won't wind
@@ -83,11 +91,15 @@ fn float_to_exponential_common_shortest<T>(fmt: &mut Formatter,
                                            upper: bool) -> Result
     where T: flt2dec::DecodableFloat
 {
-    let mut buf = [0; flt2dec::MAX_SIG_DIGITS]; // enough for f32 and f64
-    let mut parts = [flt2dec::Part::Zero(0); 7];
-    let formatted = flt2dec::to_shortest_exp_str(flt2dec::strategy::grisu::format_shortest, *num,
-                                                 sign, (0, 0), upper, &mut buf, &mut parts);
-    fmt.pad_formatted_parts(&formatted)
+    unsafe {
+        // enough for f32 and f64
+        let mut buf: [u8; flt2dec::MAX_SIG_DIGITS] = mem::uninitialized();
+        let mut parts: [flt2dec::Part; 7] = mem::uninitialized();
+        let formatted = flt2dec::to_shortest_exp_str(flt2dec::strategy::grisu::format_shortest,
+                                                     *num, sign, (0, 0), upper,
+                                                     &mut buf, &mut parts);
+        fmt.pad_formatted_parts(&formatted)
+    }
 }
 
 // Common code of floating point LowerExp and UpperExp.