From 0ba1c101dce22fbe30933a90efd237a09227e07d Mon Sep 17 00:00:00 2001
From: Corey Farwell <coreyf@rwell.org>
Date: Wed, 9 May 2018 06:47:37 -0700
Subject: [PATCH] Clarify in the docs that `mul_add` is not always faster.

Fixes https://github.com/rust-lang/rust/issues/49842.

Other resources:

- https://users.rust-lang.org/t/why-does-the-mul-add-method-produce-a-more-accurate-result-with-better-performance/1626
- https://en.wikipedia.org/wiki/Multiply%E2%80%93accumulate_operation
---
 src/libstd/f32.rs | 6 ++++--
 src/libstd/f64.rs | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/libstd/f32.rs b/src/libstd/f32.rs
index 26644c76957..4f4baf1e8cd 100644
--- a/src/libstd/f32.rs
+++ b/src/libstd/f32.rs
@@ -195,8 +195,10 @@ impl f32 {
     }
 
     /// Fused multiply-add. Computes `(self * a) + b` with only one rounding
-    /// error. This produces a more accurate result with better performance than
-    /// a separate multiplication operation followed by an add.
+    /// error, yielding a more accurate result than an unfused multiply-add.
+    ///
+    /// Using `mul_add` can be more performant than an unfused multiply-add, but
+    /// only if the target architecture has a dedicated `fma` CPU instruction.
     ///
     /// ```
     /// use std::f32;
diff --git a/src/libstd/f64.rs b/src/libstd/f64.rs
index a7e63f59b1c..e00ff60452d 100644
--- a/src/libstd/f64.rs
+++ b/src/libstd/f64.rs
@@ -173,8 +173,10 @@ impl f64 {
     }
 
     /// Fused multiply-add. Computes `(self * a) + b` with only one rounding
-    /// error. This produces a more accurate result with better performance than
-    /// a separate multiplication operation followed by an add.
+    /// error, yielding a more accurate result than an unfused multiply-add.
+    ///
+    /// Using `mul_add` can be more performant than an unfused multiply-add, but
+    /// only if the target architecture has a dedicated `fma` CPU instruction.
     ///
     /// ```
     /// let m = 10.0_f64;