@@ -42,13 +42,27 @@ extension Complex: AlgebraicField {
4242 return z * w. conjugate. divided ( by: lenSq)
4343 }
4444
45- @_alwaysEmitIntoClient // specialization
46- @inline ( never)
47- public static func rescaledDivide( _ z: Complex , _ w: Complex ) -> Complex {
45+ @usableFromInline @_alwaysEmitIntoClient @inline ( never)
46+ internal static func rescaledDivide( _ z: Complex , _ w: Complex ) -> Complex {
4847 if w. isZero { return . infinity }
4948 if !w. isFinite { return . zero }
5049 // Scaling algorithm adapted from Doug Priest's "Efficient Scaling for
5150 // Complex Division":
51+ if w. magnitude < . leastNormalMagnitude {
52+ // A difference from Priest's algorithm is that he didn't have to worry
53+ // about types like Float16, where the significand width is comparable
54+ // to the exponent range, such that |leastNormalMagnitude|^(-¾) isn't
55+ // representable (e.g. for Float16 it would want to be 2¹⁸, but the
56+ // largest allowed exponent is 15). Note that it's critical to use zʹ/wʹ
57+ // after rescaling to avoid this, rather than falling through into the
58+ // normal rescaling, because otherwise we might end up back in the
59+ // situation where |w| ~ 1.
60+ let s = 1 / ( RealType ( RealType . radix) * . leastNormalMagnitude)
61+ let wʹ = w. multiplied ( by: s)
62+ let zʹ = z. multiplied ( by: s)
63+ return zʹ / wʹ
64+ }
65+ // Having handled that case, we proceed pretty similarly to Priest:
5266 //
5367 // 1. Choose real scale s ~ |w|^(-¾), an exact power of the radix.
5468 // 2. wʹ ← sw
@@ -80,28 +94,26 @@ extension Complex: AlgebraicField {
8094 // invariant compared to the mainline `/` implementation, up to the
8195 // underflow boundary.
8296 //
83- // Another difference from Priest's algorithm is that he didn't have
84- // to worry about types like Float16, where the significand width is
85- // comparable to the exponent range, such that |leastNormalMagnitude|^(-¾)
86- // isn't representable (e.g. for Float16 it would want to be 2¹⁸, while
87- // the largest allowed exponent is 15). Note that it's critical to
88- // use zʹ/wʹ after rescaling to avoid this, rather than falling through
89- // into the normal rescaling, because otherwise we might end up back in
90- // the situation where |w| ~ 1.
91- if w. magnitude < RealType . leastNormalMagnitude {
92- let wʹ = w. multiplied ( by: 1 / ( 2 * RealType. leastNormalMagnitude) )
93- let zʹ = z. multiplied ( by: 1 / ( 2 * RealType. leastNormalMagnitude) )
94- return zʹ/ wʹ
95- } else {
96- let s = RealType (
97- sign: . plus,
98- exponent: - 3 * w. magnitude. exponent/ 4 ,
99- significand: 1
100- )
101- let wʹ = w. multiplied ( by: s)
102- let zʹ = z. multiplied ( by: s)
103- return zʹ * wʹ. conjugate. divided ( by: wʹ. lengthSquared)
104- }
97+ // Note that our final assembly of the result is different from Priest;
98+ // he applies s to w twice, instead of once to w and once to z, and
99+ // does the product as (zw̅ʺ)*1/|wʹ|², while we do zʹ(w̅ʹ/|wʹ|²). We
100+ // prefer our version for three reasons:
101+ //
102+ // 1. it extracts a little more ILP
103+ // 2. it makes it so that we get exactly the same roundings on the
104+ // rescaled divide path as on the fast path, so that z/w = tz/tw
105+ // when tz and tw are computed exactly.
106+ // 3. it unlocks a future optimization where we hoist s and
107+ // (w̅ʹ/|wʹ|²) and make divisions all fast-path without perturbing
108+ // rouding.
109+ let s = RealType (
110+ sign: . plus,
111+ exponent: - 3 * w. magnitude. exponent/ 4 ,
112+ significand: 1
113+ )
114+ let wʹ = w. multiplied ( by: s)
115+ let zʹ = z. multiplied ( by: s)
116+ return zʹ * wʹ. conjugate. divided ( by: wʹ. lengthSquared)
105117 }
106118
107119 /// A normalized complex number with the same phase as this value.
0 commit comments