| // Adapted from https://github.com/Alexhuszagh/rust-lexical. |
| |
| //! Defines rounding schemes for floating-point numbers. |
| |
| use super::float::ExtendedFloat; |
| use super::num::*; |
| use super::shift::*; |
| use core::mem; |
| |
| // MASKS |
| |
| /// Calculate a scalar factor of 2 above the halfway point. |
| #[inline] |
| pub(crate) fn nth_bit(n: u64) -> u64 { |
| let bits: u64 = mem::size_of::<u64>() as u64 * 8; |
| debug_assert!(n < bits, "nth_bit() overflow in shl."); |
| |
| 1 << n |
| } |
| |
| /// Generate a bitwise mask for the lower `n` bits. |
| #[inline] |
| pub(crate) fn lower_n_mask(n: u64) -> u64 { |
| let bits: u64 = mem::size_of::<u64>() as u64 * 8; |
| debug_assert!(n <= bits, "lower_n_mask() overflow in shl."); |
| |
| if n == bits { |
| u64::max_value() |
| } else { |
| (1 << n) - 1 |
| } |
| } |
| |
| /// Calculate the halfway point for the lower `n` bits. |
| #[inline] |
| pub(crate) fn lower_n_halfway(n: u64) -> u64 { |
| let bits: u64 = mem::size_of::<u64>() as u64 * 8; |
| debug_assert!(n <= bits, "lower_n_halfway() overflow in shl."); |
| |
| if n == 0 { |
| 0 |
| } else { |
| nth_bit(n - 1) |
| } |
| } |
| |
| /// Calculate a bitwise mask with `n` 1 bits starting at the `bit` position. |
| #[inline] |
| pub(crate) fn internal_n_mask(bit: u64, n: u64) -> u64 { |
| let bits: u64 = mem::size_of::<u64>() as u64 * 8; |
| debug_assert!(bit <= bits, "internal_n_halfway() overflow in shl."); |
| debug_assert!(n <= bits, "internal_n_halfway() overflow in shl."); |
| debug_assert!(bit >= n, "internal_n_halfway() overflow in sub."); |
| |
| lower_n_mask(bit) ^ lower_n_mask(bit - n) |
| } |
| |
| // NEAREST ROUNDING |
| |
| // Shift right N-bytes and round to the nearest. |
| // |
| // Return if we are above halfway and if we are halfway. |
| #[inline] |
| pub(crate) fn round_nearest(fp: &mut ExtendedFloat, shift: i32) -> (bool, bool) { |
| // Extract the truncated bits using mask. |
| // Calculate if the value of the truncated bits are either above |
| // the mid-way point, or equal to it. |
| // |
| // For example, for 4 truncated bytes, the mask would be b1111 |
| // and the midway point would be b1000. |
| let mask: u64 = lower_n_mask(shift as u64); |
| let halfway: u64 = lower_n_halfway(shift as u64); |
| |
| let truncated_bits = fp.mant & mask; |
| let is_above = truncated_bits > halfway; |
| let is_halfway = truncated_bits == halfway; |
| |
| // Bit shift so the leading bit is in the hidden bit. |
| overflowing_shr(fp, shift); |
| |
| (is_above, is_halfway) |
| } |
| |
| // Tie rounded floating point to event. |
| #[inline] |
| pub(crate) fn tie_even(fp: &mut ExtendedFloat, is_above: bool, is_halfway: bool) { |
| // Extract the last bit after shifting (and determine if it is odd). |
| let is_odd = fp.mant & 1 == 1; |
| |
| // Calculate if we need to roundup. |
| // We need to roundup if we are above halfway, or if we are odd |
| // and at half-way (need to tie-to-even). |
| if is_above || (is_odd && is_halfway) { |
| fp.mant += 1; |
| } |
| } |
| |
| // Shift right N-bytes and round nearest, tie-to-even. |
| // |
| // Floating-point arithmetic uses round to nearest, ties to even, |
| // which rounds to the nearest value, if the value is halfway in between, |
| // round to an even value. |
| #[inline] |
| pub(crate) fn round_nearest_tie_even(fp: &mut ExtendedFloat, shift: i32) { |
| let (is_above, is_halfway) = round_nearest(fp, shift); |
| tie_even(fp, is_above, is_halfway); |
| } |
| |
| // DIRECTED ROUNDING |
| |
| // Shift right N-bytes and round towards a direction. |
| // |
| // Return if we have any truncated bytes. |
| #[inline] |
| fn round_toward(fp: &mut ExtendedFloat, shift: i32) -> bool { |
| let mask: u64 = lower_n_mask(shift as u64); |
| let truncated_bits = fp.mant & mask; |
| |
| // Bit shift so the leading bit is in the hidden bit. |
| overflowing_shr(fp, shift); |
| |
| truncated_bits != 0 |
| } |
| |
| // Round down. |
| #[inline] |
| fn downard(_: &mut ExtendedFloat, _: bool) {} |
| |
| // Shift right N-bytes and round toward zero. |
| // |
| // Floating-point arithmetic defines round toward zero, which rounds |
| // towards positive zero. |
| #[inline] |
| pub(crate) fn round_downward(fp: &mut ExtendedFloat, shift: i32) { |
| // Bit shift so the leading bit is in the hidden bit. |
| // No rounding schemes, so we just ignore everything else. |
| let is_truncated = round_toward(fp, shift); |
| downard(fp, is_truncated); |
| } |
| |
| // ROUND TO FLOAT |
| |
| // Shift the ExtendedFloat fraction to the fraction bits in a native float. |
| // |
| // Floating-point arithmetic uses round to nearest, ties to even, |
| // which rounds to the nearest value, if the value is halfway in between, |
| // round to an even value. |
| #[inline] |
| pub(crate) fn round_to_float<F, Algorithm>(fp: &mut ExtendedFloat, algorithm: Algorithm) |
| where |
| F: Float, |
| Algorithm: FnOnce(&mut ExtendedFloat, i32), |
| { |
| // Calculate the difference to allow a single calculation |
| // rather than a loop, to minimize the number of ops required. |
| // This does underflow detection. |
| let final_exp = fp.exp + F::DEFAULT_SHIFT; |
| if final_exp < F::DENORMAL_EXPONENT { |
| // We would end up with a denormal exponent, try to round to more |
| // digits. Only shift right if we can avoid zeroing out the value, |
| // which requires the exponent diff to be < M::BITS. The value |
| // is already normalized, so we shouldn't have any issue zeroing |
| // out the value. |
| let diff = F::DENORMAL_EXPONENT - fp.exp; |
| if diff <= u64::FULL { |
| // We can avoid underflow, can get a valid representation. |
| algorithm(fp, diff); |
| } else { |
| // Certain underflow, assign literal 0s. |
| fp.mant = 0; |
| fp.exp = 0; |
| } |
| } else { |
| algorithm(fp, F::DEFAULT_SHIFT); |
| } |
| |
| if fp.mant & F::CARRY_MASK == F::CARRY_MASK { |
| // Roundup carried over to 1 past the hidden bit. |
| shr(fp, 1); |
| } |
| } |
| |
| // AVOID OVERFLOW/UNDERFLOW |
| |
| // Avoid overflow for large values, shift left as needed. |
| // |
| // Shift until a 1-bit is in the hidden bit, if the mantissa is not 0. |
| #[inline] |
| pub(crate) fn avoid_overflow<F>(fp: &mut ExtendedFloat) |
| where |
| F: Float, |
| { |
| // Calculate the difference to allow a single calculation |
| // rather than a loop, minimizing the number of ops required. |
| if fp.exp >= F::MAX_EXPONENT { |
| let diff = fp.exp - F::MAX_EXPONENT; |
| if diff <= F::MANTISSA_SIZE { |
| // Our overflow mask needs to start at the hidden bit, or at |
| // `F::MANTISSA_SIZE+1`, and needs to have `diff+1` bits set, |
| // to see if our value overflows. |
| let bit = (F::MANTISSA_SIZE + 1) as u64; |
| let n = (diff + 1) as u64; |
| let mask = internal_n_mask(bit, n); |
| if (fp.mant & mask) == 0 { |
| // If we have no 1-bit in the hidden-bit position, |
| // which is index 0, we need to shift 1. |
| let shift = diff + 1; |
| shl(fp, shift); |
| } |
| } |
| } |
| } |
| |
| // ROUND TO NATIVE |
| |
| // Round an extended-precision float to a native float representation. |
| #[inline] |
| pub(crate) fn round_to_native<F, Algorithm>(fp: &mut ExtendedFloat, algorithm: Algorithm) |
| where |
| F: Float, |
| Algorithm: FnOnce(&mut ExtendedFloat, i32), |
| { |
| // Shift all the way left, to ensure a consistent representation. |
| // The following right-shifts do not work for a non-normalized number. |
| fp.normalize(); |
| |
| // Round so the fraction is in a native mantissa representation, |
| // and avoid overflow/underflow. |
| round_to_float::<F, _>(fp, algorithm); |
| avoid_overflow::<F>(fp); |
| } |