diff --git a/AK/FloatingPoint.h b/AK/FloatingPoint.h index 86bb813c97e5..401d70185a71 100644 --- a/AK/FloatingPoint.h +++ b/AK/FloatingPoint.h @@ -6,8 +6,6 @@ #pragma once -#include -#include #include namespace AK { @@ -125,193 +123,8 @@ union FloatExtractor { }; static_assert(AssertSize, sizeof(f32)>()); -template -requires(S <= 1 && E >= 1 && M >= 1 && (S + E + M) <= 64) class FloatingPointBits final { -public: - static size_t const signbit = S; - static size_t const exponentbits = E; - static size_t const mantissabits = M; - - template - requires(IsIntegral && IsUnsigned && sizeof(T) <= 8) constexpr FloatingPointBits(T bits) - : m_bits(bits) - { - } - - constexpr FloatingPointBits(double value) - : m_bits(bit_cast(value)) - { - } - - constexpr FloatingPointBits(float value) - : m_bits(bit_cast(value)) - { - } - - double as_double() const - requires(S == 1 && E == 11 && M == 52) - { - return bit_cast(m_bits); - } - float as_float() const - requires(S == 1 && E == 8 && M == 23) - { - return bit_cast(static_cast(m_bits)); - } - u64 bits() const { return m_bits; } - -private: - u64 m_bits; -}; - -typedef FloatingPointBits<1, 8, 23> SingleFloatingPointBits; -typedef FloatingPointBits<1, 11, 52> DoubleFloatingPointBits; - -/** - * Convert between two IEEE 754 floating point types in any arrangement of sign, exponent and mantissa bits. - */ -template -constexpr To float_to_float(From const input) -{ - constexpr u64 from_exponent_nonnumber = (1ull << From::exponentbits) - 1; - constexpr u64 from_exponent_bias = (1ull << (From::exponentbits - 1)) - 1; - constexpr u64 to_exponent_nonnumber = (1ull << To::exponentbits) - 1; - constexpr u64 to_exponent_bias = (1ull << (To::exponentbits - 1)) - 1; - constexpr u64 to_exponent_max = (1ull << To::exponentbits) - 2; - - // Deconstruct input bits to float components - u64 from_sign = (input.bits() >> (From::exponentbits + From::mantissabits)) & From::signbit; - u64 from_exponent = (input.bits() >> From::mantissabits) & ((1ull << From::exponentbits) - 1); - u64 from_mantissa = input.bits() & ((1ull << From::mantissabits) - 1); - - u64 to_sign = from_sign & To::signbit; - u64 to_exponent; - u64 to_mantissa; - auto target_value = [&to_sign, &to_exponent, &to_mantissa]() { - return To((to_sign << (To::exponentbits + To::mantissabits)) | (to_exponent << To::mantissabits) | to_mantissa); - }; - - auto shift_mantissa = [](u64 mantissa) -> u64 { - if constexpr (From::mantissabits < To::mantissabits) - return mantissa << (To::mantissabits - From::mantissabits); - else - return mantissa >> (From::mantissabits - To::mantissabits); - }; - - // If target is unsigned and source is negative, clamp to 0 or keep NaN - if constexpr (To::signbit == 0) { - if (from_sign == 1) { - if (from_exponent == from_exponent_nonnumber && from_mantissa > 0) { - to_exponent = to_exponent_nonnumber; - to_mantissa = 1; - } else { - to_exponent = 0; - to_mantissa = 0; - } - return target_value(); - } - } - - // If the source floating point is denormalized; - if (from_exponent == 0) { - // If the source mantissa is 0, the value is +/-0 - if (from_mantissa == 0) { - to_exponent = 0; - to_mantissa = 0; - return target_value(); - } - - // If the source has more exponent bits than the target, then the largest possible - // source mantissa still cannot be represented in the target denormalized value. - if constexpr (From::exponentbits > To::exponentbits) { - to_exponent = 0; - to_mantissa = 0; - return target_value(); - } - - // If the source and target have the same number of exponent bits, we only need to - // shift the mantissa. - if constexpr (From::exponentbits == To::exponentbits) { - to_exponent = 0; - to_mantissa = shift_mantissa(from_mantissa); - return target_value(); - } - - // The target has more exponent bits, so our denormalized value can be represented - // as a normalized value in the target floating point. Normalized values have an - // implicit leading 1, so we shift the mantissa left until we find our explicit - // leading 1 which is then dropped. - int adjust_exponent = -1; - to_mantissa = from_mantissa; - do { - ++adjust_exponent; - to_mantissa <<= 1; - } while ((to_mantissa & (1ull << From::mantissabits)) == 0); - to_exponent = to_exponent_bias - from_exponent_bias - adjust_exponent; - - // Drop the most significant bit from the mantissa - to_mantissa &= (1ull << From::mantissabits) - 1; - to_mantissa = shift_mantissa(to_mantissa); - return target_value(); - } - - // If the source is NaN or +/-Inf, keep it that way - if (from_exponent == from_exponent_nonnumber) { - to_exponent = to_exponent_nonnumber; - to_mantissa = (from_mantissa == 0) ? 0 : 1; - return target_value(); - } - - // Determine the target exponent - to_exponent = to_exponent_bias - from_exponent_bias + from_exponent; - - // If the calculated exponent exceeds the target's capacity, clamp both the exponent and the - // mantissa to their maximum values. - if (to_exponent > to_exponent_max) { - to_exponent = to_exponent_max; - to_mantissa = (1ull << To::mantissabits) - 1; - return target_value(); - } - - // If the new exponent is less than 1, we can only represent this value as a denormalized number - if (to_exponent < 1) { - to_exponent = 0; - - // Add a leading 1 and shift the mantissa right - int adjust_exponent = 1 - to_exponent_bias - from_exponent + from_exponent_bias; - to_mantissa = ((1ull << From::mantissabits) | from_mantissa) >> adjust_exponent; - to_mantissa = shift_mantissa(to_mantissa); - return target_value(); - } - - // New exponent fits; shift the mantissa to fit as well - to_mantissa = shift_mantissa(from_mantissa); - return target_value(); -} - -template -constexpr O convert_from_native_double(double input) { return float_to_float(DoubleFloatingPointBits(input)); } - -template -constexpr O convert_from_native_float(float input) { return float_to_float(SingleFloatingPointBits(input)); } - -template -constexpr double convert_to_native_double(I input) { return float_to_float(input).as_double(); } - -template -constexpr float convert_to_native_float(I input) { return float_to_float(input).as_float(); } - } #if USING_AK_GLOBALLY -using AK::DoubleFloatingPointBits; using AK::FloatExtractor; -using AK::FloatingPointBits; -using AK::SingleFloatingPointBits; - -using AK::convert_from_native_double; -using AK::convert_from_native_float; -using AK::convert_to_native_double; -using AK::convert_to_native_float; -using AK::float_to_float; #endif diff --git a/Meta/gn/secondary/Tests/AK/BUILD.gn b/Meta/gn/secondary/Tests/AK/BUILD.gn index e6836512c772..0bdb132a4dfb 100644 --- a/Meta/gn/secondary/Tests/AK/BUILD.gn +++ b/Meta/gn/secondary/Tests/AK/BUILD.gn @@ -28,7 +28,6 @@ tests = [ "TestFind", "TestFixedArray", "TestFixedPoint", - "TestFloatingPoint", "TestFloatingPointParsing", "TestFlyString", "TestFormat", diff --git a/Tests/AK/CMakeLists.txt b/Tests/AK/CMakeLists.txt index 7fbf86927bd3..0a99b0432d9c 100644 --- a/Tests/AK/CMakeLists.txt +++ b/Tests/AK/CMakeLists.txt @@ -26,7 +26,6 @@ set(AK_TEST_SOURCES TestFind.cpp TestFixedArray.cpp TestFixedPoint.cpp - TestFloatingPoint.cpp TestFloatingPointParsing.cpp TestFlyString.cpp TestFormat.cpp diff --git a/Tests/AK/TestFloatingPoint.cpp b/Tests/AK/TestFloatingPoint.cpp deleted file mode 100644 index da6a2fe4ccec..000000000000 --- a/Tests/AK/TestFloatingPoint.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2022, Jelle Raaijmakers - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include -#include -#include - -TEST_CASE(f16_1_5_10_to_native_float) -{ - auto within_approximate = [](u16 lhs, float rhs) -> bool { - auto f32_lhs = convert_to_native_float(FloatingPointBits<1, 5, 10>(lhs)); - return fabsf(f32_lhs - rhs) <= 0.00001f; - }; - - EXPECT(within_approximate(0x0000, 0.f)); - EXPECT(within_approximate(0x03FF, 0.000061f)); - EXPECT(within_approximate(0x3CEF, 1.23339f)); - EXPECT(within_approximate(0xBC00, -1.f)); - EXPECT(within_approximate(0xA266, -0.0125f)); - - float result; - result = convert_to_native_float(FloatingPointBits<1, 5, 10>(0xFC01u)); - EXPECT(isnan(result)); - - result = convert_to_native_float(FloatingPointBits<1, 5, 10>(0x7C00u)); - EXPECT(isinf(result)); -} - -TEST_CASE(float_to_double_roundtrips) -{ - auto roundtrip = [](float floatvalue1) { - auto doublevalue = convert_from_native_float(floatvalue1).as_double(); - auto floatbits = convert_from_native_double(doublevalue); - auto floatvalue2 = convert_to_native_float(floatbits); - - EXPECT_APPROXIMATE(floatvalue1, floatvalue2); - }; - - roundtrip(-1.0f); - roundtrip(-0.1f); - roundtrip(0.0f); - roundtrip(0.000001f); - roundtrip(0.1f); - roundtrip(1.0f); - roundtrip(3.141592f); - roundtrip(16777216.0f); - roundtrip(33554432.0f); - - roundtrip(1 / 0.0f); - roundtrip(1 / -0.0f); - roundtrip(0 / 0.0f); -} - -TEST_CASE(normalize_denormalize) -{ - // Go from denormalized float to normalized double - auto denormalized_float = 6.709679e-39f; - auto denormalized_float_bits = SingleFloatingPointBits(denormalized_float); - auto normalized_double = convert_to_native_double(denormalized_float_bits); - EXPECT_APPROXIMATE(denormalized_float, normalized_double); - - // Go back from normalized double to denormalized float - auto normalized_double_bits = DoubleFloatingPointBits(normalized_double); - auto reconstructed_denormalized_float = convert_to_native_float(normalized_double_bits); - EXPECT_APPROXIMATE(denormalized_float, reconstructed_denormalized_float); -} - -TEST_CASE(large_exponent) -{ - // Make sure we support at least 62 bits of exponent - auto large_exponent_float = convert_from_native_double>(1.0); - auto converted_double = convert_to_native_double(large_exponent_float); - EXPECT_APPROXIMATE(converted_double, 1.0); -} - -TEST_CASE(large_mantissa) -{ - // Make sure we support at least 62 bits of mantissa - auto large_exponent_float = convert_from_native_double>(1.0); - auto converted_double = convert_to_native_double(large_exponent_float); - EXPECT_APPROXIMATE(converted_double, 1.0); -}