Skip to content

Commit

Permalink
Make a subset of libm symbols weakly available on all platforms
Browse files Browse the repository at this point in the history
018616e ("Always have math functions but with `weak` linking
attribute if we can") made all math symbols available on platforms that
support weak linkage. This caused some unexpected regressions, however,
because our less accurate and sometimes slow routines were being
selected over the system `libm`, which also tends to be weak [1]. Thus,
0fab77e ("Don't include `math` for `unix` and `wasi` targets") was
applied to undo these changes on many platforms.

Now that some improvements have been made to `libm`, add back a subset
of these functions:

* cbrt
* ceil
* copysign
* fabs
* fdim
* floor
* fma
* fmax
* fmaximum
* fmin
* fminimum
* fmod
* rint
* round
* roundeven
* sqrt
* trunc

This list includes only functions that produce exact results (verified
with exhaustive / extensive tests, and also required by IEEE in most
cases), and for which benchmarks indicate performance similar to or
better than Musl's soft float math routines [^1]. All except `cbrt` also
have `f16` and `f128` implementations. Once more routines meet these
criteria, we can move them from platform-specific availability to always
available.

Once this change makes it to rust-lang/rust, we will also be able to
move the relevant functions from `std` to `core`.

[^1]: We still rely on the backend to provide optimized assembly
      routines when available.

[1]: rust-lang/rust#128386
  • Loading branch information
tgross35 committed Feb 25, 2025
1 parent 342ce46 commit 7b62812
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 115 deletions.
29 changes: 1 addition & 28 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,40 +41,13 @@ mod macros;

pub mod float;
pub mod int;

// Disable for any of the following:
// - x86 without sse2 due to ABI issues
// - <https://github.com/rust-lang/rust/issues/114479>
// - but exclude UEFI since it is a soft-float target
// - <https://github.com/rust-lang/rust/issues/128533>
// - All unix targets (linux, macos, freebsd, android, etc)
// - wasm with known target_os
#[cfg(not(any(
all(
target_arch = "x86",
not(target_feature = "sse2"),
not(target_os = "uefi"),
),
unix,
all(target_family = "wasm", not(target_os = "unknown"))
)))]
pub mod math;
pub mod mem;

// `libm` expects its `support` module to be available in the crate root. This config can be
// cleaned up once `libm` is made always available.
#[cfg(not(any(
all(
target_arch = "x86",
not(target_feature = "sse2"),
not(target_os = "uefi"),
),
unix,
all(target_family = "wasm", not(target_os = "unknown"))
)))]
use math::libm::support;

pub mod mem;

#[cfg(target_arch = "arm")]
pub mod arm;

Expand Down
259 changes: 172 additions & 87 deletions src/math.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,110 +5,195 @@
#[path = "../libm/src/math/mod.rs"]
pub(crate) mod libm;

#[allow(unused_macros)]
macro_rules! no_mangle {
macro_rules! libm_intrinsics {
($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => {
intrinsics! {
$(
pub extern "C" fn $fun($($iid: $ity),+) -> $oty {
self::libm::$fun($($iid),+)
$crate::math::libm::$fun($($iid),+)
}
)+
}
}
}

#[cfg(not(windows))]
no_mangle! {
fn acos(x: f64) -> f64;
fn asin(x: f64) -> f64;
fn cbrt(x: f64) -> f64;
fn expm1(x: f64) -> f64;
fn hypot(x: f64, y: f64) -> f64;
fn tan(x: f64) -> f64;
fn cos(x: f64) -> f64;
fn expf(x: f32) -> f32;
fn log2(x: f64) -> f64;
fn log2f(x: f32) -> f32;
fn log10(x: f64) -> f64;
fn log10f(x: f32) -> f32;
fn log(x: f64) -> f64;
fn logf(x: f32) -> f32;
fn round(x: f64) -> f64;
fn roundf(x: f32) -> f32;
fn rint(x: f64) -> f64;
fn rintf(x: f32) -> f32;
fn sin(x: f64) -> f64;
fn pow(x: f64, y: f64) -> f64;
fn powf(x: f32, y: f32) -> f32;
fn acosf(n: f32) -> f32;
fn atan2f(a: f32, b: f32) -> f32;
fn atanf(n: f32) -> f32;
fn coshf(n: f32) -> f32;
fn expm1f(n: f32) -> f32;
fn fdim(a: f64, b: f64) -> f64;
fn fdimf(a: f32, b: f32) -> f32;
fn log1pf(n: f32) -> f32;
fn sinhf(n: f32) -> f32;
fn tanhf(n: f32) -> f32;
fn ldexp(f: f64, n: i32) -> f64;
fn ldexpf(f: f32, n: i32) -> f32;
fn tgamma(x: f64) -> f64;
fn tgammaf(x: f32) -> f32;
fn atan(x: f64) -> f64;
fn atan2(x: f64, y: f64) -> f64;
fn cosh(x: f64) -> f64;
fn log1p(x: f64) -> f64;
fn sinh(x: f64) -> f64;
fn tanh(x: f64) -> f64;
fn cosf(x: f32) -> f32;
fn exp(x: f64) -> f64;
fn sinf(x: f32) -> f32;
fn exp2(x: f64) -> f64;
fn exp2f(x: f32) -> f32;
fn fma(x: f64, y: f64, z: f64) -> f64;
fn fmaf(x: f32, y: f32, z: f32) -> f32;
fn asinf(n: f32) -> f32;
fn cbrtf(n: f32) -> f32;
fn hypotf(x: f32, y: f32) -> f32;
fn tanf(n: f32) -> f32;
/// This set of functions is well tested in `libm` and known to provide similar performance to
/// system `libm`, as well as the same or better accuracy.
pub mod full_availability {
// Half-precision (`f16`) symbols, compiled only when the `f16_enabled` cfg is set.
//
// Every entry is passed to `libm_intrinsics!`, which wraps it in a `pub extern "C"` function that
// forwards its arguments to the `libm` routine of the same name.
//
// NOTE(review): there is no `fmaf16` entry here although the `f128` group has `fmaf128` --
// presumably `libm` does not provide it yet; confirm before adding.
#[cfg(f16_enabled)]
libm_intrinsics! {
fn ceilf16(x: f16) -> f16;
fn copysignf16(x: f16, y: f16) -> f16;
fn fabsf16(x: f16) -> f16;
fn fdimf16(x: f16, y: f16) -> f16;
fn floorf16(x: f16) -> f16;
fn fmaxf16(x: f16, y: f16) -> f16;
fn fmaximumf16(x: f16, y: f16) -> f16;
fn fminf16(x: f16, y: f16) -> f16;
fn fminimumf16(x: f16, y: f16) -> f16;
fn fmodf16(x: f16, y: f16) -> f16;
fn rintf16(x: f16) -> f16;
fn roundevenf16(x: f16) -> f16;
fn roundf16(x: f16) -> f16;
fn sqrtf16(x: f16) -> f16;
fn truncf16(x: f16) -> f16;
}

/* Weak linkage is unreliable on Windows and Apple, so we don't expose symbols that we know
* the system libc provides in order to avoid conflicts. */

fn sqrtf(x: f32) -> f32;
fn sqrt(x: f64) -> f64;
#[cfg(all(not(windows), not(target_vendor = "apple")))]
libm_intrinsics! {
/* f32 */
fn cbrtf(n: f32) -> f32;
fn ceilf(x: f32) -> f32;
fn copysignf(x: f32, y: f32) -> f32;
fn fabsf(x: f32) -> f32;
fn fdimf(a: f32, b: f32) -> f32;
fn floorf(x: f32) -> f32;
fn fmaf(x: f32, y: f32, z: f32) -> f32;
fn fmaxf(x: f32, y: f32) -> f32;
fn fminf(x: f32, y: f32) -> f32;
fn fmodf(x: f32, y: f32) -> f32;
fn rintf(x: f32) -> f32;
fn roundf(x: f32) -> f32;
fn sqrtf(x: f32) -> f32;
fn truncf(x: f32) -> f32;

fn ceil(x: f64) -> f64;
fn ceilf(x: f32) -> f32;
fn floor(x: f64) -> f64;
fn floorf(x: f32) -> f32;
fn trunc(x: f64) -> f64;
fn truncf(x: f32) -> f32;
/* f64 */
fn cbrt(x: f64) -> f64;
fn ceil(x: f64) -> f64;
fn copysign(x: f64, y: f64) -> f64;
fn fabs(x: f64) -> f64;
fn fdim(a: f64, b: f64) -> f64;
fn floor(x: f64) -> f64;
fn fma(x: f64, y: f64, z: f64) -> f64;
fn fmax(x: f64, y: f64) -> f64;
fn fmin(x: f64, y: f64) -> f64;
fn fmod(x: f64, y: f64) -> f64;
fn rint(x: f64) -> f64;
fn round(x: f64) -> f64;
fn sqrt(x: f64) -> f64;
fn trunc(x: f64) -> f64;
}

fn fmin(x: f64, y: f64) -> f64;
fn fminf(x: f32, y: f32) -> f32;
fn fmax(x: f64, y: f64) -> f64;
fn fmaxf(x: f32, y: f32) -> f32;
// `f64 % f64`
fn fmod(x: f64, y: f64) -> f64;
// `f32 % f32`
fn fmodf(x: f32, y: f32) -> f32;
// Windows and macOS do not yet expose `roundeven` or the IEEE 754-2019 `maximum` / `minimum`
// operations, so these symbols carry no platform `cfg` and are provided as a fallback even on
// targets where the other `f32` / `f64` symbols are left to the system libc.
libm_intrinsics! {
fn fmaximum(x: f64, y: f64) -> f64;
fn fmaximumf(x: f32, y: f32) -> f32;
fn fminimum(x: f64, y: f64) -> f64;
fn fminimumf(x: f32, y: f32) -> f32;
fn roundeven(x: f64) -> f64;
fn roundevenf(x: f32) -> f32;
}

fn erf(x: f64) -> f64;
fn erff(x: f32) -> f32;
fn erfc(x: f64) -> f64;
fn erfcf(x: f32) -> f32;
// Quad-precision (`f128`) symbols, compiled only when the `f128_enabled` cfg is set.
//
// Every entry is passed to `libm_intrinsics!`, which wraps it in a `pub extern "C"` function that
// forwards its arguments to the `libm` routine of the same name.
#[cfg(f128_enabled)]
libm_intrinsics! {
fn ceilf128(x: f128) -> f128;
fn copysignf128(x: f128, y: f128) -> f128;
fn fabsf128(x: f128) -> f128;
fn fdimf128(x: f128, y: f128) -> f128;
fn floorf128(x: f128) -> f128;
fn fmaf128(x: f128, y: f128, z: f128) -> f128;
fn fmaxf128(x: f128, y: f128) -> f128;
fn fmaximumf128(x: f128, y: f128) -> f128;
fn fminf128(x: f128, y: f128) -> f128;
fn fminimumf128(x: f128, y: f128) -> f128;
fn fmodf128(x: f128, y: f128) -> f128;
fn rintf128(x: f128) -> f128;
fn roundevenf128(x: f128) -> f128;
fn roundf128(x: f128) -> f128;
fn sqrtf128(x: f128) -> f128;
fn truncf128(x: f128) -> f128;
}
}

// allow for windows (and other targets)
intrinsics! {
pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
let r = self::libm::lgamma_r(x);
*s = r.1;
r.0
/// This group of functions has more performance or precision issues than system versions, or
/// are otherwise less well tested. Provide them only on platforms that have problems with the
/// system `libm`.
///
/// As `libm` improves, more functions will be moved from this group to the first group.
///
/// Do not supply for any of the following:
/// - x86 without sse2 due to ABI issues
/// - <https://github.com/rust-lang/rust/issues/114479>
/// - but exclude UEFI since it is a soft-float target
/// - <https://github.com/rust-lang/rust/issues/128533>
/// - All unix targets (linux, macos, freebsd, android, etc)
/// - wasm with known target_os
#[cfg(not(any(
all(
target_arch = "x86",
not(target_feature = "sse2"),
not(target_os = "uefi"),
),
unix,
all(target_family = "wasm", not(target_os = "unknown"))
)))]
pub mod partial_availability {
// `f32` / `f64` routines that are not emitted on Windows targets (`cfg(not(windows))`).
//
// Entries are kept sorted by symbol name. Each one is passed to `libm_intrinsics!`, which wraps
// it in a `pub extern "C"` function forwarding to the `libm` routine of the same name.
#[cfg(not(windows))]
libm_intrinsics! {
fn acos(x: f64) -> f64;
fn acosf(n: f32) -> f32;
fn asin(x: f64) -> f64;
fn asinf(n: f32) -> f32;
fn atan(x: f64) -> f64;
fn atan2(x: f64, y: f64) -> f64;
fn atan2f(a: f32, b: f32) -> f32;
fn atanf(n: f32) -> f32;
fn cos(x: f64) -> f64;
fn cosf(x: f32) -> f32;
fn cosh(x: f64) -> f64;
fn coshf(n: f32) -> f32;
fn erf(x: f64) -> f64;
fn erfc(x: f64) -> f64;
fn erfcf(x: f32) -> f32;
fn erff(x: f32) -> f32;
fn exp(x: f64) -> f64;
fn exp2(x: f64) -> f64;
fn exp2f(x: f32) -> f32;
fn expf(x: f32) -> f32;
fn expm1(x: f64) -> f64;
fn expm1f(n: f32) -> f32;
fn hypot(x: f64, y: f64) -> f64;
fn hypotf(x: f32, y: f32) -> f32;
fn ldexp(f: f64, n: i32) -> f64;
fn ldexpf(f: f32, n: i32) -> f32;
fn log(x: f64) -> f64;
fn log10(x: f64) -> f64;
fn log10f(x: f32) -> f32;
fn log1p(x: f64) -> f64;
fn log1pf(n: f32) -> f32;
fn log2(x: f64) -> f64;
fn log2f(x: f32) -> f32;
fn logf(x: f32) -> f32;
fn pow(x: f64, y: f64) -> f64;
fn powf(x: f32, y: f32) -> f32;
fn sin(x: f64) -> f64;
fn sinf(x: f32) -> f32;
fn sinh(x: f64) -> f64;
fn sinhf(n: f32) -> f32;
fn tan(x: f64) -> f64;
fn tanf(n: f32) -> f32;
fn tanh(x: f64) -> f64;
fn tanhf(n: f32) -> f32;
fn tgamma(x: f64) -> f64;
fn tgammaf(x: f32) -> f32;
}

pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
let r = self::libm::lgammaf_r(x);
*s = r.1;
r.0
// Unlike the list above, these two are also built for Windows (and any other target that
// compiles this module).
intrinsics! {
    // C-ABI shim for `libm::lgamma_r`: the `libm` routine returns a pair, so hand back its first
    // element and store the second through the out-pointer `s` (presumably the sign of
    // gamma(x), as in C's `lgamma_r` -- confirm against `libm`).
    pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
        let (value, sign) = super::libm::lgamma_r(x);
        *s = sign;
        value
    }

    // Single-precision counterpart of `lgamma_r`, with the same out-pointer contract.
    pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
        let (value, sign) = super::libm::lgammaf_r(x);
        *s = sign;
        value
    }
}
}

0 comments on commit 7b62812

Please sign in to comment.