Skip to content

Commit

Permalink
Add Julia routines for setting/getting "subnormals are zero" mode.
Browse files Browse the repository at this point in the history
This mode sets the FZ/DAZ features on x86 processors that support them.
See issue JuliaLang#12132 for discussion.
  • Loading branch information
Arch D. Robison committed Jul 16, 2015
1 parent 0eae746 commit 9c4b94a
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 27 deletions.
2 changes: 2 additions & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -915,6 +915,8 @@ export
get_rounding,
set_rounding,
with_rounding,
get_zero_subnormals,
set_zero_subnormals,

# statistics
cor,
Expand Down
6 changes: 5 additions & 1 deletion base/rounding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ include("fenv_constants.jl")
export
RoundingMode, RoundNearest, RoundToZero, RoundUp, RoundDown, RoundFromZero,
RoundNearestTiesAway, RoundNearestTiesUp,
get_rounding, set_rounding, with_rounding
get_rounding, set_rounding, with_rounding,
get_zero_subnormals, set_zero_subnormals

## rounding modes ##
# Field-less tag type: instances carry no data, and the type parameter `T`
# alone distinguishes one rounding mode from another so that methods can
# dispatch on it.
immutable RoundingMode{T} end
Expand Down Expand Up @@ -85,4 +86,7 @@ function _convert_rounding{T<:FloatingPoint}(::Type{T},x::Real,r::RoundingMode{:
end
end

# Ask the runtime (`jl_set_zero_subnormals`) to switch the "subnormals are
# zero" mode on (`yes == true`) or off (`yes == false`).
# Returns `true` when the runtime reports status 0 (success), `false` otherwise.
function set_zero_subnormals(yes::Bool)
    status = ccall(:jl_set_zero_subnormals, Int32, (Int8,), yes)
    return status == 0
end
# Query the runtime (`jl_get_zero_subnormals`) for the current "subnormals are
# zero" mode.
# Returns `true` when the runtime reports a non-zero value, `false` otherwise.
function get_zero_subnormals()
    mask = ccall(:jl_get_zero_subnormals, Int32, ())
    return mask != 0
end

end #module
76 changes: 52 additions & 24 deletions src/sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,39 +430,67 @@ DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType)
// -- set/clear the FZ/DAZ flags on x86 & x86-64 --
#ifdef __SSE__

DLLEXPORT uint8_t jl_zero_subnormals(uint8_t isZero)
{
uint32_t flags = 0x00000000;
int32_t info[4];

jl_cpuid(info, 0);
if (info[0] >= 1) {
jl_cpuid(info, 0x00000001);
if ((info[3] & ((int)1 << 26)) != 0) {
// SSE2 supports both FZ and DAZ
flags = 0x00008040;
}
else if ((info[3] & ((int)1 << 25)) != 0) {
// SSE supports only the FZ flag
flags = 0x00008000;
// Cache of information recovered from jl_cpuid.
// Bit 0 set (the initial value 1) means the CPU has not been inspected yet;
// get_subnormal_flags() then overwrites it with the mask of supported FZ/DAZ
// bits, or with 0 if neither flag is available.
// In a multithreaded environment, there will be races on subnormal_flags,
// but they are harmless idempotent races. If we ever embrace C11, then
// subnormal_flags should be declared atomic.
static volatile int32_t subnormal_flags = 1;

// Return the mask of FZ/DAZ control bits this CPU supports (0 if none).
// The CPU is probed through jl_cpuid on first use and the answer is stored
// in subnormal_flags; later calls return the stored value.
static int32_t get_subnormal_flags() {
    uint32_t cached = subnormal_flags;
    if (!(cached & 1)) {
        // Bit 0 clear: the capabilities were already determined.
        return cached;
    }

    // Bit 0 set: CPU capabilities not yet inspected.
    uint32_t detected = 0;
    int32_t regs[4];
    jl_cpuid(regs, 0);
    if (regs[0] >= 1) {
        // Leaf 1 is available; its fourth register carries the feature bits.
        jl_cpuid(regs, 0x00000001);
        if (regs[3] & (1 << 26)) {
            // SSE2: both FZ and DAZ can be used.
            detected = 0x00008040;
        }
        else if (regs[3] & (1 << 25)) {
            // SSE only: FZ is the sole flag available.
            detected = 0x00008000;
        }
    }
    subnormal_flags = detected;
    return detected;
}

// Returns non-zero if subnormals go to 0; zero otherwise.
// The value is the subset of the supported FZ/DAZ bits (as reported by
// get_subnormal_flags) that is currently set in the SSE control/status
// register, so it is always zero on CPUs that support neither flag.
// The return type is int32_t so that it agrees with the non-SSE definition of
// this function and with the Int32 declared by the Julia-side ccall.
// The isZero parameter is unused; it is kept only so that the exported
// signature stays the same.
DLLEXPORT int32_t jl_get_zero_subnormals(int8_t isZero)
{
    uint32_t flags = get_subnormal_flags();
    (void)isZero;
    // The mask is at most 0x8040, so the conversion to int32_t is lossless.
    return (int32_t)(_mm_getcsr() & flags);
}

// Enable (isZero != 0) or disable (isZero == 0) treating subnormals as zero.
// Return zero on success, non-zero on failure.
DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
{
    uint32_t flags = get_subnormal_flags();
    if (flags) {
        // Read-modify-write: only the supported FZ/DAZ bits are touched, every
        // other bit of the control/status register keeps its current value.
        uint32_t state = _mm_getcsr();
        if (isZero)
            state |= flags;
        else
            state &= ~flags;
        _mm_setcsr(state);
        return 0;
    }
    else {
        // No FZ/DAZ support on this CPU.
        // Report a failure only if user is trying to enable FTZ/DAZ.
        return isZero;
    }
}

#else

DLLEXPORT uint8_t jl_zero_subnormals(uint8_t isZero)
// Variant compiled when __SSE__ is not defined: always reports zero, i.e.
// subnormals are not being treated as zero. The isZero parameter is unused.
DLLEXPORT int32_t jl_get_zero_subnormals(int8_t isZero)
{
return 0;
}

// Variant compiled when __SSE__ is not defined: there are no FZ/DAZ flags to
// toggle. Return zero on success, non-zero on failure.
// This mirrors the "no flags available" path of the SSE implementation:
// disabling is a no-op that succeeds, while a request to enable cannot be
// honored and therefore reports failure instead of silently claiming success.
DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
{
    return isZero;
}
Expand Down
6 changes: 4 additions & 2 deletions test/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,10 @@ end
@test_approx_eq quadgk(cos, 0,0.7,1, norm=abs)[1] sin(1)

# Ensure subnormal flags functions don't segfault.
# The `any(... .== [...])` checks accept either possible result, so they only
# verify that the call returns at all.
# NOTE(review): the two ccall lines target the C symbol `jl_zero_subnormals`
# directly rather than the Julia wrappers -- confirm that symbol is still exported.
@test any(ccall("jl_zero_subnormals", UInt8, (UInt8,), 1) .== [0x00 0x01])
@test any(ccall("jl_zero_subnormals", UInt8, (UInt8,), 0) .== [0x00 0x01])
@test any(set_zero_subnormals(true) .== [false,true])
@test any(get_zero_subnormals() .== [false,true])
# Turning the mode off is expected to succeed, and it must read back as off.
@test set_zero_subnormals(false)
@test !get_zero_subnormals()

# useful test functions for relative error
err(z, x) = abs(z - x) / abs(x)
Expand Down

0 comments on commit 9c4b94a

Please sign in to comment.