Skip to content

Commit

Permalink
fast math 2
Browse files Browse the repository at this point in the history
  • Loading branch information
Emerson Coskey committed Dec 18, 2024
1 parent 64082da commit bc8a4d2
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 27 deletions.
11 changes: 5 additions & 6 deletions crates/bevy_pbr/src/atmosphere/functions.wgsl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#define_import_path bevy_pbr::atmosphere::functions

#import bevy_render::maths::{PI, HALF_PI, PI_2}

#import bevy_pbr::atmosphere::{
types::Atmosphere,
bindings::{
Expand Down Expand Up @@ -35,9 +37,6 @@

// CONSTANTS

const PI: f32 = 3.141592653589793238462;
const TAU: f32 = 6.283185307179586476925;
const FRAC_PI: f32 = 0.31830988618379067153; // 1 / π
const FRAC_3_16_PI: f32 = 0.0596831036594607509; // 3 / (16π)
const FRAC_4_PI: f32 = 0.07957747154594767; // 1 / (4π)

Expand All @@ -59,7 +58,7 @@ fn multiscattering_lut_uv_to_r_mu(uv: vec2<f32>) -> vec2<f32> {
// non-linear latitude parametrization, though we use a cubemap instead.

// Remaps a direction vector: the y component is replaced by the square root of
// its magnitude with the original sign re-applied, and the resulting vector is
// renormalized to unit length. x and z are passed through unchanged before the
// normalization.
// NOTE(review): the two `let new_y` lines below appear to be the pre- and
// post-change versions of one statement in this diff view (sqrt -> fast_sqrt),
// not two declarations in the same scope — confirm against the actual file.
fn sky_view_lut_squash_ray_dir(ray_dir_as: vec3<f32>) -> vec3<f32> {
let new_y = sqrt(abs(ray_dir_as.y)) * sign(ray_dir_as.y);
let new_y = fast_sqrt(abs(ray_dir_as.y)) * sign(ray_dir_as.y);
return normalize(vec3(ray_dir_as.x, new_y, ray_dir_as.z));
}

Expand Down Expand Up @@ -109,7 +108,7 @@ fn rayleigh(neg_LdotV: f32) -> f32 {
// Evaluates FRAC_4_PI * (1 - g^2) / denom^(3/2), where g is
// `atmosphere.mie_asymmetry` and denom = 1 + g^2 - 2 * g * neg_LdotV.
// The power 3/2 is formed as denom * sqrt(denom).
// NOTE(review): the two `return` lines below appear to be the pre- and
// post-change versions of one statement in this diff view (sqrt -> fast_sqrt)
// — confirm against the actual file.
fn henyey_greenstein(neg_LdotV: f32) -> f32 {
let g = atmosphere.mie_asymmetry;
let denom = 1.0 + g * g - 2.0 * g * neg_LdotV;
return FRAC_4_PI * (1.0 - g * g) / (denom * sqrt(denom));
return FRAC_4_PI * (1.0 - g * g) / (denom * fast_sqrt(denom));
}

// ATMOSPHERE SAMPLING
Expand Down Expand Up @@ -231,7 +230,7 @@ fn get_local_up(r: f32, t: f32, ray_dir: vec3<f32>) -> vec3<f32> {
// Given a ray starting at radius `r`, with `mu` = cos(zenith angle), and a
// distance `t` travelled along the ray, returns the radius at that point.
// Computed as the square root of t^2 + 2 * r * mu * t + r^2.
// NOTE(review): the two `return` lines below appear to be the pre- and
// post-change versions of one statement in this diff view (sqrt -> fast_sqrt)
// — confirm against the actual file.
fn get_local_r(r: f32, mu: f32, t: f32) -> f32 {
return sqrt(t * t + 2.0 * r * mu * t + r * r);
return fast_sqrt(t * t + 2.0 * r * mu * t + r * r);
}

// Convert uv [0.0 .. 1.0] coordinate to ndc space xy [-1.0 .. 1.0]
Expand Down
6 changes: 4 additions & 2 deletions crates/bevy_pbr/src/atmosphere/multiscattering_lut.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
bindings::{atmosphere, settings},
functions::{
multiscattering_lut_uv_to_r_mu, sample_transmittance_lut, isotropic,
get_local_r, get_local_up, sample_atmosphere, FRAC_4_PI, TAU,
get_local_r, get_local_up, sample_atmosphere, FRAC_4_PI,
max_atmosphere_distance, rayleigh, henyey_greenstein
},
bruneton_functions::{
Expand All @@ -14,6 +14,8 @@
}
}

#import bevy_render::maths::PI_2


const PHI_2: vec2<f32> = vec2(1.3247179572447460259609088, 1.7548776662466927600495087);

Expand All @@ -26,7 +28,7 @@ fn s2_sequence(n: u32) -> vec2<f32> {

//Lambert equal-area projection.
fn uv_to_sphere(uv: vec2<f32>) -> vec3<f32> {
let phi = TAU * uv.y;
let phi = PI_2 * uv.y;
let sin_lambda = 2 * uv.x - 1;
let cos_lambda = sqrt(1 - sin_lambda * sin_lambda);

Expand Down
23 changes: 16 additions & 7 deletions crates/bevy_pbr/src/render/fast_math.wgsl
Original file line number Diff line number Diff line change
@@ -1,24 +1,33 @@
#define_import_path bevy_pbr::fast_math

#import bevy_render::maths::PI;
#import bevy_render::maths::{PI, HALF_PI};

// Reference: XEngine
// https://github.com/ShawnTSH1229/XEngine/blob/main/Source/Shaders/FastMath.hlsl
// Square-root approximation that works on the bit pattern of `x` instead of
// calling `sqrt`: the bits of `x` are reinterpreted as an i32, shifted right by
// one bit, offset by the constant 0x1fbd1df5, and reinterpreted back as an f32.
// NOTE(review): the result is approximate; accuracy bounds are not stated here —
// check the reference implementation before using where precision matters.
// NOTE(review): presumably only meaningful for non-negative, finite `x` —
// confirm against callers.
fn fast_sqrt(x: f32) -> f32 {
return bitcast<f32>(0x1fbd1df5 + (bitcast<i32>(x) >> 1u));
}

// Approximates acos(in_x) as a first-order polynomial in |in_x| multiplied by
// fast_sqrt(1 - |in_x|), mirrored for negative inputs.
// Slightly less accurate than fast_acos_4, but much simpler.
// NOTE(review): presumably expects in_x in [-1, 1]; values outside that range
// make the fast_sqrt argument negative — confirm callers clamp.
fn fast_acos(in_x: f32) -> f32 {
// Work on the magnitude only; the sign is handled by the final select.
let x = abs(in_x);
var res = -0.156583 * x + HALF_PI;
res *= fast_sqrt(1.0 - x);
// Non-negative inputs return `res`; negative inputs return PI - res.
return select(PI - res, res, in_x >= 0.0);
}

// 4th order polynomial approximation
// 4 VGPR, 16 ALU Full Rate
// 7 * 10^-5 radians precision
// Reference : Handbook of Mathematical Functions (chapter : Elementary Transcendental Functions), M. Abramowitz and I.A. Stegun, Ed.
fn fast_acos(x: f32) -> f32 {
fn fast_acos_4(x: f32) -> f32 {
let x1 = abs(x);
let x2 = x1 * x1;
let x3 = x2 * x1;
var s;
var s: f32;

s = -0.2121144 * x1 + 1.5707288;
s = 0.0742610 * x2 + s;
s = -0.0187293 * x3 + s;
s = sqrt(1.0 - x1) * s;
s = fast_sqrt(1.0 - x1) * s;

// acos function mirroring
return select(PI - s, s, x >= 0.0);
Expand All @@ -37,7 +46,7 @@ fn fast_atan2(y: f32, x: f32) {

t3 = select(t3, (0.5 * PI) - t3, abs(y) > abs(x));
t3 = select(t3, PI - t3, x < 0);
t3 *= sign(y)
t3 = select(-t3, t3, y > 0);

return t3;
}
2 changes: 1 addition & 1 deletion crates/bevy_pbr/src/ssao/ssao.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// Source code based on the SSRT3 implementation
// https://github.com/cdrinmatane/SSRT3

#import bevy_pbr::ssao_utils::fast_acos
#import bevy_pbr::fast_math::fast_acos

#import bevy_render::{
view::View,
Expand Down
11 changes: 0 additions & 11 deletions crates/bevy_pbr/src/ssao/ssao_utils.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,3 @@ fn ssao_multibounce(visibility: f32, base_color: vec3<f32>) -> vec3<f32> {
let x = vec3<f32>(visibility);
return max(x, ((x * a + b) * x + c) * x);
}

// Square-root approximation that works on the bit pattern of `x` instead of
// calling `sqrt`: the bits of `x` are reinterpreted as an i32, shifted right by
// one bit, offset by the constant 0x1fbd1df5, and reinterpreted back as an f32.
// NOTE(review): the result is approximate, and presumably only meaningful for
// non-negative, finite `x` — confirm against callers.
fn fast_sqrt(x: f32) -> f32 {
return bitcast<f32>(0x1fbd1df5 + (bitcast<i32>(x) >> 1u));
}

// Approximates acos(in_x) as a first-order polynomial in |in_x| multiplied by
// fast_sqrt(1 - |in_x|), mirrored for negative inputs.
// NOTE(review): presumably expects in_x in [-1, 1] — confirm callers clamp.
fn fast_acos(in_x: f32) -> f32 {
// Work on the magnitude only; the sign is handled by the final select.
let x = abs(in_x);
var res = -0.156583 * x + HALF_PI;
res *= fast_sqrt(1.0 - x);
// Non-negative inputs return `res`; negative inputs return PI - res.
return select(PI - res, res, in_x >= 0.0);
}

0 comments on commit bc8a4d2

Please sign in to comment.