Attempt to fix Ogre2DepthCamera failures in macOS

Signed-off-by: Matias N. Goldberg <[email protected]>
gazebosim · Dec 24, 2022 · 2b4f792 · 2b4f792
1 parent 815baaf
commit 2b4f792
Show file tree

Hide file tree

Showing 6 changed files with 29 additions and 68 deletions.
diff --git a/ogre2/src/Ogre2DepthCamera.cc b/ogre2/src/Ogre2DepthCamera.cc
@@ -909,7 +909,7 @@ void Ogre2DepthCamera::CreateDepthTexture()
         this->ImageWidth(), this->ImageHeight());
       this->dataPtr->ogreDepthTexture[i]->setNumMipmaps(1u);
       this->dataPtr->ogreDepthTexture[i]->setPixelFormat(
-        Ogre::PFG_RGBA32_FLOAT);
+        Ogre::PFG_RGBA32_UINT);
 
       this->dataPtr->ogreDepthTexture[i]->scheduleTransitionTo(
         Ogre::GpuResidency::Resident);

diff --git a/ogre2/src/media/materials/programs/GLSL/depth_camera_final_fs.glsl b/ogre2/src/media/materials/programs/GLSL/depth_camera_final_fs.glsl
@@ -23,11 +23,10 @@ in block
   vec2 uv0;
 } inPs;
 
-vulkan_layout( ogre_t0 ) uniform texture2D inputTexture;
-vulkan( layout( ogre_s0 ) uniform sampler texSampler );
+vulkan_layout( ogre_t0 ) uniform utexture2D inputTexture;
 
 vulkan_layout( location = 0 )
-out vec4 fragColor;
+out uvec4 fragColor;
 
 vulkan( layout( ogre_P0 ) uniform Params { )
 	uniform float near;
@@ -42,14 +41,14 @@ void main()
 {
   float tolerance = 1e-6;
 
-  // Note: We use texelFetch because p.a contains an uint32 and sampling
+  // Note: We use PFG_RGBA32_UINT because p.a contains an uint32 and sampling
   // (even w/ point filtering) causes p.a to loss information (e.g.
   // values close to 0 get rounded to 0)
   //
   // See https://github.com/gazebosim/gz-rendering/issues/332
-  vec4 p = texelFetch(vkSampler2D(inputTexture,texSampler), ivec2(inPs.uv0 *texResolution.xy), 0);
+  uvec4 p = texelFetch(inputTexture, ivec2(inPs.uv0 *texResolution.xy), 0);
 
-  vec3 point = p.xyz;
+  vec3 point = uintBitsToFloat(p.xyz);
 
   // Clamp again in case render passes changed depth values
   // to be outside of min/max range
@@ -78,5 +77,5 @@ void main()
     }
   }
 
-  fragColor = vec4(point, p.a);
+  fragColor = uvec4(floatBitsToUint(point), p.a);
 }
diff --git a/ogre2/src/media/materials/programs/GLSL/depth_camera_fs.glsl b/ogre2/src/media/materials/programs/GLSL/depth_camera_fs.glsl
@@ -32,7 +32,7 @@ vulkan_layout( ogre_t3 ) uniform texture2D particleDepthTexture;
 vulkan( layout( ogre_s0 ) uniform sampler texSampler );
 
 vulkan_layout( location = 0 )
-out vec4 fragColor;
+out uvec4 fragColor;
 
 vulkan( layout( ogre_P0 ) uniform Params { )
 	uniform vec2 projectionParams;
@@ -49,13 +49,13 @@ vulkan( layout( ogre_P0 ) uniform Params { )
 	uniform float rnd;
 vulkan( }; )
 
-float packFloat(vec4 color)
+uint packUnorm4x8Gz(vec4 color)
 {
-  int rgba = (int(round(color.x * 255.0)) << 24) +
-             (int(round(color.y * 255.0)) << 16) +
-             (int(round(color.z * 255.0)) << 8) +
-             int(round(color.w * 255.0));
-  return intBitsToFloat(rgba);
+  uint rgba = (uint(round(color.x * 255.0)) << 24u) +
+              (uint(round(color.y * 255.0)) << 16u) +
+              (uint(round(color.z * 255.0)) << 8u) +
+              uint(round(color.w * 255.0));
+  return rgba;
 }
 
 float toSRGB( float x )
@@ -199,6 +199,5 @@ void main()
   // gamma correct
   color = toSRGB(color);
 
-  float rgba = packFloat(color);
-  fragColor = vec4(point, rgba);
+  fragColor = uvec4(floatBitsToUint(point), packUnorm4x8Gz(color));
 }
diff --git a/ogre2/src/media/materials/programs/Metal/depth_camera_final_fs.metal b/ogre2/src/media/materials/programs/Metal/depth_camera_final_fs.metal
@@ -34,27 +34,24 @@ struct Params
   float4 texResolution;
 };
 
-fragment float4 main_metal
+fragment uint4 main_metal
 (
   PS_INPUT inPs [[stage_in]],
-  texture2d<float>  inputTexture [[texture(0)]],
-  sampler           inputSampler [[sampler(0)]],
+  texture2d<uint> inputTexture [[texture(0)]],
   constant Params &params [[buffer(PARAMETER_SLOT)]]
 )
 {
   float tolerance = 1e-6;
 
-  // Note: We use texelFetch because p.a contains an uint32 and sampling
+  // Note: We use PFG_RGBA32_UINT because p.a contains an uint32 and sampling
   // (even w/ point filtering) causes p.a to loss information (e.g.
   // values close to 0 get rounded to 0)
   //
   // See https://github.com/gazebosim/gz-rendering/issues/332
   // Either: Metal equivalent of texelFetch
-  float4 p = inputTexture.read(uint2(inPs.uv0 * params.texResolution.xy), 0);
-  // Or: Use standard sampler
-  // float4 p = inputTexture.sample(inputSampler, inPs.uv0);
+  uint4 p = inputTexture.read(uint2(inPs.uv0 * params.texResolution.xy), 0);
 
-  float3 point = p.xyz;
+  float3 point = as_type<float3>(p.xyz);
 
   // Clamp again in case render passes changed depth values
   // to be outside of min/max range
@@ -83,6 +80,6 @@ fragment float4 main_metal
     }
   }
 
-  float4 fragColor(point, p.a);
+  uint4 fragColor(as_type<uint3>(point), p.a);
   return fragColor;
 }
diff --git a/ogre2/src/media/materials/programs/Metal/depth_camera_fs.metal b/ogre2/src/media/materials/programs/Metal/depth_camera_fs.metal
@@ -42,13 +42,13 @@ struct Params
   float rnd;
 };
 
-float packFloat(float4 color)
+uint packUnorm4x8Gz(vec4 color)
 {
-  int rgba = (int(round(color.x * 255.0)) << 24) +
-             (int(round(color.y * 255.0)) << 16) +
-             (int(round(color.z * 255.0)) << 8) +
-             int(round(color.w * 255.0));
-  return as_type<float>(rgba);
+  uint rgba = (uint(round(color.x * 255.0)) << 24u) +
+              (uint(round(color.y * 255.0)) << 16u) +
+              (uint(round(color.z * 255.0)) << 8u) +
+              uint(round(color.w * 255.0));
+  return rgba;
 }
 
 inline float toSRGB( float x )
@@ -89,7 +89,7 @@ float4 gaussrand(float2 co, float3 offsets, float mean, float stddev)
   return float4(Z, Z, Z, 0.0);
 }
 
-fragment float4 main_metal
+fragment uint4 main_metal
 (
   PS_INPUT inPs [[stage_in]],
   texture2d<float>  depthTexture [[texture(0)]],
@@ -204,7 +204,6 @@ fragment float4 main_metal
   // gamma correct
   color = toSRGB(color);
 
-  float rgba = packFloat(color);
-  float4 fragColor(point, rgba);
+  uint4 fragColor(as_type<uint3>(point), packUnorm4x8Gz(color));
   return fragColor;
 }
diff --git a/test/integration/heightmap.cc b/test/integration/heightmap.cc
@@ -349,31 +349,7 @@ TEST_F(HeightmapTest, GZ_UTILS_TEST_DISABLED_ON_WIN32(Heightmap))
             abs(depthg - normalData[normalIdx + 1]) > largeError ||
             abs(depthb - normalData[normalIdx + 2]) > largeError)
         {
-#ifdef __APPLE__
-          // TODO(anyone): It seems there is a HW/SW issue in Metal where the
-          // certain values end up as invalid floating point values and thus get
-          // either flushed to zero or as a generic NaN, destroying the
-          // binary data we were trying to store in the alpha channel of
-          // depthrgba.
-          //
-          // Some HW / driver in Linux seems to also be affected.
-          //
-          // Most likely the solution would be to change Ogre2DepthCamera to
-          // use PFG_RGBA32_UINT instead of PFG_RGBA32_FLOAT and deal
-          // with uint <-> float reinterpretation via:
-          //    - GLSL: uintBitsToFloat()
-          //    - Metal: as_type<uint4>()
-          //
-          // Because this test is still very useful to find Metal errors,
-          // we try to run it anyway.
-          //
-          // See
-          // github.com/gazebosim/gz-rendering/pull/785#issuecomment-1345648893
-          const uint8_t error =
-            depthr == 0u && depthg == 0u && depthb == 0u ? 75u : 9u;
-#else
           const uint8_t error = 9u;
-#endif
           EXPECT_NEAR(depthr, normalData[normalIdx + 0], error);
           EXPECT_NEAR(depthg, normalData[normalIdx + 1], error);
           EXPECT_NEAR(depthb, normalData[normalIdx + 2], error);
@@ -433,22 +409,13 @@ TEST_F(HeightmapTest, GZ_UTILS_TEST_DISABLED_ON_WIN32(Heightmap))
       }
     }
 
-#ifdef __APPLE__
-    // TODO(anyone): These thresholds are relaxed to account PFG_RGBA32_UINT
-    // issue mentioned earlier. Once that problem is fixed, this section
-    // should also be removed.
-    EXPECT_LE(numErrors, width * height * 30 / 10000);
-    EXPECT_LE(accumError, 1600);
-    EXPECT_LE(numLargeErrors, width * height * 25 / 10000);
-#else
     // Expect less than 15 pixels in 10k to be different due to GPU &
     // floating point differences when optimizing shaders
     EXPECT_LE(numErrors, width * height * 15 / 10000);
     // Expect less than an accumulated deviation of 25 per channel (RGB)
     EXPECT_LE(accumError, 25 * 3);
     // Expect very few "large" errors.
     EXPECT_LE(numLargeErrors, width * height * 5 / 10000);
-#endif
 
     if (this->HasFailure())
     {