From b99ae6e6ceb43f620b4e8c6300f495e67530615f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 11 Nov 2025 15:50:19 +0700 Subject: [PATCH 01/69] Fix small bug in GenericDataAccessor definition --- .../builtin/hlsl/concepts/accessors/generic_shared_data.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl index cc22595444..ab7a87c7dd 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl @@ -69,7 +69,7 @@ NBL_CONCEPT_END( #include template -NBL_BOOL_CONCEPT GenericDataAccessor = GenericWriteAccessor && GenericWriteAccessor; +NBL_BOOL_CONCEPT GenericDataAccessor = GenericReadAccessor && GenericWriteAccessor; } } From b9537ea7f623ca275236079acf9a8cd43e910909 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 11 Nov 2025 15:51:30 +0700 Subject: [PATCH 02/69] First draft of Warpmap Generation workgroup implementation --- .../hlsl/concepts/accessors/envmap.hlsl | 47 ++++++++ .../nbl/builtin/hlsl/workgroup/envmap.hlsl | 108 ++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl create mode 100644 include/nbl/builtin/hlsl/workgroup/envmap.hlsl diff --git a/include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl new file mode 100644 index 0000000000..1d1ad2a344 --- /dev/null +++ b/include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl @@ -0,0 +1,47 @@ +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_ENVMAP_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_ENVMAP_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace workgroup +{ +namespace envmap +{ +// declare concept +#define NBL_CONCEPT_NAME 
LuminanceReadAccessor +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (a,U) +#define NBL_CONCEPT_PARAM_1 (uv,uint32_t2) +#define NBL_CONCEPT_PARAM_2 (level,uint32_t) +#define NBL_CONCEPT_PARAM_3 (offset,uint32_t2) +// start concept +NBL_CONCEPT_BEGIN(4) +// need to be defined AFTER the concept begins +#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define offset NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,level,offset)) , ::nbl::hlsl::is_same_v, float32_t4>)) +); +#undef offset +#undef level +#undef uv +#undef a +#include + +template +NBL_BOOL_CONCEPT WarpmapWriteAccessor = concepts::accessors::GenericWriteAccessor; + +} +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/workgroup/envmap.hlsl b/include/nbl/builtin/hlsl/workgroup/envmap.hlsl new file mode 100644 index 0000000000..df452fb0e8 --- /dev/null +++ b/include/nbl/builtin/hlsl/workgroup/envmap.hlsl @@ -0,0 +1,108 @@ + +#ifndef _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ +#define _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ +namespace workgroup +{ +namespace envmap +{ +namespace impl +{ + bool choseSecond(float first, float second, NBL_REF_ARG(float) xi) + { + // numerical resilience against IEEE754 + float firstProb = 1.0f / (1.0f + second / first); + float dummy = 0.0f; + return math::partitionRandVariable(firstProb, xi, dummy); + } + +} + +} +} +} +} + +#ifdef __HLSL_VERSION +namespace nbl +{ +namespace hlsl +{ +namespace workgroup +{ +namespace envmap +{ + +struct WarpmapGeneration +{ + + template && envmap::WarpmapWriteAccessor) + // TODO(kevinyu): Should lumapMapSize and warpMapSize provided by Accessor? 
+ static void __call(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, NBL_REF_ARG(OutputAcessor) outputAccessor, uint32_t2 lumaMapSize, uint32_t2 warpMapSize) + { + const uint32_t threadID = uint32_t(SubgroupContiguousIndex()); + const uint32_t lastWarpMapPixel = warpMapSize - uint32_t2(1, 1); + + if (all(threadID < warpMapSize)) + { + float32_t2 xi = float32_t2(threadID) / float32_t2(lastWarpMapPixel); + + uint32_t2 p; + p.y = 0; + + // TODO(kevinyu): Implement findMSB + const uint32_t2 mip2x1 = findMSB(lumaMapSize.x) - 1; + // do one split in the X axis first cause penultimate full mip would have been 2x1 + p.x = impl::choseSecond(luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(0, 0)), luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(1, 0), xi.x) ? 1 : 0; + for (uint32_t i = mip2x1; i != 0;) + { + --i; + p <<= 1; + const float32_t4 values = float32_t4( + luminanceAccessor.get(p, i, uint32_t2(0, 1)), + luminanceAccessor.get(p, i, uint32_t2(1, 1)), + luminanceAccessor.get(p, i, uint32_t2(1, 0)), + luminanceAccessor.get(p, i, uint32_t2(0, 0)) + ); + + float32_t wx_0, wx_1; + { + const float32_t wy_0 = values[3] + values[2]; + const float32_t wy_1 = values[1] + values[0]; + if (impl::choseSecond(wy_0, wy_1, xi.y)) + { + p.y |= 1; + wx_0 = values[0]; + wx_1 = values[1]; + } + else + { + wx_0 = values[3]; + wx_1 = values[2]; + } + } + + if (impl::choseSecond(wx_0, wx_1, xi.x)) + { + p.x |= 1; + } + } + + const float32_t2 directionUV = (float32_t2(p.x, p.y) + xi) / float32_t2(lumaMapSize); + outputAccessor.set(threadID, directionUV); + } + } + +}; + +} +} +} +} +#endif + +#endif \ No newline at end of file From a7371738facb18bf22e89335a377492e8e3e58f5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 18 Nov 2025 16:48:13 +0700 Subject: [PATCH 03/69] Add warp concept --- include/nbl/builtin/hlsl/concepts/warp.hlsl | 41 +++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/nbl/builtin/hlsl/concepts/warp.hlsl diff --git 
a/include/nbl/builtin/hlsl/concepts/warp.hlsl b/include/nbl/builtin/hlsl/concepts/warp.hlsl new file mode 100644 index 0000000000..e9e981a243 --- /dev/null +++ b/include/nbl/builtin/hlsl/concepts/warp.hlsl @@ -0,0 +1,41 @@ +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" +#include "nbl/builtin/hlsl/fft/common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace concepts +{ + +// declare concept +#define NBL_CONCEPT_NAME WARP +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(C) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (warp,U) +#define NBL_CONCEPT_PARAM_1 (uv,float32_t2) +#define NBL_CONCEPT_PARAM_2 (out,C) +// start concept +NBL_CONCEPT_BEGIN(3) +#define warp NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define out NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template warp(uv)) , ::nbl::hlsl::is_same_v, C)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template forwardDensity(uv)) , ::nbl::hlsl::is_same_v, float32_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template backwardDensity(out)) , ::nbl::hlsl::is_same_v, float32_t)) +); +#undef out +#undef warp +#undef uv +#include + +} +} +} + +#endif \ No newline at end of file From 64349db3ac20d982fcffd871cc43308c0b0c3cc7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 18 Nov 2025 16:48:37 +0700 Subject: [PATCH 04/69] Add spherical warp --- include/nbl/builtin/hlsl/warp/spherical.hlsl | 53 ++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/nbl/builtin/hlsl/warp/spherical.hlsl diff --git a/include/nbl/builtin/hlsl/warp/spherical.hlsl b/include/nbl/builtin/hlsl/warp/spherical.hlsl new file mode 100644 index 0000000000..10c341f06b --- /dev/null +++ b/include/nbl/builtin/hlsl/warp/spherical.hlsl @@ -0,0 
+1,53 @@ +#ifndef _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +namespace warp +{ + + class Spherical + { + public: + using codomain_type = float32_t3; + + template ) + static codomain_type warp(const UV uv) + { + const float32_t phi = 2 * uv.x * numbers::pi; + const float32_t theta = uv.y * numbers::pi; + float32_t3 dir; + dir.x = cos(uv.x * 2.f * numbers::pi); + dir.y = sqrt(1.f - dir.x * dir.x); + if (uv.x > 0.5f) dir.y = -dir.y; + const float32_t cosTheta = cos(theta); + float32_t sinTheta = (1.0 - cosTheta * cosTheta); + dir.xy *= sinTheta; + dir.z = cosTheta; + return dir; + } + + template ) + static float32_t forwardDensity(const UV uv) + { + const float32_t theta = uv.y * numbers::pi; + return 1.0f / (sin(theta) * 2 * PI * PI); + + } + + template ) + static float32_t backwardDensity(const C out) + { + //TODO(kevinyu): Derive this density + } + }; + +} +} +} + +#endif \ No newline at end of file From e44fcf44b71164cc938d49c740c0be634c8e5d8b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 18 Nov 2025 16:49:02 +0700 Subject: [PATCH 05/69] Remove envmap accessors.hlsl --- .../hlsl/concepts/accessors/envmap.hlsl | 47 ------------------- 1 file changed, 47 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl diff --git a/include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl deleted file mode 100644 index 1d1ad2a344..0000000000 --- a/include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_ENVMAP_INCLUDED_ -#define _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_ENVMAP_INCLUDED_ - -#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" - -namespace nbl -{ -namespace hlsl -{ -namespace workgroup -{ -namespace envmap -{ -// declare concept -#define NBL_CONCEPT_NAME LuminanceReadAccessor 
-#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U) -// not the greatest syntax but works -#define NBL_CONCEPT_PARAM_0 (a,U) -#define NBL_CONCEPT_PARAM_1 (uv,uint32_t2) -#define NBL_CONCEPT_PARAM_2 (level,uint32_t) -#define NBL_CONCEPT_PARAM_3 (offset,uint32_t2) -// start concept -NBL_CONCEPT_BEGIN(4) -// need to be defined AFTER the concept begins -#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 -#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 -#define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 -#define offset NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 -NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,level,offset)) , ::nbl::hlsl::is_same_v, float32_t4>)) -); -#undef offset -#undef level -#undef uv -#undef a -#include - -template -NBL_BOOL_CONCEPT WarpmapWriteAccessor = concepts::accessors::GenericWriteAccessor; - -} -} -} -} - -#endif \ No newline at end of file From 9b29dfd0a08acd2414cb67a6ac2ea72ed2386e4c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 18 Nov 2025 16:49:19 +0700 Subject: [PATCH 06/69] Hierarchical image sampling implementation --- .../hlsl/sampling/hierarchical_image.hlsl | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl new file mode 100644 index 0000000000..bfcd9ffec7 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -0,0 +1,136 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ + +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +class HierarchicalImage +{ + private: + + static float32_t3 calculateSampleAndPdf(float32_t4 dirsX, float32_t4 dirsY, float32_t2 unnormCoord, uint32_t2 lastWarpmapPixel, NBL_REF_ARG(float32_t) pdf) + { + const float32_t2 interpolant = frac(unnormCoord); + const float32_t4x2 uvs = transpose(float32_t2x4(dirsX, dirsY)); + + const float32_t2 xDiffs[] = { + uvs[2] - uvs[3], + uvs[1] - uvs[0] + }; + const float32_t2 yVals[] = { + xDiffs[0] * interpolant.x + uvs[3], + xDiffs[1] * interpolant.x + uvs[0] + }; + const float32_t2 yDiff = yVals[1] - yVals[0]; + const float32_t2 uv = yDiff * interpolant.y + yVals[0]; + + // Note(kevinyu): sinTheta is calculated twice inside PostWarp::warp and PostWarp::forwardDensity + const float32_t3 L = PostWarp::warp(uv); + + const float detInterpolJacobian = determinant(float32_t2x2( + lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx + yDiff // second column dFdy + )); + + pdf = abs(PostWarp::forwardDensity(uv) / (detInterpolJacobian * float32_t(lastWarpmapPixel.x * lastWarpmapPixel.y)); + + return L; + } + + public: + template ) + static float32_t2 binarySearch(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, const uint32_t2 lumaMapSize, const float32_t2 xi, const bool aspect2x1) + { + + uint32_t2 p = uint32_t2(0, 0); + + if (aspect2x1) { + // TODO(kevinyu): Implement findMSB + const uint32_t2 mip2x1 = findMSB(lumaMapSize.x) - 1; + + // do one split in the X axis first cause penultimate full mip would have been 2x1 + p.x = impl::choseSecond(luminanceAccessor.fetch(uint32_t2(0, 0), mip2x1), luminanceAccessor.fetch(uint32_t2(0, 1), mip2x1), xi.x) ? 
1 : 0; + } + + for (uint32_t i = mip2x1; i != 0;) + { + --i; + p <<= 1; + const float32_t4 values = luminanceAccessor.gather(p, i); + float32_t wx_0, wx_1; + { + const float32_t wy_0 = values[3] + values[2]; + const float32_t wy_1 = values[1] + values[0]; + if (impl::choseSecond(wy_0, wy_1, xi.y)) + { + p.y |= 1; + wx_0 = values[0]; + wx_1 = values[1]; + } + else + { + wx_0 = values[3]; + wx_1 = values[2]; + } + } + + if (impl::choseSecond(wx_0, wx_1, xi.x)) + p.x |= 1; + } + + // TODO(kevinyu): Add some comment why we add xi. + const float32_t2 directionUV = (float32_t2(p.x, p.y) + xi) / float32_t2(lumaMapSize); + return directionUV; + } + + + template && Warp) + static float32_t3 sampleWarpmap(NBL_CONST_REF_ARG(WarpmapAccessor) warpmap, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf) { + + // TODO(kevinyu): Add some comment why we substract by 1 + const uint32_t3 lastWarpmapPixel = warpmapSize - uint32_t3(1, 1, 1); + + const float32_t2 unnormCoord = xi * lastWarpmapPixel; + const float32_t2 interpolant = frac(unnormCoord); + const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y); + const float32_t4 dirsX = warpmap.gatherU(warpSampleCoord); + const float32_t4 dirsY = warpmap.gatherV(warpSampleCoord); + + return calculateSampleAndPdf(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf); + + } + + template && Warp) + static float32_t3 sample(NBL_CONST_REF_ARG(LuminanceReadAccessor) luminanceMap, const uint32_t2 lumaMapSize, const bool lumaAspect2x1, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf) { + + const uint32_t3 lastWarpmapPixel = warpmapSize - uint32_t3(1, 1, 1); + const float32_t2 unnormCoord = xi * lastWarpmapPixel; + const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y); + const float32_t2 dir0 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(0, 1), 
lumaAspect2x1); + const float32_t2 dir1 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(1, 1), lumaAspect2x1); + const float32_t2 dir2 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(1, 0), lumaAspect2x1); + const float32_t2 dir3 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord, lumaAspect2x1); + + const float32_t4 dirsX = float32_t4(dir0.x, dir1.x, dir2.x, dir3.x); + const float32_t4 dirsY = float32_t4(dir1.y, dir1.y, dir2.y, dir3.y); + + return calculateSampleAndPdf(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf); + + } +}; + +} +} + +#endif From 8d682b948a2ca616c6c5681c4be7bcd12b586246 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 20 Dec 2025 07:38:52 +0700 Subject: [PATCH 07/69] Remove envmap.hlsl --- .../nbl/builtin/hlsl/workgroup/envmap.hlsl | 108 ------------------ 1 file changed, 108 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/workgroup/envmap.hlsl diff --git a/include/nbl/builtin/hlsl/workgroup/envmap.hlsl b/include/nbl/builtin/hlsl/workgroup/envmap.hlsl deleted file mode 100644 index df452fb0e8..0000000000 --- a/include/nbl/builtin/hlsl/workgroup/envmap.hlsl +++ /dev/null @@ -1,108 +0,0 @@ - -#ifndef _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ -#define _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ - -namespace nbl -{ -namespace hlsl -{ -namespace workgroup -{ -namespace envmap -{ -namespace impl -{ - bool choseSecond(float first, float second, NBL_REF_ARG(float) xi) - { - // numerical resilience against IEEE754 - float firstProb = 1.0f / (1.0f + second / first); - float dummy = 0.0f; - return math::partitionRandVariable(firstProb, xi, dummy); - } - -} - -} -} -} -} - -#ifdef __HLSL_VERSION -namespace nbl -{ -namespace hlsl -{ -namespace workgroup -{ -namespace envmap -{ - -struct WarpmapGeneration -{ - - template && envmap::WarpmapWriteAccessor) - // TODO(kevinyu): Should lumapMapSize and warpMapSize provided by Accessor? 
- static void __call(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, NBL_REF_ARG(OutputAcessor) outputAccessor, uint32_t2 lumaMapSize, uint32_t2 warpMapSize) - { - const uint32_t threadID = uint32_t(SubgroupContiguousIndex()); - const uint32_t lastWarpMapPixel = warpMapSize - uint32_t2(1, 1); - - if (all(threadID < warpMapSize)) - { - float32_t2 xi = float32_t2(threadID) / float32_t2(lastWarpMapPixel); - - uint32_t2 p; - p.y = 0; - - // TODO(kevinyu): Implement findMSB - const uint32_t2 mip2x1 = findMSB(lumaMapSize.x) - 1; - // do one split in the X axis first cause penultimate full mip would have been 2x1 - p.x = impl::choseSecond(luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(0, 0)), luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(1, 0), xi.x) ? 1 : 0; - for (uint32_t i = mip2x1; i != 0;) - { - --i; - p <<= 1; - const float32_t4 values = float32_t4( - luminanceAccessor.get(p, i, uint32_t2(0, 1)), - luminanceAccessor.get(p, i, uint32_t2(1, 1)), - luminanceAccessor.get(p, i, uint32_t2(1, 0)), - luminanceAccessor.get(p, i, uint32_t2(0, 0)) - ); - - float32_t wx_0, wx_1; - { - const float32_t wy_0 = values[3] + values[2]; - const float32_t wy_1 = values[1] + values[0]; - if (impl::choseSecond(wy_0, wy_1, xi.y)) - { - p.y |= 1; - wx_0 = values[0]; - wx_1 = values[1]; - } - else - { - wx_0 = values[3]; - wx_1 = values[2]; - } - } - - if (impl::choseSecond(wx_0, wx_1, xi.x)) - { - p.x |= 1; - } - } - - const float32_t2 directionUV = (float32_t2(p.x, p.y) + xi) / float32_t2(lumaMapSize); - outputAccessor.set(threadID, directionUV); - } - } - -}; - -} -} -} -} -#endif - -#endif \ No newline at end of file From 890f7c6080a4d234d231b24ce9e1073f9e106f34 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 22 Dec 2025 23:59:45 +0700 Subject: [PATCH 08/69] Move to sampling namespace and implement backward density --- include/nbl/builtin/hlsl/concepts/warp.hlsl | 41 -------------- .../hlsl/sampling/hierarchical_image.hlsl | 2 +- 
include/nbl/builtin/hlsl/sampling/warp.hlsl | 54 +++++++++++++++++++ .../{warp => sampling/warps}/spherical.hlsl | 29 ++++++---- 4 files changed, 73 insertions(+), 53 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/concepts/warp.hlsl create mode 100644 include/nbl/builtin/hlsl/sampling/warp.hlsl rename include/nbl/builtin/hlsl/{warp => sampling/warps}/spherical.hlsl (51%) diff --git a/include/nbl/builtin/hlsl/concepts/warp.hlsl b/include/nbl/builtin/hlsl/concepts/warp.hlsl deleted file mode 100644 index e9e981a243..0000000000 --- a/include/nbl/builtin/hlsl/concepts/warp.hlsl +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ -#define _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ - -#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" -#include "nbl/builtin/hlsl/fft/common.hlsl" - -namespace nbl -{ -namespace hlsl -{ -namespace concepts -{ - -// declare concept -#define NBL_CONCEPT_NAME WARP -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(C) -// not the greatest syntax but works -#define NBL_CONCEPT_PARAM_0 (warp,U) -#define NBL_CONCEPT_PARAM_1 (uv,float32_t2) -#define NBL_CONCEPT_PARAM_2 (out,C) -// start concept -NBL_CONCEPT_BEGIN(3) -#define warp NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 -#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 -#define out NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 -NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template warp(uv)) , ::nbl::hlsl::is_same_v, C)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template forwardDensity(uv)) , ::nbl::hlsl::is_same_v, float32_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template backwardDensity(out)) , ::nbl::hlsl::is_same_v, float32_t)) -); -#undef out -#undef warp -#undef uv -#include - -} -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index bfcd9ffec7..f2b2750703 100644 
--- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -5,7 +5,7 @@ #ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ -#include +#include #include namespace nbl diff --git a/include/nbl/builtin/hlsl/sampling/warp.hlsl b/include/nbl/builtin/hlsl/sampling/warp.hlsl new file mode 100644 index 0000000000..b8936c09f3 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/warp.hlsl @@ -0,0 +1,54 @@ +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" +#include "nbl/builtin/hlsl/fft/common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct WarpResult +{ + C dst; + float32_t density; +}; + +namespace concepts +{ + +// declare concept +#define NBL_CONCEPT_NAME WARP +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (warper,U) +#define NBL_CONCEPT_PARAM_1 (xi,typename U::domain_type) +#define NBL_CONCEPT_PARAM_2 (dst,typename U::codomain_type) +// start concept +NBL_CONCEPT_BEGIN(3) +#define warper NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define xi NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define dst NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(U::domain_type)) + ((NBL_CONCEPT_REQ_TYPE)(U::codomain_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template warp(xi)) , ::nbl::hlsl::is_same_v, WarpResult)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template forwardDensity(xi)) , ::nbl::hlsl::is_same_v, float32_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template backwardDensity(dst)) , ::nbl::hlsl::is_same_v, float32_t)) +); +#undef dst +#undef xi +#undef warper +#include + +} +} +} +} + +#endif \ No newline at end of file 
diff --git a/include/nbl/builtin/hlsl/warp/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl similarity index 51% rename from include/nbl/builtin/hlsl/warp/spherical.hlsl rename to include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl index 10c341f06b..095e138d60 100644 --- a/include/nbl/builtin/hlsl/warp/spherical.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -2,21 +2,24 @@ #define _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_ #include +#include +#include namespace nbl { namespace hlsl { +namespace sampling +{ namespace warp { - - class Spherical + struct Spherical { - public: + using domain_type = float32_t2; using codomain_type = float32_t3; - template ) - static codomain_type warp(const UV uv) + template ) + static WarpResult warp(const D uv) { const float32_t phi = 2 * uv.x * numbers::pi; const float32_t theta = uv.y * numbers::pi; @@ -28,26 +31,30 @@ namespace warp float32_t sinTheta = (1.0 - cosTheta * cosTheta); dir.xy *= sinTheta; dir.z = cosTheta; - return dir; + WarpResult warpResult; + warpResult.dst = dir; + warpResult.density = 1 / (sinTheta * numbers::pi * numbers::pi); + return warpResult; } - template ) - static float32_t forwardDensity(const UV uv) + template ) + static float32_t forwardDensity(const D uv) { const float32_t theta = uv.y * numbers::pi; - return 1.0f / (sin(theta) * 2 * PI * PI); + return 1.0f / (sin(theta) * 2 * numbers::pi * numbers::pi); } template ) - static float32_t backwardDensity(const C out) + static float32_t backwardDensity(const C dst) { - //TODO(kevinyu): Derive this density + return 1.0f / (sqrt(1.0f - dst.z * dst.z) * 2 * numbers::pi * numbers::pi); } }; } } } +} #endif \ No newline at end of file From f99c63ba8b8c4ce97dd73d18c1f3b9c51b321eac Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 23 Dec 2025 00:02:15 +0700 Subject: [PATCH 09/69] Remove private, public from hierarchical_image --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 4 +--- 1 file changed, 1 
insertion(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index f2b2750703..de50a6b0d3 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -15,9 +15,8 @@ namespace hlsl namespace sampling { -class HierarchicalImage +struct HierarchicalImage { - private: static float32_t3 calculateSampleAndPdf(float32_t4 dirsX, float32_t4 dirsY, float32_t2 unnormCoord, uint32_t2 lastWarpmapPixel, NBL_REF_ARG(float32_t) pdf) { @@ -48,7 +47,6 @@ class HierarchicalImage return L; } - public: template ) static float32_t2 binarySearch(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, const uint32_t2 lumaMapSize, const float32_t2 xi, const bool aspect2x1) { From 3ff2791f1bb47feb3a232fcfecc1dca0005dc461 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 23 Dec 2025 01:06:03 +0700 Subject: [PATCH 10/69] Refactor hierarchical image to keep accessor and common data as member --- .../hlsl/sampling/hierarchical_image.hlsl | 61 +++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index de50a6b0d3..5509ce65c3 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -14,12 +14,21 @@ namespace hlsl { namespace sampling { - +template && hierarchical_image::LuminanceReadAccessor && Warp) struct HierarchicalImage { - - static float32_t3 calculateSampleAndPdf(float32_t4 dirsX, float32_t4 dirsY, float32_t2 unnormCoord, uint32_t2 lastWarpmapPixel, NBL_REF_ARG(float32_t) pdf) + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + LuminanceAccessor accessor; + uint32_t2 lumaMapSize; + bool lumaAspect2x1; + uint32_t2 
lastWarpPixel; + + static vector2_type calculateSampleAndPdf(NBL_REF_ARG(scalar_type) rcpPdf, vector4_type dirsX, vector4_type dirsY, vector2_type unnormCoord, uint32_t2 lastWarpPixel) { + // TODO(kevinyu): Convert float32_t to scalar_type const float32_t2 interpolant = frac(unnormCoord); const float32_t4x2 uvs = transpose(float32_t2x4(dirsX, dirsY)); @@ -42,15 +51,23 @@ struct HierarchicalImage yDiff // second column dFdy )); - pdf = abs(PostWarp::forwardDensity(uv) / (detInterpolJacobian * float32_t(lastWarpmapPixel.x * lastWarpmapPixel.y)); + rcpPdf = abs((detInterpolJacobian * scalar_t(lastWarpPixel.x * lastWarpPixel.y) / PostWarp::forwardDensity(uv)); return L; } - template ) - static float32_t2 binarySearch(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, const uint32_t2 lumaMapSize, const float32_t2 xi, const bool aspect2x1) + static HierarchicalImage create(NBL_CONST_REF_ARG(LuminanceAccessor) accessor, const uint32_t2 lumaMapSize, const bool lumaAspect2x1, const uint32_t2 warpSize) { + HierarchicalImage result; + result.accessor = accessor; + result.lumaMapSize = lumaMapSize; + result.lumaAspect2x1 = lumaAspect2x1; + result.lastWarpPixel = warpSize - uint32_t2(1, 1); + return result; + } + static vector binarySearch(const vector xi) + { uint32_t2 p = uint32_t2(0, 0); if (aspect2x1) { @@ -92,28 +109,9 @@ struct HierarchicalImage return directionUV; } - - template && Warp) - static float32_t3 sampleWarpmap(NBL_CONST_REF_ARG(WarpmapAccessor) warpmap, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf) { - - // TODO(kevinyu): Add some comment why we substract by 1 - const uint32_t3 lastWarpmapPixel = warpmapSize - uint32_t3(1, 1, 1); - - const float32_t2 unnormCoord = xi * lastWarpmapPixel; - const float32_t2 interpolant = frac(unnormCoord); - const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y); - const float32_t4 dirsX = warpmap.gatherU(warpSampleCoord); - const 
float32_t4 dirsY = warpmap.gatherV(warpSampleCoord); - - return calculateSampleAndPdf(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf); - - } - - template && Warp) - static float32_t3 sample(NBL_CONST_REF_ARG(LuminanceReadAccessor) luminanceMap, const uint32_t2 lumaMapSize, const bool lumaAspect2x1, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf) { - - const uint32_t3 lastWarpmapPixel = warpmapSize - uint32_t3(1, 1, 1); - const float32_t2 unnormCoord = xi * lastWarpmapPixel; + uint32_t2 generate(NBL_REF_ARG(scalar_type) rcpPdf, vector xi) + { + const float32_t2 unnormCoord = xi * lastWarpPixel; const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y); const float32_t2 dir0 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(0, 1), lumaAspect2x1); const float32_t2 dir1 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(1, 1), lumaAspect2x1); @@ -123,11 +121,12 @@ struct HierarchicalImage const float32_t4 dirsX = float32_t4(dir0.x, dir1.x, dir2.x, dir3.x); const float32_t4 dirsY = float32_t4(dir1.y, dir1.y, dir2.y, dir3.y); - return calculateSampleAndPdf(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf); - - } + return calculateSampleAndPdf(rcpPdf, dirsX, dirsY, unnormCoord, lastWarpPixel); + } }; +//TODO(kevinyu): Impelemnt cached warp map sampler + } } From 76ef53697fe9dffae24cf209f11b2894af63f526 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 26 Dec 2025 16:02:12 +0700 Subject: [PATCH 11/69] Refactor hierarchical image to separate binarySearch from HierarchicalImage class --- .../accessors/hierarchical_image.hlsl | 61 +++++ .../hlsl/sampling/hierarchical_image.hlsl | 240 ++++++++++-------- include/nbl/builtin/hlsl/sampling/warp.hlsl | 19 +- 3 files changed, 203 insertions(+), 117 deletions(-) create mode 100644 include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl diff --git 
a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl new file mode 100644 index 0000000000..a7326ee3da --- /dev/null +++ b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl @@ -0,0 +1,61 @@ +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_HIERARCHICAL_IMAGE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_HIERARCHICAL_IMAGE_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ +namespace hierarchical_image +{ +// declare concept +#define NBL_CONCEPT_NAME LuminanceReadAccessor +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (a,U) +#define NBL_CONCEPT_PARAM_1 (uv,uint32_t2) +#define NBL_CONCEPT_PARAM_2 (level,uint32_t) +// start concept +NBL_CONCEPT_BEGIN(3) +// need to be defined AFTER the concept begins +#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,level)) , ::nbl::hlsl::is_same_v, float32_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(uv,level)) , ::nbl::hlsl::is_same_v, float32_t4)) +); +#undef level +#undef uv +#undef a +#include + +// declare concept +#define NBL_CONCEPT_NAME HierarchicalSampler +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (HierarchicalSamplerT)(ScalarT) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (sampler,HierarchicalSamplerT) +#define NBL_CONCEPT_PARAM_1 (coord,vector) +// start concept +NBL_CONCEPT_BEGIN(2) +// need to be defined AFTER the concept begins +#define sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 
+NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sampler.template sampleUvs(coord)) , ::nbl::hlsl::is_same_v, matrix)) +); +#undef sampler +#undef coord +#include + +} +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 5509ce65c3..5adbb5fb82 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -5,8 +5,10 @@ #ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ +#include #include #include +#include namespace nbl { @@ -14,119 +16,147 @@ namespace hlsl { namespace sampling { -template && hierarchical_image::LuminanceReadAccessor && Warp) -struct HierarchicalImage + +template && hierarchical_image::LuminanceReadAccessor) +struct LuminanceMapSampler { - using scalar_type = T; - using vector2_type = vector; - using vector3_type = vector; - using vector4_type = vector; - LuminanceAccessor accessor; - uint32_t2 lumaMapSize; - bool lumaAspect2x1; - uint32_t2 lastWarpPixel; - - static vector2_type calculateSampleAndPdf(NBL_REF_ARG(scalar_type) rcpPdf, vector4_type dirsX, vector4_type dirsY, vector2_type unnormCoord, uint32_t2 lastWarpPixel) - { - // TODO(kevinyu): Convert float32_t to scalar_type - const float32_t2 interpolant = frac(unnormCoord); - const float32_t4x2 uvs = transpose(float32_t2x4(dirsX, dirsY)); - - const float32_t2 xDiffs[] = { - uvs[2] - uvs[3], - uvs[1] - uvs[0] - }; - const float32_t2 yVals[] = { - xDiffs[0] * interpolant.x + uvs[3], - xDiffs[1] * interpolant.x + uvs[0] - }; - const float32_t2 yDiff = yVals[1] - yVals[0]; - const float32_t2 uv = yDiff * interpolant.y + yVals[0]; - - // Note(kevinyu): sinTheta is calculated twice inside PostWarp::warp and PostWarp::forwardDensity - const float32_t3 L = PostWarp::warp(uv); - - const float detInterpolJacobian = 
determinant(float32_t2x2( - lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx - yDiff // second column dFdy - )); - - rcpPdf = abs((detInterpolJacobian * scalar_t(lastWarpPixel.x * lastWarpPixel.y) / PostWarp::forwardDensity(uv)); - - return L; - } - - static HierarchicalImage create(NBL_CONST_REF_ARG(LuminanceAccessor) accessor, const uint32_t2 lumaMapSize, const bool lumaAspect2x1, const uint32_t2 warpSize) - { - HierarchicalImage result; - result.accessor = accessor; - result.lumaMapSize = lumaMapSize; - result.lumaAspect2x1 = lumaAspect2x1; - result.lastWarpPixel = warpSize - uint32_t2(1, 1); - return result; - } - - static vector binarySearch(const vector xi) - { - uint32_t2 p = uint32_t2(0, 0); - - if (aspect2x1) { - // TODO(kevinyu): Implement findMSB - const uint32_t2 mip2x1 = findMSB(lumaMapSize.x) - 1; - - // do one split in the X axis first cause penultimate full mip would have been 2x1 - p.x = impl::choseSecond(luminanceAccessor.fetch(uint32_t2(0, 0), mip2x1), luminanceAccessor.fetch(uint32_t2(0, 1), mip2x1), xi.x) ? 1 : 0; - } - - for (uint32_t i = mip2x1; i != 0;) - { - --i; - p <<= 1; - const float32_t4 values = luminanceAccessor.gather(p, i); - float32_t wx_0, wx_1; - { - const float32_t wy_0 = values[3] + values[2]; - const float32_t wy_1 = values[1] + values[0]; - if (impl::choseSecond(wy_0, wy_1, xi.y)) - { - p.y |= 1; - wx_0 = values[0]; - wx_1 = values[1]; - } - else - { - wx_0 = values[3]; - wx_1 = values[2]; - } - } - - if (impl::choseSecond(wx_0, wx_1, xi.x)) - p.x |= 1; - } - - // TODO(kevinyu): Add some comment why we add xi. 
- const float32_t2 directionUV = (float32_t2(p.x, p.y) + xi) / float32_t2(lumaMapSize); - return directionUV; - } - - uint32_t2 generate(NBL_REF_ARG(scalar_type) rcpPdf, vector xi) + using scalar_type = T; + using vector2_type = vector; + using vector4_type = vector; + + LuminanceAccessor _map; + uint32_t _mapSize; + bool _aspect2x1; + + static LuminanceMapSampler create(NBL_CONST_REF_ARG(LuminanceAccessor) lumaMap, vector2_type mapSize, bool aspect2x1) + { + LuminanceAccessor result; + result._map = lumaMap; + result._mapSize = mapSize; + result._aspect2x1 = aspect2x1; + return result; + } + + static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi) { - const float32_t2 unnormCoord = xi * lastWarpPixel; - const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y); - const float32_t2 dir0 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(0, 1), lumaAspect2x1); - const float32_t2 dir1 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(1, 1), lumaAspect2x1); - const float32_t2 dir2 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(1, 0), lumaAspect2x1); - const float32_t2 dir3 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord, lumaAspect2x1); + // numerical resilience against IEEE754 + scalar_type dummy = 0.0f; + PartitionRandVariable partition; + partition.leftProb = 1.0f / (1.0f + second/ first); + return partition(xi, dummy); + } - const float32_t4 dirsX = float32_t4(dir0.x, dir1.x, dir2.x, dir3.x); - const float32_t4 dirsY = float32_t4(dir1.y, dir1.y, dir2.y, dir3.y); + vector2_type binarySearch(const vector2_type xi) + { + uint32_t2 p = uint32_t2(0, 0); + const uint32_t2 mip2x1 = findMSB(_mapSize.x) - 1; + + if (_aspect2x1) { + // do one split in the X axis first cause penultimate full mip would have been 2x1 + p.x = choseSecond(_map.get(uint32_t2(0, 0), mip2x1), _map.get(uint32_t2(0, 1), mip2x1), 
xi.x) ? 1 : 0; + } + + for (uint32_t i = mip2x1; i != 0;) + { + --i; + p <<= 1; + const vector4_type values = _map.gather(p, i); + scalar_type wx_0, wx_1; + { + const scalar_type wy_0 = values[3] + values[2]; + const scalar_type wy_1 = values[1] + values[0]; + if (choseSecond(wy_0, wy_1, xi.y)) + { + p.y |= 1; + wx_0 = values[0]; + wx_1 = values[1]; + } + else + { + wx_0 = values[3]; + wx_1 = values[2]; + } + } + + if (choseSecond(wx_0, wx_1, xi.x)) + p.x |= 1; + } + + // TODO(kevinyu): Add some comment why we add xi. + const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / vector2_type(_mapSize); + return directionUV; + } - return calculateSampleAndPdf(rcpPdf, dirsX, dirsY, unnormCoord, lastWarpPixel); + matrix sampleUvs(vector2_type sampleCoord) NBL_CONST_MEMBER_FUNC + { + const vector2_type dir0 = binarySearch(_map, _mapSize, sampleCoord + vector2_type(0, 1), _aspect2x1); + const vector2_type dir1 = binarySearch(_map, _mapSize, sampleCoord + vector2_type(1, 1), _aspect2x1); + const vector2_type dir2 = binarySearch(_map, _mapSize, sampleCoord + vector2_type(1, 0), _aspect2x1); + const vector2_type dir3 = binarySearch(_map, _mapSize, sampleCoord, _aspect2x1); + return { + dir0, + dir1, + dir2, + dir3 + }; } }; -//TODO(kevinyu): Impelemnt cached warp map sampler +template && hierarchical_image::HierarchicalSampler && concepts::Warp) +struct HierarchicalImage +{ + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + HierarchicalSamplerT sampler; + uint32_t warpSize; + uint32_t2 lastWarpPixel; + + static HierarchicalImage create(NBL_CONST_REF_ARG(HierarchicalSamplerT) sampler, uint32_t2 warpSize) + { + HierarchicalImage result; + result.sampler = sampler; + result.warpSize = warpSize; + result.lastWarpPixel = warpSize - uint32_t2(1, 1); + return result; + } + + + uint32_t2 generate(NBL_REF_ARG(scalar_type) rcpPdf, vector2_type xi) NBL_CONST_MEMBER_FUNC + { + const vector2_type texelCoord = xi 
* lastWarpPixel; + const vector2_type sampleCoord = (texelCoord + vector2_type(0.5f, 0.5f)) / vector2_type(warpSize.x, warpSize.y); + + matrix uvs = sampler.sampleUvs(sampleCoord); + + const vector2_type interpolant = frac(texelCoord); + + const vector2_type xDiffs[] = { + uvs[2] - uvs[3], + uvs[1] - uvs[0] + }; + const vector2_type yVals[] = { + xDiffs[0] * interpolant.x + uvs[3], + xDiffs[1] * interpolant.x + uvs[0] + }; + const vector2_type yDiff = yVals[1] - yVals[0]; + const vector2_type uv = yDiff * interpolant.y + yVals[0]; + const WarpResult warpResult = PostWarpT::warp(uv); + + const scalar_type detInterpolJacobian = determinant(matrix( + lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx + yDiff // second column dFdy + )); + + rcpPdf = abs((detInterpolJacobian * scalar_type(lastWarpPixel.x * lastWarpPixel.y)) / warpResult.density); + + return warpResult.dst; + } +}; + +} } } diff --git a/include/nbl/builtin/hlsl/sampling/warp.hlsl b/include/nbl/builtin/hlsl/sampling/warp.hlsl index b8936c09f3..b1c1fcb5b2 100644 --- a/include/nbl/builtin/hlsl/sampling/warp.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warp.hlsl @@ -1,8 +1,6 @@ -#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ -#define _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_ -#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" -#include "nbl/builtin/hlsl/fft/common.hlsl" namespace nbl { @@ -11,18 +9,19 @@ namespace hlsl namespace sampling { -template +template struct WarpResult { - C dst; + CodomainT dst; float32_t density; }; +} namespace concepts { // declare concept -#define NBL_CONCEPT_NAME WARP +#define NBL_CONCEPT_NAME Warp #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) #define NBL_CONCEPT_TPLT_PRM_NAMES (U) // not the greatest syntax but works @@ -36,10 +35,6 @@ NBL_CONCEPT_BEGIN(3) #define dst NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 
NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE)(U::domain_type)) - ((NBL_CONCEPT_REQ_TYPE)(U::codomain_type)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template warp(xi)) , ::nbl::hlsl::is_same_v, WarpResult)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template forwardDensity(xi)) , ::nbl::hlsl::is_same_v, float32_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template backwardDensity(dst)) , ::nbl::hlsl::is_same_v, float32_t)) ); #undef dst #undef xi @@ -47,7 +42,7 @@ NBL_CONCEPT_END( #include } -} + } } From ef773fdf1cfe50182eb6b2868965a0712b0af987 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 26 Dec 2025 16:02:25 +0700 Subject: [PATCH 12/69] Fix Spherical warp indentation --- .../hlsl/sampling/warps/spherical.hlsl | 81 ++++++++++--------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl index 095e138d60..9443151c6f 100644 --- a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -1,5 +1,5 @@ -#ifndef _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_WARP_SPHERICAL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_WARP_SPHERICAL_INCLUDED_ #include #include @@ -13,44 +13,45 @@ namespace sampling { namespace warp { - struct Spherical - { - using domain_type = float32_t2; - using codomain_type = float32_t3; - - template ) - static WarpResult warp(const D uv) - { - const float32_t phi = 2 * uv.x * numbers::pi; - const float32_t theta = uv.y * numbers::pi; - float32_t3 dir; - dir.x = cos(uv.x * 2.f * numbers::pi); - dir.y = sqrt(1.f - dir.x * dir.x); - if (uv.x > 0.5f) dir.y = -dir.y; - const float32_t cosTheta = cos(theta); - float32_t sinTheta = (1.0 - cosTheta * cosTheta); - dir.xy *= sinTheta; - dir.z = cosTheta; - WarpResult warpResult; - warpResult.dst = dir; - warpResult.density = 1 / (sinTheta * 
numbers::pi * numbers::pi); - return warpResult; - } - - template ) - static float32_t forwardDensity(const D uv) - { - const float32_t theta = uv.y * numbers::pi; - return 1.0f / (sin(theta) * 2 * numbers::pi * numbers::pi); - - } - - template ) - static float32_t backwardDensity(const C dst) - { - return 1.0f / (sqrt(1.0f - dst.z * dst.z) * 2 * numbers::pi * numbers::pi); - } - }; + +struct Spherical +{ + using domain_type = float32_t2; + using codomain_type = float32_t3; + + template ) + static WarpResult warp(const D uv) + { + const float32_t phi = 2 * uv.x * numbers::pi; + const float32_t theta = uv.y * numbers::pi; + float32_t3 dir; + dir.x = cos(uv.x * 2.f * numbers::pi); + dir.y = sqrt(1.f - dir.x * dir.x); + if (uv.x > 0.5f) dir.y = -dir.y; + const float32_t cosTheta = cos(theta); + float32_t sinTheta = (1.0 - cosTheta * cosTheta); + dir.xy *= sinTheta; + dir.z = cosTheta; + WarpResult warpResult; + warpResult.dst = dir; + warpResult.density = 1 / (sinTheta * numbers::pi * numbers::pi); + return warpResult; + } + + template ) + static float32_t forwardDensity(const D uv) + { + const float32_t theta = uv.y * numbers::pi; + return 1.0f / (sin(theta) * 2 * numbers::pi * numbers::pi); + + } + + template ) + static float32_t backwardDensity(const C dst) + { + return 1.0f / (sqrt(1.0f - dst.z * dst.z) * 2 * numbers::pi * numbers::pi); + } +}; } } From b9467fee56c7069a841137bc6438fdf753b585b1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 26 Dec 2025 16:56:47 +0700 Subject: [PATCH 13/69] Add some comment why we add xi to the sample uvs --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 5adbb5fb82..f57ce8f050 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -82,7 +82,7 @@ 
struct LuminanceMapSampler p.x |= 1; } - // TODO(kevinyu): Add some comment why we add xi. + // If we don`t add xi, the sample will clump to the lowest corner of environment map texel. We add xi to simulate uniform distribution within a pixel and make the sample continuous. This is why we compute the pdf not from the normalized luminance of the texel, instead from the reciprocal of the Jacobian. const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / vector2_type(_mapSize); return directionUV; } From ac1e2f3fd9fb4850d0a52d96889e37d5bd2b7c14 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 6 Jan 2026 21:54:47 +0700 Subject: [PATCH 14/69] WIP --- CMakeLists.txt | 1 + .../hlsl/sampling/warps/spherical.hlsl | 24 ++++++++++++++----- src/nbl/builtin/CMakeLists.txt | 3 +++ src/nbl/ext/CMakeLists.txt | 12 ++++++++++ 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 84c9a99dc4..2ffac18cd6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -176,6 +176,7 @@ option(NBL_BUILD_EXAMPLES "Enable building examples" ON) option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" OFF) # TODO: once it compies turn this ON by default! option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON) option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" ON) +option(NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING "Enable Nabla Envmap Importance Sampling extension?" ON) option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" 
OFF) if(NBL_COMPILE_WITH_CUDA) diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl index 9443151c6f..ecc9423916 100644 --- a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -19,8 +19,8 @@ struct Spherical using domain_type = float32_t2; using codomain_type = float32_t3; - template ) - static WarpResult warp(const D uv) + template ) + static WarpResult warp(const DomainT uv) { const float32_t phi = 2 * uv.x * numbers::pi; const float32_t theta = uv.y * numbers::pi; @@ -38,16 +38,28 @@ struct Spherical return warpResult; } - template ) - static float32_t forwardDensity(const D uv) + template ) + static domain_type inverseWarp(const CodomainT v) + { + float32_t2 uv = float32_t2(atan(v.y, v.x), acos(v.z)); + uv.x *= (numbers::inv_pi * 0.5); + if (v.y < 0.0f) + uv.x += 1.0f; + uv.y *= numbers::inv_pi; + return uv; + } + + + template ) + static float32_t forwardDensity(const DomainT uv) { const float32_t theta = uv.y * numbers::pi; return 1.0f / (sin(theta) * 2 * numbers::pi * numbers::pi); } - template ) - static float32_t backwardDensity(const C dst) + template ) + static float32_t backwardDensity(const CodomainT dst) { return 1.0f / (sqrt(1.0f - dst.z * dst.z) * 2 * numbers::pi * numbers::pi); } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 085ed3c923..050907b3a3 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -339,6 +339,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/workgroup2/shared_scan.hlsl") #Extensions LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/default.vert.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/structs.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED 
"hlsl/ext/EnvmapImportanceSampling/gen_luma.comp.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/measure_luma.comp.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/text_rendering/msdf.hlsl") #memory LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/memory.hlsl") diff --git a/src/nbl/ext/CMakeLists.txt b/src/nbl/ext/CMakeLists.txt index af46b29aab..221c1fe88e 100644 --- a/src/nbl/ext/CMakeLists.txt +++ b/src/nbl/ext/CMakeLists.txt @@ -66,6 +66,18 @@ if(NBL_BUILD_DEBUG_DRAW) ) endif() +if(NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING) + add_subdirectory(EnvmapImportanceSampling) + set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDE_DIRS + ${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDE_DIRS} + PARENT_SCOPE + ) + set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_LIB + ${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_LIB} + PARENT_SCOPE + ) +endif() + propagate_changed_variables_to_parent_scope() NBL_ADJUST_FOLDERS(ext) \ No newline at end of file From baca1cf2044853df7b5f4d6d532d63960b81b482 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 10 Jan 2026 00:48:16 +0700 Subject: [PATCH 15/69] Rename uv to coord for LuminanceAccessor concepts --- .../hlsl/concepts/accessors/hierarchical_image.hlsl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl index a7326ee3da..09abd08615 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl @@ -17,20 +17,20 @@ namespace hierarchical_image #define NBL_CONCEPT_TPLT_PRM_NAMES (U) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,U) -#define NBL_CONCEPT_PARAM_1 (uv,uint32_t2) +#define NBL_CONCEPT_PARAM_1 (coord,uint32_t2) #define NBL_CONCEPT_PARAM_2 (level,uint32_t) // start concept NBL_CONCEPT_BEGIN(3) // need to be defined AFTER the concept begins 
#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 -#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,level)) , ::nbl::hlsl::is_same_v, float32_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(uv,level)) , ::nbl::hlsl::is_same_v, float32_t4)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(coord,level)) , ::nbl::hlsl::is_same_v, float32_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(coord,level)) , ::nbl::hlsl::is_same_v, float32_t4)) ); #undef level -#undef uv +#undef coord #undef a #include From f12b7970b702f44030f31a425e8dce30bfe3dd27 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 10 Jan 2026 00:48:39 +0700 Subject: [PATCH 16/69] Fix hierarchical_image.hlsl --- .../hlsl/sampling/hierarchical_image.hlsl | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index f57ce8f050..82637a42f8 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -17,27 +17,29 @@ namespace hlsl namespace sampling { -template && hierarchical_image::LuminanceReadAccessor) +template && hierarchical_image::LuminanceReadAccessor) struct LuminanceMapSampler { using scalar_type = T; using vector2_type = vector; using vector4_type = vector; - LuminanceAccessor _map; - uint32_t _mapSize; + LuminanceAccessorT _map; + uint32_t2 _mapSize; + uint32_t2 _lastWarpPixel; bool _aspect2x1; - static LuminanceMapSampler create(NBL_CONST_REF_ARG(LuminanceAccessor) lumaMap, vector2_type mapSize, bool aspect2x1) + static LuminanceMapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1, uint32_t2 warpSize) { - LuminanceAccessor result; + 
LuminanceMapSampler result; result._map = lumaMap; result._mapSize = mapSize; + result._lastWarpPixel = warpSize - uint32_t2(1, 1); result._aspect2x1 = aspect2x1; return result; } - static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi) + static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(float32_t) xi) { // numerical resilience against IEEE754 scalar_type dummy = 0.0f; @@ -46,8 +48,9 @@ struct LuminanceMapSampler return partition(xi, dummy); } - vector2_type binarySearch(const vector2_type xi) + vector2_type binarySearch(const uint32_t2 coord) { + float32_t2 xi = float32_t2(coord)/ _lastWarpPixel; uint32_t2 p = uint32_t2(0, 0); const uint32_t2 mip2x1 = findMSB(_mapSize.x) - 1; @@ -87,18 +90,18 @@ struct LuminanceMapSampler return directionUV; } - matrix sampleUvs(vector2_type sampleCoord) NBL_CONST_MEMBER_FUNC + matrix sampleUvs(uint32_t2 sampleCoord) NBL_CONST_MEMBER_FUNC { - const vector2_type dir0 = binarySearch(_map, _mapSize, sampleCoord + vector2_type(0, 1), _aspect2x1); - const vector2_type dir1 = binarySearch(_map, _mapSize, sampleCoord + vector2_type(1, 1), _aspect2x1); - const vector2_type dir2 = binarySearch(_map, _mapSize, sampleCoord + vector2_type(1, 0), _aspect2x1); - const vector2_type dir3 = binarySearch(_map, _mapSize, sampleCoord, _aspect2x1); - return { + const vector2_type dir0 = binarySearch(sampleCoord + vector2_type(0, 1)); + const vector2_type dir1 = binarySearch(sampleCoord + vector2_type(1, 1)); + const vector2_type dir2 = binarySearch(sampleCoord + vector2_type(1, 0)); + const vector2_type dir3 = binarySearch(sampleCoord); + return matrix( dir0, dir1, dir2, dir3 - }; + ); } }; @@ -110,7 +113,7 @@ struct HierarchicalImage using vector3_type = vector; using vector4_type = vector; HierarchicalSamplerT sampler; - uint32_t warpSize; + uint32_t2 warpSize; uint32_t2 lastWarpPixel; static HierarchicalImage create(NBL_CONST_REF_ARG(HierarchicalSamplerT) sampler, uint32_t2 warpSize) @@ 
-143,7 +146,7 @@ struct HierarchicalImage const vector2_type yDiff = yVals[1] - yVals[0]; const vector2_type uv = yDiff * interpolant.y + yVals[0]; - const WarpResult warpResult = PostWarpT::warp(uv); + const WarpResult warpResult = PostWarpT::warp(uv); const scalar_type detInterpolJacobian = determinant(matrix( lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx From 0957aed7b7cc608b265a41739356325d0ab95640 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 10 Jan 2026 00:49:06 +0700 Subject: [PATCH 17/69] Fix typo in spherical.hlsl --- include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl index ecc9423916..48237c7e2a 100644 --- a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -38,7 +38,7 @@ struct Spherical return warpResult; } - template ) + template ) static domain_type inverseWarp(const CodomainT v) { float32_t2 uv = float32_t2(atan(v.y, v.x), acos(v.z)); From 1b35d34e0bf4e63a59f0fc34922af4590a21e589 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 10 Jan 2026 00:49:39 +0700 Subject: [PATCH 18/69] Implement gen_luma, gen_warpmap and measure_luma shaders --- .../builtin/hlsl/common.hlsl | 49 ++++++ .../builtin/hlsl/gen_luma.comp.hlsl | 30 ++++ .../builtin/hlsl/gen_warpmap.comp.hlsl | 51 +++++++ .../builtin/hlsl/measure_luma.comp.hlsl | 143 ++++++++++++++++++ 4 files changed, 273 insertions(+) create mode 100644 include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl create mode 100644 include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl create mode 100644 include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl create mode 100644 include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl diff --git 
a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl new file mode 100644 index 0000000000..e0240909f0 --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl @@ -0,0 +1,49 @@ +#ifndef _NBL_HLSL_EXT_ENVMAP_IMPORTANCE_SAMPLING_PARAMETERS_COMMON_INCLUDED_ +#define _NBL_HLSL_EXT_ENVMAP_IMPORTANCE_SAMPLING_PARAMETERS_COMMON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace envmap_importance_sampling +{ + +struct SLumaGenPushConstants +{ + float32_t4 luminanceScales; + uint32_t2 lumaMapResolution; +}; + +struct SLumaMeasurePushConstants +{ + float32_t4 luminanceScales; + uint32_t2 lumaMapResolution; + uint64_t lumaMeasurementBuf; +}; + +struct SLumaMeasurement +{ + float32_t3 weightedDir; + float32_t luma; + float32_t maxLuma; +}; + +struct device_capabilities +{ +#ifdef TEST_NATIVE + NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = true; +#else + NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = false; +#endif +}; + +} +} +} +} + +#endif diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl new file mode 100644 index 0000000000..e701f0b00d --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl @@ -0,0 +1,30 @@ +#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" + +using namespace nbl; +using namespace nbl::hlsl; +using namespace nbl::hlsl::ext::envmap_importance_sampling; + +[[vk::push_constant]] SLumaGenPushConstants pc; + +[[vk::combinedImageSampler]][[vk::binding(0, 0)]] Texture2D envMap; +[[vk::combinedImageSampler]][[vk::binding(0, 0)]] SamplerState envMapSampler; + +[[vk::binding(1, 0)]] RWTexture2D outImage; + +// TODO(kevinyu): Temporary to make nsc compiles +#define LUMA_MAP_GEN_WORKGROUP_DIM 16 
+ +[numthreads(LUMA_MAP_GEN_WORKGROUP_DIM, LUMA_MAP_GEN_WORKGROUP_DIM, 1)] +[shader("compute")] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + if (all(threadID < pc.lumaMapResolution)) + { + + const float32_t2 uv = (float32_t2(threadID.xy) + float32_t2(0.5, 0.5)) / float32_t2(pc.lumaMapResolution); + const float32_t3 envMapSample = envMap.Sample(envMapSampler, uv).rgb; + const float32_t luma = hlsl::dot(float32_t4(envMapSample, 1.0f), pc.luminanceScales) * sin(numbers::pi * uv.y); + + outImage[threadID.xy] = luma; + } +} diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl new file mode 100644 index 0000000000..063dfaf9b9 --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl @@ -0,0 +1,51 @@ +#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" +#include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl" + + +[[vk::binding(0, 0)]] Texture2D lumaMap; + +[[vk::binding(1, 0)]] RWTexture2D outImage; + +// TODO(kevinyu): Temporary to make nsc compiles +#define WARPMAP_GEN_WORKGROUP_DIM 16 + +using namespace nbl; +using namespace nbl::hlsl; +using namespace nbl::hlsl::sampling; + +struct LuminanceAccessor +{ + float32_t get(uint32_t2 coord, uint32_t level) + { + return lumaMap.Load(uint32_t3(coord, level)); + } + + float32_t4 gather(uint32_t2 coord, uint32_t level) + { + return float32_t4( + lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 1)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 1)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 0)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 0)) + ); + + } +}; + +[numthreads(WARPMAP_GEN_WORKGROUP_DIM, WARPMAP_GEN_WORKGROUP_DIM, 1)] +[shader("compute")] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + LuminanceAccessor luminanceAccessor; + uint32_t lumaMapWidth, lumaMapHeight; + + using LuminanceSampler = 
LuminanceMapSampler; + + LuminanceSampler luminanceSampler = + LuminanceSampler::create(luminanceAccessor, lumaMapWidth, lumaMapHeight, lumaMapWidth != lumaMapHeight); + + uint32_t2 pixelCoord = threadID.xy; + + outImage[pixelCoord] = luminanceSampler.binarySearch(pixelCoord); + +} diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl new file mode 100644 index 0000000000..845d12632d --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl @@ -0,0 +1,143 @@ +#include "nbl/builtin/hlsl/sampling/warps/spherical.hlsl" +#include "nbl/builtin/hlsl/workgroup2/arithmetic.hlsl" + +#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" + +using namespace nbl; +using namespace nbl::hlsl; +using namespace nbl::hlsl::ext::envmap_importance_sampling; + +// TODO(kevinyu): Temporary to make nsc works +using config_t = nbl::hlsl::workgroup2::ArithmeticConfiguration<4, 4, 2>; + +[[vk::push_constant]] SLumaMeasurePushConstants pc; + +[[vk::binding(0, 0)]] Texture2D lumaMap; + +// final (level 1/2) scan needs to fit in one subgroup exactly +groupshared float32_t scratch[mpl::max_v]; + +struct PreloadedUnitData +{ + float32_t3 weightedDir; + float32_t luma; +}; + +struct ScratchProxy +{ + template + void get(const uint32_t ix, NBL_REF_ARG(AccessType) value) + { + value = scratch[ix]; + } + + template + void set(const uint32_t ix, const AccessType value) + { + scratch[ix] = value; + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } +}; + +struct PreloadedData +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = uint16_t(1u) << config_t::WorkgroupSizeLog2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t PreloadedDataCount = config_t::VirtualWorkgroupSize / WorkgroupSize; + + PreloadedUnitData getData(const uint32_t ix) + { + PreloadedUnitData value; + const int32_t2 pixelCoord = int32_t2(ix % 
pc.lumaMapResolution.x, ix / pc.lumaMapResolution.x); + const float32_t2 uv = (float32_t2(pixelCoord) + float32_t2(0.5, 0.5)) / float32_t2(pc.lumaMapResolution); + const float32_t luma = lumaMap.Load(int32_t3(pixelCoord, 0)); + value.weightedDir = sampling::warp::Spherical::warp(uv).dst * luma; + value.luma = luma; + return value; + } + + void preload() + { + const uint16_t invocationIndex = hlsl::workgroup::SubgroupContiguousIndex(); + [unroll] + for (uint16_t idx = 0; idx < PreloadedDataCount; idx++) + data[idx] = getData(idx * WorkgroupSize + invocationIndex); + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } + + PreloadedUnitData data[config_t::ItemsPerInvocation_0]; +}; + +static PreloadedData preloadData; + +struct DirXAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.x; + } +}; + +struct DirYAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.y; + } +}; + +struct DirZAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.z; + } +}; + +struct LumaAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].luma; + } +}; + +[numthreads(config_t::WorkgroupSize, 1, 1)] +[shader("compute")] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + ScratchProxy scratchAccessor; + + preloadData.preload(); + preloadData.workgroupExecutionAndMemoryBarrier(); + + SLumaMeasurement measurement; + + DirXAccessor dirXAccessor; + measurement.weightedDir.x= workgroup2::reduction, device_capabilities>::template __call(dirXAccessor, scratchAccessor); + + DirYAccessor dirYAccessor; + measurement.weightedDir.y = workgroup2::reduction, 
device_capabilities>::template __call(dirYAccessor, scratchAccessor); + + DirZAccessor dirZAccessor; + measurement.weightedDir.z = workgroup2::reduction, device_capabilities>::template __call(dirZAccessor, scratchAccessor); + + LumaAccessor lumaAccessor; + measurement.luma = workgroup2::reduction, device_capabilities>::template __call(lumaAccessor, scratchAccessor); + + measurement.maxLuma = workgroup2::reduction, device_capabilities>::template __call(lumaAccessor, scratchAccessor); + + if (all(threadID == uint32_t3(0, 0, 0))) + vk::RawBufferStore(pc.lumaMeasurementBuf, measurement); +} From 665bb8ded899a7e597a06abcdf1996beff7f4ae8 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 10 Jan 2026 00:50:18 +0700 Subject: [PATCH 19/69] EnvmapImportanceSampling CMakeLists --- .../EnvmapImportanceSampling/CMakeLists.txt | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt diff --git a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt new file mode 100644 index 0000000000..fabd4b8b50 --- /dev/null +++ b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt @@ -0,0 +1,83 @@ +include(${NBL_ROOT_PATH}/cmake/common.cmake) + +set(NBL_EXT_INTERNAL_INCLUDE_DIR "${NBL_ROOT_PATH}/include") + +set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h +) + +set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/CEnvmapImportanceSampling.cpp" +) + +nbl_create_ext_library_project( + ENVMAP_IMPORTANCE_SAMPLING + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H}" + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC}" + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_EXTERNAL_INCLUDE}" + "" + "" +) + +get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}" ABSOLUTE) + +set(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT 
"${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}/nbl/ext/EnvmapImportanceSampling/builtin/hlsl") +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/common.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warpmap.comp.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl +) +target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warpmap.comp.hlsl", + "KEY": "gen_warpmap", + }, + { + "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl", + "KEY": "gen_luma", + }, + { + "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl", + "KEY": "measure_luma", + } + +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${NBL_ROOT_PATH}/include" # a workaround due to envmap importance sampling ext common header which is not part of Nabla builtin archive + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${LIB_NAME}SPIRV + LINK_TO ${LIB_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/ext/EnvmapImportanceSampling/builtin/build/spirv/keys.hpp + NAMESPACE nbl::ext::envmap_importance_sampling::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::ext::envmap_importance_sampling::builtin::build + TARGET ${LIB_NAME}_builtinsBuild + LINK_TO ${LIB_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) + + +add_library(Nabla::ext::EnvmapImportanceSampling ALIAS ${LIB_NAME}) From b522b4f826a737b43ffb3a1d9dca681c0b9a5aef Mon Sep 17 00:00:00 2001 From: kevyuu Date: 
Sat, 10 Jan 2026 00:50:40 +0700 Subject: [PATCH 20/69] Initial implementation of CEnvmapImportanceSampling --- .../CEnvmapImportanceSampling.cpp | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp new file mode 100644 index 0000000000..87ede95598 --- /dev/null +++ b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp @@ -0,0 +1,114 @@ +#include "nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h" +#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" + +using namespace nbl::hlsl::ext::envmap_importance_sampling; + +#ifdef NBL_EMBED_BUILTIN_RESOURCES +#include "nbl/ext/debug_draw/builtin/build/CArchive.h" +#endif + +#include "nbl/ext/EnvmapImportanceSampling/builtin/build/spirv/keys.hpp" + +using namespace nbl; +using namespace core; +using namespace video; +using namespace system; +using namespace asset; +using namespace hlsl; + +namespace nbl::ext::envmap_importance_sampling +{ + +constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/EnvmapImportanceSampling"; + +const smart_refctd_ptr EnvmapImportanceSampling::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) +{ + assert(system); + + if (!system) + return nullptr; + + // extension should mount everything for you, regardless if content goes from virtual filesystem + // or disk directly - and you should never rely on application framework to expose extension data + #ifdef NBL_EMBED_BUILTIN_RESOURCES + auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); + #else + auto archive = make_smart_refctd_ptr(std::string_view(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT), smart_refctd_ptr(logger), system); + #endif + + system->mount(smart_refctd_ptr(archive), 
archiveAlias.data()); + return smart_refctd_ptr(archive); +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + auto getShader = [&](const core::string& key)->smart_refctd_ptr { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = params.utilities->getLogger(); + lp.workingDirectory = NBL_EXT_MOUNT_ENTRY; + auto bundle = params.assetManager->getAsset(key.c_str(), lp); + + const auto contents = bundle.getContents(); + + if (contents.empty()) + { + logger.log("Failed to load shader %s from disk", ILogger::ELL_ERROR, key.c_str()); + return nullptr; + } + + if (bundle.getAssetType() != IAsset::ET_SHADER) + { + logger.log("Loaded asset has wrong type!", ILogger::ELL_ERROR); + return nullptr; + } + + return IAsset::castDown(contents[0]); + }; + + const auto key = nbl::ext::envmap_importance_sampling::builtin::build::get_spirv_key<"measure_luma">(device); + smart_refctd_ptr genLumaShader = getShader(key); + if (!genLumaShader) + { + params.utilities->getLogger()->log("Could not compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + + return nullptr; + +} + +// +// core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createLumaGenPipelineLayout(video::ILogicalDevice* device) +// { +// asset::SPushConstantRange pcRange = { +// .stageFlags = hlsl::ESS_COMPUTE, +// .offset = 0, +// .size = sizeof(SLumaGenPushConstants) +// }; +// +// const IGPUDescriptorSetLayout::SBinding bindings[] = { +// { +// .binding = 0u, +// .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, +// .createFlags = 
IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, +// .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, +// .count = 1u, +// .immutableSamplers = &defaultSampler +// }, +// { +// .binding = 1u, +// .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, +// .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, +// .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, +// .count = 1u +// } +// }; +// +// } + +} From 3e51c69ce2072549cc130961250a6456450d58c6 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 12 Jan 2026 18:12:53 +0700 Subject: [PATCH 21/69] Initial implementation of CEnvmapImportanceSampling --- .../CEnvmapImportanceSampling.h | 79 ++++++++++++ .../CEnvmapImportanceSampling.cpp | 120 +++++++++++++----- 2 files changed, 168 insertions(+), 31 deletions(-) create mode 100644 include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h diff --git a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h new file mode 100644 index 0000000000..039874202d --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h @@ -0,0 +1,79 @@ +#ifndef _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ +#define _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ + +#include "nbl/asset/IPipelineLayout.h" +#include "nbl/video/declarations.h" + +namespace nbl::ext::envmap_importance_sampling +{ + +class EnvmapImportanceSampling +{ + public: + + struct SCachedCreationParameters + { + // using streaming_buffer_t = video::StreamingTransientDataBufferST>; + // + // static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + // static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + // + // DrawMode drawMode = ADM_DRAW_BOTH; + + core::smart_refctd_ptr utilities; + + //! 
optional, default MDI buffer allocated if not provided + // core::smart_refctd_ptr streamingBuffer = nullptr; + }; + + struct SCreationParameters : public SCachedCreationParameters + { + video::IQueue* transfer = nullptr; // only used to make the 24 element index buffer and instanced pipeline on create + core::smart_refctd_ptr assetManager = nullptr; + + core::smart_refctd_ptr genLumaPipelineLayout = nullptr; + + inline bool validate() const + { + const auto validation = std::to_array + ({ + std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), + std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), + std::make_pair(bool(transfer), "Invalid `creationParams.transfer` is nullptr!"), + std::make_pair(bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") + }); + + system::logger_opt_ptr logger = utilities->getLogger(); + for (const auto& [ok, error] : validation) + if (!ok) + { + logger.log(error, system::ILogger::ELL_ERROR); + return false; + } + + assert(bool(assetManager->getSystem())); + + return true; + } + + }; + + static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device, const core::smart_refctd_ptr* sampler); + + static core::smart_refctd_ptr createMeasureLumaPipelineLayout(video::ILogicalDevice* device); + + static core::smart_refctd_ptr createGenWarpMapPipelineLayout(video::ILogicalDevice* device); + + //! 
mounts the extension's archive to given system - useful if you want to create your own shaders with common header included + static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); + + static core::smart_refctd_ptr createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + + static core::smart_refctd_ptr createMeasureLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + private: + core::smart_refctd_ptr m_lumaGenPipeline; + +}; + +} +#endif diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp index 87ede95598..a4517123b9 100644 --- a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp +++ b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp @@ -70,45 +70,103 @@ core::smart_refctd_ptr EnvmapImportanceSampling::cre return IAsset::castDown(contents[0]); }; - const auto key = nbl::ext::envmap_importance_sampling::builtin::build::get_spirv_key<"measure_luma">(device); + const auto key = nbl::ext::envmap_importance_sampling::builtin::build::get_spirv_key<"gen_luma">(device); smart_refctd_ptr genLumaShader = getShader(key); if (!genLumaShader) { - params.utilities->getLogger()->log("Could not compile shaders!", ILogger::ELL_ERROR); + logger.log("Could not compile shaders!", ILogger::ELL_ERROR); return nullptr; } - return nullptr; + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = genLumaShader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could not create pipeline!", ILogger::ELL_ERROR); 
+ return nullptr; + } + + return pipeline; +} + + +core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createGenLumaPipelineLayout(video::ILogicalDevice* device, const smart_refctd_ptr* sampler) +{ + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(SLumaGenPushConstants) + }; + + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = sampler + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({ &pcRange, 1 }, setLayout, nullptr, nullptr, nullptr); } -// -// core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createLumaGenPipelineLayout(video::ILogicalDevice* device) -// { -// asset::SPushConstantRange pcRange = { -// .stageFlags = hlsl::ESS_COMPUTE, -// .offset = 0, -// .size = sizeof(SLumaGenPushConstants) -// }; -// -// const IGPUDescriptorSetLayout::SBinding bindings[] = { -// { -// .binding = 0u, -// .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, -// .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, -// .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, -// .count = 1u, -// .immutableSamplers = &defaultSampler -// }, -// { -// .binding = 1u, -// .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, -// .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, -// .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, -// .count = 1u -// } 
-// }; -// -// } +core::smart_refctd_ptr EnvmapImportanceSampling::createMeasureLumaPipelineLayout(video::ILogicalDevice* device) +{ + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(SLumaMeasurePushConstants) + }; + + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({ &pcRange, 1 }, setLayout, nullptr, nullptr, nullptr); +} +core::smart_refctd_ptr EnvmapImportanceSampling::createGenWarpMapPipelineLayout(video::ILogicalDevice* device) +{ + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); +} } From c72d305b9e38ddf736778a8fc23438c5a114f0f8 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 12 Jan 2026 20:31:59 +0700 Subject: [PATCH 22/69] Small fixes --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 2 +- include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl 
b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 82637a42f8..23011219f8 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -39,7 +39,7 @@ struct LuminanceMapSampler return result; } - static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(float32_t) xi) + static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi) { // numerical resilience against IEEE754 scalar_type dummy = 0.0f; diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl index 48237c7e2a..7df93ac651 100644 --- a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -38,14 +38,14 @@ struct Spherical return warpResult; } - template ) + template ) static domain_type inverseWarp(const CodomainT v) { - float32_t2 uv = float32_t2(atan(v.y, v.x), acos(v.z)); + float32_t2 uv = float32_t2(atan(v.y, v.x), acos(v.z)); uv.x *= (numbers::inv_pi * 0.5); if (v.y < 0.0f) - uv.x += 1.0f; - uv.y *= numbers::inv_pi; + uv.x += 1.0f; + uv.y *= numbers::inv_pi; return uv; } From 5ee2ce763057d4d5c322616fd5af600ac63ad742 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 20 Jan 2026 23:51:51 +0700 Subject: [PATCH 23/69] Initial implementation of computeWarpMap --- .../CEnvmapImportanceSampling.h | 87 +++++-- .../builtin/hlsl/gen_luma.comp.hlsl | 17 +- .../CEnvmapImportanceSampling.cpp | 240 +++++++++++++++--- .../EnvmapImportanceSampling/CMakeLists.txt | 66 ++--- 4 files changed, 300 insertions(+), 110 deletions(-) diff --git a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h index 039874202d..e552635d3a 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h +++ 
b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h @@ -7,31 +7,25 @@ namespace nbl::ext::envmap_importance_sampling { -class EnvmapImportanceSampling +class EnvmapImportanceSampling final : public core::IReferenceCounted { public: + static constexpr uint32_t MaxMipCountLuminance = 13u; + static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u; + static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u; + struct SCachedCreationParameters { - // using streaming_buffer_t = video::StreamingTransientDataBufferST>; - // - // static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - // static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - // - // DrawMode drawMode = ADM_DRAW_BOTH; - core::smart_refctd_ptr utilities; - - //! optional, default MDI buffer allocated if not provided - // core::smart_refctd_ptr streamingBuffer = nullptr; + uint32_t genLumaMapWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension; + uint32_t genWarpMapWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension; }; struct SCreationParameters : public SCachedCreationParameters { - video::IQueue* transfer = nullptr; // only used to make the 24 element index buffer and instanced pipeline on create core::smart_refctd_ptr assetManager = nullptr; - - core::smart_refctd_ptr genLumaPipelineLayout = nullptr; + core::smart_refctd_ptr envMap = nullptr; inline bool validate() const { @@ -39,8 +33,7 @@ class EnvmapImportanceSampling ({ std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), - std::make_pair(bool(transfer), "Invalid `creationParams.transfer` is nullptr!"), - 
std::make_pair(bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") + std::make_pair(bool(envMap), "Invalid `creationParams.envMap` is nullptr!"), }); system::logger_opt_ptr logger = utilities->getLogger(); @@ -58,20 +51,76 @@ class EnvmapImportanceSampling }; - static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device, const core::smart_refctd_ptr* sampler); + static core::smart_refctd_ptr create(SCreationParameters&& params); + + static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device); static core::smart_refctd_ptr createMeasureLumaPipelineLayout(video::ILogicalDevice* device); static core::smart_refctd_ptr createGenWarpMapPipelineLayout(video::ILogicalDevice* device); //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included - static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); + static core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); static core::smart_refctd_ptr createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); static core::smart_refctd_ptr createMeasureLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + + static core::smart_refctd_ptr createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, std::string_view debugName = ""); + + static core::smart_refctd_ptr createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, std::string_view debugName = ""); + + bool 
computeWarpMap(video::IGPUCommandBuffer* cmdBuf, float envMapRegularizationFactor, float& pdfNormalizationFactor, float& maxEmittanceLuma); + + // returns if RIS should be enabled based on variance calculations + inline bool computeWarpMap(video::IGPUCommandBuffer* cmdBuf, float envMapRegularizationFactor, float& pdfNormalizationFactor) + { + [[maybe_unused]] float dummy; + return computeWarpMap(cmdBuf, envMapRegularizationFactor, pdfNormalizationFactor, dummy); + } + + + inline core::smart_refctd_ptr getLumaMapView() + { + return m_lumaMap; + } + + protected: + struct ConstructorParams + { + SCachedCreationParameters creationParams; + hlsl::uint32_t2 lumaWorkgroupSize; + hlsl::uint32_t2 warpWorkgroupSize; + core::smart_refctd_ptr lumaMap; + core::smart_refctd_ptr warpMap; + core::smart_refctd_ptr genLumaPipeline; + core::smart_refctd_ptr genLumaDescriptorSet; + }; + + explicit EnvmapImportanceSampling(ConstructorParams&& params) : + m_cachedCreationParams(std::move(params.creationParams)), + m_lumaWorkgroupSize(params.lumaWorkgroupSize), + m_warpWorkgroupSize(params.warpWorkgroupSize), + m_lumaMap(std::move(params.lumaMap)), + m_warpMap(std::move(params.warpMap)), + m_genLumaPipeline(std::move(params.genLumaPipeline)), + m_genLumaDescriptorSet(std::move(params.genLumaDescriptorSet)) + {} + + ~EnvmapImportanceSampling() override {} + private: - core::smart_refctd_ptr m_lumaGenPipeline; + + SCachedCreationParameters m_cachedCreationParams; + + hlsl::uint32_t2 m_lumaWorkgroupSize; + hlsl::uint32_t2 m_warpWorkgroupSize; + + core::smart_refctd_ptr m_lumaMap; + core::smart_refctd_ptr m_warpMap; + + core::smart_refctd_ptr m_genLumaPipeline; + core::smart_refctd_ptr m_genLumaDescriptorSet; }; diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl index e701f0b00d..3a039945b4 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl +++ 
b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl @@ -1,4 +1,4 @@ -#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" +#include "common.hlsl" using namespace nbl; using namespace nbl::hlsl; @@ -6,24 +6,19 @@ using namespace nbl::hlsl::ext::envmap_importance_sampling; [[vk::push_constant]] SLumaGenPushConstants pc; -[[vk::combinedImageSampler]][[vk::binding(0, 0)]] Texture2D envMap; -[[vk::combinedImageSampler]][[vk::binding(0, 0)]] SamplerState envMapSampler; - +[[vk::binding(0, 0)]] Texture2D envMap; [[vk::binding(1, 0)]] RWTexture2D outImage; -// TODO(kevinyu): Temporary to make nsc compiles -#define LUMA_MAP_GEN_WORKGROUP_DIM 16 - -[numthreads(LUMA_MAP_GEN_WORKGROUP_DIM, LUMA_MAP_GEN_WORKGROUP_DIM, 1)] +[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)] [shader("compute")] void main(uint32_t3 threadID : SV_DispatchThreadID) { if (all(threadID < pc.lumaMapResolution)) { - const float32_t2 uv = (float32_t2(threadID.xy) + float32_t2(0.5, 0.5)) / float32_t2(pc.lumaMapResolution); - const float32_t3 envMapSample = envMap.Sample(envMapSampler, uv).rgb; - const float32_t luma = hlsl::dot(float32_t4(envMapSample, 1.0f), pc.luminanceScales) * sin(numbers::pi * uv.y); + const float uv_y = (float(threadID.y) + 0.5) / pc.lumaMapResolution.y; + const float32_t3 envMapSample = envMap.Load(float32_t3(threadID.xy, 0)); + const float32_t luma = hlsl::dot(float32_t4(envMapSample, 1.0f), pc.luminanceScales) * sin(numbers::pi * uv_y); outImage[threadID.xy] = luma; } diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp index a4517123b9..f6aad7c25e 100644 --- a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp +++ b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp @@ -1,5 +1,6 @@ #include "nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h" #include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" 
+#include "nlohmann/detail/input/parser.hpp" using namespace nbl::hlsl::ext::envmap_importance_sampling; @@ -7,8 +8,6 @@ using namespace nbl::hlsl::ext::envmap_importance_sampling; #include "nbl/ext/debug_draw/builtin/build/CArchive.h" #endif -#include "nbl/ext/EnvmapImportanceSampling/builtin/build/spirv/keys.hpp" - using namespace nbl; using namespace core; using namespace video; @@ -19,9 +18,145 @@ using namespace hlsl; namespace nbl::ext::envmap_importance_sampling { -constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/EnvmapImportanceSampling"; +namespace +{ + constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/EnvmapImportanceSampling"; + + void generateMipmap(video::IGPUCommandBuffer* cmdBuf, core::smart_refctd_ptr textureView) + { + + } + + core::smart_refctd_ptr createTexture(video::ILogicalDevice* device, const asset::VkExtent3D extent, E_FORMAT format, uint32_t mipLevels = 1u, uint32_t layers = 0u) + { + const auto real_layers = layers ? layers:1u; + + IGPUImage::SCreationParams imgParams; + imgParams.extent = extent; + imgParams.arrayLayers = real_layers; + imgParams.flags = static_cast(0); + imgParams.format = format; + imgParams.mipLevels = mipLevels; + imgParams.samples = IImage::ESCF_1_BIT; + imgParams.type = IImage::ET_2D; + imgParams.usage = IImage::EUF_STORAGE_BIT; + const auto image = device->createImage(std::move(imgParams)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + device->allocate(imageMemReqs, image.get()); + + IGPUImageView::SCreationParams viewparams; + viewparams.subUsages = IImage::EUF_STORAGE_BIT; + viewparams.flags = static_cast(0); + viewparams.format = format; + viewparams.image = std::move(image); + viewparams.viewType = layers ? 
IGPUImageView::ET_2D_ARRAY:IGPUImageView::ET_2D; + viewparams.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; + viewparams.subresourceRange.baseArrayLayer = 0u; + viewparams.subresourceRange.layerCount = real_layers; + viewparams.subresourceRange.baseMipLevel = 0u; + viewparams.subresourceRange.levelCount = mipLevels; + + return device->createImageView(std::move(viewparams)); + } + + core::smart_refctd_ptr getShaderSource( asset::IAssetManager* assetManager, const char* filePath, system::ILogger* logger) + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = logger; + lparams.workingDirectory = NBL_EXT_MOUNT_ENTRY; + auto bundle = assetManager->getAsset(filePath, lparams); + if (bundle.getContents().empty() || bundle.getAssetType()!=IAsset::ET_SHADER) + { + const auto assetType = bundle.getAssetType(); + logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); + exit(-1); + } + auto firstAssetInBundle = bundle.getContents()[0]; + return smart_refctd_ptr_static_cast(firstAssetInBundle); + } +} + + + +core::smart_refctd_ptr EnvmapImportanceSampling::create(SCreationParameters&& params) +{ + auto* const logger = params.utilities->getLogger(); + + if (!params.validate()) + { + logger->log("Failed creation parameters validation!", ILogger::ELL_ERROR); + return nullptr; + } + + const auto EnvmapExtent = params.envMap->getCreationParameters().image->getCreationParameters().extent; + // we don't need the 1x1 mip for anything + const uint32_t MipCountLuminance = IImage::calculateFullMipPyramidLevelCount(EnvmapExtent,IImage::ET_2D)-1; + const auto EnvMapPoTExtent = [MipCountLuminance]() -> asset::VkExtent3D + { + const uint32_t width = 0x1u<>1u,1u }; + }(); + auto calcWorkgroupSize = [](const asset::VkExtent3D extent, const uint32_t workgroupDimension) -> uint32_t2 + { + return uint32_t2(extent.width - 1, extent.height - 1) / workgroupDimension + uint32_t2(1); + }; + + const auto device = params.utilities->getLogicalDevice(); + + 
ConstructorParams constructorParams; + + constructorParams.lumaWorkgroupSize = calcWorkgroupSize(EnvMapPoTExtent, params.genLumaMapWorkgroupDimension); + + constructorParams.lumaMap = createLumaMap(device, EnvMapPoTExtent, MipCountLuminance); + + const auto upscale = 0; + const asset::VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width<createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genLumaPipelineLayout->getDescriptorSetLayouts()); + const auto genLumaDescriptorSet = genLumaDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genLumaPipelineLayout->getDescriptorSetLayouts()[0])); + + IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo; + envMapDescriptorInfo.desc = params.envMap; + envMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo lumaMapDescriptorInfo; + lumaMapDescriptorInfo.desc = constructorParams.lumaMap; + lumaMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + + const IGPUDescriptorSet::SWriteDescriptorSet writes[] = { + { + .dstSet = genLumaDescriptorSet.get(), .binding = 0, .count = 1, .info = &envMapDescriptorInfo + }, + { + .dstSet = genLumaDescriptorSet.get(), .binding = 1, .count = 1, .info = &lumaMapDescriptorInfo + } + }; + + device->updateDescriptorSets(writes, {}); + + constructorParams.genLumaDescriptorSet = genLumaDescriptorSet; + + constructorParams.creationParams = std::move(params); + + return core::smart_refctd_ptr(new EnvmapImportanceSampling(std::move(constructorParams))); +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, const std::string_view debugName) +{ + return createTexture(device, extent, EF_R32_SFLOAT, mipCount); +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, const std::string_view debugName) +{ + return createTexture(device, extent, 
EF_R32G32_SFLOAT); +} -const smart_refctd_ptr EnvmapImportanceSampling::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) +smart_refctd_ptr EnvmapImportanceSampling::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) { assert(system); @@ -47,32 +182,33 @@ core::smart_refctd_ptr EnvmapImportanceSampling::cre auto* device = params.utilities->getLogicalDevice(); mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); - auto getShader = [&](const core::string& key)->smart_refctd_ptr { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = params.utilities->getLogger(); - lp.workingDirectory = NBL_EXT_MOUNT_ENTRY; - auto bundle = params.assetManager->getAsset(key.c_str(), lp); + const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_luma.comp.hlsl", logger.get()); + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; - const auto contents = bundle.getContents(); +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); + options.preprocessorOptions.logger = logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - if (contents.empty()) - { - logger.log("Failed to load shader %s from disk", ILogger::ELL_ERROR, key.c_str()); - return 
nullptr; - } + const IShaderCompiler::SMacroDefinition defines[] = { + { "WORKGROUP_DIM", "16" }, + }; - if (bundle.getAssetType() != IAsset::ET_SHADER) - { - logger.log("Loaded asset has wrong type!", ILogger::ELL_ERROR); - return nullptr; - } + options.preprocessorOptions.extraDefines = defines; - return IAsset::castDown(contents[0]); - }; - - const auto key = nbl::ext::envmap_importance_sampling::builtin::build::get_spirv_key<"gen_luma">(device); - smart_refctd_ptr genLumaShader = getShader(key); - if (!genLumaShader) + const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); + const auto shader = device->compileShader({ overridenUnspecialized.get() }); + if (!shader) { logger.log("Could not compile shaders!", ILogger::ELL_ERROR); return nullptr; @@ -80,7 +216,7 @@ core::smart_refctd_ptr EnvmapImportanceSampling::cre video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; pipelineParams[0].layout = pipelineLayout; - pipelineParams[0].shader = { .shader = genLumaShader.get(), .entryPoint = "main" }; + pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; smart_refctd_ptr pipeline; params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); @@ -94,7 +230,7 @@ core::smart_refctd_ptr EnvmapImportanceSampling::cre } -core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createGenLumaPipelineLayout(video::ILogicalDevice* device, const smart_refctd_ptr* sampler) +core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createGenLumaPipelineLayout(video::ILogicalDevice* device) { asset::SPushConstantRange pcRange = { .stageFlags = hlsl::ESS_COMPUTE, @@ -105,11 +241,10 @@ core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::cr const IGPUDescriptorSetLayout::SBinding bindings[] = { { .binding = 0u, - .type = 
nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = sampler + .count = 1u }, { .binding = 1u, @@ -169,4 +304,47 @@ core::smart_refctd_ptr EnvmapImportanceSampling::crea const auto setLayout = device->createDescriptorSetLayout(bindings); return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); } + +bool EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf, const float envMapRegularizationFactor, float& pdfNormalizationFactor, float& maxEmittanceLuma) +{ + bool enableRIS = false; + + SLumaGenPushConstants pcData = {}; + pcData.luminanceScales = { 0.2126729f, 0.7151522f, 0.0721750f, 0.0f }; + { + const auto imageExtent = m_lumaMap->getCreationParameters().image->getCreationParameters().extent; + pcData.lumaMapResolution = {imageExtent.width, imageExtent.height}; + } + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barrier; + barrier.barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }; + barrier.image = m_lumaMap->getCreationParameters().image.get(); + barrier.subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = m_lumaMap->getCreationParameters().image->getCreationParameters().mipLevels, + .baseArrayLayer = 0u, + .layerCount = 1u + }; + barrier.oldLayout = IImage::LAYOUT::UNDEFINED; + barrier.newLayout = IImage::LAYOUT::GENERAL; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = {&barrier, 1} }); + + cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); + cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), 
IShader::E_SHADER_STAGE::ESS_COMPUTE, + 0, sizeof(SLumaGenPushConstants), &pcData); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genLumaPipeline->getLayout(), + 0, 1, &m_genLumaDescriptorSet.get()); + cmdBuf->dispatch(m_lumaWorkgroupSize.x, m_lumaWorkgroupSize.y, 1); + + return enableRIS; + +} } diff --git a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt index fabd4b8b50..f7fbd6c55f 100644 --- a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt +++ b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt @@ -10,73 +10,41 @@ set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC "${CMAKE_CURRENT_SOURCE_DIR}/CEnvmapImportanceSampling.cpp" ) -nbl_create_ext_library_project( - ENVMAP_IMPORTANCE_SAMPLING - "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H}" - "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC}" - "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_EXTERNAL_INCLUDE}" - "" - "" -) - get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}" ABSOLUTE) set(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}/nbl/ext/EnvmapImportanceSampling/builtin/hlsl") + set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") set(DEPENDS ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/common.hlsl ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warpmap.comp.hlsl - ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl ) -target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) - -set(SM 6_8) -set(JSON [=[ -[ - { - "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warpmap.comp.hlsl", - "KEY": "gen_warpmap", - }, - { - "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl", - "KEY": "gen_luma", - }, - { - "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl", - "KEY": 
"measure_luma", - } - -] -]=]) -string(CONFIGURE "${JSON}" JSON) -set(COMPILE_OPTIONS - -I "${NBL_ROOT_PATH}/include" # a workaround due to envmap importance sampling ext common header which is not part of Nabla builtin archive - -I "${CMAKE_CURRENT_SOURCE_DIR}" - -T lib_${SM} +nbl_create_ext_library_project( + ENVMAP_IMPORTANCE_SAMPLING + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H}" + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC}" + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_EXTERNAL_INCLUDE}" + "" + NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT="${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}" ) -NBL_CREATE_NSC_COMPILE_RULES( - TARGET ${LIB_NAME}SPIRV - LINK_TO ${LIB_NAME} - DEPENDS ${DEPENDS} - BINARY_DIR ${OUTPUT_DIRECTORY} - MOUNT_POINT_DEFINE NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT - COMMON_OPTIONS ${COMPILE_OPTIONS} - OUTPUT_VAR KEYS - INCLUDE nbl/ext/EnvmapImportanceSampling/builtin/build/spirv/keys.hpp - NAMESPACE nbl::ext::envmap_importance_sampling::builtin::build - INPUTS ${JSON} -) +target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + NBL_CREATE_RESOURCE_ARCHIVE( NAMESPACE nbl::ext::envmap_importance_sampling::builtin::build TARGET ${LIB_NAME}_builtinsBuild LINK_TO ${LIB_NAME} BIND ${OUTPUT_DIRECTORY} - BUILTINS ${KEYS} + BUILTINS + common.hlsl + gen_luma.comp.hlsl + gen_warpmap.comp.hlsl + measure_luma.comp.hlsl + ) From 867868c2f0697b12d52bffb7c6ed8ee97c7ee27c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 30 Jan 2026 15:15:40 +0700 Subject: [PATCH 24/69] Fix arithmetic config no const specifier for method --- include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl index 22c93ce193..aa395ad524 100644 --- a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl +++ 
b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl @@ -205,7 +205,7 @@ struct SArithmeticConfiguration #undef DEFINE_ASSIGN } - std::string getConfigTemplateStructString() + std::string getConfigTemplateStructString() NBL_CONST_MEMBER_FUNC { std::ostringstream os; os << "nbl::hlsl::workgroup2::ArithmeticConfiguration<" << WorkgroupSizeLog2 << "," << SubgroupSizeLog2 << "," << ItemsPerInvocation_0 << ">;"; From 1a66157391a2d4009e4b1b85e0080fc153ac4754 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 30 Jan 2026 15:19:27 +0700 Subject: [PATCH 25/69] Define config_t from outside --- .../builtin/hlsl/measure_luma.comp.hlsl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl index 845d12632d..ffe6477f5a 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl @@ -1,14 +1,14 @@ #include "nbl/builtin/hlsl/sampling/warps/spherical.hlsl" #include "nbl/builtin/hlsl/workgroup2/arithmetic.hlsl" -#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" +#include "common.hlsl" using namespace nbl; using namespace nbl::hlsl; using namespace nbl::hlsl::ext::envmap_importance_sampling; // TODO(kevinyu): Temporary to make nsc works -using config_t = nbl::hlsl::workgroup2::ArithmeticConfiguration<4, 4, 2>; +using config_t = WORKGROUP_CONFIG_T; [[vk::push_constant]] SLumaMeasurePushConstants pc; @@ -115,7 +115,7 @@ struct LumaAccessor [numthreads(config_t::WorkgroupSize, 1, 1)] [shader("compute")] -void main(uint32_t3 threadID : SV_DispatchThreadID) +void main(uint32_t localInvocationIndex : SV_GroupIndex, uint32_t3 groupID: SV_GroupID) { ScratchProxy scratchAccessor; @@ -138,6 +138,6 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) measurement.maxLuma = 
workgroup2::reduction, device_capabilities>::template __call(lumaAccessor, scratchAccessor); - if (all(threadID == uint32_t3(0, 0, 0))) - vk::RawBufferStore(pc.lumaMeasurementBuf, measurement); + if (localInvocationIndex == 0) + vk::RawBufferStore(pc.lumaMeasurementBuf + (groupID.x * sizeof(SLumaMeasurement)), measurement); } From d4b81050ea895b130ea32f462a31951d100c22bd Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 30 Jan 2026 15:19:49 +0700 Subject: [PATCH 26/69] More fixes on computeWarpMap implementation --- .../CEnvmapImportanceSampling.h | 47 ++- .../CEnvmapImportanceSampling.cpp | 398 ++++++++++++++---- 2 files changed, 350 insertions(+), 95 deletions(-) diff --git a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h index e552635d3a..b493e88b4d 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h +++ b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h @@ -3,6 +3,7 @@ #include "nbl/asset/IPipelineLayout.h" #include "nbl/video/declarations.h" +#include "nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl" namespace nbl::ext::envmap_importance_sampling { @@ -55,56 +56,61 @@ class EnvmapImportanceSampling final : public core::IReferenceCounted static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device); - static core::smart_refctd_ptr createMeasureLumaPipelineLayout(video::ILogicalDevice* device); - - static core::smart_refctd_ptr createGenWarpMapPipelineLayout(video::ILogicalDevice* device); + static core::smart_refctd_ptr createGenWarpPipelineLayout(video::ILogicalDevice* device); //! 
mounts the extension's archive to given system - useful if you want to create your own shaders with common header included static core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); static core::smart_refctd_ptr createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); - static core::smart_refctd_ptr createMeasureLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + static core::smart_refctd_ptr createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); static core::smart_refctd_ptr createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, std::string_view debugName = ""); static core::smart_refctd_ptr createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, std::string_view debugName = ""); - bool computeWarpMap(video::IGPUCommandBuffer* cmdBuf, float envMapRegularizationFactor, float& pdfNormalizationFactor, float& maxEmittanceLuma); - - // returns if RIS should be enabled based on variance calculations - inline bool computeWarpMap(video::IGPUCommandBuffer* cmdBuf, float envMapRegularizationFactor, float& pdfNormalizationFactor) - { - [[maybe_unused]] float dummy; - return computeWarpMap(cmdBuf, envMapRegularizationFactor, pdfNormalizationFactor, dummy); - } + void computeWarpMap(video::IGPUCommandBuffer* cmdBuf); + // use this to synchronize warp map after computeWarpMap call + nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getWarpMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT oldLayout); inline core::smart_refctd_ptr getLumaMapView() { return m_lumaMap; } + inline core::smart_refctd_ptr getWarpMapView() + { + return m_warpMap; + } + protected: struct ConstructorParams { 
SCachedCreationParameters creationParams; - hlsl::uint32_t2 lumaWorkgroupSize; - hlsl::uint32_t2 warpWorkgroupSize; + hlsl::uint32_t2 lumaWorkgroupCount; + hlsl::uint32_t2 warpWorkgroupCount; core::smart_refctd_ptr lumaMap; core::smart_refctd_ptr warpMap; core::smart_refctd_ptr genLumaPipeline; core::smart_refctd_ptr genLumaDescriptorSet; + core::smart_refctd_ptr genWarpPipeline; + core::smart_refctd_ptr genWarpDescriptorSet; }; explicit EnvmapImportanceSampling(ConstructorParams&& params) : m_cachedCreationParams(std::move(params.creationParams)), - m_lumaWorkgroupSize(params.lumaWorkgroupSize), - m_warpWorkgroupSize(params.warpWorkgroupSize), + m_lumaWorkgroupCount(params.lumaWorkgroupCount), + m_warpWorkgroupCount(params.warpWorkgroupCount), m_lumaMap(std::move(params.lumaMap)), m_warpMap(std::move(params.warpMap)), m_genLumaPipeline(std::move(params.genLumaPipeline)), - m_genLumaDescriptorSet(std::move(params.genLumaDescriptorSet)) + m_genLumaDescriptorSet(std::move(params.genLumaDescriptorSet)), + m_genWarpPipeline(std::move(params.genWarpPipeline)), + m_genWarpDescriptorSet(std::move(params.genWarpDescriptorSet)) {} ~EnvmapImportanceSampling() override {} @@ -113,14 +119,17 @@ class EnvmapImportanceSampling final : public core::IReferenceCounted SCachedCreationParameters m_cachedCreationParams; - hlsl::uint32_t2 m_lumaWorkgroupSize; - hlsl::uint32_t2 m_warpWorkgroupSize; + hlsl::uint32_t2 m_lumaWorkgroupCount; + hlsl::uint32_t2 m_warpWorkgroupCount; core::smart_refctd_ptr m_lumaMap; core::smart_refctd_ptr m_warpMap; core::smart_refctd_ptr m_genLumaPipeline; core::smart_refctd_ptr m_genLumaDescriptorSet; + + core::smart_refctd_ptr m_genWarpPipeline; + core::smart_refctd_ptr m_genWarpDescriptorSet; }; diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp index f6aad7c25e..bfe25b625e 100644 --- a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp +++ 
b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp @@ -22,9 +22,51 @@ namespace { constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/EnvmapImportanceSampling"; - void generateMipmap(video::IGPUCommandBuffer* cmdBuf, core::smart_refctd_ptr textureView) + // image must have the first mip layout set to transfer src, and the rest to dst + void generateMipmap(video::IGPUCommandBuffer* cmdBuf, IGPUImage* image) { - + const auto mipLevels = image->getCreationParameters().mipLevels; + const auto extent = image->getCreationParameters().extent; + for (uint32_t mip_i = 1; mip_i < mipLevels; mip_i++) + { + + const IGPUCommandBuffer::SImageBlit blit = { + .srcMinCoord = {0, 0, 0}, + .srcMaxCoord = {extent.width >> (mip_i - 1), extent.height >> (mip_i - 1), 1}, + .dstMinCoord = {0, 0, 0}, + .dstMaxCoord = {extent.width >> mip_i, extent.height >> mip_i, 1}, + .layerCount = 1, + .srcBaseLayer = 0, + .dstBaseLayer = 0, + .srcMipLevel = mip_i - 1, + .dstMipLevel = mip_i, + .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + }; + cmdBuf->blitImage(image, IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, image, IImage::LAYOUT::TRANSFER_DST_OPTIMAL, { &blit, 1 }, IGPUSampler::E_TEXTURE_FILTER::ETF_LINEAR); + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barrier = { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT + } + }, + .image = image, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = mip_i, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = {&barrier, 1} }); + + } } core::smart_refctd_ptr createTexture(video::ILogicalDevice* device, 
const asset::VkExtent3D extent, E_FORMAT format, uint32_t mipLevels = 1u, uint32_t layers = 0u) @@ -39,14 +81,14 @@ namespace imgParams.mipLevels = mipLevels; imgParams.samples = IImage::ESCF_1_BIT; imgParams.type = IImage::ET_2D; - imgParams.usage = IImage::EUF_STORAGE_BIT; + imgParams.usage = IImage::EUF_STORAGE_BIT | IImage::EUF_TRANSFER_SRC_BIT | IImage::EUF_TRANSFER_DST_BIT | IImage::EUF_SAMPLED_BIT; const auto image = device->createImage(std::move(imgParams)); auto imageMemReqs = image->getMemoryReqs(); imageMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); device->allocate(imageMemReqs, image.get()); IGPUImageView::SCreationParams viewparams; - viewparams.subUsages = IImage::EUF_STORAGE_BIT; + viewparams.subUsages = IImage::EUF_STORAGE_BIT | IImage::EUF_SAMPLED_BIT; viewparams.flags = static_cast(0); viewparams.format = format; viewparams.image = std::move(image); @@ -77,8 +119,6 @@ namespace } } - - core::smart_refctd_ptr EnvmapImportanceSampling::create(SCreationParameters&& params) { auto* const logger = params.utilities->getLogger(); @@ -106,40 +146,59 @@ core::smart_refctd_ptr EnvmapImportanceSampling::creat ConstructorParams constructorParams; - constructorParams.lumaWorkgroupSize = calcWorkgroupSize(EnvMapPoTExtent, params.genLumaMapWorkgroupDimension); - + constructorParams.lumaWorkgroupCount = calcWorkgroupSize(EnvMapPoTExtent, params.genLumaMapWorkgroupDimension); constructorParams.lumaMap = createLumaMap(device, EnvMapPoTExtent, MipCountLuminance); const auto upscale = 0; const asset::VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width<createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genLumaPipelineLayout->getDescriptorSetLayouts()); const auto genLumaDescriptorSet = genLumaDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genLumaPipelineLayout->getDescriptorSetLayouts()[0])); + const auto genWarpPipelineLayout = createGenWarpPipelineLayout(device); + 
constructorParams.genWarpPipeline = createGenWarpPipeline(params, genWarpPipelineLayout.get()); + const auto genWarpDescriptorPool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genWarpPipelineLayout->getDescriptorSetLayouts()); + const auto genWarpDescriptorSet = genWarpDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genWarpPipelineLayout->getDescriptorSetLayouts()[0])); + IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo; envMapDescriptorInfo.desc = params.envMap; envMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - IGPUDescriptorSet::SDescriptorInfo lumaMapDescriptorInfo; - lumaMapDescriptorInfo.desc = constructorParams.lumaMap; - lumaMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + IGPUDescriptorSet::SDescriptorInfo lumaMapGeneralDescriptorInfo; + lumaMapGeneralDescriptorInfo.desc = constructorParams.lumaMap; + lumaMapGeneralDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + + IGPUDescriptorSet::SDescriptorInfo lumaMapReadDescriptorInfo; + lumaMapReadDescriptorInfo.desc = constructorParams.lumaMap; + lumaMapReadDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo; + warpMapDescriptorInfo.desc = constructorParams.warpMap; + warpMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; const IGPUDescriptorSet::SWriteDescriptorSet writes[] = { { .dstSet = genLumaDescriptorSet.get(), .binding = 0, .count = 1, .info = &envMapDescriptorInfo }, { - .dstSet = genLumaDescriptorSet.get(), .binding = 1, .count = 1, .info = &lumaMapDescriptorInfo - } + .dstSet = genLumaDescriptorSet.get(), .binding = 1, .count = 1, .info = &lumaMapGeneralDescriptorInfo + }, + { + .dstSet = genWarpDescriptorSet.get(), .binding = 0, .count = 1, .info = &lumaMapReadDescriptorInfo + }, + { + .dstSet = genWarpDescriptorSet.get(), .binding = 1, .count = 1, .info = 
&warpMapDescriptorInfo + }, }; device->updateDescriptorSets(writes, {}); constructorParams.genLumaDescriptorSet = genLumaDescriptorSet; + constructorParams.genWarpDescriptorSet = genWarpDescriptorSet; constructorParams.creationParams = std::move(params); @@ -148,12 +207,12 @@ core::smart_refctd_ptr EnvmapImportanceSampling::creat core::smart_refctd_ptr EnvmapImportanceSampling::createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, const std::string_view debugName) { - return createTexture(device, extent, EF_R32_SFLOAT, mipCount); + return createTexture(device, extent, EF_R32_SFLOAT, mipCount); } core::smart_refctd_ptr EnvmapImportanceSampling::createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, const std::string_view debugName) { - return createTexture(device, extent, EF_R32G32_SFLOAT); + return createTexture(device, extent, EF_R32G32_SFLOAT); } smart_refctd_ptr EnvmapImportanceSampling::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) @@ -200,8 +259,9 @@ core::smart_refctd_ptr EnvmapImportanceSampling::cre options.preprocessorOptions.logger = logger.get(); options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + const auto workgroupDimStr = std::to_string(params.genLumaMapWorkgroupDimension); const IShaderCompiler::SMacroDefinition defines[] = { - { "WORKGROUP_DIM", "16" }, + { "WORKGROUP_DIM", workgroupDimStr.data() }, }; options.preprocessorOptions.extraDefines = defines; @@ -229,6 +289,60 @@ core::smart_refctd_ptr EnvmapImportanceSampling::cre return pipeline; } +core::smart_refctd_ptr EnvmapImportanceSampling::createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + 
mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_warp.comp.hlsl", logger.get()); + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; + +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); + options.preprocessorOptions.logger = logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const auto workgroupDimStr = std::to_string(params.genWarpMapWorkgroupDimension); + const IShaderCompiler::SMacroDefinition defines[] = { + { "WORKGROUP_DIM", workgroupDimStr.data() }, + }; + + options.preprocessorOptions.extraDefines = defines; + + const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); + const auto shader = device->compileShader({ overridenUnspecialized.get() }); + if (!shader) + { + logger.log("Could not compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could 
not create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createGenLumaPipelineLayout(video::ILogicalDevice* device) { @@ -260,29 +374,7 @@ core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::cr } -core::smart_refctd_ptr EnvmapImportanceSampling::createMeasureLumaPipelineLayout(video::ILogicalDevice* device) -{ - asset::SPushConstantRange pcRange = { - .stageFlags = hlsl::ESS_COMPUTE, - .offset = 0, - .size = sizeof(SLumaMeasurePushConstants) - }; - - const IGPUDescriptorSetLayout::SBinding bindings[] = { - { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - } - }; - - const auto setLayout = device->createDescriptorSetLayout(bindings); - return device->createPipelineLayout({ &pcRange, 1 }, setLayout, nullptr, nullptr, nullptr); -} - -core::smart_refctd_ptr EnvmapImportanceSampling::createGenWarpMapPipelineLayout(video::ILogicalDevice* device) +core::smart_refctd_ptr EnvmapImportanceSampling::createGenWarpPipelineLayout(video::ILogicalDevice* device) { const IGPUDescriptorSetLayout::SBinding bindings[] = { { @@ -305,46 +397,200 @@ core::smart_refctd_ptr EnvmapImportanceSampling::crea return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); } -bool EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf, const float envMapRegularizationFactor, float& pdfNormalizationFactor, float& maxEmittanceLuma) +void EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf) { - bool enableRIS = false; - - SLumaGenPushConstants pcData = {}; - pcData.luminanceScales = { 0.2126729f, 0.7151522f, 0.0721750f, 0.0f }; + const auto lumaMapImage = m_lumaMap->getCreationParameters().image.get(); + const auto lumaMapMipLevels = 
lumaMapImage->getCreationParameters().mipLevels; + const auto lumaMapExtent = lumaMapImage->getCreationParameters().extent; + + const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + { - const auto imageExtent = m_lumaMap->getCreationParameters().image->getCreationParameters().extent; - pcData.lumaMapResolution = {imageExtent.width, imageExtent.height}; + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = lumaMapMipLevels, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); } - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barrier; - barrier.barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }; - barrier.image = m_lumaMap->getCreationParameters().image.get(); - barrier.subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = m_lumaMap->getCreationParameters().image->getCreationParameters().mipLevels, - .baseArrayLayer = 0u, - .layerCount = 1u + // Gen Luma Map + { + SLumaGenPushConstants pcData = {}; + pcData.luminanceScales = { 0.2126729f, 0.7151522f, 0.0721750f, 0.0f }; + pcData.lumaMapResolution = {lumaMapExtent.width, lumaMapExtent.height}; + + cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); + cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), 
IShader::E_SHADER_STAGE::ESS_COMPUTE, + 0, sizeof(SLumaGenPushConstants), &pcData); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genLumaPipeline->getLayout(), + 0, 1, &m_genLumaDescriptorSet.get()); + cmdBuf->dispatch(m_lumaWorkgroupCount.x, m_lumaWorkgroupCount.y, 1); + } + + // Generate luminance mip map + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 1u, + .levelCount = lumaMapMipLevels - 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + generateMipmap(cmdBuf, lumaMapImage); + } + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = 
IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = lumaMapMipLevels - 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = lumaMapMipLevels - 1, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + cmdBuf->bindComputePipeline(m_genWarpPipeline.get()); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genWarpPipeline->getLayout(), + 0, 1, &m_genWarpDescriptorSet.get()); + cmdBuf->dispatch(m_warpWorkgroupCount.x, m_warpWorkgroupCount.y, 1); + } + +} + +nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapImportanceSampling::getWarpMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT newLayout) +{ + const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + return { + .barrier = { + .dep = { + .srcStageMask = 
PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = dstStageMask, + .dstAccessMask = dstAccessMask + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = newLayout, }; - barrier.oldLayout = IImage::LAYOUT::UNDEFINED; - barrier.newLayout = IImage::LAYOUT::GENERAL; - cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = {&barrier, 1} }); - - cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); - cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, - 0, sizeof(SLumaGenPushConstants), &pcData); - cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genLumaPipeline->getLayout(), - 0, 1, &m_genLumaDescriptorSet.get()); - cmdBuf->dispatch(m_lumaWorkgroupSize.x, m_lumaWorkgroupSize.y, 1); - - return enableRIS; - } + } From 8853738d6a4b1d1d4a12908222ee3cf4344abf04 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 30 Jan 2026 15:20:48 +0700 Subject: [PATCH 27/69] Fix chose second to be placed inside the loop --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 23011219f8..61c2ce7dde 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -79,11 +79,11 @@ struct LuminanceMapSampler wx_0 = values[3]; wx_1 = values[2]; } + } + if (choseSecond(wx_0, wx_1, xi.x)) + p.x |= 1; } - if (choseSecond(wx_0, wx_1, xi.x)) - p.x |= 1; - } // If we don`t add xi, the sample will clump to the lowest corner of environment map texel. We add xi to simulate uniform distribution within a pixel and make the sample continuous. 
This is why we compute the pdf not from the normalized luminance of the texel, instead from the reciprocal of the Jacobian. const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / vector2_type(_mapSize); From 6bde48958c43d77ec04fb24a7f94f0678d8731cc Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 30 Jan 2026 15:21:29 +0700 Subject: [PATCH 28/69] LuminanceReadAccessor take ScalarT as template parameter --- .../hlsl/concepts/accessors/hierarchical_image.hlsl | 8 ++++---- .../nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 10 ++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl index 09abd08615..51bcce8c92 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl @@ -13,8 +13,8 @@ namespace hierarchical_image { // declare concept #define NBL_CONCEPT_NAME LuminanceReadAccessor -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U) +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(ScalarT) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,U) #define NBL_CONCEPT_PARAM_1 (coord,uint32_t2) @@ -26,8 +26,8 @@ NBL_CONCEPT_BEGIN(3) #define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(coord,level)) , ::nbl::hlsl::is_same_v, float32_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(coord,level)) , ::nbl::hlsl::is_same_v, float32_t4)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(coord,level)) , ::nbl::hlsl::is_same_v, ScalarT)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(coord,level)) , ::nbl::hlsl::is_same_v, vector)) ); #undef level #undef coord diff --git 
a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 61c2ce7dde..de03696e53 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -17,7 +17,7 @@ namespace hlsl namespace sampling { -template && hierarchical_image::LuminanceReadAccessor) +template && hierarchical_image::LuminanceReadAccessor) struct LuminanceMapSampler { using scalar_type = T; @@ -44,7 +44,7 @@ struct LuminanceMapSampler // numerical resilience against IEEE754 scalar_type dummy = 0.0f; PartitionRandVariable partition; - partition.leftProb = 1.0f / (1.0f + second/ first); + partition.leftProb = 1.0f / (1.0f + (second / first)); return partition(xi, dummy); } @@ -52,16 +52,15 @@ struct LuminanceMapSampler { float32_t2 xi = float32_t2(coord)/ _lastWarpPixel; uint32_t2 p = uint32_t2(0, 0); - const uint32_t2 mip2x1 = findMSB(_mapSize.x) - 1; + const uint32_t2 mip2x1 = findMSB(_mapSize.y); if (_aspect2x1) { // do one split in the X axis first cause penultimate full mip would have been 2x1 p.x = choseSecond(_map.get(uint32_t2(0, 0), mip2x1), _map.get(uint32_t2(0, 1), mip2x1), xi.x) ? 
1 : 0; } - for (uint32_t i = mip2x1; i != 0;) + for (int i = mip2x1 - 1; i >= 0; i--) { - --i; p <<= 1; const vector4_type values = _map.gather(p, i); scalar_type wx_0, wx_1; @@ -125,7 +124,6 @@ struct HierarchicalImage return result; } - uint32_t2 generate(NBL_REF_ARG(scalar_type) rcpPdf, vector2_type xi) NBL_CONST_MEMBER_FUNC { const vector2_type texelCoord = xi * lastWarpPixel; From 756fbb051092ec729b3cf69400e2d895af3a2700 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 30 Jan 2026 15:23:37 +0700 Subject: [PATCH 29/69] gen_warpmap to gen_warp --- .../{gen_warpmap.comp.hlsl => gen_warp.comp.hlsl} | 13 +++++-------- src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt | 5 +++-- 2 files changed, 8 insertions(+), 10 deletions(-) rename include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/{gen_warpmap.comp.hlsl => gen_warp.comp.hlsl} (74%) diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl similarity index 74% rename from include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl rename to include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl index 063dfaf9b9..c621efd4af 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl @@ -1,13 +1,8 @@ -#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" #include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl" - [[vk::binding(0, 0)]] Texture2D lumaMap; -[[vk::binding(1, 0)]] RWTexture2D outImage; - -// TODO(kevinyu): Temporary to make nsc compiles -#define WARPMAP_GEN_WORKGROUP_DIM 16 +[[vk::binding(1, 0)]] RWTexture2D outImage; using namespace nbl; using namespace nbl::hlsl; @@ -32,17 +27,19 @@ struct LuminanceAccessor } }; -[numthreads(WARPMAP_GEN_WORKGROUP_DIM, WARPMAP_GEN_WORKGROUP_DIM, 1)] +[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)] 
[shader("compute")] void main(uint32_t3 threadID : SV_DispatchThreadID) { LuminanceAccessor luminanceAccessor; uint32_t lumaMapWidth, lumaMapHeight; + lumaMap.GetDimensions(lumaMapWidth, lumaMapHeight); + using LuminanceSampler = LuminanceMapSampler; LuminanceSampler luminanceSampler = - LuminanceSampler::create(luminanceAccessor, lumaMapWidth, lumaMapHeight, lumaMapWidth != lumaMapHeight); + LuminanceSampler::create(luminanceAccessor, uint32_t2(lumaMapWidth, lumaMapHeight), lumaMapWidth != lumaMapHeight, uint32_t2(lumaMapWidth, lumaMapHeight)); uint32_t2 pixelCoord = threadID.xy; diff --git a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt index f7fbd6c55f..7486ba8923 100644 --- a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt +++ b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt @@ -17,7 +17,8 @@ set(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT "${_ARCHIVE_ABSOLUTE_ENTRY_P set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") set(DEPENDS ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/common.hlsl - ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warpmap.comp.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warp.comp.hlsl ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl ) @@ -42,7 +43,7 @@ NBL_CREATE_RESOURCE_ARCHIVE( BUILTINS common.hlsl gen_luma.comp.hlsl - gen_warpmap.comp.hlsl + gen_warp.comp.hlsl measure_luma.comp.hlsl ) From 8d64a19a8930cace421526efd8e61cd2768fbe7a Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 18 Feb 2026 23:22:33 +0700 Subject: [PATCH 30/69] Move hierarchical_image concepts to sampling subdirectory --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 2 +- .../hierarchical_image/accessors.hlsl} | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) rename include/nbl/builtin/hlsl/{concepts/accessors/hierarchical_image.hlsl => 
sampling/hierarchical_image/accessors.hlsl} (90%) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index de03696e53..bc75423914 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -7,7 +7,7 @@ #include #include -#include +#include #include namespace nbl diff --git a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl similarity index 90% rename from include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl rename to include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl index 51bcce8c92..0ee7423031 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl @@ -1,4 +1,4 @@ -#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_HIERARCHICAL_IMAGE_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_HIERARCHICAL_IMAGE_ACCESSORS_INCLUDED_ #define _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_HIERARCHICAL_IMAGE_INCLUDED_ #include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" @@ -34,13 +34,14 @@ NBL_CONCEPT_END( #undef a #include +// sampleUvs return 4 UVs in a square to calculate the jacobian matrix // declare concept #define NBL_CONCEPT_NAME HierarchicalSampler #define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) #define NBL_CONCEPT_TPLT_PRM_NAMES (HierarchicalSamplerT)(ScalarT) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (sampler,HierarchicalSamplerT) -#define NBL_CONCEPT_PARAM_1 (coord,vector) +#define NBL_CONCEPT_PARAM_1 (coord,vector) // start concept NBL_CONCEPT_BEGIN(2) // need to be defined AFTER the concept begins @@ -58,4 +59,4 @@ NBL_CONCEPT_END( } } -#endif \ No newline at end of file +#endif From 70d8423329ebe7ffc82cba1d766699a6cfd62116 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 
18 Feb 2026 23:23:36 +0700 Subject: [PATCH 31/69] Add some comment regarding corner sampling --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index bc75423914..82bde0cdfa 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -50,6 +50,7 @@ struct LuminanceMapSampler vector2_type binarySearch(const uint32_t2 coord) { + // We use _lastWarpPixel here for corner sampling float32_t2 xi = float32_t2(coord)/ _lastWarpPixel; uint32_t2 p = uint32_t2(0, 0); const uint32_t2 mip2x1 = findMSB(_mapSize.y); From 2842d29c0a39027f9e94eaf0b610ec919788288c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 18 Feb 2026 23:24:29 +0700 Subject: [PATCH 32/69] Parameterize spherical warp and make sure all literal is in the correct type --- .../hlsl/sampling/warps/spherical.hlsl | 63 ++++++++++++------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl index 7df93ac651..53f2be401d 100644 --- a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -14,54 +14,69 @@ namespace sampling namespace warp { +template struct Spherical { - using domain_type = float32_t2; - using codomain_type = float32_t3; + using scalar_type = T; + using domain_type = vector; + using codomain_type = vector; template ) static WarpResult warp(const DomainT uv) { - const float32_t phi = 2 * uv.x * numbers::pi; - const float32_t theta = uv.y * numbers::pi; - float32_t3 dir; - dir.x = cos(uv.x * 2.f * numbers::pi); - dir.y = sqrt(1.f - dir.x * dir.x); - if (uv.x > 0.5f) dir.y = -dir.y; - const float32_t cosTheta = cos(theta); - float32_t sinTheta = (1.0 - cosTheta * cosTheta); 
- dir.xy *= sinTheta; - dir.z = cosTheta; + codomain_type dir; + dir.x = cos(uv.x * scalar_type(2) * numbers::pi); + dir.z = sqrt(scalar_type(1) - (dir.x * dir.x)); + if (uv.x > scalar_type(0.5)) + dir.z = -dir.z; + const scalar_type theta = uv.y * numbers::pi; + const scalar_type cosTheta = cos(theta); + const scalar_type sinTheta = sqrt(scalar_type(1) - (cosTheta * cosTheta)); + dir.xz *= sinTheta; + dir.y = cosTheta; + WarpResult warpResult; warpResult.dst = dir; - warpResult.density = 1 / (sinTheta * numbers::pi * numbers::pi); + warpResult.density = scalar_type(1) / (scalar_type(2) * sinTheta * numbers::pi * numbers::pi); + return warpResult; } + template ) + static float32_t2 warp2(const DomainT uv) + { + const scalar_type phi = scalar_type(2) * uv.x * numbers::pi - numbers::pi; + const scalar_type theta = uv.y * numbers::pi; + return float32_t2(phi, theta); + } + template ) static domain_type inverseWarp(const CodomainT v) { - float32_t2 uv = float32_t2(atan(v.y, v.x), acos(v.z)); - uv.x *= (numbers::inv_pi * 0.5); - if (v.y < 0.0f) - uv.x += 1.0f; - uv.y *= numbers::inv_pi; - return uv; + const scalar_type phi = atan2(v.z, v.x); + const scalar_type theta = acos(v.y); + scalar_type uv_x = phi * scalar_type(0.5) * numbers::inv_pi; + if (uv_x < scalar_type(0)) + uv_x += scalar_type(1); + scalar_type uv_y = theta * numbers::inv_pi; + return domain_type(uv_x, uv_y); } template ) - static float32_t forwardDensity(const DomainT uv) + static scalar_type forwardDensity(const DomainT uv) { - const float32_t theta = uv.y * numbers::pi; - return 1.0f / (sin(theta) * 2 * numbers::pi * numbers::pi); + const scalar_type theta = uv.y * numbers::pi; + return scalar_type(1) / (sin(theta) * scalar_type(2) * numbers::pi * numbers::pi); } template ) - static float32_t backwardDensity(const CodomainT dst) + static scalar_type backwardDensity(const CodomainT dst) { - return 1.0f / (sqrt(1.0f - dst.z * dst.z) * 2 * numbers::pi * numbers::pi); + const scalar_type cosTheta = dst.y; 
+ const scalar_type sinTheta = sqrt(scalar_type(1) - (cosTheta * cosTheta)); + return scalar_type(1) / (sinTheta * scalar_type(2) * numbers::pi * numbers::pi); } }; From a51848cc46ac337382ab902f29b68bc55284fa03 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 18 Feb 2026 23:25:17 +0700 Subject: [PATCH 33/69] Refactor CEnvmapImportanceSampling to block and calculate avgLuma --- .../CEnvmapImportanceSampling.h | 19 +- .../CEnvmapImportanceSampling.cpp | 223 ++++++++++++++++-- 2 files changed, 223 insertions(+), 19 deletions(-) diff --git a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h index b493e88b4d..32f32dfd10 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h +++ b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h @@ -69,7 +69,7 @@ class EnvmapImportanceSampling final : public core::IReferenceCounted static core::smart_refctd_ptr createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, std::string_view debugName = ""); - void computeWarpMap(video::IGPUCommandBuffer* cmdBuf); + void computeWarpMap(video::IQueue* queue); // use this to synchronize warp map after computeWarpMap call nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getWarpMapBarrier( @@ -77,16 +77,27 @@ class EnvmapImportanceSampling final : public core::IReferenceCounted core::bitflag dstAccessMask, nbl::video::IGPUImage::LAYOUT oldLayout); - inline core::smart_refctd_ptr getLumaMapView() + // use this to synchronize luma map after computeWarpMap call + nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getLumaMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT oldLayout); + + inline core::smart_refctd_ptr getLumaMapView() const { return m_lumaMap; } - inline core::smart_refctd_ptr getWarpMapView() + inline core::smart_refctd_ptr 
getWarpMapView() const { return m_warpMap; } + inline hlsl::float32_t getAvgLuma() const + { + return m_avgLuma; + } + protected: struct ConstructorParams { @@ -122,6 +133,8 @@ class EnvmapImportanceSampling final : public core::IReferenceCounted hlsl::uint32_t2 m_lumaWorkgroupCount; hlsl::uint32_t2 m_warpWorkgroupCount; + hlsl::float32_t m_avgLuma; + core::smart_refctd_ptr m_lumaMap; core::smart_refctd_ptr m_warpMap; diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp index bfe25b625e..889795e705 100644 --- a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp +++ b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp @@ -27,23 +27,26 @@ namespace { const auto mipLevels = image->getCreationParameters().mipLevels; const auto extent = image->getCreationParameters().extent; - for (uint32_t mip_i = 1; mip_i < mipLevels; mip_i++) + for (uint32_t srcMip_i = 0; srcMip_i < mipLevels-1; srcMip_i++) { const IGPUCommandBuffer::SImageBlit blit = { .srcMinCoord = {0, 0, 0}, - .srcMaxCoord = {extent.width >> (mip_i - 1), extent.height >> (mip_i - 1), 1}, + .srcMaxCoord = {extent.width >> (srcMip_i), extent.height >> (srcMip_i), 1}, .dstMinCoord = {0, 0, 0}, - .dstMaxCoord = {extent.width >> mip_i, extent.height >> mip_i, 1}, + .dstMaxCoord = {extent.width >> srcMip_i + 1, extent.height >> srcMip_i + 1, 1}, .layerCount = 1, .srcBaseLayer = 0, .dstBaseLayer = 0, - .srcMipLevel = mip_i - 1, - .dstMipLevel = mip_i, + .srcMipLevel = srcMip_i, + .dstMipLevel = srcMip_i + 1, .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, }; cmdBuf->blitImage(image, IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, image, IImage::LAYOUT::TRANSFER_DST_OPTIMAL, { &blit, 1 }, IGPUSampler::E_TEXTURE_FILTER::ETF_LINEAR); + // last mip no need to transition + if (srcMip_i + 1 == mipLevels - 1) break; + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barrier = { 
.barrier = { .dep = { @@ -56,7 +59,7 @@ namespace .image = image, .subresourceRange = { .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = mip_i, + .baseMipLevel = srcMip_i + 1, .levelCount = 1, .baseArrayLayer = 0u, .layerCount = 1u @@ -370,7 +373,7 @@ core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::cr }; const auto setLayout = device->createDescriptorSetLayout(bindings); - return device->createPipelineLayout({ &pcRange, 1 }, setLayout, nullptr, nullptr, nullptr); + return device->createPipelineLayout({ &pcRange, 1 }, setLayout); } @@ -397,8 +400,36 @@ core::smart_refctd_ptr EnvmapImportanceSampling::crea return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); } -void EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf) +void EnvmapImportanceSampling::computeWarpMap(video::IQueue* queue) { + const auto logicalDevice = m_cachedCreationParams.utilities->getLogicalDevice(); + + core::smart_refctd_ptr cmdBuf; + { + // commandbuffer should refcount the pool, so it should be 100% legal to drop at the end of the scope + auto gpuCommandPool = logicalDevice->createCommandPool(queue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + if (!gpuCommandPool) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to create command pool.", system::ILogger::ELL_ERROR); + return; + } + gpuCommandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &cmdBuf); + if (!cmdBuf) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to create command buffer.", system::ILogger::ELL_ERROR); + return; + } + } + + if (!cmdBuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to begin command buffer.", system::ILogger::ELL_ERROR); + return; + } + const auto lumaMapImage = 
m_lumaMap->getCreationParameters().image.get(); const auto lumaMapMipLevels = lumaMapImage->getCreationParameters().mipLevels; const auto lumaMapExtent = lumaMapImage->getCreationParameters().extent; @@ -434,7 +465,7 @@ void EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf) // Gen Luma Map { SLumaGenPushConstants pcData = {}; - pcData.luminanceScales = { 0.2126729f, 0.7151522f, 0.0721750f, 0.0f }; + pcData.luminanceScales = { 0.2126729f, 0.7151522f, 0.0721750f }; pcData.lumaMapResolution = {lumaMapExtent.width, lumaMapExtent.height}; cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); @@ -490,7 +521,67 @@ void EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf) } }; cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); - generateMipmap(cmdBuf, lumaMapImage); + generateMipmap(cmdBuf.get(), lumaMapImage); + } + + core::smart_refctd_ptr lumaTexelBuffer; + const auto lumaMapLastMip = lumaMapMipLevels - 1; + const auto lumaMapLastMipExtent = lumaMapImage->getMipSize(lumaMapLastMip); + const auto lumaMapLastTexelCount = lumaMapLastMipExtent.x * lumaMapLastMipExtent.y * lumaMapLastMipExtent.z; + { + IGPUImage::SBufferCopy region = {}; + region.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = lumaMapLastMip; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageExtent = { lumaMapLastMipExtent.x, lumaMapLastMipExtent.y, lumaMapLastMipExtent.z }; + + IGPUBuffer::SCreationParams bufferCreationParams = {}; + bufferCreationParams.size = lumaMapLastTexelCount * getTexelOrBlockBytesize(EF_R32_SFLOAT); + bufferCreationParams.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT; + lumaTexelBuffer = logicalDevice->createBuffer(std::move(bufferCreationParams)); + if (!lumaTexelBuffer) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create GPU texel buffer.", system::ILogger::ELL_ERROR); + 
return; + } + auto gpuTexelBufferMemReqs = lumaTexelBuffer->getMemoryReqs(); + gpuTexelBufferMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDownStreamingMemoryTypeBits(); + if (!gpuTexelBufferMemReqs.memoryTypeBits) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: no down-streaming memory type for texel buffer.", system::ILogger::ELL_ERROR); + return; + } + auto gpuTexelBufferMem = logicalDevice->allocate(gpuTexelBufferMemReqs, lumaTexelBuffer.get()); + if (!gpuTexelBufferMem.isValid()) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to allocate texel buffer memory.", system::ILogger::ELL_ERROR); + return; + } + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo info = {}; + decltype(info)::image_barrier_t barrier = {}; + info.imgBarriers = { &barrier, &barrier + 1 }; + + { + barrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT; + barrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; + barrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; + barrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT; + barrier.oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL; + barrier.newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL; + barrier.image = lumaMapImage; + barrier.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; + barrier.subresourceRange.baseMipLevel = lumaMapMipLevels - 1; + barrier.subresourceRange.levelCount = 1u; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + cmdBuf->pipelineBarrier(EDF_NONE,info); + } + cmdBuf->copyImageToBuffer(lumaMapImage,IImage::LAYOUT::TRANSFER_SRC_OPTIMAL,lumaTexelBuffer.get(),1,®ion); } { @@ -500,7 +591,7 @@ void EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf) .dep = { .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + 
.dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS } }, @@ -518,10 +609,10 @@ void EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf) { .barrier = { .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS } }, .image = lumaMapImage, @@ -532,7 +623,7 @@ void EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf) .baseArrayLayer = 0u, .layerCount = 1u }, - .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, }, { @@ -563,6 +654,77 @@ void EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf) cmdBuf->dispatch(m_warpWorkgroupCount.x, m_warpWorkgroupCount.y, 1); } + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + } + + if (!cmdBuf->end()) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to end 
command buffer.", system::ILogger::ELL_ERROR); + return; + } + + { + auto signalSemaphore = logicalDevice->createSemaphore(0); + + IQueue::SSubmitInfo info; + IQueue::SSubmitInfo::SCommandBufferInfo cmdBufferInfo{ cmdBuf.get() }; + IQueue::SSubmitInfo::SSemaphoreInfo signalSemaphoreInfo; + signalSemaphoreInfo.semaphore = signalSemaphore.get(); + signalSemaphoreInfo.value = 1; + signalSemaphoreInfo.stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS; + info.commandBuffers = { &cmdBufferInfo, &cmdBufferInfo + 1 }; + info.signalSemaphores = { &signalSemaphoreInfo, &signalSemaphoreInfo + 1 }; + + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: submitting copy command buffer.", system::ILogger::ELL_INFO); + if (queue->submit({ &info, &info + 1}) != IQueue::RESULT::SUCCESS) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to submit copy command buffer.", system::ILogger::ELL_ERROR); + return; + } + + ISemaphore::SWaitInfo waitInfo{ signalSemaphore.get(), 1u}; + + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: waiting for copy completion.", system::ILogger::ELL_INFO); + if (logicalDevice->blockForSemaphores({&waitInfo, &waitInfo + 1}) != ISemaphore::WAIT_RESULT::SUCCESS) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to wait for copy completion.", system::ILogger::ELL_ERROR); + return; + } + + auto* allocation = lumaTexelBuffer->getBoundMemory().memory; + const IDeviceMemoryAllocation::MemoryRange range = { 0u, lumaTexelBuffer->getSize() }; + auto* ptr = reinterpret_cast(allocation->map(range, IDeviceMemoryAllocation::EMCAF_READ)); + + m_avgLuma = std::reduce(ptr, ptr + lumaMapLastTexelCount) / float32_t(lumaMapLastTexelCount); + } } nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapImportanceSampling::getWarpMapBarrier( @@ -593,4 +755,33 @@ 
nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t E }; } +nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapImportanceSampling::getLumaMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT newLayout) +{ + const auto lumaMapImage = m_lumaMap->getCreationParameters().image.get(); + return { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, + .dstStageMask = dstStageMask, + .dstAccessMask = dstAccessMask + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + .newLayout = newLayout, + }; +} + + } From fa94ac22eec5877beba32163b116ee0cc2167310 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 19 Feb 2026 11:45:41 +0700 Subject: [PATCH 34/69] Fix warp concept and add density type to warp concept --- include/nbl/builtin/hlsl/sampling/warp.hlsl | 10 +++- .../hlsl/sampling/warps/spherical.hlsl | 54 ++++++++----------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/warp.hlsl b/include/nbl/builtin/hlsl/sampling/warp.hlsl index b1c1fcb5b2..37c1800f51 100644 --- a/include/nbl/builtin/hlsl/sampling/warp.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warp.hlsl @@ -1,6 +1,7 @@ #ifndef _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_ #define _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_ +#include namespace nbl { @@ -9,11 +10,11 @@ namespace hlsl namespace sampling { -template +template struct WarpResult { CodomainT dst; - float32_t density; + DensityT density; }; } @@ -35,6 +36,11 @@ NBL_CONCEPT_BEGIN(3) #define dst NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE)(U::domain_type)) + 
((NBL_CONCEPT_REQ_TYPE)(U::codomain_type)) + ((NBL_CONCEPT_REQ_TYPE)(U::density_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template warp(xi)) , ::nbl::hlsl::is_same_v, sampling::WarpResult)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template forwardDensity(xi)) , ::nbl::hlsl::is_same_v, typename U::density_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template backwardDensity(dst)) , ::nbl::hlsl::is_same_v, typename U::density_type)) ); #undef dst #undef xi diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl index 53f2be401d..6094befe45 100644 --- a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -17,66 +17,58 @@ namespace warp template struct Spherical { - using scalar_type = T; - using domain_type = vector; - using codomain_type = vector; + using density_type = T; + using domain_type = vector; + using codomain_type = vector; template ) static WarpResult warp(const DomainT uv) { codomain_type dir; - dir.x = cos(uv.x * scalar_type(2) * numbers::pi); - dir.z = sqrt(scalar_type(1) - (dir.x * dir.x)); - if (uv.x > scalar_type(0.5)) + dir.x = cos(uv.x * density_type(2) * numbers::pi); + dir.z = sqrt(density_type(1) - (dir.x * dir.x)); + if (uv.x > density_type(0.5)) dir.z = -dir.z; - const scalar_type theta = uv.y * numbers::pi; - const scalar_type cosTheta = cos(theta); - const scalar_type sinTheta = sqrt(scalar_type(1) - (cosTheta * cosTheta)); + const density_type theta = uv.y * numbers::pi; + const density_type cosTheta = cos(theta); + const density_type sinTheta = sqrt(density_type(1) - (cosTheta * cosTheta)); dir.xz *= sinTheta; dir.y = cosTheta; WarpResult warpResult; warpResult.dst = dir; - warpResult.density = scalar_type(1) / (scalar_type(2) * sinTheta * numbers::pi * numbers::pi); + warpResult.density = density_type(1) / (density_type(2) * sinTheta * numbers::pi * numbers::pi); return warpResult; } - 
template ) - static float32_t2 warp2(const DomainT uv) - { - const scalar_type phi = scalar_type(2) * uv.x * numbers::pi - numbers::pi; - const scalar_type theta = uv.y * numbers::pi; - return float32_t2(phi, theta); - } - template ) static domain_type inverseWarp(const CodomainT v) { - const scalar_type phi = atan2(v.z, v.x); - const scalar_type theta = acos(v.y); - scalar_type uv_x = phi * scalar_type(0.5) * numbers::inv_pi; - if (uv_x < scalar_type(0)) - uv_x += scalar_type(1); - scalar_type uv_y = theta * numbers::inv_pi; + const density_type phi = atan2(v.z, v.x); + const density_type theta = acos(v.y); + density_type uv_x = phi * density_type(0.5) * numbers::inv_pi; + if (uv_x < density_type(0)) + uv_x += density_type(1); + density_type uv_y = theta * numbers::inv_pi; return domain_type(uv_x, uv_y); } template ) - static scalar_type forwardDensity(const DomainT uv) + static density_type forwardDensity(const DomainT uv) { - const scalar_type theta = uv.y * numbers::pi; - return scalar_type(1) / (sin(theta) * scalar_type(2) * numbers::pi * numbers::pi); + const density_type theta = uv.y * numbers::pi; + return density_type(1) / (sin(theta) * density_type(2) * numbers::pi * numbers::pi); } template ) - static scalar_type backwardDensity(const CodomainT dst) + static density_type backwardDensity(const CodomainT dst) { - const scalar_type cosTheta = dst.y; - const scalar_type sinTheta = sqrt(scalar_type(1) - (cosTheta * cosTheta)); - return scalar_type(1) / (sinTheta * scalar_type(2) * numbers::pi * numbers::pi); + const density_type cosTheta = dst.y; + const density_type sinTheta = sqrt(density_type(1) - (cosTheta * cosTheta)); + return density_type(1) / (sinTheta * density_type(2) * numbers::pi * numbers::pi); } }; From 849412416e2ffa4260374eaa80c6462d0e226dac Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 19 Feb 2026 11:46:22 +0700 Subject: [PATCH 35/69] Rename luminanceScale to lumaRGBCoefficients --- .../builtin/hlsl/common.hlsl | 25 +------------------ 
.../builtin/hlsl/gen_luma.comp.hlsl | 4 +-- 2 files changed, 3 insertions(+), 26 deletions(-) diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl index e0240909f0..6f37f80206 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl @@ -14,33 +14,10 @@ namespace envmap_importance_sampling struct SLumaGenPushConstants { - float32_t4 luminanceScales; + float32_t3 lumaRGBCoefficients; uint32_t2 lumaMapResolution; }; -struct SLumaMeasurePushConstants -{ - float32_t4 luminanceScales; - uint32_t2 lumaMapResolution; - uint64_t lumaMeasurementBuf; -}; - -struct SLumaMeasurement -{ - float32_t3 weightedDir; - float32_t luma; - float32_t maxLuma; -}; - -struct device_capabilities -{ -#ifdef TEST_NATIVE - NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = true; -#else - NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = false; -#endif -}; - } } } diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl index 3a039945b4..9d80e60750 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl @@ -16,9 +16,9 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) if (all(threadID < pc.lumaMapResolution)) { - const float uv_y = (float(threadID.y) + 0.5) / pc.lumaMapResolution.y; + const float uv_y = (float(threadID.y) + float(0.5f)) / pc.lumaMapResolution.y; const float32_t3 envMapSample = envMap.Load(float32_t3(threadID.xy, 0)); - const float32_t luma = hlsl::dot(float32_t4(envMapSample, 1.0f), pc.luminanceScales) * sin(numbers::pi * uv_y); + const float32_t luma = hlsl::dot(envMapSample, pc.lumaRGBCoefficients) * sin(numbers::pi * uv_y); outImage[threadID.xy] = luma; } 
From b273d87bcdd41c6a26d2e808b5d679c2cced07de Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 19 Feb 2026 11:46:40 +0700 Subject: [PATCH 36/69] Remove measure_luma.comp.hlsl --- .../builtin/hlsl/measure_luma.comp.hlsl | 143 ------------------ 1 file changed, 143 deletions(-) delete mode 100644 include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl deleted file mode 100644 index ffe6477f5a..0000000000 --- a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl +++ /dev/null @@ -1,143 +0,0 @@ -#include "nbl/builtin/hlsl/sampling/warps/spherical.hlsl" -#include "nbl/builtin/hlsl/workgroup2/arithmetic.hlsl" - -#include "common.hlsl" - -using namespace nbl; -using namespace nbl::hlsl; -using namespace nbl::hlsl::ext::envmap_importance_sampling; - -// TODO(kevinyu): Temporary to make nsc works -using config_t = WORKGROUP_CONFIG_T; - -[[vk::push_constant]] SLumaMeasurePushConstants pc; - -[[vk::binding(0, 0)]] Texture2D lumaMap; - -// final (level 1/2) scan needs to fit in one subgroup exactly -groupshared float32_t scratch[mpl::max_v]; - -struct PreloadedUnitData -{ - float32_t3 weightedDir; - float32_t luma; -}; - -struct ScratchProxy -{ - template - void get(const uint32_t ix, NBL_REF_ARG(AccessType) value) - { - value = scratch[ix]; - } - - template - void set(const uint32_t ix, const AccessType value) - { - scratch[ix] = value; - } - - void workgroupExecutionAndMemoryBarrier() - { - glsl::barrier(); - } -}; - -struct PreloadedData -{ - NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = uint16_t(1u) << config_t::WorkgroupSizeLog2; - NBL_CONSTEXPR_STATIC_INLINE uint16_t PreloadedDataCount = config_t::VirtualWorkgroupSize / WorkgroupSize; - - PreloadedUnitData getData(const uint32_t ix) - { - PreloadedUnitData value; - const int32_t2 pixelCoord = 
int32_t2(ix % pc.lumaMapResolution.x, ix / pc.lumaMapResolution.x); - const float32_t2 uv = (float32_t2(pixelCoord) + float32_t2(0.5, 0.5)) / float32_t2(pc.lumaMapResolution); - const float32_t luma = lumaMap.Load(int32_t3(pixelCoord, 0)); - value.weightedDir = sampling::warp::Spherical::warp(uv).dst * luma; - value.luma = luma; - return value; - } - - void preload() - { - const uint16_t invocationIndex = hlsl::workgroup::SubgroupContiguousIndex(); - [unroll] - for (uint16_t idx = 0; idx < PreloadedDataCount; idx++) - data[idx] = getData(idx * WorkgroupSize + invocationIndex); - } - - void workgroupExecutionAndMemoryBarrier() - { - glsl::barrier(); - } - - PreloadedUnitData data[config_t::ItemsPerInvocation_0]; -}; - -static PreloadedData preloadData; - -struct DirXAccessor -{ - template - void get(const IndexType ix, NBL_REF_ARG(AccessType) value) - { - value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.x; - } -}; - -struct DirYAccessor -{ - template - void get(const IndexType ix, NBL_REF_ARG(AccessType) value) - { - value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.y; - } -}; - -struct DirZAccessor -{ - template - void get(const IndexType ix, NBL_REF_ARG(AccessType) value) - { - value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.z; - } -}; - -struct LumaAccessor -{ - template - void get(const IndexType ix, NBL_REF_ARG(AccessType) value) - { - value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].luma; - } -}; - -[numthreads(config_t::WorkgroupSize, 1, 1)] -[shader("compute")] -void main(uint32_t localInvocationIndex : SV_GroupIndex, uint32_t3 groupID: SV_GroupID) -{ - ScratchProxy scratchAccessor; - - preloadData.preload(); - preloadData.workgroupExecutionAndMemoryBarrier(); - - SLumaMeasurement measurement; - - DirXAccessor dirXAccessor; - measurement.weightedDir.x= workgroup2::reduction, device_capabilities>::template __call(dirXAccessor, scratchAccessor); - - DirYAccessor dirYAccessor; - 
measurement.weightedDir.y = workgroup2::reduction, device_capabilities>::template __call(dirYAccessor, scratchAccessor); - - DirZAccessor dirZAccessor; - measurement.weightedDir.z = workgroup2::reduction, device_capabilities>::template __call(dirZAccessor, scratchAccessor); - - LumaAccessor lumaAccessor; - measurement.luma = workgroup2::reduction, device_capabilities>::template __call(lumaAccessor, scratchAccessor); - - measurement.maxLuma = workgroup2::reduction, device_capabilities>::template __call(lumaAccessor, scratchAccessor); - - if (localInvocationIndex == 0) - vk::RawBufferStore(pc.lumaMeasurementBuf + (groupID.x * sizeof(SLumaMeasurement)), measurement); -} From 3bc0e57f258063e7c1c67552c7d506cacb63711b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 19 Feb 2026 11:47:41 +0700 Subject: [PATCH 37/69] Fix some bug in hierarchical_image.hlsl --- .../hlsl/sampling/hierarchical_image.hlsl | 76 +++++++++++++------ 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 82bde0cdfa..10d3fad6f2 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -17,10 +17,14 @@ namespace hlsl namespace sampling { -template && hierarchical_image::LuminanceReadAccessor) +template && + hierarchical_image::LuminanceReadAccessor + ) struct LuminanceMapSampler { - using scalar_type = T; + using scalar_type = ScalarT; using vector2_type = vector; using vector4_type = vector; @@ -29,9 +33,9 @@ struct LuminanceMapSampler uint32_t2 _lastWarpPixel; bool _aspect2x1; - static LuminanceMapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1, uint32_t2 warpSize) + static LuminanceMapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1, uint32_t2 warpSize) { - LuminanceMapSampler result; + 
LuminanceMapSampler result; result._map = lumaMap; result._mapSize = mapSize; result._lastWarpPixel = warpSize - uint32_t2(1, 1); @@ -42,9 +46,9 @@ struct LuminanceMapSampler static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi) { // numerical resilience against IEEE754 - scalar_type dummy = 0.0f; + scalar_type dummy = scalar_type(0); PartitionRandVariable partition; - partition.leftProb = 1.0f / (1.0f + (second / first)); + partition.leftProb = scalar_type(1) / (scalar_type(1) + (second / first)); return partition(xi, dummy); } @@ -105,32 +109,56 @@ struct LuminanceMapSampler } }; -template && hierarchical_image::HierarchicalSampler && concepts::Warp) +template && + concepts::accessors::GenericReadAccessor && + hierarchical_image::HierarchicalSampler && + concepts::Warp) struct HierarchicalImage { - using scalar_type = T; - using vector2_type = vector; - using vector3_type = vector; - using vector4_type = vector; - HierarchicalSamplerT sampler; + using scalar_type = ScalarT; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + LuminanceAccessorT lumaMap; + HierarchicalSamplerT warpMap; uint32_t2 warpSize; uint32_t2 lastWarpPixel; + scalar_type invAvgLuma; - static HierarchicalImage create(NBL_CONST_REF_ARG(HierarchicalSamplerT) sampler, uint32_t2 warpSize) + static HierarchicalImage create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, NBL_CONST_REF_ARG(HierarchicalSamplerT) warpMap, uint32_t2 warpSize, scalar_type avgLuma) { - HierarchicalImage result; - result.sampler = sampler; + HierarchicalImage result; + result.lumaMap = lumaMap; + result.warpMap = warpMap; result.warpSize = warpSize; result.lastWarpPixel = warpSize - uint32_t2(1, 1); + result.invAvgLuma = ScalarT(1.0) / avgLuma; return result; } - uint32_t2 generate(NBL_REF_ARG(scalar_type) rcpPdf, vector2_type xi) NBL_CONST_MEMBER_FUNC + vector2_type inverseWarp_and_deferredPdf(NBL_REF_ARG(scalar_type) pdf, vector3_type 
direction) NBL_CONST_MEMBER_FUNC + { + vector2_type envmapUv = PostWarpT::inverseWarp(direction); + scalar_type luma; + lumaMap.get(envmapUv, luma); + pdf = (luma * invAvgLuma) * PostWarpT::backwardDensity(direction); + return envmapUv; + } + + scalar_type deferredPdf(vector3_type direction) NBL_CONST_MEMBER_FUNC + { + vector2_type envmapUv = PostWarpT::inverseWarp(direction); + scalar_type luma; + lumaMap.get(envmapUv, luma); + return luma * invAvgLuma * PostWarpT::backwardDensity(direction); + } + + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(vector2_type) uv, vector2_type xi) NBL_CONST_MEMBER_FUNC { - const vector2_type texelCoord = xi * lastWarpPixel; - const vector2_type sampleCoord = (texelCoord + vector2_type(0.5f, 0.5f)) / vector2_type(warpSize.x, warpSize.y); + const vector2_type texelCoord = xi * float32_t2(lastWarpPixel); - matrix uvs = sampler.sampleUvs(sampleCoord); + matrix uvs = warpMap.sampleUvs(uint32_t2(texelCoord)); const vector2_type interpolant = frac(texelCoord); @@ -143,16 +171,16 @@ struct HierarchicalImage xDiffs[1] * interpolant.x + uvs[0] }; const vector2_type yDiff = yVals[1] - yVals[0]; - const vector2_type uv = yDiff * interpolant.y + yVals[0]; + uv = yDiff * interpolant.y + yVals[0]; - const WarpResult warpResult = PostWarpT::warp(uv); + const WarpResult warpResult = PostWarpT::warp(uv); - const scalar_type detInterpolJacobian = determinant(matrix( + const scalar_type detInterpolJacobian = determinant(transpose(matrix( lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx yDiff // second column dFdy - )); + ))) * lastWarpPixel.x * lastWarpPixel.y; - rcpPdf = abs((detInterpolJacobian * scalar_type(lastWarpPixel.x * lastWarpPixel.y)) / warpResult.density); + pdf = abs(warpResult.density / detInterpolJacobian); return warpResult.dst; } From 1dadf9235198c6ad1d910366a000e582f21e5606 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 19 Feb 2026 11:47:59 +0700 Subject: [PATCH 38/69] Rename luminanceScales 
to lumaRGBCoefficients --- .../ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp index 889795e705..1fdd7cc29e 100644 --- a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp +++ b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp @@ -465,7 +465,7 @@ void EnvmapImportanceSampling::computeWarpMap(video::IQueue* queue) // Gen Luma Map { SLumaGenPushConstants pcData = {}; - pcData.luminanceScales = { 0.2126729f, 0.7151522f, 0.0721750f }; + pcData.lumaRGBCoefficients = { 0.2126729f, 0.7151522f, 0.0721750f }; pcData.lumaMapResolution = {lumaMapExtent.width, lumaMapExtent.height}; cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); From f19cbe9952214d3fca3ec26067d0963849eaa3c6 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 21 Feb 2026 13:52:21 +0700 Subject: [PATCH 39/69] Move EnvmapImportanceSampling from ext to core --- .../CEnvmapImportanceSampling.h | 150 ---- .../EnvmapImportanceSampling.h | 92 -- .../builtin/hlsl/common.hlsl | 26 - .../builtin/hlsl/gen_luma.comp.hlsl | 25 - .../builtin/hlsl/gen_warp.comp.hlsl | 48 -- src/nbl/CMakeLists.txt | 2 + src/nbl/builtin/CMakeLists.txt | 11 +- src/nbl/ext/CMakeLists.txt | 12 - .../CEnvmapImportanceSampling.cpp | 787 ------------------ .../EnvmapImportanceSampling/CMakeLists.txt | 52 -- .../EnvmapImportanceSampling.cpp | 426 ---------- 11 files changed, 9 insertions(+), 1622 deletions(-) delete mode 100644 include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h delete mode 100644 include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h delete mode 100644 include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl delete mode 100644 include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl delete mode 100644 
include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl delete mode 100644 src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp delete mode 100644 src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt delete mode 100644 src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp diff --git a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h deleted file mode 100644 index 32f32dfd10..0000000000 --- a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h +++ /dev/null @@ -1,150 +0,0 @@ -#ifndef _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ -#define _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ - -#include "nbl/asset/IPipelineLayout.h" -#include "nbl/video/declarations.h" -#include "nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl" - -namespace nbl::ext::envmap_importance_sampling -{ - -class EnvmapImportanceSampling final : public core::IReferenceCounted -{ - public: - - static constexpr uint32_t MaxMipCountLuminance = 13u; - static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u; - static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u; - - struct SCachedCreationParameters - { - core::smart_refctd_ptr utilities; - uint32_t genLumaMapWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension; - uint32_t genWarpMapWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension; - }; - - struct SCreationParameters : public SCachedCreationParameters - { - core::smart_refctd_ptr assetManager = nullptr; - core::smart_refctd_ptr envMap = nullptr; - - inline bool validate() const - { - const auto validation = std::to_array - ({ - std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), - std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), - std::make_pair(bool(envMap), "Invalid `creationParams.envMap` is nullptr!"), - }); - - system::logger_opt_ptr 
logger = utilities->getLogger(); - for (const auto& [ok, error] : validation) - if (!ok) - { - logger.log(error, system::ILogger::ELL_ERROR); - return false; - } - - assert(bool(assetManager->getSystem())); - - return true; - } - - }; - - static core::smart_refctd_ptr create(SCreationParameters&& params); - - static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device); - - static core::smart_refctd_ptr createGenWarpPipelineLayout(video::ILogicalDevice* device); - - //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included - static core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); - - static core::smart_refctd_ptr createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); - - static core::smart_refctd_ptr createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); - - static core::smart_refctd_ptr createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, std::string_view debugName = ""); - - static core::smart_refctd_ptr createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, std::string_view debugName = ""); - - void computeWarpMap(video::IQueue* queue); - - // use this to synchronize warp map after computeWarpMap call - nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getWarpMapBarrier( - core::bitflag dstStageMask, - core::bitflag dstAccessMask, - nbl::video::IGPUImage::LAYOUT oldLayout); - - // use this to synchronize luma map after computeWarpMap call - nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getLumaMapBarrier( - core::bitflag dstStageMask, - core::bitflag dstAccessMask, - nbl::video::IGPUImage::LAYOUT oldLayout); - - inline core::smart_refctd_ptr 
getLumaMapView() const - { - return m_lumaMap; - } - - inline core::smart_refctd_ptr getWarpMapView() const - { - return m_warpMap; - } - - inline hlsl::float32_t getAvgLuma() const - { - return m_avgLuma; - } - - protected: - struct ConstructorParams - { - SCachedCreationParameters creationParams; - hlsl::uint32_t2 lumaWorkgroupCount; - hlsl::uint32_t2 warpWorkgroupCount; - core::smart_refctd_ptr lumaMap; - core::smart_refctd_ptr warpMap; - core::smart_refctd_ptr genLumaPipeline; - core::smart_refctd_ptr genLumaDescriptorSet; - core::smart_refctd_ptr genWarpPipeline; - core::smart_refctd_ptr genWarpDescriptorSet; - }; - - explicit EnvmapImportanceSampling(ConstructorParams&& params) : - m_cachedCreationParams(std::move(params.creationParams)), - m_lumaWorkgroupCount(params.lumaWorkgroupCount), - m_warpWorkgroupCount(params.warpWorkgroupCount), - m_lumaMap(std::move(params.lumaMap)), - m_warpMap(std::move(params.warpMap)), - m_genLumaPipeline(std::move(params.genLumaPipeline)), - m_genLumaDescriptorSet(std::move(params.genLumaDescriptorSet)), - m_genWarpPipeline(std::move(params.genWarpPipeline)), - m_genWarpDescriptorSet(std::move(params.genWarpDescriptorSet)) - {} - - ~EnvmapImportanceSampling() override {} - - private: - - SCachedCreationParameters m_cachedCreationParams; - - hlsl::uint32_t2 m_lumaWorkgroupCount; - hlsl::uint32_t2 m_warpWorkgroupCount; - - hlsl::float32_t m_avgLuma; - - core::smart_refctd_ptr m_lumaMap; - core::smart_refctd_ptr m_warpMap; - - core::smart_refctd_ptr m_genLumaPipeline; - core::smart_refctd_ptr m_genLumaDescriptorSet; - - core::smart_refctd_ptr m_genWarpPipeline; - core::smart_refctd_ptr m_genWarpDescriptorSet; - -}; - -} -#endif diff --git a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h deleted file mode 100644 index 678adf59a9..0000000000 --- a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h +++ /dev/null @@ -1,92 +0,0 
@@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ -#define _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ - -#include "nabla.h" -#include "nbl/video/IGPUShader.h" -#include "nbl/asset/ICPUShader.h" - -namespace nbl::ext::EnvmapImportanceSampling -{ - -class EnvmapImportanceSampling -{ - public: - EnvmapImportanceSampling(video::IVideoDriver* _driver) : m_driver(_driver) - {} - ~EnvmapImportanceSampling() = default; - - // Shader and Resources for Generating Luminance MipMaps from EnvMap - static constexpr uint32_t MaxMipCountLuminance = 13u; - static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u; - static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u; - - void initResources( - core::smart_refctd_ptr envmap, - uint32_t lumaGenWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension, - uint32_t warpMapGenWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension); - void deinitResources(); - - // returns if RIS should be enabled based on variance calculations - inline bool computeWarpMap(const float envMapRegularizationFactor, float& pdfNormalizationFactor) - { - [[maybe_unused]] float dummy; - return computeWarpMap(envMapRegularizationFactor,pdfNormalizationFactor,dummy); - } - bool computeWarpMap(const float envMapRegularizationFactor, float& pdfNormalizationFactor, float& maxEmittanceLuma); - - core::smart_refctd_ptr getLuminanceImageView() { return m_luminance; } - core::smart_refctd_ptr getWarpMapImageView() { return m_warpMap; } - - private: - #define uint uint32_t - struct uvec2 - { - uint x,y; - }; - struct vec2 - { - float x,y; - }; - struct vec3 - { - float x,y,z; - }; - #define vec4 core::vectorSIMDf - #define mat4 core::matrix4SIMD - #define mat4x3 core::matrix3x4SIMD - #include 
"nbl/builtin/glsl/ext/EnvmapImportanceSampling/structs.glsl" - #undef uint - #undef vec4 - #undef mat4 - #undef mat4x3 - inline uint32_t calcMeasurementBufferSize() const - { - return sizeof(nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t)*m_lumaWorkgroups[0]*m_lumaWorkgroups[1]; - } - #undef NBL_GLSL_EXT_ENVMAP_SAMPLING_LUMA_MEASUREMENTS - - uint32_t m_lumaWorkgroups[2]; - uint32_t m_warpWorkgroups[2]; - - core::smart_refctd_ptr m_luminance; - core::smart_refctd_ptr m_warpMap; // Warps Sample based on EnvMap Luminance - - core::smart_refctd_ptr m_lumaDS; - core::smart_refctd_ptr m_lumaMeasurePipeline; - core::smart_refctd_ptr m_lumaGenPipeline; - - // Shader and Resources for EnvironmentalMap Sample Warping - core::smart_refctd_ptr m_warpDS; - core::smart_refctd_ptr m_warpGPUShader; - core::smart_refctd_ptr m_warpPipeline; - - video::IVideoDriver* m_driver; -}; - -} - -#endif diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl deleted file mode 100644 index 6f37f80206..0000000000 --- a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _NBL_HLSL_EXT_ENVMAP_IMPORTANCE_SAMPLING_PARAMETERS_COMMON_INCLUDED_ -#define _NBL_HLSL_EXT_ENVMAP_IMPORTANCE_SAMPLING_PARAMETERS_COMMON_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" - -namespace nbl -{ -namespace hlsl -{ -namespace ext -{ -namespace envmap_importance_sampling -{ - -struct SLumaGenPushConstants -{ - float32_t3 lumaRGBCoefficients; - uint32_t2 lumaMapResolution; -}; - -} -} -} -} - -#endif diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl deleted file mode 100644 index 9d80e60750..0000000000 --- a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl +++ /dev/null @@ -1,25 +0,0 @@ -#include "common.hlsl" - -using namespace nbl; 
-using namespace nbl::hlsl; -using namespace nbl::hlsl::ext::envmap_importance_sampling; - -[[vk::push_constant]] SLumaGenPushConstants pc; - -[[vk::binding(0, 0)]] Texture2D envMap; -[[vk::binding(1, 0)]] RWTexture2D outImage; - -[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)] -[shader("compute")] -void main(uint32_t3 threadID : SV_DispatchThreadID) -{ - if (all(threadID < pc.lumaMapResolution)) - { - - const float uv_y = (float(threadID.y) + float(0.5f)) / pc.lumaMapResolution.y; - const float32_t3 envMapSample = envMap.Load(float32_t3(threadID.xy, 0)); - const float32_t luma = hlsl::dot(envMapSample, pc.lumaRGBCoefficients) * sin(numbers::pi * uv_y); - - outImage[threadID.xy] = luma; - } -} diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl deleted file mode 100644 index c621efd4af..0000000000 --- a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl +++ /dev/null @@ -1,48 +0,0 @@ -#include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl" - -[[vk::binding(0, 0)]] Texture2D lumaMap; - -[[vk::binding(1, 0)]] RWTexture2D outImage; - -using namespace nbl; -using namespace nbl::hlsl; -using namespace nbl::hlsl::sampling; - -struct LuminanceAccessor -{ - float32_t get(uint32_t2 coord, uint32_t level) - { - return lumaMap.Load(uint32_t3(coord, level)); - } - - float32_t4 gather(uint32_t2 coord, uint32_t level) - { - return float32_t4( - lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 1)), - lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 1)), - lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 0)), - lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 0)) - ); - - } -}; - -[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)] -[shader("compute")] -void main(uint32_t3 threadID : SV_DispatchThreadID) -{ - LuminanceAccessor luminanceAccessor; - uint32_t lumaMapWidth, lumaMapHeight; - - lumaMap.GetDimensions(lumaMapWidth, lumaMapHeight); - - 
using LuminanceSampler = LuminanceMapSampler; - - LuminanceSampler luminanceSampler = - LuminanceSampler::create(luminanceAccessor, uint32_t2(lumaMapWidth, lumaMapHeight), lumaMapWidth != lumaMapHeight, uint32_t2(lumaMapWidth, lumaMapHeight)); - - uint32_t2 pixelCoord = threadID.xy; - - outImage[pixelCoord] = luminanceSampler.binarySearch(pixelCoord); - -} diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 76e046848c..6c62419719 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -127,7 +127,9 @@ unset(NABLA_HEADERS_PUBLIC2 ${NBL_TMP_FULL_PATHS}) set(NBL_CORE_SOURCES core/alloc/refctd_memory_resource.cpp core/hash/blake.cpp + core/sampling/EnvmapSampler.cpp ) + set(NBL_SYSTEM_SOURCES system/DefaultFuncPtrLoader.cpp system/IFileBase.cpp diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 050907b3a3..f1fdf1bb95 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -270,6 +270,13 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_rectangle. 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/cos_weighted_spheres.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/quotient_and_pdf.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/uniform_spheres.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/warp.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/warps/spherical.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image/accessors.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image/common.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl") # LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ndarray_addressing.hlsl") # @@ -339,10 +346,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/workgroup2/shared_scan.hlsl") #Extensions LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/default.vert.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/structs.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/gen_luma.comp.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/measure_luma.comp.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/text_rendering/msdf.hlsl") #memory LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/memory.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/memory_accessor.hlsl") diff --git a/src/nbl/ext/CMakeLists.txt b/src/nbl/ext/CMakeLists.txt index 221c1fe88e..af46b29aab 100644 --- a/src/nbl/ext/CMakeLists.txt +++ b/src/nbl/ext/CMakeLists.txt @@ -66,18 +66,6 @@ 
if(NBL_BUILD_DEBUG_DRAW) ) endif() -if(NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING) - add_subdirectory(EnvmapImportanceSampling) - set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDE_DIRS - ${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDE_DIRS} - PARENT_SCOPE - ) - set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_LIB - ${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_LIB} - PARENT_SCOPE - ) -endif() - propagate_changed_variables_to_parent_scope() NBL_ADJUST_FOLDERS(ext) \ No newline at end of file diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp deleted file mode 100644 index 1fdd7cc29e..0000000000 --- a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp +++ /dev/null @@ -1,787 +0,0 @@ -#include "nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h" -#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" -#include "nlohmann/detail/input/parser.hpp" - -using namespace nbl::hlsl::ext::envmap_importance_sampling; - -#ifdef NBL_EMBED_BUILTIN_RESOURCES -#include "nbl/ext/debug_draw/builtin/build/CArchive.h" -#endif - -using namespace nbl; -using namespace core; -using namespace video; -using namespace system; -using namespace asset; -using namespace hlsl; - -namespace nbl::ext::envmap_importance_sampling -{ - -namespace -{ - constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/EnvmapImportanceSampling"; - - // image must have the first mip layout set to transfer src, and the rest to dst - void generateMipmap(video::IGPUCommandBuffer* cmdBuf, IGPUImage* image) - { - const auto mipLevels = image->getCreationParameters().mipLevels; - const auto extent = image->getCreationParameters().extent; - for (uint32_t srcMip_i = 0; srcMip_i < mipLevels-1; srcMip_i++) - { - - const IGPUCommandBuffer::SImageBlit blit = { - .srcMinCoord = {0, 0, 0}, - .srcMaxCoord = {extent.width >> (srcMip_i), extent.height >> (srcMip_i), 1}, - .dstMinCoord = {0, 0, 0}, - .dstMaxCoord = 
{extent.width >> srcMip_i + 1, extent.height >> srcMip_i + 1, 1}, - .layerCount = 1, - .srcBaseLayer = 0, - .dstBaseLayer = 0, - .srcMipLevel = srcMip_i, - .dstMipLevel = srcMip_i + 1, - .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - }; - cmdBuf->blitImage(image, IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, image, IImage::LAYOUT::TRANSFER_DST_OPTIMAL, { &blit, 1 }, IGPUSampler::E_TEXTURE_FILTER::ETF_LINEAR); - - // last mip no need to transition - if (srcMip_i + 1 == mipLevels - 1) break; - - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barrier = { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT - } - }, - .image = image, - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = srcMip_i + 1, - .levelCount = 1, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, - }; - cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = {&barrier, 1} }); - - } - } - - core::smart_refctd_ptr createTexture(video::ILogicalDevice* device, const asset::VkExtent3D extent, E_FORMAT format, uint32_t mipLevels = 1u, uint32_t layers = 0u) - { - const auto real_layers = layers ? 
layers:1u; - - IGPUImage::SCreationParams imgParams; - imgParams.extent = extent; - imgParams.arrayLayers = real_layers; - imgParams.flags = static_cast(0); - imgParams.format = format; - imgParams.mipLevels = mipLevels; - imgParams.samples = IImage::ESCF_1_BIT; - imgParams.type = IImage::ET_2D; - imgParams.usage = IImage::EUF_STORAGE_BIT | IImage::EUF_TRANSFER_SRC_BIT | IImage::EUF_TRANSFER_DST_BIT | IImage::EUF_SAMPLED_BIT; - const auto image = device->createImage(std::move(imgParams)); - auto imageMemReqs = image->getMemoryReqs(); - imageMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - device->allocate(imageMemReqs, image.get()); - - IGPUImageView::SCreationParams viewparams; - viewparams.subUsages = IImage::EUF_STORAGE_BIT | IImage::EUF_SAMPLED_BIT; - viewparams.flags = static_cast(0); - viewparams.format = format; - viewparams.image = std::move(image); - viewparams.viewType = layers ? IGPUImageView::ET_2D_ARRAY:IGPUImageView::ET_2D; - viewparams.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; - viewparams.subresourceRange.baseArrayLayer = 0u; - viewparams.subresourceRange.layerCount = real_layers; - viewparams.subresourceRange.baseMipLevel = 0u; - viewparams.subresourceRange.levelCount = mipLevels; - - return device->createImageView(std::move(viewparams)); - } - - core::smart_refctd_ptr getShaderSource( asset::IAssetManager* assetManager, const char* filePath, system::ILogger* logger) - { - IAssetLoader::SAssetLoadParams lparams = {}; - lparams.logger = logger; - lparams.workingDirectory = NBL_EXT_MOUNT_ENTRY; - auto bundle = assetManager->getAsset(filePath, lparams); - if (bundle.getContents().empty() || bundle.getAssetType()!=IAsset::ET_SHADER) - { - const auto assetType = bundle.getAssetType(); - logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); - exit(-1); - } - auto firstAssetInBundle = bundle.getContents()[0]; - return smart_refctd_ptr_static_cast(firstAssetInBundle); - } -} - 
-core::smart_refctd_ptr EnvmapImportanceSampling::create(SCreationParameters&& params) -{ - auto* const logger = params.utilities->getLogger(); - - if (!params.validate()) - { - logger->log("Failed creation parameters validation!", ILogger::ELL_ERROR); - return nullptr; - } - - const auto EnvmapExtent = params.envMap->getCreationParameters().image->getCreationParameters().extent; - // we don't need the 1x1 mip for anything - const uint32_t MipCountLuminance = IImage::calculateFullMipPyramidLevelCount(EnvmapExtent,IImage::ET_2D)-1; - const auto EnvMapPoTExtent = [MipCountLuminance]() -> asset::VkExtent3D - { - const uint32_t width = 0x1u<>1u,1u }; - }(); - auto calcWorkgroupSize = [](const asset::VkExtent3D extent, const uint32_t workgroupDimension) -> uint32_t2 - { - return uint32_t2(extent.width - 1, extent.height - 1) / workgroupDimension + uint32_t2(1); - }; - - const auto device = params.utilities->getLogicalDevice(); - - ConstructorParams constructorParams; - - constructorParams.lumaWorkgroupCount = calcWorkgroupSize(EnvMapPoTExtent, params.genLumaMapWorkgroupDimension); - constructorParams.lumaMap = createLumaMap(device, EnvMapPoTExtent, MipCountLuminance); - - const auto upscale = 0; - const asset::VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width<createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genLumaPipelineLayout->getDescriptorSetLayouts()); - const auto genLumaDescriptorSet = genLumaDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genLumaPipelineLayout->getDescriptorSetLayouts()[0])); - - const auto genWarpPipelineLayout = createGenWarpPipelineLayout(device); - constructorParams.genWarpPipeline = createGenWarpPipeline(params, genWarpPipelineLayout.get()); - const auto genWarpDescriptorPool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genWarpPipelineLayout->getDescriptorSetLayouts()); - const auto genWarpDescriptorSet = 
genWarpDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genWarpPipelineLayout->getDescriptorSetLayouts()[0])); - - IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo; - envMapDescriptorInfo.desc = params.envMap; - envMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - IGPUDescriptorSet::SDescriptorInfo lumaMapGeneralDescriptorInfo; - lumaMapGeneralDescriptorInfo.desc = constructorParams.lumaMap; - lumaMapGeneralDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; - - IGPUDescriptorSet::SDescriptorInfo lumaMapReadDescriptorInfo; - lumaMapReadDescriptorInfo.desc = constructorParams.lumaMap; - lumaMapReadDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo; - warpMapDescriptorInfo.desc = constructorParams.warpMap; - warpMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; - - const IGPUDescriptorSet::SWriteDescriptorSet writes[] = { - { - .dstSet = genLumaDescriptorSet.get(), .binding = 0, .count = 1, .info = &envMapDescriptorInfo - }, - { - .dstSet = genLumaDescriptorSet.get(), .binding = 1, .count = 1, .info = &lumaMapGeneralDescriptorInfo - }, - { - .dstSet = genWarpDescriptorSet.get(), .binding = 0, .count = 1, .info = &lumaMapReadDescriptorInfo - }, - { - .dstSet = genWarpDescriptorSet.get(), .binding = 1, .count = 1, .info = &warpMapDescriptorInfo - }, - }; - - device->updateDescriptorSets(writes, {}); - - constructorParams.genLumaDescriptorSet = genLumaDescriptorSet; - constructorParams.genWarpDescriptorSet = genWarpDescriptorSet; - - constructorParams.creationParams = std::move(params); - - return core::smart_refctd_ptr(new EnvmapImportanceSampling(std::move(constructorParams))); -} - -core::smart_refctd_ptr EnvmapImportanceSampling::createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, const std::string_view debugName) -{ - return createTexture(device, extent, 
EF_R32_SFLOAT, mipCount); -} - -core::smart_refctd_ptr EnvmapImportanceSampling::createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, const std::string_view debugName) -{ - return createTexture(device, extent, EF_R32G32_SFLOAT); -} - -smart_refctd_ptr EnvmapImportanceSampling::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) -{ - assert(system); - - if (!system) - return nullptr; - - // extension should mount everything for you, regardless if content goes from virtual filesystem - // or disk directly - and you should never rely on application framework to expose extension data - #ifdef NBL_EMBED_BUILTIN_RESOURCES - auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); - #else - auto archive = make_smart_refctd_ptr(std::string_view(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT), smart_refctd_ptr(logger), system); - #endif - - system->mount(smart_refctd_ptr(archive), archiveAlias.data()); - return smart_refctd_ptr(archive); -} - -core::smart_refctd_ptr EnvmapImportanceSampling::createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) -{ - system::logger_opt_ptr logger = params.utilities->getLogger(); - auto system = smart_refctd_ptr(params.assetManager->getSystem()); - auto* device = params.utilities->getLogicalDevice(); - mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); - - const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_luma.comp.hlsl", logger.get()); - auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); - CHLSLCompiler::SOptions options = {}; - options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; - options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - -#ifndef _NBL_DEBUG - ISPIRVOptimizer::E_OPTIMIZER_PASS 
optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; - auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); - options.spirvOptimizer = opt.get(); -#else - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; -#endif - options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); - options.preprocessorOptions.logger = logger.get(); - options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - - const auto workgroupDimStr = std::to_string(params.genLumaMapWorkgroupDimension); - const IShaderCompiler::SMacroDefinition defines[] = { - { "WORKGROUP_DIM", workgroupDimStr.data() }, - }; - - options.preprocessorOptions.extraDefines = defines; - - const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); - const auto shader = device->compileShader({ overridenUnspecialized.get() }); - if (!shader) - { - logger.log("Could not compile shaders!", ILogger::ELL_ERROR); - return nullptr; - } - - video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; - pipelineParams[0].layout = pipelineLayout; - pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; - - smart_refctd_ptr pipeline; - params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); - if (!pipeline) - { - logger.log("Could not create pipeline!", ILogger::ELL_ERROR); - return nullptr; - } - - return pipeline; -} - -core::smart_refctd_ptr EnvmapImportanceSampling::createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) -{ - system::logger_opt_ptr logger = params.utilities->getLogger(); - auto system = smart_refctd_ptr(params.assetManager->getSystem()); - auto* device = params.utilities->getLogicalDevice(); - mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); - - const auto shaderSource = 
getShaderSource(params.assetManager.get(), "gen_warp.comp.hlsl", logger.get()); - auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); - CHLSLCompiler::SOptions options = {}; - options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; - options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - -#ifndef _NBL_DEBUG - ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; - auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); - options.spirvOptimizer = opt.get(); -#else - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; -#endif - options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); - options.preprocessorOptions.logger = logger.get(); - options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - - const auto workgroupDimStr = std::to_string(params.genWarpMapWorkgroupDimension); - const IShaderCompiler::SMacroDefinition defines[] = { - { "WORKGROUP_DIM", workgroupDimStr.data() }, - }; - - options.preprocessorOptions.extraDefines = defines; - - const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); - const auto shader = device->compileShader({ overridenUnspecialized.get() }); - if (!shader) - { - logger.log("Could not compile shaders!", ILogger::ELL_ERROR); - return nullptr; - } - - video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; - pipelineParams[0].layout = pipelineLayout; - pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; - - smart_refctd_ptr pipeline; - params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); - if (!pipeline) - { - logger.log("Could not create pipeline!", ILogger::ELL_ERROR); - return nullptr; - } - - return pipeline; -} - -core::smart_refctd_ptr < video::IGPUPipelineLayout> 
EnvmapImportanceSampling::createGenLumaPipelineLayout(video::ILogicalDevice* device) -{ - asset::SPushConstantRange pcRange = { - .stageFlags = hlsl::ESS_COMPUTE, - .offset = 0, - .size = sizeof(SLumaGenPushConstants) - }; - - const IGPUDescriptorSetLayout::SBinding bindings[] = { - { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u - }, - { - .binding = 1u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u - } - }; - - const auto setLayout = device->createDescriptorSetLayout(bindings); - return device->createPipelineLayout({ &pcRange, 1 }, setLayout); - -} - -core::smart_refctd_ptr EnvmapImportanceSampling::createGenWarpPipelineLayout(video::ILogicalDevice* device) -{ - const IGPUDescriptorSetLayout::SBinding bindings[] = { - { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - }, - { - .binding = 1u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u - } - }; - - const auto setLayout = device->createDescriptorSetLayout(bindings); - return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); -} - -void EnvmapImportanceSampling::computeWarpMap(video::IQueue* queue) -{ - const auto logicalDevice = m_cachedCreationParams.utilities->getLogicalDevice(); - - core::smart_refctd_ptr cmdBuf; - { - // commandbuffer should refcount the pool, so it should be 100% legal to drop at the end of the scope - 
auto gpuCommandPool = logicalDevice->createCommandPool(queue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); - if (!gpuCommandPool) - { - if (auto* logger = logicalDevice->getLogger()) - logger->log("Compute Warpmap: failed to create command pool.", system::ILogger::ELL_ERROR); - return; - } - gpuCommandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &cmdBuf); - if (!cmdBuf) - { - if (auto* logger = logicalDevice->getLogger()) - logger->log("Compute Warpmap: failed to create command buffer.", system::ILogger::ELL_ERROR); - return; - } - } - - if (!cmdBuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) - { - if (auto* logger = logicalDevice->getLogger()) - logger->log("Compute Warpmap: failed to begin command buffer.", system::ILogger::ELL_ERROR); - return; - } - - const auto lumaMapImage = m_lumaMap->getCreationParameters().image.get(); - const auto lumaMapMipLevels = lumaMapImage->getCreationParameters().mipLevels; - const auto lumaMapExtent = lumaMapImage->getCreationParameters().extent; - - const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); - - { - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }, - .image = lumaMapImage, - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = lumaMapMipLevels, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::GENERAL, - } - }; - cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); - } - - // Gen Luma Map - { - SLumaGenPushConstants pcData = {}; - pcData.lumaRGBCoefficients = { 0.2126729f, 0.7151522f, 0.0721750f }; - pcData.lumaMapResolution = 
{lumaMapExtent.width, lumaMapExtent.height}; - - cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); - cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, - 0, sizeof(SLumaGenPushConstants), &pcData); - cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genLumaPipeline->getLayout(), - 0, 1, &m_genLumaDescriptorSet.get()); - cmdBuf->dispatch(m_lumaWorkgroupCount.x, m_lumaWorkgroupCount.y, 1); - } - - // Generate luminance mip map - { - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT - } - }, - .image = lumaMapImage, - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, - }, - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT - } - }, - .image = lumaMapImage, - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 1u, - .levelCount = lumaMapMipLevels - 1, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - } - }; - cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); - generateMipmap(cmdBuf.get(), lumaMapImage); - } - - core::smart_refctd_ptr lumaTexelBuffer; - const auto lumaMapLastMip = lumaMapMipLevels - 1; - const auto lumaMapLastMipExtent = lumaMapImage->getMipSize(lumaMapLastMip); - const auto lumaMapLastTexelCount = lumaMapLastMipExtent.x * 
lumaMapLastMipExtent.y * lumaMapLastMipExtent.z; - { - IGPUImage::SBufferCopy region = {}; - region.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; - region.imageSubresource.mipLevel = lumaMapLastMip; - region.imageSubresource.baseArrayLayer = 0; - region.imageSubresource.layerCount = 1; - region.imageExtent = { lumaMapLastMipExtent.x, lumaMapLastMipExtent.y, lumaMapLastMipExtent.z }; - - IGPUBuffer::SCreationParams bufferCreationParams = {}; - bufferCreationParams.size = lumaMapLastTexelCount * getTexelOrBlockBytesize(EF_R32_SFLOAT); - bufferCreationParams.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT; - lumaTexelBuffer = logicalDevice->createBuffer(std::move(bufferCreationParams)); - if (!lumaTexelBuffer) - { - if (auto* logger = logicalDevice->getLogger()) - logger->log("ScreenShot: failed to create GPU texel buffer.", system::ILogger::ELL_ERROR); - return; - } - auto gpuTexelBufferMemReqs = lumaTexelBuffer->getMemoryReqs(); - gpuTexelBufferMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDownStreamingMemoryTypeBits(); - if (!gpuTexelBufferMemReqs.memoryTypeBits) - { - if (auto* logger = logicalDevice->getLogger()) - logger->log("ScreenShot: no down-streaming memory type for texel buffer.", system::ILogger::ELL_ERROR); - return; - } - auto gpuTexelBufferMem = logicalDevice->allocate(gpuTexelBufferMemReqs, lumaTexelBuffer.get()); - if (!gpuTexelBufferMem.isValid()) - { - if (auto* logger = logicalDevice->getLogger()) - logger->log("ScreenShot: failed to allocate texel buffer memory.", system::ILogger::ELL_ERROR); - return; - } - - IGPUCommandBuffer::SPipelineBarrierDependencyInfo info = {}; - decltype(info)::image_barrier_t barrier = {}; - info.imgBarriers = { &barrier, &barrier + 1 }; - - { - barrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT; - barrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - barrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - barrier.barrier.dep.dstAccessMask = 
ACCESS_FLAGS::TRANSFER_READ_BIT; - barrier.oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL; - barrier.newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL; - barrier.image = lumaMapImage; - barrier.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; - barrier.subresourceRange.baseMipLevel = lumaMapMipLevels - 1; - barrier.subresourceRange.levelCount = 1u; - barrier.subresourceRange.baseArrayLayer = 0; - barrier.subresourceRange.layerCount = 1; - cmdBuf->pipelineBarrier(EDF_NONE,info); - } - cmdBuf->copyImageToBuffer(lumaMapImage,IImage::LAYOUT::TRANSFER_SRC_OPTIMAL,lumaTexelBuffer.get(),1,®ion); - } - - { - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = lumaMapImage, - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = lumaMapMipLevels - 1, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, - }, - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = lumaMapImage, - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = lumaMapMipLevels - 1, - .levelCount = 1, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, - }, - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = 
ACCESS_FLAGS::SHADER_WRITE_BITS - } - }, - .image = warpMapImage, - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::GENERAL, - } - }; - cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); - cmdBuf->bindComputePipeline(m_genWarpPipeline.get()); - cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genWarpPipeline->getLayout(), - 0, 1, &m_genWarpDescriptorSet.get()); - cmdBuf->dispatch(m_warpWorkgroupCount.x, m_warpWorkgroupCount.y, 1); - } - - { - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = warpMapImage, - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, - } - }; - cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); - } - - if (!cmdBuf->end()) - { - if (auto* logger = logicalDevice->getLogger()) - logger->log("ScreenShot: failed to end command buffer.", system::ILogger::ELL_ERROR); - return; - } - - { - auto signalSemaphore = logicalDevice->createSemaphore(0); - - IQueue::SSubmitInfo info; - IQueue::SSubmitInfo::SCommandBufferInfo cmdBufferInfo{ cmdBuf.get() }; - IQueue::SSubmitInfo::SSemaphoreInfo signalSemaphoreInfo; - signalSemaphoreInfo.semaphore = signalSemaphore.get(); - signalSemaphoreInfo.value = 1; - signalSemaphoreInfo.stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS; - info.commandBuffers = { &cmdBufferInfo, &cmdBufferInfo + 1 }; - 
info.signalSemaphores = { &signalSemaphoreInfo, &signalSemaphoreInfo + 1 }; - - if (auto* logger = logicalDevice->getLogger()) - logger->log("Compute Warpmap: submitting copy command buffer.", system::ILogger::ELL_INFO); - if (queue->submit({ &info, &info + 1}) != IQueue::RESULT::SUCCESS) - { - if (auto* logger = logicalDevice->getLogger()) - logger->log("Compute Warpmap: failed to submit copy command buffer.", system::ILogger::ELL_ERROR); - return; - } - - ISemaphore::SWaitInfo waitInfo{ signalSemaphore.get(), 1u}; - - if (auto* logger = logicalDevice->getLogger()) - logger->log("Compute Warpmap: waiting for copy completion.", system::ILogger::ELL_INFO); - if (logicalDevice->blockForSemaphores({&waitInfo, &waitInfo + 1}) != ISemaphore::WAIT_RESULT::SUCCESS) - { - if (auto* logger = logicalDevice->getLogger()) - logger->log("Compute Warpmap: failed to wait for copy completion.", system::ILogger::ELL_ERROR); - return; - } - - auto* allocation = lumaTexelBuffer->getBoundMemory().memory; - const IDeviceMemoryAllocation::MemoryRange range = { 0u, lumaTexelBuffer->getSize() }; - auto* ptr = reinterpret_cast(allocation->map(range, IDeviceMemoryAllocation::EMCAF_READ)); - - m_avgLuma = std::reduce(ptr, ptr + lumaMapLastTexelCount) / float32_t(lumaMapLastTexelCount); - } -} - -nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapImportanceSampling::getWarpMapBarrier( - core::bitflag dstStageMask, - core::bitflag dstAccessMask, - nbl::video::IGPUImage::LAYOUT newLayout) -{ - const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); - return { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = dstStageMask, - .dstAccessMask = dstAccessMask - } - }, - .image = warpMapImage, - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - 
.oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = newLayout, - }; -} - -nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapImportanceSampling::getLumaMapBarrier( - core::bitflag dstStageMask, - core::bitflag dstAccessMask, - nbl::video::IGPUImage::LAYOUT newLayout) -{ - const auto lumaMapImage = m_lumaMap->getCreationParameters().image.get(); - return { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, - .dstStageMask = dstStageMask, - .dstAccessMask = dstAccessMask - } - }, - .image = lumaMapImage, - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, - .newLayout = newLayout, - }; -} - - -} diff --git a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt deleted file mode 100644 index 7486ba8923..0000000000 --- a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt +++ /dev/null @@ -1,52 +0,0 @@ -include(${NBL_ROOT_PATH}/cmake/common.cmake) - -set(NBL_EXT_INTERNAL_INCLUDE_DIR "${NBL_ROOT_PATH}/include") - -set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H - ${NBL_EXT_INTERNAL_INCLUDE_DIR}/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h -) - -set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/CEnvmapImportanceSampling.cpp" -) - -get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}" ABSOLUTE) - -set(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}/nbl/ext/EnvmapImportanceSampling/builtin/hlsl") - -set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/common.hlsl - ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl - 
${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warp.comp.hlsl - ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl -) - -nbl_create_ext_library_project( - ENVMAP_IMPORTANCE_SAMPLING - "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H}" - "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC}" - "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_EXTERNAL_INCLUDE}" - "" - NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT="${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}" -) - -target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) - - -NBL_CREATE_RESOURCE_ARCHIVE( - NAMESPACE nbl::ext::envmap_importance_sampling::builtin::build - TARGET ${LIB_NAME}_builtinsBuild - LINK_TO ${LIB_NAME} - BIND ${OUTPUT_DIRECTORY} - BUILTINS - common.hlsl - gen_luma.comp.hlsl - gen_warp.comp.hlsl - measure_luma.comp.hlsl - -) - - -add_library(Nabla::ext::EnvmapImportanceSampling ALIAS ${LIB_NAME}) diff --git a/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp deleted file mode 100644 index f11df5ce15..0000000000 --- a/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp +++ /dev/null @@ -1,426 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h" - -#include - -using namespace nbl; -using namespace nbl::asset; -using namespace nbl::video; -using namespace ext::EnvmapImportanceSampling; - - -static core::smart_refctd_ptr createTexture(nbl::video::IVideoDriver* _driver, const VkExtent3D extent, E_FORMAT format, uint32_t mipLevels=1u, uint32_t layers=0u) -{ - const auto real_layers = layers ? 
layers:1u; - - IGPUImage::SCreationParams imgparams; - imgparams.extent = extent; - imgparams.arrayLayers = real_layers; - imgparams.flags = static_cast(0); - imgparams.format = format; - imgparams.mipLevels = mipLevels; - imgparams.samples = IImage::ESCF_1_BIT; - imgparams.type = IImage::ET_2D; - - IGPUImageView::SCreationParams viewparams; - viewparams.flags = static_cast(0); - viewparams.format = format; - viewparams.image = _driver->createDeviceLocalGPUImageOnDedMem(std::move(imgparams)); - viewparams.viewType = layers ? IGPUImageView::ET_2D_ARRAY:IGPUImageView::ET_2D; - viewparams.subresourceRange.aspectMask = static_cast(0); - viewparams.subresourceRange.baseArrayLayer = 0u; - viewparams.subresourceRange.layerCount = real_layers; - viewparams.subresourceRange.baseMipLevel = 0u; - viewparams.subresourceRange.levelCount = mipLevels; - - return _driver->createGPUImageView(std::move(viewparams)); -} - -void EnvmapImportanceSampling::initResources(core::smart_refctd_ptr envmap, uint32_t lumaGenWorkgroupDimension, uint32_t warpMapGenWorkgroupDimension) -{ - const auto EnvmapExtent = envmap->getCreationParameters().image->getCreationParameters().extent; - // we don't need the 1x1 mip for anything - const uint32_t MipCountLuminance = IImage::calculateFullMipPyramidLevelCount(EnvmapExtent,IImage::ET_2D)-1; - const auto EnvMapPoTExtent = [MipCountLuminance]() -> VkExtent3D - { - const uint32_t width = 0x1u<>1u,1u }; - }(); - auto calcWorkgroups = [](uint32_t* workGroups, const VkExtent3D extent, const uint32_t workgroupDimension) - { - for (auto i=0; i<2; i++) - workGroups[i] = ((&extent.width)[i]-1u)/workgroupDimension+1u; - }; - - // TODO: Can we get away with R16_SFLOAT for the probabilities? 
- m_luminance = createTexture(m_driver,EnvMapPoTExtent,EF_R32_SFLOAT,MipCountLuminance); - calcWorkgroups(m_lumaWorkgroups,EnvMapPoTExtent,lumaGenWorkgroupDimension); - - // default make the warp-map same resolution as input envmap - // Format needs to be 32bit full precision float, because the Jacobian needs to accurately match PDF - const uint32_t upscale = 0; - const VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width<&& pipelineLayout) -> core::smart_refctd_ptr - { - const char* sourceFmt = - R"===(#version 430 core - -#define LUMA_MIP_MAP_GEN_WORKGROUP_DIM %u -#define WARP_MAP_GEN_WORKGROUP_DIM %u - -#include "%s" - -)==="; - - const size_t extraSize = 2u * 8u + 128u; - auto shader = core::make_smart_refctd_ptr(strlen(sourceFmt) + extraSize + 1u); - snprintf( - reinterpret_cast(shader->getPointer()), shader->getSize(), sourceFmt, - lumaGenWorkgroupDimension, - warpMapGenWorkgroupDimension, - shaderPath - ); - auto gpuShader = m_driver->createGPUShader(core::make_smart_refctd_ptr(std::move(shader), ICPUShader::buffer_contains_glsl)); - if (!gpuShader) - return nullptr; - - auto specializedShader = m_driver->createGPUSpecializedShader(gpuShader.get(), ISpecializedShader::SInfo{ nullptr,nullptr,"main",asset::ISpecializedShader::ESS_COMPUTE }); - if (!specializedShader) - return nullptr; - - return m_driver->createGPUComputePipeline(nullptr,std::move(pipelineLayout),std::move(specializedShader)); - }; - - // Create Everything - { - ISampler::SParams samplerParams; - samplerParams.TextureWrapU = samplerParams.TextureWrapV = samplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_EDGE; - samplerParams.MinFilter = ISampler::ETF_NEAREST; - samplerParams.MaxFilter = ISampler::ETF_LINEAR; - samplerParams.MipmapMode = ISampler::ESMM_NEAREST; - samplerParams.AnisotropicFilter = 0u; - samplerParams.CompareEnable = false; - - IGPUDescriptorSet::SDescriptorInfo lumaDescriptorInfo = {}; - lumaDescriptorInfo.desc = m_luminance; - lumaDescriptorInfo.image.sampler = nullptr; - - { - 
auto upscaleSampler = m_driver->createGPUSampler(samplerParams); - - constexpr auto lumaDescriptorCount = 3u; - IGPUDescriptorSetLayout::SBinding bindings[lumaDescriptorCount]; - bindings[0].binding = 0u; - bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER; - bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[0].count = 1u; - bindings[0].samplers = &upscaleSampler; - - bindings[1].binding = 1u; - bindings[1].type = asset::EDT_STORAGE_BUFFER_DYNAMIC; - bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[1].count = 1u; - - bindings[2].binding = 2u; - bindings[2].type = asset::EDT_STORAGE_IMAGE; - bindings[2].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[2].count = 1u; - - auto lumaDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+lumaDescriptorCount); - { - SPushConstantRange range{ ISpecializedShader::ESS_COMPUTE,0u,sizeof(nbl_glsl_ext_EnvmapSampling_LumaGenShaderData_t) }; - auto lumaPipelineLayout = m_driver->createGPUPipelineLayout(&range,&range+1u,core::smart_refctd_ptr(lumaDSLayout)); - m_lumaMeasurePipeline = genPipeline("nbl/builtin/glsl/ext/EnvmapImportanceSampling/measure_luma.comp",core::smart_refctd_ptr(lumaPipelineLayout)); - m_lumaGenPipeline = genPipeline("nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma.comp",std::move(lumaPipelineLayout)); - } - m_lumaDS = m_driver->createGPUDescriptorSet(std::move(lumaDSLayout)); - - { - IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo = {}; - envMapDescriptorInfo.desc = envmap; - envMapDescriptorInfo.image.sampler = nullptr; - envMapDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; - - IGPUDescriptorSet::SDescriptorInfo lumaMeasurementInfo = {}; - lumaMeasurementInfo.desc = core::smart_refctd_ptr(m_driver->getDefaultDownStreamingBuffer()->getBuffer()); - lumaMeasurementInfo.buffer = {0,calcMeasurementBufferSize()}; - - IGPUDescriptorSet::SWriteDescriptorSet writes[lumaDescriptorCount]; - for (auto i=0u; 
iupdateDescriptorSets(lumaDescriptorCount,writes,0u,nullptr); - } - } - - { - samplerParams.TextureWrapU = samplerParams.TextureWrapV = samplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_BORDER; - samplerParams.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK; - samplerParams.MaxFilter = ISampler::ETF_NEAREST; - auto lumaSampler = m_driver->createGPUSampler(samplerParams); - - constexpr auto warpDescriptorCount = 2u; - IGPUDescriptorSetLayout::SBinding bindings[warpDescriptorCount]; - bindings[0].binding = 0u; - bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER; - bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[0].count = 1; - bindings[0].samplers = &lumaSampler; - - bindings[1].binding = 1u; - bindings[1].type = asset::EDT_STORAGE_IMAGE; - bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[1].count = 1u; - - auto warpDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+warpDescriptorCount); - - m_warpPipeline = genPipeline( - "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp", - m_driver->createGPUPipelineLayout(nullptr,nullptr,core::smart_refctd_ptr(warpDSLayout)) - ); - - m_warpDS = m_driver->createGPUDescriptorSet(std::move(warpDSLayout)); - { - IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo = {}; - warpMapDescriptorInfo.desc = m_warpMap; - warpMapDescriptorInfo.image.sampler = nullptr; - warpMapDescriptorInfo.image.imageLayout = asset::EIL_GENERAL; - - IGPUDescriptorSet::SWriteDescriptorSet writes[warpDescriptorCount]; - for (auto i=0u; iupdateDescriptorSets(warpDescriptorCount,writes,0u,nullptr); - } - } - } -} - -void EnvmapImportanceSampling::deinitResources() -{ - m_lumaMeasurePipeline = nullptr; - m_lumaGenPipeline = nullptr; - m_lumaDS = nullptr; - - m_warpPipeline = nullptr; - m_warpDS = nullptr; - - m_warpMap = nullptr; - m_luminance = nullptr; -} - -bool EnvmapImportanceSampling::computeWarpMap(const float envMapRegularizationFactor, float& pdfNormalizationFactor, float& 
maxEmittanceLuma) -{ - bool enableRIS = false; - // - nbl_glsl_ext_EnvmapSampling_LumaGenShaderData_t pcData = {}; - pcData.luminanceScales.set(0.2126729f, 0.7151522f, 0.0721750f, 0.0f); - { - const auto imageExtent = m_luminance->getCreationParameters().image->getCreationParameters().extent; - pcData.lumaMapResolution = {imageExtent.width,imageExtent.height}; - } - - auto dynamicOffsets = core::make_refctd_dynamic_array>(1u); - auto lumaDispatch = [&](core::smart_refctd_ptr& pipeline,core::smart_refctd_dynamic_array* dynamicOffsets) - { - m_driver->bindComputePipeline(pipeline.get()); - m_driver->bindDescriptorSets(EPBP_COMPUTE,pipeline->getLayout(),0u,1u,&m_lumaDS.get(),dynamicOffsets); - m_driver->pushConstants(pipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData); - m_driver->dispatch(m_lumaWorkgroups[0],m_lumaWorkgroups[1],1); - }; - - // 3 seconds is a long time - constexpr uint64_t timeoutInNanoSeconds = 300000000000u; - - // Calculate directionality metric (0 uniform, 1 totally unidirectional) and new Regularization Factor. - // Ideally would want a better metric of how "concentrated" the energy is in one direction rather than variance, so it - // turns out that the first order spherical harmonic band and weighted (by luma) average of directions are the same thing. - float directionalityMetric = [&]() - { - maxEmittanceLuma = 0.f; - - const uint32_t size = calcMeasurementBufferSize(); - // remember that without initializing the address to be allocated to invalid_address you won't get an allocation! 
- auto downloadStagingArea = m_driver->getDefaultDownStreamingBuffer(); - const auto& address = dynamicOffsets->operator[](0) = std::remove_pointer::type::invalid_address; - // allocate - { - // common page size - const uint32_t alignment = 4096u; - const auto waitPoint = std::chrono::high_resolution_clock::now()+std::chrono::nanoseconds(timeoutInNanoSeconds); - auto unallocatedSize = downloadStagingArea->multi_alloc(waitPoint,1u,dynamicOffsets->data(),&size,&alignment); - if (unallocatedSize) - { - os::Printer::log("Could not download the buffer from the GPU!", ELL_ERROR); - return 0.f; - } - } - auto* data = reinterpret_cast(reinterpret_cast(downloadStagingArea->getBufferPointer())+address); - - // measure into buffer - lumaDispatch(m_lumaMeasurePipeline,&dynamicOffsets); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_ALL_BARRIER_BITS); // TODO: rethink when reimplementing in Vulkan - { - // place and wait for download fence - auto downloadFence = m_driver->placeFence(true); - auto result = downloadFence->waitCPU(timeoutInNanoSeconds,true); - // - if (result==E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED || result==E_DRIVER_FENCE_RETVAL::EDFR_FAIL) - { - os::Printer::log("Could not download the buffer from the GPU, fence not signalled!", ELL_ERROR); - downloadStagingArea->multi_free(1u,&address,&size,nullptr); - return 0.f; - } - // then invalidate the CPU cache of the mapping - if (downloadStagingArea->needsManualFlushOrInvalidate()) - m_driver->invalidateMappedMemoryRanges({ {downloadStagingArea->getBuffer()->getBoundMemory(),address,size} }); - } - - // reduce - core::vectorSIMDf avgDir; - { - const auto reduction = std::reduce( - data,data+size/sizeof(nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t), - nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t{0.f,0.f,0.f,0.f,0.f}, - [](nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t lhs, const nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t& rhs){ - lhs.xDirSum += rhs.xDirSum; - lhs.yDirSum += rhs.yDirSum; - lhs.zDirSum += 
rhs.zDirSum; - lhs.weightSum += rhs.weightSum; - if (lhs.maxLumamulti_free(1u,&address,&size,nullptr); - - avgDir /= avgDir.wwww(); - avgDir.w = 0.f; - // should it be length or length squared? - const float directionality = core::length(avgDir)[0]; - std::cout << "Final Luminance Directionality = " << directionality << std::endl; - // the only reason why we'd get a NaN would be because there's literally 0 luminance in the image - return core::isnan(directionality) ? 0.f:directionality; - }(); - - const float regularizationFactor = core::min(envMapRegularizationFactor*directionalityMetric,envMapRegularizationFactor); - std::cout << "New Regularization Factor based on Directionality = " << regularizationFactor << std::endl; - - constexpr float regularizationThreshold = 0.00001f; - enableRIS = regularizationFactor>=regularizationThreshold; - - // Calc Luma again with new Regularization Factor - { - pcData.luminanceScales *= regularizationFactor; - pcData.luminanceScales.w = 1.f-regularizationFactor; - lumaDispatch(m_lumaGenPipeline,&dynamicOffsets); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_ALL_BARRIER_BITS); // TODO: rethink when reimplementing in Vulkan - } - - // Calc Mipmaps - m_luminance->regenerateMipMapLevels(); - - // Download last mip level and get avg from it - { - const auto lumaImage = m_luminance->getCreationParameters().image; - - // - IImage::SBufferCopy copyRegion = {}; - { - copyRegion.bufferRowLength = 0u; - copyRegion.bufferImageHeight = 0u; - //copyRegion.imageSubresource.aspectMask = wait for Vulkan; - copyRegion.imageSubresource.mipLevel = lumaImage->getCreationParameters().mipLevels-1u; - copyRegion.imageSubresource.baseArrayLayer = 0u; - copyRegion.imageSubresource.layerCount = lumaImage->getCreationParameters().arrayLayers; - copyRegion.imageOffset = { 0u,0u,0u }; - const auto extent = lumaImage->getMipSize(copyRegion.imageSubresource.mipLevel); - copyRegion.imageExtent = { extent.x,extent.y,extent.z }; - } - const uint32_t 
lastMipTexelCount = copyRegion.imageSubresource.layerCount*copyRegion.imageExtent.depth*copyRegion.imageExtent.height*copyRegion.imageExtent.width; - const uint32_t size = lastMipTexelCount*asset::getTexelOrBlockBytesize(lumaImage->getCreationParameters().format); - - // remember that without initializing the address to be allocated to invalid_address you won't get an allocation! - auto downloadStagingArea = m_driver->getDefaultDownStreamingBuffer(); - uint32_t address = std::remove_pointer::type::invalid_address; - // allocate - { - // common page size - const uint32_t alignment = 4096u; - const auto waitPoint = std::chrono::high_resolution_clock::now()+std::chrono::nanoseconds(timeoutInNanoSeconds); - auto unallocatedSize = downloadStagingArea->multi_alloc(waitPoint,1u,&address,&size,&alignment); - if (unallocatedSize) - { - os::Printer::log("Could not download the last luma mip map level from the GPU!", ELL_ERROR); - return core::nan(); - } - } - - // - copyRegion.bufferOffset = address; - m_driver->copyImageToBuffer(lumaImage.get(),downloadStagingArea->getBuffer(),1,©Region); - - // place and wait for download fence - { - auto downloadFence = m_driver->placeFence(true); - auto result = downloadFence->waitCPU(timeoutInNanoSeconds,true); - // - if (result==E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED || result==E_DRIVER_FENCE_RETVAL::EDFR_FAIL) - { - os::Printer::log("Could not download the last luma mip map level from the GPU! 
Fence not Signalled!", ELL_ERROR); - downloadStagingArea->multi_free(1u,&address,&size,nullptr); - return core::nan(); - } - // then invalidate the CPU cache of the mapping - if (downloadStagingArea->needsManualFlushOrInvalidate()) - m_driver->invalidateMappedMemoryRanges({ {downloadStagingArea->getBuffer()->getBoundMemory(),address,size} }); - } - - // - { - const float* r32fData = reinterpret_cast(reinterpret_cast(downloadStagingArea->getBufferPointer())+address); - const auto avgVal = std::reduce(r32fData,r32fData+lastMipTexelCount)/float(lastMipTexelCount); - pdfNormalizationFactor = 1.0/(2.0*core::PI()*core::PI()*avgVal); - } - downloadStagingArea->multi_free(1u,&address,&size,nullptr); - } - - // Generate WarpMap - { - m_driver->bindComputePipeline(m_warpPipeline.get()); - m_driver->bindDescriptorSets(EPBP_COMPUTE,m_warpPipeline->getLayout(),0u,1u,&m_warpDS.get(),nullptr); - m_driver->dispatch(m_warpWorkgroups[0],m_warpWorkgroups[1],1); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_ALL_BARRIER_BITS); // TODO: rethink when reimplementing in Vulkan - } - - return enableRIS; -} - - From fde2bbabad271737d49821b4a439fbe7054373f3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 21 Feb 2026 13:53:10 +0700 Subject: [PATCH 40/69] Fix binarySearch implementation. when last is 2x1 we should check for 0,0 and 1,0 not 0,1 --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 10d3fad6f2..c9f861ed36 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -61,7 +61,7 @@ struct LuminanceMapSampler if (_aspect2x1) { // do one split in the X axis first cause penultimate full mip would have been 2x1 - p.x = choseSecond(_map.get(uint32_t2(0, 0), mip2x1), _map.get(uint32_t2(0, 1), mip2x1), xi.x) ? 
1 : 0; + p.x = choseSecond(_map.get(uint32_t2(0, 0), mip2x1), _map.get(uint32_t2(1, 0), mip2x1), xi.x) ? 1 : 0; } for (int i = mip2x1 - 1; i >= 0; i--) @@ -182,6 +182,10 @@ struct HierarchicalImage pdf = abs(warpResult.density / detInterpolJacobian); + // scalar_type luma; + // lumaMap.get(uv, luma); + // pdf = luma * invAvgLuma * warpResult.density; + return warpResult.dst; } }; From 81cae212559183846ed3749557803e9cacb7e4de Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 21 Feb 2026 15:39:48 +0700 Subject: [PATCH 41/69] Rename private member with underscore prefix --- .../hlsl/sampling/hierarchical_image.hlsl | 40 +++++++++---------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index c9f861ed36..d0fd02b8f0 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -120,20 +120,20 @@ struct HierarchicalImage using vector2_type = vector; using vector3_type = vector; using vector4_type = vector; - LuminanceAccessorT lumaMap; - HierarchicalSamplerT warpMap; - uint32_t2 warpSize; - uint32_t2 lastWarpPixel; - scalar_type invAvgLuma; + LuminanceAccessorT _lumaMap; + HierarchicalSamplerT _warpMap; + uint32_t2 _warpSize; + uint32_t2 _lastWarpPixel; + scalar_type _rcpAvgLuma; static HierarchicalImage create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, NBL_CONST_REF_ARG(HierarchicalSamplerT) warpMap, uint32_t2 warpSize, scalar_type avgLuma) { HierarchicalImage result; - result.lumaMap = lumaMap; - result.warpMap = warpMap; - result.warpSize = warpSize; - result.lastWarpPixel = warpSize - uint32_t2(1, 1); - result.invAvgLuma = ScalarT(1.0) / avgLuma; + result._lumaMap = lumaMap; + result._warpMap = warpMap; + result._warpSize = warpSize; + result._lastWarpPixel = warpSize - uint32_t2(1, 1); + result._rcpAvgLuma = ScalarT(1.0) / avgLuma; return result; } @@ 
-141,8 +141,8 @@ struct HierarchicalImage { vector2_type envmapUv = PostWarpT::inverseWarp(direction); scalar_type luma; - lumaMap.get(envmapUv, luma); - pdf = (luma * invAvgLuma) * PostWarpT::backwardDensity(direction); + _lumaMap.get(envmapUv, luma); + pdf = (luma * _rcpAvgLuma) * PostWarpT::backwardDensity(direction); return envmapUv; } @@ -150,15 +150,15 @@ struct HierarchicalImage { vector2_type envmapUv = PostWarpT::inverseWarp(direction); scalar_type luma; - lumaMap.get(envmapUv, luma); - return luma * invAvgLuma * PostWarpT::backwardDensity(direction); + _lumaMap.get(envmapUv, luma); + return luma * _rcpAvgLuma * PostWarpT::backwardDensity(direction); } vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(vector2_type) uv, vector2_type xi) NBL_CONST_MEMBER_FUNC { - const vector2_type texelCoord = xi * float32_t2(lastWarpPixel); + const vector2_type texelCoord = xi * float32_t2(_lastWarpPixel); - matrix uvs = warpMap.sampleUvs(uint32_t2(texelCoord)); + matrix uvs = _warpMap.sampleUvs(uint32_t2(texelCoord)); const vector2_type interpolant = frac(texelCoord); @@ -175,17 +175,13 @@ struct HierarchicalImage const WarpResult warpResult = PostWarpT::warp(uv); - const scalar_type detInterpolJacobian = determinant(transpose(matrix( + const scalar_type detInterpolJacobian = determinant(matrix( lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx yDiff // second column dFdy - ))) * lastWarpPixel.x * lastWarpPixel.y; + )) * _lastWarpPixel.x * _lastWarpPixel.y; pdf = abs(warpResult.density / detInterpolJacobian); - // scalar_type luma; - // lumaMap.get(uv, luma); - // pdf = luma * invAvgLuma * warpResult.density; - return warpResult.dst; } }; From ba6be938116f485c600c88483d0f9e4e99592c68 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 22 Feb 2026 11:27:53 +0700 Subject: [PATCH 42/69] Update submodule to follow master branch --- 3rdparty/Vulkan-Headers | 2 +- 3rdparty/Vulkan-Tools | 2 +- 3rdparty/openexr | 2 +- examples_tests | 2 +- 4 files 
changed, 4 insertions(+), 4 deletions(-) diff --git a/3rdparty/Vulkan-Headers b/3rdparty/Vulkan-Headers index 3dda5a1a87..33d7f51258 160000 --- a/3rdparty/Vulkan-Headers +++ b/3rdparty/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 3dda5a1a87b62fdf3baf4680edc41c00e85a7a22 +Subproject commit 33d7f512583b8de44d1b6384aa1cf482f92e53e9 diff --git a/3rdparty/Vulkan-Tools b/3rdparty/Vulkan-Tools index 4b6f7101c1..761e7bf273 160000 --- a/3rdparty/Vulkan-Tools +++ b/3rdparty/Vulkan-Tools @@ -1 +1 @@ -Subproject commit 4b6f7101c15e09a8931f2f81c97146d0dfe68bc5 +Subproject commit 761e7bf2736f3ad326fdfc1b3c1543f4e669fd5c diff --git a/3rdparty/openexr b/3rdparty/openexr index aaf5f750d7..c8a74d9ac9 160000 --- a/3rdparty/openexr +++ b/3rdparty/openexr @@ -1 +1 @@ -Subproject commit aaf5f750d7a5fd117d79932d209f0e9816cbff1f +Subproject commit c8a74d9ac97dd579a47a7913f361a87349c0fffd diff --git a/examples_tests b/examples_tests index 85d44671d1..b712d1e49c 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 85d44671d137669ce51d973c8cf76b38dad5a12a +Subproject commit b712d1e49cfc43a0ab3e82d4b6ef689f0e0f0edc From f04d98b361236f21fc76697655019afe3b261fa7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 23 Feb 2026 19:36:33 +0700 Subject: [PATCH 43/69] Add missed EnvmapSampler.h and cpp --- include/nbl/core/sampling/EnvmapSampler.h | 148 ++++ src/nbl/core/sampling/EnvmapSampler.cpp | 778 ++++++++++++++++++++++ 2 files changed, 926 insertions(+) create mode 100644 include/nbl/core/sampling/EnvmapSampler.h create mode 100644 src/nbl/core/sampling/EnvmapSampler.cpp diff --git a/include/nbl/core/sampling/EnvmapSampler.h b/include/nbl/core/sampling/EnvmapSampler.h new file mode 100644 index 0000000000..fbd2b8abd0 --- /dev/null +++ b/include/nbl/core/sampling/EnvmapSampler.h @@ -0,0 +1,148 @@ +#ifndef _NBL_CORE_ENVMAP_SAMPLER_INCLUDED_ +#define _NBL_CORE_ENVMAP_SAMPLER_INCLUDED_ + +#include "nbl/video/declarations.h" + +namespace nbl::core +{ + +class NBL_API2 
EnvmapSampler final : public core::IReferenceCounted +{ + public: + + static constexpr uint32_t MaxMipCountLuminance = 13u; + static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u; + static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u; + + struct SCachedCreationParameters + { + core::smart_refctd_ptr utilities; + uint32_t genLumaMapWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension; + uint32_t genWarpMapWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension; + }; + + struct SCreationParameters : public SCachedCreationParameters + { + core::smart_refctd_ptr assetManager = nullptr; + core::smart_refctd_ptr envMap = nullptr; + + inline bool validate() const + { + const auto validation = std::to_array + ({ + std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), + std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), + std::make_pair(bool(envMap), "Invalid `creationParams.envMap` is nullptr!"), + }); + + system::logger_opt_ptr logger = utilities->getLogger(); + for (const auto& [ok, error] : validation) + if (!ok) + { + logger.log(error, system::ILogger::ELL_ERROR); + return false; + } + + assert(bool(assetManager->getSystem())); + + return true; + } + + }; + + static core::smart_refctd_ptr create(SCreationParameters&& params); + + static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device); + + static core::smart_refctd_ptr createGenWarpPipelineLayout(video::ILogicalDevice* device); + + //! 
mounts the extension's archive to given system - useful if you want to create your own shaders with common header included + static core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); + + static core::smart_refctd_ptr createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + + static core::smart_refctd_ptr createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + + static core::smart_refctd_ptr createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, std::string_view debugName = ""); + + static core::smart_refctd_ptr createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, std::string_view debugName = ""); + + void computeWarpMap(video::IQueue* queue); + + // use this to synchronize warp map after computeWarpMap call + nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getWarpMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT oldLayout); + + // use this to synchronize luma map after computeWarpMap call + nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getLumaMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT oldLayout); + + inline core::smart_refctd_ptr getLumaMapView() const + { + return m_lumaMap; + } + + inline core::smart_refctd_ptr getWarpMapView() const + { + return m_warpMap; + } + + inline hlsl::float32_t getAvgLuma() const + { + return m_avgLuma; + } + + protected: + struct ConstructorParams + { + SCachedCreationParameters creationParams; + hlsl::uint32_t2 lumaWorkgroupCount; + hlsl::uint32_t2 warpWorkgroupCount; + core::smart_refctd_ptr lumaMap; + core::smart_refctd_ptr warpMap; + core::smart_refctd_ptr genLumaPipeline; + 
core::smart_refctd_ptr genLumaDescriptorSet; + core::smart_refctd_ptr genWarpPipeline; + core::smart_refctd_ptr genWarpDescriptorSet; + }; + + explicit EnvmapSampler(ConstructorParams&& params) : + m_cachedCreationParams(std::move(params.creationParams)), + m_lumaWorkgroupCount(params.lumaWorkgroupCount), + m_warpWorkgroupCount(params.warpWorkgroupCount), + m_lumaMap(std::move(params.lumaMap)), + m_warpMap(std::move(params.warpMap)), + m_genLumaPipeline(std::move(params.genLumaPipeline)), + m_genLumaDescriptorSet(std::move(params.genLumaDescriptorSet)), + m_genWarpPipeline(std::move(params.genWarpPipeline)), + m_genWarpDescriptorSet(std::move(params.genWarpDescriptorSet)) + {} + + ~EnvmapSampler() override {} + + private: + + SCachedCreationParameters m_cachedCreationParams; + + hlsl::uint32_t2 m_lumaWorkgroupCount; + hlsl::uint32_t2 m_warpWorkgroupCount; + + hlsl::float32_t m_avgLuma; + + core::smart_refctd_ptr m_lumaMap; + core::smart_refctd_ptr m_warpMap; + + core::smart_refctd_ptr m_genLumaPipeline; + core::smart_refctd_ptr m_genLumaDescriptorSet; + + core::smart_refctd_ptr m_genWarpPipeline; + core::smart_refctd_ptr m_genWarpDescriptorSet; + +}; + +} +#endif diff --git a/src/nbl/core/sampling/EnvmapSampler.cpp b/src/nbl/core/sampling/EnvmapSampler.cpp new file mode 100644 index 0000000000..8d4d968a17 --- /dev/null +++ b/src/nbl/core/sampling/EnvmapSampler.cpp @@ -0,0 +1,778 @@ +#include "nbl/core/sampling/EnvmapSampler.h" +#include "nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl" +#include "nlohmann/detail/input/parser.hpp" + +using namespace nbl; +using namespace core; +using namespace video; +using namespace system; +using namespace asset; +using namespace hlsl; +using namespace nbl::hlsl::sampling::hierarchical_image; + +namespace nbl::core +{ + +class EnvmapSampler; + +namespace +{ + constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/core/builtin"; + + // image must have the first mip layout set to transfer src, and the rest to dst + void 
generateMipmap(video::IGPUCommandBuffer* cmdBuf, IGPUImage* image) + { + const auto mipLevels = image->getCreationParameters().mipLevels; + const auto extent = image->getCreationParameters().extent; + for (uint32_t srcMip_i = 0; srcMip_i < mipLevels-1; srcMip_i++) + { + + const IGPUCommandBuffer::SImageBlit blit = { + .srcMinCoord = {0, 0, 0}, + .srcMaxCoord = {extent.width >> (srcMip_i), extent.height >> (srcMip_i), 1}, + .dstMinCoord = {0, 0, 0}, + .dstMaxCoord = {extent.width >> srcMip_i + 1, extent.height >> srcMip_i + 1, 1}, + .layerCount = 1, + .srcBaseLayer = 0, + .dstBaseLayer = 0, + .srcMipLevel = srcMip_i, + .dstMipLevel = srcMip_i + 1, + .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + }; + cmdBuf->blitImage(image, IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, image, IImage::LAYOUT::TRANSFER_DST_OPTIMAL, { &blit, 1 }, IGPUSampler::E_TEXTURE_FILTER::ETF_LINEAR); + + // last mip no need to transition + if (srcMip_i + 1 == mipLevels - 1) break; + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barrier = { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT + } + }, + .image = image, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = srcMip_i + 1, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = {&barrier, 1} }); + + } + } + + core::smart_refctd_ptr createTexture(video::ILogicalDevice* device, const asset::VkExtent3D extent, E_FORMAT format, uint32_t mipLevels = 1u, uint32_t layers = 0u) + { + const auto real_layers = layers ? 
layers:1u; + + IGPUImage::SCreationParams imgParams; + imgParams.extent = extent; + imgParams.arrayLayers = real_layers; + imgParams.flags = static_cast(0); + imgParams.format = format; + imgParams.mipLevels = mipLevels; + imgParams.samples = IImage::ESCF_1_BIT; + imgParams.type = IImage::ET_2D; + imgParams.usage = IImage::EUF_STORAGE_BIT | IImage::EUF_TRANSFER_SRC_BIT | IImage::EUF_TRANSFER_DST_BIT | IImage::EUF_SAMPLED_BIT; + const auto image = device->createImage(std::move(imgParams)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + device->allocate(imageMemReqs, image.get()); + + IGPUImageView::SCreationParams viewparams; + viewparams.subUsages = IImage::EUF_STORAGE_BIT | IImage::EUF_SAMPLED_BIT; + viewparams.flags = static_cast(0); + viewparams.format = format; + viewparams.image = std::move(image); + viewparams.viewType = layers ? IGPUImageView::ET_2D_ARRAY:IGPUImageView::ET_2D; + viewparams.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; + viewparams.subresourceRange.baseArrayLayer = 0u; + viewparams.subresourceRange.layerCount = real_layers; + viewparams.subresourceRange.baseMipLevel = 0u; + viewparams.subresourceRange.levelCount = mipLevels; + + return device->createImageView(std::move(viewparams)); + } + + core::smart_refctd_ptr getShaderSource( asset::IAssetManager* assetManager, const char* filePath, system::ILogger* logger) + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = logger; + lparams.workingDirectory = NBL_EXT_MOUNT_ENTRY; + auto bundle = assetManager->getAsset(filePath, lparams); + if (bundle.getContents().empty() || bundle.getAssetType()!=IAsset::ET_SHADER) + { + const auto assetType = bundle.getAssetType(); + logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); + exit(-1); + } + auto firstAssetInBundle = bundle.getContents()[0]; + return smart_refctd_ptr_static_cast(firstAssetInBundle); + } +} + 
+// Builds an EnvmapSampler: allocates the luminance and warp maps, compiles both compute
+// pipelines and fills their descriptor sets. Returns nullptr when validation or any resource
+// creation fails.
+core::smart_refctd_ptr<EnvmapSampler> EnvmapSampler::create(SCreationParameters&& params)
+{
+  // logger_opt_ptr tolerates a null logger, same as the pipeline creation functions of this file
+  system::logger_opt_ptr logger = params.utilities->getLogger();
+
+  if (!params.validate())
+  {
+    logger.log("Failed creation parameters validation!", ILogger::ELL_ERROR);
+    return nullptr;
+  }
+
+  const auto EnvmapExtent = params.envMap->getCreationParameters().image->getCreationParameters().extent;
+  // we don't need the 1x1 mip for anything
+  const uint32_t MipCountLuminance = IImage::calculateFullMipPyramidLevelCount(EnvmapExtent,IImage::ET_2D)-1;
+  const auto EnvMapPoTExtent = [MipCountLuminance]() -> asset::VkExtent3D
+  {
+    // NOTE(review): this statement arrived truncated ("0x1u<>1u,1u") and was reconstructed as a
+    // 2:1 power-of-two extent - confirm against the upstream commit.
+    const uint32_t width = 0x1u<<MipCountLuminance;
+    return { width,width>>1u,1u };
+  }();
+  // ceil(extent / workgroupDimension) per axis
+  auto calcWorkgroupSize = [](const asset::VkExtent3D extent, const uint32_t workgroupDimension) -> uint32_t2
+  {
+    return uint32_t2(extent.width - 1, extent.height - 1) / workgroupDimension + uint32_t2(1);
+  };
+
+  const auto device = params.utilities->getLogicalDevice();
+
+  ConstructorParams constructorParams;
+
+  constructorParams.lumaWorkgroupCount = calcWorkgroupSize(EnvMapPoTExtent, params.genLumaMapWorkgroupDimension);
+  constructorParams.lumaMap = createLumaMap(device, EnvMapPoTExtent, MipCountLuminance);
+
+  // NOTE(review): everything from the WarpMapExtent initializer up to the creation of
+  // genLumaDescriptorPool was lost in the received patch text. It is reconstructed here by
+  // symmetry with the gen-warp setup below and with the ConstructorParams fields - confirm
+  // against the upstream commit.
+  const auto upscale = 0;
+  const asset::VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width<<upscale, EnvMapPoTExtent.height<<upscale, 1u};
+  constructorParams.warpWorkgroupCount = calcWorkgroupSize(WarpMapExtent, params.genWarpMapWorkgroupDimension);
+  constructorParams.warpMap = createWarpMap(device, WarpMapExtent);
+  if (!constructorParams.lumaMap || !constructorParams.warpMap)
+  {
+    logger.log("Failed to create the luma or warp map!", ILogger::ELL_ERROR);
+    return nullptr;
+  }
+
+  const auto genLumaPipelineLayout = createGenLumaPipelineLayout(device);
+  const auto genWarpPipelineLayout = createGenWarpPipelineLayout(device);
+  if (!genLumaPipelineLayout || !genWarpPipelineLayout)
+  {
+    logger.log("Failed to create pipeline layouts!", ILogger::ELL_ERROR);
+    return nullptr;
+  }
+
+  // both creators log their own failure reason
+  constructorParams.genLumaPipeline = createGenLumaPipeline(params, genLumaPipelineLayout.get());
+  constructorParams.genWarpPipeline = createGenWarpPipeline(params, genWarpPipelineLayout.get());
+  if (!constructorParams.genLumaPipeline || !constructorParams.genWarpPipeline)
+    return nullptr;
+
+  const auto genLumaDescriptorPool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genLumaPipelineLayout->getDescriptorSetLayouts());
+  const auto genWarpDescriptorPool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genWarpPipelineLayout->getDescriptorSetLayouts());
+  if (!genLumaDescriptorPool || !genWarpDescriptorPool)
+  {
+    logger.log("Failed to create descriptor pools!", ILogger::ELL_ERROR);
+    return nullptr;
+  }
+
+  const auto genLumaDescriptorSet = genLumaDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genLumaPipelineLayout->getDescriptorSetLayouts()[0]));
+  const auto genWarpDescriptorSet = genWarpDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genWarpPipelineLayout->getDescriptorSetLayouts()[0]));
+  if (!genLumaDescriptorSet || !genWarpDescriptorSet)
+  {
+    logger.log("Failed to create descriptor sets!", ILogger::ELL_ERROR);
+    return nullptr;
+  }
+
+  // gen-luma pass: samples the environment map, writes the luma map as a storage image
+  IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo;
+  envMapDescriptorInfo.desc = params.envMap;
+  envMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL;
+
+  IGPUDescriptorSet::SDescriptorInfo lumaMapGeneralDescriptorInfo;
+  lumaMapGeneralDescriptorInfo.desc = constructorParams.lumaMap;
+  lumaMapGeneralDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL;
+
+  // gen-warp pass: reads the luma map, writes the warp map as a storage image
+  IGPUDescriptorSet::SDescriptorInfo lumaMapReadDescriptorInfo;
+  lumaMapReadDescriptorInfo.desc = constructorParams.lumaMap;
+  lumaMapReadDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL;
+
+  IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo;
+  warpMapDescriptorInfo.desc = constructorParams.warpMap;
+  warpMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL;
+
+  const IGPUDescriptorSet::SWriteDescriptorSet writes[] = {
+    {
+      .dstSet = genLumaDescriptorSet.get(), .binding = 0, .count = 1, .info = &envMapDescriptorInfo
+    },
+    {
+      .dstSet = genLumaDescriptorSet.get(), .binding = 1, .count = 1, .info = &lumaMapGeneralDescriptorInfo
+    },
+    {
+      .dstSet = genWarpDescriptorSet.get(), .binding = 0, .count = 1, .info = &lumaMapReadDescriptorInfo
+    },
+    {
+      .dstSet = genWarpDescriptorSet.get(), .binding = 1, .count = 1, .info = &warpMapDescriptorInfo
+    },
+  };
+
+  device->updateDescriptorSets(writes, {});
+
+  constructorParams.genLumaDescriptorSet = genLumaDescriptorSet;
+  constructorParams.genWarpDescriptorSet = genWarpDescriptorSet;
+
+  constructorParams.creationParams = std::move(params);
+
+  // A freshly constructed reference counted object already holds one reference; wrapping the raw
+  // pointer without dont_grab would add a second one that nobody ever drops (leak).
+  return core::smart_refctd_ptr<EnvmapSampler>(new EnvmapSampler(std::move(constructorParams)), core::dont_grab);
+}
+
+// Creates the single-channel 32-bit float luminance map with `mipCount` mip levels.
+// NOTE(review): debugName is accepted but never applied to the created view.
+core::smart_refctd_ptr<video::IGPUImageView> EnvmapSampler::createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, const std::string_view debugName)
+{
+  return createTexture(device, extent, EF_R32_SFLOAT, mipCount);
+}
+
+core::smart_refctd_ptr EnvmapSampler::createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, const std::string_view debugName) +{ + return createTexture(device, extent, EF_R32G32_SFLOAT); +} + +smart_refctd_ptr EnvmapSampler::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) +{ + assert(system); + + if (!system) + return nullptr; + + auto archive = make_smart_refctd_ptr(std::string_view("nbl/builtin/hlsl/sampling/hierarchical_image"), smart_refctd_ptr(logger), system); + + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); + return smart_refctd_ptr(archive); +} + +core::smart_refctd_ptr EnvmapSampler::createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_luma.comp.hlsl", logger.get()); + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; + +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); + options.preprocessorOptions.logger = logger.get(); + 
options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const auto workgroupDimStr = std::to_string(params.genLumaMapWorkgroupDimension); + const IShaderCompiler::SMacroDefinition defines[] = { + { "WORKGROUP_DIM", workgroupDimStr.data() }, + }; + + options.preprocessorOptions.extraDefines = defines; + + const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); + const auto shader = device->compileShader({ overridenUnspecialized.get() }); + if (!shader) + { + logger.log("Could not compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could not create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + +core::smart_refctd_ptr EnvmapSampler::createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_warp.comp.hlsl", logger.get()); + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; + +#ifndef 
_NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); + options.preprocessorOptions.logger = logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const auto workgroupDimStr = std::to_string(params.genWarpMapWorkgroupDimension); + const IShaderCompiler::SMacroDefinition defines[] = { + { "WORKGROUP_DIM", workgroupDimStr.data() }, + }; + + options.preprocessorOptions.extraDefines = defines; + + const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); + const auto shader = device->compileShader({ overridenUnspecialized.get() }); + if (!shader) + { + logger.log("Could not compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could not create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + +core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapSampler::createGenLumaPipelineLayout(video::ILogicalDevice* device) +{ + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(SLumaGenPushConstants) + }; + + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = 
IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({ &pcRange, 1 }, setLayout); + +} + +core::smart_refctd_ptr EnvmapSampler::createGenWarpPipelineLayout(video::ILogicalDevice* device) +{ + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); +} + +void EnvmapSampler::computeWarpMap(video::IQueue* queue) +{ + const auto logicalDevice = m_cachedCreationParams.utilities->getLogicalDevice(); + + core::smart_refctd_ptr cmdBuf; + { + // commandbuffer should refcount the pool, so it should be 100% legal to drop at the end of the scope + auto gpuCommandPool = logicalDevice->createCommandPool(queue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + if (!gpuCommandPool) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to create command pool.", system::ILogger::ELL_ERROR); + return; + } + gpuCommandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &cmdBuf); + 
if (!cmdBuf) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to create command buffer.", system::ILogger::ELL_ERROR); + return; + } + } + + if (!cmdBuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to begin command buffer.", system::ILogger::ELL_ERROR); + return; + } + + const auto lumaMapImage = m_lumaMap->getCreationParameters().image.get(); + const auto lumaMapMipLevels = lumaMapImage->getCreationParameters().mipLevels; + const auto lumaMapExtent = lumaMapImage->getCreationParameters().extent; + + const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = lumaMapMipLevels, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + } + + // Gen Luma Map + { + SLumaGenPushConstants pcData = {}; + pcData.lumaRGBCoefficients = { 0.2126729f, 0.7151522f, 0.0721750f }; + pcData.lumaMapResolution = {lumaMapExtent.width, lumaMapExtent.height}; + + cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); + cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, + 0, sizeof(SLumaGenPushConstants), &pcData); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genLumaPipeline->getLayout(), + 0, 1, &m_genLumaDescriptorSet.get()); + cmdBuf->dispatch(m_lumaWorkgroupCount.x, 
m_lumaWorkgroupCount.y, 1); + } + + // Generate luminance mip map + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 1u, + .levelCount = lumaMapMipLevels - 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + generateMipmap(cmdBuf.get(), lumaMapImage); + } + + core::smart_refctd_ptr lumaTexelBuffer; + const auto lumaMapLastMip = lumaMapMipLevels - 1; + const auto lumaMapLastMipExtent = lumaMapImage->getMipSize(lumaMapLastMip); + const auto lumaMapLastTexelCount = lumaMapLastMipExtent.x * lumaMapLastMipExtent.y * lumaMapLastMipExtent.z; + { + IGPUImage::SBufferCopy region = {}; + region.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = lumaMapLastMip; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageExtent = { lumaMapLastMipExtent.x, lumaMapLastMipExtent.y, lumaMapLastMipExtent.z }; + + 
IGPUBuffer::SCreationParams bufferCreationParams = {}; + bufferCreationParams.size = lumaMapLastTexelCount * getTexelOrBlockBytesize(EF_R32_SFLOAT); + bufferCreationParams.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT; + lumaTexelBuffer = logicalDevice->createBuffer(std::move(bufferCreationParams)); + if (!lumaTexelBuffer) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create GPU texel buffer.", system::ILogger::ELL_ERROR); + return; + } + auto gpuTexelBufferMemReqs = lumaTexelBuffer->getMemoryReqs(); + gpuTexelBufferMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDownStreamingMemoryTypeBits(); + if (!gpuTexelBufferMemReqs.memoryTypeBits) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: no down-streaming memory type for texel buffer.", system::ILogger::ELL_ERROR); + return; + } + auto gpuTexelBufferMem = logicalDevice->allocate(gpuTexelBufferMemReqs, lumaTexelBuffer.get()); + if (!gpuTexelBufferMem.isValid()) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to allocate texel buffer memory.", system::ILogger::ELL_ERROR); + return; + } + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo info = {}; + decltype(info)::image_barrier_t barrier = {}; + info.imgBarriers = { &barrier, &barrier + 1 }; + + { + barrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT; + barrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; + barrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; + barrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT; + barrier.oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL; + barrier.newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL; + barrier.image = lumaMapImage; + barrier.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; + barrier.subresourceRange.baseMipLevel = lumaMapMipLevels - 1; + barrier.subresourceRange.levelCount = 1u; + barrier.subresourceRange.baseArrayLayer = 0; + 
barrier.subresourceRange.layerCount = 1; + cmdBuf->pipelineBarrier(EDF_NONE,info); + } + cmdBuf->copyImageToBuffer(lumaMapImage,IImage::LAYOUT::TRANSFER_SRC_OPTIMAL,lumaTexelBuffer.get(),1,®ion); + } + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = lumaMapMipLevels - 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = lumaMapMipLevels - 1, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + 
cmdBuf->bindComputePipeline(m_genWarpPipeline.get()); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genWarpPipeline->getLayout(), + 0, 1, &m_genWarpDescriptorSet.get()); + cmdBuf->dispatch(m_warpWorkgroupCount.x, m_warpWorkgroupCount.y, 1); + } + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + } + + if (!cmdBuf->end()) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to end command buffer.", system::ILogger::ELL_ERROR); + return; + } + + { + auto signalSemaphore = logicalDevice->createSemaphore(0); + + IQueue::SSubmitInfo info; + IQueue::SSubmitInfo::SCommandBufferInfo cmdBufferInfo{ cmdBuf.get() }; + IQueue::SSubmitInfo::SSemaphoreInfo signalSemaphoreInfo; + signalSemaphoreInfo.semaphore = signalSemaphore.get(); + signalSemaphoreInfo.value = 1; + signalSemaphoreInfo.stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS; + info.commandBuffers = { &cmdBufferInfo, &cmdBufferInfo + 1 }; + info.signalSemaphores = { &signalSemaphoreInfo, &signalSemaphoreInfo + 1 }; + + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: submitting copy command buffer.", system::ILogger::ELL_INFO); + if (queue->submit({ &info, &info + 1}) != IQueue::RESULT::SUCCESS) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to submit copy 
command buffer.", system::ILogger::ELL_ERROR); + return; + } + + ISemaphore::SWaitInfo waitInfo{ signalSemaphore.get(), 1u}; + + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: waiting for copy completion.", system::ILogger::ELL_INFO); + if (logicalDevice->blockForSemaphores({&waitInfo, &waitInfo + 1}) != ISemaphore::WAIT_RESULT::SUCCESS) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to wait for copy completion.", system::ILogger::ELL_ERROR); + return; + } + + auto* allocation = lumaTexelBuffer->getBoundMemory().memory; + const IDeviceMemoryAllocation::MemoryRange range = { 0u, lumaTexelBuffer->getSize() }; + auto* ptr = reinterpret_cast(allocation->map(range, IDeviceMemoryAllocation::EMCAF_READ)); + + m_avgLuma = std::reduce(ptr, ptr + lumaMapLastTexelCount) / float32_t(lumaMapLastTexelCount); + } +} + +nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapSampler::getWarpMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT newLayout) +{ + const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + return { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = dstStageMask, + .dstAccessMask = dstAccessMask + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = newLayout, + }; +} + +nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapSampler::getLumaMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT newLayout) +{ + const auto lumaMapImage = m_lumaMap->getCreationParameters().image.get(); + return { + .barrier = { + .dep = { + 
.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, + .dstStageMask = dstStageMask, + .dstAccessMask = dstAccessMask + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + .newLayout = newLayout, + }; +} + + +} From df2bfc3c61a7291d74ce049a8d4aafc73b467cc8 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 23 Feb 2026 19:39:27 +0700 Subject: [PATCH 44/69] Rename get and gather to texelFetch and texelGather --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 4 ++-- .../builtin/hlsl/sampling/hierarchical_image/accessors.hlsl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index d0fd02b8f0..9a27e11df6 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -61,13 +61,13 @@ struct LuminanceMapSampler if (_aspect2x1) { // do one split in the X axis first cause penultimate full mip would have been 2x1 - p.x = choseSecond(_map.get(uint32_t2(0, 0), mip2x1), _map.get(uint32_t2(1, 0), mip2x1), xi.x) ? 1 : 0; + p.x = choseSecond(_map.texelFetch(uint32_t2(0, 0), mip2x1), _map.texelFetch(uint32_t2(1, 0), mip2x1), xi.x) ? 
1 : 0; } for (int i = mip2x1 - 1; i >= 0; i--) { p <<= 1; - const vector4_type values = _map.gather(p, i); + const vector4_type values = _map.texelGather(p, i); scalar_type wx_0, wx_1; { const scalar_type wy_0 = values[3] + values[2]; diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl index 0ee7423031..304293b93e 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl @@ -26,8 +26,8 @@ NBL_CONCEPT_BEGIN(3) #define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(coord,level)) , ::nbl::hlsl::is_same_v, ScalarT)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(coord,level)) , ::nbl::hlsl::is_same_v, vector)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template texelFetch(coord,level)) , ::nbl::hlsl::is_same_v, ScalarT)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template texelGather(coord,level)) , ::nbl::hlsl::is_same_v, vector)) ); #undef level #undef coord From 05b862afda6a9fb978d5b2d117d9361c13a06581 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 23 Feb 2026 22:17:31 +0700 Subject: [PATCH 45/69] Include missing files into commit --- .../sampling/hierarchical_image/common.hlsl | 26 ++++++++++ .../hierarchical_image/gen_luma.comp.hlsl | 25 ++++++++++ .../hierarchical_image/gen_warp.comp.hlsl | 48 +++++++++++++++++++ 3 files changed, 99 insertions(+) create mode 100644 include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl create mode 100644 include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl create mode 100644 include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl new 
file mode 100644
index 0000000000..2f8ad4b019
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl
@@ -0,0 +1,26 @@
+#ifndef _NBL_HLSL_SAMPLING_HIERARCHICAL_IMAGE_COMMON_INCLUDED_
+#define _NBL_HLSL_SAMPLING_HIERARCHICAL_IMAGE_COMMON_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+namespace hierarchical_image
+{
+
+// Push constants of gen_luma.comp.hlsl.
+struct SLumaGenPushConstants
+{
+  // weights dotted with the environment map sample to obtain its luminance
+  float32_t3 lumaRGBCoefficients;
+  // threads at or beyond this resolution do not write
+  uint32_t2 lumaMapResolution;
+};
+
+}
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl
new file mode 100644
index 0000000000..f9ff6299b6
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl
@@ -0,0 +1,25 @@
+// Writes one luminance texel per thread: the environment map sample weighted by
+// pc.lumaRGBCoefficients and scaled by sin(pi * v), v being the texel-center row coordinate.
+#include "common.hlsl"
+
+using namespace nbl;
+using namespace nbl::hlsl;
+using namespace nbl::hlsl::sampling::hierarchical_image;
+
+[[vk::push_constant]] SLumaGenPushConstants pc;
+
+// NOTE(review): the resource template arguments (and the one of numbers::pi below) appear to be
+// missing in this copy of the patch - restore them from the upstream commit.
+[[vk::binding(0, 0)]] Texture2D envMap;
+[[vk::binding(1, 0)]] RWTexture2D outImage;
+
+[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)]
+[shader("compute")]
+void main(uint32_t3 threadID : SV_DispatchThreadID)
+{
+  // NOTE(review): compares a 3-component thread ID against a 2-component resolution, and the
+  // Load below is given float coordinates - presumably threadID.xy and integer coordinates
+  // were intended, confirm.
+  if (all(threadID < pc.lumaMapResolution))
+  {
+
+    const float uv_y = (float(threadID.y) + float(0.5f)) / pc.lumaMapResolution.y;
+    const float32_t3 envMapSample = envMap.Load(float32_t3(threadID.xy, 0));
+    const float32_t luma = hlsl::dot(envMapSample, pc.lumaRGBCoefficients) * sin(numbers::pi * uv_y);
+
+    outImage[threadID.xy] = luma;
+  }
+}
diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl
new file mode 100644
index 0000000000..8c2b2c9bc3
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl
@@ -0,0 +1,48 @@
+// Writes one warp map texel per thread, obtained from LuminanceMapSampler::binarySearch over
+// the luminance mip chain.
+#include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl"
+
+// NOTE(review): the resource template arguments appear to be missing in this copy of the patch
+[[vk::binding(0, 0)]] Texture2D lumaMap;
+
+[[vk::binding(1, 0)]] RWTexture2D outImage;
+
+using namespace nbl;
+using namespace nbl::hlsl;
+using namespace nbl::hlsl::sampling;
+
+// Accessor handed to LuminanceMapSampler, reads straight from lumaMap.
+struct LuminanceAccessor
+{
+  // single texel of mip `level`
+  float32_t texelFetch(uint32_t2 coord, uint32_t level)
+  {
+    return lumaMap.Load(uint32_t3(coord, level));
+  }
+
+  // 2x2 footprint starting at `coord`, packed as x=(0,1), y=(1,1), z=(1,0), w=(0,0)
+  float32_t4 texelGather(uint32_t2 coord, uint32_t level)
+  {
+    return float32_t4(
+      lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 1)),
+      lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 1)),
+      lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 0)),
+      lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 0))
+    );
+
+  }
+};
+
+[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)]
+[shader("compute")]
+void main(uint32_t3 threadID : SV_DispatchThreadID)
+{
+  LuminanceAccessor luminanceAccessor;
+  uint32_t lumaMapWidth, lumaMapHeight;
+
+  lumaMap.GetDimensions(lumaMapWidth, lumaMapHeight);
+
+  // NOTE(review): the template arguments of LuminanceMapSampler appear to be missing in this copy
+  using LuminanceSampler = LuminanceMapSampler;
+
+  // a non-square luma map is passed on as the sampler's third (boolean) argument
+  LuminanceSampler luminanceSampler =
+    LuminanceSampler::create(luminanceAccessor, uint32_t2(lumaMapWidth, lumaMapHeight), lumaMapWidth != lumaMapHeight, uint32_t2(lumaMapWidth, lumaMapHeight));
+
+  // NOTE(review): no bounds check against the output image size, and the raw integer pixel
+  // coordinate is what gets passed to binarySearch - confirm both are intended.
+  uint32_t2 pixelCoord = threadID.xy;
+
+  outImage[pixelCoord] = luminanceSampler.binarySearch(pixelCoord);
+
+}
From 1498094f00c39ef32c2e768592a1947bdcebc83a Mon Sep 17 00:00:00 2001
From: kevyuu
Date: Tue, 3 Mar 2026 13:40:47 +0700
Subject: [PATCH 46/69] Update comment on sampleUvs

---
 .../nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl
index 304293b93e..7259d85082 100644
--- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl
@@ -34,7 +34,7 @@ NBL_CONCEPT_END(
 #undef a
 #include
-// sampleUvs return 4 UVs in a square to
calculate the jacobian matrix +// sampleUvs return 4 UVs in a square for manual bilinear interpolation with differentiability // declare concept #define NBL_CONCEPT_NAME HierarchicalSampler #define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) From 39da42b4fe95de0f9f0a1f10e79eb93612ad2fbf Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 3 Mar 2026 16:51:08 +0700 Subject: [PATCH 47/69] Use bitfield for lumaMapResolution in push constant --- .../nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl | 3 ++- .../hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl | 4 ++-- src/nbl/core/sampling/EnvmapSampler.cpp | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl index 2f8ad4b019..37ae9d7f5d 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl @@ -15,7 +15,8 @@ namespace hierarchical_image struct SLumaGenPushConstants { float32_t3 lumaRGBCoefficients; - uint32_t2 lumaMapResolution; + uint32_t lumaMapWidth : 16; + uint32_t lumaMapHeight : 16; }; } diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl index f9ff6299b6..7af4434864 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl @@ -13,10 +13,10 @@ using namespace nbl::hlsl::sampling::hierarchical_image; [shader("compute")] void main(uint32_t3 threadID : SV_DispatchThreadID) { - if (all(threadID < pc.lumaMapResolution)) + if (all(threadID.xy < uint32_t2(pc.lumaMapWidth, pc.lumaMapHeight))) { - const float uv_y = (float(threadID.y) + float(0.5f)) / pc.lumaMapResolution.y; + const float uv_y = (float(threadID.y) + float(0.5f)) / pc.lumaMapHeight; const float32_t3 
envMapSample = envMap.Load(float32_t3(threadID.xy, 0)); const float32_t luma = hlsl::dot(envMapSample, pc.lumaRGBCoefficients) * sin(numbers::pi * uv_y); diff --git a/src/nbl/core/sampling/EnvmapSampler.cpp b/src/nbl/core/sampling/EnvmapSampler.cpp index 8d4d968a17..e13bcec934 100644 --- a/src/nbl/core/sampling/EnvmapSampler.cpp +++ b/src/nbl/core/sampling/EnvmapSampler.cpp @@ -457,7 +457,8 @@ void EnvmapSampler::computeWarpMap(video::IQueue* queue) { SLumaGenPushConstants pcData = {}; pcData.lumaRGBCoefficients = { 0.2126729f, 0.7151522f, 0.0721750f }; - pcData.lumaMapResolution = {lumaMapExtent.width, lumaMapExtent.height}; + pcData.lumaMapWidth = lumaMapExtent.width; + pcData.lumaMapHeight = lumaMapExtent.height; cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, From 47799ab7971be9530d1a22cdf0d205bf679a35bb Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 3 Mar 2026 16:52:23 +0700 Subject: [PATCH 48/69] Remove NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING option --- CMakeLists.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 161026137b..41c0df13c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,7 +70,7 @@ else() message(STATUS "Vulkan SDK is not found") endif() -option(NBL_COMPILE_WITH_CUDA "Compile with CUDA interop?" OFF) +option(NBL_COMPILE_WITH_CUDA "Compile with CUDA interop?" ON) if(NBL_COMPILE_WITH_CUDA) find_package(CUDAToolkit REQUIRED) @@ -181,7 +181,6 @@ option(NBL_BUILD_EXAMPLES "Enable building examples" ON) option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" ON) option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON) option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" ON) -option(NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING "Enable Nabla Envmap Importance Sampling extension?" ON) option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" 
OFF) if(NBL_COMPILE_WITH_CUDA) @@ -196,7 +195,7 @@ endif() option(NBL_BUILD_BULLET "Enable Bullet Physics building and integration?" OFF) option(NBL_BUILD_DOCS "Enable building documentation?" OFF) # No one has doxygen installed, plus we dont know when was the last time we generated working doxy and we'll use SphinX in the future option(NBL_ENABLE_PROJECT_JSON_CONFIG_VALIDATION "" ON) -option(NBL_EMBED_BUILTIN_RESOURCES "Embed built-in resources?" OFF) +option(NBL_EMBED_BUILTIN_RESOURCES "Embed built-in resources?" ON) option(NBL_ENABLE_DOCKER_INTEGRATION "Enables docker integration, if client is not found Docker Desktop will be installed" OFF) if (NBL_ENABLE_DOCKER_INTEGRATION) From 0f69171875d74e6402f9991896ac6ea82e305876 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 3 Mar 2026 18:12:15 +0700 Subject: [PATCH 49/69] Fix workgroup dim for gen_warp --- .../nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl | 2 ++ .../hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl index 37ae9d7f5d..a3ce2c19ef 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl @@ -19,6 +19,8 @@ struct SLumaGenPushConstants uint32_t lumaMapHeight : 16; }; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t GEN_WARP_WORKGROUP_DIM = 16; + } } } diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl index 8c2b2c9bc3..c5dd3f8fb2 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl @@ -1,4 +1,5 @@ #include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl" +#include
"nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl" [[vk::binding(0, 0)]] Texture2D lumaMap; @@ -7,6 +8,7 @@ using namespace nbl; using namespace nbl::hlsl; using namespace nbl::hlsl::sampling; +using namespace nbl::hlsl::sampling::hierarchical_image; struct LuminanceAccessor { @@ -27,7 +29,7 @@ struct LuminanceAccessor } }; -[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)] +[numthreads(GEN_WARP_WORKGROUP_DIM, GEN_WARP_WORKGROUP_DIM, 1)] [shader("compute")] void main(uint32_t3 threadID : SV_DispatchThreadID) { From 43b88ef4eb377a63fa61b9a9f8672224b0b386bf Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 3 Mar 2026 18:41:42 +0700 Subject: [PATCH 50/69] Use constant workgroup dimension instead of WORKGROUP_DIM define --- .../nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl | 1 + .../builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl index a3ce2c19ef..f85571c3f7 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl @@ -20,6 +20,7 @@ struct SLumaGenPushConstants }; NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t GEN_WARP_WORKGROUP_DIM = 16; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t GEN_LUMA_WORKGROUP_DIM = 16; } } diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl index 7af4434864..be3d665bb8 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl @@ -9,7 +9,7 @@ using namespace nbl::hlsl::sampling::hierarchical_image; [[vk::binding(0, 0)]] Texture2D envMap; [[vk::binding(1, 0)]] RWTexture2D outImage; -[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)] 
+[numthreads(GEN_LUMA_WORKGROUP_DIM, GEN_LUMA_WORKGROUP_DIM, 1)] [shader("compute")] void main(uint32_t3 threadID : SV_DispatchThreadID) { From 9aa31135fab8a35d172ad2316a41356c37397569 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 3 Mar 2026 18:49:36 +0700 Subject: [PATCH 51/69] Remove passing WORKGROUP_DIM to shader --- src/nbl/core/sampling/EnvmapSampler.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/nbl/core/sampling/EnvmapSampler.cpp b/src/nbl/core/sampling/EnvmapSampler.cpp index e13bcec934..01b687dd77 100644 --- a/src/nbl/core/sampling/EnvmapSampler.cpp +++ b/src/nbl/core/sampling/EnvmapSampler.cpp @@ -254,11 +254,6 @@ core::smart_refctd_ptr EnvmapSampler::createGenLumaP options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); const auto workgroupDimStr = std::to_string(params.genLumaMapWorkgroupDimension); - const IShaderCompiler::SMacroDefinition defines[] = { - { "WORKGROUP_DIM", workgroupDimStr.data() }, - }; - - options.preprocessorOptions.extraDefines = defines; const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); const auto shader = device->compileShader({ overridenUnspecialized.get() }); From 16c374ee6bc321a95f674f250a501c211496968c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 4 Mar 2026 14:11:38 +0700 Subject: [PATCH 52/69] Remove passing WORKGROUP_DIM for gen_luma --- include/nbl/core/sampling/EnvmapSampler.h | 2 -- src/nbl/core/sampling/EnvmapSampler.cpp | 13 ++----------- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/include/nbl/core/sampling/EnvmapSampler.h b/include/nbl/core/sampling/EnvmapSampler.h index fbd2b8abd0..d965ee7cfb 100644 --- a/include/nbl/core/sampling/EnvmapSampler.h +++ b/include/nbl/core/sampling/EnvmapSampler.h @@ -17,8 +17,6 @@ class NBL_API2 EnvmapSampler final : public core::IReferenceCounted struct SCachedCreationParameters { core::smart_refctd_ptr utilities; - uint32_t 
genLumaMapWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension; - uint32_t genWarpMapWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension; }; struct SCreationParameters : public SCachedCreationParameters diff --git a/src/nbl/core/sampling/EnvmapSampler.cpp b/src/nbl/core/sampling/EnvmapSampler.cpp index 01b687dd77..ebbe3e4620 100644 --- a/src/nbl/core/sampling/EnvmapSampler.cpp +++ b/src/nbl/core/sampling/EnvmapSampler.cpp @@ -146,12 +146,12 @@ core::smart_refctd_ptr EnvmapSampler::create(SCreationParameters& ConstructorParams constructorParams; - constructorParams.lumaWorkgroupCount = calcWorkgroupSize(EnvMapPoTExtent, params.genLumaMapWorkgroupDimension); + constructorParams.lumaWorkgroupCount = calcWorkgroupSize(EnvMapPoTExtent, GEN_LUMA_WORKGROUP_DIM); constructorParams.lumaMap = createLumaMap(device, EnvMapPoTExtent, MipCountLuminance); const auto upscale = 0; const asset::VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width< EnvmapSampler::createGenLumaP options.preprocessorOptions.logger = logger.get(); options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - const auto workgroupDimStr = std::to_string(params.genLumaMapWorkgroupDimension); - const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); const auto shader = device->compileShader({ overridenUnspecialized.get() }); if (!shader) @@ -303,13 +301,6 @@ core::smart_refctd_ptr EnvmapSampler::createGenWarpP options.preprocessorOptions.logger = logger.get(); options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - const auto workgroupDimStr = std::to_string(params.genWarpMapWorkgroupDimension); - const IShaderCompiler::SMacroDefinition defines[] = { - { "WORKGROUP_DIM", workgroupDimStr.data() }, - }; - - options.preprocessorOptions.extraDefines = defines; - const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); 
const auto shader = device->compileShader({ overridenUnspecialized.get() }); if (!shader) From 126aa21678a5288399f81e8168f11a151304900e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 4 Mar 2026 14:12:49 +0700 Subject: [PATCH 53/69] Add upscale parameter for EnvmapSampler --- include/nbl/core/sampling/EnvmapSampler.h | 1 + src/nbl/core/sampling/EnvmapSampler.cpp | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/core/sampling/EnvmapSampler.h b/include/nbl/core/sampling/EnvmapSampler.h index d965ee7cfb..bc1b0495af 100644 --- a/include/nbl/core/sampling/EnvmapSampler.h +++ b/include/nbl/core/sampling/EnvmapSampler.h @@ -23,6 +23,7 @@ class NBL_API2 EnvmapSampler final : public core::IReferenceCounted { core::smart_refctd_ptr assetManager = nullptr; core::smart_refctd_ptr envMap = nullptr; + uint8_t upscaleLog2 = 0; inline bool validate() const { diff --git a/src/nbl/core/sampling/EnvmapSampler.cpp b/src/nbl/core/sampling/EnvmapSampler.cpp index ebbe3e4620..dfb1bd59e3 100644 --- a/src/nbl/core/sampling/EnvmapSampler.cpp +++ b/src/nbl/core/sampling/EnvmapSampler.cpp @@ -149,8 +149,7 @@ core::smart_refctd_ptr EnvmapSampler::create(SCreationParameters& constructorParams.lumaWorkgroupCount = calcWorkgroupSize(EnvMapPoTExtent, GEN_LUMA_WORKGROUP_DIM); constructorParams.lumaMap = createLumaMap(device, EnvMapPoTExtent, MipCountLuminance); - const auto upscale = 0; - const asset::VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width< Date: Wed, 4 Mar 2026 14:13:28 +0700 Subject: [PATCH 54/69] Pass warpMap width and height to luminanceSampler.
Check For Out of Bound access --- .../sampling/hierarchical_image/gen_warp.comp.hlsl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl index c5dd3f8fb2..a6e8cd65a8 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl @@ -33,18 +33,24 @@ struct LuminanceAccessor [shader("compute")] void main(uint32_t3 threadID : SV_DispatchThreadID) { + uint32_t warpMapWidth, warpMapHeight; + warpMap.GetDimensions(warpMapWidth, warpMapHeight); + + if (threadID.x < warpMapWidth && threadID.y < warpMapHeight) + { LuminanceAccessor luminanceAccessor; uint32_t lumaMapWidth, lumaMapHeight; lumaMap.GetDimensions(lumaMapWidth, lumaMapHeight); - using LuminanceSampler = LuminanceMapSampler; LuminanceSampler luminanceSampler = - LuminanceSampler::create(luminanceAccessor, uint32_t2(lumaMapWidth, lumaMapHeight), lumaMapWidth != lumaMapHeight, uint32_t2(lumaMapWidth, lumaMapHeight)); + LuminanceSampler::create(luminanceAccessor, uint32_t2(lumaMapWidth, lumaMapHeight), lumaMapWidth != lumaMapHeight, uint32_t2(pc.warpMapWidth, pc.warpMapHeight)); uint32_t2 pixelCoord = threadID.xy; outImage[pixelCoord] = luminanceSampler.binarySearch(pixelCoord); + } + } From 7509c83d7bd4b291de3a426de66a9d6444308e38 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 4 Mar 2026 14:43:40 +0700 Subject: [PATCH 55/69] Small fixes to gen_warp shader --- .../hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl index a6e8cd65a8..e3bc595b10 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl +++ 
b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl @@ -34,7 +34,7 @@ struct LuminanceAccessor void main(uint32_t3 threadID : SV_DispatchThreadID) { uint32_t warpMapWidth, warpMapHeight; - warpMap.GetDimensions(warpMapWidth, warpMapHeight); + outImage.GetDimensions(warpMapWidth, warpMapHeight); if (threadID.x < warpMapWidth && threadID.y < warpMapHeight) { @@ -45,7 +45,7 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) using LuminanceSampler = LuminanceMapSampler; LuminanceSampler luminanceSampler = - LuminanceSampler::create(luminanceAccessor, uint32_t2(lumaMapWidth, lumaMapHeight), lumaMapWidth != lumaMapHeight, uint32_t2(pc.warpMapWidth, pc.warpMapHeight)); + LuminanceSampler::create(luminanceAccessor, uint32_t2(lumaMapWidth, lumaMapHeight), lumaMapWidth != lumaMapHeight, uint32_t2(warpMapWidth, warpMapHeight)); uint32_t2 pixelCoord = threadID.xy; From 833b388430f1d40bf8dfb305e04dcebe6f91f3fe Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 4 Mar 2026 15:49:54 +0700 Subject: [PATCH 56/69] gen_warp get image dimension from push constant instead of OpImageQuery. 
Assert oob access --- .../sampling/hierarchical_image/common.hlsl | 8 ++++++ .../hierarchical_image/gen_warp.comp.hlsl | 27 +++++++++---------- src/nbl/core/sampling/EnvmapSampler.cpp | 18 ++++++++++++- 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl index f85571c3f7..a0fc20f3b0 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl @@ -19,6 +19,14 @@ struct SLumaGenPushConstants uint32_t lumaMapHeight : 16; }; +struct SWarpGenPushConstants +{ + uint32_t lumaMapWidth : 16; + uint32_t lumaMapHeight : 16; + uint32_t warpMapWidth : 16; + uint32_t warpMapHeight : 16; +}; + NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t GEN_WARP_WORKGROUP_DIM = 16; NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t GEN_LUMA_WORKGROUP_DIM = 16; diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl index e3bc595b10..a7f949dc47 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl @@ -1,51 +1,50 @@ #include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl" #include "nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl" -[[vk::binding(0, 0)]] Texture2D lumaMap; - -[[vk::binding(1, 0)]] RWTexture2D outImage; - using namespace nbl; using namespace nbl::hlsl; using namespace nbl::hlsl::sampling; using namespace nbl::hlsl::sampling::hierarchical_image; +[[vk::push_constant]] SWarpGenPushConstants pc; + +[[vk::binding(0, 0)]] Texture2D lumaMap; + +[[vk::binding(1, 0)]] RWTexture2D outImage; + + struct LuminanceAccessor { float32_t texelFetch(uint32_t2 coord, uint32_t level) { + assert(coord.x < pc.warpMapWidth && coord.y < pc.warpMapHeight); return 
lumaMap.Load(uint32_t3(coord, level)); } float32_t4 texelGather(uint32_t2 coord, uint32_t level) { + assert(coord.x < pc.warpMapWidth - 1 && coord.y < pc.warpMapHeight - 1); return float32_t4( lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 1)), lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 1)), lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 0)), lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 0)) ); - } + }; [numthreads(GEN_WARP_WORKGROUP_DIM, GEN_WARP_WORKGROUP_DIM, 1)] [shader("compute")] void main(uint32_t3 threadID : SV_DispatchThreadID) { - uint32_t warpMapWidth, warpMapHeight; - outImage.GetDimensions(warpMapWidth, warpMapHeight); - - if (threadID.x < warpMapWidth && threadID.y < warpMapHeight) + if (threadID.x < pc.warpMapWidth && threadID.y < pc.warpMapHeight) { - LuminanceAccessor luminanceAccessor; - uint32_t lumaMapWidth, lumaMapHeight; - - lumaMap.GetDimensions(lumaMapWidth, lumaMapHeight); using LuminanceSampler = LuminanceMapSampler; + LuminanceAccessor luminanceAccessor; LuminanceSampler luminanceSampler = - LuminanceSampler::create(luminanceAccessor, uint32_t2(lumaMapWidth, lumaMapHeight), lumaMapWidth != lumaMapHeight, uint32_t2(warpMapWidth, warpMapHeight)); + LuminanceSampler::create(luminanceAccessor, uint32_t2(pc.lumaMapWidth, pc.lumaMapHeight), pc.lumaMapWidth != pc.lumaMapHeight, uint32_t2(pc.warpMapWidth, pc.warpMapHeight)); uint32_t2 pixelCoord = threadID.xy; diff --git a/src/nbl/core/sampling/EnvmapSampler.cpp b/src/nbl/core/sampling/EnvmapSampler.cpp index dfb1bd59e3..9e76fc5f3b 100644 --- a/src/nbl/core/sampling/EnvmapSampler.cpp +++ b/src/nbl/core/sampling/EnvmapSampler.cpp @@ -355,6 +355,12 @@ core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapSampler::createGenLuma core::smart_refctd_ptr EnvmapSampler::createGenWarpPipelineLayout(video::ILogicalDevice* device) { + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(SWarpGenPushConstants) // range must cover the struct pushed in computeWarpMap() + }; + const
IGPUDescriptorSetLayout::SBinding bindings[] = { { .binding = 0u, @@ -373,7 +379,7 @@ core::smart_refctd_ptr EnvmapSampler::createGenWarpPi }; const auto setLayout = device->createDescriptorSetLayout(bindings); - return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); + return device->createPipelineLayout({&pcRange, 1}, setLayout); } void EnvmapSampler::computeWarpMap(video::IQueue* queue) @@ -411,6 +417,7 @@ void EnvmapSampler::computeWarpMap(video::IQueue* queue) const auto lumaMapExtent = lumaMapImage->getCreationParameters().extent; const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + const auto warpMapExtent = warpMapImage->getCreationParameters().extent; { IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { @@ -625,9 +632,18 @@ void EnvmapSampler::computeWarpMap(video::IQueue* queue) } }; cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + // Warp-gen dimensions are pushed with the warp pipeline's own layout and struct size. + const SWarpGenPushConstants pcData = { + .lumaMapWidth = lumaMapExtent.width, + .lumaMapHeight = lumaMapExtent.height, + .warpMapWidth = warpMapExtent.width, + .warpMapHeight = warpMapExtent.height + }; cmdBuf->bindComputePipeline(m_genWarpPipeline.get()); cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genWarpPipeline->getLayout(), 0, 1, &m_genWarpDescriptorSet.get()); + cmdBuf->pushConstants(m_genWarpPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, + 0, sizeof(SWarpGenPushConstants), &pcData); cmdBuf->dispatch(m_warpWorkgroupCount.x, m_warpWorkgroupCount.y, 1); } From ac63441665cfc4aac8e46b723cd72d835bd4ed4b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 4 Mar 2026 18:35:23 +0700 Subject: [PATCH 57/69] Rename HierarchicalImage to WarpmapSampler --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index
9a27e11df6..d6068ca9af 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -114,7 +114,7 @@ template && hierarchical_image::HierarchicalSampler && concepts::Warp) -struct HierarchicalImage +struct WarpmapSampler { using scalar_type = ScalarT; using vector2_type = vector; @@ -126,9 +126,9 @@ struct HierarchicalImage uint32_t2 _lastWarpPixel; scalar_type _rcpAvgLuma; - static HierarchicalImage create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, NBL_CONST_REF_ARG(HierarchicalSamplerT) warpMap, uint32_t2 warpSize, scalar_type avgLuma) + static WarpmapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, NBL_CONST_REF_ARG(HierarchicalSamplerT) warpMap, uint32_t2 warpSize, scalar_type avgLuma) { - HierarchicalImage result; + WarpmapSampler result; result._lumaMap = lumaMap; result._warpMap = warpMap; result._warpSize = warpSize; From dacf4938a0f095a2b7118eaf17b780a2589be434 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 4 Mar 2026 18:35:31 +0700 Subject: [PATCH 58/69] Add todo comment for cube map --- .../builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl index be3d665bb8..d2f7f9658c 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl @@ -6,6 +6,7 @@ using namespace nbl::hlsl::sampling::hierarchical_image; [[vk::push_constant]] SLumaGenPushConstants pc; +// TODO: Use layer texture, to implement envmap importance sampling for cube map [[vk::binding(0, 0)]] Texture2D envMap; [[vk::binding(1, 0)]] RWTexture2D outImage; From a2b57f9b689ef574b937e2df74f76383a4bf70b8 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 5 Mar 2026 14:56:23 +0700 Subject: [PATCH 59/69] Move 
EnvmapSampler from core/sampling to video/sampling --- include/nbl/{core => video}/sampling/EnvmapSampler.h | 2 +- src/nbl/CMakeLists.txt | 4 +++- src/nbl/{core => video}/sampling/EnvmapSampler.cpp | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) rename include/nbl/{core => video}/sampling/EnvmapSampler.h (99%) rename src/nbl/{core => video}/sampling/EnvmapSampler.cpp (99%) diff --git a/include/nbl/core/sampling/EnvmapSampler.h b/include/nbl/video/sampling/EnvmapSampler.h similarity index 99% rename from include/nbl/core/sampling/EnvmapSampler.h rename to include/nbl/video/sampling/EnvmapSampler.h index bc1b0495af..948ec60f16 100644 --- a/include/nbl/core/sampling/EnvmapSampler.h +++ b/include/nbl/video/sampling/EnvmapSampler.h @@ -3,7 +3,7 @@ #include "nbl/video/declarations.h" -namespace nbl::core +namespace nbl::video { class NBL_API2 EnvmapSampler final : public core::IReferenceCounted diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index c359535468..2b8067c1dd 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -127,7 +127,6 @@ unset(NABLA_HEADERS_PUBLIC2 ${NBL_TMP_FULL_PATHS}) set(NBL_CORE_SOURCES core/alloc/refctd_memory_resource.cpp core/hash/blake.cpp - core/sampling/EnvmapSampler.cpp ) set(NBL_SYSTEM_SOURCES @@ -293,6 +292,9 @@ set(NBL_VIDEO_SOURCES # CUDA video/CCUDAHandler.cpp video/CCUDADevice.cpp + +# Sampling + video/sampling/EnvmapSampler.cpp ) set(NBL_SCENE_SOURCES diff --git a/src/nbl/core/sampling/EnvmapSampler.cpp b/src/nbl/video/sampling/EnvmapSampler.cpp similarity index 99% rename from src/nbl/core/sampling/EnvmapSampler.cpp rename to src/nbl/video/sampling/EnvmapSampler.cpp index 9e76fc5f3b..a436575da5 100644 --- a/src/nbl/core/sampling/EnvmapSampler.cpp +++ b/src/nbl/video/sampling/EnvmapSampler.cpp @@ -1,4 +1,4 @@ -#include "nbl/core/sampling/EnvmapSampler.h" +#include "nbl/video/sampling/EnvmapSampler.h" #include "nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl" #include 
"nlohmann/detail/input/parser.hpp" @@ -10,7 +10,7 @@ using namespace asset; using namespace hlsl; using namespace nbl::hlsl::sampling::hierarchical_image; -namespace nbl::core +namespace nbl::video { class EnvmapSampler; From 3343e64296498d6a4613e3a9f3c0418509f40350 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 5 Mar 2026 16:19:36 +0700 Subject: [PATCH 60/69] Rename LuminanceSampler to HierarchicalLuminanceSampler --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 6 +++--- .../hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index d6068ca9af..71f871ffe5 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -22,7 +22,7 @@ template && hierarchical_image::LuminanceReadAccessor ) -struct LuminanceMapSampler +struct HierarchicalLuminanceSampler { using scalar_type = ScalarT; using vector2_type = vector; @@ -33,9 +33,9 @@ struct LuminanceMapSampler uint32_t2 _lastWarpPixel; bool _aspect2x1; - static LuminanceMapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1, uint32_t2 warpSize) + static HierarchicalLuminanceSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1, uint32_t2 warpSize) { - LuminanceMapSampler result; + HierarchicalLuminanceSampler result; result._map = lumaMap; result._mapSize = mapSize; result._lastWarpPixel = warpSize - uint32_t2(1, 1); diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl index a7f949dc47..06db01d0d0 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl @@ 
-40,7 +40,7 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) { if (threadID.x < pc.warpMapWidth && threadID.y < pc.warpMapHeight) { - using LuminanceSampler = LuminanceMapSampler; + using LuminanceSampler = HierarchicalLuminanceSampler; LuminanceAccessor luminanceAccessor; LuminanceSampler luminanceSampler = From 834163a882b9a512584ac2c642c38d9c573fbb36 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 6 Mar 2026 19:14:33 +0700 Subject: [PATCH 61/69] Fix corner sampling logic in gen_luma and hierarchical_image.hlsl --- .../hlsl/sampling/hierarchical_image.hlsl | 44 ++++++++++++------- .../hierarchical_image/gen_luma.comp.hlsl | 8 +++- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 71f871ffe5..6e3088b36c 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -29,21 +29,24 @@ struct HierarchicalLuminanceSampler using vector4_type = vector; LuminanceAccessorT _map; - uint32_t2 _mapSize; - uint32_t2 _lastWarpPixel; - bool _aspect2x1; + float32_t2 _rcpMapSize; + float32_t2 _rcpWarpSize; + uint16_t _mip2x1 : 15; + uint16_t _aspect2x1 : 1; static HierarchicalLuminanceSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1, uint32_t2 warpSize) { HierarchicalLuminanceSampler result; result._map = lumaMap; - result._mapSize = mapSize; - result._lastWarpPixel = warpSize - uint32_t2(1, 1); - result._aspect2x1 = aspect2x1; + result._rcpMapSize = scalar_type(1.0) / vector2_type(mapSize); + result._rcpWarpSize = scalar_type(1.0) / vector2_type(warpSize - uint32_t2(1, 1)); + // Note: We use mapSize.y here because currently the map aspect ratio can only be 1x1 or 2x1 + result._mip2x1 = findMSB(mapSize.y); + result._aspect2x1 = aspect2x1; return result; } - static bool choseSecond(scalar_type first, scalar_type
second, NBL_REF_ARG(scalar_type) xi) + static bool __choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi) { // numerical resilience against IEEE754 scalar_type dummy = scalar_type(0); @@ -54,17 +57,16 @@ struct HierarchicalLuminanceSampler vector2_type binarySearch(const uint32_t2 coord) { - // We use _lastWarpPixel here for corner sampling - float32_t2 xi = float32_t2(coord)/ _lastWarpPixel; + // We use _rcpWarpSize here for corner sampling. Corner sampling is a sampling mechanism where we map texel_index / map_size to the center of the texel instead of the edge of the texel. So uv.x == 0 is mapped to the center of the left most texel, and uv.x == width - 1 is mapped to the center of the right most texel. That's why the length of the domain is subtracted by 1 for each dimension. + float32_t2 xi = float32_t2(coord) * _rcpWarpSize; uint32_t2 p = uint32_t2(0, 0); - const uint32_t2 mip2x1 = findMSB(_mapSize.y); if (_aspect2x1) { // do one split in the X axis first cause penultimate full mip would have been 2x1 - p.x = choseSecond(_map.texelFetch(uint32_t2(0, 0), mip2x1), _map.texelFetch(uint32_t2(1, 0), mip2x1), xi.x) ? 1 : 0; + p.x = __choseSecond(_map.texelFetch(uint32_t2(0, 0), _mip2x1), _map.texelFetch(uint32_t2(1, 0), _mip2x1), xi.x) ? 1 : 0; } - for (int i = mip2x1 - 1; i >= 0; i--) + for (int i = _mip2x1 - 1; i >= 0; i--) { p <<= 1; const vector4_type values = _map.texelGather(p, i); @@ -72,7 +74,7 @@ struct HierarchicalLuminanceSampler { const scalar_type wy_0 = values[3] + values[2]; const scalar_type wy_1 = values[1] + values[0]; - if (choseSecond(wy_0, wy_1, xi.y)) + if (__choseSecond(wy_0, wy_1, xi.y)) { p.y |= 1; wx_0 = values[0]; @@ -84,13 +86,23 @@ struct HierarchicalLuminanceSampler wx_1 = values[2]; } } - if (choseSecond(wx_0, wx_1, xi.x)) + if (__choseSecond(wx_0, wx_1, xi.x)) p.x |= 1; } - // If we don`t add xi, the sample will clump to the lowest corner of environment map texel. 
We add xi to simulate uniform distribution within a pixel and make the sample continuous. This is why we compute the pdf not from the normalized luminance of the texel, instead from the reciprocal of the Jacobian. - const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / vector2_type(_mapSize); + // If we don`t add xi, the sample will clump to the lowest corner of environment map texel. Each time we call PartitionRandVariable(), the output xi is the new xi that determines how left and right(or top and bottom for y axis) to choose the child partition. It means that if for some input xi, the output xi = 0, then the input xi is the edge of choosing this partition and the previous partition, and vice versa, if output xi = 1, then the input xi is the edge of choosing this partition and the next partition. Hence, by adding xi to the lower corner of the texel, we create a gradual transition from one pixel to another. Without adding output xi, the calculation of jacobian using the difference of sample value would not work. + // Since we want to do corner sampling. We have to handle edge texels as corner cases. Remember, in corner sampling we map uv [0,1] to [center of first texel, center of last texel]. So when p is an edge texel, we have to remap xi. [0.5, 1] when p == 0, and [0.5, 1] when p == length - 1. 
+ if (p.x == 0) + xi.x = xi.x * scalar_type(0.5) + scalar_type(0.5); + if (p.y == 0) + xi.y = xi.y * scalar_type(0.5) + scalar_type(0.5); + if (p.x == ) + xi.x = xi.x * scalar_type(0.5) + scalar_type(0.5); + if (p.y == 0) + xi.y = xi.y * scalar_type(0.5) + scalar_type(0.5); + + const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) * _rcpMapSize; return directionUV; } diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl index d2f7f9658c..e4c5abefd3 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl @@ -19,7 +19,13 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) const float uv_y = (float(threadID.y) + float(0.5f)) / pc.lumaMapHeight; const float32_t3 envMapSample = envMap.Load(float32_t3(threadID.xy, 0)); - const float32_t luma = hlsl::dot(envMapSample, pc.lumaRGBCoefficients) * sin(numbers::pi * uv_y); + float32_t luma = hlsl::dot(envMapSample, pc.lumaRGBCoefficients) * sin(numbers::pi * uv_y); + + // We reduce the luma of the corner texel since we want to do "corner sampling" when generating warp map. 
+ if (threadID.x == 0 || threadID.x == (pc.lumaMapWidth - 1)) + luma *= 0.5f; + if (threadID.y == 0 || threadID.y == (pc.lumaMapHeight - 1)) + luma *= 0.5f; outImage[threadID.xy] = luma; } From 94d1f142a63500f16c85eea6353b0bf3c6115d77 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 6 Mar 2026 19:23:21 +0700 Subject: [PATCH 62/69] Fix previous commit --- .../builtin/hlsl/sampling/hierarchical_image.hlsl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 6e3088b36c..e4723f3667 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -29,8 +29,8 @@ struct HierarchicalLuminanceSampler using vector4_type = vector; LuminanceAccessorT _map; - float32_t2 _rcpMapSize; float32_t2 _rcpWarpSize; + uint16_t2 _mapSize; uint16_t _mip2x1 : 15; uint16_t _aspect2x1 : 1; @@ -38,7 +38,7 @@ struct HierarchicalLuminanceSampler { HierarchicalLuminanceSampler result; result._map = lumaMap; - result._rcpMapSize = scalar_type(1.0) / vector2_type(warpSize); + result._mapSize = vector2_type(mapSize); result._rcpWarpSize = scalar_type(1.0) / vector2_type(warpSize - uint32_t2(1, 1)); // Note: We use mapSize.y here because the currently the map aspect ratio can only be 1x1 or 2x1 result._mip2x1 = findMSB(mapSize.y); @@ -97,12 +97,12 @@ struct HierarchicalLuminanceSampler xi.x = xi.x * scalar_type(0.5) + scalar_type(0.5); if (p.y == 0) xi.y = xi.y * scalar_type(0.5) + scalar_type(0.5); - if (p.x == ) - xi.x = xi.x * scalar_type(0.5) + scalar_type(0.5); - if (p.y == 0) - xi.y = xi.y * scalar_type(0.5) + scalar_type(0.5); + if (p.x == _mapSize.x - 1) + xi.x = xi.x * scalar_type(0.5); + if (p.y == _mapSize.y - 1) + xi.y = xi.y * scalar_type(0.5); - const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) * _rcpMapSize; + const vector2_type directionUV = 
(vector2_type(p.x, p.y) + xi) / _mapSize; return directionUV; } From 23a7e12fd8006968fa73bd06f7fa5f63a0c5c1f6 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 6 Mar 2026 19:28:27 +0700 Subject: [PATCH 63/69] Add some todo comment for corner sampling flag --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index e4723f3667..a2b5ea1826 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -17,6 +17,7 @@ namespace hlsl namespace sampling { +// TODO: Add an option for corner sampling or centered sampling as boolean parameter template && From e8930ef548e47dbb80be37cece26e11d2750b769 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 7 Mar 2026 10:41:35 +0700 Subject: [PATCH 64/69] Add const modifier for binarySearch --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index a2b5ea1826..2e59bd1b25 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -56,7 +56,7 @@ struct HierarchicalLuminanceSampler return partition(xi, dummy); } - vector2_type binarySearch(const uint32_t2 coord) + vector2_type binarySearch(const uint32_t2 coord) NBL_CONST_MEMBER_FUNC { // We use _rcpWarpSize here for corner sampling. Corner sampling is a sampling mechanism where we map texel_index / map_size to the center of the texel instead of the edge of the texel. So uv.x == 0 is mapped to the center of the left most texel, and uv.x == width - 1 is mapped to the center of the right most texel. That's why the length of the domain is subtracted by 1 for each dimension. 
float32_t2 xi = float32_t2(coord) * _rcpWarpSize; From de4807ff37e0020085bda93ff1e323c9339db4e5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 7 Mar 2026 10:46:12 +0700 Subject: [PATCH 65/69] Add some temporary struct from pr #1001 --- .../hlsl/sampling/hierarchical_image.hlsl | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 2e59bd1b25..911d1dfcea 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -17,6 +17,47 @@ namespace hlsl namespace sampling { +// TODO(kevinyu): Temporary struct before PR #1001 merged to master +template +struct value_and_rcpPdf +{ + using this_t = value_and_rcpPdf; + + static this_t create(const V _value, const P _rcpPdf) + { + this_t retval; + retval._value = _value; + retval._rcpPdf = _rcpPdf; + return retval; + } + + V value() { return _value; } + P rcpPdf() { return _rcpPdf; } + + V _value; + P _rcpPdf; +}; + +template +struct value_and_pdf +{ + using this_t = value_and_pdf; + + static this_t create(const V _value, const P _pdf) + { + this_t retval; + retval._value = _value; + retval._pdf = _pdf; + return retval; + } + + V value() { return _value; } + P pdf() { return _pdf; } + + V _value; + P _pdf; +}; + // TODO: Add an option for corner sampling or centered sampling as boolean parameter template Date: Tue, 10 Mar 2026 18:38:07 +0700 Subject: [PATCH 66/69] Refactor hierarchical_image naming and concepts --- .../hlsl/sampling/hierarchical_image.hlsl | 167 +++++++++++++----- .../hierarchical_image/accessors.hlsl | 17 +- .../hierarchical_image/gen_warp.comp.hlsl | 27 +-- 3 files changed, 137 insertions(+), 74 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 911d1dfcea..a23af69d74 100644 --- 
a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -64,59 +64,78 @@ template && hierarchical_image::LuminanceReadAccessor ) -struct HierarchicalLuminanceSampler +struct HierarchicalWarpGenerator { using scalar_type = ScalarT; using vector2_type = vector; using vector4_type = vector; + using domain_type = vector2_type; + using codomain_type = vector2_type; + using sample_type = value_and_pdf; + using density_type = scalar_type; LuminanceAccessorT _map; + float32_t _rcpAvgLuma; float32_t2 _rcpWarpSize; uint16_t2 _mapSize; uint16_t _mip2x1 : 15; uint16_t _aspect2x1 : 1; - static HierarchicalLuminanceSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1, uint32_t2 warpSize) + static HierarchicalWarpGenerator create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1) { - HierarchicalLuminanceSampler result; + HierarchicalWarpGenerator result; result._map = lumaMap; - result._mapSize = vector2_type(mapSize); - result._rcpWarpSize = scalar_type(1.0) / vector2_type(warpSize - uint32_t2(1, 1)); + result._mapSize = mapSize; // Note: We use mapSize.y here because the currently the map aspect ratio can only be 1x1 or 2x1 - result._mip2x1 = findMSB(mapSize.y); + result._mip2x1 = _static_cast(findMSB(mapSize.y)); result._aspect2x1 = aspect2x1; return result; } - static bool __choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi) + static bool __choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi, NBL_REF_ARG(scalar_type) rcpPmf) NBL_CONST_MEMBER_FUNC { // numerical resilience against IEEE754 - scalar_type dummy = scalar_type(0); + scalar_type rcpChoiceProb = scalar_type(0); PartitionRandVariable partition; partition.leftProb = scalar_type(1) / (scalar_type(1) + (second / first)); - return partition(xi, dummy); + bool choseSecond = partition(xi, rcpChoiceProb); + rcpPmf *= 
rcpChoiceProb; + return choseSecond; } - vector2_type binarySearch(const uint32_t2 coord) NBL_CONST_MEMBER_FUNC + // Cannot use textureGather since we need to pass the mipLevel + vector4_type __texelGather(uint32_t2 coord, uint32_t level) NBL_CONST_MEMBER_FUNC + { + assert(coord.x < _mapSize.x - 1 && coord.y < _mapSize.y - 1); + const scalar_type v0, v1, v2, v3; + + return float32_t4( + _map.load(uint32_t3(coord, level), uint32_t2(0, 1)), + _map.load(uint32_t3(coord, level), uint32_t2(1, 1)), + _map.load(uint32_t3(coord, level), uint32_t2(1, 0)), + _map.load(uint32_t3(coord, level), uint32_t2(0, 0)) + ); + } + + sample_type generate(vector2_type xi) NBL_CONST_MEMBER_FUNC { - // We use _rcpWarpSize here for corner sampling. Corner sampling is a sampling mechanism where we map texel_index / map_size to the center of the texel instead of the edge of the texel. So uv.x == 0 is mapped to the center of the left most texel, and uv.x == width - 1 is mapped to the center of the right most texel. That's why the length of the domain is subtracted by 1 for each dimension. - float32_t2 xi = float32_t2(coord) * _rcpWarpSize; uint32_t2 p = uint32_t2(0, 0); + scalar_type rcpPmf = 1; if (_aspect2x1) { // do one split in the X axis first cause penultimate full mip would have been 2x1 - p.x = __choseSecond(_map.texelFetch(uint32_t2(0, 0), _mip2x1), _map.texelFetch(uint32_t2(1, 0), _mip2x1), xi.x) ? 1 : 0; + p.x = __choseSecond(_map.load(uint32_t2(0, 0), _mip2x1), _map.load(uint32_t2(1, 0), _mip2x1), xi.x, rcpPmf) ? 
1 : 0; } for (int i = _mip2x1 - 1; i >= 0; i--) { p <<= 1; - const vector4_type values = _map.texelGather(p, i); + const vector4_type values = __texelGather(p, i); scalar_type wx_0, wx_1; { const scalar_type wy_0 = values[3] + values[2]; const scalar_type wy_1 = values[1] + values[0]; - if (__choseSecond(wy_0, wy_1, xi.y)) + if (__choseSecond(wy_0, wy_1, xi.y, rcpPmf)) { p.y |= 1; wx_0 = values[0]; @@ -128,7 +147,7 @@ struct HierarchicalLuminanceSampler wx_1 = values[2]; } } - if (__choseSecond(wx_0, wx_1, xi.x)) + if (__choseSecond(wx_0, wx_1, xi.x, rcpPmf)) p.x |= 1; } @@ -145,24 +164,73 @@ struct HierarchicalLuminanceSampler xi.y = xi.y * scalar_type(0.5); const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / _mapSize; - return directionUV; + return sample_type::create(directionUV, (_mapSize.x * _mapSize.y) / rcpPmf); + } + + density_type forwardPdf(domain_type xi) NBL_CONST_MEMBER_FUNC + { + return generate(xi).pdf(); } - matrix sampleUvs(uint32_t2 sampleCoord) NBL_CONST_MEMBER_FUNC + // Doesn't comply with sampler concept. This class is extracted so can be used on warpmap generation without passing in unnecessary information like avgLuma. So, need to pass in avgLuma when calculating backwardPdf. 
+ density_type backwardPdf(codomain_type codomainVal, scalar_type rcpAvgLuma) NBL_CONST_MEMBER_FUNC { - const vector2_type dir0 = binarySearch(sampleCoord + vector2_type(0, 1)); - const vector2_type dir1 = binarySearch(sampleCoord + vector2_type(1, 1)); - const vector2_type dir2 = binarySearch(sampleCoord + vector2_type(1, 0)); - const vector2_type dir3 = binarySearch(sampleCoord); - return matrix( - dir0, - dir1, - dir2, - dir3 - ); + return _map.load(codomainVal) * rcpAvgLuma; } + }; +template && + hierarchical_image::LuminanceReadAccessor && + concepts::Warp + ) +struct HierarchicalWarpSampler +{ + using warp_generator_type = HierarchicalWarpGenerator; + using warp_sample_type = typename warp_generator_type::sample_type; + using scalar_type = ScalarT; + using density_type = scalar_type; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + using domain_type = vector2_type; + using codomain_type = vector3_type; + using sample_type = value_and_pdf; + + warp_generator_type _warpGenerator; + scalar_type _rcpAvgLuma; + + static HierarchicalWarpSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, scalar_type avgLuma, uint32_t2 mapSize, bool aspect2x1) + { + HierarchicalWarpSampler result; + result._warpGenerator = warp_generator_type::create(lumaMap, mapSize, aspect2x1); + result._rcpAvgLuma = scalar_type(1.0) / avgLuma; + return result; + } + + sample_type generate(domain_type xi) NBL_CONST_MEMBER_FUNC + { + const warp_sample_type warpSample = _warpGenerator.generate(xi); + const WarpResult postWarpResult = PostWarpT::warp(warpSample.value()); + return sample_type::create(postWarpResult.dst, postWarpResult.density * warpSample.pdf()); + } + + density_type forwardPdf(domain_type xi) NBL_CONST_MEMBER_FUNC + { + const warp_sample_type warpSample = _warpGenerator.generate(xi); + return PostWarpT::forwardDensity(warpSample.value()) * warpSample.pdf(); + } + + density_type backwardPdf(codomain_type codomainVal) 
NBL_CONST_MEMBER_FUNC + { + return PostWarpT::backwardPdf(codomainVal, _rcpAvgLuma) * _warpGenerator.backwardPdf(codomainVal); + } + +}; + + template && concepts::accessors::GenericReadAccessor && @@ -174,10 +242,15 @@ struct WarpmapSampler using vector2_type = vector; using vector3_type = vector; using vector4_type = vector; + using domain_type = vector2_type; + using codomain_type = vector3_type; + using sample_type = value_and_pdf; + using weight_type = scalar_type; + LuminanceAccessorT _lumaMap; HierarchicalSamplerT _warpMap; uint32_t2 _warpSize; - uint32_t2 _lastWarpPixel; + uint32_t _effectiveWarpArea; scalar_type _rcpAvgLuma; static WarpmapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, NBL_CONST_REF_ARG(HierarchicalSamplerT) warpMap, uint32_t2 warpSize, scalar_type avgLuma) @@ -186,21 +259,17 @@ struct WarpmapSampler result._lumaMap = lumaMap; result._warpMap = warpMap; result._warpSize = warpSize; - result._lastWarpPixel = warpSize - uint32_t2(1, 1); + result._effectiveWarpArea = (warpSize.x - 1) * (warpSize.y - 1); result._rcpAvgLuma = ScalarT(1.0) / avgLuma; return result; } - vector2_type inverseWarp_and_deferredPdf(NBL_REF_ARG(scalar_type) pdf, vector3_type direction) NBL_CONST_MEMBER_FUNC - { - vector2_type envmapUv = PostWarpT::inverseWarp(direction); - scalar_type luma; - _lumaMap.get(envmapUv, luma); - pdf = (luma * _rcpAvgLuma) * PostWarpT::backwardDensity(direction); - return envmapUv; - } + weight_type forwardWeight(domain_type xi) NBL_CONST_MEMBER_FUNC + { + return generate(xi).value(); + } - scalar_type deferredPdf(vector3_type direction) NBL_CONST_MEMBER_FUNC + weight_type backwardWeight(codomain_type direction) NBL_CONST_MEMBER_FUNC { vector2_type envmapUv = PostWarpT::inverseWarp(direction); scalar_type luma; @@ -208,13 +277,15 @@ struct WarpmapSampler return luma * _rcpAvgLuma * PostWarpT::backwardDensity(direction); } - vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(vector2_type) uv, vector2_type xi) 
NBL_CONST_MEMBER_FUNC + sample_type generate(vector2_type xi) NBL_CONST_MEMBER_FUNC { - const vector2_type texelCoord = xi * float32_t2(_lastWarpPixel); - - matrix uvs = _warpMap.sampleUvs(uint32_t2(texelCoord)); - + // Need to remap xi from [0,1] to [center of first texel, center of second texel] due to corner sampling + const vector2_type texelCoord = xi * (_warpSize - uint32_t2(1, 1)); const vector2_type interpolant = frac(texelCoord); + const vector2_type warpmapUv = (texelCoord + vector2_type(0.5)) / _warpSize; + matrix uvs; + _warpMap.gatherUv(warpmapUv, uvs); + const vector2_type xDiffs[] = { uvs[2] - uvs[3], @@ -225,18 +296,18 @@ struct WarpmapSampler xDiffs[1] * interpolant.x + uvs[0] }; const vector2_type yDiff = yVals[1] - yVals[0]; - uv = yDiff * interpolant.y + yVals[0]; + vector2_type uv = yDiff * interpolant.y + yVals[0]; const WarpResult warpResult = PostWarpT::warp(uv); const scalar_type detInterpolJacobian = determinant(matrix( lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx yDiff // second column dFdy - )) * _lastWarpPixel.x * _lastWarpPixel.y; + )) * _effectiveWarpArea; - pdf = abs(warpResult.density / detInterpolJacobian); + const scalar_type pdf = abs(warpResult.density / detInterpolJacobian); - return warpResult.dst; + return sample_type::create(warpResult.dst, pdf); } }; diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl index 7259d85082..1d8dc7c941 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl @@ -26,29 +26,30 @@ NBL_CONCEPT_BEGIN(3) #define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template texelFetch(coord,level)) , ::nbl::hlsl::is_same_v, ScalarT)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template 
texelGather(coord,level)) , ::nbl::hlsl::is_same_v, vector)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template load(coord,level)) , ::nbl::hlsl::is_same_v, ScalarT)) ); #undef level #undef coord #undef a #include -// sampleUvs return 4 UVs in a square for manual bilinear interpolation with differentiability +// gatherUvs return 4 UVs in a square for manual bilinear interpolation with differentiability // declare concept -#define NBL_CONCEPT_NAME HierarchicalSampler +#define NBL_CONCEPT_NAME WarpAccessor #define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) -#define NBL_CONCEPT_TPLT_PRM_NAMES (HierarchicalSamplerT)(ScalarT) +#define NBL_CONCEPT_TPLT_PRM_NAMES (WarpAccessorT)(ScalarT) // not the greatest syntax but works -#define NBL_CONCEPT_PARAM_0 (sampler,HierarchicalSamplerT) +#define NBL_CONCEPT_PARAM_0 (sampler,WarpAccessorT) #define NBL_CONCEPT_PARAM_1 (coord,vector) +#define NBL_CONCEPT_PARAM_2 (val, matrix) // start concept -NBL_CONCEPT_BEGIN(2) +NBL_CONCEPT_BEGIN(3) // need to be defined AFTER the concept begins #define sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define val NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sampler.template sampleUvs(coord)) , ::nbl::hlsl::is_same_v, matrix)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sampler.gatherUv(coord), val , ::nbl::hlsl::is_same_v, void)) ); #undef sampler #undef coord diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl index 06db01d0d0..5dac9da7ac 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl @@ -15,23 +15,12 @@ using namespace nbl::hlsl::sampling::hierarchical_image; struct LuminanceAccessor { - float32_t texelFetch(uint32_t2 coord, uint32_t level) + float32_t load(uint32_t2 coord, 
uint32_t level) NBL_CONST_MEMBER_FUNC { assert(coord.x < pc.warpMapWidth && coord.y < pc.warpMapHeight); return lumaMap.Load(uint32_t3(coord, level)); } - float32_t4 texelGather(uint32_t2 coord, uint32_t level) - { - assert(coord.x < pc.warpMapWidth - 1 && coord.y < pc.warpMapHeight - 1); - return float32_t4( - lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 1)), - lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 1)), - lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 0)), - lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 0)) - ); - } - }; [numthreads(GEN_WARP_WORKGROUP_DIM, GEN_WARP_WORKGROUP_DIM, 1)] @@ -40,15 +29,17 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) { if (threadID.x < pc.warpMapWidth && threadID.y < pc.warpMapHeight) { - using LuminanceSampler = HierarchicalLuminanceSampler; + using WarpGenerator = HierarchicalWarpGenerator; + + const LuminanceAccessor luminanceAccessor; + + const WarpGenerator warpGenerator = WarpGenerator::create(luminanceAccessor, uint32_t2(pc.lumaMapWidth, pc.lumaMapHeight), pc.lumaMapWidth != pc.lumaMapHeight); - LuminanceAccessor luminanceAccessor; - LuminanceSampler luminanceSampler = - LuminanceSampler::create(luminanceAccessor, uint32_t2(pc.lumaMapWidth, pc.lumaMapHeight), pc.lumaMapWidth != pc.lumaMapHeight, uint32_t2(pc.warpMapWidth, pc.warpMapHeight)); + const uint32_t2 pixelCoord = threadID.xy; - uint32_t2 pixelCoord = threadID.xy; + const float32_t2 xi = float32_t2(pixelCoord) / float32_t2(pc.warpMapWidth - 1, pc.warpMapHeight - 1); - outImage[pixelCoord] = luminanceSampler.binarySearch(pixelCoord); + outImage[pixelCoord] = warpGenerator.generate(xi).value(); } From 4dd4e1f522da9cf5ee4efb5d028fc7a801316584 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 10 Mar 2026 18:38:45 +0700 Subject: [PATCH 67/69] Fix indentation of hierarchical_image.hlsl --- .../hlsl/sampling/hierarchical_image.hlsl | 469 +++++++++--------- 1 file changed, 234 insertions(+), 235 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index a23af69d74..4998fcece9 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -21,93 +21,93 @@ namespace sampling template struct value_and_rcpPdf { - using this_t = value_and_rcpPdf; + using this_t = value_and_rcpPdf; - static this_t create(const V _value, const P _rcpPdf) - { - this_t retval; - retval._value = _value; - retval._rcpPdf = _rcpPdf; - return retval; - } + static this_t create(const V _value, const P _rcpPdf) + { + this_t retval; + retval._value = _value; + retval._rcpPdf = _rcpPdf; + return retval; + } - V value() { return _value; } - P rcpPdf() { return _rcpPdf; } + V value() { return _value; } + P rcpPdf() { return _rcpPdf; } - V _value; - P _rcpPdf; + V _value; + P _rcpPdf; }; template struct value_and_pdf { - using this_t = value_and_pdf; + using this_t = value_and_pdf; - static this_t create(const V _value, const P _pdf) - { - this_t retval; - retval._value = _value; - retval._pdf = _pdf; - return retval; - } + static this_t create(const V _value, const P _pdf) + { + this_t retval; + retval._value = _value; + retval._pdf = _pdf; + return retval; + } - V value() { return _value; } - P pdf() { return _pdf; } + V value() { return _value; } + P pdf() { return _pdf; } - V _value; - P _pdf; + V _value; + P _pdf; }; // TODO: Add an option for corner sampling or centered sampling as boolean parameter template && - hierarchical_image::LuminanceReadAccessor - ) + is_scalar_v && + hierarchical_image::LuminanceReadAccessor + ) struct HierarchicalWarpGenerator { - using scalar_type = ScalarT; - using vector2_type = vector; - using vector4_type = vector; - using domain_type = vector2_type; - using codomain_type = vector2_type; - using sample_type = value_and_pdf; - using density_type = scalar_type; - - LuminanceAccessorT _map; - float32_t _rcpAvgLuma; - 
float32_t2 _rcpWarpSize; - uint16_t2 _mapSize; - uint16_t _mip2x1 : 15; - uint16_t _aspect2x1 : 1; - - static HierarchicalWarpGenerator create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1) - { - HierarchicalWarpGenerator result; - result._map = lumaMap; - result._mapSize = mapSize; - // Note: We use mapSize.y here because the currently the map aspect ratio can only be 1x1 or 2x1 - result._mip2x1 = _static_cast(findMSB(mapSize.y)); - result._aspect2x1 = aspect2x1; - return result; - } - - static bool __choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi, NBL_REF_ARG(scalar_type) rcpPmf) NBL_CONST_MEMBER_FUNC - { - // numerical resilience against IEEE754 - scalar_type rcpChoiceProb = scalar_type(0); - PartitionRandVariable partition; - partition.leftProb = scalar_type(1) / (scalar_type(1) + (second / first)); - bool choseSecond = partition(xi, rcpChoiceProb); - rcpPmf *= rcpChoiceProb; - return choseSecond; - } - - // Cannot use textureGather since we need to pass the mipLevel - vector4_type __texelGather(uint32_t2 coord, uint32_t level) NBL_CONST_MEMBER_FUNC - { + using scalar_type = ScalarT; + using vector2_type = vector; + using vector4_type = vector; + using domain_type = vector2_type; + using codomain_type = vector2_type; + using sample_type = value_and_pdf; + using density_type = scalar_type; + + LuminanceAccessorT _map; + float32_t _rcpAvgLuma; + float32_t2 _rcpWarpSize; + uint16_t2 _mapSize; + uint16_t _mip2x1 : 15; + uint16_t _aspect2x1 : 1; + + static HierarchicalWarpGenerator create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1) + { + HierarchicalWarpGenerator result; + result._map = lumaMap; + result._mapSize = mapSize; + // Note: We use mapSize.y here because the currently the map aspect ratio can only be 1x1 or 2x1 + result._mip2x1 = _static_cast(findMSB(mapSize.y)); + result._aspect2x1 = aspect2x1; + return result; + } + + static bool 
__choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi, NBL_REF_ARG(scalar_type) rcpPmf) NBL_CONST_MEMBER_FUNC + { + // numerical resilience against IEEE754 + scalar_type rcpChoiceProb = scalar_type(0); + PartitionRandVariable partition; + partition.leftProb = scalar_type(1) / (scalar_type(1) + (second / first)); + bool choseSecond = partition(xi, rcpChoiceProb); + rcpPmf *= rcpChoiceProb; + return choseSecond; + } + + // Cannot use textureGather since we need to pass the mipLevel + vector4_type __texelGather(uint32_t2 coord, uint32_t level) NBL_CONST_MEMBER_FUNC + { assert(coord.x < _mapSize.x - 1 && coord.y < _mapSize.y - 1); - const scalar_type v0, v1, v2, v3; + const scalar_type v0, v1, v2, v3; return float32_t4( _map.load(uint32_t3(coord, level), uint32_t2(0, 1)), @@ -115,68 +115,68 @@ struct HierarchicalWarpGenerator _map.load(uint32_t3(coord, level), uint32_t2(1, 0)), _map.load(uint32_t3(coord, level), uint32_t2(0, 0)) ); - } - - sample_type generate(vector2_type xi) NBL_CONST_MEMBER_FUNC - { - uint32_t2 p = uint32_t2(0, 0); - - scalar_type rcpPmf = 1; - if (_aspect2x1) { - // do one split in the X axis first cause penultimate full mip would have been 2x1 - p.x = __choseSecond(_map.load(uint32_t2(0, 0), _mip2x1), _map.load(uint32_t2(1, 0), _mip2x1), xi.x, rcpPmf) ? 
1 : 0; - } - - for (int i = _mip2x1 - 1; i >= 0; i--) - { - p <<= 1; - const vector4_type values = __texelGather(p, i); - scalar_type wx_0, wx_1; - { - const scalar_type wy_0 = values[3] + values[2]; - const scalar_type wy_1 = values[1] + values[0]; - if (__choseSecond(wy_0, wy_1, xi.y, rcpPmf)) - { - p.y |= 1; - wx_0 = values[0]; - wx_1 = values[1]; - } - else - { - wx_0 = values[3]; - wx_1 = values[2]; - } + } + + sample_type generate(vector2_type xi) NBL_CONST_MEMBER_FUNC + { + uint32_t2 p = uint32_t2(0, 0); + + scalar_type rcpPmf = 1; + if (_aspect2x1) { + // do one split in the X axis first cause penultimate full mip would have been 2x1 + p.x = __choseSecond(_map.load(uint32_t2(0, 0), _mip2x1), _map.load(uint32_t2(1, 0), _mip2x1), xi.x, rcpPmf) ? 1 : 0; + } + + for (int i = _mip2x1 - 1; i >= 0; i--) + { + p <<= 1; + const vector4_type values = __texelGather(p, i); + scalar_type wx_0, wx_1; + { + const scalar_type wy_0 = values[3] + values[2]; + const scalar_type wy_1 = values[1] + values[0]; + if (__choseSecond(wy_0, wy_1, xi.y, rcpPmf)) + { + p.y |= 1; + wx_0 = values[0]; + wx_1 = values[1]; + } + else + { + wx_0 = values[3]; + wx_1 = values[2]; + } } if (__choseSecond(wx_0, wx_1, xi.x, rcpPmf)) p.x |= 1; - } - - - // If we don`t add xi, the sample will clump to the lowest corner of environment map texel. Each time we call PartitionRandVariable(), the output xi is the new xi that determines how left and right(or top and bottom for y axis) to choose the child partition. It means that if for some input xi, the output xi = 0, then the input xi is the edge of choosing this partition and the previous partition, and vice versa, if output xi = 1, then the input xi is the edge of choosing this partition and the next partition. Hence, by adding xi to the lower corner of the texel, we create a gradual transition from one pixel to another. Without adding output xi, the calculation of jacobian using the difference of sample value would not work. 
- // Since we want to do corner sampling. We have to handle edge texels as corner cases. Remember, in corner sampling we map uv [0,1] to [center of first texel, center of last texel]. So when p is an edge texel, we have to remap xi. [0.5, 1] when p == 0, and [0.5, 1] when p == length - 1. - if (p.x == 0) - xi.x = xi.x * scalar_type(0.5) + scalar_type(0.5); - if (p.y == 0) - xi.y = xi.y * scalar_type(0.5) + scalar_type(0.5); - if (p.x == _mapSize.x - 1) - xi.x = xi.x * scalar_type(0.5); - if (p.y == _mapSize.y - 1) - xi.y = xi.y * scalar_type(0.5); - - const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / _mapSize; - return sample_type::create(directionUV, (_mapSize.x * _mapSize.y) / rcpPmf); - } - - density_type forwardPdf(domain_type xi) NBL_CONST_MEMBER_FUNC - { - return generate(xi).pdf(); - } - - // Doesn't comply with sampler concept. This class is extracted so can be used on warpmap generation without passing in unnecessary information like avgLuma. So, need to pass in avgLuma when calculating backwardPdf. - density_type backwardPdf(codomain_type codomainVal, scalar_type rcpAvgLuma) NBL_CONST_MEMBER_FUNC - { - return _map.load(codomainVal) * rcpAvgLuma; - } + } + + + // If we don`t add xi, the sample will clump to the lowest corner of environment map texel. Each time we call PartitionRandVariable(), the output xi is the new xi that determines how left and right(or top and bottom for y axis) to choose the child partition. It means that if for some input xi, the output xi = 0, then the input xi is the edge of choosing this partition and the previous partition, and vice versa, if output xi = 1, then the input xi is the edge of choosing this partition and the next partition. Hence, by adding xi to the lower corner of the texel, we create a gradual transition from one pixel to another. Without adding output xi, the calculation of jacobian using the difference of sample value would not work. + // Since we want to do corner sampling. 
We have to handle edge texels as corner cases. Remember, in corner sampling we map uv [0,1] to [center of first texel, center of last texel]. So when p is an edge texel, we have to remap xi. [0.5, 1] when p == 0, and [0.5, 1] when p == length - 1. + if (p.x == 0) + xi.x = xi.x * scalar_type(0.5) + scalar_type(0.5); + if (p.y == 0) + xi.y = xi.y * scalar_type(0.5) + scalar_type(0.5); + if (p.x == _mapSize.x - 1) + xi.x = xi.x * scalar_type(0.5); + if (p.y == _mapSize.y - 1) + xi.y = xi.y * scalar_type(0.5); + + const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / _mapSize; + return sample_type::create(directionUV, (_mapSize.x * _mapSize.y) / rcpPmf); + } + + density_type forwardPdf(domain_type xi) NBL_CONST_MEMBER_FUNC + { + return generate(xi).pdf(); + } + + // Doesn't comply with sampler concept. This class is extracted so can be used on warpmap generation without passing in unnecessary information like avgLuma. So, need to pass in avgLuma when calculating backwardPdf. + density_type backwardPdf(codomain_type codomainVal, scalar_type rcpAvgLuma) NBL_CONST_MEMBER_FUNC + { + return _map.load(codomainVal) * rcpAvgLuma; + } }; @@ -184,48 +184,48 @@ template && hierarchical_image::LuminanceReadAccessor && - concepts::Warp + concepts::Warp ) struct HierarchicalWarpSampler { - using warp_generator_type = HierarchicalWarpGenerator; - using warp_sample_type = typename warp_generator_type::sample_type; - using scalar_type = ScalarT; - using density_type = scalar_type; - using vector2_type = vector; - using vector3_type = vector; - using vector4_type = vector; - using domain_type = vector2_type; - using codomain_type = vector3_type; - using sample_type = value_and_pdf; + using warp_generator_type = HierarchicalWarpGenerator; + using warp_sample_type = typename warp_generator_type::sample_type; + using scalar_type = ScalarT; + using density_type = scalar_type; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + using 
domain_type = vector2_type; + using codomain_type = vector3_type; + using sample_type = value_and_pdf; - warp_generator_type _warpGenerator; - scalar_type _rcpAvgLuma; - - static HierarchicalWarpSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, scalar_type avgLuma, uint32_t2 mapSize, bool aspect2x1) - { - HierarchicalWarpSampler result; - result._warpGenerator = warp_generator_type::create(lumaMap, mapSize, aspect2x1); - result._rcpAvgLuma = scalar_type(1.0) / avgLuma; - return result; - } - - sample_type generate(domain_type xi) NBL_CONST_MEMBER_FUNC - { - const warp_sample_type warpSample = _warpGenerator.generate(xi); - const WarpResult postWarpResult = PostWarpT::warp(warpSample.value()); - return sample_type::create(postWarpResult.dst, postWarpResult.density * warpSample.pdf()); - } - - density_type forwardPdf(domain_type xi) NBL_CONST_MEMBER_FUNC + warp_generator_type _warpGenerator; + scalar_type _rcpAvgLuma; + + static HierarchicalWarpSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, scalar_type avgLuma, uint32_t2 mapSize, bool aspect2x1) + { + HierarchicalWarpSampler result; + result._warpGenerator = warp_generator_type::create(lumaMap, mapSize, aspect2x1); + result._rcpAvgLuma = scalar_type(1.0) / avgLuma; + return result; + } + + sample_type generate(domain_type xi) NBL_CONST_MEMBER_FUNC { - const warp_sample_type warpSample = _warpGenerator.generate(xi); - return PostWarpT::forwardDensity(warpSample.value()) * warpSample.pdf(); + const warp_sample_type warpSample = _warpGenerator.generate(xi); + const WarpResult postWarpResult = PostWarpT::warp(warpSample.value()); + return sample_type::create(postWarpResult.dst, postWarpResult.density * warpSample.pdf()); } - density_type backwardPdf(codomain_type codomainVal) NBL_CONST_MEMBER_FUNC + density_type forwardPdf(domain_type xi) NBL_CONST_MEMBER_FUNC { - return PostWarpT::backwardPdf(codomainVal, _rcpAvgLuma) * _warpGenerator.backwardPdf(codomainVal); + const warp_sample_type 
warpSample = _warpGenerator.generate(xi); + return PostWarpT::forwardDensity(warpSample.value()) * warpSample.pdf(); + } + + density_type backwardPdf(codomain_type codomainVal) NBL_CONST_MEMBER_FUNC + { + return PostWarpT::backwardPdf(codomainVal, _rcpAvgLuma) * _warpGenerator.backwardPdf(codomainVal); } }; @@ -233,82 +233,81 @@ struct HierarchicalWarpSampler template && - concepts::accessors::GenericReadAccessor && - hierarchical_image::HierarchicalSampler && - concepts::Warp) + concepts::accessors::GenericReadAccessor && + hierarchical_image::HierarchicalSampler && + concepts::Warp) struct WarpmapSampler { - using scalar_type = ScalarT; - using vector2_type = vector; - using vector3_type = vector; - using vector4_type = vector; - using domain_type = vector2_type; - using codomain_type = vector3_type; - using sample_type = value_and_pdf; - using weight_type = scalar_type; - - LuminanceAccessorT _lumaMap; - HierarchicalSamplerT _warpMap; - uint32_t2 _warpSize; - uint32_t _effectiveWarpArea; - scalar_type _rcpAvgLuma; - - static WarpmapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, NBL_CONST_REF_ARG(HierarchicalSamplerT) warpMap, uint32_t2 warpSize, scalar_type avgLuma) - { - WarpmapSampler result; - result._lumaMap = lumaMap; - result._warpMap = warpMap; - result._warpSize = warpSize; - result._effectiveWarpArea = (warpSize.x - 1) * (warpSize.y - 1); - result._rcpAvgLuma = ScalarT(1.0) / avgLuma; - return result; - } - - weight_type forwardWeight(domain_type xi) NBL_CONST_MEMBER_FUNC - { - return generate(xi).value(); - } - - weight_type backwardWeight(codomain_type direction) NBL_CONST_MEMBER_FUNC - { - vector2_type envmapUv = PostWarpT::inverseWarp(direction); - scalar_type luma; - _lumaMap.get(envmapUv, luma); - return luma * _rcpAvgLuma * PostWarpT::backwardDensity(direction); - } - - sample_type generate(vector2_type xi) NBL_CONST_MEMBER_FUNC - { - // Need to remap xi from [0,1] to [center of first texel, center of second texel] due to corner 
sampling - const vector2_type texelCoord = xi * (_warpSize - uint32_t2(1, 1)); - const vector2_type interpolant = frac(texelCoord); - const vector2_type warpmapUv = (texelCoord + vector2_type(0.5)) / _warpSize; - matrix uvs; - _warpMap.gatherUv(warpmapUv, uvs); - - - const vector2_type xDiffs[] = { - uvs[2] - uvs[3], - uvs[1] - uvs[0] - }; - const vector2_type yVals[] = { - xDiffs[0] * interpolant.x + uvs[3], - xDiffs[1] * interpolant.x + uvs[0] - }; - const vector2_type yDiff = yVals[1] - yVals[0]; - vector2_type uv = yDiff * interpolant.y + yVals[0]; - - const WarpResult warpResult = PostWarpT::warp(uv); - - const scalar_type detInterpolJacobian = determinant(matrix( - lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx - yDiff // second column dFdy - )) * _effectiveWarpArea; - - const scalar_type pdf = abs(warpResult.density / detInterpolJacobian); - - return sample_type::create(warpResult.dst, pdf); - } + using scalar_type = ScalarT; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + using domain_type = vector2_type; + using codomain_type = vector3_type; + using sample_type = value_and_pdf; + using weight_type = scalar_type; + + LuminanceAccessorT _lumaMap; + HierarchicalSamplerT _warpMap; + uint32_t2 _warpSize; + uint32_t _effectiveWarpArea; + scalar_type _rcpAvgLuma; + + static WarpmapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, NBL_CONST_REF_ARG(HierarchicalSamplerT) warpMap, uint32_t2 warpSize, scalar_type avgLuma) + { + WarpmapSampler result; + result._lumaMap = lumaMap; + result._warpMap = warpMap; + result._warpSize = warpSize; + result._effectiveWarpArea = (warpSize.x - 1) * (warpSize.y - 1); + result._rcpAvgLuma = ScalarT(1.0) / avgLuma; + return result; + } + + weight_type forwardWeight(domain_type xi) NBL_CONST_MEMBER_FUNC + { + return generate(xi).value(); + } + + weight_type backwardWeight(codomain_type direction) NBL_CONST_MEMBER_FUNC + { + vector2_type envmapUv = 
PostWarpT::inverseWarp(direction); + scalar_type luma; + _lumaMap.get(envmapUv, luma); + return luma * _rcpAvgLuma * PostWarpT::backwardDensity(direction); + } + + sample_type generate(vector2_type xi) NBL_CONST_MEMBER_FUNC + { + // Need to remap xi from [0,1] to [center of first texel, center of second texel] due to corner sampling + const vector2_type texelCoord = xi * (_warpSize - uint32_t2(1, 1)); + const vector2_type interpolant = frac(texelCoord); + const vector2_type warpmapUv = (texelCoord + vector2_type(0.5)) / _warpSize; + matrix uvs; + _warpMap.gatherUv(warpmapUv, uvs); + + const vector2_type xDiffs[] = { + uvs[2] - uvs[3], + uvs[1] - uvs[0] + }; + const vector2_type yVals[] = { + xDiffs[0] * interpolant.x + uvs[3], + xDiffs[1] * interpolant.x + uvs[0] + }; + const vector2_type yDiff = yVals[1] - yVals[0]; + vector2_type uv = yDiff * interpolant.y + yVals[0]; + + const WarpResult warpResult = PostWarpT::warp(uv); + + const scalar_type detInterpolJacobian = determinant(matrix( + lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx + yDiff // second column dFdy + )) * _effectiveWarpArea; + + const scalar_type pdf = abs(warpResult.density / detInterpolJacobian); + + return sample_type::create(warpResult.dst, pdf); + } }; } From fe14c93ef5960de4a0d5b9734be2285ab9336995 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 10 Mar 2026 22:56:51 +0700 Subject: [PATCH 68/69] Small fixes --- .../builtin/hlsl/sampling/hierarchical_image.hlsl | 11 ++++------- .../sampling/hierarchical_image/accessors.hlsl | 14 +++++++++----- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 4998fcece9..006377ce11 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -234,7 +234,7 @@ struct HierarchicalWarpSampler template && 
concepts::accessors::GenericReadAccessor && - hierarchical_image::HierarchicalSampler && + hierarchical_image::WarpAccessor && concepts::Warp) struct WarpmapSampler { @@ -244,8 +244,8 @@ struct WarpmapSampler using vector4_type = vector; using domain_type = vector2_type; using codomain_type = vector3_type; - using sample_type = value_and_pdf; using weight_type = scalar_type; + using sample_type = value_and_pdf; LuminanceAccessorT _lumaMap; HierarchicalSamplerT _warpMap; @@ -279,12 +279,9 @@ struct WarpmapSampler sample_type generate(vector2_type xi) NBL_CONST_MEMBER_FUNC { - // Need to remap xi from [0,1] to [center of first texel, center of second texel] due to corner sampling - const vector2_type texelCoord = xi * (_warpSize - uint32_t2(1, 1)); - const vector2_type interpolant = frac(texelCoord); - const vector2_type warpmapUv = (texelCoord + vector2_type(0.5)) / _warpSize; + const vector2_type interpolant; matrix uvs; - _warpMap.gatherUv(warpmapUv, uvs); + _warpMap.gatherUv(xi, uvs, interpolant); const vector2_type xDiffs[] = { uvs[2] - uvs[3], diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl index 1d8dc7c941..360bc30bf0 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl @@ -39,20 +39,24 @@ NBL_CONCEPT_END( #define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) #define NBL_CONCEPT_TPLT_PRM_NAMES (WarpAccessorT)(ScalarT) // not the greatest syntax but works -#define NBL_CONCEPT_PARAM_0 (sampler,WarpAccessorT) +#define NBL_CONCEPT_PARAM_0 (accessor,WarpAccessorT) #define NBL_CONCEPT_PARAM_1 (coord,vector) #define NBL_CONCEPT_PARAM_2 (val, matrix) +#define NBL_CONCEPT_PARAM_3 (interpolant, vector) // start concept -NBL_CONCEPT_BEGIN(3) +NBL_CONCEPT_BEGIN(4) // need to be defined AFTER the concept begins -#define sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 
+#define accessor NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define val NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define interpolant NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sampler.gatherUv(coord), val , ::nbl::hlsl::is_same_v, void)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.gatherUv(coord, val, interpolant)), ::nbl::hlsl::is_same_v, void)) ); -#undef sampler +#undef accessor #undef coord +#undef val +#undef interpolant #include } From bb41942d8d7a2b7c9fb63b84e01119e9710cc987 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 11 Mar 2026 13:43:27 +0700 Subject: [PATCH 69/69] Remove superfluous member from WarpmapSampler --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 006377ce11..502173eec6 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -249,7 +249,6 @@ struct WarpmapSampler LuminanceAccessorT _lumaMap; HierarchicalSamplerT _warpMap; - uint32_t2 _warpSize; uint32_t _effectiveWarpArea; scalar_type _rcpAvgLuma; @@ -258,7 +257,6 @@ struct WarpmapSampler WarpmapSampler result; result._lumaMap = lumaMap; result._warpMap = warpMap; - result._warpSize = warpSize; result._effectiveWarpArea = (warpSize.x - 1) * (warpSize.y - 1); result._rcpAvgLuma = ScalarT(1.0) / avgLuma; return result;