diff --git a/3rdparty/Vulkan-Headers b/3rdparty/Vulkan-Headers index 3dda5a1a87..33d7f51258 160000 --- a/3rdparty/Vulkan-Headers +++ b/3rdparty/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 3dda5a1a87b62fdf3baf4680edc41c00e85a7a22 +Subproject commit 33d7f512583b8de44d1b6384aa1cf482f92e53e9 diff --git a/3rdparty/Vulkan-Tools b/3rdparty/Vulkan-Tools index 4b6f7101c1..761e7bf273 160000 --- a/3rdparty/Vulkan-Tools +++ b/3rdparty/Vulkan-Tools @@ -1 +1 @@ -Subproject commit 4b6f7101c15e09a8931f2f81c97146d0dfe68bc5 +Subproject commit 761e7bf2736f3ad326fdfc1b3c1543f4e669fd5c diff --git a/3rdparty/openexr b/3rdparty/openexr index aaf5f750d7..c8a74d9ac9 160000 --- a/3rdparty/openexr +++ b/3rdparty/openexr @@ -1 +1 @@ -Subproject commit aaf5f750d7a5fd117d79932d209f0e9816cbff1f +Subproject commit c8a74d9ac97dd579a47a7913f361a87349c0fffd diff --git a/CMakeLists.txt b/CMakeLists.txt index c21da262c0..41c0df13c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,7 +70,7 @@ else() message(STATUS "Vulkan SDK is not found") endif() -option(NBL_COMPILE_WITH_CUDA "Compile with CUDA interop?" OFF) +option(NBL_COMPILE_WITH_CUDA "Compile with CUDA interop?" ON) if(NBL_COMPILE_WITH_CUDA) find_package(CUDAToolkit REQUIRED) @@ -195,7 +195,7 @@ endif() option(NBL_BUILD_BULLET "Enable Bullet Physics building and integration?" OFF) option(NBL_BUILD_DOCS "Enable building documentation?" OFF) # No one has doxygen installed, plus we dont know when was the last time we generated working doxy and we'll use SphinX in the future option(NBL_ENABLE_PROJECT_JSON_CONFIG_VALIDATION "" ON) -option(NBL_EMBED_BUILTIN_RESOURCES "Embed built-in resources?" OFF) +option(NBL_EMBED_BUILTIN_RESOURCES "Embed built-in resources?" 
ON) option(NBL_ENABLE_DOCKER_INTEGRATION "Enables docker integration, if client is not found Docker Desktop will be installed" OFF) if (NBL_ENABLE_DOCKER_INTEGRATION) diff --git a/examples_tests b/examples_tests index 77f4b77500..b3e27cb339 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 77f4b775008a50cda066af5d611e6147a886f52e +Subproject commit b3e27cb339b97214dab7939b265ac899ffdd4d03 diff --git a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl index cc22595444..ab7a87c7dd 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl @@ -69,7 +69,7 @@ NBL_CONCEPT_END( #include template -NBL_BOOL_CONCEPT GenericDataAccessor = GenericWriteAccessor && GenericWriteAccessor; +NBL_BOOL_CONCEPT GenericDataAccessor = GenericReadAccessor && GenericWriteAccessor; } } diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl new file mode 100644 index 0000000000..502173eec6 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -0,0 +1,312 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_
+
+#include
+#include
+#include
+#include
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+
+// TODO(kevinyu): Temporary struct before PR #1001 merged to master
+// Pairs a sampled value with the reciprocal of its probability density.
+template
+struct value_and_rcpPdf
+{
+    using this_t = value_and_rcpPdf;
+
+    static this_t create(const V _value, const P _rcpPdf)
+    {
+        this_t retval;
+        retval._value = _value;
+        retval._rcpPdf = _rcpPdf;
+        return retval;
+    }
+
+    // Const-qualified so the accessors can be called on `const` samples when compiled as C++.
+    V value() NBL_CONST_MEMBER_FUNC { return _value; }
+    P rcpPdf() NBL_CONST_MEMBER_FUNC { return _rcpPdf; }
+
+    V _value;
+    P _rcpPdf;
+};
+
+// Pairs a sampled value with its probability density.
+template
+struct value_and_pdf
+{
+    using this_t = value_and_pdf;
+
+    static this_t create(const V _value, const P _pdf)
+    {
+        this_t retval;
+        retval._value = _value;
+        retval._pdf = _pdf;
+        return retval;
+    }
+
+    // Const-qualified so the accessors can be called on `const` samples when compiled as C++.
+    V value() NBL_CONST_MEMBER_FUNC { return _value; }
+    P pdf() NBL_CONST_MEMBER_FUNC { return _pdf; }
+
+    V _value;
+    P _pdf;
+};
+
+// Warps a 2D random number into a UV on the luminance map by descending its mip chain: at every
+// level one texel of a 2x2 footprint is picked with probability proportional to its luminance.
+// TODO: Add an option for corner sampling or centered sampling as boolean parameter
+template && 
+    hierarchical_image::LuminanceReadAccessor 
+    )
+struct HierarchicalWarpGenerator
+{
+    using scalar_type = ScalarT;
+    using vector2_type = vector;
+    using vector4_type = vector;
+    using domain_type = vector2_type;
+    using codomain_type = vector2_type;
+    using sample_type = value_and_pdf;
+    using density_type = scalar_type;
+
+    LuminanceAccessorT _map;
+    // NOTE(review): `_rcpAvgLuma` and `_rcpWarpSize` are never written by create() nor read in this struct.
+    float32_t _rcpAvgLuma;
+    float32_t2 _rcpWarpSize;
+    uint16_t2 _mapSize;
+    uint16_t _mip2x1 : 15;
+    uint16_t _aspect2x1 : 1;
+
+    // Builds a generator over `lumaMap`, whose level 0 is `mapSize` texels; `aspect2x1` selects the 2x1 layout.
+    static HierarchicalWarpGenerator create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1)
+    {
+        HierarchicalWarpGenerator result;
+        result._map = lumaMap;
+        result._mapSize = mapSize;
+        // Note: We use mapSize.y here because currently the map aspect ratio can only be 1x1 or 2x1
+        result._mip2x1 = _static_cast(findMSB(mapSize.y));
+        result._aspect2x1 = aspect2x1;
+        return result;
+    }
+
+    // Decides between two neighbouring weights. `xi` is handed by reference to the partition functor and
+    // `rcpPmf` is multiplied by the reciprocal choice probability it reports. Returns true when `second` was chosen.
+    // Static helper, hence no const-member qualifier (a `static ... const` member does not compile as C++).
+    static bool __choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi, NBL_REF_ARG(scalar_type) rcpPmf)
+    {
+        // numerical resilience against IEEE754
+        scalar_type rcpChoiceProb = scalar_type(0);
+        PartitionRandVariable partition;
+        partition.leftProb = scalar_type(1) / (scalar_type(1) + (second / first));
+        bool choseSecond = partition(xi, rcpChoiceProb);
+        rcpPmf *= rcpChoiceProb;
+        return choseSecond;
+    }
+
+    // Cannot use textureGather since we need to pass the mipLevel
+    // Returns the 2x2 footprint whose lowest texel is `coord`, in the order (0,1), (1,1), (1,0), (0,0).
+    vector4_type __texelGather(uint32_t2 coord, uint32_t level) NBL_CONST_MEMBER_FUNC
+    {
+        assert(coord.x < _mapSize.x - 1 && coord.y < _mapSize.y - 1);
+
+        // Uses the `load(coord, level)` form required by the LuminanceReadAccessor concept
+        // (the same form generate() uses for the top-level split).
+        return vector4_type(
+            _map.load(coord + uint32_t2(0, 1), level),
+            _map.load(coord + uint32_t2(1, 1), level),
+            _map.load(coord + uint32_t2(1, 0), level),
+            _map.load(coord, level)
+        );
+    }
+
+    // Maps `xi` to a UV on the luminance map and returns it together with its PDF.
+    sample_type generate(vector2_type xi) NBL_CONST_MEMBER_FUNC
+    {
+        uint32_t2 p = uint32_t2(0, 0);
+
+        scalar_type rcpPmf = 1;
+        if (_aspect2x1) {
+            // do one split in the X axis first cause penultimate full mip would have been 2x1
+            p.x = __choseSecond(_map.load(uint32_t2(0, 0), _mip2x1), _map.load(uint32_t2(1, 0), _mip2x1), xi.x, rcpPmf) ? 1 : 0;
+        }
+
+        for (int i = _mip2x1 - 1; i >= 0; i--)
+        {
+            p <<= 1;
+            const vector4_type values = __texelGather(p, i);
+            scalar_type wx_0, wx_1;
+            {
+                // pick the row first, then the column inside the chosen row
+                const scalar_type wy_0 = values[3] + values[2];
+                const scalar_type wy_1 = values[1] + values[0];
+                if (__choseSecond(wy_0, wy_1, xi.y, rcpPmf))
+                {
+                    p.y |= 1;
+                    wx_0 = values[0];
+                    wx_1 = values[1];
+                }
+                else
+                {
+                    wx_0 = values[3];
+                    wx_1 = values[2];
+                }
+            }
+            if (__choseSecond(wx_0, wx_1, xi.x, rcpPmf))
+                p.x |= 1;
+        }
+
+
+        // If we don't add xi, the sample will clump to the lowest corner of the environment map texel.
+        // Each partition step rewrites xi: the output xi tells where inside the chosen child the input xi
+        // fell. An output of 0 means the input sat on the border with the previous partition, an output of 1
+        // means it sat on the border with the next one. Adding xi to the lower corner of the texel therefore
+        // gives a gradual transition from one texel to the next; without it, computing the Jacobian from
+        // differences of sample values would not work.
+        // Since we want corner sampling, edge texels are special cases: uv [0,1] maps to
+        // [center of first texel, center of last texel], so xi is remapped to [0.5, 1] when p == 0
+        // and to [0, 0.5] when p == length - 1.
+        if (p.x == 0)
+            xi.x = xi.x * scalar_type(0.5) + scalar_type(0.5);
+        if (p.y == 0)
+            xi.y = xi.y * scalar_type(0.5) + scalar_type(0.5);
+        if (p.x == _mapSize.x - 1)
+            xi.x = xi.x * scalar_type(0.5);
+        if (p.y == _mapSize.y - 1)
+            xi.y = xi.y * scalar_type(0.5);
+
+        const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / _mapSize;
+        // Convert before multiplying: the product of two 16-bit extents can exceed the 16-bit range.
+        const scalar_type texelCount = scalar_type(_mapSize.x) * scalar_type(_mapSize.y);
+        return sample_type::create(directionUV, texelCount / rcpPmf);
+    }
+
+    // PDF of the sample that generate() produces for `xi`.
+    density_type forwardPdf(domain_type xi) NBL_CONST_MEMBER_FUNC
+    {
+        return generate(xi).pdf();
+    }
+
+    // Doesn't comply with sampler concept. This class is extracted so it can be used for warp map generation
+    // without passing in unnecessary information like avgLuma, so avgLuma has to be passed in here instead.
+    // NOTE(review): this calls a single-argument `load(uv)`, which the LuminanceReadAccessor concept does not
+    // require - confirm the accessor provides it or convert `codomainVal` to a texel coordinate + level.
+    density_type backwardPdf(codomain_type codomainVal, scalar_type rcpAvgLuma) NBL_CONST_MEMBER_FUNC
+    {
+        return _map.load(codomainVal) * rcpAvgLuma;
+    }
+
+};
+
+// Importance sampler that runs the hierarchical warp and then pushes the resulting UV through `PostWarpT`.
+template && 
+    hierarchical_image::LuminanceReadAccessor && 
+    concepts::Warp 
+    )
+struct HierarchicalWarpSampler
+{
+    using warp_generator_type = HierarchicalWarpGenerator;
+    using warp_sample_type = typename warp_generator_type::sample_type;
+    using scalar_type = ScalarT;
+    using density_type = scalar_type;
+    using vector2_type = vector;
+    using vector3_type = vector;
+    using vector4_type = vector;
+    using domain_type = vector2_type;
+    using codomain_type = vector3_type;
+    using sample_type = value_and_pdf;
+
+    warp_generator_type _warpGenerator;
+    scalar_type _rcpAvgLuma;
+
+    // `avgLuma` is stored as its reciprocal; the remaining arguments are forwarded to the generator.
+    static HierarchicalWarpSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, scalar_type avgLuma, uint32_t2 mapSize, bool aspect2x1)
+    {
+        HierarchicalWarpSampler result;
+        result._warpGenerator = warp_generator_type::create(lumaMap, mapSize, aspect2x1);
+        result._rcpAvgLuma = scalar_type(1.0) / avgLuma;
+        return result;
+    }
+
+    // Returns the post-warp destination and the product of the post-warp density and the UV-space PDF.
+    sample_type generate(domain_type xi) NBL_CONST_MEMBER_FUNC
+    {
+        const warp_sample_type warpSample = _warpGenerator.generate(xi);
+        const WarpResult postWarpResult = PostWarpT::warp(warpSample.value());
+        return sample_type::create(postWarpResult.dst, postWarpResult.density * warpSample.pdf());
+    }
+
+    // Same PDF as generate(xi), obtained through PostWarpT::forwardDensity.
+    density_type forwardPdf(domain_type xi) NBL_CONST_MEMBER_FUNC
+    {
+        const warp_sample_type warpSample = _warpGenerator.generate(xi);
+        return PostWarpT::forwardDensity(warpSample.value()) * warpSample.pdf();
+    }
+
+    // PDF of an already known destination value: the post-warp density at `codomainVal` times the
+    // luminance PDF at the UV it maps back to. The Warp concept names the density function
+    // `backwardDensity`, and the generator's backwardPdf expects a UV plus the reciprocal average luma.
+    density_type backwardPdf(codomain_type codomainVal) NBL_CONST_MEMBER_FUNC
+    {
+        return PostWarpT::backwardDensity(codomainVal) * _warpGenerator.backwardPdf(PostWarpT::inverseWarp(codomainVal), _rcpAvgLuma);
+    }
+
+};
+
+
+// Sampler that reads UVs from a precomputed warp map, interpolates them bilinearly by hand and derives
+// the PDF from the Jacobian of that interpolation.
+template && 
+    concepts::accessors::GenericReadAccessor && 
+    hierarchical_image::WarpAccessor && 
+    concepts::Warp)
+struct WarpmapSampler
+{
+    using scalar_type = ScalarT;
+    using vector2_type = vector;
+    using vector3_type = vector;
+    using vector4_type = vector;
+    using domain_type = vector2_type;
+    using codomain_type = vector3_type;
+    using weight_type = scalar_type;
+    using sample_type = value_and_pdf;
+
+    LuminanceAccessorT _lumaMap;
+    HierarchicalSamplerT _warpMap;
+    uint32_t _effectiveWarpArea;
+    scalar_type _rcpAvgLuma;
+
+    // `warpSize` is the warp map extent in texels; the effective area is (width - 1) * (height - 1).
+    static WarpmapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, NBL_CONST_REF_ARG(HierarchicalSamplerT) warpMap, uint32_t2 warpSize, scalar_type avgLuma)
+    {
+        WarpmapSampler result;
+        result._lumaMap = lumaMap;
+        result._warpMap = warpMap;
+        result._effectiveWarpArea = (warpSize.x - 1) * (warpSize.y - 1);
+        result._rcpAvgLuma = ScalarT(1.0) / avgLuma;
+        return result;
+    }
+
+    // Weight of the sample generate() produces for `xi`: its PDF (a scalar, unlike the sample value).
+    weight_type forwardWeight(domain_type xi) NBL_CONST_MEMBER_FUNC
+    {
+        return generate(xi).pdf();
+    }
+
+    // Weight of a known direction: luminance at its UV, scaled by the reciprocal average luma and the
+    // post-warp density.
+    weight_type backwardWeight(codomain_type direction) NBL_CONST_MEMBER_FUNC
+    {
+        vector2_type envmapUv = PostWarpT::inverseWarp(direction);
+        scalar_type luma;
+        _lumaMap.get(envmapUv, luma);
+        return luma * _rcpAvgLuma * PostWarpT::backwardDensity(direction);
+    }
+
+    // Fetches the four warp map UVs around `xi`, blends them and returns the warped result with its PDF.
+    sample_type generate(vector2_type xi) NBL_CONST_MEMBER_FUNC
+    {
+        // filled in by gatherUv, so it must not be const
+        vector2_type interpolant;
+        matrix uvs;
+        _warpMap.gatherUv(xi, uvs, interpolant);
+
+        const vector2_type xDiffs[] = {
+            uvs[2] - uvs[3],
+            uvs[1] - uvs[0]
+        };
+        const vector2_type yVals[] = {
+            xDiffs[0] * interpolant.x + uvs[3],
+            xDiffs[1] * interpolant.x + uvs[0]
+        };
+        const vector2_type yDiff = yVals[1] - yVals[0];
+        vector2_type uv = yDiff * interpolant.y + yVals[0];
+
+        const WarpResult warpResult = PostWarpT::warp(uv);
+
+        const scalar_type detInterpolJacobian = determinant(matrix(
+            lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx
+            yDiff // second column dFdy
+        )) * _effectiveWarpArea;
+
+        const scalar_type pdf = abs(warpResult.density / detInterpolJacobian);
+
+        return sample_type::create(warpResult.dst, pdf);
+    }
+};
+
+}
+}
+}
+
+#endif
diff --git 
a/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl
new file mode 100644
index 0000000000..360bc30bf0
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl
@@ -0,0 +1,67 @@
+#ifndef _NBL_BUILTIN_HLSL_HIERARCHICAL_IMAGE_ACCESSORS_INCLUDED_
+#define _NBL_BUILTIN_HLSL_HIERARCHICAL_IMAGE_ACCESSORS_INCLUDED_
+
+#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+namespace hierarchical_image
+{
+// LuminanceReadAccessor: `a.load(coord, level)` with a `uint32_t2` coordinate and a `uint32_t` mip level
+// must be valid and is checked against `ScalarT`.
+// declare concept
+#define NBL_CONCEPT_NAME LuminanceReadAccessor
+#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)
+#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(ScalarT)
+// not the greatest syntax but works
+#define NBL_CONCEPT_PARAM_0 (a,U)
+#define NBL_CONCEPT_PARAM_1 (coord,uint32_t2)
+#define NBL_CONCEPT_PARAM_2 (level,uint32_t)
+// start concept
+NBL_CONCEPT_BEGIN(3)
+// need to be defined AFTER the concept begins
+#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
+#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
+#define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2
+NBL_CONCEPT_END(
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template load(coord,level)) , ::nbl::hlsl::is_same_v, ScalarT))
+);
+#undef level
+#undef coord
+#undef a
+#include
+
+// WarpAccessor: `accessor.gatherUv(coord, val, interpolant)` must be valid and return void.
+// gatherUv returns 4 UVs in a square for manual bilinear interpolation with differentiability
+// declare concept
+#define NBL_CONCEPT_NAME WarpAccessor
+#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)
+#define NBL_CONCEPT_TPLT_PRM_NAMES (WarpAccessorT)(ScalarT)
+// not the greatest syntax but works
+#define NBL_CONCEPT_PARAM_0 (accessor,WarpAccessorT)
+#define NBL_CONCEPT_PARAM_1 (coord,vector)
+#define NBL_CONCEPT_PARAM_2 (val, matrix)
+#define NBL_CONCEPT_PARAM_3 (interpolant, vector)
+// start concept
+NBL_CONCEPT_BEGIN(4)
+// need to be defined AFTER the concept begins
+#define accessor NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
+#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
+#define val NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2
+#define interpolant NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3
+NBL_CONCEPT_END(
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.gatherUv(coord, val, interpolant)), ::nbl::hlsl::is_same_v, void))
+);
+#undef accessor
+#undef coord
+#undef val
+#undef interpolant
+#include
+
+}
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl
new file mode 100644
index 0000000000..a0fc20f3b0
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl
@@ -0,0 +1,38 @@
+#ifndef _NBL_HLSL_SAMPLING_HIERARCHICAL_IMAGE_COMMON_INCLUDED_
+#define _NBL_HLSL_SAMPLING_HIERARCHICAL_IMAGE_COMMON_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+namespace hierarchical_image
+{
+
+// Push constants of the luma generation pass; both extents are packed as 16-bit fields of one dword.
+struct SLumaGenPushConstants
+{
+    float32_t3 lumaRGBCoefficients;
+    uint32_t lumaMapWidth : 16;
+    uint32_t lumaMapHeight : 16;
+};
+
+// Push constants of the warp generation pass; every extent is a 16-bit field.
+struct SWarpGenPushConstants
+{
+    uint32_t lumaMapWidth : 16;
+    uint32_t lumaMapHeight : 16;
+    uint32_t warpMapWidth : 16;
+    uint32_t warpMapHeight : 16;
+};
+
+// Workgroup edge length used for the X and Y `numthreads` dimensions of the two compute shaders.
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t GEN_WARP_WORKGROUP_DIM = 16;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t GEN_LUMA_WORKGROUP_DIM = 16;
+
+}
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl
new file mode 100644
index 0000000000..e4c5abefd3
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl
@@ -0,0 +1,32 @@
+#include "common.hlsl"
+
+using namespace nbl;
+using namespace nbl::hlsl;
+using namespace nbl::hlsl::sampling::hierarchical_image;
+
+[[vk::push_constant]] SLumaGenPushConstants pc;
+
+// TODO: Use layer texture, to 
implement envmap importance sampling for cube map
+[[vk::binding(0, 0)]] Texture2D envMap;
+[[vk::binding(1, 0)]] RWTexture2D outImage;
+
+// Writes one luma texel per thread: the dot product of the environment map texel with the RGB
+// coefficients, multiplied by sin(pi * v) where v is taken at the texel center.
+[numthreads(GEN_LUMA_WORKGROUP_DIM, GEN_LUMA_WORKGROUP_DIM, 1)]
+[shader("compute")]
+void main(uint32_t3 threadID : SV_DispatchThreadID)
+{
+    if (all(threadID.xy < uint32_t2(pc.lumaMapWidth, pc.lumaMapHeight)))
+    {
+
+        const float uv_y = (float(threadID.y) + float(0.5f)) / pc.lumaMapHeight;
+        // Load takes integer texel coordinates plus the mip level, so build them as integers
+        const float32_t3 envMapSample = envMap.Load(uint32_t3(threadID.xy, 0));
+        float32_t luma = hlsl::dot(envMapSample, pc.lumaRGBCoefficients) * sin(numbers::pi * uv_y);
+
+        // We halve the luma of edge texels once per axis (so corner texels end up at a quarter)
+        // since we want to do "corner sampling" when generating the warp map.
+        if (threadID.x == 0 || threadID.x == (pc.lumaMapWidth - 1))
+            luma *= 0.5f;
+        if (threadID.y == 0 || threadID.y == (pc.lumaMapHeight - 1))
+            luma *= 0.5f;
+
+        outImage[threadID.xy] = luma;
+    }
+}
diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl
new file mode 100644
index 0000000000..5dac9da7ac
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl
@@ -0,0 +1,46 @@
+#include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl"
+#include "nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl"
+
+using namespace nbl;
+using namespace nbl::hlsl;
+using namespace nbl::hlsl::sampling;
+using namespace nbl::hlsl::sampling::hierarchical_image;
+
+[[vk::push_constant]] SWarpGenPushConstants pc;
+
+[[vk::binding(0, 0)]] Texture2D lumaMap;
+
+[[vk::binding(1, 0)]] RWTexture2D outImage;
+
+
+// Accessor handed to HierarchicalWarpGenerator: reads one texel of `lumaMap` at a given mip level.
+struct LuminanceAccessor
+{
+    float32_t load(uint32_t2 coord, uint32_t level) NBL_CONST_MEMBER_FUNC
+    {
+        // `coord` addresses the luma map (not the warp map): texel `coord` of mip `level` starts at
+        // `coord << level` in level-0 texels, which has to lie inside the level-0 extent.
+        assert((coord.x << level) < pc.lumaMapWidth && (coord.y << level) < pc.lumaMapHeight);
+        return lumaMap.Load(uint32_t3(coord, level));
+    }
+
+};
+
+// Writes one warp map texel per thread: the UV the hierarchical generator returns for the xi that
+// corresponds to this texel.
+[numthreads(GEN_WARP_WORKGROUP_DIM, GEN_WARP_WORKGROUP_DIM, 1)]
+[shader("compute")]
+void main(uint32_t3 threadID : SV_DispatchThreadID)
+{
+    if (threadID.x < pc.warpMapWidth && threadID.y < pc.warpMapHeight)
+    {
+        using WarpGenerator = HierarchicalWarpGenerator;
+
+        const LuminanceAccessor luminanceAccessor;
+
+        // the map is treated as 2x1 whenever its width and height differ
+        const WarpGenerator warpGenerator = WarpGenerator::create(luminanceAccessor, uint32_t2(pc.lumaMapWidth, pc.lumaMapHeight), pc.lumaMapWidth != pc.lumaMapHeight);
+
+        const uint32_t2 pixelCoord = threadID.xy;
+
+        // first texel -> xi = 0, last texel -> xi = 1
+        // NOTE(review): divides by zero when a warp map dimension is 1 - confirm callers never request that
+        const float32_t2 xi = float32_t2(pixelCoord) / float32_t2(pc.warpMapWidth - 1, pc.warpMapHeight - 1);
+
+        outImage[pixelCoord] = warpGenerator.generate(xi).value();
+    }
+
+
+}
diff --git a/include/nbl/builtin/hlsl/sampling/warp.hlsl b/include/nbl/builtin/hlsl/sampling/warp.hlsl
new file mode 100644
index 0000000000..37c1800f51
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/warp.hlsl
@@ -0,0 +1,55 @@
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_
+
+#include
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+
+// Result of a warp: the destination value and the density reported for it.
+template
+struct WarpResult
+{
+    CodomainT dst;
+    DensityT density;
+};
+}
+
+namespace concepts
+{
+
+// Warp: a type exposing `domain_type`, `codomain_type` and `density_type` together with
+// `warp(xi)`, `forwardDensity(xi)` and `backwardDensity(dst)`.
+// declare concept
+#define NBL_CONCEPT_NAME Warp
+#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)
+#define NBL_CONCEPT_TPLT_PRM_NAMES (U)
+// not the greatest syntax but works
+#define NBL_CONCEPT_PARAM_0 (warper,U)
+#define NBL_CONCEPT_PARAM_1 (xi,typename U::domain_type)
+#define NBL_CONCEPT_PARAM_2 (dst,typename U::codomain_type)
+// start concept
+NBL_CONCEPT_BEGIN(3)
+#define warper NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
+#define xi NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
+#define dst NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2
+NBL_CONCEPT_END(
+    ((NBL_CONCEPT_REQ_TYPE)(U::domain_type))
+    ((NBL_CONCEPT_REQ_TYPE)(U::codomain_type))
+    ((NBL_CONCEPT_REQ_TYPE)(U::density_type))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template warp(xi)) , ::nbl::hlsl::is_same_v, sampling::WarpResult))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template forwardDensity(xi)) , ::nbl::hlsl::is_same_v, typename U::density_type))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template backwardDensity(dst)) , ::nbl::hlsl::is_same_v, typename U::density_type))
+);
+#undef dst
+#undef xi
+#undef warper
+#include
+
+}
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl
new file mode 100644
index 0000000000..6094befe45
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl
@@ -0,0 +1,80 @@
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_WARP_SPHERICAL_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_WARP_SPHERICAL_INCLUDED_
+
+#include
+#include
+#include
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+namespace warp
+{
+
+// Warp between a UV and a direction: uv.x is turned into the angle 2*pi*uv.x around the Y axis and
+// uv.y into the angle pi*uv.y measured from +Y.
+template
+struct Spherical
+{
+    using density_type = T;
+    using domain_type = vector;
+    using codomain_type = vector;
+
+    // Returns the direction for `uv` together with the density 1 / (2 * pi^2 * sin(theta)).
+    template )
+    static WarpResult warp(const DomainT uv)
+    {
+        codomain_type dir;
+        dir.x = cos(uv.x * density_type(2) * numbers::pi);
+        dir.z = sqrt(density_type(1) - (dir.x * dir.x));
+        // the square root drops the sign of the sine, which is negative in the second half turn
+        if (uv.x > density_type(0.5))
+            dir.z = -dir.z;
+        const density_type theta = uv.y * numbers::pi;
+        const density_type cosTheta = cos(theta);
+        const density_type sinTheta = sqrt(density_type(1) - (cosTheta * cosTheta));
+        dir.xz *= sinTheta;
+        dir.y = cosTheta;
+
+        WarpResult warpResult;
+        warpResult.dst = dir;
+        // sinTheta is 0 at uv.y == 0 and uv.y == 1, where this division yields an infinite density
+        warpResult.density = density_type(1) / (density_type(2) * sinTheta * numbers::pi * numbers::pi);
+
+        return warpResult;
+    }
+
+    // Maps a direction back to its UV; uv.x is wrapped into [0, 1).
+    // NOTE(review): acos receives v.y unclamped, which presumably relies on `v` being normalized - confirm.
+    template )
+    static domain_type inverseWarp(const CodomainT v)
+    {
+        const density_type phi = atan2(v.z, v.x);
+        const density_type theta = acos(v.y);
+        density_type uv_x = phi * density_type(0.5) * numbers::inv_pi;
+        if (uv_x < density_type(0))
+            uv_x += density_type(1);
+        density_type uv_y = theta * numbers::inv_pi;
+        return domain_type(uv_x, uv_y);
+    }
+
+
+    // Density of warp(uv), computed from the UV alone.
+    template )
+    static density_type forwardDensity(const DomainT uv)
+    {
+        const density_type theta = uv.y * numbers::pi;
+        return density_type(1) / (sin(theta) * density_type(2) * numbers::pi * numbers::pi);
+
+    }
+
+    // The same density expressed through the direction: dst.y is taken as cos(theta).
+    template )
+    static density_type backwardDensity(const CodomainT dst)
+    {
+        const density_type cosTheta = dst.y;
+        const density_type sinTheta = sqrt(density_type(1) - (cosTheta * cosTheta));
+        return density_type(1) / (sinTheta * density_type(2) * numbers::pi * numbers::pi);
+    }
+};
+
+}
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
index 22c93ce193..aa395ad524 100644
--- a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
+++ b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
@@ -205,7 +205,7 @@ struct SArithmeticConfiguration
 #undef DEFINE_ASSIGN
     }
 
-    std::string getConfigTemplateStructString()
+    std::string getConfigTemplateStructString() NBL_CONST_MEMBER_FUNC
     {
         std::ostringstream os;
         os << "nbl::hlsl::workgroup2::ArithmeticConfiguration<" << WorkgroupSizeLog2 << "," << SubgroupSizeLog2 << "," << ItemsPerInvocation_0 << ">;";
diff --git a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h
deleted file mode 100644
index 678adf59a9..0000000000
--- a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h
+++ /dev/null
@@ -1,92 +0,0 @@
-// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
-// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ -#define _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ - -#include "nabla.h" -#include "nbl/video/IGPUShader.h" -#include "nbl/asset/ICPUShader.h" - -namespace nbl::ext::EnvmapImportanceSampling -{ - -class EnvmapImportanceSampling -{ - public: - EnvmapImportanceSampling(video::IVideoDriver* _driver) : m_driver(_driver) - {} - ~EnvmapImportanceSampling() = default; - - // Shader and Resources for Generating Luminance MipMaps from EnvMap - static constexpr uint32_t MaxMipCountLuminance = 13u; - static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u; - static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u; - - void initResources( - core::smart_refctd_ptr envmap, - uint32_t lumaGenWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension, - uint32_t warpMapGenWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension); - void deinitResources(); - - // returns if RIS should be enabled based on variance calculations - inline bool computeWarpMap(const float envMapRegularizationFactor, float& pdfNormalizationFactor) - { - [[maybe_unused]] float dummy; - return computeWarpMap(envMapRegularizationFactor,pdfNormalizationFactor,dummy); - } - bool computeWarpMap(const float envMapRegularizationFactor, float& pdfNormalizationFactor, float& maxEmittanceLuma); - - core::smart_refctd_ptr getLuminanceImageView() { return m_luminance; } - core::smart_refctd_ptr getWarpMapImageView() { return m_warpMap; } - - private: - #define uint uint32_t - struct uvec2 - { - uint x,y; - }; - struct vec2 - { - float x,y; - }; - struct vec3 - { - float x,y,z; - }; - #define vec4 core::vectorSIMDf - #define mat4 core::matrix4SIMD - #define mat4x3 core::matrix3x4SIMD - #include "nbl/builtin/glsl/ext/EnvmapImportanceSampling/structs.glsl" - #undef uint - #undef vec4 - #undef mat4 - #undef mat4x3 - inline uint32_t calcMeasurementBufferSize() 
const - { - return sizeof(nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t)*m_lumaWorkgroups[0]*m_lumaWorkgroups[1]; - } - #undef NBL_GLSL_EXT_ENVMAP_SAMPLING_LUMA_MEASUREMENTS - - uint32_t m_lumaWorkgroups[2]; - uint32_t m_warpWorkgroups[2]; - - core::smart_refctd_ptr m_luminance; - core::smart_refctd_ptr m_warpMap; // Warps Sample based on EnvMap Luminance - - core::smart_refctd_ptr m_lumaDS; - core::smart_refctd_ptr m_lumaMeasurePipeline; - core::smart_refctd_ptr m_lumaGenPipeline; - - // Shader and Resources for EnvironmentalMap Sample Warping - core::smart_refctd_ptr m_warpDS; - core::smart_refctd_ptr m_warpGPUShader; - core::smart_refctd_ptr m_warpPipeline; - - video::IVideoDriver* m_driver; -}; - -} - -#endif diff --git a/include/nbl/video/sampling/EnvmapSampler.h b/include/nbl/video/sampling/EnvmapSampler.h new file mode 100644 index 0000000000..948ec60f16 --- /dev/null +++ b/include/nbl/video/sampling/EnvmapSampler.h @@ -0,0 +1,147 @@ +#ifndef _NBL_CORE_ENVMAP_SAMPLER_INCLUDED_ +#define _NBL_CORE_ENVMAP_SAMPLER_INCLUDED_ + +#include "nbl/video/declarations.h" + +namespace nbl::video +{ + +class NBL_API2 EnvmapSampler final : public core::IReferenceCounted +{ + public: + + static constexpr uint32_t MaxMipCountLuminance = 13u; + static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u; + static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u; + + struct SCachedCreationParameters + { + core::smart_refctd_ptr utilities; + }; + + struct SCreationParameters : public SCachedCreationParameters + { + core::smart_refctd_ptr assetManager = nullptr; + core::smart_refctd_ptr envMap = nullptr; + uint8_t upscaleLog2 = 0; + + inline bool validate() const + { + const auto validation = std::to_array + ({ + std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), + std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), + std::make_pair(bool(envMap), "Invalid `creationParams.envMap` is 
nullptr!"), + }); + + system::logger_opt_ptr logger = utilities->getLogger(); + for (const auto& [ok, error] : validation) + if (!ok) + { + logger.log(error, system::ILogger::ELL_ERROR); + return false; + } + + assert(bool(assetManager->getSystem())); + + return true; + } + + }; + + static core::smart_refctd_ptr create(SCreationParameters&& params); + + static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device); + + static core::smart_refctd_ptr createGenWarpPipelineLayout(video::ILogicalDevice* device); + + //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included + static core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); + + static core::smart_refctd_ptr createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + + static core::smart_refctd_ptr createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + + static core::smart_refctd_ptr createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, std::string_view debugName = ""); + + static core::smart_refctd_ptr createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, std::string_view debugName = ""); + + void computeWarpMap(video::IQueue* queue); + + // use this to synchronize warp map after computeWarpMap call + nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getWarpMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT oldLayout); + + // use this to synchronize luma map after computeWarpMap call + nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getLumaMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT 
oldLayout); + + inline core::smart_refctd_ptr getLumaMapView() const + { + return m_lumaMap; + } + + inline core::smart_refctd_ptr getWarpMapView() const + { + return m_warpMap; + } + + inline hlsl::float32_t getAvgLuma() const + { + return m_avgLuma; + } + + protected: + struct ConstructorParams + { + SCachedCreationParameters creationParams; + hlsl::uint32_t2 lumaWorkgroupCount; + hlsl::uint32_t2 warpWorkgroupCount; + core::smart_refctd_ptr lumaMap; + core::smart_refctd_ptr warpMap; + core::smart_refctd_ptr genLumaPipeline; + core::smart_refctd_ptr genLumaDescriptorSet; + core::smart_refctd_ptr genWarpPipeline; + core::smart_refctd_ptr genWarpDescriptorSet; + }; + + explicit EnvmapSampler(ConstructorParams&& params) : + m_cachedCreationParams(std::move(params.creationParams)), + m_lumaWorkgroupCount(params.lumaWorkgroupCount), + m_warpWorkgroupCount(params.warpWorkgroupCount), + m_lumaMap(std::move(params.lumaMap)), + m_warpMap(std::move(params.warpMap)), + m_genLumaPipeline(std::move(params.genLumaPipeline)), + m_genLumaDescriptorSet(std::move(params.genLumaDescriptorSet)), + m_genWarpPipeline(std::move(params.genWarpPipeline)), + m_genWarpDescriptorSet(std::move(params.genWarpDescriptorSet)) + {} + + ~EnvmapSampler() override {} + + private: + + SCachedCreationParameters m_cachedCreationParams; + + hlsl::uint32_t2 m_lumaWorkgroupCount; + hlsl::uint32_t2 m_warpWorkgroupCount; + + hlsl::float32_t m_avgLuma; + + core::smart_refctd_ptr m_lumaMap; + core::smart_refctd_ptr m_warpMap; + + core::smart_refctd_ptr m_genLumaPipeline; + core::smart_refctd_ptr m_genLumaDescriptorSet; + + core::smart_refctd_ptr m_genWarpPipeline; + core::smart_refctd_ptr m_genWarpDescriptorSet; + +}; + +} +#endif diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 18a25c8619..2b8067c1dd 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -128,6 +128,7 @@ set(NBL_CORE_SOURCES core/alloc/refctd_memory_resource.cpp core/hash/blake.cpp ) + set(NBL_SYSTEM_SOURCES 
system/DefaultFuncPtrLoader.cpp system/IFileBase.cpp @@ -291,6 +292,9 @@ set(NBL_VIDEO_SOURCES # CUDA video/CCUDAHandler.cpp video/CCUDADevice.cpp + +# Sampling + video/sampling/EnvmapSampler.cpp ) set(NBL_SCENE_SOURCES diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index f27514c2c7..adfd903d02 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -281,6 +281,13 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_rectangle. LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/cos_weighted_spheres.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/quotient_and_pdf.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/uniform_spheres.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/warp.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/warps/spherical.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image/accessors.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image/common.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl") # LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ndarray_addressing.hlsl") # @@ -356,7 +363,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/workgroup2/shared_scan.hlsl") #Extensions LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/default.vert.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/text_rendering/msdf.hlsl") #memory LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/memory.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED 
"hlsl/memory_accessor.hlsl") diff --git a/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp deleted file mode 100644 index f11df5ce15..0000000000 --- a/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp +++ /dev/null @@ -1,426 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h" - -#include - -using namespace nbl; -using namespace nbl::asset; -using namespace nbl::video; -using namespace ext::EnvmapImportanceSampling; - - -static core::smart_refctd_ptr createTexture(nbl::video::IVideoDriver* _driver, const VkExtent3D extent, E_FORMAT format, uint32_t mipLevels=1u, uint32_t layers=0u) -{ - const auto real_layers = layers ? layers:1u; - - IGPUImage::SCreationParams imgparams; - imgparams.extent = extent; - imgparams.arrayLayers = real_layers; - imgparams.flags = static_cast(0); - imgparams.format = format; - imgparams.mipLevels = mipLevels; - imgparams.samples = IImage::ESCF_1_BIT; - imgparams.type = IImage::ET_2D; - - IGPUImageView::SCreationParams viewparams; - viewparams.flags = static_cast(0); - viewparams.format = format; - viewparams.image = _driver->createDeviceLocalGPUImageOnDedMem(std::move(imgparams)); - viewparams.viewType = layers ? 
IGPUImageView::ET_2D_ARRAY:IGPUImageView::ET_2D; - viewparams.subresourceRange.aspectMask = static_cast(0); - viewparams.subresourceRange.baseArrayLayer = 0u; - viewparams.subresourceRange.layerCount = real_layers; - viewparams.subresourceRange.baseMipLevel = 0u; - viewparams.subresourceRange.levelCount = mipLevels; - - return _driver->createGPUImageView(std::move(viewparams)); -} - -void EnvmapImportanceSampling::initResources(core::smart_refctd_ptr envmap, uint32_t lumaGenWorkgroupDimension, uint32_t warpMapGenWorkgroupDimension) -{ - const auto EnvmapExtent = envmap->getCreationParameters().image->getCreationParameters().extent; - // we don't need the 1x1 mip for anything - const uint32_t MipCountLuminance = IImage::calculateFullMipPyramidLevelCount(EnvmapExtent,IImage::ET_2D)-1; - const auto EnvMapPoTExtent = [MipCountLuminance]() -> VkExtent3D - { - const uint32_t width = 0x1u<>1u,1u }; - }(); - auto calcWorkgroups = [](uint32_t* workGroups, const VkExtent3D extent, const uint32_t workgroupDimension) - { - for (auto i=0; i<2; i++) - workGroups[i] = ((&extent.width)[i]-1u)/workgroupDimension+1u; - }; - - // TODO: Can we get away with R16_SFLOAT for the probabilities? 
- m_luminance = createTexture(m_driver,EnvMapPoTExtent,EF_R32_SFLOAT,MipCountLuminance); - calcWorkgroups(m_lumaWorkgroups,EnvMapPoTExtent,lumaGenWorkgroupDimension); - - // default make the warp-map same resolution as input envmap - // Format needs to be 32bit full precision float, because the Jacobian needs to accurately match PDF - const uint32_t upscale = 0; - const VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width<&& pipelineLayout) -> core::smart_refctd_ptr - { - const char* sourceFmt = - R"===(#version 430 core - -#define LUMA_MIP_MAP_GEN_WORKGROUP_DIM %u -#define WARP_MAP_GEN_WORKGROUP_DIM %u - -#include "%s" - -)==="; - - const size_t extraSize = 2u * 8u + 128u; - auto shader = core::make_smart_refctd_ptr(strlen(sourceFmt) + extraSize + 1u); - snprintf( - reinterpret_cast(shader->getPointer()), shader->getSize(), sourceFmt, - lumaGenWorkgroupDimension, - warpMapGenWorkgroupDimension, - shaderPath - ); - auto gpuShader = m_driver->createGPUShader(core::make_smart_refctd_ptr(std::move(shader), ICPUShader::buffer_contains_glsl)); - if (!gpuShader) - return nullptr; - - auto specializedShader = m_driver->createGPUSpecializedShader(gpuShader.get(), ISpecializedShader::SInfo{ nullptr,nullptr,"main",asset::ISpecializedShader::ESS_COMPUTE }); - if (!specializedShader) - return nullptr; - - return m_driver->createGPUComputePipeline(nullptr,std::move(pipelineLayout),std::move(specializedShader)); - }; - - // Create Everything - { - ISampler::SParams samplerParams; - samplerParams.TextureWrapU = samplerParams.TextureWrapV = samplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_EDGE; - samplerParams.MinFilter = ISampler::ETF_NEAREST; - samplerParams.MaxFilter = ISampler::ETF_LINEAR; - samplerParams.MipmapMode = ISampler::ESMM_NEAREST; - samplerParams.AnisotropicFilter = 0u; - samplerParams.CompareEnable = false; - - IGPUDescriptorSet::SDescriptorInfo lumaDescriptorInfo = {}; - lumaDescriptorInfo.desc = m_luminance; - lumaDescriptorInfo.image.sampler = nullptr; - - { - 
auto upscaleSampler = m_driver->createGPUSampler(samplerParams); - - constexpr auto lumaDescriptorCount = 3u; - IGPUDescriptorSetLayout::SBinding bindings[lumaDescriptorCount]; - bindings[0].binding = 0u; - bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER; - bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[0].count = 1u; - bindings[0].samplers = &upscaleSampler; - - bindings[1].binding = 1u; - bindings[1].type = asset::EDT_STORAGE_BUFFER_DYNAMIC; - bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[1].count = 1u; - - bindings[2].binding = 2u; - bindings[2].type = asset::EDT_STORAGE_IMAGE; - bindings[2].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[2].count = 1u; - - auto lumaDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+lumaDescriptorCount); - { - SPushConstantRange range{ ISpecializedShader::ESS_COMPUTE,0u,sizeof(nbl_glsl_ext_EnvmapSampling_LumaGenShaderData_t) }; - auto lumaPipelineLayout = m_driver->createGPUPipelineLayout(&range,&range+1u,core::smart_refctd_ptr(lumaDSLayout)); - m_lumaMeasurePipeline = genPipeline("nbl/builtin/glsl/ext/EnvmapImportanceSampling/measure_luma.comp",core::smart_refctd_ptr(lumaPipelineLayout)); - m_lumaGenPipeline = genPipeline("nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma.comp",std::move(lumaPipelineLayout)); - } - m_lumaDS = m_driver->createGPUDescriptorSet(std::move(lumaDSLayout)); - - { - IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo = {}; - envMapDescriptorInfo.desc = envmap; - envMapDescriptorInfo.image.sampler = nullptr; - envMapDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; - - IGPUDescriptorSet::SDescriptorInfo lumaMeasurementInfo = {}; - lumaMeasurementInfo.desc = core::smart_refctd_ptr(m_driver->getDefaultDownStreamingBuffer()->getBuffer()); - lumaMeasurementInfo.buffer = {0,calcMeasurementBufferSize()}; - - IGPUDescriptorSet::SWriteDescriptorSet writes[lumaDescriptorCount]; - for (auto i=0u; 
iupdateDescriptorSets(lumaDescriptorCount,writes,0u,nullptr); - } - } - - { - samplerParams.TextureWrapU = samplerParams.TextureWrapV = samplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_BORDER; - samplerParams.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK; - samplerParams.MaxFilter = ISampler::ETF_NEAREST; - auto lumaSampler = m_driver->createGPUSampler(samplerParams); - - constexpr auto warpDescriptorCount = 2u; - IGPUDescriptorSetLayout::SBinding bindings[warpDescriptorCount]; - bindings[0].binding = 0u; - bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER; - bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[0].count = 1; - bindings[0].samplers = &lumaSampler; - - bindings[1].binding = 1u; - bindings[1].type = asset::EDT_STORAGE_IMAGE; - bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[1].count = 1u; - - auto warpDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+warpDescriptorCount); - - m_warpPipeline = genPipeline( - "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp", - m_driver->createGPUPipelineLayout(nullptr,nullptr,core::smart_refctd_ptr(warpDSLayout)) - ); - - m_warpDS = m_driver->createGPUDescriptorSet(std::move(warpDSLayout)); - { - IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo = {}; - warpMapDescriptorInfo.desc = m_warpMap; - warpMapDescriptorInfo.image.sampler = nullptr; - warpMapDescriptorInfo.image.imageLayout = asset::EIL_GENERAL; - - IGPUDescriptorSet::SWriteDescriptorSet writes[warpDescriptorCount]; - for (auto i=0u; iupdateDescriptorSets(warpDescriptorCount,writes,0u,nullptr); - } - } - } -} - -void EnvmapImportanceSampling::deinitResources() -{ - m_lumaMeasurePipeline = nullptr; - m_lumaGenPipeline = nullptr; - m_lumaDS = nullptr; - - m_warpPipeline = nullptr; - m_warpDS = nullptr; - - m_warpMap = nullptr; - m_luminance = nullptr; -} - -bool EnvmapImportanceSampling::computeWarpMap(const float envMapRegularizationFactor, float& pdfNormalizationFactor, float& 
maxEmittanceLuma) -{ - bool enableRIS = false; - // - nbl_glsl_ext_EnvmapSampling_LumaGenShaderData_t pcData = {}; - pcData.luminanceScales.set(0.2126729f, 0.7151522f, 0.0721750f, 0.0f); - { - const auto imageExtent = m_luminance->getCreationParameters().image->getCreationParameters().extent; - pcData.lumaMapResolution = {imageExtent.width,imageExtent.height}; - } - - auto dynamicOffsets = core::make_refctd_dynamic_array>(1u); - auto lumaDispatch = [&](core::smart_refctd_ptr& pipeline,core::smart_refctd_dynamic_array* dynamicOffsets) - { - m_driver->bindComputePipeline(pipeline.get()); - m_driver->bindDescriptorSets(EPBP_COMPUTE,pipeline->getLayout(),0u,1u,&m_lumaDS.get(),dynamicOffsets); - m_driver->pushConstants(pipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData); - m_driver->dispatch(m_lumaWorkgroups[0],m_lumaWorkgroups[1],1); - }; - - // 3 seconds is a long time - constexpr uint64_t timeoutInNanoSeconds = 300000000000u; - - // Calculate directionality metric (0 uniform, 1 totally unidirectional) and new Regularization Factor. - // Ideally would want a better metric of how "concentrated" the energy is in one direction rather than variance, so it - // turns out that the first order spherical harmonic band and weighted (by luma) average of directions are the same thing. - float directionalityMetric = [&]() - { - maxEmittanceLuma = 0.f; - - const uint32_t size = calcMeasurementBufferSize(); - // remember that without initializing the address to be allocated to invalid_address you won't get an allocation! 
- auto downloadStagingArea = m_driver->getDefaultDownStreamingBuffer(); - const auto& address = dynamicOffsets->operator[](0) = std::remove_pointer::type::invalid_address; - // allocate - { - // common page size - const uint32_t alignment = 4096u; - const auto waitPoint = std::chrono::high_resolution_clock::now()+std::chrono::nanoseconds(timeoutInNanoSeconds); - auto unallocatedSize = downloadStagingArea->multi_alloc(waitPoint,1u,dynamicOffsets->data(),&size,&alignment); - if (unallocatedSize) - { - os::Printer::log("Could not download the buffer from the GPU!", ELL_ERROR); - return 0.f; - } - } - auto* data = reinterpret_cast(reinterpret_cast(downloadStagingArea->getBufferPointer())+address); - - // measure into buffer - lumaDispatch(m_lumaMeasurePipeline,&dynamicOffsets); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_ALL_BARRIER_BITS); // TODO: rethink when reimplementing in Vulkan - { - // place and wait for download fence - auto downloadFence = m_driver->placeFence(true); - auto result = downloadFence->waitCPU(timeoutInNanoSeconds,true); - // - if (result==E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED || result==E_DRIVER_FENCE_RETVAL::EDFR_FAIL) - { - os::Printer::log("Could not download the buffer from the GPU, fence not signalled!", ELL_ERROR); - downloadStagingArea->multi_free(1u,&address,&size,nullptr); - return 0.f; - } - // then invalidate the CPU cache of the mapping - if (downloadStagingArea->needsManualFlushOrInvalidate()) - m_driver->invalidateMappedMemoryRanges({ {downloadStagingArea->getBuffer()->getBoundMemory(),address,size} }); - } - - // reduce - core::vectorSIMDf avgDir; - { - const auto reduction = std::reduce( - data,data+size/sizeof(nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t), - nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t{0.f,0.f,0.f,0.f,0.f}, - [](nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t lhs, const nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t& rhs){ - lhs.xDirSum += rhs.xDirSum; - lhs.yDirSum += rhs.yDirSum; - lhs.zDirSum += 
rhs.zDirSum; - lhs.weightSum += rhs.weightSum; - if (lhs.maxLumamulti_free(1u,&address,&size,nullptr); - - avgDir /= avgDir.wwww(); - avgDir.w = 0.f; - // should it be length or length squared? - const float directionality = core::length(avgDir)[0]; - std::cout << "Final Luminance Directionality = " << directionality << std::endl; - // the only reason why we'd get a NaN would be because there's literally 0 luminance in the image - return core::isnan(directionality) ? 0.f:directionality; - }(); - - const float regularizationFactor = core::min(envMapRegularizationFactor*directionalityMetric,envMapRegularizationFactor); - std::cout << "New Regularization Factor based on Directionality = " << regularizationFactor << std::endl; - - constexpr float regularizationThreshold = 0.00001f; - enableRIS = regularizationFactor>=regularizationThreshold; - - // Calc Luma again with new Regularization Factor - { - pcData.luminanceScales *= regularizationFactor; - pcData.luminanceScales.w = 1.f-regularizationFactor; - lumaDispatch(m_lumaGenPipeline,&dynamicOffsets); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_ALL_BARRIER_BITS); // TODO: rethink when reimplementing in Vulkan - } - - // Calc Mipmaps - m_luminance->regenerateMipMapLevels(); - - // Download last mip level and get avg from it - { - const auto lumaImage = m_luminance->getCreationParameters().image; - - // - IImage::SBufferCopy copyRegion = {}; - { - copyRegion.bufferRowLength = 0u; - copyRegion.bufferImageHeight = 0u; - //copyRegion.imageSubresource.aspectMask = wait for Vulkan; - copyRegion.imageSubresource.mipLevel = lumaImage->getCreationParameters().mipLevels-1u; - copyRegion.imageSubresource.baseArrayLayer = 0u; - copyRegion.imageSubresource.layerCount = lumaImage->getCreationParameters().arrayLayers; - copyRegion.imageOffset = { 0u,0u,0u }; - const auto extent = lumaImage->getMipSize(copyRegion.imageSubresource.mipLevel); - copyRegion.imageExtent = { extent.x,extent.y,extent.z }; - } - const uint32_t 
lastMipTexelCount = copyRegion.imageSubresource.layerCount*copyRegion.imageExtent.depth*copyRegion.imageExtent.height*copyRegion.imageExtent.width; - const uint32_t size = lastMipTexelCount*asset::getTexelOrBlockBytesize(lumaImage->getCreationParameters().format); - - // remember that without initializing the address to be allocated to invalid_address you won't get an allocation! - auto downloadStagingArea = m_driver->getDefaultDownStreamingBuffer(); - uint32_t address = std::remove_pointer::type::invalid_address; - // allocate - { - // common page size - const uint32_t alignment = 4096u; - const auto waitPoint = std::chrono::high_resolution_clock::now()+std::chrono::nanoseconds(timeoutInNanoSeconds); - auto unallocatedSize = downloadStagingArea->multi_alloc(waitPoint,1u,&address,&size,&alignment); - if (unallocatedSize) - { - os::Printer::log("Could not download the last luma mip map level from the GPU!", ELL_ERROR); - return core::nan(); - } - } - - // - copyRegion.bufferOffset = address; - m_driver->copyImageToBuffer(lumaImage.get(),downloadStagingArea->getBuffer(),1,©Region); - - // place and wait for download fence - { - auto downloadFence = m_driver->placeFence(true); - auto result = downloadFence->waitCPU(timeoutInNanoSeconds,true); - // - if (result==E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED || result==E_DRIVER_FENCE_RETVAL::EDFR_FAIL) - { - os::Printer::log("Could not download the last luma mip map level from the GPU! 
Fence not Signalled!", ELL_ERROR); - downloadStagingArea->multi_free(1u,&address,&size,nullptr); - return core::nan(); - } - // then invalidate the CPU cache of the mapping - if (downloadStagingArea->needsManualFlushOrInvalidate()) - m_driver->invalidateMappedMemoryRanges({ {downloadStagingArea->getBuffer()->getBoundMemory(),address,size} }); - } - - // - { - const float* r32fData = reinterpret_cast(reinterpret_cast(downloadStagingArea->getBufferPointer())+address); - const auto avgVal = std::reduce(r32fData,r32fData+lastMipTexelCount)/float(lastMipTexelCount); - pdfNormalizationFactor = 1.0/(2.0*core::PI()*core::PI()*avgVal); - } - downloadStagingArea->multi_free(1u,&address,&size,nullptr); - } - - // Generate WarpMap - { - m_driver->bindComputePipeline(m_warpPipeline.get()); - m_driver->bindDescriptorSets(EPBP_COMPUTE,m_warpPipeline->getLayout(),0u,1u,&m_warpDS.get(),nullptr); - m_driver->dispatch(m_warpWorkgroups[0],m_warpWorkgroups[1],1); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_ALL_BARRIER_BITS); // TODO: rethink when reimplementing in Vulkan - } - - return enableRIS; -} - - diff --git a/src/nbl/video/sampling/EnvmapSampler.cpp b/src/nbl/video/sampling/EnvmapSampler.cpp new file mode 100644 index 0000000000..a436575da5 --- /dev/null +++ b/src/nbl/video/sampling/EnvmapSampler.cpp @@ -0,0 +1,780 @@ +#include "nbl/video/sampling/EnvmapSampler.h" +#include "nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl" +#include "nlohmann/detail/input/parser.hpp" + +using namespace nbl; +using namespace core; +using namespace video; +using namespace system; +using namespace asset; +using namespace hlsl; +using namespace nbl::hlsl::sampling::hierarchical_image; + +namespace nbl::video +{ + +class EnvmapSampler; + +namespace +{ + constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/core/builtin"; + + // image must have the first mip layout set to transfer src, and the rest to dst + void generateMipmap(video::IGPUCommandBuffer* cmdBuf, IGPUImage* image) + { + const 
auto mipLevels = image->getCreationParameters().mipLevels; + const auto extent = image->getCreationParameters().extent; + for (uint32_t srcMip_i = 0; srcMip_i < mipLevels-1; srcMip_i++) + { + + const IGPUCommandBuffer::SImageBlit blit = { + .srcMinCoord = {0, 0, 0}, + .srcMaxCoord = {extent.width >> (srcMip_i), extent.height >> (srcMip_i), 1}, + .dstMinCoord = {0, 0, 0}, + .dstMaxCoord = {extent.width >> srcMip_i + 1, extent.height >> srcMip_i + 1, 1}, + .layerCount = 1, + .srcBaseLayer = 0, + .dstBaseLayer = 0, + .srcMipLevel = srcMip_i, + .dstMipLevel = srcMip_i + 1, + .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + }; + cmdBuf->blitImage(image, IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, image, IImage::LAYOUT::TRANSFER_DST_OPTIMAL, { &blit, 1 }, IGPUSampler::E_TEXTURE_FILTER::ETF_LINEAR); + + // last mip no need to transition + if (srcMip_i + 1 == mipLevels - 1) break; + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barrier = { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT + } + }, + .image = image, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = srcMip_i + 1, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = {&barrier, 1} }); + + } + } + + core::smart_refctd_ptr createTexture(video::ILogicalDevice* device, const asset::VkExtent3D extent, E_FORMAT format, uint32_t mipLevels = 1u, uint32_t layers = 0u) + { + const auto real_layers = layers ? 
layers:1u; + + IGPUImage::SCreationParams imgParams; + imgParams.extent = extent; + imgParams.arrayLayers = real_layers; + imgParams.flags = static_cast(0); + imgParams.format = format; + imgParams.mipLevels = mipLevels; + imgParams.samples = IImage::ESCF_1_BIT; + imgParams.type = IImage::ET_2D; + imgParams.usage = IImage::EUF_STORAGE_BIT | IImage::EUF_TRANSFER_SRC_BIT | IImage::EUF_TRANSFER_DST_BIT | IImage::EUF_SAMPLED_BIT; + const auto image = device->createImage(std::move(imgParams)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + device->allocate(imageMemReqs, image.get()); + + IGPUImageView::SCreationParams viewparams; + viewparams.subUsages = IImage::EUF_STORAGE_BIT | IImage::EUF_SAMPLED_BIT; + viewparams.flags = static_cast(0); + viewparams.format = format; + viewparams.image = std::move(image); + viewparams.viewType = layers ? IGPUImageView::ET_2D_ARRAY:IGPUImageView::ET_2D; + viewparams.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; + viewparams.subresourceRange.baseArrayLayer = 0u; + viewparams.subresourceRange.layerCount = real_layers; + viewparams.subresourceRange.baseMipLevel = 0u; + viewparams.subresourceRange.levelCount = mipLevels; + + return device->createImageView(std::move(viewparams)); + } + + core::smart_refctd_ptr getShaderSource( asset::IAssetManager* assetManager, const char* filePath, system::ILogger* logger) + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = logger; + lparams.workingDirectory = NBL_EXT_MOUNT_ENTRY; + auto bundle = assetManager->getAsset(filePath, lparams); + if (bundle.getContents().empty() || bundle.getAssetType()!=IAsset::ET_SHADER) + { + const auto assetType = bundle.getAssetType(); + logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); + exit(-1); + } + auto firstAssetInBundle = bundle.getContents()[0]; + return smart_refctd_ptr_static_cast(firstAssetInBundle); + } +} + 
+core::smart_refctd_ptr EnvmapSampler::create(SCreationParameters&& params) +{ + auto* const logger = params.utilities->getLogger(); + + if (!params.validate()) + { + logger->log("Failed creation parameters validation!", ILogger::ELL_ERROR); + return nullptr; + } + + const auto EnvmapExtent = params.envMap->getCreationParameters().image->getCreationParameters().extent; + // we don't need the 1x1 mip for anything + const uint32_t MipCountLuminance = IImage::calculateFullMipPyramidLevelCount(EnvmapExtent,IImage::ET_2D)-1; + const auto EnvMapPoTExtent = [MipCountLuminance]() -> asset::VkExtent3D + { + const uint32_t width = 0x1u<>1u,1u }; + }(); + auto calcWorkgroupSize = [](const asset::VkExtent3D extent, const uint32_t workgroupDimension) -> uint32_t2 + { + return uint32_t2(extent.width - 1, extent.height - 1) / workgroupDimension + uint32_t2(1); + }; + + const auto device = params.utilities->getLogicalDevice(); + + ConstructorParams constructorParams; + + constructorParams.lumaWorkgroupCount = calcWorkgroupSize(EnvMapPoTExtent, GEN_LUMA_WORKGROUP_DIM); + constructorParams.lumaMap = createLumaMap(device, EnvMapPoTExtent, MipCountLuminance); + + const asset::VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width << params.upscaleLog2, EnvMapPoTExtent.height << params.upscaleLog2, EnvMapPoTExtent.depth }; + constructorParams.warpWorkgroupCount = calcWorkgroupSize(WarpMapExtent, GEN_WARP_WORKGROUP_DIM); + constructorParams.warpMap = createWarpMap(device, WarpMapExtent); + + const auto genLumaPipelineLayout = createGenLumaPipelineLayout(device); + constructorParams.genLumaPipeline = createGenLumaPipeline(params, genLumaPipelineLayout.get()); + const auto genLumaDescriptorPool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genLumaPipelineLayout->getDescriptorSetLayouts()); + const auto genLumaDescriptorSet = genLumaDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genLumaPipelineLayout->getDescriptorSetLayouts()[0])); + + const 
auto genWarpPipelineLayout = createGenWarpPipelineLayout(device); + constructorParams.genWarpPipeline = createGenWarpPipeline(params, genWarpPipelineLayout.get()); + const auto genWarpDescriptorPool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genWarpPipelineLayout->getDescriptorSetLayouts()); + const auto genWarpDescriptorSet = genWarpDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genWarpPipelineLayout->getDescriptorSetLayouts()[0])); + + IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo; + envMapDescriptorInfo.desc = params.envMap; + envMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo lumaMapGeneralDescriptorInfo; + lumaMapGeneralDescriptorInfo.desc = constructorParams.lumaMap; + lumaMapGeneralDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + + IGPUDescriptorSet::SDescriptorInfo lumaMapReadDescriptorInfo; + lumaMapReadDescriptorInfo.desc = constructorParams.lumaMap; + lumaMapReadDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo; + warpMapDescriptorInfo.desc = constructorParams.warpMap; + warpMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + + const IGPUDescriptorSet::SWriteDescriptorSet writes[] = { + { + .dstSet = genLumaDescriptorSet.get(), .binding = 0, .count = 1, .info = &envMapDescriptorInfo + }, + { + .dstSet = genLumaDescriptorSet.get(), .binding = 1, .count = 1, .info = &lumaMapGeneralDescriptorInfo + }, + { + .dstSet = genWarpDescriptorSet.get(), .binding = 0, .count = 1, .info = &lumaMapReadDescriptorInfo + }, + { + .dstSet = genWarpDescriptorSet.get(), .binding = 1, .count = 1, .info = &warpMapDescriptorInfo + }, + }; + + device->updateDescriptorSets(writes, {}); + + constructorParams.genLumaDescriptorSet = genLumaDescriptorSet; + constructorParams.genWarpDescriptorSet = genWarpDescriptorSet; + + 
constructorParams.creationParams = std::move(params); + + return core::smart_refctd_ptr(new EnvmapSampler(std::move(constructorParams))); +} + +core::smart_refctd_ptr EnvmapSampler::createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, const std::string_view debugName) +{ + return createTexture(device, extent, EF_R32_SFLOAT, mipCount); +} + +core::smart_refctd_ptr EnvmapSampler::createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, const std::string_view debugName) +{ + return createTexture(device, extent, EF_R32G32_SFLOAT); +} + +smart_refctd_ptr EnvmapSampler::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) +{ + assert(system); + + if (!system) + return nullptr; + + auto archive = make_smart_refctd_ptr(std::string_view("nbl/builtin/hlsl/sampling/hierarchical_image"), smart_refctd_ptr(logger), system); + + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); + return smart_refctd_ptr(archive); +} + +core::smart_refctd_ptr EnvmapSampler::createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_luma.comp.hlsl", logger.get()); + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; + +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses 
= ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); + options.preprocessorOptions.logger = logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); + const auto shader = device->compileShader({ overridenUnspecialized.get() }); + if (!shader) + { + logger.log("Could not compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could not create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + +core::smart_refctd_ptr EnvmapSampler::createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_warp.comp.hlsl", logger.get()); + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + 
options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; + +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); + options.preprocessorOptions.logger = logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); + const auto shader = device->compileShader({ overridenUnspecialized.get() }); + if (!shader) + { + logger.log("Could not compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could not create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + +core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapSampler::createGenLumaPipelineLayout(video::ILogicalDevice* device) +{ + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(SLumaGenPushConstants) + }; + + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + }, + { + 
.binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({ &pcRange, 1 }, setLayout); + +} + +core::smart_refctd_ptr EnvmapSampler::createGenWarpPipelineLayout(video::ILogicalDevice* device) +{ + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(SLumaGenPushConstants) + }; + + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({&pcRange, 1}, setLayout); +} + +void EnvmapSampler::computeWarpMap(video::IQueue* queue) +{ + const auto logicalDevice = m_cachedCreationParams.utilities->getLogicalDevice(); + + core::smart_refctd_ptr cmdBuf; + { + // commandbuffer should refcount the pool, so it should be 100% legal to drop at the end of the scope + auto gpuCommandPool = logicalDevice->createCommandPool(queue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + if (!gpuCommandPool) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to create command pool.", system::ILogger::ELL_ERROR); + return; + } + gpuCommandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &cmdBuf); + if (!cmdBuf) + { + 
if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to create command buffer.", system::ILogger::ELL_ERROR); + return; + } + } + + if (!cmdBuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to begin command buffer.", system::ILogger::ELL_ERROR); + return; + } + + const auto lumaMapImage = m_lumaMap->getCreationParameters().image.get(); + const auto lumaMapMipLevels = lumaMapImage->getCreationParameters().mipLevels; + const auto lumaMapExtent = lumaMapImage->getCreationParameters().extent; + + const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + const auto warpMapExtent = warpMapImage->getCreationParameters().extent; + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = lumaMapMipLevels, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + } + + // Gen Luma Map + { + SLumaGenPushConstants pcData = {}; + pcData.lumaRGBCoefficients = { 0.2126729f, 0.7151522f, 0.0721750f }; + pcData.lumaMapWidth = lumaMapExtent.width; + pcData.lumaMapHeight = lumaMapExtent.height; + + cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); + cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, + 0, sizeof(SLumaGenPushConstants), &pcData); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genLumaPipeline->getLayout(), + 0, 1, 
&m_genLumaDescriptorSet.get()); + cmdBuf->dispatch(m_lumaWorkgroupCount.x, m_lumaWorkgroupCount.y, 1); + } + + // Generate luminance mip map + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 1u, + .levelCount = lumaMapMipLevels - 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + generateMipmap(cmdBuf.get(), lumaMapImage); + } + + core::smart_refctd_ptr lumaTexelBuffer; + const auto lumaMapLastMip = lumaMapMipLevels - 1; + const auto lumaMapLastMipExtent = lumaMapImage->getMipSize(lumaMapLastMip); + const auto lumaMapLastTexelCount = lumaMapLastMipExtent.x * lumaMapLastMipExtent.y * lumaMapLastMipExtent.z; + { + IGPUImage::SBufferCopy region = {}; + region.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = lumaMapLastMip; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageExtent = { lumaMapLastMipExtent.x, 
lumaMapLastMipExtent.y, lumaMapLastMipExtent.z }; + + IGPUBuffer::SCreationParams bufferCreationParams = {}; + bufferCreationParams.size = lumaMapLastTexelCount * getTexelOrBlockBytesize(EF_R32_SFLOAT); + bufferCreationParams.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT; + lumaTexelBuffer = logicalDevice->createBuffer(std::move(bufferCreationParams)); + if (!lumaTexelBuffer) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create GPU texel buffer.", system::ILogger::ELL_ERROR); + return; + } + auto gpuTexelBufferMemReqs = lumaTexelBuffer->getMemoryReqs(); + gpuTexelBufferMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDownStreamingMemoryTypeBits(); + if (!gpuTexelBufferMemReqs.memoryTypeBits) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: no down-streaming memory type for texel buffer.", system::ILogger::ELL_ERROR); + return; + } + auto gpuTexelBufferMem = logicalDevice->allocate(gpuTexelBufferMemReqs, lumaTexelBuffer.get()); + if (!gpuTexelBufferMem.isValid()) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to allocate texel buffer memory.", system::ILogger::ELL_ERROR); + return; + } + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo info = {}; + decltype(info)::image_barrier_t barrier = {}; + info.imgBarriers = { &barrier, &barrier + 1 }; + + { + barrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT; + barrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; + barrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; + barrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT; + barrier.oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL; + barrier.newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL; + barrier.image = lumaMapImage; + barrier.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; + barrier.subresourceRange.baseMipLevel = lumaMapMipLevels - 1; + barrier.subresourceRange.levelCount = 1u; 
+ barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + cmdBuf->pipelineBarrier(EDF_NONE,info); + } + cmdBuf->copyImageToBuffer(lumaMapImage,IImage::LAYOUT::TRANSFER_SRC_OPTIMAL,lumaTexelBuffer.get(),1,®ion); + } + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = lumaMapMipLevels - 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = lumaMapMipLevels - 1, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + 
cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + + const SWarpGenPushConstants pcData = { + .lumaMapWidth = lumaMapExtent.width, + .lumaMapHeight = lumaMapExtent.height, + .warpMapWidth = warpMapExtent.width, + .warpMapHeight = warpMapExtent.height + }; + cmdBuf->bindComputePipeline(m_genWarpPipeline.get()); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genWarpPipeline->getLayout(), + 0, 1, &m_genWarpDescriptorSet.get()); + cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, + 0, sizeof(SLumaGenPushConstants), &pcData); + cmdBuf->dispatch(m_warpWorkgroupCount.x, m_warpWorkgroupCount.y, 1); + } + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + } + + if (!cmdBuf->end()) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to end command buffer.", system::ILogger::ELL_ERROR); + return; + } + + { + auto signalSemaphore = logicalDevice->createSemaphore(0); + + IQueue::SSubmitInfo info; + IQueue::SSubmitInfo::SCommandBufferInfo cmdBufferInfo{ cmdBuf.get() }; + IQueue::SSubmitInfo::SSemaphoreInfo signalSemaphoreInfo; + signalSemaphoreInfo.semaphore = signalSemaphore.get(); + signalSemaphoreInfo.value = 1; + signalSemaphoreInfo.stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS; + info.commandBuffers = { 
&cmdBufferInfo, &cmdBufferInfo + 1 }; + info.signalSemaphores = { &signalSemaphoreInfo, &signalSemaphoreInfo + 1 }; + + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: submitting copy command buffer.", system::ILogger::ELL_INFO); + if (queue->submit({ &info, &info + 1}) != IQueue::RESULT::SUCCESS) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to submit copy command buffer.", system::ILogger::ELL_ERROR); + return; + } + + ISemaphore::SWaitInfo waitInfo{ signalSemaphore.get(), 1u}; + + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: waiting for copy completion.", system::ILogger::ELL_INFO); + if (logicalDevice->blockForSemaphores({&waitInfo, &waitInfo + 1}) != ISemaphore::WAIT_RESULT::SUCCESS) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to wait for copy completion.", system::ILogger::ELL_ERROR); + return; + } + + auto* allocation = lumaTexelBuffer->getBoundMemory().memory; + const IDeviceMemoryAllocation::MemoryRange range = { 0u, lumaTexelBuffer->getSize() }; + auto* ptr = reinterpret_cast(allocation->map(range, IDeviceMemoryAllocation::EMCAF_READ)); + + m_avgLuma = std::reduce(ptr, ptr + lumaMapLastTexelCount) / float32_t(lumaMapLastTexelCount); + } +} + +nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapSampler::getWarpMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT newLayout) +{ + const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + return { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = dstStageMask, + .dstAccessMask = dstAccessMask + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 
0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = newLayout, + }; +} + +nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapSampler::getLumaMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT newLayout) +{ + const auto lumaMapImage = m_lumaMap->getCreationParameters().image.get(); + return { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, + .dstStageMask = dstStageMask, + .dstAccessMask = dstAccessMask + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + .newLayout = newLayout, + }; +} + + +}