diff --git a/31_HLSLPathTracer/CMakeLists.txt b/31_HLSLPathTracer/CMakeLists.txt new file mode 100644 index 000000000..99ea00017 --- /dev/null +++ b/31_HLSLPathTracer/CMakeLists.txt @@ -0,0 +1,40 @@ +include(common RESULT_VARIABLE RES) + +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +if(NBL_BUILD_IMGUI) + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) + + list(APPEND NBL_LIBRARIES + imtestengine + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" + ) + + nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + + if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) + endif() +endif() + + diff --git a/31_HLSLPathTracer/app_resources/glsl/common.glsl b/31_HLSLPathTracer/app_resources/glsl/common.glsl new file mode 100644 index 000000000..6b6e96710 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/glsl/common.glsl @@ -0,0 +1,837 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. 
+// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +// firefly and variance reduction techniques +//#define KILL_DIFFUSE_SPECULAR_PATHS +//#define VISUALIZE_HIGH_VARIANCE + +// debug +//#define NEE_ONLY + +layout(set = 2, binding = 0) uniform sampler2D envMap; +layout(set = 2, binding = 1) uniform usamplerBuffer sampleSequence; +layout(set = 2, binding = 2) uniform usampler2D scramblebuf; + +layout(set=0, binding=0, rgba16f) uniform image2D outImage; + +#ifndef _NBL_GLSL_WORKGROUP_SIZE_ +#define _NBL_GLSL_WORKGROUP_SIZE_ 512 +layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in; +#endif + +ivec2 getCoordinates() { + ivec2 imageSize = imageSize(outImage); + return ivec2(gl_GlobalInvocationID.x % imageSize.x, gl_GlobalInvocationID.x / imageSize.x); +} + +vec2 getTexCoords() { + ivec2 imageSize = imageSize(outImage); + ivec2 iCoords = getCoordinates(); + return vec2(float(iCoords.x) / imageSize.x, 1.0 - float(iCoords.y) / imageSize.y); +} + + +#include +#include +#include +#ifdef PERSISTENT_WORKGROUPS +#include +#endif + +#include + +layout(push_constant, row_major) uniform constants +{ + mat4 invMVP; + int sampleCount; + int depth; +} PTPushConstant; + +#define INVALID_ID_16BIT 0xffffu +struct Sphere +{ + vec3 position; + float radius2; + uint bsdfLightIDs; +}; + +Sphere Sphere_Sphere(in vec3 position, in float radius, in uint bsdfID, in uint lightID) +{ + Sphere sphere; + sphere.position = position; + sphere.radius2 = radius*radius; + sphere.bsdfLightIDs = bitfieldInsert(bsdfID,lightID,16,16); + return sphere; +} + +// return intersection distance if found, nbl_glsl_FLT_NAN otherwise +float Sphere_intersect(in Sphere sphere, in vec3 origin, in vec3 direction) +{ + vec3 relOrigin = origin-sphere.position; + float relOriginLen2 = dot(relOrigin,relOrigin); + const float radius2 = sphere.radius2; + + float dirDotRelOrigin = dot(direction,relOrigin); + float det = 
radius2-relOriginLen2+dirDotRelOrigin*dirDotRelOrigin; + + // do some speculative math here + float detsqrt = sqrt(det); + return -dirDotRelOrigin+(relOriginLen2>radius2 ? (-detsqrt):detsqrt); +} + +vec3 Sphere_getNormal(in Sphere sphere, in vec3 position) +{ + const float radiusRcp = inversesqrt(sphere.radius2); + return (position-sphere.position)*radiusRcp; +} + +float Sphere_getSolidAngle_impl(in float cosThetaMax) +{ + return 2.0*nbl_glsl_PI*(1.0-cosThetaMax); +} +float Sphere_getSolidAngle(in Sphere sphere, in vec3 origin) +{ + float cosThetaMax = sqrt(1.0-sphere.radius2/nbl_glsl_lengthSq(sphere.position-origin)); + return Sphere_getSolidAngle_impl(cosThetaMax); +} + + +Sphere spheres[SPHERE_COUNT] = { + Sphere_Sphere(vec3(0.0,-100.5,-1.0),100.0,0u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(2.0,0.0,-1.0),0.5,1u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(0.0,0.0,-1.0),0.5,2u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(-2.0,0.0,-1.0),0.5,3u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(2.0,0.0,1.0),0.5,4u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(0.0,0.0,1.0),0.5,4u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(-2.0,0.0,1.0),0.5,5u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(0.5,1.0,0.5),0.5,6u,INVALID_ID_16BIT) +#if SPHERE_COUNT>8 + ,Sphere_Sphere(vec3(-1.5,1.5,0.0),0.3,INVALID_ID_16BIT,0u) +#endif +}; + + +struct Triangle +{ + vec3 vertex0; + uint bsdfLightIDs; + vec3 vertex1; + uint padding0; + vec3 vertex2; + uint padding1; +}; + +Triangle Triangle_Triangle(in mat3 vertices, in uint bsdfID, in uint lightID) +{ + Triangle tri; + tri.vertex0 = vertices[0]; + tri.vertex1 = vertices[1]; + tri.vertex2 = vertices[2]; + // + tri.bsdfLightIDs = bitfieldInsert(bsdfID, lightID, 16, 16); + return tri; +} + +// return intersection distance if found, nbl_glsl_FLT_NAN otherwise +float Triangle_intersect(in Triangle tri, in vec3 origin, in vec3 direction) +{ + const vec3 edges[2] = vec3[2](tri.vertex1-tri.vertex0,tri.vertex2-tri.vertex0); + + const vec3 h = cross(direction,edges[1]); + const float a 
= dot(edges[0],h); + + const vec3 relOrigin = origin-tri.vertex0; + + const float u = dot(relOrigin,h)/a; + + const vec3 q = cross(relOrigin,edges[0]); + const float v = dot(direction,q)/a; + + const float t = dot(edges[1],q)/a; + + return t>0.f&&u>=0.f&&v>=0.f&&(u+v)<=1.f ? t:nbl_glsl_FLT_NAN; +} + +vec3 Triangle_getNormalTimesArea_impl(in mat2x3 edges) +{ + return cross(edges[0],edges[1])*0.5; +} +vec3 Triangle_getNormalTimesArea(in Triangle tri) +{ + return Triangle_getNormalTimesArea_impl(mat2x3(tri.vertex1-tri.vertex0,tri.vertex2-tri.vertex0)); +} + + + +struct Rectangle +{ + vec3 offset; + uint bsdfLightIDs; + vec3 edge0; + uint padding0; + vec3 edge1; + uint padding1; +}; + +Rectangle Rectangle_Rectangle(in vec3 offset, in vec3 edge0, in vec3 edge1, in uint bsdfID, in uint lightID) +{ + Rectangle rect; + rect.offset = offset; + rect.edge0 = edge0; + rect.edge1 = edge1; + // + rect.bsdfLightIDs = bitfieldInsert(bsdfID, lightID, 16, 16); + return rect; +} + +void Rectangle_getNormalBasis(in Rectangle rect, out mat3 basis, out vec2 extents) +{ + extents = vec2(length(rect.edge0), length(rect.edge1)); + basis[0] = rect.edge0/extents[0]; + basis[1] = rect.edge1/extents[1]; + basis[2] = normalize(cross(basis[0],basis[1])); +} + +// return intersection distance if found, nbl_glsl_FLT_NAN otherwise +float Rectangle_intersect(in Rectangle rect, in vec3 origin, in vec3 direction) +{ + const vec3 h = cross(direction,rect.edge1); + const float a = dot(rect.edge0,h); + + const vec3 relOrigin = origin-rect.offset; + + const float u = dot(relOrigin,h)/a; + + const vec3 q = cross(relOrigin,rect.edge0); + const float v = dot(direction,q)/a; + + const float t = dot(rect.edge1,q)/a; + + const bool intersection = t>0.f&&u>=0.f&&v>=0.f&&u<=1.f&&v<=1.f; + return intersection ? 
t:nbl_glsl_FLT_NAN; +} + +vec3 Rectangle_getNormalTimesArea(in Rectangle rect) +{ + return cross(rect.edge0,rect.edge1); +} + + + +#define DIFFUSE_OP 0u +#define CONDUCTOR_OP 1u +#define DIELECTRIC_OP 2u +#define OP_BITS_OFFSET 0 +#define OP_BITS_SIZE 2 +struct BSDFNode +{ + uvec4 data[2]; +}; + +uint BSDFNode_getType(in BSDFNode node) +{ + return bitfieldExtract(node.data[0].w,OP_BITS_OFFSET,OP_BITS_SIZE); +} +bool BSDFNode_isBSDF(in BSDFNode node) +{ + return BSDFNode_getType(node)==DIELECTRIC_OP; +} +bool BSDFNode_isNotDiffuse(in BSDFNode node) +{ + return BSDFNode_getType(node)!=DIFFUSE_OP; +} +float BSDFNode_getRoughness(in BSDFNode node) +{ + return uintBitsToFloat(node.data[1].w); +} +vec3 BSDFNode_getRealEta(in BSDFNode node) +{ + return uintBitsToFloat(node.data[0].rgb); +} +vec3 BSDFNode_getImaginaryEta(in BSDFNode node) +{ + return uintBitsToFloat(node.data[1].rgb); +} +mat2x3 BSDFNode_getEta(in BSDFNode node) +{ + return mat2x3(BSDFNode_getRealEta(node),BSDFNode_getImaginaryEta(node)); +} +#include +vec3 BSDFNode_getReflectance(in BSDFNode node, in float VdotH) +{ + const vec3 albedoOrRealIoR = uintBitsToFloat(node.data[0].rgb); + if (BSDFNode_isNotDiffuse(node)) + return nbl_glsl_fresnel_conductor(albedoOrRealIoR, BSDFNode_getImaginaryEta(node), VdotH); + else + return albedoOrRealIoR; +} + +float BSDFNode_getNEEProb(in BSDFNode bsdf) +{ + const float alpha = BSDFNode_isNotDiffuse(bsdf) ? 
BSDFNode_getRoughness(bsdf):1.0; + return min(8.0*alpha,1.0); +} + +#include +#include +float getLuma(in vec3 col) +{ + return dot(transpose(nbl_glsl_scRGBtoXYZ)[1],col); +} + +#define BSDF_COUNT 7 +BSDFNode bsdfs[BSDF_COUNT] = { + {{uvec4(floatBitsToUint(vec3(0.8,0.8,0.8)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(0.8,0.4,0.4)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(0.4,0.8,0.4)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(1.02,1.02,1.3)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,1.0,2.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(1.02,1.3,1.02)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,2.0,1.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(1.02,1.3,1.02)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,2.0,1.0,0.15))}}, + {{uvec4(floatBitsToUint(vec3(1.4,1.45,1.5)),DIELECTRIC_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0625))}} +}; + + +struct Light +{ + vec3 radiance; + uint objectID; +}; + +vec3 Light_getRadiance(in Light light) +{ + return light.radiance; +} +uint Light_getObjectID(in Light light) +{ + return light.objectID; +} + + +#define LIGHT_COUNT 1 +float scene_getLightChoicePdf(in Light light) +{ + return 1.0/float(LIGHT_COUNT); +} + + +#define LIGHT_COUNT 1 +Light lights[LIGHT_COUNT] = +{ + { + vec3(30.0,25.0,15.0), +#ifdef POLYGON_METHOD + 0u +#else + 8u +#endif + } +}; + + + +#define ANY_HIT_FLAG (-2147483648) +#define DEPTH_BITS_COUNT 8 +#define DEPTH_BITS_OFFSET (31-DEPTH_BITS_COUNT) +struct ImmutableRay_t +{ + vec3 origin; + vec3 direction; +#if POLYGON_METHOD==2 + vec3 normalAtOrigin; + bool wasBSDFAtOrigin; +#endif +}; +struct MutableRay_t +{ + float intersectionT; + uint objectID; + /* irrelevant here + uint triangleID; + vec2 barycentrics; + */ +}; +struct Payload_t +{ + vec3 accumulation; + float otherTechniqueHeuristic; + vec3 throughput; + #ifdef KILL_DIFFUSE_SPECULAR_PATHS + bool hasDiffuse; + #endif +}; + +struct Ray_t +{ + ImmutableRay_t 
_immutable; + MutableRay_t _mutable; + Payload_t _payload; +}; + + +#define INTERSECTION_ERROR_BOUND_LOG2 (-8.0) +float getTolerance_common(in uint depth) +{ + float depthRcp = 1.0/float(depth); + return INTERSECTION_ERROR_BOUND_LOG2;// *depthRcp*depthRcp; +} +float getStartTolerance(in uint depth) +{ + return exp2(getTolerance_common(depth)); +} +float getEndTolerance(in uint depth) +{ + return 1.0-exp2(getTolerance_common(depth)+1.0); +} + + +vec2 SampleSphericalMap(vec3 v) +{ + vec2 uv = vec2(atan(v.z, v.x), asin(v.y)); + uv *= nbl_glsl_RECIPROCAL_PI*0.5; + uv += 0.5; + return uv; +} + +void missProgram(in ImmutableRay_t _immutable, inout Payload_t _payload) +{ + vec3 finalContribution = _payload.throughput; + // #define USE_ENVMAP +#ifdef USE_ENVMAP + vec2 uv = SampleSphericalMap(_immutable.direction); + finalContribution *= textureLod(envMap, uv, 0.0).rgb; +#else + const vec3 kConstantEnvLightRadiance = vec3(0.15, 0.21, 0.3); + finalContribution *= kConstantEnvLightRadiance; +#endif + _payload.accumulation += finalContribution; /* accumulate for BOTH branches: throughput times env-map texel or constant radiance; previously only the #else path added it */ +} + +#include +#include +#include +#include +#include +#include +#include +nbl_glsl_LightSample nbl_glsl_bsdf_cos_generate(in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in vec3 u, in BSDFNode bsdf, in float monochromeEta, out nbl_glsl_AnisotropicMicrofacetCache _cache) +{ + const float a = BSDFNode_getRoughness(bsdf); + const mat2x3 ior = BSDFNode_getEta(bsdf); + + // fresnel stuff for dielectrics + float orientedEta, rcpOrientedEta; + const bool viewerInsideMedium = nbl_glsl_getOrientedEtas(orientedEta,rcpOrientedEta,interaction.isotropic.NdotV,monochromeEta); + + nbl_glsl_LightSample smpl; + nbl_glsl_AnisotropicMicrofacetCache dummy; + switch (BSDFNode_getType(bsdf)) + { + case DIFFUSE_OP: + smpl = nbl_glsl_oren_nayar_cos_generate(interaction,u.xy,a*a); + break; + case CONDUCTOR_OP: + smpl = nbl_glsl_ggx_cos_generate(interaction,u.xy,a,a,_cache); + break; + default: + smpl = 
nbl_glsl_ggx_dielectric_cos_generate(interaction,u,a,a,monochromeEta,_cache); + break; + } + return smpl; +} + +vec3 nbl_glsl_bsdf_cos_remainder_and_pdf(out float pdf, in nbl_glsl_LightSample _sample, in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in BSDFNode bsdf, in float monochromeEta, in nbl_glsl_AnisotropicMicrofacetCache _cache) +{ + // are V and L on opposite sides of the surface? + const bool transmitted = nbl_glsl_isTransmissionPath(interaction.isotropic.NdotV,_sample.NdotL); + + // is the BSDF or BRDF, if it is then we make the dot products `abs` before `max(,0.0)` + const bool transmissive = BSDFNode_isBSDF(bsdf); + const float clampedNdotL = nbl_glsl_conditionalAbsOrMax(transmissive,_sample.NdotL,0.0); + const float clampedNdotV = nbl_glsl_conditionalAbsOrMax(transmissive,interaction.isotropic.NdotV,0.0); + + vec3 remainder; + + const float minimumProjVectorLen = 0.00000001; + if (clampedNdotV>minimumProjVectorLen && clampedNdotL>minimumProjVectorLen) + { + // fresnel stuff for conductors (but reflectance also doubles as albedo) + const mat2x3 ior = BSDFNode_getEta(bsdf); + const vec3 reflectance = BSDFNode_getReflectance(bsdf,_cache.isotropic.VdotH); + + // fresnel stuff for dielectrics + float orientedEta, rcpOrientedEta; + const bool viewerInsideMedium = nbl_glsl_getOrientedEtas(orientedEta,rcpOrientedEta,interaction.isotropic.NdotV,monochromeEta); + + // + const float VdotL = dot(interaction.isotropic.V.dir,_sample.L); + + // + const float a = max(BSDFNode_getRoughness(bsdf),0.0001); // TODO: @Crisspl 0-roughness still doesn't work! Also Beckmann has a weird dark rim instead as fresnel!? 
+ const float a2 = a*a; + + // TODO: refactor into Material Compiler-esque thing + switch (BSDFNode_getType(bsdf)) + { + case DIFFUSE_OP: + remainder = reflectance*nbl_glsl_oren_nayar_cos_remainder_and_pdf_wo_clamps(pdf,a*a,VdotL,clampedNdotL,clampedNdotV); + break; + case CONDUCTOR_OP: + remainder = nbl_glsl_ggx_cos_remainder_and_pdf_wo_clamps(pdf,nbl_glsl_ggx_trowbridge_reitz(a2,_cache.isotropic.NdotH2),clampedNdotL,_sample.NdotL2,clampedNdotV,interaction.isotropic.NdotV_squared,reflectance,a2); + break; + default: + remainder = vec3(nbl_glsl_ggx_dielectric_cos_remainder_and_pdf(pdf, _sample, interaction.isotropic, _cache.isotropic, monochromeEta, a*a)); + break; + } + } + else + remainder = vec3(0.0); + return remainder; +} + +layout (constant_id = 0) const int MAX_DEPTH_LOG2 = 4; +layout (constant_id = 1) const int MAX_SAMPLES_LOG2 = 10; + + +#include + +mat2x3 rand3d(in uint protoDimension, in uint _sample, inout nbl_glsl_xoroshiro64star_state_t scramble_state) +{ + mat2x3 retval; + uint address = bitfieldInsert(protoDimension,_sample,MAX_DEPTH_LOG2,MAX_SAMPLES_LOG2); + for (int i=0; i<2u; i++) + { + uvec3 seqVal = texelFetch(sampleSequence,int(address)+i).xyz; + seqVal ^= uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state)); + retval[i] = vec3(seqVal)*uintBitsToFloat(0x2f800004u); + } + return retval; +} + + +void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction); +int traceRay(inout float intersectionT, in vec3 origin, in vec3 direction) +{ + const bool anyHit = intersectionT!=nbl_glsl_FLT_MAX; + + int objectID = -1; + for (int i=0; i0.0 && tnbl_glsl_FLT_MIN; + // but if we allowed non-watertight transmitters (single water surface), it would make sense just to apply this line by itself + nbl_glsl_AnisotropicMicrofacetCache _cache; + validPath = validPath && nbl_glsl_calcAnisotropicMicrofacetCache(_cache, interaction, nee_sample, 
monochromeEta); + if (lightPdflumaContributionThreshold && traceRay(t,intersection+nee_sample.L*t*getStartTolerance(depth),nee_sample.L)==-1) + ray._payload.accumulation += neeContrib; + }} + } +#ifdef NEE_ONLY + return false; /* debug switch (valueless define, hence #ifdef): return before the BSDF-sampling stage below */ +#endif + // sample BSDF + float bsdfPdf; vec3 bsdfSampleL; + { + nbl_glsl_AnisotropicMicrofacetCache _cache; + nbl_glsl_LightSample bsdf_sample = nbl_glsl_bsdf_cos_generate(interaction,epsilon[1],bsdf,monochromeEta,_cache); + // the value of the bsdf divided by the probability of the sample being generated + throughput *= nbl_glsl_bsdf_cos_remainder_and_pdf(bsdfPdf,bsdf_sample,interaction,bsdf,monochromeEta,_cache); + // + bsdfSampleL = bsdf_sample.L; + } + + // additional threshold + const float lumaThroughputThreshold = lumaContributionThreshold; + if (bsdfPdf>bsdfPdfThreshold && getLuma(throughput)>lumaThroughputThreshold) + { + ray._payload.throughput = throughput; + ray._payload.otherTechniqueHeuristic = neeProbability/bsdfPdf; // numerically stable, don't touch + ray._payload.otherTechniqueHeuristic *= ray._payload.otherTechniqueHeuristic; + + // trace new ray + ray._immutable.origin = intersection+bsdfSampleL*(1.0/*kSceneSize*/)*getStartTolerance(depth); + ray._immutable.direction = bsdfSampleL; + #if POLYGON_METHOD==2 + ray._immutable.normalAtOrigin = interaction.isotropic.N; + ray._immutable.wasBSDFAtOrigin = isBSDF; + #endif + return true; + } + } + return false; +} + +void main() +{ + const ivec2 imageExtents = imageSize(outImage); + +#ifdef PERSISTENT_WORKGROUPS + uint virtualThreadIndex; + for (uint virtualThreadBase = gl_WorkGroupID.x * _NBL_GLSL_WORKGROUP_SIZE_; virtualThreadBase < 1920*1080; virtualThreadBase += gl_NumWorkGroups.x * _NBL_GLSL_WORKGROUP_SIZE_) // not sure why 1280*720 doesn't cover draw surface + { + virtualThreadIndex = virtualThreadBase + gl_LocalInvocationIndex.x; + const ivec2 coords = ivec2(nbl_glsl_morton_decode2d32b(virtualThreadIndex)); +#else + const ivec2 coords = getCoordinates(); +#endif + + vec2 
texCoord = vec2(coords) / vec2(imageExtents); + texCoord.y = 1.0 - texCoord.y; + + if (false == (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageExtents,coords)))) { +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + if (((PTPushConstant.depth-1)>>MAX_DEPTH_LOG2)>0 || ((PTPushConstant.sampleCount-1)>>MAX_SAMPLES_LOG2)>0) + { + vec4 pixelCol = vec4(1.0,0.0,0.0,1.0); + imageStore(outImage, coords, pixelCol); +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0).rg; + const vec2 pixOffsetParam = vec2(1.0)/vec2(textureSize(scramblebuf,0)); + + + const mat4 invMVP = PTPushConstant.invMVP; + + vec4 NDC = vec4(texCoord*vec2(2.0,-2.0)+vec2(-1.0,1.0),0.0,1.0); + vec3 camPos; + { + vec4 tmp = invMVP*NDC; + camPos = tmp.xyz/tmp.w; + NDC.z = 1.0; + } + + vec3 color = vec3(0.0); + float meanLumaSquared = 0.0; + // TODO: if we collapse the nested for loop, then all GPUs will get `PTPushConstant.depth` factor speedup, not just NV with separate PC + for (int i=0; i5.0) + color = vec3(1.0,0.0,0.0); + #endif + + vec4 pixelCol = vec4(color, 1.0); + imageStore(outImage, coords, pixelCol); + +#ifdef PERSISTENT_WORKGROUPS + } +#endif +} +/** TODO: Improving Rendering + +Now: +- Always MIS (path correlated reuse) +- Test MIS alpha (roughness) scheme + +Many Lights: +- Path Guiding +- Light Importance Lists/Classification +- Spatio-Temporal Reservoir Sampling + +Indirect Light: +- Bidirectional Path Tracing +- Uniform Path Sampling / Vertex Connection and Merging / Path Space Regularization + +Animations: +- A-SVGF / BMFR +**/ \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/glsl/litByRectangle.comp b/31_HLSLPathTracer/app_resources/glsl/litByRectangle.comp new file mode 100644 index 000000000..d898655c4 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/glsl/litByRectangle.comp @@ -0,0 +1,182 @@ +// Copyright (C) 
2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#version 430 core +#extension GL_GOOGLE_include_directive : require + +#define SPHERE_COUNT 8 +#define POLYGON_METHOD 1 // 0 area sampling, 1 solid angle sampling, 2 approximate projected solid angle sampling +#include "app_resources/glsl/common.glsl" + +#define RECTANGLE_COUNT 1 +const vec3 edge0 = normalize(vec3(2,0,-1)); +const vec3 edge1 = normalize(vec3(2,-5,4)); +Rectangle rectangles[RECTANGLE_COUNT] = { + Rectangle_Rectangle(vec3(-3.8,0.35,1.3),edge0*7.0,edge1*0.1,INVALID_ID_16BIT,0u) +}; + + +void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction) +{ + for (int i=0; i0.0 && t +#include +#include + +float nbl_glsl_light_deferred_pdf(in Light light, in Ray_t ray) +{ + const Rectangle rect = rectangles[Light_getObjectID(light)]; + + const ImmutableRay_t _immutable = ray._immutable; + const vec3 L = _immutable.direction; +#if POLYGON_METHOD==0 + const float dist = ray._mutable.intersectionT; + return dist*dist/abs(dot(Rectangle_getNormalTimesArea(rect),L)); +#else + #ifdef TRIANGLE_REFERENCE + const mat3 sphericalVertices[2] = + { + nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset,rect.offset+rect.edge0,rect.offset+rect.edge1),_immutable.origin), + nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset+rect.edge1,rect.offset+rect.edge0,rect.offset+rect.edge0+rect.edge1),_immutable.origin) + }; + float solidAngle[2]; + vec3 cos_vertices[2],sin_vertices[2]; + float cos_a[2],cos_c[2],csc_b[2],csc_c[2]; + for (uint i=0u; i<2u; i++) + solidAngle[i] = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i]); + const float rectSolidAngle = solidAngle[0]+solidAngle[1]; + #if POLYGON_METHOD==1 + return 1.f/rectSolidAngle; + #elif POLYGON_METHOD==2 + // TODO: figure out what 
breaks for a directly visible light under MIS + if (rectSolidAngle > nbl_glsl_FLT_MIN) + { + const vec2 bary = nbl_glsl_barycentric_reconstructBarycentrics(L*ray._mutable.intersectionT+_immutable.origin-rect.offset,mat2x3(rect.edge0,rect.edge1)); + const uint i = bary.x>=0.f&&bary.y>=0.f&&(bary.x+bary.y)<=1.f ? 0u:1u; + + float pdf = nbl_glsl_sampling_probProjectedSphericalTriangleSample(solidAngle[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i],sphericalVertices[i],_immutable.normalAtOrigin,_immutable.wasBSDFAtOrigin,L); + pdf *= solidAngle[i]/rectSolidAngle; + return pdf; + } + else + return nbl_glsl_FLT_INF; + #endif + #else + float pdf; + mat3 rectNormalBasis; + vec2 rectExtents; + Rectangle_getNormalBasis(rect, rectNormalBasis, rectExtents); + vec3 sphR0 = nbl_glsl_shapes_getSphericalRectangle(_immutable.origin, rect.offset, rectNormalBasis); + float solidAngle = nbl_glsl_shapes_SolidAngleOfRectangle(sphR0, rectExtents); + if (solidAngle > nbl_glsl_FLT_MIN) + { + #if POLYGON_METHOD==1 + pdf = 1.f/solidAngle; + #else + #error + #endif + } + else + pdf = nbl_glsl_FLT_INF; + return pdf; + #endif +#endif +} + +vec3 nbl_glsl_light_generate_and_pdf(out float pdf, out float newRayMaxT, in vec3 origin, in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in bool isBSDF, in vec3 xi, in uint objectID) +{ + const Rectangle rect = rectangles[objectID]; + const vec3 N = Rectangle_getNormalTimesArea(rect); + + const vec3 origin2origin = rect.offset-origin; +#if POLYGON_METHOD==0 + vec3 L = origin2origin+rect.edge0*xi.x+rect.edge1*xi.y; // TODO: refactor + + const float distanceSq = dot(L,L); + const float rcpDistance = inversesqrt(distanceSq); + L *= rcpDistance; + + pdf = distanceSq/abs(dot(N,L)); + newRayMaxT = 1.0/rcpDistance; + return L; +#else + #ifdef TRIANGLE_REFERENCE + const mat3 sphericalVertices[2] = + { + nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset,rect.offset+rect.edge0,rect.offset+rect.edge1),origin), + 
nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset+rect.edge1,rect.offset+rect.edge0,rect.offset+rect.edge0+rect.edge1),origin) + }; + float solidAngle[2]; + vec3 cos_vertices[2],sin_vertices[2]; + float cos_a[2],cos_c[2],csc_b[2],csc_c[2]; + for (uint i=0u; i<2u; i++) + solidAngle[i] = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i]); + vec3 L = vec3(0.f,0.f,0.f); + const float rectangleSolidAngle = solidAngle[0]+solidAngle[1]; + if (rectangleSolidAngle > nbl_glsl_FLT_MIN) + { + float rcpTriangleChoiceProb; + const uint i = nbl_glsl_partitionRandVariable(solidAngle[0]/rectangleSolidAngle,xi.z,rcpTriangleChoiceProb) ? 1u:0u; + #if POLYGON_METHOD==1 + L = nbl_glsl_sampling_generateSphericalTriangleSample(solidAngle[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i],sphericalVertices[i],xi.xy); + pdf = 1.f/rectangleSolidAngle; + #elif POLYGON_METHOD==2 + float rcpPdf; + L = nbl_glsl_sampling_generateProjectedSphericalTriangleSample(rcpPdf,solidAngle[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i],sphericalVertices[i],interaction.isotropic.N,isBSDF,xi.xy); + pdf = 1.f/(rcpPdf*rcpTriangleChoiceProb); + #endif + } + else + pdf = nbl_glsl_FLT_INF; + #else + mat3 rectNormalBasis; + vec2 rectExtents; + Rectangle_getNormalBasis(rect, rectNormalBasis, rectExtents); + vec3 sphR0 = nbl_glsl_shapes_getSphericalRectangle(origin, rect.offset, rectNormalBasis); + vec3 L = vec3(0.f,0.f,0.f); + float solidAngle; + vec2 sphUv = nbl_glsl_sampling_generateSphericalRectangleSample(sphR0, rectExtents, xi.xy, solidAngle); + if (solidAngle > nbl_glsl_FLT_MIN) + { + #if POLYGON_METHOD==1 + vec3 sph_sample = sphUv[0] * rect.edge0 + sphUv[1] * rect.edge1 + rect.offset; + L = normalize(sph_sample - origin); + pdf = 1.f/solidAngle; + #else + #error + #endif + } + else + pdf = nbl_glsl_FLT_INF; + #endif + newRayMaxT = dot(N,origin2origin)/dot(N,L); + return L; +#endif +} + 
+ +uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) +{ + if (objectID0.0) + { + const float rcpDistance = inversesqrt(distanceSQ); + Z *= rcpDistance; + + const float cosThetaMax = sqrt(cosThetaMax2); + const float cosTheta = mix(1.0,cosThetaMax,xi.x); + + vec3 L = Z*cosTheta; + + const float cosTheta2 = cosTheta*cosTheta; + const float sinTheta = sqrt(1.0-cosTheta2); + float sinPhi,cosPhi; + nbl_glsl_sincos(2.0*nbl_glsl_PI*xi.y-nbl_glsl_PI,sinPhi,cosPhi); + mat2x3 XY = nbl_glsl_frisvad(Z); + + L += (XY[0]*cosPhi+XY[1]*sinPhi)*sinTheta; + + newRayMaxT = (cosTheta-sqrt(cosTheta2-cosThetaMax2))/rcpDistance; + pdf = 1.0/Sphere_getSolidAngle_impl(cosThetaMax); + return L; + } + pdf = 0.0; + return vec3(0.0,0.0,0.0); +} + +uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) +{ + Sphere sphere = spheres[objectID]; + normal = Sphere_getNormal(sphere,intersection); + return sphere.bsdfLightIDs; +} \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/glsl/litByTriangle.comp b/31_HLSLPathTracer/app_resources/glsl/litByTriangle.comp new file mode 100644 index 000000000..36fe522f2 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/glsl/litByTriangle.comp @@ -0,0 +1,105 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#version 430 core +#extension GL_GOOGLE_include_directive : require + +#define SPHERE_COUNT 8 +#define POLYGON_METHOD 1 // 0 area sampling, 1 solid angle sampling, 2 approximate projected solid angle sampling +#include "app_resources/glsl/common.glsl" + +#define TRIANGLE_COUNT 1 +Triangle triangles[TRIANGLE_COUNT] = { + Triangle_Triangle(mat3(vec3(-1.8,0.35,0.3),vec3(-1.2,0.35,0.0),vec3(-1.5,0.8,-0.3))*10.0,INVALID_ID_16BIT,0u) +}; + +void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction) +{ + for (int i=0; i0.0 && t +float nbl_glsl_light_deferred_pdf(in Light light, in Ray_t ray) +{ + const Triangle tri = triangles[Light_getObjectID(light)]; + + const vec3 L = ray._immutable.direction; +#if POLYGON_METHOD==0 + const float dist = ray._mutable.intersectionT; + return dist*dist/abs(dot(Triangle_getNormalTimesArea(tri),L)); +#else + const ImmutableRay_t _immutable = ray._immutable; + const mat3 sphericalVertices = nbl_glsl_shapes_getSphericalTriangle(mat3(tri.vertex0,tri.vertex1,tri.vertex2),_immutable.origin); + #if POLYGON_METHOD==1 + const float rcpProb = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices); + // if `rcpProb` is NAN then the triangle's solid angle was close to 0.0 + return rcpProb>nbl_glsl_FLT_MIN ? (1.0/rcpProb):nbl_glsl_FLT_MAX; + #elif POLYGON_METHOD==2 + const float pdf = nbl_glsl_sampling_probProjectedSphericalTriangleSample(sphericalVertices,_immutable.normalAtOrigin,_immutable.wasBSDFAtOrigin,L); + // if `pdf` is NAN then the triangle's projected solid angle was close to 0.0, if its close to INF then the triangle was very small + return pdfnbl_glsl_FLT_MIN ? 
(1.0/rcpPdf):0.0; + + const vec3 N = Triangle_getNormalTimesArea(tri); + newRayMaxT = dot(N,tri.vertex0-origin)/dot(N,L); + return L; +#endif +} + + +uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) +{ + if (objectID + +namespace Accumulator +{ + +template) +struct DefaultAccumulator +{ + using input_sample_type = OutputTypeVec; + using output_storage_type = OutputTypeVec; + using this_t = DefaultAccumulator; + using scalar_type = typename vector_traits::scalar_type; + + static this_t create() + { + this_t retval; + retval.accumulation = promote(0.0f); + + return retval; + } + + void addSample(uint32_t sampleCount, input_sample_type _sample) + { + scalar_type rcpSampleSize = 1.0 / (sampleCount); + accumulation += (_sample - accumulation) * rcpSampleSize; + } + + output_storage_type accumulation; +}; + +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/common.hlsl new file mode 100644 index 000000000..f9d0b005d --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/common.hlsl @@ -0,0 +1,50 @@ +#ifndef _NBL_HLSL_EXT_PATHTRACING_COMMON_INCLUDED_ +#define _NBL_HLSL_EXT_PATHTRACING_COMMON_INCLUDED_ + +#include +#include +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace path_tracing +{ + +template +struct Tolerance +{ + NBL_CONSTEXPR_STATIC_INLINE T INTERSECTION_ERROR_BOUND_LOG2 = -8.0; + + static T __common(uint32_t depth) + { + T depthRcp = 1.0 / T(depth); + return INTERSECTION_ERROR_BOUND_LOG2; + } + + static T getStart(uint32_t depth) + { + return nbl::hlsl::exp2(__common(depth)); + } + + static T getEnd(uint32_t depth) + { + return 1.0 - nbl::hlsl::exp2(__common(depth) + 1.0); + } +}; + +enum PTPolygonMethod : uint16_t +{ + PPM_AREA, + PPM_SOLID_ANGLE, + PPM_APPROX_PROJECTED_SOLID_ANGLE +}; + +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/concepts.hlsl b/31_HLSLPathTracer/app_resources/hlsl/concepts.hlsl 
new file mode 100644 index 000000000..a1728721d --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/concepts.hlsl @@ -0,0 +1,204 @@ +#ifndef _NBL_HLSL_PATHTRACING_CONCEPTS_INCLUDED_ +#define _NBL_HLSL_PATHTRACING_CONCEPTS_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +namespace path_tracing +{ +namespace concepts +{ + +#define NBL_CONCEPT_NAME RandGenerator +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (rand, T) +NBL_CONCEPT_BEGIN(1) +#define rand NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::rng_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::return_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((rand()), ::nbl::hlsl::is_same_v, typename T::return_type)) +); +#undef rand +#include + +#define NBL_CONCEPT_NAME RayGenerator +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (raygen, T) +#define NBL_CONCEPT_PARAM_1 (randVec, typename T::vector3_type) +NBL_CONCEPT_BEGIN(2) +#define raygen NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define randVec NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::vector3_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::ray_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((raygen.generate(randVec)), ::nbl::hlsl::is_same_v, typename T::ray_type)) +); +#undef randVec +#undef raygen +#include + +#define NBL_CONCEPT_NAME Intersector +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (intersect, T) +#define NBL_CONCEPT_PARAM_1 (ray, typename T::ray_type) +#define NBL_CONCEPT_PARAM_2 (scene, typename T::scene_type) +NBL_CONCEPT_BEGIN(3) +#define intersect NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define ray NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define scene NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::scene_type)) + 
((NBL_CONCEPT_REQ_TYPE)(T::ray_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::id_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((intersect.traceRay(ray, scene)), ::nbl::hlsl::is_same_v, typename T::id_type)) +); +#undef scene +#undef ray +#undef intersect +#include + +#define NBL_CONCEPT_NAME MaterialSystem +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (matsys, T) +#define NBL_CONCEPT_PARAM_1 (_sample, typename T::sample_type) +#define NBL_CONCEPT_PARAM_2 (matid, uint32_t) +#define NBL_CONCEPT_PARAM_3 (aniso_inter, typename T::anisotropic_interaction_type) +#define NBL_CONCEPT_PARAM_4 (iso_inter, typename T::isotropic_interaction_type) +#define NBL_CONCEPT_PARAM_5 (aniso_cache, typename T::anisocache_type) +#define NBL_CONCEPT_PARAM_6 (iso_cache, typename T::isocache_type) +#define NBL_CONCEPT_PARAM_7 (params, typename T::create_params_t) +#define NBL_CONCEPT_PARAM_8 (u, typename T::vector3_type) +NBL_CONCEPT_BEGIN(9) +#define matsys NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define _sample NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define matid NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define aniso_inter NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +#define iso_inter NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_4 +#define aniso_cache NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_5 +#define iso_cache NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_6 +#define params NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_7 +#define u NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_8 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::vector3_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::sample_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::quotient_pdf_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::measure_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::anisotropic_interaction_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::isotropic_interaction_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::anisocache_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::isocache_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::create_params_t)) + 
((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((matsys.eval(matid, params, _sample, iso_inter, iso_cache)), ::nbl::hlsl::is_same_v, typename T::measure_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((matsys.generate(matid, params, aniso_inter, u, aniso_cache)), ::nbl::hlsl::is_same_v, typename T::sample_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((matsys.quotient_and_pdf(matid, params, _sample, iso_inter, iso_cache)), ::nbl::hlsl::is_same_v, typename T::quotient_pdf_type)) +); +#undef u +#undef params +#undef iso_cache +#undef aniso_cache +#undef iso_inter +#undef aniso_inter +#undef matid +#undef _sample +#undef matsys +#include + +#define NBL_CONCEPT_NAME NextEventEstimator +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (nee, T) +#define NBL_CONCEPT_PARAM_1 (ray, typename T::ray_type) +#define NBL_CONCEPT_PARAM_2 (scene, typename T::scene_type) +#define NBL_CONCEPT_PARAM_3 (id, uint32_t) +#define NBL_CONCEPT_PARAM_4 (pdf, typename T::scalar_type) +#define NBL_CONCEPT_PARAM_5 (quo_pdf, typename T::quotient_pdf_type) +#define NBL_CONCEPT_PARAM_6 (v, typename T::vector3_type) +#define NBL_CONCEPT_PARAM_7 (interaction, typename T::interaction_type) +#define NBL_CONCEPT_PARAM_8 (b, bool) +NBL_CONCEPT_BEGIN(9) +#define nee NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define ray NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define scene NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define id NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +#define pdf NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_4 +#define quo_pdf NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_5 +#define v NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_6 +#define interaction NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_7 +#define b NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_8 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::vector3_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::scene_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::ray_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::spectral_type)) + 
((NBL_CONCEPT_REQ_TYPE)(T::sample_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::quotient_pdf_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::interaction_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((nee.deferredEvalAndPdf(pdf, scene, id, ray)), ::nbl::hlsl::is_same_v, typename T::spectral_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((nee.generate_and_quotient_and_pdf(quo_pdf, pdf, scene, id, v, interaction, b, v, id)), ::nbl::hlsl::is_same_v, typename T::sample_type)) +); +#undef b +#undef interaction +#undef v +#undef quo_pdf +#undef pdf +#undef id +#undef scene +#undef ray +#undef nee +#include + +#define NBL_CONCEPT_NAME Accumulator +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (acc, T) +#define NBL_CONCEPT_PARAM_1 (sampleCount, uint32_t) +#define NBL_CONCEPT_PARAM_2 (_sample, typename T::input_sample_type) +NBL_CONCEPT_BEGIN(3) +#define acc NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define sampleCount NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define _sample NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::input_sample_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((acc.addSample(sampleCount, _sample)), ::nbl::hlsl::is_same_v, void)) +); +#undef _sample +#undef sampleCount +#undef acc +#include + +#define NBL_CONCEPT_NAME Scene +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (scene, T) +#define NBL_CONCEPT_PARAM_1 (intersectP, typename T::vector3_type) +#define NBL_CONCEPT_PARAM_2 (id, typename T::id_type) +NBL_CONCEPT_BEGIN(3) +#define scene NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define intersectP NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define id NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::vector3_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::id_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((scene.getBsdfLightIDs(id)), ::nbl::hlsl::is_same_v, uint32_t)) + 
((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((scene.getNormal(id, intersectP)), ::nbl::hlsl::is_same_v, typename T::vector3_type)) +); +#undef id +#undef intersectP +#undef scene +#include + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/example_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/example_common.hlsl new file mode 100644 index 000000000..9055468f5 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/example_common.hlsl @@ -0,0 +1,390 @@ +#ifndef _NBL_HLSL_PATHTRACING_EXAMPLE_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACING_EXAMPLE_COMMON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/shapes/spherical_triangle.hlsl" +#include "nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl" +#include "nbl/builtin/hlsl/sampling/spherical_triangle.hlsl" +#include "nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl" +#include "nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl" +#include "common.hlsl" + +using namespace nbl; +using namespace hlsl; + +enum ProceduralShapeType : uint16_t +{ + PST_NONE = 0, + PST_SPHERE, + PST_TRIANGLE, + PST_RECTANGLE +}; + +enum IntersectMode : uint32_t +{ + IM_RAY_QUERY, + IM_RAY_TRACING, + IM_PROCEDURAL +}; + +template // TODO make type T Spectrum +struct Payload +{ + using this_t = Payload; + using scalar_type = T; + using vector3_type = vector; + + vector3_type accumulation; + scalar_type otherTechniqueHeuristic; + vector3_type throughput; + // #ifdef KILL_DIFFUSE_SPECULAR_PATHS + // bool hasDiffuse; + // #endif +}; + +struct ObjectID +{ + static ObjectID create(uint32_t id, uint32_t mode, ProceduralShapeType shapeType) + { + ObjectID retval; + retval.id = id; + retval.mode = mode; + retval.shapeType = shapeType; + return retval; + } + + uint32_t id; + uint32_t mode; + ProceduralShapeType shapeType; +}; + +template +struct Ray +{ + using this_t = Ray; + using scalar_type = T; + using vector3_type = vector; + + // immutable + vector3_type origin; + vector3_type direction; 
+ + // polygon method == PPM_APPROX_PROJECTED_SOLID_ANGLE + vector3_type normalAtOrigin; + bool wasBSDFAtOrigin; + + // mutable + scalar_type intersectionT; + ObjectID objectID; + + Payload payload; +}; + +template +struct Light +{ + using spectral_type = Spectrum; + + NBL_CONSTEXPR_STATIC_INLINE uint32_t INVALID_ID = 0xffffu; + + static Light create(NBL_CONST_REF_ARG(spectral_type) radiance, uint32_t objId, uint32_t mode, ProceduralShapeType shapeType) + { + Light retval; + retval.radiance = radiance; + retval.objectID = ObjectID::create(objId, mode, shapeType); + return retval; + } + + static Light create(NBL_CONST_REF_ARG(spectral_type) radiance, NBL_CONST_REF_ARG(ObjectID) objectID) + { + Light retval; + retval.radiance = radiance; + retval.objectID = objectID; + return retval; + } + + spectral_type radiance; + ObjectID objectID; +}; + +template) +struct SBxDFCreationParams +{ + bool is_aniso; + vector A; // roughness + Spectrum ior0; // source ior + Spectrum ior1; // destination ior + Spectrum iork; // destination iork (for iridescent only) + Scalar eta; // in most cases, eta will be calculated from ior0 and ior1; see monochromeEta in pathtracer.hlsl +}; + +template +struct BxDFNode +{ + using spectral_type = Spectrum; + using scalar_type = typename vector_traits::scalar_type; + using vector2_type = vector; + using params_type = SBxDFCreationParams; + + NBL_CONSTEXPR_STATIC_INLINE uint32_t INVALID_ID = 0xffffu; + + // for diffuse bxdfs + static BxDFNode create(uint32_t materialType, bool isAniso, NBL_CONST_REF_ARG(vector2_type) A, NBL_CONST_REF_ARG(spectral_type) albedo) + { + BxDFNode retval; + retval.albedo = albedo; + retval.materialType = materialType; + retval.params.is_aniso = isAniso; + retval.params.A = hlsl::max(A, hlsl::promote(1e-3)); + retval.params.ior0 = hlsl::promote(1.0); + retval.params.ior1 = hlsl::promote(1.0); + return retval; + } + + // for conductor, ior0 = eta, ior1 = etak + // for dielectric, eta = ior1/ior0 + static BxDFNode 
create(uint32_t materialType, bool isAniso, NBL_CONST_REF_ARG(vector2_type) A, NBL_CONST_REF_ARG(spectral_type) ior0, NBL_CONST_REF_ARG(spectral_type) ior1) + { + BxDFNode retval; + retval.albedo = hlsl::promote(1.0); + retval.materialType = materialType; + retval.params.is_aniso = isAniso; + retval.params.A = hlsl::max(A, hlsl::promote(1e-3)); + retval.params.ior0 = ior0; + retval.params.ior1 = ior1; + return retval; + } + + // for iridescent bxdfs, ior0 = thin film ior, ior1+iork1 = base mat ior (k for conductor base) + static BxDFNode create(uint32_t materialType, bool isAniso, scalar_type A, scalar_type Dinc, NBL_CONST_REF_ARG(spectral_type) ior0, NBL_CONST_REF_ARG(spectral_type) ior1, NBL_CONST_REF_ARG(spectral_type) iork1) + { + BxDFNode retval; + retval.albedo = hlsl::promote(1.0); + retval.materialType = materialType; + retval.params.is_aniso = isAniso; + retval.params.A = vector2_type(hlsl::max(A, 1e-3), Dinc); + retval.params.ior0 = ior0; + retval.params.ior1 = ior1; + retval.params.iork = iork1; + return retval; + } + + spectral_type albedo; + uint32_t materialType; + params_type params; +}; + + +template +struct Shape; + +template +struct Shape +{ + using scalar_type = T; + using vector3_type = vector; + + static Shape create(NBL_CONST_REF_ARG(vector3_type) position, float32_t radius2, uint32_t bsdfLightIDs) + { + Shape retval; + retval.position = position; + retval.radius2 = radius2; + retval.bsdfLightIDs = bsdfLightIDs; + return retval; + } + + static Shape create(NBL_CONST_REF_ARG(vector3_type) position, scalar_type radius, uint32_t bsdfID, uint32_t lightID) + { + uint32_t bsdfLightIDs = glsl::bitfieldInsert(bsdfID, lightID, 16, 16); + return create(position, radius * radius, bsdfLightIDs); + } + + void updateTransform(NBL_CONST_REF_ARG(float32_t3x4) m) + { + position = float3(m[0].w, m[1].w, m[2].w); + radius2 = m[0].x * m[0].x; + } + + // return intersection distance if found, nan otherwise + scalar_type intersect(NBL_CONST_REF_ARG(vector3_type) 
origin, NBL_CONST_REF_ARG(vector3_type) direction) + { + vector3_type relOrigin = origin - position; + scalar_type relOriginLen2 = hlsl::dot(relOrigin, relOrigin); + + scalar_type dirDotRelOrigin = hlsl::dot(direction, relOrigin); + scalar_type det = radius2 - relOriginLen2 + dirDotRelOrigin * dirDotRelOrigin; + + // do some speculative math here + scalar_type detsqrt = hlsl::sqrt(det); + return -dirDotRelOrigin + (relOriginLen2 > radius2 ? (-detsqrt) : detsqrt); + } + + vector3_type getNormal(NBL_CONST_REF_ARG(vector3_type) hitPosition) + { + const scalar_type radiusRcp = hlsl::rsqrt(radius2); + return (hitPosition - position) * radiusRcp; + } + + scalar_type getSolidAngle(NBL_CONST_REF_ARG(vector3_type) origin) + { + vector3_type dist = position - origin; + scalar_type cosThetaMax = hlsl::sqrt(1.0 - radius2 / hlsl::dot(dist, dist)); + return 2.0 * numbers::pi * (1.0 - cosThetaMax); + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t ObjSize = 5; + + vector3_type position; + float32_t radius2; + uint32_t bsdfLightIDs; +}; + +template +struct Shape +{ + using scalar_type = T; + using vector3_type = vector; + + static Shape create(NBL_CONST_REF_ARG(vector3_type) vertex0, NBL_CONST_REF_ARG(vector3_type) vertex1, NBL_CONST_REF_ARG(vector3_type) vertex2, uint32_t bsdfLightIDs) + { + Shape retval; + retval.vertex0 = vertex0; + retval.vertex1 = vertex1; + retval.vertex2 = vertex2; + retval.bsdfLightIDs = bsdfLightIDs; + return retval; + } + + static Shape create(NBL_CONST_REF_ARG(vector3_type) vertex0, NBL_CONST_REF_ARG(vector3_type) vertex1, NBL_CONST_REF_ARG(vector3_type) vertex2, uint32_t bsdfID, uint32_t lightID) + { + uint32_t bsdfLightIDs = glsl::bitfieldInsert(bsdfID, lightID, 16, 16); + return create(vertex0, vertex1, vertex2, bsdfLightIDs); + } + + void updateTransform(NBL_CONST_REF_ARG(float32_t3x4) m) + { + // Define triangle in local space + float3 localVertex0 = float3(0.0, 0.0, 0.0); + float3 localVertex1 = float3(1.0, 0.0, 0.0); + float3 localVertex2 = 
float3(0.0, 1.0, 0.0); + + // Transform each vertex + vertex0 = mul(m, float4(localVertex0, 1.0)).xyz; + vertex1 = mul(m, float4(localVertex1, 1.0)).xyz; + vertex2 = mul(m, float4(localVertex2, 1.0)).xyz; + } + + scalar_type intersect(NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(vector3_type) direction) + { + const vector3_type edges[2] = { vertex1 - vertex0, vertex2 - vertex0 }; + + const vector3_type h = hlsl::cross(direction, edges[1]); + const scalar_type a = hlsl::dot(edges[0], h); + + const vector3_type relOrigin = origin - vertex0; + + const scalar_type u = hlsl::dot(relOrigin, h) / a; + + const vector3_type q = hlsl::cross(relOrigin, edges[0]); + const scalar_type v = hlsl::dot(direction, q) / a; + + const scalar_type t = hlsl::dot(edges[1], q) / a; + + const bool intersection = t > 0.f && u >= 0.f && v >= 0.f && (u + v) <= 1.f; + return intersection ? t : bit_cast(numeric_limits::infinity); + } + + vector3_type getNormalTimesArea() + { + const vector3_type edges[2] = { vertex1 - vertex0, vertex2 - vertex0 }; + return hlsl::cross(edges[0], edges[1]) * 0.5f; + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t ObjSize = 10; + + vector3_type vertex0; + vector3_type vertex1; + vector3_type vertex2; + uint32_t bsdfLightIDs; +}; + +template +struct Shape +{ + using scalar_type = T; + using vector3_type = vector; + + static Shape create(NBL_CONST_REF_ARG(vector3_type) offset, NBL_CONST_REF_ARG(vector3_type) edge0, NBL_CONST_REF_ARG(vector3_type) edge1, uint32_t bsdfLightIDs) + { + Shape retval; + retval.offset = offset; + retval.edge0 = edge0; + retval.edge1 = edge1; + retval.bsdfLightIDs = bsdfLightIDs; + return retval; + } + + static Shape create(NBL_CONST_REF_ARG(vector3_type) offset, NBL_CONST_REF_ARG(vector3_type) edge0, NBL_CONST_REF_ARG(vector3_type) edge1, uint32_t bsdfID, uint32_t lightID) + { + uint32_t bsdfLightIDs = glsl::bitfieldInsert(bsdfID, lightID, 16, 16); + return create(offset, edge0, edge1, bsdfLightIDs); + } + + void 
updateTransform(NBL_CONST_REF_ARG(float32_t3x4) m) + { + // Define rectangle in local space + float3 localVertex0 = float3(0.0, 0.0, 0.0); + float3 localVertex1 = float3(1.0, 0.0, 0.0); + float3 localVertex2 = float3(0.0, 1.0, 0.0); + + // Transform each vertex + float3 vertex0 = mul(m, float4(localVertex0, 1.0)).xyz; + float3 vertex1 = mul(m, float4(localVertex1, 1.0)).xyz; + float3 vertex2 = mul(m, float4(localVertex2, 1.0)).xyz; + + // Extract offset and edges from transformed vertices + offset = vertex0; + edge0 = vertex1 - vertex0; + edge1 = vertex2 - vertex0; + } + + scalar_type intersect(NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(vector3_type) direction) + { + const vector3_type h = hlsl::cross(direction, edge1); + const scalar_type a = hlsl::dot(edge0, h); + + const vector3_type relOrigin = origin - offset; + + const scalar_type u = hlsl::dot(relOrigin,h)/a; + + const vector3_type q = hlsl::cross(relOrigin, edge0); + const scalar_type v = hlsl::dot(direction, q) / a; + + const scalar_type t = hlsl::dot(edge1, q) / a; + + const bool intersection = t > 0.f && u >= 0.f && v >= 0.f && u <= 1.f && v <= 1.f; + return intersection ? 
t : bit_cast(numeric_limits::infinity); + } + + vector3_type getNormalTimesArea() + { + return hlsl::cross(edge0, edge1); + } + + void getNormalBasis(NBL_REF_ARG(matrix) basis, NBL_REF_ARG(vector) extents) + { + extents = vector(nbl::hlsl::length(edge0), nbl::hlsl::length(edge1)); + basis[0] = edge0 / extents[0]; + basis[1] = edge1 / extents[1]; + basis[2] = nbl::hlsl::normalize(nbl::hlsl::cross(basis[0],basis[1])); + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t ObjSize = 10; + + vector3_type offset; + vector3_type edge0; + vector3_type edge1; + uint32_t bsdfLightIDs; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl b/31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl new file mode 100644 index 000000000..7fc16dff2 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl @@ -0,0 +1,74 @@ +#ifndef _NBL_HLSL_EXT_INTERSECTOR_INCLUDED_ +#define _NBL_HLSL_EXT_INTERSECTOR_INCLUDED_ + +#include "example_common.hlsl" +#include + +using namespace nbl; +using namespace hlsl; + +template +struct Intersector +{ + using scalar_type = typename Ray::scalar_type; + using vector3_type = vector; + using ray_type = Ray; + using scene_type = Scene; + using id_type = ObjectID; + + static id_type traceRay(NBL_REF_ARG(ray_type) ray, NBL_CONST_REF_ARG(scene_type) scene) + { + id_type objectID; + objectID.id = -1; // -1 wraps to 0xFFFFFFFF in the uint32_t id (no hit yet); mode/shapeType are only assigned on a hit + + // procedural shapes: each loop keeps the closest positive hit by shrinking ray.intersectionT + NBL_UNROLL for (int i = 0; i < scene_type::SphereCount; i++) + { + float t = scene.getSphere(i).intersect(ray.origin, ray.direction); + + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + { + ray.intersectionT = t; + objectID.id = i; + objectID.mode = IM_PROCEDURAL; + objectID.shapeType = PST_SPHERE; + } + } + NBL_UNROLL for (int i = 0; i < scene_type::TriangleCount; i++) + { + float t = scene.getTriangle(i).intersect(ray.origin, ray.direction); + + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + { + ray.intersectionT = t; 
+ objectID.id = i; + objectID.mode = IM_PROCEDURAL; + objectID.shapeType = PST_TRIANGLE; + } + } + NBL_UNROLL for (int i = 0; i < scene_type::RectangleCount; i++) + { + float t = scene.getRectangle(i).intersect(ray.origin, ray.direction); + + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + { + ray.intersectionT = t; + objectID.id = i; + objectID.mode = IM_PROCEDURAL; + objectID.shapeType = PST_RECTANGLE; // this loop tests scene.getRectangle(i), so the hit must be tagged as a rectangle, not a triangle + } + } + + // TODO: trace AS + + return objectID; // id keeps its initial value when no shape was closer than ray.intersectionT + } +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl b/31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl new file mode 100644 index 000000000..7878472b5 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl @@ -0,0 +1,242 @@ +#ifndef _NBL_HLSL_EXT_MATERIAL_SYSTEM_INCLUDED_ +#define _NBL_HLSL_EXT_MATERIAL_SYSTEM_INCLUDED_ + +#include +#include +#include + +#include "example_common.hlsl" + +using namespace nbl; +using namespace hlsl; + +enum MaterialType : uint32_t // enum class? 
+{ + DIFFUSE = 0u, + CONDUCTOR, + DIELECTRIC, + IRIDESCENT_CONDUCTOR, + IRIDESCENT_DIELECTRIC, +}; + +template // NOTE: these bxdfs should match the ones in Scene BxDFNode +struct MaterialSystem +{ + using this_t = MaterialSystem; + using scalar_type = typename DiffuseBxDF::scalar_type; // types should be same across all bxdfs + using vector2_type = vector; + using vector3_type = vector; + using measure_type = typename DiffuseBxDF::spectral_type; + using sample_type = typename DiffuseBxDF::sample_type; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + using quotient_pdf_type = typename DiffuseBxDF::quotient_pdf_type; + using anisotropic_interaction_type = typename DiffuseBxDF::anisotropic_interaction_type; + using isotropic_interaction_type = typename anisotropic_interaction_type::isotropic_interaction_type; + using anisocache_type = typename ConductorBxDF::anisocache_type; + using isocache_type = typename anisocache_type::isocache_type; + using create_params_t = SBxDFCreationParams; + + using bxdfnode_type = BxDFNode; + using diffuse_op_type = DiffuseBxDF; + using conductor_op_type = ConductorBxDF; + using dielectric_op_type = DielectricBxDF; + using iri_conductor_op_type = IridescentConductorBxDF; + using iri_dielectric_op_type = IridescentDielectricBxDF; + + NBL_CONSTEXPR_STATIC_INLINE uint32_t IsBSDFPacked = uint32_t(bxdf::traits::type == bxdf::BT_BSDF) << uint32_t(MaterialType::DIFFUSE) | + uint32_t(bxdf::traits::type == bxdf::BT_BSDF) << uint32_t(MaterialType::CONDUCTOR) | + uint32_t(bxdf::traits::type == bxdf::BT_BSDF) << uint32_t(MaterialType::DIELECTRIC) | + uint32_t(bxdf::traits::type == bxdf::BT_BSDF) << uint32_t(MaterialType::IRIDESCENT_CONDUCTOR) | + uint32_t(bxdf::traits::type == bxdf::BT_BSDF) << uint32_t(MaterialType::IRIDESCENT_DIELECTRIC); // bitmask: bit N is set when the BxDF for MaterialType N is a BSDF; the terms occupy disjoint bits, so they must be OR-ed (AND-ing them is always 0) + + static bool isBSDF(uint32_t material) + { + return bool(IsBSDFPacked & (1u << material)); // tests the flag bit belonging to `material` + } + + // these are specific for the bxdfs used for this example + void 
fillBxdfParams(uint32_t material, NBL_CONST_REF_ARG(create_params_t) cparams) + { + switch(material) + { + case MaterialType::DIFFUSE: + { + using creation_t = typename diffuse_op_type::creation_type; + creation_t params; + params.A = cparams.A.x; + diffuseBxDF = diffuse_op_type::create(params); + } + break; + case MaterialType::CONDUCTOR: + { + conductorBxDF.ndf = conductor_op_type::ndf_type::create(cparams.A.x); + conductorBxDF.fresnel = conductor_op_type::fresnel_type::create(cparams.ior0,cparams.ior1); + } + break; + case MaterialType::DIELECTRIC: + { + using oriented_eta_t = bxdf::fresnel::OrientedEtas; + oriented_eta_t orientedEta = oriented_eta_t::create(1.0, hlsl::promote(cparams.eta)); + dielectricBxDF.ndf = dielectric_op_type::ndf_type::create(cparams.A.x); + dielectricBxDF.fresnel = dielectric_op_type::fresnel_type::create(orientedEta); + } + break; + case MaterialType::IRIDESCENT_CONDUCTOR: + { + iridescentConductorBxDF.ndf = iri_conductor_op_type::ndf_type::create(cparams.A.x); + using creation_params_t = typename iri_conductor_op_type::fresnel_type::creation_params_type; + creation_params_t params; + params.Dinc = cparams.A.y; + params.ior1 = hlsl::promote(1.0); + params.ior2 = cparams.ior0; + params.ior3 = cparams.ior1; + params.iork3 = cparams.iork; + iridescentConductorBxDF.fresnel = iri_conductor_op_type::fresnel_type::create(params); + } + break; + case MaterialType::IRIDESCENT_DIELECTRIC: + { + iridescentDielectricBxDF.ndf = iri_dielectric_op_type::ndf_type::create(cparams.A.x); + using creation_params_t = typename iri_dielectric_op_type::fresnel_type::creation_params_type; + creation_params_t params; + params.Dinc = cparams.A.y; + params.ior1 = hlsl::promote(1.0); + params.ior2 = cparams.ior0; + params.ior3 = cparams.ior1; + iridescentDielectricBxDF.fresnel = iri_dielectric_op_type::fresnel_type::create(params); + } + break; + default: + return; + } + } + + measure_type eval(uint32_t material, NBL_CONST_REF_ARG(create_params_t) cparams, 
NBL_CONST_REF_ARG(sample_type) _sample, NBL_CONST_REF_ARG(isotropic_interaction_type) interaction, NBL_CONST_REF_ARG(isocache_type) _cache) + { + fillBxdfParams(material, cparams); + switch(material) + { + case MaterialType::DIFFUSE: + { + return diffuseBxDF.eval(_sample, interaction); + } + break; + case MaterialType::CONDUCTOR: + { + return conductorBxDF.eval(_sample, interaction, _cache); + } + break; + case MaterialType::DIELECTRIC: + { + return dielectricBxDF.eval(_sample, interaction, _cache); + } + break; + case MaterialType::IRIDESCENT_CONDUCTOR: + { + return iridescentConductorBxDF.eval(_sample, interaction, _cache); + } + break; + case MaterialType::IRIDESCENT_DIELECTRIC: + { + return iridescentDielectricBxDF.eval(_sample, interaction, _cache); + } + break; + default: + return hlsl::promote(0.0); + } + } + + sample_type generate(uint32_t material, NBL_CONST_REF_ARG(create_params_t) cparams, NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction, NBL_CONST_REF_ARG(vector3_type) u, NBL_REF_ARG(anisocache_type) _cache) + { + fillBxdfParams(material, cparams); + switch(material) + { + case MaterialType::DIFFUSE: + { + return diffuseBxDF.generate(interaction, u.xy); + } + break; + case MaterialType::CONDUCTOR: + { + return conductorBxDF.generate(interaction, u.xy, _cache); + } + break; + case MaterialType::DIELECTRIC: + { + return dielectricBxDF.generate(interaction, u, _cache); + } + break; + case MaterialType::IRIDESCENT_CONDUCTOR: + { + return iridescentConductorBxDF.generate(interaction, u.xy, _cache); + } + break; + case MaterialType::IRIDESCENT_DIELECTRIC: + { + return iridescentDielectricBxDF.generate(interaction, u, _cache); + } + break; + default: + { + ray_dir_info_type L; + L.makeInvalid(); + return sample_type::create(L, hlsl::promote(0.0)); + } + } + + ray_dir_info_type L; + L.makeInvalid(); + return sample_type::create(L, hlsl::promote(0.0)); + } + + quotient_pdf_type quotient_and_pdf(uint32_t material, NBL_CONST_REF_ARG(create_params_t) 
cparams, NBL_CONST_REF_ARG(sample_type) _sample, NBL_CONST_REF_ARG(isotropic_interaction_type) interaction, NBL_CONST_REF_ARG(isocache_type) _cache) + { + const float minimumProjVectorLen = 0.00000001; // TODO: still need this check? + if (interaction.getNdotV(bxdf::BxDFClampMode::BCM_ABS) > minimumProjVectorLen && _sample.getNdotL(bxdf::BxDFClampMode::BCM_ABS) > minimumProjVectorLen) + { + fillBxdfParams(material, cparams); + switch(material) + { + case MaterialType::DIFFUSE: + { + return diffuseBxDF.quotient_and_pdf(_sample, interaction); + } + break; + case MaterialType::CONDUCTOR: + { + return conductorBxDF.quotient_and_pdf(_sample, interaction, _cache); + } + break; + case MaterialType::DIELECTRIC: + { + return dielectricBxDF.quotient_and_pdf(_sample, interaction, _cache); + } + break; + case MaterialType::IRIDESCENT_CONDUCTOR: + { + return iridescentConductorBxDF.quotient_and_pdf(_sample, interaction, _cache); + } + break; + case MaterialType::IRIDESCENT_DIELECTRIC: + { + return iridescentDielectricBxDF.quotient_and_pdf(_sample, interaction, _cache); + } + break; + default: + return quotient_pdf_type::create(hlsl::promote(0.0), 0.0); + } + } + return quotient_pdf_type::create(hlsl::promote(0.0), 0.0); + } + + DiffuseBxDF diffuseBxDF; + ConductorBxDF conductorBxDF; + DielectricBxDF dielectricBxDF; + IridescentConductorBxDF iridescentConductorBxDF; + IridescentDielectricBxDF iridescentDielectricBxDF; + + bxdfnode_type bxdfs[Scene::SCENE_BXDF_COUNT]; + uint32_t bxdfCount; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl new file mode 100644 index 000000000..5c34eed3a --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl @@ -0,0 +1,382 @@ +#ifndef _NBL_HLSL_EXT_NEXT_EVENT_ESTIMATOR_INCLUDED_ +#define _NBL_HLSL_EXT_NEXT_EVENT_ESTIMATOR_INCLUDED_ + +#include "example_common.hlsl" + +using namespace nbl; +using namespace hlsl; + 
+template +struct ShapeSampling; + +template +struct ShapeSampling +{ + using scalar_type = T; + using vector3_type = vector; + + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) sphere) + { + ShapeSampling retval; + retval.sphere = sphere; + return retval; + } + + template + scalar_type deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + return 1.0 / sphere.getSolidAngle(ray.origin); + } + + template + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi) + { + vector3_type Z = sphere.position - origin; + const scalar_type distanceSQ = hlsl::dot(Z,Z); + const scalar_type cosThetaMax2 = 1.0 - sphere.radius2 / distanceSQ; + if (cosThetaMax2 > 0.0) + { + const scalar_type rcpDistance = 1.0 / hlsl::sqrt(distanceSQ); + Z *= rcpDistance; + + const scalar_type cosThetaMax = hlsl::sqrt(cosThetaMax2); + const scalar_type cosTheta = hlsl::mix(1.0f, cosThetaMax, xi.x); + + vector3_type L = Z * cosTheta; + + const scalar_type cosTheta2 = cosTheta * cosTheta; + const scalar_type sinTheta = hlsl::sqrt(1.0 - cosTheta2); + scalar_type sinPhi, cosPhi; + math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); + vector3_type X, Y; + math::frisvad(Z, X, Y); + + L += (X * cosPhi + Y * sinPhi) * sinTheta; + + newRayMaxT = (cosTheta - hlsl::sqrt(cosTheta2 - cosThetaMax2)) / rcpDistance; + pdf = 1.0 / (2.0 * numbers::pi * (1.0 - cosThetaMax)); + return L; + } + pdf = 0.0; + return vector3_type(0.0,0.0,0.0); + } + + Shape sphere; +}; + +template +struct ShapeSampling +{ + using scalar_type = T; + using vector3_type = vector; + + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) tri) + { + ShapeSampling retval; + retval.tri = tri; + return retval; + } + + template + scalar_type deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const scalar_type dist = ray.intersectionT; + const vector3_type L = ray.direction; + 
return dist * dist / hlsl::abs(hlsl::dot(tri.getNormalTimesArea(), L)); + } + + template + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi) + { + const vector3_type edge0 = tri.vertex1 - tri.vertex0; + const vector3_type edge1 = tri.vertex2 - tri.vertex0; + const scalar_type sqrtU = hlsl::sqrt(xi.x); + vector3_type pnt = tri.vertex0 + edge0 * (1.0 - sqrtU) + edge1 * sqrtU * xi.y; + vector3_type L = pnt - origin; + + const scalar_type distanceSq = hlsl::dot(L,L); + const scalar_type rcpDistance = 1.0 / hlsl::sqrt(distanceSq); + L *= rcpDistance; + + pdf = distanceSq / hlsl::abs(hlsl::dot(hlsl::cross(edge0, edge1) * 0.5f, L)); + newRayMaxT = 1.0 / rcpDistance; + return L; + } + + Shape tri; +}; + +template +struct ShapeSampling +{ + using scalar_type = T; + using vector3_type = vector; + + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) tri) + { + ShapeSampling retval; + retval.tri = tri; + return retval; + } + + template + scalar_type deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri.vertex0, tri.vertex1, tri.vertex2, ray.origin); + const scalar_type rcpProb = st.solidAngleOfTriangle(); + // if `rcpProb` is NAN then the triangle's solid angle was close to 0.0 + return rcpProb > numeric_limits::min ? 
(1.0 / rcpProb) : numeric_limits::max; + } + + template + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi) + { + scalar_type rcpPdf; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri.vertex0, tri.vertex1, tri.vertex2, origin); + sampling::SphericalTriangle sst = sampling::SphericalTriangle::create(st); + + const vector3_type L = sst.generate(rcpPdf, xi.xy); + + pdf = rcpPdf > numeric_limits::min ? (1.0 / rcpPdf) : numeric_limits::max; + + const vector3_type N = tri.getNormalTimesArea(); + newRayMaxT = hlsl::dot(N, tri.vertex0 - origin) / hlsl::dot(N, L); + return L; + } + + Shape tri; +}; + +template +struct ShapeSampling +{ + using scalar_type = T; + using vector3_type = vector; + + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) tri) + { + ShapeSampling retval; + retval.tri = tri; + return retval; + } + + template + scalar_type deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const vector3_type L = ray.direction; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri.vertex0, tri.vertex1, tri.vertex2, ray.origin); + sampling::ProjectedSphericalTriangle pst = sampling::ProjectedSphericalTriangle::create(st); + const scalar_type pdf = pst.pdf(ray.normalAtOrigin, ray.wasBSDFAtOrigin, L); + // if `pdf` is NAN then the triangle's projected solid angle was close to 0.0, if its close to INF then the triangle was very small + return hlsl::mix(numeric_limits::max, pdf, pdf < numeric_limits::max); + } + + template + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi) + { + scalar_type rcpPdf; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri.vertex0, tri.vertex1, 
tri.vertex2, origin); + sampling::ProjectedSphericalTriangle sst = sampling::ProjectedSphericalTriangle::create(st); + + const vector3_type L = sst.generate(rcpPdf, interaction.getN(), isBSDF, xi.xy); + + pdf = hlsl::mix(numeric_limits::max, scalar_type(1.0) / rcpPdf, rcpPdf > numeric_limits::min); + + const vector3_type N = tri.getNormalTimesArea(); + newRayMaxT = hlsl::dot(N, tri.vertex0 - origin) / hlsl::dot(N, L); + return L; + } + + Shape tri; +}; + +template +struct ShapeSampling +{ + using scalar_type = T; + using vector3_type = vector; + + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) rect) + { + ShapeSampling retval; + retval.rect = rect; + return retval; + } + + template + scalar_type deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const scalar_type dist = ray.intersectionT; + const vector3_type L = ray.direction; + return dist * dist / hlsl::abs(hlsl::dot(rect.getNormalTimesArea(), L)); + } + + template + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi) + { + const vector3_type N = rect.getNormalTimesArea(); + const vector3_type origin2origin = rect.offset - origin; + + vector3_type L = origin2origin + rect.edge0 * xi.x + rect.edge1 * xi.y; + const scalar_type distSq = hlsl::dot(L, L); + const scalar_type rcpDist = 1.0 / hlsl::sqrt(distSq); + L *= rcpDist; + pdf = distSq / hlsl::abs(hlsl::dot(N, L)); + newRayMaxT = 1.0 / rcpDist; + return L; + } + + Shape rect; +}; + +template +struct ShapeSampling +{ + using scalar_type = T; + using vector3_type = vector; + + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) rect) + { + ShapeSampling retval; + retval.rect = rect; + return retval; + } + + template + scalar_type deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + scalar_type pdf; + matrix rectNormalBasis; + vector rectExtents; + rect.getNormalBasis(rectNormalBasis, rectExtents); + 
shapes::SphericalRectangle sphR0 = shapes::SphericalRectangle::create(ray.origin, rect.offset, rectNormalBasis); + scalar_type solidAngle = sphR0.solidAngleOfRectangle(rectExtents); + if (solidAngle > numeric_limits::min) + pdf = 1.f / solidAngle; + else + pdf = bit_cast(numeric_limits::infinity); + return pdf; + } + + template + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi) + { + const vector3_type N = rect.getNormalTimesArea(); + const vector3_type origin2origin = rect.offset - origin; + + matrix rectNormalBasis; + vector rectExtents; + rect.getNormalBasis(rectNormalBasis, rectExtents); + shapes::SphericalRectangle sphR0 = shapes::SphericalRectangle::create(origin, rect.offset, rectNormalBasis); + vector3_type L = hlsl::promote(0.0); + scalar_type solidAngle = sphR0.solidAngleOfRectangle(rectExtents); + + sampling::SphericalRectangle ssph = sampling::SphericalRectangle::create(sphR0); + vector sphUv = ssph.generate(rectExtents, xi.xy, solidAngle); + if (solidAngle > numeric_limits::min) + { + vector3_type sph_sample = sphUv.x * rect.edge0 + sphUv.y * rect.edge1 + rect.offset; + L = sph_sample - origin; + const bool invalid = hlsl::all(hlsl::abs(L) < hlsl::promote(numeric_limits::min)); + L = hlsl::mix(hlsl::normalize(L), hlsl::promote(0.0), invalid); + pdf = hlsl::mix(1.f / solidAngle, bit_cast(numeric_limits::infinity), invalid); + } + else + pdf = bit_cast(numeric_limits::infinity); + + newRayMaxT = hlsl::dot(N, origin2origin) / hlsl::dot(N, L); + return L; + } + + Shape rect; +}; + +// PPM_APPROX_PROJECTED_SOLID_ANGLE not available for PST_TRIANGLE + + +template +struct NextEventEstimator; + +template +struct NextEventEstimator +{ + using scalar_type = typename Ray::scalar_type; + using vector3_type = vector; + using ray_type = Ray; + using scene_type = Scene; + using light_type = 
Light; + using spectral_type = typename light_type::spectral_type; + using interaction_type = Aniso; + using quotient_pdf_type = sampling::quotient_and_pdf; + using sample_type = LightSample; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + + using shape_type = Shape; + using shape_sampling_type = ShapeSampling; + + // affected by https://github.com/microsoft/DirectXShaderCompiler/issues/7007 + // NBL_CONSTEXPR_STATIC_INLINE PTPolygonMethod PolygonMethod = PPM; + enum : uint16_t { PolygonMethod = PPM }; + + template NBL_FUNC_REQUIRES(C::value && PST==PST_SPHERE) + static shape_sampling_type __getShapeSampling(NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightObjectID) + { + const shape_type sphere = scene.getSphere(lightObjectID); + return shape_sampling_type::create(sphere); + } + template NBL_FUNC_REQUIRES(C::value && PST==PST_TRIANGLE) + static shape_sampling_type __getShapeSampling(NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightObjectID) + { + const shape_type tri = scene.getTriangle(lightObjectID); + return shape_sampling_type::create(tri); + } + template NBL_FUNC_REQUIRES(C::value && PST==PST_RECTANGLE) + static shape_sampling_type __getShapeSampling(NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightObjectID) + { + const shape_type rect = scene.getRectangle(lightObjectID); + return shape_sampling_type::create(rect); + } + + spectral_type deferredEvalAndPdf(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(ray_type) ray) + { + pdf = 1.0 / lightCount; + const light_type light = lights[lightID]; + const shape_sampling_type sampling = __getShapeSampling(scene, light.objectID.id); + pdf *= sampling.template deferredPdf(ray); + + return light.radiance; + } + + sample_type generate_and_quotient_and_pdf(NBL_REF_ARG(quotient_pdf_type) quotient_pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(vector3_type) origin, 
NBL_CONST_REF_ARG(interaction_type) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi, uint32_t depth) + { + const light_type light = lights[lightID]; + const shape_sampling_type sampling = __getShapeSampling(scene, light.objectID.id); + + scalar_type pdf; + const vector3_type sampleL = sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, isBSDF, xi); + ray_dir_info_type rayL; + if (hlsl::isinf(pdf)) + { + quotient_pdf = quotient_pdf_type::create(hlsl::promote(0.0), 0.0); + return sample_type::createInvalid(); + } + + const vector3_type N = interaction.getN(); + const scalar_type NdotL = nbl::hlsl::dot(N, sampleL); + + rayL.setDirection(sampleL); + sample_type L = sample_type::create(rayL,interaction.getT(),interaction.getB(),NdotL); + + newRayMaxT *= path_tracing::Tolerance::getEnd(depth); + pdf *= 1.0 / scalar_type(lightCount); + spectral_type quo = light.radiance / pdf; + quotient_pdf = quotient_pdf_type::create(quo, pdf); + + return L; + } + + light_type lights[scene_type::SCENE_LIGHT_COUNT]; + uint32_t lightCount; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl new file mode 100644 index 000000000..6f67cd79e --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -0,0 +1,266 @@ +#ifndef _NBL_HLSL_PATHTRACING_INCLUDED_ +#define _NBL_HLSL_PATHTRACING_INCLUDED_ + +#include +#include +#include +#include +#include +#include +#include +#include "concepts.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace path_tracing +{ + +template && concepts::RayGenerator && + concepts::Intersector && concepts::MaterialSystem && + concepts::NextEventEstimator && concepts::Accumulator && + concepts::Scene) +struct Unidirectional +{ + using this_t = Unidirectional; + using randgen_type = RandGen; + using raygen_type = RayGen; + using intersector_type = Intersector; + using material_system_type = MaterialSystem; + using nee_type = 
NextEventEstimator; + using scene_type = Scene; + + using scalar_type = typename MaterialSystem::scalar_type; + using vector3_type = vector; + using monochrome_type = vector; + using measure_type = typename MaterialSystem::measure_type; + using output_storage_type = typename Accumulator::output_storage_type; // ? + using sample_type = typename NextEventEstimator::sample_type; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + using ray_type = typename RayGen::ray_type; + using id_type = typename Intersector::id_type; + using light_type = Light; + using bxdfnode_type = typename MaterialSystem::bxdfnode_type; + using anisotropic_interaction_type = typename MaterialSystem::anisotropic_interaction_type; + using isotropic_interaction_type = typename anisotropic_interaction_type::isotropic_interaction_type; + using anisocache_type = typename MaterialSystem::anisocache_type; + using isocache_type = typename anisocache_type::isocache_type; + using quotient_pdf_type = typename NextEventEstimator::quotient_pdf_type; + + using diffuse_op_type = typename MaterialSystem::diffuse_op_type; + using conductor_op_type = typename MaterialSystem::conductor_op_type; + using dielectric_op_type = typename MaterialSystem::dielectric_op_type; + + vector3_type rand3d(uint32_t protoDimension, uint32_t _sample, uint32_t i) + { + using sequence_type = sampling::QuantizedSequence; + uint32_t address = glsl::bitfieldInsert(protoDimension, _sample, MAX_DEPTH_LOG2, MAX_SAMPLES_LOG2); + sequence_type tmpSeq = vk::RawBufferLoad(pSampleBuffer + (address + i) * sizeof(sequence_type)); + return sampling::decode(tmpSeq, randGen()); + } + + scalar_type getLuma(NBL_CONST_REF_ARG(vector3_type) col) + { + return hlsl::dot(colorspace::scRGBtoXYZ[1], col); + } + + // TODO: probably will only work with isotropic surfaces, need to do aniso + bool closestHitProgram(uint32_t depth, uint32_t _sample, NBL_REF_ARG(ray_type) ray, NBL_CONST_REF_ARG(scene_type) scene) + { + const id_type objectID = 
ray.objectID; + const vector3_type intersection = ray.origin + ray.direction * ray.intersectionT; + + uint32_t bsdfLightIDs = scene.getBsdfLightIDs(objectID); + vector3_type N = scene.getNormal(objectID, intersection); + N = nbl::hlsl::normalize(N); + ray_dir_info_type V; + V.setDirection(-ray.direction); + isotropic_interaction_type iso_interaction = isotropic_interaction_type::create(V, N); + iso_interaction.luminosityContributionHint = colorspace::scRGBtoXYZ[1]; + anisotropic_interaction_type interaction = anisotropic_interaction_type::create(iso_interaction); + + vector3_type throughput = ray.payload.throughput; + + // emissive + const uint32_t lightID = glsl::bitfieldExtract(bsdfLightIDs, 16, 16); + if (lightID != light_type::INVALID_ID) + { + float _pdf; + ray.payload.accumulation += nee.deferredEvalAndPdf(_pdf, scene, lightID, ray) * throughput / (1.0 + _pdf * _pdf * ray.payload.otherTechniqueHeuristic); + } + + const uint32_t bsdfID = glsl::bitfieldExtract(bsdfLightIDs, 0, 16); + if (bsdfID == bxdfnode_type::INVALID_ID) + return false; + + bxdfnode_type bxdf = materialSystem.bxdfs[bsdfID]; + + // TODO: ifdef kill diffuse specular paths + + const bool isBSDF = material_system_type::isBSDF(bxdf.materialType); + + vector3_type eps0 = rand3d(depth, _sample, 0u); + vector3_type eps1 = rand3d(depth, _sample, 1u); + + // thresholds + const scalar_type bxdfPdfThreshold = 0.0001; + const scalar_type lumaContributionThreshold = getLuma(colorspace::eotf::sRGB((vector3_type)1.0 / 255.0)); // OETF smallest perceptible value + const vector3_type throughputCIE_Y = colorspace::sRGBtoXYZ[1] * throughput; // TODO: this only works if spectral_type is dim 3 + const measure_type eta = bxdf.params.ior1 / bxdf.params.ior0; + const scalar_type monochromeEta = hlsl::dot(throughputCIE_Y, eta) / (throughputCIE_Y.r + throughputCIE_Y.g + throughputCIE_Y.b); // TODO: imaginary eta? 
+ + // sample lights + const scalar_type neeProbability = 1.0; // BSDFNode_getNEEProb(bsdf); + scalar_type rcpChoiceProb; + sampling::PartitionRandVariable partitionRandVariable; + partitionRandVariable.leftProb = neeProbability; + if (!partitionRandVariable(eps0.z, rcpChoiceProb) && depth < 2u) + { + uint32_t randLightID = min(uint32_t(float32_t(randGen.rng()) / numeric_limits::max * nee.lightCount), nee.lightCount - 1u); // scale by lightCount BEFORE truncating (truncating the [0,1] ratio first always selected light 0); clamp so rng()==max cannot index one past the last light + quotient_pdf_type neeContrib_pdf; + scalar_type t; + sample_type nee_sample = nee.generate_and_quotient_and_pdf( + neeContrib_pdf, t, + scene, randLightID, intersection, interaction, + isBSDF, eps0, depth + ); + + // We don't allow non watertight transmitters in this renderer + bool validPath = nee_sample.getNdotL() > numeric_limits::min && nee_sample.isValid(); + // but if we allowed non-watertight transmitters (single water surface), it would make sense just to apply this line by itself + bxdf::fresnel::OrientedEtas orientedEta = bxdf::fresnel::OrientedEtas::create(interaction.getNdotV(), hlsl::promote(monochromeEta)); + anisocache_type _cache = anisocache_type::template create(interaction, nee_sample, orientedEta); + validPath = validPath && _cache.getAbsNdotH() >= 0.0; + bxdf.params.eta = monochromeEta; + + if (neeContrib_pdf.pdf < numeric_limits::max) + { + if (nbl::hlsl::any(hlsl::isnan(nee_sample.getL().getDirection()))) + ray.payload.accumulation += vector3_type(1000.f, 0.f, 0.f); + else if (nbl::hlsl::all((vector3_type)69.f == nee_sample.getL().getDirection())) + ray.payload.accumulation += vector3_type(0.f, 1000.f, 0.f); + else if (validPath) + { + // example only uses isotropic bxdfs + quotient_pdf_type bsdf_quotient_pdf = materialSystem.quotient_and_pdf(bxdf.materialType, bxdf.params, nee_sample, interaction.isotropic, _cache.iso_cache); + neeContrib_pdf.quotient *= bxdf.albedo * throughput * bsdf_quotient_pdf.quotient; + const scalar_type otherGenOverChoice = bsdf_quotient_pdf.pdf * rcpChoiceProb; + const scalar_type otherGenOverLightAndChoice = 
otherGenOverChoice / neeContrib_pdf.pdf; + neeContrib_pdf.quotient *= otherGenOverChoice / (1.f + otherGenOverLightAndChoice * otherGenOverLightAndChoice); // power heuristic (exponent 2): the ratio is (BSDF pdf * rcpChoiceProb) / light pdf, so the divisor above must be the light pdf, not the BSDF pdf + + // TODO: ifdef NEE only + // neeContrib_pdf.quotient *= otherGenOverChoice; + + ray_type nee_ray; + nee_ray.origin = intersection + nee_sample.getL().getDirection() * t * Tolerance::getStart(depth); + nee_ray.direction = nee_sample.getL().getDirection(); + nee_ray.intersectionT = t; + if (bsdf_quotient_pdf.pdf < numeric_limits::max && getLuma(neeContrib_pdf.quotient) > lumaContributionThreshold && intersector_type::traceRay(nee_ray, scene).id == -1) + ray.payload.accumulation += neeContrib_pdf.quotient; + } + } + } + + // return false; // NEE only + + // sample BSDF + scalar_type bxdfPdf; + vector3_type bxdfSample; + { + anisocache_type _cache; + sample_type bsdf_sample = materialSystem.generate(bxdf.materialType, bxdf.params, interaction, eps1, _cache); + + if (!bsdf_sample.isValid()) + return false; + + // example only uses isotropic bxdfs + // the value of the bsdf divided by the probability of the sample being generated + quotient_pdf_type bsdf_quotient_pdf = materialSystem.quotient_and_pdf(bxdf.materialType, bxdf.params, bsdf_sample, interaction.isotropic, _cache.iso_cache); + throughput *= bxdf.albedo * bsdf_quotient_pdf.quotient; + bxdfPdf = bsdf_quotient_pdf.pdf; + bxdfSample = bsdf_sample.getL().getDirection(); + } + + // additional threshold + const float lumaThroughputThreshold = lumaContributionThreshold; + if (bxdfPdf > bxdfPdfThreshold && getLuma(throughput) > lumaThroughputThreshold) + { + ray.payload.throughput = throughput; + scalar_type otherTechniqueHeuristic = neeProbability / bxdfPdf; // numerically stable, don't touch + ray.payload.otherTechniqueHeuristic = otherTechniqueHeuristic * otherTechniqueHeuristic; + + // trace new ray + ray.origin = intersection + bxdfSample * (1.0/*kSceneSize*/) * Tolerance::getStart(depth); + ray.direction = bxdfSample; + if 
((PTPolygonMethod)nee_type::PolygonMethod == PPM_APPROX_PROJECTED_SOLID_ANGLE) + { + ray.normalAtOrigin = interaction.getN(); + ray.wasBSDFAtOrigin = isBSDF; + } + return true; + } + + return false; + } + + void missProgram(NBL_REF_ARG(ray_type) ray) + { + vector3_type finalContribution = ray.payload.throughput; + // #ifdef USE_ENVMAP + // vec2 uv = SampleSphericalMap(_immutable.direction); + // finalContribution *= textureLod(envMap, uv, 0.0).rgb; + // #else + const vector3_type kConstantEnvLightRadiance = vector3_type(0.15, 0.21, 0.3); // TODO: match spectral_type + finalContribution *= kConstantEnvLightRadiance; + ray.payload.accumulation += finalContribution; + // #endif + } + + // Li + void sampleMeasure(uint32_t sampleIndex, uint32_t maxDepth, NBL_CONST_REF_ARG(scene_type) scene, NBL_REF_ARG(Accumulator) accumulator) + { + //scalar_type meanLumaSq = 0.0; + vector3_type uvw = rand3d(0u, sampleIndex, 0u); + ray_type ray = rayGen.generate(uvw); + + // bounces + bool hit = true; + bool rayAlive = true; + for (int d = 1; (d <= maxDepth) && hit && rayAlive; d += 2) + { + ray.intersectionT = numeric_limits::max; + ray.objectID = intersector_type::traceRay(ray, scene); + + hit = ray.objectID.id != -1; + if (hit) + rayAlive = closestHitProgram(1, sampleIndex, ray, scene); + } + if (!hit) + missProgram(ray); + + const uint32_t sampleCount = sampleIndex + 1; + accumulator.addSample(sampleCount, ray.payload.accumulation); + + // TODO: visualize high variance + + // TODO: russian roulette early exit? 
+ } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_SAMPLES_LOG2 = 10u; + + randgen_type randGen; + raygen_type rayGen; + material_system_type materialSystem; + nee_type nee; + + uint64_t pSampleBuffer; +}; + +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl new file mode 100644 index 000000000..d556a7162 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl @@ -0,0 +1,19 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#pragma wave shader_stage(fragment) + +// vertex shader is provided by the fullScreenTriangle extension +#include +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +// binding 0 set 0 +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2DArray texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; + +[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 +{ + return float32_t4(texture.Sample(samplerState, float3(vxAttr.uv, 0)).rgb, 1.0f); +} \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl b/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl new file mode 100644 index 000000000..bffe39940 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl @@ -0,0 +1,51 @@ +#ifndef _NBL_HLSL_EXT_RANDGEN_INCLUDED_ +#define _NBL_HLSL_EXT_RANDGEN_INCLUDED_ + +namespace RandGen +{ + +template +struct Uniform1D +{ + using rng_type = RNG; + using return_type = uint32_t; + + static Uniform1D construct(uint32_t2 seed) + { + Uniform1D retval; + retval.rng = rng_type::construct(seed); + return retval; + } + + return_type operator()() + { + return rng(); + } + + rng_type rng; +}; + +template +struct Uniform3D +{ + using 
rng_type = RNG; + using return_type = uint32_t3; + + static Uniform3D construct(uint32_t2 seed) + { + Uniform3D retval; + retval.rng = rng_type::construct(seed); + return retval; + } + + return_type operator()() + { + return return_type(rng(), rng(), rng()); + } + + rng_type rng; +}; + +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/ray_gen.hlsl b/31_HLSLPathTracer/app_resources/hlsl/ray_gen.hlsl new file mode 100644 index 000000000..5f2b2d130 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/ray_gen.hlsl @@ -0,0 +1,75 @@ +#ifndef _NBL_HLSL_EXT_RAYGEN_INCLUDED_ +#define _NBL_HLSL_EXT_RAYGEN_INCLUDED_ + +#include + +#include "common.hlsl" + +namespace RayGen +{ + +template +struct Basic +{ + using this_t = Basic; + using ray_type = Ray; + using scalar_type = typename Ray::scalar_type; + using vector3_type = typename Ray::vector3_type; + + using vector2_type = vector; + using vector4_type = vector; + using matrix4x4_type = matrix; + + static this_t create(NBL_CONST_REF_ARG(vector2_type) pixOffsetParam, NBL_CONST_REF_ARG(vector3_type) camPos, NBL_CONST_REF_ARG(vector4_type) NDC, NBL_CONST_REF_ARG(matrix4x4_type) invMVP) + { + this_t retval; + retval.pixOffsetParam = pixOffsetParam; + retval.camPos = camPos; + retval.NDC = NDC; + retval.invMVP = invMVP; + return retval; + } + + ray_type generate(NBL_CONST_REF_ARG(vector3_type) randVec) + { + ray_type ray; + ray.origin = camPos; + + vector4_type tmp = NDC; + // apply stochastic reconstruction filter + const float gaussianFilterCutoff = 2.5; + const float truncation = nbl::hlsl::exp(-0.5 * gaussianFilterCutoff * gaussianFilterCutoff); + vector2_type remappedRand = randVec.xy; + remappedRand.x *= 1.0 - truncation; + remappedRand.x += truncation; + nbl::hlsl::sampling::BoxMullerTransform boxMuller; + boxMuller.stddev = 1.5; + tmp.xy += pixOffsetParam * boxMuller(remappedRand); + // for depth of field we could do another stochastic point-pick + tmp = nbl::hlsl::mul(invMVP, tmp); + ray.direction = 
nbl::hlsl::normalize(tmp.xyz / tmp.w - camPos); + + // #if POLYGON_METHOD==2 + // ray._immutable.normalAtOrigin = vec3(0.0,0.0,0.0); + // ray._immutable.wasBSDFAtOrigin = false; + // #endif + + ray.payload.accumulation = (vector3_type)0.0; + ray.payload.otherTechniqueHeuristic = 0.0; // needed for direct eye-light paths + ray.payload.throughput = (vector3_type)1.0; + // #ifdef KILL_DIFFUSE_SPECULAR_PATHS + // ray._payload.hasDiffuse = false; + // #endif + + return ray; + } + + vector2_type pixOffsetParam; + vector3_type camPos; + vector4_type NDC; + matrix4x4_type invMVP; +}; + +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl new file mode 100644 index 000000000..46660bac3 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -0,0 +1,263 @@ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/random/pcg.hlsl" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" +#ifdef PERSISTENT_WORKGROUPS +#include "nbl/builtin/hlsl/math/morton.hlsl" +#endif + +#include "nbl/builtin/hlsl/bxdf/reflection.hlsl" +#include "nbl/builtin/hlsl/bxdf/transmission.hlsl" + +// add these defines (one at a time) using -D argument to dxc +// #define SPHERE_LIGHT +// #define TRIANGLE_LIGHT +// #define RECTANGLE_LIGHT + +#include +#include + +#ifdef RWMC_ENABLED +#include +#include +#endif + +#ifdef RWMC_ENABLED +[[vk::push_constant]] RenderRWMCPushConstants pc; +#else +[[vk::push_constant]] RenderPushConstants pc; +#endif + +[[vk::combinedImageSampler]] [[vk::binding(0, 2)]] Texture2D envMap; // unused +[[vk::combinedImageSampler]] [[vk::binding(0, 2)]] SamplerState envSampler; + +[[vk::combinedImageSampler]] [[vk::binding(2, 2)]] Texture2D scramblebuf; +[[vk::combinedImageSampler]] [[vk::binding(2, 2)]] SamplerState scrambleSampler; + +[[vk::image_format("rgba16f")]] [[vk::binding(0)]] RWTexture2DArray outImage; 
+[[vk::image_format("rgba16f")]] [[vk::binding(1)]] RWTexture2DArray cascade; + +#include "example_common.hlsl" +#include "scene.hlsl" +#include "rand_gen.hlsl" +#include "ray_gen.hlsl" +#include "intersector.hlsl" +#include "material_system.hlsl" +#include "next_event_estimator.hlsl" +#include "accumulator.hlsl" +#include "pathtracer.hlsl" + +using namespace nbl; +using namespace hlsl; + +#ifdef SPHERE_LIGHT +NBL_CONSTEXPR ProceduralShapeType LIGHT_TYPE = PST_SPHERE; +#endif +#ifdef TRIANGLE_LIGHT +NBL_CONSTEXPR ProceduralShapeType LIGHT_TYPE = PST_TRIANGLE; +#endif +#ifdef RECTANGLE_LIGHT +NBL_CONSTEXPR ProceduralShapeType LIGHT_TYPE = PST_RECTANGLE; +#endif + +NBL_CONSTEXPR path_tracing::PTPolygonMethod POLYGON_METHOD = path_tracing::PPM_SOLID_ANGLE; + +int32_t2 getCoordinates() +{ + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); + return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); +} + +float32_t2 getTexCoords() +{ + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); + int32_t2 iCoords = getCoordinates(); + return float32_t2(float(iCoords.x) / width, 1.0 - float(iCoords.y) / height); +} + +using spectral_t = vector; +using ray_dir_info_t = bxdf::ray_dir_info::SBasic; +using iso_interaction = bxdf::surface_interactions::SIsotropic; +using aniso_interaction = bxdf::surface_interactions::SAnisotropic; +using sample_t = bxdf::SLightSample; +using iso_cache = bxdf::SIsotropicMicrofacetCache; +using aniso_cache = bxdf::SAnisotropicMicrofacetCache; +using quotient_pdf_t = sampling::quotient_and_pdf; + +using iso_config_t = bxdf::SConfiguration; +using iso_microfacet_config_t = bxdf::SMicrofacetConfiguration; + +using diffuse_bxdf_type = bxdf::reflection::SOrenNayar; +using conductor_bxdf_type = bxdf::reflection::SGGXIsotropic; +using dielectric_bxdf_type = bxdf::transmission::SGGXDielectricIsotropic; +using 
iri_conductor_bxdf_type = bxdf::reflection::SIridescent; +using iri_dielectric_bxdf_type = bxdf::transmission::SIridescent; + +using ray_type = Ray; +using light_type = Light; +using bxdfnode_type = BxDFNode; +using scene_type = Scene; +using randgen_type = RandGen::Uniform3D; +using raygen_type = RayGen::Basic; +using intersector_type = Intersector; +using material_system_type = MaterialSystem; +using nee_type = NextEventEstimator; + +#ifdef RWMC_ENABLED +using accumulator_type = rwmc::CascadeAccumulator; +#else +using accumulator_type = Accumulator::DefaultAccumulator; +#endif + +using pathtracer_type = path_tracing::Unidirectional; + +#ifdef SPHERE_LIGHT +static const Shape spheres[scene_type::SCENE_LIGHT_COUNT] = { + Shape::create(float3(-1.5, 1.5, 0.0), 0.3, bxdfnode_type::INVALID_ID, 0u) +}; +#endif + +#ifdef TRIANGLE_LIGHT +static const Shape triangles[scene_type::SCENE_LIGHT_COUNT] = { + Shape::create(float3(-1.8,0.35,0.3) * 10.0, float3(-1.2,0.35,0.0) * 10.0, float3(-1.5,0.8,-0.3) * 10.0, bxdfnode_type::INVALID_ID, 0u) +}; +#endif + +#ifdef RECTANGLE_LIGHT +static const Shape rectangles[scene_type::SCENE_LIGHT_COUNT] = { + Shape::create(float3(-3.8,0.35,1.3), normalize(float3(2,0,-1))*7.0, normalize(float3(2,-5,4))*0.1, bxdfnode_type::INVALID_ID, 0u) +}; +#endif + +static const light_type lights[scene_type::SCENE_LIGHT_COUNT] = { + light_type::create(LightEminence, +#ifdef SPHERE_LIGHT + scene_type::SCENE_SPHERE_COUNT, +#else + 0u, +#endif + IM_PROCEDURAL, LIGHT_TYPE) +}; + +static const bxdfnode_type bxdfs[scene_type::SCENE_BXDF_COUNT] = { + bxdfnode_type::create(MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.8,0.8,0.8)), + bxdfnode_type::create(MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.8,0.4,0.4)), + bxdfnode_type::create(MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.4,0.8,0.4)), + bxdfnode_type::create(MaterialType::CONDUCTOR, false, float2(0,0), spectral_t(1.02,1.02,1.3), spectral_t(1.0,1.0,2.0)), + 
bxdfnode_type::create(MaterialType::CONDUCTOR, false, float2(0,0), spectral_t(1.02,1.3,1.02), spectral_t(1.0,2.0,1.0)), + bxdfnode_type::create(MaterialType::CONDUCTOR, false, float2(0.15,0.15), spectral_t(1.02,1.3,1.02), spectral_t(1.0,2.0,1.0)), + bxdfnode_type::create(MaterialType::DIELECTRIC, false, float2(0.0625,0.0625), spectral_t(1,1,1), spectral_t(1.4,1.45,1.5)), + bxdfnode_type::create(MaterialType::IRIDESCENT_CONDUCTOR, false, 0.0, 505.0, spectral_t(1.39,1.39,1.39), spectral_t(1.2,1.2,1.2), spectral_t(0.5,0.5,0.5)), + bxdfnode_type::create(MaterialType::IRIDESCENT_DIELECTRIC, false, 0.0, 400.0, spectral_t(1.7,1.7,1.7), spectral_t(1.0,1.0,1.0), spectral_t(0,0,0)) +}; + +RenderPushConstants retireveRenderPushConstants() +{ +#ifdef RWMC_ENABLED + return pc.renderPushConstants; +#else + return pc; +#endif +} + +[numthreads(RenderWorkgroupSize, 1, 1)] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + const RenderPushConstants renderPushConstants = retireveRenderPushConstants(); + + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); +#ifdef PERSISTENT_WORKGROUPS + uint32_t virtualThreadIndex; + [loop] + for (uint32_t virtualThreadBase = glsl::gl_WorkGroupID().x * RenderWorkgroupSize; virtualThreadBase < 1920*1080; virtualThreadBase += glsl::gl_NumWorkGroups().x * RenderWorkgroupSize) // not sure why 1280*720 doesn't cover draw surface; NOTE(review): presumably because Morton-decoded indices walk a power-of-two square, so the bound must exceed width*height - confirm + { + virtualThreadIndex = virtualThreadBase + glsl::gl_LocalInvocationIndex().x; + const int32_t2 coords = (int32_t2)math::Morton::decode2d(virtualThreadIndex); +#else + const int32_t2 coords = getCoordinates(); +#endif + float32_t2 texCoord = float32_t2(coords) / float32_t2(width, height); + texCoord.y = 1.0 - texCoord.y; + + if (any(coords < (int32_t2)0) || any(coords >= int32_t2(width, height))) { // skip invocations whose pixel lies outside the output image (the previous test could never be true) +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + if (((renderPushConstants.depth - 1) >> MAX_DEPTH_LOG2) > 0 || 
((renderPushConstants.sampleCount - 1) >> MAX_SAMPLES_LOG2) > 0) + { + float32_t4 pixelCol = float32_t4(1.0,0.0,0.0,1.0); + outImage[uint3(coords.x, coords.y, 0)] = pixelCol; +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + int flatIdx = glsl::gl_GlobalInvocationID().y * glsl::gl_NumWorkGroups().x * RenderWorkgroupSize + glsl::gl_GlobalInvocationID().x; + + // set up scene + scene_type scene; +#ifdef SPHERE_LIGHT + scene.light_spheres[0] = spheres[0]; +#endif +#ifdef TRIANGLE_LIGHT + scene.light_triangles[0] = triangles[0]; +#endif +#ifdef RECTANGLE_LIGHT + scene.light_rectangles[0] = rectangles[0]; +#endif + + // set up path tracer + pathtracer_type pathtracer; + pathtracer.randGen = randgen_type::construct(scramblebuf[coords].rg); // TODO concept this create + + uint2 scrambleDim; + scramblebuf.GetDimensions(scrambleDim.x, scrambleDim.y); + float32_t2 pixOffsetParam = (float2)1.0 / float2(scrambleDim); + + float32_t4 NDC = float4(texCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); + float32_t3 camPos; + { + float4 tmp = mul(renderPushConstants.invMVP, NDC); + camPos = tmp.xyz / tmp.w; + NDC.z = 1.0; + } + + scene.updateLight(renderPushConstants.generalPurposeLightMatrix); + pathtracer.rayGen = raygen_type::create(pixOffsetParam, camPos, NDC, renderPushConstants.invMVP); + pathtracer.nee.lights = lights; + pathtracer.nee.lightCount = scene_type::SCENE_LIGHT_COUNT; + pathtracer.materialSystem.bxdfs = bxdfs; + pathtracer.materialSystem.bxdfCount = scene_type::SCENE_BXDF_COUNT; + pathtracer.pSampleBuffer = renderPushConstants.pSampleSequence; + +#ifdef RWMC_ENABLED + const float32_t2 unpacked = hlsl::unpackHalf2x16(pc.packedSplattingParams); + rwmc::SplattingParameters splattingParameters = rwmc::SplattingParameters::create(unpacked[0], unpacked[1], CascadeCount); + accumulator_type accumulator = accumulator_type::create(splattingParameters); +#else + accumulator_type accumulator = accumulator_type::create(); +#endif + // path 
tracing loop + for(int i = 0; i < renderPushConstants.sampleCount; ++i) + pathtracer.sampleMeasure(i, renderPushConstants.depth, scene, accumulator); + +#ifdef RWMC_ENABLED + for (uint32_t i = 0; i < CascadeCount; ++i) + cascade[uint3(coords.x, coords.y, i)] = float32_t4(accumulator.accumulation.data[i], 1.0f); +#else + outImage[uint3(coords.x, coords.y, 0)] = float32_t4(accumulator.accumulation, 1.0); +#endif + +#ifdef PERSISTENT_WORKGROUPS + } +#endif +} \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl new file mode 100644 index 000000000..76e8abfe3 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -0,0 +1,30 @@ +#ifndef _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +#ifndef __HLSL_VERSION +#include "matrix4SIMD.h" +#endif + +struct RenderPushConstants +{ +#ifdef __HLSL_VERSION + float32_t4x4 invMVP; + float32_t3x4 generalPurposeLightMatrix; +#else + nbl::hlsl::float32_t4x4 invMVP; + nbl::hlsl::float32_t3x4 generalPurposeLightMatrix; +#endif + int sampleCount; + int depth; + uint64_t pSampleSequence; +}; + +NBL_CONSTEXPR nbl::hlsl::float32_t3 LightEminence = nbl::hlsl::float32_t3(30.0f, 25.0f, 15.0f); +NBL_CONSTEXPR uint32_t RenderWorkgroupSize = 64u; +NBL_CONSTEXPR uint32_t MAX_DEPTH_LOG2 = 4u; +NBL_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 10u; +NBL_CONSTEXPR uint32_t MaxBufferDimensions = 3u << MAX_DEPTH_LOG2; +NBL_CONSTEXPR uint32_t MaxBufferSamples = 1u << MAX_SAMPLES_LOG2; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl new file mode 100644 index 000000000..850aa463d --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl @@ -0,0 +1,17 @@ +#ifndef _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_ +#define 
_NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl" +#include "render_common.hlsl" + +#ifndef __HLSL_VERSION +#include "matrix4SIMD.h" +#endif + +struct RenderRWMCPushConstants +{ + RenderPushConstants renderPushConstants; + int32_t packedSplattingParams; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl new file mode 100644 index 000000000..6049c67f3 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl @@ -0,0 +1,66 @@ +#include +#include "resolve_common.hlsl" +#include "rwmc_global_settings_common.hlsl" +#ifdef PERSISTENT_WORKGROUPS +#include "nbl/builtin/hlsl/math/morton.hlsl" +#endif + +[[vk::image_format("rgba16f")]] [[vk::binding(0)]] RWTexture2DArray outImage; +[[vk::image_format("rgba16f")]] [[vk::binding(1)]] RWTexture2DArray cascade; +[[vk::push_constant]] ResolvePushConstants pc; + +using namespace nbl; +using namespace hlsl; + +template +struct ResolveAccessorAdaptor +{ + using output_scalar_type = OutputScalar; + using output_type = vector; + NBL_CONSTEXPR int32_t image_dimension = 2; + + float32_t calcLuma(NBL_REF_ARG(float32_t3) col) + { + return hlsl::dot(colorspace::scRGB::ToXYZ()[1], col); + } + + template + output_type get(vector uv, uint16_t layer) + { + uint32_t imgWidth, imgHeight, layers; + cascade.GetDimensions(imgWidth, imgHeight, layers); + int16_t2 cascadeImageDimension = int16_t2(imgWidth, imgHeight); + + if (any(uv < int16_t2(0, 0)) || any(uv > cascadeImageDimension)) + return vector(0, 0, 0, 0); + + return cascade.Load(int32_t3(uv, int32_t(layer))); + } +}; + +int32_t2 getImageExtents() +{ + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); + return int32_t2(width, height); +} + +[numthreads(ResolveWorkgroupSizeX, ResolveWorkgroupSizeY, 1)] +void main(uint32_t3 threadID : 
// Push-constant block of the RWMC resolve pass.
struct ResolvePushConstants
{
    // NOTE(review): not read by the resolve shader's main() as visible here - confirm whether it is still needed.
    uint32_t sampleCount;
    // Passed to the resolver's create() in the resolve shader.
    nbl::hlsl::rwmc::ResolveParameters resolveParameters;
};
// Data common to every scene variant: the type aliases, the object counts and the
// table of non-emissive spheres.
struct SceneBase
{
    using scalar_type = float;
    using vector3_type = vector;
    using light_type = Light;

    // Number of entries in scene_spheres below.
    NBL_CONSTEXPR_STATIC_INLINE uint32_t SCENE_SPHERE_COUNT = 10u;
    NBL_CONSTEXPR_STATIC_INLINE uint32_t SCENE_LIGHT_COUNT = 1u;
    // The sphere table references material indices 0u..8u, i.e. nine distinct values.
    NBL_CONSTEXPR_STATIC_INLINE uint32_t SCENE_BXDF_COUNT = 9u;

    static const Shape scene_spheres[SCENE_SPHERE_COUNT];
};

// Every sphere here carries light_type::INVALID_ID as its last argument.
// NOTE(review): the arguments look like (center, radius, BxDF index, light ID) - confirm against Shape::create.
const Shape SceneBase::scene_spheres[SCENE_SPHERE_COUNT] = {
    Shape::create(float3(0.0, -100.5, -1.0), 100.0, 0u, SceneBase::light_type::INVALID_ID),
    Shape::create(float3(2.0, 0.0, -1.0), 0.5, 1u, SceneBase::light_type::INVALID_ID),
    Shape::create(float3(0.0, 0.0, -1.0), 0.5, 2u, SceneBase::light_type::INVALID_ID),
    Shape::create(float3(-2.0, 0.0, -1.0), 0.5, 3u, SceneBase::light_type::INVALID_ID),
    Shape::create(float3(2.0, 0.0, 1.0), 0.5, 4u, SceneBase::light_type::INVALID_ID),
    Shape::create(float3(0.0, 0.0, 1.0), 0.5, 4u, SceneBase::light_type::INVALID_ID),
    Shape::create(float3(-2.0, 0.0, 1.0), 0.5, 5u, SceneBase::light_type::INVALID_ID),
    Shape::create(float3(0.5, 1.0, 0.5), 0.5, 6u, SceneBase::light_type::INVALID_ID),
    Shape::create(float3(-4.0, 0.0, 1.0), 0.5, 7u, SceneBase::light_type::INVALID_ID),
    Shape::create(float3(-4.0, 0.0, -1.0), 0.5, 8u, SceneBase::light_type::INVALID_ID)
};
// Scene variant whose single light is a triangle: spheres come from the shared
// table in SceneBase, the triangle count equals SCENE_LIGHT_COUNT, and there are
// no rectangles.
template<>
struct Scene : SceneBase
{
    using scalar_type = float;
    using vector3_type = vector;
    using this_t = Scene;
    using base_t = SceneBase;
    using id_type = ObjectID;

    NBL_CONSTEXPR_STATIC_INLINE uint32_t SphereCount = base_t::SCENE_SPHERE_COUNT;
    NBL_CONSTEXPR_STATIC_INLINE uint32_t TriangleCount = base_t::SCENE_LIGHT_COUNT;
    NBL_CONSTEXPR_STATIC_INLINE uint32_t RectangleCount = 0u;

    // All three light arrays are declared in every variant; only light_triangles
    // is read by the accessors of this one.
    Shape light_spheres[1];
    Shape light_triangles[1];
    Shape light_rectangles[1];

    // Returns sphere `idx` from the shared, non-emissive sphere table.
    Shape getSphere(uint32_t idx)
    {
        assert(idx < SphereCount);
        return base_t::scene_spheres[idx];
    }
    // Returns light triangle `idx`.
    Shape getTriangle(uint32_t idx)
    {
        assert(idx < TriangleCount);
        return light_triangles[idx];
    }
    // This variant has no rectangles: the call asserts, the return value is a placeholder.
    Shape getRectangle(uint32_t idx)
    {
        assert(false);
        return light_rectangles[0];
    }

    // Applies the transform to the (single) light triangle.
    void updateLight(NBL_CONST_REF_ARG(float32_t3x4) generalPurposeLightMatrix)
    {
        light_triangles[0].updateTransform(generalPurposeLightMatrix);
    }

    // Returns the bsdfLightIDs field of the sphere or triangle identified by objectID.
    uint32_t getBsdfLightIDs(NBL_CONST_REF_ARG(id_type) objectID)
    {
        assert(objectID.shapeType == PST_SPHERE || objectID.shapeType == PST_TRIANGLE);
        return objectID.shapeType == PST_SPHERE ? getSphere(objectID.id).bsdfLightIDs : getTriangle(objectID.id).bsdfLightIDs;
    }

    // Returns the surface normal of the identified object.
    // NOTE(review): the triangle branch returns getNormalTimesArea(), which by its name is
    // not unit length - presumably the caller normalizes; confirm.
    vector3_type getNormal(NBL_CONST_REF_ARG(id_type) objectID, NBL_CONST_REF_ARG(vector3_type) intersection)
    {
        assert(objectID.shapeType == PST_SPHERE || objectID.shapeType == PST_TRIANGLE);
        return objectID.shapeType == PST_SPHERE ? getSphere(objectID.id).getNormal(intersection) : getTriangle(objectID.id).getNormalTimesArea();
    }
};
getSphere(objectID.id).bsdfLightIDs : getRectangle(objectID.id).bsdfLightIDs; + } + + vector3_type getNormal(NBL_CONST_REF_ARG(id_type) objectID, NBL_CONST_REF_ARG(vector3_type) intersection) + { + assert(objectID.shapeType == PST_SPHERE || objectID.shapeType == PST_RECTANGLE); + return objectID.shapeType == PST_SPHERE ? getSphere(objectID.id).getNormal(intersection) : getRectangle(objectID.id).getNormalTimesArea(); + } +}; + +#endif diff --git a/31_HLSLPathTracer/config.json.template b/31_HLSLPathTracer/config.json.template new file mode 100644 index 000000000..24adf54fb --- /dev/null +++ b/31_HLSLPathTracer/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} diff --git a/31_HLSLPathTracer/include/nbl/this_example/common.hpp b/31_HLSLPathTracer/include/nbl/this_example/common.hpp new file mode 100644 index 000000000..db051bb3e --- /dev/null +++ b/31_HLSLPathTracer/include/nbl/this_example/common.hpp @@ -0,0 +1,17 @@ +#ifndef __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ +#define __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ + +#include + +// common api +#include "nbl/examples/common/SimpleWindowedApplication.hpp" +#include "nbl/examples/examples.hpp" +#include "nbl/examples/cameras/CCamera.hpp" +#include "nbl/examples/common/CEventCallback.hpp" + +// example's own headers +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" + +#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file diff --git 
a/31_HLSLPathTracer/include/nbl/this_example/transform.hpp b/31_HLSLPathTracer/include/nbl/this_example/transform.hpp new file mode 100644 index 000000000..dd6368ca1 --- /dev/null +++ b/31_HLSLPathTracer/include/nbl/this_example/transform.hpp @@ -0,0 +1,167 @@ +#ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ + +#include "nbl/ui/ICursorControl.h" + +#include "nbl/ext/ImGui/ImGui.h" + +#include "imgui/imgui_internal.h" +#include "imguizmo/ImGuizmo.h" + +struct TransformRequestParams +{ + float camDistance = 8.f; + bool isSphere = false; + ImGuizmo::OPERATION allowedOp; + uint8_t sceneTexDescIx = ~0; + bool useWindow = false, editTransformDecomposition = false, enableViewManipulate = false; +}; + +nbl::hlsl::uint16_t2 EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) +{ + static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE); + static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL); + static bool useSnap = false; + static float snap[3] = { 1.f, 1.f, 1.f }; + static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f }; + static float boundsSnap[] = { 0.1f, 0.1f, 0.1f }; + static bool boundSizing = false; + static bool boundSizingSnap = false; + + if (params.editTransformDecomposition) + { + if (ImGui::IsKeyPressed(ImGuiKey_T)) // Always translate + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + if (ImGui::IsKeyPressed(ImGuiKey_R) && params.allowedOp & ImGuizmo::OPERATION::ROTATE) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + if (ImGui::IsKeyPressed(ImGuiKey_S) && params.allowedOp & ImGuizmo::OPERATION::SCALEU) // for sphere + mCurrentGizmoOperation = ImGuizmo::SCALEU; + if (ImGui::IsKeyPressed(ImGuiKey_S) && params.allowedOp & ImGuizmo::OPERATION::SCALE) // for triangle/rectangle + mCurrentGizmoOperation = ImGuizmo::SCALE_X | ImGuizmo::SCALE_Y; + +#if 0 + if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == 
ImGuizmo::TRANSLATE)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL)) + mCurrentGizmoOperation = ImGuizmo::UNIVERSAL; + float matrixTranslation[3], matrixRotation[3], matrixScale[3]; + ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale); + ImGui::InputFloat3("Tr", matrixTranslation); + ImGui::InputFloat3("Rt", matrixRotation); + ImGui::InputFloat3("Sc", matrixScale); + ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix); + + if (mCurrentGizmoOperation != ImGuizmo::SCALE) + { + if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL)) + mCurrentGizmoMode = ImGuizmo::LOCAL; + ImGui::SameLine(); + if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD)) + mCurrentGizmoMode = ImGuizmo::WORLD; + } + if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift)) + useSnap = !useSnap; + ImGui::Checkbox("##UseSnap", &useSnap); + ImGui::SameLine(); + + switch (mCurrentGizmoOperation) + { + case ImGuizmo::TRANSLATE: + ImGui::InputFloat3("Snap", &snap[0]); + break; + case ImGuizmo::ROTATE: + ImGui::InputFloat("Angle Snap", &snap[0]); + break; + case ImGuizmo::SCALE: + ImGui::InputFloat("Scale Snap", &snap[0]); + break; + } + ImGui::Checkbox("Bound Sizing", &boundSizing); + if (boundSizing) + { + ImGui::PushID(3); + ImGui::Checkbox("##BoundSizing", &boundSizingSnap); + ImGui::SameLine(); + ImGui::InputFloat3("Snap", boundsSnap); + ImGui::PopID(); + } +#endif + } + + ImGuiIO& io = ImGui::GetIO(); + float viewManipulateRight = io.DisplaySize.x; + float viewManipulateTop = 0; + static 
ImGuiWindowFlags gizmoWindowFlags = 0; + + /* + for the "useWindow" case we just render to a gui area, + otherwise to fake full screen transparent window + + note that for both cases we make sure gizmo being + rendered is aligned to our texture scene using + imgui "cursor" screen positions + */ + // TODO: this shouldn't be handled here I think + SImResourceInfo info; + info.textureID = params.sceneTexDescIx; + info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; + + nbl::hlsl::uint16_t2 retval; + if (params.useWindow) + { + ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing); + ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f)); + ImGui::Begin("Gizmo", 0, gizmoWindowFlags); + ImGuizmo::SetDrawlist(); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 windowPos = ImGui::GetWindowPos(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval = { contentRegionSize.x, contentRegionSize.y }; + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + + ImGuiWindow* window = ImGui::GetCurrentWindow(); + gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? 
ImGuiWindowFlags_NoMove : 0); + } + else + { + ImGui::SetNextWindowPos(ImVec2(0, 0)); + ImGui::SetNextWindowSize(io.DisplaySize); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window + ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval = { contentRegionSize.x, contentRegionSize.y }; + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + } + + ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL); + + //if (params.enableViewManipulate) + //ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + + ImGui::End(); + ImGui::PopStyleColor(); + + return retval; +} + +#endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp new file mode 100644 index 000000000..d4fcdc427 --- /dev/null +++ b/31_HLSLPathTracer/main.cpp @@ -0,0 +1,1698 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
// Shape of the scene's light. The enumerators are declared in the same order as
// the entries of the shader path / shader variant / name tables of this class.
enum E_LIGHT_GEOMETRY : uint8_t
{
    ELG_SPHERE,
    ELG_TRIANGLE,
    ELG_RECTANGLE,
    ELG_COUNT // number of geometries; used as an array extent
};

// Which shader implementation renders the frame.
enum E_RENDER_MODE : uint8_t
{
    ERM_GLSL,
    ERM_HLSL,
    // ERM_CHECKERED,
    ERM_COUNT // number of render modes; used as an array extent
};
"app_resources/glsl/litByTriangle.comp", + "app_resources/glsl/litByRectangle.comp" + }; + static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; + static inline std::array PTHLSLShaderVariants = { + "SPHERE_LIGHT", + "TRIANGLE_LIGHT", + "RECTANGLE_LIGHT" + }; + static inline std::string ResolveShaderPath = "app_resources/hlsl/resolve.comp.hlsl"; + static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; + + const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { + "ELG_SPHERE", + "ELG_TRIANGLE", + "ELG_RECTANGLE" + }; + + const char* shaderTypes[E_RENDER_MODE::ERM_COUNT] = { + "ERM_GLSL", + "ERM_HLSL" + }; + + public: + inline HLSLComputePathtracer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + inline bool isComputeOnly() const override { return false; } + + inline video::SPhysicalDeviceLimits getRequiredDeviceLimits() const override + { + video::SPhysicalDeviceLimits retval = device_base_t::getRequiredDeviceLimits(); + retval.storagePushConstant16 = true; + return retval; + } + + inline core::vector getSurfaces() const override + { + if (!m_surface) + { + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = WindowDimensions.x; + params.height = WindowDimensions.y; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "ComputeShaderPathtracer"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + 
const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); + } + + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + + return {}; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Init systems + { + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + + if (!m_semaphore) + return logFail("Failed to create semaphore!"); + } + + // Create renderpass and init surface + nbl::video::IGPURenderpass* renderpass; + { + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + { + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + renderpass = 
scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + } + + // Create command pool and buffers + { + auto gQueue = getGraphicsQueue(); + m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); + + if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), MaxFramesInFlight })) + return logFail("Couldn't create Command Buffer!"); + } + + ISampler::SParams samplerParams = { + .AnisotropicFilter = 0 + }; + auto defaultSampler = m_device->createSampler(samplerParams); + + // Create descriptors and pipeline for the pathtracer + { + auto convertDSLayoutCPU2GPU = [&](smart_refctd_ptr cpuLayout) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuLayout.get(),1 }; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuLayout = reservation.getGPUObjects().front().value; + if (!gpuLayout) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSetLayout handle", ILogger::ELL_ERROR); + std::exit(-1); + } + + return gpuLayout; + }; + auto convertDSCPU2GPU = [&](smart_refctd_ptr cpuDS) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs 
= {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuDS.get(), 1 }; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuDS = reservation.getGPUObjects().front().value; + if (!gpuDS) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSet handle", ILogger::ELL_ERROR); + std::exit(-1); + } + + return gpuDS; + }; + + std::array descriptorSet0Bindings = {}; + std::array descriptorSet3Bindings = {}; + std::array presentDescriptorSetBindings; + + descriptorSet0Bindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + + descriptorSet0Bindings[1] = { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + + descriptorSet3Bindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[1] = { + .binding = 2u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + + 
presentDescriptorSetBindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + .immutableSamplers = &defaultSampler + }; + + auto cpuDescriptorSetLayout0 = make_smart_refctd_ptr(descriptorSet0Bindings); + auto cpuDescriptorSetLayout2 = make_smart_refctd_ptr(descriptorSet3Bindings); + + auto gpuDescriptorSetLayout0 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout0); + auto gpuDescriptorSetLayout2 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout2); + auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(presentDescriptorSetBindings); + + auto cpuDescriptorSet0 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout0)); + auto cpuDescriptorSet2 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout2)); + + m_descriptorSet0 = convertDSCPU2GPU(cpuDescriptorSet0); + m_descriptorSet2 = convertDSCPU2GPU(cpuDescriptorSet2); + + smart_refctd_ptr presentDSPool; + { + const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; + const uint32_t setCounts[] = { 1u }; + presentDSPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + } + m_presentDescriptorSet = presentDSPool->createDescriptorSet(gpuPresentDescriptorSetLayout); + + // Create Shaders + auto loadAndCompileGLSLShader = [&](const std::string& pathToShader, bool persistentWorkGroups = false) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.workingDirectory = localInputCWD; + auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + auto source = smart_refctd_ptr_static_cast(assets[0]); + // The down-cast should not 
fail! + assert(source); + + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CGLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; // should be compute + options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#endif + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; + options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const IShaderCompiler::SMacroDefinition persistentDefine = { "PERSISTENT_WORKGROUPS", "1" }; + if (persistentWorkGroups) + options.preprocessorOptions.extraDefines = { &persistentDefine, &persistentDefine + 1 }; + + source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + + // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple + auto shader = m_device->compileShader({ source.get(), nullptr, nullptr, nullptr }); + if (!shader) + { + m_logger->log("GLSL shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + return shader; + }; + + auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "", bool persistentWorkGroups = false, bool rwmc = false) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.workingDirectory = localInputCWD; + auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, 
pathToShader); + std::exit(-1); + } + + auto source = smart_refctd_ptr_static_cast(assets[0]); + // The down-cast should not fail! + assert(source); + + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#endif + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; + options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + core::vector defines; + defines.reserve(3); + if (!defineMacro.empty()) + defines.push_back({ defineMacro, "" }); + if(persistentWorkGroups) + defines.push_back({ "PERSISTENT_WORKGROUPS", "1" }); + if(rwmc) + defines.push_back({ "RWMC_ENABLED", "" }); + + options.preprocessorOptions.extraDefines = defines; + + source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + + auto shader = m_device->compileShader({ source.get(), nullptr, nullptr, nullptr }); + if (!shader) + { + m_logger->log("HLSL shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + return shader; + }; + + const auto deviceMinSubgroupSize = m_device->getPhysicalDevice()->getLimits().minSubgroupSize; + auto getComputePipelineCreationParams = [deviceMinSubgroupSize](IShader* shader, IGPUPipelineLayout* pipelineLayout) -> IGPUComputePipeline::SCreationParams + { + IGPUComputePipeline::SCreationParams params = {}; + params.layout = pipelineLayout; + params.shader.shader = shader; 
+ params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + + return params; + }; + + // Create compute pipelines + { + for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) + { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RenderPushConstants) + }; + auto ptPipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0), + nullptr, + core::smart_refctd_ptr(gpuDescriptorSetLayout2), + nullptr + ); + if (!ptPipelineLayout) + return logFail("Failed to create Pathtracing pipeline layout"); + + const nbl::asset::SPushConstantRange rwmcPcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RenderRWMCPushConstants) + }; + auto rwmcPtPipelineLayout = m_device->createPipelineLayout( + { &rwmcPcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0), + nullptr, + core::smart_refctd_ptr(gpuDescriptorSetLayout2), + nullptr + ); + if (!rwmcPtPipelineLayout) + return logFail("Failed to create RWMC Pathtracing pipeline layout"); + + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index]); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelines.data() + index)) + return logFail("Failed to create HLSL compute pipeline!\n"); + } + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelines.data() + index)) + return logFail("Failed to create HLSL PersistentWG compute pipeline!\n"); + } + + // rwmc 
pipelines + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], false, true); + auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelinesRWMC.data() + index)) + return logFail("Failed to create HLSL RWMC compute pipeline!\n"); + } + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true, true); + auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelinesRWMC.data() + index)) + return logFail("Failed to create HLSL RWMC PersistentWG compute pipeline!\n"); + } + } + } + + // Create resolve pipelines + { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(ResolvePushConstants) + }; + + auto pipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0) + ); + + if (!pipelineLayout) { + return logFail("Failed to create resolve pipeline layout"); + } + + { + auto shader = loadAndCompileHLSLShader(ResolveShaderPath); + auto params = getComputePipelineCreationParams(shader.get(), pipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_resolvePipeline)) + return logFail("Failed to create HLSL resolve compute pipeline!\n"); + } + } + + // Create graphics pipeline + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load Fragment Shader + auto fragmentShader = loadAndCompileHLSLShader(PresentShaderPath); + if (!fragmentShader) + return 
logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); + + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), + .entryPoint = "main" + }; + + auto presentLayout = m_device->createPipelineLayout( + {}, + core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + if (!m_presentPipeline) + return logFail("Could not create Graphics Pipeline!"); + + } + } + + // load CPUImages and convert to GPUImages + smart_refctd_ptr envMap, scrambleMap; + { + auto convertImgCPU2GPU = [&](std::span cpuImgs) { + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + std::array commandBufferInfo = { cmdbuf }; + core::smart_refctd_ptr imgFillSemaphore = m_device->createSemaphore(0); + imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); + + auto converter = CAssetConverter::create({ .device = m_device.get() }); + // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. 
+ struct SInputs final : CAssetConverter::SInputs + { + // we also need to override this to have concurrent sharing + inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override + { + if (familyIndices.size() > 1) + return familyIndices; + return {}; + } + + inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return image->getCreationParameters().mipLevels; + } + inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return 0b0u; + } + + std::vector familyIndices; + } inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + { + const core::set uniqueFamilyIndices = { queue->getFamilyIndex(), queue->getFamilyIndex() }; + inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; + } + // scratch command buffers for asset converter transfer commands + SIntendedSubmitInfo transfer = { + .queue = queue, + .waitSemaphores = {}, + .prevCommandBuffers = {}, + .scratchCommandBuffers = commandBufferInfo, + .scratchSemaphore = { + .semaphore = imgFillSemaphore.get(), + .value = 0, + // because of layout transitions + .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + } + }; + // as per the `SIntendedSubmitInfo` one commandbuffer must be begun + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // Normally we'd have to inherit and override the `getFinalOwnerQueueFamily` callback to ensure that the + // compute queue becomes the owner of the buffers and images post-transfer, but in this example we use concurrent sharing + CAssetConverter::SConvertParams params = {}; + params.transfer = &transfer; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = cpuImgs; + // assert that we don't need to provide patches + 
assert(cpuImgs[0]->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuImgs = reservation.getGPUObjects(); + for (auto& gpuImg : gpuImgs) { + if (!gpuImg) { + m_logger->log("Failed to convert %s into an IGPUImage handle", ILogger::ELL_ERROR, DefaultImagePathsFile); + std::exit(-1); + } + } + + // and launch the conversions + m_api->startCapture(); + auto result = reservation.convert(params); + m_api->endCapture(); + if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { + m_logger->log("Failed to record or submit conversions", ILogger::ELL_ERROR); + std::exit(-1); + } + + envMap = gpuImgs[0].value; + scrambleMap = gpuImgs[1].value; + }; + + smart_refctd_ptr envMapCPU, scrambleMapCPU; + { + IAssetLoader::SAssetLoadParams lp; + lp.workingDirectory = this->sharedInputCWD; + SAssetBundle bundle = m_assetMgr->getAsset(DefaultImagePathsFile, lp); + if (bundle.getContents().empty()) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + + envMapCPU = IAsset::castDown(bundle.getContents()[0]); + if (!envMapCPU) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + }; + { + asset::ICPUImage::SCreationParams info; + info.format = asset::E_FORMAT::EF_R32G32_UINT; + info.type = asset::ICPUImage::ET_2D; + auto extent = envMapCPU->getCreationParameters().extent; + info.extent.width = extent.width; + info.extent.height = extent.height; + info.extent.depth = 1u; + info.mipLevels = 1u; + info.arrayLayers = 1u; + info.samples = asset::ICPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + info.flags = static_cast(0u); + info.usage = asset::IImage::EUF_TRANSFER_SRC_BIT | asset::IImage::EUF_SAMPLED_BIT; + + scrambleMapCPU = ICPUImage::create(std::move(info)); + const uint32_t texelFormatByteSize = 
getTexelOrBlockBytesize(scrambleMapCPU->getCreationParameters().format); + const uint32_t texelBufferSize = scrambleMapCPU->getImageDataSizeInBytes(); + auto texelBuffer = ICPUBuffer::create({ texelBufferSize }); + + core::RandomSampler rng(0xbadc0ffeu); + auto out = reinterpret_cast(texelBuffer->getPointer()); + for (auto index = 0u; index < texelBufferSize / 4; index++) { + out[index] = rng.nextSample(); + } + + auto regions = core::make_refctd_dynamic_array>(1u); + ICPUImage::SBufferCopy& region = regions->front(); + region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = 0u; + region.imageSubresource.baseArrayLayer = 0u; + region.imageSubresource.layerCount = 1u; + region.bufferOffset = 0u; + region.bufferRowLength = IImageAssetHandlerBase::calcPitchInBlocks(extent.width, texelFormatByteSize); + region.bufferImageHeight = 0u; + region.imageOffset = { 0u, 0u, 0u }; + region.imageExtent = scrambleMapCPU->getCreationParameters().extent; + + scrambleMapCPU->setBufferAndRegions(std::move(texelBuffer), regions); + + // programmatically user-created IPreHashed need to have their hash computed (loaders do it while loading) + scrambleMapCPU->setContentHash(scrambleMapCPU->computeContentHash()); + } + + std::array cpuImgs = { envMapCPU.get(), scrambleMapCPU.get() }; + convertImgCPU2GPU(cpuImgs); + } + + // create views for textures + { + auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height, const bool useCascadeCreationParameters = false) -> smart_refctd_ptr { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = colorFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = width; + imgInfo.extent.height = height; + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.samples = IGPUImage::ESCF_1_BIT; + imgInfo.flags = static_cast(0u); + + if (!useCascadeCreationParameters) + { + imgInfo.arrayLayers = 1u; + imgInfo.usage = 
asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + } + else + { + imgInfo.arrayLayers = CascadeCount; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT; + } + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + return image; + }; + auto createHDRIImageView = [this](smart_refctd_ptr img, const uint32_t imageArraySize = 1u, const IGPUImageView::E_TYPE imageViewType = IGPUImageView::ET_2D) -> smart_refctd_ptr + { + auto format = img->getCreationParameters().format; + IGPUImageView::SCreationParams imgViewInfo; + imgViewInfo.image = std::move(img); + imgViewInfo.format = format; + imgViewInfo.flags = static_cast(0u); + imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + imgViewInfo.subresourceRange.baseArrayLayer = 0u; + imgViewInfo.subresourceRange.baseMipLevel = 0u; + imgViewInfo.subresourceRange.levelCount = 1u; + imgViewInfo.viewType = imageViewType; + + imgViewInfo.subresourceRange.layerCount = imageArraySize; + + return m_device->createImageView(std::move(imgViewInfo)); + }; + + auto params = envMap->getCreationParameters(); + auto extent = params.extent; + + envMap->setObjectDebugName("Env Map"); + m_envMapView = createHDRIImageView(envMap); + m_envMapView->setObjectDebugName("Env Map View"); + + scrambleMap->setObjectDebugName("Scramble Map"); + m_scrambleView = createHDRIImageView(scrambleMap); + m_scrambleView->setObjectDebugName("Scramble Map View"); + + auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); + outImg->setObjectDebugName("Output Image"); + m_outImgView = createHDRIImageView(outImg, 1, IGPUImageView::ET_2D_ARRAY); + m_outImgView->setObjectDebugName("Output Image View"); + + auto cascade = 
createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y, true); + cascade->setObjectDebugName("Cascade"); + m_cascadeView = createHDRIImageView(cascade, CascadeCount, IGPUImageView::ET_2D_ARRAY); + m_cascadeView->setObjectDebugName("Cascade View"); + + // TODO: change cascade layout to general + } + + // create sequence buffer view + { + // TODO: do this better use asset manager to get the ICPUBuffer from `.bin` + auto createBufferFromCacheFile = [this]( + system::path filename, + size_t bufferSize, + void *data, + smart_refctd_ptr& buffer + ) -> std::pair, bool> + { + ISystem::future_t> owenSamplerFileFuture; + ISystem::future_t owenSamplerFileReadFuture; + size_t owenSamplerFileBytesRead; + + m_system->createFile(owenSamplerFileFuture, localOutputCWD / filename, IFile::ECF_READ); + smart_refctd_ptr owenSamplerFile; + + if (owenSamplerFileFuture.wait()) + { + owenSamplerFileFuture.acquire().move_into(owenSamplerFile); + if (!owenSamplerFile) + return { nullptr, false }; + + owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, bufferSize); + if (owenSamplerFileReadFuture.wait()) + { + owenSamplerFileReadFuture.acquire().move_into(owenSamplerFileBytesRead); + + if (owenSamplerFileBytesRead < bufferSize) + { + buffer = asset::ICPUBuffer::create({ sizeof(uint32_t) * bufferSize }); + return { owenSamplerFile, false }; + } + + buffer = asset::ICPUBuffer::create({ { sizeof(uint32_t) * bufferSize }, data }); + } + } + + return { owenSamplerFile, true }; + }; + auto writeBufferIntoCacheFile = [this](smart_refctd_ptr file, size_t bufferSize, void* data) + { + ISystem::future_t owenSamplerFileWriteFuture; + size_t owenSamplerFileBytesWritten; + + file->write(owenSamplerFileWriteFuture, data, 0, bufferSize); + if (owenSamplerFileWriteFuture.wait()) + owenSamplerFileWriteFuture.acquire().move_into(owenSamplerFileBytesWritten); + }; + + constexpr uint32_t quantizedDimensions = MaxBufferDimensions / 3u; + constexpr size_t bufferSize = 
quantizedDimensions * MaxBufferSamples; + using sequence_type = sampling::QuantizedSequence; + std::array data = {}; + smart_refctd_ptr sampleSeq; + + auto cacheBufferResult = createBufferFromCacheFile(sharedOutputCWD/OwenSamplerFilePath, bufferSize, data.data(), sampleSeq); + if (!cacheBufferResult.second) + { + core::OwenSampler sampler(MaxBufferDimensions, 0xdeadbeefu); + + ICPUBuffer::SCreationParams params = {}; + params.size = quantizedDimensions * MaxBufferSamples * sizeof(sequence_type); + sampleSeq = ICPUBuffer::create(std::move(params)); + + auto out = reinterpret_cast(sampleSeq->getPointer()); + for (auto dim = 0u; dim < MaxBufferDimensions; dim++) + for (uint32_t i = 0; i < MaxBufferSamples; i++) + { + const uint32_t quant_dim = dim / 3u; + const uint32_t offset = dim % 3u; + auto& seq = out[i * quantizedDimensions + quant_dim]; + const uint32_t sample = sampler.sample(dim, i); + seq.set(offset, sample); + } + if (cacheBufferResult.first) + writeBufferIntoCacheFile(cacheBufferResult.first, bufferSize, out); + } + + IGPUBuffer::SCreationParams params = {}; + params.usage = asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_STORAGE_BUFFER_BIT | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + params.size = bufferSize; + + // we don't want to overcomplicate the example with multi-queue + m_utils->createFilledDeviceLocalBufferOnDedMem( + SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, + std::move(params), + sampleSeq->getPointer() + ).move_into(m_sequenceBuffer); + + m_sequenceBuffer->setObjectDebugName("Sequence buffer"); + } + + // Update Descriptors + { + ISampler::SParams samplerParams0 = { + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::ETBC_FLOAT_OPAQUE_BLACK, + ISampler::ETF_LINEAR, + ISampler::ETF_LINEAR, + ISampler::ESMM_LINEAR, + 0u, + false, + ECO_ALWAYS + }; + auto sampler0 = m_device->createSampler(samplerParams0); + 
ISampler::SParams samplerParams1 = { + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::ETBC_INT_OPAQUE_BLACK, + ISampler::ETF_NEAREST, + ISampler::ETF_NEAREST, + ISampler::ESMM_NEAREST, + 0u, + false, + ECO_ALWAYS + }; + auto sampler1 = m_device->createSampler(samplerParams1); + + std::array writeDSInfos = {}; + writeDSInfos[0].desc = m_outImgView; + writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[1].desc = m_cascadeView; + writeDSInfos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[2].desc = m_envMapView; + // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; + writeDSInfos[2].info.combinedImageSampler.sampler = sampler0; + writeDSInfos[2].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[3].desc = m_scrambleView; + // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; + writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; + writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[4].desc = m_outImgView; + writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + std::array writeDescriptorSets = {}; + writeDescriptorSets[0] = { + .dstSet = m_descriptorSet0.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[0] + }; + writeDescriptorSets[1] = { + .dstSet = m_descriptorSet0.get(), + .binding = 1, + .arrayElement = 0u, + .count = 1u, + .info = 
&writeDSInfos[1] + }; + writeDescriptorSets[2] = { + .dstSet = m_descriptorSet2.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[2] + }; + writeDescriptorSets[3] = { + .dstSet = m_descriptorSet2.get(), + .binding = 2, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[3] + }; + writeDescriptorSets[4] = { + .dstSet = m_presentDescriptorSet.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[4] + }; + + m_device->updateDescriptorSets(writeDescriptorSets, {}); + } + + // Create ui descriptors + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + + m_ui.samplers.gui = m_device->createSampler(params); + m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); + } + + std::array, 69u> immutableSamplers; + for (auto& it : immutableSamplers) + it = smart_refctd_ptr(m_ui.samplers.scene); + + immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); + + nbl::ext::imgui::UI::SCreationParameters params; + + params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; + params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; + params.assetManager = m_assetMgr; + params.pipelineCache = nullptr; + params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); + params.renderpass = smart_refctd_ptr(renderpass); + params.streamingBuffer = nullptr; + params.subpassIx = 0u; + params.transfer = getTransferUpQueue(); + params.utilities = m_utils; + { + m_ui.manager = ext::imgui::UI::create(std::move(params)); + + // note that we use default layout 
provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + const auto& params = m_ui.manager->getCreationParameters(); + + IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; + descriptorPoolInfo.maxSets = 1u; + descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; + + m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); + assert(m_guiDescriptorSetPool); + + m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); + assert(m_ui.descriptorSet); + } + } + m_ui.manager->registerListener( + [this]() -> void { + ImGuiIO& io = ImGui::GetIO(); + ImGuizmo::SetOrthographic(false); + ImGuizmo::BeginFrame(); + + m_camera.setProjectionMatrix([&]() + { + static matrix4SIMD projection; + + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + + return projection; + }()); + + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Controls"); + + ImGui::SameLine(); + + ImGui::Text("Camera"); + + ImGui::Text("Press Home to reset camera."); + ImGui::Text("Press End to reset light."); + + ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", 
&rotateSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); + ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); + ImGui::Combo("Shader", &PTPipeline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); + ImGui::Combo("Render Mode", &renderMode, shaderTypes, E_RENDER_MODE::ERM_COUNT); + ImGui::SliderInt("SPP", &spp, 1, MaxBufferSamples); + ImGui::SliderInt("Depth", &depth, 1, MaxBufferDimensions / 3); + ImGui::Checkbox("Persistent WorkGroups", &usePersistentWorkGroups); + + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + + ImGui::Text("\nRWMC settings:"); + ImGui::Checkbox("Enable RWMC", &useRWMC); + ImGui::SliderFloat("start", &rwmcStart, 1.0f, 32.0f); + ImGui::SliderFloat("base", &rwmcBase, 1.0f, 32.0f); + ImGui::SliderFloat("minReliableLuma", &rwmcMinReliableLuma, 0.1f, 1024.0f); + ImGui::SliderFloat("kappa", &rwmcKappa, 0.1f, 1024.0f); + + ImGui::End(); + } + ); + + m_ui.manager->registerListener( + [this]() -> void { + static struct + { + hlsl::float32_t4x4 view, projection; + } imguizmoM16InOut; + + ImGuizmo::SetID(0u); + + // TODO: camera will return hlsl::float32_tMxN + auto view = *reinterpret_cast(m_camera.getViewMatrix().pointer()); + imguizmoM16InOut.view = hlsl::transpose(getMatrix3x4As4x4(view)); + + // TODO: camera will return hlsl::float32_tMxN + imguizmoM16InOut.projection = hlsl::transpose(*reinterpret_cast(m_camera.getProjectionMatrix().pointer())); + imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ + + m_transformParams.editTransformDecomposition = true; + m_transformParams.sceneTexDescIx = 1u; + + if (ImGui::IsKeyPressed(ImGuiKey_End)) + { + m_lightModelMatrix = hlsl::float32_t4x4( + 0.3f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.3f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.3f, 0.0f, + -1.0f, 1.5f, 0.0f, 1.0f + ); + } + + if (E_LIGHT_GEOMETRY::ELG_SPHERE == PTPipeline) + { + m_transformParams.allowedOp = 
ImGuizmo::OPERATION::TRANSLATE | ImGuizmo::OPERATION::SCALEU;
+					m_transformParams.isSphere = true;
+				}
+				else
+				{
+					m_transformParams.allowedOp = ImGuizmo::OPERATION::TRANSLATE | ImGuizmo::OPERATION::ROTATE | ImGuizmo::OPERATION::SCALE;
+					m_transformParams.isSphere = false;
+				}
+				EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &m_lightModelMatrix[0][0], m_transformParams);
+
+				if (E_LIGHT_GEOMETRY::ELG_SPHERE == PTPipeline)
+				{
+					// keep uniform scale for sphere
+					float32_t uniformScale = (m_lightModelMatrix[0][0] + m_lightModelMatrix[1][1] + m_lightModelMatrix[2][2]) / 3.0f;
+					m_lightModelMatrix[0][0] = uniformScale;
+					m_lightModelMatrix[1][1] = uniformScale; // Doesn't affect sphere but will affect rectangle/triangle if switching shapes
+					m_lightModelMatrix[2][2] = uniformScale;
+				}
+
+			}
+		);
+
+		// Set Camera
+		{
+			core::vectorSIMDf cameraPosition(0, 5, -10);
+			matrix4SIMD proj = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(
+				core::radians(60.0f),
+				WindowDimensions.x / WindowDimensions.y,
+				0.01f,
+				500.0f
+			);
+			m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj);
+		}
+
+		m_winMgr->setWindowSize(m_window.get(), WindowDimensions.x, WindowDimensions.y);
+		m_surface->recreateSwapchain();
+		m_winMgr->show(m_window.get());
+		m_oracle.reportBeginFrameRecord();
+		m_camera.mapKeysToArrows();
+
+		// set initial rwmc settings
+
+		rwmcStart = hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], LightEminence);
+		rwmcBase = 8.0f;
+		rwmcMinReliableLuma = 1.0f;
+		rwmcKappa = 5.0f;
+		return true;
+	}
+
+	// Writes the ImGui font atlas view into binding 0 of the UI descriptor set, at array element
+	// `nbl::ext::imgui::UI::FontAtlasTexId`, with layout READ_ONLY_OPTIMAL.
+	// Returns the result of ILogicalDevice::updateDescriptorSets.
+	bool updateGUIDescriptorSet()
+	{
+		// texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout
+		// Only the font atlas has a descriptor to write, so exactly one write is submitted. The info lives
+		// in automatic storage so no reference to the atlas view is retained after this call returns.
+		IGPUDescriptorSet::SDescriptorInfo fontAtlasInfo = {};
+		fontAtlasInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL;
+		fontAtlasInfo.desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView());
+
+		const IGPUDescriptorSet::SWriteDescriptorSet writes[] = {
+			{
+				.dstSet = m_ui.descriptorSet.get(),
+				.binding = 0u,
+				.arrayElement = nbl::ext::imgui::UI::FontAtlasTexId,
+				.count = 1u,
+				.info = &fontAtlasInfo
+			}
+		};
+
+		return m_device->updateDescriptorSets(writes, {});
+	}
+
+	inline void workLoopBody() override
+	{
+		// framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation.
+		const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight());
+		// We block for semaphores for 2 reasons here:
+		// A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight]
+		// B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class.
[MaxAcquiresInFlight] + if (m_realFrameIx >= framesInFlight) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + 1 - framesInFlight + } + }; + if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + //m_api->startCapture(); + + // CPU events + update(); + + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[resourceIx].get(); + + if (!keepRunning()) + return; + + if (renderMode != E_RENDER_MODE::ERM_HLSL) + { + m_logger->log("Only HLSL render mode is supported.", ILogger::ELL_ERROR); + std::exit(-1); + } + + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + + // safe to proceed + // upload buffer data + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); + + updatePathtracerPushConstants(); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // transit m_cascadeView layout to GENERAL, block until previous shader is done with reading from the cascade + if(useRWMC) + { + const IGPUCommandBuffer::SImageMemoryBarrier 
cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::NONE + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = CascadeCount + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + + { + // TODO: shouldn't it be computed only at initialization stage and on window resize? + const uint32_t dispatchSize = usePersistentWorkGroups ? + m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, RenderWorkgroupSize) : + 1 + (WindowDimensions.x * WindowDimensions.y - 1) / RenderWorkgroupSize; + + IGPUComputePipeline* pipeline = pickPTPipeline(); + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); + + const uint32_t pushConstantsSize = useRWMC ? sizeof(RenderRWMCPushConstants) : sizeof(RenderPushConstants); + const void* pushConstantsPtr = useRWMC ? reinterpret_cast(&rwmcPushConstants) : reinterpret_cast(&pc); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, pushConstantsSize, pushConstantsPtr); + + cmdbuf->dispatch(dispatchSize, 1u, 1u); + } + + // m_cascadeView synchronization - wait for previous compute shader to write into the cascade + // TODO: create this and every other barrier once outside of the loop? 
+ if(useRWMC) + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = CascadeCount + } + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + + // resolve + if(useRWMC) + { + if (renderMode != E_RENDER_MODE::ERM_HLSL) + { + m_logger->log("RWMC is only supported with HLSL.", ILogger::ELL_ERROR); + std::exit(-1); + } + + // TODO: shouldn't it be computed only at initialization stage and on window resize? + // Round up division + const uint32_t2 dispatchSize = uint32_t2( + (m_window->getWidth() + ResolveWorkgroupSizeX - 1) / ResolveWorkgroupSizeX, + (m_window->getHeight() + ResolveWorkgroupSizeY - 1) / ResolveWorkgroupSizeY + ); + + IGPUComputePipeline* pipeline = m_resolvePipeline.get(); + + resolvePushConstants.resolveParameters = rwmc::computeResolveParameters(rwmcBase, spp, rwmcMinReliableLuma, rwmcKappa, CascadeCount); + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); + + cmdbuf->dispatch(dispatchSize.x, dispatchSize.y, 1u); + } + + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask 
= ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // TODO: tone mapping and stuff + + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = WindowDimensions.x; + viewport.height = WindowDimensions.y; + } + cmdbuf->setViewport(0u, 1u, &viewport); + + + VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; + cmdbuf->setScissor(defaultScisors); + + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + auto scRes = static_cast(m_surface->getSwapchainResources()); + + // Upload m_outImg to swapchain + UI + { + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearColor, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; + + cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); + ext::FullScreenTriangle::recordDrawCall(cmdbuf); + + const auto uiParams = 
m_ui.manager->getCreationParameters(); + auto* uiPipeline = m_ui.manager->getPipeline(); + cmdbuf->bindGraphicsPipeline(uiPipeline); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); + m_ui.manager->render(cmdbuf, waitInfo); + + cmdbuf->endRenderPass(); + } + + cmdbuf->end(); + { + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT + } + }; + { + { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + + updateGUIDescriptorSet(); + + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + m_realFrameIx--; + } + } + + m_window->setCaption("[Nabla Engine] HLSL Compute Path Tracer"); + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + } + //m_api->endCapture(); + } + + inline bool keepRunning() override + { + if (m_surface->irrecoverable()) + return false; + + return true; + } + + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } + + inline void update() + { + m_camera.setMoveSpeed(moveSpeed); + m_camera.setRotateSpeed(rotateSpeed); + + static std::chrono::microseconds previousEventTimestamp{}; + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + auto updatePresentationTimestamp = [&]() + { + m_currentImageAcquire = m_surface->acquireNextImage(); + + m_oracle.reportEndFrameRecord(); + const auto timestamp = 
m_oracle.getNextPresentationTimeStamp(); + m_oracle.reportBeginFrameRecord(); + + return timestamp; + }; + + const auto nextPresentationTimestamp = updatePresentationTimestamp(); + + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } capturedEvents; + + m_camera.beginInputProcessing(nextPresentationTimestamp); + { + const auto& io = ImGui::GetIO(); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + if (!io.WantCaptureMouse) + m_camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.mouse.emplace_back(e); + + if (e.type == nbl::ui::SMouseEvent::EET_SCROLL) + gcIndex = std::clamp(int16_t(gcIndex) + int16_t(core::sign(e.scrollEvent.verticalScroll)), int64_t(0), int64_t(ELG_COUNT - (uint8_t)1u)); + } + }, m_logger.get()); + + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + if (!io.WantCaptureKeyboard) + m_camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.keyboard.emplace_back(e); + } + }, m_logger.get()); + } + m_camera.endInputProcessing(nextPresentationTimestamp); + + const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); + const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); + + const ext::imgui::UI::SUpdateParameters 
params = + { + .mousePosition = mousePosition, + .displaySize = { m_window->getWidth(), m_window->getHeight() }, + .mouseEvents = mouseEvents, + .keyboardEvents = keyboardEvents + }; + + m_ui.manager->update(params); + } + + private: + void updatePathtracerPushConstants() + { + // Refreshes the CPU-side push constants from the current camera and UI state: fills `rwmcPushConstants` (plus `resolvePushConstants.sampleCount`) when `useRWMC` is set, `pc` otherwise; disregard surface/swapchain transformation for now + const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + // TODO: rewrite the `Camera` class so it uses hlsl::float32_t4x4 instead of core::matrix4SIMD + core::matrix4SIMD invMVP; + viewProjectionMatrix.getInverseTransform(invMVP); // out-parameter: invMVP is read below as the matrix to upload + if (useRWMC) + { + memcpy(&rwmcPushConstants.renderPushConstants.invMVP, invMVP.pointer(), sizeof(rwmcPushConstants.renderPushConstants.invMVP)); // byte copy bounded by the destination field's size + rwmcPushConstants.renderPushConstants.generalPurposeLightMatrix = hlsl::float32_t3x4(transpose(m_lightModelMatrix)); // transposed light model matrix, converted to 3x4 + rwmcPushConstants.renderPushConstants.depth = depth; + rwmcPushConstants.renderPushConstants.sampleCount = resolvePushConstants.sampleCount = spp; // the resolve pass gets the same sample count + rwmcPushConstants.renderPushConstants.pSampleSequence = m_sequenceBuffer->getDeviceAddress(); + float32_t2 packParams = float32_t2(rwmcBase, rwmcStart); + rwmcPushConstants.packedSplattingParams = hlsl::packHalf2x16(packParams); // (rwmcBase, rwmcStart) packed into a single value + } + else + { + memcpy(&pc.invMVP, invMVP.pointer(), sizeof(pc.invMVP)); // byte copy bounded by the destination field's size + pc.generalPurposeLightMatrix = hlsl::float32_t3x4(transpose(m_lightModelMatrix)); // transposed light model matrix, converted to 3x4 + pc.sampleCount = spp; + pc.depth = depth; + pc.pSampleSequence = m_sequenceBuffer->getDeviceAddress(); + } + } + + IGPUComputePipeline* pickPTPipeline() + { + IGPUComputePipeline* pipeline; + if (useRWMC) + { + if (renderMode != E_RENDER_MODE::ERM_HLSL) + { + m_logger->log("RWMC is only supported with HLSL.", ILogger::ELL_ERROR); + std::exit(-1); + } + + pipeline = usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelinesRWMC[PTPipeline].get() : m_PTHLSLPipelinesRWMC[PTPipeline].get(); + } + else + { + if (usePersistentWorkGroups) + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ?
m_PTHLSLPersistentWGPipelines[PTPipeline].get() : m_PTGLSLPersistentWGPipelines[PTPipeline].get(); + else + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPipelines[PTPipeline].get() : m_PTGLSLPipelines[PTPipeline].get(); + } + + return pipeline; + } + + private: + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + + // gpu resources + smart_refctd_ptr m_cmdPool; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPersistentWGPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelinesRWMC; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelinesRWMC; + smart_refctd_ptr m_resolvePipeline; + smart_refctd_ptr m_presentPipeline; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + smart_refctd_ptr m_descriptorSet0, m_descriptorSet2, m_presentDescriptorSet; + + core::smart_refctd_ptr m_guiDescriptorSetPool; + + // system resources + core::smart_refctd_ptr m_inputSystem; + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + + // pathtracer resources + smart_refctd_ptr m_envMapView, m_scrambleView; + smart_refctd_ptr m_sequenceBuffer; + smart_refctd_ptr m_outImgView; + smart_refctd_ptr m_cascadeView; + + // sync + smart_refctd_ptr m_semaphore; + + // image upload resources + smart_refctd_ptr m_scratchSemaphore; + SIntendedSubmitInfo m_intendedSubmit; + + struct C_UI + { + nbl::core::smart_refctd_ptr manager; + + struct + { + core::smart_refctd_ptr gui, scene; + } samplers; + + core::smart_refctd_ptr descriptorSet; + } m_ui; + + Camera m_camera; + + video::CDumbPresentationOracle m_oracle; + + uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this 
example is upgraded to support multiple objects this needs to be changed + + float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 165.f / 180.f * 3.14159f; + float camXAngle = 32.f / 180.f * 3.14159f; + int PTPipeline = E_LIGHT_GEOMETRY::ELG_SPHERE; + int renderMode = E_RENDER_MODE::ERM_HLSL; + int spp = 32; + int depth = 3; + float rwmcMinReliableLuma; + float rwmcKappa; + float rwmcStart; + float rwmcBase; + bool usePersistentWorkGroups = false; + bool useRWMC = false; + RenderRWMCPushConstants rwmcPushConstants; + RenderPushConstants pc; + ResolvePushConstants resolvePushConstants; + + hlsl::float32_t4x4 m_lightModelMatrix = { + 0.3f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.3f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.3f, 0.0f, + -1.0f, 1.5f, 0.0f, 1.0f, + }; + TransformRequestParams m_transformParams; + + bool m_firstFrame = true; + IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; +}; + +NBL_MAIN_FUNC(HLSLComputePathtracer) diff --git a/31_HLSLPathTracer/pipeline.groovy b/31_HLSLPathTracer/pipeline.groovy new file mode 100644 index 000000000..955e77cec --- /dev/null +++ b/31_HLSLPathTracer/pipeline.groovy @@ -0,0 +1,50 @@ +import org.DevshGraphicsProgramming.Agent +import org.DevshGraphicsProgramming.BuilderInfo +import org.DevshGraphicsProgramming.IBuilder + +class CHLSLPathTracerBuilder extends IBuilder +{ + public CHLSLPathTracerBuilder(Agent _agent, _info) + { + super(_agent, _info) + } + + @Override + public boolean prepare(Map axisMapping) + { + return true + } + + @Override + public boolean build(Map axisMapping) + { + IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") + IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") + + def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) + def nameOfConfig = getNameOfConfig(config) + + agent.execute("cmake --build 
${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") + + return true + } + + @Override + public boolean test(Map axisMapping) + { + return true + } + + @Override + public boolean install(Map axisMapping) + { + return true + } +} + +def create(Agent _agent, _info) +{ + return new CHLSLPathTracerBuilder(_agent, _info) +} + +return this diff --git a/73_SolidAngleVisualizer/CMakeLists.txt b/73_SolidAngleVisualizer/CMakeLists.txt new file mode 100644 index 000000000..6438c8e06 --- /dev/null +++ b/73_SolidAngleVisualizer/CMakeLists.txt @@ -0,0 +1,94 @@ +if(NBL_BUILD_IMGUI) + set(NBL_EXTRA_SOURCES + "${CMAKE_CURRENT_SOURCE_DIR}/src/transform.cpp" + ) + + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) + + list(APPEND NBL_LIBRARIES + imtestengine + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" + ) + + # TODO; Arek I removed `NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET` from the last parameter here, doesn't this macro have 4 arguments anyway !? + nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") + + if(NBL_EMBED_BUILTIN_RESOURCES) # uses ${EXECUTABLE_NAME} and links to that target, so it runs after nbl_create_executable_project() (same order as 31_HLSLPathTracer) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) + endif() + +
# TODO: Arek temporarily disabled cause I haven't figured out how to make this target yet + # LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD) + set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + set(DEPENDS + app_resources/hlsl/common.hlsl + app_resources/hlsl/gpu_common.hlsl + app_resources/hlsl/Drawing.hlsl + app_resources/hlsl/Sampling.hlsl + app_resources/hlsl/silhouette.hlsl + app_resources/hlsl/utils.hlsl + app_resources/hlsl/parallelogram_sampling.hlsl + + # app_resources/hlsl/test.comp.hlsl + app_resources/hlsl/benchmark/benchmark.comp.hlsl + app_resources/hlsl/benchmark/common.hlsl + ) + target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) + set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + + set(SM 6_8) + set(JSON [=[ + [ + + { + "INPUT": "app_resources/hlsl/benchmark/benchmark.comp.hlsl", + "KEY": "benchmark", + }, + ] + ]=]) + string(CONFIGURE "${JSON}" JSON) + + set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} + ) + + NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} + ) + + NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} + ) +endif() \ No newline at end of file diff --git a/73_SolidAngleVisualizer/README.md b/73_SolidAngleVisualizer/README.md new file mode 100644 index 000000000..e69de29bb diff --git
a/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl new file mode 100644 index 000000000..4338bd958 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl @@ -0,0 +1,594 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_DRAWING_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_DRAWING_HLSL_INCLUDED_ + +#include "common.hlsl" +#include "gpu_common.hlsl" + +// Check if a face on the hemisphere is visible from camera at origin +bool isFaceVisible(float32_t3 faceCenter, float32_t3 faceNormal) +{ + float32_t3 viewVec = normalize(-faceCenter); // Vector from the face towards the camera (camera sits at the origin) + return dot(faceNormal, viewVec) > 0.0f; +} + +// Maps a sphere direction to the 2D circle view: z >= 0 keeps xy scaled by CIRCLE_RADIUS, z < 0 scales xy by 1 / (1 + z) so unit-length back-facing directions land outside the circle; doesn't change Z coordinate +float32_t3 sphereToCircle(float32_t3 spherePoint) +{ + if (spherePoint.z >= 0.0f) + { + return float32_t3(spherePoint.xy * CIRCLE_RADIUS, spherePoint.z); + } + else + { + float32_t r2 = (1.0f - spherePoint.z) / max(1.0f + spherePoint.z, 1e-6f); // clamp the denominator: z == -1 would otherwise give 0 * inf = NaN below + float32_t uv2Plus1 = r2 + 1.0f; + return float32_t3((spherePoint.xy * uv2Plus1 / 2.0f) * CIRCLE_RADIUS, spherePoint.z); + } +} + +#if VISUALIZE_SAMPLES + +float32_t drawGreatCircleArc(float32_t3 fragPos, float32_t3 points[2], float32_t aaWidth, float32_t width = 0.01f) +{ + float32_t3 v0 = normalize(points[0]); + float32_t3 v1 = normalize(points[1]); + float32_t3 ndc = normalize(fragPos); + + float32_t3 arcNormal = normalize(cross(v0, v1)); + float32_t dist = abs(dot(ndc, arcNormal)); + + float32_t dotMid = dot(v0, v1); + bool onArc = (dot(ndc, v0) >= dotMid) && (dot(ndc, v1) >= dotMid); + + if (!onArc) + return 0.0f; + + float32_t avgDepth = (length(points[0]) + length(points[1])) * 0.5f; + float32_t depthScale = 3.0f / avgDepth; + + width = min(width * depthScale, 0.02f); + float32_t alpha = 1.0f - smoothstep(width - aaWidth,
width + aaWidth, dist); + + return alpha; +} + +float32_t drawCross2D(float32_t2 fragPos, float32_t2 center, float32_t size, float32_t thickness) +{ + float32_t2 ndc = abs(fragPos - center); + + // Check if point is inside the cross (horizontal or vertical bar) + bool inHorizontal = (ndc.x <= size && ndc.y <= thickness); + bool inVertical = (ndc.y <= size && ndc.x <= thickness); + + return (inHorizontal || inVertical) ? 1.0f : 0.0f; +} + +float32_t4 drawHiddenEdges(float32_t3x4 modelMatrix, float32_t3 spherePos, uint32_t silEdgeMask, float32_t aaWidth) +{ + float32_t4 color = 0; + float32_t3 hiddenEdgeColor = float32_t3(0.1, 0.1, 0.1); + + NBL_UNROLL + for (uint32_t i = 0; i < 12; i++) + { + // skip silhouette edges + if (silEdgeMask & (1u << i)) + continue; + + uint32_t2 edge = allEdges[i]; + + float32_t3 v0 = normalize(getVertex(modelMatrix, edge.x)); + float32_t3 v1 = normalize(getVertex(modelMatrix, edge.y)); + + bool neg0 = v0.z < 0.0f; + bool neg1 = v1.z < 0.0f; + + // fully hidden + if (neg0 && neg1) + continue; + + float32_t3 p0 = v0; + float32_t3 p1 = v1; + + // clip if needed + if (neg0 ^ neg1) + { + float32_t t = v0.z / (v0.z - v1.z); + float32_t3 clip = normalize(lerp(v0, v1, t)); + + p0 = neg0 ? clip : v0; + p1 = neg1 ? 
clip : v1; + } + + float32_t3 pts[2] = {p0, p1}; + float32_t c = drawGreatCircleArc(spherePos, pts, aaWidth, 0.003f); + color += float32_t4(hiddenEdgeColor * c, c); + } + + return color; +} + +float32_t4 drawCorner(float32_t3 cornerNDCPos, float32_t2 ndc, float32_t aaWidth, float32_t dotSize, float32_t innerDotSize, float32_t3 dotColor) +{ + float32_t4 color = float32_t4(0, 0, 0, 0); + float32_t dist = length(ndc - cornerNDCPos.xy); + + // outer dot + float32_t outerAlpha = 1.0f - smoothstep(dotSize - aaWidth, + dotSize + aaWidth, + dist); + + if (outerAlpha <= 0.0f) + return color; + + color += float32_t4(dotColor * outerAlpha, outerAlpha); + + // ------------------------------------------------- + // inner black dot for hidden corners + // ------------------------------------------------- + if (cornerNDCPos.z < 0.0f && innerDotSize > 0.0) + { + float32_t innerAlpha = 1.0f - smoothstep(innerDotSize - aaWidth, + innerDotSize + aaWidth, + dist); + + // ensure it stays inside the outer dot + innerAlpha *= outerAlpha; + + color -= float32_t4(innerAlpha.xxx, 0.0f); + } + + return color; +} + +// Draw a line segment in NDC space: coverage is 1 within thickness/2 of segment a-b and fades to 0 at thickness +float32_t lineSegment(float32_t2 ndc, float32_t2 a, float32_t2 b, float32_t thickness) +{ + float32_t2 pa = ndc - a; + float32_t2 ba = b - a; + float32_t h = saturate(dot(pa, ba) / max(dot(ba, ba), 1e-12f)); // guard a == b: a degenerate segment is treated as a point instead of producing NaN + float32_t dist = length(pa - ba * h); + return 1.0f - smoothstep(thickness * 0.5f, thickness, dist); // same falloff written with edge0 < edge1 (reversed edges are undefined for SPIR-V SmoothStep) +} + +// Draw an arrow head (triangle) in NDC space +float32_t arrowHead(float32_t2 ndc, float32_t2 tip, float32_t2 direction, float32_t size) +{ + // Create perpendicular vector + float32_t2 perp = float32_t2(-direction.y, direction.x); + + // Three points of the arrow head triangle + float32_t2 p1 = tip; + float32_t2 p2 = tip - direction * size + perp * size * 0.5; + float32_t2 p3 = tip - direction * size - perp * size * 0.5; + + // Check if point is inside triangle using barycentric coordinates + float32_t2 v0 = p3 - p1; + float32_t2 v1 = p2 - p1; +
float32_t2 v2 = ndc - p1; + + float32_t dot00 = dot(v0, v0); + float32_t dot01 = dot(v0, v1); + float32_t dot02 = dot(v0, v2); + float32_t dot11 = dot(v1, v1); + float32_t dot12 = dot(v1, v2); + + float32_t invDenom = 1.0 / (dot00 * dot11 - dot01 * dot01); + float32_t u = (dot11 * dot02 - dot01 * dot12) * invDenom; + float32_t v = (dot00 * dot12 - dot01 * dot02) * invDenom; + + bool inside = (u >= 0.0) && (v >= 0.0) && (u + v <= 1.0); + + // Add some antialiasing: soft falloff around the three triangle corners for fragments outside the triangle + float32_t minDist = min(min( + length(ndc - p1), + length(ndc - p2)), + length(ndc - p3)); + + return inside ? 1.0 : (1.0 - smoothstep(0.0, 0.02, minDist)); // same falloff written with edge0 < edge1 +} + +// Helper to draw an edge with proper color mapping: returns colorLUT[originalEdgeIdx] premultiplied by the arc coverage, with the coverage in alpha +float32_t4 drawEdge(uint32_t originalEdgeIdx, float32_t3 pts[2], float32_t3 spherePos, float32_t aaWidth, float32_t width = 0.003f) +{ + float32_t edgeAlpha = drawGreatCircleArc(spherePos, pts, aaWidth, width); // arc coverage is a scalar + return float32_t4(colorLUT[originalEdgeIdx] * edgeAlpha, edgeAlpha); +} + +float32_t4 drawCorners(float32_t3x4 modelMatrix, float32_t2 ndc, float32_t aaWidth, float32_t dotSize) +{ + float32_t4 color = float32_t4(0, 0, 0, 0); + + // the inner dot is disabled here: 0.0 is passed as innerDotSize, so drawCorner never darkens the centre + + for (uint32_t i = 0; i < 8; i++) + { + float32_t3 cornerCirclePos = sphereToCircle(normalize(getVertex(modelMatrix, i))); + color += drawCorner(cornerCirclePos, ndc, aaWidth, dotSize, 0.0, colorLUT[i]); + } + + return color; +} + +#ifdef _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_ +float32_t4 drawClippedSilhouetteVertices(float32_t2 ndc, ClippedSilhouette silhouette, float32_t aaWidth) +{ + float32_t4 color = 0; + float32_t dotSize = 0.03f; + + for (uint i = 0; i < silhouette.count; i++) + { + float32_t3 cornerCirclePos = sphereToCircle(normalize(silhouette.vertices[i])); + float32_t dist = length(ndc - cornerCirclePos.xy); + + // Smooth circle for the vertex + float32_t alpha = 1.0f - smoothstep(dotSize * 0.8f, dotSize, dist); + + if (alpha > 0.0f) + { + // Color gradient: Red (index 0)
to Cyan (last index) + // This helps verify the CCW winding order visually + float32_t t = float32_t(i) / float32_t(max(1u, silhouette.count - 1)); + float32_t3 vertexColor = lerp(float32_t3(1, 0, 0), float32_t3(0, 1, 1), t); + + color += float32_t4(vertexColor * alpha, alpha); + } + } + return color; +} +#endif // _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_ + +float32_t4 drawRing(float32_t2 ndc, float32_t aaWidth) +{ + float32_t positionLength = length(ndc); + float32_t ringWidth = 0.003f; + float32_t ringDistance = abs(positionLength - CIRCLE_RADIUS); + float32_t ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); + return ringAlpha * float32_t4(1, 1, 1, 1); +} + +// Returns the number of visible faces and populates the faceIndices array +uint getVisibleFaces(int3 region, out uint faceIndices[3]) +{ + uint count = 0; + + // Check X axis + if (region.x == 0) + faceIndices[count++] = 3; // X+ + else if (region.x == 2) + faceIndices[count++] = 2; // X- + + // Check Y axis + if (region.y == 0) + faceIndices[count++] = 5; // Y+ + else if (region.y == 2) + faceIndices[count++] = 4; // Y- + + // Check Z axis + if (region.z == 0) + faceIndices[count++] = 1; // Z+ + else if (region.z == 2) + faceIndices[count++] = 0; // Z- + + return count; +} + +float32_t4 drawVisibleFaceOverlay(float32_t3x4 modelMatrix, float32_t3 spherePos, int3 region, float32_t aaWidth) +{ + uint faceIndices[3]; + uint count = getVisibleFaces(region, faceIndices); + + float32_t4 color = 0; + + for (uint i = 0; i < count; i++) + { + uint fIdx = faceIndices[i]; + float32_t3 n = localNormals[fIdx]; + + // Transform normal to world space (using the same logic as your corners) + float32_t3 worldNormal = -normalize(mul((float3x3)modelMatrix, n)); + worldNormal.z = -worldNormal.z; // Invert Z for correct orientation + + // Very basic visualization: highlight if the sphere position + // is generally pointing towards that face's normal + float32_t alignment = 
dot(spherePos, worldNormal); + if (alignment > 0.95f) + { + // Use different colors for different face indices + color += float32_t4(colorLUT[fIdx % 24], 0.5f); + } + } + return color; +} + +float32_t4 drawFaces(float32_t3x4 modelMatrix, float32_t3 spherePos, float32_t aaWidth) +{ + float32_t4 color = 0.0f; + float32_t3 ndc = normalize(spherePos); + + float3x3 rotMatrix = (float3x3)modelMatrix; + + // Check each of the 6 faces + for (uint32_t faceIdx = 0; faceIdx < 6; faceIdx++) + { + float32_t3 n_world = mul(rotMatrix, localNormals[faceIdx]); + + // Check if face is visible + if (!isFaceVisible(faceCenters[faceIdx], n_world)) + continue; + + // Get the 4 corners of this face + float32_t3 faceVerts[4]; + for (uint32_t i = 0; i < 4; i++) + { + uint32_t cornerIdx = faceToCorners[faceIdx][i]; + faceVerts[i] = normalize(getVertex(modelMatrix, cornerIdx)); + } + + // Compute face center for winding + float32_t3 faceCenter = float32_t3(0, 0, 0); + for (uint32_t i = 0; i < 4; i++) + faceCenter += faceVerts[i]; + faceCenter = normalize(faceCenter); + + // Check if point is inside this face + bool isInside = true; + float32_t minDist = 1e10; + + for (uint32_t i = 0; i < 4; i++) + { + float32_t3 v0 = faceVerts[i]; + float32_t3 v1 = faceVerts[(i + 1) % 4]; + + // Skip edges behind camera + if (v0.z < 0.0f && v1.z < 0.0f) + { + isInside = false; + break; + } + + // Great circle normal + float32_t3 edgeNormal = normalize(cross(v0, v1)); + + // Ensure normal points inward + if (dot(edgeNormal, faceCenter) < 0.0f) + edgeNormal = -edgeNormal; + + float32_t d = dot(ndc, edgeNormal); + + if (d < -1e-6f) + { + isInside = false; + break; + } + + minDist = min(minDist, abs(d)); + } + + if (isInside) + { + float32_t alpha = smoothstep(0.0f, aaWidth * 2.0f, minDist); + + // Use colorLUT based on face index (0-5) + float32_t3 faceColor = colorLUT[faceIdx]; + + float32_t shading = saturate(ndc.z * 0.8f + 0.2f); + color += float32_t4(faceColor * shading * alpha, alpha); + } + } + + return 
color; +} + +// ============================================================================ +// Spherical geometry drawing helpers (for pyramid visualization) +// ============================================================================ + +// Draw a great circle where dot(p, axis) = 0 +// Used to visualize caliper planes +float32_t4 drawGreatCirclePlane( + float32_t3 axis, + float32_t3 spherePos, + float32_t aaWidth, + float32_t3 color, + float32_t width = 0.005f) +{ + float32_t3 fragDir = normalize(spherePos); + + // Only draw on front hemisphere + if (fragDir.z < 0.0f) + return float32_t4(0, 0, 0, 0); + + // Distance from the great circle plane + float32_t distFromPlane = abs(dot(fragDir, axis)); + + float32_t alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, distFromPlane); + + return float32_t4(color * alpha, alpha); +} + +// Draw lune boundaries - two small circles at dot(p, axis) = offset ± halfWidth +// halfWidth and offset are in sin-space (not radians) +float32_t4 drawLuneBoundary(float32_t3 axis, float32_t halfWidth, float32_t offset, float32_t3 spherePos, float32_t aaWidth, float32_t3 color, float32_t lineWidth = 0.004f) +{ + float32_t3 fragDir = normalize(spherePos); + + // Only draw on front hemisphere + if (fragDir.z < 0.0f) + return float32_t4(0, 0, 0, 0); + + // The lune boundaries are where dot(p, axis) = offset ± halfWidth + float32_t dotWithAxis = dot(fragDir, axis); + + // Draw both boundaries of the lune (accounting for offset) + float32_t upperBound = offset + halfWidth; + float32_t lowerBound = offset - halfWidth; + float32_t distFromUpperBoundary = abs(dotWithAxis - upperBound); + float32_t distFromLowerBoundary = abs(dotWithAxis - lowerBound); + + float32_t alphaUpper = 1.0f - smoothstep(lineWidth - aaWidth, lineWidth + aaWidth, distFromUpperBoundary); + float32_t alphaLower = 1.0f - smoothstep(lineWidth - aaWidth, lineWidth + aaWidth, distFromLowerBoundary); + + float32_t alpha = max(alphaUpper, alphaLower); + + return 
float32_t4(color * alpha, alpha); +} + +// Draw axis direction markers (dots at +/- axis from center) +float32_t4 drawAxisMarkers( + float32_t3 axis, + float32_t3 center, + float32_t2 ndc, + float32_t aaWidth, + float32_t3 color, + float32_t extent = 0.25f) +{ + float32_t4 result = float32_t4(0, 0, 0, 0); + + // Positive axis endpoint + float32_t3 axisEndPos = normalize(center + axis * extent); + float32_t3 axisEndPosCircle = sphereToCircle(axisEndPos); + result += drawCorner(axisEndPosCircle, ndc, aaWidth, 0.025f, 0.0f, color); + + // Negative axis endpoint (smaller, dimmer) + float32_t3 axisEndNeg = normalize(center - axis * extent); + float32_t3 axisEndNegCircle = sphereToCircle(axisEndNeg); + result += drawCorner(axisEndNegCircle, ndc, aaWidth, 0.015f, 0.0f, color * 0.5f); + + return result; +} + +// ============================================================================ +// Visualization +// ============================================================================ + +// Draw half of a great circle (the visible half of a lune boundary) +float32_t4 drawGreatCircleHalf(float32_t3 normal, float32_t3 spherePos, float32_t3 axis3, float32_t aaWidth, float32_t3 color, float32_t thickness) +{ + // Point is on great circle if dot(point, normal) ≈ 0 + // Only draw the half where dot(point, axis3) > 0 (toward silhouette) + float32_t dist = abs(dot(spherePos, normal)); + float32_t sideFade = smoothstep(-0.1f, 0.1f, dot(spherePos, axis3)); + float32_t alpha = (1.0f - smoothstep(thickness - aaWidth, thickness + aaWidth, dist)) * sideFade; + return float32_t4(color * alpha, alpha); +} + +// Visualize the best caliper edge (the edge that determined axis1) +float32_t4 visualizeBestCaliperEdge(const float32_t3 vertices[MAX_SILHOUETTE_VERTICES], uint32_t bestEdgeIdx, uint32_t count, float32_t3 spherePos, float32_t aaWidth) +{ + float32_t4 result = float32_t4(0, 0, 0, 0); + + if (bestEdgeIdx >= count) + return result; + + uint32_t nextIdx = (bestEdgeIdx + 1 < count) ? 
bestEdgeIdx + 1 : 0; + float32_t3 v0 = vertices[bestEdgeIdx]; + float32_t3 v1 = vertices[nextIdx]; + + // Draw the best caliper edge with a thicker, gold line + float32_t3 pts[2] = {v0, v1}; + float32_t3 highlightColor = float32_t3(1.0f, 0.8f, 0.0f); + float32_t alpha = drawGreatCircleArc(spherePos, pts, aaWidth, 0.008f); + result += float32_t4(highlightColor * alpha, alpha); + + return result; +} + +#endif // VISUALIZE_SAMPLES + +#if DEBUG_DATA + +uint32_t getEdgeVisibility(float32_t3x4 modelMatrix, uint32_t edgeIdx) +{ + + // Adjacency of edges to faces + // Corrected Adjacency of edges to faces + static const uint32_t2 edgeToFaces[12] = { + // Edge Index: | allEdges[i] | Shared Faces: + + /* 0 (0-1) */ {4, 0}, // Y- (4) and Z- (0) + /* 1 (2-3) */ {5, 0}, // Y+ (5) and Z- (0) + /* 2 (4-5) */ {4, 1}, // Y- (4) and Z+ (1) + /* 3 (6-7) */ {5, 1}, // Y+ (5) and Z+ (1) + + /* 4 (0-2) */ {2, 0}, // X- (2) and Z- (0) + /* 5 (1-3) */ {3, 0}, // X+ (3) and Z- (0) + /* 6 (4-6) */ {2, 1}, // X- (2) and Z+ (1) + /* 7 (5-7) */ {3, 1}, // X+ (3) and Z+ (1) + + /* 8 (0-4) */ {2, 4}, // X- (2) and Y- (4) + /* 9 (1-5) */ {3, 4}, // X+ (3) and Y- (4) + /* 10 (2-6) */ {2, 5}, // X- (2) and Y+ (5) + /* 11 (3-7) */ {3, 5} // X+ (3) and Y+ (5) + }; + + uint32_t2 faces = edgeToFaces[edgeIdx]; + + // Transform normals to world space + float3x3 rotMatrix = (float3x3)modelMatrix; + float32_t3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); + float32_t3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); + + bool visible1 = isFaceVisible(faceCenters[faces.x], n_world_f1); + bool visible2 = isFaceVisible(faceCenters[faces.y], n_world_f2); + + // Silhouette: exactly one face visible + if (visible1 != visible2) + return 1; + + // Inner edge: both faces visible + if (visible1 && visible2) + return 2; + + // Hidden edge: both faces hidden + return 0; +} + +uint32_t computeGroundTruthEdgeMask(float32_t3x4 modelMatrix) +{ + uint32_t mask = 0u; + NBL_UNROLL + for (uint32_t j = 0; j < 12; j++) 
+ { + // getEdgeVisibility returns 1 for a silhouette edge based on 3D geometry + if (getEdgeVisibility(modelMatrix, j) == 1) + { + mask |= (1u << j); + } + } + return mask; +} + +void validateEdgeVisibility(float32_t3x4 modelMatrix, uint32_t sil, uint32_t vertexCount, uint32_t generatedSilMask) +{ + uint32_t mismatchAccumulator = 0; + + // The Ground Truth now represents the full 3D silhouette, clipped or not. + uint32_t groundTruthMask = computeGroundTruthEdgeMask(modelMatrix); + + // The comparison checks if the generated mask perfectly matches the full 3D ground truth. + uint32_t mismatchMask = groundTruthMask ^ generatedSilMask; + + if (mismatchMask != 0) + { + NBL_UNROLL + for (uint32_t j = 0; j < 12; j++) + { + if ((mismatchMask >> j) & 1u) + { + uint32_t2 edge = allEdges[j]; + // Accumulate vertex indices where error occurred + mismatchAccumulator |= (1u << edge.x) | (1u << edge.y); + } + } + } + + // Simple Write (assuming all fragments calculate the same result) + InterlockedOr(DebugDataBuffer[0].edgeVisibilityMismatch, mismatchAccumulator); +} +#endif // DEBUG_DATA + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_DRAWING_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl new file mode 100644 index 000000000..3b49d17ca --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/benchmark.comp.hlsl @@ -0,0 +1,128 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". 
+//// For conditions of distribution and use, see copyright notice in nabla.h
+#pragma shader_stage(compute)
+
+#include "app_resources/hlsl/common.hlsl"
+#include "app_resources/hlsl/benchmark/common.hlsl"
+#include "app_resources/hlsl/silhouette.hlsl"
+#include "app_resources/hlsl/parallelogram_sampling.hlsl"
+#include "app_resources/hlsl/pyramid_sampling.hlsl"
+#include "app_resources/hlsl/triangle_sampling.hlsl"
+
+using namespace nbl::hlsl;
+
+[[vk::binding(0, 0)]] RWByteAddressBuffer outputBuffer;
+[[vk::push_constant]] BenchmarkPushConstants pc;
+
+static const SAMPLING_MODE benchmarkMode = (SAMPLING_MODE)SAMPLING_MODE_CONST;
+
+// Deterministic 2D random-number substitute for sample iteration `i`:
+// x walks the 8 cell centres of a row (i & 7), y advances one row every 8 iterations (i >> 3).
+// NOTE(review): y leaves [0,1) once i >= 64 - presumably pc.sampleCount is kept <= 64; confirm against the host code.
+float32_t2 benchmarkXi(uint32_t i)
+{
+    return float32_t2(
+        (float32_t(i & 7u) + 0.5f) / 8.0f,
+        (float32_t(i >> 3u) + 0.5f) / 8.0f);
+}
+
+// Benchmark entry point.
+// Each invocation builds the clipped silhouette for a slightly perturbed model matrix,
+// constructs the sampler selected at compile time by SAMPLING_MODE_CONST, draws
+// pc.sampleCount samples from it and writes one uint32_t per invocation to outputBuffer.
+[numthreads(BENCHMARK_WORKGROUP_DIMENSION_SIZE_X, 1, 1)]
+[shader("compute")]
+void main(uint32_t3 invocationID : SV_DispatchThreadID)
+{
+    // Perturb model matrix slightly per invocation
+    float32_t3x4 perturbedMatrix = pc.modelMatrix;
+    perturbedMatrix[0][3] += float32_t(invocationID.x) * 1e-6f;
+
+    uint32_t3 region;
+    uint32_t configIndex;
+    uint32_t vertexCount;
+    uint32_t sil = ClippedSilhouette::computeRegionAndConfig(perturbedMatrix, region, configIndex, vertexCount);
+
+    ClippedSilhouette silhouette = (ClippedSilhouette)0;
+    silhouette.compute(perturbedMatrix, vertexCount, sil);
+
+    // All of these are read by the final Store. Give them defined values up front:
+    // triIdx is only written by the triangle-fan sampler, and pdf / sampleValid are
+    // never written when pc.sampleCount is 0.
+    float32_t pdf = 0.0f;
+    uint32_t triIdx = 0u;
+    bool sampleValid = false;
+    uint32_t validSampleCount = 0u;
+    float32_t3 sampleDir = float32_t3(0.0, 0.0, 0.0);
+
+    if (benchmarkMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE ||
+        benchmarkMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE)
+    {
+        TriangleFanSampler samplingData;
+        samplingData = TriangleFanSampler::create(silhouette, benchmarkMode);
+
+        for (uint32_t i = 0; i < pc.sampleCount; i++)
+        {
+            // This sampler reports no validity flag; every sample is counted.
+            sampleDir += samplingData.sample(silhouette, benchmarkXi(i), pdf, triIdx);
+            validSampleCount++;
+        }
+    }
+    else if (benchmarkMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE)
+    {
+        // Precompute parallelogram for sampling (create() requires normalized silhouette vertices)
+        silhouette.normalize();
+        SilEdgeNormals silEdgeNormals;
+        Parallelogram parallelogram = Parallelogram::create(silhouette, silEdgeNormals);
+
+        for (uint32_t i = 0; i < pc.sampleCount; i++)
+        {
+            sampleDir += parallelogram.sample(silEdgeNormals, benchmarkXi(i), pdf, sampleValid);
+            validSampleCount += sampleValid ? 1u : 0u;
+        }
+    }
+    else if (benchmarkMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE)
+    {
+        // Precompute spherical pyramid and Urena sampler once (edge normals fused)
+        SilEdgeNormals silEdgeNormals;
+        SphericalPyramid pyramid = SphericalPyramid::create(silhouette, silEdgeNormals);
+        UrenaSampler urena = UrenaSampler::create(pyramid);
+
+        for (uint32_t i = 0; i < pc.sampleCount; i++)
+        {
+            sampleDir += urena.sample(pyramid, silEdgeNormals, benchmarkXi(i), pdf, sampleValid);
+            validSampleCount += sampleValid ? 1u : 0u;
+        }
+    }
+    else if (benchmarkMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC)
+    {
+        // Precompute spherical pyramid and biquadratic sampler once (edge normals fused)
+        SilEdgeNormals silEdgeNormals;
+        SphericalPyramid pyramid = SphericalPyramid::create(silhouette, silEdgeNormals);
+        BiquadraticSampler biquad = BiquadraticSampler::create(pyramid);
+
+        for (uint32_t i = 0; i < pc.sampleCount; i++)
+        {
+            sampleDir += biquad.sample(pyramid, silEdgeNormals, benchmarkXi(i), pdf, sampleValid);
+            validSampleCount += sampleValid ? 1u : 0u;
+        }
+    }
+    else if (benchmarkMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR)
+    {
+        // Precompute spherical pyramid and bilinear sampler once (edge normals fused)
+        SilEdgeNormals silEdgeNormals;
+        SphericalPyramid pyramid = SphericalPyramid::create(silhouette, silEdgeNormals);
+        BilinearSampler bilin = BilinearSampler::create(pyramid);
+
+        for (uint32_t i = 0; i < pc.sampleCount; i++)
+        {
+            sampleDir += bilin.sample(pyramid, silEdgeNormals, benchmarkXi(i), pdf, sampleValid);
+            validSampleCount += sampleValid ? 1u : 0u;
+        }
+    }
+
+    // Fold every result into one word per invocation (presumably only a sink that keeps the
+    // sampling work observable - confirm nothing on the host interprets the value).
+    // The fold is done on raw bit patterns in uint32_t so it wraps instead of going through
+    // a float sum and a float -> uint conversion.
+    const uint32_t sink = asuint(pdf) + validSampleCount + triIdx +
+                          asuint(sampleDir.x) + asuint(sampleDir.y) + asuint(sampleDir.z);
+
+    const uint32_t offset = sizeof(uint32_t) * invocationID.x;
+    outputBuffer.Store(offset, sink);
+}
diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl
new file mode 100644
index 000000000..3091bc793
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/benchmark/common.hlsl
@@ -0,0 +1,11 @@
+//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h + +#include + +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_X = 64u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y = 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z = 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_COUNT = 1000000u; + diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl new file mode 100644 index 000000000..d63ec3c6a --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/common.hlsl @@ -0,0 +1,136 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_COMMON_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_COMMON_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +#define FAST 1 + +namespace nbl +{ + namespace hlsl + { + // Sampling mode enum + enum SAMPLING_MODE : uint32_t + { + TRIANGLE_SOLID_ANGLE, + TRIANGLE_PROJECTED_SOLID_ANGLE, + PROJECTED_PARALLELOGRAM_SOLID_ANGLE, + SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE, + SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC, + SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR, + Count + }; + + struct ResultData + { + // Silhouette + uint32_t3 region; + uint32_t silhouetteIndex; + uint32_t silhouetteVertexCount; + uint32_t silhouette; + uint32_t positiveVertCount; + uint32_t edgeVisibilityMismatch; + uint32_t clipMask; + uint32_t clipCount; + uint32_t rotatedSil; + uint32_t wrapAround; + uint32_t rotatedClipMask; + uint32_t rotateAmount; + uint32_t vertices[6]; + uint32_t clippedSilhouetteVertexCount; + float32_t3 clippedSilhouetteVertices[7]; + uint32_t clippedSilhouetteVerticesIndices[7]; + + // Parallelogram + uint32_t parallelogramDoesNotBound; + 
float32_t parallelogramArea; + uint32_t failedVertexIndex; + uint32_t edgeIsConvex[4]; + uint32_t parallelogramVerticesInside; + uint32_t parallelogramEdgesInside; + float32_t2 parallelogramCorners[4]; + + // spherical triangle + uint32_t maxTrianglesExceeded; + uint32_t sphericalLuneDetected; + uint32_t triangleCount; + float32_t solidAngles[5]; + float32_t totalSolidAngles; + + // Sampling ray visualization data + uint32_t sampleCount; + float32_t4 rayData[512]; // xyz = direction, w = PDF + + // Pyramid sampling debug data + float32_t3 pyramidAxis1; // First caliper axis direction + float32_t3 pyramidAxis2; // Second caliper axis direction + float32_t3 pyramidCenter; // Silhouette center direction + float32_t pyramidHalfWidth1; // Half-width along axis1 (sin-space) + float32_t pyramidHalfWidth2; // Half-width along axis2 (sin-space) + float32_t pyramidOffset1; // Center offset along axis1 + float32_t pyramidOffset2; // Center offset along axis2 + float32_t pyramidSolidAngle; // Bounding region solid angle + uint32_t pyramidBestEdge; // Which edge produced best caliper + uint32_t pyramidSpansHemisphere; // Warning: silhouette >= hemisphere + float32_t pyramidMin1; // Min dot product along axis1 + float32_t pyramidMax1; // Max dot product along axis1 + float32_t pyramidMin2; // Min dot product along axis2 + float32_t pyramidMax2; // Max dot product along axis2 + uint32_t axis2BiggerThanAxis1; + + // Sampling stats + uint32_t validSampleCount; + uint32_t threadCount; // Used as a hack for fragment shader, as dividend for validSampleCount + }; + +#ifdef __HLSL_VERSION + [[vk::binding(0, 0)]] RWStructuredBuffer DebugDataBuffer; +#endif + + struct PushConstants + { + float32_t3x4 modelMatrix; + float32_t4 viewport; + uint32_t sampleCount; + uint32_t frameIndex; + }; + + struct PushConstantRayVis + { + float32_t4x4 viewProjMatrix; + float32_t3x4 viewMatrix; + float32_t3x4 modelMatrix; + float32_t3x4 invModelMatrix; + float32_t4 viewport; + uint32_t frameIndex; + }; + + 
struct BenchmarkPushConstants + { + float32_t3x4 modelMatrix; + uint32_t sampleCount; + }; + + static const float32_t3 colorLUT[27] = { + float32_t3(0, 0, 0), float32_t3(0.5, 0.5, 0.5), + float32_t3(1, 0, 0), float32_t3(0, 1, 0), float32_t3(0, 0, 1), + float32_t3(1, 1, 0), float32_t3(1, 0, 1), float32_t3(0, 1, 1), + float32_t3(1, 0.5, 0), float32_t3(1, 0.65, 0), float32_t3(0.8, 0.4, 0), + float32_t3(1, 0.4, 0.7), float32_t3(1, 0.75, 0.8), float32_t3(0.7, 0.1, 0.3), + float32_t3(0.5, 0, 0.5), float32_t3(0.6, 0.4, 0.8), float32_t3(0.3, 0, 0.5), + float32_t3(0, 0.5, 0), float32_t3(0.5, 1, 0), float32_t3(0, 0.5, 0.25), + float32_t3(0, 0, 0.5), float32_t3(0.3, 0.7, 1), float32_t3(0, 0.4, 0.6), + float32_t3(0.6, 0.4, 0.2), float32_t3(0.8, 0.7, 0.3), float32_t3(0.4, 0.3, 0.1), float32_t3(1, 1, 1)}; + +#ifndef __HLSL_VERSION + static const char *colorNames[27] = {"Black", "Gray", "Red", "Green", "Blue", "Yellow", "Magenta", "Cyan", + "Orange", "Light Orange", "Dark Orange", "Pink", "Light Pink", "Deep Rose", "Purple", "Light Purple", + "Indigo", "Dark Green", "Lime", "Forest Green", "Navy", "Sky Blue", "Teal", "Brown", + "Tan/Beige", "Dark Brown", "White"}; +#endif // __HLSL_VERSION + } +} +#endif // _SOLID_ANGLE_VIS_EXAMPLE_COMMON_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl new file mode 100644 index 000000000..142471493 --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/gpu_common.hlsl @@ -0,0 +1,175 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". 
+//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_GPU_COMMON_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_GPU_COMMON_HLSL_INCLUDED_ + +#include "utils.hlsl" + +static const float32_t CIRCLE_RADIUS = 0.5f; +static const float32_t INV_CIRCLE_RADIUS = 1.0f / CIRCLE_RADIUS; + +// --- Geometry Utils --- +#define MAX_SILHOUETTE_VERTICES 7 + +// Special index values for clip points +static const uint32_t CLIP_POINT_A = 23; // Clip point between last positive and first negative +static const uint32_t CLIP_POINT_B = 24; // Clip point between last negative and first positive + +static const float32_t3 constCorners[8] = { + float32_t3(-0.5f, -0.5f, -0.5f), float32_t3(0.5f, -0.5f, -0.5f), float32_t3(-0.5f, 0.5f, -0.5f), float32_t3(0.5f, 0.5f, -0.5f), + float32_t3(-0.5f, -0.5f, 0.5f), float32_t3(0.5f, -0.5f, 0.5f), float32_t3(-0.5f, 0.5f, 0.5f), float32_t3(0.5f, 0.5f, 0.5f)}; + +static const uint32_t2 allEdges[12] = { + {0, 1}, + {2, 3}, + {4, 5}, + {6, 7}, // X axis + {0, 2}, + {1, 3}, + {4, 6}, + {5, 7}, // Y axis + {0, 4}, + {1, 5}, + {2, 6}, + {3, 7}, // Z axis +}; + +// Maps face index (0-5) to its 4 corner indices in CCW order +static const uint32_t faceToCorners[6][4] = { + {0, 2, 3, 1}, // Face 0: Z- + {4, 5, 7, 6}, // Face 1: Z+ + {0, 4, 6, 2}, // Face 2: X- + {1, 3, 7, 5}, // Face 3: X+ + {0, 1, 5, 4}, // Face 4: Y- + {2, 6, 7, 3} // Face 5: Y+ +}; + +static float32_t3 corners[8]; +static float32_t3 faceCenters[6] = { + float32_t3(0, 0, 0), float32_t3(0, 0, 0), float32_t3(0, 0, 0), + float32_t3(0, 0, 0), float32_t3(0, 0, 0), float32_t3(0, 0, 0)}; + +static const float32_t3 localNormals[6] = { + float32_t3(0, 0, -1), // Face 0 (Z-) + float32_t3(0, 0, 1), // Face 1 (Z+) + float32_t3(-1, 0, 0), // Face 2 (X-) + float32_t3(1, 0, 0), // Face 3 (X+) + float32_t3(0, -1, 0), // Face 4 (Y-) + float32_t3(0, 1, 0) // Face 5 (Y+) +}; + +// TODO: unused, remove later +// Vertices are ordered CCW relative to the 
camera view. +static const uint32_t silhouettes[27][7] = { + {6, 1, 3, 2, 6, 4, 5}, // 0: Black + {6, 2, 6, 4, 5, 7, 3}, // 1: White + {6, 0, 4, 5, 7, 3, 2}, // 2: Gray + {6, 1, 3, 7, 6, 4, 5}, // 3: Red + {4, 4, 5, 7, 6, 0, 0}, // 4: Green + {6, 0, 4, 5, 7, 6, 2}, // 5: Blue + {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow + {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta + {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan + {6, 1, 3, 2, 6, 7, 5}, // 9: Orange + {4, 2, 6, 7, 3, 0, 0}, // 10: Light Orange + {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange + {4, 1, 3, 7, 5, 0, 0}, // 12: Pink + {4, 0, 4, 6, 7, 3, 2}, // 13: Light Pink + {4, 0, 4, 6, 2, 0, 0}, // 14: Deep Rose + {6, 0, 1, 3, 7, 5, 4}, // 15: Purple + {4, 0, 1, 5, 4, 0, 0}, // 16: Light Purple + {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo + {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green + {6, 0, 2, 6, 7, 3, 1}, // 19: Lime + {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green + {6, 0, 2, 3, 7, 5, 1}, // 21: Navy + {4, 0, 2, 3, 1, 0, 0}, // 22: Sky Blue + {6, 0, 4, 6, 2, 3, 1}, // 23: Teal + {6, 0, 2, 3, 7, 5, 4}, // 24: Brown + {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige + {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown +}; + +// Binary packed silhouettes +static const uint32_t binSilhouettes[27] = { + 0b11000000000000101100110010011001, + 0b11000000000000011111101100110010, + 0b11000000000000010011111101100000, + 0b11000000000000101100110111011001, + 0b10000000000000000000110111101100, + 0b11000000000000010110111101100000, + 0b11000000000000100110111011001000, + 0b11000000000000100110111101001000, + 0b11000000000000010110111101001000, + 0b11000000000000101111110010011001, + 0b10000000000000000000011111110010, + 0b11000000000000010011111110100000, + 0b10000000000000000000101111011001, + 0b11000000000000010011111110100000, + 0b10000000000000000000010110100000, + 0b11000000000000100101111011001000, + 0b10000000000000000000100101001000, + 0b11000000000000010110100101001000, + 0b11000000000000001101111110010000, + 0b11000000000000001011111110010000, + 
0b11000000000000001011111110100000, + 0b11000000000000001101111011010000, + 0b10000000000000000000001011010000, + 0b11000000000000001011010110100000, + 0b11000000000000100101111011010000, + 0b11000000000000100101001011010000, + 0b11000000000000011010110100101001, +}; + +uint32_t getSilhouetteVertex(uint32_t packedSil, uint32_t index) +{ + return (packedSil >> (3u * index)) & 0x7u; +} + +// Get silhouette size +uint32_t getSilhouetteSize(uint32_t sil) +{ + return (sil >> 29u) & 0x7u; +} + +// Check if vertex has negative z +bool getVertexZNeg(float32_t3x4 modelMatrix, uint32_t vertexIdx) +{ +#if FAST + float32_t3 localPos = float32_t3( + (vertexIdx & 1) ? 0.5f : -0.5f, + (vertexIdx & 2) ? 0.5f : -0.5f, + (vertexIdx & 4) ? 0.5f : -0.5f); + + float32_t transformedZ = nbl::hlsl::dot(modelMatrix[2].xyz, localPos) + modelMatrix[2].w; + return transformedZ < 0.0f; +#else + return corners[vertexIdx].z < 0.0f; +#endif +} + +// Get world position of cube vertex +float32_t3 getVertex(float32_t3x4 modelMatrix, uint32_t vertexIdx) +{ +#if FAST + // Reconstruct local cube corner from index bits + float32_t sx = (vertexIdx & 1) ? 0.5f : -0.5f; + float32_t sy = (vertexIdx & 2) ? 0.5f : -0.5f; + float32_t sz = (vertexIdx & 4) ? 
0.5f : -0.5f; + + float32_t4x3 model = transpose(modelMatrix); + + // Transform to world + // Full position, not just Z like getVertexZNeg + return model[0].xyz * sx + + model[1].xyz * sy + + model[2].xyz * sz + + model[3].xyz; + // return mul(modelMatrix, float32_t4(sx, sy, sz, 1.0f)); +#else + return corners[vertexIdx]; +#endif +} + +#endif // _SOLID_ANGLE_VIS_EXAMPLE_GPU_COMMON_HLSL_INCLUDED_ diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl new file mode 100644 index 000000000..cd02171af --- /dev/null +++ b/73_SolidAngleVisualizer/app_resources/hlsl/parallelogram_sampling.hlsl @@ -0,0 +1,418 @@ +//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _SOLID_ANGLE_VIS_EXAMPLE_PARALLELOGRAM_SAMPLING_HLSL_INCLUDED_ +#define _SOLID_ANGLE_VIS_EXAMPLE_PARALLELOGRAM_SAMPLING_HLSL_INCLUDED_ + +#include +#include +#include "silhouette.hlsl" +#include "drawing.hlsl" + +#define MAX_CURVE_APEXES 2 +#define GET_PROJ_VERT(i) silhouette.vertices[i].xy *CIRCLE_RADIUS + +// ============================================================================ +// Minimum bounding rectangle on projected sphere +// ============================================================================ +struct Parallelogram +{ + float16_t2 corner; + float16_t2 axisDir; + float16_t width; + float16_t height; + + // ======================================================================== + // Projection helpers + // ======================================================================== + + static float32_t3 circleToSphere(float32_t2 circlePoint) + { + float32_t2 xy = circlePoint / CIRCLE_RADIUS; + float32_t xy_len_sq = dot(xy, xy); + return float32_t3(xy, sqrt(1.0f - xy_len_sq)); + } + + // 
========================================================================
+    // Curve evaluation helpers
+    // ========================================================================
+
+    // Point at parameter t on the straight chord S -> E, rescaled to unit length and
+    // projected to the circle plane (xy scaled by CIRCLE_RADIUS).
+    static float32_t2 evalCurvePoint(float32_t3 S, float32_t3 E, float32_t t)
+    {
+        const float32_t3 onChord = S + t * (E - S);
+        const float32_t invLen = rsqrt(dot(onChord, onChord));
+        const float32_t scale = invLen * CIRCLE_RADIUS;
+        return onChord.xy * scale;
+    }
+
+    // Unit 2D tangent of the projected curve at parameter t.
+    // Falls back to the normalized projected chord direction when the chord point is
+    // (nearly) at the origin or when the projected tangent is too short to normalize.
+    static float32_t2 evalCurveTangent(float32_t3 S, float32_t3 E, float32_t t)
+    {
+        const float32_t3 onChord = S + t * (E - S);
+        const float32_t onChordLenSq = dot(onChord, onChord);
+
+        if (onChordLenSq < 1e-12f)
+            return normalize(E.xy - S.xy);
+
+        // Remove from the chord derivative its component along the unit direction,
+        // then keep only the xy part.
+        const float32_t3 unitDir = onChord * rsqrt(onChordLenSq);
+        const float32_t3 chordDeriv = E - S;
+        const float32_t2 projected = (chordDeriv - unitDir * dot(unitDir, chordDeriv)).xy;
+
+        const float32_t projectedLen = length(projected);
+        if (projectedLen > 1e-7f)
+            return projected / projectedLen;
+
+        return normalize(E.xy - S.xy);
+    }
+
+    // Unit 2D tangents at both endpoints of the edge S -> E (t0 at S, t1 at E).
+    // dot(S, E) is evaluated once and reused for both.
+    static void getProjectedTangents(float32_t3 S, float32_t3 E, out float32_t2 t0, out float32_t2 t1)
+    {
+        const float32_t eps = 1e-14f;
+        const float32_t sDotE = dot(S, E);
+
+        const float32_t2 rawAtS = (E - S * sDotE).xy;
+        const float32_t2 rawAtE = (E * sDotE - S).xy;
+
+        const float32_t rawAtSLenSq = dot(rawAtS, rawAtS);
+        const float32_t rawAtELenSq = dot(rawAtE, rawAtE);
+
+        const bool atSUsable = rawAtSLenSq > eps;
+        const bool atEUsable = rawAtELenSq > eps;
+
+        // Only needed in the rare case that one of the tangents is too short:
+        // use the projected chord direction, or +X when that is degenerate as well.
+        float32_t2 substitute = float32_t2(1.0f, 0.0f);
+        if (!(atSUsable && atEUsable))
+        {
+            const float32_t2 chordXY = E.xy - S.xy;
+            const float32_t chordLenSq = dot(chordXY, chordXY);
+            if (chordLenSq > eps)
+                substitute = chordXY * rsqrt(chordLenSq);
+        }
+
+        if (atSUsable)
+            t0 = rawAtS * rsqrt(rawAtSLenSq);
+        else
+            t0 = substitute;
+
+        if (atEUsable)
+            t1 = rawAtE * rsqrt(rawAtELenSq);
+        else
+            t1 = substitute;
+    }
+
+    // Intersect the tangent line through p0 (direction t0) with the one through p1
+    // (direction t1). Near-parallel tangents yield the midpoint of p0 and p1; an
+    // intersection farther than 2 * CIRCLE_RADIUS from that midpoint is pulled back onto
+    // that distance so the apex cannot explode.
+    static void computeApexClamped(float32_t2 p0, float32_t2 p1, float32_t2 t0, float32_t2 t1, out float32_t2 apex)
+    {
+        const float32_t2 mid = (p0 + p1) * 0.5f;
+        const float32_t tangentCross = t0.x * t1.y - t0.y * t1.x;
+
+        apex = mid;
+        if (abs(tangentCross) < 1e-6f)
+            return;
+
+        const float32_t2 delta = p1 - p0;
+        const float32_t along = (delta.x * t1.y - delta.y * t1.x) / tangentCross;
+        apex = p0 + along * t0;
+
+        const float32_t2 fromMid = apex - mid;
+        const float32_t fromMidSq = dot(fromMid, fromMid);
+        const float32_t limitSq = CIRCLE_RADIUS * CIRCLE_RADIUS * 4.0f;
+
+        if (fromMidSq > limitSq)
+            apex = mid + fromMid * (CIRCLE_RADIUS * 2.0f * rsqrt(fromMidSq));
+    }
+
+    // ========================================================================
+    // Bounding box computation (rotating calipers)
+    //
+    // testEdgeForAxis and computeBoundsForAxis are
+    // templated on a bool to select between two precision levels:
+    //
+    // Accurate=false (used by tryCaliperDir, O(N^2) total calls):
+    //   Tests vertices + edge midpoints only. Cheap (just dot products) and
+    //   sufficient for *ranking* candidate axes, even though it may
+    //   underestimate the true extent of convex edges.
+    //
+    // Accurate=true (used by buildForAxis, called once):
+    //   Also computes tangent-line apex intersections for convex edges to
+    //   find the true extremum. Great circle arcs that project as convex
+    //   curves can bulge beyond their endpoints; the apex (tangent
+    //   evaluation + line intersection + clamping) captures this but is
+    //   ~4x more expensive per edge.
+    //
+    // The fast path gives the same relative ranking of axes (the
+    // approximation error is consistent across candidates), so the
+    // cheapest axis found by Fast is also the cheapest under Accurate.
+ // ======================================================================== + + static void testPoint(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, float32_t2 pt, float32_t2 dir, float32_t2 perpDir) + { + float32_t projAlong = dot(pt, dir); + float32_t projPerp = dot(pt, perpDir); + + minAlong = min(minAlong, projAlong); + maxAlong = max(maxAlong, projAlong); + minPerp = min(minPerp, projPerp); + maxPerp = max(maxPerp, projPerp); + } + + // Accurate=false (Fast): tests vertex + midpoint only. Used O(N^2) times for axis ranking. + // Accurate=true: also computes tangent-line apex for convex edges. Used once for final rect. + template + static void testEdgeForAxis(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, const ClippedSilhouette silhouette, uint32_t convexMask, uint32_t n3Mask, float32_t2 dir, float32_t2 perpDir) + { + const uint32_t nextIdx = (I + 1 < silhouette.count) ? I + 1 : 0; + const float32_t2 projectedVertex = GET_PROJ_VERT(I); + + testPoint(minAlong, maxAlong, minPerp, maxPerp, projectedVertex, dir, perpDir); + + bool isN3 = (n3Mask & (1u << I)) != 0; + + if (Accurate) + { + bool isConvex = (convexMask & (1u << I)) != 0; + + if (!isN3 && !isConvex) + return; + + float32_t3 S = silhouette.vertices[I]; + float32_t3 E = silhouette.vertices[nextIdx]; + float32_t2 midPoint = evalCurvePoint(S, E, 0.5f); + + if (isN3) + { + testPoint(minAlong, maxAlong, minPerp, maxPerp, midPoint, dir, perpDir); + } + + if (isConvex) + { + float32_t2 t0, endTangent; + getProjectedTangents(S, E, t0, endTangent); + + if (dot(t0, perpDir) > 0.0f) + { + float32_t2 apex0; + if (isN3) + { + float32_t2 tangentAtMid = evalCurveTangent(S, E, 0.5f); + computeApexClamped(projectedVertex, midPoint, t0, tangentAtMid, apex0); + testPoint(minAlong, maxAlong, minPerp, maxPerp, apex0, dir, perpDir); + + if (dot(tangentAtMid, perpDir) > 0.0f) + { + float32_t2 apex1; + 
computeApexClamped(midPoint, E.xy * CIRCLE_RADIUS, tangentAtMid, endTangent, apex1); + testPoint(minAlong, maxAlong, minPerp, maxPerp, apex1, dir, perpDir); + } + } + else + { + computeApexClamped(projectedVertex, E.xy * CIRCLE_RADIUS, t0, endTangent, apex0); + testPoint(minAlong, maxAlong, minPerp, maxPerp, apex0, dir, perpDir); + } + } + } + } + else + { + if (isN3) + { + float32_t2 midPoint = evalCurvePoint(silhouette.vertices[I], silhouette.vertices[nextIdx], 0.5f); + testPoint(minAlong, maxAlong, minPerp, maxPerp, midPoint, dir, perpDir); + } + } + } + + // Unrolled bounding box computation for a given axis direction. + // Accurate=false: fast path for axis ranking during candidate selection. + // Accurate=true: tight bounds with apex computation for the final rectangle. + template + static void computeBoundsForAxis(inout float32_t minAlong, inout float32_t maxAlong, inout float32_t minPerp, inout float32_t maxPerp, const ClippedSilhouette silhouette, uint32_t convexMask, uint32_t n3Mask, float32_t2 dir, float32_t2 perpDir) + { + testEdgeForAxis<0, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + testEdgeForAxis<1, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + testEdgeForAxis<2, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + if (silhouette.count > 3) + { + testEdgeForAxis<3, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + if (silhouette.count > 4) + { + testEdgeForAxis<4, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + if (silhouette.count > 5) + { + testEdgeForAxis<5, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + if (silhouette.count > 6) + { + testEdgeForAxis<6, Accurate>(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir); + } + } + 
// Evaluates one candidate caliper axis.
// Measures the silhouette extent along `dir` and along `dir` rotated by +90 degrees
// (via computeBoundsForAxis) and keeps `dir` when the resulting rectangle area is
// strictly smaller than the best one recorded so far.
//   bestArea, bestDir : running minimum area and the axis that produced it (updated in place)
//   dir               : candidate axis in the projected 2D plane
//   silhouette        : silhouette being bounded
//   n3Mask            : forwarded unchanged to computeBoundsForAxis
static void tryCaliperDir(inout float32_t bestArea, inout float32_t2 bestDir, const float32_t2 dir, const ClippedSilhouette silhouette, uint32_t n3Mask)
{
    float32_t2 perpDir = float32_t2(-dir.y, dir.x);

    // Inverted sentinels so that any real extent replaces them.
    float32_t minAlong = 1e10f;
    float32_t maxAlong = -1e10f;
    float32_t minPerp = 1e10f;
    float32_t maxPerp = -1e10f;

    // The convex-edge mask argument is deliberately passed as 0 here; only n3Mask is forwarded.
    computeBoundsForAxis(minAlong, maxAlong, minPerp, maxPerp, silhouette, 0, n3Mask, dir, perpDir);

    float32_t area = (maxAlong - minAlong) * (maxPerp - minPerp);
    if (area < bestArea)
    {
        bestArea = area;
        bestDir = dir;
    }
}

// Processes silhouette edge I (vertex I to its successor, wrapping to vertex 0 after the
// last one):
//   * stores cross(S, E) for the edge in precompSil.edgeNormals[I] (half precision),
//   * tries the projected start tangent t0 as a caliper axis,
//   * when cross2D(S.xy, E.xy) < -1e-6, sets bit I of convexMask and also tries the end
//     tangent t1,
//   * when additionally dot(t0, t1) < 0.5, sets bit I of n3Mask and tries the curve
//     tangent at parameter 0.5.
// The template parameter list was missing in the reviewed text; `I` is used as a
// compile-time edge index and the function is invoked as processEdge<0> .. processEdge<6>.
template<uint32_t I>
static void processEdge(inout float32_t bestArea, inout float32_t2 bestDir, inout uint32_t convexMask, inout uint32_t n3Mask, const ClippedSilhouette silhouette, inout SilEdgeNormals precompSil)
{
    const uint32_t nextIdx = (I + 1 < silhouette.count) ? I + 1 : 0;
    float32_t3 S = silhouette.vertices[I];
    float32_t3 E = silhouette.vertices[nextIdx];
    precompSil.edgeNormals[I] = float16_t3(cross(S, E));

    float32_t2 t0, t1;
    getProjectedTangents(S, E, t0, t1);

    // Note: evaluated with n3Mask as it stands BEFORE this edge may set its own bit.
    tryCaliperDir(bestArea, bestDir, t0, silhouette, n3Mask);

    if (nbl::hlsl::cross2D(S.xy, E.xy) < -1e-6f)
    {
        convexMask |= (1u << I);
        tryCaliperDir(bestArea, bestDir, t1, silhouette, n3Mask);

        if (dot(t0, t1) < 0.5f)
        {
            n3Mask |= (1u << I);
            float32_t2 tangentAtMid = evalCurveTangent(S, E, 0.5f);
            tryCaliperDir(bestArea, bestDir, tangentAtMid, silhouette, n3Mask);
        }
    }
}

// ========================================================================
// Factory methods
// ========================================================================

// Builds the bounding parallelogram for a chosen axis.
// Returns a Parallelogram whose axisDir is `dir`, whose width/height are the silhouette
// extents along `dir` and along its +90 degree perpendicular, and whose corner is the
// (minAlong, minPerp) point expressed in the 2D plane. All stored members are half precision.
static Parallelogram buildForAxis(const ClippedSilhouette silhouette, uint32_t convexMask, uint32_t n3Mask, float32_t2 dir)
{
    float32_t2 perpDir = float32_t2(-dir.y, dir.x);

    float32_t minAlong = 1e10f;
    float32_t maxAlong = -1e10f;
    float32_t minPerp = 1e10f;
    float32_t maxPerp = -1e10f;

    computeBoundsForAxis(minAlong, maxAlong, minPerp, maxPerp, silhouette, convexMask, n3Mask, dir, perpDir);

    Parallelogram result;
    result.width = float16_t(maxAlong - minAlong);
    result.height = float16_t(maxPerp - minPerp);
    result.axisDir = float16_t2(dir);
    // Compose the corner entirely in float32 and narrow once; the perpendicular axis was
    // previously rounded to half precision before being scaled by minPerp.
    result.corner = float16_t2(minAlong * dir + minPerp * perpDir);

    return result;
}
// Draws one direction from the bounding parallelogram.
//   silhouette : precomputed edge normals, used for the inside test
//   xi         : two random numbers; xi.x moves along axisDir, xi.y along its perpendicular
//   pdf        : out, density in solid-angle measure, 0 when the sample is rejected
//   valid      : out, true when direction.z > 0 and the direction passes silhouette.isInside
// Returns the direction produced by circleToSphere for the sampled circle-space point
// (returned even when `valid` is false).
float32_t3 sample(NBL_CONST_REF_ARG(SilEdgeNormals) silhouette, float32_t2 xi, out float32_t pdf, out bool valid)
{
    // All circle-space arithmetic stays in half precision, matching the stored members.
    const float16_t2 perpAxis = float16_t2(-axisDir.y, axisDir.x);
    const float16_t2 alongOffset = float16_t(xi.x) * width * axisDir;
    const float16_t2 perpOffset = float16_t(xi.y) * height * perpAxis;
    const float16_t2 circleXY = corner + alongOffset + perpOffset;

    const float32_t3 direction = circleToSphere(circleXY);

    valid = direction.z > 0.0f && silhouette.isInside(direction);

    // The rectangle lives in circle space (scaled by CIRCLE_RADIUS); the orthographic
    // projection Jacobian is dA_circle/dw = CIRCLE_RADIUS^2 * z.
    pdf = 0.0f;
    if (valid)
    {
        const float32_t rectArea = float32_t(width) * float32_t(height);
        pdf = CIRCLE_RADIUS * CIRCLE_RADIUS * direction.z / rectArea;
    }

    return direction;
}
Project vertices onto the frame as (x/z, y/z) +// to find the bounding rectangle extents (rectR0, rectExtents) +// +// 4. Fallback: if the primary frame leaves vertices near the z=0 plane, +// fix axis3 = camera forward (0,0,1) and search axis1/axis2 via +// tryFallbackFrameCandidate +// +// Key property: If all vertices are inside a great circle half-space, +// then all edges (geodesic arcs) are also inside. No edge extremum +// checking needed (unlike parallelogram_sampling which works in +// projected 2D space where arcs can bulge beyond vertices). +// ============================================================================ +// Spherical rectangle bound: stores the orthonormal frame and gnomonic +// projection extents. Consumed by UrenaSampler, BilinearSampler, BiquadraticSampler. +struct SphericalPyramid +{ + // Orthonormal frame for the bounding region + float32_t3 axis1; // Primary axis (from minimum-width edge's great circle normal) + float32_t3 axis2; // Secondary axis (perpendicular to axis1) + float32_t3 axis3; // Forward axis, toward silhouette (primary) or camera forward (fallback) + + // SphericalRectangle parameters (in the local frame where axis3 is Z) + float32_t3 rectR0; // Corner position in local frame + float32_t2 rectExtents; // Width (along axis1) and height (along axis2) + float32_t solidAngle; // Solid angle of the bounding region (steradians) + + // ======================================================================== + // Rotating Calipers - Minimum Width Edge Finding (Scalar Triple Product) + // ======================================================================== + + // Simplified metric: dot(cross(A, B), C) = sin(edge_len) * sin(angular_dist) + // This is a lune-area proxy, no per-edge normalization needed for comparison. + // Per-vertex cost: one dot product with precomputed edge normal. + // Per-edge cost: one cross product (replaces addition + rsqrt). 
+ // + // Triangular column-major traversal (rotating calipers pattern): + // Vertex V_j checks against edges 0..j-2. + // V2 -> edge 0; V3 -> edges 0,1; V4 -> edges 0,1,2; etc. + // Total checks: (N-2)(N-1)/2 instead of N(N-2). + // + // Endpoints: dot(cross(A,B), A) = dot(cross(A,B), B) = 0, never affect max. + static void findMinimumWidthEdge(const ClippedSilhouette silhouette, out uint32_t bestEdge, out float32_t3 bestV0, out float32_t3 bestV1, out float32_t bestWidth, out SilEdgeNormals precompSil) + { + precompSil = (SilEdgeNormals)0; + precompSil.count = silhouette.count; + + // Edge normals: cross(v[i], v[i+1]), inward-facing for CCW-from-origin winding + float32_t3 en0 = cross(silhouette.vertices[0], silhouette.vertices[1]); + precompSil.edgeNormals[0] = float16_t3(en0); + float32_t3 en1 = cross(silhouette.vertices[1], silhouette.vertices[2]); + precompSil.edgeNormals[1] = float16_t3(en1); + + // Per-edge max(dot(en_i, v_j)), positive = inside, maximum = widest vertex + float32_t maxDot0 = dot(silhouette.vertices[2], en0); // V2 vs edge 0 + + float32_t maxDot1 = 1e10f; + float32_t maxDot2 = 1e10f; + float32_t maxDot3 = 1e10f; + float32_t maxDot4 = 1e10f; + + if (silhouette.count > 3) + { + float32_t3 en2 = cross(silhouette.vertices[2], silhouette.vertices[3]); + precompSil.edgeNormals[2] = float16_t3(en2); + + // V3 vs edges 0, 1 + float32_t3 v3 = silhouette.vertices[3]; + maxDot0 = max(maxDot0, dot(v3, en0)); + maxDot1 = dot(v3, en1); + + if (silhouette.count > 4) + { + float32_t3 en3 = cross(silhouette.vertices[3], silhouette.vertices[4]); + precompSil.edgeNormals[3] = float16_t3(en3); + + // V4 vs edges 0, 1, 2 + float32_t3 v4 = silhouette.vertices[4]; + maxDot0 = max(maxDot0, dot(v4, en0)); + maxDot1 = max(maxDot1, dot(v4, en1)); + maxDot2 = dot(v4, en2); + + if (silhouette.count > 5) + { + float32_t3 en4 = cross(silhouette.vertices[4], silhouette.vertices[5]); + precompSil.edgeNormals[4] = float16_t3(en4); + + // V5 vs edges 0, 1, 2, 3 + float32_t3 v5 
= silhouette.vertices[5]; + maxDot0 = max(maxDot0, dot(v5, en0)); + maxDot1 = max(maxDot1, dot(v5, en1)); + maxDot2 = max(maxDot2, dot(v5, en2)); + maxDot3 = dot(v5, en3); + + if (silhouette.count > 6) + { + // V6 vs edges 0, 1, 2, 3, 4 + float32_t3 v6 = silhouette.vertices[6]; + maxDot0 = max(maxDot0, dot(v6, en0)); + maxDot1 = max(maxDot1, dot(v6, en1)); + maxDot2 = max(maxDot2, dot(v6, en2)); + maxDot3 = max(maxDot3, dot(v6, en3)); + maxDot4 = dot(v6, en4); + } + } + } + } + + // Best edge: minimum maxDot, no per-edge normalization needed. + // Relative epsilon prevents tie-breaking flicker when two edges have + // nearly identical widths — the current winner is "sticky" unless a + // new edge is meaningfully better (0.1% narrower). + const float32_t EDGE_SELECT_EPS = 1e-3f; + + bestWidth = maxDot0; + bestEdge = 0; + bestV0 = silhouette.vertices[0]; + bestV1 = silhouette.vertices[1]; + + if (silhouette.count > 3) + { + bool better = maxDot1 < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = better ? maxDot1 : bestWidth; + bestEdge = better ? 1 : bestEdge; + bestV0 = better ? silhouette.vertices[1] : bestV0; + bestV1 = better ? silhouette.vertices[2] : bestV1; + + if (silhouette.count > 4) + { + better = maxDot2 < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = better ? maxDot2 : bestWidth; + bestEdge = better ? 2 : bestEdge; + bestV0 = better ? silhouette.vertices[2] : bestV0; + bestV1 = better ? silhouette.vertices[3] : bestV1; + + if (silhouette.count > 5) + { + better = maxDot3 < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = better ? maxDot3 : bestWidth; + bestEdge = better ? 3 : bestEdge; + bestV0 = better ? silhouette.vertices[3] : bestV0; + bestV1 = better ? silhouette.vertices[4] : bestV1; + + if (silhouette.count > 6) + { + better = maxDot4 < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = better ? maxDot4 : bestWidth; + bestEdge = better ? 4 : bestEdge; + bestV0 = better ? silhouette.vertices[4] : bestV0; + bestV1 = better ? 
silhouette.vertices[5] : bestV1; + } + } + } + } + + // Check the last 2 edges missed by the triangular traversal: + // Edge count-2: vertices[count-2] -> vertices[count-1], check V0..V[count-3] + // Edge count-1: vertices[count-1] -> vertices[0], check V1..V[count-2] + // Explicit per-count unrolling avoids the generic loop with runtime index comparisons. + { + // Penultimate edge: vertices[count-2] -> vertices[count-1] + const uint32_t penIdx = silhouette.count - 2; + float32_t3 enPen = cross(silhouette.vertices[penIdx], silhouette.vertices[penIdx + 1]); + precompSil.edgeNormals[penIdx] = float16_t3(enPen); + float32_t maxDotPen = dot(silhouette.vertices[0], enPen); + if (silhouette.count > 3) + { + maxDotPen = max(maxDotPen, dot(silhouette.vertices[1], enPen)); + if (silhouette.count > 4) + { + maxDotPen = max(maxDotPen, dot(silhouette.vertices[2], enPen)); + if (silhouette.count > 5) + { + maxDotPen = max(maxDotPen, dot(silhouette.vertices[3], enPen)); + if (silhouette.count > 6) + { + maxDotPen = max(maxDotPen, dot(silhouette.vertices[4], enPen)); + } + } + } + } + + bool betterPen = maxDotPen < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = betterPen ? maxDotPen : bestWidth; + bestEdge = betterPen ? penIdx : bestEdge; + bestV0 = betterPen ? silhouette.vertices[penIdx] : bestV0; + bestV1 = betterPen ? 
silhouette.vertices[penIdx + 1] : bestV1; + + // Last edge: vertices[count-1] -> vertices[0] (wrap-around) + const uint32_t lastIdx = silhouette.count - 1; + float32_t3 enLast = cross(silhouette.vertices[lastIdx], silhouette.vertices[0]); + precompSil.edgeNormals[lastIdx] = float16_t3(enLast); + float32_t maxDotLast = dot(silhouette.vertices[1], enLast); + if (silhouette.count > 3) + { + maxDotLast = max(maxDotLast, dot(silhouette.vertices[2], enLast)); + if (silhouette.count > 4) + { + maxDotLast = max(maxDotLast, dot(silhouette.vertices[3], enLast)); + if (silhouette.count > 5) + { + maxDotLast = max(maxDotLast, dot(silhouette.vertices[4], enLast)); + if (silhouette.count > 6) + { + maxDotLast = max(maxDotLast, dot(silhouette.vertices[5], enLast)); + } + } + } + } + + bool betterLast = maxDotLast < bestWidth * (1.0f - EDGE_SELECT_EPS); + bestWidth = betterLast ? maxDotLast : bestWidth; + bestEdge = betterLast ? lastIdx : bestEdge; + bestV0 = betterLast ? silhouette.vertices[lastIdx] : bestV0; + bestV1 = betterLast ? 
// ========================================================================
// Template-Unrolled Projection Helpers
// ========================================================================

// Projects vertex I onto the candidate frame and folds it into the running results.
//   bound : (minX, minY, maxX, maxY) of the projected coordinates x/z and y/z
//   minZ  : smallest z (dot with projAxis3) seen so far
// No guard is applied to z here: a vertex with z <= 0 yields a meaningless projection,
// so callers that cannot rule that out must inspect minZ afterwards.
// (Template parameter list restored; `I` is the compile-time vertex index.)
template<uint32_t I>
static void projectAndBound(const float32_t3 vertices[MAX_SILHOUETTE_VERTICES], float32_t3 projAxis1, float32_t3 projAxis2, float32_t3 projAxis3, NBL_REF_ARG(float32_t4) bound, NBL_REF_ARG(float32_t) minZ)
{
    float32_t3 v = vertices[I];
    float32_t x = dot(v, projAxis1);
    float32_t y = dot(v, projAxis2);
    float32_t z = dot(v, projAxis3);
    minZ = min(minZ, z);
    float32_t rcpZ = rcp(z);
    float32_t projX = x * rcpZ;
    float32_t projY = y * rcpZ;
    bound.x = min(bound.x, projX);
    bound.y = min(bound.y, projY);
    bound.z = max(bound.z, projX);
    bound.w = max(bound.w, projY);
}

// Projects every silhouette vertex (indices 0..count-1, at least three) onto the given
// frame. Resets `bound` and `minZ` first, so both are pure outputs of this call.
static void projectAllVertices(const ClippedSilhouette silhouette, float32_t3 projAxis1, float32_t3 projAxis2, float32_t3 projAxis3, NBL_REF_ARG(float32_t4) bound, NBL_REF_ARG(float32_t) minZ)
{
    bound = float32_t4(1e10f, 1e10f, -1e10f, -1e10f);
    minZ = 1e10f;
    projectAndBound<0>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ);
    projectAndBound<1>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ);
    projectAndBound<2>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ);
    if (silhouette.count > 3)
    {
        projectAndBound<3>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ);
        if (silhouette.count > 4)
        {
            projectAndBound<4>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ);
            if (silhouette.count > 5)
            {
                projectAndBound<5>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ);
                if (silhouette.count > 6)
                {
                    projectAndBound<6>(silhouette.vertices, projAxis1, projAxis2, projAxis3, bound, minZ);
                }
            }
        }
    }
}

// ========================================================================
// Template-Unrolled Frame Candidate Selection
// ========================================================================

// Primary path: axis1 is fixed, edge I proposes the rotation of axis2/axis3 around it.
// The candidate is accepted when every vertex projects with z > 1e-6 and the bounding
// rectangle area is smaller than bestArea; on acceptance bestArea, bestAxis2, bestAxis3,
// bestMinZ and bestBound are overwritten and `found` is set.
//   I          : compile-time edge index (edge runs from vertex I to its successor)
//   CheckCount : request the runtime wrap test for the successor index
//   axis3Ref   : reference direction; the candidate axis3 is flipped into its hemisphere
// (Template parameter list restored from the call sites <0>..<2> and <3, true>..<6, true>.)
template<uint32_t I, bool CheckCount = false>
static void tryPrimaryFrameCandidate(NBL_CONST_REF_ARG(ClippedSilhouette) silhouette, float32_t3 fixedAxis1, float32_t3 axis3Ref,
    NBL_REF_ARG(float32_t) bestArea, NBL_REF_ARG(float32_t3) bestAxis2,
    NBL_REF_ARG(float32_t3) bestAxis3, NBL_REF_ARG(bool) found,
    NBL_REF_ARG(float32_t) bestMinZ, NBL_REF_ARG(float32_t4) bestBound)
{
    // Successor of vertex I. Edges 0 and 1 cannot wrap because at least three vertices are
    // always processed, so the test folds away for them. From I == 2 upward the edge may be
    // the closing edge (count == I + 1) and must wrap to vertex 0 even when the caller left
    // CheckCount at its default; otherwise vertices[count], which is not part of the
    // silhouette, would be used as the edge end.
    const uint32_t j = (CheckCount || I >= 2) ? ((I + 1 < silhouette.count) ? I + 1 : 0) : I + 1;
    float32_t3 edge = silhouette.vertices[j] - silhouette.vertices[I];

    // Candidate axis2: perpendicular to edge, in plane perpendicular to axis1
    float32_t3 axis2Cand = cross(fixedAxis1, edge);
    float32_t lenSq = dot(axis2Cand, axis2Cand);
    if (lenSq < 1e-14f)
        return; // edge (nearly) parallel to axis1: no usable direction
    axis2Cand *= rsqrt(lenSq);

    // Candidate axis3: completes the frame
    float32_t3 axis3Cand = cross(fixedAxis1, axis2Cand);

    // Ensure axis3 points toward center (same hemisphere as reference).
    // Negating both keeps the frame's handedness.
    if (dot(axis3Cand, axis3Ref) < 0.0f)
    {
        axis2Cand = -axis2Cand;
        axis3Cand = -axis3Cand;
    }

    // Fused: check all vertices have positive z AND compute bounding rect in one pass
    float32_t4 bound;
    float32_t minZ;
    projectAllVertices(silhouette, fixedAxis1, axis2Cand, axis3Cand, bound, minZ);

    // Skip if any vertex would have z <= 0
    if (minZ <= 1e-6f)
        return;

    float32_t rectArea = (bound.z - bound.x) * (bound.w - bound.y);
    if (rectArea < bestArea)
    {
        bestArea = rectArea;
        bestAxis2 = axis2Cand;
        bestAxis3 = axis3Cand;
        bestMinZ = minZ;
        bestBound = bound;
        found = true;
    }
}
// Fallback path: axis3 is fixed, edge I proposes the rotation of axis1/axis2 around it.
// The edge is projected into the plane perpendicular to fixedAxis3 and normalised to give
// axis1; axis2 = cross(fixedAxis3, axis1). The candidate replaces the current best when its
// bounding rectangle area is smaller; bestEdge then records I.
// Unlike the primary path, minZ is computed but not checked here.
//   I          : compile-time edge index (edge runs from vertex I to its successor)
//   CheckCount : request the runtime wrap test for the successor index
// (Template parameter list restored from the call sites <0>..<2> and <3, true>..<6, true>.)
template<uint32_t I, bool CheckCount = false>
static void tryFallbackFrameCandidate(NBL_CONST_REF_ARG(ClippedSilhouette) silhouette, float32_t3 fixedAxis3, NBL_REF_ARG(float32_t) bestArea, NBL_REF_ARG(float32_t3) bestAxis1, NBL_REF_ARG(float32_t3) bestAxis2, NBL_REF_ARG(uint32_t) bestEdge, NBL_REF_ARG(float32_t4) bestBound)
{
    // Successor of vertex I. Edges 0 and 1 cannot wrap (at least three vertices are always
    // processed); from I == 2 upward the edge may be the closing edge and must wrap to
    // vertex 0 even when CheckCount was left at its default, otherwise vertices[count]
    // (not part of the silhouette) would be read.
    const uint32_t j = (CheckCount || I >= 2) ? ((I + 1 < silhouette.count) ? I + 1 : 0) : I + 1;
    float32_t3 edge = silhouette.vertices[j] - silhouette.vertices[I];

    // Remove the component along the fixed forward axis.
    float32_t3 edgeInPlane = edge - fixedAxis3 * dot(edge, fixedAxis3);
    float32_t lenSq = dot(edgeInPlane, edgeInPlane);
    if (lenSq < 1e-14f)
        return; // edge (nearly) parallel to axis3: no usable direction

    float32_t3 axis1Cand = edgeInPlane * rsqrt(lenSq);
    float32_t3 axis2Cand = cross(fixedAxis3, axis1Cand);

    float32_t4 bound;
    float32_t minZ;
    projectAllVertices(silhouette, axis1Cand, axis2Cand, fixedAxis3, bound, minZ);

    float32_t rectArea = (bound.z - bound.x) * (bound.w - bound.y);
    if (rectArea < bestArea)
    {
        bestArea = rectArea;
        bestAxis1 = axis1Cand;
        bestAxis2 = axis2Cand;
        bestBound = bound;
        bestEdge = I;
    }
}

// ========================================================================
// Visualization
// ========================================================================

#if VISUALIZE_SAMPLES
// Debug overlay for the bounding rectangle: draws the four great circles that bound it,
// a marker at the rectangle centre and one marker per frame axis.
// Returns the accumulated overlay colour for this pixel.
float32_t4 visualize(float32_t3 spherePos, float32_t2 ndc, float32_t aaWidth)
{
    float32_t4 color = float32_t4(0, 0, 0, 0);

    // Colors for visualization
    float32_t3 boundColor1 = float32_t3(1.0f, 0.5f, 0.5f); // Light red for axis1 bounds
    float32_t3 boundColor2 = float32_t3(0.5f, 0.5f, 1.0f); // Light blue for axis2 bounds
    float32_t3 centerColor = float32_t3(1.0f, 1.0f, 0.0f); // Yellow for center

    float32_t x0 = rectR0.x;
    float32_t x1 = rectR0.x + rectExtents.x;
    float32_t y0 = rectR0.y;
    float32_t y1 = rectR0.y + rectExtents.y;
    float32_t z = rectR0.z;

    // Great circle normals for the 4 edges (in local frame, then transform to world).
    // Each normal is orthogonal to every point of its rectangle side, e.g.
    // dot((0, -z, y0), (x, y0, z)) = 0 for the bottom side.
    float32_t3 bottomNormalLocal = normalize(float32_t3(0, -z, y0));
    float32_t3 topNormalLocal = normalize(float32_t3(0, z, -y1));
    float32_t3 leftNormalLocal = normalize(float32_t3(-z, 0, x0));
    float32_t3 rightNormalLocal = normalize(float32_t3(z, 0, -x1));

    // Transform to world space: local (x, y, z) maps to x*axis1 + y*axis2 + z*axis3
    float32_t3 bottomNormal = bottomNormalLocal.x * axis1 + bottomNormalLocal.y * axis2 + bottomNormalLocal.z * axis3;
    float32_t3 topNormal = topNormalLocal.x * axis1 + topNormalLocal.y * axis2 + topNormalLocal.z * axis3;
    float32_t3 leftNormal = leftNormalLocal.x * axis1 + leftNormalLocal.y * axis2 + leftNormalLocal.z * axis3;
    float32_t3 rightNormal = rightNormalLocal.x * axis1 + rightNormalLocal.y * axis2 + rightNormalLocal.z * axis3;

    // Draw the 4 bounding great circles
    color += drawGreatCircleHalf(bottomNormal, spherePos, axis3, aaWidth, boundColor2, 0.004f);
    color += drawGreatCircleHalf(topNormal, spherePos, axis3, aaWidth, boundColor2, 0.004f);
    color += drawGreatCircleHalf(leftNormal, spherePos, axis3, aaWidth, boundColor1, 0.004f);
    color += drawGreatCircleHalf(rightNormal, spherePos, axis3, aaWidth, boundColor1, 0.004f);

    // Draw center point (center of the rectangle projected onto sphere)
    float32_t centerX = (x0 + x1) * 0.5f;
    float32_t centerY = (y0 + y1) * 0.5f;
    float32_t3 centerLocal = normalize(float32_t3(centerX, centerY, z));
    // Same local-to-world mapping as the normals above. The axis2 term used to be
    // subtracted, which mirrored the marker across the rectangle's local X axis.
    float32_t3 centerWorld = centerLocal.x * axis1 + centerLocal.y * axis2 + centerLocal.z * axis3;

    float32_t3 centerCircle = sphereToCircle(centerWorld);
    color += drawCorner(centerCircle, ndc, aaWidth, 0.025f, 0.0f, centerColor);

    // Frame axes: axis1 red, axis2 green, axis3 blue.
    // NOTE(review): these are passed to drawCorner without sphereToCircle, unlike the
    // centre marker - confirm which space drawCorner expects.
    color += drawCorner(axis1, ndc, aaWidth, 0.025f, 0.0f, float32_t3(1.0f, 0.0f, 0.0f));
    color += drawCorner(axis2, ndc, aaWidth, 0.025f, 0.0f, float32_t3(0.0f, 1.0f, 0.0f));
    color += drawCorner(axis3, ndc, aaWidth, 0.025f, 0.0f, float32_t3(0.0f, 0.0f, 1.0f));

    return color;
}
#endif // VISUALIZE_SAMPLES
// ========================================================================
// Factory
// ========================================================================

// One corner term of the closed-form rectangle solid angle on the z = 1 plane:
// d^2/dxdy atan(x*y / sqrt(x^2 + y^2 + 1)) = (x^2 + y^2 + 1)^(-3/2).
static float32_t rectCornerSolidAngleTerm(float32_t x, float32_t y)
{
    return atan(x * y * rsqrt(x * x + y * y + 1.0f));
}

// Builds the spherical-rectangle bound of a silhouette.
//   silhouette     : clipped silhouette (at least three vertices are processed)
//   silEdgeNormals : filled by findMinimumWidthEdge with the per-edge normals
// With VISUALIZE_SAMPLES the extra parameters receive the debug overlay (added to `color`).
// Returns a SphericalPyramid with the frame (axis1..3), the rectangle on the local z = 1
// plane (rectR0, rectExtents) and its solid angle.
static SphericalPyramid create(NBL_CONST_REF_ARG(ClippedSilhouette) silhouette, NBL_REF_ARG(SilEdgeNormals) silEdgeNormals
#if VISUALIZE_SAMPLES
    ,
    float32_t2 ndc, float32_t3 spherePos, float32_t aaWidth, inout float32_t4 color
#endif
)
{
    SphericalPyramid self;

    // Step 1: Find minimum-width edge using rotating calipers with lune metric
    uint32_t bestEdge;
    float32_t3 bestV0, bestV1;
    float32_t minWidth;
    findMinimumWidthEdge(silhouette, bestEdge, bestV0, bestV1, minWidth, silEdgeNormals);

    // Step 2: Build orthonormal frame from best edge
    // axis1 = perpendicular to the best edge's great circle (primary caliper direction)
    self.axis1 = normalize(cross(bestV0, bestV1));

    // Compute centroid for reference direction
    float32_t3 center = silhouette.getCenter();
    float32_t3 centerInPlane = center - self.axis1 * dot(center, self.axis1);
    float32_t3 axis3Ref = normalize(centerInPlane);

    // Step 2b: Try each edge-aligned rotation around axis1 to find the axis2/axis3
    // orientation that keeps all vertices in the positive half-space with minimum
    // bounding rectangle area
    float32_t bestRectArea = 1e20f;
    float32_t3 bestAxis2 = cross(axis3Ref, self.axis1);
    float32_t3 bestAxis3 = axis3Ref;
    bool foundValidFrame = false;
    float32_t bestMinZ = 0.0f;
    float32_t4 bounds = float32_t4(-0.1f, -0.1f, 0.1f, 0.1f);

    tryPrimaryFrameCandidate<0>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds);
    tryPrimaryFrameCandidate<1>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds);
    // Edge 2 is the closing edge of a triangle (count == 3), so it needs the count-checked
    // wrap to vertex 0 just like edges 3..6.
    tryPrimaryFrameCandidate<2, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds);
    if (silhouette.count > 3)
    {
        tryPrimaryFrameCandidate<3, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds);
        if (silhouette.count > 4)
        {
            tryPrimaryFrameCandidate<4, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds);
            if (silhouette.count > 5)
            {
                tryPrimaryFrameCandidate<5, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds);
                if (silhouette.count > 6)
                {
                    tryPrimaryFrameCandidate<6, true>(silhouette, self.axis1, axis3Ref, bestRectArea, bestAxis2, bestAxis3, foundValidFrame, bestMinZ, bounds);
                }
            }
        }
    }

    self.axis2 = bestAxis2;
    self.axis3 = bestAxis3;

    // Fallback: if the primary path failed (no valid frame found, or axis3 leaves
    // vertices too close to the z=0 singularity), fix axis3 = camera forward and
    // search for the best axis1/axis2 rotation around it.
    if (!foundValidFrame || bestMinZ < 0.15f)
    {
        // Use camera forward as axis3 (all silhouette vertices have z > 0 by construction)
        self.axis3 = float32_t3(0.0f, 0.0f, 1.0f);

        // Find optimal axis1/axis2 rotation around axis3 by trying each edge
        float32_t bestFallbackArea = 1e20f;
        // axis3 = (0,0,1), so cross((0,0,1), (1,0,0)) = (0,1,0), cross((0,0,1), (0,1,0)) = (-1,0,0)
        self.axis1 = float32_t3(0.0f, 1.0f, 0.0f);
        self.axis2 = float32_t3(-1.0f, 0.0f, 0.0f);

        // bestEdge is overwritten by the winning fallback edge (it only feeds the debug output below).
        tryFallbackFrameCandidate<0>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds);
        tryFallbackFrameCandidate<1>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds);
        tryFallbackFrameCandidate<2, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds);
        if (silhouette.count > 3)
        {
            tryFallbackFrameCandidate<3, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds);
            if (silhouette.count > 4)
            {
                tryFallbackFrameCandidate<4, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds);
                if (silhouette.count > 5)
                {
                    tryFallbackFrameCandidate<5, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds);
                    if (silhouette.count > 6)
                    {
                        tryFallbackFrameCandidate<6, true>(silhouette, self.axis3, bestFallbackArea, self.axis1, self.axis2, bestEdge, bounds);
                    }
                }
            }
        }
    }

    // Degenerate bounds check (single computation, after primary/fallback decision)
    if (bounds.x >= bounds.z || bounds.y >= bounds.w)
        bounds = float32_t4(-0.1f, -0.1f, 0.1f, 0.1f);

    self.rectR0 = float32_t3(bounds.xy, 1.0f);
    self.rectExtents = float32_t2(bounds.zw - bounds.xy);

    // solidAngle was previously left unassigned although it is read below (DEBUG_DATA) and
    // returned to callers. Closed form for the rectangle [x0,x1] x [y0,y1] on z = 1:
    //   F(x1,y1) - F(x0,y1) - F(x1,y0) + F(x0,y0),  F(x,y) = atan(x*y / sqrt(x^2 + y^2 + 1))
    self.solidAngle = rectCornerSolidAngleTerm(bounds.z, bounds.w)
                    - rectCornerSolidAngleTerm(bounds.x, bounds.w)
                    - rectCornerSolidAngleTerm(bounds.z, bounds.y)
                    + rectCornerSolidAngleTerm(bounds.x, bounds.y);

#if VISUALIZE_SAMPLES
    color += drawCorner(center, ndc, aaWidth, 0.05f, 0.0f, float32_t3(1.0f, 0.0f, 1.0f));
    color += visualizeBestCaliperEdge(silhouette.vertices, bestEdge, silhouette.count, spherePos, aaWidth);
    color += self.visualize(spherePos, ndc, aaWidth);
#endif

#if DEBUG_DATA
    DebugDataBuffer[0].pyramidAxis1 = self.axis1;
    DebugDataBuffer[0].pyramidAxis2 = self.axis2;
    DebugDataBuffer[0].pyramidCenter = center;
    DebugDataBuffer[0].pyramidHalfWidth1 = (atan(bounds.z) - atan(bounds.x)) * 0.5f;
    DebugDataBuffer[0].pyramidHalfWidth2 = (atan(bounds.w) - atan(bounds.y)) * 0.5f;
    DebugDataBuffer[0].pyramidSolidAngle = self.solidAngle;
    DebugDataBuffer[0].pyramidBestEdge = bestEdge;
    DebugDataBuffer[0].pyramidMin1 = bounds.x;
    DebugDataBuffer[0].pyramidMin2 = bounds.y;
    DebugDataBuffer[0].pyramidMax1 = bounds.z;
    DebugDataBuffer[0].pyramidMax2 = bounds.w;
#endif

    return self;
}
// Importance sampler that approximates the solid-angle density over the pyramid's
// rectangle with a bilinear patch fitted to its four corners.
// Build once per SphericalPyramid with create(), then call sample() per random pair.
struct BilinearSampler
{
    // NOTE(review): a template argument list may have been stripped from this type in the
    // reviewed text - confirm against the real file.
    nbl::hlsl::sampling::Bilinear sampler;

    float32_t rcpTotalIntegral; // 1 / mean of the four corner densities (not read by sample())
    float32_t rectArea;         // rectExtents.x * rectExtents.y

    // Solid-angle density of the z = 1 plane at a point whose squared coordinates are
    // (xx, yy): (x^2 + y^2 + 1)^(-3/2).
    static float32_t planeDensity(float32_t xx, float32_t yy)
    {
        const float32_t d = xx + yy + 1.0f;
        return rsqrt(d) / d;
    }

    // Fits the bilinear patch to the rectangle described by `pyramid`.
    static BilinearSampler create(NBL_CONST_REF_ARG(SphericalPyramid) pyramid)
    {
        BilinearSampler self;

        // Rectangle corners on the local z = 1 plane.
        const float32_t xLo = pyramid.rectR0.x;
        const float32_t xHi = xLo + pyramid.rectExtents.x;
        const float32_t yLo = pyramid.rectR0.y;
        const float32_t yHi = yLo + pyramid.rectExtents.y;

        const float32_t xxLo = xLo * xLo;
        const float32_t xxHi = xHi * xHi;
        const float32_t yyLo = yLo * yLo;
        const float32_t yyHi = yHi * yHi;

        // Density at each corner; the suffix reads (x index, y index).
        const float32_t v00 = planeDensity(xxLo, yyLo);
        const float32_t v10 = planeDensity(xxHi, yyLo);
        const float32_t v01 = planeDensity(xxLo, yyHi);
        const float32_t v11 = planeDensity(xxHi, yyHi);

        // Bilinear layout: (x0y0, x0y1, x1y0, x1y1)
        self.sampler = nbl::hlsl::sampling::Bilinear::create(float32_t4(v00, v01, v10, v11));

        // Integral of a bilinear patch over the unit square = mean of its corners.
        const float32_t totalIntegral = (v00 + v10 + v01 + v11) * 0.25f;
        self.rcpTotalIntegral = 1.0f / max(totalIntegral, 1e-20f);
        self.rectArea = pyramid.rectExtents.x * pyramid.rectExtents.y;

        return self;
    }

    // Draws a direction inside the pyramid's rectangle.
    //   pyramid    : frame and rectangle this sampler was created from
    //   silhouette : precomputed edge normals for the inside test
    //   xi         : two random numbers
    //   pdf        : out, density in solid-angle measure
    //   valid      : out, true when direction.z > 0 and silhouette.isInside(direction)
    // Returns the normalised direction expressed through the pyramid axes.
    float32_t3 sample(NBL_CONST_REF_ARG(SphericalPyramid) pyramid, NBL_CONST_REF_ARG(SilEdgeNormals) silhouette, float32_t2 xi, out float32_t pdf, out bool valid)
    {
        // UV from the bilinear distribution; the generator hands back 1/pdf(uv).
        float32_t rcpPdfUV;
        const float32_t2 uv = sampler.generate(rcpPdfUV, xi);

        // Bilinear sampler convention: uv.y = first-sampled axis (X), uv.x = second-sampled axis (Y)
        const float32_t px = pyramid.rectR0.x + uv.y * pyramid.rectExtents.x;
        const float32_t py = pyramid.rectR0.y + uv.x * pyramid.rectExtents.y;

        // Squared length of (px, py, 1); shared by the normalisation and the density.
        const float32_t lenSq = px * px + py * py + 1.0f;
        const float32_t invLen = rsqrt(lenSq);
        const float32_t3 unnormalised = px * pyramid.axis1 + py * pyramid.axis2 + pyramid.axis3;
        const float32_t3 direction = unnormalised * invLen;

        valid = direction.z > 0.0f && silhouette.isInside(direction);

        // pdf_solidAngle = pdf_uv / (density * rectArea) = 1 / (rcpPdfUV * density * rectArea);
        // the denominator is floored at 1e-7 to avoid a division by zero.
        const float32_t density = invLen / lenSq;
        const float32_t denom = max(rcpPdfUV * density * rectArea, 1e-7f);
        pdf = 1.0f / denom;

        return direction;
    }
};
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BIQUADRATIC_HLSL_INCLUDED_
+#define _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BIQUADRATIC_HLSL_INCLUDED_
+
+// ============================================================================
+// Biquadratic Approximation Sampling (Hart et al. 2020)
+// ============================================================================
+//
+// Approximates the area -> solid-angle density over the pyramid's bounding rectangle
+// by a biquadratic fitted to a 3x3 grid of samples, then importance-samples it.
+// Call create() once per SphericalPyramid and sample() once per random pair.
+
+struct BiquadraticSampler
+{
+    // Transposed sample grid: cols[i] = (row0[i], row1[i], row2[i]), so the slice at a
+    // given Y costs one dot product per column.
+    float32_t3x3 cols;
+
+    // Marginal density along Y as f(y) = margC0 + y*(margC1 + y*margC2), plus its
+    // integral over [0,1].
+    float32_t margC0, margC1, margC2, margIntegral;
+
+    float32_t rcpTotalIntegral;        // 1 / margIntegral (denominator clamped to 1e-20)
+    float32_t rcpIntegralTimesRcpArea; // rcpTotalIntegral / rectArea (fused for PDF computation)
+
+    // Inverts the CDF of the quadratic density c0 + c1*t + c2*t^2 on [0,1] with two
+    // clamped Newton-Raphson steps started at t = u, i.e. approximately solves
+    //   c0*t + (c1/2)*t^2 + (c2/3)*t^3 = u * integral
+    // Returns the refined t. `lastPdfVal` receives the density evaluated at the iterate
+    // that entered the second step; it is not re-evaluated at the returned t.
+    // NOTE(review): the original author estimates ~4 decimal digits after 2 iterations
+    // and questions whether that suffices for importance sampling with rejection.
+    static float32_t sampleQuadraticCDF(float32_t u, float32_t c0, float32_t c1, float32_t c2, float32_t integral, out float32_t lastPdfVal)
+    {
+        const float32_t goal = u * integral;
+        const float32_t halfC1 = c1 * 0.5f;
+        const float32_t thirdC2 = c2 * (1.0f / 3.0f);
+
+        float32_t t = u;
+        float32_t cdfAtT;
+
+        // First Newton step
+        cdfAtT = t * (c0 + t * (halfC1 + t * thirdC2));
+        lastPdfVal = c0 + t * (c1 + t * c2);
+        t = clamp(t - (cdfAtT - goal) / lastPdfVal, 0.0f, 1.0f);
+
+        // Second Newton step
+        cdfAtT = t * (c0 + t * (halfC1 + t * thirdC2));
+        lastPdfVal = c0 + t * (c1 + t * c2);
+        t = clamp(t - (cdfAtT - goal) / lastPdfVal, 0.0f, 1.0f);
+
+        return t;
+    }
+
+    // Builds the sampler for one pyramid (call ONCE, reuse for all samples).
+    static BiquadraticSampler create(NBL_CONST_REF_ARG(SphericalPyramid) pyramid)
+    {
+        BiquadraticSampler self;
+
+        // Grid coordinates at the start, midpoint and end of each rectangle axis
+        const float32_t xStart = pyramid.rectR0.x;
+        const float32_t yStart = pyramid.rectR0.y;
+        const float32_t3 gridX = float32_t3(xStart, xStart + 0.5f * pyramid.rectExtents.x, xStart + pyramid.rectExtents.x);
+        const float32_t3 gridY = float32_t3(yStart, yStart + 0.5f * pyramid.rectExtents.y, yStart + pyramid.rectExtents.y);
+        const float32_t3 gridXSq = gridX * gridX;
+        const float32_t3 gridYSq = gridY * gridY;
+
+        // dSA(x,y) = rsqrt(x^2+y^2+1) / (x^2+y^2+1)   [z = rectR0.z = 1.0]
+        // Each row holds the three X samples taken along one Y grid line.
+        const float32_t3 lenSq0 = gridXSq + gridYSq.x + 1.0f;
+        const float32_t3 lenSq1 = gridXSq + gridYSq.y + 1.0f;
+        const float32_t3 lenSq2 = gridXSq + gridYSq.z + 1.0f;
+        const float32_t3 row0 = rsqrt(lenSq0) / lenSq0;
+        const float32_t3 row1 = rsqrt(lenSq1) / lenSq1;
+        const float32_t3 row2 = rsqrt(lenSq2) / lenSq2;
+
+        // Store transposed so sample() can slice at Y with dot products
+        self.cols[0] = float32_t3(row0.x, row1.x, row2.x);
+        self.cols[1] = float32_t3(row0.y, row1.y, row2.y);
+        self.cols[2] = float32_t3(row0.z, row1.z, row2.z);
+
+        // Marginal along Y: Simpson's rule integral of each row over X
+        const float32_t marg0 = (row0.x + 4.0f * row0.y + row0.z) / 6.0f;
+        const float32_t marg1 = (row1.x + 4.0f * row1.y + row1.z) / 6.0f;
+        const float32_t marg2 = (row2.x + 4.0f * row2.y + row2.z) / 6.0f;
+
+        // Quadratic through (0, marg0), (0.5, marg1), (1, marg2) in monomial form
+        self.margC0 = marg0;
+        self.margC1 = -3.0f * marg0 + 4.0f * marg1 - marg2;
+        self.margC2 = 2.0f * (marg0 - 2.0f * marg1 + marg2);
+        self.margIntegral = (marg0 + 4.0f * marg1 + marg2) / 6.0f;
+
+        // Clamp both denominators so degenerate rectangles do not divide by zero
+        self.rcpTotalIntegral = 1.0f / max(self.margIntegral, 1e-20f);
+        const float32_t rectArea = pyramid.rectExtents.x * pyramid.rectExtents.y;
+        self.rcpIntegralTimesRcpArea = self.rcpTotalIntegral / max(rectArea, 1e-20f);
+
+        return self;
+    }
+
+    // Draws one direction inside the spherical pyramid using the biquadratic fit.
+    // Returns the world-space direction. `pdf` receives the density with respect to
+    // solid angle, `valid` is true only when the direction has positive z and passes
+    // the silhouette edge test.
+    float32_t3 sample(NBL_CONST_REF_ARG(SphericalPyramid) pyramid, NBL_CONST_REF_ARG(SilEdgeNormals) silhouette, float32_t2 xi, out float32_t pdf, out bool valid)
+    {
+        // Step 1: sample Y from the precomputed marginal polynomial
+        float32_t margPdfAtY;
+        const float32_t y = sampleQuadraticCDF(xi.y, margC0, margC1, margC2, margIntegral, margPdfAtY);
+
+        // Step 2: slice the grid at Y with the quadratic Lagrange basis for nodes 0, 0.5, 1
+        const float32_t ySq = y * y;
+        const float32_t3 basisY = float32_t3(2.0f * ySq - 3.0f * y + 1.0f, -4.0f * ySq + 4.0f * y, 2.0f * ySq - y);
+        const float32_t s0 = dot(cols[0], basisY);
+        const float32_t s1 = dot(cols[1], basisY);
+        const float32_t s2 = dot(cols[2], basisY);
+
+        // Step 3: conditional polynomial along X for that slice, then sample X
+        const float32_t condC0 = s0;
+        const float32_t condC1 = -3.0f * s0 + 4.0f * s1 - s2;
+        const float32_t condC2 = 2.0f * (s0 - 2.0f * s1 + s2);
+        const float32_t condIntegral = (s0 + 4.0f * s1 + s2) / 6.0f;
+        float32_t condPdfAtX;
+        const float32_t x = sampleQuadraticCDF(xi.x, condC0, condC1, condC2, condIntegral, condPdfAtX);
+
+        // Step 4: unit square -> rectangle -> direction
+        const float32_t rx = pyramid.rectR0.x + x * pyramid.rectExtents.x;
+        const float32_t ry = pyramid.rectR0.y + y * pyramid.rectExtents.y;
+
+        // Squared length and inverse length are shared by normalization and dSA
+        const float32_t lenSq = rx * rx + ry * ry + 1.0f;
+        const float32_t invLen = rsqrt(lenSq);
+        float32_t3 dir = (rx * pyramid.axis1 +
+                          ry * pyramid.axis2 +
+                          pyramid.axis3) *
+                         invLen;
+
+        valid = dir.z > 0.0f && silhouette.isInside(dir);
+
+        // Step 5: PDF in solid angle space = condPdfAtX / (totalIntegral * dSA * rectArea)
+        // condPdfAtX is the value left over from the Newton iterations (see sampleQuadraticCDF)
+        const float32_t dSolidAngle = invLen / lenSq;
+        pdf = condPdfAtX * rcpIntegralTimesRcpArea / max(dSolidAngle, 1e-7f);
+
+        return dir;
+    }
+};
+
+#endif // _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_BIQUADRATIC_HLSL_INCLUDED_
diff --git 
a/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/urena.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/urena.hlsl
new file mode 100644
index 000000000..6709bf7da
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/pyramid_sampling/urena.hlsl
@@ -0,0 +1,87 @@
+//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_URENA_HLSL_INCLUDED_
+#define _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_URENA_HLSL_INCLUDED_
+
+// ============================================================================
+// Sampling using Urena 2003 (SphericalRectangle)
+// ============================================================================
+
+struct UrenaSampler
+{
+    float32_t solidAngle; // Solid angle of the bounding region (steradians)
+    float32_t samplerK;   // = 2*pi - q (angle offset for horizontal sampling)
+    float32_t samplerB0;  // = n_z[0] (normalized edge parameter)
+    float32_t samplerB1;  // = n_z[2] (normalized edge parameter)
+
+    // Computes the rectangle's solid angle and the constants sample() needs in a single
+    // pass, since both depend on the same normalized edge parameters.
+    static UrenaSampler create(NBL_CONST_REF_ARG(SphericalPyramid) pyramid)
+    {
+        UrenaSampler self;
+
+        // Unnormalized z components of the four edge-plane normals
+        const float32_t4 rawNz = float32_t4(-pyramid.rectR0.y, pyramid.rectR0.x + pyramid.rectExtents.x, pyramid.rectR0.y + pyramid.rectExtents.y, -pyramid.rectR0.x);
+        const float32_t4 nz = rawNz / sqrt((float32_t4)(pyramid.rectR0.z * pyramid.rectR0.z) + rawNz * rawNz);
+
+        // Cosines of the four internal angles, one per pair of adjacent edges
+        const float32_t4 cosGamma = float32_t4(-nz[0] * nz[1], -nz[1] * nz[2],
+                                               -nz[2] * nz[3], -nz[3] * nz[0]);
+
+        // p = acos(cosGamma[0]) + acos(cosGamma[1])
+        nbl::hlsl::math::sincos_accumulator angleSum = nbl::hlsl::math::sincos_accumulator::create(cosGamma[0]);
+        angleSum.addCosine(cosGamma[1]);
+        const float32_t p = angleSum.getSumofArccos();
+
+        // q = acos(cosGamma[2]) + acos(cosGamma[3])
+        angleSum = nbl::hlsl::math::sincos_accumulator::create(cosGamma[2]);
+        angleSum.addCosine(cosGamma[3]);
+        const float32_t q = angleSum.getSumofArccos();
+
+        self.solidAngle = p + q - 2.0f * nbl::hlsl::numbers::pi;
+        self.samplerK = 2.0f * nbl::hlsl::numbers::pi - q;
+        self.samplerB0 = nz[0];
+        self.samplerB1 = nz[2];
+
+        return self;
+    }
+
+    // Draws one direction inside the spherical rectangle.
+    // Returns the world-space direction. `pdf` is the constant 1 / solidAngle (clamped),
+    // `valid` is true only when the direction has positive z and passes the silhouette
+    // edge test.
+    float32_t3 sample(NBL_CONST_REF_ARG(SphericalPyramid) pyramid, NBL_CONST_REF_ARG(SilEdgeNormals) silhouette, float32_t2 xi, out float32_t pdf, out bool valid)
+    {
+        // Inlined Urena 2003 with algebraic simplifications.
+        // Far corner of the rectangle:
+        const float32_t xMax = pyramid.rectR0.x + pyramid.rectExtents.x;
+        const float32_t yMax = pyramid.rectR0.y + pyramid.rectExtents.y;
+
+        // Horizontal CDF inversion
+        const float32_t au = xi.x * solidAngle + samplerK;
+        float32_t sinAu, cosAu;
+        sincos(au, sinAu, cosAu);
+        const float32_t fu = (cosAu * samplerB0 - samplerB1) / sinAu;
+
+        // cu = sign(fu)/sqrt(cu_2), xu = cu/sqrt(1-cu^2)
+        // Fused: xu = sign(fu)/sqrt(cu_2 - 1) [eliminates 2 sqrt + 2 div -> 1 rsqrt]
+        const float32_t cuSq = max(fu * fu + samplerB0 * samplerB0, 1.0f);
+        const float32_t signFu = fu >= 0.0f ? 1.0f : -1.0f;
+        const float32_t xu = clamp(signFu * rsqrt(max(cuSq - 1.0f, 1e-10f)), pyramid.rectR0.x, xMax);
+        const float32_t dSq = xu * xu + 1.0f;
+
+        // Vertical sampling in h-space (div -> rsqrt + mul)
+        const float32_t hLow = pyramid.rectR0.y * rsqrt(dSq + pyramid.rectR0.y * pyramid.rectR0.y);
+        const float32_t hHigh = yMax * rsqrt(dSq + yMax * yMax);
+        const float32_t hv = hLow + xi.y * (hHigh - hLow);
+
+        // Normalized direction via ||(xu,yv,1)||^2 = dSq/(1-hv^2):
+        //   local y  = yv/||v|| = hv (exact cancellation)
+        //   local xz = (xu, 1) * scale where scale = sqrt(1-hv^2)/sqrt(dSq)
+        // Eliminates: sqrt(dSq), yv computation, and normalize()
+        const float32_t scale = sqrt(max(1.0f - hv * hv, 0.0f)) * rsqrt(dSq);
+        const float32_t3 local = float32_t3(xu * scale, hv, scale);
+
+        float32_t3 dir = local.x * pyramid.axis1 +
+                         local.y * pyramid.axis2 +
+                         local.z * pyramid.axis3;
+
+        valid = dir.z > 0.0f && silhouette.isInside(dir);
+        pdf = 1.0f / max(solidAngle, 1e-7f);
+
+        return dir;
+    }
+};
+
+#endif // _SOLID_ANGLE_VIS_EXAMPLE_SAMPLING_URENA_HLSL_INCLUDED_
diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/ray_vis.frag.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/ray_vis.frag.hlsl
new file mode 100644
index 000000000..d01b3a07f
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/ray_vis.frag.hlsl
@@ -0,0 +1,289 @@
+//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h
+#pragma wave shader_stage(fragment)
+
+#include "common.hlsl"
+#include 
+#include "utils.hlsl"
+
+using namespace nbl::hlsl;
+using namespace ext::FullScreenTriangle;
+
+// Fragment output of the ray visualisation pass.
+// color.rgb carries the colour, color.a the line coverage; depth is written to SV_Depth.
+struct ArrowResult
+{
+    float32_t4 color : SV_Target0;
+    float32_t depth : SV_Depth;
+};
+
+[[vk::push_constant]] struct PushConstantRayVis pc;
+
+#if VISUALIZE_SAMPLES
+#include "drawing.hlsl"
+
+// Slab-test intersection of a ray with an axis-aligned box.
+// Returns the nearest non-negative hit distance: the entry distance when the origin is
+// outside the box, the exit distance when it is inside, or -1 when there is no hit.
+float32_t rayAABBIntersection(float32_t3 rayOrigin, float32_t3 rayDir, float32_t3 aabbMin, float32_t3 aabbMax)
+{
+    float32_t3 invDir = 1.0f / rayDir;
+    float32_t3 t0 = (aabbMin - rayOrigin) * invDir;
+    float32_t3 t1 = (aabbMax - rayOrigin) * invDir;
+
+    float32_t3 tmin = min(t0, t1);
+    float32_t3 tmax = max(t0, t1);
+
+    float32_t tNear = max(max(tmin.x, tmin.y), tmin.z);
+    float32_t tFar = min(min(tmax.x, tmax.y), tmax.z);
+
+    // Miss: the slabs do not overlap, or the whole box is behind the origin
+    if (tNear > tFar || tFar < 0.0)
+        return -1.0;
+
+    // Return the nearest positive intersection
+    return tNear >= 0.0 ? tNear : tFar;
+}
+
+// Projects a point with `viewProj`, performs the perspective divide and scales x by
+// `aspect`. Returns the resulting xy.
+float32_t2 projectToNDC(float32_t3 worldPos, float32_t4x4 viewProj, float32_t aspect)
+{
+    float32_t4 clipPos = mul(viewProj, float32_t4(worldPos, 1.0));
+    clipPos /= clipPos.w;
+
+    // Apply aspect ratio correction
+    clipPos.x *= aspect;
+
+    return clipPos.xy;
+}
+
+// Rasterises the segment rayOrigin -> rayOrigin + dir * arrowLength for the pixel at
+// `ndcPos` (already aspect-corrected).
+//   directionAndPdf.xyz  ray direction (normalised here), .w its pdf
+// Returns grey colour proportional to the pdf in color.rgb, line coverage in color.a and
+// the depth of the closest point on the segment in depth. A zeroed result means the
+// arrow does not cover this pixel.
+ArrowResult visualizeRayAsArrow(float32_t3 rayOrigin, float32_t4 directionAndPdf, float32_t arrowLength, float32_t2 ndcPos, float32_t aspect)
+{
+    ArrowResult result;
+    result.color = float32_t4(0, 0, 0, 0);
+    result.depth = 0.0; // Far plane in reversed-Z
+
+    float32_t3 rayDir = normalize(directionAndPdf.xyz);
+    float32_t pdf = directionAndPdf.w;
+
+    // Define the 3D line segment
+    float32_t3 worldStart = rayOrigin;
+    float32_t3 worldEnd = rayOrigin + rayDir * arrowLength;
+
+    // Clipping is done in clip space by inspecting w
+    float32_t4 clipStart = mul(pc.viewProjMatrix, float32_t4(worldStart, 1.0));
+    float32_t4 clipEnd = mul(pc.viewProjMatrix, float32_t4(worldEnd, 1.0));
+
+    // Both end points at or behind the w = 0.001 plane: nothing to draw
+    if (clipStart.w <= 0.001 && clipEnd.w <= 0.001)
+        return result;
+
+    // Parametric range of the segment that survives clipping
+    float32_t t0 = 0.0;
+    float32_t t1 = 1.0;
+
+    if (clipStart.w <= 0.001)
+    {
+        // Start is behind camera, clip to near plane
+        float32_t t = (0.001 - clipStart.w) / (clipEnd.w - clipStart.w);
+        t0 = saturate(t);
+        clipStart = lerp(clipStart, clipEnd, t0);
+        worldStart = lerp(worldStart, worldEnd, t0);
+    }
+
+    if (clipEnd.w <= 0.001)
+    {
+        // End is behind camera, clip to near plane
+        float32_t t = (0.001 - clipStart.w) / (clipEnd.w - clipStart.w);
+        t1 = saturate(t);
+        clipEnd = lerp(clipStart, clipEnd, t1);
+        worldEnd = lerp(worldStart, worldEnd, t1);
+    }
+
+    // Now check if the clipped segment is valid
+    if (t0 >= t1)
+        return result;
+
+    // Perspective divide to NDC
+    float32_t2 ndcStart = clipStart.xy / clipStart.w;
+    float32_t2 ndcEnd = clipEnd.xy / clipEnd.w;
+
+    // Apply aspect ratio correction
+    ndcStart.x *= aspect;
+    ndcEnd.x *= aspect;
+
+    // Calculate arrow direction in NDC
+    float32_t2 arrowVec = ndcEnd - ndcStart;
+    float32_t arrowNDCLength = length(arrowVec);
+
+    // Skip if arrow is too small on screen
+    if (arrowNDCLength < 0.005)
+        return result;
+
+    // Parameter of the point on the segment closest to this pixel, clamped to [0,1]
+    float32_t2 toPixel = ndcPos - ndcStart;
+    float32_t t_ndc = saturate(dot(toPixel, arrowVec) / dot(arrowVec, arrowVec));
+
+    // Draw line shaft
+    float32_t lineThickness = 0.002;
+    float32_t lineIntensity = lineSegment(ndcPos, ndcStart, ndcEnd, lineThickness);
+
+    // Depth is only needed where the line covers the pixel
+    if (lineIntensity > 0.0)
+    {
+        // Interpolate in clip space
+        float32_t4 clipPos = lerp(clipStart, clipEnd, t_ndc);
+
+        // Compute NDC depth for reversed-Z
+        float32_t depthNDC = clipPos.z / clipPos.w;
+        result.depth = 1.0f - depthNDC;
+
+        // Clip against valid depth range
+        if (result.depth < 0.0 || result.depth > 1.0)
+        {
+            lineIntensity = 0.0;
+        }
+    }
+
+    // Modulate by PDF
+    float32_t pdfIntensity = saturate(pdf * 0.5);
+    float32_t3 finalColor = float32_t3(pdfIntensity, pdfIntensity, pdfIntensity);
+
+    result.color = float32_t4(finalColor, lineIntensity);
+    return result;
+}
+
+// Returns both tMin (entry) and tMax (exit) for ray-AABB intersection
+struct AABBIntersection
+{
+    float32_t tMin; // Distance to front face (entry point), clamped to 0 when the origin is inside
+    float32_t tMax; // Distance to back face (exit point)
+    bool hit;       // Whether ray intersects the AABB at all
+};
+
+// Slab-test intersection that reports both the entry and the exit distance.
+AABBIntersection rayAABBIntersectionFull(float32_t3 origin, float32_t3 dir, float32_t3 boxMin, float32_t3 boxMax)
+{
+    AABBIntersection result;
+    result.hit = false;
+    result.tMin = 0.0f;
+    result.tMax = 0.0f;
+
+    float32_t3 invDir = 1.0f / dir;
+    float32_t3 t0 = (boxMin - origin) * invDir;
+    float32_t3 t1 = (boxMax - origin) * invDir;
+
+    float32_t3 tmin = min(t0, t1);
+    float32_t3 tmax = max(t0, t1);
+
+    result.tMin = max(max(tmin.x, tmin.y), tmin.z);
+    result.tMax = min(min(tmax.x, tmax.y), tmax.z);
+
+    // Ray intersects if tMax >= tMin and tMax > 0
+    result.hit = (result.tMax >= result.tMin) && (result.tMax > 0.0f);
+
+    // If we're inside the box, tMin will be negative
+    // In that case, we want to use tMax (exit point)
+    if (result.tMin < 0.0f)
+        result.tMin = 0.0f;
+
+    return result;
+}
+#endif // VISUALIZE_SAMPLES
+
+// Fragment entry point: draws the clipped silhouette vertices and one arrow per recorded
+// sample ray, reading both from DebugDataBuffer[0]. With VISUALIZE_SAMPLES disabled it
+// outputs transparent black at depth 0.
+// [shader("pixel")]
+[[vk::location(0)]] ArrowResult main(SVertexAttributes vx)
+{
+    // Initialise unconditionally so the outputs are defined even when the
+    // visualisation code below is compiled out.
+    ArrowResult output;
+    output.color = float32_t4(0.0, 0.0, 0.0, 0.0);
+    output.depth = 0.0; // Far plane in reversed-Z (near=1, far=0)
+#if VISUALIZE_SAMPLES
+    // Largest depth drawn so far; depth is written as 1 - ndc.z, so larger means closer
+    float32_t maxDepth = 0.0;
+    float32_t aaWidth = length(float32_t2(ddx(vx.uv.x), ddy(vx.uv.y)));
+
+    // Convert to NDC space with aspect ratio correction
+    float32_t2 ndcPos = vx.uv * 2.0f - 1.0f;
+    float32_t aspect = pc.viewport.z / pc.viewport.w;
+    ndcPos.x *= aspect;
+
+    for (uint32_t v = 0; v < DebugDataBuffer[0].clippedSilhouetteVertexCount; v++)
+    {
+        float32_t4 clipPos = mul(pc.viewProjMatrix, float32_t4(DebugDataBuffer[0].clippedSilhouetteVertices[v], 1.0));
+        float32_t3 ndcPosVertex = clipPos.xyz / clipPos.w;
+        // NOTE(review): this compares raw ndc.z against maxDepth, which stores 1 - ndc.z;
+        // confirm the intended depth convention for this early-out.
+        if (ndcPosVertex.z < maxDepth)
+            continue;
+
+        float32_t4 intensity = drawCorner(ndcPosVertex, ndcPos, aaWidth, 0.03, 0.0, colorLUT[DebugDataBuffer[0].clippedSilhouetteVerticesIndices[v]]);
+
+        // Update depth only where we drew something
+        if (any(intensity.rgb > 0.0))
+        {
+            output.color.rgb += intensity.rgb;
+            maxDepth = max(maxDepth, 1.0f - ndcPosVertex.z);
+        }
+    }
+
+    uint32_t sampleCount = DebugDataBuffer[0].sampleCount;
+
+    for (uint32_t i = 0; i < sampleCount; i++)
+    {
+        float32_t3 rayOrigin = float32_t3(0, 0, 0);
+        float32_t4 directionAndPdf = DebugDataBuffer[0].rayData[i];
+        float32_t3 rayDir = normalize(directionAndPdf.xyz);
+
+        // Define cube bounds in local space
+        float32_t3 cubeLocalMin = float32_t3(-0.5, -0.5, -0.5);
+        float32_t3 cubeLocalMax = float32_t3(0.5, 0.5, 0.5);
+
+        // Transform ray to local space of the cube (using precomputed inverse)
+        float32_t3 localRayOrigin = mul(pc.invModelMatrix, float32_t4(rayOrigin, 1.0)).xyz;
+        float32_t3 localRayDir = normalize(mul(pc.invModelMatrix, float32_t4(rayDir, 0.0)).xyz);
+
+        // Get both entry and exit distances
+        AABBIntersection intersection = rayAABBIntersectionFull(localRayOrigin, localRayDir, cubeLocalMin, cubeLocalMax);
+
+        float32_t arrowLength;
+        float32_t3 arrowColor;
+
+        if (intersection.hit)
+        {
+            // Use tMax (exit point at back face) instead of tMin (entry point at front face)
+            float32_t3 localExitPoint = localRayOrigin + localRayDir * intersection.tMax;
+            float32_t3 worldExitPoint = mul(pc.modelMatrix, float32_t4(localExitPoint, 1.0)).xyz;
+            arrowLength = length(worldExitPoint - rayOrigin);
+            arrowColor = float32_t3(0.0, 1.0, 0.0); // Green for valid samples
+        }
+        else
+        {
+            // Ray doesn't intersect - THIS SHOULD NEVER HAPPEN with correct sampling!
+            float32_t3 cubeCenter = mul(pc.modelMatrix, float32_t4(0, 0, 0, 1)).xyz;
+            arrowLength = length(cubeCenter - rayOrigin) + 2.0;
+            arrowColor = float32_t3(1.0, 0.0, 0.0); // Red for BROKEN samples
+        }
+
+        ArrowResult arrow = visualizeRayAsArrow(rayOrigin, directionAndPdf, arrowLength, ndcPos, aspect);
+
+        // Only update depth if arrow was actually drawn
+        if (arrow.color.a > 0.0)
+        {
+            maxDepth = max(maxDepth, arrow.depth);
+        }
+
+        // Modulate arrow color by its alpha (only add where arrow is visible)
+        output.color.rgb += arrowColor * arrow.color.a;
+        output.color.a = max(output.color.a, arrow.color.a);
+    }
+
+    // Clamp to prevent overflow
+    output.color = saturate(output.color);
+    output.color.a = 1.0;
+
+    // Write the closest depth (maximum in reversed-Z)
+    output.depth = maxDepth;
+
+#endif
+    return output;
+}
diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl
new file mode 100644
index 000000000..8213c17fc
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/silhouette.hlsl
@@ -0,0 +1,244 @@
+//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_
+#define _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_
+
+#include "gpu_common.hlsl"
+
+// Silhouette polygon of the cube after clipping against the z = 0 plane.
+struct ClippedSilhouette
+{
+    float32_t3 vertices[MAX_SILHOUETTE_VERTICES]; // Max 7 vertices after clipping, unnormalized
+    uint32_t count;
+
+    // Normalizes the stored vertices in place. The first three are always processed,
+    // the remaining ones only while their index is below `count`.
+    void normalize()
+    {
+        vertices[0] = nbl::hlsl::normalize(vertices[0]);
+        vertices[1] = nbl::hlsl::normalize(vertices[1]);
+        vertices[2] = nbl::hlsl::normalize(vertices[2]);
+        if (count <= 3)
+            return;
+
+        vertices[3] = nbl::hlsl::normalize(vertices[3]);
+        if (count <= 4)
+            return;
+
+        vertices[4] = nbl::hlsl::normalize(vertices[4]);
+        if (count <= 5)
+            return;
+
+        vertices[5] = nbl::hlsl::normalize(vertices[5]);
+        if (count <= 6)
+            return;
+
+        vertices[6] = nbl::hlsl::normalize(vertices[6]);
+    }
+
+    // Returns the normalized sum of the first `count` vertices (average direction).
+    float32_t3 getCenter()
+    {
+        float32_t3 accum = float32_t3(0, 0, 0);
+
+        NBL_UNROLL
+        for (uint32_t k = 0; k < MAX_SILHOUETTE_VERTICES; k++)
+        {
+            if (k < count)
+                accum += vertices[k];
+        }
+
+        return nbl::hlsl::normalize(accum);
+    }
+
+    // Classifies the projected model translation against the [-0.5, 0.5] slab on each
+    // model axis and looks up the matching silhouette.
+    //   region       per-axis slab index: 0 (below -0.5), 1 (inside), 2 (above 0.5)
+    //   configIndex  region.x + 3*region.y + 9*region.z
+    //   vertexCount  getSilhouetteSize() of the selected silhouette
+    // Returns the packed silhouette word binSilhouettes[configIndex].
+    static uint32_t computeRegionAndConfig(float32_t3x4 modelMatrix, out uint32_t3 region, out uint32_t configIndex, out uint32_t vertexCount)
+    {
+        float32_t4x3 columnModel = transpose(modelMatrix);
+        float32_t3 obbCenter = columnModel[3].xyz;
+        float32_t3x3 upper3x3 = (float32_t3x3)columnModel;
+
+        // Reciprocal squared length of each model axis
+        float32_t3 axisLenSq = float32_t3(
+            dot(upper3x3[0], upper3x3[0]),
+            dot(upper3x3[1], upper3x3[1]),
+            dot(upper3x3[2], upper3x3[2]));
+        float32_t3 rcpSqScales = rcp(axisLenSq);
+
+        float32_t3 normalizedProj = mul(upper3x3, obbCenter) * rcpSqScales;
+
+        region = uint32_t3(
+            normalizedProj.x < -0.5f ? 0 : (normalizedProj.x > 0.5f ? 2 : 1),
+            normalizedProj.y < -0.5f ? 0 : (normalizedProj.y > 0.5f ? 2 : 1),
+            normalizedProj.z < -0.5f ? 0 : (normalizedProj.z > 0.5f ? 2 : 1));
+
+        configIndex = region.x + region.y * 3u + region.z * 9u;
+
+        uint32_t packedSil = binSilhouettes[configIndex];
+        vertexCount = getSilhouetteSize(packedSil);
+
+        return packedSil;
+    }
+
+    // Fills `vertices`/`count` with the silhouette `sil` clipped against z = 0.
+    // The vertex ring is rotated so the kept (z >= 0) vertices come first; when some but
+    // not all vertices are clipped, the two plane crossings are appended after them.
+    void compute(float32_t3x4 modelMatrix, uint32_t vertexCount, uint32_t sil)
+    {
+        count = 0;
+
+        // Bit i set <=> silhouette vertex i has negative z
+        uint32_t clipMask = 0u;
+        NBL_UNROLL
+        for (uint32_t i = 0; i < 4; i++)
+            clipMask |= (getVertexZNeg(modelMatrix, getSilhouetteVertex(sil, i)) ? 1u : 0u) << i;
+
+        if (vertexCount == 6)
+        {
+            NBL_UNROLL
+            for (uint32_t i = 4; i < 6; i++)
+                clipMask |= (getVertexZNeg(modelMatrix, getSilhouetteVertex(sil, i)) ? 1u : 0u) << i;
+        }
+
+        uint32_t clipCount = countbits(clipMask);
+
+        // Bits of the vertices that are kept, limited to the valid range
+        uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u);
+
+        // The clipped run wraps around when both the first and the last vertex are clipped
+        bool wrapAround = ((clipMask & 1u) != 0u) && ((clipMask & (1u << (vertexCount - 1))) != 0u);
+
+        // Rotation that brings the first kept vertex of the run to index 0
+        uint32_t rotateAmount = wrapAround
+                                    ? firstbitlow(invertedMask) // first positive
+                                    : firstbithigh(clipMask) + 1; // first vertex after last negative
+
+        // Rotate masks
+        uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount);
+        uint32_t rotatedSil = rotr(sil, rotateAmount * 3, vertexCount * 3);
+        uint32_t positiveCount = vertexCount - clipCount;
+
+        // Both crossings are ALWAYS computed; they are only stored further below
+        uint32_t lastPosIdx = positiveCount - 1;
+        uint32_t firstNegIdx = positiveCount;
+
+        // Crossing A: between the last kept vertex and the first clipped one
+        float32_t3 vLastPos = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, lastPosIdx));
+        float32_t3 vFirstNeg = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, firstNegIdx));
+        float32_t t = vLastPos.z / (vLastPos.z - vFirstNeg.z);
+        float32_t3 clipA = lerp(vLastPos, vFirstNeg, t);
+
+        // Crossing B: between the last clipped vertex and the first kept one
+        float32_t3 vLastNeg = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, vertexCount - 1));
+        float32_t3 vFirstPos = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, 0));
+        t = vLastNeg.z / (vLastNeg.z - vFirstPos.z);
+        float32_t3 clipB = lerp(vLastNeg, vFirstPos, t);
+
+        // Emit the kept vertices in rotated order
+        NBL_UNROLL
+        for (uint32_t i = 0; i < positiveCount; i++)
+        {
+            float32_t3 v0 = getVertex(modelMatrix, getSilhouetteVertex(rotatedSil, i));
+
+#if DEBUG_DATA
+            uint32_t originalIndex = (i + rotateAmount) % vertexCount;
+            DebugDataBuffer[0].clippedSilhouetteVertices[count] = v0;
+            DebugDataBuffer[0].clippedSilhouetteVerticesIndices[count] = originalIndex;
+#endif
+            vertices[count++] = v0;
+        }
+
+        // Partially clipped: close the polygon with the two crossings
+        if (clipCount > 0 && clipCount < vertexCount)
+        {
+#if DEBUG_DATA
+            DebugDataBuffer[0].clippedSilhouetteVertices[count] = clipA;
+            DebugDataBuffer[0].clippedSilhouetteVerticesIndices[count] = CLIP_POINT_A;
+#endif
+            vertices[count++] = clipA;
+
+#if DEBUG_DATA
+            DebugDataBuffer[0].clippedSilhouetteVertices[count] = clipB;
+            DebugDataBuffer[0].clippedSilhouetteVerticesIndices[count] = CLIP_POINT_B;
+#endif
+            vertices[count++] = clipB;
+        }
+
+#if DEBUG_DATA
+        DebugDataBuffer[0].clippedSilhouetteVertexCount = count;
+        DebugDataBuffer[0].clipMask = clipMask;
+        DebugDataBuffer[0].clipCount = clipCount;
+        DebugDataBuffer[0].rotatedClipMask = rotatedClipMask;
+        DebugDataBuffer[0].rotateAmount = rotateAmount;
+        DebugDataBuffer[0].positiveVertCount = positiveCount;
+        DebugDataBuffer[0].wrapAround = (uint32_t)wrapAround;
+        DebugDataBuffer[0].rotatedSil = rotatedSil;
+#endif
+    }
+};
+
+// Half-precision edge-plane normals of a clipped silhouette, used for inside tests.
+struct SilEdgeNormals
+{
+    float16_t3 edgeNormals[MAX_SILHOUETTE_VERTICES]; // 10.5 floats instead of 21
+    uint32_t count;
+
+    // Builds one normal per polygon edge as cross(v[i], v[i+1]), the last edge closing
+    // back to vertex 0. Entries beyond the edge count stay zero.
+    // Better not use and calculate it while creating the sampler
+    static SilEdgeNormals create(NBL_CONST_REF_ARG(ClippedSilhouette) sil)
+    {
+        SilEdgeNormals result = (SilEdgeNormals)0;
+        result.count = sil.count;
+
+        float32_t3 v0 = sil.vertices[0];
+        float32_t3 v1 = sil.vertices[1];
+        float32_t3 v2 = sil.vertices[2];
+
+        result.edgeNormals[0] = float16_t3(cross(v0, v1));
+        result.edgeNormals[1] = float16_t3(cross(v1, v2));
+
+        if (!(sil.count > 3))
+        {
+            result.edgeNormals[2] = float16_t3(cross(v2, v0));
+            return result;
+        }
+        float32_t3 v3 = sil.vertices[3];
+        result.edgeNormals[2] = float16_t3(cross(v2, v3));
+
+        if (!(sil.count > 4))
+        {
+            result.edgeNormals[3] = float16_t3(cross(v3, v0));
+            return result;
+        }
+        float32_t3 v4 = sil.vertices[4];
+        result.edgeNormals[3] = float16_t3(cross(v3, v4));
+
+        if (!(sil.count > 5))
+        {
+            result.edgeNormals[4] = float16_t3(cross(v4, v0));
+            return result;
+        }
+        float32_t3 v5 = sil.vertices[5];
+        result.edgeNormals[4] = float16_t3(cross(v4, v5));
+
+        if (!(sil.count > 6))
+        {
+            result.edgeNormals[5] = float16_t3(cross(v5, v0));
+            return result;
+        }
+        float32_t3 v6 = sil.vertices[6];
+        result.edgeNormals[5] = float16_t3(cross(v5, v6));
+        result.edgeNormals[6] = float16_t3(cross(v6, v0));
+
+        return result;
+    }
+
+    // True when `dir` has a non-positive dot product with every stored edge normal.
+    // All seven slots are tested; unused ones are zero and contribute a dot of 0.
+    bool isInside(float32_t3 dir)
+    {
+        float16_t3 d = float16_t3(dir);
+        half maxDot = dot(d, edgeNormals[0]);
+        NBL_UNROLL
+        for (uint32_t e = 1; e < 7; e++)
+            maxDot = max(maxDot, dot(d, edgeNormals[e]));
+        return maxDot <= float16_t(0.0f);
+    }
+};
+
+#endif // _SOLID_ANGLE_VIS_EXAMPLE_SILHOUETTE_HLSL_INCLUDED_
diff --git a/73_SolidAngleVisualizer/app_resources/hlsl/solid_angle_vis.frag.hlsl b/73_SolidAngleVisualizer/app_resources/hlsl/solid_angle_vis.frag.hlsl
new file mode 100644
index 000000000..bba9aba75
--- /dev/null
+++ b/73_SolidAngleVisualizer/app_resources/hlsl/solid_angle_vis.frag.hlsl
@@ -0,0 +1,305 @@
+//// Copyright (C) 2026-2026 - DevSH Graphics Programming Sp. z O.O.
+//// This file is part of the "Nabla Engine".
+//// For conditions of distribution and use, see copyright notice in nabla.h
+#pragma wave shader_stage(fragment)
+
+#include "common.hlsl"
+#include 
+
+using namespace nbl::hlsl;
+using namespace ext::FullScreenTriangle;
+
+#include "drawing.hlsl"
+#include "utils.hlsl"
+#include "silhouette.hlsl"
+#include "triangle_sampling.hlsl"
+#include "pyramid_sampling.hlsl"
+#include "parallelogram_sampling.hlsl"
+
+[[vk::push_constant]] struct PushConstants pc;
+
+static const SAMPLING_MODE samplingMode = (SAMPLING_MODE)SAMPLING_MODE_CONST;
+
+// Transforms the eight cube corners by the model matrix and stores, for each of the six
+// faces, the average of its four corners.
+void computeCubeGeo()
+{
+    for (uint32_t c = 0; c < 8; c++)
+        corners[c] = mul(pc.modelMatrix, float32_t4(constCorners[c], 1.0f)).xyz;
+
+    for (uint32_t f = 0; f < 6; f++)
+    {
+        faceCenters[f] = float32_t3(0, 0, 0);
+        for (uint32_t k = 0; k < 4; k++)
+            faceCenters[f] += corners[faceToCorners[f][k]];
+        faceCenters[f] /= 4.0f;
+    }
+}
+
+void validateSilhouetteEdges(uint32_t sil, uint32_t vertexCount, inout uint32_t silEdgeMask)
+{
+#if DEBUG_DATA
+    {
+        for (uint32_t i = 0; i < vertexCount; i++)
+        {
+            uint32_t vIdx = i % vertexCount;
+            uint32_t v1Idx = (i + 1) % vertexCount;
+
+            uint32_t v0Corner = getSilhouetteVertex(sil, vIdx);
+            uint32_t v1Corner = getSilhouetteVertex(sil, v1Idx);
+            // Mark edge as part of silhouette
+            for (uint32_t e = 0; e < 12; e++)
+            {
+                uint32_t2 edge = allEdges[e];
+                if 
((edge.x == v0Corner && edge.y == v1Corner) ||
+                    (edge.x == v1Corner && edge.y == v0Corner))
+                {
+                    silEdgeMask |= (1u << e);
+                }
+            }
+        }
+        validateEdgeVisibility(pc.modelMatrix, sil, vertexCount, silEdgeMask);
+    }
+#endif
+}
+
+// Maps the fragment's UV to aspect-corrected NDC (`ndc`) and to a unit direction
+// (`spherePos`). Inside the circle of radius CIRCLE_RADIUS the point is lifted onto the
+// z >= 0 hemisphere; outside it the (2x, 2y, 1 - r^2) / (r^2 + 1) mapping is used instead.
+void computeSpherePos(SVertexAttributes vx, out float32_t2 ndc, out float32_t3 spherePos)
+{
+    ndc = vx.uv * 2.0f - 1.0f;
+    float32_t aspect = pc.viewport.z / pc.viewport.w;
+    ndc.x *= aspect;
+
+    // Position relative to the circle, and its squared radius
+    float32_t2 disk = ndc / CIRCLE_RADIUS;
+    float32_t radiusSq = dot(disk, disk);
+
+    float32_t3 unnormalized;
+    if (radiusSq <= 1.0f)
+    {
+        unnormalized = float32_t3(disk.x, disk.y, sqrt(1.0f - radiusSq));
+    }
+    else
+    {
+        float32_t denom = radiusSq + 1.0f;
+        unnormalized = float32_t3(disk.x * 2.0f, disk.y * 2.0f, 1.0f - radiusSq) / denom;
+    }
+    spherePos = normalize(unnormalized);
+}
+
+#if VISUALIZE_SAMPLES
+// Returns the premultiplied colour contribution of one sample for this fragment: a soft
+// dot where `sampleDir` is within 0.02 of the fragment's sphere direction, tinted by
+// colorLUT[index]. The primary-sample-space overlay is currently disabled.
+float32_t4 visualizeSample(float32_t3 sampleDir, float32_t2 xi, uint32_t index, float32_t2 screenUV, float32_t3 spherePos, float32_t2 ndc, float32_t aaWidth
+#if DEBUG_DATA
+                           ,
+                           inout RWStructuredBuffer DebugDataBuffer
+#endif
+)
+{
+    float32_t4 accumColor = 0;
+
+    float32_t2 pssSize = float32_t2(0.3, 0.3);  // 30% of screen
+    float32_t2 pssPos = float32_t2(0.01, 0.01); // Offset from corner
+    bool isInsidePSS = all(and(screenUV >= pssPos, screenUV <= (pssPos + pssSize)));
+
+    // Coverage falls off smoothly from 1 at distance 0 to 0 at distance 0.02
+    float32_t chordLen = distance(sampleDir, normalize(spherePos));
+    float32_t coverage = 1.0f - smoothstep(0.0f, 0.02f, chordLen);
+
+    if (coverage > 0.0f /* && !isInsidePSS*/)
+    {
+        float32_t3 tint = colorLUT[index].rgb;
+        accumColor += float32_t4(tint * coverage, coverage);
+    }
+
+    // if (isInsidePSS)
+    // {
+    //     // Map the raw xi to the PSS square dimensions
+    //     float32_t2 xiPixelPos = pssPos + xi * pssSize;
+    //     float32_t dist2D = distance(screenUV, xiPixelPos);
+
+    //     float32_t alpha2D = drawCross2D(screenUV, xiPixelPos, 0.005f, 0.001f);
+    //     if (alpha2D > 0.0f)
+    //     {
+    //         float32_t3 sampleColor = colorLUT[index].rgb;
+    //         accumColor += float32_t4(sampleColor * alpha2D, alpha2D);
+    //     }
+    // }
+
+    // // just the outline of the PSS
+    // if (isInsidePSS && accumColor.a < 0.1)
+    //     accumColor = float32_t4(0.1, 0.1, 0.1, 1.0);
+
+    return accumColor;
+}
+#endif // VISUALIZE_SAMPLES
+
+// [shader("pixel")]
+[[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0
+{
+    float32_t4 color = float32_t4(0, 0, 0, 0);
+    for (uint32_t i = 0; i < 1; i++)
+    {
+        float32_t aaWidth = length(float32_t2(ddx(vx.uv.x), ddy(vx.uv.y)));
+        float32_t3 spherePos;
+        float32_t2 ndc;
+        computeSpherePos(vx, ndc, spherePos);
+#if !FAST || DEBUG_DATA
+        computeCubeGeo();
+#endif
+        uint32_t3 region;
+        uint32_t configIndex;
+        uint32_t vertexCount;
+        uint32_t sil = ClippedSilhouette::computeRegionAndConfig(pc.modelMatrix, region, configIndex, vertexCount);
+
+        uint32_t silEdgeMask = 0; // TODO: take from 'fast' compute()
+#if DEBUG_DATA
+        validateSilhouetteEdges(sil, vertexCount, silEdgeMask);
+#endif
+        ClippedSilhouette silhouette;
+        silhouette.compute(pc.modelMatrix, vertexCount, sil);
+
+#if VISUALIZE_SAMPLES
+        // Draw silhouette edges on the sphere
+        for (uint32_t ei = 0; ei < silhouette.count; ei++)
+        {
+            float32_t3 v0 = normalize(silhouette.vertices[ei]);
+            float32_t3 v1 = normalize(silhouette.vertices[(ei + 1) % silhouette.count]);
+            float32_t3 pts[2] = {v0, v1};
+            color += drawEdge(0, pts, spherePos, aaWidth);
+        }
+#endif
+
+        TriangleFanSampler samplingData;
+        Parallelogram parallelogram;
+        SphericalPyramid pyramid;
+        UrenaSampler urena;
+        BiquadraticSampler biquad;
+        BilinearSampler bilin;
+
+        SilEdgeNormals silEdgeNormals;
+        //=====================================================================
+        // Building
+        //=====================================================================
+        if (samplingMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE ||
+            samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE)
+        {
+            samplingData = TriangleFanSampler::create(silhouette, samplingMode);
+        }
+        else if (samplingMode == 
SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) + { + silhouette.normalize(); + parallelogram = Parallelogram::create(silhouette, silEdgeNormals +#if VISUALIZE_SAMPLES + , + ndc, spherePos, aaWidth, color +#endif + ); + } + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE || + samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC || + samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) + { + pyramid = SphericalPyramid::create(silhouette, silEdgeNormals +#if VISUALIZE_SAMPLES + , + ndc, spherePos, aaWidth, color +#endif + ); + + if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE) + urena = UrenaSampler::create(pyramid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC) + biquad = BiquadraticSampler::create(pyramid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) + bilin = BilinearSampler::create(pyramid); + } + +#if DEBUG_DATA + uint32_t validSampleCount = 0u; + DebugDataBuffer[0].sampleCount = pc.sampleCount; +#endif + //===================================================================== + // Sampling + //===================================================================== + for (uint32_t i = 0; i < pc.sampleCount; i++) + { + // Hash the invocation to offset the grid + float32_t2 xi = float32_t2( + (float32_t(i & 7u) + 0.5) / 8.0f, + (float32_t(i >> 3u) + 0.5) / 8.0f); + + float32_t pdf; + uint32_t index = 0; + float32_t3 sampleDir; + bool valid; + + if (samplingMode == SAMPLING_MODE::TRIANGLE_SOLID_ANGLE || samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE) + sampleDir = samplingData.sample(silhouette, xi, pdf, index); + else if (samplingMode == SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE) + sampleDir = parallelogram.sample(silEdgeNormals, xi, pdf, valid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE) + sampleDir = urena.sample(pyramid, 
silEdgeNormals, xi, pdf, valid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC) + sampleDir = biquad.sample(pyramid, silEdgeNormals, xi, pdf, valid); + else if (samplingMode == SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) + sampleDir = bilin.sample(pyramid, silEdgeNormals, xi, pdf, valid); + + if (!valid) + { + pdf = 0.0f; + // sampleDir = float32_t3(0, 0, 1); + } +#if DEBUG_DATA + else + { + validSampleCount++; + } + + DebugDataBuffer[0].rayData[i] = float32_t4(sampleDir, pdf); +#endif + +#if VISUALIZE_SAMPLES + // Draw samples on sphere + color += visualizeSample(sampleDir, xi, index, vx.uv, spherePos, ndc, aaWidth +#if DEBUG_DATA + , + DebugDataBuffer +#endif + ); +#else + if (pdf > 0.0f) + color += float4(sampleDir * 0.02f / pdf, 1.0f); +#endif // VISUALIZE_SAMPLES + } + +#if VISUALIZE_SAMPLES + + // For debugging: Draw a small indicator of which faces are found + // color += drawVisibleFaceOverlay(pc.modelMatrix, spherePos, region, aaWidth); + + // color += drawFaces(pc.modelMatrix, spherePos, aaWidth); + + // Draw clipped silhouette vertices + // color += drawClippedSilhouetteVertices(ndc, silhouette, aaWidth); + // color += drawHiddenEdges(pc.modelMatrix, spherePos, silEdgeMask, aaWidth); + // color += drawCorners(pc.modelMatrix, ndc, aaWidth, 0.05f); + color += drawRing(ndc, aaWidth); + + if (all(vx.uv >= float32_t2(0.f, 0.97f)) && all(vx.uv <= float32_t2(0.03f, 1.0f))) + { + return float32_t4(colorLUT[configIndex], 1.0f); + } +#else +#endif // VISUALIZE_SAMPLES + +#if DEBUG_DATA + InterlockedAdd(DebugDataBuffer[0].validSampleCount, validSampleCount); + InterlockedAdd(DebugDataBuffer[0].threadCount, 1u); + DebugDataBuffer[0].region = uint32_t3(region); + DebugDataBuffer[0].silhouetteIndex = uint32_t(configIndex); + DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); + for (uint32_t i = 0; i < 6; i++) + { + DebugDataBuffer[0].vertices[i] = uint32_t(getSilhouetteVertex(sil, i)); + } + 
// Samples directions inside a clipped silhouette by triangulating it as a fan around
// silhouette.vertices[0] and picking a triangle with probability proportional to its cached weight.
// The weight is the triangle's solid angle, or its projected solid angle in
// SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE.
// Only the weights and fan indices are cached; the triangle geometry is rebuilt in sample().
struct TriangleFanSampler
{
    uint32_t count;                               // Number of valid triangles
    uint32_t samplingMode;                        // Mode used during build
    float32_t totalWeight;                        // Sum of all triangle weights
    float32_t3 faceNormal;                        // Face normal (only used for projected mode)
    float32_t triangleSolidAngles[MAX_TRIANGLES]; // Weight per triangle (for selection)
    uint32_t triangleIndices[MAX_TRIANGLES];      // Vertex index i (forms triangle with v0, vi, vi+1)

    // Projected solid angle of the spherical triangle (v0, v1, v2) with respect to normal N, computed as
    // 0.5 * sum(arcLength_k * dot(edgeNormal_k, N)). Returns 0 when any two vertices are (anti)parallel.
    // Not called by the code in this struct.
    // NOTE(review): the inputs are presumably unit vectors, since the cross-product length is used as sin(arc) — confirm.
    float32_t computeProjectedSolidAngleFallback(float32_t3 v0, float32_t3 v1, float32_t3 v2, float32_t3 N)
    {
        // 1. Get edge normals (unit vectors)
        // We use the cross product of the vertices (unit vectors on sphere)
        float32_t3 n0 = cross(v0, v1);
        float32_t3 n1 = cross(v1, v2);
        float32_t3 n2 = cross(v2, v0);

        // 2. Normalize edge normals (magnitude is sin of the arc length)
        float32_t l0 = length(n0);
        float32_t l1 = length(n1);
        float32_t l2 = length(n2);

        // Guard against degenerate triangles
        if (l0 < 1e-7 || l1 < 1e-7 || l2 < 1e-7)
            return 0.0f;

        n0 /= l0;
        n1 /= l1;
        n2 /= l2;

        // 3. Get arc lengths (angles in radians)
        float32_t a = asin(clamp(l0, -1.0f, 1.0f)); // side v0-v1
        float32_t b = asin(clamp(l1, -1.0f, 1.0f)); // side v1-v2
        float32_t c = asin(clamp(l2, -1.0f, 1.0f)); // side v2-v0

        // asin only covers [0, pi/2]; arcs longer than that have a negative dot product
        if (dot(v0, v1) < 0)
            a = 3.14159265 - a;
        if (dot(v1, v2) < 0)
            b = 3.14159265 - b;
        if (dot(v2, v0) < 0)
            c = 3.14159265 - c;

        // 4. Compute projected solid angle
        float32_t Gamma = 0.5f * (a * dot(n0, N) + b * dot(n1, N) + c * dot(n2, N));

        // Return the absolute value of the total
        return abs(Gamma);
    }

    // Build fan triangulation, cache weights for triangle selection.
    //   silhouette - clipped silhouette; fewer than 3 vertices yields an empty sampler (count == 0)
    //   mode       - SAMPLING_MODE value, stored and reused by sample()
    // At most MAX_TRIANGLES triangles are stored. Any further fan triangle is dropped, and with
    // DEBUG_DATA reported through maxTrianglesExceeded, instead of being written past the arrays.
    static TriangleFanSampler create(ClippedSilhouette silhouette, uint32_t mode)
    {
        TriangleFanSampler self;
        self.count = 0;
        self.totalWeight = 0.0f;
        self.samplingMode = mode;
        self.faceNormal = float32_t3(0, 0, 0);

        if (silhouette.count < 3)
            return self;

        const float32_t3 v0 = silhouette.vertices[0];
        const float32_t3 origin = float32_t3(0, 0, 0);

        // Compute face normal ONCE before the loop - silhouette is planar!
        if (mode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE)
        {
            float32_t3 v1 = silhouette.vertices[1];
            float32_t3 v2 = silhouette.vertices[2];
            self.faceNormal = normalize(cross(v1 - v0, v2 - v0));
        }

#if DEBUG_DATA
        bool fanTruncated = false;
#endif

        // Build fan triangulation from v0
        NBL_UNROLL
        for (uint32_t i = 1; i < silhouette.count - 1; i++)
        {
            float32_t3 v1 = silhouette.vertices[i];
            float32_t3 v2 = silhouette.vertices[i + 1];

            shapes::SphericalTriangle shapeTri = shapes::SphericalTriangle::create(v0, v1, v2, origin);

            // Skip degenerate triangles
            if (shapeTri.pyramidAngles())
                continue;

            // Calculate triangle solid angle
            float32_t solidAngle;
            if (mode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE)
            {
                // cosines of the triangle's vertex angles, derived from the side cosines/cosecants
                float32_t3 cos_vertices = clamp(
                    (shapeTri.cos_sides - shapeTri.cos_sides.yzx * shapeTri.cos_sides.zxy) *
                        shapeTri.csc_sides.yzx * shapeTri.csc_sides.zxy,
                    float32_t3(-1.0f, -1.0f, -1.0f),
                    float32_t3(1.0f, 1.0f, 1.0f));
                solidAngle = shapeTri.projectedSolidAngleOfTriangle(self.faceNormal, shapeTri.cos_sides, shapeTri.csc_sides, cos_vertices);
            }
            else
            {
                solidAngle = shapeTri.solidAngleOfTriangle();
            }

            if (solidAngle <= 0.0f)
                continue;

            // Never write past the fixed-size caches.
            if (self.count >= MAX_TRIANGLES)
            {
#if DEBUG_DATA
                fanTruncated = true;
#endif
                break;
            }

            // Store only what's needed for weighted selection
            self.triangleSolidAngles[self.count] = solidAngle;
            self.triangleIndices[self.count] = i;
            self.totalWeight += solidAngle;
            self.count++;
        }

#if DEBUG_DATA
        // Validate no antipodal edges exist (would create spherical lune)
        for (uint32_t i = 0; i < silhouette.count; i++)
        {
            uint32_t j = (i + 1) % silhouette.count;
            float32_t3 n1 = normalize(silhouette.vertices[i]);
            float32_t3 n2 = normalize(silhouette.vertices[j]);

            if (dot(n1, n2) < -0.99f)
            {
                DebugDataBuffer[0].sphericalLuneDetected = 1;
                assert(false && "Spherical lune detected: antipodal silhouette edge");
            }
        }
        DebugDataBuffer[0].maxTrianglesExceeded = fanTruncated;
        DebugDataBuffer[0].triangleCount = self.count;
        DebugDataBuffer[0].totalSolidAngles = self.totalWeight;
        for (uint32_t tri = 0; tri < self.count; tri++)
        {
            DebugDataBuffer[0].solidAngles[tri] = self.triangleSolidAngles[tri];
        }
#endif

        return self;
    }

    // Sample using cached selection weights, recompute geometry on-demand.
    //   silhouette  - must be the silhouette that was passed to create()
    //   xi          - xi.x selects the triangle and is then remapped; xi.y is passed through
    //   pdf         - out: (cached weight / totalWeight) / rcpPdf-of-the-triangle; 0 when nothing can be sampled
    //   selectedIdx - out: index into the cached triangle list
    // Returns the normalized sampled direction, or (0, 0, 1) together with pdf == 0 for an empty sampler.
    float32_t3 sample(ClippedSilhouette silhouette, float32_t2 xi, out float32_t pdf, out uint32_t selectedIdx)
    {
        selectedIdx = 0;

        // Handle empty or invalid data
        if (count == 0 || totalWeight <= 0.0f)
        {
            pdf = 0.0f;
            return float32_t3(0, 0, 1);
        }

        // Select triangle using cached weighted random selection.
        // The last triangle is the fallback, so a rounding-induced fall-through still leaves
        // `prevCumulativeWeight` consistent with the chosen triangle.
        float32_t targetWeight = xi.x * totalWeight;
        float32_t prevCumulativeWeight = 0.0f;
        selectedIdx = count - 1;

        NBL_UNROLL
        for (uint32_t i = 0; i < count; i++)
        {
            const float32_t cumulativeWeight = prevCumulativeWeight + triangleSolidAngles[i];

            if (targetWeight <= cumulativeWeight || i + 1 == count)
            {
                selectedIdx = i;
                break;
            }
            prevCumulativeWeight = cumulativeWeight;
        }

        // Remap xi.x to [0,1] within selected triangle's solidAngle interval
        const float32_t selectedWeight = triangleSolidAngles[selectedIdx];
        float32_t triSolidAngle = selectedWeight;
        float32_t u = clamp((targetWeight - prevCumulativeWeight) / max(triSolidAngle, 1e-7f), 0.0f, 1.0f);

        // Reconstruct the selected triangle geometry
        uint32_t vertexIdx = triangleIndices[selectedIdx];
        float32_t3 v0 = silhouette.vertices[0];
        float32_t3 v1 = silhouette.vertices[vertexIdx];
        float32_t3 v2 = silhouette.vertices[vertexIdx + 1];

        float32_t3 fn = normalize(cross(v1 - v0, v2 - v0));

        float32_t3 origin = float32_t3(0, 0, 0);

        shapes::SphericalTriangle shapeTri = shapes::SphericalTriangle::create(v0, v1, v2, origin);

        // Compute vertex angles once
        float32_t3 cos_vertices = clamp(
            (shapeTri.cos_sides - shapeTri.cos_sides.yzx * shapeTri.cos_sides.zxy) *
                shapeTri.csc_sides.yzx * shapeTri.csc_sides.zxy,
            float32_t3(-1.0f, -1.0f, -1.0f),
            float32_t3(1.0f, 1.0f, 1.0f));
        float32_t3 sin_vertices = sqrt(float32_t3(1.0f, 1.0f, 1.0f) - cos_vertices * cos_vertices);

        // Sample based on mode
        float32_t3 direction;
        float32_t rcpPdf;

        if (samplingMode == SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE)
        {
            sampling::ProjectedSphericalTriangle samplingTri = sampling::ProjectedSphericalTriangle::create(shapeTri);

            direction = samplingTri.generate(rcpPdf, triSolidAngle, cos_vertices, sin_vertices, shapeTri.cos_sides[0], shapeTri.cos_sides[2], shapeTri.csc_sides[1], shapeTri.csc_sides[2], fn, false, float32_t2(u, xi.y));
            triSolidAngle = rcpPdf; // projected solid angle returned as rcpPdf
        }
        else
        {
            sampling::SphericalTriangle samplingTri = sampling::SphericalTriangle::create(shapeTri);
            direction = samplingTri.generate(triSolidAngle, cos_vertices, sin_vertices, shapeTri.cos_sides[0], shapeTri.cos_sides[2], shapeTri.csc_sides[1], shapeTri.csc_sides[2], float32_t2(u, xi.y));
        }

        // Calculate PDF.
        // The selection probability must use the cached weight that drove the selection above,
        // not the value `triSolidAngle` may have been overwritten with.
        // A non-positive reciprocal pdf yields pdf = 0 instead of inf/NaN.
        const float32_t selectionProb = selectedWeight / totalWeight;
        pdf = (triSolidAngle > 0.0f) ? (selectionProb / triSolidAngle) : 0.0f;

        return normalize(direction);
    }
};
// TODO: implemented somewhere else?
// Bit rotation helpers

// Rotates the low `width` bits of `value` left by `bits`.
// Bits of `value` above `width` are discarded.
// Any `bits >= width` is treated as 0, i.e. no rotation.
// `width` is expected to be in [1, 32].
uint32_t rotl(uint32_t value, uint32_t bits, uint32_t width)
{
    // mask for the width (1u << 32 is not a valid shift, hence the special case)
    uint32_t mask = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);
    value &= mask;

    // Map bits>=width -> 0
    bits = (bits < width) ? bits : 0u;
    if (bits == 0u)
        return value; // also avoids shifting by the full width below

    return ((value << bits) | (value >> (width - bits))) & mask;
}

// Rotates the low `width` bits of `value` right by `bits`.
// Same conventions as rotl, including the width == 32 mask special case that was previously missing here.
uint32_t rotr(uint32_t value, uint32_t bits, uint32_t width)
{
    uint32_t mask = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);
    value &= mask;

    // Map bits>=width -> 0
    bits = (bits < width) ? bits : 0u;
    if (bits == 0u)
        return value; // also avoids shifting by the full width below

    return ((value >> bits) | (value << (width - bits))) & mask;
}

// Packs a silhouette description into one 32-bit word.
//   s[0]    - vertex count, stored in bits 29-31
//   s[1..6] - vertex indices, 3 bits each, s[1] in bits 0-2 up to s[6] in bits 15-17
// Entries whose sign bit is set (e.g. a -1 "unused" marker converted to uint32_t) are stored as 0.
uint32_t packSilhouette(const uint32_t s[7])
{
    uint32_t packed = 0;
    uint32_t size = s[0] & 0x7; // 3 bits for size

    // Pack vertices LSB-first (vertex1 in the lowest 3 bits)
    for (uint32_t i = 1; i <= 6; ++i)
    {
        uint32_t v = s[i];
        // `v < 0` can never be true for an unsigned value; test the sign bit instead
        if (int32_t(v) < 0)
            v = 0; // replace unused vertices with 0
        packed |= (v & 0x7) << (3 * (i - 1)); // vertex i-1 shifted by 3*(i-1)
    }

    // Put size in the MSB (bits 29-31 for a 32-bit uint32_t, leaving 29 bits for vertices)
    packed |= (size & 0x7) << 29;

    return packed;
}

// i-th point of a Hammersley-style set: (i / numSamples, bit-reversed i scaled to the unit interval).
// `numSamples` must be non-zero.
float32_t2 hammersleySample(uint32_t i, uint32_t numSamples)
{
    return float32_t2(
        float32_t(i) / float32_t(numSamples),
        float32_t(reversebits(i)) / 4294967295.0f);
}

// Draws two values from `rnd` and scales each by 2^-32.
float32_t2 nextRandomUnorm2(inout nbl::hlsl::Xoroshiro64StarStar rnd)
{
    return float32_t2(
        float32_t(rnd()) * 2.3283064365386963e-10,
        float32_t(rnd()) * 2.3283064365386963e-10);
}
+ ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/73_SolidAngleVisualizer/include/common.hpp b/73_SolidAngleVisualizer/include/common.hpp new file mode 100644 index 000000000..fe7d086dd --- /dev/null +++ b/73_SolidAngleVisualizer/include/common.hpp @@ -0,0 +1,19 @@ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ + + +#include "nbl/examples/examples.hpp" + +// the example's headers +#include "transform.hpp" + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ \ No newline at end of file diff --git a/73_SolidAngleVisualizer/include/transform.hpp b/73_SolidAngleVisualizer/include/transform.hpp new file mode 100644 index 000000000..e1ffcd764 --- /dev/null +++ b/73_SolidAngleVisualizer/include/transform.hpp @@ -0,0 +1,213 @@ +#ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ + +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" +#include "imguizmo/ImGuizmo.h" + +struct TransformRequestParams +{ + uint8_t sceneTexDescIx = ~0; + bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = true; +}; + +struct TransformReturnInfo +{ + nbl::hlsl::uint16_t2 sceneResolution = { 1, 1 }; + bool allowCameraMovement = false; +}; + +TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) +{ + static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE); + static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL); + static bool useSnap = false; + static float snap[3] = { 1.f, 1.f, 1.f }; + static float bounds[] = 
{ -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f }; + static float boundsSnap[] = { 0.1f, 0.1f, 0.1f }; + static bool boundSizing = false; + static bool boundSizingSnap = false; + + ImGui::Text("Use gizmo (T/R/G) or ViewManipulate widget to transform the cube"); + + if (params.editTransformDecomposition) + { + if (ImGui::IsKeyPressed(ImGuiKey_T)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + if (ImGui::IsKeyPressed(ImGuiKey_R)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + if (ImGui::IsKeyPressed(ImGuiKey_G)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL)) + mCurrentGizmoOperation = ImGuizmo::UNIVERSAL; + + // For UI editing, decompose temporarily + float matrixTranslation[3], matrixRotation[3], matrixScale[3]; + ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale); + ImGui::DragFloat3("Tr", matrixTranslation, 0.01f); + ImGui::DragFloat3("Rt", matrixRotation, 0.01f); + ImGui::DragFloat3("Sc", matrixScale, 0.01f); + ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix); + + if (mCurrentGizmoOperation != ImGuizmo::SCALE) + { + if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL)) + mCurrentGizmoMode = ImGuizmo::LOCAL; + ImGui::SameLine(); + if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD)) + mCurrentGizmoMode = ImGuizmo::WORLD; + } + if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift)) + useSnap = !useSnap; + 
ImGui::Checkbox("##UseSnap", &useSnap); + ImGui::SameLine(); + + switch (mCurrentGizmoOperation) + { + case ImGuizmo::TRANSLATE: + ImGui::InputFloat3("Snap", &snap[0]); + break; + case ImGuizmo::ROTATE: + ImGui::InputFloat("Angle Snap", &snap[0]); + break; + case ImGuizmo::SCALE: + ImGui::InputFloat("Scale Snap", &snap[0]); + break; + } + ImGui::Checkbox("Bound Sizing", &boundSizing); + if (boundSizing) + { + ImGui::PushID(3); + ImGui::Checkbox("##BoundSizing", &boundSizingSnap); + ImGui::SameLine(); + ImGui::InputFloat3("Snap", boundsSnap); + ImGui::PopID(); + } + } + + ImGuiIO& io = ImGui::GetIO(); + float viewManipulateRight = io.DisplaySize.x; + float viewManipulateTop = 0; + bool isWindowHovered = false; + static ImGuiWindowFlags gizmoWindowFlags = 0; + + /* + for the "useWindow" case we just render to a gui area, + otherwise to fake full screen transparent window + + note that for both cases we make sure gizmo being + rendered is aligned to our texture scene using + imgui "cursor" screen positions + */ + // TODO: this shouldn't be handled here I think + SImResourceInfo info; + info.textureID = params.sceneTexDescIx; + info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; + + TransformReturnInfo retval; + if (params.useWindow) + { + ImGui::SetNextWindowSize(ImVec2(800, 800), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing); + ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f)); + ImGui::Begin("Gizmo", 0, gizmoWindowFlags); + ImGuizmo::SetDrawlist(); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 windowPos = ImGui::GetWindowPos(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + isWindowHovered = ImGui::IsWindowHovered(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval.sceneResolution = { contentRegionSize.x,contentRegionSize.y }; + + viewManipulateRight = 
cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + + ImGuiWindow* window = ImGui::GetCurrentWindow(); + gizmoWindowFlags = (isWindowHovered && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0); + } + else + { + ImGui::SetNextWindowPos(ImVec2(0, 0)); + ImGui::SetNextWindowSize(io.DisplaySize); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window + ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + isWindowHovered = ImGui::IsWindowHovered(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval.sceneResolution = { contentRegionSize.x,contentRegionSize.y }; + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + } + + // Standard Manipulate gizmo - let ImGuizmo modify the matrix directly + ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? 
boundsSnap : NULL); + + retval.allowCameraMovement = isWindowHovered && !ImGuizmo::IsUsing(); + + // ViewManipulate for rotating the view + if (params.enableViewManipulate) + { + // Store original translation and scale before ViewManipulate + // Decompose original matrix + nbl::hlsl::float32_t3 translation, rotation, scale; + ImGuizmo::DecomposeMatrixToComponents(matrix, &translation.x, &rotation.x, &scale.x); + // Create rotation-only matrix + nbl::hlsl::float32_t4x4 temp; + nbl::hlsl::float32_t3 baseTranslation(0.0f); + nbl::hlsl::float32_t3 baseScale(1.0f); + ImGuizmo::RecomposeMatrixFromComponents(&baseTranslation.x, &rotation.x, &baseScale.x, &temp[0][0]); + temp = nbl::hlsl::transpose(temp); + + // Invert to make it "view-like" + nbl::hlsl::float32_t4x4 tempInv = nbl::hlsl::inverse(temp); + + // Create flip matrix (flip X to fix left/right) + nbl::hlsl::float32_t4x4 flip(1.0f); + flip[0][0] = -1.0f; // Flip X axis + + // Apply flip to the inverted matrix + tempInv = nbl::hlsl::mul(nbl::hlsl::mul(flip, tempInv), flip); + + // Manipulate + ImGuizmo::ViewManipulate(&tempInv[0][0], 1.0f, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + + // Undo flip (flip is its own inverse, so multiply by flip again) + tempInv = nbl::hlsl::mul(nbl::hlsl::mul(flip, tempInv), flip); + + // Invert back to model space + temp = nbl::hlsl::inverse(tempInv); + temp = nbl::hlsl::transpose(temp); + + // Extract rotation + nbl::hlsl::float32_t3 newRot; + ImGuizmo::DecomposeMatrixToComponents(&temp[0][0], &baseTranslation.x, &newRot.x, &baseScale.x); + // Recompose original matrix with new rotation but keep translation & scale + ImGuizmo::RecomposeMatrixFromComponents(&translation.x, &newRot.x, &scale.x, matrix); + + retval.allowCameraMovement &= isWindowHovered && !ImGuizmo::IsUsingViewManipulate(); + } + + ImGui::End(); + ImGui::PopStyleColor(); + + return retval; +} + +#endif // _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ \ No newline at end of file 
diff --git a/73_SolidAngleVisualizer/main.cpp b/73_SolidAngleVisualizer/main.cpp new file mode 100644 index 000000000..c60952394 --- /dev/null +++ b/73_SolidAngleVisualizer/main.cpp @@ -0,0 +1,1777 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +#include "common.hpp" +#include +#include +#include "app_resources/hlsl/common.hlsl" +#include "app_resources/hlsl/benchmark/common.hlsl" +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + +/* +Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window. + +Written with Nabla's UI extension and got integrated with ImGuizmo to handle scene's object translations. +*/ +class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; + +public: + inline SolidAngleVisualizer(const path &_localInputCWD, const path &_localOutputCWD, const path &_sharedInputCWD, const path &_sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({2048, 1024}, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) + { + } + + inline bool onAppInitialized(smart_refctd_ptr &&system) override + { + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + interface.m_visualizer = this; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + for 
(auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!pool) + return logFail("Couldn't create Command Pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, {m_cmdBufs.data() + i, 1})) + return logFail("Couldn't create Command Buffer!"); + } + + const uint32_t addtionalBufferOwnershipFamilies[] = {getGraphicsQueue()->getFamilyIndex()}; + m_scene = CGeometryCreatorScene::create( + {.transferQueue = getTransferUpQueue(), + .utilities = m_utils.get(), + .logger = m_logger.get(), + .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies}, + CSimpleDebugRenderer::DefaultPolygonGeometryPatch); + + // for the scene drawing pass + { + IGPURenderpass::SCreationParams params = {}; + const IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = { + {{{.format = sceneRenderDepthFormat, + .samples = IGPUImage::ESCF_1_BIT, + .mayAlias = false}, + /*.loadOp =*/{IGPURenderpass::LOAD_OP::CLEAR}, + /*.storeOp =*/{IGPURenderpass::STORE_OP::STORE}, + /*.initialLayout =*/{IGPUImage::LAYOUT::UNDEFINED}, + /*.finalLayout =*/{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}, + IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd}; + params.depthStencilAttachments = depthAttachments; + const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = { + {{ + {.format = finalSceneRenderFormat, + .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, + .mayAlias = false}, + /*.loadOp =*/IGPURenderpass::LOAD_OP::CLEAR, + /*.storeOp =*/IGPURenderpass::STORE_OP::STORE, + /*.initialLayout =*/IGPUImage::LAYOUT::UNDEFINED, + /*.finalLayout =*/IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read + }}, + IGPURenderpass::SCreationParams::ColorAttachmentsEnd}; + params.colorAttachments = colorAttachments; + IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { + {}, + IGPURenderpass::SCreationParams::SubpassesEnd}; + subpasses[0].depthStencilAttachment = {{.render = {.attachmentIndex = 0, 
.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}; + subpasses[0].colorAttachments[0] = {.render = {.attachmentIndex = 0, .layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}; + params.subpasses = subpasses; + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + // while color is sampled by ImGUI + .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + // don't want any writes to be available, as we are clearing both attachments + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT} + // leave view offsets and flags default + }, + { + .srcSubpass = 0, .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, .memoryBarrier = {// last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available, also won't be using depth so don't care about it being visible to anyone else + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT, + // the ImGUI will sample the color, then next frame we overwrite both attachments + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT | 
PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, + // but we only care about the availability-visibility chain between renderpass and imgui + .dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT} + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd}; + params.dependencies = dependencies; + auto solidAngleRenderpassParams = params; + m_mainRenderpass = m_device->createRenderpass(std::move(params)); + if (!m_mainRenderpass) + return logFail("Failed to create Main Renderpass!"); + + m_solidAngleRenderpass = m_device->createRenderpass(std::move(solidAngleRenderpassParams)); + if (!m_solidAngleRenderpass) + return logFail("Failed to create Solid Angle Renderpass!"); + } + + const auto &geometries = m_scene->getInitParams().geometries; + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), m_solidAngleRenderpass.get(), 0, {&geometries.front().get(), geometries.size()}); + // special case + { + const auto &pipelines = m_renderer->getInitParams().pipelines; + auto ix = 0u; + for (const auto &name : m_scene->getInitParams().geometryNames) + { + if (name == "Cone") + m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone]; + ix++; + } + } + // we'll only display one thing at a time + m_renderer->m_instances.resize(1); + + // Create graphics pipeline + { + auto loadPrecompiledShader = [&](auto key) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load precompiled shader!", ILogger::ELL_ERROR); + std::exit(-1); + } + assert(assets.size() == 1); + auto shader = IAsset::castDown(assets[0]); + if (!shader) + { + m_logger->log("Failed to load precompiled shader!", ILogger::ELL_ERROR); + std::exit(-1); + } + return shader; + }; + + 
ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load pre-compiled fragment shaders (6 modes x 2 debug = 12 SolidAngleVis + 2 RayVis) + // Can't use string literal template args in a loop, so unroll manually + // Index: mode * 2 + debugFlag (0=release, 1=debug) + smart_refctd_ptr saVisShaders[SAMPLING_MODE::Count * DebugPermutations]; + saVisShaders[0] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_tri_sa">(m_device.get())); + saVisShaders[1] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_tri_sa_dbg">(m_device.get())); + saVisShaders[2] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_tri_psa">(m_device.get())); + saVisShaders[3] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_tri_psa_dbg">(m_device.get())); + saVisShaders[4] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_para">(m_device.get())); + saVisShaders[5] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_para_dbg">(m_device.get())); + saVisShaders[6] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_rectangle">(m_device.get())); + saVisShaders[7] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_rectangle_dbg">(m_device.get())); + saVisShaders[8] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_biquad">(m_device.get())); + saVisShaders[9] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_biquad_dbg">(m_device.get())); + saVisShaders[10] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_bilinear">(m_device.get())); + saVisShaders[11] = 
loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"sa_vis_bilinear_dbg">(m_device.get())); + + smart_refctd_ptr rayVisShaders[DebugPermutations]; + rayVisShaders[0] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"ray_vis">(m_device.get())); + rayVisShaders[1] = loadPrecompiledShader(nbl::this_example::builtin::build::get_spirv_key<"ray_vis_dbg">(m_device.get())); + + smart_refctd_ptr solidAngleVisLayout, rayVisLayout; + nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = + { + {.binding = 0, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_FRAGMENT, + .count = 1}}; + smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); + + const asset::SPushConstantRange saRanges[] = {{.stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, .offset = 0, .size = sizeof(PushConstants)}}; + const asset::SPushConstantRange rayRanges[] = {{.stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, .offset = 0, .size = sizeof(PushConstantRayVis)}}; + + if (!dsLayout) + logFail("Failed to create a Descriptor Layout!\n"); + + solidAngleVisLayout = m_device->createPipelineLayout(saRanges, dsLayout); + + rayVisLayout = m_device->createPipelineLayout(rayRanges, dsLayout); + + { + // Create all SolidAngleVis pipeline variants + for (uint32_t i = 0; i < SAMPLING_MODE::Count * DebugPermutations; i++) + { + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = saVisShaders[i].get(), + .entryPoint = "main"}; + m_solidAngleVisPipelines[i] = fsTriProtoPPln.createPipeline(fragSpec, solidAngleVisLayout.get(), m_solidAngleRenderpass.get()); + if (!m_solidAngleVisPipelines[i]) + return logFail("Could not create SolidAngleVis Graphics Pipeline variant %d!", i); + } + + asset::SRasterizationParams rasterParams = ext::FullScreenTriangle::ProtoPipeline::DefaultRasterParams; + rasterParams.depthWriteEnable = true; + 
rasterParams.depthCompareOp = asset::E_COMPARE_OP::ECO_GREATER; + + // Create all RayVis pipeline variants + for (uint32_t i = 0; i < DebugPermutations; i++) + { + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = rayVisShaders[i].get(), + .entryPoint = "main"}; + m_rayVisPipelines[i] = fsTriProtoPPln.createPipeline(fragSpec, rayVisLayout.get(), m_mainRenderpass.get(), 0, {}, rasterParams); + if (!m_rayVisPipelines[i]) + return logFail("Could not create RayVis Graphics Pipeline variant %d!", i); + } + } + // Allocate the memory + { + constexpr size_t BufferSize = sizeof(ResultData); + + nbl::video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; + m_outputStorageBuffer = m_device->createBuffer(std::move(params)); + if (!m_outputStorageBuffer) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + m_outputStorageBuffer->setObjectDebugName("ResultData output buffer"); + + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = m_outputStorageBuffer->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_allocation = m_device->allocate(reqs, m_outputStorageBuffer.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_allocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(m_outputStorageBuffer->getBoundMemory().memory == m_allocation.memory.get()); + smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, {&dsLayout.get(), 1}); + + m_ds = pool->createDescriptorSet(std::move(dsLayout)); + { + IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = smart_refctd_ptr(m_outputStorageBuffer); + info[0].info.buffer = {.offset = 0, .size = BufferSize}; + IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(), .binding = 0, .arrayElement = 0, .count = 
1, .info = info}}; + m_device->updateDescriptorSets(writes, {}); + } + } + + if (!m_allocation.memory->map({0ull, m_allocation.memory->getAllocationSize()}, IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches + const ILogicalDevice::MappedMemoryRange memoryRange(m_allocation.memory.get(), 0ull, m_allocation.memory->getAllocationSize()); + if (!m_allocation.memory->getMemoryPropertyFlags().hasFlags(IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); + } + + // Create ImGUI + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::imgui::UI::SCreationParameters params = {}; + params.resources.texturesInfo = {.setIx = 0u, .bindingIx = TexturesImGUIBindingIndex}; + params.resources.samplersInfo = {.setIx = 0u, .bindingIx = 1u}; + params.utilities = m_utils; + params.transfer = getTransferUpQueue(); + params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxImGUITextures); + params.assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + params.renderpass = smart_refctd_ptr(scRes->getRenderpass()); + params.subpassIx = 0u; + params.pipelineCache = nullptr; + interface.imGUI = ext::imgui::UI::create(std::move(params)); + if (!interface.imGUI) + return logFail("Failed to create `nbl::ext::imgui::UI` class"); + } + + // create rest of User Interface + { + auto *imgui = interface.imGUI.get(); + // create the suballocated descriptor set + { + // note that we use default layout provided by our extension, but you are free to create your own by filling ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto *layout = interface.imGUI->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + auto pool = 
m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, {&layout, 1}); + auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); + interface.subAllocDS = make_smart_refctd_ptr(std::move(ds)); + if (!interface.subAllocDS) + return logFail("Failed to create the descriptor set"); + // make sure Texture Atlas slot is taken for eternity + { + auto dummy = SubAllocatedDescriptorSet::invalid_value; + interface.subAllocDS->multi_allocate(0, 1, &dummy); + assert(dummy == ext::imgui::UI::FontAtlasTexId); + } + // write constant descriptors, note we don't create info & write pair for the samplers because UI extension's are immutable and baked into DS layout + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.desc = smart_refctd_ptr(interface.imGUI->getFontAtlasView()); + info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write = { + .dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = ext::imgui::UI::FontAtlasTexId, + .count = 1, + .info = &info}; + if (!m_device->updateDescriptorSets({&write, 1}, {})) + return logFail("Failed to write the descriptor set"); + } + imgui->registerListener([this]() + { interface(); }); + } + + interface.camera.mapKeysToWASD(); + + onAppInitializedFinish(); + return true; + } + + // + virtual inline bool onAppTerminated() + { + SubAllocatedDescriptorSet::value_type fontAtlasDescIx = ext::imgui::UI::FontAtlasTexId; + IGPUDescriptorSet::SDropDescriptorSet dummy[1]; + interface.subAllocDS->multi_deallocate(dummy, TexturesImGUIBindingIndex, 1, &fontAtlasDescIx); + return device_base_t::onAppTerminated(); + } + + inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override + { + // CPU events + update(nextPresentationTimestamp); + + { + const auto &virtualSolidAngleWindowRes = 
interface.solidAngleViewTransformReturnInfo.sceneResolution; + const auto &virtualMainWindowRes = interface.mainViewTransformReturnInfo.sceneResolution; + if (!m_solidAngleViewFramebuffer || m_solidAngleViewFramebuffer->getCreationParameters().width != virtualSolidAngleWindowRes[0] || m_solidAngleViewFramebuffer->getCreationParameters().height != virtualSolidAngleWindowRes[1] || + !m_mainViewFramebuffer || m_mainViewFramebuffer->getCreationParameters().width != virtualMainWindowRes[0] || m_mainViewFramebuffer->getCreationParameters().height != virtualMainWindowRes[1]) + recreateFramebuffers(); + } + + // + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + auto *const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + if (m_solidAngleViewFramebuffer) + { + asset::SBufferRange range{ + .offset = 0, + .size = m_outputStorageBuffer->getSize(), + .buffer = m_outputStorageBuffer}; + cb->fillBuffer(range, 0u); + { + + const auto &creationParams = m_solidAngleViewFramebuffer->getCreationParameters(); + cb->beginDebugMarker("Draw Circle View Frame"); + { + const IGPUCommandBuffer::SClearDepthStencilValue farValue = {.depth = 0.f}; + const IGPUCommandBuffer::SClearColorValue clearValue = {.float32 = {0.f, 0.f, 0.f, 1.f}}; + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = m_solidAngleViewFramebuffer.get(), + .colorClearValues = &clearValue, + .depthStencilClearValues = &farValue, + .renderArea = { + .offset = {0, 0}, + .extent = {creationParams.width, creationParams.height}}}; + beginRenderpass(cb, renderpassInfo); + } + // draw scene + { + static uint32_t lastFrameSeed = 0u; + lastFrameSeed = m_frameSeeding ? 
static_cast(m_realFrameIx) : lastFrameSeed; + PushConstants pc{ + .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), + .viewport = {0.f, 0.f, static_cast(creationParams.width), static_cast(creationParams.height)}, + .sampleCount = static_cast(m_SampleCount), + .frameIndex = lastFrameSeed}; + const uint32_t debugIdx = m_debugVisualization ? 1u : 0u; + auto pipeline = m_solidAngleVisPipelines[m_samplingMode * DebugPermutations + debugIdx]; + cb->bindGraphicsPipeline(pipeline.get()); + cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc); + cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 0, 1, &m_ds.get()); + ext::FullScreenTriangle::recordDrawCall(cb); + } + cb->endRenderPass(); + cb->endDebugMarker(); + } + + if (m_debugVisualization) + { + m_device->waitIdle(); + std::memcpy(&m_GPUOutResulData, static_cast(m_allocation.memory->getMappedPointer()), sizeof(ResultData)); + m_device->waitIdle(); + } + } + // draw main view + if (m_mainViewFramebuffer) + { + { + auto creationParams = m_mainViewFramebuffer->getCreationParameters(); + const IGPUCommandBuffer::SClearDepthStencilValue farValue = {.depth = 0.f}; + const IGPUCommandBuffer::SClearColorValue clearValue = {.float32 = {0.1f, 0.1f, 0.1f, 1.f}}; + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = m_mainViewFramebuffer.get(), + .colorClearValues = &clearValue, + .depthStencilClearValues = &farValue, + .renderArea = { + .offset = {0, 0}, + .extent = {creationParams.width, creationParams.height}}}; + beginRenderpass(cb, renderpassInfo); + } + { // draw rays visualization + auto creationParams = m_mainViewFramebuffer->getCreationParameters(); + + cb->beginDebugMarker("Draw Rays visualization"); + // draw scene + { + float32_t4x4 viewProj = *reinterpret_cast(&interface.camera.getConcatenatedMatrix()); + float32_t3x4 view = *reinterpret_cast(&interface.camera.getViewMatrix()); + 
PushConstantRayVis pc{ + .viewProjMatrix = viewProj, + .viewMatrix = view, + .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), + .invModelMatrix = hlsl::float32_t3x4(hlsl::transpose(hlsl::inverse(interface.m_OBBModelMatrix))), + .viewport = {0.f, 0.f, static_cast(creationParams.width), static_cast(creationParams.height)}, + .frameIndex = m_frameSeeding ? static_cast(m_realFrameIx) : 0u}; + auto pipeline = m_rayVisPipelines[m_debugVisualization ? 1u : 0u]; + cb->bindGraphicsPipeline(pipeline.get()); + cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc); + cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 0, 1, &m_ds.get()); + ext::FullScreenTriangle::recordDrawCall(cb); + } + cb->endDebugMarker(); + } + // draw scene + { + cb->beginDebugMarker("Main Scene Frame"); + + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; + // TODO: get rid of legacy matrices + { + const auto &camera = interface.camera; + memcpy(&viewMatrix, &camera.getViewMatrix(), sizeof(viewMatrix)); + memcpy(&viewProjMatrix, &camera.getConcatenatedMatrix(), sizeof(viewProjMatrix)); + } + const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix); + + // tear down scene every frame + auto &instance = m_renderer->m_instances[0]; + instance.world = float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)); + instance.packedGeo = m_renderer->getGeometries().data(); // cube // +interface.gcIndex; + m_renderer->render(cb, viewParams); // draw the cube/OBB + + instance.world = float32_t3x4(1.0f); + instance.packedGeo = m_renderer->getGeometries().data() + 2; // disk + m_renderer->render(cb, viewParams); + } + + cb->endDebugMarker(); + cb->endRenderPass(); + } + + { + cb->beginDebugMarker("SolidAngleVisualizer IMGUI Frame"); + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SClearColorValue clearValue = {.float32 = {0.f, 0.f, 0.f, 1.f}}; 
+ const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = { + .offset = {0, 0}, + .extent = {m_window->getWidth(), m_window->getHeight()}}}; + beginRenderpass(cb, renderpassInfo); + } + // draw ImGUI + { + auto *imgui = interface.imGUI.get(); + auto *pipeline = imgui->getPipeline(); + cb->bindGraphicsPipeline(pipeline); + // note that we use default UI pipeline layout where uiParams.resources.textures.setIx == uiParams.resources.samplers.setIx + const auto *ds = interface.subAllocDS->getDescriptorSet(); + cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); + // a timepoint in the future to release streaming resources for geometry + const ISemaphore::SWaitInfo drawFinished = {.semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u}; + if (!imgui->render(cb, drawFinished)) + { + m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); + return {}; + } + } + cb->endRenderPass(); + cb->endDebugMarker(); + } + cb->end(); + + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS}; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb}}; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { + {.semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE}}; + const IQueue::SSubmitInfo infos[] = + { + {.waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval, 1}}}; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = 
nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + m_window->setCaption("[Nabla Engine] UI App Test Demo"); + return retval; + } + +protected: + const video::IGPURenderpass::SCreationParams::SSubpassDependency *getDefaultSubpassDependencies() const override + { + // Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // don't want any writes to be available, we'll clear, only thing to worry about is the layout transition + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // should sync against the semaphore wait anyway + .srcAccessMask = ACCESS_FLAGS::NONE, + // layout transition needs to finish before the color write + .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT} + // leave view offsets and flags default + }, + // want layout transition to begin after all color output is done + { + .srcSubpass = 0, .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd}; + return dependencies; + } + +private: + inline void update(const std::chrono::microseconds nextPresentationTimestamp) + { + auto &camera = interface.camera; + camera.setMoveSpeed(interface.moveSpeed); + camera.setRotateSpeed(interface.rotateSpeed); + + 
m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } uiEvents; + + // TODO: should be a member really + static std::chrono::microseconds previousEventTimestamp{}; + + // I think begin/end should always be called on camera, just events shouldn't be fed, why? + // If you stop begin/end, whatever keys were up/down get their up/down values frozen leading to + // `perActionDt` becoming obnoxiously large the first time the even processing resumes due to + // `timeDiff` being computed since `lastVirtualUpTimeStamp` + camera.beginInputProcessing(nextPresentationTimestamp); + { + mouse.consumeEvents([&](const IMouseEventChannel::range_t &events) -> void + { + if (interface.move) + camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + else + camera.mouseKeysUp(); + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + uiEvents.mouse.emplace_back(e); + + //if (e.type == nbl::ui::SMouseEvent::EET_SCROLL && m_renderer) + //{ + // interface.gcIndex += int16_t(core::sign(e.scrollEvent.verticalScroll)); + // interface.gcIndex = core::clamp(interface.gcIndex, 0ull, m_renderer->getGeometries().size() - 1); + //} + } }, + m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t &events) -> void + { + if (interface.move) + camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + uiEvents.keyboard.emplace_back(e); + } }, + m_logger.get()); + } + camera.endInputProcessing(nextPresentationTimestamp); + + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + + ext::imgui::UI::SUpdateParameters params = + 
{ + .mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()), + .displaySize = {m_window->getWidth(), m_window->getHeight()}, + .mouseEvents = uiEvents.mouse, + .keyboardEvents = uiEvents.keyboard}; + + // interface.objectName = m_scene->getInitParams().geometryNames[interface.gcIndex]; + interface.imGUI->update(params); + } + + void recreateFramebuffers() + { + + auto createImageAndView = [&](const uint16_t2 resolution, E_FORMAT format) -> smart_refctd_ptr + { + auto image = m_device->createImage({{.type = IGPUImage::ET_2D, + .samples = IGPUImage::ESCF_1_BIT, + .format = format, + .extent = {resolution.x, resolution.y, 1}, + .mipLevels = 1, + .arrayLayers = 1, + .usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT}}); + if (!m_device->allocate(image->getMemoryReqs(), image.get()).isValid()) + return nullptr; + IGPUImageView::SCreationParams params = { + .image = std::move(image), + .viewType = IGPUImageView::ET_2D, + .format = format}; + params.subresourceRange.aspectMask = isDepthOrStencilFormat(format) ? 
IGPUImage::EAF_DEPTH_BIT : IGPUImage::EAF_COLOR_BIT; + return m_device->createImageView(std::move(params)); + }; + + smart_refctd_ptr solidAngleView; + smart_refctd_ptr mainView; + const uint16_t2 solidAngleViewRes = interface.solidAngleViewTransformReturnInfo.sceneResolution; + const uint16_t2 mainViewRes = interface.mainViewTransformReturnInfo.sceneResolution; + + // detect window minimization + if (solidAngleViewRes.x < 0x4000 && solidAngleViewRes.y < 0x4000 || + mainViewRes.x < 0x4000 && mainViewRes.y < 0x4000) + { + solidAngleView = createImageAndView(solidAngleViewRes, finalSceneRenderFormat); + auto solidAngleDepthView = createImageAndView(solidAngleViewRes, sceneRenderDepthFormat); + m_solidAngleViewFramebuffer = m_device->createFramebuffer({{.renderpass = m_solidAngleRenderpass, + .depthStencilAttachments = &solidAngleDepthView.get(), + .colorAttachments = &solidAngleView.get(), + .width = solidAngleViewRes.x, + .height = solidAngleViewRes.y}}); + + mainView = createImageAndView(mainViewRes, finalSceneRenderFormat); + auto mainDepthView = createImageAndView(mainViewRes, sceneRenderDepthFormat); + m_mainViewFramebuffer = m_device->createFramebuffer({{.renderpass = m_mainRenderpass, + .depthStencilAttachments = &mainDepthView.get(), + .colorAttachments = &mainView.get(), + .width = mainViewRes.x, + .height = mainViewRes.y}}); + } + else + { + m_solidAngleViewFramebuffer = nullptr; + m_mainViewFramebuffer = nullptr; + } + + // release previous slot and its image + interface.subAllocDS->multi_deallocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices, {.semaphore = m_semaphore.get(), .value = m_realFrameIx + 1}); + // + if (solidAngleView && mainView) + { + interface.subAllocDS->multi_allocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices); + // update descriptor set + IGPUDescriptorSet::SDescriptorInfo infos[static_cast(CInterface::Count)] = {}; + infos[0].desc = mainView; + infos[0].info.image.imageLayout = 
IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; + infos[1].desc = solidAngleView; + infos[1].info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write[static_cast(CInterface::Count)] = { + {.dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_MAIN_VIEW)], + .count = 1, + .info = &infos[static_cast(CInterface::ERV_MAIN_VIEW)]}, + {.dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)], + .count = 1, + .info = &infos[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)]}}; + m_device->updateDescriptorSets({write, static_cast(CInterface::Count)}, {}); + } + interface.transformParams.sceneTexDescIx = interface.renderColorViewDescIndices[CInterface::ERV_MAIN_VIEW]; + } + + inline void beginRenderpass(IGPUCommandBuffer *cb, const IGPUCommandBuffer::SRenderpassBeginInfo &info) + { + cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + cb->setScissor(0, 1, &info.renderArea); + const SViewport viewport = { + .x = 0, + .y = 0, + .width = static_cast(info.renderArea.extent.width), + .height = static_cast(info.renderArea.extent.height)}; + cb->setViewport(0u, 1u, &viewport); + } + + ~SolidAngleVisualizer() override + { + m_allocation.memory->unmap(); + } + + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers + constexpr static inline uint32_t MaxFramesInFlight = 3u; + constexpr static inline auto sceneRenderDepthFormat = EF_D32_SFLOAT; + constexpr static inline auto finalSceneRenderFormat = EF_R8G8B8A8_SRGB; + constexpr static inline auto TexturesImGUIBindingIndex = 0u; + // we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever 
ImGUI virtual window resizes + constexpr static inline auto MaxImGUITextures = 2u + MaxFramesInFlight; + + static inline SAMPLING_MODE m_samplingMode = SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE; + static inline bool m_debugVisualization = true; + static inline int m_SampleCount = 64; + static inline bool m_frameSeeding = true; + static inline ResultData m_GPUOutResulData; + // + smart_refctd_ptr m_scene; + smart_refctd_ptr m_solidAngleRenderpass; + smart_refctd_ptr m_mainRenderpass; + smart_refctd_ptr m_renderer; + smart_refctd_ptr m_solidAngleViewFramebuffer; + smart_refctd_ptr m_mainViewFramebuffer; + // Pipeline variants: SolidAngleVis indexed by [mode * 2 + debugFlag], RayVis by [debugFlag] + static constexpr uint32_t DebugPermutations = 2; + smart_refctd_ptr m_solidAngleVisPipelines[SAMPLING_MODE::Count * DebugPermutations]; + smart_refctd_ptr m_rayVisPipelines[DebugPermutations]; + // + nbl::video::IDeviceMemoryAllocator::SAllocation m_allocation = {}; + smart_refctd_ptr m_outputStorageBuffer; + smart_refctd_ptr m_ds = nullptr; + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + // + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + // UI stuff + struct CInterface + { + void operator()() + { + ImGuiIO &io = ImGui::GetIO(); + + // TODO: why is this a lambda and not just an assignment in a scope ? + camera.setProjectionMatrix([&]() + { + hlsl::float32_t4x4 projection; + + if (isPerspective) + if (isLH) + projection = hlsl::math::thin_lens::lhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y * 0.5f, zNear, zFar); // TODO: why do I need to divide aspect ratio by 2? 
+ else + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y * 0.5f, zNear, zFar); + else + { + float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x; + + if (isLH) + projection = hlsl::math::thin_lens::lhPerspectiveFovMatrix(viewWidth, viewHeight, zNear, zFar); + else + projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(viewWidth, viewHeight, zNear, zFar); + } + + return projection; }()); + + ImGuizmo::SetOrthographic(!isPerspective); + ImGuizmo::BeginFrame(); + + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Editor"); + + ImGui::Text("Benchmarking Solid Angle Visualizer"); + + if (ImGui::Button("Run Benchmark")) + { + SolidAngleVisualizer::SamplingBenchmark benchmark(*m_visualizer); + benchmark.run(); + } + ImGui::Separator(); + + ImGui::Text("Sampling Mode:"); + ImGui::SameLine(); + + const char *samplingModes[] = + { + "Triangle Solid Angle", + "Triangle Projected Solid Angle", + "Parallelogram Projected Solid Angle", + "Rectangle Pyramid Solid Angle", + "Biquadratic pyramid solid angle", + "Bilinear pyramid solid angle"}; + + int currentMode = static_cast(m_samplingMode); + + if (ImGui::Combo("##SamplingMode", ¤tMode, samplingModes, IM_ARRAYSIZE(samplingModes))) + { + m_samplingMode = static_cast(currentMode); + } + + ImGui::Checkbox("Debug Visualization", &m_debugVisualization); + ImGui::Text("Pipeline idx: SA=%d, Ray=%d", + static_cast(m_samplingMode) * DebugPermutations + (m_debugVisualization ? 1 : 0), + m_debugVisualization ? 
1 : 0); + ImGui::Checkbox("Frame seeding", &m_frameSeeding); + + ImGui::SliderInt("Sample Count", &m_SampleCount, 0, 512); + + ImGui::Separator(); + + ImGui::Text("Camera"); + + if (ImGui::RadioButton("LH", isLH)) + isLH = true; + + ImGui::SameLine(); + + if (ImGui::RadioButton("RH", !isLH)) + isLH = false; + + if (ImGui::RadioButton("Perspective", isPerspective)) + isPerspective = true; + + ImGui::SameLine(); + + if (ImGui::RadioButton("Orthographic", !isPerspective)) + isPerspective = false; + + ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate); + // ImGui::Checkbox("Enable camera movement", &move); + ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); + + // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case + + if (isPerspective) + ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); + else + ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20); + + ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); + + if (firstFrame) + { + camera.setPosition(cameraIntialPosition); + camera.setTarget(cameraInitialTarget); + camera.setUpVector(cameraInitialUp); + + camera.recomputeViewMatrix(); + } + firstFrame = false; + + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + if (ImGuizmo::IsUsing()) + { + ImGui::Text("Using gizmo"); + } + else + { + ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? 
"Over scale gizmo" : ""); + } + ImGui::Separator(); + + /* + * ImGuizmo expects view & perspective matrix to be column major both with 4x4 layout + * and Nabla uses row major matricies - 3x4 matrix for view & 4x4 for projection + + - VIEW: + + ImGuizmo + + | X[0] Y[0] Z[0] 0.0f | + | X[1] Y[1] Z[1] 0.0f | + | X[2] Y[2] Z[2] 0.0f | + | -Dot(X, eye) -Dot(Y, eye) -Dot(Z, eye) 1.0f | + + Nabla + + | X[0] X[1] X[2] -Dot(X, eye) | + | Y[0] Y[1] Y[2] -Dot(Y, eye) | + | Z[0] Z[1] Z[2] -Dot(Z, eye) | + + = transpose(nbl::core::matrix4SIMD()) + + - PERSPECTIVE [PROJECTION CASE]: + + ImGuizmo + + | (temp / temp2) (0.0) (0.0) (0.0) | + | (0.0) (temp / temp3) (0.0) (0.0) | + | ((right + left) / temp2) ((top + bottom) / temp3) ((-zfar - znear) / temp4) (-1.0f) | + | (0.0) (0.0) ((-temp * zfar) / temp4) (0.0) | + + Nabla + + | w (0.0) (0.0) (0.0) | + | (0.0) -h (0.0) (0.0) | + | (0.0) (0.0) (-zFar/(zFar-zNear)) (-zNear*zFar/(zFar-zNear)) | + | (0.0) (0.0) (-1.0) (0.0) | + + = transpose() + + * + * the ViewManipulate final call (inside EditTransform) returns world space column major matrix for an object, + * note it also modifies input view matrix but projection matrix is immutable + */ + + if (ImGui::IsKeyPressed(ImGuiKey_End)) + { + m_TRS = TRS{}; + } + + { + static struct + { + float32_t4x4 view, projection, model; + } imguizmoM16InOut; + + ImGuizmo::SetID(0u); + + // TODO: camera will return hlsl::float32_tMxN + auto view = camera.getViewMatrix(); + imguizmoM16InOut.view = hlsl::transpose(hlsl::math::linalg::promote_affine<4, 4>(view)); + + // TODO: camera will return hlsl::float32_tMxN + imguizmoM16InOut.projection = hlsl::transpose(camera.getProjectionMatrix()); + ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &imguizmoM16InOut.model[0][0]); + + if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates + imguizmoM16InOut.projection[1][1] *= -1.f; // 
https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ + + transformParams.editTransformDecomposition = true; + mainViewTransformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); + move = mainViewTransformReturnInfo.allowCameraMovement; + + ImGuizmo::DecomposeMatrixToComponents(&imguizmoM16InOut.model[0][0], &m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x); + ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &imguizmoM16InOut.model[0][0]); + } + // object meta display + //{ + // ImGui::Begin("Object"); + // ImGui::Text("type: \"%s\"", objectName.data()); + // ImGui::End(); + //} + + // solid angle view window + { + ImGui::SetNextWindowSize(ImVec2(800, 800), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(1240, 20), ImGuiCond_Appearing); + static bool isOpen = true; + ImGui::Begin("Projected Solid Angle View", &isOpen, 0); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + solidAngleViewTransformReturnInfo.sceneResolution = uint16_t2(static_cast(contentRegionSize.x), static_cast(contentRegionSize.y)); + solidAngleViewTransformReturnInfo.allowCameraMovement = false; // not used in this view + ImGui::Image({renderColorViewDescIndices[ERV_SOLID_ANGLE_VIEW]}, contentRegionSize); + ImGui::End(); + } + + // Show data coming from GPU + if (m_debugVisualization) + { + if (ImGui::Begin("Result Data")) + { + auto drawColorField = [&](const char *fieldName, uint32_t index) + { + ImGui::Text("%s: %u", fieldName, index); + + if (index >= 27) + { + ImGui::SameLine(); + ImGui::Text(""); + return; + } + + const auto &c = colorLUT[index]; // uses the combined LUT we made earlier + + ImGui::SameLine(); + + // Color preview button + ImGui::ColorButton( + fieldName, + ImVec4(c.r, c.g, c.b, 1.0f), + 0, + ImVec2(20, 20)); + + ImGui::SameLine(); + ImGui::Text("%s", colorNames[index]); + }; 
+ + // Vertices + if (ImGui::CollapsingHeader("Vertices", ImGuiTreeNodeFlags_DefaultOpen)) + { + for (uint32_t i = 0; i < 6; ++i) + { + if (i < m_GPUOutResulData.silhouetteVertexCount) + { + ImGui::Text("corners[%u]", i); + ImGui::SameLine(); + drawColorField(":", m_GPUOutResulData.vertices[i]); + ImGui::SameLine(); + static const float32_t3 constCorners[8] = { + float32_t3(-1, -1, -1), float32_t3(1, -1, -1), float32_t3(-1, 1, -1), float32_t3(1, 1, -1), + float32_t3(-1, -1, 1), float32_t3(1, -1, 1), float32_t3(-1, 1, 1), float32_t3(1, 1, 1)}; + float32_t3 vertexLocation = constCorners[m_GPUOutResulData.vertices[i]]; + ImGui::Text(" : (%.3f, %.3f, %.3f", vertexLocation.x, vertexLocation.y, vertexLocation.z); + } + else + { + ImGui::Text("corners[%u] :: ", i); + ImGui::SameLine(); + ImGui::ColorButton( + "", + ImVec4(0.0f, 0.0f, 0.0f, 0.0f), + 0, + ImVec2(20, 20)); + ImGui::SameLine(); + ImGui::Text(""); + } + } + } + + if (ImGui::CollapsingHeader("Color LUT Map")) + { + for (int i = 0; i < 27; i++) + drawColorField(" ", i); + } + + ImGui::Separator(); + ImGui::Text("Valid Samples: %u / %u", m_GPUOutResulData.validSampleCount / hlsl::max(m_GPUOutResulData.threadCount, 1u), m_GPUOutResulData.sampleCount); + ImGui::ProgressBar(static_cast(m_GPUOutResulData.validSampleCount / hlsl::max(m_GPUOutResulData.threadCount, 1u)) / static_cast(m_GPUOutResulData.sampleCount)); + ImGui::Separator(); + + // Silhouette + if (ImGui::CollapsingHeader("Silhouette")) + { + drawColorField("silhouetteIndex", m_GPUOutResulData.silhouetteIndex); + ImGui::Text("Region: (%u, %u, %u)", m_GPUOutResulData.region.x, m_GPUOutResulData.region.y, m_GPUOutResulData.region.z); + ImGui::Text("Silhouette Vertex Count: %u", m_GPUOutResulData.silhouetteVertexCount); + ImGui::Text("Positive Vertex Count: %u", m_GPUOutResulData.positiveVertCount); + ImGui::Text("Edge Visibility Mismatch: %s", m_GPUOutResulData.edgeVisibilityMismatch ? 
"true" : "false"); + ImGui::Text("Max Triangles Exceeded: %s", m_GPUOutResulData.maxTrianglesExceeded ? "true" : "false"); + for (uint32_t i = 0; i < 6; i++) + ImGui::Text("Vertex[%u]: %u", i, m_GPUOutResulData.vertices[i]); + ImGui::Text("Clipped Silhouette Vertex Count: %u", m_GPUOutResulData.clippedSilhouetteVertexCount); + for (uint32_t i = 0; i < 7; i++) + ImGui::Text("Clipped Vertex[%u]: (%.3f, %.3f, %.3f) Index: %u", i, + m_GPUOutResulData.clippedSilhouetteVertices[i].x, + m_GPUOutResulData.clippedSilhouetteVertices[i].y, + m_GPUOutResulData.clippedSilhouetteVertices[i].z, + m_GPUOutResulData.clippedSilhouetteVerticesIndices[i]); + + // Silhouette mask printed in binary + auto printBin = [](uint32_t bin, const char *name) + { + char buf[33]; + for (int i = 0; i < 32; i++) + buf[i] = (bin & (1u << (31 - i))) ? '1' : '0'; + buf[32] = '\0'; + ImGui::Text("%s: 0x%08X", name, bin); + ImGui::Text("binary: 0b%s", buf); + ImGui::Separator(); + }; + printBin(m_GPUOutResulData.silhouette, "Silhouette"); + printBin(m_GPUOutResulData.rotatedSil, "rotatedSilhouette"); + + printBin(m_GPUOutResulData.clipCount, "clipCount"); + printBin(m_GPUOutResulData.clipMask, "clipMask"); + printBin(m_GPUOutResulData.rotatedClipMask, "rotatedClipMask"); + printBin(m_GPUOutResulData.rotateAmount, "rotateAmount"); + printBin(m_GPUOutResulData.wrapAround, "wrapAround"); + } + + // Parallelogram + if (m_samplingMode == PROJECTED_PARALLELOGRAM_SOLID_ANGLE && ImGui::CollapsingHeader("Projected Parallelogram", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Does Not Bound: %s", m_GPUOutResulData.parallelogramDoesNotBound ? "true" : "false"); + ImGui::Text("Area: %.3f", m_GPUOutResulData.parallelogramArea); + ImGui::Text("Failed Vertex Index: %u", m_GPUOutResulData.failedVertexIndex); + for (uint32_t i = 0; i < 4; i++) + ImGui::Text("Edge Is Convex[%u]: %s", i, m_GPUOutResulData.edgeIsConvex[i] ? 
"true" : "false"); + ImGui::Text("Vertices Inside: %s", m_GPUOutResulData.parallelogramVerticesInside ? "true" : "false"); + ImGui::Text("Edges Inside: %s", m_GPUOutResulData.parallelogramEdgesInside ? "true" : "false"); + for (uint32_t i = 0; i < 4; i++) + ImGui::Text("Corner[%u]: (%.3f, %.3f)", i, m_GPUOutResulData.parallelogramCorners[i].x, m_GPUOutResulData.parallelogramCorners[i].y); + } + else if ((m_samplingMode == SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE || m_samplingMode == SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC ||m_samplingMode == SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR) && ImGui::CollapsingHeader("Spherical Pyramid", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Spans Hemisphere: %s", m_GPUOutResulData.pyramidSpansHemisphere ? "YES (warning)" : "no"); + ImGui::Text("Best Caliper Edge: %u", m_GPUOutResulData.pyramidBestEdge); + ImGui::Separator(); + + ImGui::Text("Axis 1: (%.4f, %.4f, %.4f)", + m_GPUOutResulData.pyramidAxis1.x, m_GPUOutResulData.pyramidAxis1.y, m_GPUOutResulData.pyramidAxis1.z); + ImGui::Text(" Half-Width: %.4f Offset: %.4f", + m_GPUOutResulData.pyramidHalfWidth1, m_GPUOutResulData.pyramidOffset1); + ImGui::Text(" Bounds: [%.4f, %.4f]", + m_GPUOutResulData.pyramidMin1, m_GPUOutResulData.pyramidMax1); + + ImGui::Text("Axis 2: (%.4f, %.4f, %.4f)", + m_GPUOutResulData.pyramidAxis2.x, m_GPUOutResulData.pyramidAxis2.y, m_GPUOutResulData.pyramidAxis2.z); + ImGui::Text(" Half-Width: %.4f Offset: %.4f", + m_GPUOutResulData.pyramidHalfWidth2, m_GPUOutResulData.pyramidOffset2); + ImGui::Text(" Bounds: [%.4f, %.4f]", + m_GPUOutResulData.pyramidMin2, m_GPUOutResulData.pyramidMax2); + + ImGui::Separator(); + ImGui::Text("Center: (%.4f, %.4f, %.4f)", + m_GPUOutResulData.pyramidCenter.x, m_GPUOutResulData.pyramidCenter.y, m_GPUOutResulData.pyramidCenter.z); + ImGui::Text("Solid Angle (bound): %.6f sr", m_GPUOutResulData.pyramidSolidAngle); + } + else if (m_samplingMode == TRIANGLE_SOLID_ANGLE || m_samplingMode == TRIANGLE_PROJECTED_SOLID_ANGLE && 
ImGui::CollapsingHeader("Spherical Triangle", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Spherical Lune Detected: %s", m_GPUOutResulData.sphericalLuneDetected ? "true" : "false"); + ImGui::Text("Triangle Count: %u", m_GPUOutResulData.triangleCount); + // print solidAngles for each triangle + { + ImGui::Text("Solid Angles per Triangle:"); + ImGui::BeginTable("SolidAnglesTable", 2); + ImGui::TableSetupColumn("Triangle Index"); + ImGui::TableSetupColumn("Solid Angle"); + ImGui::TableHeadersRow(); + for (uint32_t i = 0; i < m_GPUOutResulData.triangleCount; ++i) + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::Text("%u", i); + ImGui::TableSetColumnIndex(1); + ImGui::Text("%.6f", m_GPUOutResulData.solidAngles[i]); + } + ImGui::Text("Total: %.6f", m_GPUOutResulData.totalSolidAngles); + ImGui::EndTable(); + } + } + + { + float32_t3 xAxis = m_OBBModelMatrix[0].xyz; + float32_t3 yAxis = m_OBBModelMatrix[1].xyz; + float32_t3 zAxis = m_OBBModelMatrix[2].xyz; + + float32_t3 nx = normalize(xAxis); + float32_t3 ny = normalize(yAxis); + float32_t3 nz = normalize(zAxis); + + const float epsilon = 1e-4; + bool hasSkew = false; + if (abs(dot(nx, ny)) > epsilon || abs(dot(nx, nz)) > epsilon || abs(dot(ny, nz)) > epsilon) + hasSkew = true; + ImGui::Separator(); + ImGui::Text("Matrix Has Skew: %s", hasSkew ? 
"true" : "false"); + } + + static bool modalShown = false; + static bool modalDismissed = false; + static uint32_t lastSilhouetteIndex = ~0u; + + // Reset modal flags if silhouette configuration changed + if (m_GPUOutResulData.silhouetteIndex != lastSilhouetteIndex) + { + modalShown = false; + modalDismissed = false; // Allow modal to show again for new configuration + lastSilhouetteIndex = m_GPUOutResulData.silhouetteIndex; + } + + // Reset flags when mismatch is cleared + if (!m_GPUOutResulData.edgeVisibilityMismatch && !m_GPUOutResulData.maxTrianglesExceeded && !m_GPUOutResulData.sphericalLuneDetected) + { + modalShown = false; + modalDismissed = false; + } + + // Open modal only if not already shown/dismissed + if ((m_GPUOutResulData.edgeVisibilityMismatch || m_GPUOutResulData.maxTrianglesExceeded || m_GPUOutResulData.sphericalLuneDetected) && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown && !modalDismissed) // Don't reopen if user dismissed it + { + ImGui::OpenPopup("Edge Visibility Mismatch Warning"); + modalShown = true; + } + + // Modal popup + if (ImGui::BeginPopupModal("Edge Visibility Mismatch Warning", NULL, ImGuiWindowFlags_AlwaysAutoResize)) + { + ImGui::TextColored(ImVec4(1.0f, 0.5f, 0.0f, 1.0f), "Warning: Edge Visibility Mismatch Detected!"); + ImGui::Separator(); + ImGui::Text("The silhouette lookup table (LUT) does not match the computed edge visibility."); + ImGui::Text("This indicates the pre-computed silhouette data may be incorrect."); + ImGui::Spacing(); + ImGui::TextWrapped("Configuration Index: %u", m_GPUOutResulData.silhouetteIndex); + ImGui::TextWrapped("Region: (%u, %u, %u)", m_GPUOutResulData.region.x, m_GPUOutResulData.region.y, m_GPUOutResulData.region.z); + ImGui::Spacing(); + ImGui::Text("Mismatched Vertices (bitmask): 0x%08X", m_GPUOutResulData.edgeVisibilityMismatch); + ImGui::Text("Vertices involved in mismatched edges:"); + ImGui::Indent(); + for (int i = 0; i < 8; i++) + { + if (m_GPUOutResulData.edgeVisibilityMismatch 
& (1u << i)) + { + ImGui::BulletText("Vertex %d", i); + } + } + ImGui::Unindent(); + ImGui::Spacing(); + if (ImGui::Button("OK", ImVec2(120, 0))) + { + ImGui::CloseCurrentPopup(); + modalShown = false; + modalDismissed = true; // Mark as dismissed to prevent reopening + } + ImGui::EndPopup(); + } + } + ImGui::End(); + } + + // view matrices editor + { + ImGui::Begin("Matrices"); + + auto addMatrixTable = [&](const char *topText, const char *tableName, const int rows, const int columns, const float *pointer, const bool withSeparator = true) + { + ImGui::Text(topText); + if (ImGui::BeginTable(tableName, columns)) + { + for (int y = 0; y < rows; ++y) + { + ImGui::TableNextRow(); + for (int x = 0; x < columns; ++x) + { + ImGui::TableSetColumnIndex(x); + ImGui::Text("%.3f", *(pointer + (y * columns) + x)); + } + } + ImGui::EndTable(); + } + + if (withSeparator) + ImGui::Separator(); + }; + + static RandomSampler rng(0x45); // Initialize RNG with seed + + // Helper function to check if cube intersects unit sphere at origin + auto isCubeOutsideUnitSphere = [](const float32_t3 &translation, const float32_t3 &scale) -> bool + { + float cubeRadius = glm::length(scale) * 0.5f; + float distanceToCenter = glm::length(translation); + return (distanceToCenter - cubeRadius) > 1.0f; + }; + + static TRS lastTRS = {}; + if (ImGui::Button("Randomize Translation")) + { + lastTRS = m_TRS; // Backup before randomizing + int attempts = 0; + do + { + m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + attempts++; + } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); + } + ImGui::SameLine(); + if (ImGui::Button("Randomize Rotation")) + { + lastTRS = m_TRS; // Backup before randomizing + m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); + } + ImGui::SameLine(); + if (ImGui::Button("Randomize Scale")) + { + lastTRS = m_TRS; // 
Backup before randomizing + int attempts = 0; + do + { + m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + attempts++; + } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); + } + // ImGui::SameLine(); + if (ImGui::Button("Randomize All")) + { + lastTRS = m_TRS; // Backup before randomizing + int attempts = 0; + do + { + m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); + m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + attempts++; + } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); + } + ImGui::SameLine(); + if (ImGui::Button("Revert to Last")) + { + m_TRS = lastTRS; // Restore backed-up TRS + } + + addMatrixTable("Model Matrix", "ModelMatrixTable", 4, 4, &m_OBBModelMatrix[0][0]); + addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, &camera.getViewMatrix()[0].x); + addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, &camera.getProjectionMatrix()[0].x, false); + + ImGui::End(); + } + + // Nabla Imgui backend MDI buffer info + // To be 100% accurate and not overly conservative we'd have to explicitly `cull_frees` and defragment each time, + // so unless you do that, don't use this basic info to optimize the size of your IMGUI buffer. 
+ { + auto *streaminingBuffer = imGUI->getStreamingBuffer(); + + const size_t total = streaminingBuffer->get_total_size(); // total memory range size for which allocation can be requested + const size_t freeSize = streaminingBuffer->getAddressAllocator().get_free_size(); // max total free bloock memory size we can still allocate from total memory available + const size_t consumedMemory = total - freeSize; // memory currently consumed by streaming buffer + + float freePercentage = 100.0f * (float)(freeSize) / (float)total; + float allocatedPercentage = (float)(consumedMemory) / (float)total; + + ImVec2 barSize = ImVec2(400, 30); + float windowPadding = 10.0f; + float verticalPadding = ImGui::GetStyle().FramePadding.y; + + ImGui::SetNextWindowSize(ImVec2(barSize.x + 2 * windowPadding, 110 + verticalPadding), ImGuiCond_Always); + ImGui::Begin("Nabla Imgui MDI Buffer Info", nullptr, ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoScrollbar); + + ImGui::Text("Total Allocated Size: %zu bytes", total); + ImGui::Text("In use: %zu bytes", consumedMemory); + ImGui::Text("Buffer Usage:"); + + ImGui::SetCursorPosX(windowPadding); + + if (freePercentage > 70.0f) + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(0.0f, 1.0f, 0.0f, 0.4f)); // Green + else if (freePercentage > 30.0f) + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 1.0f, 0.0f, 0.4f)); // Yellow + else + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 0.0f, 0.0f, 0.4f)); // Red + + ImGui::ProgressBar(allocatedPercentage, barSize, ""); + + ImGui::PopStyleColor(); + + ImDrawList *drawList = ImGui::GetWindowDrawList(); + + ImVec2 progressBarPos = ImGui::GetItemRectMin(); + ImVec2 progressBarSize = ImGui::GetItemRectSize(); + + const char *text = "%.2f%% free"; + char textBuffer[64]; + snprintf(textBuffer, sizeof(textBuffer), text, freePercentage); + + ImVec2 textSize = ImGui::CalcTextSize(textBuffer); + ImVec2 textPos = ImVec2( + progressBarPos.x + (progressBarSize.x - textSize.x) * 0.5f, + 
progressBarPos.y + (progressBarSize.y - textSize.y) * 0.5f); + + ImVec4 bgColor = ImGui::GetStyleColorVec4(ImGuiCol_WindowBg); + drawList->AddRectFilled( + ImVec2(textPos.x - 5, textPos.y - 2), + ImVec2(textPos.x + textSize.x + 5, textPos.y + textSize.y + 2), + ImGui::GetColorU32(bgColor)); + + ImGui::SetCursorScreenPos(textPos); + ImGui::Text("%s", textBuffer); + + ImGui::Dummy(ImVec2(0.0f, verticalPadding)); + + ImGui::End(); + } + ImGui::End(); + + ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &m_OBBModelMatrix[0][0]); + } + + smart_refctd_ptr imGUI; + + // descriptor set + smart_refctd_ptr subAllocDS; + enum E_RENDER_VIEWS : uint8_t + { + ERV_MAIN_VIEW, + ERV_SOLID_ANGLE_VIEW, + Count + }; + SubAllocatedDescriptorSet::value_type renderColorViewDescIndices[E_RENDER_VIEWS::Count] = {SubAllocatedDescriptorSet::invalid_value, SubAllocatedDescriptorSet::invalid_value}; + // + Camera camera = Camera(cameraIntialPosition, cameraInitialTarget, {}, 1, 1, nbl::core::vectorSIMDf(0.0f, 0.0f, 1.0f)); + // mutables + struct TRS // Source of truth + { + float32_t3 translation{0.0f, 0.0f, 1.5f}; + float32_t3 rotation{0.0f}; // MUST stay orthonormal + float32_t3 scale{1.0f}; + } m_TRS; + float32_t4x4 m_OBBModelMatrix; // always overwritten from TRS + + // std::string_view objectName; + TransformRequestParams transformParams; + TransformReturnInfo mainViewTransformReturnInfo; + TransformReturnInfo solidAngleViewTransformReturnInfo; + + const static inline core::vectorSIMDf cameraIntialPosition{-3.0f, 6.0f, 3.0f}; + const static inline core::vectorSIMDf cameraInitialTarget{0.f, 0.0f, 3.f}; + const static inline core::vectorSIMDf cameraInitialUp{0.f, 0.f, 1.f}; + + float fov = 90.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + float viewWidth = 10.f; + // uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to 
support multiple objects this needs to be changed + bool isPerspective = true, isLH = true, flipGizmoY = true, move = true; + bool firstFrame = true; + + SolidAngleVisualizer *m_visualizer; + } interface; + + class SamplingBenchmark final + { + public: + SamplingBenchmark(SolidAngleVisualizer &base) + : m_api(base.m_api), m_device(base.m_device), m_logger(base.m_logger), m_visualizer(&base) + { + + // setting up pipeline in the constructor + m_queueFamily = base.getComputeQueue()->getFamilyIndex(); + m_cmdpool = base.m_device->createCommandPool(m_queueFamily, IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + // core::smart_refctd_ptr* cmdBuffs[] = { &m_cmdbuf, &m_timestampBeforeCmdBuff, &m_timestampAfterCmdBuff }; + if (!m_cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) + base.logFail("Failed to create Command Buffers!\n"); + if (!m_cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_timestampBeforeCmdBuff)) + base.logFail("Failed to create Command Buffers!\n"); + if (!m_cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_timestampAfterCmdBuff)) + base.logFail("Failed to create Command Buffers!\n"); + + // Load shaders, set up pipelines (one per sampling mode) + { + auto loadShader = [&](auto key) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = base.m_logger.get(); + lp.workingDirectory = "app_resources"; + auto assetBundle = base.m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + base.logFail("Could not load shader!"); + assert(0); + } + assert(assets.size() == 1); + auto shader = IAsset::castDown(assets[0]); + if (!shader) + base.logFail("Failed to load precompiled benchmark shader!\n"); + return shader; + }; + + smart_refctd_ptr shaders[SAMPLING_MODE::Count] = { + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_tri_sa">(m_device.get())), + 
loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_tri_psa">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_para">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_rectangle">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_biquad">(m_device.get())), + loadShader(nbl::this_example::builtin::build::get_spirv_key<"benchmark_bilinear">(m_device.get())), + }; + + nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { + {.binding = 0, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1}}; + smart_refctd_ptr dsLayout = base.m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + base.logFail("Failed to create a Descriptor Layout!\n"); + + SPushConstantRange pushConstantRanges[] = { + {.stageFlags = ShaderStage::ESS_COMPUTE, + .offset = 0, + .size = sizeof(BenchmarkPushConstants)}}; + m_pplnLayout = base.m_device->createPipelineLayout(pushConstantRanges, smart_refctd_ptr(dsLayout)); + if (!m_pplnLayout) + base.logFail("Failed to create a Pipeline Layout!\n"); + + for (uint32_t i = 0; i < SAMPLING_MODE::Count; i++) + { + IGPUComputePipeline::SCreationParams params = {}; + params.layout = m_pplnLayout.get(); + params.shader.entryPoint = "main"; + params.shader.shader = shaders[i].get(); + if (!base.m_device->createComputePipelines(nullptr, {¶ms, 1}, &m_pipelines[i])) + base.logFail("Failed to create pipelines (compile & link shaders)!\n"); + } + + // Allocate the memory + { + constexpr size_t BufferSize = BENCHMARK_WORKGROUP_COUNT * BENCHMARK_WORKGROUP_DIMENSION_SIZE_X * + BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y * BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z * sizeof(uint32_t); + + nbl::video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = 
IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + smart_refctd_ptr dummyBuff = base.m_device->createBuffer(std::move(params)); + if (!dummyBuff) + base.logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + dummyBuff->setObjectDebugName("benchmark buffer"); + + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = dummyBuff->getMemoryReqs(); + + m_allocation = base.m_device->allocate(reqs, dummyBuff.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_allocation.isValid()) + base.logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(dummyBuff->getBoundMemory().memory == m_allocation.memory.get()); + smart_refctd_ptr pool = base.m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, {&dsLayout.get(), 1}); + + m_ds = pool->createDescriptorSet(std::move(dsLayout)); + { + IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = smart_refctd_ptr(dummyBuff); + info[0].info.buffer = {.offset = 0, .size = BufferSize}; + IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = info}}; + base.m_device->updateDescriptorSets(writes, {}); + } + } + } + + IQueryPool::SCreationParams queryPoolCreationParams{}; + queryPoolCreationParams.queryType = IQueryPool::TYPE::TIMESTAMP; + queryPoolCreationParams.queryCount = 2; + queryPoolCreationParams.pipelineStatisticsFlags = IQueryPool::PIPELINE_STATISTICS_FLAGS::NONE; + m_queryPool = m_device->createQueryPool(queryPoolCreationParams); + + m_computeQueue = m_device->getQueue(m_queueFamily, 0); + } + + void run() + { + m_logger->log("\n\nsampling benchmark result:", ILogger::ELL_PERFORMANCE); + + m_logger->log("sampling benchmark, SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_RECTANGLE); + + m_logger->log("sampling benchmark, SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC result:", 
ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BIQUADRATIC); + + m_logger->log("sampling benchmark, SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::SYMMETRIC_PYRAMID_SOLID_ANGLE_BILINEAR); + + m_logger->log("sampling benchmark, PROJECTED_PARALLELOGRAM_SOLID_ANGLE result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::PROJECTED_PARALLELOGRAM_SOLID_ANGLE); + + m_logger->log("sampling benchmark, TRIANGLE_SOLID_ANGLE result:", ILogger::ELL_PERFORMANCE); + performBenchmark(SAMPLING_MODE::TRIANGLE_SOLID_ANGLE); + + // m_logger->log("sampling benchmark, triangle projected solid angle result:", ILogger::ELL_PERFORMANCE); + // performBenchmark(SAMPLING_MODE::TRIANGLE_PROJECTED_SOLID_ANGLE); + } + + private: + void performBenchmark(SAMPLING_MODE mode) + { + m_device->waitIdle(); + + recordTimestampQueryCmdBuffers(); + + uint64_t semaphoreCounter = 0; + smart_refctd_ptr semaphore = m_device->createSemaphore(semaphoreCounter); + + IQueue::SSubmitInfo::SSemaphoreInfo signals[] = {{.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT}}; + IQueue::SSubmitInfo::SSemaphoreInfo waits[] = {{.semaphore = semaphore.get(), .value = 0u, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT}}; + + IQueue::SSubmitInfo beforeTimestapSubmitInfo[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufsBegin[] = {{.cmdbuf = m_timestampBeforeCmdBuff.get()}}; + beforeTimestapSubmitInfo[0].commandBuffers = cmdbufsBegin; + beforeTimestapSubmitInfo[0].signalSemaphores = signals; + beforeTimestapSubmitInfo[0].waitSemaphores = waits; + + IQueue::SSubmitInfo afterTimestapSubmitInfo[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufsEnd[] = {{.cmdbuf = m_timestampAfterCmdBuff.get()}}; + afterTimestapSubmitInfo[0].commandBuffers = cmdbufsEnd; + afterTimestapSubmitInfo[0].signalSemaphores = signals; + 
afterTimestapSubmitInfo[0].waitSemaphores = waits; + + IQueue::SSubmitInfo benchmarkSubmitInfos[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = {{.cmdbuf = m_cmdbuf.get()}}; + benchmarkSubmitInfos[0].commandBuffers = cmdbufs; + benchmarkSubmitInfos[0].signalSemaphores = signals; + benchmarkSubmitInfos[0].waitSemaphores = waits; + + m_pushConstants.modelMatrix = float32_t3x4(transpose(m_visualizer->interface.m_OBBModelMatrix)); + m_pushConstants.sampleCount = m_SampleCount; + recordCmdBuff(mode); + + // warmup runs + for (int i = 0; i < WarmupIterations; ++i) + { + if (i == 0) + m_api->startCapture(); + waits[0].value = semaphoreCounter; + signals[0].value = ++semaphoreCounter; + m_computeQueue->submit(benchmarkSubmitInfos); + if (i == 0) + m_api->endCapture(); + } + + waits[0].value = semaphoreCounter; + signals[0].value = ++semaphoreCounter; + m_computeQueue->submit(beforeTimestapSubmitInfo); + + // actual benchmark runs + for (int i = 0; i < Iterations; ++i) + { + waits[0].value = semaphoreCounter; + signals[0].value = ++semaphoreCounter; + m_computeQueue->submit(benchmarkSubmitInfos); + } + + waits[0].value = semaphoreCounter; + signals[0].value = ++semaphoreCounter; + m_computeQueue->submit(afterTimestapSubmitInfo); + + m_device->waitIdle(); + + const uint64_t nativeBenchmarkTimeElapsedNanoseconds = calcTimeElapsed(); + const float nativeBenchmarkTimeElapsedSeconds = double(nativeBenchmarkTimeElapsedNanoseconds) / 1000000000.0; + + m_logger->log("%llu ns, %f s", ILogger::ELL_PERFORMANCE, nativeBenchmarkTimeElapsedNanoseconds, nativeBenchmarkTimeElapsedSeconds); + } + + void recordCmdBuff(SAMPLING_MODE mode) + { + m_cmdbuf->begin(IGPUCommandBuffer::USAGE::SIMULTANEOUS_USE_BIT); + m_cmdbuf->beginDebugMarker("sampling compute dispatch", vectorSIMDf(0, 1, 0, 1)); + m_cmdbuf->bindComputePipeline(m_pipelines[mode].get()); + m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); + 
m_cmdbuf->pushConstants(m_pplnLayout.get(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(BenchmarkPushConstants), &m_pushConstants); + m_cmdbuf->dispatch(BENCHMARK_WORKGROUP_COUNT, 1, 1); + m_cmdbuf->endDebugMarker(); + m_cmdbuf->end(); + } + + void recordTimestampQueryCmdBuffers() + { + static bool firstInvocation = true; + + if (!firstInvocation) + { + m_timestampBeforeCmdBuff->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + m_timestampBeforeCmdBuff->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + } + + m_timestampBeforeCmdBuff->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + m_timestampBeforeCmdBuff->resetQueryPool(m_queryPool.get(), 0, 2); + m_timestampBeforeCmdBuff->writeTimestamp(PIPELINE_STAGE_FLAGS::NONE, m_queryPool.get(), 0); + m_timestampBeforeCmdBuff->end(); + + m_timestampAfterCmdBuff->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + m_timestampAfterCmdBuff->writeTimestamp(PIPELINE_STAGE_FLAGS::NONE, m_queryPool.get(), 1); + m_timestampAfterCmdBuff->end(); + + firstInvocation = false; + } + + uint64_t calcTimeElapsed() + { + uint64_t timestamps[2]; + const core::bitflag flags = core::bitflag(IQueryPool::RESULTS_FLAGS::_64_BIT) | core::bitflag(IQueryPool::RESULTS_FLAGS::WAIT_BIT); + m_device->getQueryPoolResults(m_queryPool.get(), 0, 2, ×tamps, sizeof(uint64_t), flags); + return timestamps[1] - timestamps[0]; + } + + private: + core::smart_refctd_ptr m_api; + smart_refctd_ptr m_device; + smart_refctd_ptr m_logger; + SolidAngleVisualizer *m_visualizer; + + nbl::video::IDeviceMemoryAllocator::SAllocation m_allocation = {}; + smart_refctd_ptr m_cmdpool = nullptr; + smart_refctd_ptr m_cmdbuf = nullptr; + smart_refctd_ptr m_ds = nullptr; + smart_refctd_ptr m_pplnLayout = nullptr; + BenchmarkPushConstants m_pushConstants; + smart_refctd_ptr m_pipelines[SAMPLING_MODE::Count]; + + smart_refctd_ptr m_timestampBeforeCmdBuff = nullptr; + smart_refctd_ptr m_timestampAfterCmdBuff = nullptr; + smart_refctd_ptr m_queryPool = nullptr; + + uint32_t 
m_queueFamily;
+        IQueue *m_computeQueue;
+        static constexpr int WarmupIterations = 50;
+        static constexpr int Iterations = 1;
+    };
+
+    // Logs `msg` (a format string) with the forwarded `args` at ILogger::ELL_ERROR through
+    // m_logger and always returns false, so a failure can be reported and returned in one statement.
+    template <typename... Args>
+    inline bool logFail(const char *msg, Args &&...args)
+    {
+        m_logger->log(msg, ILogger::ELL_ERROR, std::forward<Args>(args)...);
+        return false;
+    }
+
+    std::ofstream m_logFile;
+};
+
+NBL_MAIN_FUNC(SolidAngleVisualizer)
\ No newline at end of file
diff --git a/73_SolidAngleVisualizer/pipeline.groovy b/73_SolidAngleVisualizer/pipeline.groovy
new file mode 100644
index 000000000..7b7c9702a
--- /dev/null
+++ b/73_SolidAngleVisualizer/pipeline.groovy
@@ -0,0 +1,50 @@
+import org.DevshGraphicsProgramming.Agent
+import org.DevshGraphicsProgramming.BuilderInfo
+import org.DevshGraphicsProgramming.IBuilder
+
+class CUIBuilder extends IBuilder
+{
+    public CUIBuilder(Agent _agent, _info)
+    {
+        super(_agent, _info)
+    }
+
+    @Override
+    public boolean prepare(Map axisMapping)
+    {
+        return true
+    }
+
+    @Override
+    public boolean build(Map axisMapping)
+    {
+        IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION")
+        IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE")
+
+        def nameOfBuildDirectory = getNameOfBuildDirectory(buildType)
+        def nameOfConfig = getNameOfConfig(config)
+
+        agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v")
+
+        return true
+    }
+
+    @Override
+    public boolean test(Map axisMapping)
+    {
+        return true
+    }
+
+    @Override
+    public boolean install(Map axisMapping)
+    {
+        return true
+    }
+}
+
+def create(Agent _agent, _info)
+{
+    return new CUIBuilder(_agent, _info)
+}
+
+return this
\ No newline at end of file
diff --git a/73_SolidAngleVisualizer/src/transform.cpp b/73_SolidAngleVisualizer/src/transform.cpp
new file mode 100644
index 000000000..e69de29bb
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d945c547a..7928738d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,6
+74,7 @@ if(NBL_BUILD_EXAMPLES) # Showcase compute pathtracing add_subdirectory(30_ComputeShaderPathTracer) + add_subdirectory(31_HLSLPathTracer) add_subdirectory(34_DebugDraw) add_subdirectory(38_EXRSplit) @@ -105,6 +106,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) add_subdirectory(72_CooperativeBinarySearch) + add_subdirectory(73_SolidAngleVisualizer) if (NBL_BUILD_MITSUBA_LOADER) add_subdirectory(73_GeometryInspector) diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index f185e60f6..8fadbd866 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -16,8 +16,8 @@ #include #include -class Camera -{ +class Camera +{ public: Camera() = default; Camera(const nbl::core::vectorSIMDf& position, const nbl::core::vectorSIMDf& lookat, const nbl::hlsl::float32_t4x4& projection, float moveSpeed = 1.0f, float rotateSpeed = 1.0f, const nbl::core::vectorSIMDf& upVec = nbl::core::vectorSIMDf(0.0f, 1.0f, 0.0f), const nbl::core::vectorSIMDf& backupUpVec = nbl::core::vectorSIMDf(0.5f, 1.0f, 0.0f)) @@ -43,6 +43,8 @@ class Camera enum E_CAMERA_MOVE_KEYS : uint8_t { ECMK_MOVE_FORWARD = 0, + ECMK_MOVE_UP, + ECMK_MOVE_DOWN, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT, @@ -51,6 +53,8 @@ class Camera inline void mapKeysToWASD() { + keysMap[ECMK_MOVE_UP] = nbl::ui::EKC_E; + keysMap[ECMK_MOVE_DOWN] = nbl::ui::EKC_Q; keysMap[ECMK_MOVE_FORWARD] = nbl::ui::EKC_W; keysMap[ECMK_MOVE_BACKWARD] = nbl::ui::EKC_S; keysMap[ECMK_MOVE_LEFT] = nbl::ui::EKC_A; @@ -68,7 +72,7 @@ class Camera inline void mapKeysCustom(std::array& map) { keysMap = map; } inline const nbl::hlsl::float32_t4x4& getProjectionMatrix() const { return projMatrix; } - inline const nbl::hlsl::float32_t3x4& getViewMatrix() const { return viewMatrix; } + inline const nbl::hlsl::float32_t3x4& getViewMatrix() const { return viewMatrix; } inline const 
nbl::hlsl::float32_t4x4& getConcatenatedMatrix() const { return concatMatrix; } inline void setProjectionMatrix(const nbl::hlsl::float32_t4x4& projection) @@ -77,16 +81,16 @@ class Camera leftHanded = nbl::hlsl::determinant(projMatrix) < 0.f; concatMatrix = nbl::hlsl::math::linalg::promoted_mul(projMatrix, viewMatrix); } - + inline void setPosition(const nbl::core::vectorSIMDf& pos) { position.set(pos); recomputeViewMatrix(); } - + inline const nbl::core::vectorSIMDf& getPosition() const { return position; } - inline void setTarget(const nbl::core::vectorSIMDf& pos) + inline void setTarget(const nbl::core::vectorSIMDf& pos) { target.set(pos); recomputeViewMatrix(); @@ -95,11 +99,11 @@ class Camera inline const nbl::core::vectorSIMDf& getTarget() const { return target; } inline void setUpVector(const nbl::core::vectorSIMDf& up) { upVector = up; } - + inline void setBackupUpVector(const nbl::core::vectorSIMDf& up) { backupUpVector = up; } inline const nbl::core::vectorSIMDf& getUpVector() const { return upVector; } - + inline const nbl::core::vectorSIMDf& getBackupUpVector() const { return backupUpVector; } inline const float getMoveSpeed() const { return moveSpeed; } @@ -110,7 +114,7 @@ class Camera inline void setRotateSpeed(const float _rotateSpeed) { rotateSpeed = _rotateSpeed; } - inline void recomputeViewMatrix() + inline void recomputeViewMatrix() { nbl::hlsl::float32_t3 pos = nbl::core::convertToHLSLVector(position).xyz; nbl::hlsl::float32_t3 localTarget = nbl::hlsl::normalize(nbl::core::convertToHLSLVector(target).xyz - pos); @@ -140,63 +144,78 @@ class Camera void mouseProcess(const nbl::ui::IMouseEventChannel::range_t& events) { - for (auto eventIt=events.begin(); eventIt!=events.end(); eventIt++) + for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) { auto ev = *eventIt; - if(ev.type == nbl::ui::SMouseEvent::EET_CLICK && ev.clickEvent.mouseButton == nbl::ui::EMB_LEFT_BUTTON) - if(ev.clickEvent.action == 
nbl::ui::SMouseEvent::SClickEvent::EA_PRESSED) + if (ev.type == nbl::ui::SMouseEvent::EET_CLICK && ev.clickEvent.mouseButton == nbl::ui::EMB_LEFT_BUTTON) + if (ev.clickEvent.action == nbl::ui::SMouseEvent::SClickEvent::EA_PRESSED) mouseDown = true; else if (ev.clickEvent.action == nbl::ui::SMouseEvent::SClickEvent::EA_RELEASED) mouseDown = false; - if(ev.type == nbl::ui::SMouseEvent::EET_MOVEMENT && mouseDown) + if (ev.type == nbl::ui::SMouseEvent::EET_MOVEMENT && mouseDown) { - nbl::hlsl::float32_t4 pos = nbl::core::convertToHLSLVector(getPosition()); - nbl::hlsl::float32_t4 localTarget = nbl::core::convertToHLSLVector(getTarget()) - pos; - - // Get Relative Rotation for localTarget in Radians - float relativeRotationX, relativeRotationY; - relativeRotationY = atan2(localTarget.x, localTarget.z); - const double z1 = nbl::core::sqrt(localTarget.x*localTarget.x + localTarget.z*localTarget.z); - relativeRotationX = atan2(z1, localTarget.y) - nbl::core::PI()/2; - - constexpr float RotateSpeedScale = 0.003f; - relativeRotationX -= ev.movementEvent.relativeMovementY * rotateSpeed * RotateSpeedScale * -1.0f; - float tmpYRot = ev.movementEvent.relativeMovementX * rotateSpeed * RotateSpeedScale * -1.0f; - + // --- corrected camera rotation update --- + nbl::hlsl::float32_t3 pos = nbl::core::convertToHLSLVector(getPosition()).xyz; + nbl::hlsl::float32_t3 targetVec = nbl::core::convertToHLSLVector(getTarget()).xyz - pos; // original vector to target + + // preserve distance so we don't collapse to unit length + float targetDistance = nbl::hlsl::length(targetVec); + if (targetDistance < 1e-6f) targetDistance = 1.0f; // avoid div-by-zero + + nbl::hlsl::float32_t3 forward = nbl::hlsl::normalize(targetVec); + nbl::hlsl::float32_t3 upVector = nbl::core::convertToHLSLVector(getUpVector()).xyz; + nbl::hlsl::float32_t3 right = nbl::hlsl::normalize(nbl::hlsl::cross(upVector, forward)); + nbl::hlsl::float32_t3 correctedForward = nbl::hlsl::normalize(nbl::hlsl::cross(right, upVector)); 
+ + // horizontal yaw (angle from correctedForward towards right) + float rightDot = nbl::hlsl::dot(targetVec, right); + float forwardDot = nbl::hlsl::dot(targetVec, correctedForward); + float relativeRotationY = atan2(rightDot, forwardDot); + + // pitch: angle above/below horizontal + float upDot = nbl::hlsl::dot(targetVec, upVector); + nbl::hlsl::float32_t3 horizontalComponent = targetVec - upVector * upDot; + float horizontalLength = nbl::hlsl::length(horizontalComponent); + float relativeRotationX = atan2(upDot, horizontalLength); + + // apply mouse/controller deltas (signs simplified) + constexpr float RotateSpeedScale = 0.003f; + relativeRotationX -= ev.movementEvent.relativeMovementY * rotateSpeed * RotateSpeedScale; + float tmpYRot = ev.movementEvent.relativeMovementX * rotateSpeed * RotateSpeedScale; if (leftHanded) - relativeRotationY -= tmpYRot; - else relativeRotationY += tmpYRot; - - const double MaxVerticalAngle = nbl::core::radians(88.0f); - - if (relativeRotationX > MaxVerticalAngle*2 && relativeRotationX < 2 * nbl::core::PI()-MaxVerticalAngle) - relativeRotationX = 2 * nbl::core::PI()-MaxVerticalAngle; else - if (relativeRotationX > MaxVerticalAngle && relativeRotationX < 2 * nbl::core::PI()-MaxVerticalAngle) - relativeRotationX = MaxVerticalAngle; - - pos.w = 0; - localTarget = nbl::hlsl::float32_t4(0, 0, nbl::core::max(1.f, nbl::hlsl::length(pos)), 1.0f); + relativeRotationY -= tmpYRot; - const nbl::hlsl::math::quaternion quat = nbl::hlsl::math::quaternion::create(relativeRotationX, relativeRotationY, 0.0f); - nbl::hlsl::float32_t3x4 mat = nbl::hlsl::math::linalg::promote_affine<3, 4, 3, 3>(quat.__constructMatrix()); + // clamp pitch + const float MaxVerticalAngle = nbl::core::radians(88.0f); + if (relativeRotationX > MaxVerticalAngle) relativeRotationX = MaxVerticalAngle; + if (relativeRotationX < -MaxVerticalAngle) relativeRotationX = -MaxVerticalAngle; + // build final direction by first yaw-rotating in the horizontal plane, then pitching + 
float cosYaw = cos(relativeRotationY); + float sinYaw = sin(relativeRotationY); + nbl::hlsl::float32_t3 yawForward = correctedForward * cosYaw + right * sinYaw; + yawForward = nbl::hlsl::normalize(yawForward); - localTarget = nbl::hlsl::float32_t4(nbl::hlsl::mul(mat, localTarget), 1.0f); + float cosPitch = cos(relativeRotationX); + float sinPitch = sin(relativeRotationX); + nbl::hlsl::float32_t3 finalDir = nbl::hlsl::normalize(yawForward * cosPitch + upVector * sinPitch); - nbl::core::vectorSIMDf finalTarget = nbl::core::constructVecorSIMDFromHLSLVector(localTarget + pos); + // restore original distance and set target + nbl::core::vectorSIMDf finalTarget = nbl::core::constructVecorSIMDFromHLSLVector(pos + finalDir * targetDistance); finalTarget.w = 1.0f; setTarget(finalTarget); + } } } void keyboardProcess(const nbl::ui::IKeyboardEventChannel::range_t& events) { - for(uint32_t k = 0; k < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++k) + for (uint32_t k = 0; k < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++k) perActionDt[k] = 0.0; /* @@ -205,8 +224,8 @@ class Camera * And If an UP event was sent It will get subtracted it from this value. 
(Currently Disabled Because we Need better Oracle) */ - for(uint32_t k = 0; k < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++k) - if(keysDown[k]) + for (uint32_t k = 0; k < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++k) + if (keysDown[k]) { auto timeDiff = std::chrono::duration_cast(nextPresentationTimeStamp - lastVirtualUpTimeStamp).count(); if (timeDiff < 0) @@ -214,28 +233,28 @@ class Camera perActionDt[k] += timeDiff; } - for (auto eventIt=events.begin(); eventIt!=events.end(); eventIt++) + for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) { const auto ev = *eventIt; - + // accumulate the periods for which a key was down auto timeDiff = std::chrono::duration_cast(nextPresentationTimeStamp - ev.timeStamp).count(); if (timeDiff < 0) timeDiff = 0; // handle camera movement - for (const auto logicalKey : { ECMK_MOVE_FORWARD, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT }) + for (const auto logicalKey : { ECMK_MOVE_FORWARD, ECMK_MOVE_UP, ECMK_MOVE_DOWN, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT }) { const auto code = keysMap[logicalKey]; if (ev.keyCode == code) { - if (ev.action == nbl::ui::SKeyboardEvent::ECA_PRESSED && !keysDown[logicalKey]) + if (ev.action == nbl::ui::SKeyboardEvent::ECA_PRESSED && !keysDown[logicalKey]) { perActionDt[logicalKey] += timeDiff; keysDown[logicalKey] = true; } - else if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) + else if (ev.action == nbl::ui::SKeyboardEvent::ECA_RELEASED) { // perActionDt[logicalKey] -= timeDiff; keysDown[logicalKey] = false; @@ -259,7 +278,7 @@ class Camera nextPresentationTimeStamp = _nextPresentationTimeStamp; return; } - + void endInputProcessing(std::chrono::microseconds _nextPresentationTimeStamp) { nbl::core::vectorSIMDf pos = getPosition(); @@ -271,13 +290,12 @@ class Camera movedir.makeSafe3D(); movedir = nbl::core::normalize(movedir); - constexpr float MoveSpeedScale = 0.02f; + constexpr float MoveSpeedScale = 0.02f; pos += movedir * 
perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_FORWARD] * moveSpeed * MoveSpeedScale; pos -= movedir * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_BACKWARD] * moveSpeed * MoveSpeedScale; - // strafing - + // if upvector and vector to the target are the same, we have a // problem. so solve this problem: nbl::core::vectorSIMDf up = nbl::core::normalize(upVector); @@ -288,6 +306,11 @@ class Camera up = nbl::core::normalize(backupUpVector); } + nbl::core::vectorSIMDf currentUp = nbl::core::normalize(nbl::core::cross(localTarget, nbl::core::cross(up, localTarget))); + pos += currentUp * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_UP] * moveSpeed * MoveSpeedScale; + pos -= currentUp * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_DOWN] * moveSpeed * MoveSpeedScale; + + // strafing nbl::core::vectorSIMDf strafevect = localTarget; if (leftHanded) strafevect = nbl::core::cross(strafevect, up); @@ -303,18 +326,23 @@ class Camera firstUpdate = false; setPosition(pos); - setTarget(localTarget+pos); + setTarget(localTarget + pos); lastVirtualUpTimeStamp = nextPresentationTimeStamp; } + // TODO: temporary but a good fix for the camera events when mouse stops dragging gizmo + void mouseKeysUp() + { + mouseDown = false; + } private: inline void initDefaultKeysMap() { mapKeysToWASD(); } - - inline void allKeysUp() + + inline void allKeysUp() { - for (uint32_t i=0; i< E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++i) + for (uint32_t i = 0; i < E_CAMERA_MOVE_KEYS::ECMK_COUNT; ++i) keysDown[i] = false; mouseDown = false; @@ -327,7 +355,7 @@ class Camera float moveSpeed, rotateSpeed; bool leftHanded, firstUpdate = true, mouseDown = false; - + std::array keysMap = { {nbl::ui::EKC_NONE} }; // map camera E_CAMERA_MOVE_KEYS to corresponding Nabla key codes, by default camera uses WSAD to move // TODO: make them use std::array bool keysDown[E_CAMERA_MOVE_KEYS::ECMK_COUNT] = {};