diff --git a/31_HLSLPathTracer/CMakeLists.txt b/31_HLSLPathTracer/CMakeLists.txt new file mode 100644 index 000000000..2e769bb18 --- /dev/null +++ b/31_HLSLPathTracer/CMakeLists.txt @@ -0,0 +1,40 @@ +include(common RESULT_VARIABLE RES) + +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +if(NBL_BUILD_IMGUI) + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) + + list(APPEND NBL_LIBRARIES + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" + Nabla::ext::FullScreenTriangle + ) + + nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + + if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) + endif() +endif() + + diff --git a/31_HLSLPathTracer/app_resources/glsl/common.glsl b/31_HLSLPathTracer/app_resources/glsl/common.glsl new file mode 100644 index 000000000..6b6e96710 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/glsl/common.glsl @@ -0,0 +1,837 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. 
+// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +// firefly and variance reduction techniques +//#define KILL_DIFFUSE_SPECULAR_PATHS +//#define VISUALIZE_HIGH_VARIANCE + +// debug +//#define NEE_ONLY + +layout(set = 2, binding = 0) uniform sampler2D envMap; +layout(set = 2, binding = 1) uniform usamplerBuffer sampleSequence; +layout(set = 2, binding = 2) uniform usampler2D scramblebuf; + +layout(set=0, binding=0, rgba16f) uniform image2D outImage; + +#ifndef _NBL_GLSL_WORKGROUP_SIZE_ +#define _NBL_GLSL_WORKGROUP_SIZE_ 512 +layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in; +#endif + +ivec2 getCoordinates() { + ivec2 imageSize = imageSize(outImage); + return ivec2(gl_GlobalInvocationID.x % imageSize.x, gl_GlobalInvocationID.x / imageSize.x); +} + +vec2 getTexCoords() { + ivec2 imageSize = imageSize(outImage); + ivec2 iCoords = getCoordinates(); + return vec2(float(iCoords.x) / imageSize.x, 1.0 - float(iCoords.y) / imageSize.y); +} + + +#include +#include +#include +#ifdef PERSISTENT_WORKGROUPS +#include +#endif + +#include + +layout(push_constant, row_major) uniform constants +{ + mat4 invMVP; + int sampleCount; + int depth; +} PTPushConstant; + +#define INVALID_ID_16BIT 0xffffu +struct Sphere +{ + vec3 position; + float radius2; + uint bsdfLightIDs; +}; + +Sphere Sphere_Sphere(in vec3 position, in float radius, in uint bsdfID, in uint lightID) +{ + Sphere sphere; + sphere.position = position; + sphere.radius2 = radius*radius; + sphere.bsdfLightIDs = bitfieldInsert(bsdfID,lightID,16,16); + return sphere; +} + +// return intersection distance if found, nbl_glsl_FLT_NAN otherwise +float Sphere_intersect(in Sphere sphere, in vec3 origin, in vec3 direction) +{ + vec3 relOrigin = origin-sphere.position; + float relOriginLen2 = dot(relOrigin,relOrigin); + const float radius2 = sphere.radius2; + + float dirDotRelOrigin = dot(direction,relOrigin); + float det = 
radius2-relOriginLen2+dirDotRelOrigin*dirDotRelOrigin; + + // do some speculative math here + float detsqrt = sqrt(det); + return -dirDotRelOrigin+(relOriginLen2>radius2 ? (-detsqrt):detsqrt); +} + +vec3 Sphere_getNormal(in Sphere sphere, in vec3 position) +{ + const float radiusRcp = inversesqrt(sphere.radius2); + return (position-sphere.position)*radiusRcp; +} + +float Sphere_getSolidAngle_impl(in float cosThetaMax) +{ + return 2.0*nbl_glsl_PI*(1.0-cosThetaMax); +} +float Sphere_getSolidAngle(in Sphere sphere, in vec3 origin) +{ + float cosThetaMax = sqrt(1.0-sphere.radius2/nbl_glsl_lengthSq(sphere.position-origin)); + return Sphere_getSolidAngle_impl(cosThetaMax); +} + + +Sphere spheres[SPHERE_COUNT] = { + Sphere_Sphere(vec3(0.0,-100.5,-1.0),100.0,0u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(2.0,0.0,-1.0),0.5,1u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(0.0,0.0,-1.0),0.5,2u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(-2.0,0.0,-1.0),0.5,3u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(2.0,0.0,1.0),0.5,4u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(0.0,0.0,1.0),0.5,4u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(-2.0,0.0,1.0),0.5,5u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(0.5,1.0,0.5),0.5,6u,INVALID_ID_16BIT) +#if SPHERE_COUNT>8 + ,Sphere_Sphere(vec3(-1.5,1.5,0.0),0.3,INVALID_ID_16BIT,0u) +#endif +}; + + +struct Triangle +{ + vec3 vertex0; + uint bsdfLightIDs; + vec3 vertex1; + uint padding0; + vec3 vertex2; + uint padding1; +}; + +Triangle Triangle_Triangle(in mat3 vertices, in uint bsdfID, in uint lightID) +{ + Triangle tri; + tri.vertex0 = vertices[0]; + tri.vertex1 = vertices[1]; + tri.vertex2 = vertices[2]; + // + tri.bsdfLightIDs = bitfieldInsert(bsdfID, lightID, 16, 16); + return tri; +} + +// return intersection distance if found, nbl_glsl_FLT_NAN otherwise +float Triangle_intersect(in Triangle tri, in vec3 origin, in vec3 direction) +{ + const vec3 edges[2] = vec3[2](tri.vertex1-tri.vertex0,tri.vertex2-tri.vertex0); + + const vec3 h = cross(direction,edges[1]); + const float a 
= dot(edges[0],h); + + const vec3 relOrigin = origin-tri.vertex0; + + const float u = dot(relOrigin,h)/a; + + const vec3 q = cross(relOrigin,edges[0]); + const float v = dot(direction,q)/a; + + const float t = dot(edges[1],q)/a; + + return t>0.f&&u>=0.f&&v>=0.f&&(u+v)<=1.f ? t:nbl_glsl_FLT_NAN; +} + +vec3 Triangle_getNormalTimesArea_impl(in mat2x3 edges) +{ + return cross(edges[0],edges[1])*0.5; +} +vec3 Triangle_getNormalTimesArea(in Triangle tri) +{ + return Triangle_getNormalTimesArea_impl(mat2x3(tri.vertex1-tri.vertex0,tri.vertex2-tri.vertex0)); +} + + + +struct Rectangle +{ + vec3 offset; + uint bsdfLightIDs; + vec3 edge0; + uint padding0; + vec3 edge1; + uint padding1; +}; + +Rectangle Rectangle_Rectangle(in vec3 offset, in vec3 edge0, in vec3 edge1, in uint bsdfID, in uint lightID) +{ + Rectangle rect; + rect.offset = offset; + rect.edge0 = edge0; + rect.edge1 = edge1; + // + rect.bsdfLightIDs = bitfieldInsert(bsdfID, lightID, 16, 16); + return rect; +} + +void Rectangle_getNormalBasis(in Rectangle rect, out mat3 basis, out vec2 extents) +{ + extents = vec2(length(rect.edge0), length(rect.edge1)); + basis[0] = rect.edge0/extents[0]; + basis[1] = rect.edge1/extents[1]; + basis[2] = normalize(cross(basis[0],basis[1])); +} + +// return intersection distance if found, nbl_glsl_FLT_NAN otherwise +float Rectangle_intersect(in Rectangle rect, in vec3 origin, in vec3 direction) +{ + const vec3 h = cross(direction,rect.edge1); + const float a = dot(rect.edge0,h); + + const vec3 relOrigin = origin-rect.offset; + + const float u = dot(relOrigin,h)/a; + + const vec3 q = cross(relOrigin,rect.edge0); + const float v = dot(direction,q)/a; + + const float t = dot(rect.edge1,q)/a; + + const bool intersection = t>0.f&&u>=0.f&&v>=0.f&&u<=1.f&&v<=1.f; + return intersection ? 
t:nbl_glsl_FLT_NAN; +} + +vec3 Rectangle_getNormalTimesArea(in Rectangle rect) +{ + return cross(rect.edge0,rect.edge1); +} + + + +#define DIFFUSE_OP 0u +#define CONDUCTOR_OP 1u +#define DIELECTRIC_OP 2u +#define OP_BITS_OFFSET 0 +#define OP_BITS_SIZE 2 +struct BSDFNode +{ + uvec4 data[2]; +}; + +uint BSDFNode_getType(in BSDFNode node) +{ + return bitfieldExtract(node.data[0].w,OP_BITS_OFFSET,OP_BITS_SIZE); +} +bool BSDFNode_isBSDF(in BSDFNode node) +{ + return BSDFNode_getType(node)==DIELECTRIC_OP; +} +bool BSDFNode_isNotDiffuse(in BSDFNode node) +{ + return BSDFNode_getType(node)!=DIFFUSE_OP; +} +float BSDFNode_getRoughness(in BSDFNode node) +{ + return uintBitsToFloat(node.data[1].w); +} +vec3 BSDFNode_getRealEta(in BSDFNode node) +{ + return uintBitsToFloat(node.data[0].rgb); +} +vec3 BSDFNode_getImaginaryEta(in BSDFNode node) +{ + return uintBitsToFloat(node.data[1].rgb); +} +mat2x3 BSDFNode_getEta(in BSDFNode node) +{ + return mat2x3(BSDFNode_getRealEta(node),BSDFNode_getImaginaryEta(node)); +} +#include +vec3 BSDFNode_getReflectance(in BSDFNode node, in float VdotH) +{ + const vec3 albedoOrRealIoR = uintBitsToFloat(node.data[0].rgb); + if (BSDFNode_isNotDiffuse(node)) + return nbl_glsl_fresnel_conductor(albedoOrRealIoR, BSDFNode_getImaginaryEta(node), VdotH); + else + return albedoOrRealIoR; +} + +float BSDFNode_getNEEProb(in BSDFNode bsdf) +{ + const float alpha = BSDFNode_isNotDiffuse(bsdf) ? 
BSDFNode_getRoughness(bsdf):1.0; + return min(8.0*alpha,1.0); +} + +#include +#include +float getLuma(in vec3 col) +{ + return dot(transpose(nbl_glsl_scRGBtoXYZ)[1],col); +} + +#define BSDF_COUNT 7 +BSDFNode bsdfs[BSDF_COUNT] = { + {{uvec4(floatBitsToUint(vec3(0.8,0.8,0.8)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(0.8,0.4,0.4)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(0.4,0.8,0.4)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(1.02,1.02,1.3)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,1.0,2.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(1.02,1.3,1.02)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,2.0,1.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(1.02,1.3,1.02)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,2.0,1.0,0.15))}}, + {{uvec4(floatBitsToUint(vec3(1.4,1.45,1.5)),DIELECTRIC_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0625))}} +}; + + +struct Light +{ + vec3 radiance; + uint objectID; +}; + +vec3 Light_getRadiance(in Light light) +{ + return light.radiance; +} +uint Light_getObjectID(in Light light) +{ + return light.objectID; +} + + +#define LIGHT_COUNT 1 +float scene_getLightChoicePdf(in Light light) +{ + return 1.0/float(LIGHT_COUNT); +} + + +#define LIGHT_COUNT 1 +Light lights[LIGHT_COUNT] = +{ + { + vec3(30.0,25.0,15.0), +#ifdef POLYGON_METHOD + 0u +#else + 8u +#endif + } +}; + + + +#define ANY_HIT_FLAG (-2147483648) +#define DEPTH_BITS_COUNT 8 +#define DEPTH_BITS_OFFSET (31-DEPTH_BITS_COUNT) +struct ImmutableRay_t +{ + vec3 origin; + vec3 direction; +#if POLYGON_METHOD==2 + vec3 normalAtOrigin; + bool wasBSDFAtOrigin; +#endif +}; +struct MutableRay_t +{ + float intersectionT; + uint objectID; + /* irrelevant here + uint triangleID; + vec2 barycentrics; + */ +}; +struct Payload_t +{ + vec3 accumulation; + float otherTechniqueHeuristic; + vec3 throughput; + #ifdef KILL_DIFFUSE_SPECULAR_PATHS + bool hasDiffuse; + #endif +}; + +struct Ray_t +{ + ImmutableRay_t 
_immutable; + MutableRay_t _mutable; + Payload_t _payload; +}; + + +#define INTERSECTION_ERROR_BOUND_LOG2 (-8.0) +float getTolerance_common(in uint depth) +{ + float depthRcp = 1.0/float(depth); + return INTERSECTION_ERROR_BOUND_LOG2;// *depthRcp*depthRcp; +} +float getStartTolerance(in uint depth) +{ + return exp2(getTolerance_common(depth)); +} +float getEndTolerance(in uint depth) +{ + return 1.0-exp2(getTolerance_common(depth)+1.0); +} + + +vec2 SampleSphericalMap(vec3 v) +{ + vec2 uv = vec2(atan(v.z, v.x), asin(v.y)); + uv *= nbl_glsl_RECIPROCAL_PI*0.5; + uv += 0.5; + return uv; +} + +void missProgram(in ImmutableRay_t _immutable, inout Payload_t _payload) /* adds the environment radiance along the ray direction, scaled by the path throughput, to the payload accumulation */ +{ + vec3 finalContribution = _payload.throughput; + // #define USE_ENVMAP +#ifdef USE_ENVMAP + vec2 uv = SampleSphericalMap(_immutable.direction); + finalContribution *= textureLod(envMap, uv, 0.0).rgb; +#else + const vec3 kConstantEnvLightRadiance = vec3(0.15, 0.21, 0.3); + finalContribution *= kConstantEnvLightRadiance; +#endif + _payload.accumulation += finalContribution; /* accumulate after the #endif so the env-map branch contributes too, not only the constant-radiance branch */ +} + +#include +#include +#include +#include +#include +#include +#include +nbl_glsl_LightSample nbl_glsl_bsdf_cos_generate(in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in vec3 u, in BSDFNode bsdf, in float monochromeEta, out nbl_glsl_AnisotropicMicrofacetCache _cache) +{ + const float a = BSDFNode_getRoughness(bsdf); + const mat2x3 ior = BSDFNode_getEta(bsdf); + + // fresnel stuff for dielectrics + float orientedEta, rcpOrientedEta; + const bool viewerInsideMedium = nbl_glsl_getOrientedEtas(orientedEta,rcpOrientedEta,interaction.isotropic.NdotV,monochromeEta); + + nbl_glsl_LightSample smpl; + nbl_glsl_AnisotropicMicrofacetCache dummy; + switch (BSDFNode_getType(bsdf)) + { + case DIFFUSE_OP: + smpl = nbl_glsl_oren_nayar_cos_generate(interaction,u.xy,a*a); + break; + case CONDUCTOR_OP: + smpl = nbl_glsl_ggx_cos_generate(interaction,u.xy,a,a,_cache); + break; + default: + smpl = 
nbl_glsl_ggx_dielectric_cos_generate(interaction,u,a,a,monochromeEta,_cache); + break; + } + return smpl; +} + +vec3 nbl_glsl_bsdf_cos_remainder_and_pdf(out float pdf, in nbl_glsl_LightSample _sample, in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in BSDFNode bsdf, in float monochromeEta, in nbl_glsl_AnisotropicMicrofacetCache _cache) +{ + // are V and L on opposite sides of the surface? + const bool transmitted = nbl_glsl_isTransmissionPath(interaction.isotropic.NdotV,_sample.NdotL); + + // is the BSDF or BRDF, if it is then we make the dot products `abs` before `max(,0.0)` + const bool transmissive = BSDFNode_isBSDF(bsdf); + const float clampedNdotL = nbl_glsl_conditionalAbsOrMax(transmissive,_sample.NdotL,0.0); + const float clampedNdotV = nbl_glsl_conditionalAbsOrMax(transmissive,interaction.isotropic.NdotV,0.0); + + vec3 remainder; + + const float minimumProjVectorLen = 0.00000001; + if (clampedNdotV>minimumProjVectorLen && clampedNdotL>minimumProjVectorLen) + { + // fresnel stuff for conductors (but reflectance also doubles as albedo) + const mat2x3 ior = BSDFNode_getEta(bsdf); + const vec3 reflectance = BSDFNode_getReflectance(bsdf,_cache.isotropic.VdotH); + + // fresnel stuff for dielectrics + float orientedEta, rcpOrientedEta; + const bool viewerInsideMedium = nbl_glsl_getOrientedEtas(orientedEta,rcpOrientedEta,interaction.isotropic.NdotV,monochromeEta); + + // + const float VdotL = dot(interaction.isotropic.V.dir,_sample.L); + + // + const float a = max(BSDFNode_getRoughness(bsdf),0.0001); // TODO: @Crisspl 0-roughness still doesn't work! Also Beckmann has a weird dark rim instead as fresnel!? 
+ const float a2 = a*a; + + // TODO: refactor into Material Compiler-esque thing + switch (BSDFNode_getType(bsdf)) + { + case DIFFUSE_OP: + remainder = reflectance*nbl_glsl_oren_nayar_cos_remainder_and_pdf_wo_clamps(pdf,a*a,VdotL,clampedNdotL,clampedNdotV); + break; + case CONDUCTOR_OP: + remainder = nbl_glsl_ggx_cos_remainder_and_pdf_wo_clamps(pdf,nbl_glsl_ggx_trowbridge_reitz(a2,_cache.isotropic.NdotH2),clampedNdotL,_sample.NdotL2,clampedNdotV,interaction.isotropic.NdotV_squared,reflectance,a2); + break; + default: + remainder = vec3(nbl_glsl_ggx_dielectric_cos_remainder_and_pdf(pdf, _sample, interaction.isotropic, _cache.isotropic, monochromeEta, a*a)); + break; + } + } + else + remainder = vec3(0.0); + return remainder; +} + +layout (constant_id = 0) const int MAX_DEPTH_LOG2 = 4; +layout (constant_id = 1) const int MAX_SAMPLES_LOG2 = 10; + + +#include + +mat2x3 rand3d(in uint protoDimension, in uint _sample, inout nbl_glsl_xoroshiro64star_state_t scramble_state) +{ + mat2x3 retval; + uint address = bitfieldInsert(protoDimension,_sample,MAX_DEPTH_LOG2,MAX_SAMPLES_LOG2); + for (int i=0; i<2u; i++) + { + uvec3 seqVal = texelFetch(sampleSequence,int(address)+i).xyz; + seqVal ^= uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state)); + retval[i] = vec3(seqVal)*uintBitsToFloat(0x2f800004u); + } + return retval; +} + + +void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction); +int traceRay(inout float intersectionT, in vec3 origin, in vec3 direction) +{ + const bool anyHit = intersectionT!=nbl_glsl_FLT_MAX; + + int objectID = -1; + for (int i=0; i0.0 && tnbl_glsl_FLT_MIN; + // but if we allowed non-watertight transmitters (single water surface), it would make sense just to apply this line by itself + nbl_glsl_AnisotropicMicrofacetCache _cache; + validPath = validPath && nbl_glsl_calcAnisotropicMicrofacetCache(_cache, interaction, nee_sample, 
monochromeEta); + if (lightPdflumaContributionThreshold && traceRay(t,intersection+nee_sample.L*t*getStartTolerance(depth),nee_sample.L)==-1) + ray._payload.accumulation += neeContrib; + }} + } +#ifdef NEE_ONLY /* presence test: the debug toggle is a value-less define, which a plain #if cannot evaluate */ + return false; +#endif + // sample BSDF + float bsdfPdf; vec3 bsdfSampleL; + { + nbl_glsl_AnisotropicMicrofacetCache _cache; + nbl_glsl_LightSample bsdf_sample = nbl_glsl_bsdf_cos_generate(interaction,epsilon[1],bsdf,monochromeEta,_cache); + // the value of the bsdf divided by the probability of the sample being generated + throughput *= nbl_glsl_bsdf_cos_remainder_and_pdf(bsdfPdf,bsdf_sample,interaction,bsdf,monochromeEta,_cache); + // + bsdfSampleL = bsdf_sample.L; + } + + // additional threshold + const float lumaThroughputThreshold = lumaContributionThreshold; + if (bsdfPdf>bsdfPdfThreshold && getLuma(throughput)>lumaThroughputThreshold) + { + ray._payload.throughput = throughput; + ray._payload.otherTechniqueHeuristic = neeProbability/bsdfPdf; // numerically stable, don't touch + ray._payload.otherTechniqueHeuristic *= ray._payload.otherTechniqueHeuristic; + + // trace new ray + ray._immutable.origin = intersection+bsdfSampleL*(1.0/*kSceneSize*/)*getStartTolerance(depth); + ray._immutable.direction = bsdfSampleL; + #if POLYGON_METHOD==2 + ray._immutable.normalAtOrigin = interaction.isotropic.N; + ray._immutable.wasBSDFAtOrigin = isBSDF; + #endif + return true; + } + } + return false; +} + +void main() +{ + const ivec2 imageExtents = imageSize(outImage); + +#ifdef PERSISTENT_WORKGROUPS + uint virtualThreadIndex; + for (uint virtualThreadBase = gl_WorkGroupID.x * _NBL_GLSL_WORKGROUP_SIZE_; virtualThreadBase < 1920*1080; virtualThreadBase += gl_NumWorkGroups.x * _NBL_GLSL_WORKGROUP_SIZE_) // not sure why 1280*720 doesn't cover draw surface + { + virtualThreadIndex = virtualThreadBase + gl_LocalInvocationIndex.x; + const ivec2 coords = ivec2(nbl_glsl_morton_decode2d32b(virtualThreadIndex)); +#else + const ivec2 coords = getCoordinates(); +#endif + + vec2 
texCoord = vec2(coords) / vec2(imageExtents); + texCoord.y = 1.0 - texCoord.y; + + if (false == (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageExtents,coords)))) { +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + if (((PTPushConstant.depth-1)>>MAX_DEPTH_LOG2)>0 || ((PTPushConstant.sampleCount-1)>>MAX_SAMPLES_LOG2)>0) + { + vec4 pixelCol = vec4(1.0,0.0,0.0,1.0); + imageStore(outImage, coords, pixelCol); +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0).rg; + const vec2 pixOffsetParam = vec2(1.0)/vec2(textureSize(scramblebuf,0)); + + + const mat4 invMVP = PTPushConstant.invMVP; + + vec4 NDC = vec4(texCoord*vec2(2.0,-2.0)+vec2(-1.0,1.0),0.0,1.0); + vec3 camPos; + { + vec4 tmp = invMVP*NDC; + camPos = tmp.xyz/tmp.w; + NDC.z = 1.0; + } + + vec3 color = vec3(0.0); + float meanLumaSquared = 0.0; + // TODO: if we collapse the nested for loop, then all GPUs will get `PTPushConstant.depth` factor speedup, not just NV with separate PC + for (int i=0; i5.0) + color = vec3(1.0,0.0,0.0); + #endif + + vec4 pixelCol = vec4(color, 1.0); + imageStore(outImage, coords, pixelCol); + +#ifdef PERSISTENT_WORKGROUPS + } +#endif +} +/** TODO: Improving Rendering + +Now: +- Always MIS (path correlated reuse) +- Test MIS alpha (roughness) scheme + +Many Lights: +- Path Guiding +- Light Importance Lists/Classification +- Spatio-Temporal Reservoir Sampling + +Indirect Light: +- Bidirectional Path Tracing +- Uniform Path Sampling / Vertex Connection and Merging / Path Space Regularization + +Animations: +- A-SVGF / BMFR +**/ \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/glsl/litByRectangle.comp b/31_HLSLPathTracer/app_resources/glsl/litByRectangle.comp new file mode 100644 index 000000000..d898655c4 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/glsl/litByRectangle.comp @@ -0,0 +1,182 @@ +// Copyright (C) 
2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#version 430 core +#extension GL_GOOGLE_include_directive : require + +#define SPHERE_COUNT 8 +#define POLYGON_METHOD 1 // 0 area sampling, 1 solid angle sampling, 2 approximate projected solid angle sampling +#include "app_resources/glsl/common.glsl" + +#define RECTANGLE_COUNT 1 +const vec3 edge0 = normalize(vec3(2,0,-1)); +const vec3 edge1 = normalize(vec3(2,-5,4)); +Rectangle rectangles[RECTANGLE_COUNT] = { + Rectangle_Rectangle(vec3(-3.8,0.35,1.3),edge0*7.0,edge1*0.1,INVALID_ID_16BIT,0u) +}; + + +void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction) +{ + for (int i=0; i0.0 && t +#include +#include + +float nbl_glsl_light_deferred_pdf(in Light light, in Ray_t ray) +{ + const Rectangle rect = rectangles[Light_getObjectID(light)]; + + const ImmutableRay_t _immutable = ray._immutable; + const vec3 L = _immutable.direction; +#if POLYGON_METHOD==0 + const float dist = ray._mutable.intersectionT; + return dist*dist/abs(dot(Rectangle_getNormalTimesArea(rect),L)); +#else + #ifdef TRIANGLE_REFERENCE + const mat3 sphericalVertices[2] = + { + nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset,rect.offset+rect.edge0,rect.offset+rect.edge1),_immutable.origin), + nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset+rect.edge1,rect.offset+rect.edge0,rect.offset+rect.edge0+rect.edge1),_immutable.origin) + }; + float solidAngle[2]; + vec3 cos_vertices[2],sin_vertices[2]; + float cos_a[2],cos_c[2],csc_b[2],csc_c[2]; + for (uint i=0u; i<2u; i++) + solidAngle[i] = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i]); + const float rectSolidAngle = solidAngle[0]+solidAngle[1]; + #if POLYGON_METHOD==1 + return 1.f/rectSolidAngle; + #elif POLYGON_METHOD==2 + // TODO: figure out what 
breaks for a directly visible light under MIS + if (rectSolidAngle > nbl_glsl_FLT_MIN) + { + const vec2 bary = nbl_glsl_barycentric_reconstructBarycentrics(L*ray._mutable.intersectionT+_immutable.origin-rect.offset,mat2x3(rect.edge0,rect.edge1)); + const uint i = bary.x>=0.f&&bary.y>=0.f&&(bary.x+bary.y)<=1.f ? 0u:1u; + + float pdf = nbl_glsl_sampling_probProjectedSphericalTriangleSample(solidAngle[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i],sphericalVertices[i],_immutable.normalAtOrigin,_immutable.wasBSDFAtOrigin,L); + pdf *= solidAngle[i]/rectSolidAngle; + return pdf; + } + else + return nbl_glsl_FLT_INF; + #endif + #else + float pdf; + mat3 rectNormalBasis; + vec2 rectExtents; + Rectangle_getNormalBasis(rect, rectNormalBasis, rectExtents); + vec3 sphR0 = nbl_glsl_shapes_getSphericalRectangle(_immutable.origin, rect.offset, rectNormalBasis); + float solidAngle = nbl_glsl_shapes_SolidAngleOfRectangle(sphR0, rectExtents); + if (solidAngle > nbl_glsl_FLT_MIN) + { + #if POLYGON_METHOD==1 + pdf = 1.f/solidAngle; + #else + #error + #endif + } + else + pdf = nbl_glsl_FLT_INF; + return pdf; + #endif +#endif +} + +vec3 nbl_glsl_light_generate_and_pdf(out float pdf, out float newRayMaxT, in vec3 origin, in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in bool isBSDF, in vec3 xi, in uint objectID) +{ + const Rectangle rect = rectangles[objectID]; + const vec3 N = Rectangle_getNormalTimesArea(rect); + + const vec3 origin2origin = rect.offset-origin; +#if POLYGON_METHOD==0 + vec3 L = origin2origin+rect.edge0*xi.x+rect.edge1*xi.y; // TODO: refactor + + const float distanceSq = dot(L,L); + const float rcpDistance = inversesqrt(distanceSq); + L *= rcpDistance; + + pdf = distanceSq/abs(dot(N,L)); + newRayMaxT = 1.0/rcpDistance; + return L; +#else + #ifdef TRIANGLE_REFERENCE + const mat3 sphericalVertices[2] = + { + nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset,rect.offset+rect.edge0,rect.offset+rect.edge1),origin), + 
nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset+rect.edge1,rect.offset+rect.edge0,rect.offset+rect.edge0+rect.edge1),origin) + }; + float solidAngle[2]; + vec3 cos_vertices[2],sin_vertices[2]; + float cos_a[2],cos_c[2],csc_b[2],csc_c[2]; + for (uint i=0u; i<2u; i++) + solidAngle[i] = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i]); + vec3 L = vec3(0.f,0.f,0.f); + const float rectangleSolidAngle = solidAngle[0]+solidAngle[1]; + if (rectangleSolidAngle > nbl_glsl_FLT_MIN) + { + float rcpTriangleChoiceProb; + const uint i = nbl_glsl_partitionRandVariable(solidAngle[0]/rectangleSolidAngle,xi.z,rcpTriangleChoiceProb) ? 1u:0u; + #if POLYGON_METHOD==1 + L = nbl_glsl_sampling_generateSphericalTriangleSample(solidAngle[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i],sphericalVertices[i],xi.xy); + pdf = 1.f/rectangleSolidAngle; + #elif POLYGON_METHOD==2 + float rcpPdf; + L = nbl_glsl_sampling_generateProjectedSphericalTriangleSample(rcpPdf,solidAngle[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i],sphericalVertices[i],interaction.isotropic.N,isBSDF,xi.xy); + pdf = 1.f/(rcpPdf*rcpTriangleChoiceProb); + #endif + } + else + pdf = nbl_glsl_FLT_INF; + #else + mat3 rectNormalBasis; + vec2 rectExtents; + Rectangle_getNormalBasis(rect, rectNormalBasis, rectExtents); + vec3 sphR0 = nbl_glsl_shapes_getSphericalRectangle(origin, rect.offset, rectNormalBasis); + vec3 L = vec3(0.f,0.f,0.f); + float solidAngle; + vec2 sphUv = nbl_glsl_sampling_generateSphericalRectangleSample(sphR0, rectExtents, xi.xy, solidAngle); + if (solidAngle > nbl_glsl_FLT_MIN) + { + #if POLYGON_METHOD==1 + vec3 sph_sample = sphUv[0] * rect.edge0 + sphUv[1] * rect.edge1 + rect.offset; + L = normalize(sph_sample - origin); + pdf = 1.f/solidAngle; + #else + #error + #endif + } + else + pdf = nbl_glsl_FLT_INF; + #endif + newRayMaxT = dot(N,origin2origin)/dot(N,L); + return L; +#endif +} + 
+ +uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) +{ + if (objectID0.0) + { + const float rcpDistance = inversesqrt(distanceSQ); + Z *= rcpDistance; + + const float cosThetaMax = sqrt(cosThetaMax2); + const float cosTheta = mix(1.0,cosThetaMax,xi.x); + + vec3 L = Z*cosTheta; + + const float cosTheta2 = cosTheta*cosTheta; + const float sinTheta = sqrt(1.0-cosTheta2); + float sinPhi,cosPhi; + nbl_glsl_sincos(2.0*nbl_glsl_PI*xi.y-nbl_glsl_PI,sinPhi,cosPhi); + mat2x3 XY = nbl_glsl_frisvad(Z); + + L += (XY[0]*cosPhi+XY[1]*sinPhi)*sinTheta; + + newRayMaxT = (cosTheta-sqrt(cosTheta2-cosThetaMax2))/rcpDistance; + pdf = 1.0/Sphere_getSolidAngle_impl(cosThetaMax); + return L; + } + pdf = 0.0; + return vec3(0.0,0.0,0.0); +} + +uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) +{ + Sphere sphere = spheres[objectID]; + normal = Sphere_getNormal(sphere,intersection); + return sphere.bsdfLightIDs; +} \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/glsl/litByTriangle.comp b/31_HLSLPathTracer/app_resources/glsl/litByTriangle.comp new file mode 100644 index 000000000..36fe522f2 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/glsl/litByTriangle.comp @@ -0,0 +1,105 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#version 430 core +#extension GL_GOOGLE_include_directive : require + +#define SPHERE_COUNT 8 +#define POLYGON_METHOD 1 // 0 area sampling, 1 solid angle sampling, 2 approximate projected solid angle sampling +#include "app_resources/glsl/common.glsl" + +#define TRIANGLE_COUNT 1 +Triangle triangles[TRIANGLE_COUNT] = { + Triangle_Triangle(mat3(vec3(-1.8,0.35,0.3),vec3(-1.2,0.35,0.0),vec3(-1.5,0.8,-0.3))*10.0,INVALID_ID_16BIT,0u) +}; + +void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction) +{ + for (int i=0; i0.0 && t +float nbl_glsl_light_deferred_pdf(in Light light, in Ray_t ray) +{ + const Triangle tri = triangles[Light_getObjectID(light)]; + + const vec3 L = ray._immutable.direction; +#if POLYGON_METHOD==0 + const float dist = ray._mutable.intersectionT; + return dist*dist/abs(dot(Triangle_getNormalTimesArea(tri),L)); +#else + const ImmutableRay_t _immutable = ray._immutable; + const mat3 sphericalVertices = nbl_glsl_shapes_getSphericalTriangle(mat3(tri.vertex0,tri.vertex1,tri.vertex2),_immutable.origin); + #if POLYGON_METHOD==1 + const float rcpProb = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices); + // if `rcpProb` is NAN then the triangle's solid angle was close to 0.0 + return rcpProb>nbl_glsl_FLT_MIN ? (1.0/rcpProb):nbl_glsl_FLT_MAX; + #elif POLYGON_METHOD==2 + const float pdf = nbl_glsl_sampling_probProjectedSphericalTriangleSample(sphericalVertices,_immutable.normalAtOrigin,_immutable.wasBSDFAtOrigin,L); + // if `pdf` is NAN then the triangle's projected solid angle was close to 0.0, if its close to INF then the triangle was very small + return pdfnbl_glsl_FLT_MIN ? 
(1.0/rcpPdf):0.0; + + const vec3 N = Triangle_getNormalTimesArea(tri); + newRayMaxT = dot(N,tri.vertex0-origin)/dot(N,L); + return L; +#endif +} + + +uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) +{ + if (objectID +struct Payload +{ + using this_t = Payload; + using scalar_type = T; + using spectral_type = vector; + + spectral_type accumulation; + scalar_type otherTechniqueHeuristic; + spectral_type throughput; + // #ifdef KILL_DIFFUSE_SPECULAR_PATHS + // bool hasDiffuse; + // #endif +}; + +enum NEEPolygonMethod : uint16_t +{ + PPM_AREA, + PPM_SOLID_ANGLE, + PPM_APPROX_PROJECTED_SOLID_ANGLE +}; + +struct ObjectID +{ + static ObjectID create(uint16_t id, ProceduralShapeType shapeType) + { + ObjectID retval; + retval.id = id; + retval.shapeType = shapeType; + return retval; + } + + NBL_CONSTEXPR_STATIC_INLINE uint16_t INVALID_ID = 0x3fffu; + + uint16_t id : 14u; + ProceduralShapeType shapeType : 2u; +}; + +struct LightID +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t INVALID_ID = 0xffffu; + + uint16_t id; +}; + +struct MaterialID +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t INVALID_ID = 0xffffu; + + uint16_t id; +}; + +template +struct Ray +{ + using this_t = Ray; + using payload_type = Payload; + using scalar_type = typename payload_type::scalar_type; + using vector3_type = vector; + + // immutable + vector3_type origin; + vector3_type direction; + + // mutable + scalar_type intersectionT; + + payload_type payload; + using spectral_type = typename payload_type::spectral_type; + + void init(const vector3_type _origin, const vector3_type _direction) + { + origin = _origin; + direction = _direction; + } + + template + void setInteraction(NBL_CONST_REF_ARG(Interaction) interaction) + { + // empty, only for projected solid angle + } + + void initPayload() + { + payload.accumulation = hlsl::promote(0.0); + payload.otherTechniqueHeuristic = scalar_type(0.0); // needed for direct eye-light paths + payload.throughput = 
hlsl::promote(1.0); + } + + bool shouldDoMIS() + { + return payload.otherTechniqueHeuristic > numeric_limits::min; + } + + scalar_type foundEmissiveMIS(scalar_type pdfSq) + { + return scalar_type(1.0) / (scalar_type(1.0) + pdfSq * payload.otherTechniqueHeuristic); + } + + void addPayloadContribution(const spectral_type contribution) + { + payload.accumulation += contribution; + } + spectral_type getPayloadAccumulatiion() { return payload.accumulation; } + + void updateThroughputAndMISWeights(const spectral_type throughput, const scalar_type otherTechniqueHeuristic) + { + payload.throughput = throughput; + payload.otherTechniqueHeuristic = otherTechniqueHeuristic; + } + + void setT(scalar_type t) { intersectionT = t; } + scalar_type getT() NBL_CONST_MEMBER_FUNC { return intersectionT; } + + spectral_type getPayloadThroughput() NBL_CONST_MEMBER_FUNC { return payload.throughput; } +}; + +template +struct Ray +{ + using this_t = Ray; + using payload_type = Payload; + using scalar_type = typename payload_type::scalar_type; + using vector3_type = vector; + + // immutable + vector3_type origin; + vector3_type direction; + + vector3_type normalAtOrigin; + bool wasBSDFAtOrigin; + + // mutable + scalar_type intersectionT; + + payload_type payload; + using spectral_type = typename payload_type::spectral_type; + + void init(const vector3_type _origin, const vector3_type _direction) + { + origin = _origin; + direction = _direction; + } + + template + void setInteraction(NBL_CONST_REF_ARG(Interaction) interaction) + { + normalAtOrigin = interaction.getN(); + wasBSDFAtOrigin = interaction.isMaterialBSDF(); + } + + void initPayload() + { + payload.accumulation = hlsl::promote(0.0); + payload.otherTechniqueHeuristic = scalar_type(0.0); // needed for direct eye-light paths + payload.throughput = hlsl::promote(1.0); + } + + bool shouldDoMIS() + { + return payload.otherTechniqueHeuristic > numeric_limits::min; + } + + scalar_type foundEmissiveMIS(scalar_type pdfSq) + { + return 
scalar_type(1.0) / (scalar_type(1.0) + pdfSq * payload.otherTechniqueHeuristic); + } + + void addPayloadContribution(const vector3_type contribution) + { + payload.accumulation += contribution; + } + vector3_type getPayloadAccumulatiion() { return payload.accumulation; } + + void updateThroughputAndMISWeights(const vector3_type throughput, const scalar_type otherTechniqueHeuristic) + { + payload.throughput = throughput; + payload.otherTechniqueHeuristic = otherTechniqueHeuristic; + } + + void setT(scalar_type t) { intersectionT = t; } + scalar_type getT() NBL_CONST_MEMBER_FUNC { return intersectionT; } + + vector3_type getPayloadThroughput() NBL_CONST_MEMBER_FUNC { return payload.throughput; } +}; + +template +struct Light +{ + using spectral_type = Spectrum; + + static Light create(uint32_t emissiveMatID, uint32_t objId, ProceduralShapeType shapeType) + { + Light retval; + retval.emissiveMatID.id = uint16_t(emissiveMatID); + retval.objectID = ObjectID::create(uint16_t(objId), shapeType); + return retval; + } + + static Light create(uint32_t emissiveMatID, NBL_CONST_REF_ARG(ObjectID) objectID) + { + Light retval; + retval.emissiveMatID.id = uint16_t(emissiveMatID); + retval.objectID = objectID; + return retval; + } + + MaterialID emissiveMatID; + ObjectID objectID; +}; + +template +struct Tolerance +{ + NBL_CONSTEXPR_STATIC_INLINE T INTERSECTION_ERROR_BOUND_LOG2 = -8.0; + + static T __common(uint16_t depth) + { + T depthRcp = 1.0 / T(depth); + return INTERSECTION_ERROR_BOUND_LOG2; + } + + static T getStart(uint16_t depth) + { + return nbl::hlsl::exp2(__common(depth)); + } + + static T getEnd(uint16_t depth) + { + return 1.0 - nbl::hlsl::exp2(__common(depth) + 1.0); + } + + template + static void adjust(NBL_REF_ARG(Ray) ray, const vector adjDirection, uint16_t depth) + { + ray.origin += adjDirection * ray.intersectionT * getStart(depth); + } +}; + +enum MaterialType : uint32_t +{ + DIFFUSE = 0u, + CONDUCTOR, + DIELECTRIC, + IRIDESCENT_CONDUCTOR, + 
IRIDESCENT_DIELECTRIC, + EMISSIVE +}; + +template) +struct SBxDFCreationParams +{ + bool is_aniso; + vector A; // roughness + Spectrum ior0; // source ior + Spectrum ior1; // destination ior + Spectrum iork; // destination iork (for iridescent only) + Scalar eta; // in most cases, eta will be calculated from ior0 and ior1; see monochromeEta in path_tracing/unidirectional.hlsl +}; + +template +struct BxDFNode +{ + using spectral_type = Spectrum; + using scalar_type = typename vector_traits::scalar_type; + using vector2_type = vector; + using params_type = SBxDFCreationParams; + + // for diffuse bxdfs + static BxDFNode create(uint32_t materialType, bool isAniso, NBL_CONST_REF_ARG(vector2_type) A, NBL_CONST_REF_ARG(spectral_type) albedo) + { + BxDFNode retval; + retval.albedo = albedo; + retval.materialType = materialType; + retval.params.is_aniso = isAniso; + retval.params.A = hlsl::max(A, hlsl::promote(1e-3)); + retval.params.ior0 = hlsl::promote(1.0); + retval.params.ior1 = hlsl::promote(1.0); + return retval; + } + + // for conductor, ior0 = eta, ior1 = etak + // for dielectric, eta = ior1/ior0 + static BxDFNode create(uint32_t materialType, bool isAniso, NBL_CONST_REF_ARG(vector2_type) A, NBL_CONST_REF_ARG(spectral_type) ior0, NBL_CONST_REF_ARG(spectral_type) ior1) + { + BxDFNode retval; + retval.albedo = hlsl::promote(1.0); + retval.materialType = materialType; + retval.params.is_aniso = isAniso; + retval.params.A = hlsl::max(A, hlsl::promote(1e-3)); + retval.params.ior0 = ior0; + retval.params.ior1 = ior1; + return retval; + } + + // for iridescent bxdfs, ior0 = thin film ior, ior1+iork1 = base mat ior (k for conductor base) + static BxDFNode create(uint32_t materialType, bool isAniso, scalar_type A, scalar_type Dinc, NBL_CONST_REF_ARG(spectral_type) ior0, NBL_CONST_REF_ARG(spectral_type) ior1, NBL_CONST_REF_ARG(spectral_type) iork1) + { + BxDFNode retval; + retval.albedo = hlsl::promote(1.0); + retval.materialType = materialType; + retval.params.is_aniso = 
isAniso; + retval.params.A = vector2_type(hlsl::max(A, 1e-3), Dinc); + retval.params.ior0 = ior0; + retval.params.ior1 = ior1; + retval.params.iork = iork1; + return retval; + } + + // for emissive materials + static BxDFNode create(uint32_t materialType, NBL_CONST_REF_ARG(spectral_type) radiance) + { + BxDFNode retval; + retval.albedo = radiance; + retval.materialType = materialType; + return retval; + } + + scalar_type getNEEProb() + { + const scalar_type alpha = materialType != MaterialType::DIFFUSE ? params.A[0] : 1.0; + return hlsl::min(8.0 * alpha, 1.0); + } + + spectral_type albedo; // also stores radiance for emissive + uint32_t materialType; + params_type params; +}; + + +template +struct Shape; + +template +struct Shape +{ + using scalar_type = T; + using vector3_type = vector; + + static Shape create(NBL_CONST_REF_ARG(vector3_type) position, float32_t radius2, uint32_t bsdfLightIDs) + { + Shape retval; + retval.position = position; + retval.radius2 = radius2; + retval.bsdfLightIDs = bsdfLightIDs; + return retval; + } + + static Shape create(NBL_CONST_REF_ARG(vector3_type) position, scalar_type radius, uint32_t bsdfID, uint32_t lightID) + { + uint32_t bsdfLightIDs = glsl::bitfieldInsert(bsdfID, lightID, 16, 16); + return create(position, radius * radius, bsdfLightIDs); + } + + void updateTransform(NBL_CONST_REF_ARG(float32_t3x4) m) + { + position = float3(m[0].w, m[1].w, m[2].w); + radius2 = m[0].x * m[0].x; + } + + // return intersection distance if found, nan otherwise + scalar_type intersect(NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(vector3_type) direction) + { + vector3_type relOrigin = origin - position; + scalar_type relOriginLen2 = hlsl::dot(relOrigin, relOrigin); + + scalar_type dirDotRelOrigin = hlsl::dot(direction, relOrigin); + scalar_type det = radius2 - relOriginLen2 + dirDotRelOrigin * dirDotRelOrigin; + + // do some speculative math here + scalar_type detsqrt = hlsl::sqrt(det); + return -dirDotRelOrigin + (relOriginLen2 > 
radius2 ? (-detsqrt) : detsqrt); + } + + vector3_type getNormal(NBL_CONST_REF_ARG(vector3_type) hitPosition) + { + const scalar_type radiusRcp = hlsl::rsqrt(radius2); + return (hitPosition - position) * radiusRcp; + } + + scalar_type getSolidAngle(NBL_CONST_REF_ARG(vector3_type) origin) + { + vector3_type dist = position - origin; + scalar_type cosThetaMax = hlsl::sqrt(1.0 - radius2 / hlsl::dot(dist, dist)); + return 2.0 * numbers::pi * (1.0 - cosThetaMax); + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t ObjSize = 5; + + vector3_type position; + float32_t radius2; + uint32_t bsdfLightIDs; +}; + +template +struct Shape +{ + using scalar_type = T; + using vector3_type = vector; + + static Shape create(NBL_CONST_REF_ARG(vector3_type) vertex0, NBL_CONST_REF_ARG(vector3_type) vertex1, NBL_CONST_REF_ARG(vector3_type) vertex2, uint32_t bsdfLightIDs) + { + Shape retval; + retval.vertex0 = vertex0; + retval.vertex1 = vertex1; + retval.vertex2 = vertex2; + retval.bsdfLightIDs = bsdfLightIDs; + return retval; + } + + static Shape create(NBL_CONST_REF_ARG(vector3_type) vertex0, NBL_CONST_REF_ARG(vector3_type) vertex1, NBL_CONST_REF_ARG(vector3_type) vertex2, uint32_t bsdfID, uint32_t lightID) + { + uint32_t bsdfLightIDs = glsl::bitfieldInsert(bsdfID, lightID, 16, 16); + return create(vertex0, vertex1, vertex2, bsdfLightIDs); + } + + void updateTransform(NBL_CONST_REF_ARG(float32_t3x4) m) + { + // Define triangle in local space + float3 localVertex0 = float3(0.0, 0.0, 0.0); + float3 localVertex1 = float3(1.0, 0.0, 0.0); + float3 localVertex2 = float3(0.0, 1.0, 0.0); + + // Transform each vertex + vertex0 = mul(m, float4(localVertex0, 1.0)).xyz; + vertex1 = mul(m, float4(localVertex1, 1.0)).xyz; + vertex2 = mul(m, float4(localVertex2, 1.0)).xyz; + } + + scalar_type intersect(NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(vector3_type) direction) + { + const vector3_type edges[2] = { vertex1 - vertex0, vertex2 - vertex0 }; + + const vector3_type h = hlsl::cross(direction, 
edges[1]); + const scalar_type a = hlsl::dot(edges[0], h); + + const vector3_type relOrigin = origin - vertex0; + + const scalar_type u = hlsl::dot(relOrigin, h) / a; + + const vector3_type q = hlsl::cross(relOrigin, edges[0]); + const scalar_type v = hlsl::dot(direction, q) / a; + + const scalar_type t = hlsl::dot(edges[1], q) / a; + + const bool intersection = t > 0.f && u >= 0.f && v >= 0.f && (u + v) <= 1.f; + return intersection ? t : bit_cast(numeric_limits::infinity); + } + + vector3_type getNormalTimesArea() + { + const vector3_type edges[2] = { vertex1 - vertex0, vertex2 - vertex0 }; + return hlsl::cross(edges[0], edges[1]) * 0.5f; + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t ObjSize = 10; + + vector3_type vertex0; + vector3_type vertex1; + vector3_type vertex2; + uint32_t bsdfLightIDs; +}; + +template +struct Shape +{ + using scalar_type = T; + using vector3_type = vector; + + static Shape create(NBL_CONST_REF_ARG(vector3_type) offset, NBL_CONST_REF_ARG(vector3_type) edge0, NBL_CONST_REF_ARG(vector3_type) edge1, uint32_t bsdfLightIDs) + { + Shape retval; + retval.offset = offset; + retval.edge0 = edge0; + retval.edge1 = edge1; + retval.bsdfLightIDs = bsdfLightIDs; + return retval; + } + + static Shape create(NBL_CONST_REF_ARG(vector3_type) offset, NBL_CONST_REF_ARG(vector3_type) edge0, NBL_CONST_REF_ARG(vector3_type) edge1, uint32_t bsdfID, uint32_t lightID) + { + uint32_t bsdfLightIDs = glsl::bitfieldInsert(bsdfID, lightID, 16, 16); + return create(offset, edge0, edge1, bsdfLightIDs); + } + + void updateTransform(NBL_CONST_REF_ARG(float32_t3x4) m) + { + // Define rectangle in local space + float3 localVertex0 = float3(0.0, 0.0, 0.0); + float3 localVertex1 = float3(1.0, 0.0, 0.0); + float3 localVertex2 = float3(0.0, 1.0, 0.0); + + // Transform each vertex + float3 vertex0 = mul(m, float4(localVertex0, 1.0)).xyz; + float3 vertex1 = mul(m, float4(localVertex1, 1.0)).xyz; + float3 vertex2 = mul(m, float4(localVertex2, 1.0)).xyz; + + // Extract offset and 
edges from transformed vertices + offset = vertex0; + edge0 = vertex1 - vertex0; + edge1 = vertex2 - vertex0; + } + + scalar_type intersect(NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(vector3_type) direction) + { + const vector3_type h = hlsl::cross(direction, edge1); + const scalar_type a = hlsl::dot(edge0, h); + + const vector3_type relOrigin = origin - offset; + + const scalar_type u = hlsl::dot(relOrigin,h)/a; + + const vector3_type q = hlsl::cross(relOrigin, edge0); + const scalar_type v = hlsl::dot(direction, q) / a; + + const scalar_type t = hlsl::dot(edge1, q) / a; + + const bool intersection = t > 0.f && u >= 0.f && v >= 0.f && u <= 1.f && v <= 1.f; + return intersection ? t : bit_cast(numeric_limits::infinity); + } + + vector3_type getNormalTimesArea() + { + return hlsl::cross(edge0, edge1); + } + + void getNormalBasis(NBL_REF_ARG(matrix) basis, NBL_REF_ARG(vector) extents) + { + extents = vector(nbl::hlsl::length(edge0), nbl::hlsl::length(edge1)); + basis[0] = edge0 / extents[0]; + basis[1] = edge1 / extents[1]; + basis[2] = nbl::hlsl::normalize(nbl::hlsl::cross(basis[0],basis[1])); + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t ObjSize = 10; + + vector3_type offset; + vector3_type edge0; + vector3_type edge1; + uint32_t bsdfLightIDs; +}; + +template && concepts::FloatingPointLikeVectorial) +struct PTIsotropicInteraction +{ + using this_t = PTIsotropicInteraction; + using ray_dir_info_type = RayDirInfo; + using scalar_type = typename RayDirInfo::scalar_type; + using vector3_type = typename RayDirInfo::vector3_type; + using spectral_type = vector3_type; + + // WARNING: Changed since GLSL, now arguments need to be normalized! 
+ static this_t create(NBL_CONST_REF_ARG(RayDirInfo) normalizedV, const vector3_type normalizedN) + { + this_t retval; + retval.V = normalizedV; + retval.N = normalizedN; + retval.NdotV = nbl::hlsl::dot(retval.N, retval.V.getDirection()); + retval.NdotV2 = retval.NdotV * retval.NdotV; + retval.luminosityContributionHint = hlsl::promote(1.0); + + return retval; + } + + RayDirInfo getV() NBL_CONST_MEMBER_FUNC { return V; } + vector3_type getN() NBL_CONST_MEMBER_FUNC { return N; } + scalar_type getNdotV(bxdf::BxDFClampMode _clamp = bxdf::BxDFClampMode::BCM_NONE) NBL_CONST_MEMBER_FUNC + { + return bxdf::conditionalAbsOrMax(NdotV, _clamp); + } + scalar_type getNdotV2() NBL_CONST_MEMBER_FUNC { return NdotV2; } + + bxdf::PathOrigin getPathOrigin() NBL_CONST_MEMBER_FUNC { return bxdf::PathOrigin::PO_SENSOR; } + spectral_type getLuminosityContributionHint() NBL_CONST_MEMBER_FUNC { return luminosityContributionHint; } + bool isMaterialBSDF() NBL_CONST_MEMBER_FUNC { return b_isMaterialBSDF; } + + RayDirInfo V; + vector3_type N; + scalar_type NdotV; + scalar_type NdotV2; + bool b_isMaterialBSDF; + + spectral_type luminosityContributionHint; +}; + +template) +struct PTAnisotropicInteraction +{ + using this_t = PTAnisotropicInteraction; + using isotropic_interaction_type = IsotropicInteraction; + using ray_dir_info_type = typename isotropic_interaction_type::ray_dir_info_type; + using scalar_type = typename ray_dir_info_type::scalar_type; + using vector3_type = typename ray_dir_info_type::vector3_type; + using matrix3x3_type = matrix; + using spectral_type = typename isotropic_interaction_type::spectral_type; + + // WARNING: Changed since GLSL, now arguments need to be normalized! 
+ static this_t create( + NBL_CONST_REF_ARG(isotropic_interaction_type) isotropic, + const vector3_type normalizedT, + const vector3_type normalizedB + ) + { + this_t retval; + retval.isotropic = isotropic; + + retval.T = normalizedT; + retval.B = normalizedB; + + retval.TdotV = nbl::hlsl::dot(retval.isotropic.getV().getDirection(), retval.T); + retval.BdotV = nbl::hlsl::dot(retval.isotropic.getV().getDirection(), retval.B); + + return retval; + } + static this_t create(NBL_CONST_REF_ARG(isotropic_interaction_type) isotropic, const vector3_type normalizedT) + { + return create(isotropic, normalizedT, cross(isotropic.getN(), normalizedT)); + } + static this_t create(NBL_CONST_REF_ARG(isotropic_interaction_type) isotropic) + { + vector3_type T, B; + math::frisvad(isotropic.getN(), T, B); + return create(isotropic, nbl::hlsl::normalize(T), nbl::hlsl::normalize(B)); + } + + static this_t create(NBL_CONST_REF_ARG(ray_dir_info_type) normalizedV, const vector3_type normalizedN) + { + isotropic_interaction_type isotropic = isotropic_interaction_type::create(normalizedV, normalizedN); + return create(isotropic); + } + + ray_dir_info_type getV() NBL_CONST_MEMBER_FUNC { return isotropic.getV(); } + vector3_type getN() NBL_CONST_MEMBER_FUNC { return isotropic.getN(); } + scalar_type getNdotV(bxdf::BxDFClampMode _clamp = bxdf::BxDFClampMode::BCM_NONE) NBL_CONST_MEMBER_FUNC { return isotropic.getNdotV(_clamp); } + scalar_type getNdotV2() NBL_CONST_MEMBER_FUNC { return isotropic.getNdotV2(); } + bxdf::PathOrigin getPathOrigin() NBL_CONST_MEMBER_FUNC { return isotropic.getPathOrigin(); } + spectral_type getLuminosityContributionHint() NBL_CONST_MEMBER_FUNC { return isotropic.getLuminosityContributionHint(); } + bool isMaterialBSDF() NBL_CONST_MEMBER_FUNC { return isotropic.isMaterialBSDF(); } + isotropic_interaction_type getIsotropic() NBL_CONST_MEMBER_FUNC { return isotropic; } + + vector3_type getT() NBL_CONST_MEMBER_FUNC { return T; } + vector3_type getB() NBL_CONST_MEMBER_FUNC 
{ return B; } + scalar_type getTdotV() NBL_CONST_MEMBER_FUNC { return TdotV; } + scalar_type getTdotV2() NBL_CONST_MEMBER_FUNC { const scalar_type t = getTdotV(); return t*t; } + scalar_type getBdotV() NBL_CONST_MEMBER_FUNC { return BdotV; } + scalar_type getBdotV2() NBL_CONST_MEMBER_FUNC { const scalar_type t = getBdotV(); return t*t; } + + vector3_type getTangentSpaceV() NBL_CONST_MEMBER_FUNC { return vector3_type(TdotV, BdotV, isotropic.getNdotV()); } + matrix3x3_type getToTangentSpace() NBL_CONST_MEMBER_FUNC { return matrix3x3_type(T, B, isotropic.getN()); } + matrix3x3_type getFromTangentSpace() NBL_CONST_MEMBER_FUNC { return nbl::hlsl::transpose(matrix3x3_type(T, B, isotropic.getN())); } + + isotropic_interaction_type isotropic; + vector3_type T; + vector3_type B; + scalar_type TdotV; + scalar_type BdotV; +}; + +template +struct PTIsoConfiguration; + +#define CONF_ISO bxdf::LightSample && bxdf::surface_interactions::Isotropic && !bxdf::surface_interactions::Anisotropic && concepts::FloatingPointLikeVectorial + +template +NBL_PARTIAL_REQ_TOP(CONF_ISO) +struct PTIsoConfiguration +#undef CONF_ISO +{ + NBL_CONSTEXPR_STATIC_INLINE bool IsAnisotropic = false; + + using scalar_type = typename LS::scalar_type; + using ray_dir_info_type = typename LS::ray_dir_info_type; + using vector2_type = vector; + using vector3_type = vector; + using monochrome_type = vector; + + using isotropic_interaction_type = Interaction; + using anisotropic_interaction_type = PTAnisotropicInteraction; + using sample_type = LS; + using spectral_type = Spectrum; + using quotient_pdf_type = sampling::quotient_and_pdf; +}; + +template +struct PTIsoMicrofacetConfiguration; + +#define MICROFACET_CONF_ISO bxdf::LightSample && bxdf::surface_interactions::Isotropic && !bxdf::surface_interactions::Anisotropic && bxdf::CreatableIsotropicMicrofacetCache && !bxdf::AnisotropicMicrofacetCache && concepts::FloatingPointLikeVectorial + +template +NBL_PARTIAL_REQ_TOP(MICROFACET_CONF_ISO) +struct 
PTIsoMicrofacetConfiguration : PTIsoConfiguration +#undef MICROFACET_CONF_ISO +{ + NBL_CONSTEXPR_STATIC_INLINE bool IsAnisotropic = false; + + using base_type = PTIsoConfiguration; + + using matrix3x3_type = matrix; + + using isocache_type = MicrofacetCache; + using anisocache_type = bxdf::SAnisotropicMicrofacetCache; +}; + +template +struct PTMaterialSystemCache +{ + using this_t = PTMaterialSystemCache; + using anisocache_type = AnisoCache; + using isocache_type = IsoCache; + + anisocache_type aniso_cache; + + // TODO: union or serialize somehow? + DiffuseBxDF diffuseBxDF; + ConductorBxDF conductorBxDF; + DielectricBxDF dielectricBxDF; + IridescentConductorBxDF iridescentConductorBxDF; + IridescentDielectricBxDF iridescentDielectricBxDF; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl b/31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl new file mode 100644 index 000000000..1ed93f098 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl @@ -0,0 +1,131 @@ +#ifndef _PATHTRACER_EXAMPLE_INTERSECTOR_INCLUDED_ +#define _PATHTRACER_EXAMPLE_INTERSECTOR_INCLUDED_ + +#include "example_common.hlsl" +#include + +using namespace nbl; +using namespace hlsl; + +template +struct Intersector +{ + using scalar_type = typename Ray::scalar_type; + using vector3_type = vector; + using ray_type = Ray; + using scene_type = Scene; + using object_handle_type = ObjectID; + + using anisotropic_interaction_type = AnisoInteraction; + using isotropic_interaction_type = typename anisotropic_interaction_type::isotropic_interaction_type; + using ray_dir_info_type = typename anisotropic_interaction_type::ray_dir_info_type; + + struct SIntersectData + { + using object_handle_type = object_handle_type; + using vector3_type = vector3_type; + using interaction_type = anisotropic_interaction_type; + + object_handle_type objectID; + vector3_type position; + interaction_type aniso_interaction; + vector3_type geometricNormal; + + bool foundHit() 
NBL_CONST_MEMBER_FUNC { return !hlsl::isnan(position.x); } + object_handle_type getObjectID() NBL_CONST_MEMBER_FUNC { return objectID; } + vector3_type getPosition() NBL_CONST_MEMBER_FUNC { return position; } + interaction_type getInteraction() NBL_CONST_MEMBER_FUNC { return aniso_interaction; } + vector3_type getGeometricNormal() NBL_CONST_MEMBER_FUNC { return geometricNormal; } + }; + using closest_hit_type = SIntersectData; + + static closest_hit_type traceClosestHit(NBL_CONST_REF_ARG(scene_type) scene, NBL_REF_ARG(ray_type) ray) + { + object_handle_type objectID; + objectID.id = object_handle_type::INVALID_ID; + + // prodedural shapes + NBL_UNROLL for (int i = 0; i < scene_type::SphereCount; i++) + { + float t = scene.getSphere(i).intersect(ray.origin, ray.direction); + + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + { + ray.intersectionT = t; + objectID.id = uint16_t(i); + objectID.shapeType = PST_SPHERE; + } + } + NBL_UNROLL for (int i = 0; i < scene_type::TriangleCount; i++) + { + float t = scene.getTriangle(i).intersect(ray.origin, ray.direction); + + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + { + ray.intersectionT = t; + objectID.id = uint16_t(i); + objectID.shapeType = PST_TRIANGLE; + } + } + NBL_UNROLL for (int i = 0; i < scene_type::RectangleCount; i++) + { + float t = scene.getRectangle(i).intersect(ray.origin, ray.direction); + + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + { + ray.intersectionT = t; + objectID.id = uint16_t(i); + objectID.shapeType = PST_RECTANGLE; + } + } + + closest_hit_type retval; + retval.objectID = objectID; + retval.position = hlsl::promote(bit_cast(numeric_limits::quiet_NaN)); + + bool foundHit = objectID.id != object_handle_type::INVALID_ID; + if (foundHit) + retval = scene.template getIntersection(objectID, ray); + + return retval; + } + + static scalar_type 
traceShadowRay(NBL_CONST_REF_ARG(scene_type) scene, NBL_REF_ARG(ray_type) ray, NBL_CONST_REF_ARG(object_handle_type) objectID) + { + // prodedural shapes + NBL_UNROLL for (int i = 0; i < scene_type::SphereCount; i++) + { + float t = scene.getSphere(i).intersect(ray.origin, ray.direction); + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + return 0.0; + } + NBL_UNROLL for (int i = 0; i < scene_type::TriangleCount; i++) + { + float t = scene.getTriangle(i).intersect(ray.origin, ray.direction); + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + return 0.0; + } + NBL_UNROLL for (int i = 0; i < scene_type::RectangleCount; i++) + { + float t = scene.getRectangle(i).intersect(ray.origin, ray.direction); + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + return 0.0; + } + + return 1.0; + } +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl b/31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl new file mode 100644 index 000000000..9798ad8e8 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl @@ -0,0 +1,286 @@ +#ifndef _PATHTRACER_EXAMPLE_MATERIAL_SYSTEM_INCLUDED_ +#define _PATHTRACER_EXAMPLE_MATERIAL_SYSTEM_INCLUDED_ + +#include +#include +#include + +#include "example_common.hlsl" + +using namespace nbl; +using namespace hlsl; + +template // NOTE: these bxdfs should match the ones in Scene BxDFNode +struct MaterialSystem +{ + using this_t = MaterialSystem; + using scalar_type = typename DiffuseBxDF::scalar_type; // types should be same across all 3 bxdfs + using vector2_type = vector; + using vector3_type = vector; + using material_id_type = MaterialID; + using measure_type = typename DiffuseBxDF::spectral_type; + using sample_type = typename DiffuseBxDF::sample_type; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + using quotient_pdf_type = typename 
DiffuseBxDF::quotient_pdf_type; + using anisotropic_interaction_type = typename DiffuseBxDF::anisotropic_interaction_type; + using isotropic_interaction_type = typename anisotropic_interaction_type::isotropic_interaction_type; + using anisocache_type = typename ConductorBxDF::anisocache_type; + using isocache_type = typename anisocache_type::isocache_type; + using cache_type = PTMaterialSystemCache; + using create_params_t = SBxDFCreationParams; + + using bxdfnode_type = BxDFNode; + using diffuse_op_type = DiffuseBxDF; + using conductor_op_type = ConductorBxDF; + using dielectric_op_type = DielectricBxDF; + using iri_conductor_op_type = IridescentConductorBxDF; + using iri_dielectric_op_type = IridescentDielectricBxDF; + + NBL_CONSTEXPR_STATIC_INLINE uint32_t IsBSDFPacked = uint32_t(bxdf::traits::type == bxdf::BT_BSDF) << uint32_t(MaterialType::DIFFUSE) | + uint32_t(bxdf::traits::type == bxdf::BT_BSDF) << uint32_t(MaterialType::CONDUCTOR) | + uint32_t(bxdf::traits::type == bxdf::BT_BSDF) << uint32_t(MaterialType::DIELECTRIC) | + uint32_t(bxdf::traits::type == bxdf::BT_BSDF) << uint32_t(MaterialType::IRIDESCENT_CONDUCTOR) | + uint32_t(bxdf::traits::type == bxdf::BT_BSDF) << uint32_t(MaterialType::IRIDESCENT_DIELECTRIC); + + /* Returns whether the material stored at bxdfs[matID.id] is flagged as a BSDF in IsBSDFPacked. IsBSDFPacked holds one bit per MaterialType enumerator (DIFFUSE..IRIDESCENT_DIELECTRIC), so the bit must be selected by the material's type, not by its index into bxdfs; EMISSIVE has no bit set and therefore yields false. */ + bool isBSDF(material_id_type matID) + { + MaterialType matType = (MaterialType)bxdfs[matID.id].materialType; + return bool(IsBSDFPacked & (1u << uint32_t(matType))); + } + + bxdfnode_type getBxDFNode(material_id_type matID, NBL_REF_ARG(anisotropic_interaction_type) interaction) NBL_CONST_MEMBER_FUNC + { + interaction.isotropic.b_isMaterialBSDF = isBSDF(matID); + return bxdfs[matID.id]; + } + + scalar_type setMonochromeEta(material_id_type matID, measure_type throughputCIE_Y) + { + bxdfnode_type bxdf = bxdfs[matID.id]; + const measure_type eta = bxdf.params.ior1 / bxdf.params.ior0; + const scalar_type monochromeEta = hlsl::dot(throughputCIE_Y, eta) / (throughputCIE_Y.r + throughputCIE_Y.g + throughputCIE_Y.b); // TODO: imaginary eta? 
+ bxdfs[matID.id].params.eta = monochromeEta; + return monochromeEta; + } + + cache_type getCacheFromSampleInteraction(material_id_type matID, NBL_CONST_REF_ARG(sample_type) _sample, NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction) + { + const scalar_type monochromeEta = setMonochromeEta(matID, interaction.getLuminosityContributionHint()); + using monochrome_type = typename dielectric_op_type::monochrome_type; + bxdf::fresnel::OrientedEtas orientedEta = bxdf::fresnel::OrientedEtas::create(interaction.getNdotV(), hlsl::promote(monochromeEta)); + cache_type _cache; + _cache.aniso_cache = anisocache_type::template create(interaction, _sample, orientedEta); + fillBxdfParams(matID, _cache); + return _cache; + } + + // these are specific for the bxdfs used for this example + void fillBxdfParams(material_id_type matID, NBL_REF_ARG(cache_type) _cache) + { + create_params_t cparams = bxdfs[matID.id].params; + MaterialType matType = (MaterialType)bxdfs[matID.id].materialType; + switch(matType) + { + case MaterialType::DIFFUSE: + { + using creation_t = typename diffuse_op_type::creation_type; + creation_t params; + params.A = cparams.A.x; + _cache.diffuseBxDF = diffuse_op_type::create(params); + } + break; + case MaterialType::CONDUCTOR: + { + _cache.conductorBxDF.ndf = conductor_op_type::ndf_type::create(cparams.A.x); + _cache.conductorBxDF.fresnel = conductor_op_type::fresnel_type::create(cparams.ior0,cparams.ior1); + } + break; + case MaterialType::DIELECTRIC: + { + using oriented_eta_t = bxdf::fresnel::OrientedEtas; + oriented_eta_t orientedEta = oriented_eta_t::create(1.0, hlsl::promote(cparams.eta)); + _cache.dielectricBxDF.ndf = dielectric_op_type::ndf_type::create(cparams.A.x); + _cache.dielectricBxDF.fresnel = dielectric_op_type::fresnel_type::create(orientedEta); + } + break; + case MaterialType::IRIDESCENT_CONDUCTOR: + { + _cache.iridescentConductorBxDF.ndf = iri_conductor_op_type::ndf_type::create(cparams.A.x); + using creation_params_t = typename 
iri_conductor_op_type::fresnel_type::creation_params_type; + creation_params_t params; + params.Dinc = cparams.A.y; + params.ior1 = hlsl::promote(1.0); + params.ior2 = cparams.ior0; + params.ior3 = cparams.ior1; + params.iork3 = cparams.iork; + _cache.iridescentConductorBxDF.fresnel = iri_conductor_op_type::fresnel_type::create(params); + } + break; + case MaterialType::IRIDESCENT_DIELECTRIC: + { + _cache.iridescentDielectricBxDF.ndf = iri_dielectric_op_type::ndf_type::create(cparams.A.x); + using creation_params_t = typename iri_dielectric_op_type::fresnel_type::creation_params_type; + creation_params_t params; + params.Dinc = cparams.A.y; + params.ior1 = hlsl::promote(1.0); + params.ior2 = cparams.ior0; + params.ior3 = cparams.ior1; + _cache.iridescentDielectricBxDF.fresnel = iri_dielectric_op_type::fresnel_type::create(params); + } + break; + default: + return; + } + } + + measure_type eval(material_id_type matID, NBL_CONST_REF_ARG(sample_type) _sample, NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction) + { + cache_type _cache = getCacheFromSampleInteraction(matID, _sample, interaction); + MaterialType matType = (MaterialType)bxdfs[matID.id].materialType; + switch(matType) + { + case MaterialType::DIFFUSE: + { + return bxdfs[matID.id].albedo * _cache.diffuseBxDF.eval(_sample, interaction.isotropic); + } + case MaterialType::CONDUCTOR: + { + return _cache.conductorBxDF.eval(_sample, interaction.isotropic, _cache.aniso_cache.iso_cache); + } + case MaterialType::DIELECTRIC: + { + return _cache.dielectricBxDF.eval(_sample, interaction.isotropic, _cache.aniso_cache.iso_cache); + } + case MaterialType::IRIDESCENT_CONDUCTOR: + { + return _cache.iridescentConductorBxDF.eval(_sample, interaction.isotropic, _cache.aniso_cache.iso_cache); + } + case MaterialType::IRIDESCENT_DIELECTRIC: + { + return _cache.iridescentDielectricBxDF.eval(_sample, interaction.isotropic, _cache.aniso_cache.iso_cache); + } + default: + return hlsl::promote(0.0); + } + } + + sample_type 
generate(material_id_type matID, NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction, NBL_CONST_REF_ARG(vector3_type) u, NBL_REF_ARG(cache_type) _cache) + { + fillBxdfParams(matID, _cache); + MaterialType matType = (MaterialType)bxdfs[matID.id].materialType; + switch(matType) + { + case MaterialType::DIFFUSE: + { + return _cache.diffuseBxDF.generate(interaction, u.xy); + } + case MaterialType::CONDUCTOR: + { + return _cache.conductorBxDF.generate(interaction, u.xy, _cache.aniso_cache); + } + case MaterialType::DIELECTRIC: + { + return _cache.dielectricBxDF.generate(interaction, u, _cache.aniso_cache); + } + case MaterialType::IRIDESCENT_CONDUCTOR: + { + return _cache.iridescentConductorBxDF.generate(interaction, u.xy, _cache.aniso_cache); + } + case MaterialType::IRIDESCENT_DIELECTRIC: + { + return _cache.iridescentDielectricBxDF.generate(interaction, u, _cache.aniso_cache); + } + default: + { + ray_dir_info_type L; + L.makeInvalid(); + return sample_type::create(L, hlsl::promote(0.0)); + } + } + } + + scalar_type pdf(material_id_type matID, NBL_CONST_REF_ARG(sample_type) _sample, NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction) + { + cache_type _cache = getCacheFromSampleInteraction(matID, _sample, interaction); + MaterialType matType = (MaterialType)bxdfs[matID.id].materialType; + switch(matType) + { + case MaterialType::DIFFUSE: + { + return _cache.diffuseBxDF.pdf(_sample, interaction.isotropic); + } + case MaterialType::CONDUCTOR: + { + return _cache.conductorBxDF.pdf(_sample, interaction.isotropic, _cache.aniso_cache.iso_cache); + } + case MaterialType::DIELECTRIC: + { + return _cache.dielectricBxDF.pdf(_sample, interaction.isotropic, _cache.aniso_cache.iso_cache); + } + case MaterialType::IRIDESCENT_CONDUCTOR: + { + return _cache.iridescentConductorBxDF.pdf(_sample, interaction.isotropic, _cache.aniso_cache.iso_cache); + } + case MaterialType::IRIDESCENT_DIELECTRIC: + { + return _cache.iridescentDielectricBxDF.pdf(_sample, 
interaction.isotropic, _cache.aniso_cache.iso_cache); + } + default: + return scalar_type(0.0); + } + } + + quotient_pdf_type quotient_and_pdf(material_id_type matID, NBL_CONST_REF_ARG(sample_type) _sample, NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction, NBL_REF_ARG(cache_type) _cache) + { + const float minimumProjVectorLen = 0.00000001; // TODO: still need this check? + if (interaction.getNdotV(bxdf::BxDFClampMode::BCM_ABS) > minimumProjVectorLen && _sample.getNdotL(bxdf::BxDFClampMode::BCM_ABS) > minimumProjVectorLen) + { + MaterialType matType = (MaterialType)bxdfs[matID.id].materialType; + switch(matType) + { + case MaterialType::DIFFUSE: + { + quotient_pdf_type ret = _cache.diffuseBxDF.quotient_and_pdf(_sample, interaction.isotropic); + ret._quotient *= bxdfs[matID.id].albedo; + return ret; + } + case MaterialType::CONDUCTOR: + { + return _cache.conductorBxDF.quotient_and_pdf(_sample, interaction.isotropic, _cache.aniso_cache.iso_cache); + } + case MaterialType::DIELECTRIC: + { + return _cache.dielectricBxDF.quotient_and_pdf(_sample, interaction.isotropic, _cache.aniso_cache.iso_cache); + } + case MaterialType::IRIDESCENT_CONDUCTOR: + { + return _cache.iridescentConductorBxDF.quotient_and_pdf(_sample, interaction.isotropic, _cache.aniso_cache.iso_cache); + } + case MaterialType::IRIDESCENT_DIELECTRIC: + { + return _cache.iridescentDielectricBxDF.quotient_and_pdf(_sample, interaction.isotropic, _cache.aniso_cache.iso_cache); + } + default: + break; + } + } + return quotient_pdf_type::create(hlsl::promote(0.0), 0.0); + } + + bool hasEmission(material_id_type matID) + { + MaterialType matType = (MaterialType)bxdfs[matID.id].materialType; + return matType == MaterialType::EMISSIVE; + } + + measure_type getEmission(material_id_type matID, NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction) + { + if (hasEmission(matID)) + return bxdfs[matID.id].albedo; + return hlsl::promote(0.0); + } + + bxdfnode_type bxdfs[Scene::SCENE_BXDF_COUNT]; +}; + 
+#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl new file mode 100644 index 000000000..33135a677 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl @@ -0,0 +1,438 @@ +#ifndef _PATHTRACER_EXAMPLE_NEXT_EVENT_ESTIMATOR_INCLUDED_ +#define _PATHTRACER_EXAMPLE_NEXT_EVENT_ESTIMATOR_INCLUDED_ + +#include "example_common.hlsl" + +using namespace nbl; +using namespace hlsl; + +template +struct ShapeSampling; + +template +struct ShapeSampling +{ + using scalar_type = T; + using vector3_type = vector; + + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) sphere) + { + ShapeSampling retval; + retval.sphere = sphere; + return retval; + } + + template + scalar_type deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + return 1.0 / sphere.getSolidAngle(ray.origin); + } + + template + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) + { + vector3_type Z = sphere.position - origin; + const scalar_type distanceSQ = hlsl::dot(Z,Z); + const scalar_type cosThetaMax2 = 1.0 - sphere.radius2 / distanceSQ; + if (cosThetaMax2 > 0.0) + { + const scalar_type rcpDistance = 1.0 / hlsl::sqrt(distanceSQ); + Z *= rcpDistance; + + const scalar_type cosThetaMax = hlsl::sqrt(cosThetaMax2); + const scalar_type cosTheta = hlsl::mix(1.0f, cosThetaMax, xi.x); + + vector3_type L = Z * cosTheta; + + const scalar_type cosTheta2 = cosTheta * cosTheta; + const scalar_type sinTheta = hlsl::sqrt(1.0 - cosTheta2); + scalar_type sinPhi, cosPhi; + math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); + vector3_type X, Y; + math::frisvad(Z, X, Y); + + L += (X * cosPhi + Y * sinPhi) * sinTheta; + + newRayMaxT = (cosTheta - hlsl::sqrt(cosTheta2 - cosThetaMax2)) / rcpDistance; + pdf = 1.0 / (2.0 * numbers::pi * (1.0 - 
cosThetaMax)); + return L; + } + pdf = 0.0; + return vector3_type(0.0,0.0,0.0); + } + + Shape sphere; +}; + +template +struct ShapeSampling +{ + using scalar_type = T; + using vector3_type = vector; + + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) tri) + { + ShapeSampling retval; + retval.tri = tri; + return retval; + } + + template + scalar_type deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const scalar_type dist = ray.intersectionT; + const vector3_type L = ray.direction; + return dist * dist / hlsl::abs(hlsl::dot(tri.getNormalTimesArea(), L)); + } + + template + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) + { + const vector3_type edge0 = tri.vertex1 - tri.vertex0; + const vector3_type edge1 = tri.vertex2 - tri.vertex0; + const scalar_type sqrtU = hlsl::sqrt(xi.x); + vector3_type pnt = tri.vertex0 + edge0 * (1.0 - sqrtU) + edge1 * sqrtU * xi.y; + vector3_type L = pnt - origin; + + const scalar_type distanceSq = hlsl::dot(L,L); + const scalar_type rcpDistance = 1.0 / hlsl::sqrt(distanceSq); + L *= rcpDistance; + + pdf = distanceSq / hlsl::abs(hlsl::dot(hlsl::cross(edge0, edge1) * 0.5f, L)); + newRayMaxT = 1.0 / rcpDistance; + return L; + } + + Shape tri; +}; + +template +struct ShapeSampling +{ + using scalar_type = T; + using vector3_type = vector; + + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) tri) + { + ShapeSampling retval; + retval.tri = tri; + return retval; + } + + template + scalar_type deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const vector3_type tri_vertices[3] = {tri.vertex0, tri.vertex1, tri.vertex2}; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri_vertices, ray.origin); + const scalar_type rcpProb = st.solidAngle(); + // if `rcpProb` is NAN then the triangle's solid angle was close to 0.0 + return rcpProb > numeric_limits::min ? 
(1.0 / rcpProb) : numeric_limits::max; + } + + template + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) + { + const vector3_type tri_vertices[3] = {tri.vertex0, tri.vertex1, tri.vertex2}; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri_vertices, origin); + sampling::SphericalTriangle sst = sampling::SphericalTriangle::create(st); + + typename sampling::SphericalTriangle::cache_type cache; + const vector3_type L = sst.generate(xi.xy, cache); + + pdf = cache.pdf; + + const vector3_type N = tri.getNormalTimesArea(); + newRayMaxT = hlsl::dot(N, tri.vertex0 - origin) / hlsl::dot(N, L); + return L; + } + + Shape tri; +}; + +template +struct ShapeSampling +{ + using scalar_type = T; + using vector3_type = vector; + + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) tri) + { + ShapeSampling retval; + retval.tri = tri; + return retval; + } + + template + scalar_type deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const vector3_type L = ray.direction; + const vector3_type tri_vertices[3] = {tri.vertex0, tri.vertex1, tri.vertex2}; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri_vertices, ray.origin); + sampling::ProjectedSphericalTriangle pst = sampling::ProjectedSphericalTriangle::create(st); + const scalar_type pdf = pst.backwardPdf(ray.normalAtOrigin, ray.wasBSDFAtOrigin, L); + // if `pdf` is NAN then the triangle's projected solid angle was close to 0.0, if its close to INF then the triangle was very small + return pdf < numeric_limits::max ? 
pdf : numeric_limits::max; + } + + template + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) + { + scalar_type rcpPdf; + const vector3_type tri_vertices[3] = {tri.vertex0, tri.vertex1, tri.vertex2}; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri_vertices, origin); + sampling::ProjectedSphericalTriangle pst = sampling::ProjectedSphericalTriangle::create(st); + + const vector3_type L = pst.generate(rcpPdf, interaction.getN(), interaction.isMaterialBSDF(), xi.xy); + + pdf = rcpPdf > numeric_limits::min ? (1.0 / rcpPdf) : numeric_limits::max; + + const vector3_type N = tri.getNormalTimesArea(); + newRayMaxT = hlsl::dot(N, tri.vertex0 - origin) / hlsl::dot(N, L); + return L; + } + + Shape tri; +}; + +template +struct ShapeSampling +{ + using scalar_type = T; + using vector3_type = vector; + + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) rect) + { + ShapeSampling retval; + retval.rect = rect; + return retval; + } + + template + scalar_type deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const scalar_type dist = ray.intersectionT; + const vector3_type L = ray.direction; + return dist * dist / hlsl::abs(hlsl::dot(rect.getNormalTimesArea(), L)); + } + + template + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) + { + const vector3_type N = rect.getNormalTimesArea(); + const vector3_type origin2origin = rect.offset - origin; + + vector3_type L = origin2origin + rect.edge0 * xi.x + rect.edge1 * xi.y; + const scalar_type distSq = hlsl::dot(L, L); + const scalar_type rcpDist = 1.0 / hlsl::sqrt(distSq); + L *= rcpDist; + pdf = distSq / hlsl::abs(hlsl::dot(N, L)); + newRayMaxT = 1.0 / rcpDist; + return L; + } + + Shape rect; +}; 
+ +template +struct ShapeSampling +{ + using scalar_type = T; + using vector3_type = vector; + + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) rect) + { + ShapeSampling retval; + retval.rect = rect; + return retval; + } + + template + scalar_type deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + scalar_type pdf; + matrix rectNormalBasis; + vector rectExtents; + rect.getNormalBasis(rectNormalBasis, rectExtents); + shapes::SphericalRectangle sphR0; + sphR0.origin = rect.offset; + sphR0.extents = rectExtents; + sphR0.basis = rectNormalBasis; + scalar_type solidAngle = sphR0.solidAngle(ray.origin); + if (solidAngle > numeric_limits::min) + pdf = 1.f / solidAngle; + else + pdf = bit_cast(numeric_limits::infinity); + return pdf; + } + + template + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) + { + const vector3_type N = rect.getNormalTimesArea(); + const vector3_type origin2origin = rect.offset - origin; + + matrix rectNormalBasis; + vector rectExtents; + rect.getNormalBasis(rectNormalBasis, rectExtents); + shapes::SphericalRectangle sphR0; + sphR0.origin = rect.offset; + sphR0.extents = rectExtents; + sphR0.basis = rectNormalBasis; + vector3_type L = hlsl::promote(0.0); + scalar_type solidAngle = sphR0.solidAngle(origin); + + sampling::SphericalRectangle ssph = sampling::SphericalRectangle::create(sphR0, origin); + typename sampling::SphericalRectangle::cache_type cache; + vector sphUv = ssph.generate(xi.xy, cache); + if (solidAngle > numeric_limits::min) + { + vector3_type sph_sample = sphUv.x * rect.edge0 + sphUv.y * rect.edge1 + rect.offset; + L = sph_sample - origin; + const bool invalid = hlsl::all(hlsl::abs(L) < hlsl::promote(numeric_limits::min)); + L = hlsl::mix(hlsl::normalize(L), hlsl::promote(0.0), invalid); + pdf = hlsl::mix(1.f / solidAngle, bit_cast(numeric_limits::infinity), invalid); + } 
+ else + pdf = bit_cast(numeric_limits::infinity); + + newRayMaxT = hlsl::dot(N, origin2origin) / hlsl::dot(N, L); + return L; + } + + Shape rect; +}; + +// PPM_APPROX_PROJECTED_SOLID_ANGLE not available for PST_TRIANGLE + + +template +struct NextEventEstimator +{ + using scalar_type = typename Ray::scalar_type; + using vector3_type = vector; + using ray_type = Ray; + using scene_type = Scene; + using light_type = Light; + using light_id_type = LightID; + using spectral_type = typename light_type::spectral_type; + using interaction_type = Aniso; + using quotient_pdf_type = sampling::quotient_and_pdf; + using sample_type = LightSample; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + using tolerance_method_type = Tolerance; + + using shape_type = Shape; + using shape_sampling_type = ShapeSampling; + + struct SampleQuotientReturn + { + using sample_type = sample_type; + using quotient_pdf_type = quotient_pdf_type; + using scalar_type = scalar_type; + using object_handle_type = ObjectID; + + sample_type sample_; + quotient_pdf_type quotient_pdf; + scalar_type newRayMaxT; + object_handle_type lightObjectID; + + sample_type getSample() NBL_CONST_MEMBER_FUNC { return sample_; } + quotient_pdf_type getQuotientPdf() NBL_CONST_MEMBER_FUNC { return quotient_pdf; } + scalar_type getT() NBL_CONST_MEMBER_FUNC { return newRayMaxT; } + object_handle_type getLightObjectID() NBL_CONST_MEMBER_FUNC { return lightObjectID; } + }; + using sample_quotient_return_type = SampleQuotientReturn; + + template NBL_FUNC_REQUIRES(C::value && PST==PST_SPHERE) + shape_sampling_type __getShapeSampling(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) + { + const shape_type sphere = scene.getSphere(lightObjectID); + return shape_sampling_type::create(sphere); + } + template NBL_FUNC_REQUIRES(C::value && PST==PST_TRIANGLE) + shape_sampling_type __getShapeSampling(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) + { + const shape_type tri = 
scene.getTriangle(lightObjectID); + return shape_sampling_type::create(tri); + } + template NBL_FUNC_REQUIRES(C::value && PST==PST_RECTANGLE) + shape_sampling_type __getShapeSampling(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) + { + const shape_type rect = scene.getRectangle(lightObjectID); + return shape_sampling_type::create(rect); + } + + scalar_type deferred_pdf(NBL_CONST_REF_ARG(scene_type) scene, light_id_type lightID, NBL_CONST_REF_ARG(ray_type) ray) + { + if (lightID.id == 0u) + return scalar_type(0.0); // env light pdf=0 + const light_type light = lights[0u]; + const shape_sampling_type sampling = __getShapeSampling(light.objectID.id, scene); + return sampling.template deferredPdf(ray) / scalar_type(scene_type::SCENE_LIGHT_COUNT); + } + + template + sample_quotient_return_type generate_and_quotient_and_pdf(NBL_CONST_REF_ARG(scene_type) scene, NBL_CONST_REF_ARG(MaterialSystem) materialSystem, const vector3_type origin, NBL_CONST_REF_ARG(interaction_type) interaction, const vector3_type xi, uint16_t depth) + { + // light id 0 is reserved for env light + // however, we start indexing light array without env light, so index 0 is first shape light + // use constant indices because with variables, driver (at least nvidia) seemed to nuke the light array and propagated constants throughout the code + // which caused frame times to increase from 16ms to 85ms + const light_type light = lights[0u]; + const shape_sampling_type sampling = __getShapeSampling(light.objectID.id, scene); + + sample_quotient_return_type retval; + scalar_type pdf, newRayMaxT; + const vector3_type sampleL = sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, xi); + + const vector3_type N = interaction.getN(); + const scalar_type NdotL = nbl::hlsl::dot(N, sampleL); + + // returned pdf is for MIS weight only + // normally, pdf=inf indicates a point light + // but here pdf=inf when solidAngle=0, so quotient of finite area emission =0 due to division by inf + 
// also for NdotL, normally would have to check conditionalMaxOrAbs(NdotL,0.0f,isBSDF) > min + // because BSDFs should receive light from the backside + // however, unnecessary for this example because scene has only watertight geometry + if (pdf > numeric_limits::min && !hlsl::isinf(pdf) && NdotL > numeric_limits::min) + { + ray_dir_info_type rayL; + rayL.setDirection(sampleL); + retval.sample_ = sample_type::create(rayL,interaction.getT(),interaction.getB(),NdotL); + + newRayMaxT *= tolerance_method_type::getEnd(depth); + pdf *= 1.0 / scalar_type(scene_type::SCENE_LIGHT_COUNT); + const spectral_type radiance = materialSystem.getEmission(light.emissiveMatID, interaction); + spectral_type quo = radiance / pdf; + retval.quotient_pdf = quotient_pdf_type::create(quo, pdf); + retval.newRayMaxT = newRayMaxT; + retval.lightObjectID = light.objectID; + } + else + { + retval.quotient_pdf = quotient_pdf_type::create(0.0, 0.0); + ray_dir_info_type rayL; + rayL.makeInvalid(); + retval.sample_ = sample_type::create(rayL,hlsl::promote(0.0)); + } + + return retval; + } + + light_id_type get_env_light_id() + { + light_id_type env_light_id; + env_light_id.id = 0u; + return env_light_id; + } + + spectral_type get_environment_radiance(NBL_CONST_REF_ARG(ray_type) ray) + { + // can also sample environment map using ray direction + return vector3_type(0.15, 0.21, 0.3); + } + + light_type lights[scene_type::SCENE_LIGHT_COUNT]; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl new file mode 100644 index 000000000..d556a7162 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl @@ -0,0 +1,19 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#pragma wave shader_stage(fragment) + +// vertex shader is provided by the fullScreenTriangle extension +#include +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +// binding 0 set 0 +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2DArray texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; + +[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 +{ + return float32_t4(texture.Sample(samplerState, float3(vxAttr.uv, 0)).rgb, 1.0f); +} \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl b/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl new file mode 100644 index 000000000..ece23374d --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl @@ -0,0 +1,40 @@ +#ifndef _PATHTRACER_EXAMPLE_RANDGEN_INCLUDED_ +#define _PATHTRACER_EXAMPLE_RANDGEN_INCLUDED_ + +#include "nbl/builtin/hlsl/sampling/quantized_sequence.hlsl" +#include "nbl/builtin/hlsl/random/dim_adaptor_recursive.hlsl" + +#include "render_common.hlsl" + +using namespace nbl; +using namespace hlsl; + +template +struct RandomUniformND +{ + using rng_type = RNG; + using return_type = vector; + + static RandomUniformND create(uint32_t2 seed, uint64_t pSampleSequence) + { + RandomUniformND retval; + retval.rng = rng_type::construct(seed); + retval.pSampleBuffer = pSampleSequence; + return retval; + } + + // baseDimension: offset index of the sequence + // sampleIndex: iteration number of current pixel (samples per pixel) + return_type operator()(uint32_t baseDimension, uint32_t sampleIndex) + { + using sequence_type = sampling::QuantizedSequence; + uint32_t address = glsl::bitfieldInsert(baseDimension, sampleIndex, MaxDepthLog2, MaxSamplesLog2); + sequence_type tmpSeq = vk::RawBufferLoad(pSampleBuffer + address * sizeof(sequence_type)); + return tmpSeq.template 
decode(random::DimAdaptorRecursive::__call(rng)); + } + + rng_type rng; + uint64_t pSampleBuffer; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl new file mode 100644 index 000000000..6b0d1434b --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -0,0 +1,222 @@ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/random/pcg.hlsl" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" +#ifdef PERSISTENT_WORKGROUPS +#include "nbl/builtin/hlsl/morton.hlsl" +#endif + +#include "nbl/builtin/hlsl/bxdf/reflection.hlsl" +#include "nbl/builtin/hlsl/bxdf/transmission.hlsl" + +#include "nbl/builtin/hlsl/path_tracing/basic_ray_gen.hlsl" +#include "nbl/builtin/hlsl/path_tracing/unidirectional.hlsl" + +// add these defines (one at a time) using -D argument to dxc +// #define SPHERE_LIGHT +// #define TRIANGLE_LIGHT +// #define RECTANGLE_LIGHT + +#include "render_common.hlsl" +#include "resolve_common.hlsl" + +#ifdef RWMC_ENABLED +#include +#include +#endif + +#ifdef RWMC_ENABLED +[[vk::push_constant]] RenderRWMCPushConstants pc; +#else +[[vk::push_constant]] RenderPushConstants pc; +#endif + +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D envMap; // unused +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState envSampler; + +[[vk::combinedImageSampler]] [[vk::binding(1, 0)]] Texture2D scramblebuf; +[[vk::combinedImageSampler]] [[vk::binding(1, 0)]] SamplerState scrambleSampler; + +[[vk::image_format("rgba16f")]] [[vk::binding(2, 0)]] RWTexture2DArray outImage; +[[vk::image_format("rgba16f")]] [[vk::binding(3, 0)]] RWTexture2DArray cascade; + +#include "example_common.hlsl" +#include "rand_gen.hlsl" +#include "intersector.hlsl" +#include "material_system.hlsl" +#include "next_event_estimator.hlsl" + +using namespace nbl; +using namespace hlsl; + +#ifdef SPHERE_LIGHT +#include 
"scene_sphere_light.hlsl" +#endif +#ifdef TRIANGLE_LIGHT +#include "scene_triangle_light.hlsl" +#endif +#ifdef RECTANGLE_LIGHT +#include "scene_rectangle_light.hlsl" +#endif + +NBL_CONSTEXPR NEEPolygonMethod POLYGON_METHOD = PPM_SOLID_ANGLE; + +int32_t2 getCoordinates() +{ + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); + return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); +} + +float32_t2 getTexCoords() +{ + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); + int32_t2 iCoords = getCoordinates(); + return float32_t2(float(iCoords.x) / width, 1.0 - float(iCoords.y) / height); +} + +using spectral_t = vector; +using ray_dir_info_t = bxdf::ray_dir_info::SBasic; +using iso_interaction = PTIsotropicInteraction; +using aniso_interaction = PTAnisotropicInteraction; +using sample_t = bxdf::SLightSample; +using iso_cache = bxdf::SIsotropicMicrofacetCache; +using aniso_cache = bxdf::SAnisotropicMicrofacetCache; +using quotient_pdf_t = sampling::quotient_and_pdf; + +using iso_config_t = PTIsoConfiguration; +using iso_microfacet_config_t = PTIsoMicrofacetConfiguration; + +using diffuse_bxdf_type = bxdf::reflection::SOrenNayar; +using conductor_bxdf_type = bxdf::reflection::SGGXIsotropic; +using dielectric_bxdf_type = bxdf::transmission::SGGXDielectricIsotropic; +using iri_conductor_bxdf_type = bxdf::reflection::SIridescent; +using iri_dielectric_bxdf_type = bxdf::transmission::SIridescent; + +using payload_type = Payload; +using ray_type = Ray; +using randgen_type = RandomUniformND; +using raygen_type = path_tracing::BasicRayGenerator; +using intersector_type = Intersector; +using material_system_type = MaterialSystem; +using nee_type = NextEventEstimator; + +#ifdef RWMC_ENABLED +using accumulator_type = rwmc::CascadeAccumulator >; +#else +#include "nbl/builtin/hlsl/path_tracing/default_accumulator.hlsl" +using accumulator_type = 
path_tracing::DefaultAccumulator; +#endif + +using pathtracer_type = path_tracing::Unidirectional; + +RenderPushConstants retireveRenderPushConstants() +{ +#ifdef RWMC_ENABLED + return pc.renderPushConstants; +#else + return pc; +#endif +} + +[numthreads(RenderWorkgroupSize, 1, 1)] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + const RenderPushConstants renderPushConstants = retireveRenderPushConstants(); + + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); +#ifdef PERSISTENT_WORKGROUPS + const uint32_t NumWorkgroupsX = width / RenderWorkgroupSizeSqrt; + const uint32_t NumWorkgroupsY = height / RenderWorkgroupSizeSqrt; + [loop] + for (uint32_t wgBase = glsl::gl_WorkGroupID().x; wgBase < NumWorkgroupsX*NumWorkgroupsY; wgBase += glsl::gl_NumWorkGroups().x) + { + const int32_t2 wgCoords = int32_t2(wgBase % NumWorkgroupsX, wgBase / NumWorkgroupsX); + morton::code mc; + mc.value = glsl::gl_LocalInvocationIndex().x; + const int32_t2 localCoords = _static_cast(mc); + const int32_t2 coords = wgCoords * int32_t2(RenderWorkgroupSizeSqrt,RenderWorkgroupSizeSqrt) + localCoords; +#else + const int32_t2 coords = getCoordinates(); +#endif + float32_t2 texCoord = float32_t2(coords) / float32_t2(width, height); + texCoord.y = 1.0 - texCoord.y; + + if (any(coords < int32_t2(0,0)) || any(coords >= int32_t2(width, height))) { +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + if (((renderPushConstants.depth - 1) >> MaxDepthLog2) > 0 || ((renderPushConstants.sampleCount - 1) >> MaxSamplesLog2) > 0) + { + float32_t4 pixelCol = float32_t4(1.0,0.0,0.0,1.0); + outImage[uint3(coords.x, coords.y, 0)] = pixelCol; +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + // set up path tracer + pathtracer_type pathtracer; + + uint2 scrambleDim; + scramblebuf.GetDimensions(scrambleDim.x, scrambleDim.y); + float32_t2 pixOffsetParam = (float2)1.0 / float2(scrambleDim); + + float32_t4 NDC = 
float4(texCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); + float32_t3 camPos; + { + float4 tmp = mul(renderPushConstants.invMVP, NDC); + camPos = tmp.xyz / tmp.w; + NDC.z = 1.0; + } + + scene_type scene; + scene.updateLight(renderPushConstants.generalPurposeLightMatrix); + + raygen_type rayGen; + rayGen.pixOffsetParam = pixOffsetParam; + rayGen.camPos = camPos; + rayGen.NDC = NDC; + rayGen.invMVP = renderPushConstants.invMVP; + + pathtracer.scene = scene; + pathtracer.randGen = randgen_type::create(scramblebuf[coords].rg, renderPushConstants.pSampleSequence); + pathtracer.nee.lights = lights; + pathtracer.materialSystem.bxdfs = bxdfs; + pathtracer.bxdfPdfThreshold = 0.0001; + pathtracer.lumaContributionThreshold = hlsl::dot(colorspace::scRGBtoXYZ[1], colorspace::eotf::sRGB(hlsl::promote(1.0 / 255.0))); + pathtracer.spectralTypeToLumaCoeffs = colorspace::scRGBtoXYZ[1]; + +#ifdef RWMC_ENABLED + accumulator_type accumulator = accumulator_type::create(pc.splattingParameters); +#else + accumulator_type accumulator = accumulator_type::create(); +#endif + // path tracing loop + for(int i = 0; i < renderPushConstants.sampleCount; ++i) + { + float32_t3 uvw = pathtracer.randGen(0u, i); + ray_type ray = rayGen.generate(uvw); + ray.initPayload(); + pathtracer.sampleMeasure(ray, i, renderPushConstants.depth, accumulator); + } + +#ifdef RWMC_ENABLED + for (uint32_t i = 0; i < CascadeCount; ++i) + cascade[uint3(coords.x, coords.y, i)] = float32_t4(accumulator.accumulation.data[i], 1.0f); +#else + outImage[uint3(coords.x, coords.y, 0)] = float32_t4(accumulator.accumulation, 1.0); +#endif + +#ifdef PERSISTENT_WORKGROUPS + } +#endif +} \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl new file mode 100644 index 000000000..f69496c48 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -0,0 +1,27 @@ +#ifndef 
_PATHTRACER_EXAMPLE_RENDER_COMMON_INCLUDED_ +#define _PATHTRACER_EXAMPLE_RENDER_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +using namespace nbl; +using namespace hlsl; + +struct RenderPushConstants +{ + float32_t4x4 invMVP; + float32_t3x4 generalPurposeLightMatrix; + int sampleCount; + int depth; + uint64_t pSampleSequence; +}; + +NBL_CONSTEXPR float32_t3 LightEminence = float32_t3(30.0f, 25.0f, 15.0f); +NBL_CONSTEXPR uint32_t RenderWorkgroupSizeSqrt = 8u; +NBL_CONSTEXPR uint32_t RenderWorkgroupSize = RenderWorkgroupSizeSqrt*RenderWorkgroupSizeSqrt; +NBL_CONSTEXPR uint32_t MaxDepthLog2 = 4u; +NBL_CONSTEXPR uint32_t MaxSamplesLog2 = 10u; +NBL_CONSTEXPR uint32_t MaxBufferDimensions = 3u << MaxDepthLog2; +NBL_CONSTEXPR uint32_t MaxSamplesBuffer = 1u << MaxSamplesLog2; +NBL_CONSTEXPR uint32_t MaxDescriptorCount = 256u; +NBL_CONSTEXPR uint16_t MaxUITextureCount = 1u; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl new file mode 100644 index 000000000..540aadf76 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl @@ -0,0 +1,13 @@ +#ifndef _PATHTRACER_EXAMPLE_RENDER_RWMC_COMMON_INCLUDED_ +#define _PATHTRACER_EXAMPLE_RENDER_RWMC_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl" +#include "render_common.hlsl" + +struct RenderRWMCPushConstants +{ + RenderPushConstants renderPushConstants; + nbl::hlsl::rwmc::SPackedSplattingParameters splattingParameters; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl new file mode 100644 index 000000000..c0982e9f2 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl @@ -0,0 +1,65 @@ +#include +#include "resolve_common.hlsl" + +[[vk::image_format("rgba16f")]] [[vk::binding(2, 0)]] RWTexture2DArray outImage; 
+[[vk::image_format("rgba16f")]] [[vk::binding(3, 0)]] RWTexture2DArray cascade; +[[vk::push_constant]] ResolvePushConstants pc; + +using namespace nbl; +using namespace hlsl; + +struct SCascadeAccessor +{ + using output_scalar_t = float32_t; + NBL_CONSTEXPR_STATIC_INLINE int32_t Components = 3; + using output_t = vector; + NBL_CONSTEXPR_STATIC_INLINE int32_t image_dimension = 2; + + static SCascadeAccessor create() + { + SCascadeAccessor retval; + uint32_t imgWidth, imgHeight, layers; + cascade.GetDimensions(imgWidth, imgHeight, layers); + retval.cascadeImageDimension = int16_t2(imgWidth, imgHeight); + return retval; + } + + template + void get(NBL_REF_ARG(output_t) value, vector uv, uint16_t layer, uint16_t level) + { + if (any(uv < int16_t2(0, 0)) || any(uv >= cascadeImageDimension)) + { + value = promote(0); + return; + } + + value = cascade.Load(int32_t3(uv, int32_t(layer))).rgb; + } + + int16_t2 cascadeImageDimension; +}; + +int32_t2 getImageExtents() +{ + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); + return int32_t2(width, height); +} + +[numthreads(ResolveWorkgroupSizeX, ResolveWorkgroupSizeY, 1)] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + const int32_t2 coords = int32_t2(threadID.x, threadID.y); + const int32_t2 imageExtents = getImageExtents(); + if (coords.x >= imageExtents.x || coords.y >= imageExtents.y) + return; + + using SResolveAccessorAdaptorType = rwmc::SResolveAccessorAdaptor; + using SResolverType = rwmc::SResolver; + SResolveAccessorAdaptorType accessor = { SCascadeAccessor::create() }; + SResolverType resolve = SResolverType::create(pc.resolveParameters); + + float32_t3 color = resolve(accessor, int16_t2(coords.x, coords.y)); + + outImage[uint3(coords.x, coords.y, 0)] = float32_t4(color, 1.0f); +} diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl new file mode 100644 index 000000000..66fb20acb --- 
/dev/null
+++ b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl
@@ -0,0 +1,15 @@
+#ifndef _PATHTRACER_EXAMPLE_RESOLVE_COMMON_INCLUDED_
+#define _PATHTRACER_EXAMPLE_RESOLVE_COMMON_INCLUDED_
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include "nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl"
+
+struct ResolvePushConstants // push-constant block of the resolve compute pass; main.cpp sizes that pipeline's push-constant range with sizeof(ResolvePushConstants)
+{
+    nbl::hlsl::rwmc::SResolveParameters resolveParameters;
+};
+
+NBL_CONSTEXPR uint32_t ResolveWorkgroupSizeX = 32u; // presumably the resolve shader's workgroup extent in X - confirm against resolve.comp.hlsl
+NBL_CONSTEXPR uint32_t ResolveWorkgroupSizeY = 16u; // presumably the resolve shader's workgroup extent in Y - confirm against resolve.comp.hlsl
+NBL_CONSTEXPR uint32_t CascadeCount = 6u; // presumably the number of RWMC cascades - confirm against the render/resolve shaders
+
+#endif
diff --git a/31_HLSLPathTracer/app_resources/hlsl/scene_base.hlsl b/31_HLSLPathTracer/app_resources/hlsl/scene_base.hlsl
new file mode 100644
index 000000000..070a7c164
--- /dev/null
+++ b/31_HLSLPathTracer/app_resources/hlsl/scene_base.hlsl
@@ -0,0 +1,76 @@
+#ifndef _PATHTRACER_EXAMPLE_SCENE_BASE_INCLUDED_
+#define _PATHTRACER_EXAMPLE_SCENE_BASE_INCLUDED_
+
+#include "example_common.hlsl"
+
+using namespace nbl;
+using namespace hlsl;
+
+struct SceneBase // data shared by the sphere-, triangle- and rectangle-light scenes, which all derive from this struct
+{
+    using scalar_type = float;
+    using vector3_type = vector;
+    using light_type = Light;
+    using light_id_type = LightID;
+
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t SCENE_SPHERE_COUNT = 10u; // length of scene_spheres[]
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t SCENE_LIGHT_COUNT = 1u; // length of each scene variant's lights[] array
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t SCENE_BXDF_COUNT = 10u; // length of bxdfs[]
+
+    static const Shape scene_spheres[SCENE_SPHERE_COUNT];
+
+    struct MatLightID // a light id and a material id unpacked from one 32-bit word
+    {
+        using light_id_type = LightID;
+        using material_id_type = MaterialID;
+
+        light_id_type lightID;
+        material_id_type matID;
+
+        static MatLightID createFromPacked(uint32_t packedID)
+        {
+            MatLightID retval;
+            retval.lightID.id = uint16_t(glsl::bitfieldExtract(packedID, 16, 16)); // light id: 16 bits starting at bit 16 (GLSL bitfieldExtract(value, offset, bits))
+            retval.matID.id = uint16_t(glsl::bitfieldExtract(packedID, 0, 16)); // material id: 16 bits starting at bit 0
+            return retval;
+        }
+
+        light_id_type getLightID() NBL_CONST_MEMBER_FUNC { return lightID; }
+        material_id_type getMaterialID() NBL_CONST_MEMBER_FUNC { return matID; }
+
+        bool isLight() NBL_CONST_MEMBER_FUNC { return lightID.id != light_id_type::INVALID_ID; } // true when a valid light id is attached
+        bool canContinuePath() NBL_CONST_MEMBER_FUNC { return matID.id != material_id_type::INVALID_ID; } // true when a valid material id is attached
+    };
+    using mat_light_id_type = MatLightID;
+};
+
+const Shape SceneBase::scene_spheres[SCENE_SPHERE_COUNT] = { // every entry passes INVALID_ID as its last argument; the light shapes are defined in the per-variant scene files
+    Shape::create(float3(0.0, -100.5, -1.0), 100.0, 0u, SceneBase::light_id_type::INVALID_ID),
+    Shape::create(float3(2.0, 0.0, -1.0), 0.5, 1u, SceneBase::light_id_type::INVALID_ID),
+    Shape::create(float3(0.0, 0.0, -1.0), 0.5, 2u, SceneBase::light_id_type::INVALID_ID),
+    Shape::create(float3(-2.0, 0.0, -1.0), 0.5, 3u, SceneBase::light_id_type::INVALID_ID),
+    Shape::create(float3(2.0, 0.0, 1.0), 0.5, 4u, SceneBase::light_id_type::INVALID_ID),
+    Shape::create(float3(0.0, 0.0, 1.0), 0.5, 4u, SceneBase::light_id_type::INVALID_ID),
+    Shape::create(float3(-2.0, 0.0, 1.0), 0.5, 5u, SceneBase::light_id_type::INVALID_ID),
+    Shape::create(float3(0.5, 1.0, 0.5), 0.5, 6u, SceneBase::light_id_type::INVALID_ID),
+    Shape::create(float3(-4.0, 0.0, 1.0), 0.5, 7u, SceneBase::light_id_type::INVALID_ID),
+    Shape::create(float3(-4.0, 0.0, -1.0), 0.5, 8u, SceneBase::light_id_type::INVALID_ID)
+};
+
+using spectral_t = vector;
+using bxdfnode_type = BxDFNode;
+
+static const bxdfnode_type bxdfs[SceneBase::SCENE_BXDF_COUNT] = { // material table; the last slot (SCENE_BXDF_COUNT-1u) is the emissive entry that the light shapes and lights[] reference
+    bxdfnode_type::create(MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.8,0.8,0.8)),
+    bxdfnode_type::create(MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.8,0.4,0.4)),
+    bxdfnode_type::create(MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.4,0.8,0.4)),
+    bxdfnode_type::create(MaterialType::CONDUCTOR, false, float2(0,0), spectral_t(1.02,1.02,1.3), spectral_t(1.0,1.0,2.0)),
+    bxdfnode_type::create(MaterialType::CONDUCTOR, false, float2(0,0), spectral_t(1.02,1.3,1.02), spectral_t(1.0,2.0,1.0)),
+    bxdfnode_type::create(MaterialType::CONDUCTOR, false, float2(0.15,0.15), spectral_t(1.02,1.3,1.02), spectral_t(1.0,2.0,1.0)),
+    bxdfnode_type::create(MaterialType::DIELECTRIC, false, float2(0.0625,0.0625), spectral_t(1,1,1), spectral_t(1.4,1.45,1.5)),
+    bxdfnode_type::create(MaterialType::IRIDESCENT_CONDUCTOR, false, 0.0, 505.0, spectral_t(1.39,1.39,1.39), spectral_t(1.2,1.2,1.2), spectral_t(0.5,0.5,0.5)),
+    bxdfnode_type::create(MaterialType::IRIDESCENT_DIELECTRIC, false, 0.0, 400.0, spectral_t(1.7,1.7,1.7), spectral_t(1.0,1.0,1.0), spectral_t(0,0,0)),
+    bxdfnode_type::create(MaterialType::EMISSIVE, LightEminence)
+};
+
+#endif
diff --git a/31_HLSLPathTracer/app_resources/hlsl/scene_rectangle_light.hlsl b/31_HLSLPathTracer/app_resources/hlsl/scene_rectangle_light.hlsl
new file mode 100644
index 000000000..4dd821a94
--- /dev/null
+++ b/31_HLSLPathTracer/app_resources/hlsl/scene_rectangle_light.hlsl
@@ -0,0 +1,97 @@
+#ifndef _PATHTRACER_EXAMPLE_SCENE_RECTANGLE_LIGHT_INCLUDED_
+#define _PATHTRACER_EXAMPLE_SCENE_RECTANGLE_LIGHT_INCLUDED_
+
+#include "scene_base.hlsl"
+
+using namespace nbl;
+using namespace hlsl;
+
+struct SceneRectangleLight : SceneBase // scene variant: the base spheres plus one rectangle light
+{
+    using scalar_type = float;
+    using vector3_type = vector;
+    using this_t = SceneRectangleLight;
+    using base_t = SceneBase;
+    using object_handle_type = ObjectID;
+    using mat_light_id_type = base_t::mat_light_id_type;
+
+    using ray_dir_info_t = bxdf::ray_dir_info::SBasic;
+    using interaction_type = PTIsotropicInteraction;
+
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t SphereCount = base_t::SCENE_SPHERE_COUNT;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t TriangleCount = 0u;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t RectangleCount = base_t::SCENE_LIGHT_COUNT;
+
+    static const Shape light_rectangles[1];
+
+    Shape getSphere(uint32_t idx)
+    {
+        assert(idx < SphereCount);
+        return base_t::scene_spheres[idx];
+    }
+    Shape getTriangle(uint32_t idx) // this scene has no triangles (TriangleCount == 0u): asserts, then returns an uninitialized Shape
+    {
+        assert(false);
+        Shape dummy;
+        return dummy;
+    }
+    Shape getRectangle(uint32_t idx)
+    {
+        assert(idx < RectangleCount);
+        return light_rectangles[idx];
+    }
+
+    void updateLight(NBL_CONST_REF_ARG(float32_t3x4) generalPurposeLightMatrix) // forwards the matrix to the single light shape
+    {
+        light_rectangles[0].updateTransform(generalPurposeLightMatrix);
+    }
+
+    mat_light_id_type getMatLightIDs(NBL_CONST_REF_ARG(object_handle_type) objectID) // unpacks the hit shape's bsdfLightIDs word
+    {
+        assert(objectID.shapeType == PST_SPHERE || objectID.shapeType == PST_RECTANGLE);
+        if (objectID.shapeType == PST_SPHERE)
+            return mat_light_id_type::createFromPacked(getSphere(objectID.id).bsdfLightIDs);
+        else
+            return mat_light_id_type::createFromPacked(getRectangle(objectID.id).bsdfLightIDs);
+    }
+
+    template
+    Intersection getIntersection(NBL_CONST_REF_ARG(object_handle_type) objectID, NBL_CONST_REF_ARG(Ray) rayIntersected) // builds the hit record (position, geometric normal, interaction) for an already intersected ray
+    {
+        assert(objectID.shapeType == PST_SPHERE || objectID.shapeType == PST_RECTANGLE);
+        Intersection intersection;
+        intersection.objectID = objectID;
+        intersection.position = rayIntersected.origin + rayIntersected.direction * rayIntersected.intersectionT;
+
+        vector3_type N = objectID.shapeType == PST_SPHERE ? getSphere(objectID.id).getNormal(intersection.position) : getRectangle(objectID.id).getNormalTimesArea(); // the rectangle branch yields normal * area; normalized on the next line
+        N = hlsl::normalize(N);
+        intersection.geometricNormal = N;
+        ray_dir_info_t V;
+        V.setDirection(-rayIntersected.direction); // view direction points back along the incoming ray
+        interaction_type interaction = interaction_type::create(V, N);
+        interaction.luminosityContributionHint = colorspace::scRGBtoXYZ[1] * rayIntersected.getPayloadThroughput(); // row 1 of scRGBtoXYZ (presumably the Y/luminance weights) scaled by the path throughput
+        interaction.luminosityContributionHint /= interaction.luminosityContributionHint.r + interaction.luminosityContributionHint.g + interaction.luminosityContributionHint.b; // rescale so the three components sum to 1
+        intersection.aniso_interaction = Intersection::interaction_type::create(interaction);
+        return intersection;
+    }
+};
+
+const Shape SceneRectangleLight::light_rectangles[1] = {
+    Shape::create(float3(-3.8,0.35,1.3), normalize(float3(2,0,-1))*7.0, normalize(float3(2,-5,4))*0.1, SceneBase::SCENE_BXDF_COUNT-1u, 1u) // material = last bxdfs[] slot, last argument 1u (light id 0 is reserved for the env light)
+};
+
+using scene_type = SceneRectangleLight;
+
+NBL_CONSTEXPR ProceduralShapeType LIGHT_TYPE = PST_RECTANGLE;
+using light_type = Light;
+
+// light id 0 is reserved for env light
+// however, we start indexing light array without env light, so index 0 is first shape light
+// use constant indices because with variables, driver (at least nvidia) seemed to nuke the light array and propagated constants throughout the code
+// which caused frame times to increase from 16ms to 85ms
+static const light_type lights[scene_type::SCENE_LIGHT_COUNT] = {
+    // imaginary index env light 0 here,
+    light_type::create(SceneBase::SCENE_BXDF_COUNT-1u, 0u, LIGHT_TYPE) // second argument 0u: presumably the index into light_rectangles[] - confirm against Light::create
+};
+
+#endif
diff --git a/31_HLSLPathTracer/app_resources/hlsl/scene_sphere_light.hlsl b/31_HLSLPathTracer/app_resources/hlsl/scene_sphere_light.hlsl
new file mode 100644
index 000000000..17ea519c6
--- /dev/null
+++ b/31_HLSLPathTracer/app_resources/hlsl/scene_sphere_light.hlsl
@@ -0,0 +1,100 @@
+#ifndef _PATHTRACER_EXAMPLE_SCENE_SPHERE_LIGHT_INCLUDED_
+#define _PATHTRACER_EXAMPLE_SCENE_SPHERE_LIGHT_INCLUDED_
+
+#include "scene_base.hlsl"
+
+using namespace nbl;
+using namespace hlsl;
+
+struct SceneSphereLight : SceneBase // scene variant: the base spheres plus one sphere light
+{
+    using scalar_type = float;
+    using vector3_type = vector;
+    using this_t = SceneSphereLight;
+    using base_t = SceneBase;
+    using object_handle_type = ObjectID;
+    using mat_light_id_type = base_t::mat_light_id_type;
+
+    using ray_dir_info_t = bxdf::ray_dir_info::SBasic;
+    using interaction_type = PTIsotropicInteraction;
+
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t SphereCount = base_t::SCENE_SPHERE_COUNT + base_t::SCENE_LIGHT_COUNT; // base spheres followed by the light sphere
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t TriangleCount = 0u;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t RectangleCount = 0u;
+
+    static const Shape light_spheres[1];
+
+    Shape getSphere(uint32_t idx)
+    {
+        assert(idx < SphereCount);
+        if (idx < base_t::SCENE_SPHERE_COUNT)
+            return base_t::scene_spheres[idx];
+        else
+            return light_spheres[idx-base_t::SCENE_SPHERE_COUNT]; // indices past the base spheres address light_spheres[]
+    }
+
+    Shape getTriangle(uint32_t idx) // this scene has no triangles (TriangleCount == 0u): asserts, then returns an uninitialized Shape
+    {
+        assert(false);
+        Shape dummy;
+        return dummy;
+    }
+
+    Shape getRectangle(uint32_t idx) // this scene has no rectangles (RectangleCount == 0u): asserts, then returns an uninitialized Shape
+    {
+        assert(false);
+        Shape dummy;
+        return dummy;
+    }
+
+    void updateLight(NBL_CONST_REF_ARG(float32_t3x4) generalPurposeLightMatrix) // forwards the matrix to the single light shape
+    {
+        light_spheres[0].updateTransform(generalPurposeLightMatrix);
+    }
+
+    mat_light_id_type getMatLightIDs(NBL_CONST_REF_ARG(object_handle_type) objectID) // unpacks the hit sphere's bsdfLightIDs word
+    {
+        assert(objectID.shapeType == PST_SPHERE);
+        return mat_light_id_type::createFromPacked(getSphere(objectID.id).bsdfLightIDs);
+    }
+
+    template
+    Intersection getIntersection(NBL_CONST_REF_ARG(object_handle_type) objectID, NBL_CONST_REF_ARG(Ray) rayIntersected) // builds the hit record (position, geometric normal, interaction) for an already intersected ray
+    {
+        assert(objectID.shapeType == PST_SPHERE);
+        Intersection intersection;
+        intersection.objectID = objectID;
+        intersection.position = rayIntersected.origin + rayIntersected.direction * rayIntersected.intersectionT;
+
+        vector3_type N = getSphere(objectID.id).getNormal(intersection.position);
+        N = hlsl::normalize(N);
+        intersection.geometricNormal = N;
+        ray_dir_info_t V;
+        V.setDirection(-rayIntersected.direction); // view direction points back along the incoming ray
+        interaction_type interaction = interaction_type::create(V, N);
+        interaction.luminosityContributionHint = colorspace::scRGBtoXYZ[1] * rayIntersected.getPayloadThroughput(); // row 1 of scRGBtoXYZ (presumably the Y/luminance weights) scaled by the path throughput
+        interaction.luminosityContributionHint /= interaction.luminosityContributionHint.r + interaction.luminosityContributionHint.g + interaction.luminosityContributionHint.b; // rescale so the three components sum to 1
+        intersection.aniso_interaction = Intersection::interaction_type::create(interaction);
+        return intersection;
+    }
+};
+
+const Shape SceneSphereLight::light_spheres[1] = {
+    Shape::create(float3(-1.5, 1.5, 0.0), 0.3, SceneBase::SCENE_BXDF_COUNT-1u/*last in mat arr*/, 1u) // last argument 1u (light id 0 is reserved for the env light)
+};
+
+using scene_type = SceneSphereLight;
+
+NBL_CONSTEXPR ProceduralShapeType LIGHT_TYPE = PST_SPHERE;
+using light_type = Light;
+
+// light id 0 is reserved for env light
+// however, we start indexing light array without env light, so index 0 is first shape light
+// use constant indices because with variables, driver (at least nvidia) seemed to nuke the light array and propagated constants throughout the code
+// which caused frame times to increase from 16ms to 85ms
+static const light_type lights[scene_type::SCENE_LIGHT_COUNT] = {
+    // imaginary index env light 0 here,
+    light_type::create(SceneBase::SCENE_BXDF_COUNT-1u/*last in mat arr*/, scene_type::SCENE_SPHERE_COUNT, LIGHT_TYPE) // SCENE_SPHERE_COUNT is the first index past the base spheres, which getSphere() maps to light_spheres[0]
+};
+
+#endif
diff --git a/31_HLSLPathTracer/app_resources/hlsl/scene_triangle_light.hlsl b/31_HLSLPathTracer/app_resources/hlsl/scene_triangle_light.hlsl
new file mode 100644
index 000000000..92edfe5cd
--- /dev/null
+++ b/31_HLSLPathTracer/app_resources/hlsl/scene_triangle_light.hlsl
@@ -0,0 +1,97 @@
+#ifndef _PATHTRACER_EXAMPLE_SCENE_TRIANGLE_LIGHT_INCLUDED_
+#define _PATHTRACER_EXAMPLE_SCENE_TRIANGLE_LIGHT_INCLUDED_
+
+#include "scene_base.hlsl"
+
+using namespace nbl;
+using namespace hlsl;
+
+struct SceneTriangleLight : SceneBase // scene variant: the base spheres plus one triangle light
+{
+    using scalar_type = float;
+    using vector3_type = vector;
+    using this_t = SceneTriangleLight;
+    using base_t = SceneBase;
+    using object_handle_type = ObjectID;
+    using mat_light_id_type = base_t::mat_light_id_type;
+
+    using ray_dir_info_t = bxdf::ray_dir_info::SBasic;
+    using interaction_type = PTIsotropicInteraction;
+
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t SphereCount = base_t::SCENE_SPHERE_COUNT;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t TriangleCount = base_t::SCENE_LIGHT_COUNT;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t RectangleCount = 0u;
+
+    static const Shape light_triangles[1];
+
+    Shape getSphere(uint32_t idx)
+    {
+        assert(idx < SphereCount);
+        return base_t::scene_spheres[idx];
+    }
+    Shape getTriangle(uint32_t idx)
+    {
+        assert(idx < TriangleCount);
+        return light_triangles[idx];
+    }
+    Shape getRectangle(uint32_t idx) // this scene has no rectangles (RectangleCount == 0u): asserts, then returns an uninitialized Shape
+    {
+        assert(false);
+        Shape dummy;
+        return dummy;
+    }
+
+    void updateLight(NBL_CONST_REF_ARG(float32_t3x4) generalPurposeLightMatrix) // forwards the matrix to the single light shape
+    {
+        light_triangles[0].updateTransform(generalPurposeLightMatrix);
+    }
+
+    mat_light_id_type getMatLightIDs(NBL_CONST_REF_ARG(object_handle_type) objectID) // unpacks the hit shape's bsdfLightIDs word
+    {
+        assert(objectID.shapeType == PST_SPHERE || objectID.shapeType == PST_TRIANGLE);
+        if (objectID.shapeType == PST_SPHERE)
+            return mat_light_id_type::createFromPacked(getSphere(objectID.id).bsdfLightIDs);
+        else
+            return mat_light_id_type::createFromPacked(getTriangle(objectID.id).bsdfLightIDs);
+    }
+
+    template
+    Intersection getIntersection(NBL_CONST_REF_ARG(object_handle_type) objectID, NBL_CONST_REF_ARG(Ray) rayIntersected) // builds the hit record (position, geometric normal, interaction) for an already intersected ray
+    {
+        assert(objectID.shapeType == PST_SPHERE || objectID.shapeType == PST_TRIANGLE);
+        Intersection intersection;
+        intersection.objectID = objectID;
+        intersection.position = rayIntersected.origin + rayIntersected.direction * rayIntersected.intersectionT;
+
+        vector3_type N = objectID.shapeType == PST_SPHERE ? getSphere(objectID.id).getNormal(intersection.position) : getTriangle(objectID.id).getNormalTimesArea(); // the triangle branch yields normal * area; normalized on the next line
+        N = hlsl::normalize(N);
+        intersection.geometricNormal = N;
+        ray_dir_info_t V;
+        V.setDirection(-rayIntersected.direction); // view direction points back along the incoming ray
+        interaction_type interaction = interaction_type::create(V, N);
+        interaction.luminosityContributionHint = colorspace::scRGBtoXYZ[1] * rayIntersected.getPayloadThroughput(); // row 1 of scRGBtoXYZ (presumably the Y/luminance weights) scaled by the path throughput
+        interaction.luminosityContributionHint /= interaction.luminosityContributionHint.r + interaction.luminosityContributionHint.g + interaction.luminosityContributionHint.b; // rescale so the three components sum to 1
+        intersection.aniso_interaction = Intersection::interaction_type::create(interaction);
+        return intersection;
+    }
+};
+
+const Shape SceneTriangleLight::light_triangles[1] = {
+    Shape::create(float3(-1.8,0.35,0.3) * 10.0, float3(-1.2,0.35,0.0) * 10.0, float3(-1.5,0.8,-0.3) * 10.0, SceneBase::SCENE_BXDF_COUNT-1u, 1u) // material = last bxdfs[] slot, last argument 1u (light id 0 is reserved for the env light)
+};
+
+using scene_type = SceneTriangleLight;
+
+NBL_CONSTEXPR ProceduralShapeType LIGHT_TYPE = PST_TRIANGLE;
+using light_type = Light;
+
+// light id 0 is reserved for env light
+// however, we start indexing light array without env light, so index 0 is first shape light
+// use constant indices because with variables, driver (at least nvidia) seemed to nuke the light array and propagated constants throughout the code
+// which caused frame times to increase from 16ms to 85ms
+static const light_type lights[scene_type::SCENE_LIGHT_COUNT] = {
+    // imaginary index env light 0 here,
+    light_type::create(SceneBase::SCENE_BXDF_COUNT-1u, 0u, LIGHT_TYPE) // second argument 0u: presumably the index into light_triangles[] - confirm against Light::create
+};
+
+#endif
diff --git a/31_HLSLPathTracer/config.json.template b/31_HLSLPathTracer/config.json.template
new file mode 100644
index 000000000..24adf54fb
--- /dev/null
+++ b/31_HLSLPathTracer/config.json.template
@@ -0,0 +1,28 @@
+{
+  "enableParallelBuild": true,
+  "threadsPerBuildProcess" : 2,
+  "isExecuted": false,
+  "scriptPath": "",
+  "cmake": {
+    "configurations": [ "Release", "Debug", "RelWithDebInfo" ],
+    "buildModes": [],
+    "requiredOptions": []
+  },
+  "profiles": [
+    {
+      "backend": "vulkan",
+      "platform": "windows",
+      "buildModes": [],
+      "runConfiguration": "Release",
+      "gpuArchitectures": []
+    }
+  ],
+  "dependencies": [],
+  "data": [
+    {
+      "dependencies": [],
+      "command": [""],
+      "outputs": []
+    }
+  ]
+}
diff --git a/31_HLSLPathTracer/include/nbl/this_example/common.hpp b/31_HLSLPathTracer/include/nbl/this_example/common.hpp
new file mode 100644
index 000000000..db051bb3e
--- /dev/null
+++ b/31_HLSLPathTracer/include/nbl/this_example/common.hpp
@@ -0,0 +1,17 @@
+#ifndef __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__
+#define __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__
+
+#include
+
+// common api
+#include "nbl/examples/common/SimpleWindowedApplication.hpp"
+#include "nbl/examples/examples.hpp"
+#include "nbl/examples/cameras/CCamera.hpp"
+#include "nbl/examples/common/CEventCallback.hpp"
+
+// example's own headers
+#include "nbl/ui/ICursorControl.h"
+#include "nbl/ext/ImGui/ImGui.h"
+#include "imgui/imgui_internal.h"
+
+#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__
\ No newline at end of file
diff --git a/31_HLSLPathTracer/include/nbl/this_example/transform.hpp b/31_HLSLPathTracer/include/nbl/this_example/transform.hpp
new file mode 100644
index 000000000..dd6368ca1
--- /dev/null
+++
b/31_HLSLPathTracer/include/nbl/this_example/transform.hpp @@ -0,0 +1,167 @@ +#ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ + +#include "nbl/ui/ICursorControl.h" + +#include "nbl/ext/ImGui/ImGui.h" + +#include "imgui/imgui_internal.h" +#include "imguizmo/ImGuizmo.h" + +struct TransformRequestParams +{ + float camDistance = 8.f; + bool isSphere = false; + ImGuizmo::OPERATION allowedOp; + uint8_t sceneTexDescIx = ~0; + bool useWindow = false, editTransformDecomposition = false, enableViewManipulate = false; +}; + +nbl::hlsl::uint16_t2 EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) +{ + static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE); + static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL); + static bool useSnap = false; + static float snap[3] = { 1.f, 1.f, 1.f }; + static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f }; + static float boundsSnap[] = { 0.1f, 0.1f, 0.1f }; + static bool boundSizing = false; + static bool boundSizingSnap = false; + + if (params.editTransformDecomposition) + { + if (ImGui::IsKeyPressed(ImGuiKey_T)) // Always translate + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + if (ImGui::IsKeyPressed(ImGuiKey_R) && params.allowedOp & ImGuizmo::OPERATION::ROTATE) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + if (ImGui::IsKeyPressed(ImGuiKey_S) && params.allowedOp & ImGuizmo::OPERATION::SCALEU) // for sphere + mCurrentGizmoOperation = ImGuizmo::SCALEU; + if (ImGui::IsKeyPressed(ImGuiKey_S) && params.allowedOp & ImGuizmo::OPERATION::SCALE) // for triangle/rectangle + mCurrentGizmoOperation = ImGuizmo::SCALE_X | ImGuizmo::SCALE_Y; + +#if 0 + if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE)) + mCurrentGizmoOperation 
= ImGuizmo::ROTATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL)) + mCurrentGizmoOperation = ImGuizmo::UNIVERSAL; + float matrixTranslation[3], matrixRotation[3], matrixScale[3]; + ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale); + ImGui::InputFloat3("Tr", matrixTranslation); + ImGui::InputFloat3("Rt", matrixRotation); + ImGui::InputFloat3("Sc", matrixScale); + ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix); + + if (mCurrentGizmoOperation != ImGuizmo::SCALE) + { + if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL)) + mCurrentGizmoMode = ImGuizmo::LOCAL; + ImGui::SameLine(); + if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD)) + mCurrentGizmoMode = ImGuizmo::WORLD; + } + if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift)) + useSnap = !useSnap; + ImGui::Checkbox("##UseSnap", &useSnap); + ImGui::SameLine(); + + switch (mCurrentGizmoOperation) + { + case ImGuizmo::TRANSLATE: + ImGui::InputFloat3("Snap", &snap[0]); + break; + case ImGuizmo::ROTATE: + ImGui::InputFloat("Angle Snap", &snap[0]); + break; + case ImGuizmo::SCALE: + ImGui::InputFloat("Scale Snap", &snap[0]); + break; + } + ImGui::Checkbox("Bound Sizing", &boundSizing); + if (boundSizing) + { + ImGui::PushID(3); + ImGui::Checkbox("##BoundSizing", &boundSizingSnap); + ImGui::SameLine(); + ImGui::InputFloat3("Snap", boundsSnap); + ImGui::PopID(); + } +#endif + } + + ImGuiIO& io = ImGui::GetIO(); + float viewManipulateRight = io.DisplaySize.x; + float viewManipulateTop = 0; + static ImGuiWindowFlags gizmoWindowFlags = 0; + + /* + for the "useWindow" case we just render to a gui area, + otherwise to fake full screen transparent window + + note that for both cases we make sure 
gizmo being + rendered is aligned to our texture scene using + imgui "cursor" screen positions + */ + // TODO: this shouldn't be handled here I think + SImResourceInfo info; + info.textureID = params.sceneTexDescIx; + info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; + + nbl::hlsl::uint16_t2 retval; + if (params.useWindow) + { + ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing); + ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f)); + ImGui::Begin("Gizmo", 0, gizmoWindowFlags); + ImGuizmo::SetDrawlist(); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 windowPos = ImGui::GetWindowPos(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval = { contentRegionSize.x, contentRegionSize.y }; + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + + ImGuiWindow* window = ImGui::GetCurrentWindow(); + gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? 
ImGuiWindowFlags_NoMove : 0); + } + else + { + ImGui::SetNextWindowPos(ImVec2(0, 0)); + ImGui::SetNextWindowSize(io.DisplaySize); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window + ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval = { contentRegionSize.x, contentRegionSize.y }; + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + } + + ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL); + + //if (params.enableViewManipulate) + //ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + + ImGui::End(); + ImGui::PopStyleColor(); + + return retval; +} + +#endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp new file mode 100644 index 000000000..2981e3c1b --- /dev/null +++ b/31_HLSLPathTracer/main.cpp @@ -0,0 +1,1580 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" +#include "nbl/this_example/transform.hpp" +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" +#include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" +#include "nbl/this_example/common.hpp" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "nbl/builtin/hlsl/sampling/quantized_sequence.hlsl" +#include "app_resources/hlsl/render_common.hlsl" +#include "app_resources/hlsl/render_rwmc_common.hlsl" +#include "app_resources/hlsl/resolve_common.hlsl" + +using namespace nbl; +using namespace core; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; +using namespace nbl::examples; + +// TODO: Add a QueryPool for timestamping once its ready +// TODO: Do buffer creation using assConv +class HLSLComputePathtracer final : public SimpleWindowedApplication, public BuiltinResourcesApplication +{ + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; + using clock_t = std::chrono::steady_clock; + + enum E_LIGHT_GEOMETRY : uint8_t + { + ELG_SPHERE, + ELG_TRIANGLE, + ELG_RECTANGLE, + ELG_COUNT + }; + + constexpr static inline uint32_t2 WindowDimensions = { 1280, 720 }; + constexpr static inline uint32_t MaxFramesInFlight = 5; + static inline std::string DefaultImagePathsFile = "envmap/envmap_0.exr"; + static inline std::string OwenSamplerFilePath = "owen_sampler_buffer.bin"; + static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; + static inline std::array PTHLSLShaderVariants = { + "SPHERE_LIGHT", + "TRIANGLE_LIGHT", + "RECTANGLE_LIGHT" + }; + static inline std::string ResolveShaderPath = "app_resources/hlsl/resolve.comp.hlsl"; + static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; + + const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { + "ELG_SPHERE", + 
"ELG_TRIANGLE", + "ELG_RECTANGLE" + }; + + public: + inline HLSLComputePathtracer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + inline bool isComputeOnly() const override { return false; } + + inline video::SPhysicalDeviceLimits getRequiredDeviceLimits() const override + { + video::SPhysicalDeviceLimits retval = device_base_t::getRequiredDeviceLimits(); + retval.storagePushConstant16 = true; + return retval; + } + + inline core::vector getSurfaces() const override + { + if (!m_surface) + { + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = WindowDimensions.x; + params.height = WindowDimensions.y; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "ComputeShaderPathtracer"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); + } + + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + + return {}; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Init systems + { + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + // Remember to call the base class initialization! 
+ if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + + if (!m_semaphore) + return logFail("Failed to create semaphore!"); + } + + // Create renderpass and init surface + nbl::video::IGPURenderpass* renderpass; + { + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + { + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + renderpass = scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + } + + // Create command pool and buffers + { + auto gQueue = getGraphicsQueue(); + m_cmdPool = 
m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); + + if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), MaxFramesInFlight })) + return logFail("Couldn't create Command Buffer!"); + } + + ISampler::SParams samplerParams = { + .AnisotropicFilter = 0 + }; + auto defaultSampler = m_device->createSampler(samplerParams); + + // Create descriptors and pipeline for the pathtracer + { + auto convertDSLayoutCPU2GPU = [&](smart_refctd_ptr cpuLayout) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuLayout.get(),1 }; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuLayout = reservation.getGPUObjects().front().value; + if (!gpuLayout) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSetLayout handle", ILogger::ELL_ERROR); + std::exit(-1); + } + + return gpuLayout; + }; + auto convertDSCPU2GPU = [&](smart_refctd_ptr cpuDS) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuDS.get(), 1 }; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to 
make the `smart_refctd_ptr` copyable + auto gpuDS = reservation.getGPUObjects().front().value; + if (!gpuDS) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSet handle", ILogger::ELL_ERROR); + std::exit(-1); + } + + return gpuDS; + }; + + std::array descriptorSetBindings = {}; + std::array presentDescriptorSetBindings; + + descriptorSetBindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSetBindings[1] = { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSetBindings[2] = { + .binding = 2u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSetBindings[3] = { + .binding = 3u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + + presentDescriptorSetBindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + .immutableSamplers = &defaultSampler + }; + + auto cpuDescriptorSetLayout = make_smart_refctd_ptr(descriptorSetBindings); + + auto gpuDescriptorSetLayout = 
convertDSLayoutCPU2GPU(cpuDescriptorSetLayout); + auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(presentDescriptorSetBindings); + + auto cpuDescriptorSet = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout)); + + m_descriptorSet = convertDSCPU2GPU(cpuDescriptorSet); + + smart_refctd_ptr presentDSPool; + { + const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; + const uint32_t setCounts[] = { 1u }; + presentDSPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + } + m_presentDescriptorSet = presentDSPool->createDescriptorSet(gpuPresentDescriptorSetLayout); + + // Create Shaders + auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "", bool persistentWorkGroups = false, bool rwmc = false) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.workingDirectory = localInputCWD; + auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + auto source = smart_refctd_ptr_static_cast(assets[0]); + // The down-cast should not fail! 
+ assert(source); + + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#endif + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; + options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + core::vector defines; + defines.reserve(3); + if (!defineMacro.empty()) + defines.push_back({ defineMacro, "" }); + if(persistentWorkGroups) + defines.push_back({ "PERSISTENT_WORKGROUPS", "1" }); + if(rwmc) + defines.push_back({ "RWMC_ENABLED", "" }); + + options.preprocessorOptions.extraDefines = defines; + + source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + + auto shader = m_device->compileShader({ source.get(), nullptr, nullptr, nullptr }); + if (!shader) + { + m_logger->log("HLSL shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + return shader; + }; + + const uint32_t deviceMinSubgroupSize = m_device->getPhysicalDevice()->getLimits().minSubgroupSize; + auto getComputePipelineCreationParams = [deviceMinSubgroupSize](IShader* shader, IGPUPipelineLayout* pipelineLayout) -> IGPUComputePipeline::SCreationParams + { + IGPUComputePipeline::SCreationParams params = {}; + params.layout = pipelineLayout; + params.shader.shader = shader; + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + 
params.shader.requiredSubgroupSize = static_cast(hlsl::log2(float(deviceMinSubgroupSize))); + + return params; + }; + + // Create compute pipelines + { + for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) + { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RenderPushConstants) + }; + auto ptPipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + if (!ptPipelineLayout) + return logFail("Failed to create Pathtracing pipeline layout"); + + const nbl::asset::SPushConstantRange rwmcPcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RenderRWMCPushConstants) + }; + auto rwmcPtPipelineLayout = m_device->createPipelineLayout( + { &rwmcPcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + if (!rwmcPtPipelineLayout) + return logFail("Failed to create RWMC Pathtracing pipeline layout"); + + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index]); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelines.data() + index)) + return logFail("Failed to create HLSL compute pipeline!\n"); + } + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelines.data() + index)) + return logFail("Failed to create HLSL PersistentWG compute pipeline!\n"); + } + + // rwmc pipelines + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], false, true); + auto params = 
getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelinesRWMC.data() + index)) + return logFail("Failed to create HLSL RWMC compute pipeline!\n"); + } + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true, true); + auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelinesRWMC.data() + index)) + return logFail("Failed to create HLSL RWMC PersistentWG compute pipeline!\n"); + } + } + } + + // Create resolve pipelines + { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(ResolvePushConstants) + }; + + auto pipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout) + ); + + if (!pipelineLayout) { + return logFail("Failed to create resolve pipeline layout"); + } + + { + auto shader = loadAndCompileHLSLShader(ResolveShaderPath); + auto params = getComputePipelineCreationParams(shader.get(), pipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_resolvePipeline)) + return logFail("Failed to create HLSL resolve compute pipeline!\n"); + } + } + + // Create graphics pipeline + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load Fragment Shader + auto fragmentShader = loadAndCompileHLSLShader(PresentShaderPath); + if (!fragmentShader) + return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); + + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader 
= fragmentShader.get(), + .entryPoint = "main" + }; + + auto presentLayout = m_device->createPipelineLayout( + {}, + core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + if (!m_presentPipeline) + return logFail("Could not create Graphics Pipeline!"); + + } + } + + // load CPUImages and convert to GPUImages + smart_refctd_ptr envMap, scrambleMap; + { + auto convertImgCPU2GPU = [&](std::span cpuImgs) { + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + std::array commandBufferInfo = { cmdbuf }; + core::smart_refctd_ptr imgFillSemaphore = m_device->createSemaphore(0); + imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); + + auto converter = CAssetConverter::create({ .device = m_device.get() }); + // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. 
+ struct SInputs final : CAssetConverter::SInputs + { + // we also need to override this to have concurrent sharing + inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override + { + if (familyIndices.size() > 1) + return familyIndices; + return {}; + } + + inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return image->getCreationParameters().mipLevels; + } + inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return 0b0u; + } + + std::vector familyIndices; + } inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + { + const core::set uniqueFamilyIndices = { queue->getFamilyIndex(), queue->getFamilyIndex() }; + inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; + } + // scratch command buffers for asset converter transfer commands + SIntendedSubmitInfo transfer = { + .queue = queue, + .waitSemaphores = {}, + .prevCommandBuffers = {}, + .scratchCommandBuffers = commandBufferInfo, + .scratchSemaphore = { + .semaphore = imgFillSemaphore.get(), + .value = 0, + // because of layout transitions + .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + } + }; + // as per the `SIntendedSubmitInfo` one commandbuffer must be begun + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // Normally we'd have to inherit and override the `getFinalOwnerQueueFamily` callback to ensure that the + // compute queue becomes the owner of the buffers and images post-transfer, but in this example we use concurrent sharing + CAssetConverter::SConvertParams params = {}; + params.transfer = &transfer; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = cpuImgs; + // assert that we don't need to provide patches + 
assert(cpuImgs[0]->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuImgs = reservation.getGPUObjects(); + for (auto& gpuImg : gpuImgs) { + if (!gpuImg) { + m_logger->log("Failed to convert %s into an IGPUImage handle", ILogger::ELL_ERROR, DefaultImagePathsFile); + std::exit(-1); + } + } + + // and launch the conversions + m_api->startCapture(); + auto result = reservation.convert(params); + m_api->endCapture(); + if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { + m_logger->log("Failed to record or submit conversions", ILogger::ELL_ERROR); + std::exit(-1); + } + + envMap = gpuImgs[0].value; + scrambleMap = gpuImgs[1].value; + }; + + smart_refctd_ptr envMapCPU, scrambleMapCPU; + { + IAssetLoader::SAssetLoadParams lp; + lp.workingDirectory = this->sharedInputCWD; + SAssetBundle bundle = m_assetMgr->getAsset(DefaultImagePathsFile, lp); + if (bundle.getContents().empty()) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + + envMapCPU = IAsset::castDown(bundle.getContents()[0]); + if (!envMapCPU) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + }; + { + asset::ICPUImage::SCreationParams info; + info.format = asset::E_FORMAT::EF_R32G32_UINT; + info.type = asset::ICPUImage::ET_2D; + auto extent = envMapCPU->getCreationParameters().extent; + info.extent.width = extent.width; + info.extent.height = extent.height; + info.extent.depth = 1u; + info.mipLevels = 1u; + info.arrayLayers = 1u; + info.samples = asset::ICPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + info.flags = static_cast(0u); + info.usage = asset::IImage::EUF_TRANSFER_SRC_BIT | asset::IImage::EUF_SAMPLED_BIT; + + scrambleMapCPU = ICPUImage::create(std::move(info)); + const uint32_t texelFormatByteSize = 
getTexelOrBlockBytesize(scrambleMapCPU->getCreationParameters().format); + const uint32_t texelBufferSize = scrambleMapCPU->getImageDataSizeInBytes(); + auto texelBuffer = ICPUBuffer::create({ texelBufferSize }); + + core::RandomSampler rng(0xbadc0ffeu); + auto out = reinterpret_cast(texelBuffer->getPointer()); + for (auto index = 0u; index < texelBufferSize / 4; index++) { + out[index] = rng.nextSample(); + } + + auto regions = core::make_refctd_dynamic_array>(1u); + ICPUImage::SBufferCopy& region = regions->front(); + region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = 0u; + region.imageSubresource.baseArrayLayer = 0u; + region.imageSubresource.layerCount = 1u; + region.bufferOffset = 0u; + region.bufferRowLength = IImageAssetHandlerBase::calcPitchInBlocks(extent.width, texelFormatByteSize); + region.bufferImageHeight = 0u; + region.imageOffset = { 0u, 0u, 0u }; + region.imageExtent = scrambleMapCPU->getCreationParameters().extent; + + scrambleMapCPU->setBufferAndRegions(std::move(texelBuffer), regions); + + // programmatically user-created IPreHashed need to have their hash computed (loaders do it while loading) + scrambleMapCPU->setContentHash(scrambleMapCPU->computeContentHash()); + } + + std::array cpuImgs = { envMapCPU.get(), scrambleMapCPU.get() }; + convertImgCPU2GPU(cpuImgs); + } + + // create views for textures + { + auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height, const bool useCascadeCreationParameters = false) -> smart_refctd_ptr { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = colorFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = width; + imgInfo.extent.height = height; + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.samples = IGPUImage::ESCF_1_BIT; + imgInfo.flags = static_cast(0u); + + if (!useCascadeCreationParameters) + { + imgInfo.arrayLayers = 1u; + imgInfo.usage = 
asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + } + else + { + imgInfo.arrayLayers = CascadeCount; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT; + } + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + return image; + }; + auto createHDRIImageView = [this](smart_refctd_ptr img, const uint32_t imageArraySize = 1u, const IGPUImageView::E_TYPE imageViewType = IGPUImageView::ET_2D) -> smart_refctd_ptr + { + auto format = img->getCreationParameters().format; + IGPUImageView::SCreationParams imgViewInfo; + imgViewInfo.image = std::move(img); + imgViewInfo.format = format; + imgViewInfo.flags = static_cast(0u); + imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + imgViewInfo.subresourceRange.baseArrayLayer = 0u; + imgViewInfo.subresourceRange.baseMipLevel = 0u; + imgViewInfo.subresourceRange.levelCount = 1u; + imgViewInfo.viewType = imageViewType; + + imgViewInfo.subresourceRange.layerCount = imageArraySize; + + return m_device->createImageView(std::move(imgViewInfo)); + }; + + auto params = envMap->getCreationParameters(); + auto extent = params.extent; + + envMap->setObjectDebugName("Env Map"); + m_envMapView = createHDRIImageView(envMap); + m_envMapView->setObjectDebugName("Env Map View"); + + scrambleMap->setObjectDebugName("Scramble Map"); + m_scrambleView = createHDRIImageView(scrambleMap); + m_scrambleView->setObjectDebugName("Scramble Map View"); + + auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); + outImg->setObjectDebugName("Output Image"); + m_outImgView = createHDRIImageView(outImg, 1, IGPUImageView::ET_2D_ARRAY); + m_outImgView->setObjectDebugName("Output Image View"); + + auto cascade = 
createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y, true); + cascade->setObjectDebugName("Cascade"); + m_cascadeView = createHDRIImageView(cascade, CascadeCount, IGPUImageView::ET_2D_ARRAY); + m_cascadeView->setObjectDebugName("Cascade View"); + } + + // create sequence buffer view + { + // TODO: do this better use asset manager to get the ICPUBuffer from `.bin` + auto createBufferFromCacheFile = [this]( + system::path filename, + size_t bufferSize, + void *data, + smart_refctd_ptr& buffer + ) -> std::pair, bool> + { + ISystem::future_t> owenSamplerFileFuture; + ISystem::future_t owenSamplerFileReadFuture; + size_t owenSamplerFileBytesRead; + + m_system->createFile(owenSamplerFileFuture, localOutputCWD / filename, IFile::ECF_READ); + smart_refctd_ptr owenSamplerFile; + + if (owenSamplerFileFuture.wait()) + { + owenSamplerFileFuture.acquire().move_into(owenSamplerFile); + if (!owenSamplerFile) + return { nullptr, false }; + + owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, bufferSize); + if (owenSamplerFileReadFuture.wait()) + { + owenSamplerFileReadFuture.acquire().move_into(owenSamplerFileBytesRead); + + if (owenSamplerFileBytesRead < bufferSize) + { + buffer = asset::ICPUBuffer::create({ sizeof(uint32_t) * bufferSize }); + return { owenSamplerFile, false }; + } + + buffer = asset::ICPUBuffer::create({ { sizeof(uint32_t) * bufferSize }, data }); + } + } + + return { owenSamplerFile, true }; + }; + auto writeBufferIntoCacheFile = [this](smart_refctd_ptr file, size_t bufferSize, void* data) + { + ISystem::future_t owenSamplerFileWriteFuture; + size_t owenSamplerFileBytesWritten; + + file->write(owenSamplerFileWriteFuture, data, 0, bufferSize); + if (owenSamplerFileWriteFuture.wait()) + owenSamplerFileWriteFuture.acquire().move_into(owenSamplerFileBytesWritten); + }; + + constexpr uint32_t quantizedDimensions = MaxBufferDimensions / 3u; + constexpr size_t bufferSize = quantizedDimensions * MaxSamplesBuffer; + 
using sequence_type = sampling::QuantizedSequence; + std::array data = {}; + smart_refctd_ptr sampleSeq; + + auto cacheBufferResult = createBufferFromCacheFile(sharedOutputCWD/OwenSamplerFilePath, bufferSize, data.data(), sampleSeq); + if (!cacheBufferResult.second) + { + core::OwenSampler sampler(MaxBufferDimensions, 0xdeadbeefu); + + ICPUBuffer::SCreationParams params = {}; + params.size = quantizedDimensions * MaxSamplesBuffer * sizeof(sequence_type); + sampleSeq = ICPUBuffer::create(std::move(params)); + + auto out = reinterpret_cast(sampleSeq->getPointer()); + for (auto dim = 0u; dim < MaxBufferDimensions; dim++) + for (uint32_t i = 0; i < MaxSamplesBuffer; i++) + { + const uint32_t quant_dim = dim / 3u; + const uint32_t offset = dim % 3u; + auto& seq = out[i * quantizedDimensions + quant_dim]; + const uint32_t sample = sampler.sample(dim, i); + seq.set(offset, sample); + } + if (cacheBufferResult.first) + writeBufferIntoCacheFile(cacheBufferResult.first, bufferSize, out); + } + + IGPUBuffer::SCreationParams params = {}; + params.usage = asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_STORAGE_BUFFER_BIT | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + params.size = bufferSize; + + // we don't want to overcomplicate the example with multi-queue + m_utils->createFilledDeviceLocalBufferOnDedMem( + SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, + std::move(params), + sampleSeq->getPointer() + ).move_into(m_sequenceBuffer); + + m_sequenceBuffer->setObjectDebugName("Sequence buffer"); + } + + // Update Descriptors + { + ISampler::SParams samplerParams0 = { + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::ETBC_FLOAT_OPAQUE_BLACK, + ISampler::ETF_LINEAR, + ISampler::ETF_LINEAR, + ISampler::ESMM_LINEAR, + 0u, + false, + ECO_ALWAYS + }; + auto sampler0 = m_device->createSampler(samplerParams0); + ISampler::SParams samplerParams1 = { + 
ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::ETBC_INT_OPAQUE_BLACK, + ISampler::ETF_NEAREST, + ISampler::ETF_NEAREST, + ISampler::ESMM_NEAREST, + 0u, + false, + ECO_ALWAYS + }; + auto sampler1 = m_device->createSampler(samplerParams1); + + std::array writeDSInfos = {}; + writeDSInfos[0].desc = m_outImgView; + writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[1].desc = m_cascadeView; + writeDSInfos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[2].desc = m_envMapView; + // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; + writeDSInfos[2].info.combinedImageSampler.sampler = sampler0; + writeDSInfos[2].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[3].desc = m_scrambleView; + // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; + writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; + writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[4].desc = m_outImgView; + writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + std::array writeDescriptorSets = {}; + writeDescriptorSets[0] = { + .dstSet = m_descriptorSet.get(), + .binding = 2, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[0] + }; + writeDescriptorSets[1] = { + .dstSet = m_descriptorSet.get(), + .binding = 3, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[1] + }; + writeDescriptorSets[2] = { + 
.dstSet = m_descriptorSet.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[2] + }; + writeDescriptorSets[3] = { + .dstSet = m_descriptorSet.get(), + .binding = 1, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[3] + }; + writeDescriptorSets[4] = { + .dstSet = m_presentDescriptorSet.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[4] + }; + + m_device->updateDescriptorSets(writeDescriptorSets, {}); + } + + // Create ui descriptors + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + + m_ui.samplers.gui = m_device->createSampler(params); + m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); + } + + std::array, 69u> immutableSamplers; + for (auto& it : immutableSamplers) + it = smart_refctd_ptr(m_ui.samplers.scene); + + immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); + + nbl::ext::imgui::UI::SCreationParameters params; + + params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; + params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; + params.assetManager = m_assetMgr; + params.pipelineCache = nullptr; + params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); + params.renderpass = smart_refctd_ptr(renderpass); + params.streamingBuffer = nullptr; + params.subpassIx = 0u; + params.transfer = getTransferUpQueue(); + params.utilities = m_utils; + { + m_ui.manager = ext::imgui::UI::create(std::move(params)); + + // note that we use default layout provided by our extension, but you are free to create your 
own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + const auto& params = m_ui.manager->getCreationParameters(); + + IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; + descriptorPoolInfo.maxSets = 1u; + descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; + + m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); + assert(m_guiDescriptorSetPool); + + m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); + assert(m_ui.descriptorSet); + } + } + m_ui.manager->registerListener( + [this]() -> void { + ImGuiIO& io = ImGui::GetIO(); + ImGuizmo::SetOrthographic(false); + ImGuizmo::BeginFrame(); + ImGuizmo::SetRect(ImGui::GetWindowPos().x, ImGui::GetWindowPos().y, ImGui::GetWindowWidth(), ImGui::GetWindowHeight()); + + const auto aspectRatio = io.DisplaySize.x / io.DisplaySize.y; + m_camera.setProjectionMatrix(hlsl::math::thin_lens::rhPerspectiveFovMatrix(hlsl::radians(guiControlled.fov), aspectRatio, guiControlled.zNear, guiControlled.zFar)); + + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Controls"); + + ImGui::SameLine(); + + ImGui::Text("Camera"); + + ImGui::Text("Press Home to reset camera."); + ImGui::Text("Press End to reset light."); + + ImGui::SliderFloat("Move speed", 
&guiControlled.moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &guiControlled.rotateSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Fov", &guiControlled.fov, 20.f, 150.f); + ImGui::SliderFloat("zNear", &guiControlled.zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &guiControlled.zFar, 110.f, 10000.f); + ImGui::Combo("Shader", &guiControlled.PTPipeline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); + ImGui::SliderInt("SPP", &guiControlled.spp, 1, MaxSamplesBuffer); + ImGui::SliderInt("Depth", &guiControlled.depth, 1, MaxBufferDimensions / 4); + ImGui::Checkbox("Persistent WorkGroups", &guiControlled.usePersistentWorkGroups); + + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + + ImGui::Text("\nRWMC settings:"); + ImGui::Checkbox("Enable RWMC", &guiControlled.useRWMC); + ImGui::SliderFloat("start", &guiControlled.rwmcParams.start, 1.0f, 32.0f); + ImGui::SliderFloat("base", &guiControlled.rwmcParams.base, 1.0f, 32.0f); + ImGui::SliderFloat("minReliableLuma", &guiControlled.rwmcParams.minReliableLuma, 0.1f, 1024.0f); + ImGui::SliderFloat("kappa", &guiControlled.rwmcParams.kappa, 0.1f, 1024.0f); + + ImGui::End(); + } + ); + + m_ui.manager->registerListener( + [this]() -> void { + static struct + { + hlsl::float32_t4x4 view, projection; + } imguizmoM16InOut; + + ImGuizmo::SetID(0u); + + imguizmoM16InOut.view = hlsl::transpose(math::linalg::promoted_mul(float32_t4x4(1.f), m_camera.getViewMatrix())); + imguizmoM16InOut.projection = hlsl::transpose(m_camera.getProjectionMatrix()); + imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ + + m_transformParams.editTransformDecomposition = true; + m_transformParams.sceneTexDescIx = 1u; + + if (ImGui::IsKeyPressed(ImGuiKey_End)) + { + m_lightModelMatrix = hlsl::float32_t4x4( + 0.3f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.3f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.3f, 0.0f, + -1.0f, 1.5f, 0.0f, 1.0f + ); + } + + if (E_LIGHT_GEOMETRY::ELG_SPHERE == 
guiControlled.PTPipeline) + { + m_transformParams.allowedOp = ImGuizmo::OPERATION::TRANSLATE | ImGuizmo::OPERATION::SCALEU; + m_transformParams.isSphere = true; + } + else + { + m_transformParams.allowedOp = ImGuizmo::OPERATION::TRANSLATE | ImGuizmo::OPERATION::ROTATE | ImGuizmo::OPERATION::SCALE; + m_transformParams.isSphere = false; + } + EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &m_lightModelMatrix[0][0], m_transformParams); + + if (E_LIGHT_GEOMETRY::ELG_SPHERE == guiControlled.PTPipeline) + { + // keep uniform scale for sphere + float32_t uniformScale = (m_lightModelMatrix[0][0] + m_lightModelMatrix[1][1] + m_lightModelMatrix[2][2]) / 3.0f; + m_lightModelMatrix[0][0] = uniformScale; + m_lightModelMatrix[1][1] = uniformScale; // Doesn't affect sphere but will affect rectangle/triangle if switching shapes + m_lightModelMatrix[2][2] = uniformScale; + } + + } + ); + + // Set Camera + { + core::vectorSIMDf cameraPosition(0, 5, -10); + const auto proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix(hlsl::radians(guiControlled.fov), WindowDimensions.x / WindowDimensions.y, guiControlled.zNear, guiControlled.zFar); + m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); + } + m_showUI = true; + + m_winMgr->setWindowSize(m_window.get(), WindowDimensions.x, WindowDimensions.y); + m_surface->recreateSwapchain(); + m_winMgr->show(m_window.get()); + m_oracle.reportBeginFrameRecord(); + m_camera.mapKeysToArrows(); + + // set initial rwmc settings + + guiControlled.rwmcParams.start = hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], LightEminence); + guiControlled.rwmcParams.base = 8.0f; + guiControlled.rwmcParams.minReliableLuma = 1.0f; + guiControlled.rwmcParams.kappa = 5.0f; + return true; + } + + /** Binds the ImGui font atlas to the UI descriptor set: fills the FontAtlasTexId slot of descriptorInfo with m_ui.manager->getFontAtlasView() in READ_ONLY_OPTIMAL layout and submits the writes through m_device->updateDescriptorSets. descriptorInfo and writes are function-local statics, so their contents persist between calls. @return the result of m_device->updateDescriptorSets. */ + bool updateGUIDescriptorSet() + { + // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout + static std::array descriptorInfo; + static 
IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; + + /* describe the font atlas: the view comes from the UI manager and is declared to be in READ_ONLY_OPTIMAL layout */ + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); + + /* every write targets binding 0 of m_ui.descriptorSet, one array element per slot, one descriptor each */ + for (uint32_t i = 0; i < descriptorInfo.size(); ++i) + { + writes[i].dstSet = m_ui.descriptorSet.get(); + writes[i].binding = 0u; + writes[i].arrayElement = i; + writes[i].count = 1u; + } + /* only the font-atlas write is given descriptor info. NOTE(review): all MaxUITextureCount writes are submitted below, and no other slot has .info assigned in this function - presumably valid only while MaxUITextureCount == 1; confirm before raising it */ + writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; + + return m_device->updateDescriptorSets(writes, {}); + } + + inline void workLoopBody() override + { + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + // We block for semaphores for 2 reasons here: + // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] + // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. 
[MaxAcquiresInFlight] + if (m_realFrameIx >= framesInFlight) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + 1 - framesInFlight + } + }; + if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + // CPU events + update(); + + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[resourceIx].get(); + + if (!keepRunning()) + return; + + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + + // safe to proceed + // upload buffer data + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); + + RenderRWMCPushConstants rwmcPushConstants; + ResolvePushConstants resolvePushConstants; + RenderPushConstants pc; + auto updatePathtracerPushConstants = [&]() -> void { + // disregard surface/swapchain transformation for now + const float32_t4x4 viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + const float32_t3x4 modelMatrix = hlsl::math::linalg::identity(); + + const float32_t4x4 modelViewProjectionMatrix = nbl::hlsl::math::linalg::promoted_mul(viewProjectionMatrix, modelMatrix); + const float32_t4x4 invMVP = hlsl::inverse(modelViewProjectionMatrix); + + if (guiControlled.useRWMC) + { + rwmcPushConstants.renderPushConstants.invMVP = invMVP; + rwmcPushConstants.renderPushConstants.generalPurposeLightMatrix = hlsl::float32_t3x4(transpose(m_lightModelMatrix)); + rwmcPushConstants.renderPushConstants.depth = guiControlled.depth; + rwmcPushConstants.renderPushConstants.sampleCount = guiControlled.rwmcParams.sampleCount = guiControlled.spp; + rwmcPushConstants.renderPushConstants.pSampleSequence = m_sequenceBuffer->getDeviceAddress(); + rwmcPushConstants.splattingParameters = rwmc::SPackedSplattingParameters::create(guiControlled.rwmcParams.base, guiControlled.rwmcParams.start, CascadeCount); + } + else + { + pc.invMVP 
= invMVP; + pc.generalPurposeLightMatrix = hlsl::float32_t3x4(transpose(m_lightModelMatrix)); + pc.sampleCount = guiControlled.spp; + pc.depth = guiControlled.depth; + pc.pSampleSequence = m_sequenceBuffer->getDeviceAddress(); + } + }; + updatePathtracerPushConstants(); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // transit m_cascadeView layout to GENERAL, block until previous shader is done with reading from the cascade + if(guiControlled.useRWMC) + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::NONE + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = CascadeCount + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + + { + // TODO: shouldn't 
it be computed only at initialization stage and on window resize? + const uint32_t dispatchSize = guiControlled.usePersistentWorkGroups ? + m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, RenderWorkgroupSize) : + 1 + (WindowDimensions.x * WindowDimensions.y - 1) / RenderWorkgroupSize; + + IGPUComputePipeline* pipeline = pickPTPipeline(); + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet.get()); + + const uint32_t pushConstantsSize = guiControlled.useRWMC ? sizeof(RenderRWMCPushConstants) : sizeof(RenderPushConstants); + const void* pushConstantsPtr = guiControlled.useRWMC ? reinterpret_cast(&rwmcPushConstants) : reinterpret_cast(&pc); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, pushConstantsSize, pushConstantsPtr); + + cmdbuf->dispatch(dispatchSize, 1u, 1u); + } + + // m_cascadeView synchronization - wait for previous compute shader to write into the cascade + // TODO: create this and every other barrier once outside of the loop? 
+ if(guiControlled.useRWMC) + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = CascadeCount + } + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + + // resolve + const uint32_t2 dispatchSize = uint32_t2( // Round up division + (m_window->getWidth() + ResolveWorkgroupSizeX - 1) / ResolveWorkgroupSizeX, + (m_window->getHeight() + ResolveWorkgroupSizeY - 1) / ResolveWorkgroupSizeY + ); + + IGPUComputePipeline* pipeline = m_resolvePipeline.get(); + + resolvePushConstants.resolveParameters = rwmc::SResolveParameters::create(guiControlled.rwmcParams); + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); + + cmdbuf->dispatch(dispatchSize.x, dispatchSize.y, 1u); + } + + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = 
IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // TODO: tone mapping and stuff + + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = WindowDimensions.x; + viewport.height = WindowDimensions.y; + } + cmdbuf->setViewport(0u, 1u, &viewport); + + + VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; + cmdbuf->setScissor(defaultScisors); + + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + auto scRes = static_cast(m_surface->getSwapchainResources()); + + // Upload m_outImg to swapchain + UI + { + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearColor, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; + + cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); + ext::FullScreenTriangle::recordDrawCall(cmdbuf); + + if (m_showUI) + { + const auto uiParams = m_ui.manager->getCreationParameters(); + auto* uiPipeline = m_ui.manager->getPipeline(); + cmdbuf->bindGraphicsPipeline(uiPipeline); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, 
&m_ui.descriptorSet.get()); + m_ui.manager->render(cmdbuf, waitInfo); + } + + cmdbuf->endRenderPass(); + } + + cmdbuf->end(); + { + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT + } + }; + { + { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + + updateGUIDescriptorSet(); + + m_api->startCapture(); + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + m_realFrameIx--; + m_api->endCapture(); + } + } + + m_window->setCaption("[Nabla Engine] HLSL Compute Path Tracer"); + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + } + } + + inline bool keepRunning() override + { + if (m_surface->irrecoverable()) + return false; + + return true; + } + + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } + + inline void update() + { + m_camera.setMoveSpeed(guiControlled.moveSpeed); + m_camera.setRotateSpeed(guiControlled.rotateSpeed); + + static std::chrono::microseconds previousEventTimestamp{}; + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + auto updatePresentationTimestamp = [&]() + { + m_currentImageAcquire = m_surface->acquireNextImage(); + + m_oracle.reportEndFrameRecord(); + const auto timestamp = m_oracle.getNextPresentationTimeStamp(); + m_oracle.reportBeginFrameRecord(); + + return timestamp; + }; + + const auto nextPresentationTimestamp = updatePresentationTimestamp(); + + struct + { + std::vector mouse{}; + 
std::vector keyboard{}; + } capturedEvents; + + m_camera.beginInputProcessing(nextPresentationTimestamp); + { + const auto& io = ImGui::GetIO(); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + if (!io.WantCaptureMouse) + m_camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.mouse.emplace_back(e); + + if (e.type == nbl::ui::SMouseEvent::EET_SCROLL) + gcIndex = std::clamp(int16_t(gcIndex) + int16_t(core::sign(e.scrollEvent.verticalScroll)), int64_t(0), int64_t(ELG_COUNT - (uint8_t)1u)); + } + }, m_logger.get()); + + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + if (!io.WantCaptureKeyboard) + m_camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + if (e.keyCode == ui::EKC_H) + if (e.action == ui::SKeyboardEvent::ECA_RELEASED) + m_showUI = !m_showUI; + + previousEventTimestamp = e.timeStamp; + capturedEvents.keyboard.emplace_back(e); + } + }, m_logger.get()); + } + m_camera.endInputProcessing(nextPresentationTimestamp); + + const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); + const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); + + const ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = mousePosition, + .displaySize = { m_window->getWidth(), m_window->getHeight() }, 
+ .mouseEvents = mouseEvents, + .keyboardEvents = keyboardEvents + }; + + if (m_showUI) + m_ui.manager->update(params); + } + + private: + + IGPUComputePipeline* pickPTPipeline() + { + IGPUComputePipeline* pipeline; + if (guiControlled.useRWMC) + pipeline = guiControlled.usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelinesRWMC[guiControlled.PTPipeline].get() : m_PTHLSLPipelinesRWMC[guiControlled.PTPipeline].get(); + else + pipeline = guiControlled.usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelines[guiControlled.PTPipeline].get() : m_PTHLSLPipelines[guiControlled.PTPipeline].get(); + + return pipeline; + } + + private: + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + + // gpu resources + smart_refctd_ptr m_cmdPool; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelinesRWMC; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelinesRWMC; + smart_refctd_ptr m_resolvePipeline; + smart_refctd_ptr m_presentPipeline; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + smart_refctd_ptr m_descriptorSet, m_presentDescriptorSet; + + core::smart_refctd_ptr m_guiDescriptorSetPool; + + // system resources + core::smart_refctd_ptr m_inputSystem; + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + + // pathtracer resources + smart_refctd_ptr m_envMapView, m_scrambleView; + smart_refctd_ptr m_sequenceBuffer; + smart_refctd_ptr m_outImgView; + smart_refctd_ptr m_cascadeView; + + // sync + smart_refctd_ptr m_semaphore; + + // image upload resources + smart_refctd_ptr m_scratchSemaphore; + SIntendedSubmitInfo m_intendedSubmit; + + struct C_UI + { + nbl::core::smart_refctd_ptr manager; + + struct + { + core::smart_refctd_ptr gui, scene; + } samplers; + + core::smart_refctd_ptr descriptorSet; + } 
m_ui; + + Camera m_camera; + bool m_showUI; + + video::CDumbPresentationOracle m_oracle; + + uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed + + struct GUIControllables + { + float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 165.f / 180.f * 3.14159f; + float camXAngle = 32.f / 180.f * 3.14159f; + int PTPipeline = E_LIGHT_GEOMETRY::ELG_SPHERE; + int spp = 32; + int depth = 3; + rwmc::SResolveParameters::SCreateParams rwmcParams; + bool usePersistentWorkGroups = false; + bool useRWMC = false; + }; + GUIControllables guiControlled; + + hlsl::float32_t4x4 m_lightModelMatrix = { + 0.3f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.3f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.3f, 0.0f, + -1.0f, 1.5f, 0.0f, 1.0f, + }; + TransformRequestParams m_transformParams; + + bool m_firstFrame = true; + IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; +}; + +NBL_MAIN_FUNC(HLSLComputePathtracer) diff --git a/31_HLSLPathTracer/pipeline.groovy b/31_HLSLPathTracer/pipeline.groovy new file mode 100644 index 000000000..955e77cec --- /dev/null +++ b/31_HLSLPathTracer/pipeline.groovy @@ -0,0 +1,50 @@ +import org.DevshGraphicsProgramming.Agent +import org.DevshGraphicsProgramming.BuilderInfo +import org.DevshGraphicsProgramming.IBuilder + +class CHLSLPathTracerBuilder extends IBuilder +{ + public CHLSLPathTracerBuilder(Agent _agent, _info) + { + super(_agent, _info) + } + + @Override + public boolean prepare(Map axisMapping) + { + return true + } + + @Override + public boolean build(Map axisMapping) + { + IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") + IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") + + def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) + def nameOfConfig = getNameOfConfig(config) + + 
agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") + + return true + } + + @Override + public boolean test(Map axisMapping) + { + return true + } + + @Override + public boolean install(Map axisMapping) + { + return true + } +} + +def create(Agent _agent, _info) +{ + return new CHLSLPathTracerBuilder(_agent, _info) +} + +return this diff --git a/37_HLSLSamplingTests/CBilinearTester.h b/37_HLSLSamplingTests/CBilinearTester.h new file mode 100644 index 000000000..aabcdadc1 --- /dev/null +++ b/37_HLSLSamplingTests/CBilinearTester.h @@ -0,0 +1,64 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_C_BILINEAR_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_C_BILINEAR_TESTER_INCLUDED_ + +#include "nbl/examples/examples.hpp" +#include "app_resources/common/bilinear.hlsl" +#include "nbl/examples/Tester/ITester.h" + +class CBilinearTester final : public ITester +{ + using base_t = ITester; + +public: + CBilinearTester(const uint32_t testBatchCount, const uint32_t workgroupSize) : base_t(testBatchCount, workgroupSize) {} + +private: + BilinearInputValues generateInputTestValues() override + { + std::uniform_real_distribution coeffDist(0.1f, 5.0f); + std::uniform_real_distribution uDist(0.0f, 1.0f); + + BilinearInputValues input; + input.bilinearCoeffs = nbl::hlsl::float32_t4( + coeffDist(getRandomEngine()), coeffDist(getRandomEngine()), + coeffDist(getRandomEngine()), coeffDist(getRandomEngine())); + input.u = nbl::hlsl::float32_t2(uDist(getRandomEngine()), uDist(getRandomEngine())); + return input; + } + + BilinearTestResults determineExpectedResults(const BilinearInputValues& input) override + { + BilinearTestResults expected; + BilinearTestExecutor executor; + executor(input, expected); + return expected; + } + + bool verifyTestResults(const BilinearTestResults& expected, const BilinearTestResults& actual, const size_t iteration, const 
uint32_t seed, TestType testType) override + { + bool pass = true; + pass &= verifyTestValue("Bilinear::generate", expected.generated, actual.generated, iteration, seed, testType, 1e-2, 1e-3); + pass &= verifyTestValue("Bilinear::pdf", expected.backwardPdf, actual.backwardPdf, iteration, seed, testType, 1e-5, 5e-3); + pass &= verifyTestValue("Bilinear::cache.pdf", expected.cachedPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 5e-3); + pass &= verifyTestValue("Bilinear::forwardPdf", expected.forwardPdf, actual.forwardPdf, iteration, seed, testType, 1e-5, 5e-3); + pass &= verifyTestValue("Bilinear::forwardPdf == cache.pdf", actual.forwardPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("Bilinear::generateInverse", expected.inverted, actual.inverted, iteration, seed, testType, 1e-5, 5e-3); // tolerated + pass &= verifyTestValue("Bilinear::roundtripError (absolute)", 0.0f, actual.roundtripError, iteration, seed, testType, 5e-3, 5e-3); // tolerated + pass &= verifyTestValue("Bilinear::jacobianProduct", 1.0f, actual.jacobianProduct, iteration, seed, testType, 1e-4, 1e-3); + + if (!(actual.forwardPdf > 0.0f) || !std::isfinite(actual.forwardPdf)) + { + pass = false; + printTestFail("Bilinear::forwardPdf (positive & finite)", 1.0f, actual.forwardPdf, iteration, seed, testType, 0.0, 0.0); + } + if (!(actual.backwardPdf > 0.0f) || !std::isfinite(actual.backwardPdf)) + { + pass = false; + printTestFail("Bilinear::backwardPdf (positive & finite)", 1.0f, actual.backwardPdf, iteration, seed, testType, 0.0, 0.0); + } + + return pass; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/CBoxMullerTransformTester.h b/37_HLSLSamplingTests/CBoxMullerTransformTester.h new file mode 100644 index 000000000..6134cff66 --- /dev/null +++ b/37_HLSLSamplingTests/CBoxMullerTransformTester.h @@ -0,0 +1,67 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_C_BOX_MULLER_TRANSFORM_TESTER_INCLUDED_ +#define 
_NBL_EXAMPLES_TESTS_37_SAMPLING_C_BOX_MULLER_TRANSFORM_TESTER_INCLUDED_ + +#include "nbl/examples/examples.hpp" +#include "app_resources/common/box_muller_transform.hlsl" +#include "nbl/examples/Tester/ITester.h" + +class CBoxMullerTransformTester final : public ITester +{ + using base_t = ITester; + +public: + CBoxMullerTransformTester(const uint32_t testBatchCount, const uint32_t workgroupSize) : base_t(testBatchCount, workgroupSize) {} + +private: + BoxMullerTransformInputValues generateInputTestValues() override + { + std::uniform_real_distribution stddevDist(0.1f, 5.0f); + // Avoid u.x near 0 to prevent log(0) = -inf + std::uniform_real_distribution uDist(1e-4f, 1.0f - 1e-4f); + + BoxMullerTransformInputValues input; + input.stddev = stddevDist(getRandomEngine()); + input.u = nbl::hlsl::float32_t2(uDist(getRandomEngine()), uDist(getRandomEngine())); + return input; + } + + BoxMullerTransformTestResults determineExpectedResults(const BoxMullerTransformInputValues& input) override + { + BoxMullerTransformTestResults expected; + BoxMullerTransformTestExecutor executor; + executor(input, expected); + return expected; + } + + bool verifyTestResults(const BoxMullerTransformTestResults& expected, const BoxMullerTransformTestResults& actual, + const size_t iteration, const uint32_t seed, TestType testType) override + { + bool pass = true; + pass &= verifyTestValue("BoxMullerTransform::generate", expected.generated, actual.generated, iteration, seed, testType, 1e-5, 2e-3); // tolerated + pass &= verifyTestValue("BoxMullerTransform::cache.pdf", expected.cachedPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-3); + pass &= verifyTestValue("BoxMullerTransform::forwardPdf", expected.forwardPdf, actual.forwardPdf, iteration, seed, testType, 1e-5, 1e-3); + pass &= verifyTestValue("BoxMullerTransform::backwardPdf", expected.backwardPdf, actual.backwardPdf, iteration, seed, testType, 1e-5, 1e-3); + pass &= verifyTestValue("BoxMullerTransform::separateBackwardPdf", 
expected.separateBackwardPdf, actual.separateBackwardPdf, iteration, seed, testType, 1e-5, 1e-3); + + // Joint PDF == product of marginal PDFs (independent random variables) + pass &= verifyTestValue("BoxMullerTransform::jointPdf == pdf product", actual.backwardPdf, actual.separateBackwardPdf.x * actual.separateBackwardPdf.y, iteration, seed, testType, 1e-5, 1e-5); + + // forwardPdf must return the same value stored in cache.pdf by generate + pass &= verifyTestValue("BoxMullerTransform::forwardPdf == cache.pdf", actual.forwardPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5); + + if (!(actual.forwardPdf > 0.0f) || !std::isfinite(actual.forwardPdf)) + { + pass = false; + printTestFail("BoxMullerTransform::forwardPdf (positive & finite)", 1.0f, actual.forwardPdf, iteration, seed, testType, 0.0, 0.0); + } + if (!(actual.backwardPdf > 0.0f) || !std::isfinite(actual.backwardPdf)) + { + pass = false; + printTestFail("BoxMullerTransform::backwardPdf (positive & finite)", 1.0f, actual.backwardPdf, iteration, seed, testType, 0.0, 0.0); + } + + return pass; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/CConcentricMappingTester.h b/37_HLSLSamplingTests/CConcentricMappingTester.h new file mode 100644 index 000000000..b77384c64 --- /dev/null +++ b/37_HLSLSamplingTests/CConcentricMappingTester.h @@ -0,0 +1,61 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_C_CONCENTRIC_MAPPING_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_C_CONCENTRIC_MAPPING_TESTER_INCLUDED_ + +#include "nbl/examples/examples.hpp" +#include "app_resources/common/concentric_mapping.hlsl" +#include "nbl/examples/Tester/ITester.h" + +class CConcentricMappingTester final : public ITester +{ + using base_t = ITester; + +public: + CConcentricMappingTester(const uint32_t testBatchCount, const uint32_t workgroupSize) : base_t(testBatchCount, workgroupSize) {} + +private: + ConcentricMappingInputValues generateInputTestValues() override + { + std::uniform_real_distribution dist(0.0f, 1.0f); + + 
ConcentricMappingInputValues input; + input.u = nbl::hlsl::float32_t2(dist(getRandomEngine()), dist(getRandomEngine())); + return input; + } + + ConcentricMappingTestResults determineExpectedResults(const ConcentricMappingInputValues& input) override + { + ConcentricMappingTestResults expected; + ConcentricMappingTestExecutor executor; + executor(input, expected); + return expected; + } + + bool verifyTestResults(const ConcentricMappingTestResults& expected, const ConcentricMappingTestResults& actual, + const size_t iteration, const uint32_t seed, TestType testType) override + { + bool pass = true; + pass &= verifyTestValue("ConcentricMapping::concentricMapping", expected.mapped, actual.mapped, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("ConcentricMapping::invertConcentricMapping", expected.inverted, actual.inverted, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("ConcentringMapping::roundtripError (absolute)", 0.0f, actual.roundtripError, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("ConcentricMapping::cache.pdf", expected.cachedPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("ConcentringMapping::forwardPdf", expected.forwardPdf, actual.forwardPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("ConcentricMapping::forwardPdf == cache.pdf", actual.forwardPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("ConcentringMapping::backwardPdf", expected.backwardPdf, actual.backwardPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("ConcentringMapping::jacobianProduct", 1.0f, actual.jacobianProduct, iteration, seed, testType, 1e-5, 1e-5); + + if (!(actual.forwardPdf > 0.0f) || !std::isfinite(actual.forwardPdf)) + { + pass = false; + printTestFail("ConcentricMapping::forwardPdf (positive & finite)", 1.0f, actual.forwardPdf, iteration, seed, testType, 0.0, 0.0); + } + if (!(actual.backwardPdf > 
0.0f) || !std::isfinite(actual.backwardPdf)) + { + pass = false; + printTestFail("ConcentricMapping::backwardPdf (positive & finite)", 1.0f, actual.backwardPdf, iteration, seed, testType, 0.0, 0.0); + } + + return pass; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/CLinearTester.h b/37_HLSLSamplingTests/CLinearTester.h new file mode 100644 index 000000000..02c473b3d --- /dev/null +++ b/37_HLSLSamplingTests/CLinearTester.h @@ -0,0 +1,62 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_C_LINEAR_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_C_LINEAR_TESTER_INCLUDED_ + +#include "nbl/examples/examples.hpp" +#include "app_resources/common/linear.hlsl" +#include "nbl/examples/Tester/ITester.h" + +class CLinearTester final : public ITester +{ + using base_t = ITester; + +public: + CLinearTester(const uint32_t testBatchCount, const uint32_t workgroupSize) : base_t(testBatchCount, workgroupSize) {} + +private: + LinearInputValues generateInputTestValues() override + { + std::uniform_real_distribution coeffDist(0.1f, 5.0f); + std::uniform_real_distribution uDist(0.0f, 1.0f); + + LinearInputValues input; + input.coeffs = nbl::hlsl::float32_t2(coeffDist(getRandomEngine()), coeffDist(getRandomEngine())); + input.u = uDist(getRandomEngine()); + return input; + } + + LinearTestResults determineExpectedResults(const LinearInputValues& input) override + { + LinearTestResults expected; + LinearTestExecutor executor; + executor(input, expected); + return expected; + } + + bool verifyTestResults(const LinearTestResults& expected, const LinearTestResults& actual, const size_t iteration, const uint32_t seed, TestType testType) override + { + bool pass = true; + pass &= verifyTestValue("Linear::generate", expected.generated, actual.generated, iteration, seed, testType, 5e-2, 5e-5); + pass &= verifyTestValue("Linear::generateInverse", expected.generateInversed, actual.generateInversed, iteration, seed, testType, 5e-2, 5e-5); + pass &= verifyTestValue("Linear::cache.pdf", 
expected.cachedPdf, actual.cachedPdf, iteration, seed, testType, 5e-2, 1e-5); + pass &= verifyTestValue("Linear::forwardPdf", expected.forwardPdf, actual.forwardPdf, iteration, seed, testType, 5e-2, 1e-5); + pass &= verifyTestValue("Linear::forwardPdf == cache.pdf", actual.forwardPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("Linear::backwardPdf", expected.backwardPdf, actual.backwardPdf, iteration, seed, testType, 5e-2, 1e-5); + pass &= verifyTestValue("Linear::roundtripError (absolute)", 0.0f, actual.roundtripError, iteration, seed, testType, 1e-2, 5e-3); + pass &= verifyTestValue("Linear::jacobianProduct", 1.0f, actual.jacobianProduct, iteration, seed, testType, 1e-4, 1e-4); + + if (!(actual.forwardPdf > 0.0f) || !std::isfinite(actual.forwardPdf)) + { + pass = false; + printTestFail("Linear::forwardPdf (positive & finite)", 1.0f, actual.forwardPdf, iteration, seed, testType, 0.0, 0.0); + } + if (!(actual.backwardPdf > 0.0f) || !std::isfinite(actual.backwardPdf)) + { + pass = false; + printTestFail("Linear::backwardPdf (positive & finite)", 1.0f, actual.backwardPdf, iteration, seed, testType, 0.0, 0.0); + } + + return pass; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/CMakeLists.txt b/37_HLSLSamplingTests/CMakeLists.txt new file mode 100644 index 000000000..cbe5a30cf --- /dev/null +++ b/37_HLSLSamplingTests/CMakeLists.txt @@ -0,0 +1,201 @@ +set(NBL_INCLUDE_SEARCH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" +) + +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "${NBL_INCLUDE_SEARCH_DIRECTORIES}" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +set(DEPENDS + app_resources/test_compile.comp.hlsl + app_resources/linear_test.comp.hlsl + app_resources/uniform_hemisphere_test.comp.hlsl + app_resources/uniform_sphere_test.comp.hlsl + app_resources/projected_hemisphere_test.comp.hlsl + app_resources/projected_sphere_test.comp.hlsl + app_resources/spherical_triangle.comp.hlsl + app_resources/concentric_mapping_test.comp.hlsl + app_resources/bilinear_test.comp.hlsl + app_resources/box_muller_transform_test.comp.hlsl + app_resources/projected_spherical_triangle_test.comp.hlsl + app_resources/spherical_rectangle_test.comp.hlsl + app_resources/common/linear.hlsl + app_resources/common/uniform_hemisphere.hlsl + app_resources/common/uniform_sphere.hlsl + app_resources/common/projected_hemisphere.hlsl + app_resources/common/projected_sphere.hlsl + app_resources/common/spherical_triangle.hlsl + app_resources/common/bilinear.hlsl + app_resources/common/box_muller_transform.hlsl + app_resources/common/projected_spherical_triangle.hlsl + app_resources/common/spherical_rectangle.hlsl + app_resources/common/concentric_mapping.hlsl +) +list(TRANSFORM DEPENDS PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/") + +target_sources(${EXECUTABLE_NAME} PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/CLinearTester.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CUniformHemisphereTester.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CUniformSphereTester.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CProjectedHemisphereTester.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CProjectedSphereTester.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CSphericalTriangleTester.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CConcentricMappingTester.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CBilinearTester.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CBoxMullerTransformTester.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CProjectedSphericalTriangleTester.h" + 
"${CMAKE_CURRENT_SOURCE_DIR}/CSphericalRectangleTester.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CSamplerBenchmark.h" + ${DEPENDS} +) + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + +set(JSON [=[ +[ + { + "INPUT": "app_resources/test_compile.comp.hlsl", + "KEY": "shader" + }, + { + "INPUT": "app_resources/linear_test.comp.hlsl", + "KEY": "linear_test" + }, + { + "INPUT": "app_resources/linear_test.comp.hlsl", + "KEY": "linear_bench", + "COMPILE_OPTIONS": ["-DBENCH_ITERS=4096"] /* ALSO SET IN main.cpp */ + }, + { + "INPUT": "app_resources/uniform_hemisphere_test.comp.hlsl", + "KEY": "uniform_hemisphere_test" + }, + { + "INPUT": "app_resources/uniform_hemisphere_test.comp.hlsl", + "KEY": "uniform_hemisphere_bench", + "COMPILE_OPTIONS": ["-DBENCH_ITERS=4096"] + }, + { + "INPUT": "app_resources/uniform_sphere_test.comp.hlsl", + "KEY": "uniform_sphere_test" + }, + { + "INPUT": "app_resources/uniform_sphere_test.comp.hlsl", + "KEY": "uniform_sphere_bench", + "COMPILE_OPTIONS": ["-DBENCH_ITERS=4096"] 
+ }, + { + "INPUT": "app_resources/projected_hemisphere_test.comp.hlsl", + "KEY": "projected_hemisphere_test" + }, + { + "INPUT": "app_resources/projected_hemisphere_test.comp.hlsl", + "KEY": "projected_hemisphere_bench", + "COMPILE_OPTIONS": ["-DBENCH_ITERS=4096"] + }, + { + "INPUT": "app_resources/projected_sphere_test.comp.hlsl", + "KEY": "projected_sphere_test" + }, + { + "INPUT": "app_resources/projected_sphere_test.comp.hlsl", + "KEY": "projected_sphere_bench", + "COMPILE_OPTIONS": ["-DBENCH_ITERS=4096"] + }, + { + "INPUT": "app_resources/spherical_triangle.comp.hlsl", + "KEY": "spherical_triangle" + }, + { + "INPUT": "app_resources/spherical_triangle.comp.hlsl", + "KEY": "spherical_triangle_bench", + "COMPILE_OPTIONS": ["-DBENCH_ITERS=4096"] + }, + { + "INPUT": "app_resources/concentric_mapping_test.comp.hlsl", + "KEY": "concentric_mapping_test" + }, + { + "INPUT": "app_resources/concentric_mapping_test.comp.hlsl", + "KEY": "concentric_mapping_bench", + "COMPILE_OPTIONS": ["-DBENCH_ITERS=4096"] + }, + { + "INPUT": "app_resources/bilinear_test.comp.hlsl", + "KEY": "bilinear_test" + }, + { + "INPUT": "app_resources/bilinear_test.comp.hlsl", + "KEY": "bilinear_bench", + "COMPILE_OPTIONS": ["-DBENCH_ITERS=4096"] + }, + { + "INPUT": "app_resources/box_muller_transform_test.comp.hlsl", + "KEY": "box_muller_transform_test" + }, + { + "INPUT": "app_resources/box_muller_transform_test.comp.hlsl", + "KEY": "box_muller_transform_bench", + "COMPILE_OPTIONS": ["-DBENCH_ITERS=4096"] + }, + { + "INPUT": "app_resources/projected_spherical_triangle_test.comp.hlsl", + "KEY": "projected_spherical_triangle_test" + }, + { + "INPUT": "app_resources/projected_spherical_triangle_test.comp.hlsl", + "KEY": "projected_spherical_triangle_bench", + "COMPILE_OPTIONS": ["-DBENCH_ITERS=4096"] + }, + { + "INPUT": "app_resources/spherical_rectangle_test.comp.hlsl", + "KEY": "spherical_rectangle_test" + }, + { + "INPUT": "app_resources/spherical_rectangle_test.comp.hlsl", + "KEY": 
"spherical_rectangle_bench",
+        "COMPILE_OPTIONS": ["-DBENCH_ITERS=4096"]
+    }
+]
+]=])
+
+# Create the shader compile rules from the JSON list (compute target cs_6_8, this directory
+# on the include path); the resulting keys are returned in KEYS.
+NBL_CREATE_NSC_COMPILE_RULES(
+    TARGET ${EXECUTABLE_NAME}SPIRV
+    LINK_TO ${EXECUTABLE_NAME}
+    BINARY_DIR ${OUTPUT_DIRECTORY}
+    MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT
+    COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} -T cs_6_8
+    OUTPUT_VAR KEYS
+    INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp
+    NAMESPACE nbl::this_example::builtin::build
+    INPUTS ${JSON}
+)
+
+# Archive the outputs listed in KEYS from OUTPUT_DIRECTORY and link the archive to the executable.
+NBL_CREATE_RESOURCE_ARCHIVE(
+    NAMESPACE nbl::this_example::builtin::build
+    TARGET ${EXECUTABLE_NAME}_builtinsBuild
+    LINK_TO ${EXECUTABLE_NAME}
+    BIND ${OUTPUT_DIRECTORY}
+    BUILTINS ${KEYS}
+)
diff --git a/37_HLSLSamplingTests/CProjectedHemisphereTester.h b/37_HLSLSamplingTests/CProjectedHemisphereTester.h
new file mode 100644
index 000000000..54f723f7c
--- /dev/null
+++ b/37_HLSLSamplingTests/CProjectedHemisphereTester.h
@@ -0,0 +1,61 @@
+#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_C_PROJECTED_HEMISPHERE_TESTER_INCLUDED_
+#define _NBL_EXAMPLES_TESTS_37_SAMPLING_C_PROJECTED_HEMISPHERE_TESTER_INCLUDED_
+
+#include "nbl/examples/examples.hpp"
+#include "app_resources/common/projected_hemisphere.hlsl"
+#include "nbl/examples/Tester/ITester.h"
+
+// Tester for the projected-hemisphere sampler: generates random inputs, evaluates
+// ProjectedHemisphereTestExecutor for the expected values and verifies the actual results
+// against them. NOTE(review): where the "actual" results come from is decided by ITester - confirm.
+class CProjectedHemisphereTester final : public ITester
+{
+    using base_t = ITester;
+
+public:
+    CProjectedHemisphereTester(const uint32_t testBatchCount, const uint32_t workgroupSize) : base_t(testBatchCount, workgroupSize) {}
+
+private:
+    // Fills input.u with two values drawn from dist(0, 1) using the tester's random engine.
+    ProjectedHemisphereInputValues generateInputTestValues() override
+    {
+        std::uniform_real_distribution dist(0.0f, 1.0f);
+
+        ProjectedHemisphereInputValues input;
+        input.u = nbl::hlsl::float32_t2(dist(getRandomEngine()), dist(getRandomEngine()));
+        return input;
+    }
+
+    // Runs the test executor on `input` and returns what it wrote as the expected results.
+    ProjectedHemisphereTestResults determineExpectedResults(const ProjectedHemisphereInputValues& input) override
+    {
+        ProjectedHemisphereTestResults expected;
+        ProjectedHemisphereTestExecutor executor;
+        executor(input, expected);
+        return expected;
+    }
+
+    bool
verifyTestResults(const ProjectedHemisphereTestResults& expected, const ProjectedHemisphereTestResults& actual,
+        const size_t iteration, const uint32_t seed, TestType testType) override
+    {
+        // Returns true only if every check below passes; checks are not short-circuited, so
+        // all of them run for each sample.
+        // Most checks compare expected vs. actual; three only inspect `actual`:
+        // forwardPdf against cachedPdf, roundtripError against 0 and jacobianProduct against 1.
+        // NOTE(review): the two trailing numeric arguments are tolerances forwarded to
+        // verifyTestValue; which is absolute and which is relative is defined by ITester - confirm.
+        bool pass = true;
+        pass &= verifyTestValue("ProjectedHemisphere::generate", expected.generated, actual.generated, iteration, seed, testType, 5e-5, 5e-5);
+        pass &= verifyTestValue("ProjectedHemisphere::cache.pdf", expected.cachedPdf, actual.cachedPdf, iteration, seed, testType, 5e-5, 5e-5);
+        pass &= verifyTestValue("ProjectedHemisphere::forwardPdf", expected.forwardPdf, actual.forwardPdf, iteration, seed, testType, 5e-5, 5e-5);
+        pass &= verifyTestValue("ProjectedHemisphere::forwardPdf == cache.pdf", actual.forwardPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5);
+        pass &= verifyTestValue("ProjectedHemisphere::generateInverse", expected.inverted, actual.inverted, iteration, seed, testType, 5e-5, 5e-5);
+        pass &= verifyTestValue("ProjectedHemisphere::backwardPdf", expected.backwardPdf, actual.backwardPdf, iteration, seed, testType, 1e-1, 1e-2);
+        pass &= verifyTestValue("ProjectedHemisphere::roundtripError (absolute)", 0.0f, actual.roundtripError, iteration, seed, testType, 5e-4, 1e-4);
+        pass &= verifyTestValue("ProjectedHemisphere::jacobianProduct", 1.0f, actual.jacobianProduct, iteration, seed, testType, 1e-4, 1e-4);
+
+        // !(x > 0) is also true for NaN, so these reject non-positive, NaN and infinite pdfs.
+        if (!(actual.forwardPdf > 0.0f) || !std::isfinite(actual.forwardPdf))
+        {
+            pass = false;
+            printTestFail("ProjectedHemisphere::forwardPdf (positive & finite)", 1.0f, actual.forwardPdf, iteration, seed, testType, 0.0, 0.0);
+        }
+        if (!(actual.backwardPdf > 0.0f) || !std::isfinite(actual.backwardPdf))
+        {
+            pass = false;
+            printTestFail("ProjectedHemisphere::backwardPdf (positive & finite)", 1.0f, actual.backwardPdf, iteration, seed, testType, 0.0, 0.0);
+        }
+
+        return pass;
+    }
+};
+
+#endif
diff --git a/37_HLSLSamplingTests/CProjectedSphereTester.h b/37_HLSLSamplingTests/CProjectedSphereTester.h
new file mode 100644
index
000000000..5c1f6caa4
--- /dev/null
+++ b/37_HLSLSamplingTests/CProjectedSphereTester.h
@@ -0,0 +1,63 @@
+#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_C_PROJECTED_SPHERE_TESTER_INCLUDED_
+#define _NBL_EXAMPLES_TESTS_37_SAMPLING_C_PROJECTED_SPHERE_TESTER_INCLUDED_
+
+#include "nbl/examples/examples.hpp"
+#include "app_resources/common/projected_sphere.hlsl"
+#include "nbl/examples/Tester/ITester.h"
+
+// Tester for the projected-sphere sampler: generates random inputs, evaluates
+// ProjectedSphereTestExecutor for the expected values and verifies the actual results
+// against them. NOTE(review): where the "actual" results come from is decided by ITester - confirm.
+class CProjectedSphereTester final : public ITester
+{
+    using base_t = ITester;
+
+public:
+    CProjectedSphereTester(const uint32_t testBatchCount, const uint32_t workgroupSize) : base_t(testBatchCount, workgroupSize) {}
+
+private:
+    // Fills input.u with three values drawn from dist(0, 1) using the tester's random engine.
+    ProjectedSphereInputValues generateInputTestValues() override
+    {
+        std::uniform_real_distribution dist(0.0f, 1.0f);
+
+        ProjectedSphereInputValues input;
+        input.u = nbl::hlsl::float32_t3(dist(getRandomEngine()), dist(getRandomEngine()), dist(getRandomEngine()));
+        return input;
+    }
+
+    // Runs the test executor on `input` and returns what it wrote as the expected results.
+    ProjectedSphereTestResults determineExpectedResults(const ProjectedSphereInputValues& input) override
+    {
+        ProjectedSphereTestResults expected;
+        ProjectedSphereTestExecutor executor;
+        executor(input, expected);
+        return expected;
+    }
+
+    // Returns true only if every check passes; checks are not short-circuited.
+    // NOTE(review): the two trailing numeric arguments are tolerances forwarded to
+    // verifyTestValue; which is absolute and which is relative is defined by ITester - confirm.
+    bool verifyTestResults(const ProjectedSphereTestResults& expected, const ProjectedSphereTestResults& actual,
+        const size_t iteration, const uint32_t seed, TestType testType) override
+    {
+        bool pass = true;
+        pass &= verifyTestValue("ProjectedSphere::generate", expected.generated, actual.generated, iteration, seed, testType, 5e-5, 5e-5);
+        pass &= verifyTestValue("ProjectedSphere::cache.pdf", expected.cachedPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5);
+        pass &= verifyTestValue("ProjectedSphere::forwardPdf", expected.forwardPdf, actual.forwardPdf, iteration, seed, testType, 1e-5, 1e-5);
+        pass &= verifyTestValue("ProjectedSphere::forwardPdf == cache.pdf", actual.forwardPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5);
+        pass &= verifyTestValue("ProjectedSphere::modifiedU", expected.modifiedU, actual.modifiedU, iteration, seed, testType, 1e-5, 1e-5);
+        pass &= verifyTestValue("ProjectedSphere::generateInverse", expected.inverted, actual.inverted, iteration, seed, testType, 5e-5, 5e-5);
+        pass &= verifyTestValue("ProjectedSphere::backwardPdf", expected.backwardPdf, actual.backwardPdf, iteration, seed, testType, 1e-5, 1e-5);
+        // roundtripError covers only xy (z is intentionally lossy in generateInverse)
+        pass &= verifyTestValue("ProjectedSphere::roundtripError (absolute)", 0.0f, actual.roundtripError, iteration, seed, testType, 0.0, 1e-4);
+        pass &= verifyTestValue("ProjectedSphere::jacobianProduct", 1.0f, actual.jacobianProduct, iteration, seed, testType, 1e-4, 1e-4);
+
+        // !(x > 0) is also true for NaN, so these reject non-positive, NaN and infinite pdfs.
+        if (!(actual.forwardPdf > 0.0f) || !std::isfinite(actual.forwardPdf))
+        {
+            pass = false;
+            printTestFail("ProjectedSphere::forwardPdf (positive & finite)", 1.0f, actual.forwardPdf, iteration, seed, testType, 0.0, 0.0);
+        }
+        if (!(actual.backwardPdf > 0.0f) || !std::isfinite(actual.backwardPdf))
+        {
+            pass = false;
+            printTestFail("ProjectedSphere::backwardPdf (positive & finite)", 1.0f, actual.backwardPdf, iteration, seed, testType, 0.0, 0.0);
+        }
+
+        return pass;
+    }
+};
+
+#endif
diff --git a/37_HLSLSamplingTests/CProjectedSphericalTriangleTester.h b/37_HLSLSamplingTests/CProjectedSphericalTriangleTester.h
new file mode 100644
index 000000000..c484d8010
--- /dev/null
+++ b/37_HLSLSamplingTests/CProjectedSphericalTriangleTester.h
@@ -0,0 +1,102 @@
+#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_C_PROJECTED_SPHERICAL_TRIANGLE_TESTER_INCLUDED_
+#define _NBL_EXAMPLES_TESTS_37_SAMPLING_C_PROJECTED_SPHERICAL_TRIANGLE_TESTER_INCLUDED_
+
+#include "nbl/examples/examples.hpp"
+#include "app_resources/common/projected_spherical_triangle.hlsl"
+#include "nbl/builtin/hlsl/sampling/uniform_spheres.hlsl"
+#include "nbl/examples/Tester/ITester.h"
+
+class CProjectedSphericalTriangleTester final : public ITester
+{
+    using base_t = ITester;
+
+public:
+    CProjectedSphericalTriangleTester(const uint32_t
testBatchCount, const uint32_t workgroupSize) : base_t(testBatchCount, workgroupSize) {}
+
+private:
+    // Draws two values from dist(0, 1) and maps them through UniformSphere::generate;
+    // the cache written by generate() is discarded.
+    nbl::hlsl::float32_t3 generateRandomUnitVector()
+    {
+        std::uniform_real_distribution dist(0.0f, 1.0f);
+        nbl::hlsl::float32_t2 u(dist(getRandomEngine()), dist(getRandomEngine()));
+        nbl::hlsl::sampling::UniformSphere::cache_type cache;
+        return nbl::hlsl::sampling::UniformSphere::generate(u, cache);
+    }
+
+    // Returns false when any vertex pair has 1 - dot^2 below sinSqThreshold, or when the
+    // magnitude of the scalar triple product v0 . (v1 x v2) does not exceed tripleThreshold.
+    static bool isValidSphericalTriangle(nbl::hlsl::float32_t3 v0, nbl::hlsl::float32_t3 v1, nbl::hlsl::float32_t3 v2)
+    {
+        using namespace nbl::hlsl;
+        constexpr float sinSqThreshold = 0.09f; // sin(theta) > 0.3
+        const float d01 = dot(v0, v1);
+        const float d12 = dot(v1, v2);
+        const float d20 = dot(v2, v0);
+        if ((1.f - d01 * d01) < sinSqThreshold) return false;
+        if ((1.f - d12 * d12) < sinSqThreshold) return false;
+        if ((1.f - d20 * d20) < sinSqThreshold) return false;
+        constexpr float tripleThreshold = 0.1f;
+        return abs(dot(v0, cross(v1, v2))) > tripleThreshold;
+    }
+
+    // Rejection-samples a valid triangle, then a receiver normal, then draws u from uDist(0, 1).
+    ProjectedSphericalTriangleInputValues generateInputTestValues() override
+    {
+        std::uniform_real_distribution uDist(0.0f, 1.0f);
+
+        ProjectedSphericalTriangleInputValues input;
+
+        // Redraw all three vertices until isValidSphericalTriangle accepts them.
+        do
+        {
+            input.vertex0 = generateRandomUnitVector();
+            input.vertex1 = generateRandomUnitVector();
+            input.vertex2 = generateRandomUnitVector();
+        } while (!isValidSphericalTriangle(input.vertex0, input.vertex1, input.vertex2));
+
+        // Ensure the receiver normal has positive projection onto at least one vertex,
+        // otherwise the projected solid angle is zero and the bilinear patch is degenerate (NaN PDFs).
+        do
+        {
+            input.receiverNormal = generateRandomUnitVector();
+        } while (nbl::hlsl::dot(input.receiverNormal, input.vertex0) <= 0.0f &&
+            nbl::hlsl::dot(input.receiverNormal, input.vertex1) <= 0.0f &&
+            nbl::hlsl::dot(input.receiverNormal, input.vertex2) <= 0.0f);
+        input.receiverWasBSDF = 0u;
+        input.u = nbl::hlsl::float32_t2(uDist(getRandomEngine()), uDist(getRandomEngine()));
+        return input;
+    }
+
+    // Runs the test executor on `input` and returns what it wrote as the expected results.
+    ProjectedSphericalTriangleTestResults determineExpectedResults(const ProjectedSphericalTriangleInputValues& input) override
+    {
+        ProjectedSphericalTriangleTestResults expected;
+        ProjectedSphericalTriangleTestExecutor executor;
+        executor(input, expected);
+        return expected;
+    }
+
+    // Returns true only if every check passes; checks are not short-circuited.
+    // NOTE(review): the two trailing numeric arguments are tolerances forwarded to
+    // verifyTestValue; which is absolute and which is relative is defined by ITester - confirm.
+    bool verifyTestResults(const ProjectedSphericalTriangleTestResults& expected, const ProjectedSphericalTriangleTestResults& actual,
+        const size_t iteration, const uint32_t seed, TestType testType) override
+    {
+        bool pass = true;
+        // SphericalTriangle::generate has a known precision issue (see TODO in spherical_triangle.hlsl)
+        // due to catastrophic cancellation in the cosAngleAlongAC formula; CPU/GPU rounding diverges
+        // by up to ~0.002 in direction components for certain triangle geometries
+        pass &= verifyTestValue("ProjectedSphericalTriangle::generate", expected.generated, actual.generated, iteration, seed, testType, 1e-4, 3e-3);
+        pass &= verifyTestValue("ProjectedSphericalTriangle::cache.pdf", expected.cachedPdf, actual.cachedPdf, iteration, seed, testType, 1e-4, 1e-3);
+        pass &= verifyTestValue("ProjectedSphericalTriangle::forwardPdf", expected.forwardPdf, actual.forwardPdf, iteration, seed, testType, 1e-4, 1e-3);
+        pass &= verifyTestValue("ProjectedSphericalTriangle::forwardPdf == cache.pdf", actual.forwardPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5);
+        pass &= verifyTestValue("ProjectedSphericalTriangle::backwardPdf", expected.backwardPdf, actual.backwardPdf, iteration, seed, testType, 1e-4, 1e-3);
+
+        // PDF positivity and finiteness
+        // (!(x > 0) is also true for NaN, so NaN pdfs are rejected as well.)
+        if (!(actual.forwardPdf > 0.0f) || !std::isfinite(actual.forwardPdf))
+        {
+            pass = false;
+            printTestFail("ProjectedSphericalTriangle::forwardPdf (positive & finite)", 1.0f, actual.forwardPdf, iteration, seed, testType, 0.0, 0.0);
+        }
+        if (!(actual.backwardPdf > 0.0f) || !std::isfinite(actual.backwardPdf))
+        {
+            pass = false;
+            printTestFail("ProjectedSphericalTriangle::backwardPdf (positive & finite)", 1.0f, actual.backwardPdf, iteration, seed, testType, 0.0, 0.0);
+        }
+
+        return pass;
+    }
+};
+
+#endif
diff --git a/37_HLSLSamplingTests/CSamplerBenchmark.h b/37_HLSLSamplingTests/CSamplerBenchmark.h
new file mode 100644
index 000000000..42d5db680
--- /dev/null
+++ b/37_HLSLSamplingTests/CSamplerBenchmark.h
@@ -0,0 +1,251 @@
+// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_SAMPLER_BENCHMARK_INCLUDED_
+#define _NBL_EXAMPLES_TESTS_37_SAMPLING_SAMPLER_BENCHMARK_INCLUDED_
+
+#include
+#include "nbl/examples/examples.hpp"
+
+using namespace nbl;
+
+// Measures GPU execution time of a sampler shader using GPU timestamp queries.
+class CSamplerBenchmark
+{
+public:
+    // Everything setup() needs from the application.
+    struct SetupData
+    {
+        core::smart_refctd_ptr device;
+        core::smart_refctd_ptr api;
+        core::smart_refctd_ptr assetMgr;
+        core::smart_refctd_ptr logger;
+        video::IPhysicalDevice* physicalDevice;
+        uint32_t computeFamilyIndex;
+        std::string shaderKey;
+        uint32_t dispatchGroupCount; // workgroup count = testBatchCount
+        uint32_t samplesPerDispatch; // dispatchGroupCount * WorkgroupSize (BenchIters is internal to the shader)
+        size_t inputBufferBytes; // sizeof(InputType) * samplesPerDispatch
+        size_t outputBufferBytes; // sizeof(ResultType) * samplesPerDispatch
+    };
+
+    // Creates the command buffers, timestamp query pool, compute pipeline, the input/output
+    // storage buffers and the descriptor set used by run().
+    // Failures are logged; a shader load failure returns early and leaves the object
+    // unusable, which run() detects and refuses to benchmark.
+    void setup(const SetupData& data)
+    {
+        m_device = data.device;
+        m_logger = data.logger;
+        m_dispatchGroupCount = data.dispatchGroupCount;
+
+        // Command pool + 3 command buffers: benchmark (multi-submit), before/after timestamp
+        m_cmdpool = m_device->createCommandPool(data.computeFamilyIndex, video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT);
+        if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_benchmarkCmdbuf))
+            m_logger->log("CSamplerBenchmark: failed to create benchmark cmdbuf", system::ILogger::ELL_ERROR);
+        if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_timestampBeforeCmdbuf))
+            m_logger->log("CSamplerBenchmark: failed to create timestamp-before cmdbuf", system::ILogger::ELL_ERROR);
+        if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_timestampAfterCmdbuf))
+            m_logger->log("CSamplerBenchmark: failed to create timestamp-after cmdbuf", system::ILogger::ELL_ERROR);
+
+        // Timestamp query pool (2 queries: before and after)
+        {
+            video::IQueryPool::SCreationParams qparams = {};
+            qparams.queryType = video::IQueryPool::TYPE::TIMESTAMP;
+            qparams.queryCount = 2;
+            qparams.pipelineStatisticsFlags = video::IQueryPool::PIPELINE_STATISTICS_FLAGS::NONE;
+            m_queryPool = m_device->createQueryPool(qparams);
+            if (!m_queryPool)
+                m_logger->log("CSamplerBenchmark: failed to create query pool", system::ILogger::ELL_ERROR);
+        }
+
+        // Load and compile shader
+        core::smart_refctd_ptr shader;
+        {
+            asset::IAssetLoader::SAssetLoadParams lp = {};
+            lp.logger = m_logger.get();
+            lp.workingDirectory = "app_resources";
+            auto bundle = data.assetMgr->getAsset(data.shaderKey.data(), lp);
+            const auto assets = bundle.getContents();
+            if (assets.empty())
+            {
+                m_logger->log("CSamplerBenchmark: failed to load shader", system::ILogger::ELL_ERROR);
+                return;
+            }
+            auto source = asset::IAsset::castDown(assets[0]);
+            shader = m_device->compileShader({ source.get() });
+        }
+
+        // Descriptor set layout: binding 0 = input SSBO, binding 1 = output SSBO
+        video::IGPUDescriptorSetLayout::SBinding bindings[2] = {
+            { .binding = 0, .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,
+              .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE,
+              .stageFlags = ShaderStage::ESS_COMPUTE, .count = 1 },
+            { .binding = 1, .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,
+              .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE,
+              .stageFlags = ShaderStage::ESS_COMPUTE, .count = 1 }
+        };
+        auto dsLayout = m_device->createDescriptorSetLayout(bindings);
+
+        m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout));
+
+        {
+            video::IGPUComputePipeline::SCreationParams pparams = {};
+            pparams.layout = m_pplnLayout.get();
+            pparams.shader.entryPoint = "main";
+            pparams.shader.shader = shader.get();
+            if (!m_device->createComputePipelines(nullptr, { &pparams, 1 }, &m_pipeline))
+                m_logger->log("CSamplerBenchmark: failed to create compute pipeline", system::ILogger::ELL_ERROR);
+        }
+
+        // Allocate input buffer (host-visible, zero-filled, correctness irrelevant for benchmarking)
+        core::smart_refctd_ptr inputBuf;
+        {
+            video::IGPUBuffer::SCreationParams bparams = {};
+            bparams.size = data.inputBufferBytes;
+            bparams.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT;
+            inputBuf = m_device->createBuffer(std::move(bparams));
+            video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuf->getMemoryReqs();
+            reqs.memoryTypeBits &= data.physicalDevice->getHostVisibleMemoryTypeBits();
+            m_inputAlloc = m_device->allocate(reqs, inputBuf.get(), video::IDeviceMemoryAllocation::EMAF_NONE);
+            if (!m_inputAlloc.isValid())
+            {
+                // Do not touch m_inputAlloc.memory after a failed allocation.
+                m_logger->log("CSamplerBenchmark: failed to allocate input buffer memory", system::ILogger::ELL_ERROR);
+            }
+            // The mapping is written by memset, so request write access (was EMCAF_READ).
+            // NOTE(review): only host-visible memory is requested; if the chosen type is not
+            // host-coherent the zero-fill may need an explicit flush - confirm.
+            else if (m_inputAlloc.memory->map({ 0ull, m_inputAlloc.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_WRITE))
+            {
+                std::memset(m_inputAlloc.memory->getMappedPointer(), 0, m_inputAlloc.memory->getAllocationSize());
+                m_inputAlloc.memory->unmap();
+            }
+        }
+
+        // Allocate output buffer (host-visible, GPU writes garbage, never read back)
+        core::smart_refctd_ptr outputBuf;
+        {
+            video::IGPUBuffer::SCreationParams bparams = {};
+            bparams.size = data.outputBufferBytes;
+            bparams.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT;
+            outputBuf = m_device->createBuffer(std::move(bparams));
+            video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuf->getMemoryReqs();
+            reqs.memoryTypeBits &= data.physicalDevice->getHostVisibleMemoryTypeBits();
+            m_outputAlloc = m_device->allocate(reqs, outputBuf.get(), video::IDeviceMemoryAllocation::EMAF_NONE);
+            if (!m_outputAlloc.isValid())
+                m_logger->log("CSamplerBenchmark: failed to allocate output buffer memory", system::ILogger::ELL_ERROR);
+        }
+
+        // Descriptor set: bind both buffers
+        auto pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(), 1 });
+        m_ds = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout));
+        {
+            video::IGPUDescriptorSet::SDescriptorInfo info[2];
+            info[0].desc = core::smart_refctd_ptr(inputBuf);
+            info[0].info.buffer = { .offset = 0, .size = data.inputBufferBytes };
+            info[1].desc = core::smart_refctd_ptr(outputBuf);
+            info[1].info.buffer = { .offset = 0, .size = data.outputBufferBytes };
+            video::IGPUDescriptorSet::SWriteDescriptorSet writes[2] = {
+                { .dstSet = m_ds.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &info[0] },
+                { .dstSet = m_ds.get(), .binding = 1, .arrayElement = 0, .count = 1, .info = &info[1] }
+            };
+            m_device->updateDescriptorSets(writes, {});
+        }
+
+        m_queue = m_device->getQueue(data.computeFamilyIndex, 0);
+        m_samplesPerDispatch = data.samplesPerDispatch;
+        m_physicalDevice = data.physicalDevice;
+    }
+
+    // Runs warmupIterations submits (unclocked), then benchmarkIterations submits under GPU timestamps.
+    // Logs ns/sample, MSamples/s and total milliseconds under `samplerName`.
+    void run(const std::string& samplerName, uint32_t warmupIterations = 1000, uint32_t benchmarkIterations = 20000)
+    {
+        // setup() only logs most failures and returns early on a shader load failure, which
+        // leaves members null; refuse to run instead of dereferencing them below.
+        if (!m_device || !m_queue || !m_physicalDevice || !m_pipeline || !m_ds || !m_queryPool ||
+            !m_benchmarkCmdbuf || !m_timestampBeforeCmdbuf || !m_timestampAfterCmdbuf)
+        {
+            if (m_logger)
+                m_logger->log("CSamplerBenchmark: setup() did not complete, skipping benchmark", system::ILogger::ELL_ERROR);
+            return;
+        }
+
+        m_device->waitIdle();
+        recordBenchmarkCmdBuf();
+        recordTimestampCmdBufs();
+
+        auto semaphore = m_device->createSemaphore(0u);
+        uint64_t semCounter = 0u;
+
+        const video::IQueue::SSubmitInfo::SCommandBufferInfo benchCmds[] = { {.cmdbuf = m_benchmarkCmdbuf.get()} };
+        const video::IQueue::SSubmitInfo::SCommandBufferInfo beforeCmds[] = { {.cmdbuf = m_timestampBeforeCmdbuf.get()} };
+        const video::IQueue::SSubmitInfo::SCommandBufferInfo afterCmds[] = { {.cmdbuf = m_timestampAfterCmdbuf.get()} };
+
+        // Chains submissions via a timeline semaphore so they execute strictly in order
+        // (each submit waits on the current counter value and signals the next one).
+        auto submitSerial = [&](const video::IQueue::SSubmitInfo::SCommandBufferInfo* cmds, uint32_t count)
+        {
+            const video::IQueue::SSubmitInfo::SSemaphoreInfo waitSem[] = {
+                {.semaphore = semaphore.get(), .value = semCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT}
+            };
+            const video::IQueue::SSubmitInfo::SSemaphoreInfo signalSem[] = {
+                {.semaphore = semaphore.get(), .value = ++semCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT}
+            };
+            video::IQueue::SSubmitInfo submit = {};
+            submit.commandBuffers = {cmds, count};
+            submit.waitSemaphores = waitSem;
+            submit.signalSemaphores = signalSem;
+            m_queue->submit({&submit, 1u});
+        };
+
+        for (uint32_t i = 0u; i < warmupIterations; ++i)
+            submitSerial(benchCmds, 1u);
+
+        submitSerial(beforeCmds, 1u);
+        for (uint32_t i = 0u; i < benchmarkIterations; ++i)
+            submitSerial(benchCmds, 1u);
+        submitSerial(afterCmds, 1u);
+
+        m_device->waitIdle();
+
+        uint64_t timestamps[2] = {};
+        const auto flags = core::bitflag(video::IQueryPool::RESULTS_FLAGS::_64_BIT) |
+            core::bitflag(video::IQueryPool::RESULTS_FLAGS::WAIT_BIT);
+        m_device->getQueryPoolResults(m_queryPool.get(), 0, 2, timestamps, sizeof(uint64_t), flags);
+
+        // Timestamp ticks -> nanoseconds; samples per nanosecond * 1e3 = millions of samples per second.
+        const float64_t timestampPeriod = float64_t(m_physicalDevice->getLimits().timestampPeriodInNanoSeconds);
+        const float64_t elapsed_ns = float64_t(timestamps[1] - timestamps[0]) * timestampPeriod;
+        const uint64_t total_samples = uint64_t(benchmarkIterations) * uint64_t(m_samplesPerDispatch);
+        const float64_t ns_per_sample = elapsed_ns / float64_t(total_samples);
+        const float64_t msamples_per_s = (float64_t(total_samples) / elapsed_ns) * 1e3;
+        const float64_t elapsed_ms = elapsed_ns * 1e-6;
+
+        m_logger->log("[Benchmark] %s: %.5f ns/sample | %.2f MSamples/s | %.3f ms total",
+            system::ILogger::ELL_PERFORMANCE,
+            samplerName.c_str(), ns_per_sample, msamples_per_s, elapsed_ms);
+    }
+
+private:
+    // Records the single compute dispatch that is resubmitted for every warmup/benchmark iteration.
+    void recordBenchmarkCmdBuf()
+    {
+        m_benchmarkCmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE);
+        m_benchmarkCmdbuf->begin(video::IGPUCommandBuffer::USAGE::SIMULTANEOUS_USE_BIT);
+        m_benchmarkCmdbuf->bindComputePipeline(m_pipeline.get());
+        m_benchmarkCmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get());
+        m_benchmarkCmdbuf->dispatch(m_dispatchGroupCount, 1, 1);
+        m_benchmarkCmdbuf->end();
+    }
+
+    // Records the "before" buffer (resets both queries, writes query 0) and the "after" buffer (writes query 1).
+    void recordTimestampCmdBufs()
+    {
+        m_timestampBeforeCmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE);
+        m_timestampBeforeCmdbuf->begin(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
+        m_timestampBeforeCmdbuf->resetQueryPool(m_queryPool.get(), 0, 2);
+        m_timestampBeforeCmdbuf->writeTimestamp(asset::PIPELINE_STAGE_FLAGS::NONE, m_queryPool.get(), 0);
+        m_timestampBeforeCmdbuf->end();
+
+        m_timestampAfterCmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE);
+        m_timestampAfterCmdbuf->begin(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
+        m_timestampAfterCmdbuf->writeTimestamp(asset::PIPELINE_STAGE_FLAGS::NONE, m_queryPool.get(), 1);
+        m_timestampAfterCmdbuf->end();
+    }
+
+    core::smart_refctd_ptr m_device;
+    core::smart_refctd_ptr m_logger;
+    core::smart_refctd_ptr m_cmdpool;
+    core::smart_refctd_ptr m_benchmarkCmdbuf;
+    core::smart_refctd_ptr m_timestampBeforeCmdbuf;
+    core::smart_refctd_ptr m_timestampAfterCmdbuf;
+    core::smart_refctd_ptr m_queryPool;
+    core::smart_refctd_ptr m_pplnLayout;
+    core::smart_refctd_ptr m_pipeline;
+    core::smart_refctd_ptr m_ds;
+    video::IDeviceMemoryAllocator::SAllocation m_inputAlloc = {};
+    video::IDeviceMemoryAllocator::SAllocation m_outputAlloc = {};
+    video::IQueue* m_queue = nullptr;
+    video::IPhysicalDevice* m_physicalDevice = nullptr;
+    uint32_t m_dispatchGroupCount = 0;
+    uint32_t m_samplesPerDispatch = 0;
+};
+
+#endif
diff --git a/37_HLSLSamplingTests/CSphericalRectangleTester.h b/37_HLSLSamplingTests/CSphericalRectangleTester.h
new file mode 100644
index 000000000..32ff915c2
--- /dev/null
+++ b/37_HLSLSamplingTests/CSphericalRectangleTester.h
@@ -0,0 +1,67 @@
+#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_C_SPHERICAL_RECTANGLE_TESTER_INCLUDED_
+#define _NBL_EXAMPLES_TESTS_37_SAMPLING_C_SPHERICAL_RECTANGLE_TESTER_INCLUDED_
+
+#include "nbl/examples/examples.hpp"
+#include "app_resources/common/spherical_rectangle.hlsl"
+#include "nbl/examples/Tester/ITester.h"
+
+class CSphericalRectangleTester final : public ITester
+{
+    using base_t = ITester;
+
+public:
+    CSphericalRectangleTester(const uint32_t testBatchCount, const uint32_t workgroupSize) : base_t(testBatchCount, workgroupSize) {}
+
+private:
+    SphericalRectangleInputValues generateInputTestValues() override
+    {
+
std::uniform_real_distribution sizeDist(0.5f, 3.0f);
+        std::uniform_real_distribution uDist(0.0f, 1.0f);
+
+        SphericalRectangleInputValues input;
+        // Observer at origin, rect placed in front (negative Z) so the solid angle is valid.
+        input.observer = nbl::hlsl::float32_t3(0.0f, 0.0f, 0.0f);
+        // Only the rectangle extents and u vary per sample; origin and axes directions are fixed.
+        const float width = sizeDist(getRandomEngine());
+        const float height = sizeDist(getRandomEngine());
+        input.rectOrigin = nbl::hlsl::float32_t3(0.0f, 0.0f, -2.0f);
+        input.right = nbl::hlsl::float32_t3(width, 0.0f, 0.0f);
+        input.up = nbl::hlsl::float32_t3(0.0f, height, 0.0f);
+        input.u = nbl::hlsl::float32_t2(uDist(getRandomEngine()), uDist(getRandomEngine()));
+        return input;
+    }
+
+    // Runs the test executor on `input` and returns what it wrote as the expected results.
+    SphericalRectangleTestResults determineExpectedResults(const SphericalRectangleInputValues& input) override
+    {
+        SphericalRectangleTestResults expected;
+        SphericalRectangleTestExecutor executor;
+        executor(input, expected);
+        return expected;
+    }
+
+    // Returns true only if every check passes; checks are not short-circuited.
+    // NOTE(review): the two trailing numeric arguments are tolerances forwarded to
+    // verifyTestValue; which is absolute and which is relative is defined by ITester - confirm.
+    bool verifyTestResults(const SphericalRectangleTestResults& expected, const SphericalRectangleTestResults& actual,
+        const size_t iteration, const uint32_t seed, TestType testType) override
+    {
+        bool pass = true;
+        pass &= verifyTestValue("SphericalRectangle::generate", expected.generated, actual.generated, iteration, seed, testType, 1e-5, 1e-5);
+        pass &= verifyTestValue("SphericalRectangle::cache.pdf", expected.cachedPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 5e-4);
+        pass &= verifyTestValue("SphericalRectangle::pdf", expected.pdf, actual.pdf, iteration, seed, testType, 1e-5, 5e-4);
+        pass &= verifyTestValue("SphericalRectangle::forwardPdf == cache.pdf", actual.pdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5);
+        pass &= verifyTestValue("SphericalRectangle::backwardPdf", expected.backwardPdf, actual.backwardPdf, iteration, seed, testType, 1e-5, 5e-4);
+
+        // PDF positivity and finiteness
+        // (!(x > 0) is also true for NaN, so NaN pdfs are rejected as well.)
+        if (!(actual.pdf > 0.0f) || !std::isfinite(actual.pdf))
+        {
+            pass = false;
+            printTestFail("SphericalRectangle::forwardPdf (positive & finite)", 1.0f, actual.pdf, iteration, seed, testType, 0.0, 0.0);
+        }
+        if (!(actual.backwardPdf > 0.0f) || !std::isfinite(actual.backwardPdf))
+        {
+            pass = false;
+            printTestFail("SphericalRectangle::backwardPdf (positive & finite)", 1.0f, actual.backwardPdf, iteration, seed, testType, 0.0, 0.0);
+        }
+
+        return pass;
+    }
+};
+
+#endif
diff --git a/37_HLSLSamplingTests/CSphericalTriangleTester.h b/37_HLSLSamplingTests/CSphericalTriangleTester.h
new file mode 100644
index 000000000..151577852
--- /dev/null
+++ b/37_HLSLSamplingTests/CSphericalTriangleTester.h
@@ -0,0 +1,124 @@
+#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_C_SPHERICAL_TRIANGLE_TESTER_INCLUDED_
+#define _NBL_EXAMPLES_TESTS_37_SAMPLING_C_SPHERICAL_TRIANGLE_TESTER_INCLUDED_
+
+#include "nbl/examples/examples.hpp"
+#include "app_resources/common/spherical_triangle.hlsl"
+#include "nbl/builtin/hlsl/sampling/uniform_spheres.hlsl"
+#include "nbl/examples/Tester/ITester.h"
+
+class CSphericalTriangleTester final : public ITester
+{
+    using base_t = ITester;
+
+public:
+    CSphericalTriangleTester(const uint32_t testBatchCount, const uint32_t workgroupSize) : base_t(testBatchCount, workgroupSize) {}
+
+private:
+    // Draws two values from dist(0, 1) and maps them through UniformSphere::generate;
+    // the cache written by generate() is discarded.
+    nbl::hlsl::float32_t3 generateRandomUnitVector()
+    {
+        std::uniform_real_distribution dist(0.0f, 1.0f);
+        nbl::hlsl::float32_t2 u(dist(getRandomEngine()), dist(getRandomEngine()));
+        nbl::hlsl::sampling::UniformSphere::cache_type cache;
+        return nbl::hlsl::sampling::UniformSphere::generate(u, cache);
+    }
+
+    static bool isValidSphericalTriangle(nbl::hlsl::float32_t3 v0, nbl::hlsl::float32_t3 v1, nbl::hlsl::float32_t3 v2)
+    {
+        using namespace nbl::hlsl;
+
+        // Reject edges that are nearly coincident or antipodal
+        constexpr float sinSqThreshold = 0.09f; // sin(theta) > 0.3
+
+        const float d01 = dot(v0, v1);
+        const float d12 = dot(v1, v2);
+        const float d20 = dot(v2, v0);
+
+        if ((1.f - d01 * d01) < sinSqThreshold)
+            return false;
+        if ((1.f - d12 * d12) < sinSqThreshold)
+            return false;
+        if
((1.f - d20 * d20) < sinSqThreshold)
+            return false;
+
+        // Reject triangles whose vertices lie nearly on the same great circle
+        constexpr float tripleThreshold = 0.1f;
+        return abs(dot(v0, cross(v1, v2))) > tripleThreshold;
+    }
+
+    // Rejection-samples a valid triangle, then draws u from uDist(0, 1).
+    SphericalTriangleInputValues generateInputTestValues() override
+    {
+        std::uniform_real_distribution uDist(0.0f, 1.0f);
+
+        SphericalTriangleInputValues input;
+
+        // Generate well-separated unit vectors for a valid spherical triangle
+        do
+        {
+            input.vertex0 = generateRandomUnitVector();
+            input.vertex1 = generateRandomUnitVector();
+            input.vertex2 = generateRandomUnitVector();
+        } while (!isValidSphericalTriangle(input.vertex0, input.vertex1, input.vertex2));
+
+        // u is drawn from the full uDist range; no margin is kept at the domain boundaries.
+        // NOTE(review): the previous comment said boundaries (u near 0 or 1) are avoided because
+        // generateInverse can produce NaN there (float32 precision in sqrt/acos), but nothing
+        // here clamps u - confirm whether the relaxed "tolerated" thresholds in
+        // verifyTestResults are the intended mitigation or a clamp is still missing.
+        input.u = nbl::hlsl::float32_t2(uDist(getRandomEngine()), uDist(getRandomEngine()));
+        return input;
+    }
+
+    // Runs the test executor on `input` and returns what it wrote as the expected results.
+    SphericalTriangleTestResults determineExpectedResults(const SphericalTriangleInputValues& input) override
+    {
+        SphericalTriangleTestResults expected;
+        SphericalTriangleTestExecutor executor;
+        executor(input, expected);
+        return expected;
+    }
+
+    // Returns true only if every check passes; checks are not short-circuited.
+    // NOTE(review): the two trailing numeric arguments are tolerances forwarded to
+    // verifyTestValue; which is absolute and which is relative is defined by ITester - confirm.
+    bool verifyTestResults(const SphericalTriangleTestResults& expected, const SphericalTriangleTestResults& actual,
+        const size_t iteration, const uint32_t seed, TestType testType) override
+    {
+        bool pass = true;
+        // GPU vs CPU: CPU trig may use double precision internally, allow larger tolerance.
+        pass &= verifyTestValue("SphericalTriangle::generate", expected.generated, actual.generated, iteration, seed, testType, 1e-4, 1e-2);
+        pass &= verifyTestValue("SphericalTriangle::cache.pdf", expected.cachedPdf, actual.cachedPdf, iteration, seed, testType, 1e-4, 1e-3);
+        pass &= verifyTestValue("SphericalTriangle::forwardPdf", expected.forwardPdf, actual.forwardPdf, iteration, seed, testType, 1e-4, 1e-3);
+        pass &= verifyTestValue("SphericalTriangle::forwardPdf == cache.pdf", actual.forwardPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5);
+        pass &= verifyTestValue("SphericalTriangle::backwardPdf", expected.backwardPdf, actual.backwardPdf, iteration, seed, testType, 1e-4, 1e-3);
+        pass &= verifyTestValue("SphericalTriangle::inverted", expected.inverted, actual.inverted, iteration, seed, testType, 1e-4, 4e-2); // tolerated
+        // Label spelled "roundtripError" like the other testers (was "rountTripError").
+        pass &= verifyTestValue("SphericalTriangle::roundtripError (absolute)", 0.0f, actual.roundtripError, iteration, seed, testType, 5e-2, 1e-2); // tolerated
+
+        // jacobianProduct = (1/forwardPdf) * backwardPdf should be == 1.0.
+        pass &= verifyTestValue("SphericalTriangle::jacobianProduct", 1.0f, actual.jacobianProduct, iteration, seed, testType, 1e-4, 1e-4);
+
+        // !(x > 0) is also true for NaN, so these reject non-positive, NaN and infinite pdfs.
+        if (!(actual.forwardPdf > 0.0f) || !std::isfinite(actual.forwardPdf))
+        {
+            pass = false;
+            printTestFail("SphericalTriangle::forwardPdf (positive & finite)", 1.0f, actual.forwardPdf, iteration, seed, testType, 0.0, 0.0);
+        }
+        if (!(actual.backwardPdf > 0.0f) || !std::isfinite(actual.backwardPdf))
+        {
+            pass = false;
+            printTestFail("SphericalTriangle::backwardPdf (positive & finite)", 1.0f, actual.backwardPdf, iteration, seed, testType, 0.0, 0.0);
+        }
+
+        // Domain preservation: samples must not escape the domain.
+        // Values are signed distances (positive = inside); allow a small negative
+        // tolerance for float32 imprecision near triangle edges / [0,1]^2 boundaries.
+        // verifyTestValue is a symmetric closeness check and can't express ">= -eps",
+        // so we do the comparison directly.
+        constexpr float64_t domainTolerance = 1e-6;
+        if (actual.generatedInside < -domainTolerance)
+        {
+            pass = false;
+            printTestFail("SphericalTriangle::generatedInside", 0.0f, actual.generatedInside, iteration, seed, testType, 0.0, domainTolerance);
+        }
+        if (actual.invertedInDomain < -domainTolerance)
+        {
+            pass = false;
+            printTestFail("SphericalTriangle::invertedInDomain", 0.0f, actual.invertedInDomain, iteration, seed, testType, 0.0, domainTolerance);
+        }
+
+        return pass;
+    }
+};
+
+#endif
diff --git a/37_HLSLSamplingTests/CUniformHemisphereTester.h b/37_HLSLSamplingTests/CUniformHemisphereTester.h
new file mode 100644
index 000000000..0e48563ba
--- /dev/null
+++ b/37_HLSLSamplingTests/CUniformHemisphereTester.h
@@ -0,0 +1,62 @@
+#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_C_UNIFORM_HEMISPHERE_TESTER_INCLUDED_
+#define _NBL_EXAMPLES_TESTS_37_SAMPLING_C_UNIFORM_HEMISPHERE_TESTER_INCLUDED_
+
+#include "nbl/examples/examples.hpp"
+#include "app_resources/common/uniform_hemisphere.hlsl"
+#include "nbl/examples/Tester/ITester.h"
+
+class CUniformHemisphereTester final : public ITester
+{
+    using base_t = ITester;
+
+public:
+    CUniformHemisphereTester(const uint32_t testBatchCount, const uint32_t workgroupSize) : base_t(testBatchCount, workgroupSize) {}
+
+private:
+    UniformHemisphereInputValues generateInputTestValues() override
+    {
+        std::uniform_real_distribution dist(0.0f, 1.0f);
+
+        UniformHemisphereInputValues input;
+        input.u = nbl::hlsl::float32_t2(dist(getRandomEngine()), dist(getRandomEngine()));
+        return input;
+    }
+
+    UniformHemisphereTestResults determineExpectedResults(const UniformHemisphereInputValues& input) override
+    {
+        UniformHemisphereTestResults expected;
+        UniformHemisphereTestExecutor executor;
+        executor(input, expected);
+        return expected;
+    }
+
+    bool verifyTestResults(const UniformHemisphereTestResults& expected, const
UniformHemisphereTestResults& actual, + const size_t iteration, const uint32_t seed, TestType testType) override + { + bool pass = true; + pass &= verifyTestValue("UniformHemisphere::generate", expected.generated, actual.generated, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformHemisphere::pdf", expected.pdf, actual.pdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformHemisphere::generateInverse", expected.inverted, actual.inverted, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformHemisphere::cache.pdf", expected.cachedPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformHemisphere::forwardPdf", expected.forwardPdf, actual.forwardPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformHemisphere::forwardPdf == cache.pdf", actual.forwardPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformHemisphere::backwardPdf", expected.backwardPdf, actual.backwardPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformHemisphere::roundtripError (absolute)", 0.0f, actual.roundtripError, iteration, seed, testType, 0.0, 1e-4); + pass &= verifyTestValue("UniformHemisphere::jacobianProduct", 1.0f, actual.jacobianProduct, iteration, seed, testType, 1e-4, 1e-4); + + if (!(actual.forwardPdf > 0.0f) || !std::isfinite(actual.forwardPdf)) + { + pass = false; + printTestFail("UniformHemisphere::forwardPdf (positive & finite)", 1.0f, actual.forwardPdf, iteration, seed, testType, 0.0, 0.0); + } + if (!(actual.backwardPdf > 0.0f) || !std::isfinite(actual.backwardPdf)) + { + pass = false; + printTestFail("UniformHemisphere::backwardPdf (positive & finite)", 1.0f, actual.backwardPdf, iteration, seed, testType, 0.0, 0.0); + } + + return pass; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/CUniformSphereTester.h b/37_HLSLSamplingTests/CUniformSphereTester.h new file mode 100644 index 
000000000..f793ee2ed --- /dev/null +++ b/37_HLSLSamplingTests/CUniformSphereTester.h @@ -0,0 +1,62 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_C_UNIFORM_SPHERE_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_C_UNIFORM_SPHERE_TESTER_INCLUDED_ + +#include "nbl/examples/examples.hpp" +#include "app_resources/common/uniform_sphere.hlsl" +#include "nbl/examples/Tester/ITester.h" + +class CUniformSphereTester final : public ITester +{ + using base_t = ITester; + +public: + CUniformSphereTester(const uint32_t testBatchCount, const uint32_t workgroupSize) : base_t(testBatchCount, workgroupSize) {} + +private: + UniformSphereInputValues generateInputTestValues() override + { + std::uniform_real_distribution dist(0.0f, 1.0f); + + UniformSphereInputValues input; + input.u = nbl::hlsl::float32_t2(dist(getRandomEngine()), dist(getRandomEngine())); + return input; + } + + UniformSphereTestResults determineExpectedResults(const UniformSphereInputValues& input) override + { + UniformSphereTestResults expected; + UniformSphereTestExecutor executor; + executor(input, expected); + return expected; + } + + bool verifyTestResults(const UniformSphereTestResults& expected, const UniformSphereTestResults& actual, + const size_t iteration, const uint32_t seed, TestType testType) override + { + bool pass = true; + pass &= verifyTestValue("UniformSphere::generate", expected.generated, actual.generated, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformSphere::pdf", expected.pdf, actual.pdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformSphere::generateInverse", expected.inverted, actual.inverted, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformSphere::cache.pdf", expected.cachedPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformSphere::forwardPdf", expected.forwardPdf, actual.forwardPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= 
verifyTestValue("UniformSphere::forwardPdf == cache.pdf", actual.forwardPdf, actual.cachedPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformSphere::backwardPdf", expected.backwardPdf, actual.backwardPdf, iteration, seed, testType, 1e-5, 1e-5); + pass &= verifyTestValue("UniformSphere::roundtripError (absolute)", 0.0f, actual.roundtripError, iteration, seed, testType, 0.0, 1e-4); + pass &= verifyTestValue("UniformSphere::jacobianProduct", 1.0f, actual.jacobianProduct, iteration, seed, testType, 1e-4, 1e-4); + + if (!(actual.forwardPdf > 0.0f) || !std::isfinite(actual.forwardPdf)) + { + pass = false; + printTestFail("UniformSphere::forwardPdf (positive & finite)", 1.0f, actual.forwardPdf, iteration, seed, testType, 0.0, 0.0); + } + if (!(actual.backwardPdf > 0.0f) || !std::isfinite(actual.backwardPdf)) + { + pass = false; + printTestFail("UniformSphere::backwardPdf (positive & finite)", 1.0f, actual.backwardPdf, iteration, seed, testType, 0.0, 0.0); + } + + return pass; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/app_resources/bilinear_test.comp.hlsl b/37_HLSLSamplingTests/app_resources/bilinear_test.comp.hlsl new file mode 100644 index 000000000..1833ed6b2 --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/bilinear_test.comp.hlsl @@ -0,0 +1,36 @@ +#pragma shader_stage(compute) + +#include "common/bilinear.hlsl" +#include +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(64, 1, 1)] +[shader("compute")] +void main() +{ + const uint32_t invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; +#ifdef BENCH_ITERS + // Hardcode valid bilinear coefficients (all positive). 
+ const float32_t4 coeffs = float32_t4(0.25f, 0.5f, 0.75f, 1.0f); + sampling::Bilinear sampler = sampling::Bilinear::create(coeffs); + nbl::hlsl::Xoroshiro64Star rng = nbl::hlsl::Xoroshiro64Star::construct(uint32_t2(invID, 0u)); + const float32_t toFloat = asfloat(0x2f800004u); + uint32_t2 acc = (uint32_t2)0; + for (uint32_t i = 0u; i < uint32_t(BENCH_ITERS); i++) + { + float32_t2 u = float32_t2(rng(), rng()) * toFloat; + sampling::Bilinear::cache_type cache; + acc ^= asuint(sampler.generate(u, cache)); + acc ^= asuint(sampler.forwardPdf(cache)); + } + BilinearTestResults result = (BilinearTestResults)0; + result.generated = asfloat(acc); + outputTestValues[invID] = result; +#else + BilinearTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +#endif +} diff --git a/37_HLSLSamplingTests/app_resources/box_muller_transform_test.comp.hlsl b/37_HLSLSamplingTests/app_resources/box_muller_transform_test.comp.hlsl new file mode 100644 index 000000000..604c375c4 --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/box_muller_transform_test.comp.hlsl @@ -0,0 +1,38 @@ +#pragma shader_stage(compute) + +#include "common/box_muller_transform.hlsl" +#include +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(64, 1, 1)] +[shader("compute")] +void main() +{ + const uint32_t invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; +#ifdef BENCH_ITERS + // stddev=1. 
+ sampling::BoxMullerTransform sampler; + sampler.stddev = 1.0f; + nbl::hlsl::Xoroshiro64Star rng = nbl::hlsl::Xoroshiro64Star::construct(uint32_t2(invID, 0u)); + const float32_t toFloat = asfloat(0x2f800004u); + uint32_t2 acc = (uint32_t2)0; + for (uint32_t i = 0u; i < uint32_t(BENCH_ITERS); i++) + { + float32_t2 u = float32_t2(rng(), rng()) * toFloat; + u.x = max(u.x, 1e-7f); + sampling::BoxMullerTransform::cache_type cache; + float32_t2 generated = sampler.generate(u, cache); + acc ^= asuint(generated); + acc ^= asuint(sampler.forwardPdf(cache)); + } + BoxMullerTransformTestResults result = (BoxMullerTransformTestResults)0; + result.generated = asfloat(acc); + outputTestValues[invID] = result; +#else + BoxMullerTransformTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +#endif +} diff --git a/37_HLSLSamplingTests/app_resources/common/bilinear.hlsl b/37_HLSLSamplingTests/app_resources/common/bilinear.hlsl new file mode 100644 index 000000000..08f61ccd8 --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/common/bilinear.hlsl @@ -0,0 +1,50 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_BILINEAR_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_BILINEAR_INCLUDED_ + +#include +#include + +using namespace nbl::hlsl; + +struct BilinearInputValues +{ + float32_t4 bilinearCoeffs; + float32_t2 u; +}; + +struct BilinearTestResults +{ + float32_t2 generated; + float32_t cachedPdf; + float32_t backwardPdf; + float32_t forwardPdf; + float32_t2 inverted; + float32_t roundtripError; + float32_t jacobianProduct; +}; + +struct BilinearTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(BilinearInputValues) input, NBL_REF_ARG(BilinearTestResults) output) + { + sampling::Bilinear sampler = sampling::Bilinear::create(input.bilinearCoeffs); + { + sampling::Bilinear::cache_type cache; + output.generated = sampler.generate(input.u, cache); + output.cachedPdf = cache.pdf; + output.forwardPdf = sampler.forwardPdf(cache); + } + + { + 
sampling::Bilinear::cache_type cache; + output.inverted = sampler.generateInverse(output.generated, cache); + output.backwardPdf = sampler.backwardPdf(output.generated); + } + + float32_t2 diff = input.u - output.inverted; + output.roundtripError = nbl::hlsl::length(diff); + output.jacobianProduct = (float32_t(1.0) / output.forwardPdf) * output.backwardPdf; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/app_resources/common/box_muller_transform.hlsl b/37_HLSLSamplingTests/app_resources/common/box_muller_transform.hlsl new file mode 100644 index 000000000..0ffb62e71 --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/common/box_muller_transform.hlsl @@ -0,0 +1,43 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_BOX_MULLER_TRANSFORM_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_BOX_MULLER_TRANSFORM_INCLUDED_ + +#include +#include + +using namespace nbl::hlsl; + +struct BoxMullerTransformInputValues +{ + float32_t stddev; + float32_t2 u; +}; + +struct BoxMullerTransformTestResults +{ + float32_t2 generated; + float32_t cachedPdf; + float32_t forwardPdf; + float32_t backwardPdf; + float32_t2 separateBackwardPdf; +}; + +struct BoxMullerTransformTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(BoxMullerTransformInputValues) input, NBL_REF_ARG(BoxMullerTransformTestResults) output) + { + sampling::BoxMullerTransform sampler; + sampler.stddev = input.stddev; + + { + sampling::BoxMullerTransform::cache_type cache; + output.generated = sampler.generate(input.u, cache); + output.cachedPdf = cache.pdf; + output.forwardPdf = sampler.forwardPdf(cache); + } + + output.backwardPdf = sampler.backwardPdf(output.generated); + output.separateBackwardPdf = sampler.separateBackwardPdf(output.generated); + } +}; + +#endif diff --git a/37_HLSLSamplingTests/app_resources/common/concentric_mapping.hlsl b/37_HLSLSamplingTests/app_resources/common/concentric_mapping.hlsl new file mode 100644 index 000000000..00649d486 --- /dev/null +++ 
b/37_HLSLSamplingTests/app_resources/common/concentric_mapping.hlsl @@ -0,0 +1,46 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_CONCENTRIC_MAPPING_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_CONCENTRIC_MAPPING_INCLUDED_ + +#include +#include + +using namespace nbl::hlsl; + +struct ConcentricMappingInputValues +{ + float32_t2 u; +}; + +struct ConcentricMappingTestResults +{ + float32_t2 mapped; + float32_t2 inverted; + float32_t cachedPdf; + float32_t forwardPdf; + float32_t backwardPdf; + float32_t jacobianProduct; + float32_t roundtripError; +}; + +struct ConcentricMappingTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(ConcentricMappingInputValues) input, NBL_REF_ARG(ConcentricMappingTestResults) output) + { + { + sampling::ConcentricMapping::cache_type cache; + output.mapped = sampling::ConcentricMapping::generate(input.u, cache); + output.cachedPdf = cache.pdf; + output.forwardPdf = sampling::ConcentricMapping::forwardPdf(cache); + } + { + sampling::ConcentricMapping::cache_type cache; + output.inverted = sampling::ConcentricMapping::generateInverse(output.mapped, cache); + output.backwardPdf = sampling::ConcentricMapping::backwardPdf(input.u); + } + float32_t2 diff = input.u - output.inverted; + output.roundtripError = nbl::hlsl::length(diff); + output.jacobianProduct = float32_t(1.0 / output.backwardPdf) * output.forwardPdf; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/app_resources/common/linear.hlsl b/37_HLSLSamplingTests/app_resources/common/linear.hlsl new file mode 100644 index 000000000..f64ae165f --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/common/linear.hlsl @@ -0,0 +1,48 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_LINEAR_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_LINEAR_INCLUDED_ + +#include +#include + +using namespace nbl::hlsl; + +struct LinearInputValues +{ + float32_t2 coeffs; + float32_t u; +}; + +struct LinearTestResults +{ + float32_t generated; + float32_t generateInversed; + 
float32_t cachedPdf; + float32_t forwardPdf; + float32_t backwardPdf; + float32_t roundtripError; + float32_t jacobianProduct; +}; + +struct LinearTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(LinearInputValues) input, NBL_REF_ARG(LinearTestResults) output) + { + sampling::Linear _sampler = sampling::Linear::create(input.coeffs); + { + sampling::Linear::cache_type cache; + output.generated = _sampler.generate(input.u, cache); + output.cachedPdf = cache.pdf; + output.forwardPdf = _sampler.forwardPdf(cache); + } + + { + sampling::Linear::cache_type cache; + output.generateInversed = _sampler.generateInverse(output.generated, cache); + output.backwardPdf = _sampler.backwardPdf(output.generated); + } + output.roundtripError = abs(input.u - output.generateInversed); + output.jacobianProduct = (float32_t(1.0) / output.forwardPdf) * output.backwardPdf; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/app_resources/common/projected_hemisphere.hlsl b/37_HLSLSamplingTests/app_resources/common/projected_hemisphere.hlsl new file mode 100644 index 000000000..4dd4066de --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/common/projected_hemisphere.hlsl @@ -0,0 +1,47 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_PROJECTED_HEMISPHERE_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_PROJECTED_HEMISPHERE_INCLUDED_ + +#include +#include + +using namespace nbl::hlsl; + +struct ProjectedHemisphereInputValues +{ + float32_t2 u; +}; + +struct ProjectedHemisphereTestResults +{ + float32_t3 generated; + float32_t2 inverted; + float32_t cachedPdf; + float32_t forwardPdf; + float32_t backwardPdf; + float32_t roundtripError; + float32_t jacobianProduct; +}; + +struct ProjectedHemisphereTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(ProjectedHemisphereInputValues) input, NBL_REF_ARG(ProjectedHemisphereTestResults) output) + { + sampling::ProjectedHemisphere sampler; + { + sampling::ProjectedHemisphere::cache_type cache; + output.generated = 
sampler.generate(input.u, cache); + output.cachedPdf = cache.pdf; + output.forwardPdf = sampler.forwardPdf(cache); + } + { + sampling::ProjectedHemisphere::cache_type cache; + output.inverted = sampler.generateInverse(output.generated, cache); + output.backwardPdf = sampler.backwardPdf(output.generated); + } + float32_t2 diff = input.u - output.inverted; + output.roundtripError = nbl::hlsl::length(diff); + output.jacobianProduct = (float32_t(1.0) / output.forwardPdf) * output.backwardPdf; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/app_resources/common/projected_sphere.hlsl b/37_HLSLSamplingTests/app_resources/common/projected_sphere.hlsl new file mode 100644 index 000000000..98eec9914 --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/common/projected_sphere.hlsl @@ -0,0 +1,52 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_PROJECTED_SPHERE_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_PROJECTED_SPHERE_INCLUDED_ + +#include +#include + +using namespace nbl::hlsl; + +struct ProjectedSphereInputValues +{ + float32_t3 u; +}; + +struct ProjectedSphereTestResults +{ + float32_t3 generated; + float32_t cachedPdf; + float32_t forwardPdf; + float32_t3 modifiedU; + float32_t3 inverted; + float32_t backwardPdf; + // Only xy round-trips accurately; z information is intentionally lost in generateInverse + // (it maps to 0 or 1 based on sign of generated.z, not the exact original value). 
+ float32_t roundtripError; + float32_t jacobianProduct; +}; + +struct ProjectedSphereTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(ProjectedSphereInputValues) input, NBL_REF_ARG(ProjectedSphereTestResults) output) + { + sampling::ProjectedSphere sampler; + { + sampling::ProjectedSphere::cache_type cache; + float32_t3 sample = input.u; + output.generated = sampler.generate(sample, cache); + output.cachedPdf = cache.pdf; + output.forwardPdf = sampler.forwardPdf(cache); + output.modifiedU = sample; + } + { + sampling::ProjectedSphere::cache_type cache; + output.inverted = sampler.generateInverse(output.generated, cache); + output.backwardPdf = sampler.backwardPdf(output.generated); + } + float32_t2 xyDiff = output.modifiedU.xy - output.inverted.xy; + output.roundtripError = nbl::hlsl::length(xyDiff); + output.jacobianProduct = (float32_t(1.0) / output.forwardPdf) * output.backwardPdf; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/app_resources/common/projected_spherical_triangle.hlsl b/37_HLSLSamplingTests/app_resources/common/projected_spherical_triangle.hlsl new file mode 100644 index 000000000..591b0333b --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/common/projected_spherical_triangle.hlsl @@ -0,0 +1,68 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_PROJECTED_SPHERICAL_TRIANGLE_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_PROJECTED_SPHERICAL_TRIANGLE_INCLUDED_ + +#include +#include +#include + +using namespace nbl::hlsl; + +struct ProjectedSphericalTriangleInputValues +{ + float32_t3 vertex0; + float32_t3 vertex1; + float32_t3 vertex2; + float32_t3 receiverNormal; + uint32_t receiverWasBSDF; + float32_t2 u; +}; + +struct ProjectedSphericalTriangleTestResults +{ + float32_t3 generated; + float32_t cachedPdf; + float32_t forwardPdf; + float32_t backwardPdf; +}; + +struct ProjectedSphericalTriangleTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(ProjectedSphericalTriangleInputValues) input, 
NBL_REF_ARG(ProjectedSphericalTriangleTestResults) output) + { + shapes::SphericalTriangle shape; + shape.vertices[0] = input.vertex0; + shape.vertices[1] = input.vertex1; + shape.vertices[2] = input.vertex2; + shape.cos_sides = float32_t3( + nbl::hlsl::dot(input.vertex1, input.vertex2), + nbl::hlsl::dot(input.vertex2, input.vertex0), + nbl::hlsl::dot(input.vertex0, input.vertex1)); + float32_t3 csc_sides2 = float32_t3(1.0, 1.0, 1.0) - shape.cos_sides * shape.cos_sides; + shape.csc_sides = float32_t3( + nbl::hlsl::rsqrt(csc_sides2.x), + nbl::hlsl::rsqrt(csc_sides2.y), + nbl::hlsl::rsqrt(csc_sides2.z)); + + sampling::SphericalTriangle sphtri = sampling::SphericalTriangle::create(shape); + + sampling::ProjectedSphericalTriangle sampler; + sampler.sphtri = sphtri; + sampler.receiverNormal = input.receiverNormal; + sampler.receiverWasBSDF = (bool)input.receiverWasBSDF; + + { + sampling::ProjectedSphericalTriangle::cache_type cache; + output.generated = sampler.generate(input.u, cache); + output.cachedPdf = cache.pdf; + output.forwardPdf = sampler.forwardPdf(cache); + } + // Test backwardPdf at the triangle centroid: a deterministic interior point computed + // from only basic arithmetic + sqrt (IEEE 754 exact), so CPU and GPU agree bit-exactly. + // Using output.generated would amplify generate's transcendental FP errors through + // generateInverse's acos, producing ~0.005-0.01 CPU/GPU divergence. 
+ const float32_t3 center = nbl::hlsl::normalize(input.vertex0 + input.vertex1 + input.vertex2); + output.backwardPdf = sampler.backwardPdf(center); + } +}; + +#endif diff --git a/37_HLSLSamplingTests/app_resources/common/spherical_rectangle.hlsl b/37_HLSLSamplingTests/app_resources/common/spherical_rectangle.hlsl new file mode 100644 index 000000000..1520e29f3 --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/common/spherical_rectangle.hlsl @@ -0,0 +1,49 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_SPHERICAL_RECTANGLE_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_SPHERICAL_RECTANGLE_INCLUDED_ + +#include +#include +#include + +using namespace nbl::hlsl; + +struct SphericalRectangleInputValues +{ + float32_t3 observer; + float32_t3 rectOrigin; + float32_t3 right; + float32_t3 up; + float32_t2 u; +}; + +struct SphericalRectangleTestResults +{ + float32_t2 generated; + float32_t cachedPdf; + float32_t pdf; // forwardPdf(u) + float32_t backwardPdf; +}; + +struct SphericalRectangleTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(SphericalRectangleInputValues) input, NBL_REF_ARG(SphericalRectangleTestResults) output) + { + shapes::CompressedSphericalRectangle compressed; + compressed.origin = input.rectOrigin; + compressed.right = input.right; + compressed.up = input.up; + + shapes::SphericalRectangle rect = shapes::SphericalRectangle::create(compressed); + sampling::SphericalRectangle sampler = sampling::SphericalRectangle::create(rect, input.observer); + + { + sampling::SphericalRectangle::cache_type cache; + output.generated = sampler.generate(input.u, cache); + output.cachedPdf = cache.pdf; + output.pdf = sampler.forwardPdf(cache); + } + output.backwardPdf = sampler.backwardPdf(output.generated); + } +}; + +#endif diff --git a/37_HLSLSamplingTests/app_resources/common/spherical_triangle.hlsl b/37_HLSLSamplingTests/app_resources/common/spherical_triangle.hlsl new file mode 100644 index 000000000..a63678e31 --- /dev/null +++ 
b/37_HLSLSamplingTests/app_resources/common/spherical_triangle.hlsl @@ -0,0 +1,97 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_SPHERICAL_TRIANGLE_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_SPHERICAL_TRIANGLE_INCLUDED_ + +#include +#include + +using namespace nbl::hlsl; + +struct SphericalTriangleInputValues +{ + float32_t3 vertex0; + float32_t3 vertex1; + float32_t3 vertex2; + float32_t2 u; +}; + +struct SphericalTriangleTestResults +{ + float32_t3 generated; + float32_t2 inverted; + float32_t cachedPdf; + float32_t forwardPdf; + float32_t backwardPdf; + float32_t roundtripError; + float32_t jacobianProduct; + // Minimum signed distance to a triangle edge (sin of angular distance to nearest great circle). + // Positive = inside, negative = outside. Allows tolerance at boundaries. + float32_t generatedInside; + // Minimum margin to the [0,1]^2 boundary: min(u.x, 1-u.x, u.y, 1-u.y). + // Positive = inside, negative = outside. + float32_t invertedInDomain; +}; + +struct SphericalTriangleTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(SphericalTriangleInputValues) input, NBL_REF_ARG(SphericalTriangleTestResults) output) + { + shapes::SphericalTriangle shape; + shape.vertices[0] = input.vertex0; + shape.vertices[1] = input.vertex1; + shape.vertices[2] = input.vertex2; + shape.cos_sides = float32_t3( + nbl::hlsl::dot(input.vertex1, input.vertex2), + nbl::hlsl::dot(input.vertex2, input.vertex0), + nbl::hlsl::dot(input.vertex0, input.vertex1)); + float32_t3 csc_sides2 = float32_t3(1.0, 1.0, 1.0) - shape.cos_sides * shape.cos_sides; + shape.csc_sides = float32_t3( + nbl::hlsl::rsqrt(csc_sides2.x), + nbl::hlsl::rsqrt(csc_sides2.y), + nbl::hlsl::rsqrt(csc_sides2.z)); + + sampling::SphericalTriangle sampler = sampling::SphericalTriangle::create(shape); + + // Forward: u -> v + { + sampling::SphericalTriangle::cache_type cache; + output.generated = sampler.generate(input.u, cache); + output.cachedPdf = cache.pdf; + output.forwardPdf = 
sampler.forwardPdf(cache); + } + + + // Inverse: v -> u' + { + sampling::SphericalTriangle::cache_type cache; + output.inverted = sampler.generateInverse(output.generated, cache); + // Backward: evaluate pdf at generated point (no cache needed) + output.backwardPdf = sampler.backwardPdf(output.generated); + } + // Roundtrip error: ||u - u'|| + float32_t2 diff = input.u - output.inverted; + output.roundtripError = nbl::hlsl::length(diff); + + // Jacobian product: (1/forwardPdf) * backwardPdf should equal 1 for bijective samplers + output.jacobianProduct = (float32_t(1.0) / output.forwardPdf) * output.backwardPdf; + + // Domain preservation: + // A point is inside the spherical triangle iff it is on the "inside" half-plane + // of every edge. The orientation of the triangle (CCW vs CW) is given by the + // sign of the scalar triple product dot(v0, cross(v1, v2)). + float32_t3 e01 = nbl::hlsl::cross(input.vertex0, input.vertex1); + float32_t3 e12 = nbl::hlsl::cross(input.vertex1, input.vertex2); + float32_t3 e20 = nbl::hlsl::cross(input.vertex2, input.vertex0); + // Normalize by edge lengths so the value is the sine of the angular distance + // to the nearest great-circle edge (positive = inside, negative = outside). 
+ float32_t orientation = nbl::hlsl::dot(input.vertex0, e12); + float32_t sinDist01 = nbl::hlsl::dot(output.generated, e01) * orientation * shape.csc_sides.z; + float32_t sinDist12 = nbl::hlsl::dot(output.generated, e12) * orientation * shape.csc_sides.x; + float32_t sinDist20 = nbl::hlsl::dot(output.generated, e20) * orientation * shape.csc_sides.y; + output.generatedInside = nbl::hlsl::min(nbl::hlsl::min(sinDist01, sinDist12), sinDist20); + + float32_t2 u = output.inverted; + output.invertedInDomain = nbl::hlsl::min(nbl::hlsl::min(u.x, float32_t(1.0) - u.x), nbl::hlsl::min(u.y, float32_t(1.0) - u.y)); + } +}; + +#endif diff --git a/37_HLSLSamplingTests/app_resources/common/uniform_hemisphere.hlsl b/37_HLSLSamplingTests/app_resources/common/uniform_hemisphere.hlsl new file mode 100644 index 000000000..c87fa0e78 --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/common/uniform_hemisphere.hlsl @@ -0,0 +1,48 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_UNIFORM_HEMISPHERE_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_UNIFORM_HEMISPHERE_INCLUDED_ + +#include +#include + +using namespace nbl::hlsl; + +struct UniformHemisphereInputValues +{ + float32_t2 u; +}; + +struct UniformHemisphereTestResults +{ + float32_t3 generated; + float32_t pdf; + float32_t2 inverted; + float32_t cachedPdf; + float32_t forwardPdf; + float32_t backwardPdf; + float32_t roundtripError; + float32_t jacobianProduct; +}; + +struct UniformHemisphereTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(UniformHemisphereInputValues) input, NBL_REF_ARG(UniformHemisphereTestResults) output) + { + sampling::UniformHemisphere sampler; + { + sampling::UniformHemisphere::cache_type cache; + output.generated = sampler.generate(input.u, cache); + output.cachedPdf = cache.pdf; + output.forwardPdf = sampler.forwardPdf(cache); + } + { + sampling::UniformHemisphere::cache_type cache; + output.inverted = sampler.generateInverse(output.generated, cache); + output.backwardPdf = 
sampler.backwardPdf(output.generated); + } + float32_t2 diff = input.u - output.inverted; + output.roundtripError = nbl::hlsl::length(diff); + output.jacobianProduct = (float32_t(1.0) / output.forwardPdf) * output.backwardPdf; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/app_resources/common/uniform_sphere.hlsl b/37_HLSLSamplingTests/app_resources/common/uniform_sphere.hlsl new file mode 100644 index 000000000..6657fadd7 --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/common/uniform_sphere.hlsl @@ -0,0 +1,49 @@ +#ifndef _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_UNIFORM_SPHERE_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_37_SAMPLING_COMMON_UNIFORM_SPHERE_INCLUDED_ + +#include +#include + +using namespace nbl::hlsl; + +struct UniformSphereInputValues +{ + float32_t2 u; +}; + +struct UniformSphereTestResults +{ + float32_t3 generated; + float32_t pdf; + float32_t2 inverted; + float32_t cachedPdf; + float32_t forwardPdf; + float32_t backwardPdf; + float32_t roundtripError; + float32_t jacobianProduct; +}; + +struct UniformSphereTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(UniformSphereInputValues) input, NBL_REF_ARG(UniformSphereTestResults) output) + { + sampling::UniformSphere sampler; + { + sampling::UniformSphere::cache_type cache; + output.generated = sampler.generate(input.u, cache); + output.cachedPdf = cache.pdf; + output.forwardPdf = sampler.forwardPdf(cache); + } + + { + sampling::UniformSphere::cache_type cache; + output.inverted = sampler.generateInverse(output.generated, cache); + output.backwardPdf = sampler.backwardPdf(output.generated); + } + float32_t2 diff = input.u - output.inverted; + output.roundtripError = nbl::hlsl::length(diff); + output.jacobianProduct = (float32_t(1.0) / output.forwardPdf) * output.backwardPdf; + } +}; + +#endif diff --git a/37_HLSLSamplingTests/app_resources/concentric_mapping_test.comp.hlsl b/37_HLSLSamplingTests/app_resources/concentric_mapping_test.comp.hlsl new file mode 100644 index 000000000..6899e78a7 --- 
/dev/null +++ b/37_HLSLSamplingTests/app_resources/concentric_mapping_test.comp.hlsl @@ -0,0 +1,31 @@ +#pragma shader_stage(compute) + +#include "common/concentric_mapping.hlsl" +#include +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(64, 1, 1)] +[shader("compute")] +void main() +{ + const uint32_t invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; +#ifdef BENCH_ITERS + nbl::hlsl::Xoroshiro64Star rng = nbl::hlsl::Xoroshiro64Star::construct(uint32_t2(invID, 0u)); + const float32_t toFloat = asfloat(0x2f800004u); + uint32_t2 acc = (uint32_t2)0; + for (uint32_t i = 0u; i < uint32_t(BENCH_ITERS); i++) + { + float32_t2 u = float32_t2(rng(), rng()) * toFloat; + acc ^= asuint(sampling::ConcentricMapping::generate(u)); + } + ConcentricMappingTestResults result = (ConcentricMappingTestResults)0; + result.mapped = asfloat(acc); + outputTestValues[invID] = result; +#else + ConcentricMappingTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +#endif +} diff --git a/37_HLSLSamplingTests/app_resources/linear_test.comp.hlsl b/37_HLSLSamplingTests/app_resources/linear_test.comp.hlsl new file mode 100644 index 000000000..fa862030d --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/linear_test.comp.hlsl @@ -0,0 +1,36 @@ +#pragma shader_stage(compute) + +#include "common/linear.hlsl" +#include +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(64, 1, 1)] +[shader("compute")] +void main() +{ + const uint32_t invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; +#ifdef BENCH_ITERS + // Coefficients are hardcoded to a valid non-degenerate distribution. 
+ const float32_t2 coeffs = float32_t2(0.2f, 0.8f); + sampling::Linear sampler = sampling::Linear::create(coeffs); + nbl::hlsl::Xoroshiro64Star rng = nbl::hlsl::Xoroshiro64Star::construct(uint32_t2(invID, 0u)); + const float32_t toFloat = asfloat(0x2f800004u); + uint32_t acc = 0u; + for (uint32_t i = 0u; i < uint32_t(BENCH_ITERS); i++) + { + float32_t u = float32_t(rng()) * toFloat; + sampling::Linear::cache_type cache; + acc ^= asuint(sampler.generate(u, cache)); + acc ^= asuint(sampler.forwardPdf(cache)); + } + LinearTestResults result = (LinearTestResults)0; + result.generated = asfloat(acc); + outputTestValues[invID] = result; +#else + LinearTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +#endif +} diff --git a/37_HLSLSamplingTests/app_resources/projected_hemisphere_test.comp.hlsl b/37_HLSLSamplingTests/app_resources/projected_hemisphere_test.comp.hlsl new file mode 100644 index 000000000..c49e17a3f --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/projected_hemisphere_test.comp.hlsl @@ -0,0 +1,34 @@ +#pragma shader_stage(compute) + +#include "common/projected_hemisphere.hlsl" +#include +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(64, 1, 1)] +[shader("compute")] +void main() +{ + const uint32_t invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; +#ifdef BENCH_ITERS + nbl::hlsl::Xoroshiro64Star rng = nbl::hlsl::Xoroshiro64Star::construct(uint32_t2(invID, 0u)); + const float32_t toFloat = asfloat(0x2f800004u); + uint32_t3 acc = (uint32_t3)0; + for (uint32_t i = 0u; i < uint32_t(BENCH_ITERS); i++) + { + float32_t2 u = float32_t2(rng(), rng()) * toFloat; + sampling::ProjectedHemisphere sampler; + sampling::ProjectedHemisphere::cache_type cache; + acc ^= asuint(sampler.generate(u, cache)); + acc ^= asuint(sampler.forwardPdf(cache)); + } + ProjectedHemisphereTestResults result = (ProjectedHemisphereTestResults)0; + 
result.generated = asfloat(acc); + outputTestValues[invID] = result; +#else + ProjectedHemisphereTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +#endif +} diff --git a/37_HLSLSamplingTests/app_resources/projected_sphere_test.comp.hlsl b/37_HLSLSamplingTests/app_resources/projected_sphere_test.comp.hlsl new file mode 100644 index 000000000..598a2fac0 --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/projected_sphere_test.comp.hlsl @@ -0,0 +1,34 @@ +#pragma shader_stage(compute) + +#include "common/projected_sphere.hlsl" +#include +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(64, 1, 1)] +[shader("compute")] +void main() +{ + const uint32_t invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; +#ifdef BENCH_ITERS + nbl::hlsl::Xoroshiro64Star rng = nbl::hlsl::Xoroshiro64Star::construct(uint32_t2(invID, 0u)); + const float32_t toFloat = asfloat(0x2f800004u); + uint32_t3 acc = (uint32_t3)0; + for (uint32_t i = 0u; i < uint32_t(BENCH_ITERS); i++) + { + float32_t3 u = float32_t3(rng(), rng(), rng()) * toFloat; + sampling::ProjectedSphere sampler; + sampling::ProjectedSphere::cache_type cache; + acc ^= asuint(sampler.generate(u, cache)); + acc ^= asuint(sampler.forwardPdf(cache)); + } + ProjectedSphereTestResults result = (ProjectedSphereTestResults)0; + result.generated = asfloat(acc); + outputTestValues[invID] = result; +#else + ProjectedSphereTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +#endif +} diff --git a/37_HLSLSamplingTests/app_resources/projected_spherical_triangle_test.comp.hlsl b/37_HLSLSamplingTests/app_resources/projected_spherical_triangle_test.comp.hlsl new file mode 100644 index 000000000..65afec39d --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/projected_spherical_triangle_test.comp.hlsl @@ -0,0 +1,47 @@ +#pragma shader_stage(compute) + +#include 
"common/projected_spherical_triangle.hlsl" +#include +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(64, 1, 1)] +[shader("compute")] +void main() +{ + const uint32_t invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; +#ifdef BENCH_ITERS + // Hardcode an axis-aligned octant triangle (valid, non-degenerate, cos_sides=0). + shapes::SphericalTriangle shape; + shape.vertices[0] = float32_t3(1.0f, 0.0f, 0.0f); + shape.vertices[1] = float32_t3(0.0f, 1.0f, 0.0f); + shape.vertices[2] = float32_t3(0.0f, 0.0f, 1.0f); + shape.cos_sides = float32_t3(0.0f, 0.0f, 0.0f); + shape.csc_sides = float32_t3(1.0f, 1.0f, 1.0f); + sampling::SphericalTriangle sphtri = sampling::SphericalTriangle::create(shape); + + sampling::ProjectedSphericalTriangle sampler; + sampler.sphtri = sphtri; + sampler.receiverNormal = float32_t3(0.0f, 0.0f, 1.0f); + sampler.receiverWasBSDF = false; + + nbl::hlsl::Xoroshiro64Star rng = nbl::hlsl::Xoroshiro64Star::construct(uint32_t2(invID, 0u)); + const float32_t toFloat = asfloat(0x2f800004u); + uint32_t3 acc = (uint32_t3)0; + for (uint32_t i = 0u; i < uint32_t(BENCH_ITERS); i++) + { + float32_t2 u = float32_t2(rng(), rng()) * toFloat; + sampling::ProjectedSphericalTriangle::cache_type cache; + acc ^= asuint(sampler.generate(u, cache)); + acc ^= asuint(sampler.forwardPdf(cache)); + } + ProjectedSphericalTriangleTestResults result = (ProjectedSphericalTriangleTestResults)0; + result.generated = asfloat(acc); + outputTestValues[invID] = result; +#else + ProjectedSphericalTriangleTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +#endif +} diff --git a/37_HLSLSamplingTests/app_resources/spherical_rectangle_test.comp.hlsl b/37_HLSLSamplingTests/app_resources/spherical_rectangle_test.comp.hlsl new file mode 100644 index 000000000..2ed5a7d04 --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/spherical_rectangle_test.comp.hlsl @@ -0,0 
+1,42 @@ +#pragma shader_stage(compute) + +#include "common/spherical_rectangle.hlsl" +#include +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(64, 1, 1)] +[shader("compute")] +void main() +{ + const uint32_t invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; +#ifdef BENCH_ITERS + // Hardcode a valid non-degenerate rectangle: observer at origin, rect at z=-2. + // Seed the RNG with invID so u is a runtime value — prevents loop DCE after unrolling. + shapes::CompressedSphericalRectangle compressed; + compressed.origin = float32_t3(0.0f, 0.0f, -2.0f); + compressed.right = float32_t3(1.0f, 0.0f, 0.0f); + compressed.up = float32_t3(0.0f, 1.0f, 0.0f); + shapes::SphericalRectangle rect = shapes::SphericalRectangle::create(compressed); + sampling::SphericalRectangle sampler = sampling::SphericalRectangle::create(rect, float32_t3(0.0f, 0.0f, 0.0f)); + + nbl::hlsl::Xoroshiro64Star rng = nbl::hlsl::Xoroshiro64Star::construct(uint32_t2(invID, 0u)); + const float32_t toFloat = asfloat(0x2f800004u); + uint32_t2 acc = (uint32_t2)0; + for (uint32_t i = 0u; i < uint32_t(BENCH_ITERS); i++) + { + float32_t2 u = float32_t2(rng(), rng()) * toFloat; + sampling::SphericalRectangle::cache_type cache; + acc ^= asuint(sampler.generate(u, cache)); + acc ^= asuint(sampler.forwardPdf(cache)); + } + SphericalRectangleTestResults result = (SphericalRectangleTestResults)0; + result.generated = asfloat(acc); + outputTestValues[invID] = result; +#else + SphericalRectangleTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +#endif +} diff --git a/37_HLSLSamplingTests/app_resources/spherical_triangle.comp.hlsl b/37_HLSLSamplingTests/app_resources/spherical_triangle.comp.hlsl new file mode 100644 index 000000000..401bed593 --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/spherical_triangle.comp.hlsl @@ -0,0 +1,46 @@ +#pragma shader_stage(compute) + +#include
"common/spherical_triangle.hlsl" +#include +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(64, 1, 1)] +[shader("compute")] +void main() +{ + const uint32_t invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; +#ifdef BENCH_ITERS + // Hardcode an axis-aligned octant triangle (valid, non-degenerate, all cos_sides=0). + // Seed the RNG with invID so u is a runtime value — prevents loop DCE after unrolling. + shapes::SphericalTriangle shape; + shape.vertices[0] = float32_t3(1.0f, 0.0f, 0.0f); + shape.vertices[1] = float32_t3(0.0f, 1.0f, 0.0f); + shape.vertices[2] = float32_t3(0.0f, 0.0f, 1.0f); + shape.cos_sides = float32_t3(0.0f, 0.0f, 0.0f); + shape.csc_sides = float32_t3(1.0f, 1.0f, 1.0f); + sampling::SphericalTriangle sampler = sampling::SphericalTriangle::create(shape); + + nbl::hlsl::Xoroshiro64Star rng = nbl::hlsl::Xoroshiro64Star::construct(uint32_t2(invID, 0u)); + const float32_t toFloat = asfloat(0x2f800004u); + uint32_t3 accDir = (uint32_t3)0; + uint32_t accPdf = 0u; + for (uint32_t i = 0u; i < uint32_t(BENCH_ITERS); i++) + { + float32_t2 u = float32_t2(rng(), rng()) * toFloat; + sampling::SphericalTriangle::cache_type cache; + float32_t3 generated = sampler.generate(u, cache); + accDir ^= asuint(generated); + accPdf ^= asuint(sampler.forwardPdf(cache)); + } + SphericalTriangleTestResults result = (SphericalTriangleTestResults)0; + result.generated = asfloat(accDir); + result.forwardPdf = asfloat(accPdf); + outputTestValues[invID] = result; +#else + SphericalTriangleTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +#endif +} diff --git a/37_HLSLSamplingTests/app_resources/test_compile.comp.hlsl b/37_HLSLSamplingTests/app_resources/test_compile.comp.hlsl new file mode 100644 index 000000000..a5706d14d --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/test_compile.comp.hlsl @@ -0,0 +1,159 @@ +// Compile test: instantiate all
sampling types and their concept-required methods to verify DXC compilation +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace nbl::hlsl; + +[[vk::binding(0, 0)]] RWStructuredBuffer output; + +[numthreads(1, 1, 1)] +[shader("compute")] +void main() +{ + float32_t2 u2 = float32_t2(0.5, 0.5); + float32_t3 u3 = float32_t3(0.5, 0.5, 0.5); + float32_t4 acc = float32_t4(0, 0, 0, 0); + + // ConcentricMapping — generate, generateInverse, forwardPdf, backwardPdf, forwardWeight, backwardWeight + sampling::ConcentricMapping::cache_type cache; + float32_t2 concentric = sampling::ConcentricMapping::generate(u2, cache); + acc.xy += concentric; + acc.xy += sampling::ConcentricMapping::generateInverse(concentric, cache); + acc.x += sampling::ConcentricMapping::forwardPdf(cache); + acc.x += sampling::ConcentricMapping::backwardPdf(concentric); + acc.x += sampling::ConcentricMapping::forwardWeight(cache); + acc.x += sampling::ConcentricMapping::backwardWeight(concentric); + + // Linear — generate, generateInverse, forwardPdf, backwardPdf, forwardWeight, backwardWeight + sampling::Linear lin = sampling::Linear::create(u2); + sampling::Linear::cache_type linCache; + float32_t linSample = lin.generate(0.5f, linCache); + acc.x += linSample; + acc.x += lin.forwardPdf(linCache); + acc.x += lin.forwardWeight(linCache); + acc.x += lin.generateInverse(linSample, linCache); + acc.x += lin.backwardPdf(linSample); + acc.x += lin.backwardWeight(linSample); + + // Bilinear — generate, generateInverse, forwardPdf, backwardPdf, forwardWeight, backwardWeight + sampling::Bilinear bilinear = sampling::Bilinear::create(float32_t4(1, 2, 3, 4)); + sampling::Bilinear::cache_type bilCache; + float32_t2 bilSample = bilinear.generate(u2, bilCache); + acc.xy += bilSample; + acc.x += bilinear.forwardPdf(bilCache); + acc.x += bilinear.forwardWeight(bilCache); + acc.xy += bilinear.generateInverse(bilSample, bilCache); + acc.x += 
bilinear.backwardPdf(bilSample); + acc.x += bilinear.backwardWeight(bilSample); + + // UniformHemisphere — generate, generateInverse, forwardPdf, backwardPdf, forwardWeight, backwardWeight + sampling::UniformHemisphere uniHemi; + sampling::UniformHemisphere::cache_type uniHemiCache; + float32_t3 uniHemiSample = uniHemi.generate(u2, uniHemiCache); + acc.xyz += uniHemiSample; + acc.x += uniHemi.forwardPdf(uniHemiCache); + acc.x += uniHemi.forwardWeight(uniHemiCache); + acc.xy += uniHemi.generateInverse(uniHemiSample, uniHemiCache); + acc.x += uniHemi.backwardPdf(uniHemiSample); + acc.x += uniHemi.backwardWeight(uniHemiSample); + + // UniformSphere — generate, generateInverse, forwardPdf, backwardPdf, forwardWeight, backwardWeight + sampling::UniformSphere uniSph; + sampling::UniformSphere::cache_type uniSphCache; + float32_t3 uniSphSample = uniSph.generate(u2, uniSphCache); + acc.xyz += uniSphSample; + acc.x += uniSph.forwardPdf(uniSphCache); + acc.x += uniSph.forwardWeight(uniSphCache); + acc.xy += uniSph.generateInverse(uniSphSample, uniSphCache); + acc.x += uniSph.backwardPdf(uniSphSample); + acc.x += uniSph.backwardWeight(uniSphSample); + + // ProjectedHemisphere — generate, generateInverse, forwardPdf, backwardPdf, forwardWeight, backwardWeight + sampling::ProjectedHemisphere::cache_type projHemiCache; + float32_t3 projHemi = sampling::ProjectedHemisphere::generate(u2, projHemiCache); + acc.xyz += projHemi; + acc.x += sampling::ProjectedHemisphere::forwardPdf(projHemiCache); + acc.x += sampling::ProjectedHemisphere::forwardWeight(projHemiCache); + acc.xy += sampling::ProjectedHemisphere::generateInverse(projHemi, projHemiCache); + acc.x += sampling::ProjectedHemisphere::backwardPdf(projHemi); + acc.x += sampling::ProjectedHemisphere::backwardWeight(projHemi); + + // ProjectedSphere — generate, generateInverse, forwardPdf, backwardPdf, forwardWeight, backwardWeight + sampling::ProjectedSphere projSphSampler; + sampling::ProjectedSphere::cache_type projSphCache; + 
float32_t3 projSphereSample = u3; + float32_t3 projSphere = projSphSampler.generate(projSphereSample, projSphCache); + acc.xyz += projSphere; + acc.x += projSphSampler.forwardPdf(projSphCache); + acc.x += projSphSampler.forwardWeight(projSphCache); + acc.xyz += projSphSampler.generateInverse(projSphere, projSphCache); + acc.x += projSphSampler.backwardPdf(projSphere); + acc.x += projSphSampler.backwardWeight(projSphere); + + // BoxMullerTransform — generate, forwardPdf, backwardPdf, forwardWeight, backwardWeight + sampling::BoxMullerTransform bmt; + bmt.stddev = 1.0; + sampling::BoxMullerTransform::cache_type bmtCache; + float32_t2 bmtSample = bmt.generate(u2, bmtCache); + acc.xy += bmtSample; + acc.x += bmt.forwardPdf(bmtCache); + acc.x += bmt.forwardWeight(bmtCache); + acc.x += bmt.backwardPdf(bmtSample); + acc.x += bmt.backwardWeight(bmtSample); + acc.xy += bmt.separateBackwardPdf(bmtSample); + + // SphericalTriangle — generate, generateInverse, forwardPdf, backwardPdf, forwardWeight, backwardWeight + shapes::SphericalTriangle shapeTri; + shapeTri.vertices[0] = float32_t3(1, 0, 0); + shapeTri.vertices[1] = float32_t3(0, 1, 0); + shapeTri.vertices[2] = float32_t3(0, 0, 1); + // Octant triangle: all dot products between vertices are 0, so cos_sides=0, csc_sides=1 + shapeTri.cos_sides = float32_t3(0, 0, 0); + shapeTri.csc_sides = float32_t3(1, 1, 1); + sampling::SphericalTriangle sphTri = sampling::SphericalTriangle::create(shapeTri); + sampling::SphericalTriangle::cache_type sphTriCache; + float32_t3 stSample = sphTri.generate(u2, sphTriCache); + acc.xyz += stSample; + acc.x += sphTri.forwardPdf(sphTriCache); + acc.x += sphTri.forwardWeight(sphTriCache); + acc.xy += sphTri.generateInverse(stSample, sphTriCache); + acc.x += sphTri.backwardPdf(stSample); + acc.x += sphTri.backwardWeight(stSample); + + // SphericalRectangle — generate, forwardPdf, backwardPdf, forwardWeight, backwardWeight + shapes::CompressedSphericalRectangle csr; + csr.origin = float32_t3(0.0, 
0.0, -1.0); + csr.right = float32_t3(1.0, 0.0, 0.0); + csr.up = float32_t3(0.0, 1.0, 0.0); + shapes::SphericalRectangle shapeRect = shapes::SphericalRectangle::create(csr); + const float32_t3 srObserver = float32_t3(0.0, 0.0, 0.0); + sampling::SphericalRectangle sphRect = sampling::SphericalRectangle::create(shapeRect, srObserver); + sampling::SphericalRectangle::cache_type sphRectCache; + float32_t2 srSample = sphRect.generate(u2, sphRectCache); + acc.xy += srSample; + acc.x += sphRect.forwardPdf(sphRectCache); + acc.x += sphRect.forwardWeight(sphRectCache); + acc.x += sphRect.backwardPdf(srSample); + acc.x += sphRect.backwardWeight(srSample); + + // ProjectedSphericalTriangle — generate, forwardPdf, backwardPdf, forwardWeight, backwardWeight + sampling::ProjectedSphericalTriangle projTri; + projTri.sphtri = sphTri; + projTri.receiverNormal = float32_t3(0.0, 0.0, 1.0); + projTri.receiverWasBSDF = false; + sampling::ProjectedSphericalTriangle::cache_type projTriCache; + float32_t3 ptSample = projTri.generate(u2, projTriCache); + acc.xyz += ptSample; + acc.x += projTri.forwardPdf(projTriCache); + acc.x += projTri.forwardWeight(projTriCache); + acc.x += projTri.backwardPdf(ptSample); + acc.x += projTri.backwardWeight(ptSample); + + output[0] = acc; +} diff --git a/37_HLSLSamplingTests/app_resources/uniform_hemisphere_test.comp.hlsl b/37_HLSLSamplingTests/app_resources/uniform_hemisphere_test.comp.hlsl new file mode 100644 index 000000000..cb68f97df --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/uniform_hemisphere_test.comp.hlsl @@ -0,0 +1,34 @@ +#pragma shader_stage(compute) + +#include "common/uniform_hemisphere.hlsl" +#include +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(64, 1, 1)] +[shader("compute")] +void main() +{ + const uint32_t invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; +#ifdef BENCH_ITERS + nbl::hlsl::Xoroshiro64Star rng = 
nbl::hlsl::Xoroshiro64Star::construct(uint32_t2(invID, 0u)); + const float32_t toFloat = asfloat(0x2f800004u); + uint32_t3 acc = (uint32_t3)0; + for (uint32_t i = 0u; i < uint32_t(BENCH_ITERS); i++) + { + float32_t2 u = float32_t2(rng(), rng()) * toFloat; + sampling::UniformHemisphere sampler; + sampling::UniformHemisphere::cache_type cache; + acc ^= asuint(sampler.generate(u, cache)); + acc ^= asuint(sampler.forwardPdf(cache)); + } + UniformHemisphereTestResults result = (UniformHemisphereTestResults)0; + result.generated = asfloat(acc); + outputTestValues[invID] = result; +#else + UniformHemisphereTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +#endif +} diff --git a/37_HLSLSamplingTests/app_resources/uniform_sphere_test.comp.hlsl b/37_HLSLSamplingTests/app_resources/uniform_sphere_test.comp.hlsl new file mode 100644 index 000000000..7778467b2 --- /dev/null +++ b/37_HLSLSamplingTests/app_resources/uniform_sphere_test.comp.hlsl @@ -0,0 +1,34 @@ +#pragma shader_stage(compute) + +#include "common/uniform_sphere.hlsl" +#include +#include + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(64, 1, 1)] +[shader("compute")] +void main() +{ + const uint32_t invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; +#ifdef BENCH_ITERS + nbl::hlsl::Xoroshiro64Star rng = nbl::hlsl::Xoroshiro64Star::construct(uint32_t2(invID, 0u)); + const float32_t toFloat = asfloat(0x2f800004u); + uint32_t3 acc = (uint32_t3)0; + for (uint32_t i = 0u; i < uint32_t(BENCH_ITERS); i++) + { + float32_t2 u = float32_t2(rng(), rng()) * toFloat; + sampling::UniformSphere sampler; + sampling::UniformSphere::cache_type cache; + acc ^= asuint(sampler.generate(u, cache)); + acc ^= asuint(sampler.forwardPdf(cache)); + } + UniformSphereTestResults result = (UniformSphereTestResults)0; + result.generated = asfloat(acc); + outputTestValues[invID] = result; +#else + UniformSphereTestExecutor 
executor; + executor(inputTestValues[invID], outputTestValues[invID]); +#endif +} diff --git a/37_HLSLSamplingTests/main.cpp b/37_HLSLSamplingTests/main.cpp new file mode 100644 index 000000000..fee37bafc --- /dev/null +++ b/37_HLSLSamplingTests/main.cpp @@ -0,0 +1,385 @@ +#include + +#include "nbl/examples/examples.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +using namespace nbl; +using namespace core; +using namespace system; +using namespace asset; +using namespace video; +using namespace nbl::hlsl; +using namespace nbl::examples; + +// sampling headers (HLSL/C++ compatible) +#include "nbl/builtin/hlsl/sampling/concentric_mapping.hlsl" +#include "nbl/builtin/hlsl/sampling/linear.hlsl" +#include "nbl/builtin/hlsl/sampling/bilinear.hlsl" +#include "nbl/builtin/hlsl/sampling/uniform_spheres.hlsl" +#include "nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl" +#include "nbl/builtin/hlsl/sampling/box_muller_transform.hlsl" +#include "nbl/builtin/hlsl/sampling/spherical_triangle.hlsl" +#include "nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl" +#include "nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl" + +// concepts header — include AFTER sampler headers, and only in the test +#include "nbl/builtin/hlsl/sampling/concepts.hlsl" + +// ITester-based testers +#include "CLinearTester.h" +#include "CBilinearTester.h" +#include "CUniformHemisphereTester.h" +#include "CUniformSphereTester.h" +#include "CProjectedHemisphereTester.h" +#include "CProjectedSphereTester.h" +#include "CConcentricMappingTester.h" +#include "CSphericalTriangleTester.h" +#include "CBoxMullerTransformTester.h" +#include "CProjectedSphericalTriangleTester.h" +#include "CSphericalRectangleTester.h" + +#include "CSamplerBenchmark.h" + +constexpr bool DoBenchmark = true; + +class HLSLSamplingTests final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication +{ + using device_base_t = application_templates::MonoDeviceApplication; + 
using asset_base_t = BuiltinResourcesApplication; + + // Helper to create pipeline setup data + template + auto createSetupData(const std::string& shaderKey) -> typename Tester::PipelineSetupData + { + typename Tester::PipelineSetupData data; + data.device = m_device; + data.api = m_api; + data.assetMgr = m_assetMgr; + data.logger = m_logger; + data.physicalDevice = m_physicalDevice; + data.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + data.shaderKey = shaderKey; + return data; + } + + CSamplerBenchmark::SetupData createBenchmarkSetupData(const std::string& shaderKey, uint32_t dispatchGroupCount, uint32_t samplesPerDispatch, size_t inputBufferBytes, size_t outputBufferBytes) + { + CSamplerBenchmark::SetupData data; + data.device = m_device; + data.api = m_api; + data.assetMgr = m_assetMgr; + data.logger = m_logger; + data.physicalDevice = m_physicalDevice; + data.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + data.shaderKey = shaderKey; + data.dispatchGroupCount = dispatchGroupCount; + data.samplesPerDispatch = samplesPerDispatch; + data.inputBufferBytes = inputBufferBytes; + data.outputBufferBytes = outputBufferBytes; + return data; + } + +public: + HLSLSamplingTests(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + // test compile with dxc + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(m_device.get()); + auto bundle = m_assetMgr->getAsset(key.c_str(), lp); + + const auto assets = 
bundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load shader!", ILogger::ELL_ERROR); + return false; + } + + auto shader = IAsset::castDown(assets[0]); + if (!shader) + { + m_logger->log("compile shader test failed!", ILogger::ELL_ERROR); + return false; + } + + m_logger->log("Shader compilation test passed.", ILogger::ELL_INFO); + } + + // ================================================================ + // Compile-time concept verification via static_assert + // ================================================================ + + // --- BasicSampler (level 1) --- generate(domain_type) -> codomain_type + // Note: all samplers almost satisfy BasicSampler, but they have cache parameters in generate(). + static_assert(sampling::concepts::BasicSampler>); + + // --- TractableSampler (level 2) --- generate(domain_type, out cache_type) -> codomain_type, forwardPdf(cache_type) -> density_type + static_assert(sampling::concepts::TractableSampler>); + static_assert(sampling::concepts::TractableSampler>); + static_assert(sampling::concepts::TractableSampler>); + static_assert(sampling::concepts::TractableSampler>); + static_assert(sampling::concepts::TractableSampler>); + static_assert(sampling::concepts::TractableSampler>); + static_assert(sampling::concepts::TractableSampler>); + static_assert(sampling::concepts::TractableSampler>); + static_assert(sampling::concepts::TractableSampler>); + static_assert(sampling::concepts::TractableSampler>); + static_assert(sampling::concepts::TractableSampler>); + + // --- ResamplableSampler (level 3, parallel) --- generate(domain_type, out cache_type) -> codomain_type, forwardWeight(cache_type), backwardWeight(codomain_type) + static_assert(sampling::concepts::ResamplableSampler>); + static_assert(sampling::concepts::ResamplableSampler>); + static_assert(sampling::concepts::ResamplableSampler>); + static_assert(sampling::concepts::ResamplableSampler>); + static_assert(sampling::concepts::ResamplableSampler>); 
+ static_assert(sampling::concepts::ResamplableSampler>); + static_assert(sampling::concepts::ResamplableSampler>); + static_assert(sampling::concepts::ResamplableSampler>); + static_assert(sampling::concepts::ResamplableSampler>); + static_assert(sampling::concepts::ResamplableSampler>); + static_assert(sampling::concepts::ResamplableSampler>); + + // --- InvertibleSampler (level 3) --- TractableSampler + backwardPdf(codomain_type), forwardWeight(cache_type), backwardWeight(codomain_type) + static_assert(sampling::concepts::InvertibleSampler>); + static_assert(sampling::concepts::InvertibleSampler>); + static_assert(sampling::concepts::InvertibleSampler>); + static_assert(sampling::concepts::InvertibleSampler>); + static_assert(sampling::concepts::InvertibleSampler>); + static_assert(sampling::concepts::InvertibleSampler>); + static_assert(sampling::concepts::InvertibleSampler>); + static_assert(sampling::concepts::InvertibleSampler>); + static_assert(sampling::concepts::InvertibleSampler>); + static_assert(sampling::concepts::InvertibleSampler>); + static_assert(sampling::concepts::InvertibleSampler>); + + // --- BijectiveSampler (level 4) --- InvertibleSampler + generateInverse(codomain_type, out cache_type) -> domain_type + static_assert(sampling::concepts::BijectiveSampler>); + static_assert(sampling::concepts::BijectiveSampler>); + static_assert(sampling::concepts::BijectiveSampler>); + static_assert(sampling::concepts::BijectiveSampler>); + static_assert(sampling::concepts::BijectiveSampler>); + static_assert(sampling::concepts::BijectiveSampler>); + static_assert(sampling::concepts::BijectiveSampler>); + static_assert(sampling::concepts::BijectiveSampler>); + + m_logger->log("All sampling concept tests passed.", ILogger::ELL_INFO); + + // ================================================================ + // Runtime CPU/GPU comparison tests using ITester harness + // ================================================================ + bool pass = true; + const 
uint32_t workgroupSize = 64; + const uint32_t testBatchCount = 64; // 64 * workgroupSize = 4096 tests per sampler + + + // --- Sampler tests --- + { + m_logger->log("Running Linear sampler tests...", ILogger::ELL_INFO); + auto data = createSetupData(nbl::this_example::builtin::build::get_spirv_key<"linear_test">(m_device.get())); + CLinearTester tester(testBatchCount, workgroupSize); + tester.setupPipeline(data); + pass &= tester.performTestsAndVerifyResults("LinearTestLog.txt"); + } + { + m_logger->log("Running Bilinear sampler tests...", ILogger::ELL_INFO); + auto data = createSetupData(nbl::this_example::builtin::build::get_spirv_key<"bilinear_test">(m_device.get())); + CBilinearTester tester(testBatchCount, workgroupSize); + tester.setupPipeline(data); + pass &= tester.performTestsAndVerifyResults("BilinearTestLog.txt"); + } + { + m_logger->log("Running UniformHemisphere sampler tests...", ILogger::ELL_INFO); + auto data = createSetupData(nbl::this_example::builtin::build::get_spirv_key<"uniform_hemisphere_test">(m_device.get())); + CUniformHemisphereTester tester(testBatchCount, workgroupSize); + tester.setupPipeline(data); + pass &= tester.performTestsAndVerifyResults("UniformHemisphereTestLog.txt"); + } + { + m_logger->log("Running UniformSphere sampler tests...", ILogger::ELL_INFO); + auto data = createSetupData(nbl::this_example::builtin::build::get_spirv_key<"uniform_sphere_test">(m_device.get())); + CUniformSphereTester tester(testBatchCount, workgroupSize); + tester.setupPipeline(data); + pass &= tester.performTestsAndVerifyResults("UniformSphereTestLog.txt"); + } + { + m_logger->log("Running ProjectedHemisphere sampler tests...", ILogger::ELL_INFO); + auto data = createSetupData(nbl::this_example::builtin::build::get_spirv_key<"projected_hemisphere_test">(m_device.get())); + CProjectedHemisphereTester tester(testBatchCount, workgroupSize); + tester.setupPipeline(data); + pass &= tester.performTestsAndVerifyResults("ProjectedHemisphereTestLog.txt"); + } 
+ { + m_logger->log("Running ProjectedSphere sampler tests...", ILogger::ELL_INFO); + auto data = createSetupData(nbl::this_example::builtin::build::get_spirv_key<"projected_sphere_test">(m_device.get())); + CProjectedSphereTester tester(testBatchCount, workgroupSize); + tester.setupPipeline(data); + pass &= tester.performTestsAndVerifyResults("ProjectedSphereTestLog.txt"); + } + { + m_logger->log("Running ConcentricMapping sampler tests...", ILogger::ELL_INFO); + auto data = createSetupData(nbl::this_example::builtin::build::get_spirv_key<"concentric_mapping_test">(m_device.get())); + CConcentricMappingTester tester(testBatchCount, workgroupSize); + tester.setupPipeline(data); + pass &= tester.performTestsAndVerifyResults("ConcentricMappingTestLog.txt"); + } + { + m_logger->log("Running BoxMullerTransform sampler tests...", ILogger::ELL_INFO); + auto data = createSetupData(nbl::this_example::builtin::build::get_spirv_key<"box_muller_transform_test">(m_device.get())); + CBoxMullerTransformTester tester(testBatchCount, workgroupSize); + tester.setupPipeline(data); + pass &= tester.performTestsAndVerifyResults("BoxMullerTransformTestLog.txt"); + } + { + m_logger->log("Running ProjectedSphericalTriangle sampler tests...", ILogger::ELL_INFO); + auto data = createSetupData(nbl::this_example::builtin::build::get_spirv_key<"projected_spherical_triangle_test">(m_device.get())); + CProjectedSphericalTriangleTester tester(testBatchCount, workgroupSize); + tester.setupPipeline(data); + pass &= tester.performTestsAndVerifyResults("ProjectedSphericalTriangleTestLog.txt"); + } + { + m_logger->log("Running SphericalRectangle sampler tests...", ILogger::ELL_INFO); + auto data = createSetupData(nbl::this_example::builtin::build::get_spirv_key<"spherical_rectangle_test">(m_device.get())); + CSphericalRectangleTester tester(testBatchCount, workgroupSize); + tester.setupPipeline(data); + pass &= tester.performTestsAndVerifyResults("SphericalRectangleTestLog.txt"); + } + { + 
m_logger->log("Running SphericalTriangle tests...", ILogger::ELL_INFO); + auto data = createSetupData(nbl::this_example::builtin::build::get_spirv_key<"spherical_triangle">(m_device.get())); + CSphericalTriangleTester tester(testBatchCount, workgroupSize); + tester.setupPipeline(data); + pass &= tester.performTestsAndVerifyResults("SphericalTriangleTestLog.txt"); + } + + if (pass) + m_logger->log("All sampling tests PASSED.", ILogger::ELL_INFO); + else + m_logger->log("Some sampling tests FAILED. Check log files for details.", ILogger::ELL_ERROR); + + // ====================================================================== + // GPU throughput benchmarks (1000 warmup + 20000 timed dispatches each) + // ====================================================================== + if constexpr (DoBenchmark) + { + m_logger->log("=== GPU Sampler Benchmarks ===", ILogger::ELL_PERFORMANCE); + constexpr uint32_t totalSamplesPerWorkgroup = testBatchCount * workgroupSize; + constexpr uint32_t iteratationsPerThread = 4096; // internal to shader, set in CMakeLists.txt + constexpr uint32_t benchSamplesPerDispatch = totalSamplesPerWorkgroup * iteratationsPerThread; + + { + CSamplerBenchmark bench; + bench.setup(createBenchmarkSetupData( + nbl::this_example::builtin::build::get_spirv_key<"linear_bench">(m_device.get()), + testBatchCount, benchSamplesPerDispatch, + sizeof(LinearInputValues) * totalSamplesPerWorkgroup, + sizeof(LinearTestResults) * totalSamplesPerWorkgroup)); + bench.run("Linear"); + } + { + CSamplerBenchmark bench; + bench.setup(createBenchmarkSetupData( + nbl::this_example::builtin::build::get_spirv_key<"bilinear_bench">(m_device.get()), + testBatchCount, benchSamplesPerDispatch, + sizeof(BilinearInputValues) * totalSamplesPerWorkgroup, + sizeof(BilinearTestResults) * totalSamplesPerWorkgroup)); + bench.run("Bilinear"); + } + { + CSamplerBenchmark bench; + bench.setup(createBenchmarkSetupData( + 
nbl::this_example::builtin::build::get_spirv_key<"box_muller_transform_bench">(m_device.get()), + testBatchCount, benchSamplesPerDispatch, + sizeof(BoxMullerTransformInputValues) * totalSamplesPerWorkgroup, + sizeof(BoxMullerTransformTestResults) * totalSamplesPerWorkgroup)); + bench.run("BoxMullerTransform"); + } + { + CSamplerBenchmark bench; + bench.setup(createBenchmarkSetupData( + nbl::this_example::builtin::build::get_spirv_key<"uniform_hemisphere_bench">(m_device.get()), + testBatchCount, benchSamplesPerDispatch, + sizeof(UniformHemisphereInputValues) * totalSamplesPerWorkgroup, + sizeof(UniformHemisphereTestResults) * totalSamplesPerWorkgroup)); + bench.run("UniformHemisphere"); + } + { + CSamplerBenchmark bench; + bench.setup(createBenchmarkSetupData( + nbl::this_example::builtin::build::get_spirv_key<"uniform_sphere_bench">(m_device.get()), + testBatchCount, benchSamplesPerDispatch, + sizeof(UniformSphereInputValues) * totalSamplesPerWorkgroup, + sizeof(UniformSphereTestResults) * totalSamplesPerWorkgroup)); + bench.run("UniformSphere"); + } + { + CSamplerBenchmark bench; + bench.setup(createBenchmarkSetupData( + nbl::this_example::builtin::build::get_spirv_key<"projected_hemisphere_bench">(m_device.get()), + testBatchCount, benchSamplesPerDispatch, + sizeof(ProjectedHemisphereInputValues) * totalSamplesPerWorkgroup, + sizeof(ProjectedHemisphereTestResults) * totalSamplesPerWorkgroup)); + bench.run("ProjectedHemisphere"); + } + { + CSamplerBenchmark bench; + bench.setup(createBenchmarkSetupData( + nbl::this_example::builtin::build::get_spirv_key<"projected_sphere_bench">(m_device.get()), + testBatchCount, benchSamplesPerDispatch, + sizeof(ProjectedSphereInputValues) * totalSamplesPerWorkgroup, + sizeof(ProjectedSphereTestResults) * totalSamplesPerWorkgroup)); + bench.run("ProjectedSphere"); + } + { + CSamplerBenchmark bench; + bench.setup(createBenchmarkSetupData( + 
nbl::this_example::builtin::build::get_spirv_key<"spherical_rectangle_bench">(m_device.get()), + testBatchCount, benchSamplesPerDispatch, + sizeof(SphericalRectangleInputValues) * totalSamplesPerWorkgroup, + sizeof(SphericalRectangleTestResults) * totalSamplesPerWorkgroup)); + bench.run("SphericalRectangle"); + } + { + CSamplerBenchmark bench; + bench.setup(createBenchmarkSetupData( + nbl::this_example::builtin::build::get_spirv_key<"spherical_triangle_bench">(m_device.get()), + testBatchCount, benchSamplesPerDispatch, + sizeof(SphericalTriangleInputValues) * totalSamplesPerWorkgroup, + sizeof(SphericalTriangleTestResults) * totalSamplesPerWorkgroup)); + bench.run("SphericalTriangle"); + } + { + CSamplerBenchmark bench; + bench.setup(createBenchmarkSetupData( + nbl::this_example::builtin::build::get_spirv_key<"projected_spherical_triangle_bench">(m_device.get()), + testBatchCount, benchSamplesPerDispatch, + sizeof(ProjectedSphericalTriangleInputValues) * totalSamplesPerWorkgroup, + sizeof(ProjectedSphericalTriangleTestResults) * totalSamplesPerWorkgroup)); + bench.run("ProjectedSphericalTriangle"); + } + } + + return pass; + } + + void workLoopBody() override {} + + bool keepRunning() override { return false; } + + bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } +}; + +NBL_MAIN_FUNC(HLSLSamplingTests) diff --git a/40_PathTracer/include/renderer/shaders/session.hlsl b/40_PathTracer/include/renderer/shaders/session.hlsl index 9b13b1126..862a8dc37 100644 --- a/40_PathTracer/include/renderer/shaders/session.hlsl +++ b/40_PathTracer/include/renderer/shaders/session.hlsl @@ -23,7 +23,7 @@ struct SSensorUniforms NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxPathDepthLog2 = MAX_PATH_DEPTH_LOG2; hlsl::float32_t2 rcpPixelSize; - hlsl::rwmc::SplattingParameters splatting; + hlsl::rwmc::SSplattingParameters splatting; hlsl::uint16_t2 renderSize; // bitfield uint16_t lastCascadeIndex : MAX_CASCADE_COUNT_LOG2; diff --git 
a/59_QuaternionTests/CQuaternionTester.h b/59_QuaternionTests/CQuaternionTester.h index 89478d1ad..531402168 100644 --- a/59_QuaternionTests/CQuaternionTester.h +++ b/59_QuaternionTests/CQuaternionTester.h @@ -159,7 +159,7 @@ class CQuaternionTester final : public ITester(memberName, expectedVal, testVal, testIteration, seed, testType); + printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType, maxRelativeDifference, maxAbsoluteDifference); return false; } @@ -167,10 +167,10 @@ class CQuaternionTester final : public ITester(expectedVal, testVal, maxAllowedDifference, testOrientation)) + if (compareVectorTestValues(expectedVal, testVal, maxAllowedDifference, testOrientation)) return true; - printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType); + printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType, maxAllowedDifference, 0.0f); return false; } diff --git a/CMakeLists.txt b/CMakeLists.txt index d945c547a..ea0465a56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,8 +74,10 @@ if(NBL_BUILD_EXAMPLES) # Showcase compute pathtracing add_subdirectory(30_ComputeShaderPathTracer) + add_subdirectory(31_HLSLPathTracer) add_subdirectory(34_DebugDraw) + add_subdirectory(37_HLSLSamplingTests) add_subdirectory(38_EXRSplit) if (NBL_BUILD_MITSUBA_LOADER) # if (NBL_BUILD_OPTIX) diff --git a/common/include/nbl/examples/Tester/ITester.h b/common/include/nbl/examples/Tester/ITester.h index 66cef6888..cd131b783 100644 --- a/common/include/nbl/examples/Tester/ITester.h +++ b/common/include/nbl/examples/Tester/ITester.h @@ -5,6 +5,7 @@ #include #include #include +#include using namespace nbl; @@ -14,400 +15,398 @@ template class ITester { public: - struct PipelineSetupData - { - std::string shaderKey; - core::smart_refctd_ptr device; - core::smart_refctd_ptr api; - core::smart_refctd_ptr assetMgr; - core::smart_refctd_ptr logger; - video::IPhysicalDevice* physicalDevice; - uint32_t computeFamilyIndex; - }; - - 
void setupPipeline(const PipelineSetupData& pipleineSetupData) - { - // setting up pipeline in the constructor - m_device = core::smart_refctd_ptr(pipleineSetupData.device); - m_physicalDevice = pipleineSetupData.physicalDevice; - m_api = core::smart_refctd_ptr(pipleineSetupData.api); - m_assetMgr = core::smart_refctd_ptr(pipleineSetupData.assetMgr); - m_logger = core::smart_refctd_ptr(pipleineSetupData.logger); - m_queueFamily = pipleineSetupData.computeFamilyIndex; - m_semaphoreCounter = 0; - m_semaphore = m_device->createSemaphore(0); - m_cmdpool = m_device->createCommandPool(m_queueFamily, video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) - logFail("Failed to create Command Buffers!\n"); - - // Load shaders, set up pipeline - core::smart_refctd_ptr shader; - { - asset::IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = "app_resources"; // virtual root - auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.shaderKey.data(), lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return logFail("Could not load shader!"); - - // It would be super weird if loading a shader from a file produced more than 1 asset - assert(assets.size() == 1); - core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); - - shader = m_device->compileShader({ source.get() }); - } - - video::IGPUDescriptorSetLayout::SBinding bindings[2] = { - { - .binding = 0, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - }, - { - .binding = 1, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - } - }; - - 
core::smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); - if (!dsLayout) - logFail("Failed to create a Descriptor Layout!\n"); - - m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout)); - if (!m_pplnLayout) - logFail("Failed to create a Pipeline Layout!\n"); - - { - video::IGPUComputePipeline::SCreationParams params = {}; - params.layout = m_pplnLayout.get(); - params.shader.entryPoint = "main"; - params.shader.shader = shader.get(); - if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) - logFail("Failed to create pipelines (compile & link shaders)!\n"); - } - - // Allocate memory of the input buffer - { - const size_t BufferSize = sizeof(InputTestValues) * m_testIterationCount; - - video::IGPUBuffer::SCreationParams params = {}; - params.size = BufferSize; - params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - core::smart_refctd_ptr inputBuff = m_device->createBuffer(std::move(params)); - if (!inputBuff) - logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - - inputBuff->setObjectDebugName("emulated_float64_t output buffer"); - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); - - m_inputBufferAllocation = m_device->allocate(reqs, inputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); - if (!m_inputBufferAllocation.isValid()) - logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); - - assert(inputBuff->getBoundMemory().memory == m_inputBufferAllocation.memory.get()); - core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); - - m_ds = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); - { - video::IGPUDescriptorSet::SDescriptorInfo info[1]; - info[0].desc = core::smart_refctd_ptr(inputBuff); - info[0].info.buffer = { .offset = 0,.size = 
BufferSize }; - video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} - }; - m_device->updateDescriptorSets(writes, {}); - } - } - - // Allocate memory of the output buffer - { - const size_t BufferSize = sizeof(TestResults) * m_testIterationCount; - - video::IGPUBuffer::SCreationParams params = {}; - params.size = BufferSize; - params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - core::smart_refctd_ptr outputBuff = m_device->createBuffer(std::move(params)); - if (!outputBuff) - logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - - outputBuff->setObjectDebugName("emulated_float64_t output buffer"); - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); - - m_outputBufferAllocation = m_device->allocate(reqs, outputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); - if (!m_outputBufferAllocation.isValid()) - logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); - - assert(outputBuff->getBoundMemory().memory == m_outputBufferAllocation.memory.get()); - core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); - - { - video::IGPUDescriptorSet::SDescriptorInfo info[1]; - info[0].desc = core::smart_refctd_ptr(outputBuff); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; - video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 1,.arrayElement = 0,.count = 1,.info = info} - }; - m_device->updateDescriptorSets(writes, {}); - } - } - - if (!m_outputBufferAllocation.memory->map({ 0ull,m_outputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) - logFail("Failed to map the Device Memory!\n"); - - // if the mapping is not coherent the range needs to be invalidated 
to pull in new data for the CPU's caches - const video::ILogicalDevice::MappedMemoryRange memoryRange(m_outputBufferAllocation.memory.get(), 0ull, m_outputBufferAllocation.memory->getAllocationSize()); - if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memoryRange); - - assert(memoryRange.valid() && memoryRange.length >= sizeof(TestResults)); - - m_queue = m_device->getQueue(m_queueFamily, 0); - } - - bool performTestsAndVerifyResults(const std::string& logFileName) - { - m_logFile.open(logFileName, std::ios::out | std::ios::trunc); - if (!m_logFile.is_open()) - m_logger->log("Failed to open log file!", system::ILogger::ELL_ERROR); - - core::vector inputTestValues; - core::vector exceptedTestResults; - - inputTestValues.reserve(m_testIterationCount); - exceptedTestResults.reserve(m_testIterationCount); - - m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < m_testIterationCount; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - InputTestValues testInput = generateInputTestValues(); - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - TestResults expected = determineExpectedResults(testInput); - - inputTestValues.push_back(testInput); - exceptedTestResults.push_back(expected); - } - - core::vector cpuTestResults = performCpuTests(inputTestValues); - core::vector gpuTestResults = performGpuTests(inputTestValues); - - bool pass = verifyAllTestResults(cpuTestResults, gpuTestResults, exceptedTestResults); - - m_logger->log("TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - reloadSeed(); - - m_logFile.close(); - return pass; - } - - virtual ~ITester() - { - m_outputBufferAllocation.memory->unmap(); - }; - -protected: - enum class TestType - { - CPU, - GPU - }; - - /** + struct 
PipelineSetupData + { + std::string shaderKey; + core::smart_refctd_ptr device; + core::smart_refctd_ptr api; + core::smart_refctd_ptr assetMgr; + core::smart_refctd_ptr logger; + video::IPhysicalDevice* physicalDevice; + uint32_t computeFamilyIndex; + }; + + void setupPipeline(const PipelineSetupData& pipleineSetupData) + { + // setting up pipeline in the constructor + m_device = core::smart_refctd_ptr(pipleineSetupData.device); + m_physicalDevice = pipleineSetupData.physicalDevice; + m_api = core::smart_refctd_ptr(pipleineSetupData.api); + m_assetMgr = core::smart_refctd_ptr(pipleineSetupData.assetMgr); + m_logger = core::smart_refctd_ptr(pipleineSetupData.logger); + m_queueFamily = pipleineSetupData.computeFamilyIndex; + m_semaphoreCounter = 0; + m_semaphore = m_device->createSemaphore(0); + m_cmdpool = m_device->createCommandPool(m_queueFamily, video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) + logFail("Failed to create Command Buffers!\n"); + + // Load shaders, set up pipeline + core::smart_refctd_ptr shader; + { + asset::IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; // virtual root + auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.shaderKey.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return logFail("Could not load shader!"); + + // It would be super weird if loading a shader from a file produced more than 1 asset + assert(assets.size() == 1); + core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); + + shader = m_device->compileShader({source.get()}); + } + + video::IGPUDescriptorSetLayout::SBinding bindings[2] = { + {.binding = 0, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + 
.count = 1}, + {.binding = 1, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1}}; + + core::smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + logFail("Failed to create a Descriptor Layout!\n"); + + m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout)); + if (!m_pplnLayout) + logFail("Failed to create a Pipeline Layout!\n"); + + { + video::IGPUComputePipeline::SCreationParams params = {}; + params.layout = m_pplnLayout.get(); + params.shader.entryPoint = "main"; + params.shader.shader = shader.get(); + if (!m_device->createComputePipelines(nullptr, {¶ms, 1}, &m_pipeline)) + logFail("Failed to create pipelines (compile & link shaders)!\n"); + } + + // Allocate memory of the input buffer + { + const size_t BufferSize = sizeof(InputTestValues) * m_testIterationCount; + + video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + core::smart_refctd_ptr inputBuff = m_device->createBuffer(std::move(params)); + if (!inputBuff) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + inputBuff->setObjectDebugName("emulated_float64_t output buffer"); + + video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_inputBufferAllocation = m_device->allocate(reqs, inputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_inputBufferAllocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(inputBuff->getBoundMemory().memory == m_inputBufferAllocation.memory.get()); + core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, {&dsLayout.get(), 
1}); + + m_ds = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); + { + video::IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = core::smart_refctd_ptr(inputBuff); + info[0].info.buffer = {.offset = 0, .size = BufferSize}; + video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = info}}; + m_device->updateDescriptorSets(writes, {}); + } + } + + // Allocate memory of the output buffer + { + const size_t BufferSize = sizeof(TestResults) * m_testIterationCount; + + video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + core::smart_refctd_ptr outputBuff = m_device->createBuffer(std::move(params)); + if (!outputBuff) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + outputBuff->setObjectDebugName("emulated_float64_t output buffer"); + + video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_outputBufferAllocation = m_device->allocate(reqs, outputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_outputBufferAllocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(outputBuff->getBoundMemory().memory == m_outputBufferAllocation.memory.get()); + core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, {&dsLayout.get(), 1}); + + { + video::IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = core::smart_refctd_ptr(outputBuff); + info[0].info.buffer = {.offset = 0, .size = BufferSize}; + video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(), .binding = 1, .arrayElement = 0, .count = 1, .info = info}}; + m_device->updateDescriptorSets(writes, {}); + } + } + + if 
(!m_outputBufferAllocation.memory->map({0ull, m_outputBufferAllocation.memory->getAllocationSize()}, video::IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches + const video::ILogicalDevice::MappedMemoryRange memoryRange(m_outputBufferAllocation.memory.get(), 0ull, m_outputBufferAllocation.memory->getAllocationSize()); + if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); + + assert(memoryRange.valid() && memoryRange.length >= sizeof(TestResults)); + + m_queue = m_device->getQueue(m_queueFamily, 0); + } + + bool performTestsAndVerifyResults(const std::string& logFileName) + { + m_logFile.open(logFileName, std::ios::out | std::ios::trunc); + if (!m_logFile.is_open()) + m_logger->log("Failed to open log file!", system::ILogger::ELL_ERROR); + + core::vector inputTestValues; + core::vector exceptedTestResults; + + inputTestValues.reserve(m_testIterationCount); + exceptedTestResults.reserve(m_testIterationCount); + + m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); + for (int i = 0; i < m_testIterationCount; ++i) + { + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput = generateInputTestValues(); + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestResults expected = determineExpectedResults(testInput); + + inputTestValues.push_back(testInput); + exceptedTestResults.push_back(expected); + } + + core::vector cpuTestResults = performCpuTests(inputTestValues); + core::vector gpuTestResults = performGpuTests(inputTestValues); + + bool pass = verifyAllTestResults(cpuTestResults, gpuTestResults, exceptedTestResults); + + 
m_logger->log("TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + reloadSeed(); + + m_logFile.close(); + return pass; + } + + virtual ~ITester() + { + m_outputBufferAllocation.memory->unmap(); + }; + + protected: + enum class TestType + { + CPU, + GPU + }; + + /** * @param testBatchCount one test batch is equal to m_WorkgroupSize, so number of tests performed will be m_WorkgroupSize * testbatchCount */ - ITester(const uint32_t testBatchCount) - : m_testBatchCount(testBatchCount), m_testIterationCount(testBatchCount * m_WorkgroupSize) - { - reloadSeed(); - }; + ITester(const uint32_t testBatchCount, const uint32_t workgroupSize = 256) + : m_WorkgroupSize(workgroupSize), m_testBatchCount(testBatchCount), m_testIterationCount(testBatchCount * m_WorkgroupSize) + { + reloadSeed(); + }; - virtual bool verifyTestResults(const TestResults& expectedTestValues, const TestResults& testValues, const size_t testIteration, const uint32_t seed, TestType testType) = 0; + virtual bool verifyTestResults(const TestResults& expectedTestValues, const TestResults& testValues, const size_t testIteration, const uint32_t seed, TestType testType) = 0; - virtual InputTestValues generateInputTestValues() = 0; + virtual InputTestValues generateInputTestValues() = 0; - virtual TestResults determineExpectedResults(const InputTestValues& testInput) = 0; + virtual TestResults determineExpectedResults(const InputTestValues& testInput) = 0; - std::mt19937& getRandomEngine() - { - return m_mersenneTwister; - } + std::mt19937& getRandomEngine() + { + return m_mersenneTwister; + } protected: - uint32_t m_queueFamily; - core::smart_refctd_ptr m_device; - core::smart_refctd_ptr m_api; - video::IPhysicalDevice* m_physicalDevice; - core::smart_refctd_ptr m_assetMgr; - core::smart_refctd_ptr m_logger; - video::IDeviceMemoryAllocator::SAllocation m_inputBufferAllocation = {}; - video::IDeviceMemoryAllocator::SAllocation m_outputBufferAllocation = {}; - core::smart_refctd_ptr m_cmdbuf = nullptr; - 
core::smart_refctd_ptr m_cmdpool = nullptr; - core::smart_refctd_ptr m_ds = nullptr; - core::smart_refctd_ptr m_pplnLayout = nullptr; - core::smart_refctd_ptr m_pipeline; - core::smart_refctd_ptr m_semaphore; - video::IQueue* m_queue; - uint64_t m_semaphoreCounter; - - void dispatchGpuTests(const core::vector& input, core::vector& output) - { - // Update input buffer - if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) - logFail("Failed to map the Device Memory!\n"); - - const video::ILogicalDevice::MappedMemoryRange memoryRange(m_inputBufferAllocation.memory.get(), 0ull, m_inputBufferAllocation.memory->getAllocationSize()); - if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memoryRange); - - assert(m_testIterationCount == input.size()); - const size_t inputDataSize = sizeof(InputTestValues) * m_testIterationCount; - std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), input.data(), inputDataSize); - - m_inputBufferAllocation.memory->unmap(); - - // record command buffer - const uint32_t dispatchSizeX = m_testBatchCount; - m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); - m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); - m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); - m_cmdbuf->bindComputePipeline(m_pipeline.get()); - m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); - m_cmdbuf->dispatch(dispatchSizeX, 1, 1); - m_cmdbuf->endDebugMarker(); - m_cmdbuf->end(); - - video::IQueue::SSubmitInfo submitInfos[1] = {}; - const video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; - submitInfos[0].commandBuffers = cmdbufs; - const video::IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = 
m_semaphore.get(), .value = ++m_semaphoreCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; - submitInfos[0].signalSemaphores = signals; - - m_api->startCapture(); - m_queue->submit(submitInfos); - m_api->endCapture(); - - m_device->waitIdle(); - - // save test results - assert(m_testIterationCount == output.size()); - const size_t outputDataSize = sizeof(TestResults) * m_testIterationCount; - std::memcpy(output.data(), static_cast(m_outputBufferAllocation.memory->getMappedPointer()), outputDataSize); - - m_device->waitIdle(); - } - - template - bool verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, - const size_t testIteration, const uint32_t seed, const TestType testType, const float64_t maxAllowedDifference = 0.0) - { - if (compareTestValues(expectedVal, testVal, maxAllowedDifference)) - return true; - - printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType); - return false; - } - - template - void printTestFail(const std::string& memberName, const T& expectedVal, const T& testVal, - const size_t testIteration, const uint32_t seed, const TestType testType) - { - std::stringstream ss; - switch (testType) - { - case TestType::CPU: - ss << "CPU TEST ERROR:\n"; - break; - case TestType::GPU: - ss << "GPU TEST ERROR:\n"; - } - - ss << "nbl::hlsl::" << memberName << " produced incorrect output!" 
<< '\n'; - ss << "TEST ITERATION INDEX: " << testIteration << " SEED: " << seed << '\n'; - ss << "EXPECTED VALUE: " << system::to_string(expectedVal) << " TEST VALUE: " << system::to_string(testVal) << '\n'; - - m_logger->log("%s", system::ILogger::ELL_ERROR, ss.str().c_str()); - m_logFile << ss.str() << '\n'; - } + uint32_t m_queueFamily; + core::smart_refctd_ptr m_device; + core::smart_refctd_ptr m_api; + video::IPhysicalDevice* m_physicalDevice; + core::smart_refctd_ptr m_assetMgr; + core::smart_refctd_ptr m_logger; + video::IDeviceMemoryAllocator::SAllocation m_inputBufferAllocation = {}; + video::IDeviceMemoryAllocator::SAllocation m_outputBufferAllocation = {}; + core::smart_refctd_ptr m_cmdbuf = nullptr; + core::smart_refctd_ptr m_cmdpool = nullptr; + core::smart_refctd_ptr m_ds = nullptr; + core::smart_refctd_ptr m_pplnLayout = nullptr; + core::smart_refctd_ptr m_pipeline; + core::smart_refctd_ptr m_semaphore; + video::IQueue* m_queue; + uint64_t m_semaphoreCounter; + + void dispatchGpuTests(const core::vector& input, core::vector& output) + { + // Update input buffer + if (!m_inputBufferAllocation.memory->map({0ull, m_inputBufferAllocation.memory->getAllocationSize()}, video::IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + const video::ILogicalDevice::MappedMemoryRange memoryRange(m_inputBufferAllocation.memory.get(), 0ull, m_inputBufferAllocation.memory->getAllocationSize()); + if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); + + assert(m_testIterationCount == input.size()); + const size_t inputDataSize = sizeof(InputTestValues) * m_testIterationCount; + std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), input.data(), inputDataSize); + + m_inputBufferAllocation.memory->unmap(); + + // record command buffer + const uint32_t dispatchSizeX = 
m_testBatchCount; + m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); + m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); + m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); + m_cmdbuf->bindComputePipeline(m_pipeline.get()); + m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); + m_cmdbuf->dispatch(dispatchSizeX, 1, 1); + m_cmdbuf->endDebugMarker(); + m_cmdbuf->end(); + + video::IQueue::SSubmitInfo submitInfos[1] = {}; + const video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = {{.cmdbuf = m_cmdbuf.get()}}; + submitInfos[0].commandBuffers = cmdbufs; + const video::IQueue::SSubmitInfo::SSemaphoreInfo signals[] = {{.semaphore = m_semaphore.get(), .value = ++m_semaphoreCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT}}; + submitInfos[0].signalSemaphores = signals; + + m_api->startCapture(); + m_queue->submit(submitInfos); + m_api->endCapture(); + + m_device->waitIdle(); + + // save test results + assert(m_testIterationCount == output.size()); + const size_t outputDataSize = sizeof(TestResults) * m_testIterationCount; + std::memcpy(output.data(), static_cast(m_outputBufferAllocation.memory->getMappedPointer()), outputDataSize); + + /* NOTE(review): the only work between the previous waitIdle() and this one is the assert, the size computation and the memcpy, all host-side as far as this chunk shows, so this second wait looks redundant - confirm before removing */ m_device->waitIdle(); + } + + /* Checks testVal against expectedVal with compareTestValues, passing both tolerances through (each defaults to 0.0). Returns true on a match; otherwise calls printTestFail with the same arguments and returns false. */ template + bool verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, + const size_t testIteration, const uint32_t seed, const TestType testType, + const float64_t maxRelativeDifference = 0.0, const float64_t maxAbsoluteDifference = 0.0) + { + if (compareTestValues(expectedVal, testVal, maxRelativeDifference, maxAbsoluteDifference)) + return true; + + printTestFail(memberName, expectedVal, testVal, testIteration, seed, testType, maxRelativeDifference, maxAbsoluteDifference); + return false; + } + + /* Formats a failure report into a std::stringstream: a CPU or GPU header, memberName, the iteration index and seed, the expected and test values and both tolerances. The report is then sent to m_logger at ELL_ERROR and appended to m_logFile. */ template + void printTestFail(const std::string& memberName, const T& expectedVal, const T& testVal, + const size_t testIteration, const uint32_t seed, const TestType testType, 
const float64_t maxRelativeDifference, const float64_t maxAbsoluteDifference) + { + std::stringstream ss; + /* no default case: a testType other than CPU or GPU adds no header line */ switch (testType) + { + case TestType::CPU: + ss << "CPU TEST ERROR:\n"; + break; + case TestType::GPU: + ss << "GPU TEST ERROR:\n"; + } + + ss << "nbl::hlsl::" << memberName << " produced incorrect output!" << '\n'; + ss << "TEST ITERATION INDEX: " << testIteration << " SEED: " << seed << '\n'; + ss << "EXPECTED VALUE: " << system::to_string(expectedVal) << " TEST VALUE: " << system::to_string(testVal); + /* the absolute difference is appended only when this compile-time condition holds */ if constexpr (concepts::FloatingPointLikeScalar || concepts::FloatingPointLikeVectorial) + ss << " DIFFERENCE: " << system::to_string(hlsl::abs(expectedVal - testVal)); + /* NOTE(review): the MAX ABSOLUTE label below has no colon while MAX RELATIVE has one - looks like an oversight, confirm */ ss << " MAX RELATIVE: " << system::to_string(maxRelativeDifference) << " MAX ABSOLUTE " << system::to_string(maxAbsoluteDifference) << '\n'; + + /* the same text goes to the logger and to the log file */ m_logger->log("%s", system::ILogger::ELL_ERROR, ss.str().c_str()); + m_logFile << ss.str() << '\n'; + } private: - template - inline void logFail(const char* msg, Args&&... 
args) - { - m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); - exit(-1); - } - - core::vector performCpuTests(const core::vector& inputTestValues) - { - core::vector output(m_testIterationCount); - TestExecutor testExecutor; - - auto iterations = std::views::iota(0ull, m_testIterationCount); - std::for_each(std::execution::par_unseq, iterations.begin(), iterations.end(), - [&](size_t i) - { - testExecutor(inputTestValues[i], output[i]); - } - ); - - return output; - } - - core::vector performGpuTests(const core::vector& inputTestValues) - { - core::vector output(m_testIterationCount); - dispatchGpuTests(inputTestValues, output); - - return output; - } - - bool verifyAllTestResults(const core::vector& cpuTestReults, const core::vector& gpuTestReults, const core::vector& exceptedTestReults) - { - bool pass = true; - for (int i = 0; i < m_testIterationCount; ++i) - { - pass = verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, m_seed, ITester::TestType::CPU) && pass; - pass = verifyTestResults(exceptedTestReults[i], gpuTestReults[i], i, m_seed, ITester::TestType::GPU) && pass; - } - return pass; - } - - void reloadSeed() - { - std::random_device rd; - m_seed = rd(); - m_mersenneTwister = std::mt19937(m_seed); - } - - template - bool compareTestValues(const T& lhs, const T& rhs, const float64_t maxAllowedDifference) - { - return lhs == rhs; - } - template requires concepts::FloatingPointLikeScalar || concepts::FloatingPointLikeVectorial || (concepts::Matricial && concepts::FloatingPointLikeScalar::scalar_type>) - bool compareTestValues(const T& lhs, const T& rhs, const float64_t maxAllowedDifference) - { - return nbl::hlsl::testing::relativeApproxCompare(lhs, rhs, maxAllowedDifference); - } - - const size_t m_testIterationCount; - const uint32_t m_testBatchCount; - static constexpr size_t m_WorkgroupSize = 256u; - // seed will change after every call to performTestsAndVerifyResults() - std::mt19937 m_mersenneTwister; - uint32_t m_seed; - 
std::ofstream m_logFile; + /* Forwards msg and args to m_logger->log at ELL_ERROR and then calls exit(-1), so control never returns to the caller. */ template + inline void logFail(const char* msg, Args&&... args) + { + m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); + exit(-1); + } + + /* Runs a locally constructed TestExecutor on every index in [0, m_testIterationCount) through std::for_each with std::execution::par_unseq and returns the per-index outputs. NOTE(review): the single testExecutor is captured by reference and shared by all parallel invocations - confirm its call operator is safe to run concurrently. */ core::vector performCpuTests(const core::vector& inputTestValues) + { + core::vector output(m_testIterationCount); + TestExecutor testExecutor; + + auto iterations = std::views::iota(0ull, m_testIterationCount); + std::for_each(std::execution::par_unseq, iterations.begin(), iterations.end(), + [&](size_t i) + { + testExecutor(inputTestValues[i], output[i]); + }); + + return output; + } + + /* Creates m_testIterationCount output slots, hands them to dispatchGpuTests together with the inputs, and returns them. */ core::vector performGpuTests(const core::vector& inputTestValues) + { + core::vector output(m_testIterationCount); + dispatchGpuTests(inputTestValues, output); + + return output; + } + + /* Verifies the CPU result and the GPU result of every iteration against the expected one. verifyTestResults sits on the left of the && so it is evaluated even after an earlier failure, which means every mismatch is checked rather than only the first. Returns true only when all checks passed. NOTE(review): int i is compared with the size_t m_testIterationCount (signed and unsigned mix), and the parameter names are misspelled (Reults, excepted). */ bool verifyAllTestResults(const core::vector& cpuTestReults, const core::vector& gpuTestReults, const core::vector& exceptedTestReults) + { + bool pass = true; + for (int i = 0; i < m_testIterationCount; ++i) + { + pass = verifyTestResults(exceptedTestReults[i], cpuTestReults[i], i, m_seed, ITester::TestType::CPU) && pass; + pass = verifyTestResults(exceptedTestReults[i], gpuTestReults[i], i, m_seed, ITester::TestType::GPU) && pass; + } + return pass; + } + + /* Draws a new seed from std::random_device, stores it in m_seed and rebuilds m_mersenneTwister from it. */ void reloadSeed() + { + std::random_device rd; + m_seed = rd(); + m_mersenneTwister = std::mt19937(m_seed); + } + + /* Unconstrained overload: exact comparison with operator==. Both tolerance parameters are ignored here. */ template + bool compareTestValues(const T& lhs, const T& rhs, const float64_t maxRelativeDifference, const float64_t maxAbsoluteDifference) + { + return lhs == rhs; + } + + /* Constrained overload selected by the requires clause below (floating-point-like scalars, vectors and matrices): delegates to nbl::hlsl::testing::approxCompare. NOTE(review): the tolerances are passed as (maxAbsoluteDifference, maxRelativeDifference), the reverse of this function's own parameter order - confirm against the approxCompare signature. */ template + requires concepts::FloatingPointLikeScalar || concepts::FloatingPointLikeVectorial || (concepts::Matricial && concepts::FloatingPointLikeScalar::scalar_type>) + bool compareTestValues(const T& lhs, const T& rhs, const float64_t maxRelativeDifference, const float64_t maxAbsoluteDifference) + { + return nbl::hlsl::testing::approxCompare(lhs, rhs, maxAbsoluteDifference, maxRelativeDifference); + } + + /* NOTE(review): const member with no in-class initialiser, so a constructor outside this chunk has to set it - confirm */ const size_t m_WorkgroupSize; + const size_t 
m_testIterationCount; + const uint32_t m_testBatchCount; + // seed will change after every call to performTestsAndVerifyResults() + std::mt19937 m_mersenneTwister; + uint32_t m_seed; + std::ofstream m_logFile; }; #endif \ No newline at end of file