From a8c658f7ac7ad013dcea1ec96b86025b54defb7b Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 5 Feb 2026 19:53:04 +0100 Subject: [PATCH 001/118] Add OBJ writer in Nabla and update MeshLoaders example --- examples_tests | 2 +- .../nbl/asset/interchange/COBJMeshWriter.h | 36 + include/nbl/config/BuildConfigOptions.h.in | 3 +- include/nbl/ext/ScreenShot/ScreenShot.h | 136 ++- src/nbl/CMakeLists.txt | 12 +- src/nbl/asset/IAssetManager.cpp | 7 + .../asset/interchange/COBJMeshFileLoader.cpp | 958 ++++++------------ .../asset/interchange/COBJMeshFileLoader.h | 61 -- src/nbl/asset/interchange/COBJMeshWriter.cpp | 241 +++++ .../asset/interchange/CPLYMeshFileLoader.cpp | 45 +- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 704 ++++--------- src/nbl/asset/interchange/CPLYMeshWriter.h | 11 +- .../asset/interchange/CSTLMeshFileLoader.cpp | 197 +--- .../asset/interchange/CSTLMeshFileLoader.h | 2 - src/nbl/asset/interchange/CSTLMeshWriter.cpp | 424 ++++---- src/nbl/asset/interchange/CSTLMeshWriter.h | 2 +- src/nbl/asset/interchange/IGeometryWriter.cpp | 12 + src/nbl/asset/pch_asset.h | 1 + 18 files changed, 1209 insertions(+), 1645 deletions(-) create mode 100644 include/nbl/asset/interchange/COBJMeshWriter.h create mode 100644 src/nbl/asset/interchange/COBJMeshWriter.cpp create mode 100644 src/nbl/asset/interchange/IGeometryWriter.cpp diff --git a/examples_tests b/examples_tests index 2fee54acd4..399dddb5f4 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2fee54acd4c69579e96b1fa66c22fcbb8d359432 +Subproject commit 399dddb5f42d81cfa55a724972f56ac8aa3f585c diff --git a/include/nbl/asset/interchange/COBJMeshWriter.h b/include/nbl/asset/interchange/COBJMeshWriter.h new file mode 100644 index 0000000000..abcfa4169d --- /dev/null +++ b/include/nbl/asset/interchange/COBJMeshWriter.h @@ -0,0 +1,36 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_OBJ_MESH_WRITER_H_INCLUDED_ +#define _NBL_ASSET_OBJ_MESH_WRITER_H_INCLUDED_ + + +#include "nbl/asset/ICPUPolygonGeometry.h" +#include "nbl/asset/interchange/IGeometryWriter.h" + + +namespace nbl::asset +{ + +//! class to write OBJ mesh files +class COBJMeshWriter : public IGeometryWriter +{ + public: + COBJMeshWriter(); + + virtual const char** getAssociatedFileExtensions() const + { + static const char* ext[]{ "obj", nullptr }; + return ext; + } + + virtual uint32_t getSupportedFlags() override { return 0u; } + + virtual uint32_t getForcedFlags() { return 0u; } + + virtual bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; +}; + +} // end namespace + +#endif diff --git a/include/nbl/config/BuildConfigOptions.h.in b/include/nbl/config/BuildConfigOptions.h.in index d130ff4ce2..7bd4e950f3 100644 --- a/include/nbl/config/BuildConfigOptions.h.in +++ b/include/nbl/config/BuildConfigOptions.h.in @@ -35,6 +35,7 @@ #cmakedefine _NBL_COMPILE_WITH_GLTF_LOADER_ // writers +#cmakedefine _NBL_COMPILE_WITH_OBJ_WRITER_ #cmakedefine _NBL_COMPILE_WITH_STL_WRITER_ #cmakedefine _NBL_COMPILE_WITH_PLY_WRITER_ #cmakedefine _NBL_COMPILE_WITH_BAW_WRITER_ @@ -95,4 +96,4 @@ #define NBL_API2 #endif -#endif // __NBL_BUILD_CONFIG_OPTIONS_H_INCLUDED__ \ No newline at end of file +#endif // __NBL_BUILD_CONFIG_OPTIONS_H_INCLUDED__ diff --git a/include/nbl/ext/ScreenShot/ScreenShot.h b/include/nbl/ext/ScreenShot/ScreenShot.h index 4e71749cd7..64f5e526d1 100644 --- a/include/nbl/ext/ScreenShot/ScreenShot.h +++ b/include/nbl/ext/ScreenShot/ScreenShot.h @@ -27,7 +27,12 @@ inline core::smart_refctd_ptr createScreenShot( const ACCESS_FLAGS accessMask, const IImage::LAYOUT imageLayout) { - assert(bool(logicalDevice->getPhysicalDevice()->getQueueFamilyProperties().begin()[queue->getFamilyIndex()].queueFlags.value & 
IQueue::FAMILY_FLAGS::TRANSFER_BIT)); + { + const auto queueFlags = logicalDevice->getPhysicalDevice()->getQueueFamilyProperties().begin()[queue->getFamilyIndex()].queueFlags; + const auto required = core::bitflag(IQueue::FAMILY_FLAGS::TRANSFER_BIT) | IQueue::FAMILY_FLAGS::GRAPHICS_BIT | IQueue::FAMILY_FLAGS::COMPUTE_BIT; + if (!queueFlags.hasAnyFlag(required)) + logicalDevice->getLogger()->log("ScreenShot: queue family %u lacks transfer/graphics/compute flags; continuing anyway.", system::ILogger::ELL_WARNING, queue->getFamilyIndex()); + } auto fetchedImageViewParmas = gpuImageView->getCreationParameters(); auto gpuImage = fetchedImageViewParmas.image; @@ -35,12 +40,17 @@ inline core::smart_refctd_ptr createScreenShot( if(!fetchedGpuImageParams.usage.hasFlags(IImage::EUF_TRANSFER_SRC_BIT)) { - assert(false); + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: source image missing TRANSFER_SRC usage.", system::ILogger::ELL_ERROR); return nullptr; } if (isBlockCompressionFormat(fetchedGpuImageParams.format)) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: block-compressed formats are not supported.", system::ILogger::ELL_ERROR); return nullptr; + } core::smart_refctd_ptr gpuTexelBuffer; @@ -48,10 +58,28 @@ inline core::smart_refctd_ptr createScreenShot( { // commandbuffer should refcount the pool, so it should be 100% legal to drop at the end of the scope auto gpuCommandPool = logicalDevice->createCommandPool(queue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + if (!gpuCommandPool) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create command pool.", system::ILogger::ELL_ERROR); + return nullptr; + } gpuCommandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &gpuCommandBuffer); - assert(gpuCommandBuffer); + if (!gpuCommandBuffer) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create 
command buffer.", system::ILogger::ELL_ERROR); + return nullptr; + } + } + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: recording command buffer.", system::ILogger::ELL_INFO); + if (!gpuCommandBuffer->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to begin command buffer.", system::ILogger::ELL_ERROR); + return nullptr; } - gpuCommandBuffer->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); { auto extent = gpuImage->getMipSize(); @@ -68,9 +96,27 @@ inline core::smart_refctd_ptr createScreenShot( bufferCreationParams.size = extent.x*extent.y*extent.z*getTexelOrBlockBytesize(fetchedGpuImageParams.format); bufferCreationParams.usage = IBuffer::EUF_TRANSFER_DST_BIT; gpuTexelBuffer = logicalDevice->createBuffer(std::move(bufferCreationParams)); + if (!gpuTexelBuffer) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create GPU texel buffer.", system::ILogger::ELL_ERROR); + return nullptr; + } auto gpuTexelBufferMemReqs = gpuTexelBuffer->getMemoryReqs(); gpuTexelBufferMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDownStreamingMemoryTypeBits(); + if (!gpuTexelBufferMemReqs.memoryTypeBits) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: no down-streaming memory type for texel buffer.", system::ILogger::ELL_ERROR); + return nullptr; + } auto gpuTexelBufferMem = logicalDevice->allocate(gpuTexelBufferMemReqs, gpuTexelBuffer.get()); + if (!gpuTexelBufferMem.isValid()) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to allocate texel buffer memory.", system::ILogger::ELL_ERROR); + return nullptr; + } IGPUCommandBuffer::SPipelineBarrierDependencyInfo info = {}; decltype(info)::image_barrier_t barrier = {}; @@ -102,7 +148,12 @@ inline core::smart_refctd_ptr createScreenShot( gpuCommandBuffer->pipelineBarrier(EDF_NONE,info); } } 
- gpuCommandBuffer->end(); + if (!gpuCommandBuffer->end()) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to end command buffer.", system::ILogger::ELL_ERROR); + return nullptr; + } auto signalSemaphore = logicalDevice->createSemaphore(0); @@ -124,22 +175,63 @@ inline core::smart_refctd_ptr createScreenShot( info.waitSemaphores = { &waitSemaphoreInfo, &waitSemaphoreInfo + 1 }; } - queue->submit({ &info, &info + 1}); + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: submitting copy command buffer.", system::ILogger::ELL_INFO); + if (queue->submit({ &info, &info + 1}) != IQueue::RESULT::SUCCESS) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to submit copy command buffer.", system::ILogger::ELL_ERROR); + return nullptr; + } ISemaphore::SWaitInfo waitInfo{ signalSemaphore.get(), 1u}; + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: waiting for copy completion.", system::ILogger::ELL_INFO); if (logicalDevice->blockForSemaphores({&waitInfo, &waitInfo + 1}) != ISemaphore::WAIT_RESULT::SUCCESS) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to wait for copy completion.", system::ILogger::ELL_ERROR); return nullptr; + } core::smart_refctd_ptr cpuImageView; { const auto gpuTexelBufferSize = gpuTexelBuffer->getSize(); // If you get validation errors from the `invalidateMappedMemoryRanges` we need to expose VK_WHOLE_BUFFER equivalent constant - ILogicalDevice::MappedMemoryRange mappedMemoryRange(gpuTexelBuffer->getBoundMemory().memory,0u,gpuTexelBufferSize); + auto* allocation = gpuTexelBuffer->getBoundMemory().memory; + if (!allocation) + return nullptr; - if (gpuTexelBuffer->getBoundMemory().memory->haveToMakeVisible()) + bool mappedHere = false; + if (!allocation->getMappedPointer()) + { + const IDeviceMemoryAllocation::MemoryRange range = { 0u, gpuTexelBufferSize }; + if (!allocation->map(range, 
IDeviceMemoryAllocation::EMCAF_READ)) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to map texel buffer memory.", system::ILogger::ELL_ERROR); + return nullptr; + } + mappedHere = true; + } + + ILogicalDevice::MappedMemoryRange mappedMemoryRange(allocation,0u,gpuTexelBufferSize); + if (allocation->haveToMakeVisible()) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: invalidating mapped range.", system::ILogger::ELL_INFO); logicalDevice->invalidateMappedMemoryRanges(1u,&mappedMemoryRange); + } auto cpuNewImage = ICPUImage::create(std::move(fetchedGpuImageParams)); + if (!cpuNewImage) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create CPU image.", system::ILogger::ELL_ERROR); + if (mappedHere) + allocation->unmap(); + return nullptr; + } auto regions = core::make_refctd_dynamic_array>(1u); ICPUImage::SBufferCopy& region = regions->front(); @@ -155,10 +247,22 @@ inline core::smart_refctd_ptr createScreenShot( region.imageExtent = cpuNewImage->getCreationParameters().extent; auto cpuNewTexelBuffer = ICPUBuffer::create({ gpuTexelBufferSize }); + if (!cpuNewTexelBuffer) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create CPU buffer.", system::ILogger::ELL_ERROR); + if (mappedHere) + allocation->unmap(); + return nullptr; + } + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: copying GPU data to CPU buffer.", system::ILogger::ELL_INFO); { - memcpy(cpuNewTexelBuffer->getPointer(), gpuTexelBuffer->getBoundMemory().memory->getMappedPointer(), gpuTexelBuffer->getSize()); + memcpy(cpuNewTexelBuffer->getPointer(), allocation->getMappedPointer(), gpuTexelBuffer->getSize()); } cpuNewImage->setBufferAndRegions(core::smart_refctd_ptr(cpuNewTexelBuffer), regions); + if (mappedHere) + allocation->unmap(); { auto newCreationParams = cpuNewImage->getCreationParameters(); @@ -190,6 +294,12 @@ inline 
bool createScreenShot( { assert(outFile->getFlags()&system::IFile::ECF_WRITE); auto cpuImageView = createScreenShot(logicalDevice,queue,semaphore,gpuImageView,accessMask,imageLayout); + if (!cpuImageView) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: GPU readback failed, no image to write.", system::ILogger::ELL_ERROR); + return false; + } IAssetWriter::SAssetWriteParams writeParams(cpuImageView.get()); return assetManager->writeAsset(outFile,writeParams); } @@ -205,6 +315,12 @@ inline bool createScreenShot( const ACCESS_FLAGS accessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS) { auto cpuImageView = createScreenShot(logicalDevice,queue,semaphore,gpuImageView,accessMask,imageLayout); + if (!cpuImageView) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: GPU readback failed, no image to write.", system::ILogger::ELL_ERROR); + return false; + } IAssetWriter::SAssetWriteParams writeParams(cpuImageView.get()); return assetManager->writeAsset(filename.string(),writeParams); // TODO: Use std::filesystem::path } @@ -212,4 +328,4 @@ inline bool createScreenShot( } // namespace nbl::ext::ScreenShot -#endif \ No newline at end of file +#endif diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index abadd07912..48b359b73b 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -50,12 +50,12 @@ include(common) #[[ Loaders and writers compile options available to edit by user All revelant _NBL_COMPILE_WITH will be there]] option(_NBL_COMPILE_WITH_MTL_LOADER_ "Compile with MTL Loader" OFF) #default off until Material Compiler 2 -option(_NBL_COMPILE_WITH_OBJ_LOADER_ "Compile with OBJ Loader" OFF) #default off until Material Compiler 2 -#option(_NBL_COMPILE_WITH_OBJ_WRITER_ "Compile with OBJ Writer" ON) uncomment when writer exists -option(_NBL_COMPILE_WITH_STL_LOADER_ "Compile with STL Loader" OFF) #default off until reimplemented -option(_NBL_COMPILE_WITH_STL_WRITER_ "Compile with STL Writer" OFF) 
#default off until reimplemented +option(_NBL_COMPILE_WITH_OBJ_LOADER_ "Compile with OBJ Loader" ON) +option(_NBL_COMPILE_WITH_OBJ_WRITER_ "Compile with OBJ Writer" ON) +option(_NBL_COMPILE_WITH_STL_LOADER_ "Compile with STL Loader" ON) +option(_NBL_COMPILE_WITH_STL_WRITER_ "Compile with STL Writer" ON) option(_NBL_COMPILE_WITH_PLY_LOADER_ "Compile with PLY Loader" ON) -option(_NBL_COMPILE_WITH_PLY_WRITER_ "Compile with PLY Writer" OFF) #default off until reimplemented +option(_NBL_COMPILE_WITH_PLY_WRITER_ "Compile with PLY Writer" ON) option(_NBL_COMPILE_WITH_JPG_LOADER_ "Compile with JPG Loader" ON) option(_NBL_COMPILE_WITH_JPG_WRITER_ "Compile with JPG Writer" ON) option(_NBL_COMPILE_WITH_PNG_LOADER_ "Compile with PNG Loader" ON) @@ -165,6 +165,7 @@ set(NBL_ASSET_SOURCES asset/ICPUImage.cpp asset/ICPUPolygonGeometry.cpp asset/interchange/IAssetWriter.cpp + asset/interchange/IGeometryWriter.cpp asset/interchange/IAssetLoader.cpp # Shaders @@ -199,6 +200,7 @@ set(NBL_ASSET_SOURCES asset/interchange/CGLTFLoader.cpp # Mesh writers + asset/interchange/COBJMeshWriter.cpp asset/interchange/CPLYMeshWriter.cpp asset/interchange/CSTLMeshWriter.cpp asset/interchange/CGLTFWriter.cpp diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index dc67ed8d01..fce55ddd32 100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -55,6 +55,10 @@ #include "nbl/asset/interchange/CSTLMeshWriter.h" #endif +#ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ +#include "nbl/asset/interchange/COBJMeshWriter.h" +#endif + #ifdef _NBL_COMPILE_WITH_PLY_WRITER_ #include "nbl/asset/interchange/CPLYMeshWriter.h" #endif @@ -160,6 +164,9 @@ void IAssetManager::addLoadersAndWriters() #ifdef _NBL_COMPILE_WITH_GLTF_WRITER_ addAssetWriter(core::make_smart_refctd_ptr()); #endif +#ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ + addAssetWriter(core::make_smart_refctd_ptr()); +#endif #ifdef _NBL_COMPILE_WITH_PLY_WRITER_ addAssetWriter(core::make_smart_refctd_ptr()); #endif diff 
--git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 69651f8061..5277b6911a 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -13,735 +13,419 @@ #include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" -#include "nbl/asset/utils/CQuantNormalCache.h" - #include "COBJMeshFileLoader.h" #include -namespace nbl +namespace nbl::asset { -namespace asset + +static const uint32_t WORD_BUFFER_LENGTH = 512u; + +struct ObjVertexKey { + int32_t pos; + int32_t uv; + int32_t normal; + + inline bool operator<(const ObjVertexKey& other) const + { + if (pos == other.pos) + { + if (uv == other.uv) + return normal < other.normal; + return uv < other.uv; + } + return pos < other.pos; + } +}; -//#ifdef _NBL_DEBUG -#define _NBL_DEBUG_OBJ_LOADER_ -//#endif +struct Float3 +{ + float x; + float y; + float z; +}; -static const uint32_t WORD_BUFFER_LENGTH = 512; +struct Float2 +{ + float x; + float y; +}; -constexpr uint32_t POSITION = 0u; -constexpr uint32_t UV = 2u; -constexpr uint32_t NORMAL = 3u; -constexpr uint32_t BND_NUM = 0u; +static_assert(sizeof(Float3) == 12); +static_assert(sizeof(Float2) == 8); -//! Constructor COBJMeshFileLoader::COBJMeshFileLoader(IAssetManager* _manager) : AssetManager(_manager), System(_manager->getSystem()) { } - -//! 
destructor COBJMeshFileLoader::~COBJMeshFileLoader() { } asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) { - SContext ctx( - asset::IAssetLoader::SAssetLoadContext{ - _params, - _file - }, - _hierarchyLevel, - _override - ); - - if (_params.meshManipulatorOverride == nullptr) - { - _NBL_DEBUG_BREAK_IF(true); - assert(false); - } - - CQuantNormalCache* const quantNormalCache = _params.meshManipulatorOverride->getQuantNormalCache(); - - const long filesize = _file->getSize(); - if (!filesize) + if (!_file) return {}; - const uint32_t WORD_BUFFER_LENGTH = 512u; - char tmpbuf[WORD_BUFFER_LENGTH]{}; + const long filesize = _file->getSize(); + if (filesize <= 0) + return {}; - uint32_t smoothingGroup=0; - - const std::filesystem::path fullName = _file->getFileName(); - const std::string relPath = [&fullName]() -> std::string - { - auto dir = fullName.parent_path().string(); - return dir; - }(); - - //value_type: directory from which .mtl (pipeline) was loaded and the pipeline - using pipeline_meta_pair_t = std::pair,const CMTLMetadata::CRenderpassIndependentPipeline*>; - struct hash_t - { - inline auto operator()(const pipeline_meta_pair_t& item) const - { - return std::hash()(item.second->m_name); - } - }; - struct key_equal_t - { - inline bool operator()(const pipeline_meta_pair_t& lhs, const pipeline_meta_pair_t& rhs) const - { - return lhs.second->m_name==rhs.second->m_name; - } - }; - core::unordered_multiset pipelines; - - // TODO: map the file whenever possible std::string fileContents; fileContents.resize(filesize); - char* const buf = fileContents.data(); - - system::IFile::success_t success; - _file->read(success, buf, 0, filesize); - if (!success) - return {}; - - const char* const bufEnd = buf+filesize; - // Process obj information - const char* bufPtr = buf; - std::string grpName, mtlName; - - auto 
performActionBasedOnOrientationSystem = [&](auto performOnRightHanded, auto performOnLeftHanded) - { - if (_params.loaderFlags & E_LOADER_PARAMETER_FLAGS::ELPF_RIGHT_HANDED_MESHES) - performOnRightHanded(); - else - performOnLeftHanded(); - }; - - - struct vec3 { - float data[3]; - }; - struct vec2 { - float data[2]; - }; - core::vector vertexBuffer; - core::vector normalsBuffer; - core::vector textureCoordBuffer; - - core::vector> submeshes; - core::vector> indices; - core::vector vertices; - core::map map_vtx2ix; - core::vector recalcNormals; - core::vector submeshWasLoadedFromCache; - core::vector submeshCacheKeys; - core::vector submeshMaterialNames; - core::vector vtxSmoothGrp; - - // TODO: handle failures much better! - constexpr const char* NO_MATERIAL_MTL_NAME = "#"; - bool noMaterial = true; - bool dummyMaterialCreated = false; - while(bufPtr != bufEnd) - { - switch(bufPtr[0]) - { - case 'm': // mtllib (material) - { - if (ctx.useMaterials) - { - bufPtr = goAndCopyNextWord(tmpbuf, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - _params.logger.log("Reading material _file %s", system::ILogger::ELL_DEBUG, tmpbuf); - - std::string mtllib = tmpbuf; - std::replace(mtllib.begin(), mtllib.end(), '\\', '/'); - SAssetLoadParams loadParams(_params); - loadParams.workingDirectory = _file->getFileName().parent_path(); - auto bundle = interm_getAssetInHierarchy(AssetManager, mtllib, loadParams, _hierarchyLevel+ICPUMesh::PIPELINE_HIERARCHYLEVELS_BELOW, _override); - - if (bundle.getContents().empty()) - break; - - if (bundle.getMetadata()) - { - auto meta = bundle.getMetadata()->selfCast(); - if (bundle.getAssetType()==IAsset::ET_RENDERPASS_INDEPENDENT_PIPELINE) - for (auto ass : bundle.getContents()) - { - auto ppln = core::smart_refctd_ptr_static_cast(ass); - const auto pplnMeta = meta->getAssetSpecificMetadata(ppln.get()); - if (!pplnMeta) - continue; - - pipelines.emplace(std::move(ppln),pplnMeta); - } - } - } - } - break; - - case 'v': // v, vn, vt - //reset flags - 
noMaterial = true; - dummyMaterialCreated = false; - switch(bufPtr[1]) - { - case ' ': // vertex - { - vec3 vec; - bufPtr = readVec3(bufPtr, vec.data, bufEnd); - performActionBasedOnOrientationSystem([&]() {vec.data[0] = -vec.data[0];}, [&]() {}); - vertexBuffer.push_back(vec); - } - break; - - case 'n': // normal - { - vec3 vec; - bufPtr = readVec3(bufPtr, vec.data, bufEnd); - performActionBasedOnOrientationSystem([&]() {vec.data[0] = -vec.data[0]; }, [&]() {}); - normalsBuffer.push_back(vec); - } - break; - - case 't': // texcoord - { - vec2 vec; - bufPtr = readUV(bufPtr, vec.data, bufEnd); - textureCoordBuffer.push_back(vec); - } - break; - } - break; - - case 'g': // group name - bufPtr = goAndCopyNextWord(tmpbuf, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - grpName = tmpbuf; - break; - case 's': // smoothing can be a group or off (equiv. to 0) - { - bufPtr = goAndCopyNextWord(tmpbuf, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - _params.logger.log("Loaded smoothing group start %s",system::ILogger::ELL_DEBUG, tmpbuf); - if (strcmp("off", tmpbuf)==0) - smoothingGroup=0u; - else - sscanf(tmpbuf,"%u",&smoothingGroup); - } - break; - - case 'u': // usemtl - // get name of material - { - noMaterial = false; - bufPtr = goAndCopyNextWord(tmpbuf, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - _params.logger.log("Loaded material start %s", system::ILogger::ELL_DEBUG, tmpbuf); - mtlName=tmpbuf; - - if (ctx.useMaterials && !ctx.useGroups) - { - asset::IAsset::E_TYPE types[] {asset::IAsset::ET_SUB_MESH, (asset::IAsset::E_TYPE)0u }; - auto mb_bundle = _override->findCachedAsset(genKeyForMeshBuf(ctx, _file->getFileName().string(), mtlName, grpName), types, ctx.inner, _hierarchyLevel+ICPUMesh::MESHBUFFER_HIERARCHYLEVELS_BELOW); - auto mbs = mb_bundle.getContents(); - bool notempty = mbs.size()!=0ull; - { - auto mb = notempty ? 
core::smart_refctd_ptr_static_cast(*mbs.begin()) : core::make_smart_refctd_ptr(); - submeshes.push_back(std::move(mb)); - } - indices.emplace_back(); - recalcNormals.push_back(false); - submeshWasLoadedFromCache.push_back(notempty); - //if submesh was loaded from cache - insert empty "cache key" (submesh loaded from cache won't be added to cache again) - submeshCacheKeys.push_back(submeshWasLoadedFromCache.back() ? "" : genKeyForMeshBuf(ctx, _file->getFileName().string(), mtlName, grpName)); - submeshMaterialNames.push_back(mtlName); - } - } - break; - case 'f': // face - { - if (noMaterial && !dummyMaterialCreated) - { - dummyMaterialCreated = true; - - submeshes.push_back(core::make_smart_refctd_ptr()); - indices.emplace_back(); - recalcNormals.push_back(false); - submeshWasLoadedFromCache.push_back(false); - submeshCacheKeys.push_back(genKeyForMeshBuf(ctx, _file->getFileName().string(), NO_MATERIAL_MTL_NAME, grpName)); - submeshMaterialNames.push_back(NO_MATERIAL_MTL_NAME); - } - - SObjVertex v; - - // get all vertices data in this face (current line of obj _file) - const std::string wordBuffer = copyLine(bufPtr, bufEnd); - const char* linePtr = wordBuffer.c_str(); - const char* const endPtr = linePtr + wordBuffer.size(); - - core::vector faceCorners; - faceCorners.reserve(32ull); - - // read in all vertices - linePtr = goNextWord(linePtr, endPtr); - while (0 != linePtr[0]) - { - // Array to communicate with retrieveVertexIndices() - // sends the buffer sizes and gets the actual indices - // if index not set returns -1 - int32_t Idx[3]; - Idx[1] = Idx[2] = -1; - - // read in next vertex's data - uint32_t wlength = copyWord(tmpbuf, linePtr, WORD_BUFFER_LENGTH, endPtr); - // this function will also convert obj's 1-based index to c++'s 0-based index - retrieveVertexIndices(tmpbuf, Idx, tmpbuf+wlength+1, vertexBuffer.size(), textureCoordBuffer.size(), normalsBuffer.size()); - v.pos[0] = vertexBuffer[Idx[0]].data[0]; - v.pos[1] = vertexBuffer[Idx[0]].data[1]; - 
v.pos[2] = vertexBuffer[Idx[0]].data[2]; - //set texcoord - if ( -1 != Idx[1] ) + + system::IFile::success_t success; + _file->read(success, fileContents.data(), 0, filesize); + if (!success) + return {}; + + const char* const buf = fileContents.data(); + const char* const bufEnd = buf + filesize; + const char* bufPtr = buf; + + bool rightHanded = (_params.loaderFlags & E_LOADER_PARAMETER_FLAGS::ELPF_RIGHT_HANDED_MESHES) != 0; + + core::vector positions; + core::vector normals; + core::vector uvs; + + core::vector outPositions; + core::vector outNormals; + core::vector outUVs; + core::vector indices; + + core::map vtxMap; + + bool hasNormals = false; + bool hasUVs = false; + + char tmpbuf[WORD_BUFFER_LENGTH]{}; + + while (bufPtr != bufEnd) + { + switch (bufPtr[0]) + { + case 'v': + switch (bufPtr[1]) + { + case ' ': { - v.uv[0] = textureCoordBuffer[Idx[1]].data[0]; - v.uv[1] = textureCoordBuffer[Idx[1]].data[1]; + Float3 vec{}; + bufPtr = readVec3(bufPtr, &vec.x, bufEnd); + if (rightHanded) + vec.x = -vec.x; + positions.push_back(vec); } - else + break; + case 'n': { - v.uv[0] = core::nan(); - v.uv[1] = core::nan(); + Float3 vec{}; + bufPtr = readVec3(bufPtr, &vec.x, bufEnd); + if (rightHanded) + vec.x = -vec.x; + normals.push_back(vec); } - //set normal - if ( -1 != Idx[2] ) + break; + case 't': { - core::vectorSIMDf simdNormal; - simdNormal.set(normalsBuffer[Idx[2]].data); - simdNormal.makeSafe3D(); - v.normal32bit = quantNormalCache->quantize(simdNormal); + Float2 vec{}; + bufPtr = readUV(bufPtr, &vec.x, bufEnd); + uvs.push_back(vec); } - else - { - v.normal32bit = core::vectorSIMDu32(0u); - recalcNormals.back() = true; - } - - uint32_t ix; - auto vtx_ix = map_vtx2ix.find(v); - if (vtx_ix != map_vtx2ix.end() && smoothingGroup==vtxSmoothGrp[vtx_ix->second]) - ix = vtx_ix->second; - else - { - ix = vertices.size(); - vertices.push_back(v); - vtxSmoothGrp.push_back(smoothingGroup); - map_vtx2ix.insert({v, ix}); - } - - faceCorners.push_back(ix); - - // go to next 
vertex - linePtr = goNextWord(linePtr, endPtr); - } - - // triangulate the face - for (uint32_t i = 1u; i < faceCorners.size()-1u; ++i) + break; + default: + break; + } + break; + case 'f': { - // Add a triangle - performActionBasedOnOrientationSystem - ( - [&]() + if (positions.empty()) + return {}; + + const std::string line = copyLine(bufPtr, bufEnd); + const char* linePtr = line.c_str(); + const char* const endPtr = linePtr + line.size(); + + core::vector faceCorners; + faceCorners.reserve(16ull); + + linePtr = goNextWord(linePtr, endPtr); + while (0 != linePtr[0]) { - indices.back().push_back(faceCorners[0]); - indices.back().push_back(faceCorners[i]); - indices.back().push_back(faceCorners[i + 1]); - }, - [&]() + int32_t idx[3] = { -1, -1, -1 }; + const uint32_t wlength = copyWord(tmpbuf, linePtr, WORD_BUFFER_LENGTH, endPtr); + retrieveVertexIndices(tmpbuf, idx, tmpbuf + wlength + 1, positions.size(), uvs.size(), normals.size()); + + if (idx[0] < 0 || static_cast(idx[0]) >= positions.size()) + return {}; + + ObjVertexKey key = { idx[0], idx[1], idx[2] }; + auto it = vtxMap.find(key); + uint32_t outIx = 0u; + if (it == vtxMap.end()) + { + outIx = static_cast(outPositions.size()); + vtxMap.insert({ key, outIx }); + + outPositions.push_back(positions[idx[0]]); + + Float2 uv = { 0.f, 0.f }; + if (idx[1] >= 0 && static_cast(idx[1]) < uvs.size()) + { + uv = uvs[idx[1]]; + hasUVs = true; + } + outUVs.push_back(uv); + + Float3 normal = { 0.f, 0.f, 1.f }; + if (idx[2] >= 0 && static_cast(idx[2]) < normals.size()) + { + normal = normals[idx[2]]; + hasNormals = true; + } + outNormals.push_back(normal); + } + else + { + outIx = it->second; + } + + faceCorners.push_back(outIx); + + linePtr = goNextWord(linePtr, endPtr); + } + + for (uint32_t i = 1u; i + 1u < faceCorners.size(); ++i) { - indices.back().push_back(faceCorners[i + 1]); - indices.back().push_back(faceCorners[i]); - indices.back().push_back(faceCorners[0]); + if (rightHanded) + { + 
indices.push_back(faceCorners[0]); + indices.push_back(faceCorners[i]); + indices.push_back(faceCorners[i + 1]); + } + else + { + indices.push_back(faceCorners[i + 1]); + indices.push_back(faceCorners[i]); + indices.push_back(faceCorners[0]); + } } - ); - } - } - break; - - case '#': // comment - default: - break; - } // end switch(bufPtr[0]) - // eat up rest of line - bufPtr = goNextLine(bufPtr, bufEnd); - } // end while(bufPtr && (bufPtr-buf usedPipelines; - { - uint64_t ixBufOffset = 0ull; - for (size_t i = 0ull; i < submeshes.size(); ++i) - { - if (submeshWasLoadedFromCache[i]) - continue; - - submeshes[i]->setIndexCount(indices[i].size()); - submeshes[i]->setIndexType(EIT_32BIT); - submeshes[i]->setIndexBufferBinding({ixBufOffset,nullptr}); - ixBufOffset += indices[i].size()*4ull; - - const uint32_t hasUV = !core::isnan(vertices[indices[i][0]].uv[0]); - using namespace std::string_literals; - _params.logger.log("Has UV: "s + (hasUV ? "YES":"NO"), system::ILogger::ELL_DEBUG); - // search in loaded - pipeline_meta_pair_t pipeline; - { - CMTLMetadata::CRenderpassIndependentPipeline dummyKey; - dummyKey.m_name = submeshCacheKeys[i].substr(submeshCacheKeys[i].find_last_of('?')+1u); - pipeline_meta_pair_t dummy{nullptr,&dummyKey}; - - auto rng = pipelines.equal_range(dummy); - for (auto it=rng.first; it!=rng.second; it++) - if (it->second->m_hash==hasUV) - { - pipeline = *it; - break; - } - } - //if there's no pipeline for this meshbuffer, set dummy one - if (!pipeline.first) - { - const IAsset::E_TYPE searchTypes[] = {IAsset::ET_RENDERPASS_INDEPENDENT_PIPELINE,static_cast(0u)}; - auto bundle = _override->findCachedAsset("nbl/builtin/renderpass_independent_pipeline/loader/mtl/missing_material_pipeline",searchTypes,ctx.inner,_hierarchyLevel+ICPUMesh::PIPELINE_HIERARCHYLEVELS_BELOW); - const auto* meta = bundle.getMetadata()->selfCast(); - const auto contents = bundle.getContents(); - for (auto pplnIt=contents.begin(); pplnIt!=contents.end(); pplnIt++) - { - auto ppln 
= core::smart_refctd_ptr_static_cast(*pplnIt); - auto pplnMeta = meta->getAssetSpecificMetadata(ppln.get()); - if (pplnMeta && pplnMeta->m_hash==hasUV) - { - pipeline = { std::move(ppln),pplnMeta }; - break; - } - } - } - // do some checks - assert(pipeline.first && pipeline.second); - const auto* cPpln = pipeline.first.get(); - if (hasUV) - { - const auto& vtxParams = cPpln->getCachedCreationParams().vertexInput; - assert(vtxParams.attributes[POSITION].relativeOffset==offsetof(SObjVertex,pos)); - assert(vtxParams.attributes[NORMAL].relativeOffset==offsetof(SObjVertex,normal32bit)); - assert(vtxParams.attributes[UV].relativeOffset==offsetof(SObjVertex,uv)); - assert(vtxParams.enabledAttribFlags&(1u<getLayout()->getPushConstantRanges().begin()[0].offset; - submeshes[i]->setAttachedDescriptorSet(core::smart_refctd_ptr(pipeline.second->m_descriptorSet3)); - memcpy( - submeshes[i]->getPushConstantsDataPtr()+pcoffset, - &pipeline.second->m_materialParams, - sizeof(CMTLMetadata::CRenderpassIndependentPipeline::SMaterialParameters) - ); + bufPtr = goNextLine(bufPtr, bufEnd); + } - usedPipelines.insert(pipeline); - submeshes[i]->setPipeline(std::move(pipeline.first)); - } + if (outPositions.empty()) + return {}; - core::smart_refctd_ptr vtxBuf = ICPUBuffer::create({ vertices.size() * sizeof(SObjVertex) }); - memcpy(vtxBuf->getPointer(), vertices.data(), vtxBuf->getSize()); + auto geometry = core::make_smart_refctd_ptr(); + geometry->setPositionView(IGeometryLoader::createView(EF_R32G32B32_SFLOAT, outPositions.size(), outPositions.data())); - auto ixBuf = ICPUBuffer::create({ ixBufOffset }); - for (size_t i = 0ull; i < submeshes.size(); ++i) - { - if (submeshWasLoadedFromCache[i]) - continue; - - submeshes[i]->setPositionAttributeIx(POSITION); - submeshes[i]->setNormalAttributeIx(NORMAL); - - submeshes[i]->setIndexBufferBinding({submeshes[i]->getIndexBufferBinding().offset,ixBuf}); - const uint64_t offset = submeshes[i]->getIndexBufferBinding().offset; - 
memcpy(reinterpret_cast(ixBuf->getPointer())+offset, indices[i].data(), indices[i].size()*4ull); - - SBufferBinding vtxBufBnd; - vtxBufBnd.offset = 0ull; - vtxBufBnd.buffer = vtxBuf; - submeshes[i]->setVertexBufferBinding(std::move(vtxBufBnd), BND_NUM); - - if (recalcNormals[i]) - { - auto vtxcmp = [&vtxSmoothGrp](const IMeshManipulator::SSNGVertexData& v0, const IMeshManipulator::SSNGVertexData& v1, ICPUMeshBuffer* buffer) - { - return vtxSmoothGrp[v0.indexOffset]==vtxSmoothGrp[v1.indexOffset]; - }; - - auto* meshManipulator = AssetManager->getMeshManipulator(); - meshManipulator->calculateSmoothNormals(submeshes[i].get(), false, 1.52e-5f, NORMAL, vtxcmp); - } - } - } + if (hasNormals) + geometry->setNormalView(IGeometryLoader::createView(EF_R32G32B32_SFLOAT, outNormals.size(), outNormals.data())); + + if (hasUVs) + geometry->getAuxAttributeViews()->push_back(IGeometryLoader::createView(EF_R32G32_SFLOAT, outUVs.size(), outUVs.data())); - auto mesh = core::make_smart_refctd_ptr(); - for (auto& submesh : submeshes) + if (!indices.empty()) { - IMeshManipulator::recalculateBoundingBox(submesh.get()); - mesh->getMeshBufferVector().emplace_back(std::move(submesh)); + geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + geometry->setIndexView(IGeometryLoader::createView(EF_R32_UINT, indices.size(), indices.data())); + } + else + { + geometry->setIndexing(IPolygonGeometryBase::PointList()); } - IMeshManipulator::recalculateBoundingBox(mesh.get()); - if (mesh->getMeshBuffers().empty()) - return {}; - - // - auto meta = core::make_smart_refctd_ptr(usedPipelines.size()); - uint32_t metaOffset = 0u; - for (auto pipeAndMeta : usedPipelines) - meta->placeMeta(metaOffset++,pipeAndMeta.first.get(),*pipeAndMeta.second); - - //at the very end, insert submeshes into cache - uint32_t i = 0u; - for (auto meshbuffer : mesh->getMeshBuffers()) - { - auto bundle = SAssetBundle(meta,{ core::smart_refctd_ptr(meshbuffer) }); - _override->insertAssetIntoCache(bundle, 
submeshCacheKeys[i++], ctx.inner, _hierarchyLevel+ICPUMesh::MESHBUFFER_HIERARCHYLEVELS_BELOW); - } - - return SAssetBundle(std::move(meta),{std::move(mesh)}); -} + CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); + CPolygonGeometryManipulator::recomputeRanges(geometry.get()); + CPolygonGeometryManipulator::recomputeAABB(geometry.get()); + return SAssetBundle(core::smart_refctd_ptr(), { std::move(geometry) }); +} -//! Read 3d vector of floats const char* COBJMeshFileLoader::readVec3(const char* bufPtr, float vec[3], const char* const bufEnd) { - const uint32_t WORD_BUFFER_LENGTH = 256; - char wordBuffer[WORD_BUFFER_LENGTH]; - - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer,"%f",vec); - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer,"%f",vec+1); - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer,"%f",vec+2); - - vec[0] = -vec[0]; // change handedness - return bufPtr; -} + const uint32_t WORD_BUFFER_LENGTH = 256; + char wordBuffer[WORD_BUFFER_LENGTH]; + bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); + sscanf(wordBuffer, "%f", vec); + bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); + sscanf(wordBuffer, "%f", vec + 1); + bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); + sscanf(wordBuffer, "%f", vec + 2); + + return bufPtr; +} -//! 
Read 2d vector of floats const char* COBJMeshFileLoader::readUV(const char* bufPtr, float vec[2], const char* const bufEnd) { - const uint32_t WORD_BUFFER_LENGTH = 256; - char wordBuffer[WORD_BUFFER_LENGTH]; + const uint32_t WORD_BUFFER_LENGTH = 256; + char wordBuffer[WORD_BUFFER_LENGTH]; - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer,"%f",vec); - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer,"%f",vec+1); + bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); + sscanf(wordBuffer, "%f", vec); + bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); + sscanf(wordBuffer, "%f", vec + 1); - vec[1] = 1.f-vec[1]; // change handedness - return bufPtr; + vec[1] = 1.f - vec[1]; + return bufPtr; } - -//! Read boolean value represented as 'on' or 'off' const char* COBJMeshFileLoader::readBool(const char* bufPtr, bool& tf, const char* const bufEnd) { - const uint32_t BUFFER_LENGTH = 8; - char tfStr[BUFFER_LENGTH]; + const uint32_t BUFFER_LENGTH = 8; + char tfStr[BUFFER_LENGTH]; - bufPtr = goAndCopyNextWord(tfStr, bufPtr, BUFFER_LENGTH, bufEnd); - tf = strcmp(tfStr, "off") != 0; - return bufPtr; + bufPtr = goAndCopyNextWord(tfStr, bufPtr, BUFFER_LENGTH, bufEnd); + tf = strcmp(tfStr, "off") != 0; + return bufPtr; } -//! skip space characters and stop on first non-space const char* COBJMeshFileLoader::goFirstWord(const char* buf, const char* const bufEnd, bool acrossNewlines) { - // skip space characters - if (acrossNewlines) - while((buf != bufEnd) && core::isspace(*buf)) - ++buf; - else - while((buf != bufEnd) && core::isspace(*buf) && (*buf != '\n')) - ++buf; - - return buf; + if (acrossNewlines) + while ((buf != bufEnd) && core::isspace(*buf)) + ++buf; + else + while ((buf != bufEnd) && core::isspace(*buf) && (*buf != '\n')) + ++buf; + + return buf; } - -//! 
skip current word and stop at beginning of next one const char* COBJMeshFileLoader::goNextWord(const char* buf, const char* const bufEnd, bool acrossNewlines) { - // skip current word - while(( buf != bufEnd ) && !core::isspace(*buf)) - ++buf; + while ((buf != bufEnd) && !core::isspace(*buf)) + ++buf; - return goFirstWord(buf, bufEnd, acrossNewlines); + return goFirstWord(buf, bufEnd, acrossNewlines); } - -//! Read until line break is reached and stop at the next non-space character const char* COBJMeshFileLoader::goNextLine(const char* buf, const char* const bufEnd) { - // look for newline characters - while(buf != bufEnd) - { - // found it, so leave - if (*buf=='\n' || *buf=='\r') - break; - ++buf; - } - return goFirstWord(buf, bufEnd); + while (buf != bufEnd) + { + if (*buf == '\n' || *buf == '\r') + break; + ++buf; + } + return goFirstWord(buf, bufEnd); } - uint32_t COBJMeshFileLoader::copyWord(char* outBuf, const char* const inBuf, uint32_t outBufLength, const char* const bufEnd) { - if (!outBufLength) - return 0; - if (!inBuf) - { - *outBuf = 0; - return 0; - } - - uint32_t i = 0; - while(inBuf[i]) - { - if (core::isspace(inBuf[i]) || &(inBuf[i]) == bufEnd) - break; - ++i; - } - - uint32_t length = core::min(i, outBufLength-1); - for (uint32_t j=0; j 2 ) - { - // error checking, shouldn't reach here unless file is wrong - idxType = 0; - } - } - else - { - // set all missing values to disable (=-1) - while (++idxType < 3) - idx[idxType]=-1; - ++p; - break; // while - } - } - - // go to the next char - ++p; - } - - return true; -} + char word[16] = ""; + const char* p = goFirstWord(vertexData, bufEnd); + uint32_t idxType = 0; -std::string COBJMeshFileLoader::genKeyForMeshBuf(const SContext& _ctx, const std::string& _baseKey, const std::string& _mtlName, const std::string& _grpName) const -{ - return _baseKey + "?" + _grpName + "?" 
+ _mtlName; -} + uint32_t i = 0; + while (p != bufEnd) + { + if ((core::isdigit(*p)) || (*p == '-')) + { + word[i++] = *p; + } + else if (*p == '/' || *p == ' ' || *p == '\0') + { + word[i] = '\0'; + sscanf(word, "%d", idx + idxType); + if (idx[idxType] < 0) + { + switch (idxType) + { + case 0: + idx[idxType] += vbsize; + break; + case 1: + idx[idxType] += vtsize; + break; + case 2: + idx[idxType] += vnsize; + break; + } + } + else + idx[idxType] -= 1; + word[0] = '\0'; + i = 0; + if (*p == '/') + { + if (++idxType > 2) + idxType = 0; + } + else + { + while (++idxType < 3) + idx[idxType] = -1; + ++p; + break; + } + } + + ++p; + } + return true; +} -} // end namespace scene -} // end namespace nbl +} #endif // _NBL_COMPILE_WITH_OBJ_LOADER_ diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.h b/src/nbl/asset/interchange/COBJMeshFileLoader.h index c11a09e671..4f0a9f20bd 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.h +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.h @@ -8,65 +8,12 @@ #include "nbl/core/declarations.h" #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/interchange/IAssetLoader.h" -#include "nbl/asset/metadata/CMTLMetadata.h" namespace nbl::asset { - -#include "nbl/nblpack.h" -class SObjVertex -{ -public: - inline bool operator<(const SObjVertex& other) const - { - if (pos[0]==other.pos[0]) - { - if (pos[1]==other.pos[1]) - { - if (pos[2]==other.pos[2]) - { - if (uv[0]==other.uv[0]) - { - if (uv[1]==other.uv[1]) - return normal32bit normal32bit; -} PACK_STRUCT; -#include "nbl/nblunpack.h" - //! Meshloader capable of loading obj meshes. 
class COBJMeshFileLoader : public IGeometryLoader { - struct SContext - { - SContext(const IAssetLoader::SAssetLoadContext& _innerCtx, uint32_t _topHierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) - : inner(_innerCtx), topHierarchyLevel(_topHierarchyLevel), loaderOverride(_override) {} - - IAssetLoader::SAssetLoadContext inner; - uint32_t topHierarchyLevel; - IAssetLoader::IAssetLoaderOverride* loaderOverride; - - const bool useGroups = false; - const bool useMaterials = true; - }; - protected: //! destructor virtual ~COBJMeshFileLoader(); @@ -119,16 +66,8 @@ class COBJMeshFileLoader : public IGeometryLoader // indices are changed to 0-based index instead of 1-based from the obj file bool retrieveVertexIndices(char* vertexData, int32_t* idx, const char* bufEnd, uint32_t vbsize, uint32_t vtsize, uint32_t vnsize); - std::string genKeyForMeshBuf(const SContext& _ctx, const std::string& _baseKey, const std::string& _mtlName, const std::string& _grpName) const; - IAssetManager* AssetManager; system::ISystem* System; - - template - static inline void performActionBasedOnOrientationSystem(aType& varToHandle, void (*performOnCertainOrientation)(aType& varToHandle)) - { - performOnCertainOrientation(varToHandle); - } }; } // end namespace nbl::asset diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp new file mode 100644 index 0000000000..b8da519825 --- /dev/null +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -0,0 +1,241 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/asset/interchange/COBJMeshWriter.h" + +#ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ + +#include "nbl/system/IFile.h" + +#include +#include + +namespace nbl::asset +{ + +COBJMeshWriter::COBJMeshWriter() +{ + #ifdef _NBL_DEBUG + setDebugName("COBJMeshWriter"); + #endif +} + +static inline bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) +{ + out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); + return view.decodeElement(ix, out); +} + +bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) +{ + if (!_override) + getDefaultOverride(_override); + + if (!_file || !_params.rootAsset) + return false; + + const auto* geom = IAsset::castDown(_params.rootAsset); + if (!geom || !geom->valid()) + return false; + + SAssetWriteContext ctx = { _params, _file }; + system::IFile* file = _override->getOutputFile(_file, ctx, { geom, 0u }); + if (!file) + return false; + + const auto& positionView = geom->getPositionView(); + if (!positionView) + return false; + + const auto& normalView = geom->getNormalView(); + const bool hasNormals = static_cast(normalView); + + const auto& auxViews = geom->getAuxAttributeViews(); + const ICPUPolygonGeometry::SDataView* uvView = nullptr; + for (const auto& view : auxViews) + { + if (!view) + continue; + const auto channels = getFormatChannelCount(view.composed.format); + if (channels >= 2u) + { + uvView = &view; + break; + } + } + const bool hasUVs = uvView != nullptr; + + const size_t vertexCount = positionView.getElementCount(); + if (vertexCount == 0) + return false; + if (hasNormals && normalView.getElementCount() != vertexCount) + return false; + if (hasUVs && uvView->getElementCount() != vertexCount) + return false; + + const auto* indexing = geom->getIndexingCallback(); + if (!indexing) + return false; + if (indexing->knownTopology() != 
E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) + return false; + + const auto& indexView = geom->getIndexView(); + core::vector indexData; + const uint32_t* indices = nullptr; + size_t faceCount = 0; + + if (indexView) + { + const size_t indexCount = indexView.getElementCount(); + if (indexCount % 3u != 0u) + return false; + + indexData.resize(indexCount); + const void* src = indexView.getPointer(); + if (!src) + return false; + + if (indexView.composed.format == EF_R32_UINT) + { + memcpy(indexData.data(), src, indexCount * sizeof(uint32_t)); + } + else if (indexView.composed.format == EF_R16_UINT) + { + const uint16_t* src16 = reinterpret_cast(src); + for (size_t i = 0; i < indexCount; ++i) + indexData[i] = src16[i]; + } + else + { + return false; + } + + indices = indexData.data(); + faceCount = indexCount / 3u; + } + else + { + if (vertexCount % 3u != 0u) + return false; + + indexData.resize(vertexCount); + for (size_t i = 0; i < vertexCount; ++i) + indexData[i] = static_cast(i); + + indices = indexData.data(); + faceCount = vertexCount / 3u; + } + + const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); + const bool flipHandedness = !(flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + + SAssetWriteContext writeCtx = { ctx.params, file }; + size_t fileOffset = 0u; + + auto writeString = [&](const std::string& str) + { + system::IFile::success_t success; + writeCtx.outputFile->write(success, str.c_str(), fileOffset, str.size()); + fileOffset += success.getBytesProcessed(); + }; + + { + std::string header = "# Nabla OBJ\n"; + writeString(header); + } + + hlsl::float64_t4 tmp = {}; + for (size_t i = 0u; i < vertexCount; ++i) + { + if (!decodeVec4(positionView, i, tmp)) + return false; + + double x = tmp.x; + double y = tmp.y; + double z = tmp.z; + if (flipHandedness) + x = -x; + + std::ostringstream ss; + ss << std::fixed << std::setprecision(6); + ss << "v " << x << " " << y << " " << z << "\n"; + writeString(ss.str()); + } + + if (hasUVs) + { + for 
(size_t i = 0u; i < vertexCount; ++i) + { + if (!decodeVec4(*uvView, i, tmp)) + return false; + const double u = tmp.x; + const double v = 1.0 - tmp.y; + + std::ostringstream ss; + ss << std::fixed << std::setprecision(6); + ss << "vt " << u << " " << v << "\n"; + writeString(ss.str()); + } + } + + if (hasNormals) + { + for (size_t i = 0u; i < vertexCount; ++i) + { + if (!decodeVec4(normalView, i, tmp)) + return false; + + double x = tmp.x; + double y = tmp.y; + double z = tmp.z; + if (flipHandedness) + x = -x; + + std::ostringstream ss; + ss << std::fixed << std::setprecision(6); + ss << "vn " << x << " " << y << " " << z << "\n"; + writeString(ss.str()); + } + } + + for (size_t i = 0u; i < faceCount; ++i) + { + const uint32_t i0 = indices[i * 3u + 0u]; + const uint32_t i1 = indices[i * 3u + 1u]; + const uint32_t i2 = indices[i * 3u + 2u]; + + const uint32_t f0 = i2; + const uint32_t f1 = i1; + const uint32_t f2 = i0; + + auto emitIndex = [&](std::ostringstream& ss, const uint32_t idx) + { + const uint32_t objIx = idx + 1u; + if (hasUVs && hasNormals) + ss << objIx << "/" << objIx << "/" << objIx; + else if (hasUVs) + ss << objIx << "/" << objIx; + else if (hasNormals) + ss << objIx << "//" << objIx; + else + ss << objIx; + }; + + std::ostringstream ss; + ss << "f "; + emitIndex(ss, f0); + ss << " "; + emitIndex(ss, f1); + ss << " "; + emitIndex(ss, f2); + ss << "\n"; + writeString(ss.str()); + } + + return true; +} + +} // namespace nbl::asset + +#endif // _NBL_COMPILE_WITH_OBJ_WRITER_ diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 932a04b82c..4ea93aa4bc 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -8,6 +8,7 @@ #include "CPLYMeshFileLoader.h" #include +#include #include "nbl/asset/IAssetManager.h" @@ -71,9 +72,9 @@ struct SContext return EF_R16_SINT; else if (strcmp(typeString, "ushort")==0 || strcmp(typeString, 
"uint16")==0) return EF_R16_UINT; - else if (strcmp(typeString, "long")==0 || strcmp(typeString, "int")==0 || strcmp(typeString, "int16")==0) + else if (strcmp(typeString, "long")==0 || strcmp(typeString, "int")==0 || strcmp(typeString, "int32")==0) return EF_R32_SINT; - else if (strcmp(typeString, "ulong")==0 || strcmp(typeString, "uint16")==0) + else if (strcmp(typeString, "ulong")==0 || strcmp(typeString, "uint")==0 || strcmp(typeString, "uint32")==0) return EF_R32_UINT; else if (strcmp(typeString, "float")==0 || strcmp(typeString, "float32")==0) return EF_R32_SFLOAT; @@ -347,7 +348,7 @@ struct SContext } return 0; } - return std::atoi(getNextWord()); + return std::strtod(getNextWord(), nullptr); } // read the next thing from the file and move the start pointer along void getData(void* dst, const E_FORMAT f) @@ -388,8 +389,20 @@ struct SContext prop.skip(*this); continue; } - // conversion required? - if (it.dstFmt!=prop.type) + if (!IsBinaryFile) + { + if (isIntegerFormat(prop.type)) + { + uint64_t tmp = getInt(prop.type); + encodePixels(it.dstFmt,it.ptr,&tmp); + } + else + { + hlsl::float64_t tmp = getFloat(prop.type); + encodePixels(it.dstFmt,it.ptr,&tmp); + } + } + else if (it.dstFmt!=prop.type) { assert(isIntegerFormat(it.dstFmt)==isIntegerFormat(prop.type)); if (isIntegerFormat(it.dstFmt)) @@ -638,7 +651,8 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa _params.logger.log("Multiple `vertex` elements not supported!", system::ILogger::ELL_ERROR); return {}; } - ICPUPolygonGeometry::SDataViewBase posView = {}, normalView = {}; + ICPUPolygonGeometry::SDataViewBase posView = {}, normalView = {}, uvView = {}; + core::vector extraViews; for (auto& vertexProperty : el.Properties) { const auto& propertyName = vertexProperty.Name; @@ -662,10 +676,14 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa negotiateFormat(normalView,1); else if (propertyName=="nz") negotiateFormat(normalView,2); + else if 
(propertyName=="u" || propertyName=="s") + negotiateFormat(uvView,0); + else if (propertyName=="v" || propertyName=="t") + negotiateFormat(uvView,1); else { // TODO: record the `propertyName` - geometry->getAuxAttributeViews()->push_back(createView(vertexProperty.type,el.Count)); + extraViews.push_back(createView(vertexProperty.type,el.Count)); } } auto setFinalFormat = [&ctx](ICPUPolygonGeometry::SDataViewBase& view)->void @@ -832,13 +850,24 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; geometry->setNormalView(std::move(view)); } + if (uvView.format!=EF_UNKNOWN) + { + auto beginIx = ctx.vertAttrIts.size(); + setFinalFormat(uvView); + auto view = createView(uvView.format,el.Count); + for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) + ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; + geometry->getAuxAttributeViews()->push_back(std::move(view)); + } // - for (auto& view : *geometry->getAuxAttributeViews()) + for (auto& view : extraViews) ctx.vertAttrIts.push_back({ .ptr = reinterpret_cast(view.src.buffer->getPointer())+view.src.offset, .stride = getTexelOrBlockBytesize(view.composed.format), .dstFmt = view.composed.format }); + for (auto& view : extraViews) + geometry->getAuxAttributeViews()->push_back(std::move(view)); // loop through vertex properties ctx.readVertex(_params,el); verticesProcessed = true; diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index fd6fa3ea9e..1765a28a0b 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -9,612 +9,278 @@ #include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" -#include "nbl/asset/utils/CMeshManipulator.h" +#include "nbl/asset/utils/CPolygonGeometryManipulator.h" -namespace nbl -{ -namespace asset -{ +#include 
+#include -namespace impl +namespace nbl::asset { -static asset::E_FORMAT getCorrespondingIntegerFormat(asset::E_FORMAT _fmt) + +CPLYMeshWriter::CPLYMeshWriter() { - using namespace asset; - switch (_fmt) - { - case EF_R8_UNORM: return EF_R8_UINT; - case EF_R8_SNORM: return EF_R8_SINT; - case EF_R8G8_UNORM: return EF_R8G8_UINT; - case EF_R8G8_SNORM: return EF_R8G8_SINT; - case EF_R8G8B8_UNORM: return EF_R8G8B8_UINT; - case EF_R8G8B8_SNORM: return EF_R8G8B8_SINT; - case EF_R8G8B8A8_UNORM: return EF_R8G8B8A8_UINT; - case EF_R8G8B8A8_SNORM: return EF_R8G8B8A8_SINT; - case EF_R16_UNORM: return EF_R16_UINT; - case EF_R16_SNORM: return EF_R16_SINT; - case EF_R16G16_UNORM: return EF_R16G16_UINT; - case EF_R16G16_SNORM: return EF_R16G16_SINT; - case EF_R16G16B16_UNORM: return EF_R16G16B16_UINT; - case EF_R16G16B16_SNORM: return EF_R16G16B16_SINT; - case EF_R16G16B16A16_UNORM: return EF_R16G16B16A16_UINT; - case EF_R16G16B16A16_SNORM: return EF_R16G16B16A16_SINT; - case EF_A2B10G10R10_UNORM_PACK32: return EF_A2B10G10R10_UINT_PACK32; - case EF_A2B10G10R10_SNORM_PACK32: return EF_A2B10G10R10_SINT_PACK32; - case EF_B8G8R8A8_UNORM: return EF_R8G8B8A8_SINT; - case EF_A2R10G10B10_UNORM_PACK32: return EF_A2B10G10R10_UINT_PACK32; - case EF_A2R10G10B10_SNORM_PACK32: return EF_A2B10G10R10_SINT_PACK32; - default: return EF_UNKNOWN; - } -} + #ifdef _NBL_DEBUG + setDebugName("CPLYMeshWriter"); + #endif } -CPLYMeshWriter::CPLYMeshWriter() +static inline bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) { - #ifdef _NBL_DEBUG - setDebugName("CPLYMeshWriter"); - #endif + out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); + return view.decodeElement(ix, out); } -//! 
writes a mesh bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { if (!_override) getDefaultOverride(_override); - SAssetWriteContext inCtx{ _params, _file }; - - const asset::ICPUMesh* mesh = IAsset::castDown(_params.rootAsset); - if (!mesh) + if (!_file || !_params.rootAsset) return false; - system::IFile* file = _override->getOutputFile(_file, inCtx, {mesh, 0u}); - - auto meshbuffers = mesh->getMeshBuffers(); - if (!file || !mesh) - return false; + const auto* geom = IAsset::castDown(_params.rootAsset); + if (!geom || !geom->valid()) + return false; - SContext context = { SAssetWriteContext{ inCtx.params, file} }; - - if (meshbuffers.size() > 1) - { - #ifdef _NBL_DEBUG - context.writeContext.params.logger.log("PLY WRITER WARNING (" + std::to_string(__LINE__) + " line): Only one meshbuffer input is allowed for writing! Saving first one", system::ILogger::ELL_WARNING, file->getFileName().string().c_str()); - #endif // _NBL_DEBUG - } + SAssetWriteContext ctx = { _params, _file }; + system::IFile* file = _override->getOutputFile(_file, ctx, { geom, 0u }); + if (!file) + return false; - context.writeContext.params.logger.log("Writing PLY mesh", system::ILogger::ELL_INFO, file->getFileName().string().c_str()); + const auto& positionView = geom->getPositionView(); + const auto& normalView = geom->getNormalView(); + const auto& auxViews = geom->getAuxAttributeViews(); - const asset::E_WRITER_FLAGS flags = _override->getAssetWritingFlags(context.writeContext, mesh, 0u); + const bool writeNormals = static_cast(normalView); - auto getConvertedCpuMeshBufferWithIndexBuffer = [&]() -> core::smart_refctd_ptr + const ICPUPolygonGeometry::SDataView* uvView = nullptr; + for (const auto& view : auxViews) { - auto inputMeshBuffer = *meshbuffers.begin(); - const bool doesItHaveIndexBuffer = inputMeshBuffer->getIndexBufferBinding().buffer.get(); - const bool isItNotTriangleListsPrimitive = 
inputMeshBuffer->getPipeline()->getCachedCreationParams().primitiveAssembly.primitiveType != asset::EPT_TRIANGLE_LIST; - - if (doesItHaveIndexBuffer && isItNotTriangleListsPrimitive) + if (!view) + continue; + const auto channels = getFormatChannelCount(view.composed.format); + if (channels >= 2u) { - auto cpuConvertedMeshBuffer = core::smart_refctd_ptr_static_cast(inputMeshBuffer->clone()); - IMeshManipulator::homogenizePrimitiveTypeAndIndices(&cpuConvertedMeshBuffer, &cpuConvertedMeshBuffer + 1, asset::EPT_TRIANGLE_LIST, asset::EIT_32BIT); - return cpuConvertedMeshBuffer; + uvView = &view; + break; } - else - return nullptr; - }; - - const auto cpuConvertedMeshBufferWithIndexBuffer = getConvertedCpuMeshBufferWithIndexBuffer(); - const asset::ICPUMeshBuffer* rawCopyMeshBuffer = cpuConvertedMeshBufferWithIndexBuffer.get() ? cpuConvertedMeshBufferWithIndexBuffer.get() : *meshbuffers.begin(); - const bool doesItUseIndexBufferBinding = (rawCopyMeshBuffer->getIndexBufferBinding().buffer.get() && rawCopyMeshBuffer->getIndexType() != asset::EIT_UNKNOWN); + } - uint32_t faceCount = {}; - size_t vertexCount = {}; + const size_t vertexCount = positionView.getElementCount(); + if (vertexCount == 0) + return false; - void* indices = nullptr; - { - auto indexCount = rawCopyMeshBuffer->getIndexCount(); + const auto* indexing = geom->getIndexingCallback(); + if (!indexing) + return false; - indices = _NBL_ALIGNED_MALLOC(indexCount * sizeof(uint32_t), _NBL_SIMD_ALIGNMENT); - memcpy(indices, rawCopyMeshBuffer->getIndices(), indexCount * sizeof(uint32_t)); - - IMeshManipulator::getPolyCount(faceCount, rawCopyMeshBuffer); - vertexCount = IMeshManipulator::upperBoundVertexID(rawCopyMeshBuffer); - } + if (indexing->knownTopology() != E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) + return false; - // write PLY header - std::string header = "ply\n"; - header += (flags & asset::EWF_BINARY) ? 
"format binary_little_endian 1.0" : "format ascii 1.0"; - header += "\ncomment IrrlichtBAW "; - header += NABLA_SDK_VERSION; + const auto& indexView = geom->getIndexView(); - // vertex definition - header += "\nelement vertex "; - header += std::to_string(vertexCount) + '\n'; + core::vector indexData; + const uint32_t* indices = nullptr; + size_t faceCount = 0; - bool vaidToWrite[4]{ 0, 0, 0, 0 }; + if (indexView) + { + const size_t indexCount = indexView.getElementCount(); + if (indexCount % 3u != 0u) + return false; - const uint32_t POSITION_ATTRIBUTE = rawCopyMeshBuffer->getPositionAttributeIx(); - constexpr uint32_t COLOR_ATTRIBUTE = 1; - constexpr uint32_t UV_ATTRIBUTE = 2; - const uint32_t NORMAL_ATTRIBUTE = rawCopyMeshBuffer->getNormalAttributeIx(); + indexData.resize(indexCount); + const void* src = indexView.getPointer(); + if (!src) + return false; - if (rawCopyMeshBuffer->getAttribBoundBuffer(POSITION_ATTRIBUTE).buffer) - { - const asset::E_FORMAT t = rawCopyMeshBuffer->getAttribFormat(POSITION_ATTRIBUTE); - std::string typeStr = getTypeString(t); - vaidToWrite[0] = true; - header += - "property " + typeStr + " x\n" + - "property " + typeStr + " y\n" + - "property " + typeStr + " z\n"; - } - if (rawCopyMeshBuffer->getAttribBoundBuffer(COLOR_ATTRIBUTE).buffer) - { - const asset::E_FORMAT t = rawCopyMeshBuffer->getAttribFormat(COLOR_ATTRIBUTE); - std::string typeStr = getTypeString(t); - vaidToWrite[1] = true; - header += - "property " + typeStr + " red\n" + - "property " + typeStr + " green\n" + - "property " + typeStr + " blue\n"; - if (asset::getFormatChannelCount(t) == 4u) + if (indexView.composed.format == EF_R32_UINT) { - header += "property " + typeStr + " alpha\n"; + memcpy(indexData.data(), src, indexCount * sizeof(uint32_t)); + } + else if (indexView.composed.format == EF_R16_UINT) + { + const uint16_t* src16 = reinterpret_cast(src); + for (size_t i = 0; i < indexCount; ++i) + indexData[i] = src16[i]; + } + else + { + return false; } - } - if 
(rawCopyMeshBuffer->getAttribBoundBuffer(UV_ATTRIBUTE).buffer) - { - const asset::E_FORMAT t = rawCopyMeshBuffer->getAttribFormat(UV_ATTRIBUTE); - std::string typeStr = getTypeString(t); - vaidToWrite[2] = true; - header += - "property " + typeStr + " u\n" + - "property " + typeStr + " v\n"; - } - if (rawCopyMeshBuffer->getAttribBoundBuffer(NORMAL_ATTRIBUTE).buffer) - { - const asset::E_FORMAT t = rawCopyMeshBuffer->getAttribFormat(NORMAL_ATTRIBUTE); - std::string typeStr = getTypeString(t); - vaidToWrite[3] = true; - header += - "property " + typeStr + " nx\n" + - "property " + typeStr + " ny\n" + - "property " + typeStr + " nz\n"; - } - - asset::E_INDEX_TYPE idxT = asset::EIT_UNKNOWN; - bool forceFaces = false; - - const auto primitiveType = rawCopyMeshBuffer->getPipeline()->getCachedCreationParams().primitiveAssembly.primitiveType; - const auto indexType = rawCopyMeshBuffer->getIndexType(); - - if (primitiveType == asset::EPT_POINT_LIST) - faceCount = 0u; - else if (doesItUseIndexBufferBinding) - { - header += "element face "; - header += std::to_string(faceCount) + '\n'; - idxT = indexType; - const std::string idxTypeStr = idxT == asset::EIT_32BIT ? "uint32" : "uint16"; - header += "property list uchar " + idxTypeStr + " vertex_indices\n"; - } - else if (primitiveType == asset::EPT_TRIANGLE_LIST) - { - forceFaces = true; - header += "element face "; - header += std::to_string(faceCount) + '\n'; - idxT = vertexCount <= ((1u<<16) - 1) ? asset::EIT_16BIT : asset::EIT_32BIT; - const std::string idxTypeStr = idxT == asset::EIT_32BIT ? 
"uint32" : "uint16"; - header += "property list uchar " + idxTypeStr + " vertex_indices\n"; + indices = indexData.data(); + faceCount = indexCount / 3u; } else - faceCount = 0u; - header += "end_header\n"; - { - system::IFile::success_t success; - file->write(success, header.c_str(), context.fileOffset, header.size()); - context.fileOffset += success.getBytesProcessed(); + if (vertexCount % 3u != 0u) + return false; + + indexData.resize(vertexCount); + for (size_t i = 0; i < vertexCount; ++i) + indexData[i] = static_cast(i); + + indices = indexData.data(); + faceCount = vertexCount / 3u; } - - if (flags & asset::EWF_BINARY) - writeBinary(rawCopyMeshBuffer, vertexCount, faceCount, idxT, indices, forceFaces, vaidToWrite, context); - else - writeText(rawCopyMeshBuffer, vertexCount, faceCount, idxT, indices, forceFaces, vaidToWrite, context); - _NBL_ALIGNED_FREE(const_cast(indices)); + const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); + const bool binary = (flags & E_WRITER_FLAGS::EWF_BINARY) != 0u; - return true; -} + std::string header = "ply\n"; + header += binary ? "format binary_little_endian 1.0" : "format ascii 1.0"; + header += "\ncomment Nabla "; + header += NABLA_SDK_VERSION; -void CPLYMeshWriter::writeBinary(const asset::ICPUMeshBuffer* _mbuf, size_t _vtxCount, size_t _fcCount, asset::E_INDEX_TYPE _idxType, void* const _indices, bool _forceFaces, const bool _vaidToWrite[4], SContext& context) const -{ - const size_t colCpa = asset::getFormatChannelCount(_mbuf->getAttribFormat(1)); + header += "\nelement vertex "; + header += std::to_string(vertexCount); + header += "\n"; - bool flipVectors = (!(context.writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED)) ? 
true : false; + header += "property float x\n"; + header += "property float y\n"; + header += "property float z\n"; - auto mbCopy = createCopyMBuffNormalizedReplacedWithTrueInt(_mbuf); - for (size_t i = 0u; i < _vtxCount; ++i) + if (writeNormals) { - core::vectorSIMDf f; - uint32_t ui[4]; - if (_vaidToWrite[0]) - { - writeAttribBinary(context, mbCopy.get(), 0, i, 3u, flipVectors); - } - if (_vaidToWrite[1]) - { - writeAttribBinary(context, mbCopy.get(), 1, i, colCpa); - } - if (_vaidToWrite[2]) - { - writeAttribBinary(context, mbCopy.get(), 2, i, 2u); - } - if (_vaidToWrite[3]) - { - writeAttribBinary(context, mbCopy.get(), 3, i, 3u, flipVectors); - } + header += "property float nx\n"; + header += "property float ny\n"; + header += "property float nz\n"; } - constexpr uint8_t listSize = 3u; - void* indices = _indices; - if (_forceFaces) + if (uvView) { - indices = _NBL_ALIGNED_MALLOC((_idxType == asset::EIT_32BIT ? 4 : 2) * listSize * _fcCount,_NBL_SIMD_ALIGNMENT); - if (_idxType == asset::EIT_16BIT) - { - for (uint16_t i = 0u; i < _fcCount; ++i) - ((uint16_t*)indices)[i] = i; - } - else - { - for (uint32_t i = 0u; i < _fcCount; ++i) - ((uint32_t*)indices)[i] = i; - } + header += "property float u\n"; + header += "property float v\n"; } - if (_idxType == asset::EIT_32BIT) + + header += "element face "; + header += std::to_string(faceCount); + header += "\nproperty list uchar uint vertex_indices\n"; + header += "end_header\n"; + + SContext context = { SAssetWriteContext{ ctx.params, file } }; { - uint32_t* ind = (uint32_t*)indices; - for (size_t i = 0u; i < _fcCount; ++i) - { - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, &listSize, context.fileOffset, sizeof(listSize)); - context.fileOffset += success.getBytesProcessed(); - } - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, ind, context.fileOffset, listSize * 4); - context.fileOffset += success.getBytesProcessed(); - } - - ind += 
listSize; - } + system::IFile::success_t success; + file->write(success, header.c_str(), context.fileOffset, header.size()); + context.fileOffset += success.getBytesProcessed(); } + + if (binary) + writeBinary(geom, uvView, writeNormals, vertexCount, indices, faceCount, context); else - { - uint16_t* ind = (uint16_t*)indices; - for (size_t i = 0u; i < _fcCount; ++i) - { - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, &listSize, context.fileOffset, sizeof(listSize)); - context.fileOffset += success.getBytesProcessed(); - } - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, ind, context.fileOffset, listSize * 2); - context.fileOffset += success.getBytesProcessed(); - } - - ind += listSize; - } - } + writeText(geom, uvView, writeNormals, vertexCount, indices, faceCount, context); - if (_forceFaces) - _NBL_ALIGNED_FREE(indices); + return true; } -void CPLYMeshWriter::writeText(const asset::ICPUMeshBuffer* _mbuf, size_t _vtxCount, size_t _fcCount, asset::E_INDEX_TYPE _idxType, void* const _indices, bool _forceFaces, const bool _vaidToWrite[4], SContext& context) const +void CPLYMeshWriter::writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, SContext& context) const { - auto mbCopy = createCopyMBuffNormalizedReplacedWithTrueInt(_mbuf); - - auto writefunc = [&context, &mbCopy, this](uint32_t _vaid, size_t _ix, size_t _cpa) + const bool flipVectors = !(context.writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const size_t vertexStride = sizeof(float) * (3u + (writeNormals ? 3u : 0u) + (uvView ? 
2u : 0u)); + const size_t faceStride = sizeof(uint8_t) + sizeof(uint32_t) * 3u; + const size_t totalSize = vertexCount * vertexStride + faceCount * faceStride; + core::vector blob; + blob.resize(totalSize); + uint8_t* dst = blob.data(); + + hlsl::float64_t4 tmp = {}; + for (size_t i = 0; i < vertexCount; ++i) { - bool flipVerteciesAndNormals = false; - if (!(context.writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED)) - if(_vaid == 0u || _vaid == 3u) - flipVerteciesAndNormals = true; - - uint32_t ui[4]; - core::vectorSIMDf f; - const asset::E_FORMAT t = mbCopy->getAttribFormat(_vaid); - if (asset::isScaledFormat(t) || asset::isIntegerFormat(t)) - { - mbCopy->getAttribute(ui, _vaid, _ix); - if (!asset::isSignedFormat(t)) - writeVectorAsText(context, ui, _cpa, flipVerteciesAndNormals); - else - { - int32_t ii[4]; - memcpy(ii, ui, 4*4); - writeVectorAsText(context, ii, _cpa, flipVerteciesAndNormals); - } - } - else - { - mbCopy->getAttribute(f, _vaid, _ix); - writeVectorAsText(context, f.pointer, _cpa, flipVerteciesAndNormals); - } - }; + if (!decodeVec4(geom->getPositionView(), i, tmp)) + return; - const size_t colCpa = asset::getFormatChannelCount(_mbuf->getAttribFormat(1)); + float pos[3] = { static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z) }; + if (flipVectors) + pos[0] = -pos[0]; - for (size_t i = 0u; i < _vtxCount; ++i) - { - core::vectorSIMDf f; - uint32_t ui[4]; - if (_vaidToWrite[0]) - { - writefunc(0, i, 3u); - } - if (_vaidToWrite[1]) - { - writefunc(1, i, colCpa); - } - if (_vaidToWrite[2]) - { - writefunc(2, i, 2u); - } - if (_vaidToWrite[3]) - { - writefunc(3, i, 3u); - } + memcpy(dst, pos, sizeof(pos)); + dst += sizeof(pos); + if (writeNormals) { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, "\n", context.fileOffset, 1); - context.fileOffset += success.getBytesProcessed(); + if (!decodeVec4(geom->getNormalView(), i, tmp)) + return; + float normal[3] = { static_cast(tmp.x), 
static_cast(tmp.y), static_cast(tmp.z) }; + if (flipVectors) + normal[0] = -normal[0]; + + memcpy(dst, normal, sizeof(normal)); + dst += sizeof(normal); } - } - const char* listSize = "3 "; - void* indices = _indices; - if (_forceFaces) - { - indices = _NBL_ALIGNED_MALLOC((_idxType == asset::EIT_32BIT ? 4 : 2) * 3 * _fcCount,_NBL_SIMD_ALIGNMENT); - if (_idxType == asset::EIT_16BIT) - { - for (uint16_t i = 0u; i < _fcCount; ++i) - ((uint16_t*)indices)[i] = i; - } - else + if (uvView) { - for (uint32_t i = 0u; i < _fcCount; ++i) - ((uint32_t*)indices)[i] = i; - } - } - if (_idxType == asset::EIT_32BIT) - { - uint32_t* ind = (uint32_t*)indices; - for (size_t i = 0u; i < _fcCount; ++i) - { - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, listSize, context.fileOffset, 2); - context.fileOffset += success.getBytesProcessed(); - } - - writeVectorAsText(context, ind, 3); + if (!decodeVec4(*uvView, i, tmp)) + return; + float uv[2] = { static_cast(tmp.x), static_cast(tmp.y) }; - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, "\n", context.fileOffset, 1); - context.fileOffset += success.getBytesProcessed(); - } - - ind += 3; + memcpy(dst, uv, sizeof(uv)); + dst += sizeof(uv); } } - else - { - uint16_t* ind = (uint16_t*)indices; - for (size_t i = 0u; i < _fcCount; ++i) - { - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, listSize, context.fileOffset, 2); - context.fileOffset += success.getBytesProcessed(); - } - writeVectorAsText(context, ind, 3); - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, "\n", context.fileOffset, 1); - context.fileOffset += success.getBytesProcessed(); - } + for (size_t i = 0; i < faceCount; ++i) + { + const uint8_t listSize = 3u; + *dst++ = listSize; - ind += 3; - } + const uint32_t* tri = indices + (i * 3u); + memcpy(dst, tri, sizeof(uint32_t) * 3u); + dst += sizeof(uint32_t) * 3u; } - if 
(_forceFaces) - _NBL_ALIGNED_FREE(indices); + system::IFile::success_t success; + context.writeContext.outputFile->write(success, blob.data(), context.fileOffset, blob.size()); + context.fileOffset += success.getBytesProcessed(); } -void CPLYMeshWriter::writeAttribBinary(SContext& context, asset::ICPUMeshBuffer* _mbuf, uint32_t _vaid, size_t _ix, size_t _cpa, bool flipAttribute) const +void CPLYMeshWriter::writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, SContext& context) const { - uint32_t ui[4]; - core::vectorSIMDf f; - asset::E_FORMAT t = _mbuf->getAttribFormat(_vaid); + const bool flipVectors = !(context.writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); - if (asset::isScaledFormat(t) || asset::isIntegerFormat(t)) + hlsl::float64_t4 tmp = {}; + for (size_t i = 0; i < vertexCount; ++i) { - _mbuf->getAttribute(ui, _vaid, _ix); - if (flipAttribute) - ui[0] = -ui[0]; + if (!decodeVec4(geom->getPositionView(), i, tmp)) + return; + const double pos[3] = { tmp.x, tmp.y, tmp.z }; + writeVectorAsText(context, pos, 3u, flipVectors); - const uint32_t bytesPerCh = asset::getTexelOrBlockBytesize(t)/asset::getFormatChannelCount(t); - if (bytesPerCh == 1u || t == asset::EF_A2B10G10R10_UINT_PACK32 || t == asset::EF_A2B10G10R10_SINT_PACK32 || t == asset::EF_A2B10G10R10_SSCALED_PACK32 || t == asset::EF_A2B10G10R10_USCALED_PACK32) - { - uint8_t a[4]; - for (uint32_t k = 0u; k < _cpa; ++k) - a[k] = ui[k]; - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, a, context.fileOffset, _cpa); - context.fileOffset += success.getBytesProcessed(); - } - } - else if (bytesPerCh == 2u) - { - uint16_t a[4]; - for (uint32_t k = 0u; k < _cpa; ++k) - a[k] = ui[k]; - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, a, context.fileOffset, 2 * _cpa); - context.fileOffset += 
success.getBytesProcessed(); - } - } - else if (bytesPerCh == 4u) + if (writeNormals) { - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, ui, context.fileOffset, 4 * _cpa); - context.fileOffset += success.getBytesProcessed(); - } + if (!decodeVec4(geom->getNormalView(), i, tmp)) + return; + const double normal[3] = { tmp.x, tmp.y, tmp.z }; + writeVectorAsText(context, normal, 3u, flipVectors); } - } - else - { - _mbuf->getAttribute(f, _vaid, _ix); - if (flipAttribute) - f[0] = -f[0]; + if (uvView) { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, f.pointer, context.fileOffset, 4 * _cpa); - context.fileOffset += success.getBytesProcessed(); + if (!decodeVec4(*uvView, i, tmp)) + return; + const double uv[2] = { tmp.x, tmp.y }; + writeVectorAsText(context, uv, 2u, false); } - } -} - -core::smart_refctd_ptr CPLYMeshWriter::createCopyMBuffNormalizedReplacedWithTrueInt(const asset::ICPUMeshBuffer* _mbuf) -{ - auto mbCopy = core::smart_refctd_ptr_static_cast(_mbuf->clone(2)); - for (size_t i = 0; i < ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; ++i) - { - auto vaid = i; - asset::E_FORMAT t = _mbuf->getAttribFormat(vaid); - - if (_mbuf->getAttribBoundBuffer(vaid).buffer) - mbCopy->getPipeline()->getCachedCreationParams().vertexInput.attributes[vaid].format = asset::isNormalizedFormat(t) ? 
impl::getCorrespondingIntegerFormat(t) : t; + system::IFile::success_t success; + context.writeContext.outputFile->write(success, "\n", context.fileOffset, 1); + context.fileOffset += success.getBytesProcessed(); } - return mbCopy; -} - -std::string CPLYMeshWriter::getTypeString(asset::E_FORMAT _t) -{ - using namespace asset; - - if (isFloatingPointFormat(_t)) - return "float"; - - switch (_t) + for (size_t i = 0; i < faceCount; ++i) { - case EF_R8_SNORM: - case EF_R8_SINT: - case EF_R8_SSCALED: - case EF_R8G8_SNORM: - case EF_R8G8_SINT: - case EF_R8G8_SSCALED: - case EF_R8G8B8_SNORM: - case EF_R8G8B8_SINT: - case EF_R8G8B8_SSCALED: - case EF_R8G8B8A8_SNORM: - case EF_R8G8B8A8_SINT: - case EF_R8G8B8A8_SSCALED: - case EF_B8G8R8A8_UNORM: - case EF_A2B10G10R10_SNORM_PACK32: - case EF_A2B10G10R10_SINT_PACK32: - case EF_A2B10G10R10_SSCALED_PACK32: - case EF_A2R10G10B10_SNORM_PACK32: - return "char"; - - case EF_R8_UNORM: - case EF_R8_UINT: - case EF_R8_USCALED: - case EF_R8G8_UNORM: - case EF_R8G8_UINT: - case EF_R8G8_USCALED: - case EF_R8G8B8_UNORM: - case EF_R8G8B8_UINT: - case EF_R8G8B8_USCALED: - case EF_R8G8B8A8_UNORM: - case EF_R8G8B8A8_UINT: - case EF_R8G8B8A8_USCALED: - case EF_A2R10G10B10_UNORM_PACK32: - case EF_A2B10G10R10_UNORM_PACK32: - case EF_A2B10G10R10_UINT_PACK32: - case EF_A2B10G10R10_USCALED_PACK32: - return "uchar"; - - case EF_R16_UNORM: - case EF_R16_UINT: - case EF_R16_USCALED: - case EF_R16G16_UNORM: - case EF_R16G16_UINT: - case EF_R16G16_USCALED: - case EF_R16G16B16_UNORM: - case EF_R16G16B16_UINT: - case EF_R16G16B16_USCALED: - case EF_R16G16B16A16_UNORM: - case EF_R16G16B16A16_UINT: - case EF_R16G16B16A16_USCALED: - return "ushort"; - - case EF_R16_SNORM: - case EF_R16_SINT: - case EF_R16_SSCALED: - case EF_R16G16_SNORM: - case EF_R16G16_SINT: - case EF_R16G16_SSCALED: - case EF_R16G16B16_SNORM: - case EF_R16G16B16_SINT: - case EF_R16G16B16_SSCALED: - case EF_R16G16B16A16_SNORM: - case EF_R16G16B16A16_SINT: - case EF_R16G16B16A16_SSCALED: - 
return "short"; - - case EF_R32_UINT: - case EF_R32G32_UINT: - case EF_R32G32B32_UINT: - case EF_R32G32B32A32_UINT: - return "uint"; - - case EF_R32_SINT: - case EF_R32G32_SINT: - case EF_R32G32B32_SINT: - case EF_R32G32B32A32_SINT: - return "int"; - - default: - return ""; + const uint32_t* tri = indices + (i * 3u); + std::stringstream ss; + ss << "3 " << tri[0] << " " << tri[1] << " " << tri[2] << "\n"; + const auto str = ss.str(); + + system::IFile::success_t success; + context.writeContext.outputFile->write(success, str.c_str(), context.fileOffset, str.size()); + context.fileOffset += success.getBytesProcessed(); } } -} // end namespace -} // end namespace +} // namespace nbl::asset #endif // _NBL_COMPILE_WITH_PLY_WRITER_ - diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.h b/src/nbl/asset/interchange/CPLYMeshWriter.h index e709ffa0fe..73244a64b5 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.h +++ b/src/nbl/asset/interchange/CPLYMeshWriter.h @@ -41,15 +41,8 @@ class CPLYMeshWriter : public IGeometryWriter size_t fileOffset = 0; }; - void writeBinary(const ICPUPolygonGeometry* geom, size_t _vtxCount, size_t _fcCount, asset::E_INDEX_TYPE _idxType, void* const _indices, bool _forceFaces, const bool _vaidToWrite[4], SContext& context) const; - void writeText(const ICPUPolygonGeometry* geom, size_t _vtxCount, size_t _fcCount, asset::E_INDEX_TYPE _idxType, void* const _indices, bool _forceFaces, const bool _vaidToWrite[4], SContext& context) const; - - void writeAttribBinary(SContext& context, ICPUPolygonGeometry* geom, uint32_t _vaid, size_t _ix, size_t _cpa, bool flipAttribute = false) const; - - //! Creates new geometry with the same attribute buffers mapped but with normalized types changed to corresponding true integer types. 
- static core::smart_refctd_ptr createCopyNormalizedReplacedWithTrueInt(const ICPUPolygonGeometry* geom); - - static std::string getTypeString(asset::E_FORMAT _t); + void writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, SContext& context) const; + void writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, SContext& context) const; template void writeVectorAsText(SContext& context, const T* _vec, size_t _elementsToWrite, bool flipVectors = false) const diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index d00c37cf10..cfdf98ec97 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -13,113 +13,18 @@ #include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" +#include using namespace nbl; using namespace nbl::asset; -constexpr auto POSITION_ATTRIBUTE = 0; -constexpr auto COLOR_ATTRIBUTE = 1; -constexpr auto UV_ATTRIBUTE = 2; -constexpr auto NORMAL_ATTRIBUTE = 3; - CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager* _m_assetMgr) - : IRenderpassIndependentPipelineLoader(_m_assetMgr), m_assetMgr(_m_assetMgr) + : m_assetMgr(_m_assetMgr) { - } void CSTLMeshFileLoader::initialize() { - IRenderpassIndependentPipelineLoader::initialize(); - - auto precomputeAndCachePipeline = [&](bool withColorAttribute) - { - auto getShaderDefaultPaths = [&]() -> std::pair - { - if (withColorAttribute) - return std::make_pair("nbl/builtin/material/debug/vertex_color/specialized_shader.vert", "nbl/builtin/material/debug/vertex_color/specialized_shader.frag"); - else - return std::make_pair("nbl/builtin/material/debug/vertex_normal/specialized_shader.vert", 
"nbl/builtin/material/debug/vertex_normal/specialized_shader.frag"); - }; - - auto defaultOverride = IAssetLoaderOverride(m_assetMgr); - const std::string pipelineCacheHash = getPipelineCacheKey(withColorAttribute).data(); - const uint32_t _hierarchyLevel = 0; - const IAssetLoader::SAssetLoadContext fakeContext(IAssetLoader::SAssetLoadParams{}, nullptr); - - const asset::IAsset::E_TYPE types[]{ asset::IAsset::ET_RENDERPASS_INDEPENDENT_PIPELINE, (asset::IAsset::E_TYPE)0u }; - auto pipelineBundle = defaultOverride.findCachedAsset(pipelineCacheHash, types, fakeContext, _hierarchyLevel + ICPURenderpassIndependentPipeline::DESC_SET_HIERARCHYLEVELS_BELOW); - if (pipelineBundle.getContents().empty()) - { - auto mbVertexShader = core::smart_refctd_ptr(); - auto mbFragmentShader = core::smart_refctd_ptr(); - { - const IAsset::E_TYPE types[]{ IAsset::E_TYPE::ET_SPECIALIZED_SHADER, static_cast(0u) }; - const auto shaderPaths = getShaderDefaultPaths(); - - auto vertexShaderBundle = m_assetMgr->findAssets(shaderPaths.first.data(), types); - auto fragmentShaderBundle = m_assetMgr->findAssets(shaderPaths.second.data(), types); - - mbVertexShader = core::smart_refctd_ptr_static_cast(vertexShaderBundle->begin()->getContents().begin()[0]); - mbFragmentShader = core::smart_refctd_ptr_static_cast(fragmentShaderBundle->begin()->getContents().begin()[0]); - } - - auto defaultOverride = IAssetLoaderOverride(m_assetMgr); - - const IAssetLoader::SAssetLoadContext fakeContext(IAssetLoader::SAssetLoadParams{}, nullptr); - auto mbBundlePipelineLayout = defaultOverride.findDefaultAsset("nbl/builtin/pipeline_layout/loader/STL", fakeContext, _hierarchyLevel + ICPURenderpassIndependentPipeline::PIPELINE_LAYOUT_HIERARCHYLEVELS_BELOW); - auto mbPipelineLayout = mbBundlePipelineLayout.first; - - auto const positionFormatByteSize = getTexelOrBlockBytesize(EF_R32G32B32_SFLOAT); - auto const colorFormatByteSize = withColorAttribute ? 
getTexelOrBlockBytesize(EF_B8G8R8A8_UNORM) : 0; - auto const normalFormatByteSize = getTexelOrBlockBytesize(EF_A2B10G10R10_SNORM_PACK32); - - SVertexInputParams mbInputParams; - const auto stride = positionFormatByteSize + colorFormatByteSize + normalFormatByteSize; - mbInputParams.enabledBindingFlags |= core::createBitmask({ 0 }); - mbInputParams.enabledAttribFlags |= core::createBitmask({ POSITION_ATTRIBUTE, NORMAL_ATTRIBUTE, withColorAttribute ? COLOR_ATTRIBUTE : 0 }); - mbInputParams.bindings[0] = { stride, EVIR_PER_VERTEX }; - - mbInputParams.attributes[POSITION_ATTRIBUTE].format = EF_R32G32B32_SFLOAT; - mbInputParams.attributes[POSITION_ATTRIBUTE].relativeOffset = 0; - mbInputParams.attributes[POSITION_ATTRIBUTE].binding = 0; - - if (withColorAttribute) - { - mbInputParams.attributes[COLOR_ATTRIBUTE].format = EF_R32G32B32_SFLOAT; - mbInputParams.attributes[COLOR_ATTRIBUTE].relativeOffset = positionFormatByteSize; - mbInputParams.attributes[COLOR_ATTRIBUTE].binding = 0; - } - - mbInputParams.attributes[NORMAL_ATTRIBUTE].format = EF_R32G32B32_SFLOAT; - mbInputParams.attributes[NORMAL_ATTRIBUTE].relativeOffset = positionFormatByteSize + colorFormatByteSize; - mbInputParams.attributes[NORMAL_ATTRIBUTE].binding = 0; - - SBlendParams blendParams; - SPrimitiveAssemblyParams primitiveAssemblyParams; - primitiveAssemblyParams.primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST; - - SRasterizationParams rastarizationParmas; - - auto mbPipeline = core::make_smart_refctd_ptr(std::move(mbPipelineLayout), nullptr, nullptr, mbInputParams, blendParams, primitiveAssemblyParams, rastarizationParmas); - { - mbPipeline->setShaderAtStage(asset::IShader::ESS_VERTEX, mbVertexShader.get()); - mbPipeline->setShaderAtStage(asset::IShader::ESS_FRAGMENT, mbFragmentShader.get()); - } - - asset::SAssetBundle newPipelineBundle(nullptr, {core::smart_refctd_ptr(mbPipeline)}); - defaultOverride.insertAssetIntoCache(newPipelineBundle, pipelineCacheHash, fakeContext, _hierarchyLevel + 
ICPURenderpassIndependentPipeline::DESC_SET_HIERARCHYLEVELS_BELOW); - } - else - return; - }; - - /* - Pipeline permutations are cached - */ - - precomputeAndCachePipeline(true); - precomputeAndCachePipeline(false); } SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) @@ -141,20 +46,13 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (filesize < 6ull) // we need a header return {}; - bool hasColor = false; - - auto mesh = core::make_smart_refctd_ptr(); - auto meshbuffer = core::make_smart_refctd_ptr(); - meshbuffer->setPositionAttributeIx(POSITION_ATTRIBUTE); - meshbuffer->setNormalAttributeIx(NORMAL_ATTRIBUTE); - bool binary = false; std::string token; if (getNextToken(&context, token) != "solid") - binary = hasColor = true; + binary = true; + const bool rightHanded = (_params.loaderFlags & E_LOADER_PARAMETER_FLAGS::ELPF_RIGHT_HANDED_MESHES) != 0; core::vector positions, normals; - core::vector colors; if (binary) { if (_file->getSize() < 80) @@ -173,7 +71,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa positions.reserve(3 * vertexCount); normals.reserve(vertexCount); - colors.reserve(vertexCount); } else goNextLine(&context); // skip header @@ -199,9 +96,13 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { core::vectorSIMDf n; getNextVector(&context, n, binary); - if(_params.loaderFlags & E_LOADER_PARAMETER_FLAGS::ELPF_RIGHT_HANDED_MESHES) - performActionBasedOnOrientationSystem(n.x, [](float& varToFlip) {varToFlip = -varToFlip;}); - normals.push_back(core::normalize(n)); + if (rightHanded) + n.x = -n.x; + const float len2 = core::dot(n, n).X; + if (len2 > 0.f && std::abs(len2 - 1.f) < 1e-4f) + normals.push_back(n); + else + normals.push_back(core::normalize(n)); } if (!binary) @@ -220,8 +121,8 @@ SAssetBundle 
CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa return {}; } getNextVector(&context, p[i], binary); - if (_params.loaderFlags & E_LOADER_PARAMETER_FLAGS::ELPF_RIGHT_HANDED_MESHES) - performActionBasedOnOrientationSystem(p[i].x, [](float& varToFlip){varToFlip = -varToFlip; }); + if (rightHanded) + p[i].x = -p[i].x; } for (uint32_t i = 0u; i < 3u; ++i) // seems like in STL format vertices are ordered in clockwise manner... positions.push_back(p[2u - i]); @@ -241,19 +142,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa context.fileOffset += sizeof(attrib); } - if (hasColor && (attrib & 0x8000u)) // assuming VisCam/SolidView non-standard trick to store color in 2 bytes of extra attribute - { - const void* srcColor[1]{ &attrib }; - uint32_t color{}; - convertColor(srcColor, &color, 0u, 0u); - colors.push_back(color); - } - else - { - hasColor = false; - colors.clear(); - } - if ((normals.back() == core::vectorSIMDf()).all()) { normals.back().set( @@ -265,44 +153,36 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } } // end while (_file->getPos() < filesize) - const size_t vtxSize = hasColor ? 
(3 * sizeof(float) + 4 + 4) : (3 * sizeof(float) + 4); - auto vertexBuf = asset::ICPUBuffer::create({ vtxSize * positions.size() }); + if (positions.empty()) + return {}; - quant_normal_t normal; + core::vector posData(positions.size() * 3u); + core::vector normalData(positions.size() * 3u); for (size_t i = 0u; i < positions.size(); ++i) { - if (i % 3 == 0) - normal = quantNormalCache->quantize(normals[i / 3]); - uint8_t* ptr = (reinterpret_cast(vertexBuf->getPointer())) + i * vtxSize; - memcpy(ptr, positions[i].pointer, 3 * 4); - - *reinterpret_cast(ptr + 12) = normal; - - if (hasColor) - memcpy(ptr + 16, colors.data() + i / 3, 4); - } - - const IAssetLoader::SAssetLoadContext fakeContext(IAssetLoader::SAssetLoadParams{}, nullptr); - const asset::IAsset::E_TYPE types[]{ asset::IAsset::ET_RENDERPASS_INDEPENDENT_PIPELINE, (asset::IAsset::E_TYPE)0u }; - auto pipelineBundle = _override->findCachedAsset(getPipelineCacheKey(hasColor).data(), types, fakeContext, _hierarchyLevel + ICPURenderpassIndependentPipeline::DESC_SET_HIERARCHYLEVELS_BELOW); - { - bool status = !pipelineBundle.getContents().empty(); - assert(status); + const auto& pos = positions[i]; + const auto& nrm = normals[i / 3u]; + const size_t base = i * 3u; + posData[base + 0u] = pos.pointer[0]; + posData[base + 1u] = pos.pointer[1]; + posData[base + 2u] = pos.pointer[2]; + normalData[base + 0u] = nrm.pointer[0]; + normalData[base + 1u] = nrm.pointer[1]; + normalData[base + 2u] = nrm.pointer[2]; } - auto mbPipeline = core::smart_refctd_ptr_static_cast(pipelineBundle.getContents().begin()[0]); - - auto meta = core::make_smart_refctd_ptr(1u, std::move(m_basicViewParamsSemantics)); - meta->placeMeta(0u, mbPipeline.get()); - - meshbuffer->setPipeline(std::move(mbPipeline)); - meshbuffer->setIndexCount(positions.size()); - meshbuffer->setIndexType(asset::EIT_UNKNOWN); - - meshbuffer->setVertexBufferBinding({ 0ul, vertexBuf }, 0); - mesh->getMeshBufferVector().emplace_back(std::move(meshbuffer)); - - return 
SAssetBundle(std::move(meta), { std::move(mesh) }); + auto geometry = core::make_smart_refctd_ptr(); + geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + auto posView = createView(EF_R32G32B32_SFLOAT, positions.size(), posData.data()); + auto normalView = createView(EF_R32G32B32_SFLOAT, positions.size(), normalData.data()); + geometry->setPositionView(std::move(posView)); + geometry->setNormalView(std::move(normalView)); + CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); + CPolygonGeometryManipulator::recomputeRanges(geometry.get()); + CPolygonGeometryManipulator::recomputeAABB(geometry.get()); + + auto meta = core::make_smart_refctd_ptr(); + return SAssetBundle(std::move(meta), { std::move(geometry) }); } bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const @@ -373,7 +253,6 @@ void CSTLMeshFileLoader::getNextVector(SContext* context, core::vectorSIMDf& vec getNextToken(context, tmp); sscanf(tmp.c_str(), "%f", &vec.Z); } - vec.X = -vec.X; } //! Read next word diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.h b/src/nbl/asset/interchange/CSTLMeshFileLoader.h index f7020ab292..1553f29049 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.h +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.h @@ -44,8 +44,6 @@ class CSTLMeshFileLoader final : public IGeometryLoader virtual void initialize() override; - const std::string_view getPipelineCacheKey(bool withColorAttribute) { return withColorAttribute ? 
"nbl/builtin/pipeline/loader/STL/color_attribute" : "nbl/builtin/pipeline/loader/STL/no_color_attribute"; } - // skips to the first non-space character available void goNextWord(SContext* context) const; // returns the next word diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 45c7c1f939..5961c7dd4c 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -6,16 +6,13 @@ #include "nbl/system/IFile.h" #include "CSTLMeshWriter.h" -#include "SColor.h" + +#include using namespace nbl; using namespace nbl::asset; #ifdef _NBL_COMPILE_WITH_STL_WRITER_ -constexpr auto POSITION_ATTRIBUTE = 0; -constexpr auto COLOR_ATTRIBUTE = 1; -constexpr auto UV_ATTRIBUTE = 2; -constexpr auto NORMAL_ATTRIBUTE = 3; CSTLMeshWriter::CSTLMeshWriter() { @@ -24,29 +21,27 @@ CSTLMeshWriter::CSTLMeshWriter() #endif } - CSTLMeshWriter::~CSTLMeshWriter() { } -//! writes a mesh bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { - if (!_override) - getDefaultOverride(_override); + if (!_override) + getDefaultOverride(_override); - SAssetWriteContext inCtx{_params, _file}; + SAssetWriteContext inCtx{_params, _file}; - const asset::ICPUMesh* mesh = -# ifndef _NBL_DEBUG - static_cast(_params.rootAsset); -# else - dynamic_cast(_params.rootAsset); -# endif - assert(mesh); - - system::IFile* file = _override->getOutputFile(_file, inCtx, {mesh, 0u}); + const asset::ICPUPolygonGeometry* geom = +#ifndef _NBL_DEBUG + static_cast(_params.rootAsset); +#else + dynamic_cast(_params.rootAsset); +#endif + if (!geom) + return false; + system::IFile* file = _override->getOutputFile(_file, inCtx, {geom, 0u}); if (!file) return false; @@ -54,207 +49,223 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ _params.logger.log("WRITING STL: writing the file %s", system::ILogger::ELL_INFO, 
file->getFileName().string().c_str()); - const asset::E_WRITER_FLAGS flags = _override->getAssetWritingFlags(context.writeContext, mesh, 0u); + const asset::E_WRITER_FLAGS flags = _override->getAssetWritingFlags(context.writeContext, geom, 0u); if (flags & asset::EWF_BINARY) - return writeMeshBinary(mesh, &context); - else - return writeMeshASCII(mesh, &context); + return writeMeshBinary(geom, &context); + return writeMeshASCII(geom, &context); } namespace { -template -inline void writeFacesBinary(const asset::ICPUMeshBuffer* buffer, const bool& noIndices, system::IFile* file, uint32_t _colorVaid, IAssetWriter::SAssetWriteContext* context, size_t* fileOffset) +inline bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, const uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx) { - auto& inputParams = buffer->getPipeline()->getCachedCreationParams().vertexInput; - bool hasColor = inputParams.enabledAttribFlags & core::createBitmask({ COLOR_ATTRIBUTE }); - const asset::E_FORMAT colorType = static_cast(hasColor ? 
inputParams.attributes[COLOR_ATTRIBUTE].format : asset::EF_UNKNOWN); - - const uint32_t indexCount = buffer->getIndexCount(); - for (uint32_t j = 0u; j < indexCount; j += 3u) - { - I idx[3]; - for (uint32_t i = 0u; i < 3u; ++i) - { - if (noIndices) - idx[i] = j + i; - else - idx[i] = ((I*)buffer->getIndices())[j + i]; - } - - core::vectorSIMDf v[3]; - for (uint32_t i = 0u; i < 3u; ++i) - v[i] = buffer->getPosition(idx[i]); - - uint16_t color = 0u; - if (hasColor) - { - if (asset::isIntegerFormat(colorType)) - { - uint32_t res[4]; - for (uint32_t i = 0u; i < 3u; ++i) - { - uint32_t d[4]; - buffer->getAttribute(d, _colorVaid, idx[i]); - res[0] += d[0]; res[1] += d[1]; res[2] += d[2]; - } - color = video::RGB16(res[0]/3, res[1]/3, res[2]/3); - } - else - { - core::vectorSIMDf res; - for (uint32_t i = 0u; i < 3u; ++i) - { - core::vectorSIMDf d; - buffer->getAttribute(d, _colorVaid, idx[i]); - res += d; - } - res /= 3.f; - color = video::RGB16(res.X, res.Y, res.Z); - } - } - - core::vectorSIMDf normal = core::plane3dSIMDf(v[0], v[1], v[2]).getNormal(); - core::vectorSIMDf vertex1 = v[2]; - core::vectorSIMDf vertex2 = v[1]; - core::vectorSIMDf vertex3 = v[0]; - - auto flipVectors = [&]() - { - vertex1.X = -vertex1.X; - vertex2.X = -vertex2.X; - vertex3.X = -vertex3.X; - normal = core::plane3dSIMDf(vertex1, vertex2, vertex3).getNormal(); - }; + uint32_t idx[3] = {}; + const auto& indexView = geom->getIndexView(); + const void* indexBuffer = indexView ? indexView.getPointer() : nullptr; + const uint64_t indexSize = indexView ? 
indexView.composed.getStride() : 0u; + IPolygonGeometryBase::IIndexingCallback::SContext ctx = { + .indexBuffer = indexBuffer, + .indexSize = indexSize, + .beginPrimitive = primIx, + .endPrimitive = primIx + 1u, + .out = idx + }; + indexing->operator()(ctx); + if (outIdx) + { + outIdx[0] = idx[0]; + outIdx[1] = idx[1]; + outIdx[2] = idx[2]; + } - if (!(context->params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED)) - flipVectors(); + hlsl::float32_t3 p0 = {}; + hlsl::float32_t3 p1 = {}; + hlsl::float32_t3 p2 = {}; + if (!posView.decodeElement(idx[0], p0)) + return false; + if (!posView.decodeElement(idx[1], p1)) + return false; + if (!posView.decodeElement(idx[2], p2)) + return false; - { - system::IFile::success_t success;; - file->write(success, &normal, *fileOffset, 12); - - *fileOffset += success.getBytesProcessed(); - } + out0 = core::vectorSIMDf(p0.x, p0.y, p0.z, 1.f); + out1 = core::vectorSIMDf(p1.x, p1.y, p1.z, 1.f); + out2 = core::vectorSIMDf(p2.x, p2.y, p2.z, 1.f); + return true; +} - { - system::IFile::success_t success;; - file->write(success, &vertex1, *fileOffset, 12); - - *fileOffset += success.getBytesProcessed(); - } +inline bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal) +{ + if (!normalView || !idx) + return false; - { - system::IFile::success_t success;; - file->write(success, &vertex2, *fileOffset, 12); - - *fileOffset += success.getBytesProcessed(); - } + hlsl::float32_t3 n0 = {}; + hlsl::float32_t3 n1 = {}; + hlsl::float32_t3 n2 = {}; + if (!normalView.decodeElement(idx[0], n0)) + return false; + if (!normalView.decodeElement(idx[1], n1)) + return false; + if (!normalView.decodeElement(idx[2], n2)) + return false; - { - system::IFile::success_t success;; - file->write(success, &vertex3, *fileOffset, 12); - - *fileOffset += success.getBytesProcessed(); - } + auto normal = core::vectorSIMDf(n0.x, n0.y, n0.z, 0.f); + if ((normal == core::vectorSIMDf(0.f)).all()) + 
normal = core::vectorSIMDf(n1.x, n1.y, n1.z, 0.f); + if ((normal == core::vectorSIMDf(0.f)).all()) + normal = core::vectorSIMDf(n2.x, n2.y, n2.z, 0.f); + if ((normal == core::vectorSIMDf(0.f)).all()) + return false; - { - system::IFile::success_t success;; - file->write(success, &color, *fileOffset, 2); // saving color using non-standard VisCAM/SolidView trick - - *fileOffset += success.getBytesProcessed(); - } - } + outNormal = normal; + return true; } } -bool CSTLMeshWriter::writeMeshBinary(const asset::ICPUMesh* mesh, SContext* context) +bool CSTLMeshWriter::writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) { + if (!geom) + return false; + + const auto* indexing = geom->getIndexingCallback(); + if (!indexing || indexing->degree() != 3u) + return false; + + const auto& posView = geom->getPositionView(); + if (!posView) + return false; + const auto& normalView = geom->getNormalView(); + const bool flipHandedness = !(context->writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + // write STL MESH header - const char headerTxt[] = "Irrlicht-baw Engine"; - constexpr size_t HEADER_SIZE = 80u; + const char headerTxt[] = "Irrlicht-baw Engine"; + constexpr size_t HEADER_SIZE = 80u; { system::IFile::success_t success;; context->writeContext.outputFile->write(success, headerTxt, context->fileOffset, sizeof(headerTxt)); - context->fileOffset += success.getBytesProcessed(); } - const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); // TODO: check it - const int32_t sizeleft = HEADER_SIZE - sizeof(headerTxt) - name.size(); + const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); + const int32_t sizeleft = HEADER_SIZE - sizeof(headerTxt) - static_cast(name.size()); if (sizeleft < 0) { system::IFile::success_t success;; context->writeContext.outputFile->write(success, name.c_str(), context->fileOffset, HEADER_SIZE - 
sizeof(headerTxt)); - context->fileOffset += success.getBytesProcessed(); } else { const char buf[80] = {0}; - { system::IFile::success_t success;; context->writeContext.outputFile->write(success, name.c_str(), context->fileOffset, name.size()); - context->fileOffset += success.getBytesProcessed(); } - { system::IFile::success_t success;; context->writeContext.outputFile->write(success, buf, context->fileOffset, sizeleft); - context->fileOffset += success.getBytesProcessed(); } } - uint32_t facenum = 0; - for (auto& mb : mesh->getMeshBuffers()) - facenum += mb->getIndexCount()/3; + const uint32_t facenum = static_cast(geom->getPrimitiveCount()); { system::IFile::success_t success;; context->writeContext.outputFile->write(success, &facenum, context->fileOffset, sizeof(facenum)); - context->fileOffset += success.getBytesProcessed(); } - // write mesh buffers - for (auto& buffer : mesh->getMeshBuffers()) - if (buffer) + for (uint32_t primIx = 0u; primIx < facenum; ++primIx) { - asset::E_INDEX_TYPE type = buffer->getIndexType(); - if (!buffer->getIndexBufferBinding().buffer) - type = asset::EIT_UNKNOWN; - - if (type== asset::EIT_16BIT) - writeFacesBinary(buffer, false, context->writeContext.outputFile, COLOR_ATTRIBUTE, &context->writeContext, &context->fileOffset); - else if (type== asset::EIT_32BIT) - writeFacesBinary(buffer, false, context->writeContext.outputFile, COLOR_ATTRIBUTE, &context->writeContext, &context->fileOffset); - else - writeFacesBinary(buffer, true, context->writeContext.outputFile, COLOR_ATTRIBUTE, &context->writeContext, &context->fileOffset); //template param doesn't matter if there's no indices + core::vectorSIMDf v0; + core::vectorSIMDf v1; + core::vectorSIMDf v2; + uint32_t idx[3] = {}; + if (!decodeTriangle(geom, indexing, posView, primIx, v0, v1, v2, idx)) + return false; + + core::vectorSIMDf vertex1 = v2; + core::vectorSIMDf vertex2 = v1; + core::vectorSIMDf vertex3 = v0; + + if (flipHandedness) + { + vertex1.X = -vertex1.X; + vertex2.X = 
-vertex2.X; + vertex3.X = -vertex3.X; + } + + core::vectorSIMDf normal = core::plane3dSIMDf(vertex1, vertex2, vertex3).getNormal(); + core::vectorSIMDf attrNormal; + if (decodeTriangleNormal(normalView, idx, attrNormal)) + { + if (flipHandedness) + attrNormal.X = -attrNormal.X; + if (core::dot(attrNormal, normal).X < 0.f) + attrNormal = -attrNormal; + normal = attrNormal; + } + + { + system::IFile::success_t success;; + context->writeContext.outputFile->write(success, &normal, context->fileOffset, 12); + context->fileOffset += success.getBytesProcessed(); + } + { + system::IFile::success_t success;; + context->writeContext.outputFile->write(success, &vertex1, context->fileOffset, 12); + context->fileOffset += success.getBytesProcessed(); + } + { + system::IFile::success_t success;; + context->writeContext.outputFile->write(success, &vertex2, context->fileOffset, 12); + context->fileOffset += success.getBytesProcessed(); + } + { + system::IFile::success_t success;; + context->writeContext.outputFile->write(success, &vertex3, context->fileOffset, 12); + context->fileOffset += success.getBytesProcessed(); + } + { + const uint16_t color = 0u; + system::IFile::success_t success;; + context->writeContext.outputFile->write(success, &color, context->fileOffset, 2); + context->fileOffset += success.getBytesProcessed(); + } } + return true; } -bool CSTLMeshWriter::writeMeshASCII(const asset::ICPUMesh* mesh, SContext* context) +bool CSTLMeshWriter::writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) { - // write STL MESH header - const char headerTxt[] = "Irrlicht-baw Engine "; + if (!geom) + return false; + + const auto* indexing = geom->getIndexingCallback(); + if (!indexing || indexing->degree() != 3u) + return false; + + const auto& posView = geom->getPositionView(); + if (!posView) + return false; + const auto& normalView = geom->getNormalView(); + const bool flipHandedness = !(context->writeContext.params.flags & 
E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + + const char headerTxt[] = "Irrlicht-baw Engine "; { system::IFile::success_t success;; context->writeContext.outputFile->write(success, "solid ", context->fileOffset, 6); - context->fileOffset += success.getBytesProcessed(); } - { system::IFile::success_t success;; context->writeContext.outputFile->write(success, headerTxt, context->fileOffset, sizeof(headerTxt) - 1); - context->fileOffset += success.getBytesProcessed(); } @@ -263,70 +274,28 @@ bool CSTLMeshWriter::writeMeshASCII(const asset::ICPUMesh* mesh, SContext* conte { system::IFile::success_t success;; context->writeContext.outputFile->write(success, name.c_str(), context->fileOffset, name.size()); - context->fileOffset += success.getBytesProcessed(); } - { system::IFile::success_t success;; context->writeContext.outputFile->write(success, "\n", context->fileOffset, 1); - context->fileOffset += success.getBytesProcessed(); } - // write mesh buffers - for (auto& buffer : mesh->getMeshBuffers()) - if (buffer) + const uint32_t faceCount = static_cast(geom->getPrimitiveCount()); + for (uint32_t primIx = 0u; primIx < faceCount; ++primIx) { - asset::E_INDEX_TYPE type = buffer->getIndexType(); - if (!buffer->getIndexBufferBinding().buffer) - type = asset::EIT_UNKNOWN; - const uint32_t indexCount = buffer->getIndexCount(); - if (type==asset::EIT_16BIT) - { - //os::Printer::log("Writing mesh with 16bit indices"); - for (uint32_t j=0; jgetPosition(((uint16_t*)buffer->getIndices())[j]), - buffer->getPosition(((uint16_t*)buffer->getIndices())[j+1]), - buffer->getPosition(((uint16_t*)buffer->getIndices())[j+2]), - context - ); - } - } - else if (type==asset::EIT_32BIT) - { - //os::Printer::log("Writing mesh with 32bit indices"); - for (uint32_t j=0; jgetPosition(((uint32_t*)buffer->getIndices())[j]), - buffer->getPosition(((uint32_t*)buffer->getIndices())[j+1]), - buffer->getPosition(((uint32_t*)buffer->getIndices())[j+2]), - context - ); - } - } - else - { - 
//os::Printer::log("Writing mesh with no indices"); - for (uint32_t j=0; jgetPosition(j), - buffer->getPosition(j+1ul), - buffer->getPosition(j+2ul), - context - ); - } - } - + core::vectorSIMDf v0; + core::vectorSIMDf v1; + core::vectorSIMDf v2; + uint32_t idx[3] = {}; + if (!decodeTriangle(geom, indexing, posView, primIx, v0, v1, v2, idx)) + return false; + writeFaceText(v0, v1, v2, idx, normalView, flipHandedness, context); { system::IFile::success_t success;; context->writeContext.outputFile->write(success, "\n", context->fileOffset, 1); - context->fileOffset += success.getBytesProcessed(); } } @@ -334,21 +303,18 @@ bool CSTLMeshWriter::writeMeshASCII(const asset::ICPUMesh* mesh, SContext* conte { system::IFile::success_t success;; context->writeContext.outputFile->write(success, "endsolid ", context->fileOffset, 9); - context->fileOffset += success.getBytesProcessed(); } { system::IFile::success_t success;; context->writeContext.outputFile->write(success, headerTxt, context->fileOffset, sizeof(headerTxt) - 1); - context->fileOffset += success.getBytesProcessed(); } { system::IFile::success_t success;; context->writeContext.outputFile->write(success, name.c_str(), context->fileOffset, name.size()); - context->fileOffset += success.getBytesProcessed(); } @@ -357,116 +323,110 @@ bool CSTLMeshWriter::writeMeshASCII(const asset::ICPUMesh* mesh, SContext* conte void CSTLMeshWriter::getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s) const { - std::ostringstream tmp; - tmp << v.X << " " << v.Y << " " << v.Z << "\n"; - s = std::string(tmp.str().c_str()); + std::ostringstream tmp; + tmp << v.X << " " << v.Y << " " << v.Z << "\n"; + s = std::string(tmp.str().c_str()); } void CSTLMeshWriter::writeFaceText( const core::vectorSIMDf& v1, const core::vectorSIMDf& v2, const core::vectorSIMDf& v3, + const uint32_t* idx, + const asset::ICPUPolygonGeometry::SDataView& normalView, + const bool flipHandedness, SContext* context) { core::vectorSIMDf vertex1 = v3; 
core::vectorSIMDf vertex2 = v2; core::vectorSIMDf vertex3 = v1; - core::vectorSIMDf normal = core::plane3dSIMDf(vertex1, vertex2, vertex3).getNormal(); std::string tmp; - auto flipVectors = [&]() + if (flipHandedness) { vertex1.X = -vertex1.X; vertex2.X = -vertex2.X; vertex3.X = -vertex3.X; - normal = core::plane3dSIMDf(vertex1, vertex2, vertex3).getNormal(); - }; - - if (!(context->writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED)) - flipVectors(); - + } + + core::vectorSIMDf normal = core::plane3dSIMDf(vertex1, vertex2, vertex3).getNormal(); + core::vectorSIMDf attrNormal; + if (decodeTriangleNormal(normalView, idx, attrNormal)) + { + if (flipHandedness) + attrNormal.X = -attrNormal.X; + if (core::dot(attrNormal, normal).X < 0.f) + attrNormal = -attrNormal; + normal = attrNormal; + } + { system::IFile::success_t success;; context->writeContext.outputFile->write(success, "facet normal ", context->fileOffset, 13); - context->fileOffset += success.getBytesProcessed(); } getVectorAsStringLine(normal, tmp); - { system::IFile::success_t success;; context->writeContext.outputFile->write(success, tmp.c_str(), context->fileOffset, tmp.size()); - context->fileOffset += success.getBytesProcessed(); } { system::IFile::success_t success;; context->writeContext.outputFile->write(success, " outer loop\n", context->fileOffset, 13); - context->fileOffset += success.getBytesProcessed(); } { system::IFile::success_t success;; context->writeContext.outputFile->write(success, " vertex ", context->fileOffset, 11); - context->fileOffset += success.getBytesProcessed(); } getVectorAsStringLine(vertex1, tmp); - { system::IFile::success_t success;; context->writeContext.outputFile->write(success, tmp.c_str(), context->fileOffset, tmp.size()); - context->fileOffset += success.getBytesProcessed(); } { system::IFile::success_t success;; context->writeContext.outputFile->write(success, " vertex ", context->fileOffset, 11); - context->fileOffset += 
success.getBytesProcessed(); } getVectorAsStringLine(vertex2, tmp); - { system::IFile::success_t success;; context->writeContext.outputFile->write(success, tmp.c_str(), context->fileOffset, tmp.size()); - context->fileOffset += success.getBytesProcessed(); } { system::IFile::success_t success;; context->writeContext.outputFile->write(success, " vertex ", context->fileOffset, 11); - context->fileOffset += success.getBytesProcessed(); } getVectorAsStringLine(vertex3, tmp); - { system::IFile::success_t success;; context->writeContext.outputFile->write(success, tmp.c_str(), context->fileOffset, tmp.size()); - context->fileOffset += success.getBytesProcessed(); } { system::IFile::success_t success;; context->writeContext.outputFile->write(success, " endloop\n", context->fileOffset, 10); - context->fileOffset += success.getBytesProcessed(); } { system::IFile::success_t success;; context->writeContext.outputFile->write(success, "endfacet\n", context->fileOffset, 9); - context->fileOffset += success.getBytesProcessed(); } } diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.h b/src/nbl/asset/interchange/CSTLMeshWriter.h index a25a84534c..a37c6129a8 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.h +++ b/src/nbl/asset/interchange/CSTLMeshWriter.h @@ -52,7 +52,7 @@ class CSTLMeshWriter : public IGeometryWriter void getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s) const; // write face information to file - void writeFaceText(const core::vectorSIMDf& v1, const core::vectorSIMDf& v2, const core::vectorSIMDf& v3, SContext* context); + void writeFaceText(const core::vectorSIMDf& v1, const core::vectorSIMDf& v2, const core::vectorSIMDf& v3, const uint32_t* idx, const asset::ICPUPolygonGeometry::SDataView& normalView, const bool flipHandedness, SContext* context); }; } // end namespace diff --git a/src/nbl/asset/interchange/IGeometryWriter.cpp b/src/nbl/asset/interchange/IGeometryWriter.cpp new file mode 100644 index 0000000000..10b55728e6 --- /dev/null 
+++ b/src/nbl/asset/interchange/IGeometryWriter.cpp @@ -0,0 +1,12 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/asset/interchange/IGeometryWriter.h" + +namespace nbl::asset +{ + +IGeometryWriter::~IGeometryWriter() = default; + +} diff --git a/src/nbl/asset/pch_asset.h b/src/nbl/asset/pch_asset.h index 361df786f1..8ee0d9ca7d 100644 --- a/src/nbl/asset/pch_asset.h +++ b/src/nbl/asset/pch_asset.h @@ -37,6 +37,7 @@ #include "nbl/asset/interchange/CPLYMeshFileLoader.h" #include "nbl/asset/interchange/CSTLMeshFileLoader.h" // writers +#include "nbl/asset/interchange/COBJMeshWriter.h" #include "nbl/asset/interchange/CPLYMeshWriter.h" #include "nbl/asset/interchange/CSTLMeshWriter.h" // manipulation From 100bc717ad1d843056a947dd42b7942afe2a5257 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 5 Feb 2026 21:10:37 +0100 Subject: [PATCH 002/118] Update OBJ STL loaders for flag removal --- examples_tests | 2 +- .../asset/interchange/COBJMeshFileLoader.cpp | 21 +++---------------- .../asset/interchange/CSTLMeshFileLoader.cpp | 6 ------ 3 files changed, 4 insertions(+), 25 deletions(-) diff --git a/examples_tests b/examples_tests index 7130e19108..97b15e2ef5 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 7130e19108ead0700d05ef65cfcf9c971303c916 +Subproject commit 97b15e2ef54ba107899db69238551f1543801175 diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 5277b6911a..e254af3f69 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -85,8 +85,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const char* const bufEnd = buf + filesize; const char* bufPtr = buf; - bool rightHanded = (_params.loaderFlags & 
E_LOADER_PARAMETER_FLAGS::ELPF_RIGHT_HANDED_MESHES) != 0; - core::vector positions; core::vector normals; core::vector uvs; @@ -114,8 +112,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as { Float3 vec{}; bufPtr = readVec3(bufPtr, &vec.x, bufEnd); - if (rightHanded) - vec.x = -vec.x; positions.push_back(vec); } break; @@ -123,8 +119,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as { Float3 vec{}; bufPtr = readVec3(bufPtr, &vec.x, bufEnd); - if (rightHanded) - vec.x = -vec.x; normals.push_back(vec); } break; @@ -199,18 +193,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as for (uint32_t i = 1u; i + 1u < faceCorners.size(); ++i) { - if (rightHanded) - { - indices.push_back(faceCorners[0]); - indices.push_back(faceCorners[i]); - indices.push_back(faceCorners[i + 1]); - } - else - { - indices.push_back(faceCorners[i + 1]); - indices.push_back(faceCorners[i]); - indices.push_back(faceCorners[0]); - } + indices.push_back(faceCorners[i + 1]); + indices.push_back(faceCorners[i]); + indices.push_back(faceCorners[0]); } } break; diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index cfdf98ec97..951b98ba73 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -50,8 +50,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa std::string token; if (getNextToken(&context, token) != "solid") binary = true; - const bool rightHanded = (_params.loaderFlags & E_LOADER_PARAMETER_FLAGS::ELPF_RIGHT_HANDED_MESHES) != 0; - core::vector positions, normals; if (binary) { @@ -96,8 +94,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { core::vectorSIMDf n; getNextVector(&context, n, binary); - if (rightHanded) - n.x = -n.x; const float len2 = core::dot(n, n).X; if (len2 > 0.f && std::abs(len2 
- 1.f) < 1e-4f) normals.push_back(n); @@ -121,8 +117,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa return {}; } getNextVector(&context, p[i], binary); - if (rightHanded) - p[i].x = -p[i].x; } for (uint32_t i = 0u; i < 3u; ++i) // seems like in STL format vertices are ordered in clockwise manner... positions.push_back(p[2u - i]); From 89a2d5ffb3e8634e8ced7e67cc165fb0ae35e90c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Feb 2026 17:06:38 +0100 Subject: [PATCH 003/118] Add file IO policy and optimize mesh loaders writers --- examples_tests | 2 +- .../nbl/asset/interchange/COBJMeshWriter.h | 13 +- include/nbl/asset/interchange/IAssetLoader.h | 9 +- include/nbl/asset/interchange/IAssetWriter.h | 8 +- include/nbl/asset/interchange/SFileIOPolicy.h | 134 ++++ .../asset/utils/CPolygonGeometryManipulator.h | 2 + .../asset/interchange/COBJMeshFileLoader.cpp | 602 +++++++++------- .../asset/interchange/COBJMeshFileLoader.h | 60 +- src/nbl/asset/interchange/COBJMeshWriter.cpp | 304 +++++++-- .../asset/interchange/CPLYMeshFileLoader.cpp | 388 ++++++++++- .../asset/interchange/CPLYMeshFileLoader.h | 11 +- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 354 ++++++++-- src/nbl/asset/interchange/CPLYMeshWriter.h | 50 +- .../asset/interchange/CSTLMeshFileLoader.cpp | 642 ++++++++++++------ .../asset/interchange/CSTLMeshFileLoader.h | 34 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 390 ++++++----- src/nbl/asset/interchange/CSTLMeshWriter.h | 38 +- .../utils/CPolygonGeometryManipulator.cpp | 86 +++ 18 files changed, 2159 insertions(+), 968 deletions(-) create mode 100644 include/nbl/asset/interchange/SFileIOPolicy.h diff --git a/examples_tests b/examples_tests index 97b15e2ef5..99454acc4f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 97b15e2ef54ba107899db69238551f1543801175 +Subproject commit 99454acc4f7dd20cd45b1cad256a94efacdf5b93 diff --git 
a/include/nbl/asset/interchange/COBJMeshWriter.h b/include/nbl/asset/interchange/COBJMeshWriter.h index abcfa4169d..0f1cc3c2e6 100644 --- a/include/nbl/asset/interchange/COBJMeshWriter.h +++ b/include/nbl/asset/interchange/COBJMeshWriter.h @@ -5,7 +5,6 @@ #define _NBL_ASSET_OBJ_MESH_WRITER_H_INCLUDED_ -#include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/interchange/IGeometryWriter.h" @@ -18,17 +17,13 @@ class COBJMeshWriter : public IGeometryWriter public: COBJMeshWriter(); - virtual const char** getAssociatedFileExtensions() const - { - static const char* ext[]{ "obj", nullptr }; - return ext; - } + const char** getAssociatedFileExtensions() const override; - virtual uint32_t getSupportedFlags() override { return 0u; } + uint32_t getSupportedFlags() override; - virtual uint32_t getForcedFlags() { return 0u; } + uint32_t getForcedFlags() override; - virtual bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; + bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; } // end namespace diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 09f842e659..ac4ac25782 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -11,6 +11,7 @@ #include "nbl/system/ILogger.h" #include "nbl/asset/interchange/SAssetBundle.h" +#include "nbl/asset/interchange/SFileIOPolicy.h" namespace nbl::asset @@ -97,10 +98,10 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted { inline SAssetLoadParams(const size_t _decryptionKeyLen = 0u, const uint8_t* const _decryptionKey = nullptr, const E_CACHING_FLAGS _cacheFlags = ECF_CACHE_EVERYTHING,const E_LOADER_PARAMETER_FLAGS _loaderFlags = ELPF_NONE, - const system::logger_opt_ptr _logger = nullptr, const std::filesystem::path& cwd = "") : + const 
system::logger_opt_ptr _logger = nullptr, const std::filesystem::path& cwd = "", const SFileIOPolicy& _ioPolicy = {}) : decryptionKeyLen(_decryptionKeyLen), decryptionKey(_decryptionKey), cacheFlags(_cacheFlags), loaderFlags(_loaderFlags), - logger(std::move(_logger)), workingDirectory(cwd) + logger(std::move(_logger)), workingDirectory(cwd), ioPolicy(_ioPolicy) { } @@ -110,7 +111,8 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted cacheFlags(rhs.cacheFlags), loaderFlags(rhs.loaderFlags), logger(rhs.logger), - workingDirectory(rhs.workingDirectory) + workingDirectory(rhs.workingDirectory), + ioPolicy(rhs.ioPolicy) { } @@ -120,6 +122,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted E_LOADER_PARAMETER_FLAGS loaderFlags; //!< Flags having an impact on extraordinary tasks during loading process std::filesystem::path workingDirectory = ""; system::logger_opt_ptr logger; + SFileIOPolicy ioPolicy = {}; }; //! Struct for keeping the state of the current loadoperation for safe threading diff --git a/include/nbl/asset/interchange/IAssetWriter.h b/include/nbl/asset/interchange/IAssetWriter.h index 694053df5e..46055c08a1 100644 --- a/include/nbl/asset/interchange/IAssetWriter.h +++ b/include/nbl/asset/interchange/IAssetWriter.h @@ -9,6 +9,7 @@ #include "nbl/system/ILogger.h" #include "nbl/asset/IAsset.h" +#include "nbl/asset/interchange/SFileIOPolicy.h" namespace nbl::asset @@ -85,10 +86,10 @@ class IAssetWriter : public virtual core::IReferenceCounted */ struct SAssetWriteParams { - SAssetWriteParams(IAsset* _asset, const E_WRITER_FLAGS& _flags = EWF_NONE, const float& _compressionLevel = 0.f, const size_t& _encryptionKeyLen = 0, const uint8_t* _encryptionKey = nullptr, const void* _userData = nullptr, const system::logger_opt_ptr _logger = nullptr, system::path cwd = "") : + SAssetWriteParams(IAsset* _asset, const E_WRITER_FLAGS& _flags = EWF_NONE, const float& _compressionLevel = 0.f, const size_t& _encryptionKeyLen = 0, const 
uint8_t* _encryptionKey = nullptr, const void* _userData = nullptr, const system::logger_opt_ptr _logger = nullptr, system::path cwd = "", const SFileIOPolicy& _ioPolicy = {}) : rootAsset(_asset), flags(_flags), compressionLevel(_compressionLevel), encryptionKeyLen(_encryptionKeyLen), encryptionKey(_encryptionKey), - userData(_userData), logger(_logger), workingDirectory(cwd) + userData(_userData), logger(_logger), workingDirectory(cwd), ioPolicy(_ioPolicy) { } @@ -100,6 +101,7 @@ class IAssetWriter : public virtual core::IReferenceCounted const void* userData; //!< Stores writer-dependets parameters. It is usually a struct provided by a writer author. system::logger_opt_ptr logger; system::path workingDirectory; + SFileIOPolicy ioPolicy = {}; }; //! Struct for keeping the state of the current write operation for safe threading @@ -192,4 +194,4 @@ class IAssetWriter : public virtual core::IReferenceCounted }; } //nbl::asset -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h new file mode 100644 index 0000000000..50e9acd0d2 --- /dev/null +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -0,0 +1,134 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_FILE_IO_POLICY_H_INCLUDED_ +#define _NBL_ASSET_S_FILE_IO_POLICY_H_INCLUDED_ + + +#include +#include + + +namespace nbl::asset +{ + +struct SFileIOPolicy +{ + enum class Strategy : uint8_t + { + Auto, + WholeFile, + Chunked + }; + + Strategy strategy = Strategy::Auto; + bool strict = false; + uint64_t wholeFileThresholdBytes = 64ull * 1024ull * 1024ull; + uint64_t chunkSizeBytes = 4ull * 1024ull * 1024ull; + uint64_t maxStagingBytes = 256ull * 1024ull * 1024ull; +}; + +struct SResolvedFileIOPolicy +{ + enum class Strategy : uint8_t + { + WholeFile, + Chunked + }; + + Strategy strategy = Strategy::Chunked; + uint64_t chunkSizeBytes = 0ull; + bool valid = true; + const char* reason = "ok"; +}; + +inline SResolvedFileIOPolicy resolveFileIOPolicy(const SFileIOPolicy& _policy, const uint64_t byteCount, const bool sizeKnown = true) +{ + constexpr uint64_t MIN_CHUNK_SIZE = 64ull * 1024ull; + + const uint64_t maxStaging = std::max(_policy.maxStagingBytes, MIN_CHUNK_SIZE); + const uint64_t requestedChunk = std::max(_policy.chunkSizeBytes, MIN_CHUNK_SIZE); + const uint64_t chunkSize = std::min(requestedChunk, maxStaging); + + auto makeChunked = [&](const char* reason) -> SResolvedFileIOPolicy + { + return SResolvedFileIOPolicy{ + .strategy = SResolvedFileIOPolicy::Strategy::Chunked, + .chunkSizeBytes = chunkSize, + .valid = true, + .reason = reason + }; + }; + auto makeWhole = [&](const char* reason) -> SResolvedFileIOPolicy + { + return SResolvedFileIOPolicy{ + .strategy = SResolvedFileIOPolicy::Strategy::WholeFile, + .chunkSizeBytes = chunkSize, + .valid = true, + .reason = reason + }; + }; + + switch (_policy.strategy) + { + case SFileIOPolicy::Strategy::WholeFile: + { + if (sizeKnown && byteCount <= maxStaging) + return makeWhole("requested_whole_file"); + if (_policy.strict) + { + return SResolvedFileIOPolicy{ + .strategy = SResolvedFileIOPolicy::Strategy::WholeFile, + 
.chunkSizeBytes = chunkSize, + .valid = false, + .reason = "whole_file_not_feasible_strict" + }; + } + return makeChunked(sizeKnown ? "whole_file_not_feasible_fallback_chunked" : "whole_file_unknown_size_fallback_chunked"); + } + case SFileIOPolicy::Strategy::Chunked: + return makeChunked("requested_chunked"); + case SFileIOPolicy::Strategy::Auto: + default: + { + if (!sizeKnown) + return makeChunked("auto_unknown_size"); + const uint64_t wholeThreshold = std::min(_policy.wholeFileThresholdBytes, maxStaging); + if (byteCount <= wholeThreshold) + return makeWhole("auto_small_enough_for_whole_file"); + return makeChunked("auto_too_large_for_whole_file"); + } + } +} + +inline const char* toString(const SFileIOPolicy::Strategy strategy) +{ + switch (strategy) + { + case SFileIOPolicy::Strategy::Auto: + return "auto"; + case SFileIOPolicy::Strategy::WholeFile: + return "whole"; + case SFileIOPolicy::Strategy::Chunked: + return "chunked"; + default: + return "unknown"; + } +} + +inline const char* toString(const SResolvedFileIOPolicy::Strategy strategy) +{ + switch (strategy) + { + case SResolvedFileIOPolicy::Strategy::WholeFile: + return "whole"; + case SResolvedFileIOPolicy::Strategy::Chunked: + return "chunked"; + default: + return "unknown"; + } +} + +} + +#endif diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 4a31bd6a95..115a9a8720 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -6,6 +6,7 @@ #include "nbl/core/declarations.h" +#include "nbl/core/hash/blake.h" #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/utils/CGeometryManipulator.h" @@ -20,6 +21,7 @@ namespace nbl::asset class NBL_API2 CPolygonGeometryManipulator { public: + static core::blake3_hash_t computeDeterministicContentHash(const ICPUPolygonGeometry* geo); static inline void recomputeContentHashes(ICPUPolygonGeometry* geo) { diff 
--git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index e254af3f69..6d3da1e1ba 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -10,17 +10,23 @@ #ifdef _NBL_COMPILE_WITH_OBJ_LOADER_ -#include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" #include "COBJMeshFileLoader.h" -#include +#include +#include +#include +#include +#include +#include +#include namespace nbl::asset { -static const uint32_t WORD_BUFFER_LENGTH = 512u; +namespace +{ struct ObjVertexKey { @@ -28,15 +34,20 @@ struct ObjVertexKey int32_t uv; int32_t normal; - inline bool operator<(const ObjVertexKey& other) const + inline bool operator==(const ObjVertexKey& other) const { - if (pos == other.pos) - { - if (uv == other.uv) - return normal < other.normal; - return uv < other.uv; - } - return pos < other.pos; + return pos == other.pos && uv == other.uv && normal == other.normal; + } +}; + +struct ObjVertexKeyHash +{ + inline size_t operator()(const ObjVertexKey& key) const noexcept + { + size_t h = static_cast(static_cast(key.pos)); + h ^= static_cast(static_cast(key.uv)) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= static_cast(static_cast(key.normal)) + 0x9e3779b9 + (h << 6) + (h >> 2); + return h; } }; @@ -53,36 +64,242 @@ struct Float2 float y; }; -static_assert(sizeof(Float3) == 12); -static_assert(sizeof(Float2) == 8); +static_assert(sizeof(Float3) == sizeof(float) * 3ull); +static_assert(sizeof(Float2) == sizeof(float) * 2ull); + +bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, double& ioMs) +{ + if (!file || !dst) + return false; + + using clock_t = std::chrono::high_resolution_clock; + const auto ioStart = clock_t::now(); + size_t bytesRead = 0ull; + switch (ioPlan.strategy) + { + case SResolvedFileIOPolicy::Strategy::WholeFile: + { + system::IFile::success_t success; + file->read(success, dst, 0ull, 
byteCount); + if (!success || success.getBytesProcessed() != byteCount) + return false; + bytesRead = byteCount; + break; + } + case SResolvedFileIOPolicy::Strategy::Chunked: + default: + { + while (bytesRead < byteCount) + { + const size_t toRead = static_cast(std::min(ioPlan.chunkSizeBytes, byteCount - bytesRead)); + system::IFile::success_t success; + file->read(success, dst + bytesRead, bytesRead, toRead); + if (!success) + return false; + const size_t processed = success.getBytesProcessed(); + if (processed == 0ull) + return false; + bytesRead += processed; + } + break; + } + } + ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); + return bytesRead == byteCount; +} + +const char* goFirstWord(const char* buf, const char* const bufEnd, bool acrossNewlines = true) +{ + if (acrossNewlines) + while ((buf != bufEnd) && core::isspace(*buf)) + ++buf; + else + while ((buf != bufEnd) && core::isspace(*buf) && (*buf != '\n')) + ++buf; + + return buf; +} + +const char* goNextWord(const char* buf, const char* const bufEnd, bool acrossNewlines = true) +{ + while ((buf != bufEnd) && !core::isspace(*buf)) + ++buf; + + return goFirstWord(buf, bufEnd, acrossNewlines); +} + +const char* goNextLine(const char* buf, const char* const bufEnd) +{ + while (buf != bufEnd) + { + if (*buf == '\n' || *buf == '\r') + break; + ++buf; + } + return goFirstWord(buf, bufEnd); +} + +const char* readVec3(const char* bufPtr, float vec[3], const char* const bufEnd) +{ + bufPtr = goNextWord(bufPtr, bufEnd, false); + for (uint32_t i = 0u; i < 3u; ++i) + { + if (bufPtr >= bufEnd) + return bufPtr; + + char* endPtr = nullptr; + vec[i] = std::strtof(bufPtr, &endPtr); + if (endPtr == bufPtr) + return bufPtr; + bufPtr = endPtr; + + while (bufPtr < bufEnd && core::isspace(*bufPtr) && *bufPtr != '\n' && *bufPtr != '\r') + ++bufPtr; + } + + return bufPtr; +} + +const char* readUV(const char* bufPtr, float vec[2], const char* const bufEnd) +{ + bufPtr = goNextWord(bufPtr, bufEnd, false); + for 
(uint32_t i = 0u; i < 2u; ++i) + { + if (bufPtr >= bufEnd) + return bufPtr; + + char* endPtr = nullptr; + vec[i] = std::strtof(bufPtr, &endPtr); + if (endPtr == bufPtr) + return bufPtr; + bufPtr = endPtr; + + while (bufPtr < bufEnd && core::isspace(*bufPtr) && *bufPtr != '\n' && *bufPtr != '\r') + ++bufPtr; + } + + vec[1] = 1.f - vec[1]; + return bufPtr; +} + +bool retrieveVertexIndices(const char* tokenBegin, const char* tokenEnd, int32_t* idx, uint32_t vbsize, uint32_t vtsize, uint32_t vnsize) +{ + if (!tokenBegin || !idx) + return false; + + idx[0] = -1; + idx[1] = -1; + idx[2] = -1; + + const char* p = tokenBegin; + for (uint32_t idxType = 0u; idxType < 3u && p < tokenEnd; ++idxType) + { + if (*p == '/') + { + ++p; + continue; + } + + char* endNum = nullptr; + const long parsed = std::strtol(p, &endNum, 10); + if (endNum == p) + return false; + + int32_t value = static_cast(parsed); + if (value < 0) + { + switch (idxType) + { + case 0: + value += static_cast(vbsize); + break; + case 1: + value += static_cast(vtsize); + break; + case 2: + value += static_cast(vnsize); + break; + default: + break; + } + } + else + { + value -= 1; + } + idx[idxType] = value; + + p = endNum; + if (p >= tokenEnd) + break; + + if (*p != '/') + break; + ++p; + } + + return true; +} -COBJMeshFileLoader::COBJMeshFileLoader(IAssetManager* _manager) : AssetManager(_manager), System(_manager->getSystem()) +} + +COBJMeshFileLoader::COBJMeshFileLoader(IAssetManager* _manager) +{ + (void)_manager; +} + +COBJMeshFileLoader::~COBJMeshFileLoader() = default; + +bool COBJMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const { + (void)logger; + if (!_file) + return false; + system::IFile::success_t succ; + char firstChar = 0; + _file->read(succ, &firstChar, 0ull, sizeof(firstChar)); + return succ && (firstChar == '#' || firstChar == 'v'); } -COBJMeshFileLoader::~COBJMeshFileLoader() +const char** COBJMeshFileLoader::getAssociatedFileExtensions() 
const { + static const char* ext[] = { "obj", nullptr }; + return ext; } asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) { + (void)_override; + (void)_hierarchyLevel; + if (!_file) return {}; + using clock_t = std::chrono::high_resolution_clock; + const auto totalStart = clock_t::now(); + double ioMs = 0.0; + double parseMs = 0.0; + double buildMs = 0.0; + double aabbMs = 0.0; + uint64_t faceCount = 0u; + const long filesize = _file->getSize(); if (filesize <= 0) return {}; + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(filesize), true); + if (!ioPlan.valid) + { + _params.logger.log("OBJ loader: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str(), ioPlan.reason); + return {}; + } std::string fileContents; - fileContents.resize(filesize); - - system::IFile::success_t success; - _file->read(success, fileContents.data(), 0, filesize); - if (!success) + fileContents.resize(static_cast(filesize)); + if (!readTextFileWithPolicy(_file, fileContents.data(), fileContents.size(), ioPlan, ioMs)) return {}; const char* const buf = fileContents.data(); - const char* const bufEnd = buf + filesize; + const char* const bufEnd = buf + static_cast(filesize); const char* bufPtr = buf; core::vector positions; @@ -94,63 +311,68 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector outUVs; core::vector indices; - core::map vtxMap; + std::unordered_map vtxMap; bool hasNormals = false; bool hasUVs = false; + hlsl::shapes::AABB<3, hlsl::float32_t> parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); + bool hasParsedAABB = false; - char tmpbuf[WORD_BUFFER_LENGTH]{}; - + const auto parseStart = clock_t::now(); while (bufPtr != bufEnd) { switch (bufPtr[0]) { - case 'v': - switch (bufPtr[1]) - { - case ' ': + 
case 'v': + switch (bufPtr[1]) { - Float3 vec{}; - bufPtr = readVec3(bufPtr, &vec.x, bufEnd); - positions.push_back(vec); - } - break; - case 'n': - { - Float3 vec{}; - bufPtr = readVec3(bufPtr, &vec.x, bufEnd); - normals.push_back(vec); - } - break; - case 't': - { - Float2 vec{}; - bufPtr = readUV(bufPtr, &vec.x, bufEnd); - uvs.push_back(vec); + case ' ': + { + Float3 vec{}; + bufPtr = readVec3(bufPtr, &vec.x, bufEnd); + positions.push_back(vec); + } + break; + case 'n': + { + Float3 vec{}; + bufPtr = readVec3(bufPtr, &vec.x, bufEnd); + normals.push_back(vec); + } + break; + case 't': + { + Float2 vec{}; + bufPtr = readUV(bufPtr, &vec.x, bufEnd); + uvs.push_back(vec); + } + break; + default: + break; } break; - default: - break; - } - break; - case 'f': + case 'f': { if (positions.empty()) return {}; + ++faceCount; - const std::string line = copyLine(bufPtr, bufEnd); - const char* linePtr = line.c_str(); - const char* const endPtr = linePtr + line.size(); + const char* endPtr = bufPtr; + while (endPtr != bufEnd && *endPtr != '\n' && *endPtr != '\r') + ++endPtr; core::vector faceCorners; faceCorners.reserve(16ull); - linePtr = goNextWord(linePtr, endPtr); - while (0 != linePtr[0]) + const char* linePtr = goNextWord(bufPtr, endPtr); + while (linePtr < endPtr && 0 != linePtr[0]) { int32_t idx[3] = { -1, -1, -1 }; - const uint32_t wlength = copyWord(tmpbuf, linePtr, WORD_BUFFER_LENGTH, endPtr); - retrieveVertexIndices(tmpbuf, idx, tmpbuf + wlength + 1, positions.size(), uvs.size(), normals.size()); + const char* tokenEnd = linePtr; + while (tokenEnd < endPtr && !core::isspace(*tokenEnd)) + ++tokenEnd; + if (!retrieveVertexIndices(linePtr, tokenEnd, idx, positions.size(), uvs.size(), normals.size())) + return {}; if (idx[0] < 0 || static_cast(idx[0]) >= positions.size()) return {}; @@ -160,10 +382,33 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as uint32_t outIx = 0u; if (it == vtxMap.end()) { + if (outPositions.empty()) + { + 
outPositions.reserve(positions.size()); + outNormals.reserve(positions.size()); + outUVs.reserve(positions.size()); + } outIx = static_cast(outPositions.size()); - vtxMap.insert({ key, outIx }); + vtxMap.emplace(key, outIx); - outPositions.push_back(positions[idx[0]]); + const auto& srcPos = positions[idx[0]]; + outPositions.push_back(srcPos); + const hlsl::float32_t3 p = { srcPos.x, srcPos.y, srcPos.z }; + if (!hasParsedAABB) + { + parsedAABB.minVx = p; + parsedAABB.maxVx = p; + hasParsedAABB = true; + } + else + { + if (p.x < parsedAABB.minVx.x) parsedAABB.minVx.x = p.x; + if (p.y < parsedAABB.minVx.y) parsedAABB.minVx.y = p.y; + if (p.z < parsedAABB.minVx.z) parsedAABB.minVx.z = p.z; + if (p.x > parsedAABB.maxVx.x) parsedAABB.maxVx.x = p.x; + if (p.y > parsedAABB.maxVx.y) parsedAABB.maxVx.y = p.y; + if (p.z > parsedAABB.maxVx.z) parsedAABB.maxVx.z = p.z; + } Float2 uv = { 0.f, 0.f }; if (idx[1] >= 0 && static_cast(idx[1]) < uvs.size()) @@ -188,7 +433,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as faceCorners.push_back(outIx); - linePtr = goNextWord(linePtr, endPtr); + while (tokenEnd < endPtr && core::isspace(*tokenEnd)) + ++tokenEnd; + linePtr = tokenEnd; } for (uint32_t i = 1u; i + 1u < faceCorners.size(); ++i) @@ -199,16 +446,18 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } } break; - default: - break; + default: + break; } bufPtr = goNextLine(bufPtr, bufEnd); } + parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); if (outPositions.empty()) return {}; + const auto buildStart = clock_t::now(); auto geometry = core::make_smart_refctd_ptr(); geometry->setPositionView(IGeometryLoader::createView(EF_R32G32B32_SFLOAT, outPositions.size(), outPositions.data())); @@ -221,194 +470,69 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (!indices.empty()) { geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - 
geometry->setIndexView(IGeometryLoader::createView(EF_R32_UINT, indices.size(), indices.data())); + const auto maxIndex = *std::max_element(indices.begin(), indices.end()); + if (maxIndex <= std::numeric_limits::max()) + { + core::vector indices16(indices.size()); + for (size_t i = 0u; i < indices.size(); ++i) + indices16[i] = static_cast(indices[i]); + geometry->setIndexView(IGeometryLoader::createView(EF_R16_UINT, indices16.size(), indices16.data())); + } + else + { + geometry->setIndexView(IGeometryLoader::createView(EF_R32_UINT, indices.size(), indices.data())); + } } else { geometry->setIndexing(IPolygonGeometryBase::PointList()); } + buildMs = std::chrono::duration(clock_t::now() - buildStart).count(); - CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); - CPolygonGeometryManipulator::recomputeRanges(geometry.get()); - CPolygonGeometryManipulator::recomputeAABB(geometry.get()); - - return SAssetBundle(core::smart_refctd_ptr(), { std::move(geometry) }); -} - -const char* COBJMeshFileLoader::readVec3(const char* bufPtr, float vec[3], const char* const bufEnd) -{ - const uint32_t WORD_BUFFER_LENGTH = 256; - char wordBuffer[WORD_BUFFER_LENGTH]; - - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer, "%f", vec); - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer, "%f", vec + 1); - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer, "%f", vec + 2); - - return bufPtr; -} - -const char* COBJMeshFileLoader::readUV(const char* bufPtr, float vec[2], const char* const bufEnd) -{ - const uint32_t WORD_BUFFER_LENGTH = 256; - char wordBuffer[WORD_BUFFER_LENGTH]; - - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer, "%f", vec); - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer, "%f", vec + 1); - - vec[1] = 1.f - vec[1]; - return 
bufPtr; -} - -const char* COBJMeshFileLoader::readBool(const char* bufPtr, bool& tf, const char* const bufEnd) -{ - const uint32_t BUFFER_LENGTH = 8; - char tfStr[BUFFER_LENGTH]; - - bufPtr = goAndCopyNextWord(tfStr, bufPtr, BUFFER_LENGTH, bufEnd); - tf = strcmp(tfStr, "off") != 0; - return bufPtr; -} - -const char* COBJMeshFileLoader::goFirstWord(const char* buf, const char* const bufEnd, bool acrossNewlines) -{ - if (acrossNewlines) - while ((buf != bufEnd) && core::isspace(*buf)) - ++buf; - else - while ((buf != bufEnd) && core::isspace(*buf) && (*buf != '\n')) - ++buf; - - return buf; -} - -const char* COBJMeshFileLoader::goNextWord(const char* buf, const char* const bufEnd, bool acrossNewlines) -{ - while ((buf != bufEnd) && !core::isspace(*buf)) - ++buf; - - return goFirstWord(buf, bufEnd, acrossNewlines); -} - -const char* COBJMeshFileLoader::goNextLine(const char* buf, const char* const bufEnd) -{ - while (buf != bufEnd) - { - if (*buf == '\n' || *buf == '\r') - break; - ++buf; - } - return goFirstWord(buf, bufEnd); -} - -uint32_t COBJMeshFileLoader::copyWord(char* outBuf, const char* const inBuf, uint32_t outBufLength, const char* const bufEnd) -{ - if (!outBufLength) - return 0; - if (!inBuf) + const auto aabbStart = clock_t::now(); + if (hasParsedAABB) { - *outBuf = 0; - return 0; - } - - uint32_t i = 0; - while (inBuf[i]) - { - if (core::isspace(inBuf[i]) || &(inBuf[i]) == bufEnd) - break; - ++i; - } - - uint32_t length = core::min(i, outBufLength - 1); - for (uint32_t j = 0; j < length; ++j) - outBuf[j] = inBuf[j]; - - outBuf[length] = 0; - return length; -} - -std::string COBJMeshFileLoader::copyLine(const char* inBuf, const char* bufEnd) -{ - if (!inBuf) - return std::string(); - - const char* ptr = inBuf; - while (ptr < bufEnd) - { - if (*ptr == '\n' || *ptr == '\r') - break; - ++ptr; + geometry->visitAABB([&parsedAABB](auto& ref)->void + { + ref = std::remove_reference_t::create(); + ref.minVx.x = parsedAABB.minVx.x; + ref.minVx.y = 
parsedAABB.minVx.y; + ref.minVx.z = parsedAABB.minVx.z; + ref.minVx.w = 0.0; + ref.maxVx.x = parsedAABB.maxVx.x; + ref.maxVx.y = parsedAABB.maxVx.y; + ref.maxVx.z = parsedAABB.maxVx.z; + ref.maxVx.w = 0.0; + }); } - return std::string(inBuf, (uint32_t)(ptr - inBuf + ((ptr < bufEnd) ? 1 : 0))); -} - -const char* COBJMeshFileLoader::goAndCopyNextWord(char* outBuf, const char* inBuf, uint32_t outBufLength, const char* bufEnd) -{ - inBuf = goNextWord(inBuf, bufEnd, false); - copyWord(outBuf, inBuf, outBufLength, bufEnd); - return inBuf; -} - -bool COBJMeshFileLoader::retrieveVertexIndices(char* vertexData, int32_t* idx, const char* bufEnd, uint32_t vbsize, uint32_t vtsize, uint32_t vnsize) -{ - char word[16] = ""; - const char* p = goFirstWord(vertexData, bufEnd); - uint32_t idxType = 0; - - uint32_t i = 0; - while (p != bufEnd) + else { - if ((core::isdigit(*p)) || (*p == '-')) - { - word[i++] = *p; - } - else if (*p == '/' || *p == ' ' || *p == '\0') - { - word[i] = '\0'; - sscanf(word, "%d", idx + idxType); - if (idx[idxType] < 0) - { - switch (idxType) - { - case 0: - idx[idxType] += vbsize; - break; - case 1: - idx[idxType] += vtsize; - break; - case 2: - idx[idxType] += vnsize; - break; - } - } - else - idx[idxType] -= 1; - - word[0] = '\0'; - i = 0; - - if (*p == '/') - { - if (++idxType > 2) - idxType = 0; - } - else - { - while (++idxType < 3) - idx[idxType] = -1; - ++p; - break; - } - } - - ++p; + CPolygonGeometryManipulator::recomputeAABB(geometry.get()); } + aabbMs = std::chrono::duration(clock_t::now() - aabbStart).count(); + + const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); + _params.logger.log( + "OBJ loader perf: file=%s total=%.3f ms io=%.3f parse=%.3f build=%.3f aabb=%.3f in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu) io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, + _file->getFileName().string().c_str(), + totalMs, + ioMs, + parseMs, + buildMs, + aabbMs, + 
static_cast(positions.size()), + static_cast(normals.size()), + static_cast(uvs.size()), + static_cast(outPositions.size()), + static_cast(indices.size()), + static_cast(faceCount), + toString(_params.ioPolicy.strategy), + toString(ioPlan.strategy), + static_cast(ioPlan.chunkSizeBytes), + ioPlan.reason); - return true; + return SAssetBundle(core::smart_refctd_ptr(), { std::move(geometry) }); } } diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.h b/src/nbl/asset/interchange/COBJMeshFileLoader.h index 4f0a9f20bd..51b06f1fc7 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.h +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.h @@ -6,68 +6,24 @@ #define _NBL_ASSET_C_OBJ_MESH_FILE_LOADER_H_INCLUDED_ #include "nbl/core/declarations.h" -#include "nbl/asset/ICPUPolygonGeometry.h" -#include "nbl/asset/interchange/IAssetLoader.h" +#include "nbl/asset/interchange/IGeometryLoader.h" namespace nbl::asset { //! Meshloader capable of loading obj meshes. class COBJMeshFileLoader : public IGeometryLoader { -protected: - //! destructor - virtual ~COBJMeshFileLoader(); + public: + ~COBJMeshFileLoader() override; -public: - //! Constructor - COBJMeshFileLoader(IAssetManager* _manager); + //! 
Constructor + explicit COBJMeshFileLoader(IAssetManager* _manager); - inline bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const override - { - // OBJ doesn't really have any header but usually starts with a comment - system::IFile::success_t succ; - char firstChar = 0; - _file->read(succ, &firstChar, 0, sizeof(firstChar)); - return succ && (firstChar =='#' || firstChar =='v'); - } + bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const override; - virtual const char** getAssociatedFileExtensions() const override - { - static const char* ext[]{ "obj", nullptr }; - return ext; - } + const char** getAssociatedFileExtensions() const override; - virtual asset::SAssetBundle loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; - -private: - // returns a pointer to the first printable character available in the buffer - const char* goFirstWord(const char* buf, const char* const bufEnd, bool acrossNewlines=true); - // returns a pointer to the first printable character after the first non-printable - const char* goNextWord(const char* buf, const char* const bufEnd, bool acrossNewlines=true); - // returns a pointer to the next printable character after the first line break - const char* goNextLine(const char* buf, const char* const bufEnd); - // copies the current word from the inBuf to the outBuf - uint32_t copyWord(char* outBuf, const char* inBuf, uint32_t outBufLength, const char* const pBufEnd); - // copies the current line from the inBuf to the outBuf - std::string copyLine(const char* inBuf, const char* const bufEnd); - - // combination of goNextWord followed by copyWord - const char* goAndCopyNextWord(char* outBuf, const char* inBuf, uint32_t outBufLength, const char* const pBufEnd); - - //! 
Read 3d vector of floats - const char* readVec3(const char* bufPtr, float vec[3], const char* const pBufEnd); - //! Read 2d vector of floats - const char* readUV(const char* bufPtr, float vec[2], const char* const pBufEnd); - //! Read boolean value represented as 'on' or 'off' - const char* readBool(const char* bufPtr, bool& tf, const char* const bufEnd); - - // reads and convert to integer the vertex indices in a line of obj file's face statement - // -1 for the index if it doesn't exist - // indices are changed to 0-based index instead of 1-based from the obj file - bool retrieveVertexIndices(char* vertexData, int32_t* idx, const char* bufEnd, uint32_t vbsize, uint32_t vtsize, uint32_t vnsize); - - IAssetManager* AssetManager; - system::ISystem* System; + asset::SAssetBundle loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; }; } // end namespace nbl::asset diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index b8da519825..e3c3f9d2a1 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -8,8 +8,11 @@ #include "nbl/system/IFile.h" -#include -#include +#include +#include +#include +#include +#include namespace nbl::asset { @@ -21,12 +24,75 @@ COBJMeshWriter::COBJMeshWriter() #endif } +const char** COBJMeshWriter::getAssociatedFileExtensions() const +{ + static const char* ext[] = { "obj", nullptr }; + return ext; +} + +uint32_t COBJMeshWriter::getSupportedFlags() +{ + return 0u; +} + +uint32_t COBJMeshWriter::getForcedFlags() +{ + return 0u; +} + static inline bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) { out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); return view.decodeElement(ix, out); } +static inline const hlsl::float32_t3* getTightFloat3View(const 
ICPUPolygonGeometry::SDataView& view) +{ + if (!view) + return nullptr; + if (view.composed.format != EF_R32G32B32_SFLOAT) + return nullptr; + if (view.composed.getStride() != sizeof(hlsl::float32_t3)) + return nullptr; + return reinterpret_cast(view.getPointer()); +} + +static inline const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& view) +{ + if (!view) + return nullptr; + if (view.composed.format != EF_R32G32_SFLOAT) + return nullptr; + if (view.composed.getStride() != sizeof(hlsl::float32_t2)) + return nullptr; + return reinterpret_cast(view.getPointer()); +} + +static inline void appendUInt(std::string& out, const uint32_t value) +{ + std::array buf = {}; + const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); + if (res.ec == std::errc()) + out.append(buf.data(), static_cast(res.ptr - buf.data())); +} + +static inline void appendFloatFixed6(std::string& out, double value) +{ + std::array buf = {}; + const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value, std::chars_format::fixed, 6); + if (res.ec == std::errc()) + { + out.append(buf.data(), static_cast(res.ptr - buf.data())); + return; + } + + const int written = std::snprintf(buf.data(), buf.size(), "%.6f", value); + if (written > 0) + out.append(buf.data(), static_cast(written)); +} + +static bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount); + bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { if (!_override) @@ -91,27 +157,34 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (indexCount % 3u != 0u) return false; - indexData.resize(indexCount); const void* src = indexView.getPointer(); if (!src) return false; - if (indexView.composed.format == EF_R32_UINT) + if (indexView.composed.format == EF_R32_UINT && indexView.composed.getStride() == sizeof(uint32_t)) 
{ - memcpy(indexData.data(), src, indexCount * sizeof(uint32_t)); + indices = reinterpret_cast(src); } - else if (indexView.composed.format == EF_R16_UINT) + else if (indexView.composed.format == EF_R16_UINT && indexView.composed.getStride() == sizeof(uint16_t)) { + indexData.resize(indexCount); const uint16_t* src16 = reinterpret_cast(src); for (size_t i = 0; i < indexCount; ++i) indexData[i] = src16[i]; + indices = indexData.data(); } else { - return false; + indexData.resize(indexCount); + hlsl::vector decoded = {}; + for (size_t i = 0; i < indexCount; ++i) + { + if (!indexView.decodeElement(i, decoded)) + return false; + indexData[i] = decoded.x; + } + indices = indexData.data(); } - - indices = indexData.data(); faceCount = indexCount / 3u; } else @@ -129,53 +202,70 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool flipHandedness = !(flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + std::string output; + output.reserve(vertexCount * 96ull + faceCount * 48ull); - SAssetWriteContext writeCtx = { ctx.params, file }; - size_t fileOffset = 0u; - - auto writeString = [&](const std::string& str) - { - system::IFile::success_t success; - writeCtx.outputFile->write(success, str.c_str(), fileOffset, str.size()); - fileOffset += success.getBytesProcessed(); - }; - - { - std::string header = "# Nabla OBJ\n"; - writeString(header); - } + output += "# Nabla OBJ\n"; hlsl::float64_t4 tmp = {}; + const hlsl::float32_t3* const tightPositions = getTightFloat3View(positionView); + const hlsl::float32_t3* const tightNormals = hasNormals ? getTightFloat3View(normalView) : nullptr; + const hlsl::float32_t2* const tightUV = hasUVs ? 
getTightFloat2View(*uvView) : nullptr; for (size_t i = 0u; i < vertexCount; ++i) { - if (!decodeVec4(positionView, i, tmp)) - return false; - - double x = tmp.x; - double y = tmp.y; - double z = tmp.z; + double x = 0.0; + double y = 0.0; + double z = 0.0; + if (tightPositions) + { + x = tightPositions[i].x; + y = tightPositions[i].y; + z = tightPositions[i].z; + } + else + { + if (!decodeVec4(positionView, i, tmp)) + return false; + x = tmp.x; + y = tmp.y; + z = tmp.z; + } if (flipHandedness) x = -x; - std::ostringstream ss; - ss << std::fixed << std::setprecision(6); - ss << "v " << x << " " << y << " " << z << "\n"; - writeString(ss.str()); + output += "v "; + appendFloatFixed6(output, x); + output += " "; + appendFloatFixed6(output, y); + output += " "; + appendFloatFixed6(output, z); + output += "\n"; } if (hasUVs) { for (size_t i = 0u; i < vertexCount; ++i) { - if (!decodeVec4(*uvView, i, tmp)) - return false; - const double u = tmp.x; - const double v = 1.0 - tmp.y; - - std::ostringstream ss; - ss << std::fixed << std::setprecision(6); - ss << "vt " << u << " " << v << "\n"; - writeString(ss.str()); + double u = 0.0; + double v = 0.0; + if (tightUV) + { + u = tightUV[i].x; + v = 1.0 - tightUV[i].y; + } + else + { + if (!decodeVec4(*uvView, i, tmp)) + return false; + u = tmp.x; + v = 1.0 - tmp.y; + } + + output += "vt "; + appendFloatFixed6(output, u); + output += " "; + appendFloatFixed6(output, v); + output += "\n"; } } @@ -183,22 +273,59 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ { for (size_t i = 0u; i < vertexCount; ++i) { - if (!decodeVec4(normalView, i, tmp)) - return false; - - double x = tmp.x; - double y = tmp.y; - double z = tmp.z; + double x = 0.0; + double y = 0.0; + double z = 0.0; + if (tightNormals) + { + x = tightNormals[i].x; + y = tightNormals[i].y; + z = tightNormals[i].z; + } + else + { + if (!decodeVec4(normalView, i, tmp)) + return false; + x = tmp.x; + y = tmp.y; + z = tmp.z; + } if 
(flipHandedness) x = -x; - std::ostringstream ss; - ss << std::fixed << std::setprecision(6); - ss << "vn " << x << " " << y << " " << z << "\n"; - writeString(ss.str()); + output += "vn "; + appendFloatFixed6(output, x); + output += " "; + appendFloatFixed6(output, y); + output += " "; + appendFloatFixed6(output, z); + output += "\n"; } } + auto appendFaceIndex = [&](const uint32_t idx) + { + const uint32_t objIx = idx + 1u; + appendUInt(output, objIx); + if (hasUVs && hasNormals) + { + output += "/"; + appendUInt(output, objIx); + output += "/"; + appendUInt(output, objIx); + } + else if (hasUVs) + { + output += "/"; + appendUInt(output, objIx); + } + else if (hasNormals) + { + output += "//"; + appendUInt(output, objIx); + } + }; + for (size_t i = 0u; i < faceCount; ++i) { const uint32_t i0 = indices[i * 3u + 0u]; @@ -209,33 +336,60 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const uint32_t f1 = i1; const uint32_t f2 = i0; - auto emitIndex = [&](std::ostringstream& ss, const uint32_t idx) - { - const uint32_t objIx = idx + 1u; - if (hasUVs && hasNormals) - ss << objIx << "/" << objIx << "/" << objIx; - else if (hasUVs) - ss << objIx << "/" << objIx; - else if (hasNormals) - ss << objIx << "//" << objIx; - else - ss << objIx; - }; - - std::ostringstream ss; - ss << "f "; - emitIndex(ss, f0); - ss << " "; - emitIndex(ss, f1); - ss << " "; - emitIndex(ss, f2); - ss << "\n"; - writeString(ss.str()); + output += "f "; + appendFaceIndex(f0); + output += " "; + appendFaceIndex(f1); + output += " "; + appendFaceIndex(f2); + output += "\n"; + } + + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true); + if (!ioPlan.valid) + { + _params.logger.log("OBJ writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); + return false; } - return true; + return writeBufferWithPolicy(file, ioPlan, reinterpret_cast(output.data()), 
output.size()); +} + +static bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount) +{ + if (!file || (!data && byteCount != 0ull)) + return false; + + size_t fileOffset = 0ull; + switch (ioPlan.strategy) + { + case SResolvedFileIOPolicy::Strategy::WholeFile: + { + system::IFile::success_t success; + file->write(success, data, fileOffset, byteCount); + return success && success.getBytesProcessed() == byteCount; + } + case SResolvedFileIOPolicy::Strategy::Chunked: + default: + { + while (fileOffset < byteCount) + { + const size_t toWrite = static_cast(std::min(ioPlan.chunkSizeBytes, byteCount - fileOffset)); + system::IFile::success_t success; + file->write(success, data + fileOffset, fileOffset, toWrite); + if (!success) + return false; + const size_t written = success.getBytesProcessed(); + if (written == 0ull) + return false; + fileOffset += written; + } + return true; + } + } } } // namespace nbl::asset #endif // _NBL_COMPILE_WITH_OBJ_WRITER_ + diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 4ea93aa4bc..c214b4c3b9 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -6,9 +6,14 @@ #include "CPLYMeshFileLoader.h" +#include "nbl/asset/metadata/CPLYMetadata.h" #include #include +#include +#include +#include +#include #include "nbl/asset/IAssetManager.h" @@ -21,6 +26,14 @@ namespace nbl::asset { +CPLYMeshFileLoader::CPLYMeshFileLoader() = default; + +const char** CPLYMeshFileLoader::getAssociatedFileExtensions() const +{ + static const char* ext[] = { "ply", nullptr }; + return ext; +} + bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const { char buf[40]; @@ -93,7 +106,7 @@ struct SContext int32_t count = _ctx.getInt(list.countType); for (decltype(count) i=0; i(_ioReadWindowSize, 50ull << 10); + 
Buffer.resize(ioReadWindowSize + 1ull, '\0'); EndPointer = StartPointer = Buffer.data(); LineEndPointer = EndPointer-1; @@ -165,7 +180,13 @@ struct SContext EndPointer = newStart+length; // read data from the file - const size_t requestSize = Buffer.size()-length; + const size_t usableBufferSize = Buffer.size() > 0ull ? Buffer.size() - 1ull : 0ull; + if (usableBufferSize <= length) + { + EndOfFile = true; + return; + } + const size_t requestSize = usableBufferSize - length; system::IFile::success_t success; inner.mainFile->read(success,EndPointer,fileOffset,requestSize); const size_t bytesRead = success.getBytesProcessed(); @@ -422,7 +443,7 @@ struct SContext it.ptr += it.stride; } } - bool readFace(const SElement& Element, core::vector& _outIndices) + bool readFace(const SElement& Element, core::vector& _outIndices, uint32_t& _maxIndex) { if (!IsBinaryFile) getNextLine(); @@ -432,20 +453,82 @@ struct SContext if (prop.isList() && (prop.Name=="vertex_indices" || prop.Name == "vertex_index")) { const uint32_t count = getInt(prop.list.countType); - //_NBL_DEBUG_BREAK_IF(count != 3) const auto srcIndexFmt = prop.list.itemType; + if (count < 3u) + { + for (uint32_t j = 0u; j < count; ++j) + getInt(srcIndexFmt); + continue; + } + if (count > 3u) + _outIndices.reserve(_outIndices.size() + static_cast(count - 2u) * 3ull); + auto emitFan = [&_outIndices, &_maxIndex](auto&& readIndex, const uint32_t faceVertexCount)->void + { + uint32_t i0 = readIndex(); + uint32_t i1 = readIndex(); + uint32_t i2 = readIndex(); + _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); + _outIndices.push_back(i0); + _outIndices.push_back(i1); + _outIndices.push_back(i2); + uint32_t prev = i2; + for (uint32_t j = 3u; j < faceVertexCount; ++j) + { + const uint32_t idx = readIndex(); + _maxIndex = std::max(_maxIndex, idx); + _outIndices.push_back(i0); + _outIndices.push_back(prev); + _outIndices.push_back(idx); + prev = idx; + } + }; - _outIndices.push_back(getInt(srcIndexFmt)); - 
_outIndices.push_back(getInt(srcIndexFmt)); - _outIndices.push_back(getInt(srcIndexFmt)); - // TODO: handle varying vertex count faces via variable vertex count geometry collections (PLY loader should be a Geometry Collection loader) - for (auto j=3u; j(count) * sizeof(uint32_t); + if (StartPointer + bytesNeeded > EndPointer) + fillBuffer(); + if (StartPointer + bytesNeeded <= EndPointer) + { + const uint8_t* ptr = reinterpret_cast(StartPointer); + auto readIndex = [&ptr]() -> uint32_t + { + uint32_t v = 0u; + std::memcpy(&v, ptr, sizeof(v)); + ptr += sizeof(v); + return v; + }; + emitFan(readIndex, count); + StartPointer = reinterpret_cast(const_cast(ptr)); + continue; + } + } + else if (IsBinaryFile && !IsWrongEndian && srcIndexFmt == EF_R16_UINT) { - // this seems to be a triangle fan ? - _outIndices.push_back(_outIndices.front()); - _outIndices.push_back(_outIndices.back()); - _outIndices.push_back(getInt(srcIndexFmt)); + const size_t bytesNeeded = static_cast(count) * sizeof(uint16_t); + if (StartPointer + bytesNeeded > EndPointer) + fillBuffer(); + if (StartPointer + bytesNeeded <= EndPointer) + { + const uint8_t* ptr = reinterpret_cast(StartPointer); + auto readIndex = [&ptr]() -> uint32_t + { + uint16_t v = 0u; + std::memcpy(&v, ptr, sizeof(v)); + ptr += sizeof(v); + return static_cast(v); + }; + emitFan(readIndex, count); + StartPointer = reinterpret_cast(const_cast(ptr)); + continue; + } } + + auto readIndex = [&]() -> uint32_t + { + return static_cast(getInt(srcIndexFmt)); + }; + emitFan(readIndex, count); } else if (prop.Name == "intensity") { @@ -458,11 +541,190 @@ struct SContext return true; } + bool readFaceElementFast(const SElement& element, core::vector& _outIndices, uint32_t& _maxIndex, uint64_t& _faceCount) + { + if (!IsBinaryFile || IsWrongEndian) + return false; + if (element.Properties.size() != 1u) + return false; + + const auto& prop = element.Properties[0]; + if (!prop.isList() || (prop.Name != "vertex_indices" && prop.Name != 
"vertex_index")) + return false; + if (prop.list.countType != EF_R8_UINT) + return false; + + const E_FORMAT srcIndexFmt = prop.list.itemType; + if (srcIndexFmt != EF_R32_UINT && srcIndexFmt != EF_R16_UINT) + return false; + + const size_t indexSize = srcIndexFmt == EF_R32_UINT ? sizeof(uint32_t) : sizeof(uint16_t); + const size_t minTriangleRecordSize = sizeof(uint8_t) + indexSize * 3u; + const size_t minBytesNeeded = element.Count * minTriangleRecordSize; + if (StartPointer + minBytesNeeded <= EndPointer) + { + char* scan = StartPointer; + bool allTriangles = true; + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = static_cast(*scan++); + if (c != 3u) + { + allTriangles = false; + break; + } + scan += indexSize * 3u; + } + + if (allTriangles) + { + const size_t oldSize = _outIndices.size(); + _outIndices.resize(oldSize + element.Count * 3u); + uint32_t* out = _outIndices.data() + oldSize; + const uint8_t* ptr = reinterpret_cast(StartPointer); + + if (srcIndexFmt == EF_R32_UINT) + { + for (size_t j = 0u; j < element.Count; ++j) + { + ++ptr; // list count + uint32_t i0 = 0u; + uint32_t i1 = 0u; + uint32_t i2 = 0u; + std::memcpy(&i0, ptr, sizeof(i0)); + ptr += sizeof(i0); + std::memcpy(&i1, ptr, sizeof(i1)); + ptr += sizeof(i1); + std::memcpy(&i2, ptr, sizeof(i2)); + ptr += sizeof(i2); + _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); + out[0] = i0; + out[1] = i1; + out[2] = i2; + out += 3; + } + } + else + { + for (size_t j = 0u; j < element.Count; ++j) + { + ++ptr; // list count + uint16_t t0 = 0u; + uint16_t t1 = 0u; + uint16_t t2 = 0u; + std::memcpy(&t0, ptr, sizeof(t0)); + ptr += sizeof(t0); + std::memcpy(&t1, ptr, sizeof(t1)); + ptr += sizeof(t1); + std::memcpy(&t2, ptr, sizeof(t2)); + ptr += sizeof(t2); + const uint32_t i0 = t0; + const uint32_t i1 = t1; + const uint32_t i2 = t2; + _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); + out[0] = i0; + out[1] = i1; + out[2] = i2; + out += 3; + } + } + + 
StartPointer = reinterpret_cast(const_cast(ptr)); + _faceCount += element.Count; + return true; + } + } + + _outIndices.reserve(_outIndices.size() + element.Count * 3u); + auto ensureBytes = [this](const size_t bytes)->bool + { + if (StartPointer + bytes > EndPointer) + fillBuffer(); + return StartPointer + bytes <= EndPointer; + }; + auto readCount = [&ensureBytes, this](int32_t& outCount)->bool + { + if (!ensureBytes(sizeof(uint8_t))) + return false; + outCount = static_cast(*StartPointer++); + return true; + }; + auto readIndex = [&ensureBytes, this, srcIndexFmt](uint32_t& out)->bool + { + if (srcIndexFmt == EF_R32_UINT) + { + if (!ensureBytes(sizeof(uint32_t))) + return false; + std::memcpy(&out, StartPointer, sizeof(uint32_t)); + StartPointer += sizeof(uint32_t); + return true; + } + + if (!ensureBytes(sizeof(uint16_t))) + return false; + uint16_t v = 0u; + std::memcpy(&v, StartPointer, sizeof(uint16_t)); + StartPointer += sizeof(uint16_t); + out = v; + return true; + }; + + for (size_t j = 0u; j < element.Count; ++j) + { + int32_t countSigned = 0; + if (!readCount(countSigned)) + return false; + if (countSigned < 0) + return false; + const uint32_t count = static_cast(countSigned); + if (count < 3u) + { + uint32_t dummy = 0u; + for (uint32_t k = 0u; k < count; ++k) + { + if (!readIndex(dummy)) + return false; + } + ++_faceCount; + continue; + } + + uint32_t i0 = 0u; + uint32_t i1 = 0u; + uint32_t i2 = 0u; + if (!readIndex(i0) || !readIndex(i1) || !readIndex(i2)) + return false; + + _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); + _outIndices.push_back(i0); + _outIndices.push_back(i1); + _outIndices.push_back(i2); + + uint32_t prev = i2; + for (uint32_t k = 3u; k < count; ++k) + { + uint32_t idx = 0u; + if (!readIndex(idx)) + return false; + _maxIndex = std::max(_maxIndex, idx); + _outIndices.push_back(i0); + _outIndices.push_back(prev); + _outIndices.push_back(idx); + prev = idx; + } + + ++_faceCount; + } + + return true; + } + 
IAssetLoader::SAssetLoadContext inner; uint32_t topHierarchyLevel; IAssetLoader::IAssetLoaderOverride* loaderOverride; // input buffer must be at least twice as long as the longest line in the file - std::array Buffer; // 50kb seems sane to store a line + core::vector Buffer; + size_t ioReadWindowSize = 50ull << 10; core::vector ElementList = {}; char* StartPointer = nullptr, *EndPointer = nullptr, *LineEndPointer = nullptr; int32_t LineLength = 0; @@ -480,6 +742,25 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!_file) return {}; + using clock_t = std::chrono::high_resolution_clock; + const auto totalStart = clock_t::now(); + double headerMs = 0.0; + double vertexMs = 0.0; + double faceMs = 0.0; + double skipMs = 0.0; + double hashRangeMs = 0.0; + double indexBuildMs = 0.0; + double aabbMs = 0.0; + uint64_t faceCount = 0u; + uint32_t maxIndexRead = 0u; + const uint64_t fileSize = _file->getSize(); + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, fileSize, true); + if (!ioPlan.valid) + { + _params.logger.log("PLY loader: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str(), ioPlan.reason); + return {}; + } + SContext ctx = { asset::IAssetLoader::SAssetLoadContext{ _params, @@ -488,7 +769,9 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa _hierarchyLevel, _override }; - ctx.init(); + const uint64_t desiredReadWindow = ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? 
(fileSize + 1ull) : ioPlan.chunkSizeBytes; + const uint64_t safeReadWindow = std::min(desiredReadWindow, static_cast(std::numeric_limits::max() - 1ull)); + ctx.init(static_cast(safeReadWindow)); // start with empty mesh auto geometry = make_smart_refctd_ptr(); @@ -513,6 +796,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa bool continueReading = true; ctx.IsBinaryFile = false; ctx.IsWrongEndian= false; + const auto headerStart = clock_t::now(); do { @@ -630,6 +914,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } } while (readingHeader && continueReading); + headerMs = std::chrono::duration(clock_t::now() - headerStart).count(); // if (!continueReading) @@ -869,26 +1154,40 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa for (auto& view : extraViews) geometry->getAuxAttributeViews()->push_back(std::move(view)); // loop through vertex properties + const auto vertexStart = clock_t::now(); ctx.readVertex(_params,el); + vertexMs += std::chrono::duration(clock_t::now() - vertexStart).count(); verticesProcessed = true; } else if (el.Name=="face") { + const auto faceStart = clock_t::now(); + indices.reserve(indices.size() + el.Count * 3u); for (size_t j=0; j(clock_t::now() - faceStart).count(); } else { // skip these elements + const auto skipStart = clock_t::now(); for (size_t j=0; j(clock_t::now() - skipStart).count(); } } - // do before indices so we don't compute their stuff again - CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); - CPolygonGeometryManipulator::recomputeRanges(geometry.get()); + hashRangeMs = 0.0; + + const auto aabbStart = clock_t::now(); + CPolygonGeometryManipulator::recomputeAABB(geometry.get()); + aabbMs = std::chrono::duration(clock_t::now() - aabbStart).count(); + const auto indexStart = clock_t::now(); if (indices.empty()) { // no index buffer means point cloud @@ -896,12 +1195,55 @@ SAssetBundle 
CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } else { + if (vertCount != 0u && maxIndexRead >= vertCount) + { + _params.logger.log("PLY indices out of range for %s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str()); + return {}; + } + geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - auto view = IGeometryLoader::createView(EF_R32_UINT,indices.size(),indices.data()); - geometry->setIndexView(std::move(view)); + if (maxIndexRead <= std::numeric_limits::max()) + { + auto view = IGeometryLoader::createView(EF_R16_UINT, indices.size()); + if (!view) + return {}; + auto* dst = reinterpret_cast(view.getPointer()); + for (size_t i = 0u; i < indices.size(); ++i) + dst[i] = static_cast(indices[i]); + geometry->setIndexView(std::move(view)); + } + else + { + auto view = IGeometryLoader::createView(EF_R32_UINT, indices.size()); + if (!view) + return {}; + std::memcpy(view.getPointer(), indices.data(), indices.size() * sizeof(uint32_t)); + geometry->setIndexView(std::move(view)); + } } + indexBuildMs = std::chrono::duration(clock_t::now() - indexStart).count(); - CPolygonGeometryManipulator::recomputeAABB(geometry.get()); + const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); + _params.logger.log( + "PLY loader perf: file=%s total=%.3f ms header=%.3f vertex=%.3f face=%.3f skip=%.3f hash_range=%.3f index=%.3f aabb=%.3f binary=%d verts=%llu faces=%llu idx=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, + _file->getFileName().string().c_str(), + totalMs, + headerMs, + vertexMs, + faceMs, + skipMs, + hashRangeMs, + indexBuildMs, + aabbMs, + ctx.IsBinaryFile ? 
1 : 0, + static_cast(vertCount), + static_cast(faceCount), + static_cast(indices.size()), + toString(_params.ioPolicy.strategy), + toString(ioPlan.strategy), + static_cast(ioPlan.chunkSizeBytes), + ioPlan.reason); auto meta = core::make_smart_refctd_ptr(); return SAssetBundle(std::move(meta),{std::move(geometry)}); diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.h b/src/nbl/asset/interchange/CPLYMeshFileLoader.h index 6215364466..df8b72f125 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.h +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.h @@ -10,9 +10,6 @@ #include "nbl/asset/interchange/IGeometryLoader.h" -#include "nbl/asset/ICPUPolygonGeometry.h" -#include "nbl/asset/metadata/CPLYMetadata.h" - namespace nbl::asset { @@ -20,15 +17,11 @@ namespace nbl::asset class CPLYMeshFileLoader final : public IGeometryLoader { public: - inline CPLYMeshFileLoader() = default; + CPLYMeshFileLoader(); bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const override; - const char** getAssociatedFileExtensions() const override - { - static const char* ext[]{ "ply", nullptr }; - return ext; - } + const char** getAssociatedFileExtensions() const override; //! creates/loads an animated mesh from the file. 
SAssetBundle loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 1765a28a0b..88a4bff69b 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -7,12 +7,14 @@ #ifdef _NBL_COMPILE_WITH_PLY_WRITER_ -#include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" -#include "nbl/asset/utils/CPolygonGeometryManipulator.h" -#include -#include +#include +#include +#include +#include +#include +#include namespace nbl::asset { @@ -24,12 +26,88 @@ CPLYMeshWriter::CPLYMeshWriter() #endif } +const char** CPLYMeshWriter::getAssociatedFileExtensions() const +{ + static const char* ext[] = { "ply", nullptr }; + return ext; +} + +uint32_t CPLYMeshWriter::getSupportedFlags() +{ + return asset::EWF_BINARY; +} + +uint32_t CPLYMeshWriter::getForcedFlags() +{ + return 0u; +} + static inline bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) { out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); return view.decodeElement(ix, out); } +static inline const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) +{ + if (!view) + return nullptr; + if (view.composed.format != EF_R32G32B32_SFLOAT) + return nullptr; + if (view.composed.getStride() != sizeof(hlsl::float32_t3)) + return nullptr; + return reinterpret_cast(view.getPointer()); +} + +static inline const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& view) +{ + if (!view) + return nullptr; + if (view.composed.format != EF_R32G32_SFLOAT) + return nullptr; + if (view.composed.getStride() != sizeof(hlsl::float32_t2)) + return nullptr; + return reinterpret_cast(view.getPointer()); +} + +static inline void appendUInt(std::string& out, const uint32_t value) +{ + 
std::array buf = {}; + const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); + if (res.ec == std::errc()) + out.append(buf.data(), static_cast(res.ptr - buf.data())); +} + +static inline void appendFloatFixed6(std::string& out, double value) +{ + std::array buf = {}; + const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value, std::chars_format::fixed, 6); + if (res.ec == std::errc()) + { + out.append(buf.data(), static_cast(res.ptr - buf.data())); + return; + } + + const int written = std::snprintf(buf.data(), buf.size(), "%.6f", value); + if (written > 0) + out.append(buf.data(), static_cast(written)); +} + +static inline void appendVec(std::string& out, const double* values, size_t count, bool flipVectors = false) +{ + constexpr size_t xID = 0u; + for (size_t i = 0u; i < count; ++i) + { + const bool flip = flipVectors && i == xID; + appendFloatFixed6(out, flip ? -values[i] : values[i]); + out += " "; + } +} + +static bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount); +static bool writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors); +static bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors); + bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { if (!_override) @@ -89,27 +167,34 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (indexCount % 3u != 0u) return false; - indexData.resize(indexCount); const void* src = indexView.getPointer(); if (!src) return false; - if (indexView.composed.format == EF_R32_UINT) + if 
(indexView.composed.format == EF_R32_UINT && indexView.composed.getStride() == sizeof(uint32_t)) { - memcpy(indexData.data(), src, indexCount * sizeof(uint32_t)); + indices = reinterpret_cast(src); } - else if (indexView.composed.format == EF_R16_UINT) + else if (indexView.composed.format == EF_R16_UINT && indexView.composed.getStride() == sizeof(uint16_t)) { + indexData.resize(indexCount); const uint16_t* src16 = reinterpret_cast(src); for (size_t i = 0; i < indexCount; ++i) indexData[i] = src16[i]; + indices = indexData.data(); } else { - return false; + indexData.resize(indexCount); + hlsl::vector decoded = {}; + for (size_t i = 0; i < indexCount; ++i) + { + if (!indexView.decodeElement(i, decoded)) + return false; + indexData[i] = decoded.x; + } + indices = indexData.data(); } - - indices = indexData.data(); faceCount = indexCount / 3u; } else @@ -159,63 +244,156 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ header += "\nproperty list uchar uint vertex_indices\n"; header += "end_header\n"; - SContext context = { SAssetWriteContext{ ctx.params, file } }; + const bool flipVectors = !(flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + + if (binary) { - system::IFile::success_t success; - file->write(success, header.c_str(), context.fileOffset, header.size()); - context.fileOffset += success.getBytesProcessed(); + const size_t vertexStride = sizeof(float) * (3u + (writeNormals ? 3u : 0u) + (uvView ? 
2u : 0u)); + const size_t faceStride = sizeof(uint8_t) + sizeof(uint32_t) * 3u; + const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; + + core::vector output; + output.resize(header.size() + bodySize); + if (!header.empty()) + std::memcpy(output.data(), header.data(), header.size()); + if (!writeBinary(geom, uvView, writeNormals, vertexCount, indices, faceCount, output.data() + header.size(), flipVectors)) + return false; + + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true); + if (!ioPlan.valid) + { + _params.logger.log("PLY writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); + return false; + } + return writeBufferWithPolicy(file, ioPlan, output.data(), output.size()); } - if (binary) - writeBinary(geom, uvView, writeNormals, vertexCount, indices, faceCount, context); - else - writeText(geom, uvView, writeNormals, vertexCount, indices, faceCount, context); + std::string body; + body.reserve(vertexCount * 96ull + faceCount * 32ull); + if (!writeText(geom, uvView, writeNormals, vertexCount, indices, faceCount, body, flipVectors)) + return false; - return true; + std::string output = header; + output += body; + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true); + if (!ioPlan.valid) + { + _params.logger.log("PLY writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); + return false; + } + return writeBufferWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size()); +} + +static bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount) +{ + if (!file || (!data && byteCount != 0ull)) + return false; + + size_t fileOffset = 0ull; + switch (ioPlan.strategy) + { + case SResolvedFileIOPolicy::Strategy::WholeFile: + { + system::IFile::success_t 
success; + file->write(success, data, fileOffset, byteCount); + return success && success.getBytesProcessed() == byteCount; + } + case SResolvedFileIOPolicy::Strategy::Chunked: + default: + { + while (fileOffset < byteCount) + { + const size_t toWrite = static_cast(std::min(ioPlan.chunkSizeBytes, byteCount - fileOffset)); + system::IFile::success_t success; + file->write(success, data + fileOffset, fileOffset, toWrite); + if (!success) + return false; + const size_t written = success.getBytesProcessed(); + if (written == 0ull) + return false; + fileOffset += written; + } + return true; + } + } } -void CPLYMeshWriter::writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, SContext& context) const +static bool writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors) { - const bool flipVectors = !(context.writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); - const size_t vertexStride = sizeof(float) * (3u + (writeNormals ? 3u : 0u) + (uvView ? 2u : 0u)); - const size_t faceStride = sizeof(uint8_t) + sizeof(uint32_t) * 3u; - const size_t totalSize = vertexCount * vertexStride + faceCount * faceStride; - core::vector blob; - blob.resize(totalSize); - uint8_t* dst = blob.data(); + if (!dst) + return false; + + const auto& positionView = geom->getPositionView(); + const auto& normalView = geom->getNormalView(); + const hlsl::float32_t3* const tightPos = getTightFloat3View(positionView); + const hlsl::float32_t3* const tightNormal = writeNormals ? getTightFloat3View(normalView) : nullptr; + const hlsl::float32_t2* const tightUV = uvView ? 
getTightFloat2View(*uvView) : nullptr; hlsl::float64_t4 tmp = {}; for (size_t i = 0; i < vertexCount; ++i) { - if (!decodeVec4(geom->getPositionView(), i, tmp)) - return; - - float pos[3] = { static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z) }; + float pos[3] = {}; + if (tightPos) + { + pos[0] = tightPos[i].x; + pos[1] = tightPos[i].y; + pos[2] = tightPos[i].z; + } + else + { + if (!decodeVec4(positionView, i, tmp)) + return false; + pos[0] = static_cast(tmp.x); + pos[1] = static_cast(tmp.y); + pos[2] = static_cast(tmp.z); + } if (flipVectors) pos[0] = -pos[0]; - memcpy(dst, pos, sizeof(pos)); + std::memcpy(dst, pos, sizeof(pos)); dst += sizeof(pos); if (writeNormals) { - if (!decodeVec4(geom->getNormalView(), i, tmp)) - return; - float normal[3] = { static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z) }; + float normal[3] = {}; + if (tightNormal) + { + normal[0] = tightNormal[i].x; + normal[1] = tightNormal[i].y; + normal[2] = tightNormal[i].z; + } + else + { + if (!decodeVec4(normalView, i, tmp)) + return false; + normal[0] = static_cast(tmp.x); + normal[1] = static_cast(tmp.y); + normal[2] = static_cast(tmp.z); + } if (flipVectors) normal[0] = -normal[0]; - memcpy(dst, normal, sizeof(normal)); + std::memcpy(dst, normal, sizeof(normal)); dst += sizeof(normal); } if (uvView) { - if (!decodeVec4(*uvView, i, tmp)) - return; - float uv[2] = { static_cast(tmp.x), static_cast(tmp.y) }; - - memcpy(dst, uv, sizeof(uv)); + float uv[2] = {}; + if (tightUV) + { + uv[0] = tightUV[i].x; + uv[1] = tightUV[i].y; + } + else + { + if (!decodeVec4(*uvView, i, tmp)) + return false; + uv[0] = static_cast(tmp.x); + uv[1] = static_cast(tmp.y); + } + + std::memcpy(dst, uv, sizeof(uv)); dst += sizeof(uv); } } @@ -226,61 +404,97 @@ void CPLYMeshWriter::writeBinary(const ICPUPolygonGeometry* geom, const ICPUPoly *dst++ = listSize; const uint32_t* tri = indices + (i * 3u); - memcpy(dst, tri, sizeof(uint32_t) * 3u); + std::memcpy(dst, tri, sizeof(uint32_t) * 3u); dst += 
sizeof(uint32_t) * 3u; } - system::IFile::success_t success; - context.writeContext.outputFile->write(success, blob.data(), context.fileOffset, blob.size()); - context.fileOffset += success.getBytesProcessed(); + return true; } -void CPLYMeshWriter::writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, SContext& context) const +static bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors) { - const bool flipVectors = !(context.writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const auto& positionView = geom->getPositionView(); + const auto& normalView = geom->getNormalView(); + const hlsl::float32_t3* const tightPos = getTightFloat3View(positionView); + const hlsl::float32_t3* const tightNormal = writeNormals ? getTightFloat3View(normalView) : nullptr; + const hlsl::float32_t2* const tightUV = uvView ? 
getTightFloat2View(*uvView) : nullptr; hlsl::float64_t4 tmp = {}; for (size_t i = 0; i < vertexCount; ++i) { - if (!decodeVec4(geom->getPositionView(), i, tmp)) - return; - const double pos[3] = { tmp.x, tmp.y, tmp.z }; - writeVectorAsText(context, pos, 3u, flipVectors); + double pos[3] = {}; + if (tightPos) + { + pos[0] = tightPos[i].x; + pos[1] = tightPos[i].y; + pos[2] = tightPos[i].z; + } + else + { + if (!decodeVec4(positionView, i, tmp)) + return false; + pos[0] = tmp.x; + pos[1] = tmp.y; + pos[2] = tmp.z; + } + appendVec(output, pos, 3u, flipVectors); if (writeNormals) { - if (!decodeVec4(geom->getNormalView(), i, tmp)) - return; - const double normal[3] = { tmp.x, tmp.y, tmp.z }; - writeVectorAsText(context, normal, 3u, flipVectors); + double normal[3] = {}; + if (tightNormal) + { + normal[0] = tightNormal[i].x; + normal[1] = tightNormal[i].y; + normal[2] = tightNormal[i].z; + } + else + { + if (!decodeVec4(normalView, i, tmp)) + return false; + normal[0] = tmp.x; + normal[1] = tmp.y; + normal[2] = tmp.z; + } + appendVec(output, normal, 3u, flipVectors); } if (uvView) { - if (!decodeVec4(*uvView, i, tmp)) - return; - const double uv[2] = { tmp.x, tmp.y }; - writeVectorAsText(context, uv, 2u, false); + double uv[2] = {}; + if (tightUV) + { + uv[0] = tightUV[i].x; + uv[1] = tightUV[i].y; + } + else + { + if (!decodeVec4(*uvView, i, tmp)) + return false; + uv[0] = tmp.x; + uv[1] = tmp.y; + } + appendVec(output, uv, 2u, false); } - system::IFile::success_t success; - context.writeContext.outputFile->write(success, "\n", context.fileOffset, 1); - context.fileOffset += success.getBytesProcessed(); + output += "\n"; } for (size_t i = 0; i < faceCount; ++i) { const uint32_t* tri = indices + (i * 3u); - std::stringstream ss; - ss << "3 " << tri[0] << " " << tri[1] << " " << tri[2] << "\n"; - const auto str = ss.str(); - - system::IFile::success_t success; - context.writeContext.outputFile->write(success, str.c_str(), context.fileOffset, str.size()); - 
context.fileOffset += success.getBytesProcessed(); + output += "3 "; + appendUInt(output, tri[0]); + output += " "; + appendUInt(output, tri[1]); + output += " "; + appendUInt(output, tri[2]); + output += "\n"; } + return true; } } // namespace nbl::asset #endif // _NBL_COMPILE_WITH_PLY_WRITER_ + diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.h b/src/nbl/asset/interchange/CPLYMeshWriter.h index 73244a64b5..884ebb6238 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.h +++ b/src/nbl/asset/interchange/CPLYMeshWriter.h @@ -6,11 +6,8 @@ #define _NBL_ASSET_PLY_MESH_WRITER_H_INCLUDED_ -#include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/interchange/IGeometryWriter.h" -#include - namespace nbl::asset { @@ -21,51 +18,12 @@ class CPLYMeshWriter : public IGeometryWriter public: CPLYMeshWriter(); - virtual const char** getAssociatedFileExtensions() const - { - static const char* ext[]{ "ply", nullptr }; - return ext; - } - - virtual uint32_t getSupportedFlags() override { return asset::EWF_BINARY; } - - virtual uint32_t getForcedFlags() { return 0u; } - - virtual bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; - - private: - - struct SContext - { - SAssetWriteContext writeContext; - size_t fileOffset = 0; - }; - - void writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, SContext& context) const; - void writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, SContext& context) const; - - template - void writeVectorAsText(SContext& context, const T* _vec, size_t _elementsToWrite, bool flipVectors = false) const - { - constexpr size_t xID = 0u; - std::stringstream ss; - ss << std::fixed; - bool currentFlipOnVariable = false; - for (size_t i = 0u; i < 
_elementsToWrite; ++i) - { - if (flipVectors && i == xID) - currentFlipOnVariable = true; - else - currentFlipOnVariable = false; + const char** getAssociatedFileExtensions() const override; - ss << std::setprecision(6) << _vec[i] * (currentFlipOnVariable ? -1 : 1) << " "; - } - auto str = ss.str(); + uint32_t getSupportedFlags() override; + uint32_t getForcedFlags() override; - system::IFile::success_t succ; - context.writeContext.outputFile->write(succ, str.c_str(), context.fileOffset, str.size()); - context.fileOffset += succ.getBytesProcessed(); - } + bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; } // end namespace diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 951b98ba73..4601aca836 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -8,303 +8,529 @@ #ifdef _NBL_COMPILE_WITH_STL_LOADER_ #include "nbl/asset/asset.h" - -#include "nbl/asset/IAssetManager.h" - -#include "nbl/system/ISystem.h" +#include "nbl/asset/metadata/CSTLMetadata.h" +#include "nbl/asset/utils/CPolygonGeometryManipulator.h" #include "nbl/system/IFile.h" + +#include +#include #include +#include +#include +#include + +namespace nbl::asset +{ + +struct SSTLContext +{ + IAssetLoader::SAssetLoadContext inner; + size_t fileOffset = 0ull; +}; + +static bool stlReadExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes) +{ + if (!file || (!dst && bytes != 0ull)) + return false; + if (bytes == 0ull) + return true; + + system::IFile::success_t success; + file->read(success, dst, offset, bytes); + return success && success.getBytesProcessed() == bytes; +} + +static bool stlReadWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan) +{ + if (!file || (!dst && bytes != 0ull)) + return false; + if 
(bytes == 0ull) + return true; + + size_t bytesRead = 0ull; + switch (ioPlan.strategy) + { + case SResolvedFileIOPolicy::Strategy::WholeFile: + return stlReadExact(file, dst, offset, bytes); + case SResolvedFileIOPolicy::Strategy::Chunked: + default: + while (bytesRead < bytes) + { + const size_t chunk = static_cast(std::min(ioPlan.chunkSizeBytes, bytes - bytesRead)); + system::IFile::success_t success; + file->read(success, dst + bytesRead, offset + bytesRead, chunk); + if (!success) + return false; + const size_t processed = success.getBytesProcessed(); + if (processed == 0ull) + return false; + bytesRead += processed; + } + return true; + } +} + +static bool stlReadU8(SSTLContext* context, uint8_t& out) +{ + if (!context) + return false; + + system::IFile::success_t success; + context->inner.mainFile->read(success, &out, context->fileOffset, sizeof(out)); + if (!success || success.getBytesProcessed() != sizeof(out)) + return false; + context->fileOffset += sizeof(out); + return true; +} + +static bool stlReadF32(SSTLContext* context, float& out) +{ + if (!context) + return false; + + system::IFile::success_t success; + context->inner.mainFile->read(success, &out, context->fileOffset, sizeof(out)); + if (!success || success.getBytesProcessed() != sizeof(out)) + return false; + context->fileOffset += sizeof(out); + return true; +} + +static void stlGoNextWord(SSTLContext* context) +{ + if (!context) + return; + + uint8_t c = 0u; + while (context->fileOffset < context->inner.mainFile->getSize()) + { + const size_t before = context->fileOffset; + if (!stlReadU8(context, c)) + break; + if (!core::isspace(c)) + { + context->fileOffset = before; + break; + } + } +} + +static const std::string& stlGetNextToken(SSTLContext* context, std::string& token) +{ + stlGoNextWord(context); + token.clear(); + + char c = 0; + while (context->fileOffset < context->inner.mainFile->getSize()) + { + system::IFile::success_t success; + context->inner.mainFile->read(success, &c, 
context->fileOffset, sizeof(c)); + if (!success || success.getBytesProcessed() != sizeof(c)) + break; + context->fileOffset += sizeof(c); + if (core::isspace(c)) + break; + token += c; + } + + return token; +} + +static void stlGoNextLine(SSTLContext* context) +{ + if (!context) + return; + + uint8_t c = 0u; + while (context->fileOffset < context->inner.mainFile->getSize()) + { + if (!stlReadU8(context, c)) + break; + if (c == '\n' || c == '\r') + break; + } +} + +static bool stlGetNextVector(SSTLContext* context, core::vectorSIMDf& vec, const bool binary) +{ + if (!context) + return false; + + if (binary) + { + float x = 0.f; + float y = 0.f; + float z = 0.f; + if (!stlReadF32(context, x) || !stlReadF32(context, y) || !stlReadF32(context, z)) + return false; + vec.set(x, y, z, 0.f); + return true; + } -using namespace nbl; -using namespace nbl::asset; + stlGoNextWord(context); + std::string tmp; + if (stlGetNextToken(context, tmp).empty()) + return false; + std::sscanf(tmp.c_str(), "%f", &vec.X); + if (stlGetNextToken(context, tmp).empty()) + return false; + std::sscanf(tmp.c_str(), "%f", &vec.Y); + if (stlGetNextToken(context, tmp).empty()) + return false; + std::sscanf(tmp.c_str(), "%f", &vec.Z); + vec.W = 0.f; + return true; +} -CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager* _m_assetMgr) - : m_assetMgr(_m_assetMgr) +static bool stlReadFloatFromPayload(const uint8_t*& cursor, const uint8_t* const end, float& out) { + if (cursor + sizeof(float) > end) + return false; + std::memcpy(&out, cursor, sizeof(float)); + cursor += sizeof(float); + return true; +} + +CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager* _assetManager) +{ + (void)_assetManager; } -void CSTLMeshFileLoader::initialize() +const char** CSTLMeshFileLoader::getAssociatedFileExtensions() const { + static const char* ext[] = { "stl", nullptr }; + return ext; } SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, 
IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) { + (void)_override; + (void)_hierarchyLevel; + if (!_file) return {}; - SContext context = { + using clock_t = std::chrono::high_resolution_clock; + const auto totalStart = clock_t::now(); + double detectMs = 0.0; + double ioMs = 0.0; + double parseMs = 0.0; + double buildMs = 0.0; + double hashMs = 0.0; + double aabbMs = 0.0; + uint64_t triangleCount = 0u; + + SSTLContext context = { asset::IAssetLoader::SAssetLoadContext{ _params, _file }, - _hierarchyLevel, - _override + 0ull }; - const size_t filesize = context.inner.mainFile->getSize(); - if (filesize < 6ull) // we need a header + if (filesize < 6ull) return {}; + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(filesize), true); + if (!ioPlan.valid) + { + _params.logger.log("STL loader: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str(), ioPlan.reason); + return {}; + } + bool binary = false; std::string token; - if (getNextToken(&context, token) != "solid") - binary = true; - core::vector positions, normals; + { + const auto detectStart = clock_t::now(); + char header[6] = {}; + if (!stlReadExact(context.inner.mainFile, header, 0ull, sizeof(header))) + return {}; + + const bool startsWithSolid = (std::strncmp(header, "solid ", 6u) == 0); + bool binaryBySize = false; + if (filesize >= 84ull) + { + uint32_t triCount = 0u; + if (stlReadExact(context.inner.mainFile, &triCount, 80ull, sizeof(triCount))) + { + const uint64_t expectedSize = 84ull + static_cast(triCount) * 50ull; + binaryBySize = (expectedSize == filesize); + } + } + + if (binaryBySize) + binary = true; + else if (!startsWithSolid) + binary = true; + else + binary = (stlGetNextToken(&context, token) != "solid"); + + if (binary) + context.fileOffset = 0ull; + detectMs = std::chrono::duration(clock_t::now() - detectStart).count(); + } + + core::vector positions; + core::vector normals; + if (binary) { - if 
(_file->getSize() < 80) + if (filesize < 84ull) return {}; - constexpr size_t headerOffset = 80; - context.fileOffset = headerOffset; //! skip header + uint32_t triangleCount32 = 0u; + if (!stlReadExact(context.inner.mainFile, &triangleCount32, 80ull, sizeof(triangleCount32))) + return {}; - uint32_t vertexCount = 0u; + triangleCount = triangleCount32; + const size_t dataSize = static_cast(triangleCount) * 50ull; + const size_t expectedSize = 84ull + dataSize; + if (filesize < expectedSize) + return {}; - system::IFile::success_t success; - context.inner.mainFile->read(success, &vertexCount, context.fileOffset, sizeof(vertexCount)); - if (!success) + core::vector payload; + payload.resize(dataSize); + + const auto ioStart = clock_t::now(); + if (!stlReadWithPolicy(context.inner.mainFile, payload.data(), 84ull, dataSize, ioPlan)) return {}; - context.fileOffset += sizeof(vertexCount); + ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); + + positions.reserve(static_cast(triangleCount) * 3ull); + normals.reserve(static_cast(triangleCount)); + + const auto parseStart = clock_t::now(); + const uint8_t* cursor = payload.data(); + const uint8_t* const end = cursor + payload.size(); + for (uint64_t tri = 0ull; tri < triangleCount; ++tri) + { + float nx = 0.f; + float ny = 0.f; + float nz = 0.f; + if (!stlReadFloatFromPayload(cursor, end, nx) || !stlReadFloatFromPayload(cursor, end, ny) || !stlReadFloatFromPayload(cursor, end, nz)) + return {}; + + core::vectorSIMDf fileNormal; + fileNormal.set(nx, ny, nz, 0.f); + const float fileLen2 = core::dot(fileNormal, fileNormal).X; + if (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) + normals.push_back(fileNormal); + else + normals.push_back(core::normalize(fileNormal)); - positions.reserve(3 * vertexCount); - normals.reserve(vertexCount); + core::vectorSIMDf p[3] = {}; + for (uint32_t i = 0u; i < 3u; ++i) + { + float x = 0.f; + float y = 0.f; + float z = 0.f; + if (!stlReadFloatFromPayload(cursor, end, x) || 
!stlReadFloatFromPayload(cursor, end, y) || !stlReadFloatFromPayload(cursor, end, z)) + return {}; + p[i].set(x, y, z, 0.f); + } + + positions.push_back(p[2u]); + positions.push_back(p[1u]); + positions.push_back(p[0u]); + + if ((normals.back() == core::vectorSIMDf()).all()) + { + normals.back().set(core::plane3dSIMDf( + *(positions.rbegin() + 2), + *(positions.rbegin() + 1), + *(positions.rbegin() + 0)).getNormal()); + } + + if (cursor + sizeof(uint16_t) > end) + return {}; + cursor += sizeof(uint16_t); + } + parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); } else - goNextLine(&context); // skip header - - uint16_t attrib = 0u; - token.reserve(32); - while (context.fileOffset < filesize) // TODO: check it { - if (!binary) + stlGoNextLine(&context); + token.reserve(32); + + const auto parseStart = clock_t::now(); + while (context.fileOffset < filesize) { - if (getNextToken(&context, token) != "facet") + if (stlGetNextToken(&context, token) != "facet") { if (token == "endsolid") break; return {}; } - if (getNextToken(&context, token) != "normal") - { + if (stlGetNextToken(&context, token) != "normal") return {}; - } - } - { - core::vectorSIMDf n; - getNextVector(&context, n, binary); - const float len2 = core::dot(n, n).X; - if (len2 > 0.f && std::abs(len2 - 1.f) < 1e-4f) - normals.push_back(n); + core::vectorSIMDf fileNormal; + if (!stlGetNextVector(&context, fileNormal, false)) + return {}; + + const float fileLen2 = core::dot(fileNormal, fileNormal).X; + if (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) + normals.push_back(fileNormal); else - normals.push_back(core::normalize(n)); - } + normals.push_back(core::normalize(fileNormal)); - if (!binary) - { - if (getNextToken(&context, token) != "outer" || getNextToken(&context, token) != "loop") + if (stlGetNextToken(&context, token) != "outer" || stlGetNextToken(&context, token) != "loop") return {}; - } - { - core::vectorSIMDf p[3]; + core::vectorSIMDf p[3] = {}; for (uint32_t i = 0u; i < 
3u; ++i) { - if (!binary) - { - if (getNextToken(&context, token) != "vertex") - return {}; - } - getNextVector(&context, p[i], binary); + if (stlGetNextToken(&context, token) != "vertex") + return {}; + if (!stlGetNextVector(&context, p[i], false)) + return {}; } - for (uint32_t i = 0u; i < 3u; ++i) // seems like in STL format vertices are ordered in clockwise manner... - positions.push_back(p[2u - i]); - } - if (!binary) - { - if (getNextToken(&context, token) != "endloop" || getNextToken(&context, token) != "endfacet") - return {}; - } - else - { - system::IFile::success_t success; - context.inner.mainFile->read(success, &attrib, context.fileOffset, sizeof(attrib)); - if (!success) + positions.push_back(p[2u]); + positions.push_back(p[1u]); + positions.push_back(p[0u]); + + if (stlGetNextToken(&context, token) != "endloop" || stlGetNextToken(&context, token) != "endfacet") return {}; - context.fileOffset += sizeof(attrib); - } - if ((normals.back() == core::vectorSIMDf()).all()) - { - normals.back().set( - core::plane3dSIMDf( + if ((normals.back() == core::vectorSIMDf()).all()) + { + normals.back().set(core::plane3dSIMDf( *(positions.rbegin() + 2), *(positions.rbegin() + 1), - *(positions.rbegin() + 0)).getNormal() - ); + *(positions.rbegin() + 0)).getNormal()); + } } - } // end while (_file->getPos() < filesize) + parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); + } if (positions.empty()) return {}; - core::vector posData(positions.size() * 3u); - core::vector normalData(positions.size() * 3u); - for (size_t i = 0u; i < positions.size(); ++i) - { - const auto& pos = positions[i]; - const auto& nrm = normals[i / 3u]; - const size_t base = i * 3u; - posData[base + 0u] = pos.pointer[0]; - posData[base + 1u] = pos.pointer[1]; - posData[base + 2u] = pos.pointer[2]; - normalData[base + 0u] = nrm.pointer[0]; - normalData[base + 1u] = nrm.pointer[1]; - normalData[base + 2u] = nrm.pointer[2]; - } + triangleCount = positions.size() / 3ull; + const 
uint64_t vertexCount = positions.size(); + const auto buildStart = clock_t::now(); auto geometry = core::make_smart_refctd_ptr(); geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - auto posView = createView(EF_R32G32B32_SFLOAT, positions.size(), posData.data()); - auto normalView = createView(EF_R32G32B32_SFLOAT, positions.size(), normalData.data()); - geometry->setPositionView(std::move(posView)); - geometry->setNormalView(std::move(normalView)); - CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); - CPolygonGeometryManipulator::recomputeRanges(geometry.get()); - CPolygonGeometryManipulator::recomputeAABB(geometry.get()); - auto meta = core::make_smart_refctd_ptr(); - return SAssetBundle(std::move(meta), { std::move(geometry) }); -} + auto posView = createView(EF_R32G32B32_SFLOAT, positions.size()); + auto normalView = createView(EF_R32G32B32_SFLOAT, positions.size()); + if (!posView || !normalView) + return {}; -bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const -{ - if (!_file || _file->getSize() <= 6u) - return false; + auto* posOut = reinterpret_cast(posView.getPointer()); + auto* normalOut = reinterpret_cast(normalView.getPointer()); + if (!posOut || !normalOut) + return {}; - char header[6]; + hlsl::shapes::AABB<3, hlsl::float32_t> parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); + bool hasParsedAABB = false; + auto addAABBPoint = [&parsedAABB, &hasParsedAABB](const hlsl::float32_t3& p)->void { - system::IFile::success_t success; - _file->read(success, header, 0, sizeof(header)); - if (!success) - return false; - } + if (!hasParsedAABB) + { + parsedAABB.minVx = p; + parsedAABB.maxVx = p; + hasParsedAABB = true; + return; + } + if (p.x < parsedAABB.minVx.x) parsedAABB.minVx.x = p.x; + if (p.y < parsedAABB.minVx.y) parsedAABB.minVx.y = p.y; + if (p.z < parsedAABB.minVx.z) parsedAABB.minVx.z = p.z; + if (p.x > parsedAABB.maxVx.x) parsedAABB.maxVx.x = p.x; + 
if (p.y > parsedAABB.maxVx.y) parsedAABB.maxVx.y = p.y; + if (p.z > parsedAABB.maxVx.z) parsedAABB.maxVx.z = p.z; + }; - if (strncmp(header, "solid ", 6u) == 0) - return true; - else + for (size_t i = 0u; i < positions.size(); ++i) { - if (_file->getSize() < 84u) - return false; - - uint32_t triangleCount; + const auto& pos = positions[i]; + const auto& nrm = normals[i / 3u]; + posOut[i] = { pos.X, pos.Y, pos.Z }; + normalOut[i] = { nrm.X, nrm.Y, nrm.Z }; + addAABBPoint(posOut[i]); + } - constexpr size_t readOffset = 80; - system::IFile::success_t success; - _file->read(success, &triangleCount, readOffset, sizeof(triangleCount)); - if (!success) - return false; + geometry->setPositionView(std::move(posView)); + geometry->setNormalView(std::move(normalView)); + buildMs = std::chrono::duration(clock_t::now() - buildStart).count(); - constexpr size_t STL_TRI_SZ = 50u; - return _file->getSize() == (STL_TRI_SZ * triangleCount + 84u); - } -} + const auto hashStart = clock_t::now(); + CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); + hashMs = std::chrono::duration(clock_t::now() - hashStart).count(); -//! 
Read 3d vector of floats -void CSTLMeshFileLoader::getNextVector(SContext* context, core::vectorSIMDf& vec, bool binary) const -{ - if (binary) + const auto aabbStart = clock_t::now(); + if (hasParsedAABB) { + geometry->visitAABB([&parsedAABB](auto& ref)->void { - system::IFile::success_t success; - context->inner.mainFile->read(success, &vec.X, context->fileOffset, 4); - context->fileOffset += success.getBytesProcessed(); - } - - { - system::IFile::success_t success; - context->inner.mainFile->read(success, &vec.Y, context->fileOffset, 4); - context->fileOffset += success.getBytesProcessed(); - } - - { - system::IFile::success_t success; - context->inner.mainFile->read(success, &vec.Z, context->fileOffset, 4); - context->fileOffset += success.getBytesProcessed(); - } + ref = std::remove_reference_t::create(); + ref.minVx.x = parsedAABB.minVx.x; + ref.minVx.y = parsedAABB.minVx.y; + ref.minVx.z = parsedAABB.minVx.z; + ref.minVx.w = 0.0; + ref.maxVx.x = parsedAABB.maxVx.x; + ref.maxVx.y = parsedAABB.maxVx.y; + ref.maxVx.z = parsedAABB.maxVx.z; + ref.maxVx.w = 0.0; + }); } else { - goNextWord(context); - std::string tmp; - - getNextToken(context, tmp); - sscanf(tmp.c_str(), "%f", &vec.X); - getNextToken(context, tmp); - sscanf(tmp.c_str(), "%f", &vec.Y); - getNextToken(context, tmp); - sscanf(tmp.c_str(), "%f", &vec.Z); + CPolygonGeometryManipulator::recomputeAABB(geometry.get()); } + aabbMs = std::chrono::duration(clock_t::now() - aabbStart).count(); + + const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); + _params.logger.log( + "STL loader perf: file=%s total=%.3f ms detect=%.3f io=%.3f parse=%.3f build=%.3f hash=%.3f aabb=%.3f binary=%d triangles=%llu vertices=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, + _file->getFileName().string().c_str(), + totalMs, + detectMs, + ioMs, + parseMs, + buildMs, + hashMs, + aabbMs, + binary ? 
1 : 0, + static_cast(triangleCount), + static_cast(vertexCount), + toString(_params.ioPolicy.strategy), + toString(ioPlan.strategy), + static_cast(ioPlan.chunkSizeBytes), + ioPlan.reason); + + auto meta = core::make_smart_refctd_ptr(); + return SAssetBundle(std::move(meta), { std::move(geometry) }); } -//! Read next word -const std::string& CSTLMeshFileLoader::getNextToken(SContext* context, std::string& token) const +bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const { - goNextWord(context); - char c; - token = ""; - - while (context->fileOffset != context->inner.mainFile->getSize()) - { - system::IFile::success_t success; - context->inner.mainFile->read(success, &c, context->fileOffset, sizeof(c)); - context->fileOffset += success.getBytesProcessed(); + (void)logger; + if (!_file || _file->getSize() <= 6u) + return false; - // found it, so leave - if (core::isspace(c)) - break; - token += c; - } - return token; -} + char header[6] = {}; + if (!stlReadExact(_file, header, 0ull, sizeof(header))) + return false; -//! skip to next word -void CSTLMeshFileLoader::goNextWord(SContext* context) const -{ - uint8_t c; - while (context->fileOffset != context->inner.mainFile->getSize()) // TODO: check it - { - system::IFile::success_t success; - context->inner.mainFile->read(success, &c, context->fileOffset, sizeof(c)); - context->fileOffset += success.getBytesProcessed(); + if (std::strncmp(header, "solid ", 6u) == 0) + return true; - // found it, so leave - if (!core::isspace(c)) - { - context->fileOffset -= success.getBytesProcessed(); - break; - } - } -} + if (_file->getSize() < 84u) + return false; -//! 
Read until line break is reached and stop at the next non-space character -void CSTLMeshFileLoader::goNextLine(SContext* context) const -{ - uint8_t c; - // look for newline characters - while (context->fileOffset != context->inner.mainFile->getSize()) // TODO: check it - { - system::IFile::success_t success; - context->inner.mainFile->read(success, &c, context->fileOffset, sizeof(c)); - context->fileOffset += success.getBytesProcessed(); + uint32_t triangleCount = 0u; + if (!stlReadExact(_file, &triangleCount, 80ull, sizeof(triangleCount))) + return false; - // found it, so leave - if (c == '\n' || c == '\r') - break; - } + constexpr size_t STL_TRI_SZ = sizeof(float) * 12ull + sizeof(uint16_t); + return _file->getSize() == (STL_TRI_SZ * triangleCount + 84u); } +} #endif // _NBL_COMPILE_WITH_STL_LOADER_ diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.h b/src/nbl/asset/interchange/CSTLMeshFileLoader.h index 1553f29049..535250e084 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.h +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.h @@ -9,7 +9,6 @@ #include "nbl/core/declarations.h" #include "nbl/asset/interchange/IGeometryLoader.h" -#include "nbl/asset/metadata/CSTLMetadata.h" namespace nbl::asset @@ -19,42 +18,13 @@ namespace nbl::asset class CSTLMeshFileLoader final : public IGeometryLoader { public: - - CSTLMeshFileLoader(asset::IAssetManager* _m_assetMgr); + explicit CSTLMeshFileLoader(asset::IAssetManager* _assetManager); asset::SAssetBundle loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const override; - const char** getAssociatedFileExtensions() const override - { - static const char* ext[]{ "stl", nullptr }; - return ext; - } - - private: - struct SContext - { - IAssetLoader::SAssetLoadContext inner; - uint32_t 
topHierarchyLevel; - IAssetLoader::IAssetLoaderOverride* loaderOverride; - - size_t fileOffset = {}; - }; - - virtual void initialize() override; - - // skips to the first non-space character available - void goNextWord(SContext* context) const; - // returns the next word - - const std::string& getNextToken(SContext* context, std::string& token) const; - // skip to next printable character after the first line break - void goNextLine(SContext* context) const; - //! Read 3d vector of floats - void getNextVector(SContext* context, core::vectorSIMDf& vec, bool binary) const; - - asset::IAssetManager* m_assetMgr; + const char** getAssociatedFileExtensions() const override; }; } // end namespace nbl::scene diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 5961c7dd4c..90c449e584 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -2,18 +2,49 @@ // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors -#include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" #include "CSTLMeshWriter.h" #include - -using namespace nbl; -using namespace nbl::asset; +#include +#include +#include #ifdef _NBL_COMPILE_WITH_STL_WRITER_ +namespace nbl::asset +{ + +namespace +{ + +struct SContext +{ + IAssetWriter::SAssetWriteContext writeContext; + SResolvedFileIOPolicy ioPlan = {}; + core::vector ioBuffer = {}; + size_t fileOffset = 0ull; +}; + +} + +static bool flushBytes(SContext* context); +static bool writeBytes(SContext* context, const void* data, size_t size); +static bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, 
core::vectorSIMDf& out2, uint32_t* outIdx); +static bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal); +static bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context); +static bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context); +static void getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s); +static bool writeFaceText( + const core::vectorSIMDf& v1, + const core::vectorSIMDf& v2, + const core::vectorSIMDf& v3, + const uint32_t* idx, + const asset::ICPUPolygonGeometry::SDataView& normalView, + const bool flipHandedness, + SContext* context); + CSTLMeshWriter::CSTLMeshWriter() { #ifdef _NBL_DEBUG @@ -25,12 +56,28 @@ CSTLMeshWriter::~CSTLMeshWriter() { } +const char** CSTLMeshWriter::getAssociatedFileExtensions() const +{ + static const char* ext[] = { "stl", nullptr }; + return ext; +} + +uint32_t CSTLMeshWriter::getSupportedFlags() +{ + return asset::EWF_BINARY; +} + +uint32_t CSTLMeshWriter::getForcedFlags() +{ + return 0u; +} + bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { if (!_override) getDefaultOverride(_override); - SAssetWriteContext inCtx{_params, _file}; + IAssetWriter::SAssetWriteContext inCtx{_params, _file}; const asset::ICPUPolygonGeometry* geom = #ifndef _NBL_DEBUG @@ -45,19 +92,113 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (!file) return false; - SContext context = { SAssetWriteContext{ inCtx.params, file} }; + SContext context = { IAssetWriter::SAssetWriteContext{ inCtx.params, file} }; _params.logger.log("WRITING STL: writing the file %s", system::ILogger::ELL_INFO, file->getFileName().string().c_str()); const asset::E_WRITER_FLAGS flags = _override->getAssetWritingFlags(context.writeContext, geom, 0u); - if (flags & asset::EWF_BINARY) - return writeMeshBinary(geom, &context); - return 
writeMeshASCII(geom, &context); + const bool binary = (flags & asset::EWF_BINARY) != 0u; + + uint64_t expectedSize = 0ull; + bool sizeKnown = false; + if (binary) + { + expectedSize = 84ull + static_cast(geom->getPrimitiveCount()) * 50ull; + sizeKnown = true; + } + + context.ioPlan = resolveFileIOPolicy(_params.ioPolicy, expectedSize, sizeKnown); + if (!context.ioPlan.valid) + { + _params.logger.log("STL writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), context.ioPlan.reason); + return false; + } + + if (context.ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile && sizeKnown) + context.ioBuffer.reserve(static_cast(expectedSize)); + else + context.ioBuffer.reserve(static_cast(std::min(context.ioPlan.chunkSizeBytes, 1ull << 20))); + + const bool written = binary ? writeMeshBinary(geom, &context) : writeMeshASCII(geom, &context); + if (!written) + return false; + + return flushBytes(&context); } -namespace +static bool flushBytes(SContext* context) { -inline bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, const uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx) + if (!context) + return false; + if (context->ioBuffer.empty()) + return true; + + size_t bytesWritten = 0ull; + const size_t totalBytes = context->ioBuffer.size(); + while (bytesWritten < totalBytes) + { + system::IFile::success_t success; + context->writeContext.outputFile->write( + success, + context->ioBuffer.data() + bytesWritten, + context->fileOffset + bytesWritten, + totalBytes - bytesWritten); + if (!success) + return false; + const size_t processed = success.getBytesProcessed(); + if (processed == 0ull) + return false; + bytesWritten += processed; + } + context->fileOffset += totalBytes; + context->ioBuffer.clear(); + return true; +} + +static bool 
writeBytes(SContext* context, const void* data, size_t size) +{ + if (!context || (!data && size != 0ull)) + return false; + if (size == 0ull) + return true; + + const uint8_t* src = reinterpret_cast(data); + switch (context->ioPlan.strategy) + { + case SResolvedFileIOPolicy::Strategy::WholeFile: + { + const size_t oldSize = context->ioBuffer.size(); + context->ioBuffer.resize(oldSize + size); + std::memcpy(context->ioBuffer.data() + oldSize, src, size); + return true; + } + case SResolvedFileIOPolicy::Strategy::Chunked: + default: + { + const size_t chunkSize = static_cast(context->ioPlan.chunkSizeBytes); + size_t remaining = size; + while (remaining > 0ull) + { + const size_t freeSpace = chunkSize - context->ioBuffer.size(); + const size_t toCopy = std::min(freeSpace, remaining); + const size_t oldSize = context->ioBuffer.size(); + context->ioBuffer.resize(oldSize + toCopy); + std::memcpy(context->ioBuffer.data() + oldSize, src, toCopy); + src += toCopy; + remaining -= toCopy; + + if (context->ioBuffer.size() == chunkSize) + { + if (!flushBytes(context)) + return false; + } + } + return true; + } + } +} + +static bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx) { uint32_t idx[3] = {}; const auto& indexView = geom->getIndexView(); @@ -94,7 +235,7 @@ inline bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeomet return true; } -inline bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal) +static bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal) { if (!normalView || !idx) return false; @@ -120,9 +261,8 @@ inline bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalVie 
outNormal = normal; return true; } -} -bool CSTLMeshWriter::writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) +static bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) { if (!geom) return false; @@ -136,47 +276,33 @@ bool CSTLMeshWriter::writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SCo return false; const auto& normalView = geom->getNormalView(); const bool flipHandedness = !(context->writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const uint32_t facenum = static_cast(geom->getPrimitiveCount()); // write STL MESH header const char headerTxt[] = "Irrlicht-baw Engine"; constexpr size_t HEADER_SIZE = 80u; - - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, headerTxt, context->fileOffset, sizeof(headerTxt)); - context->fileOffset += success.getBytesProcessed(); - } - const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); const int32_t sizeleft = HEADER_SIZE - sizeof(headerTxt) - static_cast(name.size()); + if (!writeBytes(context, headerTxt, sizeof(headerTxt))) + return false; + if (sizeleft < 0) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, name.c_str(), context->fileOffset, HEADER_SIZE - sizeof(headerTxt)); - context->fileOffset += success.getBytesProcessed(); + if (!writeBytes(context, name.c_str(), HEADER_SIZE - sizeof(headerTxt))) + return false; } else { const char buf[80] = {0}; - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, name.c_str(), context->fileOffset, name.size()); - context->fileOffset += success.getBytesProcessed(); - } - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, buf, context->fileOffset, sizeleft); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, name.c_str(), name.size())) + return false; + 
if (!writeBytes(context, buf, sizeleft)) + return false; } - const uint32_t facenum = static_cast(geom->getPrimitiveCount()); - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, &facenum, context->fileOffset, sizeof(facenum)); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, &facenum, sizeof(facenum))) + return false; for (uint32_t primIx = 0u; primIx < facenum; ++primIx) { @@ -209,38 +335,23 @@ bool CSTLMeshWriter::writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SCo normal = attrNormal; } - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, &normal, context->fileOffset, 12); - context->fileOffset += success.getBytesProcessed(); - } - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, &vertex1, context->fileOffset, 12); - context->fileOffset += success.getBytesProcessed(); - } - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, &vertex2, context->fileOffset, 12); - context->fileOffset += success.getBytesProcessed(); - } - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, &vertex3, context->fileOffset, 12); - context->fileOffset += success.getBytesProcessed(); - } - { - const uint16_t color = 0u; - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, &color, context->fileOffset, 2); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, &normal, 12)) + return false; + if (!writeBytes(context, &vertex1, 12)) + return false; + if (!writeBytes(context, &vertex2, 12)) + return false; + if (!writeBytes(context, &vertex3, 12)) + return false; + const uint16_t color = 0u; + if (!writeBytes(context, &color, sizeof(color))) + return false; } return true; } -bool CSTLMeshWriter::writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) +static bool 
writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) { if (!geom) return false; @@ -257,31 +368,19 @@ bool CSTLMeshWriter::writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SCon const char headerTxt[] = "Irrlicht-baw Engine "; - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, "solid ", context->fileOffset, 6); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, "solid ", 6)) + return false; - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, headerTxt, context->fileOffset, sizeof(headerTxt) - 1); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, headerTxt, sizeof(headerTxt) - 1)) + return false; const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, name.c_str(), context->fileOffset, name.size()); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, name.c_str(), name.size())) + return false; - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, "\n", context->fileOffset, 1); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, "\n", 1)) + return false; const uint32_t faceCount = static_cast(geom->getPrimitiveCount()); for (uint32_t primIx = 0u; primIx < faceCount; ++primIx) @@ -292,43 +391,32 @@ bool CSTLMeshWriter::writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SCon uint32_t idx[3] = {}; if (!decodeTriangle(geom, indexing, posView, primIx, v0, v1, v2, idx)) return false; - writeFaceText(v0, v1, v2, idx, normalView, flipHandedness, context); - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, "\n", context->fileOffset, 1); - context->fileOffset += success.getBytesProcessed(); - } + if 
(!writeFaceText(v0, v1, v2, idx, normalView, flipHandedness, context)) + return false; + if (!writeBytes(context, "\n", 1)) + return false; } - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, "endsolid ", context->fileOffset, 9); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, "endsolid ", 9)) + return false; - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, headerTxt, context->fileOffset, sizeof(headerTxt) - 1); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, headerTxt, sizeof(headerTxt) - 1)) + return false; - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, name.c_str(), context->fileOffset, name.size()); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, name.c_str(), name.size())) + return false; return true; } -void CSTLMeshWriter::getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s) const +static void getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s) { std::ostringstream tmp; tmp << v.X << " " << v.Y << " " << v.Z << "\n"; s = std::string(tmp.str().c_str()); } -void CSTLMeshWriter::writeFaceText( +static bool writeFaceText( const core::vectorSIMDf& v1, const core::vectorSIMDf& v2, const core::vectorSIMDf& v3, @@ -360,75 +448,47 @@ void CSTLMeshWriter::writeFaceText( normal = attrNormal; } - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, "facet normal ", context->fileOffset, 13); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, "facet normal ", 13)) + return false; getVectorAsStringLine(normal, tmp); - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, tmp.c_str(), context->fileOffset, tmp.size()); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, 
tmp.c_str(), tmp.size())) + return false; - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, " outer loop\n", context->fileOffset, 13); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, " outer loop\n", 13)) + return false; - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, " vertex ", context->fileOffset, 11); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, " vertex ", 11)) + return false; getVectorAsStringLine(vertex1, tmp); - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, tmp.c_str(), context->fileOffset, tmp.size()); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, tmp.c_str(), tmp.size())) + return false; - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, " vertex ", context->fileOffset, 11); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, " vertex ", 11)) + return false; getVectorAsStringLine(vertex2, tmp); - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, tmp.c_str(), context->fileOffset, tmp.size()); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, tmp.c_str(), tmp.size())) + return false; - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, " vertex ", context->fileOffset, 11); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, " vertex ", 11)) + return false; getVectorAsStringLine(vertex3, tmp); - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, tmp.c_str(), context->fileOffset, tmp.size()); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, tmp.c_str(), tmp.size())) + return false; - { - system::IFile::success_t success;; - 
context->writeContext.outputFile->write(success, " endloop\n", context->fileOffset, 10); - context->fileOffset += success.getBytesProcessed(); - } + if (!writeBytes(context, " endloop\n", 10)) + return false; + + if (!writeBytes(context, "endfacet\n", 9)) + return false; + + return true; +} - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, "endfacet\n", context->fileOffset, 9); - context->fileOffset += success.getBytesProcessed(); - } } #endif + diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.h b/src/nbl/asset/interchange/CSTLMeshWriter.h index a37c6129a8..23994d27da 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.h +++ b/src/nbl/asset/interchange/CSTLMeshWriter.h @@ -6,7 +6,6 @@ #define _NBL_ASSET_STL_MESH_WRITER_H_INCLUDED_ -#include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/interchange/IGeometryWriter.h" @@ -16,43 +15,16 @@ namespace nbl::asset //! class to write meshes, implementing a STL writer class CSTLMeshWriter : public IGeometryWriter { - protected: - virtual ~CSTLMeshWriter(); - public: CSTLMeshWriter(); + ~CSTLMeshWriter() override; - virtual const char** getAssociatedFileExtensions() const - { - static const char* ext[]{ "stl", nullptr }; - return ext; - } - - virtual uint32_t getSupportedFlags() override { return asset::EWF_BINARY; } - - virtual uint32_t getForcedFlags() { return 0u; } - - virtual bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; - - private: - - struct SContext - { - SAssetWriteContext writeContext; - size_t fileOffset; - }; - - // write binary format - bool writeMeshBinary(const ICPUPolygonGeometry* geom, SContext* context); - - // write text format - bool writeMeshASCII(const ICPUPolygonGeometry* geom, SContext* context); + const char** getAssociatedFileExtensions() const override; - // create vector output with line end into string - void getVectorAsStringLine(const core::vectorSIMDf& v, 
std::string& s) const; + uint32_t getSupportedFlags() override; + uint32_t getForcedFlags() override; - // write face information to file - void writeFaceText(const core::vectorSIMDf& v1, const core::vectorSIMDf& v2, const core::vectorSIMDf& v3, const uint32_t* idx, const asset::ICPUPolygonGeometry::SDataView& normalView, const bool flipHandedness, SContext* context); + bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; } // end namespace diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index b4f2f2ef06..d591b3c63b 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -14,11 +14,97 @@ #include "nbl/asset/utils/CForsythVertexCacheOptimizer.h" #include "nbl/asset/utils/COverdrawPolygonGeometryOptimizer.h" #include "nbl/asset/utils/COBBGenerator.h" +#include "nbl/asset/IPreHashed.h" namespace nbl::asset { +core::blake3_hash_t CPolygonGeometryManipulator::computeDeterministicContentHash(const ICPUPolygonGeometry* geo) +{ + if (!geo) + return IPreHashed::INVALID_HASH; + + const auto* indexing = geo->getIndexingCallback(); + if (!indexing) + return IPreHashed::INVALID_HASH; + + // Keep this as a standalone helper instead of an IPreHashed override on geometry. + // A polygon geometry is a composition of shared views over external buffers, not a single owned payload. + // Caching a hash inside the geometry object would need global invalidation across external buffer mutations. 
+ core::blake3_hasher hasher; + hasher << indexing->degree(); + hasher << indexing->rate(); + hasher << indexing->knownTopology(); + + auto hashView = [&](const IGeometry::SDataView& view)->bool + { + if (!view) + { + hasher << false; + return true; + } + + hasher << true; + hasher << view.composed.format; + hasher << view.composed.stride; + hasher << view.composed.getStride(); + hasher << view.composed.rangeFormat; + hasher << view.src.offset; + hasher << view.src.actualSize(); + + const auto* const buffer = view.src.buffer.get(); + if (!buffer || buffer->missingContent()) + return false; + + const auto* const data = reinterpret_cast(buffer->getPointer()); + if (!data) + return false; + + hasher.update(data + view.src.offset, view.src.actualSize()); + return true; + }; + + if (!hashView(geo->getPositionView())) + return IPreHashed::INVALID_HASH; + if (!hashView(geo->getIndexView())) + return IPreHashed::INVALID_HASH; + if (!hashView(geo->getNormalView())) + return IPreHashed::INVALID_HASH; + + hasher << geo->getJointCount(); + if (geo->isSkinned()) + { + if (const auto* jointOBBView = geo->getJointOBBView(); jointOBBView) + { + if (!hashView(*jointOBBView)) + return IPreHashed::INVALID_HASH; + } + else + hasher << false; + + const auto& jointWeightViews = geo->getJointWeightViews(); + hasher << jointWeightViews.size(); + for (const auto& view : jointWeightViews) + { + if (!hashView(view.indices)) + return IPreHashed::INVALID_HASH; + if (!hashView(view.weights)) + return IPreHashed::INVALID_HASH; + } + } + + const auto& auxAttributeViews = geo->getAuxAttributeViews(); + hasher << auxAttributeViews.size(); + for (const auto& view : auxAttributeViews) + { + if (!hashView(view)) + return IPreHashed::INVALID_HASH; + } + + return static_cast(hasher); +} + core::smart_refctd_ptr CPolygonGeometryManipulator::createUnweldedList(const ICPUPolygonGeometry* inGeo) { From 9510dac8a4bc6298f44ee5a0442cf8404a77a032 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 
Feb 2026 19:59:03 +0100 Subject: [PATCH 004/118] Save loader writer optimization baseline --- .../asset/interchange/COBJMeshFileLoader.cpp | 57 +-- src/nbl/asset/interchange/COBJMeshWriter.cpp | 26 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 164 +++++-- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 36 +- .../asset/interchange/CSTLMeshFileLoader.cpp | 390 +++++++++------ src/nbl/asset/interchange/CSTLMeshWriter.cpp | 449 ++++++++++++++---- 6 files changed, 782 insertions(+), 340 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 6d3da1e1ba..12bc57617e 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -34,7 +34,7 @@ struct ObjVertexKey int32_t uv; int32_t normal; - inline bool operator==(const ObjVertexKey& other) const + bool operator==(const ObjVertexKey& other) const { return pos == other.pos && uv == other.uv && normal == other.normal; } @@ -42,7 +42,7 @@ struct ObjVertexKey struct ObjVertexKeyHash { - inline size_t operator()(const ObjVertexKey& key) const noexcept + size_t operator()(const ObjVertexKey& key) const noexcept { size_t h = static_cast(static_cast(key.pos)); h ^= static_cast(static_cast(key.uv)) + 0x9e3779b9 + (h << 6) + (h >> 2); @@ -51,22 +51,30 @@ struct ObjVertexKeyHash } }; -struct Float3 -{ - float x; - float y; - float z; -}; - -struct Float2 -{ - float x; - float y; -}; +using Float3 = hlsl::float32_t3; +using Float2 = hlsl::float32_t2; static_assert(sizeof(Float3) == sizeof(float) * 3ull); static_assert(sizeof(Float2) == sizeof(float) * 2ull); +void extendAABB(hlsl::shapes::AABB<3, hlsl::float32_t>& aabb, bool& hasAABB, const Float3& p) +{ + if (!hasAABB) + { + aabb.minVx = p; + aabb.maxVx = p; + hasAABB = true; + return; + } + + if (p.x < aabb.minVx.x) aabb.minVx.x = p.x; + if (p.y < aabb.minVx.y) aabb.minVx.y = p.y; + if (p.z < aabb.minVx.z) aabb.minVx.z = p.z; + if (p.x > 
aabb.maxVx.x) aabb.maxVx.x = p.x; + if (p.y > aabb.maxVx.y) aabb.maxVx.y = p.y; + if (p.z > aabb.maxVx.z) aabb.maxVx.z = p.z; +} + bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, double& ioMs) { if (!file || !dst) @@ -393,24 +401,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const auto& srcPos = positions[idx[0]]; outPositions.push_back(srcPos); - const hlsl::float32_t3 p = { srcPos.x, srcPos.y, srcPos.z }; - if (!hasParsedAABB) - { - parsedAABB.minVx = p; - parsedAABB.maxVx = p; - hasParsedAABB = true; - } - else - { - if (p.x < parsedAABB.minVx.x) parsedAABB.minVx.x = p.x; - if (p.y < parsedAABB.minVx.y) parsedAABB.minVx.y = p.y; - if (p.z < parsedAABB.minVx.z) parsedAABB.minVx.z = p.z; - if (p.x > parsedAABB.maxVx.x) parsedAABB.maxVx.x = p.x; - if (p.y > parsedAABB.maxVx.y) parsedAABB.maxVx.y = p.y; - if (p.z > parsedAABB.maxVx.z) parsedAABB.maxVx.z = p.z; - } + extendAABB(parsedAABB, hasParsedAABB, srcPos); - Float2 uv = { 0.f, 0.f }; + Float2 uv(0.f, 0.f); if (idx[1] >= 0 && static_cast(idx[1]) < uvs.size()) { uv = uvs[idx[1]]; @@ -418,7 +411,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } outUVs.push_back(uv); - Float3 normal = { 0.f, 0.f, 1.f }; + Float3 normal(0.f, 0.f, 1.f); if (idx[2] >= 0 && static_cast(idx[2]) < normals.size()) { normal = normals[idx[2]]; diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index e3c3f9d2a1..59f42b8225 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -40,13 +40,19 @@ uint32_t COBJMeshWriter::getForcedFlags() return 0u; } -static inline bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) +namespace obj_writer_detail +{ + +constexpr size_t ApproxObjBytesPerVertex = 96ull; +constexpr size_t ApproxObjBytesPerFace = 
48ull; + +bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) { out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); return view.decodeElement(ix, out); } -static inline const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) +const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) { if (!view) return nullptr; @@ -57,7 +63,7 @@ static inline const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeomet return reinterpret_cast(view.getPointer()); } -static inline const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& view) +const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& view) { if (!view) return nullptr; @@ -68,7 +74,7 @@ static inline const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeomet return reinterpret_cast(view.getPointer()); } -static inline void appendUInt(std::string& out, const uint32_t value) +void appendUInt(std::string& out, const uint32_t value) { std::array buf = {}; const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); @@ -76,7 +82,7 @@ static inline void appendUInt(std::string& out, const uint32_t value) out.append(buf.data(), static_cast(res.ptr - buf.data())); } -static inline void appendFloatFixed6(std::string& out, double value) +void appendFloatFixed6(std::string& out, double value) { std::array buf = {}; const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value, std::chars_format::fixed, 6); @@ -91,10 +97,14 @@ static inline void appendFloatFixed6(std::string& out, double value) out.append(buf.data(), static_cast(written)); } -static bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount); +bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount); + +} // namespace obj_writer_detail bool 
COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { + using namespace obj_writer_detail; + if (!_override) getDefaultOverride(_override); @@ -203,7 +213,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool flipHandedness = !(flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); std::string output; - output.reserve(vertexCount * 96ull + faceCount * 48ull); + output.reserve(vertexCount * ApproxObjBytesPerVertex + faceCount * ApproxObjBytesPerFace); output += "# Nabla OBJ\n"; @@ -355,7 +365,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return writeBufferWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size()); } -static bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount) +bool obj_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount) { if (!file || (!data && byteCount != 0ull)) return false; diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index c214b4c3b9..a7042601ed 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -61,7 +61,7 @@ bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste } template -inline T byteswap(const T& v) +T byteswap(const T& v) { T retval; auto it = reinterpret_cast(&v); @@ -71,6 +71,7 @@ inline T byteswap(const T& v) struct SContext { + static constexpr uint64_t ReadWindowPaddingBytes = 1ull; // struct SProperty @@ -97,7 +98,7 @@ struct SContext return EF_UNKNOWN; } - inline bool isList() const {return type==EF_UNKNOWN && asset::isIntegerFormat(list.countType) && asset::isIntegerFormat(list.itemType);} + bool 
isList() const {return type==EF_UNKNOWN && asset::isIntegerFormat(list.countType) && asset::isIntegerFormat(list.itemType);} void skip(SContext& _ctx) const { @@ -149,10 +150,12 @@ struct SContext uint32_t KnownSize; }; - inline void init(size_t _ioReadWindowSize = 50ull << 10) + static constexpr size_t DefaultIoReadWindowBytes = 50ull << 10; + + void init(size_t _ioReadWindowSize = DefaultIoReadWindowBytes) { - ioReadWindowSize = std::max(_ioReadWindowSize, 50ull << 10); - Buffer.resize(ioReadWindowSize + 1ull, '\0'); + ioReadWindowSize = std::max(_ioReadWindowSize, DefaultIoReadWindowBytes); + Buffer.resize(ioReadWindowSize + ReadWindowPaddingBytes, '\0'); EndPointer = StartPointer = Buffer.data(); LineEndPointer = EndPointer-1; @@ -180,7 +183,7 @@ struct SContext EndPointer = newStart+length; // read data from the file - const size_t usableBufferSize = Buffer.size() > 0ull ? Buffer.size() - 1ull : 0ull; + const size_t usableBufferSize = Buffer.size() > 0ull ? Buffer.size() - ReadWindowPaddingBytes : 0ull; if (usableBufferSize <= length) { EndOfFile = true; @@ -393,7 +396,7 @@ struct SContext uint32_t stride; E_FORMAT dstFmt; }; - inline void readVertex(const IAssetLoader::SAssetLoadParams& _params, const SElement& el) + void readVertex(const IAssetLoader::SAssetLoadParams& _params, const SElement& el) { assert(el.Name=="vertex"); assert(el.Properties.size()==vertAttrIts.size()); @@ -555,10 +558,15 @@ struct SContext return false; const E_FORMAT srcIndexFmt = prop.list.itemType; - if (srcIndexFmt != EF_R32_UINT && srcIndexFmt != EF_R16_UINT) + const bool isSrcU32 = srcIndexFmt == EF_R32_UINT; + const bool isSrcS32 = srcIndexFmt == EF_R32_SINT; + const bool isSrcU16 = srcIndexFmt == EF_R16_UINT; + const bool isSrcS16 = srcIndexFmt == EF_R16_SINT; + if (!isSrcU32 && !isSrcS32 && !isSrcU16 && !isSrcS16) return false; - const size_t indexSize = srcIndexFmt == EF_R32_UINT ? 
sizeof(uint32_t) : sizeof(uint16_t); + const bool is32Bit = isSrcU32 || isSrcS32; + const size_t indexSize = is32Bit ? sizeof(uint32_t) : sizeof(uint16_t); const size_t minTriangleRecordSize = sizeof(uint8_t) + indexSize * 3u; const size_t minBytesNeeded = element.Count * minTriangleRecordSize; if (StartPointer + minBytesNeeded <= EndPointer) @@ -583,20 +591,36 @@ struct SContext uint32_t* out = _outIndices.data() + oldSize; const uint8_t* ptr = reinterpret_cast(StartPointer); - if (srcIndexFmt == EF_R32_UINT) + if (is32Bit) { for (size_t j = 0u; j < element.Count; ++j) { ++ptr; // list count - uint32_t i0 = 0u; - uint32_t i1 = 0u; - uint32_t i2 = 0u; - std::memcpy(&i0, ptr, sizeof(i0)); - ptr += sizeof(i0); - std::memcpy(&i1, ptr, sizeof(i1)); - ptr += sizeof(i1); - std::memcpy(&i2, ptr, sizeof(i2)); - ptr += sizeof(i2); + uint32_t i0 = 0u, i1 = 0u, i2 = 0u; + if (isSrcU32) + { + std::memcpy(&i0, ptr, sizeof(i0)); + ptr += sizeof(i0); + std::memcpy(&i1, ptr, sizeof(i1)); + ptr += sizeof(i1); + std::memcpy(&i2, ptr, sizeof(i2)); + ptr += sizeof(i2); + } + else + { + int32_t s0 = 0, s1 = 0, s2 = 0; + std::memcpy(&s0, ptr, sizeof(s0)); + ptr += sizeof(s0); + std::memcpy(&s1, ptr, sizeof(s1)); + ptr += sizeof(s1); + std::memcpy(&s2, ptr, sizeof(s2)); + ptr += sizeof(s2); + if (s0 < 0 || s1 < 0 || s2 < 0) + return false; + i0 = static_cast(s0); + i1 = static_cast(s1); + i2 = static_cast(s2); + } _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); out[0] = i0; out[1] = i1; @@ -609,18 +633,35 @@ struct SContext for (size_t j = 0u; j < element.Count; ++j) { ++ptr; // list count - uint16_t t0 = 0u; - uint16_t t1 = 0u; - uint16_t t2 = 0u; - std::memcpy(&t0, ptr, sizeof(t0)); - ptr += sizeof(t0); - std::memcpy(&t1, ptr, sizeof(t1)); - ptr += sizeof(t1); - std::memcpy(&t2, ptr, sizeof(t2)); - ptr += sizeof(t2); - const uint32_t i0 = t0; - const uint32_t i1 = t1; - const uint32_t i2 = t2; + uint32_t i0 = 0u, i1 = 0u, i2 = 0u; + if (isSrcU16) + { + uint16_t t0 = 
0u, t1 = 0u, t2 = 0u; + std::memcpy(&t0, ptr, sizeof(t0)); + ptr += sizeof(t0); + std::memcpy(&t1, ptr, sizeof(t1)); + ptr += sizeof(t1); + std::memcpy(&t2, ptr, sizeof(t2)); + ptr += sizeof(t2); + i0 = t0; + i1 = t1; + i2 = t2; + } + else + { + int16_t s0 = 0, s1 = 0, s2 = 0; + std::memcpy(&s0, ptr, sizeof(s0)); + ptr += sizeof(s0); + std::memcpy(&s1, ptr, sizeof(s1)); + ptr += sizeof(s1); + std::memcpy(&s2, ptr, sizeof(s2)); + ptr += sizeof(s2); + if (s0 < 0 || s1 < 0 || s2 < 0) + return false; + i0 = static_cast(s0); + i1 = static_cast(s1); + i2 = static_cast(s2); + } _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); out[0] = i0; out[1] = i1; @@ -649,23 +690,45 @@ struct SContext outCount = static_cast(*StartPointer++); return true; }; - auto readIndex = [&ensureBytes, this, srcIndexFmt](uint32_t& out)->bool + auto readIndex = [&ensureBytes, this, srcIndexFmt, is32Bit, isSrcU32, isSrcU16](uint32_t& out)->bool { - if (srcIndexFmt == EF_R32_UINT) + if (is32Bit) { if (!ensureBytes(sizeof(uint32_t))) return false; - std::memcpy(&out, StartPointer, sizeof(uint32_t)); + if (isSrcU32) + { + std::memcpy(&out, StartPointer, sizeof(uint32_t)); + } + else + { + int32_t v = 0; + std::memcpy(&v, StartPointer, sizeof(v)); + if (v < 0) + return false; + out = static_cast(v); + } StartPointer += sizeof(uint32_t); return true; } if (!ensureBytes(sizeof(uint16_t))) return false; - uint16_t v = 0u; - std::memcpy(&v, StartPointer, sizeof(uint16_t)); + if (isSrcU16) + { + uint16_t v = 0u; + std::memcpy(&v, StartPointer, sizeof(uint16_t)); + out = v; + } + else + { + int16_t v = 0; + std::memcpy(&v, StartPointer, sizeof(int16_t)); + if (v < 0) + return false; + out = static_cast(v); + } StartPointer += sizeof(uint16_t); - out = v; return true; }; @@ -724,7 +787,7 @@ struct SContext IAssetLoader::IAssetLoaderOverride* loaderOverride; // input buffer must be at least twice as long as the longest line in the file core::vector Buffer; - size_t ioReadWindowSize = 50ull << 
10; + size_t ioReadWindowSize = DefaultIoReadWindowBytes; core::vector ElementList = {}; char* StartPointer = nullptr, *EndPointer = nullptr, *LineEndPointer = nullptr; int32_t LineLength = 0; @@ -752,6 +815,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa double indexBuildMs = 0.0; double aabbMs = 0.0; uint64_t faceCount = 0u; + uint64_t fastFaceElementCount = 0u; uint32_t maxIndexRead = 0u; const uint64_t fileSize = _file->getSize(); const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, fileSize, true); @@ -769,8 +833,8 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa _hierarchyLevel, _override }; - const uint64_t desiredReadWindow = ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? (fileSize + 1ull) : ioPlan.chunkSizeBytes; - const uint64_t safeReadWindow = std::min(desiredReadWindow, static_cast(std::numeric_limits::max() - 1ull)); + const uint64_t desiredReadWindow = ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? 
(fileSize + SContext::ReadWindowPaddingBytes) : ioPlan.chunkSizeBytes; + const uint64_t safeReadWindow = std::min(desiredReadWindow, static_cast(std::numeric_limits::max() - SContext::ReadWindowPaddingBytes)); ctx.init(static_cast(safeReadWindow)); // start with empty mesh @@ -1162,12 +1226,19 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa else if (el.Name=="face") { const auto faceStart = clock_t::now(); - indices.reserve(indices.size() + el.Count * 3u); - for (size_t j=0; j(clock_t::now() - faceStart).count(); } @@ -1225,7 +1296,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); _params.logger.log( - "PLY loader perf: file=%s total=%.3f ms header=%.3f vertex=%.3f face=%.3f skip=%.3f hash_range=%.3f index=%.3f aabb=%.3f binary=%d verts=%llu faces=%llu idx=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "PLY loader perf: file=%s total=%.3f ms header=%.3f vertex=%.3f face=%.3f skip=%.3f hash_range=%.3f index=%.3f aabb=%.3f binary=%d verts=%llu faces=%llu idx=%llu face_fast=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), totalMs, @@ -1240,6 +1311,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa static_cast(vertCount), static_cast(faceCount), static_cast(indices.size()), + static_cast(fastFaceElementCount), toString(_params.ioPolicy.strategy), toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 88a4bff69b..fb34c24748 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -42,13 +42,19 @@ uint32_t CPLYMeshWriter::getForcedFlags() return 0u; } -static inline bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const 
size_t ix, hlsl::float64_t4& out) +namespace ply_writer_detail +{ + +constexpr size_t ApproxPlyTextBytesPerVertex = 96ull; +constexpr size_t ApproxPlyTextBytesPerFace = 32ull; + +bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) { out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); return view.decodeElement(ix, out); } -static inline const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) +const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) { if (!view) return nullptr; @@ -59,7 +65,7 @@ static inline const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeomet return reinterpret_cast(view.getPointer()); } -static inline const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& view) +const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& view) { if (!view) return nullptr; @@ -70,7 +76,7 @@ static inline const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeomet return reinterpret_cast(view.getPointer()); } -static inline void appendUInt(std::string& out, const uint32_t value) +void appendUInt(std::string& out, const uint32_t value) { std::array buf = {}; const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); @@ -78,7 +84,7 @@ static inline void appendUInt(std::string& out, const uint32_t value) out.append(buf.data(), static_cast(res.ptr - buf.data())); } -static inline void appendFloatFixed6(std::string& out, double value) +void appendFloatFixed6(std::string& out, double value) { std::array buf = {}; const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value, std::chars_format::fixed, 6); @@ -93,7 +99,7 @@ static inline void appendFloatFixed6(std::string& out, double value) out.append(buf.data(), static_cast(written)); } -static inline void appendVec(std::string& out, const double* values, size_t count, bool flipVectors = false) +void appendVec(std::string& out, 
const double* values, size_t count, bool flipVectors = false) { constexpr size_t xID = 0u; for (size_t i = 0u; i < count; ++i) @@ -104,12 +110,16 @@ static inline void appendVec(std::string& out, const double* values, size_t coun } } -static bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount); -static bool writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors); -static bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors); +bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount); +bool writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors); +bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors); + +} // namespace ply_writer_detail bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { + using namespace ply_writer_detail; + if (!_override) getDefaultOverride(_override); @@ -269,7 +279,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } std::string body; - body.reserve(vertexCount * 96ull + faceCount * 32ull); + body.reserve(vertexCount * ApproxPlyTextBytesPerVertex + faceCount * ApproxPlyTextBytesPerFace); if (!writeText(geom, uvView, writeNormals, vertexCount, indices, faceCount, body, flipVectors)) return false; @@ 
-284,7 +294,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return writeBufferWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size()); } -static bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount) +bool ply_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount) { if (!file || (!data && byteCount != 0ull)) return false; @@ -318,7 +328,7 @@ static bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPoli } } -static bool writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors) +bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors) { if (!dst) return false; @@ -411,7 +421,7 @@ static bool writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeomet return true; } -static bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors) +bool ply_writer_detail::writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors) { const auto& positionView = geom->getPositionView(); const auto& normalView = geom->getNormalView(); diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 4601aca836..7a98d13a1d 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ 
b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -28,7 +28,20 @@ struct SSTLContext size_t fileOffset = 0ull; }; -static bool stlReadExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes) +constexpr size_t StlTextProbeBytes = 6ull; +constexpr size_t StlBinaryHeaderBytes = 80ull; +constexpr size_t StlTriangleCountBytes = sizeof(uint32_t); +constexpr size_t StlBinaryPrefixBytes = StlBinaryHeaderBytes + StlTriangleCountBytes; +constexpr size_t StlTriangleFloatCount = 12ull; +constexpr size_t StlTriangleFloatBytes = sizeof(float) * StlTriangleFloatCount; +constexpr size_t StlTriangleAttributeBytes = sizeof(uint16_t); +constexpr size_t StlTriangleRecordBytes = StlTriangleFloatBytes + StlTriangleAttributeBytes; +constexpr size_t StlVerticesPerTriangle = 3ull; +constexpr size_t StlFloatChannelsPerVertex = 3ull; +constexpr size_t StlFloatsPerTriangleVertices = StlVerticesPerTriangle * StlFloatChannelsPerVertex; +constexpr size_t StlFloatsPerTriangleOutput = StlFloatsPerTriangleVertices; + +bool stlReadExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes) { if (!file || (!dst && bytes != 0ull)) return false; @@ -40,7 +53,7 @@ static bool stlReadExact(system::IFile* file, void* dst, const size_t offset, co return success && success.getBytesProcessed() == bytes; } -static bool stlReadWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan) +bool stlReadWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan) { if (!file || (!dst && bytes != 0ull)) return false; @@ -70,7 +83,7 @@ static bool stlReadWithPolicy(system::IFile* file, uint8_t* dst, const size_t of } } -static bool stlReadU8(SSTLContext* context, uint8_t& out) +bool stlReadU8(SSTLContext* context, uint8_t& out) { if (!context) return false; @@ -83,7 +96,7 @@ static bool stlReadU8(SSTLContext* context, uint8_t& out) return true; } 
-static bool stlReadF32(SSTLContext* context, float& out) +bool stlReadF32(SSTLContext* context, float& out) { if (!context) return false; @@ -96,7 +109,7 @@ static bool stlReadF32(SSTLContext* context, float& out) return true; } -static void stlGoNextWord(SSTLContext* context) +void stlGoNextWord(SSTLContext* context) { if (!context) return; @@ -115,7 +128,7 @@ static void stlGoNextWord(SSTLContext* context) } } -static const std::string& stlGetNextToken(SSTLContext* context, std::string& token) +const std::string& stlGetNextToken(SSTLContext* context, std::string& token) { stlGoNextWord(context); token.clear(); @@ -136,7 +149,7 @@ static const std::string& stlGetNextToken(SSTLContext* context, std::string& tok return token; } -static void stlGoNextLine(SSTLContext* context) +void stlGoNextLine(SSTLContext* context) { if (!context) return; @@ -151,19 +164,15 @@ static void stlGoNextLine(SSTLContext* context) } } -static bool stlGetNextVector(SSTLContext* context, core::vectorSIMDf& vec, const bool binary) +bool stlGetNextVector(SSTLContext* context, hlsl::float32_t3& vec, const bool binary) { if (!context) return false; if (binary) { - float x = 0.f; - float y = 0.f; - float z = 0.f; - if (!stlReadF32(context, x) || !stlReadF32(context, y) || !stlReadF32(context, z)) + if (!stlReadF32(context, vec.x) || !stlReadF32(context, vec.y) || !stlReadF32(context, vec.z)) return false; - vec.set(x, y, z, 0.f); return true; } @@ -171,24 +180,72 @@ static bool stlGetNextVector(SSTLContext* context, core::vectorSIMDf& vec, const std::string tmp; if (stlGetNextToken(context, tmp).empty()) return false; - std::sscanf(tmp.c_str(), "%f", &vec.X); + std::sscanf(tmp.c_str(), "%f", &vec.x); if (stlGetNextToken(context, tmp).empty()) return false; - std::sscanf(tmp.c_str(), "%f", &vec.Y); + std::sscanf(tmp.c_str(), "%f", &vec.y); if (stlGetNextToken(context, tmp).empty()) return false; - std::sscanf(tmp.c_str(), "%f", &vec.Z); - vec.W = 0.f; + std::sscanf(tmp.c_str(), "%f", &vec.z); 
return true; } -static bool stlReadFloatFromPayload(const uint8_t*& cursor, const uint8_t* const end, float& out) +hlsl::float32_t3 stlNormalizeOrZero(const hlsl::float32_t3& v) { - if (cursor + sizeof(float) > end) - return false; - std::memcpy(&out, cursor, sizeof(float)); - cursor += sizeof(float); - return true; + const float len2 = hlsl::dot(v, v); + if (len2 <= 0.f) + return hlsl::float32_t3(0.f, 0.f, 0.f); + return hlsl::normalize(v); +} + +hlsl::float32_t3 stlComputeFaceNormal(const hlsl::float32_t3& a, const hlsl::float32_t3& b, const hlsl::float32_t3& c) +{ + return stlNormalizeOrZero(hlsl::cross(b - a, c - a)); +} + +hlsl::float32_t3 stlResolveStoredNormal(const hlsl::float32_t3& fileNormal) +{ + const float fileLen2 = hlsl::dot(fileNormal, fileNormal); + if (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) + return fileNormal; + return stlNormalizeOrZero(fileNormal); +} + +void stlPushTriangleReversed(const hlsl::float32_t3 (&p)[3], core::vector& positions) +{ + positions.push_back(p[2u]); + positions.push_back(p[1u]); + positions.push_back(p[0u]); +} + +void stlFixLastFaceNormal(core::vector& normals, const core::vector& positions) +{ + if (normals.empty() || positions.size() < 3ull) + return; + + const auto& lastNormal = normals.back(); + if (hlsl::dot(lastNormal, lastNormal) > 0.f) + return; + + normals.back() = stlComputeFaceNormal(*(positions.rbegin() + 2), *(positions.rbegin() + 1), *(positions.rbegin() + 0)); +} + +void stlExtendAABB(hlsl::shapes::AABB<3, hlsl::float32_t>& aabb, bool& hasAABB, const hlsl::float32_t3& p) +{ + if (!hasAABB) + { + aabb.minVx = p; + aabb.maxVx = p; + hasAABB = true; + return; + } + + if (p.x < aabb.minVx.x) aabb.minVx.x = p.x; + if (p.y < aabb.minVx.y) aabb.minVx.y = p.y; + if (p.z < aabb.minVx.z) aabb.minVx.z = p.z; + if (p.x > aabb.maxVx.x) aabb.maxVx.x = p.x; + if (p.y > aabb.maxVx.y) aabb.maxVx.y = p.y; + if (p.z > aabb.maxVx.z) aabb.maxVx.z = p.z; } CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager* 
_assetManager) @@ -229,7 +286,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa }; const size_t filesize = context.inner.mainFile->getSize(); - if (filesize < 6ull) + if (filesize < StlTextProbeBytes) return {}; const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(filesize), true); @@ -243,18 +300,18 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa std::string token; { const auto detectStart = clock_t::now(); - char header[6] = {}; + char header[StlTextProbeBytes] = {}; if (!stlReadExact(context.inner.mainFile, header, 0ull, sizeof(header))) return {}; - const bool startsWithSolid = (std::strncmp(header, "solid ", 6u) == 0); + const bool startsWithSolid = (std::strncmp(header, "solid ", StlTextProbeBytes) == 0); bool binaryBySize = false; - if (filesize >= 84ull) + if (filesize >= StlBinaryPrefixBytes) { uint32_t triCount = 0u; - if (stlReadExact(context.inner.mainFile, &triCount, 80ull, sizeof(triCount))) + if (stlReadExact(context.inner.mainFile, &triCount, StlBinaryHeaderBytes, sizeof(triCount))) { - const uint64_t expectedSize = 84ull + static_cast(triCount) * 50ull; + const uint64_t expectedSize = StlBinaryPrefixBytes + static_cast(triCount) * StlTriangleRecordBytes; binaryBySize = (expectedSize == filesize); } } @@ -271,21 +328,24 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa detectMs = std::chrono::duration(clock_t::now() - detectStart).count(); } - core::vector positions; - core::vector normals; + auto geometry = core::make_smart_refctd_ptr(); + geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + hlsl::shapes::AABB<3, hlsl::float32_t> parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); + bool hasParsedAABB = false; + uint64_t vertexCount = 0ull; if (binary) { - if (filesize < 84ull) + if (filesize < StlBinaryPrefixBytes) return {}; uint32_t triangleCount32 = 0u; - if (!stlReadExact(context.inner.mainFile, 
&triangleCount32, 80ull, sizeof(triangleCount32))) + if (!stlReadExact(context.inner.mainFile, &triangleCount32, StlBinaryHeaderBytes, sizeof(triangleCount32))) return {}; triangleCount = triangleCount32; - const size_t dataSize = static_cast(triangleCount) * 50ull; - const size_t expectedSize = 84ull + dataSize; + const size_t dataSize = static_cast(triangleCount) * StlTriangleRecordBytes; + const size_t expectedSize = StlBinaryPrefixBytes + dataSize; if (filesize < expectedSize) return {}; @@ -293,63 +353,150 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa payload.resize(dataSize); const auto ioStart = clock_t::now(); - if (!stlReadWithPolicy(context.inner.mainFile, payload.data(), 84ull, dataSize, ioPlan)) + if (!stlReadWithPolicy(context.inner.mainFile, payload.data(), StlBinaryPrefixBytes, dataSize, ioPlan)) return {}; ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); - positions.reserve(static_cast(triangleCount) * 3ull); - normals.reserve(static_cast(triangleCount)); + vertexCount = triangleCount * StlVerticesPerTriangle; + const auto buildPrepStart = clock_t::now(); + auto posView = createView(EF_R32G32B32_SFLOAT, static_cast(vertexCount)); + auto normalView = createView(EF_R32G32B32_SFLOAT, static_cast(vertexCount)); + if (!posView || !normalView) + return {}; + + auto* posOut = reinterpret_cast(posView.getPointer()); + auto* normalOut = reinterpret_cast(normalView.getPointer()); + if (!posOut || !normalOut) + return {}; + buildMs += std::chrono::duration(clock_t::now() - buildPrepStart).count(); const auto parseStart = clock_t::now(); const uint8_t* cursor = payload.data(); const uint8_t* const end = cursor + payload.size(); + auto* posOutFloat = reinterpret_cast(posOut); + auto* normalOutFloat = reinterpret_cast(normalOut); for (uint64_t tri = 0ull; tri < triangleCount; ++tri) { - float nx = 0.f; - float ny = 0.f; - float nz = 0.f; - if (!stlReadFloatFromPayload(cursor, end, nx) || 
!stlReadFloatFromPayload(cursor, end, ny) || !stlReadFloatFromPayload(cursor, end, nz)) + if (cursor + StlTriangleRecordBytes > end) return {}; - core::vectorSIMDf fileNormal; - fileNormal.set(nx, ny, nz, 0.f); - const float fileLen2 = core::dot(fileNormal, fileNormal).X; - if (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) - normals.push_back(fileNormal); - else - normals.push_back(core::normalize(fileNormal)); - - core::vectorSIMDf p[3] = {}; - for (uint32_t i = 0u; i < 3u; ++i) + float triData[StlTriangleFloatCount] = {}; + std::memcpy(triData, cursor, StlTriangleFloatBytes); + cursor += StlTriangleFloatBytes; + cursor += StlTriangleAttributeBytes; + + const float vertex0x = triData[9]; + const float vertex0y = triData[10]; + const float vertex0z = triData[11]; + const float vertex1x = triData[6]; + const float vertex1y = triData[7]; + const float vertex1z = triData[8]; + const float vertex2x = triData[3]; + const float vertex2y = triData[4]; + const float vertex2z = triData[5]; + + float normalX = triData[0]; + float normalY = triData[1]; + float normalZ = triData[2]; + const float normalLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; + if (normalLen2 <= 0.f) { - float x = 0.f; - float y = 0.f; - float z = 0.f; - if (!stlReadFloatFromPayload(cursor, end, x) || !stlReadFloatFromPayload(cursor, end, y) || !stlReadFloatFromPayload(cursor, end, z)) - return {}; - p[i].set(x, y, z, 0.f); + const float edge10x = vertex1x - vertex0x; + const float edge10y = vertex1y - vertex0y; + const float edge10z = vertex1z - vertex0z; + const float edge20x = vertex2x - vertex0x; + const float edge20y = vertex2y - vertex0y; + const float edge20z = vertex2z - vertex0z; + + normalX = edge10y * edge20z - edge10z * edge20y; + normalY = edge10z * edge20x - edge10x * edge20z; + normalZ = edge10x * edge20y - edge10y * edge20x; + const float planeLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; + if (planeLen2 > 0.f) + { + const float invLen = 1.f / 
std::sqrt(planeLen2); + normalX *= invLen; + normalY *= invLen; + normalZ *= invLen; + } + else + { + normalX = 0.f; + normalY = 0.f; + normalZ = 0.f; + } + } + else if (std::abs(normalLen2 - 1.f) >= 1e-4f) + { + const float invLen = 1.f / std::sqrt(normalLen2); + normalX *= invLen; + normalY *= invLen; + normalZ *= invLen; } - positions.push_back(p[2u]); - positions.push_back(p[1u]); - positions.push_back(p[0u]); - - if ((normals.back() == core::vectorSIMDf()).all()) + const size_t base = static_cast(tri) * StlFloatsPerTriangleOutput; + posOutFloat[base + 0ull] = vertex0x; + posOutFloat[base + 1ull] = vertex0y; + posOutFloat[base + 2ull] = vertex0z; + posOutFloat[base + 3ull] = vertex1x; + posOutFloat[base + 4ull] = vertex1y; + posOutFloat[base + 5ull] = vertex1z; + posOutFloat[base + 6ull] = vertex2x; + posOutFloat[base + 7ull] = vertex2y; + posOutFloat[base + 8ull] = vertex2z; + + normalOutFloat[base + 0ull] = normalX; + normalOutFloat[base + 1ull] = normalY; + normalOutFloat[base + 2ull] = normalZ; + normalOutFloat[base + 3ull] = normalX; + normalOutFloat[base + 4ull] = normalY; + normalOutFloat[base + 5ull] = normalZ; + normalOutFloat[base + 6ull] = normalX; + normalOutFloat[base + 7ull] = normalY; + normalOutFloat[base + 8ull] = normalZ; + + if (!hasParsedAABB) { - normals.back().set(core::plane3dSIMDf( - *(positions.rbegin() + 2), - *(positions.rbegin() + 1), - *(positions.rbegin() + 0)).getNormal()); + hasParsedAABB = true; + parsedAABB.minVx.x = vertex0x; + parsedAABB.minVx.y = vertex0y; + parsedAABB.minVx.z = vertex0z; + parsedAABB.maxVx.x = vertex0x; + parsedAABB.maxVx.y = vertex0y; + parsedAABB.maxVx.z = vertex0z; } - if (cursor + sizeof(uint16_t) > end) - return {}; - cursor += sizeof(uint16_t); + if (vertex0x < parsedAABB.minVx.x) parsedAABB.minVx.x = vertex0x; + if (vertex0y < parsedAABB.minVx.y) parsedAABB.minVx.y = vertex0y; + if (vertex0z < parsedAABB.minVx.z) parsedAABB.minVx.z = vertex0z; + if (vertex1x < parsedAABB.minVx.x) parsedAABB.minVx.x = 
vertex1x; + if (vertex1y < parsedAABB.minVx.y) parsedAABB.minVx.y = vertex1y; + if (vertex1z < parsedAABB.minVx.z) parsedAABB.minVx.z = vertex1z; + if (vertex2x < parsedAABB.minVx.x) parsedAABB.minVx.x = vertex2x; + if (vertex2y < parsedAABB.minVx.y) parsedAABB.minVx.y = vertex2y; + if (vertex2z < parsedAABB.minVx.z) parsedAABB.minVx.z = vertex2z; + + if (vertex0x > parsedAABB.maxVx.x) parsedAABB.maxVx.x = vertex0x; + if (vertex0y > parsedAABB.maxVx.y) parsedAABB.maxVx.y = vertex0y; + if (vertex0z > parsedAABB.maxVx.z) parsedAABB.maxVx.z = vertex0z; + if (vertex1x > parsedAABB.maxVx.x) parsedAABB.maxVx.x = vertex1x; + if (vertex1y > parsedAABB.maxVx.y) parsedAABB.maxVx.y = vertex1y; + if (vertex1z > parsedAABB.maxVx.z) parsedAABB.maxVx.z = vertex1z; + if (vertex2x > parsedAABB.maxVx.x) parsedAABB.maxVx.x = vertex2x; + if (vertex2y > parsedAABB.maxVx.y) parsedAABB.maxVx.y = vertex2y; + if (vertex2z > parsedAABB.maxVx.z) parsedAABB.maxVx.z = vertex2z; } parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); + + const auto buildFinalizeStart = clock_t::now(); + geometry->setPositionView(std::move(posView)); + geometry->setNormalView(std::move(normalView)); + buildMs += std::chrono::duration(clock_t::now() - buildFinalizeStart).count(); } else { + core::vector positions; + core::vector normals; stlGoNextLine(&context); token.reserve(32); @@ -365,20 +512,16 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (stlGetNextToken(&context, token) != "normal") return {}; - core::vectorSIMDf fileNormal; + hlsl::float32_t3 fileNormal = {}; if (!stlGetNextVector(&context, fileNormal, false)) return {}; - const float fileLen2 = core::dot(fileNormal, fileNormal).X; - if (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) - normals.push_back(fileNormal); - else - normals.push_back(core::normalize(fileNormal)); + normals.push_back(stlResolveStoredNormal(fileNormal)); if (stlGetNextToken(&context, token) != "outer" || 
stlGetNextToken(&context, token) != "loop") return {}; - core::vectorSIMDf p[3] = {}; + hlsl::float32_t3 p[3] = {}; for (uint32_t i = 0u; i < 3u; ++i) { if (stlGetNextToken(&context, token) != "vertex") @@ -387,75 +530,47 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa return {}; } - positions.push_back(p[2u]); - positions.push_back(p[1u]); - positions.push_back(p[0u]); + stlPushTriangleReversed(p, positions); if (stlGetNextToken(&context, token) != "endloop" || stlGetNextToken(&context, token) != "endfacet") return {}; - if ((normals.back() == core::vectorSIMDf()).all()) - { - normals.back().set(core::plane3dSIMDf( - *(positions.rbegin() + 2), - *(positions.rbegin() + 1), - *(positions.rbegin() + 0)).getNormal()); - } + stlFixLastFaceNormal(normals, positions); } parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); - } - - if (positions.empty()) - return {}; - - triangleCount = positions.size() / 3ull; - const uint64_t vertexCount = positions.size(); + if (positions.empty()) + return {}; - const auto buildStart = clock_t::now(); - auto geometry = core::make_smart_refctd_ptr(); - geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + triangleCount = positions.size() / StlVerticesPerTriangle; + vertexCount = positions.size(); - auto posView = createView(EF_R32G32B32_SFLOAT, positions.size()); - auto normalView = createView(EF_R32G32B32_SFLOAT, positions.size()); - if (!posView || !normalView) - return {}; + const auto buildStart = clock_t::now(); + auto posView = createView(EF_R32G32B32_SFLOAT, positions.size()); + auto normalView = createView(EF_R32G32B32_SFLOAT, positions.size()); + if (!posView || !normalView) + return {}; - auto* posOut = reinterpret_cast(posView.getPointer()); - auto* normalOut = reinterpret_cast(normalView.getPointer()); - if (!posOut || !normalOut) - return {}; + auto* posOut = reinterpret_cast(posView.getPointer()); + auto* normalOut = reinterpret_cast(normalView.getPointer()); + 
if (!posOut || !normalOut) + return {}; - hlsl::shapes::AABB<3, hlsl::float32_t> parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); - bool hasParsedAABB = false; - auto addAABBPoint = [&parsedAABB, &hasParsedAABB](const hlsl::float32_t3& p)->void - { - if (!hasParsedAABB) + for (size_t i = 0u; i < positions.size(); ++i) { - parsedAABB.minVx = p; - parsedAABB.maxVx = p; - hasParsedAABB = true; - return; + const auto& pos = positions[i]; + const auto& nrm = normals[i / 3u]; + posOut[i] = { pos.x, pos.y, pos.z }; + normalOut[i] = { nrm.x, nrm.y, nrm.z }; + stlExtendAABB(parsedAABB, hasParsedAABB, posOut[i]); } - if (p.x < parsedAABB.minVx.x) parsedAABB.minVx.x = p.x; - if (p.y < parsedAABB.minVx.y) parsedAABB.minVx.y = p.y; - if (p.z < parsedAABB.minVx.z) parsedAABB.minVx.z = p.z; - if (p.x > parsedAABB.maxVx.x) parsedAABB.maxVx.x = p.x; - if (p.y > parsedAABB.maxVx.y) parsedAABB.maxVx.y = p.y; - if (p.z > parsedAABB.maxVx.z) parsedAABB.maxVx.z = p.z; - }; - for (size_t i = 0u; i < positions.size(); ++i) - { - const auto& pos = positions[i]; - const auto& nrm = normals[i / 3u]; - posOut[i] = { pos.X, pos.Y, pos.Z }; - normalOut[i] = { nrm.X, nrm.Y, nrm.Z }; - addAABBPoint(posOut[i]); + geometry->setPositionView(std::move(posView)); + geometry->setNormalView(std::move(normalView)); + buildMs = std::chrono::duration(clock_t::now() - buildStart).count(); } - geometry->setPositionView(std::move(posView)); - geometry->setNormalView(std::move(normalView)); - buildMs = std::chrono::duration(clock_t::now() - buildStart).count(); + if (vertexCount == 0ull) + return {}; const auto hashStart = clock_t::now(); CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); @@ -510,25 +625,24 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const { (void)logger; - if (!_file || _file->getSize() <= 6u) + if (!_file || 
_file->getSize() <= StlTextProbeBytes) return false; - char header[6] = {}; + char header[StlTextProbeBytes] = {}; if (!stlReadExact(_file, header, 0ull, sizeof(header))) return false; - if (std::strncmp(header, "solid ", 6u) == 0) + if (std::strncmp(header, "solid ", StlTextProbeBytes) == 0) return true; - if (_file->getSize() < 84u) + if (_file->getSize() < StlBinaryPrefixBytes) return false; uint32_t triangleCount = 0u; - if (!stlReadExact(_file, &triangleCount, 80ull, sizeof(triangleCount))) + if (!stlReadExact(_file, &triangleCount, StlBinaryHeaderBytes, sizeof(triangleCount))) return false; - constexpr size_t STL_TRI_SZ = sizeof(float) * 12ull + sizeof(uint16_t); - return _file->getSize() == (STL_TRI_SZ * triangleCount + 84u); + return _file->getSize() == (StlTriangleRecordBytes * triangleCount + StlBinaryPrefixBytes); } } diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 90c449e584..4433e7e235 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -10,13 +10,16 @@ #include #include #include +#include +#include +#include #ifdef _NBL_COMPILE_WITH_STL_WRITER_ namespace nbl::asset { -namespace +namespace stl_writer_detail { struct SContext @@ -27,16 +30,33 @@ struct SContext size_t fileOffset = 0ull; }; +constexpr size_t BinaryHeaderBytes = 80ull; +constexpr size_t BinaryTriangleCountBytes = sizeof(uint32_t); +constexpr size_t BinaryTriangleFloatCount = 12ull; +constexpr size_t BinaryTriangleFloatBytes = sizeof(float) * BinaryTriangleFloatCount; +constexpr size_t BinaryTriangleAttributeBytes = sizeof(uint16_t); +constexpr size_t BinaryTriangleRecordBytes = BinaryTriangleFloatBytes + BinaryTriangleAttributeBytes; +constexpr size_t BinaryPrefixBytes = BinaryHeaderBytes + BinaryTriangleCountBytes; +constexpr size_t IoFallbackReserveBytes = 1ull << 20; +constexpr char AsciiSolidPrefix[] = "solid "; +constexpr char AsciiEndSolidPrefix[] = "endsolid "; 
+constexpr char AsciiDefaultName[] = "nabla_mesh"; + } -static bool flushBytes(SContext* context); -static bool writeBytes(SContext* context, const void* data, size_t size); -static bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx); -static bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal); -static bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context); -static bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context); -static void getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s); -static bool writeFaceText( +using SContext = stl_writer_detail::SContext; + +bool flushBytes(SContext* context); +bool writeBytes(SContext* context, const void* data, size_t size); +bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount); +const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view); +bool decodeTriangleIndices(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView& posView, core::vector& indexData, const uint32_t*& outIndices, uint32_t& outFaceCount); +bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx); +bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal); +bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context); +bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context); +void 
getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s); +bool writeFaceText( const core::vectorSIMDf& v1, const core::vectorSIMDf& v2, const core::vectorSIMDf& v3, @@ -103,7 +123,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ bool sizeKnown = false; if (binary) { - expectedSize = 84ull + static_cast(geom->getPrimitiveCount()) * 50ull; + expectedSize = stl_writer_detail::BinaryPrefixBytes + static_cast(geom->getPrimitiveCount()) * stl_writer_detail::BinaryTriangleRecordBytes; sizeKnown = true; } @@ -117,7 +137,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (context.ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile && sizeKnown) context.ioBuffer.reserve(static_cast(expectedSize)); else - context.ioBuffer.reserve(static_cast(std::min(context.ioPlan.chunkSizeBytes, 1ull << 20))); + context.ioBuffer.reserve(static_cast(std::min(context.ioPlan.chunkSizeBytes, stl_writer_detail::IoFallbackReserveBytes))); const bool written = binary ? 
writeMeshBinary(geom, &context) : writeMeshASCII(geom, &context); if (!written) @@ -126,7 +146,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return flushBytes(&context); } -static bool flushBytes(SContext* context) +bool flushBytes(SContext* context) { if (!context) return false; @@ -155,7 +175,7 @@ static bool flushBytes(SContext* context) return true; } -static bool writeBytes(SContext* context, const void* data, size_t size) +bool writeBytes(SContext* context, const void* data, size_t size) { if (!context || (!data && size != 0ull)) return false; @@ -198,7 +218,102 @@ static bool writeBytes(SContext* context, const void* data, size_t size) } } -static bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx) +bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount) +{ + if (!file || (!data && byteCount != 0ull)) + return false; + + size_t fileOffset = 0ull; + switch (ioPlan.strategy) + { + case SResolvedFileIOPolicy::Strategy::WholeFile: + { + system::IFile::success_t success; + file->write(success, data, fileOffset, byteCount); + return success && success.getBytesProcessed() == byteCount; + } + case SResolvedFileIOPolicy::Strategy::Chunked: + default: + { + while (fileOffset < byteCount) + { + const size_t toWrite = static_cast(std::min(ioPlan.chunkSizeBytes, byteCount - fileOffset)); + system::IFile::success_t success; + file->write(success, data + fileOffset, fileOffset, toWrite); + if (!success) + return false; + const size_t written = success.getBytesProcessed(); + if (written == 0ull) + return false; + fileOffset += written; + } + return true; + } + } +} + +const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) +{ 
+ if (!view) + return nullptr; + if (view.composed.format != EF_R32G32B32_SFLOAT) + return nullptr; + if (view.composed.getStride() != sizeof(hlsl::float32_t3)) + return nullptr; + return reinterpret_cast(view.getPointer()); +} + +bool decodeTriangleIndices(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView& posView, core::vector& indexData, const uint32_t*& outIndices, uint32_t& outFaceCount) +{ + const auto& indexView = geom->getIndexView(); + if (indexView) + { + const size_t indexCount = indexView.getElementCount(); + if ((indexCount % 3ull) != 0ull) + return false; + + const void* src = indexView.getPointer(); + if (!src) + return false; + + if (indexView.composed.format == EF_R32_UINT && indexView.composed.getStride() == sizeof(uint32_t)) + { + outIndices = reinterpret_cast(src); + } + else if (indexView.composed.format == EF_R16_UINT && indexView.composed.getStride() == sizeof(uint16_t)) + { + indexData.resize(indexCount); + const auto* src16 = reinterpret_cast(src); + for (size_t i = 0ull; i < indexCount; ++i) + indexData[i] = src16[i]; + outIndices = indexData.data(); + } + else + { + indexData.resize(indexCount); + hlsl::vector decoded = {}; + for (size_t i = 0ull; i < indexCount; ++i) + { + if (!indexView.decodeElement(i, decoded)) + return false; + indexData[i] = decoded.x; + } + outIndices = indexData.data(); + } + outFaceCount = static_cast(indexCount / 3ull); + return true; + } + + const size_t vertexCount = posView.getElementCount(); + if ((vertexCount % 3ull) != 0ull) + return false; + + outIndices = nullptr; + outFaceCount = static_cast(vertexCount / 3ull); + return true; +} + +bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx) { uint32_t idx[3] = {}; const auto& indexView = geom->getIndexView(); @@ -235,7 +350,7 @@ 
static bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeomet return true; } -static bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal) +bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal) { if (!normalView || !idx) return false; @@ -262,96 +377,231 @@ static bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalVie return true; } -static bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) +bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) { - if (!geom) - return false; - - const auto* indexing = geom->getIndexingCallback(); - if (!indexing || indexing->degree() != 3u) + if (!geom || !context || !context->writeContext.outputFile) return false; const auto& posView = geom->getPositionView(); if (!posView) return false; - const auto& normalView = geom->getNormalView(); + const bool flipHandedness = !(context->writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); - const uint32_t facenum = static_cast(geom->getPrimitiveCount()); + const size_t vertexCount = posView.getElementCount(); + if (vertexCount == 0ull) + return false; - // write STL MESH header - const char headerTxt[] = "Irrlicht-baw Engine"; - constexpr size_t HEADER_SIZE = 80u; - const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); - const int32_t sizeleft = HEADER_SIZE - sizeof(headerTxt) - static_cast(name.size()); + core::vector indexData; + const uint32_t* indices = nullptr; + uint32_t facenum = 0u; + if (!decodeTriangleIndices(geom, posView, indexData, indices, facenum)) + return false; - if (!writeBytes(context, headerTxt, sizeof(headerTxt))) + const size_t outputSize = stl_writer_detail::BinaryPrefixBytes + static_cast(facenum) * stl_writer_detail::BinaryTriangleRecordBytes; + 
std::unique_ptr output(new (std::nothrow) uint8_t[outputSize]); + if (!output) return false; + uint8_t* dst = output.get(); + + std::memset(dst, 0, stl_writer_detail::BinaryHeaderBytes); + dst += stl_writer_detail::BinaryHeaderBytes; + + std::memcpy(dst, &facenum, sizeof(facenum)); + dst += sizeof(facenum); + + const auto& normalView = geom->getNormalView(); + const bool hasNormals = static_cast(normalView); + const hlsl::float32_t3* const tightPositions = getTightFloat3View(posView); + const hlsl::float32_t3* const tightNormals = hasNormals ? getTightFloat3View(normalView) : nullptr; + const float handednessSign = flipHandedness ? -1.f : 1.f; - if (sizeleft < 0) + auto decodePosition = [&](const uint32_t ix, hlsl::float32_t3& out)->bool { - if (!writeBytes(context, name.c_str(), HEADER_SIZE - sizeof(headerTxt))) - return false; - } - else + if (tightPositions) + { + out = tightPositions[ix]; + return true; + } + return posView.decodeElement(ix, out); + }; + + auto decodeNormal = [&](const uint32_t ix, hlsl::float32_t3& out)->bool { - const char buf[80] = {0}; - if (!writeBytes(context, name.c_str(), name.size())) - return false; - if (!writeBytes(context, buf, sizeleft)) + if (!hasNormals) return false; - } - - if (!writeBytes(context, &facenum, sizeof(facenum))) - return false; + if (tightNormals) + { + out = tightNormals[ix]; + return true; + } + return normalView.decodeElement(ix, out); + }; - for (uint32_t primIx = 0u; primIx < facenum; ++primIx) + const bool hasFastTightPath = (indices == nullptr) && (tightPositions != nullptr) && (!hasNormals || (tightNormals != nullptr)); + if (hasFastTightPath) { - core::vectorSIMDf v0; - core::vectorSIMDf v1; - core::vectorSIMDf v2; - uint32_t idx[3] = {}; - if (!decodeTriangle(geom, indexing, posView, primIx, v0, v1, v2, idx)) - return false; + for (uint32_t primIx = 0u; primIx < facenum; ++primIx) + { + const uint32_t i0 = primIx * 3u + 0u; + const uint32_t i1 = primIx * 3u + 1u; + const uint32_t i2 = primIx * 3u + 2u; 
+ if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) + return false; + + const hlsl::float32_t3 vertex1 = tightPositions[i2]; + const hlsl::float32_t3 vertex2 = tightPositions[i1]; + const hlsl::float32_t3 vertex3 = tightPositions[i0]; + const float vertex1x = vertex1.x * handednessSign; + const float vertex2x = vertex2.x * handednessSign; + const float vertex3x = vertex3.x * handednessSign; + + float normalX = 0.f; + float normalY = 0.f; + float normalZ = 0.f; + if (hasNormals) + { + hlsl::float32_t3 attrNormal = tightNormals[i0]; + if (attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) + attrNormal = tightNormals[i1]; + if (attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) + attrNormal = tightNormals[i2]; + if (!(attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f)) + { + if (flipHandedness) + attrNormal.x = -attrNormal.x; + normalX = attrNormal.x; + normalY = attrNormal.y; + normalZ = attrNormal.z; + } + } - core::vectorSIMDf vertex1 = v2; - core::vectorSIMDf vertex2 = v1; - core::vectorSIMDf vertex3 = v0; + if (normalX == 0.f && normalY == 0.f && normalZ == 0.f) + { + const float edge21x = vertex2x - vertex1x; + const float edge21y = vertex2.y - vertex1.y; + const float edge21z = vertex2.z - vertex1.z; + const float edge31x = vertex3x - vertex1x; + const float edge31y = vertex3.y - vertex1.y; + const float edge31z = vertex3.z - vertex1.z; + + normalX = edge21y * edge31z - edge21z * edge31y; + normalY = edge21z * edge31x - edge21x * edge31z; + normalZ = edge21x * edge31y - edge21y * edge31x; + const float planeNormalLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; + if (planeNormalLen2 > 0.f) + { + const float invLen = 1.f / std::sqrt(planeNormalLen2); + normalX *= invLen; + normalY *= invLen; + normalZ *= invLen; + } + } - if (flipHandedness) - { - vertex1.X = -vertex1.X; - vertex2.X = -vertex2.X; - vertex3.X = -vertex3.X; + const float packedData[12] = { + normalX, normalY, normalZ, 
+ vertex1x, vertex1.y, vertex1.z, + vertex2x, vertex2.y, vertex2.z, + vertex3x, vertex3.y, vertex3.z + }; + std::memcpy(dst, packedData, sizeof(packedData)); + dst += sizeof(packedData); + + const uint16_t color = 0u; + std::memcpy(dst, &color, sizeof(color)); + dst += sizeof(color); } - - core::vectorSIMDf normal = core::plane3dSIMDf(vertex1, vertex2, vertex3).getNormal(); - core::vectorSIMDf attrNormal; - if (decodeTriangleNormal(normalView, idx, attrNormal)) + } + else + { + for (uint32_t primIx = 0u; primIx < facenum; ++primIx) { + const uint32_t i0 = indices ? indices[primIx * 3u + 0u] : (primIx * 3u + 0u); + const uint32_t i1 = indices ? indices[primIx * 3u + 1u] : (primIx * 3u + 1u); + const uint32_t i2 = indices ? indices[primIx * 3u + 2u] : (primIx * 3u + 2u); + if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) + return false; + + hlsl::float32_t3 p0 = {}; + hlsl::float32_t3 p1 = {}; + hlsl::float32_t3 p2 = {}; + if (!decodePosition(i0, p0) || !decodePosition(i1, p1) || !decodePosition(i2, p2)) + return false; + + hlsl::float32_t3 vertex1 = p2; + hlsl::float32_t3 vertex2 = p1; + hlsl::float32_t3 vertex3 = p0; + if (flipHandedness) - attrNormal.X = -attrNormal.X; - if (core::dot(attrNormal, normal).X < 0.f) - attrNormal = -attrNormal; - normal = attrNormal; - } + { + vertex1.x = -vertex1.x; + vertex2.x = -vertex2.x; + vertex3.x = -vertex3.x; + } - if (!writeBytes(context, &normal, 12)) - return false; - if (!writeBytes(context, &vertex1, 12)) - return false; - if (!writeBytes(context, &vertex2, 12)) - return false; - if (!writeBytes(context, &vertex3, 12)) - return false; - const uint16_t color = 0u; - if (!writeBytes(context, &color, sizeof(color))) - return false; + const hlsl::float32_t3 planeNormal = hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); + const float planeNormalLen2 = hlsl::dot(planeNormal, planeNormal); + hlsl::float32_t3 normal = hlsl::float32_t3(0.f, 0.f, 0.f); + if (!hasNormals) + { + if (planeNormalLen2 > 0.f) + normal 
= hlsl::normalize(planeNormal); + } + + if (hasNormals) + { + hlsl::float32_t3 n0 = {}; + if (!decodeNormal(i0, n0)) + return false; + + hlsl::float32_t3 attrNormal = n0; + if (hlsl::dot(attrNormal, attrNormal) <= 0.f) + { + hlsl::float32_t3 n1 = {}; + if (!decodeNormal(i1, n1)) + return false; + attrNormal = n1; + } + if (hlsl::dot(attrNormal, attrNormal) <= 0.f) + { + hlsl::float32_t3 n2 = {}; + if (!decodeNormal(i2, n2)) + return false; + attrNormal = n2; + } + + if (hlsl::dot(attrNormal, attrNormal) > 0.f) + { + if (flipHandedness) + attrNormal.x = -attrNormal.x; + if (planeNormalLen2 > 0.f && hlsl::dot(attrNormal, planeNormal) < 0.f) + attrNormal = -attrNormal; + normal = attrNormal; + } + else if (planeNormalLen2 > 0.f) + { + normal = hlsl::normalize(planeNormal); + } + } + + const float packedData[12] = { + normal.x, normal.y, normal.z, + vertex1.x, vertex1.y, vertex1.z, + vertex2.x, vertex2.y, vertex2.z, + vertex3.x, vertex3.y, vertex3.z + }; + std::memcpy(dst, packedData, sizeof(packedData)); + dst += sizeof(packedData); + + const uint16_t color = 0u; + std::memcpy(dst, &color, sizeof(color)); + dst += sizeof(color); + } } - return true; + return writeBufferWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize); } -static bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) +bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) { if (!geom) return false; @@ -366,20 +616,16 @@ static bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* con const auto& normalView = geom->getNormalView(); const bool flipHandedness = !(context->writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); - const char headerTxt[] = "Irrlicht-baw Engine "; - - if (!writeBytes(context, "solid ", 6)) - return false; + const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); + const std::string_view solidName = 
name.empty() ? std::string_view(stl_writer_detail::AsciiDefaultName) : std::string_view(name); - if (!writeBytes(context, headerTxt, sizeof(headerTxt) - 1)) + if (!writeBytes(context, stl_writer_detail::AsciiSolidPrefix, sizeof(stl_writer_detail::AsciiSolidPrefix) - 1ull)) return false; - const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); - - if (!writeBytes(context, name.c_str(), name.size())) + if (!writeBytes(context, solidName.data(), solidName.size())) return false; - if (!writeBytes(context, "\n", 1)) + if (!writeBytes(context, "\n", sizeof("\n") - 1ull)) return false; const uint32_t faceCount = static_cast(geom->getPrimitiveCount()); @@ -393,30 +639,27 @@ static bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* con return false; if (!writeFaceText(v0, v1, v2, idx, normalView, flipHandedness, context)) return false; - if (!writeBytes(context, "\n", 1)) + if (!writeBytes(context, "\n", sizeof("\n") - 1ull)) return false; } - if (!writeBytes(context, "endsolid ", 9)) - return false; - - if (!writeBytes(context, headerTxt, sizeof(headerTxt) - 1)) + if (!writeBytes(context, stl_writer_detail::AsciiEndSolidPrefix, sizeof(stl_writer_detail::AsciiEndSolidPrefix) - 1ull)) return false; - if (!writeBytes(context, name.c_str(), name.size())) + if (!writeBytes(context, solidName.data(), solidName.size())) return false; return true; } -static void getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s) +void getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s) { std::ostringstream tmp; tmp << v.X << " " << v.Y << " " << v.Z << "\n"; s = std::string(tmp.str().c_str()); } -static bool writeFaceText( +bool writeFaceText( const core::vectorSIMDf& v1, const core::vectorSIMDf& v2, const core::vectorSIMDf& v3, @@ -448,41 +691,41 @@ static bool writeFaceText( normal = attrNormal; } - if (!writeBytes(context, "facet normal ", 13)) + if (!writeBytes(context, "facet normal ", 
sizeof("facet normal ") - 1ull)) return false; getVectorAsStringLine(normal, tmp); if (!writeBytes(context, tmp.c_str(), tmp.size())) return false; - if (!writeBytes(context, " outer loop\n", 13)) + if (!writeBytes(context, " outer loop\n", sizeof(" outer loop\n") - 1ull)) return false; - if (!writeBytes(context, " vertex ", 11)) + if (!writeBytes(context, " vertex ", sizeof(" vertex ") - 1ull)) return false; getVectorAsStringLine(vertex1, tmp); if (!writeBytes(context, tmp.c_str(), tmp.size())) return false; - if (!writeBytes(context, " vertex ", 11)) + if (!writeBytes(context, " vertex ", sizeof(" vertex ") - 1ull)) return false; getVectorAsStringLine(vertex2, tmp); if (!writeBytes(context, tmp.c_str(), tmp.size())) return false; - if (!writeBytes(context, " vertex ", 11)) + if (!writeBytes(context, " vertex ", sizeof(" vertex ") - 1ull)) return false; getVectorAsStringLine(vertex3, tmp); if (!writeBytes(context, tmp.c_str(), tmp.size())) return false; - if (!writeBytes(context, " endloop\n", 10)) + if (!writeBytes(context, " endloop\n", sizeof(" endloop\n") - 1ull)) return false; - if (!writeBytes(context, "endfacet\n", 9)) + if (!writeBytes(context, "endfacet\n", sizeof("endfacet\n") - 1ull)) return false; return true; From 4423ca66ece87352550dde91415a8155255347d0 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Feb 2026 12:45:50 +0100 Subject: [PATCH 005/118] Optimize mesh loader parse and IO paths --- .gitignore | 2 +- .../asset/interchange/COBJMeshFileLoader.cpp | 293 +++++++++-- .../asset/interchange/CPLYMeshFileLoader.cpp | 475 ++++++++++++++---- .../asset/interchange/CSTLMeshFileLoader.cpp | 262 ++++++---- 4 files changed, 785 insertions(+), 247 deletions(-) diff --git a/.gitignore b/.gitignore index c7bbb2808e..48ce7cc770 100644 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,4 @@ tools/nsc/bin/* */__pycache__/* __pycache__/* *.pyc - +tmp/* diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp 
b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 12bc57617e..1e4d9fe90b 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -15,12 +15,12 @@ #include "COBJMeshFileLoader.h" #include +#include #include #include #include #include #include -#include namespace nbl::asset { @@ -51,6 +51,31 @@ struct ObjVertexKeyHash } }; +struct SFileReadTelemetry +{ + uint64_t callCount = 0ull; + uint64_t totalBytes = 0ull; + uint64_t minBytes = std::numeric_limits::max(); + + void account(const uint64_t bytes) + { + ++callCount; + totalBytes += bytes; + if (bytes < minBytes) + minBytes = bytes; + } + + uint64_t getMinOrZero() const + { + return callCount ? minBytes : 0ull; + } + + uint64_t getAvgOrZero() const + { + return callCount ? (totalBytes / callCount) : 0ull; + } +}; + using Float3 = hlsl::float32_t3; using Float2 = hlsl::float32_t2; @@ -75,7 +100,36 @@ void extendAABB(hlsl::shapes::AABB<3, hlsl::float32_t>& aabb, bool& hasAABB, con if (p.z > aabb.maxVx.z) aabb.maxVx.z = p.z; } -bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, double& ioMs) +template +IGeometry::SDataView createAdoptedView(core::vector&& data, const E_FORMAT format) +{ + if (data.empty()) + return {}; + + auto backer = core::make_smart_refctd_ptr>>(std::move(data)); + auto& storage = backer->getBacker(); + auto* const ptr = storage.data(); + const size_t byteCount = storage.size() * sizeof(T); + auto buffer = ICPUBuffer::create({ { byteCount }, ptr, core::smart_refctd_ptr(std::move(backer)), alignof(T) }, core::adopt_memory); + if (!buffer) + return {}; + + IGeometry::SDataView view = { + .composed = { + .stride = sizeof(T), + .format = format, + .rangeFormat = IGeometryBase::getMatchingAABBFormat(format) + }, + .src = { + .offset = 0u, + .size = byteCount, + .buffer = std::move(buffer) + } + }; + return view; +} + +bool readTextFileWithPolicy(system::IFile* file, char* 
dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, double& ioMs, SFileReadTelemetry& ioTelemetry) { if (!file || !dst) return false; @@ -92,6 +146,7 @@ bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, co if (!success || success.getBytesProcessed() != byteCount) return false; bytesRead = byteCount; + ioTelemetry.account(success.getBytesProcessed()); break; } case SResolvedFileIOPolicy::Strategy::Chunked: @@ -107,6 +162,7 @@ bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, co const size_t processed = success.getBytesProcessed(); if (processed == 0ull) return false; + ioTelemetry.account(processed); bytesRead += processed; } break; @@ -147,6 +203,23 @@ const char* goNextLine(const char* buf, const char* const bufEnd) return goFirstWord(buf, bufEnd); } +bool parseFloatToken(const char*& ptr, const char* const end, float& out) +{ + const auto parseResult = std::from_chars(ptr, end, out, std::chars_format::general); + if (parseResult.ec == std::errc() && parseResult.ptr != ptr) + { + ptr = parseResult.ptr; + return true; + } + + char* fallbackEnd = nullptr; + out = std::strtof(ptr, &fallbackEnd); + if (!fallbackEnd || fallbackEnd == ptr) + return false; + ptr = fallbackEnd; + return true; +} + const char* readVec3(const char* bufPtr, float vec[3], const char* const bufEnd) { bufPtr = goNextWord(bufPtr, bufEnd, false); @@ -155,11 +228,8 @@ const char* readVec3(const char* bufPtr, float vec[3], const char* const bufEnd) if (bufPtr >= bufEnd) return bufPtr; - char* endPtr = nullptr; - vec[i] = std::strtof(bufPtr, &endPtr); - if (endPtr == bufPtr) + if (!parseFloatToken(bufPtr, bufEnd, vec[i])) return bufPtr; - bufPtr = endPtr; while (bufPtr < bufEnd && core::isspace(*bufPtr) && *bufPtr != '\n' && *bufPtr != '\r') ++bufPtr; @@ -176,11 +246,8 @@ const char* readUV(const char* bufPtr, float vec[2], const char* const bufEnd) if (bufPtr >= bufEnd) return bufPtr; - char* endPtr = nullptr; - vec[i] = 
std::strtof(bufPtr, &endPtr); - if (endPtr == bufPtr) + if (!parseFloatToken(bufPtr, bufEnd, vec[i])) return bufPtr; - bufPtr = endPtr; while (bufPtr < bufEnd && core::isspace(*bufPtr) && *bufPtr != '\n' && *bufPtr != '\r') ++bufPtr; @@ -249,6 +316,83 @@ bool retrieveVertexIndices(const char* tokenBegin, const char* tokenEnd, int32_t return true; } +enum class EFastFaceTokenParseResult : uint8_t +{ + NotApplicable, + Success, + Error +}; + +bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint32_t& out) +{ + if (ptr >= end || !core::isdigit(*ptr)) + return false; + + uint64_t value = 0ull; + while (ptr < end && core::isdigit(*ptr)) + { + value = value * 10ull + static_cast(*ptr - '0'); + if (value > static_cast(std::numeric_limits::max())) + return false; + ++ptr; + } + + out = static_cast(value); + return true; +} + +EFastFaceTokenParseResult retrieveVertexIndicesFast(const char* tokenBegin, const char* tokenEnd, int32_t* idx) +{ + if (!tokenBegin || !idx || tokenBegin >= tokenEnd) + return EFastFaceTokenParseResult::NotApplicable; + + for (const char* c = tokenBegin; c < tokenEnd; ++c) + { + const char ch = *c; + if (ch == '-' || ch == '+') + return EFastFaceTokenParseResult::NotApplicable; + if (!core::isdigit(ch) && ch != '/') + return EFastFaceTokenParseResult::NotApplicable; + } + + idx[0] = -1; + idx[1] = -1; + idx[2] = -1; + + const char* ptr = tokenBegin; + uint32_t parsed = 0u; + if (!parseUnsignedObjIndex(ptr, tokenEnd, parsed) || parsed == 0u) + return EFastFaceTokenParseResult::Error; + idx[0] = static_cast(parsed - 1u); + + if (ptr >= tokenEnd) + return EFastFaceTokenParseResult::Success; + if (*ptr != '/') + return EFastFaceTokenParseResult::NotApplicable; + ++ptr; + + if (ptr < tokenEnd && *ptr != '/') + { + if (!parseUnsignedObjIndex(ptr, tokenEnd, parsed) || parsed == 0u) + return EFastFaceTokenParseResult::Error; + idx[1] = static_cast(parsed - 1u); + } + + if (ptr >= tokenEnd) + return EFastFaceTokenParseResult::Success; + if 
(*ptr != '/') + return EFastFaceTokenParseResult::Error; + ++ptr; + + if (ptr >= tokenEnd) + return EFastFaceTokenParseResult::Success; + if (!parseUnsignedObjIndex(ptr, tokenEnd, parsed) || parsed == 0u) + return EFastFaceTokenParseResult::Error; + idx[2] = static_cast(parsed - 1u); + + return ptr == tokenEnd ? EFastFaceTokenParseResult::Success : EFastFaceTokenParseResult::Error; +} + } COBJMeshFileLoader::COBJMeshFileLoader(IAssetManager* _manager) @@ -288,8 +432,12 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as double ioMs = 0.0; double parseMs = 0.0; double buildMs = 0.0; + double hashMs = 0.0; double aabbMs = 0.0; uint64_t faceCount = 0u; + uint64_t faceFastTokenCount = 0u; + uint64_t faceFallbackTokenCount = 0u; + SFileReadTelemetry ioTelemetry = {}; const long filesize = _file->getSize(); if (filesize <= 0) @@ -303,7 +451,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as std::string fileContents; fileContents.resize(static_cast(filesize)); - if (!readTextFileWithPolicy(_file, fileContents.data(), fileContents.size(), ioPlan, ioMs)) + if (!readTextFileWithPolicy(_file, fileContents.data(), fileContents.size(), ioPlan, ioMs, ioTelemetry)) return {}; const char* const buf = fileContents.data(); @@ -318,13 +466,14 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector outNormals; core::vector outUVs; core::vector indices; - - std::unordered_map vtxMap; + core::unordered_map vtxMap; bool hasNormals = false; bool hasUVs = false; hlsl::shapes::AABB<3, hlsl::float32_t> parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); bool hasParsedAABB = false; + core::vector faceCorners; + faceCorners.reserve(16ull); const auto parseStart = clock_t::now(); while (bufPtr != bufEnd) @@ -364,41 +513,57 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (positions.empty()) return {}; ++faceCount; + if (faceCount == 1u) + { 
+ vtxMap.reserve(positions.size() * 4ull); + indices.reserve(positions.size() * 6ull); + } const char* endPtr = bufPtr; while (endPtr != bufEnd && *endPtr != '\n' && *endPtr != '\r') ++endPtr; - core::vector faceCorners; - faceCorners.reserve(16ull); + faceCorners.clear(); const char* linePtr = goNextWord(bufPtr, endPtr); - while (linePtr < endPtr && 0 != linePtr[0]) + while (linePtr < endPtr) { int32_t idx[3] = { -1, -1, -1 }; const char* tokenEnd = linePtr; while (tokenEnd < endPtr && !core::isspace(*tokenEnd)) ++tokenEnd; - if (!retrieveVertexIndices(linePtr, tokenEnd, idx, positions.size(), uvs.size(), normals.size())) + const auto fastResult = retrieveVertexIndicesFast(linePtr, tokenEnd, idx); + if (fastResult == EFastFaceTokenParseResult::Success) + { + ++faceFastTokenCount; + } + else if (fastResult == EFastFaceTokenParseResult::NotApplicable) + { + if (!retrieveVertexIndices(linePtr, tokenEnd, idx, positions.size(), uvs.size(), normals.size())) + return {}; + ++faceFallbackTokenCount; + } + else + { return {}; + } if (idx[0] < 0 || static_cast(idx[0]) >= positions.size()) return {}; ObjVertexKey key = { idx[0], idx[1], idx[2] }; - auto it = vtxMap.find(key); - uint32_t outIx = 0u; - if (it == vtxMap.end()) + const uint32_t candidateIndex = static_cast(outPositions.size()); + auto [it, inserted] = vtxMap.try_emplace(key, candidateIndex); + uint32_t outIx = it->second; + if (inserted) { if (outPositions.empty()) { - outPositions.reserve(positions.size()); - outNormals.reserve(positions.size()); - outUVs.reserve(positions.size()); + const size_t estimatedVertexCount = positions.size() <= (std::numeric_limits::max() / 4ull) ? 
positions.size() * 4ull : positions.size(); + outPositions.reserve(estimatedVertexCount); + outNormals.reserve(estimatedVertexCount); + outUVs.reserve(estimatedVertexCount); } - outIx = static_cast(outPositions.size()); - vtxMap.emplace(key, outIx); - const auto& srcPos = positions[idx[0]]; outPositions.push_back(srcPos); extendAABB(parsedAABB, hasParsedAABB, srcPos); @@ -419,16 +584,10 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } outNormals.push_back(normal); } - else - { - outIx = it->second; - } faceCorners.push_back(outIx); - while (tokenEnd < endPtr && core::isspace(*tokenEnd)) - ++tokenEnd; - linePtr = tokenEnd; + linePtr = goFirstWord(tokenEnd, endPtr, false); } for (uint32_t i = 1u; i + 1u < faceCorners.size(); ++i) @@ -450,30 +609,52 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (outPositions.empty()) return {}; + const size_t outVertexCount = outPositions.size(); + const size_t outIndexCount = indices.size(); const auto buildStart = clock_t::now(); auto geometry = core::make_smart_refctd_ptr(); - geometry->setPositionView(IGeometryLoader::createView(EF_R32G32B32_SFLOAT, outPositions.size(), outPositions.data())); + { + auto view = createAdoptedView(std::move(outPositions), EF_R32G32B32_SFLOAT); + if (!view) + return {}; + geometry->setPositionView(std::move(view)); + } if (hasNormals) - geometry->setNormalView(IGeometryLoader::createView(EF_R32G32B32_SFLOAT, outNormals.size(), outNormals.data())); + { + auto view = createAdoptedView(std::move(outNormals), EF_R32G32B32_SFLOAT); + if (!view) + return {}; + geometry->setNormalView(std::move(view)); + } if (hasUVs) - geometry->getAuxAttributeViews()->push_back(IGeometryLoader::createView(EF_R32G32_SFLOAT, outUVs.size(), outUVs.data())); + { + auto view = createAdoptedView(std::move(outUVs), EF_R32G32_SFLOAT); + if (!view) + return {}; + geometry->getAuxAttributeViews()->push_back(std::move(view)); + } if (!indices.empty()) { 
geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - const auto maxIndex = *std::max_element(indices.begin(), indices.end()); - if (maxIndex <= std::numeric_limits::max()) + if (outVertexCount <= static_cast(std::numeric_limits::max()) + 1ull) { core::vector indices16(indices.size()); for (size_t i = 0u; i < indices.size(); ++i) indices16[i] = static_cast(indices[i]); - geometry->setIndexView(IGeometryLoader::createView(EF_R16_UINT, indices16.size(), indices16.data())); + auto view = createAdoptedView(std::move(indices16), EF_R16_UINT); + if (!view) + return {}; + geometry->setIndexView(std::move(view)); } else { - geometry->setIndexView(IGeometryLoader::createView(EF_R32_UINT, indices.size(), indices.data())); + auto view = createAdoptedView(std::move(indices), EF_R32_UINT); + if (!view) + return {}; + geometry->setIndexView(std::move(view)); } } else @@ -482,6 +663,10 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } buildMs = std::chrono::duration(clock_t::now() - buildStart).count(); + const auto hashStart = clock_t::now(); + CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); + hashMs = std::chrono::duration(clock_t::now() - hashStart).count(); + const auto aabbStart = clock_t::now(); if (hasParsedAABB) { @@ -505,21 +690,43 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as aabbMs = std::chrono::duration(clock_t::now() - aabbStart).count(); const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); + if ( + static_cast(filesize) > (1ull << 20) && + ( + ioTelemetry.getAvgOrZero() < 1024ull || + (ioTelemetry.getMinOrZero() < 64ull && ioTelemetry.callCount > 1024ull) + ) + ) + { + _params.logger.log( + "OBJ loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", + system::ILogger::ELL_WARNING, + _file->getFileName().string().c_str(), + static_cast(ioTelemetry.callCount), + static_cast(ioTelemetry.getMinOrZero()), + 
static_cast(ioTelemetry.getAvgOrZero())); + } _params.logger.log( - "OBJ loader perf: file=%s total=%.3f ms io=%.3f parse=%.3f build=%.3f aabb=%.3f in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu) io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "OBJ loader perf: file=%s total=%.3f ms io=%.3f parse=%.3f build=%.3f hash=%.3f aabb=%.3f in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu) io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), totalMs, ioMs, parseMs, buildMs, + hashMs, aabbMs, static_cast(positions.size()), static_cast(normals.size()), static_cast(uvs.size()), - static_cast(outPositions.size()), - static_cast(indices.size()), + static_cast(outVertexCount), + static_cast(outIndexCount), static_cast(faceCount), + static_cast(faceFastTokenCount), + static_cast(faceFallbackTokenCount), + static_cast(ioTelemetry.callCount), + static_cast(ioTelemetry.getMinOrZero()), + static_cast(ioTelemetry.getAvgOrZero()), toString(_params.ioPolicy.strategy), toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index a7042601ed..fd88560490 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -9,6 +9,7 @@ #include "nbl/asset/metadata/CPLYMetadata.h" #include +#include #include #include #include @@ -69,6 +70,34 @@ T byteswap(const T& v) return retval; } +IGeometry::SDataView plyCreateAdoptedU32IndexView(core::vector&& indices) +{ + if (indices.empty()) + return {}; + + auto backer = core::make_smart_refctd_ptr>>(std::move(indices)); + auto& storage = backer->getBacker(); + auto* const ptr = storage.data(); + const size_t byteCount = storage.size() * sizeof(uint32_t); + auto buffer = ICPUBuffer::create({ { 
byteCount }, ptr, core::smart_refctd_ptr(std::move(backer)), alignof(uint32_t) }, core::adopt_memory); + if (!buffer) + return {}; + + IGeometry::SDataView view = { + .composed = { + .stride = sizeof(uint32_t), + .format = EF_R32_UINT, + .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32_UINT) + }, + .src = { + .offset = 0u, + .size = byteCount, + .buffer = std::move(buffer) + } + }; + return view; +} + struct SContext { static constexpr uint64_t ReadWindowPaddingBytes = 1ull; @@ -193,6 +222,10 @@ struct SContext system::IFile::success_t success; inner.mainFile->read(success,EndPointer,fileOffset,requestSize); const size_t bytesRead = success.getBytesProcessed(); + ++readCallCount; + readBytesTotal += bytesRead; + if (bytesRead < readMinBytes) + readMinBytes = bytesRead; fileOffset += bytesRead; EndPointer += bytesRead; @@ -335,7 +368,39 @@ struct SContext } return 0; } - return std::atoi(getNextWord()); + const char* word = getNextWord(); + if (!word) + return 0u; + const char* const wordEnd = word + std::strlen(word); + if (word == wordEnd) + return 0u; + + if (isSignedFormat(f)) + { + int64_t value = 0; + const auto parseResult = std::from_chars(word, wordEnd, value, 10); + if (parseResult.ec == std::errc() && parseResult.ptr == wordEnd) + return static_cast(value); + + char* fallbackEnd = nullptr; + const auto fallback = std::strtoll(word, &fallbackEnd, 10); + if (fallbackEnd && fallbackEnd != word) + return static_cast(fallback); + return 0u; + } + else + { + uint64_t value = 0u; + const auto parseResult = std::from_chars(word, wordEnd, value, 10); + if (parseResult.ec == std::errc() && parseResult.ptr == wordEnd) + return static_cast(value); + + char* fallbackEnd = nullptr; + const auto fallback = std::strtoull(word, &fallbackEnd, 10); + if (fallbackEnd && fallbackEnd != word) + return static_cast(fallback); + return 0u; + } } // read the next float from the file and move the start pointer along hlsl::float64_t getFloat(const E_FORMAT f) @@ -372,7 
+437,23 @@ struct SContext } return 0; } - return std::strtod(getNextWord(), nullptr); + const char* word = getNextWord(); + if (!word) + return 0.0; + const char* const wordEnd = word + std::strlen(word); + if (word == wordEnd) + return 0.0; + + hlsl::float64_t value = 0.0; + const auto parseResult = std::from_chars(word, wordEnd, value, std::chars_format::general); + if (parseResult.ec == std::errc() && parseResult.ptr == wordEnd) + return value; + + char* fallbackEnd = nullptr; + const auto fallback = std::strtod(word, &fallbackEnd); + if (fallbackEnd && fallbackEnd != word) + return fallback; + return 0.0; } // read the next thing from the file and move the start pointer along void getData(void* dst, const E_FORMAT f) @@ -396,6 +477,69 @@ struct SContext uint32_t stride; E_FORMAT dstFmt; }; + enum class EFastVertexReadResult : uint8_t + { + NotApplicable, + Success, + Error + }; + EFastVertexReadResult readVertexElementFast(const SElement& el) + { + if (!IsBinaryFile || IsWrongEndian || el.Name != "vertex") + return EFastVertexReadResult::NotApplicable; + if (el.Properties.size() != 3u || vertAttrIts.size() != 3u) + return EFastVertexReadResult::NotApplicable; + + const auto& xProp = el.Properties[0]; + const auto& yProp = el.Properties[1]; + const auto& zProp = el.Properties[2]; + if (xProp.Name != "x" || yProp.Name != "y" || zProp.Name != "z") + return EFastVertexReadResult::NotApplicable; + if (xProp.type != EF_R32_SFLOAT || yProp.type != EF_R32_SFLOAT || zProp.type != EF_R32_SFLOAT) + return EFastVertexReadResult::NotApplicable; + + auto& xIt = vertAttrIts[0]; + auto& yIt = vertAttrIts[1]; + auto& zIt = vertAttrIts[2]; + if (!xIt.ptr || !yIt.ptr || !zIt.ptr) + return EFastVertexReadResult::NotApplicable; + if (xIt.dstFmt != EF_R32_SFLOAT || yIt.dstFmt != EF_R32_SFLOAT || zIt.dstFmt != EF_R32_SFLOAT) + return EFastVertexReadResult::NotApplicable; + if (xIt.stride != yIt.stride || xIt.stride != zIt.stride) + return EFastVertexReadResult::NotApplicable; + + 
const size_t floatBytes = sizeof(hlsl::float32_t); + if (yIt.ptr != xIt.ptr + floatBytes || zIt.ptr != xIt.ptr + 2ull * floatBytes) + return EFastVertexReadResult::NotApplicable; + + if (el.Count > (std::numeric_limits::max() / xIt.stride)) + return EFastVertexReadResult::Error; + const size_t dstAdvance = el.Count * xIt.stride; + const size_t srcBytesPerVertex = 3ull * floatBytes; + if (el.Count > (std::numeric_limits::max() / srcBytesPerVertex)) + return EFastVertexReadResult::Error; + const size_t copyBytes = el.Count * srcBytesPerVertex; + + uint8_t* dst = xIt.ptr; + size_t copied = 0ull; + while (copied < copyBytes) + { + if (StartPointer >= EndPointer) + fillBuffer(); + const size_t available = EndPointer > StartPointer ? static_cast(EndPointer - StartPointer) : 0ull; + if (available == 0ull) + return EFastVertexReadResult::Error; + const size_t toCopy = std::min(available, copyBytes - copied); + std::memcpy(dst + copied, StartPointer, toCopy); + StartPointer += toCopy; + copied += toCopy; + } + + xIt.ptr += dstAdvance; + yIt.ptr += dstAdvance; + zIt.ptr += dstAdvance; + return EFastVertexReadResult::Success; + } void readVertex(const IAssetLoader::SAssetLoadParams& _params, const SElement& el) { assert(el.Name=="vertex"); @@ -544,18 +688,25 @@ struct SContext return true; } - bool readFaceElementFast(const SElement& element, core::vector& _outIndices, uint32_t& _maxIndex, uint64_t& _faceCount) + enum class EFastFaceReadResult : uint8_t + { + NotApplicable, + Success, + Error + }; + + EFastFaceReadResult readFaceElementFast(const SElement& element, core::vector& _outIndices, uint32_t& _maxIndex, uint64_t& _faceCount, const uint32_t vertexCount) { if (!IsBinaryFile || IsWrongEndian) - return false; + return EFastFaceReadResult::NotApplicable; if (element.Properties.size() != 1u) - return false; + return EFastFaceReadResult::NotApplicable; const auto& prop = element.Properties[0]; if (!prop.isList() || (prop.Name != "vertex_indices" && prop.Name != 
"vertex_index")) - return false; + return EFastFaceReadResult::NotApplicable; if (prop.list.countType != EF_R8_UINT) - return false; + return EFastFaceReadResult::NotApplicable; const E_FORMAT srcIndexFmt = prop.list.itemType; const bool isSrcU32 = srcIndexFmt == EF_R32_UINT; @@ -563,68 +714,113 @@ struct SContext const bool isSrcU16 = srcIndexFmt == EF_R16_UINT; const bool isSrcS16 = srcIndexFmt == EF_R16_SINT; if (!isSrcU32 && !isSrcS32 && !isSrcU16 && !isSrcS16) - return false; + return EFastFaceReadResult::NotApplicable; const bool is32Bit = isSrcU32 || isSrcS32; const size_t indexSize = is32Bit ? sizeof(uint32_t) : sizeof(uint16_t); + const bool hasVertexCount = vertexCount != 0u; + const bool trackMaxIndex = !hasVertexCount || vertexCount <= std::numeric_limits::max(); const size_t minTriangleRecordSize = sizeof(uint8_t) + indexSize * 3u; + if (element.Count > (std::numeric_limits::max() / minTriangleRecordSize)) + return EFastFaceReadResult::Error; const size_t minBytesNeeded = element.Count * minTriangleRecordSize; if (StartPointer + minBytesNeeded <= EndPointer) { - char* scan = StartPointer; - bool allTriangles = true; - for (size_t j = 0u; j < element.Count; ++j) + if (element.Count > (std::numeric_limits::max() / 3u)) + return EFastFaceReadResult::Error; + const size_t triIndices = element.Count * 3u; + if (_outIndices.size() > (std::numeric_limits::max() - triIndices)) + return EFastFaceReadResult::Error; + const size_t oldSize = _outIndices.size(); + const uint32_t oldMaxIndex = _maxIndex; + _outIndices.resize(oldSize + triIndices); + uint32_t* out = _outIndices.data() + oldSize; + const uint8_t* ptr = reinterpret_cast(StartPointer); + bool fallbackToGeneric = false; + + if (is32Bit) { - const uint8_t c = static_cast(*scan++); - if (c != 3u) + if (isSrcU32) { - allTriangles = false; - break; + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + { + fallbackToGeneric = true; + break; + } + std::memcpy(out, ptr, 
3ull * sizeof(uint32_t)); + ptr += 3ull * sizeof(uint32_t); + if (trackMaxIndex) + { + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; + } + else if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + { + return EFastFaceReadResult::Error; + } + out += 3; + } + } + else + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + { + fallbackToGeneric = true; + break; + } + std::memcpy(out, ptr, 3ull * sizeof(uint32_t)); + ptr += 3ull * sizeof(uint32_t); + if ((out[0] | out[1] | out[2]) & 0x80000000u) + return EFastFaceReadResult::Error; + if (trackMaxIndex) + { + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; + } + else if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + { + return EFastFaceReadResult::Error; + } + out += 3; + } } - scan += indexSize * 3u; } - - if (allTriangles) + else { - const size_t oldSize = _outIndices.size(); - _outIndices.resize(oldSize + element.Count * 3u); - uint32_t* out = _outIndices.data() + oldSize; - const uint8_t* ptr = reinterpret_cast(StartPointer); - - if (is32Bit) + if (isSrcU16) { for (size_t j = 0u; j < element.Count; ++j) { - ++ptr; // list count - uint32_t i0 = 0u, i1 = 0u, i2 = 0u; - if (isSrcU32) + const uint8_t c = *ptr++; + if (c != 3u) { - std::memcpy(&i0, ptr, sizeof(i0)); - ptr += sizeof(i0); - std::memcpy(&i1, ptr, sizeof(i1)); - ptr += sizeof(i1); - std::memcpy(&i2, ptr, sizeof(i2)); - ptr += sizeof(i2); + fallbackToGeneric = true; + break; + } + uint16_t tri[3] = {}; + std::memcpy(tri, ptr, sizeof(tri)); + ptr += sizeof(tri); + out[0] = tri[0]; + out[1] = tri[1]; + out[2] = tri[2]; + if (trackMaxIndex) + { + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; } - else + 
else if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) { - int32_t s0 = 0, s1 = 0, s2 = 0; - std::memcpy(&s0, ptr, sizeof(s0)); - ptr += sizeof(s0); - std::memcpy(&s1, ptr, sizeof(s1)); - ptr += sizeof(s1); - std::memcpy(&s2, ptr, sizeof(s2)); - ptr += sizeof(s2); - if (s0 < 0 || s1 < 0 || s2 < 0) - return false; - i0 = static_cast(s0); - i1 = static_cast(s1); - i2 = static_cast(s2); + return EFastFaceReadResult::Error; } - _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); - out[0] = i0; - out[1] = i1; - out[2] = i2; out += 3; } } @@ -632,51 +828,52 @@ struct SContext { for (size_t j = 0u; j < element.Count; ++j) { - ++ptr; // list count - uint32_t i0 = 0u, i1 = 0u, i2 = 0u; - if (isSrcU16) + const uint8_t c = *ptr++; + if (c != 3u) + { + fallbackToGeneric = true; + break; + } + int16_t tri[3] = {}; + std::memcpy(tri, ptr, sizeof(tri)); + ptr += sizeof(tri); + if ((static_cast(tri[0]) | static_cast(tri[1]) | static_cast(tri[2])) & 0x8000u) + return EFastFaceReadResult::Error; + out[0] = static_cast(tri[0]); + out[1] = static_cast(tri[1]); + out[2] = static_cast(tri[2]); + if (trackMaxIndex) { - uint16_t t0 = 0u, t1 = 0u, t2 = 0u; - std::memcpy(&t0, ptr, sizeof(t0)); - ptr += sizeof(t0); - std::memcpy(&t1, ptr, sizeof(t1)); - ptr += sizeof(t1); - std::memcpy(&t2, ptr, sizeof(t2)); - ptr += sizeof(t2); - i0 = t0; - i1 = t1; - i2 = t2; + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; } - else + else if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) { - int16_t s0 = 0, s1 = 0, s2 = 0; - std::memcpy(&s0, ptr, sizeof(s0)); - ptr += sizeof(s0); - std::memcpy(&s1, ptr, sizeof(s1)); - ptr += sizeof(s1); - std::memcpy(&s2, ptr, sizeof(s2)); - ptr += sizeof(s2); - if (s0 < 0 || s1 < 0 || s2 < 0) - return false; - i0 = static_cast(s0); - i1 = static_cast(s1); - i2 = static_cast(s2); + return EFastFaceReadResult::Error; } 
- _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); - out[0] = i0; - out[1] = i1; - out[2] = i2; out += 3; } } + } + if (!fallbackToGeneric) + { StartPointer = reinterpret_cast(const_cast(ptr)); _faceCount += element.Count; - return true; + return EFastFaceReadResult::Success; } + + _outIndices.resize(oldSize); + _maxIndex = oldMaxIndex; } - _outIndices.reserve(_outIndices.size() + element.Count * 3u); + if (element.Count > (std::numeric_limits::max() / 3u)) + return EFastFaceReadResult::Error; + const size_t reserveCount = element.Count * 3u; + if (_outIndices.size() > (std::numeric_limits::max() - reserveCount)) + return EFastFaceReadResult::Error; + _outIndices.reserve(_outIndices.size() + reserveCount); auto ensureBytes = [this](const size_t bytes)->bool { if (StartPointer + bytes > EndPointer) @@ -690,7 +887,7 @@ struct SContext outCount = static_cast(*StartPointer++); return true; }; - auto readIndex = [&ensureBytes, this, srcIndexFmt, is32Bit, isSrcU32, isSrcU16](uint32_t& out)->bool + auto readIndex = [&ensureBytes, this, is32Bit, isSrcU32, isSrcU16](uint32_t& out)->bool { if (is32Bit) { @@ -736,9 +933,7 @@ struct SContext { int32_t countSigned = 0; if (!readCount(countSigned)) - return false; - if (countSigned < 0) - return false; + return EFastFaceReadResult::Error; const uint32_t count = static_cast(countSigned); if (count < 3u) { @@ -746,7 +941,7 @@ struct SContext for (uint32_t k = 0u; k < count; ++k) { if (!readIndex(dummy)) - return false; + return EFastFaceReadResult::Error; } ++_faceCount; continue; @@ -756,9 +951,16 @@ struct SContext uint32_t i1 = 0u; uint32_t i2 = 0u; if (!readIndex(i0) || !readIndex(i1) || !readIndex(i2)) - return false; + return EFastFaceReadResult::Error; - _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); + if (trackMaxIndex) + { + _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); + } + else if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) + { + return 
EFastFaceReadResult::Error; + } _outIndices.push_back(i0); _outIndices.push_back(i1); _outIndices.push_back(i2); @@ -768,8 +970,15 @@ struct SContext { uint32_t idx = 0u; if (!readIndex(idx)) - return false; - _maxIndex = std::max(_maxIndex, idx); + return EFastFaceReadResult::Error; + if (trackMaxIndex) + { + _maxIndex = std::max(_maxIndex, idx); + } + else if (idx >= vertexCount) + { + return EFastFaceReadResult::Error; + } _outIndices.push_back(i0); _outIndices.push_back(prev); _outIndices.push_back(idx); @@ -779,7 +988,7 @@ struct SContext ++_faceCount; } - return true; + return EFastFaceReadResult::Success; } IAssetLoader::SAssetLoadContext inner; @@ -794,6 +1003,9 @@ struct SContext int32_t WordLength = -1; // this variable is a misnomer, its really the offset to next word minus one bool IsBinaryFile = false, IsWrongEndian = false, EndOfFile = false; size_t fileOffset = {}; + uint64_t readCallCount = 0ull; + uint64_t readBytesTotal = 0ull; + uint64_t readMinBytes = std::numeric_limits::max(); // core::vector vertAttrIts; }; @@ -816,6 +1028,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa double aabbMs = 0.0; uint64_t faceCount = 0u; uint64_t fastFaceElementCount = 0u; + uint64_t fastVertexElementCount = 0u; uint32_t maxIndexRead = 0u; const uint64_t fileSize = _file->getSize(); const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, fileSize, true); @@ -918,7 +1131,19 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { auto& el = ctx.ElementList.emplace_back(); el.Name = ctx.getNextWord(); - el.Count = atoi(ctx.getNextWord()); + const char* const countWord = ctx.getNextWord(); + uint64_t parsedCount = 0ull; + if (countWord) + { + const char* const countWordEnd = countWord + std::strlen(countWord); + const auto parseResult = std::from_chars(countWord, countWordEnd, parsedCount, 10); + if (!(parseResult.ec == std::errc() && parseResult.ptr == countWordEnd)) + { + char* fallbackEnd = 
nullptr; + parsedCount = std::strtoull(countWord, &fallbackEnd, 10); + } + } + el.Count = static_cast(parsedCount); el.KnownSize = 0; if (el.Name=="vertex") vertCount = el.Count; @@ -1219,18 +1444,33 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa geometry->getAuxAttributeViews()->push_back(std::move(view)); // loop through vertex properties const auto vertexStart = clock_t::now(); - ctx.readVertex(_params,el); + const auto fastVertexResult = ctx.readVertexElementFast(el); + if (fastVertexResult == SContext::EFastVertexReadResult::Success) + { + ++fastVertexElementCount; + } + else if (fastVertexResult == SContext::EFastVertexReadResult::NotApplicable) + { + ctx.readVertex(_params,el); + } + else + { + _params.logger.log("PLY vertex fast path failed on malformed data for %s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str()); + return {}; + } vertexMs += std::chrono::duration(clock_t::now() - vertexStart).count(); verticesProcessed = true; } else if (el.Name=="face") { const auto faceStart = clock_t::now(); - if (ctx.readFaceElementFast(el,indices,maxIndexRead,faceCount)) + const uint32_t vertexCount32 = vertCount <= static_cast(std::numeric_limits::max()) ? 
static_cast(vertCount) : 0u; + const auto fastFaceResult = ctx.readFaceElementFast(el,indices,maxIndexRead,faceCount,vertexCount32); + if (fastFaceResult == SContext::EFastFaceReadResult::Success) { ++fastFaceElementCount; } - else + else if (fastFaceResult == SContext::EFastFaceReadResult::NotApplicable) { indices.reserve(indices.size() + el.Count * 3u); for (size_t j=0; jgetFileName().string().c_str()); + return {}; + } faceMs += std::chrono::duration(clock_t::now() - faceStart).count(); } else @@ -1252,12 +1497,11 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } } - hashRangeMs = 0.0; - const auto aabbStart = clock_t::now(); CPolygonGeometryManipulator::recomputeAABB(geometry.get()); aabbMs = std::chrono::duration(clock_t::now() - aabbStart).count(); + const uint64_t indexCount = static_cast(indices.size()); const auto indexStart = clock_t::now(); if (indices.empty()) { @@ -1273,7 +1517,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - if (maxIndexRead <= std::numeric_limits::max()) + if (vertCount <= std::numeric_limits::max() && maxIndexRead <= std::numeric_limits::max()) { auto view = IGeometryLoader::createView(EF_R16_UINT, indices.size()); if (!view) @@ -1285,18 +1529,39 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } else { - auto view = IGeometryLoader::createView(EF_R32_UINT, indices.size()); + auto view = plyCreateAdoptedU32IndexView(std::move(indices)); if (!view) return {}; - std::memcpy(view.getPointer(), indices.data(), indices.size() * sizeof(uint32_t)); geometry->setIndexView(std::move(view)); } } indexBuildMs = std::chrono::duration(clock_t::now() - indexStart).count(); + const auto hashStart = clock_t::now(); + CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); + hashRangeMs = std::chrono::duration(clock_t::now() - hashStart).count(); + const auto totalMs 
= std::chrono::duration(clock_t::now() - totalStart).count(); + const uint64_t ioMinRead = ctx.readCallCount ? ctx.readMinBytes : 0ull; + const uint64_t ioAvgRead = ctx.readCallCount ? (ctx.readBytesTotal / ctx.readCallCount) : 0ull; + if ( + fileSize > (1ull << 20) && + ( + ioAvgRead < 1024ull || + (ioMinRead < 64ull && ctx.readCallCount > 1024ull) + ) + ) + { + _params.logger.log( + "PLY loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", + system::ILogger::ELL_WARNING, + _file->getFileName().string().c_str(), + static_cast(ctx.readCallCount), + static_cast(ioMinRead), + static_cast(ioAvgRead)); + } _params.logger.log( - "PLY loader perf: file=%s total=%.3f ms header=%.3f vertex=%.3f face=%.3f skip=%.3f hash_range=%.3f index=%.3f aabb=%.3f binary=%d verts=%llu faces=%llu idx=%llu face_fast=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "PLY loader perf: file=%s total=%.3f ms header=%.3f vertex=%.3f face=%.3f skip=%.3f hash_range=%.3f index=%.3f aabb=%.3f binary=%d verts=%llu faces=%llu idx=%llu vertex_fast=%llu face_fast=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), totalMs, @@ -1310,8 +1575,12 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa ctx.IsBinaryFile ? 
1 : 0, static_cast(vertCount), static_cast(faceCount), - static_cast(indices.size()), + static_cast(indexCount), + static_cast(fastVertexElementCount), static_cast(fastFaceElementCount), + static_cast(ctx.readCallCount), + static_cast(ioMinRead), + static_cast(ioAvgRead), toString(_params.ioPolicy.strategy), toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 7a98d13a1d..7cb3a09e5a 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -13,19 +13,46 @@ #include "nbl/system/IFile.h" #include +#include #include #include -#include #include +#include #include namespace nbl::asset { +struct SFileReadTelemetry +{ + uint64_t callCount = 0ull; + uint64_t totalBytes = 0ull; + uint64_t minBytes = std::numeric_limits::max(); + + void account(const uint64_t bytes) + { + ++callCount; + totalBytes += bytes; + if (bytes < minBytes) + minBytes = bytes; + } + + uint64_t getMinOrZero() const + { + return callCount ? minBytes : 0ull; + } + + uint64_t getAvgOrZero() const + { + return callCount ? 
(totalBytes / callCount) : 0ull; + } +}; + struct SSTLContext { IAssetLoader::SAssetLoadContext inner; size_t fileOffset = 0ull; + SFileReadTelemetry ioTelemetry = {}; }; constexpr size_t StlTextProbeBytes = 6ull; @@ -38,10 +65,8 @@ constexpr size_t StlTriangleAttributeBytes = sizeof(uint16_t); constexpr size_t StlTriangleRecordBytes = StlTriangleFloatBytes + StlTriangleAttributeBytes; constexpr size_t StlVerticesPerTriangle = 3ull; constexpr size_t StlFloatChannelsPerVertex = 3ull; -constexpr size_t StlFloatsPerTriangleVertices = StlVerticesPerTriangle * StlFloatChannelsPerVertex; -constexpr size_t StlFloatsPerTriangleOutput = StlFloatsPerTriangleVertices; -bool stlReadExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes) +bool stlReadExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SFileReadTelemetry* ioTelemetry = nullptr) { if (!file || (!dst && bytes != 0ull)) return false; @@ -50,10 +75,12 @@ bool stlReadExact(system::IFile* file, void* dst, const size_t offset, const siz system::IFile::success_t success; file->read(success, dst, offset, bytes); + if (success && ioTelemetry) + ioTelemetry->account(success.getBytesProcessed()); return success && success.getBytesProcessed() == bytes; } -bool stlReadWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan) +bool stlReadWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr) { if (!file || (!dst && bytes != 0ull)) return false; @@ -64,7 +91,7 @@ bool stlReadWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, c switch (ioPlan.strategy) { case SResolvedFileIOPolicy::Strategy::WholeFile: - return stlReadExact(file, dst, offset, bytes); + return stlReadExact(file, dst, offset, bytes, ioTelemetry); case SResolvedFileIOPolicy::Strategy::Chunked: default: while (bytesRead < 
bytes) @@ -77,6 +104,8 @@ bool stlReadWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, c const size_t processed = success.getBytesProcessed(); if (processed == 0ull) return false; + if (ioTelemetry) + ioTelemetry->account(processed); bytesRead += processed; } return true; @@ -92,19 +121,7 @@ bool stlReadU8(SSTLContext* context, uint8_t& out) context->inner.mainFile->read(success, &out, context->fileOffset, sizeof(out)); if (!success || success.getBytesProcessed() != sizeof(out)) return false; - context->fileOffset += sizeof(out); - return true; -} - -bool stlReadF32(SSTLContext* context, float& out) -{ - if (!context) - return false; - - system::IFile::success_t success; - context->inner.mainFile->read(success, &out, context->fileOffset, sizeof(out)); - if (!success || success.getBytesProcessed() != sizeof(out)) - return false; + context->ioTelemetry.account(success.getBytesProcessed()); context->fileOffset += sizeof(out); return true; } @@ -149,47 +166,53 @@ const std::string& stlGetNextToken(SSTLContext* context, std::string& token) return token; } -void stlGoNextLine(SSTLContext* context) +const char* stlSkipWhitespace(const char* ptr, const char* const end) { - if (!context) - return; + while (ptr < end && core::isspace(*ptr)) + ++ptr; + return ptr; +} - uint8_t c = 0u; - while (context->fileOffset < context->inner.mainFile->getSize()) +bool stlReadTextToken(const char*& ptr, const char* const end, std::string_view& outToken) +{ + ptr = stlSkipWhitespace(ptr, end); + if (ptr >= end) { - if (!stlReadU8(context, c)) - break; - if (c == '\n' || c == '\r') - break; + outToken = {}; + return false; } + + const char* tokenEnd = ptr; + while (tokenEnd < end && !core::isspace(*tokenEnd)) + ++tokenEnd; + + outToken = std::string_view(ptr, static_cast(tokenEnd - ptr)); + ptr = tokenEnd; + return true; } -bool stlGetNextVector(SSTLContext* context, hlsl::float32_t3& vec, const bool binary) +bool stlReadTextFloat(const char*& ptr, const char* const end, 
float& outValue) { - if (!context) + ptr = stlSkipWhitespace(ptr, end); + if (ptr >= end) return false; - if (binary) - { - if (!stlReadF32(context, vec.x) || !stlReadF32(context, vec.y) || !stlReadF32(context, vec.z)) - return false; - return true; - } - - stlGoNextWord(context); - std::string tmp; - if (stlGetNextToken(context, tmp).empty()) + const auto parseResult = std::from_chars(ptr, end, outValue, std::chars_format::general); + if (parseResult.ec != std::errc() || parseResult.ptr == ptr) return false; - std::sscanf(tmp.c_str(), "%f", &vec.x); - if (stlGetNextToken(context, tmp).empty()) - return false; - std::sscanf(tmp.c_str(), "%f", &vec.y); - if (stlGetNextToken(context, tmp).empty()) - return false; - std::sscanf(tmp.c_str(), "%f", &vec.z); + + ptr = parseResult.ptr; return true; } +bool stlReadTextVec3(const char*& ptr, const char* const end, hlsl::float32_t3& outVec) +{ + return + stlReadTextFloat(ptr, end, outVec.x) && + stlReadTextFloat(ptr, end, outVec.y) && + stlReadTextFloat(ptr, end, outVec.z); +} + hlsl::float32_t3 stlNormalizeOrZero(const hlsl::float32_t3& v) { const float len2 = hlsl::dot(v, v); @@ -276,6 +299,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa double hashMs = 0.0; double aabbMs = 0.0; uint64_t triangleCount = 0u; + const char* parsePath = "unknown"; SSTLContext context = { asset::IAssetLoader::SAssetLoadContext{ @@ -297,11 +321,13 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } bool binary = false; + bool hasBinaryTriCountFromDetect = false; + uint32_t binaryTriCountFromDetect = 0u; std::string token; { const auto detectStart = clock_t::now(); char header[StlTextProbeBytes] = {}; - if (!stlReadExact(context.inner.mainFile, header, 0ull, sizeof(header))) + if (!stlReadExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) return {}; const bool startsWithSolid = (std::strncmp(header, "solid ", StlTextProbeBytes) == 0); @@ -309,8 
+335,10 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (filesize >= StlBinaryPrefixBytes) { uint32_t triCount = 0u; - if (stlReadExact(context.inner.mainFile, &triCount, StlBinaryHeaderBytes, sizeof(triCount))) + if (stlReadExact(context.inner.mainFile, &triCount, StlBinaryHeaderBytes, sizeof(triCount), &context.ioTelemetry)) { + binaryTriCountFromDetect = triCount; + hasBinaryTriCountFromDetect = true; const uint64_t expectedSize = StlBinaryPrefixBytes + static_cast(triCount) * StlTriangleRecordBytes; binaryBySize = (expectedSize == filesize); } @@ -336,12 +364,16 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (binary) { + parsePath = "binary_fast"; if (filesize < StlBinaryPrefixBytes) return {}; - uint32_t triangleCount32 = 0u; - if (!stlReadExact(context.inner.mainFile, &triangleCount32, StlBinaryHeaderBytes, sizeof(triangleCount32))) - return {}; + uint32_t triangleCount32 = binaryTriCountFromDetect; + if (!hasBinaryTriCountFromDetect) + { + if (!stlReadExact(context.inner.mainFile, &triangleCount32, StlBinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) + return {}; + } triangleCount = triangleCount32; const size_t dataSize = static_cast(triangleCount) * StlTriangleRecordBytes; @@ -351,9 +383,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa core::vector payload; payload.resize(dataSize); - const auto ioStart = clock_t::now(); - if (!stlReadWithPolicy(context.inner.mainFile, payload.data(), StlBinaryPrefixBytes, dataSize, ioPlan)) + if (!stlReadWithPolicy(context.inner.mainFile, payload.data(), StlBinaryPrefixBytes, dataSize, ioPlan, &context.ioTelemetry)) return {}; ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); @@ -375,29 +406,33 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const uint8_t* const end = cursor + payload.size(); auto* posOutFloat = reinterpret_cast(posOut); 
auto* normalOutFloat = reinterpret_cast(normalOut); + if (end < cursor || static_cast(end - cursor) < static_cast(triangleCount) * StlTriangleRecordBytes) + return {}; for (uint64_t tri = 0ull; tri < triangleCount; ++tri) { - if (cursor + StlTriangleRecordBytes > end) - return {}; - - float triData[StlTriangleFloatCount] = {}; - std::memcpy(triData, cursor, StlTriangleFloatBytes); - cursor += StlTriangleFloatBytes; - cursor += StlTriangleAttributeBytes; - - const float vertex0x = triData[9]; - const float vertex0y = triData[10]; - const float vertex0z = triData[11]; - const float vertex1x = triData[6]; - const float vertex1y = triData[7]; - const float vertex1z = triData[8]; - const float vertex2x = triData[3]; - const float vertex2y = triData[4]; - const float vertex2z = triData[5]; - - float normalX = triData[0]; - float normalY = triData[1]; - float normalZ = triData[2]; + const uint8_t* const triRecord = cursor; + cursor += StlTriangleRecordBytes; + + float normalData[StlFloatChannelsPerVertex] = {}; + std::memcpy(normalData, triRecord, sizeof(normalData)); + float normalX = normalData[0]; + float normalY = normalData[1]; + float normalZ = normalData[2]; + + const size_t base = static_cast(tri) * StlVerticesPerTriangle * StlFloatChannelsPerVertex; + std::memcpy(posOutFloat + base + 0ull, triRecord + 9ull * sizeof(float), sizeof(normalData)); + std::memcpy(posOutFloat + base + 3ull, triRecord + 6ull * sizeof(float), sizeof(normalData)); + std::memcpy(posOutFloat + base + 6ull, triRecord + 3ull * sizeof(float), sizeof(normalData)); + + const float vertex0x = posOutFloat[base + 0ull]; + const float vertex0y = posOutFloat[base + 1ull]; + const float vertex0z = posOutFloat[base + 2ull]; + const float vertex1x = posOutFloat[base + 3ull]; + const float vertex1y = posOutFloat[base + 4ull]; + const float vertex1z = posOutFloat[base + 5ull]; + const float vertex2x = posOutFloat[base + 6ull]; + const float vertex2y = posOutFloat[base + 7ull]; + const float vertex2z = 
posOutFloat[base + 8ull]; const float normalLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; if (normalLen2 <= 0.f) { @@ -434,17 +469,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa normalZ *= invLen; } - const size_t base = static_cast(tri) * StlFloatsPerTriangleOutput; - posOutFloat[base + 0ull] = vertex0x; - posOutFloat[base + 1ull] = vertex0y; - posOutFloat[base + 2ull] = vertex0z; - posOutFloat[base + 3ull] = vertex1x; - posOutFloat[base + 4ull] = vertex1y; - posOutFloat[base + 5ull] = vertex1z; - posOutFloat[base + 6ull] = vertex2x; - posOutFloat[base + 7ull] = vertex2y; - posOutFloat[base + 8ull] = vertex2z; - normalOutFloat[base + 0ull] = normalX; normalOutFloat[base + 1ull] = normalY; normalOutFloat[base + 2ull] = normalZ; @@ -495,44 +519,60 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } else { + parsePath = "ascii_fallback"; + core::vector asciiPayload; + asciiPayload.resize(filesize + 1ull); + const auto ioStart = clock_t::now(); + if (!stlReadWithPolicy(context.inner.mainFile, asciiPayload.data(), 0ull, filesize, ioPlan, &context.ioTelemetry)) + return {}; + ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); + asciiPayload[filesize] = 0u; + + const char* cursor = reinterpret_cast(asciiPayload.data()); + const char* const end = cursor + filesize; core::vector positions; core::vector normals; - stlGoNextLine(&context); - token.reserve(32); + std::string_view textToken = {}; + if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("solid")) + return {}; const auto parseStart = clock_t::now(); - while (context.fileOffset < filesize) + while (stlReadTextToken(cursor, end, textToken)) { - if (stlGetNextToken(&context, token) != "facet") + if (textToken == std::string_view("endsolid")) + break; + if (textToken != std::string_view("facet")) { - if (token == "endsolid") - break; - return {}; + continue; } - if 
(stlGetNextToken(&context, token) != "normal") + if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("normal")) return {}; hlsl::float32_t3 fileNormal = {}; - if (!stlGetNextVector(&context, fileNormal, false)) + if (!stlReadTextVec3(cursor, end, fileNormal)) return {}; normals.push_back(stlResolveStoredNormal(fileNormal)); - if (stlGetNextToken(&context, token) != "outer" || stlGetNextToken(&context, token) != "loop") + if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("outer")) + return {}; + if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("loop")) return {}; hlsl::float32_t3 p[3] = {}; for (uint32_t i = 0u; i < 3u; ++i) { - if (stlGetNextToken(&context, token) != "vertex") + if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("vertex")) return {}; - if (!stlGetNextVector(&context, p[i], false)) + if (!stlReadTextVec3(cursor, end, p[i])) return {}; } stlPushTriangleReversed(p, positions); - if (stlGetNextToken(&context, token) != "endloop" || stlGetNextToken(&context, token) != "endfacet") + if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("endloop")) + return {}; + if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("endfacet")) return {}; stlFixLastFaceNormal(normals, positions); @@ -599,8 +639,26 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa aabbMs = std::chrono::duration(clock_t::now() - aabbStart).count(); const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); + const uint64_t ioMinRead = context.ioTelemetry.getMinOrZero(); + const uint64_t ioAvgRead = context.ioTelemetry.getAvgOrZero(); + if ( + static_cast(filesize) > (1ull << 20) && + ( + ioAvgRead < 1024ull || + (ioMinRead < 64ull && context.ioTelemetry.callCount > 1024ull) + ) + ) + { + _params.logger.log( + "STL loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", 
+ system::ILogger::ELL_WARNING, + _file->getFileName().string().c_str(), + static_cast(context.ioTelemetry.callCount), + static_cast(ioMinRead), + static_cast(ioAvgRead)); + } _params.logger.log( - "STL loader perf: file=%s total=%.3f ms detect=%.3f io=%.3f parse=%.3f build=%.3f hash=%.3f aabb=%.3f binary=%d triangles=%llu vertices=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "STL loader perf: file=%s total=%.3f ms detect=%.3f io=%.3f parse=%.3f build=%.3f hash=%.3f aabb=%.3f binary=%d parse_path=%s triangles=%llu vertices=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), totalMs, @@ -611,8 +669,12 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa hashMs, aabbMs, binary ? 1 : 0, + parsePath, static_cast(triangleCount), static_cast(vertexCount), + static_cast(context.ioTelemetry.callCount), + static_cast(ioMinRead), + static_cast(ioAvgRead), toString(_params.ioPolicy.strategy), toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), From d1bd3096f7b4b86513fd52dcfa42caa104fa8fc4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Feb 2026 17:30:09 +0100 Subject: [PATCH 006/118] Integrate fast float and optimize mesh loader writer perf --- .gitmodules | 3 + 3rdparty/fast_float | 1 + src/nbl/CMakeLists.txt | 2 + .../asset/interchange/COBJMeshFileLoader.cpp | 204 +++++++-------- src/nbl/asset/interchange/COBJMeshWriter.cpp | 168 ++++++++++--- .../asset/interchange/CPLYMeshFileLoader.cpp | 108 ++++++-- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 235 +++++++++++++++--- .../asset/interchange/CSTLMeshFileLoader.cpp | 133 ++++------ src/nbl/asset/interchange/CSTLMeshWriter.cpp | 106 +++++++- 9 files changed, 669 insertions(+), 291 deletions(-) create mode 160000 3rdparty/fast_float diff --git a/.gitmodules b/.gitmodules index 8a04f82d9d..17be178b94 100644 --- a/.gitmodules +++ 
b/.gitmodules @@ -129,3 +129,6 @@ [submodule "3rdparty/Vulkan-Tools"] path = 3rdparty/Vulkan-Tools url = git@github.com:Devsh-Graphics-Programming/Vulkan-Tools.git +[submodule "3rdparty/fast_float"] + path = 3rdparty/fast_float + url = https://github.com/fastfloat/fast_float.git diff --git a/3rdparty/fast_float b/3rdparty/fast_float new file mode 160000 index 0000000000..221a4920db --- /dev/null +++ b/3rdparty/fast_float @@ -0,0 +1 @@ +Subproject commit 221a4920db7d68d33ab9794af602daef19667351 diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 3fceedf910..4de7ae83fb 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -556,6 +556,8 @@ else() endif() list(APPEND PUBLIC_BUILD_INCLUDE_DIRS ${THIRD_PARTY_SOURCE_DIR}/simdjson) +list(APPEND PUBLIC_BUILD_INCLUDE_DIRS ${THIRD_PARTY_SOURCE_DIR}/fast_float/include) + # libjpeg add_dependencies(Nabla jpeg-static) if(NBL_STATIC_BUILD) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 1e4d9fe90b..2c14c58c05 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -15,10 +15,12 @@ #include "COBJMeshFileLoader.h" #include +#include #include #include #include #include +#include #include #include @@ -44,10 +46,10 @@ struct ObjVertexKeyHash { size_t operator()(const ObjVertexKey& key) const noexcept { - size_t h = static_cast(static_cast(key.pos)); - h ^= static_cast(static_cast(key.uv)) + 0x9e3779b9 + (h << 6) + (h >> 2); - h ^= static_cast(static_cast(key.normal)) + 0x9e3779b9 + (h << 6) + (h >> 2); - return h; + const uint32_t p = static_cast(key.pos); + const uint32_t t = static_cast(key.uv); + const uint32_t n = static_cast(key.normal); + return static_cast((p * 73856093u) ^ (t * 19349663u) ^ (n * 83492791u)); } }; @@ -205,7 +207,7 @@ const char* goNextLine(const char* buf, const char* const bufEnd) bool parseFloatToken(const char*& ptr, const char* const end, float& 
out) { - const auto parseResult = std::from_chars(ptr, end, out, std::chars_format::general); + const auto parseResult = fast_float::from_chars(ptr, end, out); if (parseResult.ec == std::errc() && parseResult.ptr != ptr) { ptr = parseResult.ptr; @@ -257,140 +259,100 @@ const char* readUV(const char* bufPtr, float vec[2], const char* const bufEnd) return bufPtr; } -bool retrieveVertexIndices(const char* tokenBegin, const char* tokenEnd, int32_t* idx, uint32_t vbsize, uint32_t vtsize, uint32_t vnsize) +bool parseSignedObjIndex(const char*& ptr, const char* const end, int32_t& out) { - if (!tokenBegin || !idx) + const char* parseStart = ptr; + if (ptr >= end) return false; - idx[0] = -1; - idx[1] = -1; - idx[2] = -1; - - const char* p = tokenBegin; - for (uint32_t idxType = 0u; idxType < 3u && p < tokenEnd; ++idxType) + int64_t value = 0; + const auto parseResult = std::from_chars(ptr, end, value, 10); + if (!(parseResult.ec == std::errc() && parseResult.ptr != ptr)) { - if (*p == '/') - { - ++p; - continue; - } - - char* endNum = nullptr; - const long parsed = std::strtol(p, &endNum, 10); - if (endNum == p) + char* fallbackEnd = nullptr; + value = std::strtoll(parseStart, &fallbackEnd, 10); + if (!fallbackEnd || fallbackEnd == parseStart || fallbackEnd > end) return false; - - int32_t value = static_cast(parsed); - if (value < 0) - { - switch (idxType) - { - case 0: - value += static_cast(vbsize); - break; - case 1: - value += static_cast(vtsize); - break; - case 2: - value += static_cast(vnsize); - break; - default: - break; - } - } - else - { - value -= 1; - } - idx[idxType] = value; - - p = endNum; - if (p >= tokenEnd) - break; - - if (*p != '/') - break; - ++p; + ptr = fallbackEnd; } + else + { + ptr = parseResult.ptr; + } + if (value == 0) + return false; + if (value < static_cast(std::numeric_limits::min()) || value > static_cast(std::numeric_limits::max())) + return false; + out = static_cast(value); return true; } -enum class EFastFaceTokenParseResult : 
uint8_t +bool resolveObjIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) { - NotApplicable, - Success, - Error -}; - -bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint32_t& out) -{ - if (ptr >= end || !core::isdigit(*ptr)) - return false; - - uint64_t value = 0ull; - while (ptr < end && core::isdigit(*ptr)) + if (rawIndex > 0) { - value = value * 10ull + static_cast(*ptr - '0'); - if (value > static_cast(std::numeric_limits::max())) + const uint64_t oneBased = static_cast(rawIndex); + if (oneBased == 0ull) + return false; + const uint64_t zeroBased = oneBased - 1ull; + if (zeroBased >= elementCount) return false; - ++ptr; + resolved = static_cast(zeroBased); + return true; } - out = static_cast(value); + const int64_t zeroBased = static_cast(elementCount) + static_cast(rawIndex); + if (zeroBased < 0 || zeroBased >= static_cast(elementCount)) + return false; + resolved = static_cast(zeroBased); return true; } -EFastFaceTokenParseResult retrieveVertexIndicesFast(const char* tokenBegin, const char* tokenEnd, int32_t* idx) +bool parseObjFaceVertexToken(const char* tokenBegin, const char* tokenEnd, int32_t* idx, const size_t posCount, const size_t uvCount, const size_t normalCount) { if (!tokenBegin || !idx || tokenBegin >= tokenEnd) - return EFastFaceTokenParseResult::NotApplicable; - - for (const char* c = tokenBegin; c < tokenEnd; ++c) - { - const char ch = *c; - if (ch == '-' || ch == '+') - return EFastFaceTokenParseResult::NotApplicable; - if (!core::isdigit(ch) && ch != '/') - return EFastFaceTokenParseResult::NotApplicable; - } + return false; idx[0] = -1; idx[1] = -1; idx[2] = -1; const char* ptr = tokenBegin; - uint32_t parsed = 0u; - if (!parseUnsignedObjIndex(ptr, tokenEnd, parsed) || parsed == 0u) - return EFastFaceTokenParseResult::Error; - idx[0] = static_cast(parsed - 1u); + int32_t raw = 0; + if (!parseSignedObjIndex(ptr, tokenEnd, raw)) + return false; + if (!resolveObjIndex(raw, posCount, idx[0])) + 
return false; if (ptr >= tokenEnd) - return EFastFaceTokenParseResult::Success; + return true; if (*ptr != '/') - return EFastFaceTokenParseResult::NotApplicable; + return false; ++ptr; if (ptr < tokenEnd && *ptr != '/') { - if (!parseUnsignedObjIndex(ptr, tokenEnd, parsed) || parsed == 0u) - return EFastFaceTokenParseResult::Error; - idx[1] = static_cast(parsed - 1u); + if (!parseSignedObjIndex(ptr, tokenEnd, raw)) + return false; + if (!resolveObjIndex(raw, uvCount, idx[1])) + return false; } if (ptr >= tokenEnd) - return EFastFaceTokenParseResult::Success; + return true; if (*ptr != '/') - return EFastFaceTokenParseResult::Error; + return false; ++ptr; if (ptr >= tokenEnd) - return EFastFaceTokenParseResult::Success; - if (!parseUnsignedObjIndex(ptr, tokenEnd, parsed) || parsed == 0u) - return EFastFaceTokenParseResult::Error; - idx[2] = static_cast(parsed - 1u); + return true; + if (!parseSignedObjIndex(ptr, tokenEnd, raw)) + return false; + if (!resolveObjIndex(raw, normalCount, idx[2])) + return false; - return ptr == tokenEnd ? 
EFastFaceTokenParseResult::Success : EFastFaceTokenParseResult::Error; + return ptr == tokenEnd; } } @@ -434,6 +396,12 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as double buildMs = 0.0; double hashMs = 0.0; double aabbMs = 0.0; + double parseVms = 0.0; + double parseVNms = 0.0; + double parseVTms = 0.0; + double parseFaceMs = 0.0; + double dedupMs = 0.0; + double emitMs = 0.0; uint64_t faceCount = 0u; uint64_t faceFastTokenCount = 0u; uint64_t faceFallbackTokenCount = 0u; @@ -466,7 +434,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector outNormals; core::vector outUVs; core::vector indices; - core::unordered_map vtxMap; + boost::unordered_flat_map vtxMap; bool hasNormals = false; bool hasUVs = false; @@ -485,23 +453,29 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as { case ' ': { + const auto t = clock_t::now(); Float3 vec{}; bufPtr = readVec3(bufPtr, &vec.x, bufEnd); positions.push_back(vec); + parseVms += std::chrono::duration(clock_t::now() - t).count(); } break; case 'n': { + const auto t = clock_t::now(); Float3 vec{}; bufPtr = readVec3(bufPtr, &vec.x, bufEnd); normals.push_back(vec); + parseVNms += std::chrono::duration(clock_t::now() - t).count(); } break; case 't': { + const auto t = clock_t::now(); Float2 vec{}; bufPtr = readUV(bufPtr, &vec.x, bufEnd); uvs.push_back(vec); + parseVTms += std::chrono::duration(clock_t::now() - t).count(); } break; default: @@ -528,29 +502,20 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const char* linePtr = goNextWord(bufPtr, endPtr); while (linePtr < endPtr) { + const auto tokenParseStart = clock_t::now(); int32_t idx[3] = { -1, -1, -1 }; const char* tokenEnd = linePtr; while (tokenEnd < endPtr && !core::isspace(*tokenEnd)) ++tokenEnd; - const auto fastResult = retrieveVertexIndicesFast(linePtr, tokenEnd, idx); - if (fastResult == 
EFastFaceTokenParseResult::Success) - { - ++faceFastTokenCount; - } - else if (fastResult == EFastFaceTokenParseResult::NotApplicable) - { - if (!retrieveVertexIndices(linePtr, tokenEnd, idx, positions.size(), uvs.size(), normals.size())) - return {}; - ++faceFallbackTokenCount; - } - else - { + if (!parseObjFaceVertexToken(linePtr, tokenEnd, idx, positions.size(), uvs.size(), normals.size())) return {}; - } + ++faceFastTokenCount; if (idx[0] < 0 || static_cast(idx[0]) >= positions.size()) return {}; + parseFaceMs += std::chrono::duration(clock_t::now() - tokenParseStart).count(); + const auto dedupStart = clock_t::now(); ObjVertexKey key = { idx[0], idx[1], idx[2] }; const uint32_t candidateIndex = static_cast(outPositions.size()); auto [it, inserted] = vtxMap.try_emplace(key, candidateIndex); @@ -584,18 +549,21 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } outNormals.push_back(normal); } + dedupMs += std::chrono::duration(clock_t::now() - dedupStart).count(); faceCorners.push_back(outIx); linePtr = goFirstWord(tokenEnd, endPtr, false); } + const auto emitStart = clock_t::now(); for (uint32_t i = 1u; i + 1u < faceCorners.size(); ++i) { indices.push_back(faceCorners[i + 1]); indices.push_back(faceCorners[i]); indices.push_back(faceCorners[0]); } + emitMs += std::chrono::duration(clock_t::now() - emitStart).count(); } break; default: @@ -605,6 +573,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as bufPtr = goNextLine(bufPtr, bufEnd); } parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); + const double parseScanMs = std::max(0.0, parseMs - (parseVms + parseVNms + parseVTms + parseFaceMs + dedupMs + emitMs)); if (outPositions.empty()) return {}; @@ -707,12 +676,19 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as static_cast(ioTelemetry.getAvgOrZero())); } _params.logger.log( - "OBJ loader perf: file=%s total=%.3f ms io=%.3f parse=%.3f 
build=%.3f hash=%.3f aabb=%.3f in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu) io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "OBJ loader perf: file=%s total=%.3f ms io=%.3f parse=%.3f parse_scan=%.3f parse_v=%.3f parse_vn=%.3f parse_vt=%.3f parse_f=%.3f dedup=%.3f emit=%.3f build=%.3f hash=%.3f aabb=%.3f in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu) io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), totalMs, ioMs, parseMs, + parseScanMs, + parseVms, + parseVNms, + parseVTms, + parseFaceMs, + dedupMs, + emitMs, buildMs, hashMs, aabbMs, diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 59f42b8225..c4282c1ac9 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -11,7 +11,9 @@ #include #include #include +#include #include +#include #include namespace nbl::asset @@ -46,6 +48,31 @@ namespace obj_writer_detail constexpr size_t ApproxObjBytesPerVertex = 96ull; constexpr size_t ApproxObjBytesPerFace = 48ull; +struct SFileWriteTelemetry +{ + uint64_t callCount = 0ull; + uint64_t totalBytes = 0ull; + uint64_t minBytes = std::numeric_limits::max(); + + void account(const uint64_t bytes) + { + ++callCount; + totalBytes += bytes; + if (bytes < minBytes) + minBytes = bytes; + } + + uint64_t getMinOrZero() const + { + return callCount ? minBytes : 0ull; + } + + uint64_t getAvgOrZero() const + { + return callCount ? 
(totalBytes / callCount) : 0ull; + } +}; + bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) { out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); @@ -82,9 +109,9 @@ void appendUInt(std::string& out, const uint32_t value) out.append(buf.data(), static_cast(res.ptr - buf.data())); } -void appendFloatFixed6(std::string& out, double value) +void appendFloatFixed6(std::string& out, float value) { - std::array buf = {}; + std::array buf = {}; const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value, std::chars_format::fixed, 6); if (res.ec == std::errc()) { @@ -92,18 +119,25 @@ void appendFloatFixed6(std::string& out, double value) return; } - const int written = std::snprintf(buf.data(), buf.size(), "%.6f", value); + const int written = std::snprintf(buf.data(), buf.size(), "%.6f", static_cast(value)); if (written > 0) out.append(buf.data(), static_cast(written)); } -bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount); +bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry = nullptr); } // namespace obj_writer_detail bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { using namespace obj_writer_detail; + using clock_t = std::chrono::high_resolution_clock; + + const auto totalStart = clock_t::now(); + double encodeMs = 0.0; + double formatMs = 0.0; + double writeMs = 0.0; + SFileWriteTelemetry ioTelemetry = {}; if (!_override) getDefaultOverride(_override); @@ -160,6 +194,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ core::vector indexData; const uint32_t* indices = nullptr; size_t faceCount = 0; + const auto encodeStart = clock_t::now(); if (indexView) { @@ -209,10 +244,12 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, 
const SAssetWriteParams& _ indices = indexData.data(); faceCount = vertexCount / 3u; } + encodeMs = std::chrono::duration(clock_t::now() - encodeStart).count(); const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool flipHandedness = !(flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); std::string output; + const auto formatStart = clock_t::now(); output.reserve(vertexCount * ApproxObjBytesPerVertex + faceCount * ApproxObjBytesPerFace); output += "# Nabla OBJ\n"; @@ -223,9 +260,9 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const hlsl::float32_t2* const tightUV = hasUVs ? getTightFloat2View(*uvView) : nullptr; for (size_t i = 0u; i < vertexCount; ++i) { - double x = 0.0; - double y = 0.0; - double z = 0.0; + float x = 0.f; + float y = 0.f; + float z = 0.f; if (tightPositions) { x = tightPositions[i].x; @@ -236,9 +273,9 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ { if (!decodeVec4(positionView, i, tmp)) return false; - x = tmp.x; - y = tmp.y; - z = tmp.z; + x = static_cast(tmp.x); + y = static_cast(tmp.y); + z = static_cast(tmp.z); } if (flipHandedness) x = -x; @@ -256,19 +293,19 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ { for (size_t i = 0u; i < vertexCount; ++i) { - double u = 0.0; - double v = 0.0; + float u = 0.f; + float v = 0.f; if (tightUV) { u = tightUV[i].x; - v = 1.0 - tightUV[i].y; + v = 1.f - tightUV[i].y; } else { if (!decodeVec4(*uvView, i, tmp)) return false; - u = tmp.x; - v = 1.0 - tmp.y; + u = static_cast(tmp.x); + v = 1.f - static_cast(tmp.y); } output += "vt "; @@ -283,9 +320,9 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ { for (size_t i = 0u; i < vertexCount; ++i) { - double x = 0.0; - double y = 0.0; - double z = 0.0; + float x = 0.f; + float y = 0.f; + float z = 0.f; if (tightNormals) { x = tightNormals[i].x; @@ -296,9 +333,9 @@ bool 
COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ { if (!decodeVec4(normalView, i, tmp)) return false; - x = tmp.x; - y = tmp.y; - z = tmp.z; + x = static_cast(tmp.x); + y = static_cast(tmp.y); + z = static_cast(tmp.z); } if (flipHandedness) x = -x; @@ -313,28 +350,32 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } } - auto appendFaceIndex = [&](const uint32_t idx) + core::vector faceIndexTokens; + faceIndexTokens.resize(vertexCount); + for (size_t i = 0u; i < vertexCount; ++i) { - const uint32_t objIx = idx + 1u; - appendUInt(output, objIx); + auto& token = faceIndexTokens[i]; + token.reserve(24ull); + const uint32_t objIx = static_cast(i + 1u); + appendUInt(token, objIx); if (hasUVs && hasNormals) { - output += "/"; - appendUInt(output, objIx); - output += "/"; - appendUInt(output, objIx); + token += "/"; + appendUInt(token, objIx); + token += "/"; + appendUInt(token, objIx); } else if (hasUVs) { - output += "/"; - appendUInt(output, objIx); + token += "/"; + appendUInt(token, objIx); } else if (hasNormals) { - output += "//"; - appendUInt(output, objIx); + token += "//"; + appendUInt(token, objIx); } - }; + } for (size_t i = 0u; i < faceCount; ++i) { @@ -345,15 +386,18 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const uint32_t f0 = i2; const uint32_t f1 = i1; const uint32_t f2 = i0; + if (f0 >= faceIndexTokens.size() || f1 >= faceIndexTokens.size() || f2 >= faceIndexTokens.size()) + return false; output += "f "; - appendFaceIndex(f0); + output += faceIndexTokens[f0]; output += " "; - appendFaceIndex(f1); + output += faceIndexTokens[f1]; output += " "; - appendFaceIndex(f2); + output += faceIndexTokens[f2]; output += "\n"; } + formatMs = std::chrono::duration(clock_t::now() - formatStart).count(); const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true); if (!ioPlan.valid) @@ -362,10 +406,54 @@ bool 
COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return false; } - return writeBufferWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size()); + const auto writeStart = clock_t::now(); + const bool writeOk = writeBufferWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size(), &ioTelemetry); + writeMs = std::chrono::duration(clock_t::now() - writeStart).count(); + + const double totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); + const double miscMs = std::max(0.0, totalMs - (encodeMs + formatMs + writeMs)); + const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); + const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); + if ( + static_cast(output.size()) > (1ull << 20) && + ( + ioAvgWrite < 1024ull || + (ioMinWrite < 64ull && ioTelemetry.callCount > 1024ull) + ) + ) + { + _params.logger.log( + "OBJ writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", + system::ILogger::ELL_WARNING, + file->getFileName().string().c_str(), + static_cast(ioTelemetry.callCount), + static_cast(ioMinWrite), + static_cast(ioAvgWrite)); + } + _params.logger.log( + "OBJ writer perf: file=%s total=%.3f ms encode=%.3f format=%.3f write=%.3f misc=%.3f bytes=%llu vertices=%llu faces=%llu io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, + file->getFileName().string().c_str(), + totalMs, + encodeMs, + formatMs, + writeMs, + miscMs, + static_cast(output.size()), + static_cast(vertexCount), + static_cast(faceCount), + static_cast(ioTelemetry.callCount), + static_cast(ioMinWrite), + static_cast(ioAvgWrite), + toString(_params.ioPolicy.strategy), + toString(ioPlan.strategy), + static_cast(ioPlan.chunkSizeBytes), + ioPlan.reason); + + return writeOk; } -bool obj_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount) +bool 
obj_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry) { if (!file || (!data && byteCount != 0ull)) return false; @@ -377,6 +465,8 @@ bool obj_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolv { system::IFile::success_t success; file->write(success, data, fileOffset, byteCount); + if (success && ioTelemetry) + ioTelemetry->account(success.getBytesProcessed()); return success && success.getBytesProcessed() == byteCount; } case SResolvedFileIOPolicy::Strategy::Chunked: @@ -392,6 +482,8 @@ bool obj_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolv const size_t written = success.getBytesProcessed(); if (written == 0ull) return false; + if (ioTelemetry) + ioTelemetry->account(written); fileOffset += written; } return true; diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index fd88560490..d9932bf6ea 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include "nbl/asset/IAssetManager.h" @@ -98,6 +99,34 @@ IGeometry::SDataView plyCreateAdoptedU32IndexView(core::vector::SDataView plyCreateAdoptedU16IndexView(core::vector&& indices) +{ + if (indices.empty()) + return {}; + + auto backer = core::make_smart_refctd_ptr>>(std::move(indices)); + auto& storage = backer->getBacker(); + auto* const ptr = storage.data(); + const size_t byteCount = storage.size() * sizeof(uint16_t); + auto buffer = ICPUBuffer::create({ { byteCount }, ptr, core::smart_refctd_ptr(std::move(backer)), alignof(uint16_t) }, core::adopt_memory); + if (!buffer) + return {}; + + IGeometry::SDataView view = { + .composed = { + .stride = sizeof(uint16_t), + .format = EF_R16_UINT, + .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R16_UINT) + }, + .src = { + .offset 
= 0u, + .size = byteCount, + .buffer = std::move(buffer) + } + }; + return view; +} + struct SContext { static constexpr uint64_t ReadWindowPaddingBytes = 1ull; @@ -319,13 +348,21 @@ struct SContext void moveForward(const size_t bytes) { assert(IsBinaryFile); - if (StartPointer+bytes>=EndPointer) - fillBuffer(); + size_t remaining = bytes; + while (remaining) + { + if (StartPointer >= EndPointer) + { + fillBuffer(); + if (StartPointer >= EndPointer) + return; + } - if (StartPointer+bytes(EndPointer - StartPointer); + const size_t step = std::min(available, remaining); + StartPointer += step; + remaining -= step; + } } // read the next int from the file and move the start pointer along @@ -371,7 +408,8 @@ struct SContext const char* word = getNextWord(); if (!word) return 0u; - const char* const wordEnd = word + std::strlen(word); + const size_t tokenLen = WordLength >= 0 ? static_cast(WordLength + 1) : std::strlen(word); + const char* const wordEnd = word + tokenLen; if (word == wordEnd) return 0u; @@ -440,12 +478,13 @@ struct SContext const char* word = getNextWord(); if (!word) return 0.0; - const char* const wordEnd = word + std::strlen(word); + const size_t tokenLen = WordLength >= 0 ? 
static_cast(WordLength + 1) : std::strlen(word); + const char* const wordEnd = word + tokenLen; if (word == wordEnd) return 0.0; hlsl::float64_t value = 0.0; - const auto parseResult = std::from_chars(word, wordEnd, value, std::chars_format::general); + const auto parseResult = fast_float::from_chars(word, wordEnd, value); if (parseResult.ec == std::errc() && parseResult.ptr == wordEnd) return value; @@ -1021,8 +1060,12 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const auto totalStart = clock_t::now(); double headerMs = 0.0; double vertexMs = 0.0; + double vertexFastMs = 0.0; + double vertexGenericMs = 0.0; double faceMs = 0.0; double skipMs = 0.0; + double layoutNegotiateMs = 0.0; + double viewCreateMs = 0.0; double hashRangeMs = 0.0; double indexBuildMs = 0.0; double aabbMs = 0.0; @@ -1227,6 +1270,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } ICPUPolygonGeometry::SDataViewBase posView = {}, normalView = {}, uvView = {}; core::vector extraViews; + const auto layoutStart = clock_t::now(); for (auto& vertexProperty : el.Properties) { const auto& propertyName = vertexProperty.Name; @@ -1257,9 +1301,12 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa else { // TODO: record the `propertyName` + const auto extraViewStart = clock_t::now(); extraViews.push_back(createView(vertexProperty.type,el.Count)); + viewCreateMs += std::chrono::duration(clock_t::now() - extraViewStart).count(); } } + layoutNegotiateMs += std::chrono::duration(clock_t::now() - layoutStart).count(); auto setFinalFormat = [&ctx](ICPUPolygonGeometry::SDataViewBase& view)->void { const auto componentFormat = view.format; @@ -1408,30 +1455,36 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa }; if (posView.format!=EF_UNKNOWN) { + const auto viewCreateStart = clock_t::now(); auto beginIx = ctx.vertAttrIts.size(); setFinalFormat(posView); auto view = 
createView(posView.format,el.Count); for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; geometry->setPositionView(std::move(view)); + viewCreateMs += std::chrono::duration(clock_t::now() - viewCreateStart).count(); } if (normalView.format!=EF_UNKNOWN) { + const auto viewCreateStart = clock_t::now(); auto beginIx = ctx.vertAttrIts.size(); setFinalFormat(normalView); auto view = createView(normalView.format,el.Count); for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; geometry->setNormalView(std::move(view)); + viewCreateMs += std::chrono::duration(clock_t::now() - viewCreateStart).count(); } if (uvView.format!=EF_UNKNOWN) { + const auto viewCreateStart = clock_t::now(); auto beginIx = ctx.vertAttrIts.size(); setFinalFormat(uvView); auto view = createView(uvView.format,el.Count); for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; geometry->getAuxAttributeViews()->push_back(std::move(view)); + viewCreateMs += std::chrono::duration(clock_t::now() - viewCreateStart).count(); } // for (auto& view : extraViews) @@ -1448,17 +1501,22 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (fastVertexResult == SContext::EFastVertexReadResult::Success) { ++fastVertexElementCount; + const double elapsedMs = std::chrono::duration(clock_t::now() - vertexStart).count(); + vertexFastMs += elapsedMs; + vertexMs += elapsedMs; } else if (fastVertexResult == SContext::EFastVertexReadResult::NotApplicable) { ctx.readVertex(_params,el); + const double elapsedMs = std::chrono::duration(clock_t::now() - vertexStart).count(); + vertexGenericMs += elapsedMs; + vertexMs += elapsedMs; } else { _params.logger.log("PLY vertex fast path 
failed on malformed data for %s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str()); return {}; } - vertexMs += std::chrono::duration(clock_t::now() - vertexStart).count(); verticesProcessed = true; } else if (el.Name=="face") @@ -1491,8 +1549,18 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { // skip these elements const auto skipStart = clock_t::now(); - for (size_t j=0; j(el.KnownSize) * static_cast(el.Count); + if (bytesToSkip64 > static_cast(std::numeric_limits::max())) + return {}; + ctx.moveForward(static_cast(bytesToSkip64)); + } + else + { + for (size_t j=0; j(clock_t::now() - skipStart).count(); } } @@ -1519,12 +1587,12 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa geometry->setIndexing(IPolygonGeometryBase::TriangleList()); if (vertCount <= std::numeric_limits::max() && maxIndexRead <= std::numeric_limits::max()) { - auto view = IGeometryLoader::createView(EF_R16_UINT, indices.size()); + core::vector indices16(indices.size()); + for (size_t i = 0u; i < indices.size(); ++i) + indices16[i] = static_cast(indices[i]); + auto view = plyCreateAdoptedU16IndexView(std::move(indices16)); if (!view) return {}; - auto* dst = reinterpret_cast(view.getPointer()); - for (size_t i = 0u; i < indices.size(); ++i) - dst[i] = static_cast(indices[i]); geometry->setIndexView(std::move(view)); } else @@ -1542,6 +1610,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa hashRangeMs = std::chrono::duration(clock_t::now() - hashStart).count(); const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); + const double stageRemainderMs = std::max(0.0, totalMs - (headerMs + vertexMs + faceMs + skipMs + layoutNegotiateMs + viewCreateMs + hashRangeMs + indexBuildMs + aabbMs)); const uint64_t ioMinRead = ctx.readCallCount ? ctx.readMinBytes : 0ull; const uint64_t ioAvgRead = ctx.readCallCount ? 
(ctx.readBytesTotal / ctx.readCallCount) : 0ull; if ( @@ -1561,17 +1630,22 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa static_cast(ioAvgRead)); } _params.logger.log( - "PLY loader perf: file=%s total=%.3f ms header=%.3f vertex=%.3f face=%.3f skip=%.3f hash_range=%.3f index=%.3f aabb=%.3f binary=%d verts=%llu faces=%llu idx=%llu vertex_fast=%llu face_fast=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "PLY loader perf: file=%s total=%.3f ms header=%.3f vertex=%.3f vertex_fast_ms=%.3f vertex_generic_ms=%.3f face=%.3f skip=%.3f layout_negotiate=%.3f view_create=%.3f hash_range=%.3f index=%.3f aabb=%.3f remainder=%.3f binary=%d verts=%llu faces=%llu idx=%llu vertex_fast=%llu face_fast=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), totalMs, headerMs, vertexMs, + vertexFastMs, + vertexGenericMs, faceMs, skipMs, + layoutNegotiateMs, + viewCreateMs, hashRangeMs, indexBuildMs, aabbMs, + stageRemainderMs, ctx.IsBinaryFile ? 
1 : 0, static_cast(vertCount), static_cast(faceCount), diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index fb34c24748..e9a0faa032 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -13,7 +13,9 @@ #include #include #include +#include #include +#include #include namespace nbl::asset @@ -48,6 +50,31 @@ namespace ply_writer_detail constexpr size_t ApproxPlyTextBytesPerVertex = 96ull; constexpr size_t ApproxPlyTextBytesPerFace = 32ull; +struct SFileWriteTelemetry +{ + uint64_t callCount = 0ull; + uint64_t totalBytes = 0ull; + uint64_t minBytes = std::numeric_limits::max(); + + void account(const uint64_t bytes) + { + ++callCount; + totalBytes += bytes; + if (bytes < minBytes) + minBytes = bytes; + } + + uint64_t getMinOrZero() const + { + return callCount ? minBytes : 0ull; + } + + uint64_t getAvgOrZero() const + { + return callCount ? (totalBytes / callCount) : 0ull; + } +}; + bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) { out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); @@ -110,7 +137,7 @@ void appendVec(std::string& out, const double* values, size_t count, bool flipVe } } -bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount); +bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry = nullptr); bool writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors); bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors); @@ -119,6 +146,13 
@@ bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SData bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { using namespace ply_writer_detail; + using clock_t = std::chrono::high_resolution_clock; + + const auto totalStart = clock_t::now(); + double encodeMs = 0.0; + double formatMs = 0.0; + double writeMs = 0.0; + SFileWriteTelemetry ioTelemetry = {}; if (!_override) getDefaultOverride(_override); @@ -170,6 +204,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ core::vector indexData; const uint32_t* indices = nullptr; size_t faceCount = 0; + const auto encodeStart = clock_t::now(); if (indexView) { @@ -219,10 +254,12 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ indices = indexData.data(); faceCount = vertexCount / 3u; } + encodeMs = std::chrono::duration(clock_t::now() - encodeStart).count(); const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool binary = (flags & E_WRITER_FLAGS::EWF_BINARY) != 0u; + const auto formatStart = clock_t::now(); std::string header = "ply\n"; header += binary ? "format binary_little_endian 1.0" : "format ascii 1.0"; header += "\ncomment Nabla "; @@ -253,21 +290,26 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ header += std::to_string(faceCount); header += "\nproperty list uchar uint vertex_indices\n"; header += "end_header\n"; + formatMs += std::chrono::duration(clock_t::now() - formatStart).count(); const bool flipVectors = !(flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + bool writeOk = false; + size_t outputBytes = 0ull; if (binary) { const size_t vertexStride = sizeof(float) * (3u + (writeNormals ? 3u : 0u) + (uvView ? 
2u : 0u)); const size_t faceStride = sizeof(uint8_t) + sizeof(uint32_t) * 3u; const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; + const auto binaryEncodeStart = clock_t::now(); core::vector output; output.resize(header.size() + bodySize); if (!header.empty()) std::memcpy(output.data(), header.data(), header.size()); if (!writeBinary(geom, uvView, writeNormals, vertexCount, indices, faceCount, output.data() + header.size(), flipVectors)) return false; + encodeMs += std::chrono::duration(clock_t::now() - binaryEncodeStart).count(); const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true); if (!ioPlan.valid) @@ -275,26 +317,122 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ _params.logger.log("PLY writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); return false; } - return writeBufferWithPolicy(file, ioPlan, output.data(), output.size()); + + outputBytes = output.size(); + const auto writeStart = clock_t::now(); + writeOk = writeBufferWithPolicy(file, ioPlan, output.data(), output.size(), &ioTelemetry); + writeMs = std::chrono::duration(clock_t::now() - writeStart).count(); + + const double totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); + const double miscMs = std::max(0.0, totalMs - (encodeMs + formatMs + writeMs)); + const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); + const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); + if ( + static_cast(outputBytes) > (1ull << 20) && + ( + ioAvgWrite < 1024ull || + (ioMinWrite < 64ull && ioTelemetry.callCount > 1024ull) + ) + ) + { + _params.logger.log( + "PLY writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", + system::ILogger::ELL_WARNING, + file->getFileName().string().c_str(), + static_cast(ioTelemetry.callCount), + static_cast(ioMinWrite), + static_cast(ioAvgWrite)); + } + _params.logger.log( + "PLY writer 
perf: file=%s total=%.3f ms encode=%.3f format=%.3f write=%.3f misc=%.3f bytes=%llu vertices=%llu faces=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, + file->getFileName().string().c_str(), + totalMs, + encodeMs, + formatMs, + writeMs, + miscMs, + static_cast(outputBytes), + static_cast(vertexCount), + static_cast(faceCount), + binary ? 1 : 0, + static_cast(ioTelemetry.callCount), + static_cast(ioMinWrite), + static_cast(ioAvgWrite), + toString(_params.ioPolicy.strategy), + toString(ioPlan.strategy), + static_cast(ioPlan.chunkSizeBytes), + ioPlan.reason); + return writeOk; } + const auto textEncodeStart = clock_t::now(); std::string body; body.reserve(vertexCount * ApproxPlyTextBytesPerVertex + faceCount * ApproxPlyTextBytesPerFace); if (!writeText(geom, uvView, writeNormals, vertexCount, indices, faceCount, body, flipVectors)) return false; + encodeMs += std::chrono::duration(clock_t::now() - textEncodeStart).count(); + const auto textFormatStart = clock_t::now(); std::string output = header; output += body; + formatMs += std::chrono::duration(clock_t::now() - textFormatStart).count(); const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true); if (!ioPlan.valid) { _params.logger.log("PLY writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); return false; } - return writeBufferWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size()); + + outputBytes = output.size(); + const auto writeStart = clock_t::now(); + writeOk = writeBufferWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size(), &ioTelemetry); + writeMs = std::chrono::duration(clock_t::now() - writeStart).count(); + + const double totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); + const double miscMs = std::max(0.0, totalMs - (encodeMs + formatMs + 
writeMs)); + const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); + const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); + if ( + static_cast(outputBytes) > (1ull << 20) && + ( + ioAvgWrite < 1024ull || + (ioMinWrite < 64ull && ioTelemetry.callCount > 1024ull) + ) + ) + { + _params.logger.log( + "PLY writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", + system::ILogger::ELL_WARNING, + file->getFileName().string().c_str(), + static_cast(ioTelemetry.callCount), + static_cast(ioMinWrite), + static_cast(ioAvgWrite)); + } + _params.logger.log( + "PLY writer perf: file=%s total=%.3f ms encode=%.3f format=%.3f write=%.3f misc=%.3f bytes=%llu vertices=%llu faces=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, + file->getFileName().string().c_str(), + totalMs, + encodeMs, + formatMs, + writeMs, + miscMs, + static_cast(outputBytes), + static_cast(vertexCount), + static_cast(faceCount), + binary ? 
1 : 0, + static_cast(ioTelemetry.callCount), + static_cast(ioMinWrite), + static_cast(ioAvgWrite), + toString(_params.ioPolicy.strategy), + toString(ioPlan.strategy), + static_cast(ioPlan.chunkSizeBytes), + ioPlan.reason); + return writeOk; } -bool ply_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount) +bool ply_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry) { if (!file || (!data && byteCount != 0ull)) return false; @@ -306,6 +444,8 @@ bool ply_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolv { system::IFile::success_t success; file->write(success, data, fileOffset, byteCount); + if (success && ioTelemetry) + ioTelemetry->account(success.getBytesProcessed()); return success && success.getBytesProcessed() == byteCount; } case SResolvedFileIOPolicy::Strategy::Chunked: @@ -321,6 +461,8 @@ bool ply_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolv const size_t written = success.getBytesProcessed(); if (written == 0ull) return false; + if (ioTelemetry) + ioTelemetry->account(written); fileOffset += written; } return true; @@ -333,6 +475,8 @@ bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUP if (!dst) return false; + constexpr size_t Float3Bytes = sizeof(float) * 3ull; + constexpr size_t Float2Bytes = sizeof(float) * 2ull; const auto& positionView = geom->getPositionView(); const auto& normalView = geom->getNormalView(); const hlsl::float32_t3* const tightPos = getTightFloat3View(positionView); @@ -342,69 +486,80 @@ bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUP hlsl::float64_t4 tmp = {}; for (size_t i = 0; i < vertexCount; ++i) { - float pos[3] = {}; - if (tightPos) + if (tightPos && !flipVectors) { - pos[0] = tightPos[i].x; - pos[1] = tightPos[i].y; - pos[2] = 
tightPos[i].z; + std::memcpy(dst, tightPos + i, Float3Bytes); } else { - if (!decodeVec4(positionView, i, tmp)) - return false; - pos[0] = static_cast(tmp.x); - pos[1] = static_cast(tmp.y); - pos[2] = static_cast(tmp.z); + float pos[3] = {}; + if (tightPos) + { + pos[0] = tightPos[i].x; + pos[1] = tightPos[i].y; + pos[2] = tightPos[i].z; + } + else + { + if (!decodeVec4(positionView, i, tmp)) + return false; + pos[0] = static_cast(tmp.x); + pos[1] = static_cast(tmp.y); + pos[2] = static_cast(tmp.z); + } + if (flipVectors) + pos[0] = -pos[0]; + std::memcpy(dst, pos, Float3Bytes); } - if (flipVectors) - pos[0] = -pos[0]; - - std::memcpy(dst, pos, sizeof(pos)); - dst += sizeof(pos); + dst += Float3Bytes; if (writeNormals) { - float normal[3] = {}; - if (tightNormal) + if (tightNormal && !flipVectors) { - normal[0] = tightNormal[i].x; - normal[1] = tightNormal[i].y; - normal[2] = tightNormal[i].z; + std::memcpy(dst, tightNormal + i, Float3Bytes); } else { - if (!decodeVec4(normalView, i, tmp)) - return false; - normal[0] = static_cast(tmp.x); - normal[1] = static_cast(tmp.y); - normal[2] = static_cast(tmp.z); + float normal[3] = {}; + if (tightNormal) + { + normal[0] = tightNormal[i].x; + normal[1] = tightNormal[i].y; + normal[2] = tightNormal[i].z; + } + else + { + if (!decodeVec4(normalView, i, tmp)) + return false; + normal[0] = static_cast(tmp.x); + normal[1] = static_cast(tmp.y); + normal[2] = static_cast(tmp.z); + } + if (flipVectors) + normal[0] = -normal[0]; + + std::memcpy(dst, normal, Float3Bytes); } - if (flipVectors) - normal[0] = -normal[0]; - - std::memcpy(dst, normal, sizeof(normal)); - dst += sizeof(normal); + dst += Float3Bytes; } if (uvView) { - float uv[2] = {}; if (tightUV) { - uv[0] = tightUV[i].x; - uv[1] = tightUV[i].y; + std::memcpy(dst, tightUV + i, Float2Bytes); } else { + float uv[2] = {}; if (!decodeVec4(*uvView, i, tmp)) return false; uv[0] = static_cast(tmp.x); uv[1] = static_cast(tmp.y); + std::memcpy(dst, uv, Float2Bytes); } - - 
std::memcpy(dst, uv, sizeof(uv)); - dst += sizeof(uv); + dst += Float2Bytes; } } diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 7cb3a09e5a..0d401adf3b 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -12,11 +12,15 @@ #include "nbl/asset/utils/CPolygonGeometryManipulator.h" #include "nbl/system/IFile.h" +#include #include #include #include #include +#include #include +#include +#include #include #include @@ -51,7 +55,6 @@ struct SFileReadTelemetry struct SSTLContext { IAssetLoader::SAssetLoadContext inner; - size_t fileOffset = 0ull; SFileReadTelemetry ioTelemetry = {}; }; @@ -112,60 +115,6 @@ bool stlReadWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, c } } -bool stlReadU8(SSTLContext* context, uint8_t& out) -{ - if (!context) - return false; - - system::IFile::success_t success; - context->inner.mainFile->read(success, &out, context->fileOffset, sizeof(out)); - if (!success || success.getBytesProcessed() != sizeof(out)) - return false; - context->ioTelemetry.account(success.getBytesProcessed()); - context->fileOffset += sizeof(out); - return true; -} - -void stlGoNextWord(SSTLContext* context) -{ - if (!context) - return; - - uint8_t c = 0u; - while (context->fileOffset < context->inner.mainFile->getSize()) - { - const size_t before = context->fileOffset; - if (!stlReadU8(context, c)) - break; - if (!core::isspace(c)) - { - context->fileOffset = before; - break; - } - } -} - -const std::string& stlGetNextToken(SSTLContext* context, std::string& token) -{ - stlGoNextWord(context); - token.clear(); - - char c = 0; - while (context->fileOffset < context->inner.mainFile->getSize()) - { - system::IFile::success_t success; - context->inner.mainFile->read(success, &c, context->fileOffset, sizeof(c)); - if (!success || success.getBytesProcessed() != sizeof(c)) - break; - context->fileOffset += sizeof(c); - if 
(core::isspace(c)) - break; - token += c; - } - - return token; -} - const char* stlSkipWhitespace(const char* ptr, const char* const end) { while (ptr < end && core::isspace(*ptr)) @@ -197,11 +146,18 @@ bool stlReadTextFloat(const char*& ptr, const char* const end, float& outValue) if (ptr >= end) return false; - const auto parseResult = std::from_chars(ptr, end, outValue, std::chars_format::general); - if (parseResult.ec != std::errc() || parseResult.ptr == ptr) - return false; + const auto parseResult = fast_float::from_chars(ptr, end, outValue); + if (parseResult.ec == std::errc() && parseResult.ptr != ptr) + { + ptr = parseResult.ptr; + return true; + } - ptr = parseResult.ptr; + char* fallbackEnd = nullptr; + outValue = std::strtof(ptr, &fallbackEnd); + if (!fallbackEnd || fallbackEnd == ptr) + return false; + ptr = fallbackEnd <= end ? fallbackEnd : end; return true; } @@ -296,6 +252,9 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa double ioMs = 0.0; double parseMs = 0.0; double buildMs = 0.0; + double buildAllocViewsMs = 0.0; + double buildSetViewsMs = 0.0; + double buildMiscMs = 0.0; double hashMs = 0.0; double aabbMs = 0.0; uint64_t triangleCount = 0u; @@ -323,25 +282,32 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa bool binary = false; bool hasBinaryTriCountFromDetect = false; uint32_t binaryTriCountFromDetect = 0u; - std::string token; { const auto detectStart = clock_t::now(); - char header[StlTextProbeBytes] = {}; - if (!stlReadExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) - return {}; + std::array prefix = {}; + const bool hasPrefix = filesize >= StlBinaryPrefixBytes && stlReadExact(context.inner.mainFile, prefix.data(), 0ull, StlBinaryPrefixBytes, &context.ioTelemetry); + bool startsWithSolid = false; + if (hasPrefix) + { + startsWithSolid = (std::memcmp(prefix.data(), "solid ", StlTextProbeBytes) == 0); + } + else + { + char 
header[StlTextProbeBytes] = {}; + if (!stlReadExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) + return {}; + startsWithSolid = (std::strncmp(header, "solid ", StlTextProbeBytes) == 0); + } - const bool startsWithSolid = (std::strncmp(header, "solid ", StlTextProbeBytes) == 0); bool binaryBySize = false; - if (filesize >= StlBinaryPrefixBytes) + if (hasPrefix) { uint32_t triCount = 0u; - if (stlReadExact(context.inner.mainFile, &triCount, StlBinaryHeaderBytes, sizeof(triCount), &context.ioTelemetry)) - { - binaryTriCountFromDetect = triCount; - hasBinaryTriCountFromDetect = true; - const uint64_t expectedSize = StlBinaryPrefixBytes + static_cast(triCount) * StlTriangleRecordBytes; - binaryBySize = (expectedSize == filesize); - } + std::memcpy(&triCount, prefix.data() + StlBinaryHeaderBytes, sizeof(triCount)); + binaryTriCountFromDetect = triCount; + hasBinaryTriCountFromDetect = true; + const uint64_t expectedSize = StlBinaryPrefixBytes + static_cast(triCount) * StlTriangleRecordBytes; + binaryBySize = (expectedSize == filesize); } if (binaryBySize) @@ -349,10 +315,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa else if (!startsWithSolid) binary = true; else - binary = (stlGetNextToken(&context, token) != "solid"); + binary = false; - if (binary) - context.fileOffset = 0ull; detectMs = std::chrono::duration(clock_t::now() - detectStart).count(); } @@ -399,7 +363,9 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa auto* normalOut = reinterpret_cast(normalView.getPointer()); if (!posOut || !normalOut) return {}; - buildMs += std::chrono::duration(clock_t::now() - buildPrepStart).count(); + const double buildPrepMs = std::chrono::duration(clock_t::now() - buildPrepStart).count(); + buildAllocViewsMs += buildPrepMs; + buildMs += buildPrepMs; const auto parseStart = clock_t::now(); const uint8_t* cursor = payload.data(); @@ -515,7 +481,9 @@ SAssetBundle 
CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const auto buildFinalizeStart = clock_t::now(); geometry->setPositionView(std::move(posView)); geometry->setNormalView(std::move(normalView)); - buildMs += std::chrono::duration(clock_t::now() - buildFinalizeStart).count(); + const double buildFinalizeMs = std::chrono::duration(clock_t::now() - buildFinalizeStart).count(); + buildSetViewsMs += buildFinalizeMs; + buildMs += buildFinalizeMs; } else { @@ -585,10 +553,12 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa vertexCount = positions.size(); const auto buildStart = clock_t::now(); + const auto allocStart = clock_t::now(); auto posView = createView(EF_R32G32B32_SFLOAT, positions.size()); auto normalView = createView(EF_R32G32B32_SFLOAT, positions.size()); if (!posView || !normalView) return {}; + buildAllocViewsMs += std::chrono::duration(clock_t::now() - allocStart).count(); auto* posOut = reinterpret_cast(posView.getPointer()); auto* normalOut = reinterpret_cast(normalView.getPointer()); @@ -604,8 +574,10 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa stlExtendAABB(parsedAABB, hasParsedAABB, posOut[i]); } + const auto setStart = clock_t::now(); geometry->setPositionView(std::move(posView)); geometry->setNormalView(std::move(normalView)); + buildSetViewsMs += std::chrono::duration(clock_t::now() - setStart).count(); buildMs = std::chrono::duration(clock_t::now() - buildStart).count(); } @@ -638,6 +610,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } aabbMs = std::chrono::duration(clock_t::now() - aabbStart).count(); + buildMiscMs = std::max(0.0, buildMs - (buildAllocViewsMs + buildSetViewsMs)); + const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); const uint64_t ioMinRead = context.ioTelemetry.getMinOrZero(); const uint64_t ioAvgRead = context.ioTelemetry.getAvgOrZero(); @@ -658,7 +632,7 @@ SAssetBundle 
CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa static_cast(ioAvgRead)); } _params.logger.log( - "STL loader perf: file=%s total=%.3f ms detect=%.3f io=%.3f parse=%.3f build=%.3f hash=%.3f aabb=%.3f binary=%d parse_path=%s triangles=%llu vertices=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "STL loader perf: file=%s total=%.3f ms detect=%.3f io=%.3f parse=%.3f build=%.3f build_alloc_views=%.3f build_set_views=%.3f build_misc=%.3f hash=%.3f aabb=%.3f binary=%d parse_path=%s triangles=%llu vertices=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), totalMs, @@ -666,6 +640,9 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa ioMs, parseMs, buildMs, + buildAllocViewsMs, + buildSetViewsMs, + buildMiscMs, hashMs, aabbMs, binary ? 1 : 0, diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 4433e7e235..812db24298 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -8,8 +8,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -22,12 +24,41 @@ namespace nbl::asset namespace stl_writer_detail { +struct SFileWriteTelemetry +{ + uint64_t callCount = 0ull; + uint64_t totalBytes = 0ull; + uint64_t minBytes = std::numeric_limits::max(); + + void account(const uint64_t bytes) + { + ++callCount; + totalBytes += bytes; + if (bytes < minBytes) + minBytes = bytes; + } + + uint64_t getMinOrZero() const + { + return callCount ? minBytes : 0ull; + } + + uint64_t getAvgOrZero() const + { + return callCount ? 
(totalBytes / callCount) : 0ull; + } +}; + struct SContext { IAssetWriter::SAssetWriteContext writeContext; SResolvedFileIOPolicy ioPlan = {}; core::vector ioBuffer = {}; size_t fileOffset = 0ull; + double formatMs = 0.0; + double encodeMs = 0.0; + double writeMs = 0.0; + SFileWriteTelemetry writeTelemetry = {}; }; constexpr size_t BinaryHeaderBytes = 80ull; @@ -48,7 +79,7 @@ using SContext = stl_writer_detail::SContext; bool flushBytes(SContext* context); bool writeBytes(SContext* context, const void* data, size_t size); -bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount); +bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, stl_writer_detail::SFileWriteTelemetry* ioTelemetry = nullptr); const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view); bool decodeTriangleIndices(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView& posView, core::vector& indexData, const uint32_t*& outIndices, uint32_t& outFaceCount); bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx); @@ -94,6 +125,9 @@ uint32_t CSTLMeshWriter::getForcedFlags() bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { + using clock_t = std::chrono::high_resolution_clock; + const auto totalStart = clock_t::now(); + if (!_override) getDefaultOverride(_override); @@ -118,6 +152,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const asset::E_WRITER_FLAGS flags = _override->getAssetWritingFlags(context.writeContext, geom, 0u); const bool binary = (flags & asset::EWF_BINARY) != 0u; + const auto formatStart = 
clock_t::now(); uint64_t expectedSize = 0ull; bool sizeKnown = false; @@ -138,12 +173,56 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ context.ioBuffer.reserve(static_cast(expectedSize)); else context.ioBuffer.reserve(static_cast(std::min(context.ioPlan.chunkSizeBytes, stl_writer_detail::IoFallbackReserveBytes))); + context.formatMs = std::chrono::duration(clock_t::now() - formatStart).count(); const bool written = binary ? writeMeshBinary(geom, &context) : writeMeshASCII(geom, &context); if (!written) return false; - return flushBytes(&context); + const bool flushed = flushBytes(&context); + if (!flushed) + return false; + + const double totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); + const double miscMs = std::max(0.0, totalMs - (context.formatMs + context.encodeMs + context.writeMs)); + const uint64_t ioMinWrite = context.writeTelemetry.getMinOrZero(); + const uint64_t ioAvgWrite = context.writeTelemetry.getAvgOrZero(); + if ( + (context.fileOffset > (1ull << 20)) && + ( + ioAvgWrite < 1024ull || + (ioMinWrite < 64ull && context.writeTelemetry.callCount > 1024ull) + ) + ) + { + _params.logger.log( + "STL writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", + system::ILogger::ELL_WARNING, + file->getFileName().string().c_str(), + static_cast(context.writeTelemetry.callCount), + static_cast(ioMinWrite), + static_cast(ioAvgWrite)); + } + _params.logger.log( + "STL writer perf: file=%s total=%.3f ms format=%.3f encode=%.3f write=%.3f misc=%.3f bytes=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, + file->getFileName().string().c_str(), + totalMs, + context.formatMs, + context.encodeMs, + context.writeMs, + miscMs, + static_cast(context.fileOffset), + binary ? 
1 : 0, + static_cast(context.writeTelemetry.callCount), + static_cast(ioMinWrite), + static_cast(ioAvgWrite), + toString(_params.ioPolicy.strategy), + toString(context.ioPlan.strategy), + static_cast(context.ioPlan.chunkSizeBytes), + context.ioPlan.reason); + + return true; } bool flushBytes(SContext* context) @@ -153,6 +232,8 @@ bool flushBytes(SContext* context) if (context->ioBuffer.empty()) return true; + using clock_t = std::chrono::high_resolution_clock; + const auto writeStart = clock_t::now(); size_t bytesWritten = 0ull; const size_t totalBytes = context->ioBuffer.size(); while (bytesWritten < totalBytes) @@ -168,10 +249,12 @@ bool flushBytes(SContext* context) const size_t processed = success.getBytesProcessed(); if (processed == 0ull) return false; + context->writeTelemetry.account(processed); bytesWritten += processed; } context->fileOffset += totalBytes; context->ioBuffer.clear(); + context->writeMs += std::chrono::duration(clock_t::now() - writeStart).count(); return true; } @@ -218,7 +301,7 @@ bool writeBytes(SContext* context, const void* data, size_t size) } } -bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount) +bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, stl_writer_detail::SFileWriteTelemetry* ioTelemetry) { if (!file || (!data && byteCount != 0ull)) return false; @@ -230,6 +313,8 @@ bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioP { system::IFile::success_t success; file->write(success, data, fileOffset, byteCount); + if (success && ioTelemetry) + ioTelemetry->account(success.getBytesProcessed()); return success && success.getBytesProcessed() == byteCount; } case SResolvedFileIOPolicy::Strategy::Chunked: @@ -245,6 +330,8 @@ bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioP const size_t written = success.getBytesProcessed(); if (written 
== 0ull) return false; + if (ioTelemetry) + ioTelemetry->account(written); fileOffset += written; } return true; @@ -381,6 +468,8 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) { if (!geom || !context || !context->writeContext.outputFile) return false; + using clock_t = std::chrono::high_resolution_clock; + const auto encodeStart = clock_t::now(); const auto& posView = geom->getPositionView(); if (!posView) @@ -598,13 +687,21 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) } } - return writeBufferWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize); + context->encodeMs += std::chrono::duration(clock_t::now() - encodeStart).count(); + const auto writeStart = clock_t::now(); + const bool writeOk = writeBufferWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, &context->writeTelemetry); + context->writeMs += std::chrono::duration(clock_t::now() - writeStart).count(); + if (writeOk) + context->fileOffset += outputSize; + return writeOk; } bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) { if (!geom) return false; + using clock_t = std::chrono::high_resolution_clock; + const auto encodeStart = clock_t::now(); const auto* indexing = geom->getIndexingCallback(); if (!indexing || indexing->degree() != 3u) @@ -649,6 +746,7 @@ bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) if (!writeBytes(context, solidName.data(), solidName.size())) return false; + context->encodeMs += std::chrono::duration(clock_t::now() - encodeStart).count(); return true; } From c61bc8e7517a35322f514d56d5c00a0508416b5c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Feb 2026 17:39:35 +0100 Subject: [PATCH 007/118] Improve OBJ and PLY parser hot paths --- .../asset/interchange/COBJMeshFileLoader.cpp | 127 ++++++----- src/nbl/asset/interchange/COBJMeshWriter.cpp | 105 ++++++--- 
.../asset/interchange/CPLYMeshFileLoader.cpp | 211 +++++++++++++++--- 3 files changed, 321 insertions(+), 122 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 2c14c58c05..f4e8e93303 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -261,24 +261,32 @@ const char* readUV(const char* bufPtr, float vec[2], const char* const bufEnd) bool parseSignedObjIndex(const char*& ptr, const char* const end, int32_t& out) { - const char* parseStart = ptr; if (ptr >= end) return false; - int64_t value = 0; - const auto parseResult = std::from_chars(ptr, end, value, 10); - if (!(parseResult.ec == std::errc() && parseResult.ptr != ptr)) + bool negative = false; + if (*ptr == '-') { - char* fallbackEnd = nullptr; - value = std::strtoll(parseStart, &fallbackEnd, 10); - if (!fallbackEnd || fallbackEnd == parseStart || fallbackEnd > end) - return false; - ptr = fallbackEnd; + negative = true; + ++ptr; } - else + else if (*ptr == '+') { - ptr = parseResult.ptr; + ++ptr; } + + if (ptr >= end || !core::isdigit(*ptr)) + return false; + + int64_t value = 0; + while (ptr < end && core::isdigit(*ptr)) + { + value = value * 10ll + static_cast(*ptr - '0'); + ++ptr; + } + if (negative) + value = -value; + if (value == 0) return false; if (value < static_cast(std::numeric_limits::min()) || value > static_cast(std::numeric_limits::max())) @@ -309,50 +317,67 @@ bool resolveObjIndex(const int32_t rawIndex, const size_t elementCount, int32_t& return true; } -bool parseObjFaceVertexToken(const char* tokenBegin, const char* tokenEnd, int32_t* idx, const size_t posCount, const size_t uvCount, const size_t normalCount) +bool parseObjFaceVertexTokenFast(const char*& linePtr, const char* const lineEnd, int32_t* idx, const size_t posCount, const size_t uvCount, const size_t normalCount) { - if (!tokenBegin || !idx || tokenBegin >= tokenEnd) + if (!idx) + 
return false; + + while (linePtr < lineEnd && core::isspace(*linePtr) && *linePtr != '\n' && *linePtr != '\r') + ++linePtr; + if (linePtr >= lineEnd) return false; idx[0] = -1; idx[1] = -1; idx[2] = -1; - const char* ptr = tokenBegin; + const char* ptr = linePtr; int32_t raw = 0; - if (!parseSignedObjIndex(ptr, tokenEnd, raw)) + if (!parseSignedObjIndex(ptr, lineEnd, raw)) return false; if (!resolveObjIndex(raw, posCount, idx[0])) return false; - if (ptr >= tokenEnd) - return true; - if (*ptr != '/') - return false; - ++ptr; - - if (ptr < tokenEnd && *ptr != '/') + if (ptr < lineEnd && *ptr == '/') { - if (!parseSignedObjIndex(ptr, tokenEnd, raw)) - return false; - if (!resolveObjIndex(raw, uvCount, idx[1])) + ++ptr; + + if (ptr < lineEnd && *ptr != '/') + { + if (!parseSignedObjIndex(ptr, lineEnd, raw)) + return false; + if (!resolveObjIndex(raw, uvCount, idx[1])) + return false; + } + + if (ptr < lineEnd && *ptr == '/') + { + ++ptr; + if (ptr < lineEnd && !core::isspace(*ptr)) + { + if (!parseSignedObjIndex(ptr, lineEnd, raw)) + return false; + if (!resolveObjIndex(raw, normalCount, idx[2])) + return false; + } + } + else if (ptr < lineEnd && !core::isspace(*ptr)) + { return false; + } } - - if (ptr >= tokenEnd) - return true; - if (*ptr != '/') + else if (ptr < lineEnd && !core::isspace(*ptr)) + { return false; - ++ptr; + } - if (ptr >= tokenEnd) - return true; - if (!parseSignedObjIndex(ptr, tokenEnd, raw)) - return false; - if (!resolveObjIndex(raw, normalCount, idx[2])) + if (ptr < lineEnd && !core::isspace(*ptr)) return false; + while (ptr < lineEnd && core::isspace(*ptr) && *ptr != '\n' && *ptr != '\r') + ++ptr; - return ptr == tokenEnd; + linePtr = ptr; + return true; } } @@ -489,8 +514,13 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as ++faceCount; if (faceCount == 1u) { - vtxMap.reserve(positions.size() * 4ull); - indices.reserve(positions.size() * 6ull); + const size_t estimatedVertexCount = positions.size() <= 
(std::numeric_limits::max() / 4ull) ? positions.size() * 4ull : positions.size(); + vtxMap.reserve(estimatedVertexCount); + outPositions.reserve(estimatedVertexCount); + outNormals.reserve(estimatedVertexCount); + outUVs.reserve(estimatedVertexCount); + const size_t estimatedIndexCount = estimatedVertexCount <= (std::numeric_limits::max() / 2ull) ? estimatedVertexCount * 2ull : estimatedVertexCount; + indices.reserve(estimatedIndexCount); } const char* endPtr = bufPtr; @@ -499,15 +529,17 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as faceCorners.clear(); - const char* linePtr = goNextWord(bufPtr, endPtr); + const char* linePtr = bufPtr + 1; while (linePtr < endPtr) { + while (linePtr < endPtr && core::isspace(*linePtr) && *linePtr != '\n' && *linePtr != '\r') + ++linePtr; + if (linePtr >= endPtr) + break; + const auto tokenParseStart = clock_t::now(); int32_t idx[3] = { -1, -1, -1 }; - const char* tokenEnd = linePtr; - while (tokenEnd < endPtr && !core::isspace(*tokenEnd)) - ++tokenEnd; - if (!parseObjFaceVertexToken(linePtr, tokenEnd, idx, positions.size(), uvs.size(), normals.size())) + if (!parseObjFaceVertexTokenFast(linePtr, endPtr, idx, positions.size(), uvs.size(), normals.size())) return {}; ++faceFastTokenCount; @@ -522,13 +554,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as uint32_t outIx = it->second; if (inserted) { - if (outPositions.empty()) - { - const size_t estimatedVertexCount = positions.size() <= (std::numeric_limits::max() / 4ull) ? 
positions.size() * 4ull : positions.size(); - outPositions.reserve(estimatedVertexCount); - outNormals.reserve(estimatedVertexCount); - outUVs.reserve(estimatedVertexCount); - } const auto& srcPos = positions[idx[0]]; outPositions.push_back(srcPos); extendAABB(parsedAABB, hasParsedAABB, srcPos); @@ -552,8 +577,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as dedupMs += std::chrono::duration(clock_t::now() - dedupStart).count(); faceCorners.push_back(outIx); - - linePtr = goFirstWord(tokenEnd, endPtr, false); } const auto emitStart = clock_t::now(); diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index c4282c1ac9..c9bded9309 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -48,6 +48,12 @@ namespace obj_writer_detail constexpr size_t ApproxObjBytesPerVertex = 96ull; constexpr size_t ApproxObjBytesPerFace = 48ull; +struct SIndexStringRef +{ + uint32_t offset = 0u; + uint16_t length = 0u; +}; + struct SFileWriteTelemetry { uint64_t callCount = 0ull; @@ -109,6 +115,32 @@ void appendUInt(std::string& out, const uint32_t value) out.append(buf.data(), static_cast(res.ptr - buf.data())); } +void appendUIntToStorage(std::string& storage, core::vector& refs, const uint32_t value) +{ + std::array buf = {}; + const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); + if (res.ec != std::errc()) + { + refs.push_back({}); + return; + } + + const auto len = static_cast(res.ptr - buf.data()); + SIndexStringRef ref = {}; + ref.offset = static_cast(storage.size()); + ref.length = len; + storage.append(buf.data(), len); + refs.push_back(ref); +} + +void appendIndexRef(std::string& out, const std::string& storage, const core::vector& refs, const uint32_t index) +{ + if (index >= refs.size()) + return; + const auto& ref = refs[index]; + out.append(storage.data() + ref.offset, ref.length); +} + void 
appendFloatFixed6(std::string& out, float value) { std::array buf = {}; @@ -252,7 +284,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const auto formatStart = clock_t::now(); output.reserve(vertexCount * ApproxObjBytesPerVertex + faceCount * ApproxObjBytesPerFace); - output += "# Nabla OBJ\n"; + output.append("# Nabla OBJ\n"); hlsl::float64_t4 tmp = {}; const hlsl::float32_t3* const tightPositions = getTightFloat3View(positionView); @@ -280,13 +312,13 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (flipHandedness) x = -x; - output += "v "; + output.append("v "); appendFloatFixed6(output, x); - output += " "; + output.push_back(' '); appendFloatFixed6(output, y); - output += " "; + output.push_back(' '); appendFloatFixed6(output, z); - output += "\n"; + output.push_back('\n'); } if (hasUVs) @@ -308,11 +340,11 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ v = 1.f - static_cast(tmp.y); } - output += "vt "; + output.append("vt "); appendFloatFixed6(output, u); - output += " "; + output.push_back(' '); appendFloatFixed6(output, v); - output += "\n"; + output.push_back('\n'); } } @@ -340,42 +372,47 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (flipHandedness) x = -x; - output += "vn "; + output.append("vn "); appendFloatFixed6(output, x); - output += " "; + output.push_back(' '); appendFloatFixed6(output, y); - output += " "; + output.push_back(' '); appendFloatFixed6(output, z); - output += "\n"; + output.push_back('\n'); } } - core::vector faceIndexTokens; - faceIndexTokens.resize(vertexCount); + core::vector faceIndexRefs; + faceIndexRefs.reserve(vertexCount); + std::string faceIndexStorage; + faceIndexStorage.reserve(vertexCount * 12ull); for (size_t i = 0u; i < vertexCount; ++i) { - auto& token = faceIndexTokens[i]; - token.reserve(24ull); const uint32_t objIx = static_cast(i + 1u); - appendUInt(token, objIx); 
+ appendUIntToStorage(faceIndexStorage, faceIndexRefs, objIx); + } + + auto appendFaceCorner = [&](const uint32_t ix)->void + { + appendIndexRef(output, faceIndexStorage, faceIndexRefs, ix); if (hasUVs && hasNormals) { - token += "/"; - appendUInt(token, objIx); - token += "/"; - appendUInt(token, objIx); + output.push_back('/'); + appendIndexRef(output, faceIndexStorage, faceIndexRefs, ix); + output.push_back('/'); + appendIndexRef(output, faceIndexStorage, faceIndexRefs, ix); } else if (hasUVs) { - token += "/"; - appendUInt(token, objIx); + output.push_back('/'); + appendIndexRef(output, faceIndexStorage, faceIndexRefs, ix); } else if (hasNormals) { - token += "//"; - appendUInt(token, objIx); + output.append("//"); + appendIndexRef(output, faceIndexStorage, faceIndexRefs, ix); } - } + }; for (size_t i = 0u; i < faceCount; ++i) { @@ -386,16 +423,16 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const uint32_t f0 = i2; const uint32_t f1 = i1; const uint32_t f2 = i0; - if (f0 >= faceIndexTokens.size() || f1 >= faceIndexTokens.size() || f2 >= faceIndexTokens.size()) + if (f0 >= faceIndexRefs.size() || f1 >= faceIndexRefs.size() || f2 >= faceIndexRefs.size()) return false; - output += "f "; - output += faceIndexTokens[f0]; - output += " "; - output += faceIndexTokens[f1]; - output += " "; - output += faceIndexTokens[f2]; - output += "\n"; + output.append("f "); + appendFaceCorner(f0); + output.push_back(' '); + appendFaceCorner(f1); + output.push_back(' '); + appendFaceCorner(f2); + output.push_back('\n'); } formatMs = std::chrono::duration(clock_t::now() - formatStart).count(); diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index d9932bf6ea..8c2ef68630 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -349,6 +349,24 @@ struct SContext { assert(IsBinaryFile); size_t remaining = bytes; + if 
(remaining == 0ull) + return; + + const size_t availableInitially = EndPointer > StartPointer ? static_cast(EndPointer - StartPointer) : 0ull; + if (remaining > availableInitially) + { + remaining -= availableInitially; + StartPointer = EndPointer; + if (remaining > ioReadWindowSize) + { + const size_t fileSize = inner.mainFile->getSize(); + const size_t fileRemaining = fileSize > fileOffset ? (fileSize - fileOffset) : 0ull; + const size_t directSkip = std::min(remaining, fileRemaining); + fileOffset += directSkip; + remaining -= directSkip; + } + } + while (remaining) { if (StartPointer >= EndPointer) @@ -526,57 +544,178 @@ struct SContext { if (!IsBinaryFile || IsWrongEndian || el.Name != "vertex") return EFastVertexReadResult::NotApplicable; - if (el.Properties.size() != 3u || vertAttrIts.size() != 3u) - return EFastVertexReadResult::NotApplicable; - const auto& xProp = el.Properties[0]; - const auto& yProp = el.Properties[1]; - const auto& zProp = el.Properties[2]; - if (xProp.Name != "x" || yProp.Name != "y" || zProp.Name != "z") - return EFastVertexReadResult::NotApplicable; - if (xProp.type != EF_R32_SFLOAT || yProp.type != EF_R32_SFLOAT || zProp.type != EF_R32_SFLOAT) - return EFastVertexReadResult::NotApplicable; + enum class ELayoutKind : uint8_t + { + XYZ, + XYZ_N, + XYZ_N_UV + }; - auto& xIt = vertAttrIts[0]; - auto& yIt = vertAttrIts[1]; - auto& zIt = vertAttrIts[2]; - if (!xIt.ptr || !yIt.ptr || !zIt.ptr) - return EFastVertexReadResult::NotApplicable; - if (xIt.dstFmt != EF_R32_SFLOAT || yIt.dstFmt != EF_R32_SFLOAT || zIt.dstFmt != EF_R32_SFLOAT) + auto allF32 = [&el]()->bool + { + for (const auto& prop : el.Properties) + { + if (prop.type != EF_R32_SFLOAT) + return false; + } + return true; + }; + if (!allF32()) return EFastVertexReadResult::NotApplicable; - if (xIt.stride != yIt.stride || xIt.stride != zIt.stride) + + auto matchNames = [&el](std::initializer_list names)->bool + { + if (el.Properties.size() != names.size()) + return false; + size_t i 
= 0ull; + for (const auto* name : names) + { + if (el.Properties[i].Name != name) + return false; + ++i; + } + return true; + }; + + ELayoutKind layout = ELayoutKind::XYZ; + if (matchNames({ "x", "y", "z" })) + { + layout = ELayoutKind::XYZ; + } + else if (matchNames({ "x", "y", "z", "nx", "ny", "nz" })) + { + layout = ELayoutKind::XYZ_N; + } + else if (matchNames({ "x", "y", "z", "nx", "ny", "nz", "u", "v" }) || matchNames({ "x", "y", "z", "nx", "ny", "nz", "s", "t" })) + { + layout = ELayoutKind::XYZ_N_UV; + } + else + { return EFastVertexReadResult::NotApplicable; + } const size_t floatBytes = sizeof(hlsl::float32_t); - if (yIt.ptr != xIt.ptr + floatBytes || zIt.ptr != xIt.ptr + 2ull * floatBytes) - return EFastVertexReadResult::NotApplicable; + auto validateTuple = [&](const size_t beginIx, const size_t componentCount, uint32_t& outStride, uint8_t*& outBase)->bool + { + if (beginIx + componentCount > vertAttrIts.size()) + return false; + auto& first = vertAttrIts[beginIx]; + if (!first.ptr || first.dstFmt != EF_R32_SFLOAT) + return false; + outStride = first.stride; + outBase = first.ptr; + for (size_t c = 1ull; c < componentCount; ++c) + { + auto& it = vertAttrIts[beginIx + c]; + if (!it.ptr || it.dstFmt != EF_R32_SFLOAT) + return false; + if (it.stride != outStride) + return false; + if (it.ptr != outBase + c * floatBytes) + return false; + } + return true; + }; - if (el.Count > (std::numeric_limits::max() / xIt.stride)) - return EFastVertexReadResult::Error; - const size_t dstAdvance = el.Count * xIt.stride; - const size_t srcBytesPerVertex = 3ull * floatBytes; - if (el.Count > (std::numeric_limits::max() / srcBytesPerVertex)) + uint32_t posStride = 0u; + uint32_t normalStride = 0u; + uint32_t uvStride = 0u; + uint8_t* posBase = nullptr; + uint8_t* normalBase = nullptr; + uint8_t* uvBase = nullptr; + switch (layout) + { + case ELayoutKind::XYZ: + if (vertAttrIts.size() != 3u || !validateTuple(0u, 3u, posStride, posBase)) + return 
EFastVertexReadResult::NotApplicable; + break; + case ELayoutKind::XYZ_N: + if (vertAttrIts.size() != 6u) + return EFastVertexReadResult::NotApplicable; + if (!validateTuple(0u, 3u, posStride, posBase) || !validateTuple(3u, 3u, normalStride, normalBase)) + return EFastVertexReadResult::NotApplicable; + break; + case ELayoutKind::XYZ_N_UV: + if (vertAttrIts.size() != 8u) + return EFastVertexReadResult::NotApplicable; + if (!validateTuple(0u, 3u, posStride, posBase) || !validateTuple(3u, 3u, normalStride, normalBase) || !validateTuple(6u, 2u, uvStride, uvBase)) + return EFastVertexReadResult::NotApplicable; + break; + } + + const size_t srcBytesPerVertex = [layout]()->size_t + { + switch (layout) + { + case ELayoutKind::XYZ: + return sizeof(hlsl::float32_t) * 3ull; + case ELayoutKind::XYZ_N: + return sizeof(hlsl::float32_t) * 6ull; + case ELayoutKind::XYZ_N_UV: + return sizeof(hlsl::float32_t) * 8ull; + default: + return 0ull; + } + }(); + if (srcBytesPerVertex == 0ull || el.Count > (std::numeric_limits::max() / srcBytesPerVertex)) return EFastVertexReadResult::Error; - const size_t copyBytes = el.Count * srcBytesPerVertex; - uint8_t* dst = xIt.ptr; - size_t copied = 0ull; - while (copied < copyBytes) + size_t remainingVertices = el.Count; + while (remainingVertices > 0ull) { - if (StartPointer >= EndPointer) + if (StartPointer + srcBytesPerVertex > EndPointer) fillBuffer(); const size_t available = EndPointer > StartPointer ? 
static_cast(EndPointer - StartPointer) : 0ull; - if (available == 0ull) + if (available < srcBytesPerVertex) return EFastVertexReadResult::Error; - const size_t toCopy = std::min(available, copyBytes - copied); - std::memcpy(dst + copied, StartPointer, toCopy); - StartPointer += toCopy; - copied += toCopy; + + const size_t batchVertices = std::min(remainingVertices, available / srcBytesPerVertex); + const uint8_t* src = reinterpret_cast(StartPointer); + for (size_t v = 0ull; v < batchVertices; ++v) + { + std::memcpy(posBase, src, 3ull * floatBytes); + src += 3ull * floatBytes; + posBase += posStride; + + if (layout == ELayoutKind::XYZ_N || layout == ELayoutKind::XYZ_N_UV) + { + std::memcpy(normalBase, src, 3ull * floatBytes); + src += 3ull * floatBytes; + normalBase += normalStride; + } + + if (layout == ELayoutKind::XYZ_N_UV) + { + std::memcpy(uvBase, src, 2ull * floatBytes); + src += 2ull * floatBytes; + uvBase += uvStride; + } + } + + const size_t consumed = batchVertices * srcBytesPerVertex; + StartPointer += consumed; + remainingVertices -= batchVertices; } - xIt.ptr += dstAdvance; - yIt.ptr += dstAdvance; - zIt.ptr += dstAdvance; + const size_t posAdvance = el.Count * posStride; + vertAttrIts[0].ptr += posAdvance; + vertAttrIts[1].ptr += posAdvance; + vertAttrIts[2].ptr += posAdvance; + if (layout == ELayoutKind::XYZ_N || layout == ELayoutKind::XYZ_N_UV) + { + const size_t normalAdvance = el.Count * normalStride; + vertAttrIts[3].ptr += normalAdvance; + vertAttrIts[4].ptr += normalAdvance; + vertAttrIts[5].ptr += normalAdvance; + } + if (layout == ELayoutKind::XYZ_N_UV) + { + const size_t uvAdvance = el.Count * uvStride; + vertAttrIts[6].ptr += uvAdvance; + vertAttrIts[7].ptr += uvAdvance; + } return EFastVertexReadResult::Success; } void readVertex(const IAssetLoader::SAssetLoadParams& _params, const SElement& el) From fafb61679dfe35d2b3fb6636c3bc9f386f03d665 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Feb 2026 18:09:45 +0100 Subject: 
[PATCH 008/118] Optimize mesh loader writer hot paths --- src/nbl/asset/interchange/COBJMeshWriter.cpp | 76 ++++---- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 170 +++++++++++------- .../asset/interchange/CSTLMeshFileLoader.cpp | 75 ++++---- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 88 ++++++--- 4 files changed, 243 insertions(+), 166 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index c9bded9309..1308c63bc2 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -115,24 +115,6 @@ void appendUInt(std::string& out, const uint32_t value) out.append(buf.data(), static_cast(res.ptr - buf.data())); } -void appendUIntToStorage(std::string& storage, core::vector& refs, const uint32_t value) -{ - std::array buf = {}; - const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); - if (res.ec != std::errc()) - { - refs.push_back({}); - return; - } - - const auto len = static_cast(res.ptr - buf.data()); - SIndexStringRef ref = {}; - ref.offset = static_cast(storage.size()); - ref.length = len; - storage.append(buf.data(), len); - refs.push_back(ref); -} - void appendIndexRef(std::string& out, const std::string& storage, const core::vector& refs, const uint32_t index) { if (index >= refs.size()) @@ -141,6 +123,32 @@ void appendIndexRef(std::string& out, const std::string& storage, const core::ve out.append(storage.data() + ref.offset, ref.length); } +void appendIndexTokenToStorage(std::string& storage, core::vector& refs, const uint32_t objIx, const bool hasUVs, const bool hasNormals) +{ + SIndexStringRef ref = {}; + ref.offset = static_cast(storage.size()); + appendUInt(storage, objIx); + if (hasUVs && hasNormals) + { + storage.push_back('/'); + appendUInt(storage, objIx); + storage.push_back('/'); + appendUInt(storage, objIx); + } + else if (hasUVs) + { + storage.push_back('/'); + appendUInt(storage, objIx); + } + else if 
(hasNormals) + { + storage.append("//"); + appendUInt(storage, objIx); + } + ref.length = static_cast(storage.size() - ref.offset); + refs.push_back(ref); +} + void appendFloatFixed6(std::string& out, float value) { std::array buf = {}; @@ -385,35 +393,13 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ core::vector faceIndexRefs; faceIndexRefs.reserve(vertexCount); std::string faceIndexStorage; - faceIndexStorage.reserve(vertexCount * 12ull); + faceIndexStorage.reserve(vertexCount * 24ull); for (size_t i = 0u; i < vertexCount; ++i) { const uint32_t objIx = static_cast(i + 1u); - appendUIntToStorage(faceIndexStorage, faceIndexRefs, objIx); + appendIndexTokenToStorage(faceIndexStorage, faceIndexRefs, objIx, hasUVs, hasNormals); } - auto appendFaceCorner = [&](const uint32_t ix)->void - { - appendIndexRef(output, faceIndexStorage, faceIndexRefs, ix); - if (hasUVs && hasNormals) - { - output.push_back('/'); - appendIndexRef(output, faceIndexStorage, faceIndexRefs, ix); - output.push_back('/'); - appendIndexRef(output, faceIndexStorage, faceIndexRefs, ix); - } - else if (hasUVs) - { - output.push_back('/'); - appendIndexRef(output, faceIndexStorage, faceIndexRefs, ix); - } - else if (hasNormals) - { - output.append("//"); - appendIndexRef(output, faceIndexStorage, faceIndexRefs, ix); - } - }; - for (size_t i = 0u; i < faceCount; ++i) { const uint32_t i0 = indices[i * 3u + 0u]; @@ -427,11 +413,11 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return false; output.append("f "); - appendFaceCorner(f0); + appendIndexRef(output, faceIndexStorage, faceIndexRefs, f0); output.push_back(' '); - appendFaceCorner(f1); + appendIndexRef(output, faceIndexStorage, faceIndexRefs, f1); output.push_back(' '); - appendFaceCorner(f2); + appendIndexRef(output, faceIndexStorage, faceIndexRefs, f2); output.push_back('\n'); } formatMs = std::chrono::duration(clock_t::now() - formatStart).count(); diff --git 
a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index e9a0faa032..cc2f018f40 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -133,11 +133,13 @@ void appendVec(std::string& out, const double* values, size_t count, bool flipVe { const bool flip = flipVectors && i == xID; appendFloatFixed6(out, flip ? -values[i] : values[i]); - out += " "; + out.push_back(' '); } } +bool writeBufferWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, size_t& fileOffset, SFileWriteTelemetry* ioTelemetry = nullptr); bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry = nullptr); +bool writeTwoBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* dataA, size_t byteCountA, const uint8_t* dataB, size_t byteCountB, SFileWriteTelemetry* ioTelemetry = nullptr); bool writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors); bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors); @@ -303,24 +305,30 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; const auto binaryEncodeStart = clock_t::now(); - core::vector output; - output.resize(header.size() + bodySize); - if (!header.empty()) - std::memcpy(output.data(), header.data(), header.size()); - if (!writeBinary(geom, uvView, writeNormals, vertexCount, indices, faceCount, output.data() + header.size(), flipVectors)) + 
core::vector body; + body.resize(bodySize); + if (!writeBinary(geom, uvView, writeNormals, vertexCount, indices, faceCount, body.data(), flipVectors)) return false; encodeMs += std::chrono::duration(clock_t::now() - binaryEncodeStart).count(); - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true); + const size_t outputSize = header.size() + body.size(); + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true); if (!ioPlan.valid) { _params.logger.log("PLY writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); return false; } - outputBytes = output.size(); + outputBytes = outputSize; const auto writeStart = clock_t::now(); - writeOk = writeBufferWithPolicy(file, ioPlan, output.data(), output.size(), &ioTelemetry); + writeOk = writeTwoBuffersWithPolicy( + file, + ioPlan, + reinterpret_cast(header.data()), + header.size(), + body.data(), + body.size(), + &ioTelemetry); writeMs = std::chrono::duration(clock_t::now() - writeStart).count(); const double totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); @@ -373,20 +381,24 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return false; encodeMs += std::chrono::duration(clock_t::now() - textEncodeStart).count(); - const auto textFormatStart = clock_t::now(); - std::string output = header; - output += body; - formatMs += std::chrono::duration(clock_t::now() - textFormatStart).count(); - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true); + const size_t outputSize = header.size() + body.size(); + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true); if (!ioPlan.valid) { _params.logger.log("PLY writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); return false; } - outputBytes = 
output.size(); + outputBytes = outputSize; const auto writeStart = clock_t::now(); - writeOk = writeBufferWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size(), &ioTelemetry); + writeOk = writeTwoBuffersWithPolicy( + file, + ioPlan, + reinterpret_cast(header.data()), + header.size(), + reinterpret_cast(body.data()), + body.size(), + &ioTelemetry); writeMs = std::chrono::duration(clock_t::now() - writeStart).count(); const double totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); @@ -432,30 +444,40 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return writeOk; } -bool ply_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry) +bool ply_writer_detail::writeBufferWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, size_t& fileOffset, SFileWriteTelemetry* ioTelemetry) { if (!file || (!data && byteCount != 0ull)) return false; - - size_t fileOffset = 0ull; switch (ioPlan.strategy) { case SResolvedFileIOPolicy::Strategy::WholeFile: { - system::IFile::success_t success; - file->write(success, data, fileOffset, byteCount); - if (success && ioTelemetry) - ioTelemetry->account(success.getBytesProcessed()); - return success && success.getBytesProcessed() == byteCount; + size_t writtenTotal = 0ull; + while (writtenTotal < byteCount) + { + system::IFile::success_t success; + file->write(success, data + writtenTotal, fileOffset + writtenTotal, byteCount - writtenTotal); + if (!success) + return false; + const size_t written = success.getBytesProcessed(); + if (written == 0ull) + return false; + if (ioTelemetry) + ioTelemetry->account(written); + writtenTotal += written; + } + fileOffset += writtenTotal; + return true; } case SResolvedFileIOPolicy::Strategy::Chunked: default: { - while (fileOffset < byteCount) + size_t writtenTotal = 
0ull; + while (writtenTotal < byteCount) { - const size_t toWrite = static_cast(std::min(ioPlan.chunkSizeBytes, byteCount - fileOffset)); + const size_t toWrite = static_cast(std::min(ioPlan.chunkSizeBytes, byteCount - writtenTotal)); system::IFile::success_t success; - file->write(success, data + fileOffset, fileOffset, toWrite); + file->write(success, data + writtenTotal, fileOffset + writtenTotal, toWrite); if (!success) return false; const size_t written = success.getBytesProcessed(); @@ -463,13 +485,28 @@ bool ply_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolv return false; if (ioTelemetry) ioTelemetry->account(written); - fileOffset += written; + writtenTotal += written; } + fileOffset += writtenTotal; return true; } } } +bool ply_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry) +{ + size_t fileOffset = 0ull; + return writeBufferWithPolicyAtOffset(file, ioPlan, data, byteCount, fileOffset, ioTelemetry); +} + +bool ply_writer_detail::writeTwoBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* dataA, size_t byteCountA, const uint8_t* dataB, size_t byteCountB, SFileWriteTelemetry* ioTelemetry) +{ + size_t fileOffset = 0ull; + if (!writeBufferWithPolicyAtOffset(file, ioPlan, dataA, byteCountA, fileOffset, ioTelemetry)) + return false; + return writeBufferWithPolicyAtOffset(file, ioPlan, dataB, byteCountB, fileOffset, ioTelemetry); +} + bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors) { if (!dst) @@ -481,16 +518,31 @@ bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUP const auto& normalView = geom->getNormalView(); const hlsl::float32_t3* const tightPos = 
getTightFloat3View(positionView); const hlsl::float32_t3* const tightNormal = writeNormals ? getTightFloat3View(normalView) : nullptr; - const hlsl::float32_t2* const tightUV = uvView ? getTightFloat2View(*uvView) : nullptr; + const bool hasUV = uvView != nullptr; + const hlsl::float32_t2* const tightUV = hasUV ? getTightFloat2View(*uvView) : nullptr; - hlsl::float64_t4 tmp = {}; - for (size_t i = 0; i < vertexCount; ++i) + if (tightPos && (!writeNormals || tightNormal) && (!hasUV || tightUV) && !flipVectors) { - if (tightPos && !flipVectors) + for (size_t i = 0; i < vertexCount; ++i) { std::memcpy(dst, tightPos + i, Float3Bytes); + dst += Float3Bytes; + if (writeNormals) + { + std::memcpy(dst, tightNormal + i, Float3Bytes); + dst += Float3Bytes; + } + if (hasUV) + { + std::memcpy(dst, tightUV + i, Float2Bytes); + dst += Float2Bytes; + } } - else + } + else + { + hlsl::float64_t4 tmp = {}; + for (size_t i = 0; i < vertexCount; ++i) { float pos[3] = {}; if (tightPos) @@ -510,16 +562,9 @@ bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUP if (flipVectors) pos[0] = -pos[0]; std::memcpy(dst, pos, Float3Bytes); - } - dst += Float3Bytes; + dst += Float3Bytes; - if (writeNormals) - { - if (tightNormal && !flipVectors) - { - std::memcpy(dst, tightNormal + i, Float3Bytes); - } - else + if (writeNormals) { float normal[3] = {}; if (tightNormal) @@ -538,28 +583,27 @@ bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUP } if (flipVectors) normal[0] = -normal[0]; - std::memcpy(dst, normal, Float3Bytes); + dst += Float3Bytes; } - dst += Float3Bytes; - } - if (uvView) - { - if (tightUV) - { - std::memcpy(dst, tightUV + i, Float2Bytes); - } - else + if (hasUV) { - float uv[2] = {}; - if (!decodeVec4(*uvView, i, tmp)) - return false; - uv[0] = static_cast(tmp.x); - uv[1] = static_cast(tmp.y); - std::memcpy(dst, uv, Float2Bytes); + if (tightUV) + { + std::memcpy(dst, tightUV + i, Float2Bytes); + } + else + { + float uv[2] = 
{}; + if (!decodeVec4(*uvView, i, tmp)) + return false; + uv[0] = static_cast(tmp.x); + uv[1] = static_cast(tmp.y); + std::memcpy(dst, uv, Float2Bytes); + } + dst += Float2Bytes; } - dst += Float2Bytes; } } @@ -648,13 +692,13 @@ bool ply_writer_detail::writeText(const ICPUPolygonGeometry* geom, const ICPUPol for (size_t i = 0; i < faceCount; ++i) { const uint32_t* tri = indices + (i * 3u); - output += "3 "; + output.append("3 "); appendUInt(output, tri[0]); - output += " "; + output.push_back(' '); appendUInt(output, tri[1]); - output += " "; + output.push_back(' '); appendUInt(output, tri[2]); - output += "\n"; + output.push_back('\n'); } return true; } diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 0d401adf3b..4fd350fdd6 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -197,18 +197,6 @@ void stlPushTriangleReversed(const hlsl::float32_t3 (&p)[3], core::vector& normals, const core::vector& positions) -{ - if (normals.empty() || positions.size() < 3ull) - return; - - const auto& lastNormal = normals.back(); - if (hlsl::dot(lastNormal, lastNormal) > 0.f) - return; - - normals.back() = stlComputeFaceNormal(*(positions.rbegin() + 2), *(positions.rbegin() + 1), *(positions.rbegin() + 0)); -} - void stlExtendAABB(hlsl::shapes::AABB<3, hlsl::float32_t>& aabb, bool& hasAABB, const hlsl::float32_t3& p) { if (!hasAABB) @@ -227,6 +215,35 @@ void stlExtendAABB(hlsl::shapes::AABB<3, hlsl::float32_t>& aabb, bool& hasAABB, if (p.z > aabb.maxVx.z) aabb.maxVx.z = p.z; } +ICPUPolygonGeometry::SDataView stlCreateAdoptedFloat3View(core::vector&& values) +{ + if (values.empty()) + return {}; + + auto backer = core::make_smart_refctd_ptr>>(std::move(values)); + auto& payload = backer->getBacker(); + auto* const payloadPtr = payload.data(); + const size_t byteCount = payload.size() * sizeof(hlsl::float32_t3); + auto buffer = ICPUBuffer::create( + 
{ { byteCount }, payloadPtr, core::smart_refctd_ptr(std::move(backer)), alignof(hlsl::float32_t3) }, + core::adopt_memory); + if (!buffer) + return {}; + + ICPUPolygonGeometry::SDataView view = {}; + view.composed = { + .stride = sizeof(hlsl::float32_t3), + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32G32B32_SFLOAT) + }; + view.src = { + .offset = 0u, + .size = byteCount, + .buffer = std::move(buffer) + }; + return view; +} + CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager* _assetManager) { (void)_assetManager; @@ -358,11 +375,12 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa auto normalView = createView(EF_R32G32B32_SFLOAT, static_cast(vertexCount)); if (!posView || !normalView) return {}; - auto* posOut = reinterpret_cast(posView.getPointer()); auto* normalOut = reinterpret_cast(normalView.getPointer()); if (!posOut || !normalOut) return {}; + auto* posOutFloat = reinterpret_cast(posOut); + auto* normalOutFloat = reinterpret_cast(normalOut); const double buildPrepMs = std::chrono::duration(clock_t::now() - buildPrepStart).count(); buildAllocViewsMs += buildPrepMs; buildMs += buildPrepMs; @@ -370,8 +388,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const auto parseStart = clock_t::now(); const uint8_t* cursor = payload.data(); const uint8_t* const end = cursor + payload.size(); - auto* posOutFloat = reinterpret_cast(posOut); - auto* normalOutFloat = reinterpret_cast(normalOut); if (end < cursor || static_cast(end - cursor) < static_cast(triangleCount) * StlTriangleRecordBytes) return {}; for (uint64_t tri = 0ull; tri < triangleCount; ++tri) @@ -537,13 +553,20 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } stlPushTriangleReversed(p, positions); + hlsl::float32_t3 faceNormal = stlResolveStoredNormal(fileNormal); + if (hlsl::dot(faceNormal, faceNormal) <= 0.f) + faceNormal = 
stlComputeFaceNormal(p[2u], p[1u], p[0u]); + normals.push_back(faceNormal); + normals.push_back(faceNormal); + normals.push_back(faceNormal); + stlExtendAABB(parsedAABB, hasParsedAABB, p[2u]); + stlExtendAABB(parsedAABB, hasParsedAABB, p[1u]); + stlExtendAABB(parsedAABB, hasParsedAABB, p[0u]); if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("endloop")) return {}; if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("endfacet")) return {}; - - stlFixLastFaceNormal(normals, positions); } parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); if (positions.empty()) @@ -554,26 +577,12 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const auto buildStart = clock_t::now(); const auto allocStart = clock_t::now(); - auto posView = createView(EF_R32G32B32_SFLOAT, positions.size()); - auto normalView = createView(EF_R32G32B32_SFLOAT, positions.size()); + auto posView = stlCreateAdoptedFloat3View(std::move(positions)); + auto normalView = stlCreateAdoptedFloat3View(std::move(normals)); if (!posView || !normalView) return {}; buildAllocViewsMs += std::chrono::duration(clock_t::now() - allocStart).count(); - auto* posOut = reinterpret_cast(posView.getPointer()); - auto* normalOut = reinterpret_cast(normalView.getPointer()); - if (!posOut || !normalOut) - return {}; - - for (size_t i = 0u; i < positions.size(); ++i) - { - const auto& pos = positions[i]; - const auto& nrm = normals[i / 3u]; - posOut[i] = { pos.x, pos.y, pos.z }; - normalOut[i] = { nrm.x, nrm.y, nrm.z }; - stlExtendAABB(parsedAABB, hasParsedAABB, posOut[i]); - } - const auto setStart = clock_t::now(); geometry->setPositionView(std::move(posView)); geometry->setNormalView(std::move(normalView)); diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 812db24298..7a6761de3d 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ 
b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -527,19 +527,15 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) }; const bool hasFastTightPath = (indices == nullptr) && (tightPositions != nullptr) && (!hasNormals || (tightNormals != nullptr)); - if (hasFastTightPath) + if (hasFastTightPath && hasNormals) { - for (uint32_t primIx = 0u; primIx < facenum; ++primIx) + const hlsl::float32_t3* posTri = tightPositions; + const hlsl::float32_t3* nrmTri = tightNormals; + for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u, nrmTri += 3u) { - const uint32_t i0 = primIx * 3u + 0u; - const uint32_t i1 = primIx * 3u + 1u; - const uint32_t i2 = primIx * 3u + 2u; - if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) - return false; - - const hlsl::float32_t3 vertex1 = tightPositions[i2]; - const hlsl::float32_t3 vertex2 = tightPositions[i1]; - const hlsl::float32_t3 vertex3 = tightPositions[i0]; + const hlsl::float32_t3 vertex1 = posTri[2u]; + const hlsl::float32_t3 vertex2 = posTri[1u]; + const hlsl::float32_t3 vertex3 = posTri[0u]; const float vertex1x = vertex1.x * handednessSign; const float vertex2x = vertex2.x * handednessSign; const float vertex3x = vertex3.x * handednessSign; @@ -547,21 +543,18 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) float normalX = 0.f; float normalY = 0.f; float normalZ = 0.f; - if (hasNormals) + hlsl::float32_t3 attrNormal = nrmTri[0u]; + if (attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) + attrNormal = nrmTri[1u]; + if (attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) + attrNormal = nrmTri[2u]; + if (!(attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f)) { - hlsl::float32_t3 attrNormal = tightNormals[i0]; - if (attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) - attrNormal = tightNormals[i1]; - if (attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) - attrNormal 
= tightNormals[i2]; - if (!(attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f)) - { - if (flipHandedness) - attrNormal.x = -attrNormal.x; - normalX = attrNormal.x; - normalY = attrNormal.y; - normalZ = attrNormal.z; - } + if (flipHandedness) + attrNormal.x = -attrNormal.x; + normalX = attrNormal.x; + normalY = attrNormal.y; + normalZ = attrNormal.z; } if (normalX == 0.f && normalY == 0.f && normalZ == 0.f) @@ -600,6 +593,51 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) dst += sizeof(color); } } + else if (hasFastTightPath) + { + const hlsl::float32_t3* posTri = tightPositions; + for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u) + { + const hlsl::float32_t3 vertex1 = posTri[2u]; + const hlsl::float32_t3 vertex2 = posTri[1u]; + const hlsl::float32_t3 vertex3 = posTri[0u]; + const float vertex1x = vertex1.x * handednessSign; + const float vertex2x = vertex2.x * handednessSign; + const float vertex3x = vertex3.x * handednessSign; + + const float edge21x = vertex2x - vertex1x; + const float edge21y = vertex2.y - vertex1.y; + const float edge21z = vertex2.z - vertex1.z; + const float edge31x = vertex3x - vertex1x; + const float edge31y = vertex3.y - vertex1.y; + const float edge31z = vertex3.z - vertex1.z; + + float normalX = edge21y * edge31z - edge21z * edge31y; + float normalY = edge21z * edge31x - edge21x * edge31z; + float normalZ = edge21x * edge31y - edge21y * edge31x; + const float planeNormalLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; + if (planeNormalLen2 > 0.f) + { + const float invLen = 1.f / std::sqrt(planeNormalLen2); + normalX *= invLen; + normalY *= invLen; + normalZ *= invLen; + } + + const float packedData[12] = { + normalX, normalY, normalZ, + vertex1x, vertex1.y, vertex1.z, + vertex2x, vertex2.y, vertex2.z, + vertex3x, vertex3.y, vertex3.z + }; + std::memcpy(dst, packedData, sizeof(packedData)); + dst += sizeof(packedData); + + const uint16_t color = 0u; + 
std::memcpy(dst, &color, sizeof(color)); + dst += sizeof(color); + } + } else { for (uint32_t primIx = 0u; primIx < facenum; ++primIx) From e7c304aecd0af1cf81e36c888d3691b2365bee78 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Feb 2026 19:03:48 +0100 Subject: [PATCH 009/118] Optimize OBJ writer formatting and STL wholefile read --- src/nbl/asset/interchange/COBJMeshWriter.cpp | 200 ++++++++++++------ .../asset/interchange/CSTLMeshFileLoader.cpp | 75 +++++-- 2 files changed, 189 insertions(+), 86 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 1308c63bc2..f82a030551 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -12,8 +12,10 @@ #include #include #include +#include #include #include +#include #include namespace nbl::asset @@ -107,63 +109,149 @@ const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& return reinterpret_cast(view.getPointer()); } -void appendUInt(std::string& out, const uint32_t value) +char* appendUIntToBuffer(char* dst, char* const end, const uint32_t value) { - std::array buf = {}; - const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); - if (res.ec == std::errc()) - out.append(buf.data(), static_cast(res.ptr - buf.data())); + if (!dst || dst >= end) + return end; + + const auto result = std::to_chars(dst, end, value); + if (result.ec == std::errc()) + return result.ptr; + + const int written = std::snprintf(dst, static_cast(end - dst), "%u", value); + if (written <= 0) + return dst; + const size_t writeLen = static_cast(written); + return (writeLen < static_cast(end - dst)) ? 
(dst + writeLen) : end; } -void appendIndexRef(std::string& out, const std::string& storage, const core::vector& refs, const uint32_t index) +char* appendFloatFixed6ToBuffer(char* dst, char* const end, const float value) { - if (index >= refs.size()) - return; - const auto& ref = refs[index]; - out.append(storage.data() + ref.offset, ref.length); + if (!dst || dst >= end) + return end; + + const auto result = std::to_chars(dst, end, value, std::chars_format::fixed, 6); + if (result.ec == std::errc()) + return result.ptr; + + const int written = std::snprintf(dst, static_cast(end - dst), "%.6f", static_cast(value)); + if (written <= 0) + return dst; + const size_t writeLen = static_cast(written); + return (writeLen < static_cast(end - dst)) ? (dst + writeLen) : end; +} + +void appendVec3Line(std::string& out, const std::string_view prefix, const float x, const float y, const float z) +{ + std::array line = {}; + char* cursor = line.data(); + char* const lineEnd = line.data() + line.size(); + + const size_t prefixSize = std::min(prefix.size(), static_cast(lineEnd - cursor)); + std::memcpy(cursor, prefix.data(), prefixSize); + cursor += prefixSize; + + cursor = appendFloatFixed6ToBuffer(cursor, lineEnd, x); + if (cursor < lineEnd) + *(cursor++) = ' '; + cursor = appendFloatFixed6ToBuffer(cursor, lineEnd, y); + if (cursor < lineEnd) + *(cursor++) = ' '; + cursor = appendFloatFixed6ToBuffer(cursor, lineEnd, z); + if (cursor < lineEnd) + *(cursor++) = '\n'; + + out.append(line.data(), static_cast(cursor - line.data())); +} + +void appendVec2Line(std::string& out, const std::string_view prefix, const float x, const float y) +{ + std::array line = {}; + char* cursor = line.data(); + char* const lineEnd = line.data() + line.size(); + + const size_t prefixSize = std::min(prefix.size(), static_cast(lineEnd - cursor)); + std::memcpy(cursor, prefix.data(), prefixSize); + cursor += prefixSize; + + cursor = appendFloatFixed6ToBuffer(cursor, lineEnd, x); + if (cursor < lineEnd) + 
*(cursor++) = ' '; + cursor = appendFloatFixed6ToBuffer(cursor, lineEnd, y); + if (cursor < lineEnd) + *(cursor++) = '\n'; + + out.append(line.data(), static_cast(cursor - line.data())); +} + +void appendFaceLine(std::string& out, const std::string& storage, const core::vector& refs, const uint32_t i0, const uint32_t i1, const uint32_t i2) +{ + const auto& ref0 = refs[i0]; + const auto& ref1 = refs[i1]; + const auto& ref2 = refs[i2]; + std::array line = {}; + char* cursor = line.data(); + char* const lineEnd = line.data() + line.size(); + if (cursor < lineEnd) + *(cursor++) = 'f'; + if (cursor < lineEnd) + *(cursor++) = ' '; + const size_t len0 = std::min(ref0.length, static_cast(lineEnd - cursor)); + std::memcpy(cursor, storage.data() + ref0.offset, len0); + cursor += len0; + if (cursor < lineEnd) + *(cursor++) = ' '; + const size_t len1 = std::min(ref1.length, static_cast(lineEnd - cursor)); + std::memcpy(cursor, storage.data() + ref1.offset, len1); + cursor += len1; + if (cursor < lineEnd) + *(cursor++) = ' '; + const size_t len2 = std::min(ref2.length, static_cast(lineEnd - cursor)); + std::memcpy(cursor, storage.data() + ref2.offset, len2); + cursor += len2; + if (cursor < lineEnd) + *(cursor++) = '\n'; + out.append(line.data(), static_cast(cursor - line.data())); } void appendIndexTokenToStorage(std::string& storage, core::vector& refs, const uint32_t objIx, const bool hasUVs, const bool hasNormals) { SIndexStringRef ref = {}; ref.offset = static_cast(storage.size()); - appendUInt(storage, objIx); - if (hasUVs && hasNormals) - { - storage.push_back('/'); - appendUInt(storage, objIx); - storage.push_back('/'); - appendUInt(storage, objIx); - } - else if (hasUVs) - { - storage.push_back('/'); - appendUInt(storage, objIx); - } - else if (hasNormals) { - storage.append("//"); - appendUInt(storage, objIx); + std::array token = {}; + char* const tokenEnd = token.data() + token.size(); + char* cursor = token.data(); + cursor = appendUIntToBuffer(cursor, tokenEnd, 
objIx); + if (hasUVs && hasNormals) + { + if (cursor < tokenEnd) + *(cursor++) = '/'; + cursor = appendUIntToBuffer(cursor, tokenEnd, objIx); + if (cursor < tokenEnd) + *(cursor++) = '/'; + cursor = appendUIntToBuffer(cursor, tokenEnd, objIx); + } + else if (hasUVs) + { + if (cursor < tokenEnd) + *(cursor++) = '/'; + cursor = appendUIntToBuffer(cursor, tokenEnd, objIx); + } + else if (hasNormals) + { + if (cursor < tokenEnd) + *(cursor++) = '/'; + if (cursor < tokenEnd) + *(cursor++) = '/'; + cursor = appendUIntToBuffer(cursor, tokenEnd, objIx); + } + storage.append(token.data(), static_cast(cursor - token.data())); } ref.length = static_cast(storage.size() - ref.offset); refs.push_back(ref); } -void appendFloatFixed6(std::string& out, float value) -{ - std::array buf = {}; - const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value, std::chars_format::fixed, 6); - if (res.ec == std::errc()) - { - out.append(buf.data(), static_cast(res.ptr - buf.data())); - return; - } - - const int written = std::snprintf(buf.data(), buf.size(), "%.6f", static_cast(value)); - if (written > 0) - out.append(buf.data(), static_cast(written)); -} - bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry = nullptr); } // namespace obj_writer_detail @@ -320,13 +408,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (flipHandedness) x = -x; - output.append("v "); - appendFloatFixed6(output, x); - output.push_back(' '); - appendFloatFixed6(output, y); - output.push_back(' '); - appendFloatFixed6(output, z); - output.push_back('\n'); + appendVec3Line(output, "v ", x, y, z); } if (hasUVs) @@ -348,11 +430,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ v = 1.f - static_cast(tmp.y); } - output.append("vt "); - appendFloatFixed6(output, u); - output.push_back(' '); - appendFloatFixed6(output, v); - 
output.push_back('\n'); + appendVec2Line(output, "vt ", u, v); } } @@ -380,13 +458,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (flipHandedness) x = -x; - output.append("vn "); - appendFloatFixed6(output, x); - output.push_back(' '); - appendFloatFixed6(output, y); - output.push_back(' '); - appendFloatFixed6(output, z); - output.push_back('\n'); + appendVec3Line(output, "vn ", x, y, z); } } @@ -412,13 +484,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (f0 >= faceIndexRefs.size() || f1 >= faceIndexRefs.size() || f2 >= faceIndexRefs.size()) return false; - output.append("f "); - appendIndexRef(output, faceIndexStorage, faceIndexRefs, f0); - output.push_back(' '); - appendIndexRef(output, faceIndexStorage, faceIndexRefs, f1); - output.push_back(' '); - appendIndexRef(output, faceIndexStorage, faceIndexRefs, f2); - output.push_back('\n'); + appendFaceLine(output, faceIndexStorage, faceIndexRefs, f0, f1, f2); } formatMs = std::chrono::duration(clock_t::now() - formatStart).count(); diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 4fd350fdd6..581f66cd82 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -296,13 +296,35 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa return {}; } + core::vector wholeFilePayload; + const uint8_t* wholeFileData = nullptr; + if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) + { + const auto ioStart = clock_t::now(); + wholeFilePayload.resize(filesize + 1ull); + if (!stlReadExact(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, &context.ioTelemetry)) + return {}; + wholeFilePayload[filesize] = 0u; + wholeFileData = wholeFilePayload.data(); + ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); + } + bool binary = false; bool 
hasBinaryTriCountFromDetect = false; uint32_t binaryTriCountFromDetect = 0u; { const auto detectStart = clock_t::now(); std::array prefix = {}; - const bool hasPrefix = filesize >= StlBinaryPrefixBytes && stlReadExact(context.inner.mainFile, prefix.data(), 0ull, StlBinaryPrefixBytes, &context.ioTelemetry); + bool hasPrefix = false; + if (wholeFileData && filesize >= StlBinaryPrefixBytes) + { + std::memcpy(prefix.data(), wholeFileData, StlBinaryPrefixBytes); + hasPrefix = true; + } + else + { + hasPrefix = filesize >= StlBinaryPrefixBytes && stlReadExact(context.inner.mainFile, prefix.data(), 0ull, StlBinaryPrefixBytes, &context.ioTelemetry); + } bool startsWithSolid = false; if (hasPrefix) { @@ -311,7 +333,9 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa else { char header[StlTextProbeBytes] = {}; - if (!stlReadExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) + if (wholeFileData) + std::memcpy(header, wholeFileData, sizeof(header)); + else if (!stlReadExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) return {}; startsWithSolid = (std::strncmp(header, "solid ", StlTextProbeBytes) == 0); } @@ -362,12 +386,22 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (filesize < expectedSize) return {}; - core::vector payload; - payload.resize(dataSize); - const auto ioStart = clock_t::now(); - if (!stlReadWithPolicy(context.inner.mainFile, payload.data(), StlBinaryPrefixBytes, dataSize, ioPlan, &context.ioTelemetry)) - return {}; - ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); + const uint8_t* payloadData = nullptr; + if (wholeFileData) + { + payloadData = wholeFileData + StlBinaryPrefixBytes; + } + else + { + core::vector payload; + payload.resize(dataSize); + const auto ioStart = clock_t::now(); + if (!stlReadWithPolicy(context.inner.mainFile, payload.data(), StlBinaryPrefixBytes, dataSize, ioPlan, 
&context.ioTelemetry)) + return {}; + ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); + wholeFilePayload = std::move(payload); + payloadData = wholeFilePayload.data(); + } vertexCount = triangleCount * StlVerticesPerTriangle; const auto buildPrepStart = clock_t::now(); @@ -386,8 +420,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa buildMs += buildPrepMs; const auto parseStart = clock_t::now(); - const uint8_t* cursor = payload.data(); - const uint8_t* const end = cursor + payload.size(); + const uint8_t* cursor = payloadData; + const uint8_t* const end = cursor + dataSize; if (end < cursor || static_cast(end - cursor) < static_cast(triangleCount) * StlTriangleRecordBytes) return {}; for (uint64_t tri = 0ull; tri < triangleCount; ++tri) @@ -443,7 +477,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa normalZ = 0.f; } } - else if (std::abs(normalLen2 - 1.f) >= 1e-4f) + else if (normalLen2 < 0.9999f || normalLen2 > 1.0001f) { const float invLen = 1.f / std::sqrt(normalLen2); normalX *= invLen; @@ -504,15 +538,18 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa else { parsePath = "ascii_fallback"; - core::vector asciiPayload; - asciiPayload.resize(filesize + 1ull); - const auto ioStart = clock_t::now(); - if (!stlReadWithPolicy(context.inner.mainFile, asciiPayload.data(), 0ull, filesize, ioPlan, &context.ioTelemetry)) - return {}; - ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); - asciiPayload[filesize] = 0u; + if (!wholeFileData) + { + const auto ioStart = clock_t::now(); + wholeFilePayload.resize(filesize + 1ull); + if (!stlReadWithPolicy(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, ioPlan, &context.ioTelemetry)) + return {}; + ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); + wholeFilePayload[filesize] = 0u; + wholeFileData = wholeFilePayload.data(); + } - const char* cursor = 
reinterpret_cast(asciiPayload.data()); + const char* cursor = reinterpret_cast(wholeFileData); const char* const end = cursor + filesize; core::vector positions; core::vector normals; From 1f21b75c8f92ceb55036d398ca9f601c17ac66f9 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Feb 2026 19:50:55 +0100 Subject: [PATCH 010/118] Optimize mesh loader and writer hot loops --- src/nbl/asset/interchange/COBJMeshWriter.cpp | 89 +++---- .../asset/interchange/CPLYMeshFileLoader.cpp | 186 +++++++++---- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 245 ++++++++++++------ 3 files changed, 335 insertions(+), 185 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index f82a030551..6b10521a9b 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -9,13 +9,11 @@ #include "nbl/system/IFile.h" #include -#include #include #include #include #include #include -#include #include namespace nbl::asset @@ -49,6 +47,9 @@ namespace obj_writer_detail constexpr size_t ApproxObjBytesPerVertex = 96ull; constexpr size_t ApproxObjBytesPerFace = 48ull; +constexpr size_t MaxUInt32Chars = 10ull; +constexpr size_t MaxFloatFixed6Chars = 48ull; +constexpr size_t MaxIndexTokenBytes = MaxUInt32Chars * 3ull + 2ull; struct SIndexStringRef { @@ -141,14 +142,15 @@ char* appendFloatFixed6ToBuffer(char* dst, char* const end, const float value) return (writeLen < static_cast(end - dst)) ? 
(dst + writeLen) : end; } -void appendVec3Line(std::string& out, const std::string_view prefix, const float x, const float y, const float z) +void appendVec3Line(std::string& out, const char* prefix, const size_t prefixSize, const float x, const float y, const float z) { - std::array line = {}; - char* cursor = line.data(); - char* const lineEnd = line.data() + line.size(); + const size_t oldSize = out.size(); + out.resize(oldSize + prefixSize + (3ull * MaxFloatFixed6Chars) + 3ull); + char* const lineBegin = out.data() + oldSize; + char* cursor = lineBegin; + char* const lineEnd = out.data() + out.size(); - const size_t prefixSize = std::min(prefix.size(), static_cast(lineEnd - cursor)); - std::memcpy(cursor, prefix.data(), prefixSize); + std::memcpy(cursor, prefix, prefixSize); cursor += prefixSize; cursor = appendFloatFixed6ToBuffer(cursor, lineEnd, x); @@ -161,17 +163,18 @@ void appendVec3Line(std::string& out, const std::string_view prefix, const float if (cursor < lineEnd) *(cursor++) = '\n'; - out.append(line.data(), static_cast(cursor - line.data())); + out.resize(oldSize + static_cast(cursor - lineBegin)); } -void appendVec2Line(std::string& out, const std::string_view prefix, const float x, const float y) +void appendVec2Line(std::string& out, const char* prefix, const size_t prefixSize, const float x, const float y) { - std::array line = {}; - char* cursor = line.data(); - char* const lineEnd = line.data() + line.size(); + const size_t oldSize = out.size(); + out.resize(oldSize + prefixSize + (2ull * MaxFloatFixed6Chars) + 2ull); + char* const lineBegin = out.data() + oldSize; + char* cursor = lineBegin; + char* const lineEnd = out.data() + out.size(); - const size_t prefixSize = std::min(prefix.size(), static_cast(lineEnd - cursor)); - std::memcpy(cursor, prefix.data(), prefixSize); + std::memcpy(cursor, prefix, prefixSize); cursor += prefixSize; cursor = appendFloatFixed6ToBuffer(cursor, lineEnd, x); @@ -181,7 +184,7 @@ void appendVec2Line(std::string& 
out, const std::string_view prefix, const float if (cursor < lineEnd) *(cursor++) = '\n'; - out.append(line.data(), static_cast(cursor - line.data())); + out.resize(oldSize + static_cast(cursor - lineBegin)); } void appendFaceLine(std::string& out, const std::string& storage, const core::vector& refs, const uint32_t i0, const uint32_t i1, const uint32_t i2) @@ -189,29 +192,21 @@ void appendFaceLine(std::string& out, const std::string& storage, const core::ve const auto& ref0 = refs[i0]; const auto& ref1 = refs[i1]; const auto& ref2 = refs[i2]; - std::array line = {}; - char* cursor = line.data(); - char* const lineEnd = line.data() + line.size(); - if (cursor < lineEnd) - *(cursor++) = 'f'; - if (cursor < lineEnd) - *(cursor++) = ' '; - const size_t len0 = std::min(ref0.length, static_cast(lineEnd - cursor)); - std::memcpy(cursor, storage.data() + ref0.offset, len0); - cursor += len0; - if (cursor < lineEnd) - *(cursor++) = ' '; - const size_t len1 = std::min(ref1.length, static_cast(lineEnd - cursor)); - std::memcpy(cursor, storage.data() + ref1.offset, len1); - cursor += len1; - if (cursor < lineEnd) - *(cursor++) = ' '; - const size_t len2 = std::min(ref2.length, static_cast(lineEnd - cursor)); - std::memcpy(cursor, storage.data() + ref2.offset, len2); - cursor += len2; - if (cursor < lineEnd) - *(cursor++) = '\n'; - out.append(line.data(), static_cast(cursor - line.data())); + const size_t oldSize = out.size(); + const size_t lineSize = 2ull + static_cast(ref0.length) + 1ull + static_cast(ref1.length) + 1ull + static_cast(ref2.length) + 1ull; + out.resize(oldSize + lineSize); + char* cursor = out.data() + oldSize; + *(cursor++) = 'f'; + *(cursor++) = ' '; + std::memcpy(cursor, storage.data() + ref0.offset, ref0.length); + cursor += ref0.length; + *(cursor++) = ' '; + std::memcpy(cursor, storage.data() + ref1.offset, ref1.length); + cursor += ref1.length; + *(cursor++) = ' '; + std::memcpy(cursor, storage.data() + ref2.offset, ref2.length); + cursor += 
ref2.length; + *(cursor++) = '\n'; } void appendIndexTokenToStorage(std::string& storage, core::vector& refs, const uint32_t objIx, const bool hasUVs, const bool hasNormals) @@ -219,9 +214,11 @@ void appendIndexTokenToStorage(std::string& storage, core::vector(storage.size()); { - std::array token = {}; - char* const tokenEnd = token.data() + token.size(); - char* cursor = token.data(); + const size_t oldSize = storage.size(); + storage.resize(oldSize + MaxIndexTokenBytes); + char* const token = storage.data() + oldSize; + char* const tokenEnd = token + MaxIndexTokenBytes; + char* cursor = token; cursor = appendUIntToBuffer(cursor, tokenEnd, objIx); if (hasUVs && hasNormals) { @@ -246,7 +243,7 @@ void appendIndexTokenToStorage(std::string& storage, core::vector(cursor - token.data())); + storage.resize(oldSize + static_cast(cursor - token)); } ref.length = static_cast(storage.size() - ref.offset); refs.push_back(ref); @@ -408,7 +405,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (flipHandedness) x = -x; - appendVec3Line(output, "v ", x, y, z); + appendVec3Line(output, "v ", sizeof("v ") - 1ull, x, y, z); } if (hasUVs) @@ -430,7 +427,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ v = 1.f - static_cast(tmp.y); } - appendVec2Line(output, "vt ", u, v); + appendVec2Line(output, "vt ", sizeof("vt ") - 1ull, u, v); } } @@ -458,7 +455,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (flipHandedness) x = -x; - appendVec3Line(output, "vn ", x, y, z); + appendVec3Line(output, "vn ", sizeof("vn ") - 1ull, x, y, z); } } diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 8c2ef68630..7c515ebd0e 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -673,25 +673,47 @@ struct SContext const size_t batchVertices = 
std::min(remainingVertices, available / srcBytesPerVertex); const uint8_t* src = reinterpret_cast(StartPointer); - for (size_t v = 0ull; v < batchVertices; ++v) + switch (layout) { - std::memcpy(posBase, src, 3ull * floatBytes); - src += 3ull * floatBytes; - posBase += posStride; - - if (layout == ELayoutKind::XYZ_N || layout == ELayoutKind::XYZ_N_UV) + case ELayoutKind::XYZ: { - std::memcpy(normalBase, src, 3ull * floatBytes); - src += 3ull * floatBytes; - normalBase += normalStride; + for (size_t v = 0ull; v < batchVertices; ++v) + { + std::memcpy(posBase, src, 3ull * floatBytes); + src += 3ull * floatBytes; + posBase += posStride; + } } - - if (layout == ELayoutKind::XYZ_N_UV) + break; + case ELayoutKind::XYZ_N: + { + for (size_t v = 0ull; v < batchVertices; ++v) + { + std::memcpy(posBase, src, 3ull * floatBytes); + src += 3ull * floatBytes; + posBase += posStride; + std::memcpy(normalBase, src, 3ull * floatBytes); + src += 3ull * floatBytes; + normalBase += normalStride; + } + } + break; + case ELayoutKind::XYZ_N_UV: { - std::memcpy(uvBase, src, 2ull * floatBytes); - src += 2ull * floatBytes; - uvBase += uvStride; + for (size_t v = 0ull; v < batchVertices; ++v) + { + std::memcpy(posBase, src, 3ull * floatBytes); + src += 3ull * floatBytes; + posBase += posStride; + std::memcpy(normalBase, src, 3ull * floatBytes); + src += 3ull * floatBytes; + normalBase += normalStride; + std::memcpy(uvBase, src, 2ull * floatBytes); + src += 2ull * floatBytes; + uvBase += uvStride; + } } + break; } const size_t consumed = batchVertices * srcBytesPerVertex; @@ -919,6 +941,44 @@ struct SContext if (is32Bit) { if (isSrcU32) + { + if (trackMaxIndex) + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + { + fallbackToGeneric = true; + break; + } + std::memcpy(out, ptr, 3ull * sizeof(uint32_t)); + ptr += 3ull * sizeof(uint32_t); + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > 
_maxIndex) _maxIndex = out[2]; + out += 3; + } + } + else + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + { + fallbackToGeneric = true; + break; + } + std::memcpy(out, ptr, 3ull * sizeof(uint32_t)); + ptr += 3ull * sizeof(uint32_t); + if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + return EFastFaceReadResult::Error; + out += 3; + } + } + } + else if (trackMaxIndex) { for (size_t j = 0u; j < element.Count; ++j) { @@ -930,16 +990,11 @@ struct SContext } std::memcpy(out, ptr, 3ull * sizeof(uint32_t)); ptr += 3ull * sizeof(uint32_t); - if (trackMaxIndex) - { - if (out[0] > _maxIndex) _maxIndex = out[0]; - if (out[1] > _maxIndex) _maxIndex = out[1]; - if (out[2] > _maxIndex) _maxIndex = out[2]; - } - else if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) - { + if ((out[0] | out[1] | out[2]) & 0x80000000u) return EFastFaceReadResult::Error; - } + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; out += 3; } } @@ -957,23 +1012,61 @@ struct SContext ptr += 3ull * sizeof(uint32_t); if ((out[0] | out[1] | out[2]) & 0x80000000u) return EFastFaceReadResult::Error; - if (trackMaxIndex) + if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + return EFastFaceReadResult::Error; + out += 3; + } + } + } + else + { + if (isSrcU16) + { + if (trackMaxIndex) + { + for (size_t j = 0u; j < element.Count; ++j) { + const uint8_t c = *ptr++; + if (c != 3u) + { + fallbackToGeneric = true; + break; + } + uint16_t tri[3] = {}; + std::memcpy(tri, ptr, sizeof(tri)); + ptr += sizeof(tri); + out[0] = tri[0]; + out[1] = tri[1]; + out[2] = tri[2]; if (out[0] > _maxIndex) _maxIndex = out[0]; if (out[1] > _maxIndex) _maxIndex = out[1]; if (out[2] > _maxIndex) _maxIndex = out[2]; + out += 3; } - else if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + } + else 
+ { + for (size_t j = 0u; j < element.Count; ++j) { - return EFastFaceReadResult::Error; + const uint8_t c = *ptr++; + if (c != 3u) + { + fallbackToGeneric = true; + break; + } + uint16_t tri[3] = {}; + std::memcpy(tri, ptr, sizeof(tri)); + ptr += sizeof(tri); + out[0] = tri[0]; + out[1] = tri[1]; + out[2] = tri[2]; + if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + return EFastFaceReadResult::Error; + out += 3; } - out += 3; } } - } - else - { - if (isSrcU16) + else if (trackMaxIndex) { for (size_t j = 0u; j < element.Count; ++j) { @@ -983,22 +1076,17 @@ struct SContext fallbackToGeneric = true; break; } - uint16_t tri[3] = {}; + int16_t tri[3] = {}; std::memcpy(tri, ptr, sizeof(tri)); ptr += sizeof(tri); - out[0] = tri[0]; - out[1] = tri[1]; - out[2] = tri[2]; - if (trackMaxIndex) - { - if (out[0] > _maxIndex) _maxIndex = out[0]; - if (out[1] > _maxIndex) _maxIndex = out[1]; - if (out[2] > _maxIndex) _maxIndex = out[2]; - } - else if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) - { + if ((static_cast(tri[0]) | static_cast(tri[1]) | static_cast(tri[2])) & 0x8000u) return EFastFaceReadResult::Error; - } + out[0] = static_cast(tri[0]); + out[1] = static_cast(tri[1]); + out[2] = static_cast(tri[2]); + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; out += 3; } } @@ -1020,16 +1108,8 @@ struct SContext out[0] = static_cast(tri[0]); out[1] = static_cast(tri[1]); out[2] = static_cast(tri[2]); - if (trackMaxIndex) - { - if (out[0] > _maxIndex) _maxIndex = out[0]; - if (out[1] > _maxIndex) _maxIndex = out[1]; - if (out[2] > _maxIndex) _maxIndex = out[2]; - } - else if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) - { + if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) return EFastFaceReadResult::Error; - } out += 3; } } diff --git 
a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 7a6761de3d..cea0fbfab9 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -6,11 +6,13 @@ #include "CSTLMeshWriter.h" -#include #include +#include +#include #include #include #include +#include #include #include #include @@ -69,6 +71,7 @@ constexpr size_t BinaryTriangleAttributeBytes = sizeof(uint16_t); constexpr size_t BinaryTriangleRecordBytes = BinaryTriangleFloatBytes + BinaryTriangleAttributeBytes; constexpr size_t BinaryPrefixBytes = BinaryHeaderBytes + BinaryTriangleCountBytes; constexpr size_t IoFallbackReserveBytes = 1ull << 20; +constexpr size_t AsciiFaceTextMaxBytes = 1024ull; constexpr char AsciiSolidPrefix[] = "solid "; constexpr char AsciiEndSolidPrefix[] = "endsolid "; constexpr char AsciiDefaultName[] = "nabla_mesh"; @@ -86,7 +89,6 @@ bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase: bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal); bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context); bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context); -void getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s); bool writeFaceText( const core::vectorSIMDf& v1, const core::vectorSIMDf& v2, @@ -96,6 +98,10 @@ bool writeFaceText( const bool flipHandedness, SContext* context); +char* appendFloatFixed6ToBuffer(char* dst, char* const end, const float value); +bool appendLiteral(char*& cursor, char* const end, const char* text, const size_t textSize); +bool appendVectorAsAsciiLine(char*& cursor, char* const end, const core::vectorSIMDf& v); + CSTLMeshWriter::CSTLMeshWriter() { #ifdef _NBL_DEBUG @@ -339,6 +345,48 @@ bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioP } } +char* appendFloatFixed6ToBuffer(char* dst, 
char* const end, const float value) +{ + if (!dst || dst >= end) + return end; + + const auto result = std::to_chars(dst, end, value, std::chars_format::fixed, 6); + if (result.ec == std::errc()) + return result.ptr; + + const int written = std::snprintf(dst, static_cast(end - dst), "%.6f", static_cast(value)); + if (written <= 0) + return dst; + const size_t writeLen = static_cast(written); + return (writeLen < static_cast(end - dst)) ? (dst + writeLen) : end; +} + +bool appendLiteral(char*& cursor, char* const end, const char* text, const size_t textSize) +{ + if (!cursor || cursor + textSize > end) + return false; + std::memcpy(cursor, text, textSize); + cursor += textSize; + return true; +} + +bool appendVectorAsAsciiLine(char*& cursor, char* const end, const core::vectorSIMDf& v) +{ + cursor = appendFloatFixed6ToBuffer(cursor, end, v.X); + if (cursor >= end) + return false; + *(cursor++) = ' '; + cursor = appendFloatFixed6ToBuffer(cursor, end, v.Y); + if (cursor >= end) + return false; + *(cursor++) = ' '; + cursor = appendFloatFixed6ToBuffer(cursor, end, v.Z); + if (cursor >= end) + return false; + *(cursor++) = '\n'; + return true; +} + const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) { if (!view) @@ -529,68 +577,112 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) const bool hasFastTightPath = (indices == nullptr) && (tightPositions != nullptr) && (!hasNormals || (tightNormals != nullptr)); if (hasFastTightPath && hasNormals) { + bool allFastNormalsNonZero = true; + const size_t normalCount = static_cast(facenum) * 3ull; + for (size_t i = 0ull; i < normalCount; ++i) + { + const auto& n = tightNormals[i]; + if (n.x == 0.f && n.y == 0.f && n.z == 0.f) + { + allFastNormalsNonZero = false; + break; + } + } + const hlsl::float32_t3* posTri = tightPositions; const hlsl::float32_t3* nrmTri = tightNormals; - for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u, nrmTri += 3u) + if 
(allFastNormalsNonZero) { - const hlsl::float32_t3 vertex1 = posTri[2u]; - const hlsl::float32_t3 vertex2 = posTri[1u]; - const hlsl::float32_t3 vertex3 = posTri[0u]; - const float vertex1x = vertex1.x * handednessSign; - const float vertex2x = vertex2.x * handednessSign; - const float vertex3x = vertex3.x * handednessSign; - - float normalX = 0.f; - float normalY = 0.f; - float normalZ = 0.f; - hlsl::float32_t3 attrNormal = nrmTri[0u]; - if (attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) - attrNormal = nrmTri[1u]; - if (attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) - attrNormal = nrmTri[2u]; - if (!(attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f)) + for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u, nrmTri += 3u) { + const hlsl::float32_t3 vertex1 = posTri[2u]; + const hlsl::float32_t3 vertex2 = posTri[1u]; + const hlsl::float32_t3 vertex3 = posTri[0u]; + const float vertex1x = vertex1.x * handednessSign; + const float vertex2x = vertex2.x * handednessSign; + const float vertex3x = vertex3.x * handednessSign; + + hlsl::float32_t3 attrNormal = nrmTri[0u]; if (flipHandedness) attrNormal.x = -attrNormal.x; - normalX = attrNormal.x; - normalY = attrNormal.y; - normalZ = attrNormal.z; - } - if (normalX == 0.f && normalY == 0.f && normalZ == 0.f) + const float packedData[12] = { + attrNormal.x, attrNormal.y, attrNormal.z, + vertex1x, vertex1.y, vertex1.z, + vertex2x, vertex2.y, vertex2.z, + vertex3x, vertex3.y, vertex3.z + }; + std::memcpy(dst, packedData, sizeof(packedData)); + dst += sizeof(packedData); + + const uint16_t color = 0u; + std::memcpy(dst, &color, sizeof(color)); + dst += sizeof(color); + } + } + else + { + for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u, nrmTri += 3u) { - const float edge21x = vertex2x - vertex1x; - const float edge21y = vertex2.y - vertex1.y; - const float edge21z = vertex2.z - vertex1.z; - const float edge31x = vertex3x - vertex1x; - const 
float edge31y = vertex3.y - vertex1.y; - const float edge31z = vertex3.z - vertex1.z; - - normalX = edge21y * edge31z - edge21z * edge31y; - normalY = edge21z * edge31x - edge21x * edge31z; - normalZ = edge21x * edge31y - edge21y * edge31x; - const float planeNormalLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; - if (planeNormalLen2 > 0.f) + const hlsl::float32_t3 vertex1 = posTri[2u]; + const hlsl::float32_t3 vertex2 = posTri[1u]; + const hlsl::float32_t3 vertex3 = posTri[0u]; + const float vertex1x = vertex1.x * handednessSign; + const float vertex2x = vertex2.x * handednessSign; + const float vertex3x = vertex3.x * handednessSign; + + float normalX = 0.f; + float normalY = 0.f; + float normalZ = 0.f; + hlsl::float32_t3 attrNormal = nrmTri[0u]; + if (attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) + attrNormal = nrmTri[1u]; + if (attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) + attrNormal = nrmTri[2u]; + if (!(attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f)) { - const float invLen = 1.f / std::sqrt(planeNormalLen2); - normalX *= invLen; - normalY *= invLen; - normalZ *= invLen; + if (flipHandedness) + attrNormal.x = -attrNormal.x; + normalX = attrNormal.x; + normalY = attrNormal.y; + normalZ = attrNormal.z; } - } - const float packedData[12] = { - normalX, normalY, normalZ, - vertex1x, vertex1.y, vertex1.z, - vertex2x, vertex2.y, vertex2.z, - vertex3x, vertex3.y, vertex3.z - }; - std::memcpy(dst, packedData, sizeof(packedData)); - dst += sizeof(packedData); + if (normalX == 0.f && normalY == 0.f && normalZ == 0.f) + { + const float edge21x = vertex2x - vertex1x; + const float edge21y = vertex2.y - vertex1.y; + const float edge21z = vertex2.z - vertex1.z; + const float edge31x = vertex3x - vertex1x; + const float edge31y = vertex3.y - vertex1.y; + const float edge31z = vertex3.z - vertex1.z; + + normalX = edge21y * edge31z - edge21z * edge31y; + normalY = edge21z * edge31x - edge21x * 
edge31z; + normalZ = edge21x * edge31y - edge21y * edge31x; + const float planeNormalLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; + if (planeNormalLen2 > 0.f) + { + const float invLen = 1.f / std::sqrt(planeNormalLen2); + normalX *= invLen; + normalY *= invLen; + normalZ *= invLen; + } + } - const uint16_t color = 0u; - std::memcpy(dst, &color, sizeof(color)); - dst += sizeof(color); + const float packedData[12] = { + normalX, normalY, normalZ, + vertex1x, vertex1.y, vertex1.z, + vertex2x, vertex2.y, vertex2.z, + vertex3x, vertex3.y, vertex3.z + }; + std::memcpy(dst, packedData, sizeof(packedData)); + dst += sizeof(packedData); + + const uint16_t color = 0u; + std::memcpy(dst, &color, sizeof(color)); + dst += sizeof(color); + } } } else if (hasFastTightPath) @@ -788,13 +880,6 @@ bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) return true; } -void getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s) -{ - std::ostringstream tmp; - tmp << v.X << " " << v.Y << " " << v.Z << "\n"; - s = std::string(tmp.str().c_str()); -} - bool writeFaceText( const core::vectorSIMDf& v1, const core::vectorSIMDf& v2, @@ -807,7 +892,6 @@ bool writeFaceText( core::vectorSIMDf vertex1 = v3; core::vectorSIMDf vertex2 = v2; core::vectorSIMDf vertex3 = v1; - std::string tmp; if (flipHandedness) { @@ -827,44 +911,33 @@ bool writeFaceText( normal = attrNormal; } - if (!writeBytes(context, "facet normal ", sizeof("facet normal ") - 1ull)) + std::array faceText = {}; + char* cursor = faceText.data(); + char* const end = faceText.data() + faceText.size(); + if (!appendLiteral(cursor, end, "facet normal ", sizeof("facet normal ") - 1ull)) return false; - - getVectorAsStringLine(normal, tmp); - if (!writeBytes(context, tmp.c_str(), tmp.size())) + if (!appendVectorAsAsciiLine(cursor, end, normal)) return false; - - if (!writeBytes(context, " outer loop\n", sizeof(" outer loop\n") - 1ull)) + if (!appendLiteral(cursor, end, " outer 
loop\n", sizeof(" outer loop\n") - 1ull)) return false; - - if (!writeBytes(context, " vertex ", sizeof(" vertex ") - 1ull)) + if (!appendLiteral(cursor, end, " vertex ", sizeof(" vertex ") - 1ull)) return false; - - getVectorAsStringLine(vertex1, tmp); - if (!writeBytes(context, tmp.c_str(), tmp.size())) + if (!appendVectorAsAsciiLine(cursor, end, vertex1)) return false; - - if (!writeBytes(context, " vertex ", sizeof(" vertex ") - 1ull)) + if (!appendLiteral(cursor, end, " vertex ", sizeof(" vertex ") - 1ull)) return false; - - getVectorAsStringLine(vertex2, tmp); - if (!writeBytes(context, tmp.c_str(), tmp.size())) + if (!appendVectorAsAsciiLine(cursor, end, vertex2)) return false; - - if (!writeBytes(context, " vertex ", sizeof(" vertex ") - 1ull)) + if (!appendLiteral(cursor, end, " vertex ", sizeof(" vertex ") - 1ull)) return false; - - getVectorAsStringLine(vertex3, tmp); - if (!writeBytes(context, tmp.c_str(), tmp.size())) + if (!appendVectorAsAsciiLine(cursor, end, vertex3)) return false; - - if (!writeBytes(context, " endloop\n", sizeof(" endloop\n") - 1ull)) + if (!appendLiteral(cursor, end, " endloop\n", sizeof(" endloop\n") - 1ull)) return false; - - if (!writeBytes(context, "endfacet\n", sizeof("endfacet\n") - 1ull)) + if (!appendLiteral(cursor, end, "endfacet\n", sizeof("endfacet\n") - 1ull)) return false; - return true; + return writeBytes(context, faceText.data(), static_cast(cursor - faceText.data())); } } From 307f601dab2be9f416ebcb3c794572ed9c5b32f4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Feb 2026 20:22:36 +0100 Subject: [PATCH 011/118] Optimize loader selection and mesh IO hot paths --- examples_tests | 2 +- src/nbl/asset/IAssetManager.cpp | 14 ++-- .../asset/interchange/CPLYMeshFileLoader.cpp | 42 +++++++++--- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 66 +++++++++---------- 4 files changed, 73 insertions(+), 51 deletions(-) diff --git a/examples_tests b/examples_tests index 99454acc4f..3335a72819 160000 --- 
a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 99454acc4f7dd20cd45b1cad256a94efacdf5b93 +Subproject commit 3335a72819fdf6928052a97c6109e7afa888bed0 diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index 3eaaa2b42e..f1b61fb470 100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -8,6 +8,7 @@ #include "nbl/asset/interchange/CHLSLLoader.h" #include "nbl/asset/interchange/CSPVLoader.h" +#include #include #include @@ -227,15 +228,20 @@ SAssetBundle IAssetManager::getAssetInHierarchy_impl(system::IFile* _file, const auto ext = system::extension_wo_dot(filename); auto capableLoadersRng = m_loaders.perFileExt.findRange(ext); - // loaders associated with the file's extension tryout + core::vector extensionLoaders; + extensionLoaders.reserve(8u); for (auto& loader : capableLoadersRng) { - if (loader.second->isALoadableFileFormat(file.get()) && !(bundle = loader.second->loadAsset(file.get(), params, _override, _hierarchyLevel)).getContents().empty()) + extensionLoaders.push_back(loader.second); + if (!(bundle = loader.second->loadAsset(file.get(), params, _override, _hierarchyLevel)).getContents().empty()) break; } - for (auto loaderItr = std::begin(m_loaders.vector); bundle.getContents().empty() && loaderItr != std::end(m_loaders.vector); ++loaderItr) // all loaders tryout + for (auto loaderItr = std::begin(m_loaders.vector); bundle.getContents().empty() && loaderItr != std::end(m_loaders.vector); ++loaderItr) { - if ((*loaderItr)->isALoadableFileFormat(file.get()) && !(bundle = (*loaderItr)->loadAsset(file.get(), params, _override, _hierarchyLevel)).getContents().empty()) + auto* loader = loaderItr->get(); + if (std::find(extensionLoaders.begin(), extensionLoaders.end(), loader) != extensionLoaders.end()) + continue; + if (loader->isALoadableFileFormat(file.get()) && !(bundle = loader->loadAsset(file.get(), params, _override, _hierarchyLevel)).getContents().empty()) break; } diff --git 
a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 7c515ebd0e..f6b30640c6 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -790,10 +790,11 @@ struct SContext it.ptr += it.stride; } } - bool readFace(const SElement& Element, core::vector& _outIndices, uint32_t& _maxIndex) + bool readFace(const SElement& Element, core::vector& _outIndices, uint32_t& _maxIndex, const uint32_t vertexCount) { if (!IsBinaryFile) getNextLine(); + const bool hasVertexCount = vertexCount != 0u; for (const auto& prop : Element.Properties) { @@ -809,12 +810,20 @@ struct SContext } if (count > 3u) _outIndices.reserve(_outIndices.size() + static_cast(count - 2u) * 3ull); - auto emitFan = [&_outIndices, &_maxIndex](auto&& readIndex, const uint32_t faceVertexCount)->void + auto emitFan = [&_outIndices, &_maxIndex, hasVertexCount, vertexCount](auto&& readIndex, const uint32_t faceVertexCount)->bool { uint32_t i0 = readIndex(); uint32_t i1 = readIndex(); uint32_t i2 = readIndex(); - _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); + if (hasVertexCount) + { + if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) + return false; + } + else + { + _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); + } _outIndices.push_back(i0); _outIndices.push_back(i1); _outIndices.push_back(i2); @@ -822,12 +831,21 @@ struct SContext for (uint32_t j = 3u; j < faceVertexCount; ++j) { const uint32_t idx = readIndex(); - _maxIndex = std::max(_maxIndex, idx); + if (hasVertexCount) + { + if (idx >= vertexCount) + return false; + } + else + { + _maxIndex = std::max(_maxIndex, idx); + } _outIndices.push_back(i0); _outIndices.push_back(prev); _outIndices.push_back(idx); prev = idx; } + return true; }; if (IsBinaryFile && !IsWrongEndian && srcIndexFmt == EF_R32_UINT) @@ -845,7 +863,8 @@ struct SContext ptr += sizeof(v); return v; }; - emitFan(readIndex, count); 
+ if (!emitFan(readIndex, count)) + return false; StartPointer = reinterpret_cast(const_cast(ptr)); continue; } @@ -865,7 +884,8 @@ struct SContext ptr += sizeof(v); return static_cast(v); }; - emitFan(readIndex, count); + if (!emitFan(readIndex, count)) + return false; StartPointer = reinterpret_cast(const_cast(ptr)); continue; } @@ -875,7 +895,8 @@ struct SContext { return static_cast(getInt(srcIndexFmt)); }; - emitFan(readIndex, count); + if (!emitFan(readIndex, count)) + return false; } else if (prop.Name == "intensity") { @@ -919,7 +940,7 @@ struct SContext const bool is32Bit = isSrcU32 || isSrcS32; const size_t indexSize = is32Bit ? sizeof(uint32_t) : sizeof(uint16_t); const bool hasVertexCount = vertexCount != 0u; - const bool trackMaxIndex = !hasVertexCount || vertexCount <= std::numeric_limits::max(); + const bool trackMaxIndex = !hasVertexCount; const size_t minTriangleRecordSize = sizeof(uint8_t) + indexSize * 3u; if (element.Count > (std::numeric_limits::max() / minTriangleRecordSize)) return EFastFaceReadResult::Error; @@ -1752,7 +1773,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa indices.reserve(indices.size() + el.Count * 3u); for (size_t j=0; jsetIndexing(IPolygonGeometryBase::TriangleList()); - if (vertCount <= std::numeric_limits::max() && maxIndexRead <= std::numeric_limits::max()) + const bool canUseU16 = (vertCount != 0u) ? 
(vertCount <= std::numeric_limits::max()) : (maxIndexRead <= std::numeric_limits::max()); + if (canUseU16) { core::vector indices16(indices.size()); for (size_t i = 0u; i < indices.size(); ++i) diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index cea0fbfab9..ba1fe0e366 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -69,6 +69,14 @@ constexpr size_t BinaryTriangleFloatCount = 12ull; constexpr size_t BinaryTriangleFloatBytes = sizeof(float) * BinaryTriangleFloatCount; constexpr size_t BinaryTriangleAttributeBytes = sizeof(uint16_t); constexpr size_t BinaryTriangleRecordBytes = BinaryTriangleFloatBytes + BinaryTriangleAttributeBytes; +#pragma pack(push, 1) +struct SBinaryTriangleRecord +{ + float payload[BinaryTriangleFloatCount]; + uint16_t attribute = 0u; +}; +#pragma pack(pop) +static_assert(sizeof(SBinaryTriangleRecord) == BinaryTriangleRecordBytes); constexpr size_t BinaryPrefixBytes = BinaryHeaderBytes + BinaryTriangleCountBytes; constexpr size_t IoFallbackReserveBytes = 1ull << 20; constexpr size_t AsciiFaceTextMaxBytes = 1024ull; @@ -573,6 +581,20 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) } return normalView.decodeElement(ix, out); }; + auto writeRecord = [&dst](const float nx, const float ny, const float nz, const float v1x, const float v1y, const float v1z, const float v2x, const float v2y, const float v2z, const float v3x, const float v3y, const float v3z)->void + { + const stl_writer_detail::SBinaryTriangleRecord record = { + { + nx, ny, nz, + v1x, v1y, v1z, + v2x, v2y, v2z, + v3x, v3y, v3z + }, + 0u + }; + std::memcpy(dst, &record, sizeof(record)); + dst += sizeof(record); + }; const bool hasFastTightPath = (indices == nullptr) && (tightPositions != nullptr) && (!hasNormals || (tightNormals != nullptr)); if (hasFastTightPath && hasNormals) @@ -606,18 +628,11 @@ bool writeMeshBinary(const 
asset::ICPUPolygonGeometry* geom, SContext* context) if (flipHandedness) attrNormal.x = -attrNormal.x; - const float packedData[12] = { + writeRecord( attrNormal.x, attrNormal.y, attrNormal.z, vertex1x, vertex1.y, vertex1.z, vertex2x, vertex2.y, vertex2.z, - vertex3x, vertex3.y, vertex3.z - }; - std::memcpy(dst, packedData, sizeof(packedData)); - dst += sizeof(packedData); - - const uint16_t color = 0u; - std::memcpy(dst, &color, sizeof(color)); - dst += sizeof(color); + vertex3x, vertex3.y, vertex3.z); } } else @@ -670,18 +685,11 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) } } - const float packedData[12] = { + writeRecord( normalX, normalY, normalZ, vertex1x, vertex1.y, vertex1.z, vertex2x, vertex2.y, vertex2.z, - vertex3x, vertex3.y, vertex3.z - }; - std::memcpy(dst, packedData, sizeof(packedData)); - dst += sizeof(packedData); - - const uint16_t color = 0u; - std::memcpy(dst, &color, sizeof(color)); - dst += sizeof(color); + vertex3x, vertex3.y, vertex3.z); } } } @@ -716,18 +724,11 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) normalZ *= invLen; } - const float packedData[12] = { + writeRecord( normalX, normalY, normalZ, vertex1x, vertex1.y, vertex1.z, vertex2x, vertex2.y, vertex2.z, - vertex3x, vertex3.y, vertex3.z - }; - std::memcpy(dst, packedData, sizeof(packedData)); - dst += sizeof(packedData); - - const uint16_t color = 0u; - std::memcpy(dst, &color, sizeof(color)); - dst += sizeof(color); + vertex3x, vertex3.y, vertex3.z); } } else @@ -802,18 +803,11 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) } } - const float packedData[12] = { + writeRecord( normal.x, normal.y, normal.z, vertex1.x, vertex1.y, vertex1.z, vertex2.x, vertex2.y, vertex2.z, - vertex3.x, vertex3.y, vertex3.z - }; - std::memcpy(dst, packedData, sizeof(packedData)); - dst += sizeof(packedData); - - const uint16_t color = 0u; - std::memcpy(dst, &color, sizeof(color)); - dst += 
sizeof(color); + vertex3.x, vertex3.y, vertex3.z); } } From 335a2cbb98c71b2114e036b35d726f068a5f1412 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 9 Feb 2026 15:33:26 +0100 Subject: [PATCH 012/118] Optimize loader hot paths and hashing --- include/nbl/asset/IAssetManager.h | 3 + .../nbl/asset/interchange/IGeometryLoader.h | 4 +- src/nbl/asset/interchange/CGLTFLoader.cpp | 2 +- .../asset/interchange/COBJMeshFileLoader.cpp | 642 +++++++++++++----- .../asset/interchange/CPLYMeshFileLoader.cpp | 214 +++++- .../asset/interchange/CSTLMeshFileLoader.cpp | 360 +++++++--- src/nbl/system/CSystemWin32.cpp | 54 +- 7 files changed, 986 insertions(+), 293 deletions(-) diff --git a/include/nbl/asset/IAssetManager.h b/include/nbl/asset/IAssetManager.h index 2105b6c4fe..22f61e848b 100644 --- a/include/nbl/asset/IAssetManager.h +++ b/include/nbl/asset/IAssetManager.h @@ -190,6 +190,9 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted } system::ISystem::future_t> future; + m_system->createFile(future, filePath, static_cast(system::IFile::ECF_READ | system::IFile::ECF_MAPPABLE)); + if (auto file=future.acquire()) + return getAssetInHierarchy_impl(file->get(), filePath.string(), ctx.params, _hierarchyLevel, _override); m_system->createFile(future, filePath, system::IFile::ECF_READ); if (auto file=future.acquire()) return getAssetInHierarchy_impl(file->get(), filePath.string(), ctx.params, _hierarchyLevel, _override); diff --git a/include/nbl/asset/interchange/IGeometryLoader.h b/include/nbl/asset/interchange/IGeometryLoader.h index 6e6c7c4e26..01a7342e02 100644 --- a/include/nbl/asset/interchange/IGeometryLoader.h +++ b/include/nbl/asset/interchange/IGeometryLoader.h @@ -64,7 +64,7 @@ class IGeometryLoader : public IAssetLoader inline void deallocate(void* p, std::size_t bytes, std::size_t alignment) override { assert(m_file); - auto* const basePtr = reinterpret_cast(m_file->getMappedPointer()); + auto* const basePtr = 
reinterpret_cast(static_cast(m_file.get())->getMappedPointer()); assert(basePtr && basePtr<=p && p<=basePtr+m_file->getSize()); } @@ -73,7 +73,7 @@ class IGeometryLoader : public IAssetLoader }; static inline IGeometry::SDataView createView(const E_FORMAT format, const size_t elementCount, core::smart_refctd_ptr&& file, const size_t offsetInFile) { - if (auto* const basePtr=reinterpret_cast(file->getMappedPointer()); basePtr) + if (auto* const basePtr=reinterpret_cast(static_cast(file.get())->getMappedPointer()); basePtr) { auto resource = core::make_smart_refctd_ptr(std::move(file)); auto* const data = basePtr+offsetInFile; diff --git a/src/nbl/asset/interchange/CGLTFLoader.cpp b/src/nbl/asset/interchange/CGLTFLoader.cpp index fde9552179..d0941103f6 100644 --- a/src/nbl/asset/interchange/CGLTFLoader.cpp +++ b/src/nbl/asset/interchange/CGLTFLoader.cpp @@ -60,7 +60,7 @@ using namespace nbl::asset; core::smart_refctd_ptr glslFile = loadBuiltinData(decltype(constexprStringType)::value); auto glsl = asset::ICPUBuffer::create({ glslFile->getSize() }); - memcpy(glsl->getPointer(),glslFile->getMappedPointer(),glsl->getSize()); + memcpy(glsl->getPointer(),static_cast(glslFile.get())->getMappedPointer(),glsl->getSize()); auto unspecializedShader = core::make_smart_refctd_ptr(std::move(glsl), stage, asset::ICPUShader::E_CONTENT_TYPE::ECT_GLSL, stage != ICPUShader::ESS_VERTEX ? "?IrrlichtBAW glTFLoader FragmentShader?" 
: "?IrrlichtBAW glTFLoader VertexShader?"); if (extraDefine) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index f4e8e93303..d3b944f0c3 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -15,7 +15,6 @@ #include "COBJMeshFileLoader.h" #include -#include #include #include #include @@ -30,27 +29,12 @@ namespace nbl::asset namespace { -struct ObjVertexKey +struct ObjVertexDedupNode { - int32_t pos; - int32_t uv; - int32_t normal; - - bool operator==(const ObjVertexKey& other) const - { - return pos == other.pos && uv == other.uv && normal == other.normal; - } -}; - -struct ObjVertexKeyHash -{ - size_t operator()(const ObjVertexKey& key) const noexcept - { - const uint32_t p = static_cast(key.pos); - const uint32_t t = static_cast(key.uv); - const uint32_t n = static_cast(key.normal); - return static_cast((p * 73856093u) ^ (t * 19349663u) ^ (n * 83492791u)); - } + int32_t uv = -1; + int32_t normal = -1; + uint32_t outIndex = 0u; + int32_t next = -1; }; struct SFileReadTelemetry @@ -84,6 +68,76 @@ using Float2 = hlsl::float32_t2; static_assert(sizeof(Float3) == sizeof(float) * 3ull); static_assert(sizeof(Float2) == sizeof(float) * 2ull); +NBL_FORCE_INLINE bool isObjInlineWhitespace(const char c) +{ + return c == ' ' || c == '\t' || c == '\v' || c == '\f'; +} + +NBL_FORCE_INLINE bool isObjDigit(const char c) +{ + return c >= '0' && c <= '9'; +} + +NBL_FORCE_INLINE bool parseObjFloat(const char*& ptr, const char* const end, float& out) +{ + const char* const start = ptr; + if (start >= end) + return false; + + auto parseWithFallback = [&]() -> bool + { + const auto parseResult = fast_float::from_chars(start, end, out); + if (parseResult.ec == std::errc() && parseResult.ptr != start) + { + ptr = parseResult.ptr; + return true; + } + return false; + }; + + const char* p = start; + bool negative = false; + if (*p == '-' || *p == '+') + { + 
negative = (*p == '-'); + ++p; + if (p >= end) + return false; + } + + if (*p == '.') + return parseWithFallback(); + if (!isObjDigit(*p)) + return parseWithFallback(); + + uint64_t integerPart = 0ull; + while (p < end && isObjDigit(*p)) + { + integerPart = integerPart * 10ull + static_cast(*p - '0'); + ++p; + } + + double value = static_cast(integerPart); + if (p < end && *p == '.') + { + ++p; + double scale = 0.1; + while (p < end && isObjDigit(*p)) + { + value += static_cast(*p - '0') * scale; + scale *= 0.1; + ++p; + } + } + + if (p < end && (*p == 'e' || *p == 'E')) + return parseWithFallback(); + + out = static_cast(negative ? -value : value); + ptr = p; + return true; +} + void extendAABB(hlsl::shapes::AABB<3, hlsl::float32_t>& aabb, bool& hasAABB, const Float3& p) { if (!hasAABB) @@ -259,7 +313,79 @@ const char* readUV(const char* bufPtr, float vec[2], const char* const bufEnd) return bufPtr; } -bool parseSignedObjIndex(const char*& ptr, const char* const end, int32_t& out) +NBL_FORCE_INLINE bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint32_t& out) +{ + if (ptr >= end || !isObjDigit(*ptr)) + return false; + + uint64_t value = 0ull; + while (ptr < end && isObjDigit(*ptr)) + { + value = value * 10ull + static_cast(*ptr - '0'); + ++ptr; + } + if (value == 0ull || value > static_cast(std::numeric_limits::max())) + return false; + + out = static_cast(value); + return true; +} + +NBL_FORCE_INLINE bool parseObjFaceTokenPositiveTriplet(const char*& ptr, const char* const end, int32_t* idx, const size_t posCount, const size_t uvCount, const size_t normalCount) +{ + while (ptr < end && isObjInlineWhitespace(*ptr)) + ++ptr; + if (ptr >= end || !isObjDigit(*ptr)) + return false; + + uint32_t posRaw = 0u; + if (!parseUnsignedObjIndex(ptr, end, posRaw)) + return false; + if (posRaw > posCount) + return false; + + if (ptr >= end || *ptr != '/') + return false; + ++ptr; + + uint32_t uvRaw = 0u; + if (!parseUnsignedObjIndex(ptr, end, uvRaw)) + 
return false; + if (uvRaw > uvCount) + return false; + + if (ptr >= end || *ptr != '/') + return false; + ++ptr; + + uint32_t normalRaw = 0u; + if (!parseUnsignedObjIndex(ptr, end, normalRaw)) + return false; + if (normalRaw > normalCount) + return false; + + idx[0] = static_cast(posRaw - 1u); + idx[1] = static_cast(uvRaw - 1u); + idx[2] = static_cast(normalRaw - 1u); + return true; +} + +NBL_FORCE_INLINE bool parseObjTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, int32_t* idx0, int32_t* idx1, int32_t* idx2, const size_t posCount, const size_t uvCount, const size_t normalCount) +{ + const char* ptr = lineStart; + if (!parseObjFaceTokenPositiveTriplet(ptr, lineEnd, idx0, posCount, uvCount, normalCount)) + return false; + if (!parseObjFaceTokenPositiveTriplet(ptr, lineEnd, idx1, posCount, uvCount, normalCount)) + return false; + if (!parseObjFaceTokenPositiveTriplet(ptr, lineEnd, idx2, posCount, uvCount, normalCount)) + return false; + + while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) + ++ptr; + return ptr == lineEnd; +} + +NBL_FORCE_INLINE bool parseSignedObjIndex(const char*& ptr, const char* const end, int32_t& out) { if (ptr >= end) return false; @@ -275,11 +401,11 @@ bool parseSignedObjIndex(const char*& ptr, const char* const end, int32_t& out) ++ptr; } - if (ptr >= end || !core::isdigit(*ptr)) + if (ptr >= end || !isObjDigit(*ptr)) return false; int64_t value = 0; - while (ptr < end && core::isdigit(*ptr)) + while (ptr < end && isObjDigit(*ptr)) { value = value * 10ll + static_cast(*ptr - '0'); ++ptr; @@ -296,7 +422,7 @@ bool parseSignedObjIndex(const char*& ptr, const char* const end, int32_t& out) return true; } -bool resolveObjIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) +NBL_FORCE_INLINE bool resolveObjIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) { if (rawIndex > 0) { @@ -317,12 +443,12 @@ bool resolveObjIndex(const int32_t rawIndex, const size_t 
elementCount, int32_t& return true; } -bool parseObjFaceVertexTokenFast(const char*& linePtr, const char* const lineEnd, int32_t* idx, const size_t posCount, const size_t uvCount, const size_t normalCount) +NBL_FORCE_INLINE bool parseObjFaceVertexTokenFast(const char*& linePtr, const char* const lineEnd, int32_t* idx, const size_t posCount, const size_t uvCount, const size_t normalCount) { if (!idx) return false; - while (linePtr < lineEnd && core::isspace(*linePtr) && *linePtr != '\n' && *linePtr != '\r') + while (linePtr < lineEnd && isObjInlineWhitespace(*linePtr)) ++linePtr; if (linePtr >= lineEnd) return false; @@ -332,50 +458,95 @@ bool parseObjFaceVertexTokenFast(const char*& linePtr, const char* const lineEnd idx[2] = -1; const char* ptr = linePtr; - int32_t raw = 0; - if (!parseSignedObjIndex(ptr, lineEnd, raw)) - return false; - if (!resolveObjIndex(raw, posCount, idx[0])) - return false; - - if (ptr < lineEnd && *ptr == '/') + if (*ptr != '-' && *ptr != '+') { - ++ptr; + uint32_t posRaw = 0u; + if (!parseUnsignedObjIndex(ptr, lineEnd, posRaw)) + return false; + if (posRaw > posCount) + return false; + idx[0] = static_cast(posRaw - 1u); - if (ptr < lineEnd && *ptr != '/') + if (ptr < lineEnd && *ptr == '/') { - if (!parseSignedObjIndex(ptr, lineEnd, raw)) - return false; - if (!resolveObjIndex(raw, uvCount, idx[1])) + ++ptr; + if (ptr < lineEnd && *ptr != '/') + { + uint32_t uvRaw = 0u; + if (!parseUnsignedObjIndex(ptr, lineEnd, uvRaw)) + return false; + if (uvRaw > uvCount) + return false; + idx[1] = static_cast(uvRaw - 1u); + } + + if (ptr < lineEnd && *ptr == '/') + { + ++ptr; + if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) + { + uint32_t normalRaw = 0u; + if (!parseUnsignedObjIndex(ptr, lineEnd, normalRaw)) + return false; + if (normalRaw > normalCount) + return false; + idx[2] = static_cast(normalRaw - 1u); + } + } + else if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) + { return false; + } + } + else if (ptr < lineEnd && 
!isObjInlineWhitespace(*ptr)) + { + return false; } + } + else + { + int32_t raw = 0; + if (!parseSignedObjIndex(ptr, lineEnd, raw)) + return false; + if (!resolveObjIndex(raw, posCount, idx[0])) + return false; if (ptr < lineEnd && *ptr == '/') { ++ptr; - if (ptr < lineEnd && !core::isspace(*ptr)) + + if (ptr < lineEnd && *ptr != '/') { if (!parseSignedObjIndex(ptr, lineEnd, raw)) return false; - if (!resolveObjIndex(raw, normalCount, idx[2])) + if (!resolveObjIndex(raw, uvCount, idx[1])) return false; } + + if (ptr < lineEnd && *ptr == '/') + { + ++ptr; + if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) + { + if (!parseSignedObjIndex(ptr, lineEnd, raw)) + return false; + if (!resolveObjIndex(raw, normalCount, idx[2])) + return false; + } + } + else if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) + { + return false; + } } - else if (ptr < lineEnd && !core::isspace(*ptr)) + else if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) { return false; } } - else if (ptr < lineEnd && !core::isspace(*ptr)) - { - return false; - } - if (ptr < lineEnd && !core::isspace(*ptr)) + if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) return false; - while (ptr < lineEnd && core::isspace(*ptr) && *ptr != '\n' && *ptr != '\r') - ++ptr; - linePtr = ptr; return true; } @@ -442,12 +613,28 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return {}; } - std::string fileContents; - fileContents.resize(static_cast(filesize)); - if (!readTextFileWithPolicy(_file, fileContents.data(), fileContents.size(), ioPlan, ioMs, ioTelemetry)) - return {}; + const auto ioStart = clock_t::now(); + std::string fileContents = {}; + const char* buf = nullptr; + if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) + { + const auto* constFile = static_cast(_file); + const auto* mapped = reinterpret_cast(constFile->getMappedPointer()); + if (mapped) + { + buf = mapped; + ioTelemetry.account(static_cast(filesize)); + } + } + if (!buf) + { + 
fileContents.resize(static_cast(filesize)); + if (!readTextFileWithPolicy(_file, fileContents.data(), fileContents.size(), ioPlan, ioMs, ioTelemetry)) + return {}; + buf = fileContents.data(); + } + ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); - const char* const buf = fileContents.data(); const char* const bufEnd = buf + static_cast(filesize); const char* bufPtr = buf; @@ -459,141 +646,292 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector outNormals; core::vector outUVs; core::vector indices; - boost::unordered_flat_map vtxMap; + core::vector dedupHeadByPos; + core::vector dedupNodes; + const size_t estimatedAttributeCount = std::max(16ull, static_cast(filesize) / 32ull); + const size_t estimatedOutVertexCount = std::max(estimatedAttributeCount, static_cast(filesize) / 20ull); + const size_t estimatedOutIndexCount = (estimatedOutVertexCount <= (std::numeric_limits::max() / 3ull)) ? (estimatedOutVertexCount * 3ull) : std::numeric_limits::max(); + positions.reserve(estimatedAttributeCount); + normals.reserve(estimatedAttributeCount); + uvs.reserve(estimatedAttributeCount); + outPositions.reserve(estimatedOutVertexCount); + outNormals.reserve(estimatedOutVertexCount); + outUVs.reserve(estimatedOutVertexCount); + if (estimatedOutIndexCount != std::numeric_limits::max()) + indices.reserve(estimatedOutIndexCount); + dedupHeadByPos.reserve(estimatedAttributeCount); + dedupNodes.reserve(estimatedOutVertexCount); bool hasNormals = false; bool hasUVs = false; hlsl::shapes::AABB<3, hlsl::float32_t> parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); bool hasParsedAABB = false; - core::vector faceCorners; - faceCorners.reserve(16ull); + auto acquireCornerIndex = [&](const int32_t* idx, uint32_t& outIx)->bool + { + if (!idx) + return false; + const int32_t posIx = idx[0]; + if (posIx < 0 || static_cast(posIx) >= positions.size()) + return false; + if (static_cast(posIx) >= dedupHeadByPos.size()) + 
dedupHeadByPos.resize(positions.size(), -1); + + int32_t nodeIx = dedupHeadByPos[posIx]; + while (nodeIx >= 0) + { + const auto& node = dedupNodes[static_cast(nodeIx)]; + if (node.uv == idx[1] && node.normal == idx[2]) + { + outIx = node.outIndex; + return true; + } + nodeIx = node.next; + } + + outIx = static_cast(outPositions.size()); + ObjVertexDedupNode node = {}; + node.uv = idx[1]; + node.normal = idx[2]; + node.outIndex = outIx; + node.next = dedupHeadByPos[posIx]; + dedupNodes.push_back(node); + dedupHeadByPos[posIx] = static_cast(dedupNodes.size() - 1ull); + + const auto& srcPos = positions[idx[0]]; + outPositions.push_back(srcPos); + extendAABB(parsedAABB, hasParsedAABB, srcPos); + + Float2 uv(0.f, 0.f); + if (idx[1] >= 0 && static_cast(idx[1]) < uvs.size()) + { + uv = uvs[idx[1]]; + hasUVs = true; + } + outUVs.push_back(uv); + + Float3 normal(0.f, 0.f, 1.f); + if (idx[2] >= 0 && static_cast(idx[2]) < normals.size()) + { + normal = normals[idx[2]]; + hasNormals = true; + } + outNormals.push_back(normal); + return true; + }; + auto acquireCornerIndexPositiveTriplet = [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, uint32_t& outIx)->bool + { + int32_t nodeIx = dedupHeadByPos[static_cast(posIx)]; + while (nodeIx >= 0) + { + const auto& node = dedupNodes[static_cast(nodeIx)]; + if (node.uv == uvIx && node.normal == normalIx) + { + outIx = node.outIndex; + return true; + } + nodeIx = node.next; + } + + outIx = static_cast(outPositions.size()); + ObjVertexDedupNode node = {}; + node.uv = uvIx; + node.normal = normalIx; + node.outIndex = outIx; + node.next = dedupHeadByPos[static_cast(posIx)]; + dedupNodes.push_back(node); + dedupHeadByPos[static_cast(posIx)] = static_cast(dedupNodes.size() - 1ull); + + const auto& srcPos = positions[static_cast(posIx)]; + outPositions.push_back(srcPos); + extendAABB(parsedAABB, hasParsedAABB, srcPos); + outUVs.push_back(uvs[static_cast(uvIx)]); + outNormals.push_back(normals[static_cast(normalIx)]); + 
hasUVs = true; + hasNormals = true; + return true; + }; const auto parseStart = clock_t::now(); - while (bufPtr != bufEnd) + while (bufPtr < bufEnd) { - switch (bufPtr[0]) + const char* const lineStart = bufPtr; + const char* lineTerminator = lineStart; + while (lineTerminator < bufEnd && *lineTerminator != '\n' && *lineTerminator != '\r') + ++lineTerminator; + + const char* lineEnd = lineTerminator; + + if (lineStart < lineEnd) { - case 'v': - switch (bufPtr[1]) + if (*lineStart == 'v') + { + if ((lineStart + 1) < lineEnd && lineStart[1] == ' ') { - case ' ': + Float3 vec{}; + const char* ptr = lineStart + 2; + for (uint32_t i = 0u; i < 3u; ++i) { - const auto t = clock_t::now(); - Float3 vec{}; - bufPtr = readVec3(bufPtr, &vec.x, bufEnd); - positions.push_back(vec); - parseVms += std::chrono::duration(clock_t::now() - t).count(); + while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) + ++ptr; + if (ptr >= lineEnd) + return {}; + if (!parseObjFloat(ptr, lineEnd, (&vec.x)[i])) + return {}; } - break; - case 'n': + positions.push_back(vec); + dedupHeadByPos.push_back(-1); + } + else if ((lineStart + 2) < lineEnd && lineStart[1] == 'n' && isObjInlineWhitespace(lineStart[2])) + { + Float3 vec{}; + const char* ptr = lineStart + 3; + for (uint32_t i = 0u; i < 3u; ++i) { - const auto t = clock_t::now(); - Float3 vec{}; - bufPtr = readVec3(bufPtr, &vec.x, bufEnd); - normals.push_back(vec); - parseVNms += std::chrono::duration(clock_t::now() - t).count(); + while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) + ++ptr; + if (ptr >= lineEnd) + return {}; + if (!parseObjFloat(ptr, lineEnd, (&vec.x)[i])) + return {}; } - break; - case 't': + normals.push_back(vec); + } + else if ((lineStart + 2) < lineEnd && lineStart[1] == 't' && isObjInlineWhitespace(lineStart[2])) + { + Float2 vec{}; + const char* ptr = lineStart + 3; + for (uint32_t i = 0u; i < 2u; ++i) { - const auto t = clock_t::now(); - Float2 vec{}; - bufPtr = readUV(bufPtr, &vec.x, bufEnd); - uvs.push_back(vec); - 
parseVTms += std::chrono::duration(clock_t::now() - t).count(); + while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) + ++ptr; + if (ptr >= lineEnd) + return {}; + if (!parseObjFloat(ptr, lineEnd, (&vec.x)[i])) + return {}; } - break; - default: - break; + vec.y = 1.f - vec.y; + uvs.push_back(vec); } - break; - case 'f': + } + else if (*lineStart == 'f' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) { if (positions.empty()) return {}; ++faceCount; - if (faceCount == 1u) + const size_t posCount = positions.size(); + const size_t uvCount = uvs.size(); + const size_t normalCount = normals.size(); + const char* triLinePtr = lineStart + 1; + int32_t triIdx0[3] = { -1, -1, -1 }; + int32_t triIdx1[3] = { -1, -1, -1 }; + int32_t triIdx2[3] = { -1, -1, -1 }; + bool triangleFastPath = parseObjTrianglePositiveTripletLine(lineStart + 1, lineEnd, triIdx0, triIdx1, triIdx2, posCount, uvCount, normalCount); + bool parsedFirstThree = triangleFastPath; + if (!triangleFastPath) { - const size_t estimatedVertexCount = positions.size() <= (std::numeric_limits::max() / 4ull) ? positions.size() * 4ull : positions.size(); - vtxMap.reserve(estimatedVertexCount); - outPositions.reserve(estimatedVertexCount); - outNormals.reserve(estimatedVertexCount); - outUVs.reserve(estimatedVertexCount); - const size_t estimatedIndexCount = estimatedVertexCount <= (std::numeric_limits::max() / 2ull) ? 
estimatedVertexCount * 2ull : estimatedVertexCount; - indices.reserve(estimatedIndexCount); + triLinePtr = lineStart + 1; + parsedFirstThree = + parseObjFaceVertexTokenFast(triLinePtr, lineEnd, triIdx0, posCount, uvCount, normalCount) && + parseObjFaceVertexTokenFast(triLinePtr, lineEnd, triIdx1, posCount, uvCount, normalCount) && + parseObjFaceVertexTokenFast(triLinePtr, lineEnd, triIdx2, posCount, uvCount, normalCount); + triangleFastPath = parsedFirstThree; + if (parsedFirstThree) + { + while (triLinePtr < lineEnd && isObjInlineWhitespace(*triLinePtr)) + ++triLinePtr; + triangleFastPath = (triLinePtr == lineEnd); + } } - - const char* endPtr = bufPtr; - while (endPtr != bufEnd && *endPtr != '\n' && *endPtr != '\r') - ++endPtr; - - faceCorners.clear(); - - const char* linePtr = bufPtr + 1; - while (linePtr < endPtr) + if (triangleFastPath) { - while (linePtr < endPtr && core::isspace(*linePtr) && *linePtr != '\n' && *linePtr != '\r') - ++linePtr; - if (linePtr >= endPtr) - break; - - const auto tokenParseStart = clock_t::now(); - int32_t idx[3] = { -1, -1, -1 }; - if (!parseObjFaceVertexTokenFast(linePtr, endPtr, idx, positions.size(), uvs.size(), normals.size())) + uint32_t c0 = 0u; + uint32_t c1 = 0u; + uint32_t c2 = 0u; + if (!acquireCornerIndexPositiveTriplet(triIdx0[0], triIdx0[1], triIdx0[2], c0)) return {}; - ++faceFastTokenCount; - - if (idx[0] < 0 || static_cast(idx[0]) >= positions.size()) + if (!acquireCornerIndexPositiveTriplet(triIdx1[0], triIdx1[1], triIdx1[2], c1)) + return {}; + if (!acquireCornerIndexPositiveTriplet(triIdx2[0], triIdx2[1], triIdx2[2], c2)) return {}; - parseFaceMs += std::chrono::duration(clock_t::now() - tokenParseStart).count(); - - const auto dedupStart = clock_t::now(); - ObjVertexKey key = { idx[0], idx[1], idx[2] }; - const uint32_t candidateIndex = static_cast(outPositions.size()); - auto [it, inserted] = vtxMap.try_emplace(key, candidateIndex); - uint32_t outIx = it->second; - if (inserted) + faceFastTokenCount += 3u; + 
indices.push_back(c2); + indices.push_back(c1); + indices.push_back(c0); + } + else + { + const char* linePtr = lineStart + 1; + uint32_t firstCorner = 0u; + uint32_t previousCorner = 0u; + uint32_t cornerCount = 0u; + + if (parsedFirstThree) { - const auto& srcPos = positions[idx[0]]; - outPositions.push_back(srcPos); - extendAABB(parsedAABB, hasParsedAABB, srcPos); + uint32_t c0 = 0u; + uint32_t c1 = 0u; + uint32_t c2 = 0u; + if (!acquireCornerIndex(triIdx0, c0)) + return {}; + if (!acquireCornerIndex(triIdx1, c1)) + return {}; + if (!acquireCornerIndex(triIdx2, c2)) + return {}; + faceFastTokenCount += 3u; + indices.push_back(c2); + indices.push_back(c1); + indices.push_back(c0); + firstCorner = c0; + previousCorner = c2; + cornerCount = 3u; + linePtr = triLinePtr; + } - Float2 uv(0.f, 0.f); - if (idx[1] >= 0 && static_cast(idx[1]) < uvs.size()) + while (linePtr < lineEnd) + { + while (linePtr < lineEnd && isObjInlineWhitespace(*linePtr)) + ++linePtr; + if (linePtr >= lineEnd) + break; + + int32_t idx[3] = { -1, -1, -1 }; + if (!parseObjFaceVertexTokenFast(linePtr, lineEnd, idx, posCount, uvCount, normalCount)) + return {}; + ++faceFastTokenCount; + + uint32_t cornerIx = 0u; + if (!acquireCornerIndex(idx, cornerIx)) + return {}; + + if (cornerCount == 0u) { - uv = uvs[idx[1]]; - hasUVs = true; + firstCorner = cornerIx; + ++cornerCount; + continue; } - outUVs.push_back(uv); - Float3 normal(0.f, 0.f, 1.f); - if (idx[2] >= 0 && static_cast(idx[2]) < normals.size()) + if (cornerCount == 1u) { - normal = normals[idx[2]]; - hasNormals = true; + previousCorner = cornerIx; + ++cornerCount; + continue; } - outNormals.push_back(normal); - } - dedupMs += std::chrono::duration(clock_t::now() - dedupStart).count(); - faceCorners.push_back(outIx); - } - - const auto emitStart = clock_t::now(); - for (uint32_t i = 1u; i + 1u < faceCorners.size(); ++i) - { - indices.push_back(faceCorners[i + 1]); - indices.push_back(faceCorners[i]); - indices.push_back(faceCorners[0]); + 
indices.push_back(cornerIx); + indices.push_back(previousCorner); + indices.push_back(firstCorner); + previousCorner = cornerIx; + ++cornerCount; + } } - emitMs += std::chrono::duration(clock_t::now() - emitStart).count(); } - break; - default: - break; } - bufPtr = goNextLine(bufPtr, bufEnd); + if (lineTerminator >= bufEnd) + bufPtr = bufEnd; + else if (*lineTerminator == '\r' && (lineTerminator + 1) < bufEnd && lineTerminator[1] == '\n') + bufPtr = lineTerminator + 2; + else + bufPtr = lineTerminator + 1; } parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); const double parseScanMs = std::max(0.0, parseMs - (parseVms + parseVNms + parseVTms + parseFaceMs + dedupMs + emitMs)); diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index f6b30640c6..e85e243b3e 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include "nbl/asset/IAssetManager.h" @@ -127,6 +129,70 @@ IGeometry::SDataView plyCreateAdoptedU16IndexView(core::vector> buffers; + auto appendViewBuffer = [&buffers](const IGeometry::SDataView& view) -> void + { + if (!view || !view.src.buffer) + return; + for (const auto& existing : buffers) + { + if (existing.get() == view.src.buffer.get()) + return; + } + buffers.push_back(core::smart_refctd_ptr(view.src.buffer)); + }; + + appendViewBuffer(geometry->getPositionView()); + appendViewBuffer(geometry->getIndexView()); + appendViewBuffer(geometry->getNormalView()); + for (const auto& view : *geometry->getAuxAttributeViews()) + appendViewBuffer(view); + for (const auto& view : *geometry->getJointWeightViews()) + { + appendViewBuffer(view.indices); + appendViewBuffer(view.weights); + } + if (auto jointOBB = geometry->getJointOBBView(); jointOBB) + appendViewBuffer(*jointOBB); + + if (buffers.empty()) + return; + + const size_t hw = 
std::thread::hardware_concurrency(); + const size_t workerCount = hw ? std::min(hw, buffers.size()) : 1ull; + if (workerCount <= 1ull) + { + for (auto& buffer : buffers) + buffer->setContentHash(buffer->computeContentHash()); + return; + } + + std::vector workers; + workers.reserve(workerCount > 0ull ? (workerCount - 1ull) : 0ull); + auto hashRange = [&buffers](const size_t beginIx, const size_t endIx) -> void + { + for (size_t i = beginIx; i < endIx; ++i) + { + auto& buffer = buffers[i]; + buffer->setContentHash(buffer->computeContentHash()); + } + }; + for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) + { + const size_t begin = (buffers.size() * workerIx) / workerCount; + const size_t end = (buffers.size() * (workerIx + 1ull)) / workerCount; + workers.emplace_back(hashRange, begin, end); + } + hashRange(0ull, buffers.size() / workerCount); + for (auto& worker : workers) + worker.join(); +} + struct SContext { static constexpr uint64_t ReadWindowPaddingBytes = 1ull; @@ -344,6 +410,24 @@ struct SContext // return pointer to the start of current word return StartPointer; } + size_t getAbsoluteOffset(const char* ptr) const + { + if (!ptr || ptr > EndPointer) + return fileOffset; + const size_t trailingBytes = static_cast(EndPointer - ptr); + return fileOffset >= trailingBytes ? (fileOffset - trailingBytes) : 0ull; + } + void useMappedBinaryWindow(const char* data, const size_t sizeBytes) + { + if (!data) + return; + StartPointer = const_cast(data); + EndPointer = StartPointer + sizeBytes; + LineEndPointer = StartPointer - 1; + WordLength = -1; + EndOfFile = true; + fileOffset = inner.mainFile ? 
inner.mainFile->getSize() : fileOffset; + } // skips x bytes in the file, getting more data if required void moveForward(const size_t bytes) { @@ -677,11 +761,21 @@ struct SContext { case ELayoutKind::XYZ: { - for (size_t v = 0ull; v < batchVertices; ++v) + if (posStride == 3ull * floatBytes) { - std::memcpy(posBase, src, 3ull * floatBytes); - src += 3ull * floatBytes; - posBase += posStride; + const size_t batchBytes = batchVertices * 3ull * floatBytes; + std::memcpy(posBase, src, batchBytes); + src += batchBytes; + posBase += batchBytes; + } + else + { + for (size_t v = 0ull; v < batchVertices; ++v) + { + std::memcpy(posBase, src, 3ull * floatBytes); + src += 3ull * floatBytes; + posBase += posStride; + } } } break; @@ -958,6 +1052,92 @@ struct SContext uint32_t* out = _outIndices.data() + oldSize; const uint8_t* ptr = reinterpret_cast(StartPointer); bool fallbackToGeneric = false; + if (is32Bit) + { + const size_t hw = std::thread::hardware_concurrency(); + const size_t maxWorkersByWork = std::max(1ull, minBytesNeeded / (1ull << 20)); + size_t workerCount = hw ? std::min(hw, maxWorkersByWork) : 1ull; + if (workerCount > 1ull) + { + const size_t recordBytes = sizeof(uint8_t) + 3ull * sizeof(uint32_t); + const bool needMax = hasVertexCount || trackMaxIndex; + std::vector workerNonTriangle(workerCount, 0u); + std::vector workerInvalid(workerCount, 0u); + std::vector workerMax(needMax ? workerCount : 0ull, 0u); + std::vector workers; + workers.reserve(workerCount > 0ull ? 
(workerCount - 1ull) : 0ull); + auto parseChunk = [&](const size_t workerIx, const size_t beginFace, const size_t endFace) -> void + { + const uint8_t* in = ptr + beginFace * recordBytes; + uint32_t* outLocal = out + beginFace * 3ull; + uint32_t localMax = 0u; + uint32_t localSignBits = 0u; + for (size_t faceIx = beginFace; faceIx < endFace; ++faceIx) + { + if (*in != 3u) + { + workerNonTriangle[workerIx] = 1u; + break; + } + ++in; + std::memcpy(outLocal, in, 3ull * sizeof(uint32_t)); + const uint32_t i0 = outLocal[0]; + const uint32_t i1 = outLocal[1]; + const uint32_t i2 = outLocal[2]; + if (isSrcS32) + localSignBits |= (i0 | i1 | i2); + if (needMax) + { + if (i0 > localMax) localMax = i0; + if (i1 > localMax) localMax = i1; + if (i2 > localMax) localMax = i2; + } + in += 3ull * sizeof(uint32_t); + outLocal += 3ull; + } + if (isSrcS32 && (localSignBits & 0x80000000u)) + workerInvalid[workerIx] = 1u; + if (hasVertexCount && needMax && localMax >= vertexCount) + workerInvalid[workerIx] = 1u; + if (needMax) + workerMax[workerIx] = localMax; + }; + for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) + { + const size_t begin = (element.Count * workerIx) / workerCount; + const size_t end = (element.Count * (workerIx + 1ull)) / workerCount; + workers.emplace_back(parseChunk, workerIx, begin, end); + } + parseChunk(0ull, 0ull, element.Count / workerCount); + for (auto& worker : workers) + worker.join(); + + const bool anyNonTriangle = std::any_of(workerNonTriangle.begin(), workerNonTriangle.end(), [](const uint8_t v) { return v != 0u; }); + if (anyNonTriangle) + { + _outIndices.resize(oldSize); + _maxIndex = oldMaxIndex; + return EFastFaceReadResult::NotApplicable; + } + const bool anyInvalid = std::any_of(workerInvalid.begin(), workerInvalid.end(), [](const uint8_t v) { return v != 0u; }); + if (anyInvalid) + { + _outIndices.resize(oldSize); + _maxIndex = oldMaxIndex; + return EFastFaceReadResult::Error; + } + if (trackMaxIndex) + { + for (const uint32_t 
local : workerMax) + if (local > _maxIndex) + _maxIndex = local; + } + + StartPointer = reinterpret_cast(const_cast(ptr + element.Count * recordBytes)); + _faceCount += element.Count; + return EFastFaceReadResult::Success; + } + } if (is32Bit) { @@ -1329,7 +1509,13 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa _hierarchyLevel, _override }; - const uint64_t desiredReadWindow = ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? (fileSize + SContext::ReadWindowPaddingBytes) : ioPlan.chunkSizeBytes; + uint64_t desiredReadWindow = ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? (fileSize + SContext::ReadWindowPaddingBytes) : ioPlan.chunkSizeBytes; + if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) + { + const bool mappedInput = static_cast(_file)->getMappedPointer() != nullptr; + if (mappedInput && fileSize > (SContext::DefaultIoReadWindowBytes * 2ull)) + desiredReadWindow = SContext::DefaultIoReadWindowBytes; + } const uint64_t safeReadWindow = std::min(desiredReadWindow, static_cast(std::numeric_limits::max() - SContext::ReadWindowPaddingBytes)); ctx.init(static_cast(safeReadWindow)); @@ -1472,7 +1658,20 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { readingHeader = false; if (ctx.IsBinaryFile) - ctx.StartPointer = ctx.LineEndPointer+1; + { + char* const binaryStartInBuffer = ctx.LineEndPointer + 1; + const auto* const mappedBase = reinterpret_cast(static_cast(_file)->getMappedPointer()); + if (mappedBase) + { + const size_t binaryOffset = ctx.getAbsoluteOffset(binaryStartInBuffer); + const size_t remainingBytes = static_cast(binaryOffset < fileSize ? 
(fileSize - binaryOffset) : 0ull); + ctx.useMappedBinaryWindow(mappedBase + binaryOffset, remainingBytes); + } + else + { + ctx.StartPointer = binaryStartInBuffer; + } + } } else { @@ -1498,6 +1697,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa // loop through each of the elements bool verticesProcessed = false; + for (uint32_t i=0; i(clock_t::now() - indexStart).count(); const auto hashStart = clock_t::now(); - CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); + plyRecomputeContentHashesParallel(geometry.get()); hashRangeMs = std::chrono::duration(clock_t::now() - hashStart).count(); const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 581f66cd82..900c905c64 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -22,7 +22,9 @@ #include #include #include +#include #include +#include namespace nbl::asset { @@ -244,6 +246,70 @@ ICPUPolygonGeometry::SDataView stlCreateAdoptedFloat3View(core::vector> buffers; + auto appendViewBuffer = [&buffers](const IGeometry::SDataView& view) -> void + { + if (!view || !view.src.buffer) + return; + for (const auto& existing : buffers) + { + if (existing.get() == view.src.buffer.get()) + return; + } + buffers.push_back(core::smart_refctd_ptr(view.src.buffer)); + }; + + appendViewBuffer(geometry->getPositionView()); + appendViewBuffer(geometry->getIndexView()); + appendViewBuffer(geometry->getNormalView()); + for (const auto& view : *geometry->getAuxAttributeViews()) + appendViewBuffer(view); + for (const auto& view : *geometry->getJointWeightViews()) + { + appendViewBuffer(view.indices); + appendViewBuffer(view.weights); + } + if (auto jointOBB = geometry->getJointOBBView(); jointOBB) + appendViewBuffer(*jointOBB); + + if (buffers.empty()) + return; + + const size_t hw 
= std::thread::hardware_concurrency(); + const size_t workerCount = hw ? std::min(hw, buffers.size()) : 1ull; + if (workerCount <= 1ull) + { + for (auto& buffer : buffers) + buffer->setContentHash(buffer->computeContentHash()); + return; + } + + std::vector workers; + workers.reserve(workerCount > 0ull ? (workerCount - 1ull) : 0ull); + auto hashRange = [&buffers](const size_t beginIx, const size_t endIx) -> void + { + for (size_t i = beginIx; i < endIx; ++i) + { + auto& buffer = buffers[i]; + buffer->setContentHash(buffer->computeContentHash()); + } + }; + for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) + { + const size_t begin = (buffers.size() * workerIx) / workerCount; + const size_t end = (buffers.size() * (workerIx + 1ull)) / workerCount; + workers.emplace_back(hashRange, begin, end); + } + hashRange(0ull, buffers.size() / workerCount); + for (auto& worker : workers) + worker.join(); +} + CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager* _assetManager) { (void)_assetManager; @@ -298,15 +364,27 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa core::vector wholeFilePayload; const uint8_t* wholeFileData = nullptr; + bool wholeFileDataIsMapped = false; if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) { - const auto ioStart = clock_t::now(); - wholeFilePayload.resize(filesize + 1ull); - if (!stlReadExact(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, &context.ioTelemetry)) - return {}; - wholeFilePayload[filesize] = 0u; - wholeFileData = wholeFilePayload.data(); - ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); + const auto* constFile = static_cast(context.inner.mainFile); + const auto* mapped = reinterpret_cast(constFile->getMappedPointer()); + if (mapped) + { + wholeFileData = mapped; + wholeFileDataIsMapped = true; + context.ioTelemetry.account(filesize); + } + else + { + const auto ioStart = clock_t::now(); + wholeFilePayload.resize(filesize + 
1ull); + if (!stlReadExact(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, &context.ioTelemetry)) + return {}; + wholeFilePayload[filesize] = 0u; + wholeFileData = wholeFilePayload.data(); + ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); + } } bool binary = false; @@ -367,6 +445,15 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa bool hasParsedAABB = false; uint64_t vertexCount = 0ull; + if (!binary && wholeFileDataIsMapped) + { + wholeFilePayload.resize(filesize + 1ull); + std::memcpy(wholeFilePayload.data(), wholeFileData, filesize); + wholeFilePayload[filesize] = 0u; + wholeFileData = wholeFilePayload.data(); + wholeFileDataIsMapped = false; + } + if (binary) { parsePath = "binary_fast"; @@ -424,107 +511,168 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const uint8_t* const end = cursor + dataSize; if (end < cursor || static_cast(end - cursor) < static_cast(triangleCount) * StlTriangleRecordBytes) return {}; - for (uint64_t tri = 0ull; tri < triangleCount; ++tri) + struct SThreadAABB + { + bool has = false; + float minX = 0.f; + float minY = 0.f; + float minZ = 0.f; + float maxX = 0.f; + float maxY = 0.f; + float maxZ = 0.f; + }; + + const size_t hw = std::thread::hardware_concurrency(); + const size_t maxWorkersByWork = std::max(1ull, static_cast(triangleCount / 16384ull)); + const size_t workerCount = hw ? 
std::max(1ull, std::min(hw, maxWorkersByWork)) : 1ull; + std::vector threadAABBs(workerCount); + auto parseRange = [&](const size_t workerIx, const uint64_t beginTri, const uint64_t endTri) -> void { - const uint8_t* const triRecord = cursor; - cursor += StlTriangleRecordBytes; - - float normalData[StlFloatChannelsPerVertex] = {}; - std::memcpy(normalData, triRecord, sizeof(normalData)); - float normalX = normalData[0]; - float normalY = normalData[1]; - float normalZ = normalData[2]; - - const size_t base = static_cast(tri) * StlVerticesPerTriangle * StlFloatChannelsPerVertex; - std::memcpy(posOutFloat + base + 0ull, triRecord + 9ull * sizeof(float), sizeof(normalData)); - std::memcpy(posOutFloat + base + 3ull, triRecord + 6ull * sizeof(float), sizeof(normalData)); - std::memcpy(posOutFloat + base + 6ull, triRecord + 3ull * sizeof(float), sizeof(normalData)); - - const float vertex0x = posOutFloat[base + 0ull]; - const float vertex0y = posOutFloat[base + 1ull]; - const float vertex0z = posOutFloat[base + 2ull]; - const float vertex1x = posOutFloat[base + 3ull]; - const float vertex1y = posOutFloat[base + 4ull]; - const float vertex1z = posOutFloat[base + 5ull]; - const float vertex2x = posOutFloat[base + 6ull]; - const float vertex2y = posOutFloat[base + 7ull]; - const float vertex2z = posOutFloat[base + 8ull]; - const float normalLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; - if (normalLen2 <= 0.f) + const uint8_t* localCursor = payloadData + beginTri * StlTriangleRecordBytes; + float* posCursor = posOutFloat + beginTri * StlVerticesPerTriangle * StlFloatChannelsPerVertex; + float* normalCursor = normalOutFloat + beginTri * StlVerticesPerTriangle * StlFloatChannelsPerVertex; + auto& localAABB = threadAABBs[workerIx]; + for (uint64_t tri = beginTri; tri < endTri; ++tri) { - const float edge10x = vertex1x - vertex0x; - const float edge10y = vertex1y - vertex0y; - const float edge10z = vertex1z - vertex0z; - const float edge20x = vertex2x - 
vertex0x; - const float edge20y = vertex2y - vertex0y; - const float edge20z = vertex2z - vertex0z; - - normalX = edge10y * edge20z - edge10z * edge20y; - normalY = edge10z * edge20x - edge10x * edge20z; - normalZ = edge10x * edge20y - edge10y * edge20x; - const float planeLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; - if (planeLen2 > 0.f) + const uint8_t* const triRecord = localCursor; + localCursor += StlTriangleRecordBytes; + + float normalX = 0.f; + float normalY = 0.f; + float normalZ = 0.f; + std::memcpy(&normalX, triRecord + 0u * sizeof(float), sizeof(float)); + std::memcpy(&normalY, triRecord + 1u * sizeof(float), sizeof(float)); + std::memcpy(&normalZ, triRecord + 2u * sizeof(float), sizeof(float)); + + std::memcpy(posCursor + 0ull, triRecord + 9ull * sizeof(float), 3ull * sizeof(float)); + std::memcpy(posCursor + 3ull, triRecord + 6ull * sizeof(float), 3ull * sizeof(float)); + std::memcpy(posCursor + 6ull, triRecord + 3ull * sizeof(float), 3ull * sizeof(float)); + + const float vertex0x = posCursor[0ull]; + const float vertex0y = posCursor[1ull]; + const float vertex0z = posCursor[2ull]; + const float vertex1x = posCursor[3ull]; + const float vertex1y = posCursor[4ull]; + const float vertex1z = posCursor[5ull]; + const float vertex2x = posCursor[6ull]; + const float vertex2y = posCursor[7ull]; + const float vertex2z = posCursor[8ull]; + if (!localAABB.has) { - const float invLen = 1.f / std::sqrt(planeLen2); + localAABB.minX = vertex0x; localAABB.maxX = vertex0x; + localAABB.minY = vertex0y; localAABB.maxY = vertex0y; + localAABB.minZ = vertex0z; localAABB.maxZ = vertex0z; + localAABB.has = true; + } + if (vertex0x < localAABB.minX) localAABB.minX = vertex0x; + if (vertex0y < localAABB.minY) localAABB.minY = vertex0y; + if (vertex0z < localAABB.minZ) localAABB.minZ = vertex0z; + if (vertex0x > localAABB.maxX) localAABB.maxX = vertex0x; + if (vertex0y > localAABB.maxY) localAABB.maxY = vertex0y; + if (vertex0z > localAABB.maxZ) 
localAABB.maxZ = vertex0z; + if (vertex1x < localAABB.minX) localAABB.minX = vertex1x; + if (vertex1y < localAABB.minY) localAABB.minY = vertex1y; + if (vertex1z < localAABB.minZ) localAABB.minZ = vertex1z; + if (vertex1x > localAABB.maxX) localAABB.maxX = vertex1x; + if (vertex1y > localAABB.maxY) localAABB.maxY = vertex1y; + if (vertex1z > localAABB.maxZ) localAABB.maxZ = vertex1z; + if (vertex2x < localAABB.minX) localAABB.minX = vertex2x; + if (vertex2y < localAABB.minY) localAABB.minY = vertex2y; + if (vertex2z < localAABB.minZ) localAABB.minZ = vertex2z; + if (vertex2x > localAABB.maxX) localAABB.maxX = vertex2x; + if (vertex2y > localAABB.maxY) localAABB.maxY = vertex2y; + if (vertex2z > localAABB.maxZ) localAABB.maxZ = vertex2z; + posCursor += StlVerticesPerTriangle * StlFloatChannelsPerVertex; + const float normalLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; + if (normalLen2 <= 0.f) + { + const float edge10x = vertex1x - vertex0x; + const float edge10y = vertex1y - vertex0y; + const float edge10z = vertex1z - vertex0z; + const float edge20x = vertex2x - vertex0x; + const float edge20y = vertex2y - vertex0y; + const float edge20z = vertex2z - vertex0z; + + normalX = edge10y * edge20z - edge10z * edge20y; + normalY = edge10z * edge20x - edge10x * edge20z; + normalZ = edge10x * edge20y - edge10y * edge20x; + const float planeLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; + if (planeLen2 > 0.f) + { + const float invLen = 1.f / std::sqrt(planeLen2); + normalX *= invLen; + normalY *= invLen; + normalZ *= invLen; + } + else + { + normalX = 0.f; + normalY = 0.f; + normalZ = 0.f; + } + } + else if (normalLen2 < 0.9999f || normalLen2 > 1.0001f) + { + const float invLen = 1.f / std::sqrt(normalLen2); normalX *= invLen; normalY *= invLen; normalZ *= invLen; } - else - { - normalX = 0.f; - normalY = 0.f; - normalZ = 0.f; - } + + normalCursor[0ull] = normalX; + normalCursor[1ull] = normalY; + normalCursor[2ull] = normalZ; + 
normalCursor[3ull] = normalX; + normalCursor[4ull] = normalY; + normalCursor[5ull] = normalZ; + normalCursor[6ull] = normalX; + normalCursor[7ull] = normalY; + normalCursor[8ull] = normalZ; + normalCursor += StlVerticesPerTriangle * StlFloatChannelsPerVertex; } - else if (normalLen2 < 0.9999f || normalLen2 > 1.0001f) + }; + + if (workerCount > 1ull) + { + std::vector workers; + workers.reserve(workerCount > 0ull ? (workerCount - 1ull) : 0ull); + for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) { - const float invLen = 1.f / std::sqrt(normalLen2); - normalX *= invLen; - normalY *= invLen; - normalZ *= invLen; + const uint64_t begin = (triangleCount * workerIx) / workerCount; + const uint64_t endTri = (triangleCount * (workerIx + 1ull)) / workerCount; + workers.emplace_back(parseRange, workerIx, begin, endTri); } + parseRange(0ull, 0ull, triangleCount / workerCount); + for (auto& worker : workers) + worker.join(); + } + else + { + parseRange(0ull, 0ull, triangleCount); + } - normalOutFloat[base + 0ull] = normalX; - normalOutFloat[base + 1ull] = normalY; - normalOutFloat[base + 2ull] = normalZ; - normalOutFloat[base + 3ull] = normalX; - normalOutFloat[base + 4ull] = normalY; - normalOutFloat[base + 5ull] = normalZ; - normalOutFloat[base + 6ull] = normalX; - normalOutFloat[base + 7ull] = normalY; - normalOutFloat[base + 8ull] = normalZ; - + for (const auto& localAABB : threadAABBs) + { + if (!localAABB.has) + continue; if (!hasParsedAABB) { hasParsedAABB = true; - parsedAABB.minVx.x = vertex0x; - parsedAABB.minVx.y = vertex0y; - parsedAABB.minVx.z = vertex0z; - parsedAABB.maxVx.x = vertex0x; - parsedAABB.maxVx.y = vertex0y; - parsedAABB.maxVx.z = vertex0z; + parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); + parsedAABB.minVx.x = localAABB.minX; + parsedAABB.minVx.y = localAABB.minY; + parsedAABB.minVx.z = localAABB.minZ; + parsedAABB.maxVx.x = localAABB.maxX; + parsedAABB.maxVx.y = localAABB.maxY; + parsedAABB.maxVx.z = localAABB.maxZ; + 
continue; } - - if (vertex0x < parsedAABB.minVx.x) parsedAABB.minVx.x = vertex0x; - if (vertex0y < parsedAABB.minVx.y) parsedAABB.minVx.y = vertex0y; - if (vertex0z < parsedAABB.minVx.z) parsedAABB.minVx.z = vertex0z; - if (vertex1x < parsedAABB.minVx.x) parsedAABB.minVx.x = vertex1x; - if (vertex1y < parsedAABB.minVx.y) parsedAABB.minVx.y = vertex1y; - if (vertex1z < parsedAABB.minVx.z) parsedAABB.minVx.z = vertex1z; - if (vertex2x < parsedAABB.minVx.x) parsedAABB.minVx.x = vertex2x; - if (vertex2y < parsedAABB.minVx.y) parsedAABB.minVx.y = vertex2y; - if (vertex2z < parsedAABB.minVx.z) parsedAABB.minVx.z = vertex2z; - - if (vertex0x > parsedAABB.maxVx.x) parsedAABB.maxVx.x = vertex0x; - if (vertex0y > parsedAABB.maxVx.y) parsedAABB.maxVx.y = vertex0y; - if (vertex0z > parsedAABB.maxVx.z) parsedAABB.maxVx.z = vertex0z; - if (vertex1x > parsedAABB.maxVx.x) parsedAABB.maxVx.x = vertex1x; - if (vertex1y > parsedAABB.maxVx.y) parsedAABB.maxVx.y = vertex1y; - if (vertex1z > parsedAABB.maxVx.z) parsedAABB.maxVx.z = vertex1z; - if (vertex2x > parsedAABB.maxVx.x) parsedAABB.maxVx.x = vertex2x; - if (vertex2y > parsedAABB.maxVx.y) parsedAABB.maxVx.y = vertex2y; - if (vertex2z > parsedAABB.maxVx.z) parsedAABB.maxVx.z = vertex2z; + if (localAABB.minX < parsedAABB.minVx.x) parsedAABB.minVx.x = localAABB.minX; + if (localAABB.minY < parsedAABB.minVx.y) parsedAABB.minVx.y = localAABB.minY; + if (localAABB.minZ < parsedAABB.minVx.z) parsedAABB.minVx.z = localAABB.minZ; + if (localAABB.maxX > parsedAABB.maxVx.x) parsedAABB.maxVx.x = localAABB.maxX; + if (localAABB.maxY > parsedAABB.maxVx.y) parsedAABB.maxVx.y = localAABB.maxY; + if (localAABB.maxZ > parsedAABB.maxVx.z) parsedAABB.maxVx.z = localAABB.maxZ; } parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); @@ -631,7 +779,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa return {}; const auto hashStart = clock_t::now(); - 
CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); + stlRecomputeContentHashesParallel(geometry.get()); hashMs = std::chrono::duration(clock_t::now() - hashStart).count(); const auto aabbStart = clock_t::now(); @@ -713,21 +861,25 @@ bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste if (!_file || _file->getSize() <= StlTextProbeBytes) return false; - char header[StlTextProbeBytes] = {}; - if (!stlReadExact(_file, header, 0ull, sizeof(header))) - return false; - - if (std::strncmp(header, "solid ", StlTextProbeBytes) == 0) - return true; + const size_t fileSize = _file->getSize(); + if (fileSize < StlBinaryPrefixBytes) + { + char header[StlTextProbeBytes] = {}; + if (!stlReadExact(_file, header, 0ull, sizeof(header))) + return false; + return std::strncmp(header, "solid ", StlTextProbeBytes) == 0; + } - if (_file->getSize() < StlBinaryPrefixBytes) + std::array prefix = {}; + if (!stlReadExact(_file, prefix.data(), 0ull, prefix.size())) return false; uint32_t triangleCount = 0u; - if (!stlReadExact(_file, &triangleCount, StlBinaryHeaderBytes, sizeof(triangleCount))) - return false; + std::memcpy(&triangleCount, prefix.data() + StlBinaryHeaderBytes, sizeof(triangleCount)); + if (std::memcmp(prefix.data(), "solid ", StlTextProbeBytes) == 0) + return true; - return _file->getSize() == (StlTriangleRecordBytes * triangleCount + StlBinaryPrefixBytes); + return fileSize == (StlTriangleRecordBytes * triangleCount + StlBinaryPrefixBytes); } } diff --git a/src/nbl/system/CSystemWin32.cpp b/src/nbl/system/CSystemWin32.cpp index 2798b4fb27..cd4ba4de1d 100644 --- a/src/nbl/system/CSystemWin32.cpp +++ b/src/nbl/system/CSystemWin32.cpp @@ -41,6 +41,7 @@ ISystem::SystemInfo CSystemWin32::getSystemInfo() const core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std::filesystem::path& filename, const core::bitflag flags) { + core::bitflag effectiveFlags = flags; const bool writeAccess = flags.value&IFile::ECF_WRITE; const 
DWORD fileAccess = ((flags.value&IFile::ECF_READ) ? FILE_GENERIC_READ:0)|(writeAccess ? FILE_GENERIC_WRITE:0); DWORD shareMode = FILE_SHARE_READ; @@ -73,38 +74,37 @@ core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std: For now it equals the size of a file so it'll work fine for archive reading, but if we try to write outside those boungs, things will go bad. */ - _fileMappingObj = CreateFileMappingA(_native,nullptr,writeAccess ? PAGE_READWRITE:PAGE_READONLY, 0, 0, filename.string().c_str()); + _fileMappingObj = CreateFileMappingA(_native,nullptr,writeAccess ? PAGE_READWRITE:PAGE_READONLY, 0, 0, nullptr); if (!_fileMappingObj) { - CloseHandle(_native); - return nullptr; + effectiveFlags.value &= ~(IFile::ECF_COHERENT | IFile::ECF_MAPPABLE); } - DWORD hi = 0; - size_t size = GetFileSize(_native,&hi); - size |= size_t(hi) << 32ull; - switch (flags.value&IFile::ECF_READ_WRITE) + else { - case IFile::ECF_READ: - _mappedPtr = MapViewOfFile(_fileMappingObj,FILE_MAP_READ,0,0,size); - break; - case IFile::ECF_WRITE: - _mappedPtr = MapViewOfFile(_fileMappingObj,FILE_MAP_WRITE,0,0,size); - break; - case IFile::ECF_READ_WRITE: - _mappedPtr = MapViewOfFile(_fileMappingObj,FILE_MAP_ALL_ACCESS,0,0,size); - break; - default: - assert(false); // should never happen - break; - } - if (!_mappedPtr) - { - CloseHandle(_native); - CloseHandle(_fileMappingObj); - return nullptr; - } + switch (flags.value&IFile::ECF_READ_WRITE) + { + case IFile::ECF_READ: + _mappedPtr = MapViewOfFile(_fileMappingObj,FILE_MAP_READ,0,0,0); + break; + case IFile::ECF_WRITE: + _mappedPtr = MapViewOfFile(_fileMappingObj,FILE_MAP_WRITE,0,0,0); + break; + case IFile::ECF_READ_WRITE: + _mappedPtr = MapViewOfFile(_fileMappingObj,FILE_MAP_ALL_ACCESS,0,0,0); + break; + default: + assert(false); // should never happen + break; + } + if (!_mappedPtr) + { + CloseHandle(_fileMappingObj); + _fileMappingObj = nullptr; + effectiveFlags.value &= ~(IFile::ECF_COHERENT | IFile::ECF_MAPPABLE); + } + } } - return 
core::make_smart_refctd_ptr(core::smart_refctd_ptr(m_system),path(filename),flags,_mappedPtr,_native,_fileMappingObj); + return core::make_smart_refctd_ptr(core::smart_refctd_ptr(m_system),path(filename),effectiveFlags,_mappedPtr,_native,_fileMappingObj); } bool isDebuggerAttached() From f6d8ae6dd088e634041ea61113963d536b1b13dd Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Feb 2026 07:37:46 +0100 Subject: [PATCH 013/118] Optimize mesh loader baseline and hash fallback --- .../asset/interchange/COBJMeshFileLoader.cpp | 509 +++++++++++------- .../asset/interchange/CPLYMeshFileLoader.cpp | 182 +++++-- .../asset/interchange/CSTLMeshFileLoader.cpp | 203 +++++-- src/nbl/video/utilities/CAssetConverter.cpp | 9 +- 4 files changed, 628 insertions(+), 275 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index d3b944f0c3..7a98146e7f 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -84,17 +84,6 @@ NBL_FORCE_INLINE bool parseObjFloat(const char*& ptr, const char* const end, flo if (start >= end) return false; - auto parseWithFallback = [&]() -> bool - { - const auto parseResult = fast_float::from_chars(start, end, out); - if (parseResult.ec == std::errc() && parseResult.ptr != start) - { - ptr = parseResult.ptr; - return true; - } - return false; - }; - const char* p = start; bool negative = false; if (*p == '-' || *p == '+') @@ -105,10 +94,16 @@ NBL_FORCE_INLINE bool parseObjFloat(const char*& ptr, const char* const end, flo return false; } - if (*p == '.') - return parseWithFallback(); - if (!isObjDigit(*p)) - return parseWithFallback(); + if (*p == '.' 
|| !isObjDigit(*p)) + { + const auto parseResult = fast_float::from_chars(start, end, out); + if (parseResult.ec == std::errc() && parseResult.ptr != start) + { + ptr = parseResult.ptr; + return true; + } + return false; + } uint64_t integerPart = 0ull; while (p < end && isObjDigit(*p)) @@ -120,18 +115,79 @@ NBL_FORCE_INLINE bool parseObjFloat(const char*& ptr, const char* const end, flo double value = static_cast(integerPart); if (p < end && *p == '.') { + const char* const dot = p; + if ((dot + 7) <= end) + { + const char d0 = dot[1]; + const char d1 = dot[2]; + const char d2 = dot[3]; + const char d3 = dot[4]; + const char d4 = dot[5]; + const char d5 = dot[6]; + if ( + isObjDigit(d0) && isObjDigit(d1) && isObjDigit(d2) && + isObjDigit(d3) && isObjDigit(d4) && isObjDigit(d5) + ) + { + const bool hasNext = (dot + 7) < end; + const char next = hasNext ? dot[7] : '\0'; + if ((!hasNext || !isObjDigit(next)) && (!hasNext || (next != 'e' && next != 'E'))) + { + const uint32_t frac = + static_cast(d0 - '0') * 100000u + + static_cast(d1 - '0') * 10000u + + static_cast(d2 - '0') * 1000u + + static_cast(d3 - '0') * 100u + + static_cast(d4 - '0') * 10u + + static_cast(d5 - '0'); + value += static_cast(frac) * 1e-6; + p = dot + 7; + out = static_cast(negative ? 
-value : value); + ptr = p; + return true; + } + } + } + + static constexpr double InvPow10[] = { + 1.0, + 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, + 1e-6, 1e-7, 1e-8, 1e-9, 1e-10, + 1e-11, 1e-12, 1e-13, 1e-14, 1e-15, + 1e-16, 1e-17, 1e-18 + }; ++p; - double scale = 0.1; + uint64_t fractionPart = 0ull; + uint32_t fractionDigits = 0u; while (p < end && isObjDigit(*p)) { - value += static_cast(*p - '0') * scale; - scale *= 0.1; + if (fractionDigits >= (std::size(InvPow10) - 1u)) + { + const auto parseResult = fast_float::from_chars(start, end, out); + if (parseResult.ec == std::errc() && parseResult.ptr != start) + { + ptr = parseResult.ptr; + return true; + } + return false; + } + fractionPart = fractionPart * 10ull + static_cast(*p - '0'); + ++fractionDigits; ++p; } + value += static_cast(fractionPart) * InvPow10[fractionDigits]; } if (p < end && (*p == 'e' || *p == 'E')) - return parseWithFallback(); + { + const auto parseResult = fast_float::from_chars(start, end, out); + if (parseResult.ec == std::errc() && parseResult.ptr != start) + { + ptr = parseResult.ptr; + return true; + } + return false; + } out = static_cast(negative ? -value : value); ptr = p; @@ -654,13 +710,56 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as positions.reserve(estimatedAttributeCount); normals.reserve(estimatedAttributeCount); uvs.reserve(estimatedAttributeCount); - outPositions.reserve(estimatedOutVertexCount); - outNormals.reserve(estimatedOutVertexCount); - outUVs.reserve(estimatedOutVertexCount); - if (estimatedOutIndexCount != std::numeric_limits::max()) - indices.reserve(estimatedOutIndexCount); + const size_t initialOutVertexCapacity = std::max(1ull, estimatedOutVertexCount); + const size_t initialOutIndexCapacity = (estimatedOutIndexCount == std::numeric_limits::max()) ? 
3ull : std::max(3ull, estimatedOutIndexCount); + outPositions.resize(initialOutVertexCapacity); + outNormals.resize(initialOutVertexCapacity); + outUVs.resize(initialOutVertexCapacity); + indices.resize(initialOutIndexCapacity); dedupHeadByPos.reserve(estimatedAttributeCount); - dedupNodes.reserve(estimatedOutVertexCount); + dedupNodes.resize(initialOutVertexCapacity); + size_t outVertexWriteCount = 0ull; + size_t outIndexWriteCount = 0ull; + size_t dedupNodeCount = 0ull; + + auto allocateOutVertex = [&](uint32_t& outIx) -> bool + { + if (outVertexWriteCount >= outPositions.size()) + { + const size_t newCapacity = std::max(outVertexWriteCount + 1ull, outPositions.size() * 2ull); + outPositions.resize(newCapacity); + outNormals.resize(newCapacity); + outUVs.resize(newCapacity); + } + if (outVertexWriteCount > static_cast(std::numeric_limits::max())) + return false; + outIx = static_cast(outVertexWriteCount++); + return true; + }; + + auto appendIndex = [&](const uint32_t value) -> bool + { + if (outIndexWriteCount >= indices.size()) + { + const size_t newCapacity = std::max(outIndexWriteCount + 1ull, indices.size() * 2ull); + indices.resize(newCapacity); + } + indices[outIndexWriteCount++] = value; + return true; + }; + + auto allocateDedupNode = [&]() -> int32_t + { + if (dedupNodeCount >= dedupNodes.size()) + { + const size_t newCapacity = std::max(dedupNodeCount + 1ull, dedupNodes.size() * 2ull); + dedupNodes.resize(newCapacity); + } + if (dedupNodeCount > static_cast(std::numeric_limits::max())) + return -1; + const int32_t ix = static_cast(dedupNodeCount++); + return ix; + }; bool hasNormals = false; bool hasUVs = false; @@ -689,17 +788,20 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as nodeIx = node.next; } - outIx = static_cast(outPositions.size()); - ObjVertexDedupNode node = {}; + if (!allocateOutVertex(outIx)) + return false; + const int32_t newNodeIx = allocateDedupNode(); + if (newNodeIx < 0) + return false; + auto& 
node = dedupNodes[static_cast(newNodeIx)]; node.uv = idx[1]; node.normal = idx[2]; node.outIndex = outIx; node.next = dedupHeadByPos[posIx]; - dedupNodes.push_back(node); - dedupHeadByPos[posIx] = static_cast(dedupNodes.size() - 1ull); + dedupHeadByPos[posIx] = newNodeIx; const auto& srcPos = positions[idx[0]]; - outPositions.push_back(srcPos); + outPositions[static_cast(outIx)] = srcPos; extendAABB(parsedAABB, hasParsedAABB, srcPos); Float2 uv(0.f, 0.f); @@ -708,7 +810,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as uv = uvs[idx[1]]; hasUVs = true; } - outUVs.push_back(uv); + outUVs[static_cast(outIx)] = uv; Float3 normal(0.f, 0.f, 1.f); if (idx[2] >= 0 && static_cast(idx[2]) < normals.size()) @@ -716,7 +818,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as normal = normals[idx[2]]; hasNormals = true; } - outNormals.push_back(normal); + outNormals[static_cast(outIx)] = normal; return true; }; auto acquireCornerIndexPositiveTriplet = [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, uint32_t& outIx)->bool @@ -733,212 +835,251 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as nodeIx = node.next; } - outIx = static_cast(outPositions.size()); - ObjVertexDedupNode node = {}; + if (!allocateOutVertex(outIx)) + return false; + const int32_t newNodeIx = allocateDedupNode(); + if (newNodeIx < 0) + return false; + auto& node = dedupNodes[static_cast(newNodeIx)]; node.uv = uvIx; node.normal = normalIx; node.outIndex = outIx; node.next = dedupHeadByPos[static_cast(posIx)]; - dedupNodes.push_back(node); - dedupHeadByPos[static_cast(posIx)] = static_cast(dedupNodes.size() - 1ull); + dedupHeadByPos[static_cast(posIx)] = newNodeIx; const auto& srcPos = positions[static_cast(posIx)]; - outPositions.push_back(srcPos); + outPositions[static_cast(outIx)] = srcPos; extendAABB(parsedAABB, hasParsedAABB, srcPos); - outUVs.push_back(uvs[static_cast(uvIx)]); - 
outNormals.push_back(normals[static_cast(normalIx)]); + outUVs[static_cast(outIx)] = uvs[static_cast(uvIx)]; + outNormals[static_cast(outIx)] = normals[static_cast(normalIx)]; hasUVs = true; hasNormals = true; return true; }; + + const bool trackStages = + _params.logger.get() != nullptr && + ((_params.logger.get()->getLogLevelMask() & system::ILogger::ELL_PERFORMANCE).value != 0u); const auto parseStart = clock_t::now(); while (bufPtr < bufEnd) { - const char* const lineStart = bufPtr; - const char* lineTerminator = lineStart; - while (lineTerminator < bufEnd && *lineTerminator != '\n' && *lineTerminator != '\r') - ++lineTerminator; + const char* const lineStart = bufPtr; + const char* lineTerminator = lineStart; + while (lineTerminator < bufEnd && *lineTerminator != '\n' && *lineTerminator != '\r') + ++lineTerminator; - const char* lineEnd = lineTerminator; + const char* lineEnd = lineTerminator; - if (lineStart < lineEnd) - { - if (*lineStart == 'v') + if (lineStart < lineEnd) { - if ((lineStart + 1) < lineEnd && lineStart[1] == ' ') + if (*lineStart == 'v') { - Float3 vec{}; - const char* ptr = lineStart + 2; - for (uint32_t i = 0u; i < 3u; ++i) + if ((lineStart + 1) < lineEnd && lineStart[1] == ' ') { - while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) - ++ptr; - if (ptr >= lineEnd) - return {}; - if (!parseObjFloat(ptr, lineEnd, (&vec.x)[i])) - return {}; + const auto stageStart = trackStages ? 
clock_t::now() : clock_t::time_point{}; + Float3 vec{}; + const char* ptr = lineStart + 2; + for (uint32_t i = 0u; i < 3u; ++i) + { + while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) + ++ptr; + if (ptr >= lineEnd) + return {}; + if (!parseObjFloat(ptr, lineEnd, (&vec.x)[i])) + return {}; + } + positions.push_back(vec); + dedupHeadByPos.push_back(-1); + if (trackStages) + parseVms += std::chrono::duration(clock_t::now() - stageStart).count(); } - positions.push_back(vec); - dedupHeadByPos.push_back(-1); - } - else if ((lineStart + 2) < lineEnd && lineStart[1] == 'n' && isObjInlineWhitespace(lineStart[2])) - { - Float3 vec{}; - const char* ptr = lineStart + 3; - for (uint32_t i = 0u; i < 3u; ++i) + else if ((lineStart + 2) < lineEnd && lineStart[1] == 'n' && isObjInlineWhitespace(lineStart[2])) { - while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) - ++ptr; - if (ptr >= lineEnd) - return {}; - if (!parseObjFloat(ptr, lineEnd, (&vec.x)[i])) - return {}; + const auto stageStart = trackStages ? clock_t::now() : clock_t::time_point{}; + Float3 vec{}; + const char* ptr = lineStart + 3; + for (uint32_t i = 0u; i < 3u; ++i) + { + while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) + ++ptr; + if (ptr >= lineEnd) + return {}; + if (!parseObjFloat(ptr, lineEnd, (&vec.x)[i])) + return {}; + } + normals.push_back(vec); + if (trackStages) + parseVNms += std::chrono::duration(clock_t::now() - stageStart).count(); } - normals.push_back(vec); - } - else if ((lineStart + 2) < lineEnd && lineStart[1] == 't' && isObjInlineWhitespace(lineStart[2])) - { - Float2 vec{}; - const char* ptr = lineStart + 3; - for (uint32_t i = 0u; i < 2u; ++i) + else if ((lineStart + 2) < lineEnd && lineStart[1] == 't' && isObjInlineWhitespace(lineStart[2])) { - while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) - ++ptr; - if (ptr >= lineEnd) - return {}; - if (!parseObjFloat(ptr, lineEnd, (&vec.x)[i])) - return {}; + const auto stageStart = trackStages ? 
clock_t::now() : clock_t::time_point{}; + Float2 vec{}; + const char* ptr = lineStart + 3; + for (uint32_t i = 0u; i < 2u; ++i) + { + while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) + ++ptr; + if (ptr >= lineEnd) + return {}; + if (!parseObjFloat(ptr, lineEnd, (&vec.x)[i])) + return {}; + } + vec.y = 1.f - vec.y; + uvs.push_back(vec); + if (trackStages) + parseVTms += std::chrono::duration(clock_t::now() - stageStart).count(); } - vec.y = 1.f - vec.y; - uvs.push_back(vec); } - } - else if (*lineStart == 'f' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) - { - if (positions.empty()) - return {}; - ++faceCount; - const size_t posCount = positions.size(); - const size_t uvCount = uvs.size(); - const size_t normalCount = normals.size(); - const char* triLinePtr = lineStart + 1; - int32_t triIdx0[3] = { -1, -1, -1 }; - int32_t triIdx1[3] = { -1, -1, -1 }; - int32_t triIdx2[3] = { -1, -1, -1 }; - bool triangleFastPath = parseObjTrianglePositiveTripletLine(lineStart + 1, lineEnd, triIdx0, triIdx1, triIdx2, posCount, uvCount, normalCount); - bool parsedFirstThree = triangleFastPath; - if (!triangleFastPath) + else if (*lineStart == 'f' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) { - triLinePtr = lineStart + 1; - parsedFirstThree = - parseObjFaceVertexTokenFast(triLinePtr, lineEnd, triIdx0, posCount, uvCount, normalCount) && - parseObjFaceVertexTokenFast(triLinePtr, lineEnd, triIdx1, posCount, uvCount, normalCount) && - parseObjFaceVertexTokenFast(triLinePtr, lineEnd, triIdx2, posCount, uvCount, normalCount); - triangleFastPath = parsedFirstThree; - if (parsedFirstThree) + const auto faceStart = trackStages ? 
clock_t::now() : clock_t::time_point{}; + if (positions.empty()) + return {}; + ++faceCount; + const size_t posCount = positions.size(); + const size_t uvCount = uvs.size(); + const size_t normalCount = normals.size(); + const char* triLinePtr = lineStart + 1; + int32_t triIdx0[3] = { -1, -1, -1 }; + int32_t triIdx1[3] = { -1, -1, -1 }; + int32_t triIdx2[3] = { -1, -1, -1 }; + bool triangleFastPath = parseObjTrianglePositiveTripletLine(lineStart + 1, lineEnd, triIdx0, triIdx1, triIdx2, posCount, uvCount, normalCount); + bool parsedFirstThree = triangleFastPath; + if (!triangleFastPath) { - while (triLinePtr < lineEnd && isObjInlineWhitespace(*triLinePtr)) - ++triLinePtr; - triangleFastPath = (triLinePtr == lineEnd); + triLinePtr = lineStart + 1; + parsedFirstThree = + parseObjFaceVertexTokenFast(triLinePtr, lineEnd, triIdx0, posCount, uvCount, normalCount) && + parseObjFaceVertexTokenFast(triLinePtr, lineEnd, triIdx1, posCount, uvCount, normalCount) && + parseObjFaceVertexTokenFast(triLinePtr, lineEnd, triIdx2, posCount, uvCount, normalCount); + triangleFastPath = parsedFirstThree; + if (parsedFirstThree) + { + while (triLinePtr < lineEnd && isObjInlineWhitespace(*triLinePtr)) + ++triLinePtr; + triangleFastPath = (triLinePtr == lineEnd); + } } - } - if (triangleFastPath) - { - uint32_t c0 = 0u; - uint32_t c1 = 0u; - uint32_t c2 = 0u; - if (!acquireCornerIndexPositiveTriplet(triIdx0[0], triIdx0[1], triIdx0[2], c0)) - return {}; - if (!acquireCornerIndexPositiveTriplet(triIdx1[0], triIdx1[1], triIdx1[2], c1)) - return {}; - if (!acquireCornerIndexPositiveTriplet(triIdx2[0], triIdx2[1], triIdx2[2], c2)) - return {}; - faceFastTokenCount += 3u; - indices.push_back(c2); - indices.push_back(c1); - indices.push_back(c0); - } - else - { - const char* linePtr = lineStart + 1; - uint32_t firstCorner = 0u; - uint32_t previousCorner = 0u; - uint32_t cornerCount = 0u; - - if (parsedFirstThree) + if (triangleFastPath) { + const auto dedupStart = trackStages ? 
clock_t::now() : clock_t::time_point{}; uint32_t c0 = 0u; uint32_t c1 = 0u; uint32_t c2 = 0u; - if (!acquireCornerIndex(triIdx0, c0)) + if (!acquireCornerIndexPositiveTriplet(triIdx0[0], triIdx0[1], triIdx0[2], c0)) return {}; - if (!acquireCornerIndex(triIdx1, c1)) + if (!acquireCornerIndexPositiveTriplet(triIdx1[0], triIdx1[1], triIdx1[2], c1)) return {}; - if (!acquireCornerIndex(triIdx2, c2)) + if (!acquireCornerIndexPositiveTriplet(triIdx2[0], triIdx2[1], triIdx2[2], c2)) return {}; + if (trackStages) + dedupMs += std::chrono::duration(clock_t::now() - dedupStart).count(); faceFastTokenCount += 3u; - indices.push_back(c2); - indices.push_back(c1); - indices.push_back(c0); - firstCorner = c0; - previousCorner = c2; - cornerCount = 3u; - linePtr = triLinePtr; + const auto emitStart = trackStages ? clock_t::now() : clock_t::time_point{}; + if (!appendIndex(c2) || !appendIndex(c1) || !appendIndex(c0)) + return {}; + if (trackStages) + emitMs += std::chrono::duration(clock_t::now() - emitStart).count(); } - - while (linePtr < lineEnd) + else { - while (linePtr < lineEnd && isObjInlineWhitespace(*linePtr)) - ++linePtr; - if (linePtr >= lineEnd) - break; + const char* linePtr = lineStart + 1; + uint32_t firstCorner = 0u; + uint32_t previousCorner = 0u; + uint32_t cornerCount = 0u; - int32_t idx[3] = { -1, -1, -1 }; - if (!parseObjFaceVertexTokenFast(linePtr, lineEnd, idx, posCount, uvCount, normalCount)) - return {}; - ++faceFastTokenCount; - - uint32_t cornerIx = 0u; - if (!acquireCornerIndex(idx, cornerIx)) - return {}; - - if (cornerCount == 0u) + if (parsedFirstThree) { - firstCorner = cornerIx; - ++cornerCount; - continue; + const auto dedupStart = trackStages ? 
clock_t::now() : clock_t::time_point{}; + uint32_t c0 = 0u; + uint32_t c1 = 0u; + uint32_t c2 = 0u; + if (!acquireCornerIndex(triIdx0, c0)) + return {}; + if (!acquireCornerIndex(triIdx1, c1)) + return {}; + if (!acquireCornerIndex(triIdx2, c2)) + return {}; + if (trackStages) + dedupMs += std::chrono::duration(clock_t::now() - dedupStart).count(); + faceFastTokenCount += 3u; + const auto emitStart = trackStages ? clock_t::now() : clock_t::time_point{}; + if (!appendIndex(c2) || !appendIndex(c1) || !appendIndex(c0)) + return {}; + if (trackStages) + emitMs += std::chrono::duration(clock_t::now() - emitStart).count(); + firstCorner = c0; + previousCorner = c2; + cornerCount = 3u; + linePtr = triLinePtr; } - if (cornerCount == 1u) + while (linePtr < lineEnd) { + while (linePtr < lineEnd && isObjInlineWhitespace(*linePtr)) + ++linePtr; + if (linePtr >= lineEnd) + break; + + int32_t idx[3] = { -1, -1, -1 }; + if (!parseObjFaceVertexTokenFast(linePtr, lineEnd, idx, posCount, uvCount, normalCount)) + return {}; + ++faceFastTokenCount; + + const auto dedupStart = trackStages ? clock_t::now() : clock_t::time_point{}; + uint32_t cornerIx = 0u; + if (!acquireCornerIndex(idx, cornerIx)) + return {}; + if (trackStages) + dedupMs += std::chrono::duration(clock_t::now() - dedupStart).count(); + + if (cornerCount == 0u) + { + firstCorner = cornerIx; + ++cornerCount; + continue; + } + + if (cornerCount == 1u) + { + previousCorner = cornerIx; + ++cornerCount; + continue; + } + + const auto emitStart = trackStages ? 
clock_t::now() : clock_t::time_point{}; + if (!appendIndex(cornerIx) || !appendIndex(previousCorner) || !appendIndex(firstCorner)) + return {}; + if (trackStages) + emitMs += std::chrono::duration(clock_t::now() - emitStart).count(); previousCorner = cornerIx; ++cornerCount; - continue; } - - indices.push_back(cornerIx); - indices.push_back(previousCorner); - indices.push_back(firstCorner); - previousCorner = cornerIx; - ++cornerCount; } + if (trackStages) + parseFaceMs += std::chrono::duration(clock_t::now() - faceStart).count(); } } - } - if (lineTerminator >= bufEnd) - bufPtr = bufEnd; - else if (*lineTerminator == '\r' && (lineTerminator + 1) < bufEnd && lineTerminator[1] == '\n') - bufPtr = lineTerminator + 2; - else - bufPtr = lineTerminator + 1; + if (lineTerminator >= bufEnd) + bufPtr = bufEnd; + else if (*lineTerminator == '\r' && (lineTerminator + 1) < bufEnd && lineTerminator[1] == '\n') + bufPtr = lineTerminator + 2; + else + bufPtr = lineTerminator + 1; } parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); const double parseScanMs = std::max(0.0, parseMs - (parseVms + parseVNms + parseVTms + parseFaceMs + dedupMs + emitMs)); - if (outPositions.empty()) + if (outVertexWriteCount == 0ull) return {}; + outPositions.resize(outVertexWriteCount); + outNormals.resize(outVertexWriteCount); + outUVs.resize(outVertexWriteCount); + indices.resize(outIndexWriteCount); + const size_t outVertexCount = outPositions.size(); const size_t outIndexCount = indices.size(); const auto buildStart = clock_t::now(); @@ -993,9 +1134,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } buildMs = std::chrono::duration(clock_t::now() - buildStart).count(); - const auto hashStart = clock_t::now(); - CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); - hashMs = std::chrono::duration(clock_t::now() - hashStart).count(); + hashMs = 0.0; const auto aabbStart = clock_t::now(); if (hasParsedAABB) diff --git 
a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index e85e243b3e..1ba1b30457 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -73,6 +73,84 @@ T byteswap(const T& v) return retval; } +template +void plyRunParallelWorkers(const size_t workerCount, Fn&& fn) +{ + if (workerCount <= 1ull) + { + fn(0ull); + return; + } + std::vector workers; + workers.reserve(workerCount - 1ull); + for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) + { + workers.emplace_back([&fn, workerIx]() + { + fn(workerIx); + }); + } + fn(0ull); + for (auto& worker : workers) + worker.join(); +} + +class CPLYMappedFileMemoryResource final : public core::refctd_memory_resource +{ + public: + explicit CPLYMappedFileMemoryResource(core::smart_refctd_ptr&& file) : m_file(std::move(file)) + { + } + + inline void* allocate(std::size_t bytes, std::size_t alignment) override + { + (void)bytes; + (void)alignment; + assert(false); + return nullptr; + } + + inline void deallocate(void* p, std::size_t bytes, std::size_t alignment) override + { + (void)p; + (void)bytes; + (void)alignment; + } + + private: + core::smart_refctd_ptr m_file; +}; + +IGeometry::SDataView plyCreateMappedF32x3View(system::IFile* file, void* ptr, const size_t byteCount) +{ + if (!file || !ptr || byteCount == 0ull) + return {}; + + auto keepAliveResource = core::make_smart_refctd_ptr(core::smart_refctd_ptr(file)); + auto buffer = ICPUBuffer::create({ + { byteCount }, + ptr, + core::smart_refctd_ptr(keepAliveResource), + alignof(float) + }, core::adopt_memory); + if (!buffer) + return {}; + + IGeometry::SDataView view = { + .composed = { + .stride = sizeof(float) * 3ull, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32G32B32_SFLOAT) + }, + .src = { + .offset = 0ull, + .size = byteCount, + .buffer = std::move(buffer) + } + }; + return view; +} + 
IGeometry::SDataView plyCreateAdoptedU32IndexView(core::vector&& indices) { if (indices.empty()) @@ -172,25 +250,17 @@ void plyRecomputeContentHashesParallel(ICPUPolygonGeometry* geometry) return; } - std::vector workers; - workers.reserve(workerCount > 0ull ? (workerCount - 1ull) : 0ull); - auto hashRange = [&buffers](const size_t beginIx, const size_t endIx) -> void + auto hashWorker = [&buffers, workerCount](const size_t workerIx) -> void { + const size_t beginIx = (buffers.size() * workerIx) / workerCount; + const size_t endIx = (buffers.size() * (workerIx + 1ull)) / workerCount; for (size_t i = beginIx; i < endIx; ++i) { auto& buffer = buffers[i]; buffer->setContentHash(buffer->computeContentHash()); } }; - for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) - { - const size_t begin = (buffers.size() * workerIx) / workerCount; - const size_t end = (buffers.size() * (workerIx + 1ull)) / workerCount; - workers.emplace_back(hashRange, begin, end); - } - hashRange(0ull, buffers.size() / workerCount); - for (auto& worker : workers) - worker.join(); + plyRunParallelWorkers(workerCount, hashWorker); } struct SContext @@ -282,6 +352,7 @@ struct SContext Buffer.resize(ioReadWindowSize + ReadWindowPaddingBytes, '\0'); EndPointer = StartPointer = Buffer.data(); LineEndPointer = EndPointer-1; + UsingMappedBinaryWindow = false; fillBuffer(); } @@ -426,6 +497,7 @@ struct SContext LineEndPointer = StartPointer - 1; WordLength = -1; EndOfFile = true; + UsingMappedBinaryWindow = true; fileOffset = inner.mainFile ? inner.mainFile->getSize() : fileOffset; } // skips x bytes in the file, getting more data if required @@ -1055,17 +1127,16 @@ struct SContext if (is32Bit) { const size_t hw = std::thread::hardware_concurrency(); - const size_t maxWorkersByWork = std::max(1ull, minBytesNeeded / (1ull << 20)); + const size_t maxWorkersByWork = std::max(1ull, minBytesNeeded / (512ull << 10)); size_t workerCount = hw ? 
std::min(hw, maxWorkersByWork) : 1ull; if (workerCount > 1ull) { const size_t recordBytes = sizeof(uint8_t) + 3ull * sizeof(uint32_t); - const bool needMax = hasVertexCount || trackMaxIndex; + const bool needMax = true; + const bool validateAgainstVertexCount = hasVertexCount; std::vector workerNonTriangle(workerCount, 0u); std::vector workerInvalid(workerCount, 0u); std::vector workerMax(needMax ? workerCount : 0ull, 0u); - std::vector workers; - workers.reserve(workerCount > 0ull ? (workerCount - 1ull) : 0ull); auto parseChunk = [&](const size_t workerIx, const size_t beginFace, const size_t endFace) -> void { const uint8_t* in = ptr + beginFace * recordBytes; @@ -1086,31 +1157,25 @@ struct SContext const uint32_t i2 = outLocal[2]; if (isSrcS32) localSignBits |= (i0 | i1 | i2); - if (needMax) - { - if (i0 > localMax) localMax = i0; - if (i1 > localMax) localMax = i1; - if (i2 > localMax) localMax = i2; - } + if (i0 > localMax) localMax = i0; + if (i1 > localMax) localMax = i1; + if (i2 > localMax) localMax = i2; in += 3ull * sizeof(uint32_t); outLocal += 3ull; } if (isSrcS32 && (localSignBits & 0x80000000u)) workerInvalid[workerIx] = 1u; - if (hasVertexCount && needMax && localMax >= vertexCount) + if (validateAgainstVertexCount && localMax >= vertexCount) workerInvalid[workerIx] = 1u; if (needMax) workerMax[workerIx] = localMax; }; - for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) + plyRunParallelWorkers(workerCount, [&](const size_t workerIx) { const size_t begin = (element.Count * workerIx) / workerCount; const size_t end = (element.Count * (workerIx + 1ull)) / workerCount; - workers.emplace_back(parseChunk, workerIx, begin, end); - } - parseChunk(0ull, 0ull, element.Count / workerCount); - for (auto& worker : workers) - worker.join(); + parseChunk(workerIx, begin, end); + }); const bool anyNonTriangle = std::any_of(workerNonTriangle.begin(), workerNonTriangle.end(), [](const uint8_t v) { return v != 0u; }); if (anyNonTriangle) @@ -1163,6 
+1228,7 @@ struct SContext } else { + uint32_t localMax = 0u; for (size_t j = 0u; j < element.Count; ++j) { const uint8_t c = *ptr++; @@ -1173,10 +1239,13 @@ struct SContext } std::memcpy(out, ptr, 3ull * sizeof(uint32_t)); ptr += 3ull * sizeof(uint32_t); - if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) - return EFastFaceReadResult::Error; + if (out[0] > localMax) localMax = out[0]; + if (out[1] > localMax) localMax = out[1]; + if (out[2] > localMax) localMax = out[2]; out += 3; } + if (!fallbackToGeneric && localMax >= vertexCount) + return EFastFaceReadResult::Error; } } else if (trackMaxIndex) @@ -1201,6 +1270,8 @@ struct SContext } else { + uint32_t localMax = 0u; + uint32_t localSignBits = 0u; for (size_t j = 0u; j < element.Count; ++j) { const uint8_t c = *ptr++; @@ -1211,11 +1282,18 @@ struct SContext } std::memcpy(out, ptr, 3ull * sizeof(uint32_t)); ptr += 3ull * sizeof(uint32_t); - if ((out[0] | out[1] | out[2]) & 0x80000000u) + localSignBits |= (out[0] | out[1] | out[2]); + if (out[0] > localMax) localMax = out[0]; + if (out[1] > localMax) localMax = out[1]; + if (out[2] > localMax) localMax = out[2]; + out += 3; + } + if (!fallbackToGeneric) + { + if (localSignBits & 0x80000000u) return EFastFaceReadResult::Error; - if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + if (localMax >= vertexCount) return EFastFaceReadResult::Error; - out += 3; } } } @@ -1461,6 +1539,7 @@ struct SContext int32_t LineLength = 0; int32_t WordLength = -1; // this variable is a misnomer, its really the offset to next word minus one bool IsBinaryFile = false, IsWrongEndian = false, EndOfFile = false; + bool UsingMappedBinaryWindow = false; size_t fileOffset = {}; uint64_t readCallCount = 0ull; uint64_t readBytesTotal = 0ull; @@ -1708,6 +1787,37 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa _params.logger.log("Multiple `vertex` elements not supported!", system::ILogger::ELL_ERROR); 
return {}; } + const bool mappedXYZAliasCandidate = + ctx.IsBinaryFile && + (!ctx.IsWrongEndian) && + ctx.UsingMappedBinaryWindow && + el.Properties.size() == 3u && + el.Properties[0].type == EF_R32_SFLOAT && + el.Properties[1].type == EF_R32_SFLOAT && + el.Properties[2].type == EF_R32_SFLOAT && + el.Properties[0].Name == "x" && + el.Properties[1].Name == "y" && + el.Properties[2].Name == "z"; + if (mappedXYZAliasCandidate) + { + if (el.Count > (std::numeric_limits::max() / (sizeof(float) * 3ull))) + return {}; + const size_t mappedBytes = el.Count * sizeof(float) * 3ull; + if (ctx.StartPointer + mappedBytes > ctx.EndPointer) + return {}; + const auto vertexStart = clock_t::now(); + auto mappedPosView = plyCreateMappedF32x3View(_file, ctx.StartPointer, mappedBytes); + if (!mappedPosView) + return {}; + geometry->setPositionView(std::move(mappedPosView)); + ctx.StartPointer += mappedBytes; + ++fastVertexElementCount; + const double elapsedMs = std::chrono::duration(clock_t::now() - vertexStart).count(); + vertexFastMs += elapsedMs; + vertexMs += elapsedMs; + verticesProcessed = true; + continue; + } ICPUPolygonGeometry::SDataViewBase posView = {}, normalView = {}, uvView = {}; core::vector extraViews; const auto layoutStart = clock_t::now(); @@ -2046,9 +2156,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } indexBuildMs = std::chrono::duration(clock_t::now() - indexStart).count(); - const auto hashStart = clock_t::now(); - plyRecomputeContentHashesParallel(geometry.get()); - hashRangeMs = std::chrono::duration(clock_t::now() - hashStart).count(); + hashRangeMs = 0.0; const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); const double stageRemainderMs = std::max(0.0, totalMs - (headerMs + vertexMs + faceMs + skipMs + layoutNegotiateMs + viewCreateMs + hashRangeMs + indexBuildMs + aabbMs)); diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp 
index 900c905c64..f2fdc9e7c4 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -19,8 +19,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -71,6 +73,22 @@ constexpr size_t StlTriangleRecordBytes = StlTriangleFloatBytes + StlTriangleAtt constexpr size_t StlVerticesPerTriangle = 3ull; constexpr size_t StlFloatChannelsPerVertex = 3ull; +template +void stlRunParallelWorkers(const size_t workerCount, Fn&& fn) +{ + if (workerCount <= 1ull) + { + fn(0ull); + return; + } + core::vector workerIds(workerCount); + std::iota(workerIds.begin(), workerIds.end(), 0ull); + std::for_each(std::execution::par, workerIds.begin(), workerIds.end(), [&fn](const size_t workerIx) + { + fn(workerIx); + }); +} + bool stlReadExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SFileReadTelemetry* ioTelemetry = nullptr) { if (!file || (!dst && bytes != 0ull)) @@ -199,6 +217,48 @@ void stlPushTriangleReversed(const hlsl::float32_t3 (&p)[3], core::vector&& upstream, + void* block, + const size_t blockBytes, + const size_t alignment + ) : m_upstream(std::move(upstream)), m_block(block), m_blockBytes(blockBytes), m_alignment(alignment) + { + } + + inline void* allocate(std::size_t bytes, std::size_t alignment) override + { + assert(false); + return nullptr; + } + + inline void deallocate(void* p, std::size_t bytes, std::size_t alignment) override + { + (void)alignment; + const auto* const begin = reinterpret_cast(m_block); + const auto* const end = begin + m_blockBytes; + const auto* const ptr = reinterpret_cast(p); + assert(ptr >= begin && ptr <= end); + assert(ptr + bytes <= end); + } + + protected: + inline ~CStlSplitBlockMemoryResource() override + { + if (m_upstream && m_block) + m_upstream->deallocate(m_block, m_blockBytes, m_alignment); + } + + private: + core::smart_refctd_ptr m_upstream; + void* m_block = nullptr; + size_t m_blockBytes = 0ull; 
+ size_t m_alignment = 1ull; +}; + void stlExtendAABB(hlsl::shapes::AABB<3, hlsl::float32_t>& aabb, bool& hasAABB, const hlsl::float32_t3& p) { if (!hasAABB) @@ -289,25 +349,17 @@ void stlRecomputeContentHashesParallel(ICPUPolygonGeometry* geometry) return; } - std::vector workers; - workers.reserve(workerCount > 0ull ? (workerCount - 1ull) : 0ull); - auto hashRange = [&buffers](const size_t beginIx, const size_t endIx) -> void + auto hashWorker = [&buffers, workerCount](const size_t workerIx) -> void { + const size_t beginIx = (buffers.size() * workerIx) / workerCount; + const size_t endIx = (buffers.size() * (workerIx + 1ull)) / workerCount; for (size_t i = beginIx; i < endIx; ++i) { auto& buffer = buffers[i]; buffer->setContentHash(buffer->computeContentHash()); } }; - for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) - { - const size_t begin = (buffers.size() * workerIx) / workerCount; - const size_t end = (buffers.size() * (workerIx + 1ull)) / workerCount; - workers.emplace_back(hashRange, begin, end); - } - hashRange(0ull, buffers.size() / workerCount); - for (auto& worker : workers) - worker.join(); + stlRunParallelWorkers(workerCount, hashWorker); } CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager* _assetManager) @@ -492,16 +544,64 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa vertexCount = triangleCount * StlVerticesPerTriangle; const auto buildPrepStart = clock_t::now(); - auto posView = createView(EF_R32G32B32_SFLOAT, static_cast(vertexCount)); - auto normalView = createView(EF_R32G32B32_SFLOAT, static_cast(vertexCount)); - if (!posView || !normalView) + const size_t vertexCountSizeT = static_cast(vertexCount); + if (vertexCountSizeT > (std::numeric_limits::max() / sizeof(hlsl::float32_t3))) + return {}; + const size_t viewByteSize = vertexCountSizeT * sizeof(hlsl::float32_t3); + if (viewByteSize > (std::numeric_limits::max() - viewByteSize)) return {}; - auto* posOut = 
reinterpret_cast(posView.getPointer()); - auto* normalOut = reinterpret_cast(normalView.getPointer()); - if (!posOut || !normalOut) + const size_t blockBytes = viewByteSize * 2ull; + auto upstream = core::getDefaultMemoryResource(); + if (!upstream) + return {}; + void* block = upstream->allocate(blockBytes, alignof(float)); + if (!block) + return {}; + auto blockResource = core::make_smart_refctd_ptr( + core::smart_refctd_ptr(std::move(upstream)), + block, + blockBytes, + alignof(float)); + auto posBuffer = ICPUBuffer::create({ + { viewByteSize }, + block, + core::smart_refctd_ptr(blockResource), + alignof(float) + }, core::adopt_memory); + auto normalBuffer = ICPUBuffer::create({ + { viewByteSize }, + reinterpret_cast(block) + viewByteSize, + core::smart_refctd_ptr(blockResource), + alignof(float) + }, core::adopt_memory); + if (!posBuffer || !normalBuffer) + return {}; + ICPUPolygonGeometry::SDataView posView = {}; + posView.composed = { + .stride = sizeof(hlsl::float32_t3), + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32G32B32_SFLOAT) + }; + posView.src = { + .offset = 0ull, + .size = viewByteSize, + .buffer = std::move(posBuffer) + }; + ICPUPolygonGeometry::SDataView normalView = {}; + normalView.composed = { + .stride = sizeof(hlsl::float32_t3), + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32G32B32_SFLOAT) + }; + normalView.src = { + .offset = 0ull, + .size = viewByteSize, + .buffer = std::move(normalBuffer) + }; + auto* posOutFloat = reinterpret_cast(posView.getPointer()); + auto* normalOutFloat = reinterpret_cast(normalView.getPointer()); + if (!posOutFloat || !normalOutFloat) return {}; - auto* posOutFloat = reinterpret_cast(posOut); - auto* normalOutFloat = reinterpret_cast(normalOut); const double buildPrepMs = std::chrono::duration(clock_t::now() - buildPrepStart).count(); buildAllocViewsMs += buildPrepMs; buildMs += buildPrepMs; @@ -523,7 +623,7 @@ SAssetBundle 
CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa }; const size_t hw = std::thread::hardware_concurrency(); - const size_t maxWorkersByWork = std::max(1ull, static_cast(triangleCount / 16384ull)); + const size_t maxWorkersByWork = std::max(1ull, static_cast(triangleCount / 6144ull)); const size_t workerCount = hw ? std::max(1ull, std::min(hw, maxWorkersByWork)) : 1ull; std::vector threadAABBs(workerCount); auto parseRange = [&](const size_t workerIx, const uint64_t beginTri, const uint64_t endTri) -> void @@ -537,26 +637,32 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const uint8_t* const triRecord = localCursor; localCursor += StlTriangleRecordBytes; - float normalX = 0.f; - float normalY = 0.f; - float normalZ = 0.f; - std::memcpy(&normalX, triRecord + 0u * sizeof(float), sizeof(float)); - std::memcpy(&normalY, triRecord + 1u * sizeof(float), sizeof(float)); - std::memcpy(&normalZ, triRecord + 2u * sizeof(float), sizeof(float)); - - std::memcpy(posCursor + 0ull, triRecord + 9ull * sizeof(float), 3ull * sizeof(float)); - std::memcpy(posCursor + 3ull, triRecord + 6ull * sizeof(float), 3ull * sizeof(float)); - std::memcpy(posCursor + 6ull, triRecord + 3ull * sizeof(float), 3ull * sizeof(float)); - - const float vertex0x = posCursor[0ull]; - const float vertex0y = posCursor[1ull]; - const float vertex0z = posCursor[2ull]; - const float vertex1x = posCursor[3ull]; - const float vertex1y = posCursor[4ull]; - const float vertex1z = posCursor[5ull]; - const float vertex2x = posCursor[6ull]; - const float vertex2y = posCursor[7ull]; - const float vertex2z = posCursor[8ull]; + float triValues[StlTriangleFloatCount]; + std::memcpy(triValues, triRecord, sizeof(triValues)); + + float normalX = triValues[0ull]; + float normalY = triValues[1ull]; + float normalZ = triValues[2ull]; + + const float vertex0x = triValues[9ull]; + const float vertex0y = triValues[10ull]; + const float vertex0z = triValues[11ull]; + const float 
vertex1x = triValues[6ull]; + const float vertex1y = triValues[7ull]; + const float vertex1z = triValues[8ull]; + const float vertex2x = triValues[3ull]; + const float vertex2y = triValues[4ull]; + const float vertex2z = triValues[5ull]; + + posCursor[0ull] = vertex0x; + posCursor[1ull] = vertex0y; + posCursor[2ull] = vertex0z; + posCursor[3ull] = vertex1x; + posCursor[4ull] = vertex1y; + posCursor[5ull] = vertex1z; + posCursor[6ull] = vertex2x; + posCursor[7ull] = vertex2y; + posCursor[8ull] = vertex2z; if (!localAABB.has) { localAABB.minX = vertex0x; localAABB.maxX = vertex0x; @@ -634,17 +740,12 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (workerCount > 1ull) { - std::vector workers; - workers.reserve(workerCount > 0ull ? (workerCount - 1ull) : 0ull); - for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) + stlRunParallelWorkers(workerCount, [&](const size_t workerIx) { const uint64_t begin = (triangleCount * workerIx) / workerCount; const uint64_t endTri = (triangleCount * (workerIx + 1ull)) / workerCount; - workers.emplace_back(parseRange, workerIx, begin, endTri); - } - parseRange(0ull, 0ull, triangleCount / workerCount); - for (auto& worker : workers) - worker.join(); + parseRange(workerIx, begin, endTri); + }); } else { @@ -778,9 +879,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (vertexCount == 0ull) return {}; - const auto hashStart = clock_t::now(); - stlRecomputeContentHashesParallel(geometry.get()); - hashMs = std::chrono::duration(clock_t::now() - hashStart).count(); + hashMs = 0.0; const auto aabbStart = clock_t::now(); if (hasParsedAABB) diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index d7f2d7dbbc..077466ea06 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -1183,7 +1183,14 @@ bool 
CAssetConverter::CHashCache::hash_impl::operator()(lookup_t loo auto patchedParams = lookup.asset->getCreationParams(); assert(lookup.patch->usage.hasFlags(patchedParams.usage)); patchedParams.usage = lookup.patch->usage; - hasher.update(&patchedParams,sizeof(patchedParams)) << lookup.asset->getContentHash(); + auto contentHash = lookup.asset->getContentHash(); + if (contentHash==NoContentHash) + { + contentHash = lookup.asset->computeContentHash(); + if (auto* mutableAsset = const_cast(lookup.asset); mutableAsset && mutableAsset->isMutable()) + mutableAsset->setContentHash(contentHash); + } + hasher.update(&patchedParams,sizeof(patchedParams)) << contentHash; return true; } bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) From 9837d7d21aa1d06a29550ebb42cd40be75504489 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Feb 2026 08:21:40 +0100 Subject: [PATCH 014/118] Add optional loader hash compute flag and tune parser workers --- include/nbl/asset/interchange/IAssetLoader.h | 3 +- .../asset/interchange/COBJMeshFileLoader.cpp | 42 ++++++++++++++++++- .../asset/interchange/CPLYMeshFileLoader.cpp | 36 +++++----------- .../asset/interchange/CSTLMeshFileLoader.cpp | 34 ++++----------- 4 files changed, 62 insertions(+), 53 deletions(-) diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index ac4ac25782..415a751a75 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -91,7 +91,8 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted ELPF_NONE = 0, //!< default value, it doesn't do anything //[[deprecated]] ELPF_RIGHT_HANDED_MESHES = 0x1, //!< specifies that a mesh will be flipped in such a way that it'll look correctly in right-handed camera system //[[deprecated]] ELPF_DONT_COMPILE_GLSL = 0x2, //!< it states that GLSL won't be compiled to SPIR-V if it is loaded or generated - ELPF_LOAD_METADATA_ONLY = 0x4 
//!< it forces the loader to not load the entire scene for performance in special cases to fetch metadata. + ELPF_LOAD_METADATA_ONLY = 0x4, //!< it forces the loader to not load the entire scene for performance in special cases to fetch metadata. + ELPF_COMPUTE_CONTENT_HASHES = 0x8 //!< forces loaders to compute content hashes of produced buffers before returning. }; struct SAssetLoadParams diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 7a98146e7f..6c2c6365bf 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -241,6 +241,41 @@ IGeometry::SDataView createAdoptedView(core::vector&& data, const return view; } +void objRecomputeContentHashes(ICPUPolygonGeometry* geometry) +{ + if (!geometry) + return; + + core::vector> buffers; + auto appendViewBuffer = [&buffers](const IGeometry::SDataView& view) -> void + { + if (!view || !view.src.buffer) + return; + for (const auto& existing : buffers) + { + if (existing.get() == view.src.buffer.get()) + return; + } + buffers.push_back(core::smart_refctd_ptr(view.src.buffer)); + }; + + appendViewBuffer(geometry->getPositionView()); + appendViewBuffer(geometry->getIndexView()); + appendViewBuffer(geometry->getNormalView()); + for (const auto& view : *geometry->getAuxAttributeViews()) + appendViewBuffer(view); + for (const auto& view : *geometry->getJointWeightViews()) + { + appendViewBuffer(view.indices); + appendViewBuffer(view.weights); + } + if (auto jointOBB = geometry->getJointOBBView(); jointOBB) + appendViewBuffer(*jointOBB); + + for (auto& buffer : buffers) + buffer->setContentHash(buffer->computeContentHash()); +} + bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, double& ioMs, SFileReadTelemetry& ioTelemetry) { if (!file || !dst) @@ -1134,7 +1169,12 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* 
_file, const as } buildMs = std::chrono::duration(clock_t::now() - buildStart).count(); - hashMs = 0.0; + if (_params.loaderFlags & IAssetLoader::ELPF_COMPUTE_CONTENT_HASHES) + { + const auto hashStart = clock_t::now(); + objRecomputeContentHashes(geometry.get()); + hashMs = std::chrono::duration(clock_t::now() - hashStart).count(); + } const auto aabbStart = clock_t::now(); if (hasParsedAABB) diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 1ba1b30457..f52851a4b6 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -238,29 +238,8 @@ void plyRecomputeContentHashesParallel(ICPUPolygonGeometry* geometry) if (auto jointOBB = geometry->getJointOBBView(); jointOBB) appendViewBuffer(*jointOBB); - if (buffers.empty()) - return; - - const size_t hw = std::thread::hardware_concurrency(); - const size_t workerCount = hw ? std::min(hw, buffers.size()) : 1ull; - if (workerCount <= 1ull) - { - for (auto& buffer : buffers) - buffer->setContentHash(buffer->computeContentHash()); - return; - } - - auto hashWorker = [&buffers, workerCount](const size_t workerIx) -> void - { - const size_t beginIx = (buffers.size() * workerIx) / workerCount; - const size_t endIx = (buffers.size() * (workerIx + 1ull)) / workerCount; - for (size_t i = beginIx; i < endIx; ++i) - { - auto& buffer = buffers[i]; - buffer->setContentHash(buffer->computeContentHash()); - } - }; - plyRunParallelWorkers(workerCount, hashWorker); + for (auto& buffer : buffers) + buffer->setContentHash(buffer->computeContentHash()); } struct SContext @@ -1127,12 +1106,12 @@ struct SContext if (is32Bit) { const size_t hw = std::thread::hardware_concurrency(); - const size_t maxWorkersByWork = std::max(1ull, minBytesNeeded / (512ull << 10)); + const size_t maxWorkersByWork = std::max(1ull, minBytesNeeded / (640ull << 10)); size_t workerCount = hw ? 
std::min(hw, maxWorkersByWork) : 1ull; if (workerCount > 1ull) { const size_t recordBytes = sizeof(uint8_t) + 3ull * sizeof(uint32_t); - const bool needMax = true; + const bool needMax = trackMaxIndex; const bool validateAgainstVertexCount = hasVertexCount; std::vector workerNonTriangle(workerCount, 0u); std::vector workerInvalid(workerCount, 0u); @@ -2156,7 +2135,12 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } indexBuildMs = std::chrono::duration(clock_t::now() - indexStart).count(); - hashRangeMs = 0.0; + if (_params.loaderFlags & IAssetLoader::ELPF_COMPUTE_CONTENT_HASHES) + { + const auto hashStart = clock_t::now(); + plyRecomputeContentHashesParallel(geometry.get()); + hashRangeMs = std::chrono::duration(clock_t::now() - hashStart).count(); + } const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); const double stageRemainderMs = std::max(0.0, totalMs - (headerMs + vertexMs + faceMs + skipMs + layoutNegotiateMs + viewCreateMs + hashRangeMs + indexBuildMs + aabbMs)); diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index f2fdc9e7c4..53e86b22d4 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -337,29 +337,8 @@ void stlRecomputeContentHashesParallel(ICPUPolygonGeometry* geometry) if (auto jointOBB = geometry->getJointOBBView(); jointOBB) appendViewBuffer(*jointOBB); - if (buffers.empty()) - return; - - const size_t hw = std::thread::hardware_concurrency(); - const size_t workerCount = hw ? 
std::min(hw, buffers.size()) : 1ull; - if (workerCount <= 1ull) - { - for (auto& buffer : buffers) - buffer->setContentHash(buffer->computeContentHash()); - return; - } - - auto hashWorker = [&buffers, workerCount](const size_t workerIx) -> void - { - const size_t beginIx = (buffers.size() * workerIx) / workerCount; - const size_t endIx = (buffers.size() * (workerIx + 1ull)) / workerCount; - for (size_t i = beginIx; i < endIx; ++i) - { - auto& buffer = buffers[i]; - buffer->setContentHash(buffer->computeContentHash()); - } - }; - stlRunParallelWorkers(workerCount, hashWorker); + for (auto& buffer : buffers) + buffer->setContentHash(buffer->computeContentHash()); } CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager* _assetManager) @@ -623,7 +602,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa }; const size_t hw = std::thread::hardware_concurrency(); - const size_t maxWorkersByWork = std::max(1ull, static_cast(triangleCount / 6144ull)); + const size_t maxWorkersByWork = std::max(1ull, static_cast(triangleCount / 8192ull)); const size_t workerCount = hw ? 
std::max(1ull, std::min(hw, maxWorkersByWork)) : 1ull; std::vector threadAABBs(workerCount); auto parseRange = [&](const size_t workerIx, const uint64_t beginTri, const uint64_t endTri) -> void @@ -879,7 +858,12 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (vertexCount == 0ull) return {}; - hashMs = 0.0; + if (_params.loaderFlags & IAssetLoader::ELPF_COMPUTE_CONTENT_HASHES) + { + const auto hashStart = clock_t::now(); + stlRecomputeContentHashesParallel(geometry.get()); + hashMs = std::chrono::duration(clock_t::now() - hashStart).count(); + } const auto aabbStart = clock_t::now(); if (hasParsedAABB) From ac82f69248654c3924772d76c5acdd11449a4430 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Feb 2026 09:31:31 +0100 Subject: [PATCH 015/118] Optimize loader hot paths --- .../asset/interchange/COBJMeshFileLoader.cpp | 56 ++++++++++++-- .../asset/interchange/CPLYMeshFileLoader.cpp | 41 ++++++++--- .../asset/interchange/CSTLMeshFileLoader.cpp | 73 +++++++++++-------- 3 files changed, 123 insertions(+), 47 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 6c2c6365bf..b630a735ac 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -464,12 +464,56 @@ NBL_FORCE_INLINE bool parseObjFaceTokenPositiveTriplet(const char*& ptr, const c NBL_FORCE_INLINE bool parseObjTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, int32_t* idx0, int32_t* idx1, int32_t* idx2, const size_t posCount, const size_t uvCount, const size_t normalCount) { const char* ptr = lineStart; - if (!parseObjFaceTokenPositiveTriplet(ptr, lineEnd, idx0, posCount, uvCount, normalCount)) - return false; - if (!parseObjFaceTokenPositiveTriplet(ptr, lineEnd, idx1, posCount, uvCount, normalCount)) - return false; - if (!parseObjFaceTokenPositiveTriplet(ptr, lineEnd, idx2, posCount, 
uvCount, normalCount)) - return false; + int32_t* const out[3] = { idx0, idx1, idx2 }; + for (uint32_t corner = 0u; corner < 3u; ++corner) + { + while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) + ++ptr; + if (ptr >= lineEnd || !isObjDigit(*ptr)) + return false; + + uint64_t posRaw = 0ull; + while (ptr < lineEnd && isObjDigit(*ptr)) + { + posRaw = posRaw * 10ull + static_cast(*ptr - '0'); + ++ptr; + } + if (posRaw == 0ull || posRaw > static_cast(std::numeric_limits::max()) || posRaw > posCount) + return false; + if (ptr >= lineEnd || *ptr != '/') + return false; + ++ptr; + + uint64_t uvRaw = 0ull; + if (ptr >= lineEnd || !isObjDigit(*ptr)) + return false; + while (ptr < lineEnd && isObjDigit(*ptr)) + { + uvRaw = uvRaw * 10ull + static_cast(*ptr - '0'); + ++ptr; + } + if (uvRaw == 0ull || uvRaw > static_cast(std::numeric_limits::max()) || uvRaw > uvCount) + return false; + if (ptr >= lineEnd || *ptr != '/') + return false; + ++ptr; + + uint64_t normalRaw = 0ull; + if (ptr >= lineEnd || !isObjDigit(*ptr)) + return false; + while (ptr < lineEnd && isObjDigit(*ptr)) + { + normalRaw = normalRaw * 10ull + static_cast(*ptr - '0'); + ++ptr; + } + if (normalRaw == 0ull || normalRaw > static_cast(std::numeric_limits::max()) || normalRaw > normalCount) + return false; + + int32_t* const dst = out[corner]; + dst[0] = static_cast(posRaw - 1ull); + dst[1] = static_cast(uvRaw - 1ull); + dst[2] = static_cast(normalRaw - 1ull); + } while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) ++ptr; diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index f52851a4b6..dc53420428 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -81,18 +82,12 @@ void plyRunParallelWorkers(const size_t workerCount, Fn&& fn) fn(0ull); return; } - std::vector workers; - workers.reserve(workerCount - 
1ull); - for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) + core::vector workerIds(workerCount); + std::iota(workerIds.begin(), workerIds.end(), 0ull); + std::for_each(std::execution::par, workerIds.begin(), workerIds.end(), [&fn](const size_t workerIx) { - workers.emplace_back([&fn, workerIx]() - { - fn(workerIx); - }); - } - fn(0ull); - for (auto& worker : workers) - worker.join(); + fn(workerIx); + }); } class CPLYMappedFileMemoryResource final : public core::refctd_memory_resource @@ -238,6 +233,30 @@ void plyRecomputeContentHashesParallel(ICPUPolygonGeometry* geometry) if (auto jointOBB = geometry->getJointOBBView(); jointOBB) appendViewBuffer(*jointOBB); + if (buffers.empty()) + return; + + uint64_t totalBytes = 0ull; + for (const auto& buffer : buffers) + totalBytes += static_cast(buffer->getSize()); + + const size_t hw = std::thread::hardware_concurrency(); + const size_t workerCount = hw ? std::min(hw, buffers.size()) : 1ull; + if (workerCount > 1ull && totalBytes >= (2ull << 20)) + { + plyRunParallelWorkers(workerCount, [&buffers, workerCount](const size_t workerIx) + { + const size_t beginIx = (buffers.size() * workerIx) / workerCount; + const size_t endIx = (buffers.size() * (workerIx + 1ull)) / workerCount; + for (size_t i = beginIx; i < endIx; ++i) + { + auto& buffer = buffers[i]; + buffer->setContentHash(buffer->computeContentHash()); + } + }); + return; + } + for (auto& buffer : buffers) buffer->setContentHash(buffer->computeContentHash()); } diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 53e86b22d4..646fabc872 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -337,6 +337,30 @@ void stlRecomputeContentHashesParallel(ICPUPolygonGeometry* geometry) if (auto jointOBB = geometry->getJointOBBView(); jointOBB) appendViewBuffer(*jointOBB); + if (buffers.empty()) + return; + + uint64_t totalBytes = 
0ull; + for (const auto& buffer : buffers) + totalBytes += static_cast(buffer->getSize()); + + const size_t hw = std::thread::hardware_concurrency(); + const size_t workerCount = hw ? std::min(hw, buffers.size()) : 1ull; + if (workerCount > 1ull && totalBytes >= (2ull << 20)) + { + stlRunParallelWorkers(workerCount, [&buffers, workerCount](const size_t workerIx) + { + const size_t beginIx = (buffers.size() * workerIx) / workerCount; + const size_t endIx = (buffers.size() * (workerIx + 1ull)) / workerCount; + for (size_t i = beginIx; i < endIx; ++i) + { + auto& buffer = buffers[i]; + buffer->setContentHash(buffer->computeContentHash()); + } + }); + return; + } + for (auto& buffer : buffers) buffer->setContentHash(buffer->computeContentHash()); } @@ -642,34 +666,30 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa posCursor[6ull] = vertex2x; posCursor[7ull] = vertex2y; posCursor[8ull] = vertex2z; + const float triMinX = std::min(vertex0x, std::min(vertex1x, vertex2x)); + const float triMinY = std::min(vertex0y, std::min(vertex1y, vertex2y)); + const float triMinZ = std::min(vertex0z, std::min(vertex1z, vertex2z)); + const float triMaxX = std::max(vertex0x, std::max(vertex1x, vertex2x)); + const float triMaxY = std::max(vertex0y, std::max(vertex1y, vertex2y)); + const float triMaxZ = std::max(vertex0z, std::max(vertex1z, vertex2z)); if (!localAABB.has) { - localAABB.minX = vertex0x; localAABB.maxX = vertex0x; - localAABB.minY = vertex0y; localAABB.maxY = vertex0y; - localAABB.minZ = vertex0z; localAABB.maxZ = vertex0z; + localAABB.minX = triMinX; localAABB.maxX = triMaxX; + localAABB.minY = triMinY; localAABB.maxY = triMaxY; + localAABB.minZ = triMinZ; localAABB.maxZ = triMaxZ; localAABB.has = true; } - if (vertex0x < localAABB.minX) localAABB.minX = vertex0x; - if (vertex0y < localAABB.minY) localAABB.minY = vertex0y; - if (vertex0z < localAABB.minZ) localAABB.minZ = vertex0z; - if (vertex0x > localAABB.maxX) localAABB.maxX = 
vertex0x; - if (vertex0y > localAABB.maxY) localAABB.maxY = vertex0y; - if (vertex0z > localAABB.maxZ) localAABB.maxZ = vertex0z; - if (vertex1x < localAABB.minX) localAABB.minX = vertex1x; - if (vertex1y < localAABB.minY) localAABB.minY = vertex1y; - if (vertex1z < localAABB.minZ) localAABB.minZ = vertex1z; - if (vertex1x > localAABB.maxX) localAABB.maxX = vertex1x; - if (vertex1y > localAABB.maxY) localAABB.maxY = vertex1y; - if (vertex1z > localAABB.maxZ) localAABB.maxZ = vertex1z; - if (vertex2x < localAABB.minX) localAABB.minX = vertex2x; - if (vertex2y < localAABB.minY) localAABB.minY = vertex2y; - if (vertex2z < localAABB.minZ) localAABB.minZ = vertex2z; - if (vertex2x > localAABB.maxX) localAABB.maxX = vertex2x; - if (vertex2y > localAABB.maxY) localAABB.maxY = vertex2y; - if (vertex2z > localAABB.maxZ) localAABB.maxZ = vertex2z; + else + { + if (triMinX < localAABB.minX) localAABB.minX = triMinX; + if (triMinY < localAABB.minY) localAABB.minY = triMinY; + if (triMinZ < localAABB.minZ) localAABB.minZ = triMinZ; + if (triMaxX > localAABB.maxX) localAABB.maxX = triMaxX; + if (triMaxY > localAABB.maxY) localAABB.maxY = triMaxY; + if (triMaxZ > localAABB.maxZ) localAABB.maxZ = triMaxZ; + } posCursor += StlVerticesPerTriangle * StlFloatChannelsPerVertex; - const float normalLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; - if (normalLen2 <= 0.f) + if (normalX == 0.f && normalY == 0.f && normalZ == 0.f) { const float edge10x = vertex1x - vertex0x; const float edge10y = vertex1y - vertex0y; @@ -696,13 +716,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa normalZ = 0.f; } } - else if (normalLen2 < 0.9999f || normalLen2 > 1.0001f) - { - const float invLen = 1.f / std::sqrt(normalLen2); - normalX *= invLen; - normalY *= invLen; - normalZ *= invLen; - } normalCursor[0ull] = normalX; normalCursor[1ull] = normalY; From 7de369494da13f133d65f3402f6dd15b9dc8fbdb Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: 
Tue, 10 Feb 2026 10:37:29 +0100 Subject: [PATCH 016/118] Tune STL and PLY worker heuristics --- .../asset/interchange/CPLYMeshFileLoader.cpp | 2 +- .../asset/interchange/CSTLMeshFileLoader.cpp | 107 ++++++++++-------- 2 files changed, 61 insertions(+), 48 deletions(-) diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index dc53420428..38d108fa11 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -1125,7 +1125,7 @@ struct SContext if (is32Bit) { const size_t hw = std::thread::hardware_concurrency(); - const size_t maxWorkersByWork = std::max(1ull, minBytesNeeded / (640ull << 10)); + const size_t maxWorkersByWork = std::max(1ull, minBytesNeeded / (256ull << 10)); size_t workerCount = hw ? std::min(hw, maxWorkersByWork) : 1ull; if (workerCount > 1ull) { diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 646fabc872..8e44c7a3fa 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -614,6 +614,10 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const uint8_t* const end = cursor + dataSize; if (end < cursor || static_cast(end - cursor) < static_cast(triangleCount) * StlTriangleRecordBytes) return {}; + const size_t hw = std::thread::hardware_concurrency(); + const size_t maxWorkersByWork = std::max(1ull, dataSize / (768ull << 10)); + const size_t workerCount = hw ? 
std::max(1ull, std::min(hw, maxWorkersByWork)) : 1ull; + static constexpr bool ComputeAABBInParse = true; struct SThreadAABB { bool has = false; @@ -624,17 +628,13 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa float maxY = 0.f; float maxZ = 0.f; }; - - const size_t hw = std::thread::hardware_concurrency(); - const size_t maxWorkersByWork = std::max(1ull, static_cast(triangleCount / 8192ull)); - const size_t workerCount = hw ? std::max(1ull, std::min(hw, maxWorkersByWork)) : 1ull; - std::vector threadAABBs(workerCount); + std::vector threadAABBs(ComputeAABBInParse ? workerCount : 0ull); auto parseRange = [&](const size_t workerIx, const uint64_t beginTri, const uint64_t endTri) -> void { const uint8_t* localCursor = payloadData + beginTri * StlTriangleRecordBytes; float* posCursor = posOutFloat + beginTri * StlVerticesPerTriangle * StlFloatChannelsPerVertex; float* normalCursor = normalOutFloat + beginTri * StlVerticesPerTriangle * StlFloatChannelsPerVertex; - auto& localAABB = threadAABBs[workerIx]; + SThreadAABB localAABB = {}; for (uint64_t tri = beginTri; tri < endTri; ++tri) { const uint8_t* const triRecord = localCursor; @@ -666,29 +666,38 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa posCursor[6ull] = vertex2x; posCursor[7ull] = vertex2y; posCursor[8ull] = vertex2z; - const float triMinX = std::min(vertex0x, std::min(vertex1x, vertex2x)); - const float triMinY = std::min(vertex0y, std::min(vertex1y, vertex2y)); - const float triMinZ = std::min(vertex0z, std::min(vertex1z, vertex2z)); - const float triMaxX = std::max(vertex0x, std::max(vertex1x, vertex2x)); - const float triMaxY = std::max(vertex0y, std::max(vertex1y, vertex2y)); - const float triMaxZ = std::max(vertex0z, std::max(vertex1z, vertex2z)); - if (!localAABB.has) - { - localAABB.minX = triMinX; localAABB.maxX = triMaxX; - localAABB.minY = triMinY; localAABB.maxY = triMaxY; - localAABB.minZ = triMinZ; localAABB.maxZ = 
triMaxZ; - localAABB.has = true; - } - else + posCursor += StlVerticesPerTriangle * StlFloatChannelsPerVertex; + if constexpr (ComputeAABBInParse) { - if (triMinX < localAABB.minX) localAABB.minX = triMinX; - if (triMinY < localAABB.minY) localAABB.minY = triMinY; - if (triMinZ < localAABB.minZ) localAABB.minZ = triMinZ; - if (triMaxX > localAABB.maxX) localAABB.maxX = triMaxX; - if (triMaxY > localAABB.maxY) localAABB.maxY = triMaxY; - if (triMaxZ > localAABB.maxZ) localAABB.maxZ = triMaxZ; + if (!localAABB.has) + { + localAABB.has = true; + localAABB.minX = vertex0x; + localAABB.minY = vertex0y; + localAABB.minZ = vertex0z; + localAABB.maxX = vertex0x; + localAABB.maxY = vertex0y; + localAABB.maxZ = vertex0z; + } + if (vertex0x < localAABB.minX) localAABB.minX = vertex0x; + if (vertex0y < localAABB.minY) localAABB.minY = vertex0y; + if (vertex0z < localAABB.minZ) localAABB.minZ = vertex0z; + if (vertex0x > localAABB.maxX) localAABB.maxX = vertex0x; + if (vertex0y > localAABB.maxY) localAABB.maxY = vertex0y; + if (vertex0z > localAABB.maxZ) localAABB.maxZ = vertex0z; + if (vertex1x < localAABB.minX) localAABB.minX = vertex1x; + if (vertex1y < localAABB.minY) localAABB.minY = vertex1y; + if (vertex1z < localAABB.minZ) localAABB.minZ = vertex1z; + if (vertex1x > localAABB.maxX) localAABB.maxX = vertex1x; + if (vertex1y > localAABB.maxY) localAABB.maxY = vertex1y; + if (vertex1z > localAABB.maxZ) localAABB.maxZ = vertex1z; + if (vertex2x < localAABB.minX) localAABB.minX = vertex2x; + if (vertex2y < localAABB.minY) localAABB.minY = vertex2y; + if (vertex2z < localAABB.minZ) localAABB.minZ = vertex2z; + if (vertex2x > localAABB.maxX) localAABB.maxX = vertex2x; + if (vertex2y > localAABB.maxY) localAABB.maxY = vertex2y; + if (vertex2z > localAABB.maxZ) localAABB.maxZ = vertex2z; } - posCursor += StlVerticesPerTriangle * StlFloatChannelsPerVertex; if (normalX == 0.f && normalY == 0.f && normalZ == 0.f) { const float edge10x = vertex1x - vertex0x; @@ -728,6 +737,8 @@ 
SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa normalCursor[8ull] = normalZ; normalCursor += StlVerticesPerTriangle * StlFloatChannelsPerVertex; } + if constexpr (ComputeAABBInParse) + threadAABBs[workerIx] = localAABB; }; if (workerCount > 1ull) @@ -743,29 +754,31 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { parseRange(0ull, 0ull, triangleCount); } - - for (const auto& localAABB : threadAABBs) + if constexpr (ComputeAABBInParse) { - if (!localAABB.has) - continue; - if (!hasParsedAABB) + for (const auto& localAABB : threadAABBs) { - hasParsedAABB = true; - parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); - parsedAABB.minVx.x = localAABB.minX; - parsedAABB.minVx.y = localAABB.minY; - parsedAABB.minVx.z = localAABB.minZ; - parsedAABB.maxVx.x = localAABB.maxX; - parsedAABB.maxVx.y = localAABB.maxY; - parsedAABB.maxVx.z = localAABB.maxZ; - continue; + if (!localAABB.has) + continue; + if (!hasParsedAABB) + { + hasParsedAABB = true; + parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); + parsedAABB.minVx.x = localAABB.minX; + parsedAABB.minVx.y = localAABB.minY; + parsedAABB.minVx.z = localAABB.minZ; + parsedAABB.maxVx.x = localAABB.maxX; + parsedAABB.maxVx.y = localAABB.maxY; + parsedAABB.maxVx.z = localAABB.maxZ; + continue; + } + if (localAABB.minX < parsedAABB.minVx.x) parsedAABB.minVx.x = localAABB.minX; + if (localAABB.minY < parsedAABB.minVx.y) parsedAABB.minVx.y = localAABB.minY; + if (localAABB.minZ < parsedAABB.minVx.z) parsedAABB.minVx.z = localAABB.minZ; + if (localAABB.maxX > parsedAABB.maxVx.x) parsedAABB.maxVx.x = localAABB.maxX; + if (localAABB.maxY > parsedAABB.maxVx.y) parsedAABB.maxVx.y = localAABB.maxY; + if (localAABB.maxZ > parsedAABB.maxVx.z) parsedAABB.maxVx.z = localAABB.maxZ; } - if (localAABB.minX < parsedAABB.minVx.x) parsedAABB.minVx.x = localAABB.minX; - if (localAABB.minY < parsedAABB.minVx.y) parsedAABB.minVx.y = localAABB.minY; - if 
(localAABB.minZ < parsedAABB.minVx.z) parsedAABB.minVx.z = localAABB.minZ; - if (localAABB.maxX > parsedAABB.maxVx.x) parsedAABB.maxVx.x = localAABB.maxX; - if (localAABB.maxY > parsedAABB.maxVx.y) parsedAABB.maxVx.y = localAABB.maxY; - if (localAABB.maxZ > parsedAABB.maxVx.z) parsedAABB.maxVx.z = localAABB.maxZ; } parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); From 58138ea283bb4bce734bbf845954a5135e827dbc Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Feb 2026 21:50:43 +0100 Subject: [PATCH 017/118] Optimize mesh loader hashing and hot paths --- include/nbl/asset/ICPUBuffer.h | 15 +- include/nbl/core/hash/blake.h | 5 +- .../asset/interchange/COBJMeshFileLoader.cpp | 110 ++-- .../asset/interchange/CPLYMeshFileLoader.cpp | 115 +++- .../asset/interchange/CSTLMeshFileLoader.cpp | 136 ++++- src/nbl/core/hash/blake.cpp | 518 +++++++++++++++++- 6 files changed, 817 insertions(+), 82 deletions(-) diff --git a/include/nbl/asset/ICPUBuffer.h b/include/nbl/asset/ICPUBuffer.h index 26f45d4ced..044c0d9018 100644 --- a/include/nbl/asset/ICPUBuffer.h +++ b/include/nbl/asset/ICPUBuffer.h @@ -76,13 +76,12 @@ class ICPUBuffer final : public asset::IBuffer, public IPreHashed constexpr static inline auto AssetType = ET_BUFFER; inline IAsset::E_TYPE getAssetType() const override final { return AssetType; } - inline core::blake3_hash_t computeContentHash() const override - { - core::blake3_hasher hasher; - if (m_data) - hasher.update(m_data, m_creationParams.size); - return static_cast(hasher); - } + inline core::blake3_hash_t computeContentHash() const override + { + if (!m_data) + return static_cast(core::blake3_hasher{}); + return core::blake3_hash_buffer(m_data, m_creationParams.size); + } inline bool missingContent() const override { return !m_data; } @@ -149,4 +148,4 @@ class ICPUBuffer final : public asset::IBuffer, public IPreHashed } // end namespace nbl::asset -#endif \ No newline at end of file +#endif diff --git 
a/include/nbl/core/hash/blake.h b/include/nbl/core/hash/blake.h index fb91c9969f..5f5ab3fb95 100644 --- a/include/nbl/core/hash/blake.h +++ b/include/nbl/core/hash/blake.h @@ -55,6 +55,9 @@ class NBL_API2 blake3_hasher final explicit operator blake3_hash_t() const; }; +NBL_API2 blake3_hash_t blake3_hash_buffer(const void* data, size_t bytes); +NBL_API2 blake3_hash_t blake3_hash_buffer_sequential(const void* data, size_t bytes); + // Useful specializations template struct blake3_hasher::update_impl @@ -120,4 +123,4 @@ struct hash }; } -#endif // _NBL_CORE_HASH_BLAKE3_H_INCLUDED_ \ No newline at end of file +#endif // _NBL_CORE_HASH_BLAKE3_H_INCLUDED_ diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index b630a735ac..a0590f3c9a 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -14,6 +14,7 @@ #include "COBJMeshFileLoader.h" +#include #include #include #include @@ -37,6 +38,7 @@ struct ObjVertexDedupNode int32_t next = -1; }; + struct SFileReadTelemetry { uint64_t callCount = 0ull; @@ -461,6 +463,27 @@ NBL_FORCE_INLINE bool parseObjFaceTokenPositiveTriplet(const char*& ptr, const c return true; } +NBL_FORCE_INLINE bool parseObjPositiveIndexBounded(const char*& ptr, const char* const end, const size_t maxCount, int32_t& out) +{ + if (ptr >= end || !isObjDigit(*ptr)) + return false; + + uint32_t value = 0u; + while (ptr < end && isObjDigit(*ptr)) + { + const uint32_t digit = static_cast(*ptr - '0'); + if (value > 429496729u) + return false; + value = value * 10u + digit; + ++ptr; + } + if (value == 0u || value > maxCount) + return false; + + out = static_cast(value - 1u); + return true; +} + NBL_FORCE_INLINE bool parseObjTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, int32_t* idx0, int32_t* idx1, int32_t* idx2, const size_t posCount, const size_t uvCount, const size_t normalCount) { const char* ptr = 
lineStart; @@ -472,47 +495,28 @@ NBL_FORCE_INLINE bool parseObjTrianglePositiveTripletLine(const char* const line if (ptr >= lineEnd || !isObjDigit(*ptr)) return false; - uint64_t posRaw = 0ull; - while (ptr < lineEnd && isObjDigit(*ptr)) - { - posRaw = posRaw * 10ull + static_cast(*ptr - '0'); - ++ptr; - } - if (posRaw == 0ull || posRaw > static_cast(std::numeric_limits::max()) || posRaw > posCount) + int32_t posIx = -1; + if (!parseObjPositiveIndexBounded(ptr, lineEnd, posCount, posIx)) return false; if (ptr >= lineEnd || *ptr != '/') return false; ++ptr; - uint64_t uvRaw = 0ull; - if (ptr >= lineEnd || !isObjDigit(*ptr)) - return false; - while (ptr < lineEnd && isObjDigit(*ptr)) - { - uvRaw = uvRaw * 10ull + static_cast(*ptr - '0'); - ++ptr; - } - if (uvRaw == 0ull || uvRaw > static_cast(std::numeric_limits::max()) || uvRaw > uvCount) + int32_t uvIx = -1; + if (!parseObjPositiveIndexBounded(ptr, lineEnd, uvCount, uvIx)) return false; if (ptr >= lineEnd || *ptr != '/') return false; ++ptr; - uint64_t normalRaw = 0ull; - if (ptr >= lineEnd || !isObjDigit(*ptr)) - return false; - while (ptr < lineEnd && isObjDigit(*ptr)) - { - normalRaw = normalRaw * 10ull + static_cast(*ptr - '0'); - ++ptr; - } - if (normalRaw == 0ull || normalRaw > static_cast(std::numeric_limits::max()) || normalRaw > normalCount) + int32_t normalIx = -1; + if (!parseObjPositiveIndexBounded(ptr, lineEnd, normalCount, normalIx)) return false; int32_t* const dst = out[corner]; - dst[0] = static_cast(posRaw - 1ull); - dst[1] = static_cast(uvRaw - 1ull); - dst[2] = static_cast(normalRaw - 1ull); + dst[0] = posIx; + dst[1] = uvIx; + dst[2] = normalIx; } while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) @@ -800,7 +804,20 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as size_t outVertexWriteCount = 0ull; size_t outIndexWriteCount = 0ull; size_t dedupNodeCount = 0ull; + struct SDedupHotEntry + { + int32_t pos = -1; + int32_t uv = -1; + int32_t normal = -1; + 
uint32_t outIndex = 0u; + }; + static constexpr size_t DedupHotEntryCount = 2048ull; + std::array dedupHotCache = {}; + bool hasNormals = false; + bool hasUVs = false; + hlsl::shapes::AABB<3, hlsl::float32_t> parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); + bool hasParsedAABB = false; auto allocateOutVertex = [&](uint32_t& outIx) -> bool { if (outVertexWriteCount >= outPositions.size()) @@ -840,10 +857,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return ix; }; - bool hasNormals = false; - bool hasUVs = false; - hlsl::shapes::AABB<3, hlsl::float32_t> parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); - bool hasParsedAABB = false; auto acquireCornerIndex = [&](const int32_t* idx, uint32_t& outIx)->bool { if (!idx) @@ -900,8 +913,20 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as outNormals[static_cast(outIx)] = normal; return true; }; + auto acquireCornerIndexPositiveTriplet = [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, uint32_t& outIx)->bool { + const uint32_t hotHash = + static_cast(posIx) * 73856093u ^ + static_cast(uvIx) * 19349663u ^ + static_cast(normalIx) * 83492791u; + auto& hotEntry = dedupHotCache[hotHash & static_cast(DedupHotEntryCount - 1ull)]; + if (hotEntry.pos == posIx && hotEntry.uv == uvIx && hotEntry.normal == normalIx) + { + outIx = hotEntry.outIndex; + return true; + } + int32_t nodeIx = dedupHeadByPos[static_cast(posIx)]; while (nodeIx >= 0) { @@ -909,6 +934,10 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (node.uv == uvIx && node.normal == normalIx) { outIx = node.outIndex; + hotEntry.pos = posIx; + hotEntry.uv = uvIx; + hotEntry.normal = normalIx; + hotEntry.outIndex = outIx; return true; } nodeIx = node.next; @@ -931,6 +960,10 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as extendAABB(parsedAABB, hasParsedAABB, srcPos); 
outUVs[static_cast(outIx)] = uvs[static_cast(uvIx)]; outNormals[static_cast(outIx)] = normals[static_cast(normalIx)]; + hotEntry.pos = posIx; + hotEntry.uv = uvIx; + hotEntry.normal = normalIx; + hotEntry.outIndex = outIx; hasUVs = true; hasNormals = true; return true; @@ -943,11 +976,16 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as while (bufPtr < bufEnd) { const char* const lineStart = bufPtr; - const char* lineTerminator = lineStart; - while (lineTerminator < bufEnd && *lineTerminator != '\n' && *lineTerminator != '\r') - ++lineTerminator; + const size_t remaining = static_cast(bufEnd - lineStart); + const char* lineTerminator = static_cast(std::memchr(lineStart, '\n', remaining)); + if (!lineTerminator) + lineTerminator = static_cast(std::memchr(lineStart, '\r', remaining)); + if (!lineTerminator) + lineTerminator = bufEnd; const char* lineEnd = lineTerminator; + if (lineEnd > lineStart && lineEnd[-1] == '\r') + --lineEnd; if (lineStart < lineEnd) { @@ -1012,7 +1050,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } else if (*lineStart == 'f' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) { - const auto faceStart = trackStages ? clock_t::now() : clock_t::time_point{}; if (positions.empty()) return {}; ++faceCount; @@ -1040,6 +1077,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as triangleFastPath = (triLinePtr == lineEnd); } } + const auto faceStart = trackStages ? clock_t::now() : clock_t::time_point{}; if (triangleFastPath) { const auto dedupStart = trackStages ? 
clock_t::now() : clock_t::time_point{}; diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 38d108fa11..bcaf8596e5 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -236,29 +236,44 @@ void plyRecomputeContentHashesParallel(ICPUPolygonGeometry* geometry) if (buffers.empty()) return; + core::vector pending; + pending.reserve(buffers.size()); uint64_t totalBytes = 0ull; - for (const auto& buffer : buffers) + for (size_t i = 0ull; i < buffers.size(); ++i) + { + auto& buffer = buffers[i]; + if (buffer->getContentHash() != IPreHashed::INVALID_HASH) + continue; totalBytes += static_cast(buffer->getSize()); + pending.push_back(i); + } + + if (pending.empty()) + return; const size_t hw = std::thread::hardware_concurrency(); - const size_t workerCount = hw ? std::min(hw, buffers.size()) : 1ull; - if (workerCount > 1ull && totalBytes >= (2ull << 20)) + const size_t maxWorkersByBytes = std::max(1ull, static_cast(totalBytes / (2ull << 20))); + const size_t workerCount = hw ? 
std::min({ hw, pending.size(), maxWorkersByBytes }) : 1ull; + if (workerCount > 1ull) { - plyRunParallelWorkers(workerCount, [&buffers, workerCount](const size_t workerIx) + plyRunParallelWorkers(workerCount, [&](const size_t workerIx) { - const size_t beginIx = (buffers.size() * workerIx) / workerCount; - const size_t endIx = (buffers.size() * (workerIx + 1ull)) / workerCount; + const size_t beginIx = (pending.size() * workerIx) / workerCount; + const size_t endIx = (pending.size() * (workerIx + 1ull)) / workerCount; for (size_t i = beginIx; i < endIx; ++i) { - auto& buffer = buffers[i]; + auto& buffer = buffers[pending[i]]; buffer->setContentHash(buffer->computeContentHash()); } }); return; } - for (auto& buffer : buffers) + for (const auto pendingIx : pending) + { + auto& buffer = buffers[pendingIx]; buffer->setContentHash(buffer->computeContentHash()); + } } struct SContext @@ -1125,7 +1140,11 @@ struct SContext if (is32Bit) { const size_t hw = std::thread::hardware_concurrency(); - const size_t maxWorkersByWork = std::max(1ull, minBytesNeeded / (256ull << 10)); + constexpr size_t FaceParseBytesPerWorkerTarget = 512ull << 10; + constexpr size_t FaceParseFacesPerWorkerTarget = 32768ull; + const size_t maxWorkersByBytes = std::max(1ull, (minBytesNeeded + FaceParseBytesPerWorkerTarget - 1ull) / FaceParseBytesPerWorkerTarget); + const size_t maxWorkersByFaces = std::max(1ull, (element.Count + FaceParseFacesPerWorkerTarget - 1ull) / FaceParseFacesPerWorkerTarget); + const size_t maxWorkersByWork = std::min(maxWorkersByBytes, maxWorkersByFaces); size_t workerCount = hw ? 
std::min(hw, maxWorkersByWork) : 1ull; if (workerCount > 1ull) { @@ -1566,11 +1585,13 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa double hashRangeMs = 0.0; double indexBuildMs = 0.0; double aabbMs = 0.0; + const bool computeContentHashes = (_params.loaderFlags & IAssetLoader::ELPF_COMPUTE_CONTENT_HASHES) != 0; uint64_t faceCount = 0u; uint64_t fastFaceElementCount = 0u; uint64_t fastVertexElementCount = 0u; uint32_t maxIndexRead = 0u; const uint64_t fileSize = _file->getSize(); + const bool hashInBuild = computeContentHashes && (fileSize <= (1ull << 20)); const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, fileSize, true); if (!ioPlan.valid) { @@ -1597,8 +1618,61 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa ctx.init(static_cast(safeReadWindow)); // start with empty mesh - auto geometry = make_smart_refctd_ptr(); + auto geometry = make_smart_refctd_ptr(); uint32_t vertCount=0; + core::vector> hashedBuffers; + std::jthread deferredPositionHashThread; + auto hashBufferIfNeeded = [&](ICPUBuffer* buffer)->void + { + if (!hashInBuild || !buffer) + return; + for (const auto& hashed : hashedBuffers) + { + if (hashed.get() == buffer) + return; + } + const auto hashStart = clock_t::now(); + buffer->setContentHash(buffer->computeContentHash()); + hashRangeMs += std::chrono::duration(clock_t::now() - hashStart).count(); + hashedBuffers.push_back(core::smart_refctd_ptr(buffer)); + }; + auto tryLaunchDeferredHash = [&](const IGeometry::SDataView& view, std::jthread& deferredThread)->void + { + if (!computeContentHashes || hashInBuild || !view || !view.src.buffer) + return; + if (deferredThread.joinable()) + return; + if (view.src.buffer->getContentHash() != IPreHashed::INVALID_HASH) + return; + auto keepAlive = core::smart_refctd_ptr(view.src.buffer); + deferredThread = std::jthread([buffer = std::move(keepAlive)]() mutable + { + buffer->setContentHash(buffer->computeContentHash()); + }); + }; + 
auto hashViewBufferIfNeeded = [&](const IGeometry::SDataView& view)->void + { + if (!view || !view.src.buffer) + return; + hashBufferIfNeeded(view.src.buffer.get()); + }; + auto hashRemainingGeometryBuffers = [&]()->void + { + if (!hashInBuild) + return; + hashViewBufferIfNeeded(geometry->getPositionView()); + hashViewBufferIfNeeded(geometry->getIndexView()); + hashViewBufferIfNeeded(geometry->getNormalView()); + for (const auto& view : *geometry->getAuxAttributeViews()) + hashViewBufferIfNeeded(view); + for (const auto& view : *geometry->getJointWeightViews()) + { + hashViewBufferIfNeeded(view.indices); + hashViewBufferIfNeeded(view.weights); + } + if (const auto jointObb = geometry->getJointOBBView(); jointObb) + hashViewBufferIfNeeded(*jointObb); + }; // Currently only supports ASCII or binary meshes if (strcmp(ctx.getNextLine(),"ply")) @@ -1808,6 +1882,8 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!mappedPosView) return {}; geometry->setPositionView(std::move(mappedPosView)); + hashViewBufferIfNeeded(geometry->getPositionView()); + tryLaunchDeferredHash(geometry->getPositionView(), deferredPositionHashThread); ctx.StartPointer += mappedBytes; ++fastVertexElementCount; const double elapsedMs = std::chrono::duration(clock_t::now() - vertexStart).count(); @@ -2065,6 +2141,11 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa _params.logger.log("PLY vertex fast path failed on malformed data for %s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str()); return {}; } + hashViewBufferIfNeeded(geometry->getPositionView()); + hashViewBufferIfNeeded(geometry->getNormalView()); + for (const auto& view : *geometry->getAuxAttributeViews()) + hashViewBufferIfNeeded(view); + tryLaunchDeferredHash(geometry->getPositionView(), deferredPositionHashThread); verticesProcessed = true; } else if (el.Name=="face") @@ -2143,6 +2224,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* 
_file, const IAssetLoa if (!view) return {}; geometry->setIndexView(std::move(view)); + hashViewBufferIfNeeded(geometry->getIndexView()); } else { @@ -2150,15 +2232,22 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!view) return {}; geometry->setIndexView(std::move(view)); + hashViewBufferIfNeeded(geometry->getIndexView()); } } indexBuildMs = std::chrono::duration(clock_t::now() - indexStart).count(); - if (_params.loaderFlags & IAssetLoader::ELPF_COMPUTE_CONTENT_HASHES) + if (computeContentHashes && !hashInBuild) { + if (deferredPositionHashThread.joinable()) + deferredPositionHashThread.join(); const auto hashStart = clock_t::now(); plyRecomputeContentHashesParallel(geometry.get()); - hashRangeMs = std::chrono::duration(clock_t::now() - hashStart).count(); + hashRangeMs += std::chrono::duration(clock_t::now() - hashStart).count(); + } + else + { + hashRemainingGeometryBuffers(); } const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); @@ -2219,3 +2308,5 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } // end namespace nbl::asset #endif // _NBL_COMPILE_WITH_PLY_LOADER_ + + diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 8e44c7a3fa..eb72aaa0a9 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -10,10 +10,12 @@ #include "nbl/asset/asset.h" #include "nbl/asset/metadata/CSTLMetadata.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/core/hash/blake.h" #include "nbl/system/IFile.h" #include #include +#include #include #include #include @@ -355,6 +357,8 @@ void stlRecomputeContentHashesParallel(ICPUPolygonGeometry* geometry) for (size_t i = beginIx; i < endIx; ++i) { auto& buffer = buffers[i]; + if (buffer->getContentHash() != IPreHashed::INVALID_HASH) + continue; 
buffer->setContentHash(buffer->computeContentHash()); } }); @@ -362,7 +366,11 @@ void stlRecomputeContentHashesParallel(ICPUPolygonGeometry* geometry) } for (auto& buffer : buffers) + { + if (buffer->getContentHash() != IPreHashed::INVALID_HASH) + continue; buffer->setContentHash(buffer->computeContentHash()); + } } CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager* _assetManager) @@ -397,6 +405,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa double aabbMs = 0.0; uint64_t triangleCount = 0u; const char* parsePath = "unknown"; + const bool computeContentHashes = (_params.loaderFlags & IAssetLoader::ELPF_COMPUTE_CONTENT_HASHES) != 0; + bool contentHashesAssigned = false; SSTLContext context = { asset::IAssetLoader::SAssetLoadContext{ @@ -615,8 +625,13 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (end < cursor || static_cast(end - cursor) < static_cast(triangleCount) * StlTriangleRecordBytes) return {}; const size_t hw = std::thread::hardware_concurrency(); - const size_t maxWorkersByWork = std::max(1ull, dataSize / (768ull << 10)); - const size_t workerCount = hw ? std::max(1ull, std::min(hw, maxWorkersByWork)) : 1ull; + constexpr size_t StlParseBytesPerWorkerTarget = 384ull << 10; + constexpr uint64_t StlParseTrianglesPerWorkerTarget = 8192ull; + const size_t maxWorkersByBytes = std::max(1ull, (dataSize + StlParseBytesPerWorkerTarget - 1ull) / StlParseBytesPerWorkerTarget); + const size_t maxWorkersByTriangles = std::max(1ull, static_cast((triangleCount + StlParseTrianglesPerWorkerTarget - 1ull) / StlParseTrianglesPerWorkerTarget)); + const size_t maxWorkersByWork = std::min(maxWorkersByBytes, maxWorkersByTriangles); + const size_t parseHwBudget = hw; + const size_t workerCount = parseHwBudget ? 
std::max(1ull, std::min(parseHwBudget, maxWorkersByWork)) : 1ull; static constexpr bool ComputeAABBInParse = true; struct SThreadAABB { @@ -629,17 +644,26 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa float maxZ = 0.f; }; std::vector threadAABBs(ComputeAABBInParse ? workerCount : 0ull); - auto parseRange = [&](const size_t workerIx, const uint64_t beginTri, const uint64_t endTri) -> void + const uint64_t targetChunkCount = std::max(1ull, static_cast(workerCount) * 4ull); + const uint64_t dynamicChunkTriangles = (triangleCount + targetChunkCount - 1ull) / targetChunkCount; + const uint64_t parseChunkTriangles = std::clamp(dynamicChunkTriangles, 1024ull, 8192ull); + const size_t parseChunkCount = static_cast((triangleCount + parseChunkTriangles - 1ull) / parseChunkTriangles); + const bool hashInParsePipeline = computeContentHashes; + std::vector hashChunkReady(hashInParsePipeline ? parseChunkCount : 0ull, 0u); + double positionHashPipelineMs = 0.0; + double normalHashPipelineMs = 0.0; + std::atomic_bool hashPipelineOk = true; + core::blake3_hash_t parsedPositionHash = static_cast(core::blake3_hasher{}); + core::blake3_hash_t parsedNormalHash = static_cast(core::blake3_hasher{}); + auto parseRange = [&](const uint64_t beginTri, const uint64_t endTri, SThreadAABB& localAABB) -> void { const uint8_t* localCursor = payloadData + beginTri * StlTriangleRecordBytes; float* posCursor = posOutFloat + beginTri * StlVerticesPerTriangle * StlFloatChannelsPerVertex; float* normalCursor = normalOutFloat + beginTri * StlVerticesPerTriangle * StlFloatChannelsPerVertex; - SThreadAABB localAABB = {}; for (uint64_t tri = beginTri; tri < endTri; ++tri) { const uint8_t* const triRecord = localCursor; localCursor += StlTriangleRecordBytes; - float triValues[StlTriangleFloatCount]; std::memcpy(triValues, triRecord, sizeof(triValues)); @@ -666,7 +690,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa posCursor[6ull] = 
vertex2x; posCursor[7ull] = vertex2y; posCursor[8ull] = vertex2z; - posCursor += StlVerticesPerTriangle * StlFloatChannelsPerVertex; if constexpr (ComputeAABBInParse) { if (!localAABB.has) @@ -725,7 +748,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa normalZ = 0.f; } } - normalCursor[0ull] = normalX; normalCursor[1ull] = normalY; normalCursor[2ull] = normalZ; @@ -735,24 +757,104 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa normalCursor[6ull] = normalX; normalCursor[7ull] = normalY; normalCursor[8ull] = normalZ; + posCursor += StlVerticesPerTriangle * StlFloatChannelsPerVertex; normalCursor += StlVerticesPerTriangle * StlFloatChannelsPerVertex; } + }; + std::jthread positionHashThread; + std::jthread normalHashThread; + if (hashInParsePipeline) + { + positionHashThread = std::jthread([&]() + { + try + { + core::blake3_hasher positionHasher; + const auto hashThreadStart = clock_t::now(); + for (size_t chunkIx = 0ull; chunkIx < parseChunkCount; ++chunkIx) + { + auto ready = std::atomic_ref(hashChunkReady[chunkIx]); + while (ready.load(std::memory_order_acquire) == 0u) + std::this_thread::yield(); + const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; + const uint64_t endTri = std::min(begin + parseChunkTriangles, triangleCount); + const size_t chunkTriangles = static_cast(endTri - begin); + const size_t chunkBytes = chunkTriangles * StlVerticesPerTriangle * StlFloatChannelsPerVertex * sizeof(float); + positionHasher.update(posOutFloat + begin * StlVerticesPerTriangle * StlFloatChannelsPerVertex, chunkBytes); + } + positionHashPipelineMs = std::chrono::duration(clock_t::now() - hashThreadStart).count(); + parsedPositionHash = static_cast(positionHasher); + } + catch (...) 
+ { + hashPipelineOk.store(false, std::memory_order_relaxed); + } + }); + normalHashThread = std::jthread([&]() + { + try + { + core::blake3_hasher normalHasher; + const auto hashThreadStart = clock_t::now(); + for (size_t chunkIx = 0ull; chunkIx < parseChunkCount; ++chunkIx) + { + auto ready = std::atomic_ref(hashChunkReady[chunkIx]); + while (ready.load(std::memory_order_acquire) == 0u) + std::this_thread::yield(); + const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; + const uint64_t endTri = std::min(begin + parseChunkTriangles, triangleCount); + const size_t chunkTriangles = static_cast(endTri - begin); + const size_t chunkBytes = chunkTriangles * StlVerticesPerTriangle * StlFloatChannelsPerVertex * sizeof(float); + normalHasher.update(normalOutFloat + begin * StlVerticesPerTriangle * StlFloatChannelsPerVertex, chunkBytes); + } + normalHashPipelineMs = std::chrono::duration(clock_t::now() - hashThreadStart).count(); + parsedNormalHash = static_cast(normalHasher); + } + catch (...) 
+ { + hashPipelineOk.store(false, std::memory_order_relaxed); + } + }); + } + std::atomic_size_t nextChunkIx = 0ull; + auto parseWorker = [&](const size_t workerIx) -> void + { + SThreadAABB localAABB = {}; + while (true) + { + const size_t chunkIx = nextChunkIx.fetch_add(1ull, std::memory_order_relaxed); + if (chunkIx >= parseChunkCount) + break; + const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; + const uint64_t endTri = std::min(begin + parseChunkTriangles, triangleCount); + parseRange(begin, endTri, localAABB); + if (hashInParsePipeline) + std::atomic_ref(hashChunkReady[chunkIx]).store(1u, std::memory_order_release); + } if constexpr (ComputeAABBInParse) threadAABBs[workerIx] = localAABB; }; if (workerCount > 1ull) { - stlRunParallelWorkers(workerCount, [&](const size_t workerIx) - { - const uint64_t begin = (triangleCount * workerIx) / workerCount; - const uint64_t endTri = (triangleCount * (workerIx + 1ull)) / workerCount; - parseRange(workerIx, begin, endTri); - }); + stlRunParallelWorkers(workerCount, parseWorker); } else { - parseRange(0ull, 0ull, triangleCount); + parseWorker(0ull); + } + if (positionHashThread.joinable()) + positionHashThread.join(); + if (normalHashThread.joinable()) + normalHashThread.join(); + if (hashInParsePipeline) + { + if (!hashPipelineOk.load(std::memory_order_relaxed)) + return {}; + hashMs += positionHashPipelineMs + normalHashPipelineMs; + posView.src.buffer->setContentHash(parsedPositionHash); + normalView.src.buffer->setContentHash(parsedNormalHash); + contentHashesAssigned = true; } if constexpr (ComputeAABBInParse) { @@ -884,11 +986,11 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (vertexCount == 0ull) return {}; - if (_params.loaderFlags & IAssetLoader::ELPF_COMPUTE_CONTENT_HASHES) + if (computeContentHashes && !contentHashesAssigned) { const auto hashStart = clock_t::now(); stlRecomputeContentHashesParallel(geometry.get()); - hashMs = 
std::chrono::duration(clock_t::now() - hashStart).count(); + hashMs += std::chrono::duration(clock_t::now() - hashStart).count(); } const auto aabbStart = clock_t::now(); @@ -994,3 +1096,5 @@ bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste } #endif // _NBL_COMPILE_WITH_STL_LOADER_ + + diff --git a/src/nbl/core/hash/blake.cpp b/src/nbl/core/hash/blake.cpp index 043c28698d..c8516115da 100644 --- a/src/nbl/core/hash/blake.cpp +++ b/src/nbl/core/hash/blake.cpp @@ -1,29 +1,529 @@ #include "nbl/core/hash/blake.h" +#include +#include +#include +#include +#include + +extern "C" +{ +#include "blake3_impl.h" +} + namespace nbl::core { +namespace +{ + +struct output_t +{ + uint32_t input_cv[8]; + uint64_t counter; + uint8_t block[BLAKE3_BLOCK_LEN]; + uint8_t block_len; + uint8_t flags; +}; + +INLINE void chunk_state_init_local(blake3_chunk_state* self, const uint32_t key[8], uint8_t flags) +{ + std::memcpy(self->cv, key, BLAKE3_KEY_LEN); + self->chunk_counter = 0; + std::memset(self->buf, 0, BLAKE3_BLOCK_LEN); + self->buf_len = 0; + self->blocks_compressed = 0; + self->flags = flags; +} + +INLINE void chunk_state_reset_local(blake3_chunk_state* self, const uint32_t key[8], uint64_t chunk_counter) +{ + std::memcpy(self->cv, key, BLAKE3_KEY_LEN); + self->chunk_counter = chunk_counter; + self->blocks_compressed = 0; + std::memset(self->buf, 0, BLAKE3_BLOCK_LEN); + self->buf_len = 0; +} + +INLINE size_t chunk_state_len_local(const blake3_chunk_state* self) +{ + return (BLAKE3_BLOCK_LEN * static_cast(self->blocks_compressed)) + static_cast(self->buf_len); +} + +INLINE size_t chunk_state_fill_buf_local(blake3_chunk_state* self, const uint8_t* input, size_t input_len) +{ + size_t take = BLAKE3_BLOCK_LEN - static_cast(self->buf_len); + if (take > input_len) + take = input_len; + auto* const dest = self->buf + static_cast(self->buf_len); + std::memcpy(dest, input, take); + self->buf_len += static_cast(take); + return take; +} + +INLINE uint8_t 
chunk_state_maybe_start_flag_local(const blake3_chunk_state* self) +{ + return self->blocks_compressed == 0 ? CHUNK_START : 0; +} + +INLINE output_t make_output_local(const uint32_t input_cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags) +{ + output_t ret = {}; + std::memcpy(ret.input_cv, input_cv, 32); + std::memcpy(ret.block, block, BLAKE3_BLOCK_LEN); + ret.block_len = block_len; + ret.counter = counter; + ret.flags = flags; + return ret; +} + +INLINE void output_chaining_value_local(const output_t* self, uint8_t cv[32]) +{ + uint32_t cv_words[8]; + std::memcpy(cv_words, self->input_cv, 32); + blake3_compress_in_place(cv_words, self->block, self->block_len, self->counter, self->flags); + store_cv_words(cv, cv_words); +} + +INLINE void chunk_state_update_local(blake3_chunk_state* self, const uint8_t* input, size_t input_len) +{ + if (self->buf_len > 0) + { + size_t take = chunk_state_fill_buf_local(self, input, input_len); + input += take; + input_len -= take; + if (input_len > 0) + { + blake3_compress_in_place( + self->cv, + self->buf, + BLAKE3_BLOCK_LEN, + self->chunk_counter, + self->flags | chunk_state_maybe_start_flag_local(self)); + self->blocks_compressed += 1; + self->buf_len = 0; + std::memset(self->buf, 0, BLAKE3_BLOCK_LEN); + } + } + + while (input_len > BLAKE3_BLOCK_LEN) + { + blake3_compress_in_place( + self->cv, + input, + BLAKE3_BLOCK_LEN, + self->chunk_counter, + self->flags | chunk_state_maybe_start_flag_local(self)); + self->blocks_compressed += 1; + input += BLAKE3_BLOCK_LEN; + input_len -= BLAKE3_BLOCK_LEN; + } + + (void)chunk_state_fill_buf_local(self, input, input_len); +} + +INLINE output_t chunk_state_output_local(const blake3_chunk_state* self) +{ + const uint8_t block_flags = self->flags | chunk_state_maybe_start_flag_local(self) | CHUNK_END; + return make_output_local(self->cv, self->buf, self->buf_len, self->chunk_counter, block_flags); +} + +INLINE output_t parent_output_local(const 
uint8_t block[BLAKE3_BLOCK_LEN], const uint32_t key[8], uint8_t flags) +{ + return make_output_local(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT); +} + +INLINE size_t left_len_local(size_t content_len) +{ + const size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN; + return round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN; +} + +INLINE size_t compress_chunks_parallel_local( + const uint8_t* input, + size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, + uint8_t flags, + uint8_t* out) +{ + const uint8_t* chunks_array[MAX_SIMD_DEGREE]; + size_t input_position = 0; + size_t chunks_array_len = 0; + while (input_len - input_position >= BLAKE3_CHUNK_LEN) + { + chunks_array[chunks_array_len] = &input[input_position]; + input_position += BLAKE3_CHUNK_LEN; + chunks_array_len += 1; + } + + blake3_hash_many( + chunks_array, + chunks_array_len, + BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, + key, + chunk_counter, + true, + flags, + CHUNK_START, + CHUNK_END, + out); + + if (input_len > input_position) + { + const uint64_t counter = chunk_counter + static_cast(chunks_array_len); + blake3_chunk_state chunk_state = {}; + chunk_state_init_local(&chunk_state, key, flags); + chunk_state.chunk_counter = counter; + chunk_state_update_local(&chunk_state, &input[input_position], input_len - input_position); + const auto output = chunk_state_output_local(&chunk_state); + output_chaining_value_local(&output, &out[chunks_array_len * BLAKE3_OUT_LEN]); + return chunks_array_len + 1; + } + + return chunks_array_len; +} + +INLINE size_t compress_parents_parallel_local( + const uint8_t* child_chaining_values, + size_t num_chaining_values, + const uint32_t key[8], + uint8_t flags, + uint8_t* out) +{ + const uint8_t* parents_array[MAX_SIMD_DEGREE_OR_2]; + size_t parents_array_len = 0; + while (num_chaining_values - (2 * parents_array_len) >= 2) + { + parents_array[parents_array_len] = + &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN]; + parents_array_len 
+= 1; + } + + blake3_hash_many( + parents_array, + parents_array_len, + 1, + key, + 0, + false, + flags | PARENT, + 0, + 0, + out); + + if (num_chaining_values > 2 * parents_array_len) + { + std::memcpy( + &out[parents_array_len * BLAKE3_OUT_LEN], + &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN], + BLAKE3_OUT_LEN); + return parents_array_len + 1; + } + + return parents_array_len; +} + +constexpr size_t ParallelMinInputBytes = 1ull << 20; +constexpr size_t ParallelThreadGranularityBytes = 768ull << 10; +constexpr size_t ParallelSpawnMinSubtreeBytes = 512ull << 10; +constexpr uint32_t ParallelMaxThreads = 8u; +std::atomic_uint32_t g_parallelHashCalls = 0u; + +class SParallelCallGuard final +{ + public: + SParallelCallGuard() : m_active(g_parallelHashCalls.fetch_add(1u, std::memory_order_relaxed) + 1u) + { + } + + ~SParallelCallGuard() + { + g_parallelHashCalls.fetch_sub(1u, std::memory_order_relaxed); + } + + inline uint32_t activeCalls() const + { + return m_active; + } + + private: + uint32_t m_active = 1u; +}; + +size_t compress_subtree_wide_mt( + const uint8_t* input, + size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, + uint8_t flags, + uint8_t* out, + uint32_t threadBudget); + +INLINE void compress_subtree_to_parent_node_mt( + const uint8_t* input, + size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, + uint8_t flags, + uint8_t out[2 * BLAKE3_OUT_LEN], + uint32_t threadBudget) +{ + uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; + size_t num_cvs = compress_subtree_wide_mt(input, input_len, key, chunk_counter, flags, cv_array, threadBudget); + assert(num_cvs <= MAX_SIMD_DEGREE_OR_2); + +#if MAX_SIMD_DEGREE_OR_2 > 2 + uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2]; + while (num_cvs > 2) + { + num_cvs = compress_parents_parallel_local(cv_array, num_cvs, key, flags, out_array); + std::memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN); + } +#endif + + std::memcpy(out, cv_array, 2 * 
BLAKE3_OUT_LEN); +} + +size_t compress_subtree_wide_mt( + const uint8_t* input, + size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, + uint8_t flags, + uint8_t* out, + uint32_t threadBudget) +{ + if (input_len <= blake3_simd_degree() * BLAKE3_CHUNK_LEN) + return compress_chunks_parallel_local(input, input_len, key, chunk_counter, flags, out); + + const size_t left_input_len = left_len_local(input_len); + const size_t right_input_len = input_len - left_input_len; + const uint8_t* const right_input = &input[left_input_len]; + const uint64_t right_chunk_counter = chunk_counter + static_cast(left_input_len / BLAKE3_CHUNK_LEN); + + uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; + size_t degree = blake3_simd_degree(); + if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) + degree = 2; + uint8_t* const right_cvs = &cv_array[degree * BLAKE3_OUT_LEN]; + + size_t left_n = 0; + size_t right_n = 0; + bool spawned = false; + if ( + threadBudget > 1u && + left_input_len >= ParallelSpawnMinSubtreeBytes && + right_input_len >= ParallelSpawnMinSubtreeBytes) + { + try + { + uint32_t leftBudget = threadBudget / 2u; + if (leftBudget == 0u) + leftBudget = 1u; + uint32_t rightBudget = threadBudget - leftBudget; + if (rightBudget == 0u) + rightBudget = 1u; + + auto rightFuture = std::async(std::launch::async, [right_input, right_input_len, key, right_chunk_counter, flags, right_cvs, rightBudget]() -> size_t + { + return compress_subtree_wide_mt(right_input, right_input_len, key, right_chunk_counter, flags, right_cvs, rightBudget); + }); + left_n = compress_subtree_wide_mt(input, left_input_len, key, chunk_counter, flags, cv_array, leftBudget); + right_n = rightFuture.get(); + spawned = true; + } + catch (...) 
+ { + spawned = false; + } + } + + if (!spawned) + { + left_n = compress_subtree_wide_mt(input, left_input_len, key, chunk_counter, flags, cv_array, 1u); + right_n = compress_subtree_wide_mt(right_input, right_input_len, key, right_chunk_counter, flags, right_cvs, 1u); + } + + if (left_n == 1) + { + std::memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); + return 2; + } + + const size_t num_chaining_values = left_n + right_n; + return compress_parents_parallel_local(cv_array, num_chaining_values, key, flags, out); +} + +INLINE void hasher_merge_cv_stack_local(::blake3_hasher* self, uint64_t total_len) +{ + const size_t post_merge_stack_len = static_cast(popcnt(total_len)); + while (self->cv_stack_len > post_merge_stack_len) + { + auto* const parent_node = &self->cv_stack[(self->cv_stack_len - 2) * BLAKE3_OUT_LEN]; + const auto output = parent_output_local(parent_node, self->key, self->chunk.flags); + output_chaining_value_local(&output, parent_node); + self->cv_stack_len -= 1; + } +} + +INLINE void hasher_push_cv_local(::blake3_hasher* self, uint8_t new_cv[BLAKE3_OUT_LEN], uint64_t chunk_counter) +{ + hasher_merge_cv_stack_local(self, chunk_counter); + std::memcpy(&self->cv_stack[self->cv_stack_len * BLAKE3_OUT_LEN], new_cv, BLAKE3_OUT_LEN); + self->cv_stack_len += 1; +} + +void hasher_update_parallel(::blake3_hasher* self, const uint8_t* input_bytes, size_t input_len, uint32_t threadBudget) +{ + if (input_len == 0) + return; + + if (chunk_state_len_local(&self->chunk) > 0) + { + size_t take = BLAKE3_CHUNK_LEN - chunk_state_len_local(&self->chunk); + if (take > input_len) + take = input_len; + chunk_state_update_local(&self->chunk, input_bytes, take); + input_bytes += take; + input_len -= take; + if (input_len > 0) + { + const auto output = chunk_state_output_local(&self->chunk); + uint8_t chunk_cv[BLAKE3_OUT_LEN]; + output_chaining_value_local(&output, chunk_cv); + hasher_push_cv_local(self, chunk_cv, self->chunk.chunk_counter); + chunk_state_reset_local(&self->chunk, 
self->key, self->chunk.chunk_counter + 1); + } + else + { + return; + } + } + + while (input_len > BLAKE3_CHUNK_LEN) + { + size_t subtree_len = round_down_to_power_of_2(input_len); + const uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN; + while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) + subtree_len /= 2; + + const uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN; + if (subtree_len <= BLAKE3_CHUNK_LEN) + { + blake3_chunk_state chunk_state = {}; + chunk_state_init_local(&chunk_state, self->key, self->chunk.flags); + chunk_state.chunk_counter = self->chunk.chunk_counter; + chunk_state_update_local(&chunk_state, input_bytes, subtree_len); + const auto output = chunk_state_output_local(&chunk_state); + uint8_t cv[BLAKE3_OUT_LEN]; + output_chaining_value_local(&output, cv); + hasher_push_cv_local(self, cv, chunk_state.chunk_counter); + } + else + { + uint8_t cv_pair[2 * BLAKE3_OUT_LEN]; + compress_subtree_to_parent_node_mt( + input_bytes, + subtree_len, + self->key, + self->chunk.chunk_counter, + self->chunk.flags, + cv_pair, + threadBudget); + hasher_push_cv_local(self, cv_pair, self->chunk.chunk_counter); + hasher_push_cv_local(self, &cv_pair[BLAKE3_OUT_LEN], self->chunk.chunk_counter + (subtree_chunks / 2)); + } + self->chunk.chunk_counter += subtree_chunks; + input_bytes += subtree_len; + input_len -= subtree_len; + } + + if (input_len > 0) + { + chunk_state_update_local(&self->chunk, input_bytes, input_len); + hasher_merge_cv_stack_local(self, self->chunk.chunk_counter); + } +} + +INLINE uint32_t pick_parallel_budget(const size_t bytes) +{ + const uint32_t hw = std::thread::hardware_concurrency(); + if (hw <= 1u || bytes < ParallelMinInputBytes) + return 1u; + + const uint32_t maxBySize = static_cast(std::max(1ull, bytes / ParallelThreadGranularityBytes)); + uint32_t budget = std::min(hw, ParallelMaxThreads); + budget = std::min(budget, maxBySize); + return std::max(1u, budget); +} + +} + blake3_hasher::blake3_hasher() { - 
::blake3_hasher_init(&m_state); + ::blake3_hasher_init(&m_state); } blake3_hasher& blake3_hasher::update(const void* data, const size_t bytes) { - ::blake3_hasher_update(&m_state, data, bytes); - return *this; + ::blake3_hasher_update(&m_state, data, bytes); + return *this; } -void blake3_hasher::reset() { - ::blake3_hasher_reset(&m_state); +void blake3_hasher::reset() +{ + ::blake3_hasher_reset(&m_state); } blake3_hasher::operator blake3_hash_t() const { - blake3_hash_t retval; - // the blake3 docs say that the hasher can be finalized multiple times - ::blake3_hasher_finalize(&m_state, retval.data, sizeof(retval)); - return retval; + blake3_hash_t retval = {}; + ::blake3_hasher_finalize(&m_state, retval.data, sizeof(retval)); + return retval; +} + +blake3_hash_t blake3_hash_buffer(const void* data, size_t bytes) +{ + if (!data || bytes == 0ull) + return static_cast(blake3_hasher{}); + + uint32_t threadBudget = pick_parallel_budget(bytes); + if (threadBudget <= 1u) + { + blake3_hasher hasher; + hasher.update(data, bytes); + return static_cast(hasher); + } + + SParallelCallGuard guard; + const uint32_t activeCalls = std::max(1u, guard.activeCalls()); + const uint32_t hw = std::max(1u, std::thread::hardware_concurrency()); + const uint32_t hwShare = std::max(1u, hw / activeCalls); + threadBudget = std::min(threadBudget, hwShare); + if (threadBudget <= 1u) + { + blake3_hasher hasher; + hasher.update(data, bytes); + return static_cast(hasher); + } + + ::blake3_hasher hasherState = {}; + ::blake3_hasher_init(&hasherState); + hasher_update_parallel(&hasherState, reinterpret_cast(data), bytes, threadBudget); + blake3_hash_t retval = {}; + ::blake3_hasher_finalize(&hasherState, retval.data, sizeof(retval)); + return retval; +} + +blake3_hash_t blake3_hash_buffer_sequential(const void* data, size_t bytes) +{ + if (!data || bytes == 0ull) + return static_cast(blake3_hasher{}); + + blake3_hasher hasher; + hasher.update(data, bytes); + return static_cast(hasher); } } From 
765882fe8850d216589f8ce7f2fa210a4e3b3198 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Feb 2026 22:44:12 +0100 Subject: [PATCH 018/118] Optimize PLY fast face validation path --- .../asset/interchange/CPLYMeshFileLoader.cpp | 55 +++++++++---------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index bcaf8596e5..b46c9be738 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -1159,7 +1159,6 @@ struct SContext const uint8_t* in = ptr + beginFace * recordBytes; uint32_t* outLocal = out + beginFace * 3ull; uint32_t localMax = 0u; - uint32_t localSignBits = 0u; for (size_t faceIx = beginFace; faceIx < endFace; ++faceIx) { if (*in != 3u) @@ -1172,18 +1171,29 @@ struct SContext const uint32_t i0 = outLocal[0]; const uint32_t i1 = outLocal[1]; const uint32_t i2 = outLocal[2]; - if (isSrcS32) - localSignBits |= (i0 | i1 | i2); - if (i0 > localMax) localMax = i0; - if (i1 > localMax) localMax = i1; - if (i2 > localMax) localMax = i2; + const uint32_t triOr = (i0 | i1 | i2); + if (isSrcS32 && (triOr & 0x80000000u)) + { + workerInvalid[workerIx] = 1u; + break; + } + if (validateAgainstVertexCount) + { + if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) + { + workerInvalid[workerIx] = 1u; + break; + } + } + else if (needMax) + { + if (i0 > localMax) localMax = i0; + if (i1 > localMax) localMax = i1; + if (i2 > localMax) localMax = i2; + } in += 3ull * sizeof(uint32_t); outLocal += 3ull; } - if (isSrcS32 && (localSignBits & 0x80000000u)) - workerInvalid[workerIx] = 1u; - if (validateAgainstVertexCount && localMax >= vertexCount) - workerInvalid[workerIx] = 1u; if (needMax) workerMax[workerIx] = localMax; }; @@ -1245,7 +1255,6 @@ struct SContext } else { - uint32_t localMax = 0u; for (size_t j = 0u; j < element.Count; ++j) { const uint8_t c = *ptr++; @@ -1256,13 
+1265,10 @@ struct SContext } std::memcpy(out, ptr, 3ull * sizeof(uint32_t)); ptr += 3ull * sizeof(uint32_t); - if (out[0] > localMax) localMax = out[0]; - if (out[1] > localMax) localMax = out[1]; - if (out[2] > localMax) localMax = out[2]; + if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + return EFastFaceReadResult::Error; out += 3; } - if (!fallbackToGeneric && localMax >= vertexCount) - return EFastFaceReadResult::Error; } } else if (trackMaxIndex) @@ -1287,8 +1293,6 @@ struct SContext } else { - uint32_t localMax = 0u; - uint32_t localSignBits = 0u; for (size_t j = 0u; j < element.Count; ++j) { const uint8_t c = *ptr++; @@ -1299,18 +1303,12 @@ struct SContext } std::memcpy(out, ptr, 3ull * sizeof(uint32_t)); ptr += 3ull * sizeof(uint32_t); - localSignBits |= (out[0] | out[1] | out[2]); - if (out[0] > localMax) localMax = out[0]; - if (out[1] > localMax) localMax = out[1]; - if (out[2] > localMax) localMax = out[2]; - out += 3; - } - if (!fallbackToGeneric) - { - if (localSignBits & 0x80000000u) + const uint32_t triOr = (out[0] | out[1] | out[2]); + if (triOr & 0x80000000u) return EFastFaceReadResult::Error; - if (localMax >= vertexCount) + if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) return EFastFaceReadResult::Error; + out += 3; } } } @@ -2309,4 +2307,3 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } // end namespace nbl::asset #endif // _NBL_COMPILE_WITH_PLY_LOADER_ - From 2289231d7158ff9b74834ad0ef36562a6a74ab78 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Feb 2026 23:44:53 +0100 Subject: [PATCH 019/118] Pipeline PLY index hash and tune STL chunks --- .../asset/interchange/CPLYMeshFileLoader.cpp | 77 ++++++++++++++++++- .../asset/interchange/CSTLMeshFileLoader.cpp | 2 +- 2 files changed, 76 insertions(+), 3 deletions(-) diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 
b46c9be738..4ff49b6f2c 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -15,10 +15,12 @@ #include #include #include +#include #include #include #include #include +#include "nbl/core/hash/blake.h" #include "nbl/asset/IAssetManager.h" @@ -1095,7 +1097,15 @@ struct SContext Error }; - EFastFaceReadResult readFaceElementFast(const SElement& element, core::vector& _outIndices, uint32_t& _maxIndex, uint64_t& _faceCount, const uint32_t vertexCount) + EFastFaceReadResult readFaceElementFast( + const SElement& element, + core::vector& _outIndices, + uint32_t& _maxIndex, + uint64_t& _faceCount, + const uint32_t vertexCount, + const bool computeIndexHash, + core::blake3_hash_t& outIndexHash, + double& outIndexHashMs) { if (!IsBinaryFile || IsWrongEndian) return EFastFaceReadResult::NotApplicable; @@ -1120,6 +1130,7 @@ struct SContext const size_t indexSize = is32Bit ? sizeof(uint32_t) : sizeof(uint16_t); const bool hasVertexCount = vertexCount != 0u; const bool trackMaxIndex = !hasVertexCount; + outIndexHash = IPreHashed::INVALID_HASH; const size_t minTriangleRecordSize = sizeof(uint8_t) + indexSize * 3u; if (element.Count > (std::numeric_limits::max() / minTriangleRecordSize)) return EFastFaceReadResult::Error; @@ -1154,6 +1165,45 @@ struct SContext std::vector workerNonTriangle(workerCount, 0u); std::vector workerInvalid(workerCount, 0u); std::vector workerMax(needMax ? workerCount : 0ull, 0u); + const bool hashInParsePipeline = computeIndexHash; + std::vector workerReady(hashInParsePipeline ? workerCount : 0ull, 0u); + std::vector workerHashable(hashInParsePipeline ? 
workerCount : 0ull, 1u); + std::atomic_bool hashPipelineOk = true; + core::blake3_hash_t parsedIndexHash = IPreHashed::INVALID_HASH; + std::jthread hashThread; + if (hashInParsePipeline) + { + hashThread = std::jthread([&]() + { + try + { + core::blake3_hasher hasher; + const auto hashStart = std::chrono::high_resolution_clock::now(); + for (size_t workerIx = 0ull; workerIx < workerCount; ++workerIx) + { + auto ready = std::atomic_ref(workerReady[workerIx]); + while (ready.load(std::memory_order_acquire) == 0u) + std::this_thread::yield(); + if (workerHashable[workerIx] == 0u) + { + hashPipelineOk.store(false, std::memory_order_relaxed); + return; + } + + const size_t begin = (element.Count * workerIx) / workerCount; + const size_t end = (element.Count * (workerIx + 1ull)) / workerCount; + const size_t faceCount = end - begin; + hasher.update(out + begin * 3ull, faceCount * 3ull * sizeof(uint32_t)); + } + outIndexHashMs += std::chrono::duration(std::chrono::high_resolution_clock::now() - hashStart).count(); + parsedIndexHash = static_cast(hasher); + } + catch (...) 
+ { + hashPipelineOk.store(false, std::memory_order_relaxed); + } + }); + } auto parseChunk = [&](const size_t workerIx, const size_t beginFace, const size_t endFace) -> void { const uint8_t* in = ptr + beginFace * recordBytes; @@ -1164,6 +1214,8 @@ struct SContext if (*in != 3u) { workerNonTriangle[workerIx] = 1u; + if (hashInParsePipeline) + workerHashable[workerIx] = 0u; break; } ++in; @@ -1175,6 +1227,8 @@ struct SContext if (isSrcS32 && (triOr & 0x80000000u)) { workerInvalid[workerIx] = 1u; + if (hashInParsePipeline) + workerHashable[workerIx] = 0u; break; } if (validateAgainstVertexCount) @@ -1182,6 +1236,8 @@ struct SContext if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) { workerInvalid[workerIx] = 1u; + if (hashInParsePipeline) + workerHashable[workerIx] = 0u; break; } } @@ -1196,6 +1252,8 @@ struct SContext } if (needMax) workerMax[workerIx] = localMax; + if (hashInParsePipeline) + std::atomic_ref(workerReady[workerIx]).store(1u, std::memory_order_release); }; plyRunParallelWorkers(workerCount, [&](const size_t workerIx) { @@ -1203,6 +1261,8 @@ struct SContext const size_t end = (element.Count * (workerIx + 1ull)) / workerCount; parseChunk(workerIx, begin, end); }); + if (hashThread.joinable()) + hashThread.join(); const bool anyNonTriangle = std::any_of(workerNonTriangle.begin(), workerNonTriangle.end(), [](const uint8_t v) { return v != 0u; }); if (anyNonTriangle) @@ -1224,6 +1284,8 @@ struct SContext if (local > _maxIndex) _maxIndex = local; } + if (hashInParsePipeline && hashPipelineOk.load(std::memory_order_relaxed)) + outIndexHash = parsedIndexHash; StartPointer = reinterpret_cast(const_cast(ptr + element.Count * recordBytes)); _faceCount += element.Count; @@ -1588,6 +1650,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa uint64_t fastFaceElementCount = 0u; uint64_t fastVertexElementCount = 0u; uint32_t maxIndexRead = 0u; + core::blake3_hash_t precomputedIndexHash = IPreHashed::INVALID_HASH; const 
uint64_t fileSize = _file->getSize(); const bool hashInBuild = computeContentHashes && (fileSize <= (1ull << 20)); const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, fileSize, true); @@ -2150,7 +2213,15 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { const auto faceStart = clock_t::now(); const uint32_t vertexCount32 = vertCount <= static_cast(std::numeric_limits::max()) ? static_cast(vertCount) : 0u; - const auto fastFaceResult = ctx.readFaceElementFast(el,indices,maxIndexRead,faceCount,vertexCount32); + const auto fastFaceResult = ctx.readFaceElementFast( + el, + indices, + maxIndexRead, + faceCount, + vertexCount32, + computeContentHashes && !hashInBuild, + precomputedIndexHash, + hashRangeMs); if (fastFaceResult == SContext::EFastFaceReadResult::Success) { ++fastFaceElementCount; @@ -2229,6 +2300,8 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa auto view = plyCreateAdoptedU32IndexView(std::move(indices)); if (!view) return {}; + if (precomputedIndexHash != IPreHashed::INVALID_HASH) + view.src.buffer->setContentHash(precomputedIndexHash); geometry->setIndexView(std::move(view)); hashViewBufferIfNeeded(geometry->getIndexView()); } diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index eb72aaa0a9..c7bedad394 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -646,7 +646,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa std::vector threadAABBs(ComputeAABBInParse ? 
workerCount : 0ull); const uint64_t targetChunkCount = std::max(1ull, static_cast(workerCount) * 4ull); const uint64_t dynamicChunkTriangles = (triangleCount + targetChunkCount - 1ull) / targetChunkCount; - const uint64_t parseChunkTriangles = std::clamp(dynamicChunkTriangles, 1024ull, 8192ull); + const uint64_t parseChunkTriangles = std::clamp(dynamicChunkTriangles, 4096ull, 16384ull); const size_t parseChunkCount = static_cast((triangleCount + parseChunkTriangles - 1ull) / parseChunkTriangles); const bool hashInParsePipeline = computeContentHashes; std::vector hashChunkReady(hashInParsePipeline ? parseChunkCount : 0ull, 0u); From a3a0cb8141a3c5ac7b83685d4f7c8532619115e1 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Feb 2026 00:20:59 +0100 Subject: [PATCH 020/118] Use wait notify in PLY STL hash pipeline --- src/nbl/asset/interchange/CPLYMeshFileLoader.cpp | 12 ++++++++---- src/nbl/asset/interchange/CSTLMeshFileLoader.cpp | 14 +++++++++----- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 4ff49b6f2c..cde552319e 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "nbl/core/hash/blake.h" @@ -84,8 +85,7 @@ void plyRunParallelWorkers(const size_t workerCount, Fn&& fn) fn(0ull); return; } - core::vector workerIds(workerCount); - std::iota(workerIds.begin(), workerIds.end(), 0ull); + auto workerIds = std::views::iota(size_t{0ull}, workerCount); std::for_each(std::execution::par, workerIds.begin(), workerIds.end(), [&fn](const size_t workerIx) { fn(workerIx); @@ -1183,7 +1183,7 @@ struct SContext { auto ready = std::atomic_ref(workerReady[workerIx]); while (ready.load(std::memory_order_acquire) == 0u) - std::this_thread::yield(); + ready.wait(0u, std::memory_order_acquire); if 
(workerHashable[workerIx] == 0u) { hashPipelineOk.store(false, std::memory_order_relaxed); @@ -1253,7 +1253,11 @@ struct SContext if (needMax) workerMax[workerIx] = localMax; if (hashInParsePipeline) - std::atomic_ref(workerReady[workerIx]).store(1u, std::memory_order_release); + { + auto ready = std::atomic_ref(workerReady[workerIx]); + ready.store(1u, std::memory_order_release); + ready.notify_one(); + } }; plyRunParallelWorkers(workerCount, [&](const size_t workerIx) { diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index c7bedad394..ff5760b0dc 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -29,6 +29,7 @@ #include #include #include +#include namespace nbl::asset { @@ -83,8 +84,7 @@ void stlRunParallelWorkers(const size_t workerCount, Fn&& fn) fn(0ull); return; } - core::vector workerIds(workerCount); - std::iota(workerIds.begin(), workerIds.end(), 0ull); + auto workerIds = std::views::iota(size_t{0ull}, workerCount); std::for_each(std::execution::par, workerIds.begin(), workerIds.end(), [&fn](const size_t workerIx) { fn(workerIx); @@ -775,7 +775,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { auto ready = std::atomic_ref(hashChunkReady[chunkIx]); while (ready.load(std::memory_order_acquire) == 0u) - std::this_thread::yield(); + ready.wait(0u, std::memory_order_acquire); const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; const uint64_t endTri = std::min(begin + parseChunkTriangles, triangleCount); const size_t chunkTriangles = static_cast(endTri - begin); @@ -800,7 +800,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { auto ready = std::atomic_ref(hashChunkReady[chunkIx]); while (ready.load(std::memory_order_acquire) == 0u) - std::this_thread::yield(); + ready.wait(0u, std::memory_order_acquire); const uint64_t begin = 
static_cast(chunkIx) * parseChunkTriangles; const uint64_t endTri = std::min(begin + parseChunkTriangles, triangleCount); const size_t chunkTriangles = static_cast(endTri - begin); @@ -829,7 +829,11 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const uint64_t endTri = std::min(begin + parseChunkTriangles, triangleCount); parseRange(begin, endTri, localAABB); if (hashInParsePipeline) - std::atomic_ref(hashChunkReady[chunkIx]).store(1u, std::memory_order_release); + { + auto ready = std::atomic_ref(hashChunkReady[chunkIx]); + ready.store(1u, std::memory_order_release); + ready.notify_all(); + } } if constexpr (ComputeAABBInParse) threadAABBs[workerIx] = localAABB; From 73d348d67cc834363ea19edaafc68be0350458d8 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Feb 2026 10:14:48 +0100 Subject: [PATCH 021/118] Stabilize loader runtime tuning heuristics and hybrid mode --- include/nbl/asset/interchange/SFileIOPolicy.h | 21 ++ .../asset/interchange/SLoaderRuntimeTuning.h | 344 ++++++++++++++++++ .../asset/interchange/COBJMeshFileLoader.cpp | 74 +++- .../asset/interchange/CPLYMeshFileLoader.cpp | 54 ++- .../asset/interchange/CSTLMeshFileLoader.cpp | 105 ++++-- 5 files changed, 549 insertions(+), 49 deletions(-) create mode 100644 include/nbl/asset/interchange/SLoaderRuntimeTuning.h diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 50e9acd0d2..24e99584e2 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -14,6 +14,26 @@ namespace nbl::asset struct SFileIOPolicy { + struct SRuntimeTuning + { + enum class Mode : uint8_t + { + None, + Heuristic, + Hybrid + }; + + Mode mode = Mode::Heuristic; + float maxOverheadRatio = 0.05f; + float samplingBudgetRatio = 0.05f; + float minExpectedGainRatio = 0.03f; + uint32_t maxWorkers = 0u; + uint32_t samplingMaxCandidates = 4u; + uint32_t samplingPasses = 1u; + 
uint64_t samplingMinWorkUnits = 0ull; + uint32_t targetChunksPerWorker = 4u; + }; + enum class Strategy : uint8_t { Auto, @@ -26,6 +46,7 @@ struct SFileIOPolicy uint64_t wholeFileThresholdBytes = 64ull * 1024ull * 1024ull; uint64_t chunkSizeBytes = 4ull * 1024ull * 1024ull; uint64_t maxStagingBytes = 256ull * 1024ull * 1024ull; + SRuntimeTuning runtimeTuning = {}; }; struct SResolvedFileIOPolicy diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h new file mode 100644 index 0000000000..b6cea0cbc6 --- /dev/null +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -0,0 +1,344 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_LOADER_RUNTIME_TUNING_H_INCLUDED_ +#define _NBL_ASSET_S_LOADER_RUNTIME_TUNING_H_INCLUDED_ + + +#include "nbl/asset/interchange/SFileIOPolicy.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace nbl::asset +{ + +struct SLoaderRuntimeTuningRequest +{ + uint64_t inputBytes = 0ull; + uint64_t totalWorkUnits = 0ull; + uint64_t minWorkUnitsPerWorker = 1ull; + uint64_t minBytesPerWorker = 1ull; + uint32_t hardwareThreads = 0u; + uint32_t hardMaxWorkers = 0u; + uint32_t targetChunksPerWorker = 0u; + uint64_t minChunkWorkUnits = 1ull; + uint64_t maxChunkWorkUnits = std::numeric_limits::max(); + const uint8_t* sampleData = nullptr; + uint64_t sampleBytes = 0ull; + uint32_t samplePasses = 0u; + uint32_t sampleMaxCandidates = 0u; + uint64_t sampleMinWorkUnits = 0ull; +}; + +struct SLoaderRuntimeTuningResult +{ + size_t workerCount = 1ull; + uint64_t chunkWorkUnits = 1ull; + size_t chunkCount = 1ull; +}; + +constexpr uint64_t loaderRuntimeCeilDiv(const uint64_t numerator, const uint64_t denominator) +{ + return (numerator + denominator - 1ull) / denominator; +} + 
//! Returns the number of hardware threads to plan for.
//! `requested` overrides the detected value when non-zero; the result is never 0.
inline size_t resolveLoaderHardwareThreads(const uint32_t requested = 0u)
{
	const size_t detected = requested ? static_cast<size_t>(requested) : static_cast<size_t>(std::thread::hardware_concurrency());
	return detected ? detected : 1ull;
}

//! Runs `fn(workerIx)` once for every workerIx in [0, workerCount) and returns when all calls finished.
//! For workerCount <= 1 the single call `fn(0)` happens inline on the calling thread.
//! Otherwise worker 0 runs on the calling thread and every other worker gets its own thread, so all
//! workers are guaranteed to make progress concurrently. A parallel std::for_each over iota iterators
//! gives no such guarantee: the implementation picks the thread count itself and may even run the
//! range serially, which makes per-worker-count benchmarks meaningless and can deadlock workers that
//! wait on each other.
//! `fn` must not throw on the helper threads (that would call std::terminate).
template<typename Fn>
inline void loaderRuntimeDispatchWorkers(const size_t workerCount, Fn&& fn)
{
	if (workerCount <= 1ull)
	{
		fn(0ull);
		return;
	}

	// joins whatever was started, also when thread creation or worker 0 throws
	struct SJoinGuard
	{
		std::vector<std::thread>& threads;
		~SJoinGuard()
		{
			for (auto& thread : threads)
			{
				if (thread.joinable())
					thread.join();
			}
		}
	};

	std::vector<std::thread> helpers;
	helpers.reserve(workerCount - 1ull);
	{
		SJoinGuard joinGuard{ helpers };
		for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx)
			helpers.emplace_back([&fn, workerIx]() { fn(workerIx); });
		fn(size_t{ 0ull });
	}
}

//! Measures how long `workerCount` workers need to sum up `sampleBytes` bytes, `passes` times in a row
//! (at least one pass). Returns the accumulated wall time in nanoseconds, or 0 when there is nothing
//! to measure.
inline uint64_t loaderRuntimeBenchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes)
{
	if (!sampleData || sampleBytes == 0ull || workerCount == 0ull)
		return 0ull;

	using benchmark_clock_t = std::chrono::steady_clock;
	const uint32_t passCount = std::max(1u, passes);
	std::vector<uint64_t> partial(workerCount, 0ull);
	uint64_t elapsedNs = 0ull;
	for (uint32_t passIx = 0u; passIx < passCount; ++passIx)
	{
		const auto passStart = benchmark_clock_t::now();
		loaderRuntimeDispatchWorkers(workerCount, [&](const size_t workerIx)
		{
			// every worker reads its own contiguous slice of the sample
			const uint64_t begin = (sampleBytes * workerIx) / workerCount;
			const uint64_t end = (sampleBytes * (workerIx + 1ull)) / workerCount;
			const uint8_t* ptr = sampleData + begin;
			uint64_t local = 0ull;
			for (uint64_t i = 0ull, count = end - begin; i < count; ++i)
				local += static_cast<uint64_t>(ptr[i]);
			partial[workerIx] ^= local;
		});
		elapsedNs += static_cast<uint64_t>(std::chrono::duration_cast<std::chrono::nanoseconds>(benchmark_clock_t::now() - passStart).count());
	}

	// publish the sums so the compiler cannot drop the measured loop as dead code
	uint64_t reduced = 0ull;
	for (const uint64_t value : partial)
		reduced ^= value;
	static std::atomic<uint64_t> sink{ 0ull };
	sink.fetch_xor(reduced, std::memory_order_relaxed);
	return elapsedNs;
}

//! Timing summary of several loaderRuntimeBenchmarkSample() runs, all values in nanoseconds.
struct SLoaderRuntimeSampleStats
{
	uint64_t medianNs = 0ull;
	uint64_t minNs = 0ull;
	uint64_t maxNs = 0ull;
	uint64_t totalNs = 0ull;
};
uint8_t* const sampleData, + const uint64_t sampleBytes, + const size_t workerCount, + const uint32_t passes, + const uint32_t observations +) +{ + SLoaderRuntimeSampleStats stats = {}; + if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) + return stats; + + const uint32_t observationCount = std::max(1u, observations); + std::vector samples; + samples.reserve(observationCount); + + (void)loaderRuntimeBenchmarkSample(sampleData, sampleBytes, workerCount, 1u); + for (uint32_t obsIx = 0u; obsIx < observationCount; ++obsIx) + { + const uint64_t elapsedNs = loaderRuntimeBenchmarkSample(sampleData, sampleBytes, workerCount, passes); + if (elapsedNs == 0ull) + continue; + stats.totalNs += elapsedNs; + samples.push_back(elapsedNs); + } + + if (samples.empty()) + return SLoaderRuntimeSampleStats{}; + + std::sort(samples.begin(), samples.end()); + stats.minNs = samples.front(); + stats.maxNs = samples.back(); + if ((samples.size() & 1ull) != 0ull) + stats.medianNs = samples[samples.size() / 2ull]; + else + stats.medianNs = (samples[samples.size() / 2ull - 1ull] + samples[samples.size() / 2ull]) / 2ull; + return stats; +} + +inline void loaderRuntimeAppendCandidate(std::vector& dst, const size_t candidate) +{ + if (candidate == 0ull) + return; + if (std::find(dst.begin(), dst.end(), candidate) == dst.end()) + dst.push_back(candidate); +} + +inline SLoaderRuntimeTuningResult tuneLoaderRuntime(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request) +{ + using RTMode = SFileIOPolicy::SRuntimeTuning::Mode; + SLoaderRuntimeTuningResult result = {}; + if (request.totalWorkUnits == 0ull) + { + result.chunkWorkUnits = 0ull; + result.chunkCount = 0ull; + return result; + } + + const size_t hw = resolveLoaderHardwareThreads(request.hardwareThreads); + size_t maxWorkers = hw; + if (request.hardMaxWorkers > 0u) + maxWorkers = std::min(maxWorkers, static_cast(request.hardMaxWorkers)); + if (ioPolicy.runtimeTuning.maxWorkers > 0u) + maxWorkers = 
std::min(maxWorkers, static_cast(ioPolicy.runtimeTuning.maxWorkers)); + maxWorkers = std::max(1ull, maxWorkers); + + const uint64_t minWorkUnitsPerWorker = std::max(1ull, request.minWorkUnitsPerWorker); + const uint64_t minBytesPerWorker = std::max(1ull, request.minBytesPerWorker); + const size_t maxByWork = static_cast(loaderRuntimeCeilDiv(request.totalWorkUnits, minWorkUnitsPerWorker)); + const size_t maxByBytes = request.inputBytes ? static_cast(loaderRuntimeCeilDiv(request.inputBytes, minBytesPerWorker)) : maxWorkers; + const bool heuristicEnabled = ioPolicy.runtimeTuning.mode != RTMode::None; + const bool hybridEnabled = ioPolicy.runtimeTuning.mode == RTMode::Hybrid; + + size_t workerCount = 1ull; + if (heuristicEnabled) + workerCount = std::max(1ull, std::min({ maxWorkers, maxByWork, maxByBytes })); + + const size_t targetChunksPerWorker = std::max( + 1ull, + static_cast(request.targetChunksPerWorker ? request.targetChunksPerWorker : ioPolicy.runtimeTuning.targetChunksPerWorker)); + if (workerCount > 1ull && heuristicEnabled) + { + const double maxOverheadRatio = std::max(0.0, static_cast(ioPolicy.runtimeTuning.maxOverheadRatio)); + const double minExpectedGainRatio = std::clamp(static_cast(ioPolicy.runtimeTuning.minExpectedGainRatio), 0.0, 0.99); + while (workerCount > 1ull) + { + const double idealGain = 1.0 - (1.0 / static_cast(workerCount)); + const double overheadRatio = static_cast(workerCount * targetChunksPerWorker) / static_cast(std::max(1ull, request.totalWorkUnits)); + if (idealGain < minExpectedGainRatio || overheadRatio > maxOverheadRatio) + { + --workerCount; + continue; + } + break; + } + } + + const size_t heuristicWorkerCount = std::max(1ull, workerCount); + if ( + heuristicEnabled && + hybridEnabled && + request.sampleData != nullptr && + request.sampleBytes > 0ull && + heuristicWorkerCount > 1ull && + maxWorkers > 1ull + ) + { + const uint64_t autoMinSamplingWorkUnits = std::max( + static_cast(targetChunksPerWorker) * 8ull, + 
static_cast(maxWorkers * targetChunksPerWorker)); + const uint64_t minSamplingWorkUnits = request.sampleMinWorkUnits ? + request.sampleMinWorkUnits : + (ioPolicy.runtimeTuning.samplingMinWorkUnits ? ioPolicy.runtimeTuning.samplingMinWorkUnits : autoMinSamplingWorkUnits); + if (request.totalWorkUnits >= minSamplingWorkUnits) + { + const double samplingBudgetRatio = std::clamp(static_cast(ioPolicy.runtimeTuning.samplingBudgetRatio), 0.0, 0.5); + uint64_t effectiveSampleBytes = request.sampleBytes; + if (request.inputBytes) + effectiveSampleBytes = std::min(effectiveSampleBytes, request.inputBytes); + if (effectiveSampleBytes > 0ull && samplingBudgetRatio > 0.0) + { + // keep probing lightweight: sample fraction scales with input and parallelism + if (request.inputBytes > 0ull) + { + const uint64_t sampleDivisor = std::max( + 4ull, + static_cast(heuristicWorkerCount) * static_cast(targetChunksPerWorker)); + const uint64_t adaptiveSampleBytes = std::max(1ull, request.inputBytes / sampleDivisor); + effectiveSampleBytes = std::min(effectiveSampleBytes, adaptiveSampleBytes); + } + + const uint32_t samplePasses = request.samplePasses ? request.samplePasses : ioPolicy.runtimeTuning.samplingPasses; + uint32_t maxCandidates = request.sampleMaxCandidates ? request.sampleMaxCandidates : ioPolicy.runtimeTuning.samplingMaxCandidates; + maxCandidates = std::max(2u, maxCandidates); + + std::vector candidates; + candidates.reserve(maxCandidates); + loaderRuntimeAppendCandidate(candidates, heuristicWorkerCount); + loaderRuntimeAppendCandidate(candidates, heuristicWorkerCount > 1ull ? 
(heuristicWorkerCount - 1ull) : 1ull); + loaderRuntimeAppendCandidate(candidates, std::min(maxWorkers, heuristicWorkerCount + 1ull)); + if (heuristicWorkerCount > 2ull) + loaderRuntimeAppendCandidate(candidates, heuristicWorkerCount - 2ull); + if (heuristicWorkerCount + 2ull <= maxWorkers) + loaderRuntimeAppendCandidate(candidates, heuristicWorkerCount + 2ull); + if (candidates.size() > maxCandidates) + candidates.resize(maxCandidates); + + // probe heuristic first and only continue when budget can amortize additional probes + const auto heuristicStatsProbe = loaderRuntimeBenchmarkSampleStats( + request.sampleData, effectiveSampleBytes, heuristicWorkerCount, samplePasses, 2u); + if (heuristicStatsProbe.medianNs > 0ull) + { + const double scale = request.inputBytes ? + (static_cast(request.inputBytes) / static_cast(effectiveSampleBytes)) : + 1.0; + const uint64_t estimatedFullNs = static_cast(static_cast(heuristicStatsProbe.medianNs) * std::max(1.0, scale)); + const uint64_t samplingBudgetNs = static_cast(static_cast(estimatedFullNs) * samplingBudgetRatio); + uint64_t spentNs = heuristicStatsProbe.totalNs; + const size_t alternativeCandidates = (candidates.size() > 0ull) ? 
(candidates.size() - 1ull) : 0ull; + if (alternativeCandidates > 0ull && spentNs < samplingBudgetNs) + { + const uint64_t spareBudgetNs = samplingBudgetNs - spentNs; + const uint64_t estimatedEvalNs = std::max(1ull, heuristicStatsProbe.medianNs); + const uint64_t estimatedEvaluations = std::max(1ull, spareBudgetNs / estimatedEvalNs); + uint32_t observations = static_cast(std::clamp( + estimatedEvaluations / static_cast(alternativeCandidates), + 1ull, + 3ull)); + + SLoaderRuntimeSampleStats bestStats = heuristicStatsProbe; + size_t bestWorker = heuristicWorkerCount; + + for (const size_t candidate : candidates) + { + if (candidate == heuristicWorkerCount) + continue; + if (spentNs >= samplingBudgetNs) + break; + const auto candidateStats = loaderRuntimeBenchmarkSampleStats( + request.sampleData, effectiveSampleBytes, candidate, samplePasses, observations); + if (candidateStats.medianNs == 0ull) + continue; + spentNs += candidateStats.totalNs; + if (candidateStats.medianNs < bestStats.medianNs) + { + bestStats = candidateStats; + bestWorker = candidate; + } + } + + if (bestWorker != heuristicWorkerCount) + { + const double gain = static_cast(heuristicStatsProbe.medianNs - bestStats.medianNs) / + static_cast(heuristicStatsProbe.medianNs); + const uint64_t heuristicSpan = heuristicStatsProbe.maxNs - heuristicStatsProbe.minNs; + const uint64_t bestSpan = bestStats.maxNs - bestStats.minNs; + const double heuristicNoise = static_cast(heuristicSpan) / + static_cast(std::max(1ull, heuristicStatsProbe.medianNs)); + const double bestNoise = static_cast(bestSpan) / + static_cast(std::max(1ull, bestStats.medianNs)); + const double requiredGain = std::max( + std::clamp(static_cast(ioPolicy.runtimeTuning.minExpectedGainRatio), 0.0, 0.99), + std::clamp(std::max(heuristicNoise, bestNoise) * 1.25, 0.0, 0.99)); + if (gain >= requiredGain) + workerCount = bestWorker; + } + } + } + } + } + } + + result.workerCount = std::max(1ull, workerCount); + + const uint64_t minChunkWorkUnits = 
std::max(1ull, request.minChunkWorkUnits); + uint64_t maxChunkWorkUnits = std::max(minChunkWorkUnits, request.maxChunkWorkUnits); + const uint64_t desiredChunkCount = static_cast(std::max(1ull, result.workerCount * targetChunksPerWorker)); + uint64_t chunkWorkUnits = loaderRuntimeCeilDiv(request.totalWorkUnits, desiredChunkCount); + chunkWorkUnits = std::clamp(chunkWorkUnits, minChunkWorkUnits, maxChunkWorkUnits); + + result.chunkWorkUnits = chunkWorkUnits; + result.chunkCount = static_cast(loaderRuntimeCeilDiv(request.totalWorkUnits, chunkWorkUnits)); + return result; +} + +} + + +#endif diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index a0590f3c9a..3aca293892 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -6,6 +6,7 @@ #include "nbl/core/declarations.h" #include "nbl/asset/IAssetManager.h" +#include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" #ifdef _NBL_COMPILE_WITH_OBJ_LOADER_ @@ -16,6 +17,7 @@ #include #include +#include #include #include #include @@ -496,22 +498,62 @@ NBL_FORCE_INLINE bool parseObjTrianglePositiveTripletLine(const char* const line return false; int32_t posIx = -1; - if (!parseObjPositiveIndexBounded(ptr, lineEnd, posCount, posIx)) - return false; + { + uint32_t value = 0u; + while (ptr < lineEnd && isObjDigit(*ptr)) + { + const uint32_t digit = static_cast(*ptr - '0'); + if (value > 429496729u) + return false; + value = value * 10u + digit; + ++ptr; + } + if (value == 0u || value > posCount) + return false; + posIx = static_cast(value - 1u); + } if (ptr >= lineEnd || *ptr != '/') return false; ++ptr; int32_t uvIx = -1; - if (!parseObjPositiveIndexBounded(ptr, lineEnd, uvCount, uvIx)) - return false; + { + uint32_t value = 0u; + if (ptr >= lineEnd || !isObjDigit(*ptr)) + return false; + while (ptr < lineEnd && isObjDigit(*ptr)) + { + const 
uint32_t digit = static_cast(*ptr - '0'); + if (value > 429496729u) + return false; + value = value * 10u + digit; + ++ptr; + } + if (value == 0u || value > uvCount) + return false; + uvIx = static_cast(value - 1u); + } if (ptr >= lineEnd || *ptr != '/') return false; ++ptr; int32_t normalIx = -1; - if (!parseObjPositiveIndexBounded(ptr, lineEnd, normalCount, normalIx)) - return false; + { + uint32_t value = 0u; + if (ptr >= lineEnd || !isObjDigit(*ptr)) + return false; + while (ptr < lineEnd && isObjDigit(*ptr)) + { + const uint32_t digit = static_cast(*ptr - '0'); + if (value > 429496729u) + return false; + value = value * 10u + digit; + ++ptr; + } + if (value == 0u || value > normalCount) + return false; + normalIx = static_cast(value - 1u); + } int32_t* const dst = out[corner]; dst[0] = posIx; @@ -811,8 +853,22 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as int32_t normal = -1; uint32_t outIndex = 0u; }; - static constexpr size_t DedupHotEntryCount = 2048ull; - std::array dedupHotCache = {}; + const size_t hw = resolveLoaderHardwareThreads(); + SLoaderRuntimeTuningRequest dedupTuningRequest = {}; + dedupTuningRequest.inputBytes = static_cast(filesize); + dedupTuningRequest.totalWorkUnits = estimatedOutVertexCount; + dedupTuningRequest.hardwareThreads = static_cast(hw); + dedupTuningRequest.hardMaxWorkers = static_cast(hw); + dedupTuningRequest.targetChunksPerWorker = 1u; + dedupTuningRequest.sampleData = reinterpret_cast(buf); + dedupTuningRequest.sampleBytes = std::min(static_cast(filesize), 128ull << 10); + const auto dedupTuning = tuneLoaderRuntime(_params.ioPolicy, dedupTuningRequest); + const size_t dedupHotSeed = std::max( + 16ull, + estimatedOutVertexCount / std::max(1ull, dedupTuning.workerCount * 8ull)); + const size_t dedupHotEntryCount = std::bit_ceil(dedupHotSeed); + core::vector dedupHotCache(dedupHotEntryCount); + const size_t dedupHotMask = dedupHotEntryCount - 1ull; bool hasNormals = false; bool hasUVs = 
false; @@ -920,7 +976,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as static_cast(posIx) * 73856093u ^ static_cast(uvIx) * 19349663u ^ static_cast(normalIx) * 83492791u; - auto& hotEntry = dedupHotCache[hotHash & static_cast(DedupHotEntryCount - 1ull)]; + auto& hotEntry = dedupHotCache[static_cast(hotHash) & dedupHotMask]; if (hotEntry.pos == posIx && hotEntry.uv == uvIx && hotEntry.normal == normalIx) { outIx = hotEntry.outIndex; diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index cde552319e..24d870cf33 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -7,6 +7,7 @@ #include "CPLYMeshFileLoader.h" #include "nbl/asset/metadata/CPLYMetadata.h" +#include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include #include @@ -204,7 +205,7 @@ IGeometry::SDataView plyCreateAdoptedU16IndexView(core::vector(1ull, static_cast(totalBytes / (2ull << 20))); - const size_t workerCount = hw ? std::min({ hw, pending.size(), maxWorkersByBytes }) : 1ull; + const size_t hw = resolveLoaderHardwareThreads(); + const uint8_t* hashSampleData = nullptr; + uint64_t hashSampleBytes = 0ull; + for (const auto pendingIx : pending) + { + auto& buffer = buffers[pendingIx]; + const auto* ptr = reinterpret_cast(buffer->getPointer()); + if (!ptr) + continue; + hashSampleData = ptr; + hashSampleBytes = std::min(static_cast(buffer->getSize()), 128ull << 10); + if (hashSampleBytes > 0ull) + break; + } + SLoaderRuntimeTuningRequest tuningRequest = {}; + tuningRequest.inputBytes = totalBytes; + tuningRequest.totalWorkUnits = pending.size(); + tuningRequest.minBytesPerWorker = std::max(1ull, pending.empty() ? 
1ull : loaderRuntimeCeilDiv(totalBytes, static_cast(pending.size()))); + tuningRequest.hardwareThreads = static_cast(hw); + tuningRequest.hardMaxWorkers = static_cast(std::min(pending.size(), hw)); + tuningRequest.targetChunksPerWorker = 1u; + tuningRequest.sampleData = hashSampleData; + tuningRequest.sampleBytes = hashSampleBytes; + const auto tuning = tuneLoaderRuntime(ioPolicy, tuningRequest); + const size_t workerCount = std::min(tuning.workerCount, pending.size()); if (workerCount > 1ull) { plyRunParallelWorkers(workerCount, [&](const size_t workerIx) @@ -1150,16 +1173,21 @@ struct SContext bool fallbackToGeneric = false; if (is32Bit) { - const size_t hw = std::thread::hardware_concurrency(); - constexpr size_t FaceParseBytesPerWorkerTarget = 512ull << 10; - constexpr size_t FaceParseFacesPerWorkerTarget = 32768ull; - const size_t maxWorkersByBytes = std::max(1ull, (minBytesNeeded + FaceParseBytesPerWorkerTarget - 1ull) / FaceParseBytesPerWorkerTarget); - const size_t maxWorkersByFaces = std::max(1ull, (element.Count + FaceParseFacesPerWorkerTarget - 1ull) / FaceParseFacesPerWorkerTarget); - const size_t maxWorkersByWork = std::min(maxWorkersByBytes, maxWorkersByFaces); - size_t workerCount = hw ? 
std::min(hw, maxWorkersByWork) : 1ull; + const size_t hw = resolveLoaderHardwareThreads(); + const size_t recordBytes = sizeof(uint8_t) + 3ull * sizeof(uint32_t); + SLoaderRuntimeTuningRequest faceTuningRequest = {}; + faceTuningRequest.inputBytes = minBytesNeeded; + faceTuningRequest.totalWorkUnits = element.Count; + faceTuningRequest.minBytesPerWorker = recordBytes; + faceTuningRequest.hardwareThreads = static_cast(hw); + faceTuningRequest.hardMaxWorkers = static_cast(hw); + faceTuningRequest.targetChunksPerWorker = 4u; + faceTuningRequest.sampleData = ptr; + faceTuningRequest.sampleBytes = std::min(minBytesNeeded, 128ull << 10); + const auto faceTuning = tuneLoaderRuntime(inner.params.ioPolicy, faceTuningRequest); + size_t workerCount = std::min(faceTuning.workerCount, element.Count); if (workerCount > 1ull) { - const size_t recordBytes = sizeof(uint8_t) + 3ull * sizeof(uint32_t); const bool needMax = trackMaxIndex; const bool validateAgainstVertexCount = hasVertexCount; std::vector workerNonTriangle(workerCount, 0u); @@ -2317,7 +2345,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (deferredPositionHashThread.joinable()) deferredPositionHashThread.join(); const auto hashStart = clock_t::now(); - plyRecomputeContentHashesParallel(geometry.get()); + plyRecomputeContentHashesParallel(geometry.get(), _params.ioPolicy); hashRangeMs += std::chrono::duration(clock_t::now() - hashStart).count(); } else diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index ff5760b0dc..1438f1e293 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -7,6 +7,7 @@ #ifdef _NBL_COMPILE_WITH_STL_LOADER_ +#include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/asset.h" #include "nbl/asset/metadata/CSTLMetadata.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" @@ -308,7 +309,7 @@ 
ICPUPolygonGeometry::SDataView stlCreateAdoptedFloat3View(core::vector(buffer->getSize()); - const size_t hw = std::thread::hardware_concurrency(); - const size_t workerCount = hw ? std::min(hw, buffers.size()) : 1ull; - if (workerCount > 1ull && totalBytes >= (2ull << 20)) + const size_t hw = resolveLoaderHardwareThreads(); + const uint8_t* hashSampleData = nullptr; + uint64_t hashSampleBytes = 0ull; + for (const auto& buffer : buffers) + { + const auto* ptr = reinterpret_cast(buffer->getPointer()); + if (!ptr) + continue; + hashSampleData = ptr; + hashSampleBytes = std::min(static_cast(buffer->getSize()), 128ull << 10); + if (hashSampleBytes > 0ull) + break; + } + SLoaderRuntimeTuningRequest tuningRequest = {}; + tuningRequest.inputBytes = totalBytes; + tuningRequest.totalWorkUnits = buffers.size(); + tuningRequest.minBytesPerWorker = std::max(1ull, buffers.empty() ? 1ull : loaderRuntimeCeilDiv(totalBytes, static_cast(buffers.size()))); + tuningRequest.hardwareThreads = static_cast(hw); + tuningRequest.hardMaxWorkers = static_cast(std::min(hw, buffers.size())); + tuningRequest.targetChunksPerWorker = 1u; + tuningRequest.sampleData = hashSampleData; + tuningRequest.sampleBytes = hashSampleBytes; + const auto tuning = tuneLoaderRuntime(ioPolicy, tuningRequest); + const size_t workerCount = std::min(tuning.workerCount, buffers.size()); + if (workerCount > 1ull) { stlRunParallelWorkers(workerCount, [&buffers, workerCount](const size_t workerIx) { @@ -624,14 +647,20 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const uint8_t* const end = cursor + dataSize; if (end < cursor || static_cast(end - cursor) < static_cast(triangleCount) * StlTriangleRecordBytes) return {}; - const size_t hw = std::thread::hardware_concurrency(); - constexpr size_t StlParseBytesPerWorkerTarget = 384ull << 10; - constexpr uint64_t StlParseTrianglesPerWorkerTarget = 8192ull; - const size_t maxWorkersByBytes = std::max(1ull, (dataSize + 
StlParseBytesPerWorkerTarget - 1ull) / StlParseBytesPerWorkerTarget); - const size_t maxWorkersByTriangles = std::max(1ull, static_cast((triangleCount + StlParseTrianglesPerWorkerTarget - 1ull) / StlParseTrianglesPerWorkerTarget)); - const size_t maxWorkersByWork = std::min(maxWorkersByBytes, maxWorkersByTriangles); - const size_t parseHwBudget = hw; - const size_t workerCount = parseHwBudget ? std::max(1ull, std::min(parseHwBudget, maxWorkersByWork)) : 1ull; + const size_t hw = resolveLoaderHardwareThreads(); + SLoaderRuntimeTuningRequest parseTuningRequest = {}; + parseTuningRequest.inputBytes = dataSize; + parseTuningRequest.totalWorkUnits = triangleCount; + parseTuningRequest.minBytesPerWorker = StlTriangleRecordBytes; + parseTuningRequest.hardwareThreads = static_cast(hw); + parseTuningRequest.hardMaxWorkers = static_cast(hw); + parseTuningRequest.targetChunksPerWorker = 4u; + parseTuningRequest.minChunkWorkUnits = 1ull; + parseTuningRequest.maxChunkWorkUnits = std::max(1ull, triangleCount); + parseTuningRequest.sampleData = payloadData; + parseTuningRequest.sampleBytes = std::min(dataSize, 128ull << 10); + const auto parseTuning = tuneLoaderRuntime(_params.ioPolicy, parseTuningRequest); + const size_t workerCount = std::max(1ull, std::min(parseTuning.workerCount, static_cast(std::max(1ull, triangleCount)))); static constexpr bool ComputeAABBInParse = true; struct SThreadAABB { @@ -644,10 +673,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa float maxZ = 0.f; }; std::vector threadAABBs(ComputeAABBInParse ? 
workerCount : 0ull); - const uint64_t targetChunkCount = std::max(1ull, static_cast(workerCount) * 4ull); - const uint64_t dynamicChunkTriangles = (triangleCount + targetChunkCount - 1ull) / targetChunkCount; - const uint64_t parseChunkTriangles = std::clamp(dynamicChunkTriangles, 4096ull, 16384ull); - const size_t parseChunkCount = static_cast((triangleCount + parseChunkTriangles - 1ull) / parseChunkTriangles); + const uint64_t parseChunkTriangles = std::max(1ull, parseTuning.chunkWorkUnits); + const size_t parseChunkCount = static_cast(loaderRuntimeCeilDiv(triangleCount, parseChunkTriangles)); const bool hashInParsePipeline = computeContentHashes; std::vector hashChunkReady(hashInParsePipeline ? parseChunkCount : 0ull, 0u); double positionHashPipelineMs = 0.0; @@ -771,16 +798,28 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { core::blake3_hasher positionHasher; const auto hashThreadStart = clock_t::now(); - for (size_t chunkIx = 0ull; chunkIx < parseChunkCount; ++chunkIx) + size_t chunkIx = 0ull; + while (chunkIx < parseChunkCount) { auto ready = std::atomic_ref(hashChunkReady[chunkIx]); while (ready.load(std::memory_order_acquire) == 0u) ready.wait(0u, std::memory_order_acquire); + + size_t runEnd = chunkIx + 1ull; + while (runEnd < parseChunkCount) + { + const auto runReady = std::atomic_ref(hashChunkReady[runEnd]).load(std::memory_order_acquire); + if (runReady == 0u) + break; + ++runEnd; + } + const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; - const uint64_t endTri = std::min(begin + parseChunkTriangles, triangleCount); - const size_t chunkTriangles = static_cast(endTri - begin); - const size_t chunkBytes = chunkTriangles * StlVerticesPerTriangle * StlFloatChannelsPerVertex * sizeof(float); - positionHasher.update(posOutFloat + begin * StlVerticesPerTriangle * StlFloatChannelsPerVertex, chunkBytes); + const uint64_t endTri = std::min(static_cast(runEnd) * parseChunkTriangles, triangleCount); + const 
size_t runTriangles = static_cast(endTri - begin); + const size_t runBytes = runTriangles * StlVerticesPerTriangle * StlFloatChannelsPerVertex * sizeof(float); + positionHasher.update(posOutFloat + begin * StlVerticesPerTriangle * StlFloatChannelsPerVertex, runBytes); + chunkIx = runEnd; } positionHashPipelineMs = std::chrono::duration(clock_t::now() - hashThreadStart).count(); parsedPositionHash = static_cast(positionHasher); @@ -796,16 +835,28 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { core::blake3_hasher normalHasher; const auto hashThreadStart = clock_t::now(); - for (size_t chunkIx = 0ull; chunkIx < parseChunkCount; ++chunkIx) + size_t chunkIx = 0ull; + while (chunkIx < parseChunkCount) { auto ready = std::atomic_ref(hashChunkReady[chunkIx]); while (ready.load(std::memory_order_acquire) == 0u) ready.wait(0u, std::memory_order_acquire); + + size_t runEnd = chunkIx + 1ull; + while (runEnd < parseChunkCount) + { + const auto runReady = std::atomic_ref(hashChunkReady[runEnd]).load(std::memory_order_acquire); + if (runReady == 0u) + break; + ++runEnd; + } + const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; - const uint64_t endTri = std::min(begin + parseChunkTriangles, triangleCount); - const size_t chunkTriangles = static_cast(endTri - begin); - const size_t chunkBytes = chunkTriangles * StlVerticesPerTriangle * StlFloatChannelsPerVertex * sizeof(float); - normalHasher.update(normalOutFloat + begin * StlVerticesPerTriangle * StlFloatChannelsPerVertex, chunkBytes); + const uint64_t endTri = std::min(static_cast(runEnd) * parseChunkTriangles, triangleCount); + const size_t runTriangles = static_cast(endTri - begin); + const size_t runBytes = runTriangles * StlVerticesPerTriangle * StlFloatChannelsPerVertex * sizeof(float); + normalHasher.update(normalOutFloat + begin * StlVerticesPerTriangle * StlFloatChannelsPerVertex, runBytes); + chunkIx = runEnd; } normalHashPipelineMs = 
std::chrono::duration(clock_t::now() - hashThreadStart).count(); parsedNormalHash = static_cast(normalHasher); @@ -993,7 +1044,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (computeContentHashes && !contentHashesAssigned) { const auto hashStart = clock_t::now(); - stlRecomputeContentHashesParallel(geometry.get()); + stlRecomputeContentHashesParallel(geometry.get(), _params.ioPolicy); hashMs += std::chrono::duration(clock_t::now() - hashStart).count(); } From 4778068db6f6b4bb3d737479b6383ea27b5ceb5b Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Feb 2026 11:17:58 +0100 Subject: [PATCH 022/118] Tune STL runtime worker limits --- src/nbl/asset/interchange/CSTLMeshFileLoader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 1438f1e293..c8cced9950 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -653,8 +653,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa parseTuningRequest.totalWorkUnits = triangleCount; parseTuningRequest.minBytesPerWorker = StlTriangleRecordBytes; parseTuningRequest.hardwareThreads = static_cast(hw); - parseTuningRequest.hardMaxWorkers = static_cast(hw); - parseTuningRequest.targetChunksPerWorker = 4u; + parseTuningRequest.hardMaxWorkers = static_cast(std::max(1ull, hw > 2ull ? 
(hw - 2ull) : hw)); + parseTuningRequest.targetChunksPerWorker = 2u; parseTuningRequest.minChunkWorkUnits = 1ull; parseTuningRequest.maxChunkWorkUnits = std::max(1ull, triangleCount); parseTuningRequest.sampleData = payloadData; From e8123bcfec08488dad83af39a5c8d33223d571b4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Feb 2026 12:11:02 +0100 Subject: [PATCH 023/118] Refactor interchange IO and geometry hash helpers --- .../interchange/SGeometryContentHashCommon.h | 128 +++++++++++++ .../asset/interchange/SInterchangeIOCommon.h | 170 ++++++++++++++++ .../asset/interchange/COBJMeshFileLoader.cpp | 82 +------- src/nbl/asset/interchange/COBJMeshWriter.cpp | 76 +------- .../asset/interchange/CPLYMeshFileLoader.cpp | 94 +-------- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 108 +---------- .../asset/interchange/CSTLMeshFileLoader.cpp | 181 ++---------------- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 75 +------- 8 files changed, 331 insertions(+), 583 deletions(-) create mode 100644 include/nbl/asset/interchange/SGeometryContentHashCommon.h create mode 100644 include/nbl/asset/interchange/SInterchangeIOCommon.h diff --git a/include/nbl/asset/interchange/SGeometryContentHashCommon.h b/include/nbl/asset/interchange/SGeometryContentHashCommon.h new file mode 100644 index 0000000000..8ce5e4dfbe --- /dev/null +++ b/include/nbl/asset/interchange/SGeometryContentHashCommon.h @@ -0,0 +1,128 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_GEOMETRY_CONTENT_HASH_COMMON_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_CONTENT_HASH_COMMON_H_INCLUDED_ + + +#include "nbl/asset/ICPUPolygonGeometry.h" +#include "nbl/asset/interchange/SLoaderRuntimeTuning.h" + +#include +#include +#include + + +namespace nbl::asset +{ + +/* Rebuilds buffers as the list of distinct buffers referenced by the geometry's position, index, normal, aux-attribute, joint-weight (indices and weights) and joint-OBB views. The vector is cleared first and stays empty when geometry is null. */ inline void collectGeometryBuffers( + ICPUPolygonGeometry* geometry, + core::vector>& buffers) +{ + buffers.clear(); + if (!geometry) + return; + + auto appendViewBuffer = [&buffers](const IGeometry::SDataView& view) -> void + { + /* ignore views that test false or carry no buffer; the linear scan below keeps each buffer pointer in the list at most once */ if (!view || !view.src.buffer) + return; + for (const auto& existing : buffers) + { + if (existing.get() == view.src.buffer.get()) + return; + } + buffers.push_back(core::smart_refctd_ptr(view.src.buffer)); + }; + + appendViewBuffer(geometry->getPositionView()); + appendViewBuffer(geometry->getIndexView()); + appendViewBuffer(geometry->getNormalView()); + for (const auto& view : *geometry->getAuxAttributeViews()) + appendViewBuffer(view); + for (const auto& view : *geometry->getJointWeightViews()) + { + appendViewBuffer(view.indices); + appendViewBuffer(view.weights); + } + if (auto jointOBB = geometry->getJointOBBView(); jointOBB) + appendViewBuffer(*jointOBB); +} + +/* Computes and stores a content hash for every buffer of geometry whose hash is still IPreHashed::INVALID_HASH. The work is handed to loaderRuntimeDispatchWorkers when tuneLoaderRuntime returns more than one worker, otherwise it runs in a plain loop. Returns early for a null geometry, an empty buffer list, or when no buffer is pending. */ inline void recomputeGeometryContentHashesParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) +{ + if (!geometry) + return; + + core::vector> buffers; + collectGeometryBuffers(geometry, buffers); + if (buffers.empty()) + return; + + core::vector pending; + pending.reserve(buffers.size()); + uint64_t totalBytes = 0ull; + /* pending collects indices into buffers that still need hashing; totalBytes sums the sizes of exactly those buffers */ for (size_t i = 0ull; i < buffers.size(); ++i) + { + auto& buffer = buffers[i]; + if (!buffer || buffer->getContentHash() != IPreHashed::INVALID_HASH) + continue; + totalBytes += static_cast(buffer->getSize()); + pending.push_back(i); + } + if (pending.empty()) + return; + + const size_t hw = resolveLoaderHardwareThreads(); + const uint8_t* hashSampleData = nullptr; + uint64_t 
hashSampleBytes = 0ull; + /* choose the sample passed to the tuner: scan pending buffers with a non-null pointer and stop at the first whose size is non-zero; the sample length is capped at 128ull << 10 bytes */ for (const auto pendingIx : pending) + { + auto& buffer = buffers[pendingIx]; + const auto* ptr = reinterpret_cast(buffer->getPointer()); + if (!ptr) + continue; + hashSampleData = ptr; + hashSampleBytes = std::min(static_cast(buffer->getSize()), 128ull << 10); + if (hashSampleBytes > 0ull) + break; + } + + SLoaderRuntimeTuningRequest tuningRequest = {}; + tuningRequest.inputBytes = totalBytes; + tuningRequest.totalWorkUnits = pending.size(); + tuningRequest.minBytesPerWorker = std::max(1ull, loaderRuntimeCeilDiv(totalBytes, static_cast(pending.size()))); + tuningRequest.hardwareThreads = static_cast(hw); + tuningRequest.hardMaxWorkers = static_cast(std::min(pending.size(), hw)); + tuningRequest.targetChunksPerWorker = 1u; + tuningRequest.sampleData = hashSampleData; + tuningRequest.sampleBytes = hashSampleBytes; + const auto tuning = tuneLoaderRuntime(ioPolicy, tuningRequest); + const size_t workerCount = std::min(tuning.workerCount, pending.size()); + + /* multi-worker path: worker workerIx hashes the contiguous slice [beginIx, endIx) of pending; slices are split by entry count, not by byte size, and each buffer index appears in pending once so no two workers touch the same buffer */ if (workerCount > 1ull) + { + loaderRuntimeDispatchWorkers(workerCount, [&](const size_t workerIx) + { + const size_t beginIx = (pending.size() * workerIx) / workerCount; + const size_t endIx = (pending.size() * (workerIx + 1ull)) / workerCount; + for (size_t i = beginIx; i < endIx; ++i) + { + auto& buffer = buffers[pending[i]]; + buffer->setContentHash(buffer->computeContentHash()); + } + }); + return; + } + + /* fallback when the tuner grants at most one worker: hash the pending buffers in order inside this call */ for (const auto pendingIx : pending) + { + auto& buffer = buffers[pendingIx]; + buffer->setContentHash(buffer->computeContentHash()); + } +} + +} + +#endif diff --git a/include/nbl/asset/interchange/SInterchangeIOCommon.h b/include/nbl/asset/interchange/SInterchangeIOCommon.h new file mode 100644 index 0000000000..a7e30f23db --- /dev/null +++ b/include/nbl/asset/interchange/SInterchangeIOCommon.h @@ -0,0 +1,170 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_INTERCHANGE_IO_COMMON_H_INCLUDED_ +#define _NBL_ASSET_S_INTERCHANGE_IO_COMMON_H_INCLUDED_ + + +#include "nbl/asset/interchange/SFileIOPolicy.h" +#include "nbl/system/IFile.h" + +#include +#include +#include +#include +#include + + +namespace nbl::asset +{ + +/* Running statistics over file I/O calls: how many calls were recorded, how many bytes they moved in total, and the smallest single transfer. getMinOrZero and getAvgOrZero return 0 while no call has been recorded. */ struct SFileIOTelemetry +{ + uint64_t callCount = 0ull; + uint64_t totalBytes = 0ull; + uint64_t minBytes = std::numeric_limits::max(); + + inline void account(const uint64_t bytes) + { + ++callCount; + totalBytes += bytes; + if (bytes < minBytes) + minBytes = bytes; + } + + inline uint64_t getMinOrZero() const + { + return callCount ? minBytes : 0ull; + } + + inline uint64_t getAvgOrZero() const + { + return callCount ? (totalBytes / callCount) : 0ull; + } +}; + +using SFileReadTelemetry = SFileIOTelemetry; +using SFileWriteTelemetry = SFileIOTelemetry; + +/* Returns true when a payload larger than bigPayloadThresholdBytes was transferred with an average call size below lowAvgBytesThreshold, or with a smallest call below tinyChunkBytesThreshold spread over more than tinyChunkCallsThreshold calls. Payloads at or below the big-payload threshold always return false. NOTE(review): with zero recorded calls the average reads as 0, so with the default thresholds a large payload still reports true - confirm that is intended. */ inline bool isTinyIOTelemetryLikely( + const SFileIOTelemetry& telemetry, + const uint64_t payloadBytes, + const uint64_t bigPayloadThresholdBytes = (1ull << 20), + const uint64_t lowAvgBytesThreshold = 1024ull, + const uint64_t tinyChunkBytesThreshold = 64ull, + const uint64_t tinyChunkCallsThreshold = 1024ull) +{ + if (payloadBytes <= bigPayloadThresholdBytes) + return false; + + const uint64_t minBytes = telemetry.getMinOrZero(); + const uint64_t avgBytes = telemetry.getAvgOrZero(); + return + avgBytes < lowAvgBytesThreshold || + (minBytes < tinyChunkBytesThreshold && telemetry.callCount > tinyChunkCallsThreshold); +} + +/* Issues one read of bytes at offset into dst and returns true only when that single call succeeds and reports exactly bytes processed; a short read is a failure, there is no retry. Returns false for a null file or a null dst with a non-zero size; a zero-byte request on a non-null file returns true without issuing a read. A successful call is recorded in ioTelemetry when one is supplied. */ inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SFileReadTelemetry* ioTelemetry = nullptr) +{ + if (!file || (!dst && bytes != 0ull)) + return false; + if (bytes == 0ull) + return true; + + system::IFile::success_t success; + file->read(success, dst, offset, bytes); + if (success && ioTelemetry) + ioTelemetry->account(success.getBytesProcessed()); + return success && 
success.getBytesProcessed() == bytes; +} + +/* Reads bytes from offset into dst according to ioPlan. WholeFile defers to readFileExact (one call, a short read fails). Chunked and any other strategy loop with requests of at most ioPlan.chunkSizeBytes, advance by the count each call reports, and fail on an unsuccessful call or a call that processed zero bytes. Argument checks match readFileExact. */ inline bool readFileWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr) +{ + if (!file || (!dst && bytes != 0ull)) + return false; + if (bytes == 0ull) + return true; + + switch (ioPlan.strategy) + { + case SResolvedFileIOPolicy::Strategy::WholeFile: + return readFileExact(file, dst, offset, bytes, ioTelemetry); + case SResolvedFileIOPolicy::Strategy::Chunked: + default: + { + size_t bytesRead = 0ull; + while (bytesRead < bytes) + { + const size_t toRead = static_cast(std::min(ioPlan.chunkSizeBytes, bytes - bytesRead)); + system::IFile::success_t success; + file->read(success, dst + bytesRead, offset + bytesRead, toRead); + if (!success) + return false; + const size_t processed = success.getBytesProcessed(); + if (processed == 0ull) + return false; + if (ioTelemetry) + ioTelemetry->account(processed); + bytesRead += processed; + } + return true; + } + } +} + +/* Wraps readFileWithPolicy and, when ioMs is non-null, stores the elapsed time of the call in *ioMs; the value is written whether or not the read succeeded. NOTE(review): the duration's template arguments are not visible in this text, the unit is presumably milliseconds going by the parameter name - confirm. */ inline bool readFileWithPolicyTimed(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, double* ioMs = nullptr, SFileReadTelemetry* ioTelemetry = nullptr) +{ + using clock_t = std::chrono::high_resolution_clock; + const auto ioStart = clock_t::now(); + const bool ok = readFileWithPolicy(file, dst, offset, bytes, ioPlan, ioTelemetry); + if (ioMs) + *ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); + return ok; +} + +/* Writes byteCount bytes from data starting at fileOffset, looping after short writes until everything is written. WholeFile requests the whole remainder on each call, other strategies cap each call at ioPlan.chunkSizeBytes. Fails on an unsuccessful call or a call that processed zero bytes. fileOffset is advanced by the bytes written only on success and is left unchanged on failure; every successful call is recorded in ioTelemetry when one is supplied. NOTE(review): unlike the WholeFile read path, the WholeFile write path tolerates short transfers - confirm the asymmetry is intended. */ inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, size_t& fileOffset, SFileWriteTelemetry* ioTelemetry = nullptr) +{ + if (!file || (!data && byteCount != 0ull)) + return false; + if (byteCount == 0ull) + return true; + + size_t writtenTotal = 0ull; + while (writtenTotal < byteCount) + { + const size_t toWrite = + ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? 
+ (byteCount - writtenTotal) : + static_cast(std::min(ioPlan.chunkSizeBytes, byteCount - writtenTotal)); + system::IFile::success_t success; + file->write(success, data + writtenTotal, fileOffset + writtenTotal, toWrite); + if (!success) + return false; + const size_t written = success.getBytesProcessed(); + if (written == 0ull) + return false; + if (ioTelemetry) + ioTelemetry->account(written); + writtenTotal += written; + } + fileOffset += writtenTotal; + return true; +} + +/* Writes data at file offset 0 by forwarding to writeFileWithPolicyAtOffset with a local offset counter. */ inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry = nullptr) +{ + size_t fileOffset = 0ull; + return writeFileWithPolicyAtOffset(file, ioPlan, data, byteCount, fileOffset, ioTelemetry); +} + +/* Writes dataA at file offset 0 and dataB directly after it, sharing one running offset; returns false as soon as the first write fails, without attempting the second. */ inline bool writeTwoBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* dataA, size_t byteCountA, const uint8_t* dataB, size_t byteCountB, SFileWriteTelemetry* ioTelemetry = nullptr) +{ + size_t fileOffset = 0ull; + if (!writeFileWithPolicyAtOffset(file, ioPlan, dataA, byteCountA, fileOffset, ioTelemetry)) + return false; + return writeFileWithPolicyAtOffset(file, ioPlan, dataB, byteCountB, fileOffset, ioTelemetry); +} + +} + +#endif diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 3aca293892..fb27e6d12b 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -6,6 +6,7 @@ #include "nbl/core/declarations.h" #include "nbl/asset/IAssetManager.h" +#include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" @@ -40,32 +41,6 @@ struct ObjVertexDedupNode int32_t next = -1; }; - -struct SFileReadTelemetry -{ - uint64_t callCount = 0ull; - uint64_t totalBytes = 0ull; - uint64_t minBytes = std::numeric_limits::max(); - - void 
account(const uint64_t bytes) - { - ++callCount; - totalBytes += bytes; - if (bytes < minBytes) - minBytes = bytes; - } - - uint64_t getMinOrZero() const - { - return callCount ? minBytes : 0ull; - } - - uint64_t getAvgOrZero() const - { - return callCount ? (totalBytes / callCount) : 0ull; - } -}; - using Float3 = hlsl::float32_t3; using Float2 = hlsl::float32_t2; @@ -282,45 +257,14 @@ void objRecomputeContentHashes(ICPUPolygonGeometry* geometry) bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, double& ioMs, SFileReadTelemetry& ioTelemetry) { - if (!file || !dst) - return false; - - using clock_t = std::chrono::high_resolution_clock; - const auto ioStart = clock_t::now(); - size_t bytesRead = 0ull; - switch (ioPlan.strategy) - { - case SResolvedFileIOPolicy::Strategy::WholeFile: - { - system::IFile::success_t success; - file->read(success, dst, 0ull, byteCount); - if (!success || success.getBytesProcessed() != byteCount) - return false; - bytesRead = byteCount; - ioTelemetry.account(success.getBytesProcessed()); - break; - } - case SResolvedFileIOPolicy::Strategy::Chunked: - default: - { - while (bytesRead < byteCount) - { - const size_t toRead = static_cast(std::min(ioPlan.chunkSizeBytes, byteCount - bytesRead)); - system::IFile::success_t success; - file->read(success, dst + bytesRead, bytesRead, toRead); - if (!success) - return false; - const size_t processed = success.getBytesProcessed(); - if (processed == 0ull) - return false; - ioTelemetry.account(processed); - bytesRead += processed; - } - break; - } - } - ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); - return bytesRead == byteCount; + /* delegates to the shared readFileWithPolicyTimed helper, reading from file offset 0; dst is reinterpreted because the helper's dst parameter is uint8_t*. Unlike the removed body, ioMs is now written on failure as well and a null dst is accepted when byteCount is 0 */ return readFileWithPolicyTimed( + file, + reinterpret_cast(dst), + 0ull, + byteCount, + ioPlan, + &ioMs, + &ioTelemetry); } const char* goFirstWord(const char* buf, const char* const bufEnd, bool acrossNewlines = true) @@ -1337,13 +1281,7 @@ asset::SAssetBundle 
COBJMeshFileLoader::loadAsset(system::IFile* _file, const as aabbMs = std::chrono::duration(clock_t::now() - aabbStart).count(); const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); - if ( - static_cast(filesize) > (1ull << 20) && - ( - ioTelemetry.getAvgOrZero() < 1024ull || - (ioTelemetry.getMinOrZero() < 64ull && ioTelemetry.callCount > 1024ull) - ) - ) + if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(filesize))) { _params.logger.log( "OBJ loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 6b10521a9b..309e3f6a98 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -3,6 +3,7 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/asset/interchange/COBJMeshWriter.h" +#include "nbl/asset/interchange/SInterchangeIOCommon.h" #ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ @@ -57,31 +58,6 @@ struct SIndexStringRef uint16_t length = 0u; }; -struct SFileWriteTelemetry -{ - uint64_t callCount = 0ull; - uint64_t totalBytes = 0ull; - uint64_t minBytes = std::numeric_limits::max(); - - void account(const uint64_t bytes) - { - ++callCount; - totalBytes += bytes; - if (bytes < minBytes) - minBytes = bytes; - } - - uint64_t getMinOrZero() const - { - return callCount ? minBytes : 0ull; - } - - uint64_t getAvgOrZero() const - { - return callCount ? 
(totalBytes / callCount) : 0ull; - } -}; - bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) { out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); @@ -249,8 +225,6 @@ void appendIndexTokenToStorage(std::string& storage, core::vector(output.data()), output.size(), &ioTelemetry); + const bool writeOk = writeFileWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size(), &ioTelemetry); writeMs = std::chrono::duration(clock_t::now() - writeStart).count(); const double totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); const double miscMs = std::max(0.0, totalMs - (encodeMs + formatMs + writeMs)); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if ( - static_cast(output.size()) > (1ull << 20) && - ( - ioAvgWrite < 1024ull || - (ioMinWrite < 64ull && ioTelemetry.callCount > 1024ull) - ) - ) + if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(output.size()))) { _params.logger.log( "OBJ writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -539,44 +507,6 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return writeOk; } -bool obj_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry) -{ - if (!file || (!data && byteCount != 0ull)) - return false; - - size_t fileOffset = 0ull; - switch (ioPlan.strategy) - { - case SResolvedFileIOPolicy::Strategy::WholeFile: - { - system::IFile::success_t success; - file->write(success, data, fileOffset, byteCount); - if (success && ioTelemetry) - ioTelemetry->account(success.getBytesProcessed()); - return success && success.getBytesProcessed() == byteCount; - } - case SResolvedFileIOPolicy::Strategy::Chunked: - default: - { - while (fileOffset < byteCount) - { - const size_t toWrite = static_cast(std::min(ioPlan.chunkSizeBytes, byteCount - 
fileOffset)); - system::IFile::success_t success; - file->write(success, data + fileOffset, fileOffset, toWrite); - if (!success) - return false; - const size_t written = success.getBytesProcessed(); - if (written == 0ull) - return false; - if (ioTelemetry) - ioTelemetry->account(written); - fileOffset += written; - } - return true; - } - } -} - } // namespace nbl::asset #endif // _NBL_COMPILE_WITH_OBJ_WRITER_ diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 24d870cf33..c6d06cde6a 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -7,6 +7,7 @@ #include "CPLYMeshFileLoader.h" #include "nbl/asset/metadata/CPLYMetadata.h" +#include "nbl/asset/interchange/SGeometryContentHashCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include @@ -207,98 +208,7 @@ IGeometry::SDataView plyCreateAdoptedU16IndexView(core::vector> buffers; - auto appendViewBuffer = [&buffers](const IGeometry::SDataView& view) -> void - { - if (!view || !view.src.buffer) - return; - for (const auto& existing : buffers) - { - if (existing.get() == view.src.buffer.get()) - return; - } - buffers.push_back(core::smart_refctd_ptr(view.src.buffer)); - }; - - appendViewBuffer(geometry->getPositionView()); - appendViewBuffer(geometry->getIndexView()); - appendViewBuffer(geometry->getNormalView()); - for (const auto& view : *geometry->getAuxAttributeViews()) - appendViewBuffer(view); - for (const auto& view : *geometry->getJointWeightViews()) - { - appendViewBuffer(view.indices); - appendViewBuffer(view.weights); - } - if (auto jointOBB = geometry->getJointOBBView(); jointOBB) - appendViewBuffer(*jointOBB); - - if (buffers.empty()) - return; - - core::vector pending; - pending.reserve(buffers.size()); - uint64_t totalBytes = 0ull; - for (size_t i = 0ull; i < buffers.size(); ++i) - { - auto& buffer = buffers[i]; - if (buffer->getContentHash() != 
IPreHashed::INVALID_HASH) - continue; - totalBytes += static_cast(buffer->getSize()); - pending.push_back(i); - } - - if (pending.empty()) - return; - - const size_t hw = resolveLoaderHardwareThreads(); - const uint8_t* hashSampleData = nullptr; - uint64_t hashSampleBytes = 0ull; - for (const auto pendingIx : pending) - { - auto& buffer = buffers[pendingIx]; - const auto* ptr = reinterpret_cast(buffer->getPointer()); - if (!ptr) - continue; - hashSampleData = ptr; - hashSampleBytes = std::min(static_cast(buffer->getSize()), 128ull << 10); - if (hashSampleBytes > 0ull) - break; - } - SLoaderRuntimeTuningRequest tuningRequest = {}; - tuningRequest.inputBytes = totalBytes; - tuningRequest.totalWorkUnits = pending.size(); - tuningRequest.minBytesPerWorker = std::max(1ull, pending.empty() ? 1ull : loaderRuntimeCeilDiv(totalBytes, static_cast(pending.size()))); - tuningRequest.hardwareThreads = static_cast(hw); - tuningRequest.hardMaxWorkers = static_cast(std::min(pending.size(), hw)); - tuningRequest.targetChunksPerWorker = 1u; - tuningRequest.sampleData = hashSampleData; - tuningRequest.sampleBytes = hashSampleBytes; - const auto tuning = tuneLoaderRuntime(ioPolicy, tuningRequest); - const size_t workerCount = std::min(tuning.workerCount, pending.size()); - if (workerCount > 1ull) - { - plyRunParallelWorkers(workerCount, [&](const size_t workerIx) - { - const size_t beginIx = (pending.size() * workerIx) / workerCount; - const size_t endIx = (pending.size() * (workerIx + 1ull)) / workerCount; - for (size_t i = beginIx; i < endIx; ++i) - { - auto& buffer = buffers[pending[i]]; - buffer->setContentHash(buffer->computeContentHash()); - } - }); - return; - } - - for (const auto pendingIx : pending) - { - auto& buffer = buffers[pendingIx]; - buffer->setContentHash(buffer->computeContentHash()); - } + recomputeGeometryContentHashesParallel(geometry, ioPolicy); } struct SContext diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp 
b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index cc2f018f40..9c3cda3eea 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -4,6 +4,7 @@ // See the original file in irrlicht source for authors #include "CPLYMeshWriter.h" +#include "nbl/asset/interchange/SInterchangeIOCommon.h" #ifdef _NBL_COMPILE_WITH_PLY_WRITER_ @@ -50,31 +51,6 @@ namespace ply_writer_detail constexpr size_t ApproxPlyTextBytesPerVertex = 96ull; constexpr size_t ApproxPlyTextBytesPerFace = 32ull; -struct SFileWriteTelemetry -{ - uint64_t callCount = 0ull; - uint64_t totalBytes = 0ull; - uint64_t minBytes = std::numeric_limits::max(); - - void account(const uint64_t bytes) - { - ++callCount; - totalBytes += bytes; - if (bytes < minBytes) - minBytes = bytes; - } - - uint64_t getMinOrZero() const - { - return callCount ? minBytes : 0ull; - } - - uint64_t getAvgOrZero() const - { - return callCount ? (totalBytes / callCount) : 0ull; - } -}; - bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) { out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); @@ -137,9 +113,6 @@ void appendVec(std::string& out, const double* values, size_t count, bool flipVe } } -bool writeBufferWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, size_t& fileOffset, SFileWriteTelemetry* ioTelemetry = nullptr); -bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry = nullptr); -bool writeTwoBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* dataA, size_t byteCountA, const uint8_t* dataB, size_t byteCountB, SFileWriteTelemetry* ioTelemetry = nullptr); bool writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, 
uint8_t* dst, bool flipVectors); bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors); @@ -335,13 +308,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const double miscMs = std::max(0.0, totalMs - (encodeMs + formatMs + writeMs)); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if ( - static_cast(outputBytes) > (1ull << 20) && - ( - ioAvgWrite < 1024ull || - (ioMinWrite < 64ull && ioTelemetry.callCount > 1024ull) - ) - ) + if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes))) { _params.logger.log( "PLY writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -405,13 +372,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const double miscMs = std::max(0.0, totalMs - (encodeMs + formatMs + writeMs)); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if ( - static_cast(outputBytes) > (1ull << 20) && - ( - ioAvgWrite < 1024ull || - (ioMinWrite < 64ull && ioTelemetry.callCount > 1024ull) - ) - ) + if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes))) { _params.logger.log( "PLY writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -444,69 +405,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return writeOk; } -bool ply_writer_detail::writeBufferWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, size_t& fileOffset, SFileWriteTelemetry* ioTelemetry) -{ - if (!file || (!data && byteCount != 0ull)) - return false; - switch (ioPlan.strategy) - { - case SResolvedFileIOPolicy::Strategy::WholeFile: - { - size_t writtenTotal = 0ull; - while (writtenTotal < byteCount) - { - 
system::IFile::success_t success; - file->write(success, data + writtenTotal, fileOffset + writtenTotal, byteCount - writtenTotal); - if (!success) - return false; - const size_t written = success.getBytesProcessed(); - if (written == 0ull) - return false; - if (ioTelemetry) - ioTelemetry->account(written); - writtenTotal += written; - } - fileOffset += writtenTotal; - return true; - } - case SResolvedFileIOPolicy::Strategy::Chunked: - default: - { - size_t writtenTotal = 0ull; - while (writtenTotal < byteCount) - { - const size_t toWrite = static_cast(std::min(ioPlan.chunkSizeBytes, byteCount - writtenTotal)); - system::IFile::success_t success; - file->write(success, data + writtenTotal, fileOffset + writtenTotal, toWrite); - if (!success) - return false; - const size_t written = success.getBytesProcessed(); - if (written == 0ull) - return false; - if (ioTelemetry) - ioTelemetry->account(written); - writtenTotal += written; - } - fileOffset += writtenTotal; - return true; - } - } -} - -bool ply_writer_detail::writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry) -{ - size_t fileOffset = 0ull; - return writeBufferWithPolicyAtOffset(file, ioPlan, data, byteCount, fileOffset, ioTelemetry); -} - -bool ply_writer_detail::writeTwoBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* dataA, size_t byteCountA, const uint8_t* dataB, size_t byteCountB, SFileWriteTelemetry* ioTelemetry) -{ - size_t fileOffset = 0ull; - if (!writeBufferWithPolicyAtOffset(file, ioPlan, dataA, byteCountA, fileOffset, ioTelemetry)) - return false; - return writeBufferWithPolicyAtOffset(file, ioPlan, dataB, byteCountB, fileOffset, ioTelemetry); -} - bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool 
flipVectors) { if (!dst) diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index c8cced9950..756840bf0f 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -7,6 +7,8 @@ #ifdef _NBL_COMPILE_WITH_STL_LOADER_ +#include "nbl/asset/interchange/SGeometryContentHashCommon.h" +#include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/asset.h" #include "nbl/asset/metadata/CSTLMetadata.h" @@ -35,31 +37,6 @@ namespace nbl::asset { -struct SFileReadTelemetry -{ - uint64_t callCount = 0ull; - uint64_t totalBytes = 0ull; - uint64_t minBytes = std::numeric_limits::max(); - - void account(const uint64_t bytes) - { - ++callCount; - totalBytes += bytes; - if (bytes < minBytes) - minBytes = bytes; - } - - uint64_t getMinOrZero() const - { - return callCount ? minBytes : 0ull; - } - - uint64_t getAvgOrZero() const - { - return callCount ? 
(totalBytes / callCount) : 0ull; - } -}; - struct SSTLContext { IAssetLoader::SAssetLoadContext inner; @@ -92,52 +69,6 @@ void stlRunParallelWorkers(const size_t workerCount, Fn&& fn) }); } -bool stlReadExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SFileReadTelemetry* ioTelemetry = nullptr) -{ - if (!file || (!dst && bytes != 0ull)) - return false; - if (bytes == 0ull) - return true; - - system::IFile::success_t success; - file->read(success, dst, offset, bytes); - if (success && ioTelemetry) - ioTelemetry->account(success.getBytesProcessed()); - return success && success.getBytesProcessed() == bytes; -} - -bool stlReadWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr) -{ - if (!file || (!dst && bytes != 0ull)) - return false; - if (bytes == 0ull) - return true; - - size_t bytesRead = 0ull; - switch (ioPlan.strategy) - { - case SResolvedFileIOPolicy::Strategy::WholeFile: - return stlReadExact(file, dst, offset, bytes, ioTelemetry); - case SResolvedFileIOPolicy::Strategy::Chunked: - default: - while (bytesRead < bytes) - { - const size_t chunk = static_cast(std::min(ioPlan.chunkSizeBytes, bytes - bytesRead)); - system::IFile::success_t success; - file->read(success, dst + bytesRead, offset + bytesRead, chunk); - if (!success) - return false; - const size_t processed = success.getBytesProcessed(); - if (processed == 0ull) - return false; - if (ioTelemetry) - ioTelemetry->account(processed); - bytesRead += processed; - } - return true; - } -} - const char* stlSkipWhitespace(const char* ptr, const char* const end) { while (ptr < end && core::isspace(*ptr)) @@ -311,89 +242,7 @@ ICPUPolygonGeometry::SDataView stlCreateAdoptedFloat3View(core::vector> buffers; - auto appendViewBuffer = [&buffers](const IGeometry::SDataView& view) -> void - { - if (!view || !view.src.buffer) - return; - for (const auto& existing : buffers) - { 
- if (existing.get() == view.src.buffer.get()) - return; - } - buffers.push_back(core::smart_refctd_ptr(view.src.buffer)); - }; - - appendViewBuffer(geometry->getPositionView()); - appendViewBuffer(geometry->getIndexView()); - appendViewBuffer(geometry->getNormalView()); - for (const auto& view : *geometry->getAuxAttributeViews()) - appendViewBuffer(view); - for (const auto& view : *geometry->getJointWeightViews()) - { - appendViewBuffer(view.indices); - appendViewBuffer(view.weights); - } - if (auto jointOBB = geometry->getJointOBBView(); jointOBB) - appendViewBuffer(*jointOBB); - - if (buffers.empty()) - return; - - uint64_t totalBytes = 0ull; - for (const auto& buffer : buffers) - totalBytes += static_cast(buffer->getSize()); - - const size_t hw = resolveLoaderHardwareThreads(); - const uint8_t* hashSampleData = nullptr; - uint64_t hashSampleBytes = 0ull; - for (const auto& buffer : buffers) - { - const auto* ptr = reinterpret_cast(buffer->getPointer()); - if (!ptr) - continue; - hashSampleData = ptr; - hashSampleBytes = std::min(static_cast(buffer->getSize()), 128ull << 10); - if (hashSampleBytes > 0ull) - break; - } - SLoaderRuntimeTuningRequest tuningRequest = {}; - tuningRequest.inputBytes = totalBytes; - tuningRequest.totalWorkUnits = buffers.size(); - tuningRequest.minBytesPerWorker = std::max(1ull, buffers.empty() ? 
1ull : loaderRuntimeCeilDiv(totalBytes, static_cast(buffers.size()))); - tuningRequest.hardwareThreads = static_cast(hw); - tuningRequest.hardMaxWorkers = static_cast(std::min(hw, buffers.size())); - tuningRequest.targetChunksPerWorker = 1u; - tuningRequest.sampleData = hashSampleData; - tuningRequest.sampleBytes = hashSampleBytes; - const auto tuning = tuneLoaderRuntime(ioPolicy, tuningRequest); - const size_t workerCount = std::min(tuning.workerCount, buffers.size()); - if (workerCount > 1ull) - { - stlRunParallelWorkers(workerCount, [&buffers, workerCount](const size_t workerIx) - { - const size_t beginIx = (buffers.size() * workerIx) / workerCount; - const size_t endIx = (buffers.size() * (workerIx + 1ull)) / workerCount; - for (size_t i = beginIx; i < endIx; ++i) - { - auto& buffer = buffers[i]; - if (buffer->getContentHash() != IPreHashed::INVALID_HASH) - continue; - buffer->setContentHash(buffer->computeContentHash()); - } - }); - return; - } - - for (auto& buffer : buffers) - { - if (buffer->getContentHash() != IPreHashed::INVALID_HASH) - continue; - buffer->setContentHash(buffer->computeContentHash()); - } + recomputeGeometryContentHashesParallel(geometry, ioPolicy); } CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager* _assetManager) @@ -467,7 +316,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { const auto ioStart = clock_t::now(); wholeFilePayload.resize(filesize + 1ull); - if (!stlReadExact(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, &context.ioTelemetry)) + if (!readFileExact(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, &context.ioTelemetry)) return {}; wholeFilePayload[filesize] = 0u; wholeFileData = wholeFilePayload.data(); @@ -489,7 +338,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } else { - hasPrefix = filesize >= StlBinaryPrefixBytes && stlReadExact(context.inner.mainFile, prefix.data(), 0ull, StlBinaryPrefixBytes, 
&context.ioTelemetry); + hasPrefix = filesize >= StlBinaryPrefixBytes && readFileExact(context.inner.mainFile, prefix.data(), 0ull, StlBinaryPrefixBytes, &context.ioTelemetry); } bool startsWithSolid = false; if (hasPrefix) @@ -501,7 +350,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa char header[StlTextProbeBytes] = {}; if (wholeFileData) std::memcpy(header, wholeFileData, sizeof(header)); - else if (!stlReadExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) + else if (!readFileExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) return {}; startsWithSolid = (std::strncmp(header, "solid ", StlTextProbeBytes) == 0); } @@ -551,7 +400,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa uint32_t triangleCount32 = binaryTriCountFromDetect; if (!hasBinaryTriCountFromDetect) { - if (!stlReadExact(context.inner.mainFile, &triangleCount32, StlBinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) + if (!readFileExact(context.inner.mainFile, &triangleCount32, StlBinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) return {}; } @@ -571,7 +420,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa core::vector payload; payload.resize(dataSize); const auto ioStart = clock_t::now(); - if (!stlReadWithPolicy(context.inner.mainFile, payload.data(), StlBinaryPrefixBytes, dataSize, ioPlan, &context.ioTelemetry)) + if (!readFileWithPolicy(context.inner.mainFile, payload.data(), StlBinaryPrefixBytes, dataSize, ioPlan, &context.ioTelemetry)) return {}; ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); wholeFilePayload = std::move(payload); @@ -953,7 +802,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { const auto ioStart = clock_t::now(); wholeFilePayload.resize(filesize + 1ull); - if (!stlReadWithPolicy(context.inner.mainFile, 
wholeFilePayload.data(), 0ull, filesize, ioPlan, &context.ioTelemetry)) + if (!readFileWithPolicy(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, ioPlan, &context.ioTelemetry)) return {}; ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); wholeFilePayload[filesize] = 0u; @@ -1075,13 +924,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); const uint64_t ioMinRead = context.ioTelemetry.getMinOrZero(); const uint64_t ioAvgRead = context.ioTelemetry.getAvgOrZero(); - if ( - static_cast(filesize) > (1ull << 20) && - ( - ioAvgRead < 1024ull || - (ioMinRead < 64ull && context.ioTelemetry.callCount > 1024ull) - ) - ) + if (isTinyIOTelemetryLikely(context.ioTelemetry, static_cast(filesize))) { _params.logger.log( "STL loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", @@ -1131,13 +974,13 @@ bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste if (fileSize < StlBinaryPrefixBytes) { char header[StlTextProbeBytes] = {}; - if (!stlReadExact(_file, header, 0ull, sizeof(header))) + if (!readFileExact(_file, header, 0ull, sizeof(header))) return false; return std::strncmp(header, "solid ", StlTextProbeBytes) == 0; } std::array prefix = {}; - if (!stlReadExact(_file, prefix.data(), 0ull, prefix.size())) + if (!readFileExact(_file, prefix.data(), 0ull, prefix.size())) return false; uint32_t triangleCount = 0u; diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index ba1fe0e366..339b9e5c41 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -5,6 +5,7 @@ #include "nbl/system/IFile.h" #include "CSTLMeshWriter.h" +#include "nbl/asset/interchange/SInterchangeIOCommon.h" #include #include @@ -26,31 +27,6 @@ namespace nbl::asset namespace stl_writer_detail { -struct SFileWriteTelemetry -{ - 
uint64_t callCount = 0ull; - uint64_t totalBytes = 0ull; - uint64_t minBytes = std::numeric_limits::max(); - - void account(const uint64_t bytes) - { - ++callCount; - totalBytes += bytes; - if (bytes < minBytes) - minBytes = bytes; - } - - uint64_t getMinOrZero() const - { - return callCount ? minBytes : 0ull; - } - - uint64_t getAvgOrZero() const - { - return callCount ? (totalBytes / callCount) : 0ull; - } -}; - struct SContext { IAssetWriter::SAssetWriteContext writeContext; @@ -90,7 +66,6 @@ using SContext = stl_writer_detail::SContext; bool flushBytes(SContext* context); bool writeBytes(SContext* context, const void* data, size_t size); -bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, stl_writer_detail::SFileWriteTelemetry* ioTelemetry = nullptr); const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view); bool decodeTriangleIndices(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView& posView, core::vector& indexData, const uint32_t*& outIndices, uint32_t& outFaceCount); bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx); @@ -201,13 +176,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const double miscMs = std::max(0.0, totalMs - (context.formatMs + context.encodeMs + context.writeMs)); const uint64_t ioMinWrite = context.writeTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = context.writeTelemetry.getAvgOrZero(); - if ( - (context.fileOffset > (1ull << 20)) && - ( - ioAvgWrite < 1024ull || - (ioMinWrite < 64ull && context.writeTelemetry.callCount > 1024ull) - ) - ) + if (isTinyIOTelemetryLikely(context.writeTelemetry, context.fileOffset)) { _params.logger.log( "STL writer tiny-io guard: 
file=%s writes=%llu min=%llu avg=%llu", @@ -315,44 +284,6 @@ bool writeBytes(SContext* context, const void* data, size_t size) } } -bool writeBufferWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, stl_writer_detail::SFileWriteTelemetry* ioTelemetry) -{ - if (!file || (!data && byteCount != 0ull)) - return false; - - size_t fileOffset = 0ull; - switch (ioPlan.strategy) - { - case SResolvedFileIOPolicy::Strategy::WholeFile: - { - system::IFile::success_t success; - file->write(success, data, fileOffset, byteCount); - if (success && ioTelemetry) - ioTelemetry->account(success.getBytesProcessed()); - return success && success.getBytesProcessed() == byteCount; - } - case SResolvedFileIOPolicy::Strategy::Chunked: - default: - { - while (fileOffset < byteCount) - { - const size_t toWrite = static_cast(std::min(ioPlan.chunkSizeBytes, byteCount - fileOffset)); - system::IFile::success_t success; - file->write(success, data + fileOffset, fileOffset, toWrite); - if (!success) - return false; - const size_t written = success.getBytesProcessed(); - if (written == 0ull) - return false; - if (ioTelemetry) - ioTelemetry->account(written); - fileOffset += written; - } - return true; - } - } -} - char* appendFloatFixed6ToBuffer(char* dst, char* const end, const float value) { if (!dst || dst >= end) @@ -813,7 +744,7 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) context->encodeMs += std::chrono::duration(clock_t::now() - encodeStart).count(); const auto writeStart = clock_t::now(); - const bool writeOk = writeBufferWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, &context->writeTelemetry); + const bool writeOk = writeFileWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, &context->writeTelemetry); context->writeMs += std::chrono::duration(clock_t::now() - writeStart).count(); if (writeOk) context->fileOffset += 
outputSize; From 760443860c6d1fc62e47db8d2d85650054022806 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Feb 2026 14:28:25 +0100 Subject: [PATCH 024/118] Optimize asset path handling and remove misleading loader timing logs --- examples_tests | 2 +- include/nbl/asset/IAssetManager.h | 27 +++++++---- include/nbl/asset/interchange/IAssetLoader.h | 2 + src/nbl/asset/IAssetManager.cpp | 14 +++--- .../asset/interchange/COBJMeshFileLoader.cpp | 28 +++++------ src/nbl/asset/interchange/COBJMeshWriter.cpp | 12 ++--- .../asset/interchange/CPLYMeshFileLoader.cpp | 28 +++++------ src/nbl/asset/interchange/CPLYMeshWriter.cpp | 24 +++++----- .../asset/interchange/CSTLMeshFileLoader.cpp | 22 ++++----- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 12 ++--- src/nbl/system/ISystem.cpp | 47 +++++++++++++------ 11 files changed, 125 insertions(+), 93 deletions(-) diff --git a/examples_tests b/examples_tests index 3335a72819..0c843be792 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 3335a72819fdf6928052a97c6109e7afa888bed0 +Subproject commit 0c843be7927b3060730268f9daf5b23c93ed6930 diff --git a/include/nbl/asset/IAssetManager.h b/include/nbl/asset/IAssetManager.h index 22f61e848b..d2d9a21e41 100644 --- a/include/nbl/asset/IAssetManager.h +++ b/include/nbl/asset/IAssetManager.h @@ -180,22 +180,29 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted SAssetBundle getAssetInHierarchy_impl(const std::string& _filePath, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) { IAssetLoader::SAssetLoadContext ctx(_params, nullptr); + system::ISystem::future_t> future; system::path filePath = _filePath; _override->getLoadFilename(filePath, m_system.get(), ctx, _hierarchyLevel); - if (!m_system->exists(filePath,system::IFile::ECF_READ)) - { - filePath = _params.workingDirectory/filePath; - _override->getLoadFilename(filePath, m_system.get(), ctx, _hierarchyLevel); - } 
- - system::ISystem::future_t> future; m_system->createFile(future, filePath, static_cast(system::IFile::ECF_READ | system::IFile::ECF_MAPPABLE)); if (auto file=future.acquire()) return getAssetInHierarchy_impl(file->get(), filePath.string(), ctx.params, _hierarchyLevel, _override); m_system->createFile(future, filePath, system::IFile::ECF_READ); if (auto file=future.acquire()) return getAssetInHierarchy_impl(file->get(), filePath.string(), ctx.params, _hierarchyLevel, _override); + + auto fallbackPath = _params.workingDirectory / filePath; + if (fallbackPath != filePath) + { + filePath = std::move(fallbackPath); + _override->getLoadFilename(filePath, m_system.get(), ctx, _hierarchyLevel); + m_system->createFile(future, filePath, static_cast(system::IFile::ECF_READ | system::IFile::ECF_MAPPABLE)); + if (auto file=future.acquire()) + return getAssetInHierarchy_impl(file->get(), filePath.string(), ctx.params, _hierarchyLevel, _override); + m_system->createFile(future, filePath, system::IFile::ECF_READ); + if (auto file=future.acquire()) + return getAssetInHierarchy_impl(file->get(), filePath.string(), ctx.params, _hierarchyLevel, _override); + } return SAssetBundle(0); } @@ -353,8 +360,12 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted if (!_override) _override = &defOverride; + system::path filename = _filename; + if (filename.is_relative() && !_params.workingDirectory.empty()) + filename = _params.workingDirectory / filename; + system::ISystem::future_t> future; - m_system->createFile(future, (_params.workingDirectory.generic_string()+_filename).c_str(), system::IFile::ECF_WRITE); + m_system->createFile(future, std::move(filename), system::IFile::ECF_WRITE); if (auto file=future.acquire()) return writeAsset(file->get(), _params, _override); return false; diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 415a751a75..6c050e3213 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ 
b/include/nbl/asset/interchange/IAssetLoader.h @@ -235,6 +235,8 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted //! Called before loading a file to determine the correct path (could be relative or absolute) inline virtual void getLoadFilename(system::path& inOutFilename, const system::ISystem* sys, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { + if (inOutFilename.is_absolute() || inOutFilename.has_root_path()) + return; // try compute absolute path auto absolute = ctx.params.workingDirectory/inOutFilename; if (sys->exists(absolute,system::IFile::ECF_READ)) diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index f1b61fb470..8378976e97 100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -203,9 +203,11 @@ SAssetBundle IAssetManager::getAssetInHierarchy_impl(system::IFile* _file, const IAssetLoader::SAssetLoadContext ctx{params,_file}; std::filesystem::path filename = _file ? _file->getFileName() : std::filesystem::path(_supposedFilename); - auto file = _override->getLoadFile(_file, filename.string(), ctx, _hierarchyLevel); + auto filenameString = filename.string(); + auto file = _override->getLoadFile(_file, filenameString, ctx, _hierarchyLevel); filename = file.get() ? file->getFileName() : std::filesystem::path(_supposedFilename); + filenameString = filename.string(); // TODO: should we remove? 
(is a root absolute path working dir ever needed) if (params.workingDirectory.empty()) params.workingDirectory = filename.parent_path(); @@ -215,10 +217,10 @@ SAssetBundle IAssetManager::getAssetInHierarchy_impl(system::IFile* _file, const SAssetBundle bundle; if ((levelFlags & IAssetLoader::ECF_DUPLICATE_TOP_LEVEL) != IAssetLoader::ECF_DUPLICATE_TOP_LEVEL) { - auto found = findAssets(filename.string()); + auto found = findAssets(filenameString); if (found->size()) return _override->chooseRelevantFromFound(found->begin(), found->end(), ctx, _hierarchyLevel); - else if (!(bundle = _override->handleSearchFail(filename.string(), ctx, _hierarchyLevel)).getContents().empty()) + else if (!(bundle = _override->handleSearchFail(filenameString, ctx, _hierarchyLevel)).getContents().empty()) return bundle; } @@ -249,14 +251,14 @@ SAssetBundle IAssetManager::getAssetInHierarchy_impl(system::IFile* _file, const ((levelFlags & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL) != IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL) && ((levelFlags & IAssetLoader::ECF_DUPLICATE_TOP_LEVEL) != IAssetLoader::ECF_DUPLICATE_TOP_LEVEL)) { - _override->insertAssetIntoCache(bundle, filename.string(), ctx, _hierarchyLevel); + _override->insertAssetIntoCache(bundle, filenameString, ctx, _hierarchyLevel); } else if (bundle.getContents().empty()) { bool addToCache; - bundle = _override->handleLoadFail(addToCache, file.get(), filename.string(), filename.string(), ctx, _hierarchyLevel); + bundle = _override->handleLoadFail(addToCache, file.get(), filenameString, filenameString, ctx, _hierarchyLevel); if (!bundle.getContents().empty() && addToCache) - _override->insertAssetIntoCache(bundle, filename.string(), ctx, _hierarchyLevel); + _override->insertAssetIntoCache(bundle, filenameString, ctx, _hierarchyLevel); } return bundle; } diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index fb27e6d12b..da18128ff2 100644 --- 
a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -1291,23 +1291,23 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as static_cast(ioTelemetry.getMinOrZero()), static_cast(ioTelemetry.getAvgOrZero())); } + (void)totalMs; + (void)ioMs; + (void)parseMs; + (void)parseScanMs; + (void)parseVms; + (void)parseVNms; + (void)parseVTms; + (void)parseFaceMs; + (void)dedupMs; + (void)emitMs; + (void)buildMs; + (void)hashMs; + (void)aabbMs; _params.logger.log( - "OBJ loader perf: file=%s total=%.3f ms io=%.3f parse=%.3f parse_scan=%.3f parse_v=%.3f parse_vn=%.3f parse_vt=%.3f parse_f=%.3f dedup=%.3f emit=%.3f build=%.3f hash=%.3f aabb=%.3f in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu) io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "OBJ loader stats: file=%s in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), - totalMs, - ioMs, - parseMs, - parseScanMs, - parseVms, - parseVNms, - parseVTms, - parseFaceMs, - dedupMs, - emitMs, - buildMs, - hashMs, - aabbMs, static_cast(positions.size()), static_cast(normals.size()), static_cast(uvs.size()), diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 309e3f6a98..879b5b28a7 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -485,14 +485,9 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioAvgWrite)); } _params.logger.log( - "OBJ writer perf: file=%s total=%.3f ms encode=%.3f format=%.3f write=%.3f misc=%.3f bytes=%llu vertices=%llu faces=%llu io_writes=%llu 
io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "OBJ writer stats: file=%s bytes=%llu vertices=%llu faces=%llu io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), - totalMs, - encodeMs, - formatMs, - writeMs, - miscMs, static_cast(output.size()), static_cast(vertexCount), static_cast(faceCount), @@ -503,6 +498,11 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), ioPlan.reason); + (void)totalMs; + (void)encodeMs; + (void)formatMs; + (void)writeMs; + (void)miscMs; return writeOk; } diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index c6d06cde6a..74f048f340 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -2284,22 +2284,9 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa static_cast(ioAvgRead)); } _params.logger.log( - "PLY loader perf: file=%s total=%.3f ms header=%.3f vertex=%.3f vertex_fast_ms=%.3f vertex_generic_ms=%.3f face=%.3f skip=%.3f layout_negotiate=%.3f view_create=%.3f hash_range=%.3f index=%.3f aabb=%.3f remainder=%.3f binary=%d verts=%llu faces=%llu idx=%llu vertex_fast=%llu face_fast=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "PLY loader stats: file=%s binary=%d verts=%llu faces=%llu idx=%llu vertex_fast=%llu face_fast=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), - totalMs, - headerMs, - vertexMs, - vertexFastMs, - vertexGenericMs, - faceMs, - skipMs, - layoutNegotiateMs, - viewCreateMs, - hashRangeMs, - indexBuildMs, - aabbMs, - 
stageRemainderMs, ctx.IsBinaryFile ? 1 : 0, static_cast(vertCount), static_cast(faceCount), @@ -2313,6 +2300,19 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), ioPlan.reason); + (void)totalMs; + (void)stageRemainderMs; + (void)headerMs; + (void)vertexMs; + (void)vertexFastMs; + (void)vertexGenericMs; + (void)faceMs; + (void)skipMs; + (void)layoutNegotiateMs; + (void)viewCreateMs; + (void)hashRangeMs; + (void)indexBuildMs; + (void)aabbMs; auto meta = core::make_smart_refctd_ptr(); return SAssetBundle(std::move(meta),{std::move(geometry)}); diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 9c3cda3eea..9008bb4cdc 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -319,14 +319,9 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioAvgWrite)); } _params.logger.log( - "PLY writer perf: file=%s total=%.3f ms encode=%.3f format=%.3f write=%.3f misc=%.3f bytes=%llu vertices=%llu faces=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "PLY writer stats: file=%s bytes=%llu vertices=%llu faces=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), - totalMs, - encodeMs, - formatMs, - writeMs, - miscMs, static_cast(outputBytes), static_cast(vertexCount), static_cast(faceCount), @@ -338,6 +333,11 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), ioPlan.reason); + (void)totalMs; + (void)encodeMs; + (void)formatMs; + (void)writeMs; + (void)miscMs; return writeOk; } @@ -383,14 +383,9 @@ bool 
CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioAvgWrite)); } _params.logger.log( - "PLY writer perf: file=%s total=%.3f ms encode=%.3f format=%.3f write=%.3f misc=%.3f bytes=%llu vertices=%llu faces=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "PLY writer stats: file=%s bytes=%llu vertices=%llu faces=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), - totalMs, - encodeMs, - formatMs, - writeMs, - miscMs, static_cast(outputBytes), static_cast(vertexCount), static_cast(faceCount), @@ -402,6 +397,11 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), ioPlan.reason); + (void)totalMs; + (void)encodeMs; + (void)formatMs; + (void)writeMs; + (void)miscMs; return writeOk; } diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 756840bf0f..088b6b91e4 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -935,19 +935,9 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa static_cast(ioAvgRead)); } _params.logger.log( - "STL loader perf: file=%s total=%.3f ms detect=%.3f io=%.3f parse=%.3f build=%.3f build_alloc_views=%.3f build_set_views=%.3f build_misc=%.3f hash=%.3f aabb=%.3f binary=%d parse_path=%s triangles=%llu vertices=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "STL loader stats: file=%s binary=%d parse_path=%s triangles=%llu vertices=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), - 
totalMs, - detectMs, - ioMs, - parseMs, - buildMs, - buildAllocViewsMs, - buildSetViewsMs, - buildMiscMs, - hashMs, - aabbMs, binary ? 1 : 0, parsePath, static_cast(triangleCount), @@ -959,6 +949,16 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), ioPlan.reason); + (void)totalMs; + (void)detectMs; + (void)ioMs; + (void)parseMs; + (void)buildMs; + (void)buildAllocViewsMs; + (void)buildSetViewsMs; + (void)buildMiscMs; + (void)hashMs; + (void)aabbMs; auto meta = core::make_smart_refctd_ptr(); return SAssetBundle(std::move(meta), { std::move(geometry) }); diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 339b9e5c41..300445d8f5 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -187,14 +187,9 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioAvgWrite)); } _params.logger.log( - "STL writer perf: file=%s total=%.3f ms format=%.3f encode=%.3f write=%.3f misc=%.3f bytes=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "STL writer stats: file=%s bytes=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), - totalMs, - context.formatMs, - context.encodeMs, - context.writeMs, - miscMs, static_cast(context.fileOffset), binary ? 
1 : 0, static_cast(context.writeTelemetry.callCount), @@ -204,6 +199,11 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ toString(context.ioPlan.strategy), static_cast(context.ioPlan.chunkSizeBytes), context.ioPlan.reason); + (void)totalMs; + (void)miscMs; + (void)context.formatMs; + (void)context.encodeMs; + (void)context.writeMs; return true; } diff --git a/src/nbl/system/ISystem.cpp b/src/nbl/system/ISystem.cpp index 6b25471f8d..b2fc0f5117 100644 --- a/src/nbl/system/ISystem.cpp +++ b/src/nbl/system/ISystem.cpp @@ -41,11 +41,12 @@ bool ISystem::exists(const system::path& filename, const core::bitflag= sizeof(SRequestParams_CREATE_FILE::filename)) return false; - // archive file - if (!writeUsage && findFileInArchive(filename).archive) - return true; // regular file - return std::filesystem::exists(filename); + std::error_code fsEc; + if (std::filesystem::exists(filename, fsEc) && !fsEc) + return true; + // archive file + return !writeUsage && findFileInArchive(filename).archive; } bool ISystem::isPathReadOnly(const system::path& p) const @@ -193,12 +194,25 @@ bool ISystem::copy(const system::path& from, const system::path& to) void ISystem::createFile(future_t>& future, std::filesystem::path filename, const core::bitflag flags, const std::string_view& accessToken) { - // canonicalize - if (std::filesystem::exists(filename)) - filename = std::filesystem::canonical(filename); + std::error_code fsEc; + const bool writeUsage = flags.value&IFile::ECF_WRITE; + const bool absoluteInput = filename.is_absolute(); + bool pathExists = false; + if (!writeUsage) + { + fsEc.clear(); + pathExists = std::filesystem::exists(filename, fsEc) && !fsEc; + if (pathExists && !absoluteInput) + { + fsEc.clear(); + const auto absolute = std::filesystem::absolute(filename, fsEc); + if (!fsEc) + filename = absolute; + } + } // try archives (readonly, for now) - if (!(flags.value&IFile::ECF_WRITE)) + if (!writeUsage && !pathExists) { const auto found = 
findFileInArchive(filename); if (found.archive) @@ -213,8 +227,6 @@ void ISystem::createFile(future_t>& future, std::f } // - if (std::filesystem::exists(filename)) - filename = std::filesystem::absolute(filename).generic_string(); if (filename.string().size()>=MAX_FILENAME_LENGTH) { future.set_result(nullptr); @@ -255,16 +267,21 @@ core::smart_refctd_ptr ISystem::openFileArchive(core::smart_refctd ISystem::FoundArchiveFile ISystem::findFileInArchive(const system::path& absolutePath) const { - system::path path = std::filesystem::exists(absolutePath) ? std::filesystem::canonical(absolutePath.parent_path()):absolutePath.parent_path(); + const auto normalizedAbsolutePath = absolutePath.lexically_normal(); + system::path path = normalizedAbsolutePath.parent_path().lexically_normal(); // going up the directory tree while (!path.empty() && path.parent_path()!=path) { - path = std::filesystem::exists(path) ? std::filesystem::canonical(path):path; - + std::error_code fsEc; + const auto relative = std::filesystem::relative(normalizedAbsolutePath, path, fsEc); + if (fsEc) + { + path = path.parent_path(); + continue; + } const auto archives = m_cachedArchiveFiles.findRange(path); for (auto& archive : archives) { - const auto relative = std::filesystem::relative(absolutePath,path); const auto items = static_cast(archive.second->listAssets()); const IFileArchive::SFileList::SEntry itemToFind = { relative }; @@ -394,4 +411,4 @@ bool ISystem::isDebuggerAttached() return false; } -#endif \ No newline at end of file +#endif From e452ab32b86b5cc4b177e1927c9cb9d92514195d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Feb 2026 15:40:22 +0100 Subject: [PATCH 025/118] Update examples submodule for MeshLoaders cleanup --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 0c843be792..feb4ecf10a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 
0c843be7927b3060730268f9daf5b23c93ed6930 +Subproject commit feb4ecf10a5cfe4a3cf66b3dc37ab62ba719dcda From 83f15d0afa7648adccd918320accd1db1e945aa0 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 12 Feb 2026 19:18:35 +0100 Subject: [PATCH 026/118] Refine mesh interchange API and loader writer pipeline --- examples_tests | 2 +- .../MonoDeviceApplication.hpp | 4 +- include/nbl/asset/ICPUBuffer.h | 2 +- .../nbl/asset/interchange/COBJMeshWriter.h | 2 +- include/nbl/asset/interchange/IAssetLoader.h | 4 +- .../nbl/asset/interchange/IGeometryLoader.h | 2 +- include/nbl/asset/interchange/SFileIOPolicy.h | 2 +- .../interchange/SGeometryContentHashCommon.h | 2 +- .../asset/interchange/SInterchangeIOCommon.h | 2 +- .../asset/interchange/SLoaderRuntimeTuning.h | 2 +- include/nbl/core/hash/blake.h | 2 +- src/nbl/CMakeLists.txt | 4 +- src/nbl/asset/IAssetManager.cpp | 2 +- src/nbl/asset/interchange/CGLTFLoader.cpp | 2 +- .../asset/interchange/COBJMeshFileLoader.cpp | 143 ++-------- .../asset/interchange/COBJMeshFileLoader.h | 2 +- src/nbl/asset/interchange/COBJMeshWriter.cpp | 2 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 157 ++++------- .../asset/interchange/CPLYMeshFileLoader.h | 2 +- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 2 +- src/nbl/asset/interchange/CPLYMeshWriter.h | 2 +- .../asset/interchange/CSTLMeshFileLoader.cpp | 266 +++++------------- .../asset/interchange/CSTLMeshFileLoader.h | 2 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 2 +- src/nbl/asset/interchange/CSTLMeshWriter.h | 2 +- src/nbl/asset/interchange/IGeometryWriter.cpp | 2 +- src/nbl/asset/pch_asset.h | 2 +- src/nbl/video/utilities/CAssetConverter.cpp | 2 +- 28 files changed, 181 insertions(+), 441 deletions(-) diff --git a/examples_tests b/examples_tests index feb4ecf10a..c9a8735e85 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit feb4ecf10a5cfe4a3cf66b3dc37ab62ba719dcda +Subproject commit c9a8735e85a20d18b406daa7980eef119cb91bb5 diff --git 
a/include/nbl/application_templates/MonoDeviceApplication.hpp b/include/nbl/application_templates/MonoDeviceApplication.hpp index a3a169d7b7..0ab461e6df 100644 --- a/include/nbl/application_templates/MonoDeviceApplication.hpp +++ b/include/nbl/application_templates/MonoDeviceApplication.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2023 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_APPLICATION_TEMPLATES_MONO_DEVICE_APPLICATION_HPP_INCLUDED_ @@ -279,4 +279,4 @@ class MonoDeviceApplication : public virtual MonoSystemMonoLoggerApplication }; } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/ICPUBuffer.h b/include/nbl/asset/ICPUBuffer.h index 044c0d9018..5cd03363ef 100644 --- a/include/nbl/asset/ICPUBuffer.h +++ b/include/nbl/asset/ICPUBuffer.h @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_I_CPU_BUFFER_H_INCLUDED_ diff --git a/include/nbl/asset/interchange/COBJMeshWriter.h b/include/nbl/asset/interchange/COBJMeshWriter.h index 0f1cc3c2e6..fed9898659 100644 --- a/include/nbl/asset/interchange/COBJMeshWriter.h +++ b/include/nbl/asset/interchange/COBJMeshWriter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_OBJ_MESH_WRITER_H_INCLUDED_ diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 6c050e3213..2e48227c8d 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_I_ASSET_LOADER_H_INCLUDED_ @@ -92,7 +92,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted //[[deprecated]] ELPF_RIGHT_HANDED_MESHES = 0x1, //!< specifies that a mesh will be flipped in such a way that it'll look correctly in right-handed camera system //[[deprecated]] ELPF_DONT_COMPILE_GLSL = 0x2, //!< it states that GLSL won't be compiled to SPIR-V if it is loaded or generated ELPF_LOAD_METADATA_ONLY = 0x4, //!< it forces the loader to not load the entire scene for performance in special cases to fetch metadata. - ELPF_COMPUTE_CONTENT_HASHES = 0x8 //!< forces loaders to compute content hashes of produced buffers before returning. + ELPF_DONT_COMPUTE_CONTENT_HASHES = 0x8 //!< opt-out from computing content hashes of produced buffers before returning. }; struct SAssetLoadParams diff --git a/include/nbl/asset/interchange/IGeometryLoader.h b/include/nbl/asset/interchange/IGeometryLoader.h index 01a7342e02..52bedd06ee 100644 --- a/include/nbl/asset/interchange/IGeometryLoader.h +++ b/include/nbl/asset/interchange/IGeometryLoader.h @@ -1,4 +1,4 @@ -// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_I_GEOMETRY_LOADER_H_INCLUDED_ diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 24e99584e2..8bece21b96 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -1,4 +1,4 @@ -// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_S_FILE_IO_POLICY_H_INCLUDED_ diff --git a/include/nbl/asset/interchange/SGeometryContentHashCommon.h b/include/nbl/asset/interchange/SGeometryContentHashCommon.h index 8ce5e4dfbe..a2017eed8b 100644 --- a/include/nbl/asset/interchange/SGeometryContentHashCommon.h +++ b/include/nbl/asset/interchange/SGeometryContentHashCommon.h @@ -1,4 +1,4 @@ -// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_S_GEOMETRY_CONTENT_HASH_COMMON_H_INCLUDED_ diff --git a/include/nbl/asset/interchange/SInterchangeIOCommon.h b/include/nbl/asset/interchange/SInterchangeIOCommon.h index a7e30f23db..97d9ca84c1 100644 --- a/include/nbl/asset/interchange/SInterchangeIOCommon.h +++ b/include/nbl/asset/interchange/SInterchangeIOCommon.h @@ -1,4 +1,4 @@ -// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_S_INTERCHANGE_IO_COMMON_H_INCLUDED_ diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index b6cea0cbc6..4df853c930 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -1,4 +1,4 @@ -// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_S_LOADER_RUNTIME_TUNING_H_INCLUDED_ diff --git a/include/nbl/core/hash/blake.h b/include/nbl/core/hash/blake.h index 5f5ab3fb95..a13500ac77 100644 --- a/include/nbl/core/hash/blake.h +++ b/include/nbl/core/hash/blake.h @@ -1,4 +1,4 @@ -// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_CORE_HASH_BLAKE3_H_INCLUDED_ diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 4de7ae83fb..1613fa45f8 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -1,5 +1,5 @@ -# Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -# Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +# Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +# Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. # This file is part of the "Nabla Engine". 
# For conditions of distribution and use, see copyright notice in nabla.h diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index 8378976e97..de390e7cc8 100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h diff --git a/src/nbl/asset/interchange/CGLTFLoader.cpp b/src/nbl/asset/interchange/CGLTFLoader.cpp index d0941103f6..01ca108331 100644 --- a/src/nbl/asset/interchange/CGLTFLoader.cpp +++ b/src/nbl/asset/interchange/CGLTFLoader.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 AnastaZIuk +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in Nabla.h diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index da18128ff2..2976c389e7 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors @@ -16,15 +16,8 @@ #include "COBJMeshFileLoader.h" -#include -#include #include -#include -#include -#include -#include #include -#include #include namespace nbl::asset @@ -47,17 +40,17 @@ using Float2 = hlsl::float32_t2; static_assert(sizeof(Float3) == sizeof(float) * 3ull); static_assert(sizeof(Float2) == sizeof(float) * 2ull); -NBL_FORCE_INLINE bool isObjInlineWhitespace(const char c) +inline bool isObjInlineWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\v' || c == '\f'; } -NBL_FORCE_INLINE bool isObjDigit(const char c) +inline bool isObjDigit(const char c) { return c >= '0' && c <= '9'; } -NBL_FORCE_INLINE bool parseObjFloat(const char*& ptr, const char* const end, float& out) +inline bool parseObjFloat(const char*& ptr, const char* const end, float& out) { const char* const start = ptr; if (start >= end) @@ -191,9 +184,9 @@ void extendAABB(hlsl::shapes::AABB<3, hlsl::float32_t>& aabb, bool& hasAABB, con if (p.z > aabb.maxVx.z) aabb.maxVx.z = p.z; } -template -IGeometry::SDataView createAdoptedView(core::vector&& data, const E_FORMAT format) +const auto createAdoptedView = [](auto&& data, const E_FORMAT format) -> IGeometry::SDataView { + using T = typename std::decay_t::value_type; if (data.empty()) return {}; @@ -218,7 +211,7 @@ IGeometry::SDataView createAdoptedView(core::vector&& data, const } }; return view; -} +}; void objRecomputeContentHashes(ICPUPolygonGeometry* geometry) { @@ -255,16 +248,9 @@ void objRecomputeContentHashes(ICPUPolygonGeometry* geometry) buffer->setContentHash(buffer->computeContentHash()); } -bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, double& ioMs, SFileReadTelemetry& ioTelemetry) +bool readTextFileWithPolicy(system::IFile* 
file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry& ioTelemetry) { - return readFileWithPolicyTimed( - file, - reinterpret_cast(dst), - 0ull, - byteCount, - ioPlan, - &ioMs, - &ioTelemetry); + return readFileWithPolicyTimed(file, reinterpret_cast(dst), 0ull, byteCount, ioPlan, nullptr, &ioTelemetry); } const char* goFirstWord(const char* buf, const char* const bufEnd, bool acrossNewlines = true) @@ -352,7 +338,7 @@ const char* readUV(const char* bufPtr, float vec[2], const char* const bufEnd) return bufPtr; } -NBL_FORCE_INLINE bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint32_t& out) +inline bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint32_t& out) { if (ptr >= end || !isObjDigit(*ptr)) return false; @@ -370,7 +356,7 @@ NBL_FORCE_INLINE bool parseUnsignedObjIndex(const char*& ptr, const char* const return true; } -NBL_FORCE_INLINE bool parseObjFaceTokenPositiveTriplet(const char*& ptr, const char* const end, int32_t* idx, const size_t posCount, const size_t uvCount, const size_t normalCount) +inline bool parseObjFaceTokenPositiveTriplet(const char*& ptr, const char* const end, int32_t* idx, const size_t posCount, const size_t uvCount, const size_t normalCount) { while (ptr < end && isObjInlineWhitespace(*ptr)) ++ptr; @@ -409,7 +395,7 @@ NBL_FORCE_INLINE bool parseObjFaceTokenPositiveTriplet(const char*& ptr, const c return true; } -NBL_FORCE_INLINE bool parseObjPositiveIndexBounded(const char*& ptr, const char* const end, const size_t maxCount, int32_t& out) +inline bool parseObjPositiveIndexBounded(const char*& ptr, const char* const end, const size_t maxCount, int32_t& out) { if (ptr >= end || !isObjDigit(*ptr)) return false; @@ -430,7 +416,7 @@ NBL_FORCE_INLINE bool parseObjPositiveIndexBounded(const char*& ptr, const char* return true; } -NBL_FORCE_INLINE bool parseObjTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, int32_t* idx0, 
int32_t* idx1, int32_t* idx2, const size_t posCount, const size_t uvCount, const size_t normalCount) +inline bool parseObjTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, int32_t* idx0, int32_t* idx1, int32_t* idx2, const size_t posCount, const size_t uvCount, const size_t normalCount) { const char* ptr = lineStart; int32_t* const out[3] = { idx0, idx1, idx2 }; @@ -510,7 +496,7 @@ NBL_FORCE_INLINE bool parseObjTrianglePositiveTripletLine(const char* const line return ptr == lineEnd; } -NBL_FORCE_INLINE bool parseSignedObjIndex(const char*& ptr, const char* const end, int32_t& out) +inline bool parseSignedObjIndex(const char*& ptr, const char* const end, int32_t& out) { if (ptr >= end) return false; @@ -547,7 +533,7 @@ NBL_FORCE_INLINE bool parseSignedObjIndex(const char*& ptr, const char* const en return true; } -NBL_FORCE_INLINE bool resolveObjIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) +inline bool resolveObjIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) { if (rawIndex > 0) { @@ -568,7 +554,7 @@ NBL_FORCE_INLINE bool resolveObjIndex(const int32_t rawIndex, const size_t eleme return true; } -NBL_FORCE_INLINE bool parseObjFaceVertexTokenFast(const char*& linePtr, const char* const lineEnd, int32_t* idx, const size_t posCount, const size_t uvCount, const size_t normalCount) +inline bool parseObjFaceVertexTokenFast(const char*& linePtr, const char* const lineEnd, int32_t* idx, const size_t posCount, const size_t uvCount, const size_t normalCount) { if (!idx) return false; @@ -678,16 +664,14 @@ NBL_FORCE_INLINE bool parseObjFaceVertexTokenFast(const char*& linePtr, const ch } -COBJMeshFileLoader::COBJMeshFileLoader(IAssetManager* _manager) +COBJMeshFileLoader::COBJMeshFileLoader(IAssetManager*) { - (void)_manager; } COBJMeshFileLoader::~COBJMeshFileLoader() = default; -bool COBJMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) 
const +bool COBJMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr) const { - (void)logger; if (!_file) return false; system::IFile::success_t succ; @@ -702,27 +686,11 @@ const char** COBJMeshFileLoader::getAssociatedFileExtensions() const return ext; } -asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) +asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride*, uint32_t) { - (void)_override; - (void)_hierarchyLevel; - if (!_file) return {}; - using clock_t = std::chrono::high_resolution_clock; - const auto totalStart = clock_t::now(); - double ioMs = 0.0; - double parseMs = 0.0; - double buildMs = 0.0; - double hashMs = 0.0; - double aabbMs = 0.0; - double parseVms = 0.0; - double parseVNms = 0.0; - double parseVTms = 0.0; - double parseFaceMs = 0.0; - double dedupMs = 0.0; - double emitMs = 0.0; uint64_t faceCount = 0u; uint64_t faceFastTokenCount = 0u; uint64_t faceFallbackTokenCount = 0u; @@ -738,7 +706,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return {}; } - const auto ioStart = clock_t::now(); std::string fileContents = {}; const char* buf = nullptr; if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) @@ -754,11 +721,10 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (!buf) { fileContents.resize(static_cast(filesize)); - if (!readTextFileWithPolicy(_file, fileContents.data(), fileContents.size(), ioPlan, ioMs, ioTelemetry)) + if (!readTextFileWithPolicy(_file, fileContents.data(), fileContents.size(), ioPlan, ioTelemetry)) return {}; buf = fileContents.data(); } - ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); const char* const bufEnd = buf + 
static_cast(filesize); const char* bufPtr = buf; @@ -969,10 +935,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return true; }; - const bool trackStages = - _params.logger.get() != nullptr && - ((_params.logger.get()->getLogLevelMask() & system::ILogger::ELL_PERFORMANCE).value != 0u); - const auto parseStart = clock_t::now(); while (bufPtr < bufEnd) { const char* const lineStart = bufPtr; @@ -993,7 +955,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as { if ((lineStart + 1) < lineEnd && lineStart[1] == ' ') { - const auto stageStart = trackStages ? clock_t::now() : clock_t::time_point{}; Float3 vec{}; const char* ptr = lineStart + 2; for (uint32_t i = 0u; i < 3u; ++i) @@ -1007,12 +968,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } positions.push_back(vec); dedupHeadByPos.push_back(-1); - if (trackStages) - parseVms += std::chrono::duration(clock_t::now() - stageStart).count(); } else if ((lineStart + 2) < lineEnd && lineStart[1] == 'n' && isObjInlineWhitespace(lineStart[2])) { - const auto stageStart = trackStages ? clock_t::now() : clock_t::time_point{}; Float3 vec{}; const char* ptr = lineStart + 3; for (uint32_t i = 0u; i < 3u; ++i) @@ -1025,12 +983,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return {}; } normals.push_back(vec); - if (trackStages) - parseVNms += std::chrono::duration(clock_t::now() - stageStart).count(); } else if ((lineStart + 2) < lineEnd && lineStart[1] == 't' && isObjInlineWhitespace(lineStart[2])) { - const auto stageStart = trackStages ? 
clock_t::now() : clock_t::time_point{}; Float2 vec{}; const char* ptr = lineStart + 3; for (uint32_t i = 0u; i < 2u; ++i) @@ -1044,8 +999,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } vec.y = 1.f - vec.y; uvs.push_back(vec); - if (trackStages) - parseVTms += std::chrono::duration(clock_t::now() - stageStart).count(); } } else if (*lineStart == 'f' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) @@ -1077,10 +1030,17 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as triangleFastPath = (triLinePtr == lineEnd); } } - const auto faceStart = trackStages ? clock_t::now() : clock_t::time_point{}; if (triangleFastPath) { - const auto dedupStart = trackStages ? clock_t::now() : clock_t::time_point{}; + const bool fullTriplet = + triIdx0[0] >= 0 && triIdx0[1] >= 0 && triIdx0[2] >= 0 && + triIdx1[0] >= 0 && triIdx1[1] >= 0 && triIdx1[2] >= 0 && + triIdx2[0] >= 0 && triIdx2[1] >= 0 && triIdx2[2] >= 0; + if (!fullTriplet) + triangleFastPath = false; + } + if (triangleFastPath) + { uint32_t c0 = 0u; uint32_t c1 = 0u; uint32_t c2 = 0u; @@ -1090,14 +1050,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return {}; if (!acquireCornerIndexPositiveTriplet(triIdx2[0], triIdx2[1], triIdx2[2], c2)) return {}; - if (trackStages) - dedupMs += std::chrono::duration(clock_t::now() - dedupStart).count(); faceFastTokenCount += 3u; - const auto emitStart = trackStages ? clock_t::now() : clock_t::time_point{}; if (!appendIndex(c2) || !appendIndex(c1) || !appendIndex(c0)) return {}; - if (trackStages) - emitMs += std::chrono::duration(clock_t::now() - emitStart).count(); } else { @@ -1108,7 +1063,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (parsedFirstThree) { - const auto dedupStart = trackStages ? 
clock_t::now() : clock_t::time_point{}; uint32_t c0 = 0u; uint32_t c1 = 0u; uint32_t c2 = 0u; @@ -1118,14 +1072,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return {}; if (!acquireCornerIndex(triIdx2, c2)) return {}; - if (trackStages) - dedupMs += std::chrono::duration(clock_t::now() - dedupStart).count(); faceFastTokenCount += 3u; - const auto emitStart = trackStages ? clock_t::now() : clock_t::time_point{}; if (!appendIndex(c2) || !appendIndex(c1) || !appendIndex(c0)) return {}; - if (trackStages) - emitMs += std::chrono::duration(clock_t::now() - emitStart).count(); firstCorner = c0; previousCorner = c2; cornerCount = 3u; @@ -1144,12 +1093,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return {}; ++faceFastTokenCount; - const auto dedupStart = trackStages ? clock_t::now() : clock_t::time_point{}; uint32_t cornerIx = 0u; if (!acquireCornerIndex(idx, cornerIx)) return {}; - if (trackStages) - dedupMs += std::chrono::duration(clock_t::now() - dedupStart).count(); if (cornerCount == 0u) { @@ -1165,17 +1111,12 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as continue; } - const auto emitStart = trackStages ? 
clock_t::now() : clock_t::time_point{}; if (!appendIndex(cornerIx) || !appendIndex(previousCorner) || !appendIndex(firstCorner)) return {}; - if (trackStages) - emitMs += std::chrono::duration(clock_t::now() - emitStart).count(); previousCorner = cornerIx; ++cornerCount; } } - if (trackStages) - parseFaceMs += std::chrono::duration(clock_t::now() - faceStart).count(); } } @@ -1186,9 +1127,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as else bufPtr = lineTerminator + 1; } - parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); - const double parseScanMs = std::max(0.0, parseMs - (parseVms + parseVNms + parseVTms + parseFaceMs + dedupMs + emitMs)); - if (outVertexWriteCount == 0ull) return {}; @@ -1199,7 +1137,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const size_t outVertexCount = outPositions.size(); const size_t outIndexCount = indices.size(); - const auto buildStart = clock_t::now(); auto geometry = core::make_smart_refctd_ptr(); { auto view = createAdoptedView(std::move(outPositions), EF_R32G32B32_SFLOAT); @@ -1249,16 +1186,12 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as { geometry->setIndexing(IPolygonGeometryBase::PointList()); } - buildMs = std::chrono::duration(clock_t::now() - buildStart).count(); - if (_params.loaderFlags & IAssetLoader::ELPF_COMPUTE_CONTENT_HASHES) + if ((_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0) { - const auto hashStart = clock_t::now(); objRecomputeContentHashes(geometry.get()); - hashMs = std::chrono::duration(clock_t::now() - hashStart).count(); } - const auto aabbStart = clock_t::now(); if (hasParsedAABB) { geometry->visitAABB([&parsedAABB](auto& ref)->void @@ -1278,9 +1211,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as { CPolygonGeometryManipulator::recomputeAABB(geometry.get()); } - aabbMs = 
std::chrono::duration(clock_t::now() - aabbStart).count(); - - const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(filesize))) { _params.logger.log( @@ -1291,19 +1221,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as static_cast(ioTelemetry.getMinOrZero()), static_cast(ioTelemetry.getAvgOrZero())); } - (void)totalMs; - (void)ioMs; - (void)parseMs; - (void)parseScanMs; - (void)parseVms; - (void)parseVNms; - (void)parseVTms; - (void)parseFaceMs; - (void)dedupMs; - (void)emitMs; - (void)buildMs; - (void)hashMs; - (void)aabbMs; _params.logger.log( "OBJ loader stats: file=%s in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.h b/src/nbl/asset/interchange/COBJMeshFileLoader.h index 51b06f1fc7..d0b169984b 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.h +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.h @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 879b5b28a7..98648a8862 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 74f048f340..795720148e 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -1,37 +1,19 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors #ifdef _NBL_COMPILE_WITH_PLY_LOADER_ - #include "CPLYMeshFileLoader.h" -#include "nbl/asset/metadata/CPLYMetadata.h" #include "nbl/asset/interchange/SGeometryContentHashCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "nbl/core/hash/blake.h" - #include "nbl/asset/IAssetManager.h" - +#include "nbl/asset/metadata/CPLYMetadata.h" +#include "nbl/core/hash/blake.h" #include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" -//#include "nbl/asset/utils/IMeshManipulator.h" - +#include namespace nbl::asset { @@ -44,55 +26,35 @@ const char** CPLYMeshFileLoader::getAssociatedFileExtensions() const return ext; } -bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const +bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr) const { - char buf[40]; + char buf[40]; system::IFile::success_t success; - _file->read(success,buf,0,sizeof(buf)); + _file->read(success, buf, 0, sizeof(buf)); if (!success) return false; - char* header = buf; - if (strncmp(header,"ply",3u)!=0) - return false; - - header += 4; - char* lf = 
strstr(header,"\n"); - if (!lf) - return false; - - constexpr std::array headers = { - "format ascii 1.0", - "format binary_little_endian 1.0", - "format binary_big_endian 1.0" - }; - return std::find(headers.begin(),headers.end(),std::string_view(header,lf))!=headers.end(); -} + char* header = buf; + if (strncmp(header, "ply", 3u) != 0) + return false; -template -T byteswap(const T& v) -{ - T retval; - auto it = reinterpret_cast(&v); - std::reverse_copy(it,it+sizeof(T),reinterpret_cast(&retval)); - return retval; + header += 4; + char* lf = strstr(header, "\n"); + if (!lf) + return false; + + constexpr std::array headers = { "format ascii 1.0", "format binary_little_endian 1.0", "format binary_big_endian 1.0" }; + return std::find(headers.begin(), headers.end(), std::string_view(header, lf)) != headers.end(); } -template -void plyRunParallelWorkers(const size_t workerCount, Fn&& fn) +const auto plyByteswap = [](const auto value) { - if (workerCount <= 1ull) - { - fn(0ull); - return; - } - auto workerIds = std::views::iota(size_t{0ull}, workerCount); - std::for_each(std::execution::par, workerIds.begin(), workerIds.end(), [&fn](const size_t workerIx) - { - fn(workerIx); - }); -} + auto retval = value; + const auto* it = reinterpret_cast(&value); + std::reverse_copy(it, it + sizeof(retval), reinterpret_cast(&retval)); + return retval; +}; class CPLYMappedFileMemoryResource final : public core::refctd_memory_resource { @@ -101,19 +63,14 @@ class CPLYMappedFileMemoryResource final : public core::refctd_memory_resource { } - inline void* allocate(std::size_t bytes, std::size_t alignment) override + inline void* allocate(std::size_t, std::size_t) override { - (void)bytes; - (void)alignment; assert(false); return nullptr; } - inline void deallocate(void* p, std::size_t bytes, std::size_t alignment) override + inline void deallocate(void*, std::size_t, std::size_t) override { - (void)p; - (void)bytes; - (void)alignment; } private: @@ -126,12 +83,7 @@ IGeometry::SDataView 
plyCreateMappedF32x3View(system::IFile* file, v return {}; auto keepAliveResource = core::make_smart_refctd_ptr(core::smart_refctd_ptr(file)); - auto buffer = ICPUBuffer::create({ - { byteCount }, - ptr, - core::smart_refctd_ptr(keepAliveResource), - alignof(float) - }, core::adopt_memory); + auto buffer = ICPUBuffer::create({ { byteCount }, ptr, core::smart_refctd_ptr(keepAliveResource), alignof(float) }, core::adopt_memory); if (!buffer) return {}; @@ -402,13 +354,23 @@ struct SContext { // move the start pointer along StartPointer += WordLength + 1; - if (!*StartPointer) + if (StartPointer >= EndPointer) + { + if (EndOfFile) + { + WordLength = -1; + return EndPointer; + } + getNextLine(); + } + + if (StartPointer < EndPointer && !*StartPointer) getNextLine(); - if (StartPointer==LineEndPointer) + if (StartPointer >= LineEndPointer) { WordLength = -1; // - return LineEndPointer; + return StartPointer; } // process the next word { @@ -509,7 +471,7 @@ struct SContext break; auto retval = *(reinterpret_cast(StartPointer)++); if (IsWrongEndian) - retval = byteswap(retval); + retval = plyByteswap(retval); return retval; } case 4: @@ -518,7 +480,7 @@ struct SContext break; auto retval = *(reinterpret_cast(StartPointer)++); if (IsWrongEndian) - retval = byteswap(retval); + retval = plyByteswap(retval); return retval; } default: @@ -579,7 +541,7 @@ struct SContext break; auto retval = *(reinterpret_cast(StartPointer)++); if (IsWrongEndian) - retval = byteswap(retval); + retval = plyByteswap(retval); return retval; } case 8: @@ -588,7 +550,7 @@ struct SContext break; auto retval = *(reinterpret_cast(StartPointer)++); if (IsWrongEndian) - retval = byteswap(retval); + retval = plyByteswap(retval); return retval; } default: @@ -1197,12 +1159,16 @@ struct SContext ready.notify_one(); } }; - plyRunParallelWorkers(workerCount, [&](const size_t workerIx) + const auto runParallelWorkers = [](const size_t localWorkerCount, const auto& fn) -> void { - const size_t begin = 
(element.Count * workerIx) / workerCount; - const size_t end = (element.Count * (workerIx + 1ull)) / workerCount; - parseChunk(workerIx, begin, end); - }); + if (localWorkerCount <= 1ull) { fn(0ull); return; } + core::vector workers; + workers.reserve(localWorkerCount - 1ull); + for (size_t workerIx = 1ull; workerIx < localWorkerCount; ++workerIx) + workers.emplace_back([&fn, workerIx]() { fn(workerIx); }); + fn(0ull); + }; + runParallelWorkers(workerCount, [&](const size_t workerIx) { const size_t begin = (element.Count * workerIx) / workerCount; const size_t end = (element.Count * (workerIx + 1ull)) / workerCount; parseChunk(workerIx, begin, end); }); if (hashThread.joinable()) hashThread.join(); @@ -1575,7 +1541,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa return {}; using clock_t = std::chrono::high_resolution_clock; - const auto totalStart = clock_t::now(); double headerMs = 0.0; double vertexMs = 0.0; double vertexFastMs = 0.0; @@ -1587,7 +1552,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa double hashRangeMs = 0.0; double indexBuildMs = 0.0; double aabbMs = 0.0; - const bool computeContentHashes = (_params.loaderFlags & IAssetLoader::ELPF_COMPUTE_CONTENT_HASHES) != 0; + const bool computeContentHashes = (_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0; uint64_t faceCount = 0u; uint64_t fastFaceElementCount = 0u; uint64_t fastVertexElementCount = 0u; @@ -2263,8 +2228,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa hashRemainingGeometryBuffers(); } - const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); - const double stageRemainderMs = std::max(0.0, totalMs - (headerMs + vertexMs + faceMs + skipMs + layoutNegotiateMs + viewCreateMs + hashRangeMs + indexBuildMs + aabbMs)); const uint64_t ioMinRead = ctx.readCallCount ? ctx.readMinBytes : 0ull; const uint64_t ioAvgRead = ctx.readCallCount ? 
(ctx.readBytesTotal / ctx.readCallCount) : 0ull; if ( @@ -2300,20 +2263,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), ioPlan.reason); - (void)totalMs; - (void)stageRemainderMs; - (void)headerMs; - (void)vertexMs; - (void)vertexFastMs; - (void)vertexGenericMs; - (void)faceMs; - (void)skipMs; - (void)layoutNegotiateMs; - (void)viewCreateMs; - (void)hashRangeMs; - (void)indexBuildMs; - (void)aabbMs; - auto meta = core::make_smart_refctd_ptr(); return SAssetBundle(std::move(meta),{std::move(geometry)}); } diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.h b/src/nbl/asset/interchange/CPLYMeshFileLoader.h index df8b72f125..4d7b849e2e 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.h +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.h @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 9008bb4cdc..d76a018a06 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.h b/src/nbl/asset/interchange/CPLYMeshWriter.h index 884ebb6238..750cd126dd 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.h +++ b/src/nbl/asset/interchange/CPLYMeshWriter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 088b6b91e4..73d1f17c60 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors @@ -16,23 +16,7 @@ #include "nbl/core/hash/blake.h" #include "nbl/system/IFile.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include namespace nbl::asset { @@ -41,34 +25,18 @@ struct SSTLContext { IAssetLoader::SAssetLoadContext inner; SFileReadTelemetry ioTelemetry = {}; + static constexpr size_t TextProbeBytes = 6ull; + static constexpr size_t BinaryHeaderBytes = 80ull; + static constexpr size_t TriangleCountBytes = sizeof(uint32_t); + static constexpr size_t BinaryPrefixBytes = BinaryHeaderBytes + TriangleCountBytes; + static constexpr size_t TriangleFloatCount = 12ull; + static constexpr size_t TriangleFloatBytes = sizeof(float) * TriangleFloatCount; + static constexpr size_t TriangleAttributeBytes = sizeof(uint16_t); + static constexpr size_t TriangleRecordBytes = TriangleFloatBytes + TriangleAttributeBytes; + static constexpr size_t VerticesPerTriangle = 3ull; + static constexpr size_t FloatChannelsPerVertex = 3ull; }; -constexpr size_t StlTextProbeBytes = 6ull; -constexpr size_t StlBinaryHeaderBytes = 80ull; -constexpr size_t StlTriangleCountBytes = sizeof(uint32_t); -constexpr size_t StlBinaryPrefixBytes = StlBinaryHeaderBytes + StlTriangleCountBytes; -constexpr size_t StlTriangleFloatCount = 12ull; -constexpr size_t StlTriangleFloatBytes = sizeof(float) * StlTriangleFloatCount; -constexpr size_t StlTriangleAttributeBytes = sizeof(uint16_t); -constexpr size_t StlTriangleRecordBytes = StlTriangleFloatBytes + StlTriangleAttributeBytes; -constexpr size_t StlVerticesPerTriangle = 3ull; -constexpr size_t StlFloatChannelsPerVertex = 3ull; - -template -void stlRunParallelWorkers(const size_t workerCount, Fn&& fn) -{ - if 
(workerCount <= 1ull) - { - fn(0ull); - return; - } - auto workerIds = std::views::iota(size_t{0ull}, workerCount); - std::for_each(std::execution::par, workerIds.begin(), workerIds.end(), [&fn](const size_t workerIx) - { - fn(workerIx); - }); -} - const char* stlSkipWhitespace(const char* ptr, const char* const end) { while (ptr < end && core::isspace(*ptr)) @@ -117,10 +85,7 @@ bool stlReadTextFloat(const char*& ptr, const char* const end, float& outValue) bool stlReadTextVec3(const char*& ptr, const char* const end, hlsl::float32_t3& outVec) { - return - stlReadTextFloat(ptr, end, outVec.x) && - stlReadTextFloat(ptr, end, outVec.y) && - stlReadTextFloat(ptr, end, outVec.z); + return stlReadTextFloat(ptr, end, outVec.x) && stlReadTextFloat(ptr, end, outVec.y) && stlReadTextFloat(ptr, end, outVec.z); } hlsl::float32_t3 stlNormalizeOrZero(const hlsl::float32_t3& v) @@ -163,15 +128,14 @@ class CStlSplitBlockMemoryResource final : public core::refctd_memory_resource { } - inline void* allocate(std::size_t bytes, std::size_t alignment) override + inline void* allocate(std::size_t, std::size_t) override { assert(false); return nullptr; } - inline void deallocate(void* p, std::size_t bytes, std::size_t alignment) override + inline void deallocate(void* p, std::size_t bytes, std::size_t) override { - (void)alignment; const auto* const begin = reinterpret_cast(m_block); const auto* const end = begin + m_blockBytes; const auto* const ptr = reinterpret_cast(p); @@ -220,9 +184,7 @@ ICPUPolygonGeometry::SDataView stlCreateAdoptedFloat3View(core::vectorgetBacker(); auto* const payloadPtr = payload.data(); const size_t byteCount = payload.size() * sizeof(hlsl::float32_t3); - auto buffer = ICPUBuffer::create( - { { byteCount }, payloadPtr, core::smart_refctd_ptr(std::move(backer)), alignof(hlsl::float32_t3) }, - core::adopt_memory); + auto buffer = ICPUBuffer::create({ { byteCount }, payloadPtr, core::smart_refctd_ptr(std::move(backer)), alignof(hlsl::float32_t3) }, 
core::adopt_memory); if (!buffer) return {}; @@ -245,9 +207,8 @@ void stlRecomputeContentHashesParallel(ICPUPolygonGeometry* geometry, const SFil recomputeGeometryContentHashesParallel(geometry, ioPolicy); } -CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager* _assetManager) +CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager*) { - (void)_assetManager; } const char** CSTLMeshFileLoader::getAssociatedFileExtensions() const @@ -256,40 +217,20 @@ const char** CSTLMeshFileLoader::getAssociatedFileExtensions() const return ext; } -SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) +SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride*, uint32_t) { - (void)_override; - (void)_hierarchyLevel; - if (!_file) return {}; - using clock_t = std::chrono::high_resolution_clock; - const auto totalStart = clock_t::now(); - double detectMs = 0.0; - double ioMs = 0.0; - double parseMs = 0.0; - double buildMs = 0.0; - double buildAllocViewsMs = 0.0; - double buildSetViewsMs = 0.0; - double buildMiscMs = 0.0; - double hashMs = 0.0; - double aabbMs = 0.0; uint64_t triangleCount = 0u; const char* parsePath = "unknown"; - const bool computeContentHashes = (_params.loaderFlags & IAssetLoader::ELPF_COMPUTE_CONTENT_HASHES) != 0; + const bool computeContentHashes = (_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0; bool contentHashesAssigned = false; - SSTLContext context = { - asset::IAssetLoader::SAssetLoadContext{ - _params, - _file - }, - 0ull - }; + SSTLContext context = { asset::IAssetLoader::SAssetLoadContext{ _params,_file },0ull }; const size_t filesize = context.inner.mainFile->getSize(); - if (filesize < StlTextProbeBytes) + if (filesize < SSTLContext::TextProbeBytes) return {}; const auto ioPlan = 
resolveFileIOPolicy(_params.ioPolicy, static_cast(filesize), true); @@ -314,13 +255,11 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } else { - const auto ioStart = clock_t::now(); wholeFilePayload.resize(filesize + 1ull); if (!readFileExact(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, &context.ioTelemetry)) return {}; wholeFilePayload[filesize] = 0u; wholeFileData = wholeFilePayload.data(); - ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); } } @@ -328,41 +267,40 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa bool hasBinaryTriCountFromDetect = false; uint32_t binaryTriCountFromDetect = 0u; { - const auto detectStart = clock_t::now(); - std::array prefix = {}; + std::array prefix = {}; bool hasPrefix = false; - if (wholeFileData && filesize >= StlBinaryPrefixBytes) + if (wholeFileData && filesize >= SSTLContext::BinaryPrefixBytes) { - std::memcpy(prefix.data(), wholeFileData, StlBinaryPrefixBytes); + std::memcpy(prefix.data(), wholeFileData, SSTLContext::BinaryPrefixBytes); hasPrefix = true; } else { - hasPrefix = filesize >= StlBinaryPrefixBytes && readFileExact(context.inner.mainFile, prefix.data(), 0ull, StlBinaryPrefixBytes, &context.ioTelemetry); + hasPrefix = filesize >= SSTLContext::BinaryPrefixBytes && readFileExact(context.inner.mainFile, prefix.data(), 0ull, SSTLContext::BinaryPrefixBytes, &context.ioTelemetry); } bool startsWithSolid = false; if (hasPrefix) { - startsWithSolid = (std::memcmp(prefix.data(), "solid ", StlTextProbeBytes) == 0); + startsWithSolid = (std::memcmp(prefix.data(), "solid ", SSTLContext::TextProbeBytes) == 0); } else { - char header[StlTextProbeBytes] = {}; + char header[SSTLContext::TextProbeBytes] = {}; if (wholeFileData) std::memcpy(header, wholeFileData, sizeof(header)); else if (!readFileExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) return {}; - startsWithSolid = 
(std::strncmp(header, "solid ", StlTextProbeBytes) == 0); + startsWithSolid = (std::strncmp(header, "solid ", SSTLContext::TextProbeBytes) == 0); } bool binaryBySize = false; if (hasPrefix) { uint32_t triCount = 0u; - std::memcpy(&triCount, prefix.data() + StlBinaryHeaderBytes, sizeof(triCount)); + std::memcpy(&triCount, prefix.data() + SSTLContext::BinaryHeaderBytes, sizeof(triCount)); binaryTriCountFromDetect = triCount; hasBinaryTriCountFromDetect = true; - const uint64_t expectedSize = StlBinaryPrefixBytes + static_cast(triCount) * StlTriangleRecordBytes; + const uint64_t expectedSize = SSTLContext::BinaryPrefixBytes + static_cast(triCount) * SSTLContext::TriangleRecordBytes; binaryBySize = (expectedSize == filesize); } @@ -373,7 +311,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa else binary = false; - detectMs = std::chrono::duration(clock_t::now() - detectStart).count(); } auto geometry = core::make_smart_refctd_ptr(); @@ -394,41 +331,38 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (binary) { parsePath = "binary_fast"; - if (filesize < StlBinaryPrefixBytes) + if (filesize < SSTLContext::BinaryPrefixBytes) return {}; uint32_t triangleCount32 = binaryTriCountFromDetect; if (!hasBinaryTriCountFromDetect) { - if (!readFileExact(context.inner.mainFile, &triangleCount32, StlBinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) + if (!readFileExact(context.inner.mainFile, &triangleCount32, SSTLContext::BinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) return {}; } triangleCount = triangleCount32; - const size_t dataSize = static_cast(triangleCount) * StlTriangleRecordBytes; - const size_t expectedSize = StlBinaryPrefixBytes + dataSize; + const size_t dataSize = static_cast(triangleCount) * SSTLContext::TriangleRecordBytes; + const size_t expectedSize = SSTLContext::BinaryPrefixBytes + dataSize; if (filesize < expectedSize) return {}; const uint8_t* 
payloadData = nullptr; if (wholeFileData) { - payloadData = wholeFileData + StlBinaryPrefixBytes; + payloadData = wholeFileData + SSTLContext::BinaryPrefixBytes; } else { core::vector payload; payload.resize(dataSize); - const auto ioStart = clock_t::now(); - if (!readFileWithPolicy(context.inner.mainFile, payload.data(), StlBinaryPrefixBytes, dataSize, ioPlan, &context.ioTelemetry)) + if (!readFileWithPolicy(context.inner.mainFile, payload.data(), SSTLContext::BinaryPrefixBytes, dataSize, ioPlan, &context.ioTelemetry)) return {}; - ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); wholeFilePayload = std::move(payload); payloadData = wholeFilePayload.data(); } - vertexCount = triangleCount * StlVerticesPerTriangle; - const auto buildPrepStart = clock_t::now(); + vertexCount = triangleCount * SSTLContext::VerticesPerTriangle; const size_t vertexCountSizeT = static_cast(vertexCount); if (vertexCountSizeT > (std::numeric_limits::max() / sizeof(hlsl::float32_t3))) return {}; @@ -447,18 +381,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa block, blockBytes, alignof(float)); - auto posBuffer = ICPUBuffer::create({ - { viewByteSize }, - block, - core::smart_refctd_ptr(blockResource), - alignof(float) - }, core::adopt_memory); - auto normalBuffer = ICPUBuffer::create({ - { viewByteSize }, - reinterpret_cast(block) + viewByteSize, - core::smart_refctd_ptr(blockResource), - alignof(float) - }, core::adopt_memory); + auto posBuffer = ICPUBuffer::create({ { viewByteSize },block,core::smart_refctd_ptr(blockResource),alignof(float) }, core::adopt_memory); + auto normalBuffer = ICPUBuffer::create({ { viewByteSize },reinterpret_cast(block) + viewByteSize,core::smart_refctd_ptr(blockResource),alignof(float) }, core::adopt_memory); if (!posBuffer || !normalBuffer) return {}; ICPUPolygonGeometry::SDataView posView = {}; @@ -487,20 +411,16 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa auto* 
normalOutFloat = reinterpret_cast(normalView.getPointer()); if (!posOutFloat || !normalOutFloat) return {}; - const double buildPrepMs = std::chrono::duration(clock_t::now() - buildPrepStart).count(); - buildAllocViewsMs += buildPrepMs; - buildMs += buildPrepMs; - const auto parseStart = clock_t::now(); const uint8_t* cursor = payloadData; const uint8_t* const end = cursor + dataSize; - if (end < cursor || static_cast(end - cursor) < static_cast(triangleCount) * StlTriangleRecordBytes) + if (end < cursor || static_cast(end - cursor) < static_cast(triangleCount) * SSTLContext::TriangleRecordBytes) return {}; const size_t hw = resolveLoaderHardwareThreads(); SLoaderRuntimeTuningRequest parseTuningRequest = {}; parseTuningRequest.inputBytes = dataSize; parseTuningRequest.totalWorkUnits = triangleCount; - parseTuningRequest.minBytesPerWorker = StlTriangleRecordBytes; + parseTuningRequest.minBytesPerWorker = SSTLContext::TriangleRecordBytes; parseTuningRequest.hardwareThreads = static_cast(hw); parseTuningRequest.hardMaxWorkers = static_cast(std::max(1ull, hw > 2ull ? (hw - 2ull) : hw)); parseTuningRequest.targetChunksPerWorker = 2u; @@ -526,21 +446,19 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const size_t parseChunkCount = static_cast(loaderRuntimeCeilDiv(triangleCount, parseChunkTriangles)); const bool hashInParsePipeline = computeContentHashes; std::vector hashChunkReady(hashInParsePipeline ? 
parseChunkCount : 0ull, 0u); - double positionHashPipelineMs = 0.0; - double normalHashPipelineMs = 0.0; std::atomic_bool hashPipelineOk = true; core::blake3_hash_t parsedPositionHash = static_cast(core::blake3_hasher{}); core::blake3_hash_t parsedNormalHash = static_cast(core::blake3_hasher{}); auto parseRange = [&](const uint64_t beginTri, const uint64_t endTri, SThreadAABB& localAABB) -> void { - const uint8_t* localCursor = payloadData + beginTri * StlTriangleRecordBytes; - float* posCursor = posOutFloat + beginTri * StlVerticesPerTriangle * StlFloatChannelsPerVertex; - float* normalCursor = normalOutFloat + beginTri * StlVerticesPerTriangle * StlFloatChannelsPerVertex; + const uint8_t* localCursor = payloadData + beginTri * SSTLContext::TriangleRecordBytes; + float* posCursor = posOutFloat + beginTri * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex; + float* normalCursor = normalOutFloat + beginTri * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex; for (uint64_t tri = beginTri; tri < endTri; ++tri) { const uint8_t* const triRecord = localCursor; - localCursor += StlTriangleRecordBytes; - float triValues[StlTriangleFloatCount]; + localCursor += SSTLContext::TriangleRecordBytes; + float triValues[SSTLContext::TriangleFloatCount]; std::memcpy(triValues, triRecord, sizeof(triValues)); float normalX = triValues[0ull]; @@ -633,8 +551,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa normalCursor[6ull] = normalX; normalCursor[7ull] = normalY; normalCursor[8ull] = normalZ; - posCursor += StlVerticesPerTriangle * StlFloatChannelsPerVertex; - normalCursor += StlVerticesPerTriangle * StlFloatChannelsPerVertex; + posCursor += SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex; + normalCursor += SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex; } }; std::jthread positionHashThread; @@ -646,7 +564,6 @@ SAssetBundle 
CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa try { core::blake3_hasher positionHasher; - const auto hashThreadStart = clock_t::now(); size_t chunkIx = 0ull; while (chunkIx < parseChunkCount) { @@ -666,11 +583,10 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; const uint64_t endTri = std::min(static_cast(runEnd) * parseChunkTriangles, triangleCount); const size_t runTriangles = static_cast(endTri - begin); - const size_t runBytes = runTriangles * StlVerticesPerTriangle * StlFloatChannelsPerVertex * sizeof(float); - positionHasher.update(posOutFloat + begin * StlVerticesPerTriangle * StlFloatChannelsPerVertex, runBytes); + const size_t runBytes = runTriangles * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex * sizeof(float); + positionHasher.update(posOutFloat + begin * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex, runBytes); chunkIx = runEnd; } - positionHashPipelineMs = std::chrono::duration(clock_t::now() - hashThreadStart).count(); parsedPositionHash = static_cast(positionHasher); } catch (...) 
@@ -683,7 +599,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa try { core::blake3_hasher normalHasher; - const auto hashThreadStart = clock_t::now(); size_t chunkIx = 0ull; while (chunkIx < parseChunkCount) { @@ -703,11 +618,10 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; const uint64_t endTri = std::min(static_cast(runEnd) * parseChunkTriangles, triangleCount); const size_t runTriangles = static_cast(endTri - begin); - const size_t runBytes = runTriangles * StlVerticesPerTriangle * StlFloatChannelsPerVertex * sizeof(float); - normalHasher.update(normalOutFloat + begin * StlVerticesPerTriangle * StlFloatChannelsPerVertex, runBytes); + const size_t runBytes = runTriangles * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex * sizeof(float); + normalHasher.update(normalOutFloat + begin * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex, runBytes); chunkIx = runEnd; } - normalHashPipelineMs = std::chrono::duration(clock_t::now() - hashThreadStart).count(); parsedNormalHash = static_cast(normalHasher); } catch (...) 
@@ -738,15 +652,16 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if constexpr (ComputeAABBInParse) threadAABBs[workerIx] = localAABB; }; - - if (workerCount > 1ull) + const auto runParallelWorkers = [](const size_t localWorkerCount, const auto& fn) -> void { - stlRunParallelWorkers(workerCount, parseWorker); - } - else - { - parseWorker(0ull); - } + if (localWorkerCount <= 1ull) { fn(0ull); return; } + core::vector workers; + workers.reserve(localWorkerCount - 1ull); + for (size_t workerIx = 1ull; workerIx < localWorkerCount; ++workerIx) + workers.emplace_back([&fn, workerIx]() { fn(workerIx); }); + fn(0ull); + }; + runParallelWorkers(workerCount, parseWorker); if (positionHashThread.joinable()) positionHashThread.join(); if (normalHashThread.joinable()) @@ -755,7 +670,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { if (!hashPipelineOk.load(std::memory_order_relaxed)) return {}; - hashMs += positionHashPipelineMs + normalHashPipelineMs; posView.src.buffer->setContentHash(parsedPositionHash); normalView.src.buffer->setContentHash(parsedNormalHash); contentHashesAssigned = true; @@ -786,25 +700,17 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (localAABB.maxZ > parsedAABB.maxVx.z) parsedAABB.maxVx.z = localAABB.maxZ; } } - parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); - - const auto buildFinalizeStart = clock_t::now(); geometry->setPositionView(std::move(posView)); geometry->setNormalView(std::move(normalView)); - const double buildFinalizeMs = std::chrono::duration(clock_t::now() - buildFinalizeStart).count(); - buildSetViewsMs += buildFinalizeMs; - buildMs += buildFinalizeMs; } else { parsePath = "ascii_fallback"; if (!wholeFileData) { - const auto ioStart = clock_t::now(); wholeFilePayload.resize(filesize + 1ull); if (!readFileWithPolicy(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, ioPlan, 
&context.ioTelemetry)) return {}; - ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); wholeFilePayload[filesize] = 0u; wholeFileData = wholeFilePayload.data(); } @@ -817,7 +723,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("solid")) return {}; - const auto parseStart = clock_t::now(); while (stlReadTextToken(cursor, end, textToken)) { if (textToken == std::string_view("endsolid")) @@ -833,8 +738,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!stlReadTextVec3(cursor, end, fileNormal)) return {}; - normals.push_back(stlResolveStoredNormal(fileNormal)); - if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("outer")) return {}; if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("loop")) @@ -865,26 +768,18 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("endfacet")) return {}; } - parseMs = std::chrono::duration(clock_t::now() - parseStart).count(); if (positions.empty()) return {}; - triangleCount = positions.size() / StlVerticesPerTriangle; + triangleCount = positions.size() / SSTLContext::VerticesPerTriangle; vertexCount = positions.size(); - const auto buildStart = clock_t::now(); - const auto allocStart = clock_t::now(); auto posView = stlCreateAdoptedFloat3View(std::move(positions)); auto normalView = stlCreateAdoptedFloat3View(std::move(normals)); if (!posView || !normalView) return {}; - buildAllocViewsMs += std::chrono::duration(clock_t::now() - allocStart).count(); - - const auto setStart = clock_t::now(); geometry->setPositionView(std::move(posView)); geometry->setNormalView(std::move(normalView)); - buildSetViewsMs += std::chrono::duration(clock_t::now() - setStart).count(); - buildMs = 
std::chrono::duration(clock_t::now() - buildStart).count(); } if (vertexCount == 0ull) @@ -892,12 +787,9 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (computeContentHashes && !contentHashesAssigned) { - const auto hashStart = clock_t::now(); stlRecomputeContentHashesParallel(geometry.get(), _params.ioPolicy); - hashMs += std::chrono::duration(clock_t::now() - hashStart).count(); } - const auto aabbStart = clock_t::now(); if (hasParsedAABB) { geometry->visitAABB([&parsedAABB](auto& ref)->void @@ -917,11 +809,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { CPolygonGeometryManipulator::recomputeAABB(geometry.get()); } - aabbMs = std::chrono::duration(clock_t::now() - aabbStart).count(); - - buildMiscMs = std::max(0.0, buildMs - (buildAllocViewsMs + buildSetViewsMs)); - - const auto totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); const uint64_t ioMinRead = context.ioTelemetry.getMinOrZero(); const uint64_t ioAvgRead = context.ioTelemetry.getAvgOrZero(); if (isTinyIOTelemetryLikely(context.ioTelemetry, static_cast(filesize))) @@ -949,50 +836,37 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), ioPlan.reason); - (void)totalMs; - (void)detectMs; - (void)ioMs; - (void)parseMs; - (void)buildMs; - (void)buildAllocViewsMs; - (void)buildSetViewsMs; - (void)buildMiscMs; - (void)hashMs; - (void)aabbMs; - auto meta = core::make_smart_refctd_ptr(); return SAssetBundle(std::move(meta), { std::move(geometry) }); } -bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const +bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr) const { - (void)logger; - if (!_file || _file->getSize() <= StlTextProbeBytes) + if (!_file || _file->getSize() <= SSTLContext::TextProbeBytes) return false; const 
size_t fileSize = _file->getSize(); - if (fileSize < StlBinaryPrefixBytes) + if (fileSize < SSTLContext::BinaryPrefixBytes) { - char header[StlTextProbeBytes] = {}; + char header[SSTLContext::TextProbeBytes] = {}; if (!readFileExact(_file, header, 0ull, sizeof(header))) return false; - return std::strncmp(header, "solid ", StlTextProbeBytes) == 0; + return std::strncmp(header, "solid ", SSTLContext::TextProbeBytes) == 0; } - std::array prefix = {}; + std::array prefix = {}; if (!readFileExact(_file, prefix.data(), 0ull, prefix.size())) return false; uint32_t triangleCount = 0u; - std::memcpy(&triangleCount, prefix.data() + StlBinaryHeaderBytes, sizeof(triangleCount)); - if (std::memcmp(prefix.data(), "solid ", StlTextProbeBytes) == 0) + std::memcpy(&triangleCount, prefix.data() + SSTLContext::BinaryHeaderBytes, sizeof(triangleCount)); + if (std::memcmp(prefix.data(), "solid ", SSTLContext::TextProbeBytes) == 0) return true; - return fileSize == (StlTriangleRecordBytes * triangleCount + StlBinaryPrefixBytes); + return fileSize == (SSTLContext::TriangleRecordBytes * triangleCount + SSTLContext::BinaryPrefixBytes); } } #endif // _NBL_COMPILE_WITH_STL_LOADER_ - diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.h b/src/nbl/asset/interchange/CSTLMeshFileLoader.h index 535250e084..c5c982800d 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.h +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.h @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 300445d8f5..ec307841fa 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.h b/src/nbl/asset/interchange/CSTLMeshWriter.h index 23994d27da..ca8ea62ceb 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.h +++ b/src/nbl/asset/interchange/CSTLMeshWriter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors diff --git a/src/nbl/asset/interchange/IGeometryWriter.cpp b/src/nbl/asset/interchange/IGeometryWriter.cpp index 10b55728e6..c66b7096bc 100644 --- a/src/nbl/asset/interchange/IGeometryWriter.cpp +++ b/src/nbl/asset/interchange/IGeometryWriter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h diff --git a/src/nbl/asset/pch_asset.h b/src/nbl/asset/pch_asset.h index 8ee0d9ca7d..d24252be24 100644 --- a/src/nbl/asset/pch_asset.h +++ b/src/nbl/asset/pch_asset.h @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_PCH_ASSET_H_INCLUDED_ diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index 077466ea06..8e58e3438a 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". #include "nbl/video/utilities/CAssetConverter.h" From 22b0d23bd0ad0e3e70b04378c5a172a122d1e1d0 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 13 Feb 2026 11:22:02 +0100 Subject: [PATCH 027/118] Improve mesh interchange paths and optimize PLY parsing --- .../asset/interchange/COBJMeshFileLoader.cpp | 38 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 436 +++++++++++------- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 225 +++++---- .../asset/interchange/CSTLMeshFileLoader.cpp | 7 +- 4 files changed, 404 insertions(+), 302 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 2976c389e7..2070fe4d59 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -6,6 +6,7 @@ #include "nbl/core/declarations.h" #include "nbl/asset/IAssetManager.h" +#include "nbl/asset/interchange/SGeometryContentHashCommon.h" #include 
"nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" @@ -213,41 +214,6 @@ const auto createAdoptedView = [](auto&& data, const E_FORMAT format) -> IGeomet return view; }; -void objRecomputeContentHashes(ICPUPolygonGeometry* geometry) -{ - if (!geometry) - return; - - core::vector> buffers; - auto appendViewBuffer = [&buffers](const IGeometry::SDataView& view) -> void - { - if (!view || !view.src.buffer) - return; - for (const auto& existing : buffers) - { - if (existing.get() == view.src.buffer.get()) - return; - } - buffers.push_back(core::smart_refctd_ptr(view.src.buffer)); - }; - - appendViewBuffer(geometry->getPositionView()); - appendViewBuffer(geometry->getIndexView()); - appendViewBuffer(geometry->getNormalView()); - for (const auto& view : *geometry->getAuxAttributeViews()) - appendViewBuffer(view); - for (const auto& view : *geometry->getJointWeightViews()) - { - appendViewBuffer(view.indices); - appendViewBuffer(view.weights); - } - if (auto jointOBB = geometry->getJointOBBView(); jointOBB) - appendViewBuffer(*jointOBB); - - for (auto& buffer : buffers) - buffer->setContentHash(buffer->computeContentHash()); -} - bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry& ioTelemetry) { return readFileWithPolicyTimed(file, reinterpret_cast(dst), 0ull, byteCount, ioPlan, nullptr, &ioTelemetry); @@ -1189,7 +1155,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if ((_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0) { - objRecomputeContentHashes(geometry.get()); + recomputeGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); } if (hasParsedAABB) diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 795720148e..03aa6e3fd7 100644 --- 
a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -35,17 +35,18 @@ bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste if (!success) return false; - char* header = buf; - if (strncmp(header, "ply", 3u) != 0) + const std::string_view fileHeader(buf, success.getBytesProcessed()); + if (!fileHeader.starts_with("ply\n")) return false; - header += 4; - char* lf = strstr(header, "\n"); - if (!lf) + const size_t formatLineBegin = 4ull; + const size_t formatLineEnd = fileHeader.find('\n', formatLineBegin); + if (formatLineEnd == std::string_view::npos) return false; + const std::string_view formatLine = fileHeader.substr(formatLineBegin, formatLineEnd - formatLineBegin); constexpr std::array headers = { "format ascii 1.0", "format binary_little_endian 1.0", "format binary_big_endian 1.0" }; - return std::find(headers.begin(), headers.end(), std::string_view(header, lf)) != headers.end(); + return std::find(headers.begin(), headers.end(), formatLine) != headers.end(); } const auto plyByteswap = [](const auto value) @@ -56,6 +57,61 @@ const auto plyByteswap = [](const auto value) return retval; }; +inline std::string_view plyToStringView(const char* text) +{ + return text ? 
std::string_view{ text } : std::string_view{}; +} + +struct SPlyAABBAccumulator +{ + bool has = false; + float minX = 0.f; + float minY = 0.f; + float minZ = 0.f; + float maxX = 0.f; + float maxY = 0.f; + float maxZ = 0.f; +}; + +inline void plyAABBExtend(SPlyAABBAccumulator& aabb, const float x, const float y, const float z) +{ + if (!aabb.has) + { + aabb.has = true; + aabb.minX = x; + aabb.minY = y; + aabb.minZ = z; + aabb.maxX = x; + aabb.maxY = y; + aabb.maxZ = z; + return; + } + if (x < aabb.minX) aabb.minX = x; + if (y < aabb.minY) aabb.minY = y; + if (z < aabb.minZ) aabb.minZ = z; + if (x > aabb.maxX) aabb.maxX = x; + if (y > aabb.maxY) aabb.maxY = y; + if (z > aabb.maxZ) aabb.maxZ = z; +} + +inline void plySetGeometryAABB(ICPUPolygonGeometry* geometry, const SPlyAABBAccumulator& aabb) +{ + if (!geometry || !aabb.has) + return; + geometry->visitAABB([&aabb](auto& ref)->void + { + ref = std::remove_reference_t::create(); + ref.minVx.x = aabb.minX; + ref.minVx.y = aabb.minY; + ref.minVx.z = aabb.minZ; + ref.minVx.w = 0.0; + ref.maxVx.x = aabb.maxX; + ref.maxVx.y = aabb.maxY; + ref.maxVx.z = aabb.maxZ; + ref.maxVx.w = 0.0; + }); +} + class CPLYMappedFileMemoryResource final : public core::refctd_memory_resource { public: @@ -77,21 +133,16 @@ class CPLYMappedFileMemoryResource final : public core::refctd_memory_resource core::smart_refctd_ptr m_file; }; -IGeometry::SDataView plyCreateMappedF32x3View(system::IFile* file, void* ptr, const size_t byteCount) +inline IGeometry::SDataView plyCreateDataView(core::smart_refctd_ptr&& buffer, const size_t byteCount, const uint32_t stride, const E_FORMAT format) { - if (!file || !ptr || byteCount == 0ull) + if (!buffer || byteCount == 0ull) return {}; - auto keepAliveResource = core::make_smart_refctd_ptr(core::smart_refctd_ptr(file)); - auto buffer = ICPUBuffer::create({ { byteCount }, ptr, core::smart_refctd_ptr(keepAliveResource), alignof(float) }, core::adopt_memory); - if (!buffer) - return {}; - - 
IGeometry::SDataView view = { + return { .composed = { - .stride = sizeof(float) * 3ull, - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32G32B32_SFLOAT) + .stride = stride, + .format = format, + .rangeFormat = IGeometryBase::getMatchingAABBFormat(format) }, .src = { .offset = 0ull, @@ -99,68 +150,30 @@ IGeometry::SDataView plyCreateMappedF32x3View(system::IFile* file, v .buffer = std::move(buffer) } }; - return view; } -IGeometry::SDataView plyCreateAdoptedU32IndexView(core::vector&& indices) +template +IGeometry::SDataView plyCreateAdoptedView(core::vector&& data) { - if (indices.empty()) + if (data.empty()) return {}; - auto backer = core::make_smart_refctd_ptr>>(std::move(indices)); + auto backer = core::make_smart_refctd_ptr>>(std::move(data)); auto& storage = backer->getBacker(); + const size_t byteCount = storage.size() * sizeof(ValueType); auto* const ptr = storage.data(); - const size_t byteCount = storage.size() * sizeof(uint32_t); - auto buffer = ICPUBuffer::create({ { byteCount }, ptr, core::smart_refctd_ptr(std::move(backer)), alignof(uint32_t) }, core::adopt_memory); - if (!buffer) - return {}; - - IGeometry::SDataView view = { - .composed = { - .stride = sizeof(uint32_t), - .format = EF_R32_UINT, - .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32_UINT) - }, - .src = { - .offset = 0u, - .size = byteCount, - .buffer = std::move(buffer) - } - }; - return view; + auto buffer = ICPUBuffer::create({ { byteCount }, ptr, core::smart_refctd_ptr(std::move(backer)), alignof(ValueType) }, core::adopt_memory); + return plyCreateDataView(std::move(buffer), byteCount, static_cast(sizeof(ValueType)), Format); } -IGeometry::SDataView plyCreateAdoptedU16IndexView(core::vector&& indices) +IGeometry::SDataView plyCreateMappedF32x3View(system::IFile* file, void* ptr, const size_t byteCount) { - if (indices.empty()) - return {}; - - auto backer = core::make_smart_refctd_ptr>>(std::move(indices)); - auto& storage = 
backer->getBacker(); - auto* const ptr = storage.data(); - const size_t byteCount = storage.size() * sizeof(uint16_t); - auto buffer = ICPUBuffer::create({ { byteCount }, ptr, core::smart_refctd_ptr(std::move(backer)), alignof(uint16_t) }, core::adopt_memory); - if (!buffer) + if (!file || !ptr || byteCount == 0ull) return {}; - IGeometry::SDataView view = { - .composed = { - .stride = sizeof(uint16_t), - .format = EF_R16_UINT, - .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R16_UINT) - }, - .src = { - .offset = 0u, - .size = byteCount, - .buffer = std::move(buffer) - } - }; - return view; -} - -void plyRecomputeContentHashesParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) -{ - recomputeGeometryContentHashesParallel(geometry, ioPolicy); + auto keepAliveResource = core::make_smart_refctd_ptr(core::smart_refctd_ptr(file)); + auto buffer = ICPUBuffer::create({ { byteCount }, ptr, core::smart_refctd_ptr(keepAliveResource), alignof(float) }, core::adopt_memory); + return plyCreateDataView(std::move(buffer), byteCount, static_cast(sizeof(float) * 3ull), EF_R32G32B32_SFLOAT); } struct SContext @@ -172,24 +185,38 @@ struct SContext { static E_FORMAT getType(const char* typeString) { - if (strcmp(typeString, "char")==0 || strcmp(typeString, "int8")==0) - return EF_R8_SINT; - else if (strcmp(typeString, "uchar")==0 || strcmp(typeString, "uint8")==0) - return EF_R8_UINT; - else if (strcmp(typeString, "short")==0 || strcmp(typeString, "int16")==0) - return EF_R16_SINT; - else if (strcmp(typeString, "ushort")==0 || strcmp(typeString, "uint16")==0) - return EF_R16_UINT; - else if (strcmp(typeString, "long")==0 || strcmp(typeString, "int")==0 || strcmp(typeString, "int32")==0) - return EF_R32_SINT; - else if (strcmp(typeString, "ulong")==0 || strcmp(typeString, "uint")==0 || strcmp(typeString, "uint32")==0) - return EF_R32_UINT; - else if (strcmp(typeString, "float")==0 || strcmp(typeString, "float32")==0) - return EF_R32_SFLOAT; - else if 
(strcmp(typeString, "double")==0 || strcmp(typeString, "float64")==0) + struct STypeAlias + { + std::string_view name; + E_FORMAT format; + }; + constexpr std::array typeAliases = {{ + { "char", EF_R8_SINT }, + { "int8", EF_R8_SINT }, + { "uchar", EF_R8_UINT }, + { "uint8", EF_R8_UINT }, + { "short", EF_R16_SINT }, + { "int16", EF_R16_SINT }, + { "ushort", EF_R16_UINT }, + { "uint16", EF_R16_UINT }, + { "long", EF_R32_SINT }, + { "int", EF_R32_SINT }, + { "int32", EF_R32_SINT }, + { "ulong", EF_R32_UINT }, + { "uint", EF_R32_UINT }, + { "uint32", EF_R32_UINT }, + { "float", EF_R32_SFLOAT }, + { "float32", EF_R32_SFLOAT } + }}; + const std::string_view typeName = plyToStringView(typeString); + for (const auto& alias : typeAliases) + { + if (alias.name == typeName) + return alias.format; + } + if (typeName == "double" || typeName == "float64") return EF_R64_SFLOAT; - else - return EF_UNKNOWN; + return EF_UNKNOWN; } bool isList() const {return type==EF_UNKNOWN && asset::isIntegerFormat(list.countType) && asset::isIntegerFormat(list.itemType);} @@ -492,7 +519,7 @@ struct SContext const char* word = getNextWord(); if (!word) return 0u; - const size_t tokenLen = WordLength >= 0 ? static_cast(WordLength + 1) : std::strlen(word); + const size_t tokenLen = WordLength >= 0 ? 
static_cast(WordLength + 1) : std::char_traits::length(word); const char* const wordEnd = word + tokenLen; if (word == wordEnd) return 0u; @@ -503,11 +530,8 @@ struct SContext const auto parseResult = std::from_chars(word, wordEnd, value, 10); if (parseResult.ec == std::errc() && parseResult.ptr == wordEnd) return static_cast(value); - - char* fallbackEnd = nullptr; - const auto fallback = std::strtoll(word, &fallbackEnd, 10); - if (fallbackEnd && fallbackEnd != word) - return static_cast(fallback); + if (parseResult.ptr != word) + return static_cast(value); return 0u; } else @@ -516,11 +540,8 @@ struct SContext const auto parseResult = std::from_chars(word, wordEnd, value, 10); if (parseResult.ec == std::errc() && parseResult.ptr == wordEnd) return static_cast(value); - - char* fallbackEnd = nullptr; - const auto fallback = std::strtoull(word, &fallbackEnd, 10); - if (fallbackEnd && fallbackEnd != word) - return static_cast(fallback); + if (parseResult.ptr != word) + return static_cast(value); return 0u; } } @@ -562,7 +583,7 @@ struct SContext const char* word = getNextWord(); if (!word) return 0.0; - const size_t tokenLen = WordLength >= 0 ? static_cast(WordLength + 1) : std::strlen(word); + const size_t tokenLen = WordLength >= 0 ? 
static_cast(WordLength + 1) : std::char_traits::length(word); const char* const wordEnd = word + tokenLen; if (word == wordEnd) return 0.0; @@ -571,11 +592,8 @@ struct SContext const auto parseResult = fast_float::from_chars(word, wordEnd, value); if (parseResult.ec == std::errc() && parseResult.ptr == wordEnd) return value; - - char* fallbackEnd = nullptr; - const auto fallback = std::strtod(word, &fallbackEnd); - if (fallbackEnd && fallbackEnd != word) - return fallback; + if (parseResult.ptr != word) + return value; return 0.0; } // read the next thing from the file and move the start pointer along @@ -606,9 +624,9 @@ struct SContext Success, Error }; - EFastVertexReadResult readVertexElementFast(const SElement& el) + EFastVertexReadResult readVertexElementFast(const SElement& el, SPlyAABBAccumulator* parsedAABB) { - if (!IsBinaryFile || IsWrongEndian || el.Name != "vertex") + if (!IsBinaryFile || el.Name != "vertex") return EFastVertexReadResult::NotApplicable; enum class ELayoutKind : uint8_t @@ -728,6 +746,19 @@ struct SContext if (srcBytesPerVertex == 0ull || el.Count > (std::numeric_limits::max() / srcBytesPerVertex)) return EFastVertexReadResult::Error; + const bool trackAABB = parsedAABB != nullptr; + const bool needsByteSwap = IsWrongEndian; + auto decodeF32 = [needsByteSwap](const uint8_t* src)->float + { + uint32_t bits = 0u; + std::memcpy(&bits, src, sizeof(bits)); + if (needsByteSwap) + bits = plyByteswap(bits); + float value = 0.f; + std::memcpy(&value, &bits, sizeof(value)); + return value; + }; + size_t remainingVertices = el.Count; while (remainingVertices > 0ull) { @@ -743,7 +774,7 @@ struct SContext { case ELayoutKind::XYZ: { - if (posStride == 3ull * floatBytes) + if (posStride == 3ull * floatBytes && !needsByteSwap && !trackAABB) { const size_t batchBytes = batchVertices * 3ull * floatBytes; std::memcpy(posBase, src, batchBytes); @@ -754,7 +785,14 @@ struct SContext { for (size_t v = 0ull; v < batchVertices; ++v) { - std::memcpy(posBase, src, 
3ull * floatBytes); + const float x = decodeF32(src + 0ull * floatBytes); + const float y = decodeF32(src + 1ull * floatBytes); + const float z = decodeF32(src + 2ull * floatBytes); + reinterpret_cast(posBase)[0] = x; + reinterpret_cast(posBase)[1] = y; + reinterpret_cast(posBase)[2] = z; + if (trackAABB) + plyAABBExtend(*parsedAABB, x, y, z); src += 3ull * floatBytes; posBase += posStride; } @@ -765,10 +803,19 @@ struct SContext { for (size_t v = 0ull; v < batchVertices; ++v) { - std::memcpy(posBase, src, 3ull * floatBytes); + const float x = decodeF32(src + 0ull * floatBytes); + const float y = decodeF32(src + 1ull * floatBytes); + const float z = decodeF32(src + 2ull * floatBytes); + reinterpret_cast(posBase)[0] = x; + reinterpret_cast(posBase)[1] = y; + reinterpret_cast(posBase)[2] = z; + if (trackAABB) + plyAABBExtend(*parsedAABB, x, y, z); src += 3ull * floatBytes; posBase += posStride; - std::memcpy(normalBase, src, 3ull * floatBytes); + reinterpret_cast(normalBase)[0] = decodeF32(src + 0ull * floatBytes); + reinterpret_cast(normalBase)[1] = decodeF32(src + 1ull * floatBytes); + reinterpret_cast(normalBase)[2] = decodeF32(src + 2ull * floatBytes); src += 3ull * floatBytes; normalBase += normalStride; } @@ -778,13 +825,23 @@ struct SContext { for (size_t v = 0ull; v < batchVertices; ++v) { - std::memcpy(posBase, src, 3ull * floatBytes); + const float x = decodeF32(src + 0ull * floatBytes); + const float y = decodeF32(src + 1ull * floatBytes); + const float z = decodeF32(src + 2ull * floatBytes); + reinterpret_cast(posBase)[0] = x; + reinterpret_cast(posBase)[1] = y; + reinterpret_cast(posBase)[2] = z; + if (trackAABB) + plyAABBExtend(*parsedAABB, x, y, z); src += 3ull * floatBytes; posBase += posStride; - std::memcpy(normalBase, src, 3ull * floatBytes); + reinterpret_cast(normalBase)[0] = decodeF32(src + 0ull * floatBytes); + reinterpret_cast(normalBase)[1] = decodeF32(src + 1ull * floatBytes); + reinterpret_cast(normalBase)[2] = decodeF32(src + 2ull * 
floatBytes); src += 3ull * floatBytes; normalBase += normalStride; - std::memcpy(uvBase, src, 2ull * floatBytes); + reinterpret_cast(uvBase)[0] = decodeF32(src + 0ull * floatBytes); + reinterpret_cast(uvBase)[1] = decodeF32(src + 1ull * floatBytes); src += 2ull * floatBytes; uvBase += uvStride; } @@ -1002,7 +1059,7 @@ struct SContext core::blake3_hash_t& outIndexHash, double& outIndexHashMs) { - if (!IsBinaryFile || IsWrongEndian) + if (!IsBinaryFile) return EFastFaceReadResult::NotApplicable; if (element.Properties.size() != 1u) return EFastFaceReadResult::NotApplicable; @@ -1022,6 +1079,7 @@ struct SContext return EFastFaceReadResult::NotApplicable; const bool is32Bit = isSrcU32 || isSrcS32; + const bool needEndianSwap = IsWrongEndian; const size_t indexSize = is32Bit ? sizeof(uint32_t) : sizeof(uint16_t); const bool hasVertexCount = vertexCount != 0u; const bool trackMaxIndex = !hasVertexCount; @@ -1042,6 +1100,22 @@ struct SContext _outIndices.resize(oldSize + triIndices); uint32_t* out = _outIndices.data() + oldSize; const uint8_t* ptr = reinterpret_cast(StartPointer); + auto readU32 = [needEndianSwap](const uint8_t* src)->uint32_t + { + uint32_t value = 0u; + std::memcpy(&value, src, sizeof(value)); + if (needEndianSwap) + value = plyByteswap(value); + return value; + }; + auto readU16 = [needEndianSwap](const uint8_t* src)->uint16_t + { + uint16_t value = 0u; + std::memcpy(&value, src, sizeof(value)); + if (needEndianSwap) + value = plyByteswap(value); + return value; + }; bool fallbackToGeneric = false; if (is32Bit) { @@ -1119,10 +1193,12 @@ struct SContext break; } ++in; - std::memcpy(outLocal, in, 3ull * sizeof(uint32_t)); - const uint32_t i0 = outLocal[0]; - const uint32_t i1 = outLocal[1]; - const uint32_t i2 = outLocal[2]; + const uint32_t i0 = readU32(in + 0ull * sizeof(uint32_t)); + const uint32_t i1 = readU32(in + 1ull * sizeof(uint32_t)); + const uint32_t i2 = readU32(in + 2ull * sizeof(uint32_t)); + outLocal[0] = i0; + outLocal[1] = i1; + 
outLocal[2] = i2; const uint32_t triOr = (i0 | i1 | i2); if (isSrcS32 && (triOr & 0x80000000u)) { @@ -1215,7 +1291,9 @@ struct SContext fallbackToGeneric = true; break; } - std::memcpy(out, ptr, 3ull * sizeof(uint32_t)); + out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); + out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); + out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); ptr += 3ull * sizeof(uint32_t); if (out[0] > _maxIndex) _maxIndex = out[0]; if (out[1] > _maxIndex) _maxIndex = out[1]; @@ -1233,7 +1311,9 @@ struct SContext fallbackToGeneric = true; break; } - std::memcpy(out, ptr, 3ull * sizeof(uint32_t)); + out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); + out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); + out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); ptr += 3ull * sizeof(uint32_t); if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) return EFastFaceReadResult::Error; @@ -1251,7 +1331,9 @@ struct SContext fallbackToGeneric = true; break; } - std::memcpy(out, ptr, 3ull * sizeof(uint32_t)); + out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); + out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); + out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); ptr += 3ull * sizeof(uint32_t); if ((out[0] | out[1] | out[2]) & 0x80000000u) return EFastFaceReadResult::Error; @@ -1271,7 +1353,9 @@ struct SContext fallbackToGeneric = true; break; } - std::memcpy(out, ptr, 3ull * sizeof(uint32_t)); + out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); + out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); + out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); ptr += 3ull * sizeof(uint32_t); const uint32_t triOr = (out[0] | out[1] | out[2]); if (triOr & 0x80000000u) @@ -1296,12 +1380,10 @@ struct SContext fallbackToGeneric = true; break; } - uint16_t tri[3] = {}; - std::memcpy(tri, ptr, sizeof(tri)); - ptr += sizeof(tri); - out[0] = tri[0]; - out[1] = tri[1]; - out[2] = tri[2]; + out[0] = readU16(ptr + 0ull * sizeof(uint16_t)); + out[1] = readU16(ptr + 1ull * 
sizeof(uint16_t)); + out[2] = readU16(ptr + 2ull * sizeof(uint16_t)); + ptr += 3ull * sizeof(uint16_t); if (out[0] > _maxIndex) _maxIndex = out[0]; if (out[1] > _maxIndex) _maxIndex = out[1]; if (out[2] > _maxIndex) _maxIndex = out[2]; @@ -1318,12 +1400,10 @@ struct SContext fallbackToGeneric = true; break; } - uint16_t tri[3] = {}; - std::memcpy(tri, ptr, sizeof(tri)); - ptr += sizeof(tri); - out[0] = tri[0]; - out[1] = tri[1]; - out[2] = tri[2]; + out[0] = readU16(ptr + 0ull * sizeof(uint16_t)); + out[1] = readU16(ptr + 1ull * sizeof(uint16_t)); + out[2] = readU16(ptr + 2ull * sizeof(uint16_t)); + ptr += 3ull * sizeof(uint16_t); if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) return EFastFaceReadResult::Error; out += 3; @@ -1340,14 +1420,15 @@ struct SContext fallbackToGeneric = true; break; } - int16_t tri[3] = {}; - std::memcpy(tri, ptr, sizeof(tri)); - ptr += sizeof(tri); - if ((static_cast(tri[0]) | static_cast(tri[1]) | static_cast(tri[2])) & 0x8000u) + const uint16_t t0 = readU16(ptr + 0ull * sizeof(uint16_t)); + const uint16_t t1 = readU16(ptr + 1ull * sizeof(uint16_t)); + const uint16_t t2 = readU16(ptr + 2ull * sizeof(uint16_t)); + ptr += 3ull * sizeof(uint16_t); + if ((t0 | t1 | t2) & 0x8000u) return EFastFaceReadResult::Error; - out[0] = static_cast(tri[0]); - out[1] = static_cast(tri[1]); - out[2] = static_cast(tri[2]); + out[0] = static_cast(t0); + out[1] = static_cast(t1); + out[2] = static_cast(t2); if (out[0] > _maxIndex) _maxIndex = out[0]; if (out[1] > _maxIndex) _maxIndex = out[1]; if (out[2] > _maxIndex) _maxIndex = out[2]; @@ -1364,14 +1445,15 @@ struct SContext fallbackToGeneric = true; break; } - int16_t tri[3] = {}; - std::memcpy(tri, ptr, sizeof(tri)); - ptr += sizeof(tri); - if ((static_cast(tri[0]) | static_cast(tri[1]) | static_cast(tri[2])) & 0x8000u) + const uint16_t t0 = readU16(ptr + 0ull * sizeof(uint16_t)); + const uint16_t t1 = readU16(ptr + 1ull * sizeof(uint16_t)); + const uint16_t t2 = readU16(ptr 
+ 2ull * sizeof(uint16_t)); + ptr += 3ull * sizeof(uint16_t); + if ((t0 | t1 | t2) & 0x8000u) return EFastFaceReadResult::Error; - out[0] = static_cast(tri[0]); - out[1] = static_cast(tri[1]); - out[2] = static_cast(tri[2]); + out[0] = static_cast(t0); + out[1] = static_cast(t1); + out[2] = static_cast(t2); if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) return EFastFaceReadResult::Error; out += 3; @@ -1409,7 +1491,7 @@ struct SContext outCount = static_cast(*StartPointer++); return true; }; - auto readIndex = [&ensureBytes, this, is32Bit, isSrcU32, isSrcU16](uint32_t& out)->bool + auto readIndex = [&ensureBytes, this, is32Bit, isSrcU32, isSrcU16, needEndianSwap](uint32_t& out)->bool { if (is32Bit) { @@ -1418,11 +1500,15 @@ struct SContext if (isSrcU32) { std::memcpy(&out, StartPointer, sizeof(uint32_t)); + if (needEndianSwap) + out = plyByteswap(out); } else { int32_t v = 0; std::memcpy(&v, StartPointer, sizeof(v)); + if (needEndianSwap) + v = plyByteswap(v); if (v < 0) return false; out = static_cast(v); @@ -1437,12 +1523,16 @@ struct SContext { uint16_t v = 0u; std::memcpy(&v, StartPointer, sizeof(uint16_t)); + if (needEndianSwap) + v = plyByteswap(v); out = v; } else { int16_t v = 0; std::memcpy(&v, StartPointer, sizeof(int16_t)); + if (needEndianSwap) + v = plyByteswap(v); if (v < 0) return false; out = static_cast(v); @@ -1587,6 +1677,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa // start with empty mesh auto geometry = make_smart_refctd_ptr(); + SPlyAABBAccumulator parsedAABB = {}; uint32_t vertCount=0; core::vector> hashedBuffers; std::jthread deferredPositionHashThread; @@ -1643,7 +1734,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa }; // Currently only supports ASCII or binary meshes - if (strcmp(ctx.getNextLine(),"ply")) + if (plyToStringView(ctx.getNextLine()) != "ply") { _params.logger.log("Not a valid PLY file %s", 
system::ILogger::ELL_ERROR,ctx.inner.mainFile->getFileName().string().c_str()); return {}; @@ -1654,7 +1745,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa // grab the word from this line const char* word = ctx.getNextWord(); // ignore comments - for (; strcmp(word,"comment")==0; ctx.getNextLine()) + for (; plyToStringView(word) == "comment"; ctx.getNextLine()) word = ctx.getNextWord(); bool readingHeader = true; @@ -1665,7 +1756,8 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa do { - if (strcmp(word,"property") == 0) + const std::string_view wordView = plyToStringView(word); + if (wordView == "property") { word = ctx.getNextWord(); @@ -1715,46 +1807,46 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa prop.Name = ctx.getNextWord(); } } - else if (strcmp(word,"element")==0) + else if (wordView == "element") { auto& el = ctx.ElementList.emplace_back(); el.Name = ctx.getNextWord(); const char* const countWord = ctx.getNextWord(); uint64_t parsedCount = 0ull; - if (countWord) + const std::string_view countWordView = plyToStringView(countWord); + if (!countWordView.empty()) { - const char* const countWordEnd = countWord + std::strlen(countWord); - const auto parseResult = std::from_chars(countWord, countWordEnd, parsedCount, 10); + const char* const countWordBegin = countWordView.data(); + const char* const countWordEnd = countWordBegin + countWordView.size(); + const auto parseResult = std::from_chars(countWordBegin, countWordEnd, parsedCount, 10); if (!(parseResult.ec == std::errc() && parseResult.ptr == countWordEnd)) - { - char* fallbackEnd = nullptr; - parsedCount = std::strtoull(countWord, &fallbackEnd, 10); - } + parsedCount = 0ull; } el.Count = static_cast(parsedCount); el.KnownSize = 0; if (el.Name=="vertex") vertCount = el.Count; } - else if (strcmp(word,"comment")==0) + else if (wordView == "comment") { // ignore line } // must be `format 
{binary_little_endian|binary_big_endian|ascii} 1.0` - else if (strcmp(word,"format") == 0) + else if (wordView == "format") { word = ctx.getNextWord(); + const std::string_view formatView = plyToStringView(word); - if (strcmp(word, "binary_little_endian") == 0) + if (formatView == "binary_little_endian") { ctx.IsBinaryFile = true; } - else if (strcmp(word, "binary_big_endian") == 0) + else if (formatView == "binary_big_endian") { ctx.IsBinaryFile = true; ctx.IsWrongEndian = true; } - else if (strcmp(word, "ascii")==0) + else if (formatView == "ascii") { } else @@ -1767,13 +1859,13 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (continueReading) { word = ctx.getNextWord(); - if (strcmp(word, "1.0")) + if (plyToStringView(word) != "1.0") { _params.logger.log("Unsupported PLY mesh version %s",system::ILogger::ELL_WARNING,word); } } } - else if (strcmp(word,"end_header")==0) + else if (wordView == "end_header") { readingHeader = false; if (ctx.IsBinaryFile) @@ -1850,6 +1942,9 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!mappedPosView) return {}; geometry->setPositionView(std::move(mappedPosView)); + const auto* xyz = reinterpret_cast(ctx.StartPointer); + for (size_t v = 0ull; v < el.Count; ++v) + plyAABBExtend(parsedAABB, xyz[v * 3ull + 0ull], xyz[v * 3ull + 1ull], xyz[v * 3ull + 2ull]); hashViewBufferIfNeeded(geometry->getPositionView()); tryLaunchDeferredHash(geometry->getPositionView(), deferredPositionHashThread); ctx.StartPointer += mappedBytes; @@ -2089,7 +2184,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa geometry->getAuxAttributeViews()->push_back(std::move(view)); // loop through vertex properties const auto vertexStart = clock_t::now(); - const auto fastVertexResult = ctx.readVertexElementFast(el); + const auto fastVertexResult = ctx.readVertexElementFast(el, &parsedAABB); if (fastVertexResult == SContext::EFastVertexReadResult::Success) 
{ ++fastVertexElementCount; @@ -2171,7 +2266,10 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } const auto aabbStart = clock_t::now(); - CPolygonGeometryManipulator::recomputeAABB(geometry.get()); + if (parsedAABB.has) + plySetGeometryAABB(geometry.get(), parsedAABB); + else + CPolygonGeometryManipulator::recomputeAABB(geometry.get()); aabbMs = std::chrono::duration(clock_t::now() - aabbStart).count(); const uint64_t indexCount = static_cast(indices.size()); @@ -2196,7 +2294,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa core::vector indices16(indices.size()); for (size_t i = 0u; i < indices.size(); ++i) indices16[i] = static_cast(indices[i]); - auto view = plyCreateAdoptedU16IndexView(std::move(indices16)); + auto view = plyCreateAdoptedView(std::move(indices16)); if (!view) return {}; geometry->setIndexView(std::move(view)); @@ -2204,7 +2302,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } else { - auto view = plyCreateAdoptedU32IndexView(std::move(indices)); + auto view = plyCreateAdoptedView(std::move(indices)); if (!view) return {}; if (precomputedIndexHash != IPreHashed::INVALID_HASH) @@ -2220,7 +2318,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (deferredPositionHashThread.joinable()) deferredPositionHashThread.join(); const auto hashStart = clock_t::now(); - plyRecomputeContentHashesParallel(geometry.get(), _params.ioPolicy); + recomputeGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); hashRangeMs += std::chrono::duration(clock_t::now() - hashStart).count(); } else diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index d76a018a06..ae502ddd65 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -57,6 +57,70 @@ bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const 
size_t ix, hls return view.decodeElement(ix, out); } +template +inline bool readVec3(const ICPUPolygonGeometry::SDataView& view, const hlsl::float32_t3* tightView, const size_t ix, ScalarType (&out)[3]) +{ + if (tightView) + { + out[0] = static_cast(tightView[ix].x); + out[1] = static_cast(tightView[ix].y); + out[2] = static_cast(tightView[ix].z); + return true; + } + + hlsl::float64_t4 tmp = {}; + if (!decodeVec4(view, ix, tmp)) + return false; + out[0] = static_cast(tmp.x); + out[1] = static_cast(tmp.y); + out[2] = static_cast(tmp.z); + return true; +} + +template +inline bool readVec2(const ICPUPolygonGeometry::SDataView& view, const hlsl::float32_t2* tightView, const size_t ix, ScalarType (&out)[2]) +{ + if (tightView) + { + out[0] = static_cast(tightView[ix].x); + out[1] = static_cast(tightView[ix].y); + return true; + } + + hlsl::float64_t4 tmp = {}; + if (!decodeVec4(view, ix, tmp)) + return false; + out[0] = static_cast(tmp.x); + out[1] = static_cast(tmp.y); + return true; +} + +struct SExtraAuxView +{ + const ICPUPolygonGeometry::SDataView* view = nullptr; + uint32_t components = 0u; + uint32_t auxIndex = 0u; +}; + +template +inline bool emitExtraAuxValues(const core::vector& extraAuxViews, const size_t ix, EmitFn&& emit) +{ + hlsl::float64_t4 tmp = {}; + for (const auto& extra : extraAuxViews) + { + if (!extra.view || !decodeVec4(*extra.view, ix, tmp)) + return false; + const ScalarType values[4] = { + static_cast(tmp.x), + static_cast(tmp.y), + static_cast(tmp.z), + static_cast(tmp.w) + }; + emit(values, extra.components); + } + return true; +} + const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) { if (!view) @@ -113,8 +177,8 @@ void appendVec(std::string& out, const double* values, size_t count, bool flipVe } } -bool writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool 
flipVectors); -bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors); +bool writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, const core::vector& extraAuxViews, size_t extraAuxFloatCount, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors); +bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, const core::vector& extraAuxViews, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors); } // namespace ply_writer_detail @@ -163,6 +227,22 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } } + core::vector extraAuxViews; + size_t extraAuxFloatCount = 0ull; + extraAuxViews.reserve(auxViews.size()); + for (uint32_t auxIx = 0u; auxIx < static_cast(auxViews.size()); ++auxIx) + { + const auto& view = auxViews[auxIx]; + if (!view || (&view == uvView)) + continue; + const uint32_t channels = getFormatChannelCount(view.composed.format); + if (channels == 0u) + continue; + const uint32_t components = std::min(4u, channels); + extraAuxViews.push_back({ &view, components, auxIx }); + extraAuxFloatCount += components; + } + const size_t vertexCount = positionView.getElementCount(); if (vertexCount == 0) return false; @@ -261,6 +341,21 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ header += "property float v\n"; } + for (const auto& extra : extraAuxViews) + { + for (uint32_t component = 0u; component < extra.components; ++component) + { + header += "property float aux"; + header += std::to_string(extra.auxIndex); + if (extra.components > 1u) + { + header += "_"; + header += std::to_string(component); + } + header += "\n"; + } + } + header += 
"element face "; header += std::to_string(faceCount); header += "\nproperty list uchar uint vertex_indices\n"; @@ -273,14 +368,14 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ size_t outputBytes = 0ull; if (binary) { - const size_t vertexStride = sizeof(float) * (3u + (writeNormals ? 3u : 0u) + (uvView ? 2u : 0u)); + const size_t vertexStride = sizeof(float) * (3u + (writeNormals ? 3u : 0u) + (uvView ? 2u : 0u) + extraAuxFloatCount); const size_t faceStride = sizeof(uint8_t) + sizeof(uint32_t) * 3u; const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; const auto binaryEncodeStart = clock_t::now(); core::vector body; body.resize(bodySize); - if (!writeBinary(geom, uvView, writeNormals, vertexCount, indices, faceCount, body.data(), flipVectors)) + if (!writeBinary(geom, uvView, extraAuxViews, extraAuxFloatCount, writeNormals, vertexCount, indices, faceCount, body.data(), flipVectors)) return false; encodeMs += std::chrono::duration(clock_t::now() - binaryEncodeStart).count(); @@ -344,7 +439,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const auto textEncodeStart = clock_t::now(); std::string body; body.reserve(vertexCount * ApproxPlyTextBytesPerVertex + faceCount * ApproxPlyTextBytesPerFace); - if (!writeText(geom, uvView, writeNormals, vertexCount, indices, faceCount, body, flipVectors)) + if (!writeText(geom, uvView, extraAuxViews, writeNormals, vertexCount, indices, faceCount, body, flipVectors)) return false; encodeMs += std::chrono::duration(clock_t::now() - textEncodeStart).count(); @@ -405,7 +500,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return writeOk; } -bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors) +bool 
ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, const core::vector& extraAuxViews, size_t extraAuxFloatCount, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors) { if (!dst) return false; @@ -418,8 +513,9 @@ bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUP const hlsl::float32_t3* const tightNormal = writeNormals ? getTightFloat3View(normalView) : nullptr; const bool hasUV = uvView != nullptr; const hlsl::float32_t2* const tightUV = hasUV ? getTightFloat2View(*uvView) : nullptr; + const bool hasExtraAux = extraAuxFloatCount > 0ull; - if (tightPos && (!writeNormals || tightNormal) && (!hasUV || tightUV) && !flipVectors) + if (tightPos && (!writeNormals || tightNormal) && (!hasUV || tightUV) && !hasExtraAux && !flipVectors) { for (size_t i = 0; i < vertexCount; ++i) { @@ -439,24 +535,11 @@ bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUP } else { - hlsl::float64_t4 tmp = {}; for (size_t i = 0; i < vertexCount; ++i) { float pos[3] = {}; - if (tightPos) - { - pos[0] = tightPos[i].x; - pos[1] = tightPos[i].y; - pos[2] = tightPos[i].z; - } - else - { - if (!decodeVec4(positionView, i, tmp)) - return false; - pos[0] = static_cast(tmp.x); - pos[1] = static_cast(tmp.y); - pos[2] = static_cast(tmp.z); - } + if (!readVec3(positionView, tightPos, i, pos)) + return false; if (flipVectors) pos[0] = -pos[0]; std::memcpy(dst, pos, Float3Bytes); @@ -465,20 +548,8 @@ bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUP if (writeNormals) { float normal[3] = {}; - if (tightNormal) - { - normal[0] = tightNormal[i].x; - normal[1] = tightNormal[i].y; - normal[2] = tightNormal[i].z; - } - else - { - if (!decodeVec4(normalView, i, tmp)) - return false; - normal[0] = static_cast(tmp.x); - normal[1] = static_cast(tmp.y); - normal[2] = static_cast(tmp.z); - } + if 
(!readVec3(normalView, tightNormal, i, normal)) + return false; if (flipVectors) normal[0] = -normal[0]; std::memcpy(dst, normal, Float3Bytes); @@ -487,21 +558,22 @@ bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUP if (hasUV) { - if (tightUV) - { - std::memcpy(dst, tightUV + i, Float2Bytes); - } - else - { - float uv[2] = {}; - if (!decodeVec4(*uvView, i, tmp)) - return false; - uv[0] = static_cast(tmp.x); - uv[1] = static_cast(tmp.y); - std::memcpy(dst, uv, Float2Bytes); - } + float uv[2] = {}; + if (!readVec2(*uvView, tightUV, i, uv)) + return false; + std::memcpy(dst, uv, Float2Bytes); dst += Float2Bytes; } + + if (hasExtraAux) + { + if (!emitExtraAuxValues(extraAuxViews, i, [&](const float* values, const uint32_t components) + { + std::memcpy(dst, values, sizeof(float) * components); + dst += sizeof(float) * components; + })) + return false; + } } } @@ -518,7 +590,7 @@ bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUP return true; } -bool ply_writer_detail::writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors) +bool ply_writer_detail::writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, const core::vector& extraAuxViews, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors) { const auto& positionView = geom->getPositionView(); const auto& normalView = geom->getNormalView(); @@ -526,64 +598,35 @@ bool ply_writer_detail::writeText(const ICPUPolygonGeometry* geom, const ICPUPol const hlsl::float32_t3* const tightNormal = writeNormals ? getTightFloat3View(normalView) : nullptr; const hlsl::float32_t2* const tightUV = uvView ? 
getTightFloat2View(*uvView) : nullptr; - hlsl::float64_t4 tmp = {}; for (size_t i = 0; i < vertexCount; ++i) { double pos[3] = {}; - if (tightPos) - { - pos[0] = tightPos[i].x; - pos[1] = tightPos[i].y; - pos[2] = tightPos[i].z; - } - else - { - if (!decodeVec4(positionView, i, tmp)) - return false; - pos[0] = tmp.x; - pos[1] = tmp.y; - pos[2] = tmp.z; - } + if (!readVec3(positionView, tightPos, i, pos)) + return false; appendVec(output, pos, 3u, flipVectors); if (writeNormals) { double normal[3] = {}; - if (tightNormal) - { - normal[0] = tightNormal[i].x; - normal[1] = tightNormal[i].y; - normal[2] = tightNormal[i].z; - } - else - { - if (!decodeVec4(normalView, i, tmp)) - return false; - normal[0] = tmp.x; - normal[1] = tmp.y; - normal[2] = tmp.z; - } + if (!readVec3(normalView, tightNormal, i, normal)) + return false; appendVec(output, normal, 3u, flipVectors); } if (uvView) { double uv[2] = {}; - if (tightUV) - { - uv[0] = tightUV[i].x; - uv[1] = tightUV[i].y; - } - else - { - if (!decodeVec4(*uvView, i, tmp)) - return false; - uv[0] = tmp.x; - uv[1] = tmp.y; - } + if (!readVec2(*uvView, tightUV, i, uv)) + return false; appendVec(output, uv, 2u, false); } + if (!emitExtraAuxValues(extraAuxViews, i, [&](const double* values, const uint32_t components) + { + appendVec(output, values, components, false); + })) + return false; + output += "\n"; } diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 73d1f17c60..2af5fc9c9a 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -202,11 +202,6 @@ ICPUPolygonGeometry::SDataView stlCreateAdoptedFloat3View(core::vector Date: Fri, 13 Feb 2026 13:23:45 +0100 Subject: [PATCH 028/118] Update examples_tests submodule for meshloaders updates --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index c9a8735e85..07224bdb44 160000 --- 
a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c9a8735e85a20d18b406daa7980eef119cb91bb5 +Subproject commit 07224bdb448fd5be5659fdc79fb06d28de0a1144 From fbd56010e10c7e2d2c27ded2dcd98999e71a838c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 13 Feb 2026 15:18:04 +0100 Subject: [PATCH 029/118] Unify generic AABB helpers and runtime tuning paths --- include/nbl/asset/interchange/SFileIOPolicy.h | 9 + .../asset/interchange/SGeometryAABBCommon.h | 104 ++++++++++++ .../interchange/SGeometryContentHashCommon.h | 7 +- .../asset/interchange/SGeometryWriterCommon.h | 59 +++++++ .../asset/interchange/SInterchangeIOCommon.h | 11 ++ .../asset/interchange/SLoaderRuntimeTuning.h | 46 ++++- .../asset/utils/CPolygonGeometryManipulator.h | 19 ++- .../asset/interchange/COBJMeshFileLoader.cpp | 52 ++---- src/nbl/asset/interchange/COBJMeshWriter.cpp | 63 +------ .../asset/interchange/CPLYMeshFileLoader.cpp | 159 +++--------------- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 66 +------- .../asset/interchange/CSTLMeshFileLoader.cpp | 85 ++-------- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 61 +------ 13 files changed, 296 insertions(+), 445 deletions(-) create mode 100644 include/nbl/asset/interchange/SGeometryAABBCommon.h create mode 100644 include/nbl/asset/interchange/SGeometryWriterCommon.h diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 8bece21b96..7e41b201b5 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -28,10 +28,19 @@ struct SFileIOPolicy float samplingBudgetRatio = 0.05f; float minExpectedGainRatio = 0.03f; uint32_t maxWorkers = 0u; + uint32_t workerHeadroom = 2u; uint32_t samplingMaxCandidates = 4u; uint32_t samplingPasses = 1u; uint64_t samplingMinWorkUnits = 0ull; uint32_t targetChunksPerWorker = 4u; + uint32_t hashTaskTargetChunksPerWorker = 1u; + uint64_t hashInlineThresholdBytes = 1ull << 20; + 
uint64_t minSampleBytes = 4ull << 10; + uint64_t maxSampleBytes = 128ull << 10; + uint64_t tinyIoPayloadThresholdBytes = 1ull << 20; + uint64_t tinyIoAvgBytesThreshold = 1024ull; + uint64_t tinyIoMinBytesThreshold = 64ull; + uint64_t tinyIoMinCallCount = 1024ull; }; enum class Strategy : uint8_t diff --git a/include/nbl/asset/interchange/SGeometryAABBCommon.h b/include/nbl/asset/interchange/SGeometryAABBCommon.h new file mode 100644 index 0000000000..79a0d64ba5 --- /dev/null +++ b/include/nbl/asset/interchange/SGeometryAABBCommon.h @@ -0,0 +1,104 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_GEOMETRY_AABB_COMMON_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_AABB_COMMON_H_INCLUDED_ + + +#include "nbl/asset/ICPUPolygonGeometry.h" + +#include +#include + + +namespace nbl::asset +{ + +template +struct SAABBAccumulator3 +{ + bool has = false; + std::array min = {}; + std::array max = {}; +}; + +template +inline void extendAABBAccumulator(SAABBAccumulator3& aabb, const Scalar x, const Scalar y, const Scalar z) +{ + if (!aabb.has) + { + aabb.has = true; + aabb.min[0] = x; + aabb.min[1] = y; + aabb.min[2] = z; + aabb.max[0] = x; + aabb.max[1] = y; + aabb.max[2] = z; + return; + } + + if (x < aabb.min[0]) aabb.min[0] = x; + if (y < aabb.min[1]) aabb.min[1] = y; + if (z < aabb.min[2]) aabb.min[2] = z; + if (x > aabb.max[0]) aabb.max[0] = x; + if (y > aabb.max[1]) aabb.max[1] = y; + if (z > aabb.max[2]) aabb.max[2] = z; +} + +template +inline void extendAABBAccumulator(SAABBAccumulator3& aabb, const Point& point) +{ + if constexpr (requires { point.x; point.y; point.z; }) + extendAABBAccumulator(aabb, static_cast(point.x), static_cast(point.y), static_cast(point.z)); + else + extendAABBAccumulator(aabb, static_cast(point[0]), static_cast(point[1]), static_cast(point[2])); +} + +template +inline void 
assignAABBFromAccumulator(AABB& dst, const SAABBAccumulator3& aabb) +{ + if (!aabb.has) + return; + + dst = std::remove_reference_t::create(); + if constexpr (requires { dst.minVx.x; dst.minVx.y; dst.minVx.z; dst.maxVx.x; dst.maxVx.y; dst.maxVx.z; }) + { + dst.minVx.x = static_cast(aabb.min[0]); + dst.minVx.y = static_cast(aabb.min[1]); + dst.minVx.z = static_cast(aabb.min[2]); + dst.maxVx.x = static_cast(aabb.max[0]); + dst.maxVx.y = static_cast(aabb.max[1]); + dst.maxVx.z = static_cast(aabb.max[2]); + if constexpr (requires { dst.minVx.w; dst.maxVx.w; }) + { + dst.minVx.w = 0; + dst.maxVx.w = 0; + } + } + else + { + dst.minVx[0] = static_cast(aabb.min[0]); + dst.minVx[1] = static_cast(aabb.min[1]); + dst.minVx[2] = static_cast(aabb.min[2]); + dst.maxVx[0] = static_cast(aabb.max[0]); + dst.maxVx[1] = static_cast(aabb.max[1]); + dst.maxVx[2] = static_cast(aabb.max[2]); + } +} + +template +inline void applyAABBToGeometry(ICPUPolygonGeometry* geometry, const SAABBAccumulator3& aabb) +{ + if (!geometry || !aabb.has) + return; + + geometry->visitAABB([&aabb](auto& ref)->void + { + assignAABBFromAccumulator(ref, aabb); + }); +} + +} + + +#endif diff --git a/include/nbl/asset/interchange/SGeometryContentHashCommon.h b/include/nbl/asset/interchange/SGeometryContentHashCommon.h index a2017eed8b..04475eaff8 100644 --- a/include/nbl/asset/interchange/SGeometryContentHashCommon.h +++ b/include/nbl/asset/interchange/SGeometryContentHashCommon.h @@ -84,7 +84,7 @@ inline void recomputeGeometryContentHashesParallel(ICPUPolygonGeometry* geometry if (!ptr) continue; hashSampleData = ptr; - hashSampleBytes = std::min(static_cast(buffer->getSize()), 128ull << 10); + hashSampleBytes = resolveLoaderRuntimeSampleBytes(ioPolicy, static_cast(buffer->getSize())); if (hashSampleBytes > 0ull) break; } @@ -94,8 +94,9 @@ inline void recomputeGeometryContentHashesParallel(ICPUPolygonGeometry* geometry tuningRequest.totalWorkUnits = pending.size(); tuningRequest.minBytesPerWorker = 
std::max(1ull, loaderRuntimeCeilDiv(totalBytes, static_cast(pending.size()))); tuningRequest.hardwareThreads = static_cast(hw); - tuningRequest.hardMaxWorkers = static_cast(std::min(pending.size(), hw)); - tuningRequest.targetChunksPerWorker = 1u; + const size_t hardMaxWorkers = resolveLoaderHardMaxWorkers(hw, ioPolicy.runtimeTuning.workerHeadroom); + tuningRequest.hardMaxWorkers = static_cast(std::min(pending.size(), hardMaxWorkers)); + tuningRequest.targetChunksPerWorker = ioPolicy.runtimeTuning.hashTaskTargetChunksPerWorker; tuningRequest.sampleData = hashSampleData; tuningRequest.sampleBytes = hashSampleBytes; const auto tuning = tuneLoaderRuntime(ioPolicy, tuningRequest); diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h new file mode 100644 index 0000000000..172b72feb2 --- /dev/null +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -0,0 +1,59 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_GEOMETRY_WRITER_COMMON_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_WRITER_COMMON_H_INCLUDED_ + + +#include "nbl/asset/ICPUPolygonGeometry.h" + +#include +#include +#include + + +namespace nbl::asset +{ + +inline const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) +{ + if (!view) + return nullptr; + if (view.composed.format != EF_R32G32B32_SFLOAT) + return nullptr; + if (view.composed.getStride() != sizeof(hlsl::float32_t3)) + return nullptr; + return reinterpret_cast(view.getPointer()); +} + +inline const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& view) +{ + if (!view) + return nullptr; + if (view.composed.format != EF_R32G32_SFLOAT) + return nullptr; + if (view.composed.getStride() != sizeof(hlsl::float32_t2)) + return nullptr; + return reinterpret_cast(view.getPointer()); +} + +inline char* appendFloatFixed6ToBuffer(char* dst, char* const end, const float value) +{ + if (!dst || dst >= end) + return end; + + const auto result = std::to_chars(dst, end, value, std::chars_format::fixed, 6); + if (result.ec == std::errc()) + return result.ptr; + + const int written = std::snprintf(dst, static_cast(end - dst), "%.6f", static_cast(value)); + if (written <= 0) + return dst; + const size_t writeLen = static_cast(written); + return (writeLen < static_cast(end - dst)) ? 
(dst + writeLen) : end; +} + +} + + +#endif diff --git a/include/nbl/asset/interchange/SInterchangeIOCommon.h b/include/nbl/asset/interchange/SInterchangeIOCommon.h index 97d9ca84c1..a499ae121b 100644 --- a/include/nbl/asset/interchange/SInterchangeIOCommon.h +++ b/include/nbl/asset/interchange/SInterchangeIOCommon.h @@ -64,6 +64,17 @@ inline bool isTinyIOTelemetryLikely( (minBytes < tinyChunkBytesThreshold && telemetry.callCount > tinyChunkCallsThreshold); } +inline bool isTinyIOTelemetryLikely(const SFileIOTelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) +{ + return isTinyIOTelemetryLikely( + telemetry, + payloadBytes, + ioPolicy.runtimeTuning.tinyIoPayloadThresholdBytes, + ioPolicy.runtimeTuning.tinyIoAvgBytesThreshold, + ioPolicy.runtimeTuning.tinyIoMinBytesThreshold, + ioPolicy.runtimeTuning.tinyIoMinCallCount); +} + inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SFileReadTelemetry* ioTelemetry = nullptr) { if (!file || (!dst && bytes != 0ull)) diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index 4df853c930..07258ea960 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -11,9 +11,7 @@ #include #include #include -#include #include -#include #include #include @@ -51,12 +49,43 @@ constexpr uint64_t loaderRuntimeCeilDiv(const uint64_t numerator, const uint64_t return (numerator + denominator - 1ull) / denominator; } +inline uint64_t resolveLoaderRuntimeSampleBytes(const SFileIOPolicy& ioPolicy, const uint64_t knownInputBytes) +{ + if (knownInputBytes == 0ull) + return 0ull; + + const uint64_t minSampleBytes = std::max(1ull, ioPolicy.runtimeTuning.minSampleBytes); + const uint64_t maxSampleBytes = std::max(minSampleBytes, ioPolicy.runtimeTuning.maxSampleBytes); + const uint64_t cappedMin = std::min(minSampleBytes, knownInputBytes); + 
const uint64_t cappedMax = std::min(maxSampleBytes, knownInputBytes); + const uint64_t adaptive = std::max(knownInputBytes / 64ull, cappedMin); + return std::clamp(adaptive, cappedMin, cappedMax); +} + +inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) +{ + const uint64_t thresholdBytes = std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); + return inputBytes <= thresholdBytes; +} + inline size_t resolveLoaderHardwareThreads(const uint32_t requested = 0u) { const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); return hw ? hw : 1ull; } +inline size_t resolveLoaderHardMaxWorkers(const size_t hardwareThreads, const uint32_t workerHeadroom) +{ + const size_t hw = std::max(1ull, hardwareThreads); + const size_t minWorkers = hw >= 2ull ? 2ull : 1ull; + const size_t headroom = static_cast(workerHeadroom); + if (headroom == 0ull) + return hw; + if (hw <= headroom) + return minWorkers; + return std::max(minWorkers, hw - headroom); +} + template inline void loaderRuntimeDispatchWorkers(const size_t workerCount, Fn&& fn) { @@ -65,11 +94,12 @@ inline void loaderRuntimeDispatchWorkers(const size_t workerCount, Fn&& fn) fn(0ull); return; } - auto workerIds = std::views::iota(size_t{0ull}, workerCount); - std::for_each(std::execution::par, workerIds.begin(), workerIds.end(), [&fn](const size_t workerIx) - { - fn(workerIx); - }); + + std::vector workers; + workers.reserve(workerCount - 1ull); + for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) + workers.emplace_back([&fn, workerIx]() { fn(workerIx); }); + fn(0ull); } inline uint64_t loaderRuntimeBenchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes) @@ -129,7 +159,7 @@ inline SLoaderRuntimeSampleStats loaderRuntimeBenchmarkSampleStats( std::vector samples; samples.reserve(observationCount); - (void)loaderRuntimeBenchmarkSample(sampleData, 
sampleBytes, workerCount, 1u); + loaderRuntimeBenchmarkSample(sampleData, sampleBytes, workerCount, 1u); for (uint32_t obsIx = 0u; obsIx < observationCount; ++obsIx) { const uint64_t elapsedNs = loaderRuntimeBenchmarkSample(sampleData, sampleBytes, workerCount, passes); diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 115a9a8720..305f9920a8 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -9,6 +9,7 @@ #include "nbl/core/hash/blake.h" #include "nbl/asset/ICPUPolygonGeometry.h" +#include "nbl/asset/interchange/SGeometryAABBCommon.h" #include "nbl/asset/utils/CGeometryManipulator.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" #include "nbl/asset/utils/COBBGenerator.h" @@ -91,6 +92,15 @@ class NBL_API2 CPolygonGeometryManipulator auto addToAABB = [&](auto& aabb)->void { using aabb_t = std::remove_reference_t; + using point_t = typename aabb_t::point_t; + using component_t = std::remove_cv_t>; + SAABBAccumulator3 parsedAABB = {}; + auto addVertexToAABB = [&](const uint32_t vertex_i)->void + { + point_t pt; + geo->getPositionView().decodeElement(vertex_i, pt); + extendAABBAccumulator(parsedAABB, pt); + }; if (geo->getIndexView()) { for (auto index_i = 0u; index_i != geo->getIndexView().getElementCount(); index_i++) @@ -98,20 +108,17 @@ class NBL_API2 CPolygonGeometryManipulator hlsl::vector vertex_i; geo->getIndexView().decodeElement(index_i, vertex_i); if (isVertexSkinned(geo, vertex_i.x)) continue; - typename aabb_t::point_t pt; - geo->getPositionView().decodeElement(vertex_i.x, pt); - aabb.addPoint(pt); + addVertexToAABB(vertex_i.x); } } else { for (auto vertex_i = 0u; vertex_i != geo->getPositionView().getElementCount(); vertex_i++) { if (isVertexSkinned(geo, vertex_i)) continue; - typename aabb_t::point_t pt; - geo->getPositionView().decodeElement(vertex_i, pt); - aabb.addPoint(pt); + 
addVertexToAABB(vertex_i); } } + assignAABBFromAccumulator(aabb, parsedAABB); }; IGeometryBase::SDataViewBase tmp = geo->getPositionView().composed; tmp.resetRange(); diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 2070fe4d59..db99ee46d0 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -6,6 +6,7 @@ #include "nbl/core/declarations.h" #include "nbl/asset/IAssetManager.h" +#include "nbl/asset/interchange/SGeometryAABBCommon.h" #include "nbl/asset/interchange/SGeometryContentHashCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" @@ -167,24 +168,6 @@ inline bool parseObjFloat(const char*& ptr, const char* const end, float& out) return true; } -void extendAABB(hlsl::shapes::AABB<3, hlsl::float32_t>& aabb, bool& hasAABB, const Float3& p) -{ - if (!hasAABB) - { - aabb.minVx = p; - aabb.maxVx = p; - hasAABB = true; - return; - } - - if (p.x < aabb.minVx.x) aabb.minVx.x = p.x; - if (p.y < aabb.minVx.y) aabb.minVx.y = p.y; - if (p.z < aabb.minVx.z) aabb.minVx.z = p.z; - if (p.x > aabb.maxVx.x) aabb.maxVx.x = p.x; - if (p.y > aabb.maxVx.y) aabb.maxVx.y = p.y; - if (p.z > aabb.maxVx.z) aabb.maxVx.z = p.z; -} - const auto createAdoptedView = [](auto&& data, const E_FORMAT format) -> IGeometry::SDataView { using T = typename std::decay_t::value_type; @@ -730,14 +713,15 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as uint32_t outIndex = 0u; }; const size_t hw = resolveLoaderHardwareThreads(); + const size_t hardMaxWorkers = resolveLoaderHardMaxWorkers(hw, _params.ioPolicy.runtimeTuning.workerHeadroom); SLoaderRuntimeTuningRequest dedupTuningRequest = {}; dedupTuningRequest.inputBytes = static_cast(filesize); dedupTuningRequest.totalWorkUnits = estimatedOutVertexCount; dedupTuningRequest.hardwareThreads = static_cast(hw); - 
dedupTuningRequest.hardMaxWorkers = static_cast(hw); - dedupTuningRequest.targetChunksPerWorker = 1u; + dedupTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); + dedupTuningRequest.targetChunksPerWorker = _params.ioPolicy.runtimeTuning.targetChunksPerWorker; dedupTuningRequest.sampleData = reinterpret_cast(buf); - dedupTuningRequest.sampleBytes = std::min(static_cast(filesize), 128ull << 10); + dedupTuningRequest.sampleBytes = resolveLoaderRuntimeSampleBytes(_params.ioPolicy, static_cast(filesize)); const auto dedupTuning = tuneLoaderRuntime(_params.ioPolicy, dedupTuningRequest); const size_t dedupHotSeed = std::max( 16ull, @@ -748,8 +732,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as bool hasNormals = false; bool hasUVs = false; - hlsl::shapes::AABB<3, hlsl::float32_t> parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); - bool hasParsedAABB = false; + SAABBAccumulator3 parsedAABB = {}; auto allocateOutVertex = [&](uint32_t& outIx) -> bool { if (outVertexWriteCount >= outPositions.size()) @@ -826,7 +809,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const auto& srcPos = positions[idx[0]]; outPositions[static_cast(outIx)] = srcPos; - extendAABB(parsedAABB, hasParsedAABB, srcPos); + extendAABBAccumulator(parsedAABB, srcPos); Float2 uv(0.f, 0.f); if (idx[1] >= 0 && static_cast(idx[1]) < uvs.size()) @@ -889,7 +872,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const auto& srcPos = positions[static_cast(posIx)]; outPositions[static_cast(outIx)] = srcPos; - extendAABB(parsedAABB, hasParsedAABB, srcPos); + extendAABBAccumulator(parsedAABB, srcPos); outUVs[static_cast(outIx)] = uvs[static_cast(uvIx)]; outNormals[static_cast(outIx)] = normals[static_cast(normalIx)]; hotEntry.pos = posIx; @@ -1158,26 +1141,13 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as 
recomputeGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); } - if (hasParsedAABB) - { - geometry->visitAABB([&parsedAABB](auto& ref)->void - { - ref = std::remove_reference_t::create(); - ref.minVx.x = parsedAABB.minVx.x; - ref.minVx.y = parsedAABB.minVx.y; - ref.minVx.z = parsedAABB.minVx.z; - ref.minVx.w = 0.0; - ref.maxVx.x = parsedAABB.maxVx.x; - ref.maxVx.y = parsedAABB.maxVx.y; - ref.maxVx.z = parsedAABB.maxVx.z; - ref.maxVx.w = 0.0; - }); - } + if (parsedAABB.has) + applyAABBToGeometry(geometry.get(), parsedAABB); else { CPolygonGeometryManipulator::recomputeAABB(geometry.get()); } - if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(filesize))) + if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(filesize), _params.ioPolicy)) { _params.logger.log( "OBJ loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 98648a8862..105f174716 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -3,6 +3,7 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/asset/interchange/COBJMeshWriter.h" +#include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ @@ -11,7 +12,6 @@ #include #include -#include #include #include #include @@ -64,28 +64,6 @@ bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hls return view.decodeElement(ix, out); } -const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) -{ - if (!view) - return nullptr; - if (view.composed.format != EF_R32G32B32_SFLOAT) - return nullptr; - if (view.composed.getStride() != sizeof(hlsl::float32_t3)) - return nullptr; - return reinterpret_cast(view.getPointer()); -} - -const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& 
view) -{ - if (!view) - return nullptr; - if (view.composed.format != EF_R32G32_SFLOAT) - return nullptr; - if (view.composed.getStride() != sizeof(hlsl::float32_t2)) - return nullptr; - return reinterpret_cast(view.getPointer()); -} - char* appendUIntToBuffer(char* dst, char* const end, const uint32_t value) { if (!dst || dst >= end) @@ -102,22 +80,6 @@ char* appendUIntToBuffer(char* dst, char* const end, const uint32_t value) return (writeLen < static_cast(end - dst)) ? (dst + writeLen) : end; } -char* appendFloatFixed6ToBuffer(char* dst, char* const end, const float value) -{ - if (!dst || dst >= end) - return end; - - const auto result = std::to_chars(dst, end, value, std::chars_format::fixed, 6); - if (result.ec == std::errc()) - return result.ptr; - - const int written = std::snprintf(dst, static_cast(end - dst), "%.6f", static_cast(value)); - if (written <= 0) - return dst; - const size_t writeLen = static_cast(written); - return (writeLen < static_cast(end - dst)) ? (dst + writeLen) : end; -} - void appendVec3Line(std::string& out, const char* prefix, const size_t prefixSize, const float x, const float y, const float z) { const size_t oldSize = out.size(); @@ -230,12 +192,6 @@ void appendIndexTokenToStorage(std::string& storage, core::vector indexData; const uint32_t* indices = nullptr; size_t faceCount = 0; - const auto encodeStart = clock_t::now(); - if (indexView) { const size_t indexCount = indexView.getElementCount(); @@ -343,12 +297,10 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ indices = indexData.data(); faceCount = vertexCount / 3u; } - encodeMs = std::chrono::duration(clock_t::now() - encodeStart).count(); const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool flipHandedness = !(flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); std::string output; - const auto formatStart = clock_t::now(); output.reserve(vertexCount * ApproxObjBytesPerVertex + faceCount * ApproxObjBytesPerFace); 
output.append("# Nabla OBJ\n"); @@ -457,7 +409,6 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ appendFaceLine(output, faceIndexStorage, faceIndexRefs, f0, f1, f2); } - formatMs = std::chrono::duration(clock_t::now() - formatStart).count(); const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true); if (!ioPlan.valid) @@ -466,15 +417,10 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return false; } - const auto writeStart = clock_t::now(); const bool writeOk = writeFileWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size(), &ioTelemetry); - writeMs = std::chrono::duration(clock_t::now() - writeStart).count(); - - const double totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); - const double miscMs = std::max(0.0, totalMs - (encodeMs + formatMs + writeMs)); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(output.size()))) + if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(output.size()), _params.ioPolicy)) { _params.logger.log( "OBJ writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -498,11 +444,6 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), ioPlan.reason); - (void)totalMs; - (void)encodeMs; - (void)formatMs; - (void)writeMs; - (void)miscMs; return writeOk; } diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 03aa6e3fd7..50f95b61cd 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -5,7 +5,9 @@ #ifdef _NBL_COMPILE_WITH_PLY_LOADER_ #include "CPLYMeshFileLoader.h" +#include "nbl/asset/interchange/SGeometryAABBCommon.h" #include 
"nbl/asset/interchange/SGeometryContentHashCommon.h" +#include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/IAssetManager.h" #include "nbl/asset/metadata/CPLYMetadata.h" @@ -62,56 +64,6 @@ inline std::string_view plyToStringView(const char* text) return text ? std::string_view{ text } : std::string_view{}; } -struct SPlyAABBAccumulator -{ - bool has = false; - float minX = 0.f; - float minY = 0.f; - float minZ = 0.f; - float maxX = 0.f; - float maxY = 0.f; - float maxZ = 0.f; -}; - -inline void plyAABBExtend(SPlyAABBAccumulator& aabb, const float x, const float y, const float z) -{ - if (!aabb.has) - { - aabb.has = true; - aabb.minX = x; - aabb.minY = y; - aabb.minZ = z; - aabb.maxX = x; - aabb.maxY = y; - aabb.maxZ = z; - return; - } - if (x < aabb.minX) aabb.minX = x; - if (y < aabb.minY) aabb.minY = y; - if (z < aabb.minZ) aabb.minZ = z; - if (x > aabb.maxX) aabb.maxX = x; - if (y > aabb.maxY) aabb.maxY = y; - if (z > aabb.maxZ) aabb.maxZ = z; -} - -inline void plySetGeometryAABB(ICPUPolygonGeometry* geometry, const SPlyAABBAccumulator& aabb) -{ - if (!geometry || !aabb.has) - return; - geometry->visitAABB([&aabb](auto& ref)->void - { - ref = std::remove_reference_t::create(); - ref.minVx.x = aabb.minX; - ref.minVx.y = aabb.minY; - ref.minVx.z = aabb.minZ; - ref.minVx.w = 0.0; - ref.maxVx.x = aabb.maxX; - ref.maxVx.y = aabb.maxY; - ref.maxVx.z = aabb.maxZ; - ref.maxVx.w = 0.0; - }); -} - class CPLYMappedFileMemoryResource final : public core::refctd_memory_resource { public: @@ -624,7 +576,7 @@ struct SContext Success, Error }; - EFastVertexReadResult readVertexElementFast(const SElement& el, SPlyAABBAccumulator* parsedAABB) + EFastVertexReadResult readVertexElementFast(const SElement& el, SAABBAccumulator3* parsedAABB) { if (!IsBinaryFile || el.Name != "vertex") return EFastVertexReadResult::NotApplicable; @@ -792,7 +744,7 @@ struct SContext reinterpret_cast(posBase)[1] = y; 
reinterpret_cast(posBase)[2] = z; if (trackAABB) - plyAABBExtend(*parsedAABB, x, y, z); + extendAABBAccumulator(*parsedAABB, x, y, z); src += 3ull * floatBytes; posBase += posStride; } @@ -810,7 +762,7 @@ struct SContext reinterpret_cast(posBase)[1] = y; reinterpret_cast(posBase)[2] = z; if (trackAABB) - plyAABBExtend(*parsedAABB, x, y, z); + extendAABBAccumulator(*parsedAABB, x, y, z); src += 3ull * floatBytes; posBase += posStride; reinterpret_cast(normalBase)[0] = decodeF32(src + 0ull * floatBytes); @@ -832,7 +784,7 @@ struct SContext reinterpret_cast(posBase)[1] = y; reinterpret_cast(posBase)[2] = z; if (trackAABB) - plyAABBExtend(*parsedAABB, x, y, z); + extendAABBAccumulator(*parsedAABB, x, y, z); src += 3ull * floatBytes; posBase += posStride; reinterpret_cast(normalBase)[0] = decodeF32(src + 0ull * floatBytes); @@ -1056,8 +1008,7 @@ struct SContext uint64_t& _faceCount, const uint32_t vertexCount, const bool computeIndexHash, - core::blake3_hash_t& outIndexHash, - double& outIndexHashMs) + core::blake3_hash_t& outIndexHash) { if (!IsBinaryFile) return EFastFaceReadResult::NotApplicable; @@ -1120,16 +1071,17 @@ struct SContext if (is32Bit) { const size_t hw = resolveLoaderHardwareThreads(); + const size_t hardMaxWorkers = resolveLoaderHardMaxWorkers(hw, inner.params.ioPolicy.runtimeTuning.workerHeadroom); const size_t recordBytes = sizeof(uint8_t) + 3ull * sizeof(uint32_t); SLoaderRuntimeTuningRequest faceTuningRequest = {}; faceTuningRequest.inputBytes = minBytesNeeded; faceTuningRequest.totalWorkUnits = element.Count; faceTuningRequest.minBytesPerWorker = recordBytes; faceTuningRequest.hardwareThreads = static_cast(hw); - faceTuningRequest.hardMaxWorkers = static_cast(hw); - faceTuningRequest.targetChunksPerWorker = 4u; + faceTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); + faceTuningRequest.targetChunksPerWorker = inner.params.ioPolicy.runtimeTuning.targetChunksPerWorker; faceTuningRequest.sampleData = ptr; - faceTuningRequest.sampleBytes = 
std::min(minBytesNeeded, 128ull << 10); + faceTuningRequest.sampleBytes = resolveLoaderRuntimeSampleBytes(inner.params.ioPolicy, minBytesNeeded); const auto faceTuning = tuneLoaderRuntime(inner.params.ioPolicy, faceTuningRequest); size_t workerCount = std::min(faceTuning.workerCount, element.Count); if (workerCount > 1ull) @@ -1152,7 +1104,6 @@ struct SContext try { core::blake3_hasher hasher; - const auto hashStart = std::chrono::high_resolution_clock::now(); for (size_t workerIx = 0ull; workerIx < workerCount; ++workerIx) { auto ready = std::atomic_ref(workerReady[workerIx]); @@ -1169,7 +1120,6 @@ struct SContext const size_t faceCount = end - begin; hasher.update(out + begin * 3ull, faceCount * 3ull * sizeof(uint32_t)); } - outIndexHashMs += std::chrono::duration(std::chrono::high_resolution_clock::now() - hashStart).count(); parsedIndexHash = static_cast(hasher); } catch (...) @@ -1235,16 +1185,7 @@ struct SContext ready.notify_one(); } }; - const auto runParallelWorkers = [](const size_t localWorkerCount, const auto& fn) -> void - { - if (localWorkerCount <= 1ull) { fn(0ull); return; } - core::vector workers; - workers.reserve(localWorkerCount - 1ull); - for (size_t workerIx = 1ull; workerIx < localWorkerCount; ++workerIx) - workers.emplace_back([&fn, workerIx]() { fn(workerIx); }); - fn(0ull); - }; - runParallelWorkers(workerCount, [&](const size_t workerIx) { const size_t begin = (element.Count * workerIx) / workerCount; const size_t end = (element.Count * (workerIx + 1ull)) / workerCount; parseChunk(workerIx, begin, end); }); + loaderRuntimeDispatchWorkers(workerCount, [&](const size_t workerIx) { const size_t begin = (element.Count * workerIx) / workerCount; const size_t end = (element.Count * (workerIx + 1ull)) / workerCount; parseChunk(workerIx, begin, end); }); if (hashThread.joinable()) hashThread.join(); @@ -1630,18 +1571,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!_file) return {}; - using clock_t = 
std::chrono::high_resolution_clock; - double headerMs = 0.0; - double vertexMs = 0.0; - double vertexFastMs = 0.0; - double vertexGenericMs = 0.0; - double faceMs = 0.0; - double skipMs = 0.0; - double layoutNegotiateMs = 0.0; - double viewCreateMs = 0.0; - double hashRangeMs = 0.0; - double indexBuildMs = 0.0; - double aabbMs = 0.0; const bool computeContentHashes = (_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0; uint64_t faceCount = 0u; uint64_t fastFaceElementCount = 0u; @@ -1649,7 +1578,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa uint32_t maxIndexRead = 0u; core::blake3_hash_t precomputedIndexHash = IPreHashed::INVALID_HASH; const uint64_t fileSize = _file->getSize(); - const bool hashInBuild = computeContentHashes && (fileSize <= (1ull << 20)); + const bool hashInBuild = computeContentHashes && shouldInlineHashBuild(_params.ioPolicy, fileSize); const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, fileSize, true); if (!ioPlan.valid) { @@ -1677,7 +1606,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa // start with empty mesh auto geometry = make_smart_refctd_ptr(); - SPlyAABBAccumulator parsedAABB = {}; + SAABBAccumulator3 parsedAABB = {}; uint32_t vertCount=0; core::vector> hashedBuffers; std::jthread deferredPositionHashThread; @@ -1690,9 +1619,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (hashed.get() == buffer) return; } - const auto hashStart = clock_t::now(); buffer->setContentHash(buffer->computeContentHash()); - hashRangeMs += std::chrono::duration(clock_t::now() - hashStart).count(); hashedBuffers.push_back(core::smart_refctd_ptr(buffer)); }; auto tryLaunchDeferredHash = [&](const IGeometry::SDataView& view, std::jthread& deferredThread)->void @@ -1752,7 +1679,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa bool continueReading = true; ctx.IsBinaryFile = false; 
ctx.IsWrongEndian= false; - const auto headerStart = clock_t::now(); do { @@ -1896,8 +1822,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } } while (readingHeader && continueReading); - headerMs = std::chrono::duration(clock_t::now() - headerStart).count(); - // if (!continueReading) return {}; @@ -1912,7 +1836,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa for (uint32_t i=0; i https://paulbourke.net/dataformats/ply/ + if (el.Name=="vertex") // multiple vertex elements are currently treated as unsupported { if (verticesProcessed) { @@ -1937,27 +1861,22 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const size_t mappedBytes = el.Count * sizeof(float) * 3ull; if (ctx.StartPointer + mappedBytes > ctx.EndPointer) return {}; - const auto vertexStart = clock_t::now(); auto mappedPosView = plyCreateMappedF32x3View(_file, ctx.StartPointer, mappedBytes); if (!mappedPosView) return {}; geometry->setPositionView(std::move(mappedPosView)); const auto* xyz = reinterpret_cast(ctx.StartPointer); for (size_t v = 0ull; v < el.Count; ++v) - plyAABBExtend(parsedAABB, xyz[v * 3ull + 0ull], xyz[v * 3ull + 1ull], xyz[v * 3ull + 2ull]); + extendAABBAccumulator(parsedAABB, xyz[v * 3ull + 0ull], xyz[v * 3ull + 1ull], xyz[v * 3ull + 2ull]); hashViewBufferIfNeeded(geometry->getPositionView()); tryLaunchDeferredHash(geometry->getPositionView(), deferredPositionHashThread); ctx.StartPointer += mappedBytes; ++fastVertexElementCount; - const double elapsedMs = std::chrono::duration(clock_t::now() - vertexStart).count(); - vertexFastMs += elapsedMs; - vertexMs += elapsedMs; verticesProcessed = true; continue; } ICPUPolygonGeometry::SDataViewBase posView = {}, normalView = {}, uvView = {}; core::vector extraViews; - const auto layoutStart = clock_t::now(); for (auto& vertexProperty : el.Properties) { const auto& propertyName = vertexProperty.Name; @@ -1987,13 +1906,10 @@ SAssetBundle 
CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa negotiateFormat(uvView,1); else { -// TODO: record the `propertyName` - const auto extraViewStart = clock_t::now(); +// property names for extra channels are currently not persisted in metadata extraViews.push_back(createView(vertexProperty.type,el.Count)); - viewCreateMs += std::chrono::duration(clock_t::now() - extraViewStart).count(); } } - layoutNegotiateMs += std::chrono::duration(clock_t::now() - layoutStart).count(); auto setFinalFormat = [&ctx](ICPUPolygonGeometry::SDataViewBase& view)->void { const auto componentFormat = view.format; @@ -2142,36 +2058,30 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa }; if (posView.format!=EF_UNKNOWN) { - const auto viewCreateStart = clock_t::now(); auto beginIx = ctx.vertAttrIts.size(); setFinalFormat(posView); auto view = createView(posView.format,el.Count); for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; geometry->setPositionView(std::move(view)); - viewCreateMs += std::chrono::duration(clock_t::now() - viewCreateStart).count(); } if (normalView.format!=EF_UNKNOWN) { - const auto viewCreateStart = clock_t::now(); auto beginIx = ctx.vertAttrIts.size(); setFinalFormat(normalView); auto view = createView(normalView.format,el.Count); for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; geometry->setNormalView(std::move(view)); - viewCreateMs += std::chrono::duration(clock_t::now() - viewCreateStart).count(); } if (uvView.format!=EF_UNKNOWN) { - const auto viewCreateStart = clock_t::now(); auto beginIx = ctx.vertAttrIts.size(); setFinalFormat(uvView); auto view = createView(uvView.format,el.Count); for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) ctx.vertAttrIts[beginIx].ptr += 
ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; geometry->getAuxAttributeViews()->push_back(std::move(view)); - viewCreateMs += std::chrono::duration(clock_t::now() - viewCreateStart).count(); } // for (auto& view : extraViews) @@ -2183,21 +2093,14 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa for (auto& view : extraViews) geometry->getAuxAttributeViews()->push_back(std::move(view)); // loop through vertex properties - const auto vertexStart = clock_t::now(); const auto fastVertexResult = ctx.readVertexElementFast(el, &parsedAABB); if (fastVertexResult == SContext::EFastVertexReadResult::Success) { ++fastVertexElementCount; - const double elapsedMs = std::chrono::duration(clock_t::now() - vertexStart).count(); - vertexFastMs += elapsedMs; - vertexMs += elapsedMs; } else if (fastVertexResult == SContext::EFastVertexReadResult::NotApplicable) { ctx.readVertex(_params,el); - const double elapsedMs = std::chrono::duration(clock_t::now() - vertexStart).count(); - vertexGenericMs += elapsedMs; - vertexMs += elapsedMs; } else { @@ -2213,7 +2116,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } else if (el.Name=="face") { - const auto faceStart = clock_t::now(); const uint32_t vertexCount32 = vertCount <= static_cast(std::numeric_limits::max()) ? 
static_cast(vertCount) : 0u; const auto fastFaceResult = ctx.readFaceElementFast( el, @@ -2222,8 +2124,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa faceCount, vertexCount32, computeContentHashes && !hashInBuild, - precomputedIndexHash, - hashRangeMs); + precomputedIndexHash); if (fastFaceResult == SContext::EFastFaceReadResult::Success) { ++fastFaceElementCount; @@ -2243,12 +2144,10 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa _params.logger.log("PLY face fast path failed on malformed data for %s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str()); return {}; } - faceMs += std::chrono::duration(clock_t::now() - faceStart).count(); } else { // skip these elements - const auto skipStart = clock_t::now(); if (ctx.IsBinaryFile && el.KnownSize) { const uint64_t bytesToSkip64 = static_cast(el.KnownSize) * static_cast(el.Count); @@ -2261,19 +2160,15 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa for (size_t j=0; j(clock_t::now() - skipStart).count(); } } - const auto aabbStart = clock_t::now(); if (parsedAABB.has) - plySetGeometryAABB(geometry.get(), parsedAABB); + applyAABBToGeometry(geometry.get(), parsedAABB); else CPolygonGeometryManipulator::recomputeAABB(geometry.get()); - aabbMs = std::chrono::duration(clock_t::now() - aabbStart).count(); const uint64_t indexCount = static_cast(indices.size()); - const auto indexStart = clock_t::now(); if (indices.empty()) { // no index buffer means point cloud @@ -2311,15 +2206,12 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa hashViewBufferIfNeeded(geometry->getIndexView()); } } - indexBuildMs = std::chrono::duration(clock_t::now() - indexStart).count(); if (computeContentHashes && !hashInBuild) { if (deferredPositionHashThread.joinable()) deferredPositionHashThread.join(); - const auto hashStart = clock_t::now(); 
recomputeGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); - hashRangeMs += std::chrono::duration(clock_t::now() - hashStart).count(); } else { @@ -2328,13 +2220,12 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const uint64_t ioMinRead = ctx.readCallCount ? ctx.readMinBytes : 0ull; const uint64_t ioAvgRead = ctx.readCallCount ? (ctx.readBytesTotal / ctx.readCallCount) : 0ull; - if ( - fileSize > (1ull << 20) && - ( - ioAvgRead < 1024ull || - (ioMinRead < 64ull && ctx.readCallCount > 1024ull) - ) - ) + const SFileReadTelemetry ioTelemetry = { + .callCount = ctx.readCallCount, + .totalBytes = ctx.readBytesTotal, + .minBytes = ctx.readMinBytes + }; + if (isTinyIOTelemetryLikely(ioTelemetry, fileSize, _params.ioPolicy)) { _params.logger.log( "PLY loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index ae502ddd65..e74d24a480 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -4,6 +4,7 @@ // See the original file in irrlicht source for authors #include "CPLYMeshWriter.h" +#include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #ifdef _NBL_COMPILE_WITH_PLY_WRITER_ @@ -14,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -121,28 +121,6 @@ inline bool emitExtraAuxValues(const core::vector& extraAuxViews, return true; } -const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) -{ - if (!view) - return nullptr; - if (view.composed.format != EF_R32G32B32_SFLOAT) - return nullptr; - if (view.composed.getStride() != sizeof(hlsl::float32_t3)) - return nullptr; - return reinterpret_cast(view.getPointer()); -} - -const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& view) -{ - if (!view) - return nullptr; - if 
(view.composed.format != EF_R32G32_SFLOAT) - return nullptr; - if (view.composed.getStride() != sizeof(hlsl::float32_t2)) - return nullptr; - return reinterpret_cast(view.getPointer()); -} - void appendUInt(std::string& out, const uint32_t value) { std::array buf = {}; @@ -185,12 +163,6 @@ bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SData bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { using namespace ply_writer_detail; - using clock_t = std::chrono::high_resolution_clock; - - const auto totalStart = clock_t::now(); - double encodeMs = 0.0; - double formatMs = 0.0; - double writeMs = 0.0; SFileWriteTelemetry ioTelemetry = {}; if (!_override) @@ -259,8 +231,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ core::vector indexData; const uint32_t* indices = nullptr; size_t faceCount = 0; - const auto encodeStart = clock_t::now(); - if (indexView) { const size_t indexCount = indexView.getElementCount(); @@ -309,12 +279,8 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ indices = indexData.data(); faceCount = vertexCount / 3u; } - encodeMs = std::chrono::duration(clock_t::now() - encodeStart).count(); - const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool binary = (flags & E_WRITER_FLAGS::EWF_BINARY) != 0u; - - const auto formatStart = clock_t::now(); std::string header = "ply\n"; header += binary ? 
"format binary_little_endian 1.0" : "format ascii 1.0"; header += "\ncomment Nabla "; @@ -360,8 +326,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ header += std::to_string(faceCount); header += "\nproperty list uchar uint vertex_indices\n"; header += "end_header\n"; - formatMs += std::chrono::duration(clock_t::now() - formatStart).count(); - const bool flipVectors = !(flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); bool writeOk = false; @@ -372,12 +336,10 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const size_t faceStride = sizeof(uint8_t) + sizeof(uint32_t) * 3u; const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; - const auto binaryEncodeStart = clock_t::now(); core::vector body; body.resize(bodySize); if (!writeBinary(geom, uvView, extraAuxViews, extraAuxFloatCount, writeNormals, vertexCount, indices, faceCount, body.data(), flipVectors)) return false; - encodeMs += std::chrono::duration(clock_t::now() - binaryEncodeStart).count(); const size_t outputSize = header.size() + body.size(); const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true); @@ -388,7 +350,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } outputBytes = outputSize; - const auto writeStart = clock_t::now(); writeOk = writeTwoBuffersWithPolicy( file, ioPlan, @@ -397,13 +358,9 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ body.data(), body.size(), &ioTelemetry); - writeMs = std::chrono::duration(clock_t::now() - writeStart).count(); - - const double totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); - const double miscMs = std::max(0.0, totalMs - (encodeMs + formatMs + writeMs)); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes))) + if 
(isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) { _params.logger.log( "PLY writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -428,20 +385,13 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), ioPlan.reason); - (void)totalMs; - (void)encodeMs; - (void)formatMs; - (void)writeMs; - (void)miscMs; return writeOk; } - const auto textEncodeStart = clock_t::now(); std::string body; body.reserve(vertexCount * ApproxPlyTextBytesPerVertex + faceCount * ApproxPlyTextBytesPerFace); if (!writeText(geom, uvView, extraAuxViews, writeNormals, vertexCount, indices, faceCount, body, flipVectors)) return false; - encodeMs += std::chrono::duration(clock_t::now() - textEncodeStart).count(); const size_t outputSize = header.size() + body.size(); const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true); @@ -452,7 +402,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } outputBytes = outputSize; - const auto writeStart = clock_t::now(); writeOk = writeTwoBuffersWithPolicy( file, ioPlan, @@ -461,13 +410,9 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ reinterpret_cast(body.data()), body.size(), &ioTelemetry); - writeMs = std::chrono::duration(clock_t::now() - writeStart).count(); - - const double totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); - const double miscMs = std::max(0.0, totalMs - (encodeMs + formatMs + writeMs)); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes))) + if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) { _params.logger.log( "PLY writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -492,11 +437,6 @@ bool 
CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ toString(ioPlan.strategy), static_cast(ioPlan.chunkSizeBytes), ioPlan.reason); - (void)totalMs; - (void)encodeMs; - (void)formatMs; - (void)writeMs; - (void)miscMs; return writeOk; } diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 2af5fc9c9a..d49bf94a6a 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -7,6 +7,7 @@ #ifdef _NBL_COMPILE_WITH_STL_LOADER_ +#include "nbl/asset/interchange/SGeometryAABBCommon.h" #include "nbl/asset/interchange/SGeometryContentHashCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" @@ -157,24 +158,6 @@ class CStlSplitBlockMemoryResource final : public core::refctd_memory_resource size_t m_alignment = 1ull; }; -void stlExtendAABB(hlsl::shapes::AABB<3, hlsl::float32_t>& aabb, bool& hasAABB, const hlsl::float32_t3& p) -{ - if (!hasAABB) - { - aabb.minVx = p; - aabb.maxVx = p; - hasAABB = true; - return; - } - - if (p.x < aabb.minVx.x) aabb.minVx.x = p.x; - if (p.y < aabb.minVx.y) aabb.minVx.y = p.y; - if (p.z < aabb.minVx.z) aabb.minVx.z = p.z; - if (p.x > aabb.maxVx.x) aabb.maxVx.x = p.x; - if (p.y > aabb.maxVx.y) aabb.maxVx.y = p.y; - if (p.z > aabb.maxVx.z) aabb.maxVx.z = p.z; -} - ICPUPolygonGeometry::SDataView stlCreateAdoptedFloat3View(core::vector&& values) { if (values.empty()) @@ -310,8 +293,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa auto geometry = core::make_smart_refctd_ptr(); geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - hlsl::shapes::AABB<3, hlsl::float32_t> parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); - bool hasParsedAABB = false; + SAABBAccumulator3 parsedAABB = {}; uint64_t vertexCount = 0ull; if (!binary && wholeFileDataIsMapped) @@ -412,17 +394,18 @@ SAssetBundle 
CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (end < cursor || static_cast(end - cursor) < static_cast(triangleCount) * SSTLContext::TriangleRecordBytes) return {}; const size_t hw = resolveLoaderHardwareThreads(); + const size_t hardMaxWorkers = resolveLoaderHardMaxWorkers(hw, _params.ioPolicy.runtimeTuning.workerHeadroom); SLoaderRuntimeTuningRequest parseTuningRequest = {}; parseTuningRequest.inputBytes = dataSize; parseTuningRequest.totalWorkUnits = triangleCount; parseTuningRequest.minBytesPerWorker = SSTLContext::TriangleRecordBytes; parseTuningRequest.hardwareThreads = static_cast(hw); - parseTuningRequest.hardMaxWorkers = static_cast(std::max(1ull, hw > 2ull ? (hw - 2ull) : hw)); - parseTuningRequest.targetChunksPerWorker = 2u; + parseTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); + parseTuningRequest.targetChunksPerWorker = _params.ioPolicy.runtimeTuning.targetChunksPerWorker; parseTuningRequest.minChunkWorkUnits = 1ull; parseTuningRequest.maxChunkWorkUnits = std::max(1ull, triangleCount); parseTuningRequest.sampleData = payloadData; - parseTuningRequest.sampleBytes = std::min(dataSize, 128ull << 10); + parseTuningRequest.sampleBytes = resolveLoaderRuntimeSampleBytes(_params.ioPolicy, dataSize); const auto parseTuning = tuneLoaderRuntime(_params.ioPolicy, parseTuningRequest); const size_t workerCount = std::max(1ull, std::min(parseTuning.workerCount, static_cast(std::max(1ull, triangleCount)))); static constexpr bool ComputeAABBInParse = true; @@ -647,16 +630,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if constexpr (ComputeAABBInParse) threadAABBs[workerIx] = localAABB; }; - const auto runParallelWorkers = [](const size_t localWorkerCount, const auto& fn) -> void - { - if (localWorkerCount <= 1ull) { fn(0ull); return; } - core::vector workers; - workers.reserve(localWorkerCount - 1ull); - for (size_t workerIx = 1ull; workerIx < localWorkerCount; ++workerIx) - 
workers.emplace_back([&fn, workerIx]() { fn(workerIx); }); - fn(0ull); - }; - runParallelWorkers(workerCount, parseWorker); + loaderRuntimeDispatchWorkers(workerCount, parseWorker); if (positionHashThread.joinable()) positionHashThread.join(); if (normalHashThread.joinable()) @@ -675,24 +649,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { if (!localAABB.has) continue; - if (!hasParsedAABB) - { - hasParsedAABB = true; - parsedAABB = hlsl::shapes::AABB<3, hlsl::float32_t>::create(); - parsedAABB.minVx.x = localAABB.minX; - parsedAABB.minVx.y = localAABB.minY; - parsedAABB.minVx.z = localAABB.minZ; - parsedAABB.maxVx.x = localAABB.maxX; - parsedAABB.maxVx.y = localAABB.maxY; - parsedAABB.maxVx.z = localAABB.maxZ; - continue; - } - if (localAABB.minX < parsedAABB.minVx.x) parsedAABB.minVx.x = localAABB.minX; - if (localAABB.minY < parsedAABB.minVx.y) parsedAABB.minVx.y = localAABB.minY; - if (localAABB.minZ < parsedAABB.minVx.z) parsedAABB.minVx.z = localAABB.minZ; - if (localAABB.maxX > parsedAABB.maxVx.x) parsedAABB.maxVx.x = localAABB.maxX; - if (localAABB.maxY > parsedAABB.maxVx.y) parsedAABB.maxVx.y = localAABB.maxY; - if (localAABB.maxZ > parsedAABB.maxVx.z) parsedAABB.maxVx.z = localAABB.maxZ; + extendAABBAccumulator(parsedAABB, localAABB.minX, localAABB.minY, localAABB.minZ); + extendAABBAccumulator(parsedAABB, localAABB.maxX, localAABB.maxY, localAABB.maxZ); } } geometry->setPositionView(std::move(posView)); @@ -754,9 +712,9 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa normals.push_back(faceNormal); normals.push_back(faceNormal); normals.push_back(faceNormal); - stlExtendAABB(parsedAABB, hasParsedAABB, p[2u]); - stlExtendAABB(parsedAABB, hasParsedAABB, p[1u]); - stlExtendAABB(parsedAABB, hasParsedAABB, p[0u]); + extendAABBAccumulator(parsedAABB, p[2u]); + extendAABBAccumulator(parsedAABB, p[1u]); + extendAABBAccumulator(parsedAABB, p[0u]); if (!stlReadTextToken(cursor, end, 
textToken) || textToken != std::string_view("endloop")) return {}; @@ -785,28 +743,15 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa recomputeGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); } - if (hasParsedAABB) - { - geometry->visitAABB([&parsedAABB](auto& ref)->void - { - ref = std::remove_reference_t::create(); - ref.minVx.x = parsedAABB.minVx.x; - ref.minVx.y = parsedAABB.minVx.y; - ref.minVx.z = parsedAABB.minVx.z; - ref.minVx.w = 0.0; - ref.maxVx.x = parsedAABB.maxVx.x; - ref.maxVx.y = parsedAABB.maxVx.y; - ref.maxVx.z = parsedAABB.maxVx.z; - ref.maxVx.w = 0.0; - }); - } + if (parsedAABB.has) + applyAABBToGeometry(geometry.get(), parsedAABB); else { CPolygonGeometryManipulator::recomputeAABB(geometry.get()); } const uint64_t ioMinRead = context.ioTelemetry.getMinOrZero(); const uint64_t ioAvgRead = context.ioTelemetry.getAvgOrZero(); - if (isTinyIOTelemetryLikely(context.ioTelemetry, static_cast(filesize))) + if (isTinyIOTelemetryLikely(context.ioTelemetry, static_cast(filesize), _params.ioPolicy)) { _params.logger.log( "STL loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index ec307841fa..7a65712a45 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -5,15 +5,13 @@ #include "nbl/system/IFile.h" #include "CSTLMeshWriter.h" +#include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include #include -#include -#include #include #include -#include #include #include #include @@ -33,9 +31,6 @@ struct SContext SResolvedFileIOPolicy ioPlan = {}; core::vector ioBuffer = {}; size_t fileOffset = 0ull; - double formatMs = 0.0; - double encodeMs = 0.0; - double writeMs = 0.0; SFileWriteTelemetry writeTelemetry = {}; }; @@ -66,7 +61,6 @@ using SContext = stl_writer_detail::SContext; bool 
flushBytes(SContext* context); bool writeBytes(SContext* context, const void* data, size_t size); -const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view); bool decodeTriangleIndices(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView& posView, core::vector& indexData, const uint32_t*& outIndices, uint32_t& outFaceCount); bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx); bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal); @@ -81,7 +75,6 @@ bool writeFaceText( const bool flipHandedness, SContext* context); -char* appendFloatFixed6ToBuffer(char* dst, char* const end, const float value); bool appendLiteral(char*& cursor, char* const end, const char* text, const size_t textSize); bool appendVectorAsAsciiLine(char*& cursor, char* const end, const core::vectorSIMDf& v); @@ -114,9 +107,6 @@ uint32_t CSTLMeshWriter::getForcedFlags() bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { - using clock_t = std::chrono::high_resolution_clock; - const auto totalStart = clock_t::now(); - if (!_override) getDefaultOverride(_override); @@ -141,7 +131,6 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const asset::E_WRITER_FLAGS flags = _override->getAssetWritingFlags(context.writeContext, geom, 0u); const bool binary = (flags & asset::EWF_BINARY) != 0u; - const auto formatStart = clock_t::now(); uint64_t expectedSize = 0ull; bool sizeKnown = false; @@ -162,7 +151,6 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ context.ioBuffer.reserve(static_cast(expectedSize)); else 
context.ioBuffer.reserve(static_cast(std::min(context.ioPlan.chunkSizeBytes, stl_writer_detail::IoFallbackReserveBytes))); - context.formatMs = std::chrono::duration(clock_t::now() - formatStart).count(); const bool written = binary ? writeMeshBinary(geom, &context) : writeMeshASCII(geom, &context); if (!written) @@ -172,11 +160,9 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (!flushed) return false; - const double totalMs = std::chrono::duration(clock_t::now() - totalStart).count(); - const double miscMs = std::max(0.0, totalMs - (context.formatMs + context.encodeMs + context.writeMs)); const uint64_t ioMinWrite = context.writeTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = context.writeTelemetry.getAvgOrZero(); - if (isTinyIOTelemetryLikely(context.writeTelemetry, context.fileOffset)) + if (isTinyIOTelemetryLikely(context.writeTelemetry, context.fileOffset, _params.ioPolicy)) { _params.logger.log( "STL writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -199,11 +185,6 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ toString(context.ioPlan.strategy), static_cast(context.ioPlan.chunkSizeBytes), context.ioPlan.reason); - (void)totalMs; - (void)miscMs; - (void)context.formatMs; - (void)context.encodeMs; - (void)context.writeMs; return true; } @@ -215,8 +196,6 @@ bool flushBytes(SContext* context) if (context->ioBuffer.empty()) return true; - using clock_t = std::chrono::high_resolution_clock; - const auto writeStart = clock_t::now(); size_t bytesWritten = 0ull; const size_t totalBytes = context->ioBuffer.size(); while (bytesWritten < totalBytes) @@ -237,7 +216,6 @@ bool flushBytes(SContext* context) } context->fileOffset += totalBytes; context->ioBuffer.clear(); - context->writeMs += std::chrono::duration(clock_t::now() - writeStart).count(); return true; } @@ -284,22 +262,6 @@ bool writeBytes(SContext* context, const void* data, size_t size) } } -char* 
appendFloatFixed6ToBuffer(char* dst, char* const end, const float value) -{ - if (!dst || dst >= end) - return end; - - const auto result = std::to_chars(dst, end, value, std::chars_format::fixed, 6); - if (result.ec == std::errc()) - return result.ptr; - - const int written = std::snprintf(dst, static_cast(end - dst), "%.6f", static_cast(value)); - if (written <= 0) - return dst; - const size_t writeLen = static_cast(written); - return (writeLen < static_cast(end - dst)) ? (dst + writeLen) : end; -} - bool appendLiteral(char*& cursor, char* const end, const char* text, const size_t textSize) { if (!cursor || cursor + textSize > end) @@ -326,17 +288,6 @@ bool appendVectorAsAsciiLine(char*& cursor, char* const end, const core::vectorS return true; } -const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) -{ - if (!view) - return nullptr; - if (view.composed.format != EF_R32G32B32_SFLOAT) - return nullptr; - if (view.composed.getStride() != sizeof(hlsl::float32_t3)) - return nullptr; - return reinterpret_cast(view.getPointer()); -} - bool decodeTriangleIndices(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView& posView, core::vector& indexData, const uint32_t*& outIndices, uint32_t& outFaceCount) { const auto& indexView = geom->getIndexView(); @@ -455,8 +406,6 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) { if (!geom || !context || !context->writeContext.outputFile) return false; - using clock_t = std::chrono::high_resolution_clock; - const auto encodeStart = clock_t::now(); const auto& posView = geom->getPositionView(); if (!posView) @@ -742,10 +691,7 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) } } - context->encodeMs += std::chrono::duration(clock_t::now() - encodeStart).count(); - const auto writeStart = clock_t::now(); const bool writeOk = writeFileWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, 
&context->writeTelemetry); - context->writeMs += std::chrono::duration(clock_t::now() - writeStart).count(); if (writeOk) context->fileOffset += outputSize; return writeOk; @@ -755,8 +701,6 @@ bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) { if (!geom) return false; - using clock_t = std::chrono::high_resolution_clock; - const auto encodeStart = clock_t::now(); const auto* indexing = geom->getIndexingCallback(); if (!indexing || indexing->degree() != 3u) @@ -801,7 +745,6 @@ bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) if (!writeBytes(context, solidName.data(), solidName.size())) return false; - context->encodeMs += std::chrono::duration(clock_t::now() - encodeStart).count(); return true; } From caafb713c34e40a6185e721f3a40d2b95a4df478 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 13 Feb 2026 15:41:53 +0100 Subject: [PATCH 030/118] Move generic AABB helpers to utils and simplify STL record writes --- .../asset/utils/CPolygonGeometryManipulator.h | 2 +- .../SGeometryAABBCommon.h | 0 .../asset/interchange/COBJMeshFileLoader.cpp | 2 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 2 +- .../asset/interchange/CSTLMeshFileLoader.cpp | 2 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 29 +++++++------------ 6 files changed, 15 insertions(+), 22 deletions(-) rename include/nbl/asset/{interchange => utils}/SGeometryAABBCommon.h (100%) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 305f9920a8..0edba1c866 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -9,10 +9,10 @@ #include "nbl/core/hash/blake.h" #include "nbl/asset/ICPUPolygonGeometry.h" -#include "nbl/asset/interchange/SGeometryAABBCommon.h" #include "nbl/asset/utils/CGeometryManipulator.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" #include 
"nbl/asset/utils/COBBGenerator.h" +#include "nbl/asset/utils/SGeometryAABBCommon.h" #include "nbl/builtin/hlsl/shapes/obb.hlsl" namespace nbl::asset diff --git a/include/nbl/asset/interchange/SGeometryAABBCommon.h b/include/nbl/asset/utils/SGeometryAABBCommon.h similarity index 100% rename from include/nbl/asset/interchange/SGeometryAABBCommon.h rename to include/nbl/asset/utils/SGeometryAABBCommon.h diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index db99ee46d0..4b055350d4 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -6,10 +6,10 @@ #include "nbl/core/declarations.h" #include "nbl/asset/IAssetManager.h" -#include "nbl/asset/interchange/SGeometryAABBCommon.h" #include "nbl/asset/interchange/SGeometryContentHashCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" +#include "nbl/asset/utils/SGeometryAABBCommon.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" #ifdef _NBL_COMPILE_WITH_OBJ_LOADER_ diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 50f95b61cd..561a89afd1 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -5,12 +5,12 @@ #ifdef _NBL_COMPILE_WITH_PLY_LOADER_ #include "CPLYMeshFileLoader.h" -#include "nbl/asset/interchange/SGeometryAABBCommon.h" #include "nbl/asset/interchange/SGeometryContentHashCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/IAssetManager.h" #include "nbl/asset/metadata/CPLYMetadata.h" +#include "nbl/asset/utils/SGeometryAABBCommon.h" #include "nbl/core/hash/blake.h" #include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp 
b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index d49bf94a6a..05948f0754 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -7,10 +7,10 @@ #ifdef _NBL_COMPILE_WITH_STL_LOADER_ -#include "nbl/asset/interchange/SGeometryAABBCommon.h" #include "nbl/asset/interchange/SGeometryContentHashCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" +#include "nbl/asset/utils/SGeometryAABBCommon.h" #include "nbl/asset/asset.h" #include "nbl/asset/metadata/CSTLMetadata.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 7a65712a45..078c1ecc1b 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -40,14 +40,7 @@ constexpr size_t BinaryTriangleFloatCount = 12ull; constexpr size_t BinaryTriangleFloatBytes = sizeof(float) * BinaryTriangleFloatCount; constexpr size_t BinaryTriangleAttributeBytes = sizeof(uint16_t); constexpr size_t BinaryTriangleRecordBytes = BinaryTriangleFloatBytes + BinaryTriangleAttributeBytes; -#pragma pack(push, 1) -struct SBinaryTriangleRecord -{ - float payload[BinaryTriangleFloatCount]; - uint16_t attribute = 0u; -}; -#pragma pack(pop) -static_assert(sizeof(SBinaryTriangleRecord) == BinaryTriangleRecordBytes); +static_assert(BinaryTriangleRecordBytes == 50ull); constexpr size_t BinaryPrefixBytes = BinaryHeaderBytes + BinaryTriangleCountBytes; constexpr size_t IoFallbackReserveBytes = 1ull << 20; constexpr size_t AsciiFaceTextMaxBytes = 1024ull; @@ -463,17 +456,17 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) }; auto writeRecord = [&dst](const float nx, const float ny, const float nz, const float v1x, const float v1y, const float v1z, const float v2x, const float v2y, const float v2z, const float v3x, const 
float v3y, const float v3z)->void { - const stl_writer_detail::SBinaryTriangleRecord record = { - { - nx, ny, nz, - v1x, v1y, v1z, - v2x, v2y, v2z, - v3x, v3y, v3z - }, - 0u + const float payload[stl_writer_detail::BinaryTriangleFloatCount] = { + nx, ny, nz, + v1x, v1y, v1z, + v2x, v2y, v2z, + v3x, v3y, v3z }; - std::memcpy(dst, &record, sizeof(record)); - dst += sizeof(record); + std::memcpy(dst, payload, stl_writer_detail::BinaryTriangleFloatBytes); + dst += stl_writer_detail::BinaryTriangleFloatBytes; + const uint16_t attribute = 0u; + std::memcpy(dst, &attribute, stl_writer_detail::BinaryTriangleAttributeBytes); + dst += stl_writer_detail::BinaryTriangleAttributeBytes; }; const bool hasFastTightPath = (indices == nullptr) && (tightPositions != nullptr) && (!hasNormals || (tightNormals != nullptr)); From 858adb740c5054381b8ed4944b18b2ddc1e82de3 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 13 Feb 2026 16:20:09 +0100 Subject: [PATCH 031/118] Clarify geometry hash modes and normalize EOF formatting --- .../interchange/SGeometryContentHashCommon.h | 22 +++++++++++++++++-- .../asset/interchange/COBJMeshFileLoader.cpp | 2 +- src/nbl/asset/interchange/COBJMeshWriter.cpp | 1 - .../asset/interchange/CPLYMeshFileLoader.cpp | 3 +-- .../asset/interchange/CSTLMeshFileLoader.cpp | 3 +-- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 1 - 6 files changed, 23 insertions(+), 9 deletions(-) diff --git a/include/nbl/asset/interchange/SGeometryContentHashCommon.h b/include/nbl/asset/interchange/SGeometryContentHashCommon.h index 04475eaff8..e3577ae461 100644 --- a/include/nbl/asset/interchange/SGeometryContentHashCommon.h +++ b/include/nbl/asset/interchange/SGeometryContentHashCommon.h @@ -16,6 +16,12 @@ namespace nbl::asset { +enum class EGeometryContentHashMode : uint8_t +{ + MissingOnly, + RecomputeAll +}; + inline void collectGeometryBuffers( ICPUPolygonGeometry* geometry, core::vector>& buffers) @@ -50,7 +56,7 @@ inline void collectGeometryBuffers( 
appendViewBuffer(*jointOBB); } -inline void recomputeGeometryContentHashesParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) +inline void computeGeometryContentHashesParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy, const EGeometryContentHashMode mode = EGeometryContentHashMode::MissingOnly) { if (!geometry) return; @@ -66,7 +72,9 @@ inline void recomputeGeometryContentHashesParallel(ICPUPolygonGeometry* geometry for (size_t i = 0ull; i < buffers.size(); ++i) { auto& buffer = buffers[i]; - if (!buffer || buffer->getContentHash() != IPreHashed::INVALID_HASH) + if (!buffer) + continue; + if (mode == EGeometryContentHashMode::MissingOnly && buffer->getContentHash() != IPreHashed::INVALID_HASH) continue; totalBytes += static_cast(buffer->getSize()); pending.push_back(i); @@ -124,6 +132,16 @@ inline void recomputeGeometryContentHashesParallel(ICPUPolygonGeometry* geometry } } +inline void computeMissingGeometryContentHashesParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) +{ + computeGeometryContentHashesParallel(geometry, ioPolicy, EGeometryContentHashMode::MissingOnly); +} + +inline void recomputeGeometryContentHashesParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) +{ + computeGeometryContentHashesParallel(geometry, ioPolicy, EGeometryContentHashMode::RecomputeAll); +} + } #endif diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 4b055350d4..e6a0bdf9df 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -1138,7 +1138,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if ((_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0) { - recomputeGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); + computeMissingGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); } if 
(parsedAABB.has) diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 105f174716..bef07fef50 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -451,4 +451,3 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } // namespace nbl::asset #endif // _NBL_COMPILE_WITH_OBJ_WRITER_ - diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 561a89afd1..4824fae10a 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -2211,7 +2211,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { if (deferredPositionHashThread.joinable()) deferredPositionHashThread.join(); - recomputeGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); + computeMissingGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); } else { @@ -2259,4 +2259,3 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } // end namespace nbl::asset #endif // _NBL_COMPILE_WITH_PLY_LOADER_ - diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 05948f0754..43e7c54b9d 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -740,7 +740,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (computeContentHashes && !contentHashesAssigned) { - recomputeGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); + computeMissingGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); } if (parsedAABB.has) @@ -809,4 +809,3 @@ bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste } #endif // _NBL_COMPILE_WITH_STL_LOADER_ - diff --git 
a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 078c1ecc1b..fc4ced51fe 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -804,4 +804,3 @@ bool writeFaceText( } #endif - From 50046f6d2e01e1ec9e065b901f8fdbf219728791 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 13 Feb 2026 17:22:56 +0100 Subject: [PATCH 032/118] Update examples_tests submodule for hash test toggle --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 07224bdb44..55d1112550 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 07224bdb448fd5be5659fdc79fb06d28de0a1144 +Subproject commit 55d1112550d6c0d717271a2b2dc663f87603481b From 5575861c5de5418ae7a19e5373ec6886caa39583 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 13 Feb 2026 19:09:40 +0100 Subject: [PATCH 033/118] Fix converter hash fallback mutation and update examples_tests --- examples_tests | 2 +- src/nbl/video/utilities/CAssetConverter.cpp | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/examples_tests b/examples_tests index 55d1112550..b39dea98e8 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 55d1112550d6c0d717271a2b2dc663f87603481b +Subproject commit b39dea98e8e5b01eacca35765f5f1e6ed08eee9d diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index 8e58e3438a..d2cfc5f7f1 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -1185,11 +1185,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t loo patchedParams.usage = lookup.patch->usage; auto contentHash = lookup.asset->getContentHash(); if (contentHash==NoContentHash) - { contentHash = lookup.asset->computeContentHash(); - if (auto* mutableAsset = const_cast(lookup.asset); 
mutableAsset && mutableAsset->isMutable()) - mutableAsset->setContentHash(contentHash); - } hasher.update(&patchedParams,sizeof(patchedParams)) << contentHash; return true; } From fbf300c6a4ad21df0e8cea0c11cbaf72d7c41329 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 14 Feb 2026 07:35:12 +0100 Subject: [PATCH 034/118] Update examples_tests submodule for local swapchain transfer-src setup --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index b39dea98e8..294a21a2a6 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit b39dea98e8e5b01eacca35765f5f1e6ed08eee9d +Subproject commit 294a21a2a661566c6548b0ce7bb93c05edd885a6 From 92b74474fc81939ea2d8a0d0ad428956ed1ac278 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 14 Feb 2026 09:13:54 +0100 Subject: [PATCH 035/118] Document runtime policy fields and stabilize Win32 file mapping flags --- include/nbl/asset/interchange/SFileIOPolicy.h | 37 +++++++++++++++++++ .../asset/interchange/SLoaderRuntimeTuning.h | 17 +++++++++ include/nbl/system/CSystemWin32.h | 4 +- src/nbl/core/hash/blake.cpp | 21 +++++++++++ src/nbl/system/CSystemWin32.cpp | 9 ++--- 5 files changed, 81 insertions(+), 7 deletions(-) diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 7e41b201b5..65a688628a 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -16,59 +16,96 @@ struct SFileIOPolicy { struct SRuntimeTuning { + // Runtime tuning strategy for worker/chunk selection. enum class Mode : uint8_t { + // Disable runtime tuning. Use static heuristics only. None, + // Use deterministic heuristics derived from input size and hardware. Heuristic, + // Use heuristics and optionally refine with lightweight sampling. Hybrid }; + // Runtime tuning mode. 
Mode mode = Mode::Heuristic; + // Maximum acceptable tuning overhead as a fraction of estimated full workload time. float maxOverheadRatio = 0.05f; + // Maximum sampling budget as a fraction of estimated full workload time. float samplingBudgetRatio = 0.05f; + // Minimum expected gain required to keep extra workers enabled. float minExpectedGainRatio = 0.03f; + // Hard cap for worker count. 0 means auto. uint32_t maxWorkers = 0u; + // Reserved hardware threads not used by the loader. Prevents full CPU saturation. uint32_t workerHeadroom = 2u; + // Maximum number of worker-count candidates tested in hybrid mode. uint32_t samplingMaxCandidates = 4u; + // Number of benchmark passes per candidate in hybrid mode. uint32_t samplingPasses = 1u; + // Minimum work units required before hybrid sampling is allowed. 0 means auto. uint64_t samplingMinWorkUnits = 0ull; + // Target chunk count assigned to each worker for loader stages. uint32_t targetChunksPerWorker = 4u; + // Target chunk count assigned to each worker for hash stages. uint32_t hashTaskTargetChunksPerWorker = 1u; + // Hash inlining threshold. Inputs up to this size prefer inline hash build. uint64_t hashInlineThresholdBytes = 1ull << 20; + // Lower bound for sampled byte count in hybrid mode. uint64_t minSampleBytes = 4ull << 10; + // Upper bound for sampled byte count in hybrid mode. uint64_t maxSampleBytes = 128ull << 10; + // Payload size threshold for tiny-IO anomaly detection. uint64_t tinyIoPayloadThresholdBytes = 1ull << 20; + // Average operation size threshold for tiny-IO anomaly detection. uint64_t tinyIoAvgBytesThreshold = 1024ull; + // Minimum operation size threshold for tiny-IO anomaly detection. uint64_t tinyIoMinBytesThreshold = 64ull; + // Minimum operation count required to report tiny-IO anomaly. uint64_t tinyIoMinCallCount = 1024ull; }; + // File IO strategy selection mode. enum class Strategy : uint8_t { + // Pick whole-file or chunked dynamically based on file size and policy limits. 
Auto, + // Force whole-file path. May fallback when not feasible unless strict=true. WholeFile, + // Force chunked path. Chunked }; + // Requested IO strategy. Strategy strategy = Strategy::Auto; + // If true and requested strategy is not feasible then resolution fails instead of fallback. bool strict = false; + // Maximum payload size allowed for whole-file strategy in auto mode. uint64_t wholeFileThresholdBytes = 64ull * 1024ull * 1024ull; + // Chunk size used by chunked strategy. uint64_t chunkSizeBytes = 4ull * 1024ull * 1024ull; + // Maximum staging allocation allowed for whole-file strategy. uint64_t maxStagingBytes = 256ull * 1024ull * 1024ull; + // Runtime tuning controls used by loaders and hash stages. SRuntimeTuning runtimeTuning = {}; }; struct SResolvedFileIOPolicy { + // Strategy selected after resolving SFileIOPolicy against runtime constraints. enum class Strategy : uint8_t { WholeFile, Chunked }; + // Effective strategy chosen by resolver. Strategy strategy = Strategy::Chunked; + // Effective chunk size. Also set for whole-file for telemetry consistency. uint64_t chunkSizeBytes = 0ull; + // False when strict policy cannot be satisfied. bool valid = true; + // Human-readable resolver reason used in logs and diagnostics. const char* reason = "ok"; }; diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index 07258ea960..c1f631b922 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -21,26 +21,43 @@ namespace nbl::asset struct SLoaderRuntimeTuningRequest { + // Total input bytes for the tuned stage. uint64_t inputBytes = 0ull; + // Total amount of stage work in logical units. uint64_t totalWorkUnits = 0ull; + // Minimum work units assigned to one worker. uint64_t minWorkUnitsPerWorker = 1ull; + // Minimum input bytes assigned to one worker. 
uint64_t minBytesPerWorker = 1ull; + // Hardware thread count override. 0 means auto-detect. uint32_t hardwareThreads = 0u; + // Hard cap for workers for this request. 0 means no extra cap. uint32_t hardMaxWorkers = 0u; + // Preferred chunk count per worker for this stage. 0 means policy default. uint32_t targetChunksPerWorker = 0u; + // Minimum work units in one chunk. uint64_t minChunkWorkUnits = 1ull; + // Maximum work units in one chunk. uint64_t maxChunkWorkUnits = std::numeric_limits::max(); + // Pointer to representative sample bytes for hybrid sampling. const uint8_t* sampleData = nullptr; + // Number of sample bytes available at sampleData. uint64_t sampleBytes = 0ull; + // Sampling pass count override. 0 means policy default. uint32_t samplePasses = 0u; + // Sampling candidate count override. 0 means policy default. uint32_t sampleMaxCandidates = 0u; + // Minimum work units required to allow sampling. 0 means policy or auto value. uint64_t sampleMinWorkUnits = 0ull; }; struct SLoaderRuntimeTuningResult { + // Selected worker count for the stage. size_t workerCount = 1ull; + // Work units per chunk assigned by tuner. uint64_t chunkWorkUnits = 1ull; + // Total chunk count for the stage. 
size_t chunkCount = 1ull; }; diff --git a/include/nbl/system/CSystemWin32.h b/include/nbl/system/CSystemWin32.h index 01766ddaa8..a6fe550410 100644 --- a/include/nbl/system/CSystemWin32.h +++ b/include/nbl/system/CSystemWin32.h @@ -19,7 +19,7 @@ class NBL_API2 CSystemWin32 : public ISystem public: CCaller(ISystem* _system) : ICaller(_system) {} - core::smart_refctd_ptr createFile(const std::filesystem::path& filename, const core::bitflag flags) override final; + core::smart_refctd_ptr createFile(const std::filesystem::path& filename, core::bitflag flags) override final; }; public: @@ -124,4 +124,4 @@ class NBL_API2 CSystemWin32 : public ISystem #endif -#endif \ No newline at end of file +#endif diff --git a/src/nbl/core/hash/blake.cpp b/src/nbl/core/hash/blake.cpp index c8516115da..88fa6d4093 100644 --- a/src/nbl/core/hash/blake.cpp +++ b/src/nbl/core/hash/blake.cpp @@ -11,6 +11,27 @@ extern "C" #include "blake3_impl.h" } +/* + BLAKE3 is tree-based and explicitly designed for parallel processing. The tree mode + (chunks and parent-node reduction) is part of the specification, so a parallel + implementation can be done without changing hash semantics. + + Why this local implementation exists: + - Nabla needs a multithreaded hash path integrated with its own runtime policy and + standard C++ threading. + - Upstream C API exposes a single-threaded update path and an optional oneTBB path + (`blake3_hasher_update_tbb`) which requires building with `BLAKE3_USE_TBB`. + - Here we keep the same algorithmic rules and final digest, while using only C++20 + standard facilities (`std::async`, `std::thread`) and no oneTBB dependency. + - The local helpers below are adapted from upstream tree-processing internals used + in `c/blake3.c` and the oneTBB integration path. 
+ + Primary references: + - BLAKE3 spec repository (paper): https://github.com/BLAKE3-team/BLAKE3-specs + - C2SP BLAKE3 specification: https://c2sp.org/BLAKE3 + - Upstream BLAKE3 C API notes (`update_tbb`): https://github.com/BLAKE3-team/BLAKE3/blob/master/c/README.md +*/ + namespace nbl::core { diff --git a/src/nbl/system/CSystemWin32.cpp b/src/nbl/system/CSystemWin32.cpp index cd4ba4de1d..1982448886 100644 --- a/src/nbl/system/CSystemWin32.cpp +++ b/src/nbl/system/CSystemWin32.cpp @@ -39,9 +39,8 @@ ISystem::SystemInfo CSystemWin32::getSystemInfo() const } -core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std::filesystem::path& filename, const core::bitflag flags) +core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std::filesystem::path& filename, core::bitflag flags) { - core::bitflag effectiveFlags = flags; const bool writeAccess = flags.value&IFile::ECF_WRITE; const DWORD fileAccess = ((flags.value&IFile::ECF_READ) ? FILE_GENERIC_READ:0)|(writeAccess ? FILE_GENERIC_WRITE:0); DWORD shareMode = FILE_SHARE_READ; @@ -77,7 +76,7 @@ core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std: _fileMappingObj = CreateFileMappingA(_native,nullptr,writeAccess ? 
PAGE_READWRITE:PAGE_READONLY, 0, 0, nullptr); if (!_fileMappingObj) { - effectiveFlags.value &= ~(IFile::ECF_COHERENT | IFile::ECF_MAPPABLE); + flags.value &= ~(IFile::ECF_COHERENT | IFile::ECF_MAPPABLE); } else { @@ -100,11 +99,11 @@ core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std: { CloseHandle(_fileMappingObj); _fileMappingObj = nullptr; - effectiveFlags.value &= ~(IFile::ECF_COHERENT | IFile::ECF_MAPPABLE); + flags.value &= ~(IFile::ECF_COHERENT | IFile::ECF_MAPPABLE); } } } - return core::make_smart_refctd_ptr(core::smart_refctd_ptr(m_system),path(filename),effectiveFlags,_mappedPtr,_native,_fileMappingObj); + return core::make_smart_refctd_ptr(core::smart_refctd_ptr(m_system),path(filename),flags,_mappedPtr,_native,_fileMappingObj); } bool isDebuggerAttached() From 518ff448f4e6240efde9f302d11ba60a0cc7d78a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 14 Feb 2026 11:03:12 +0100 Subject: [PATCH 036/118] Fix archive fallback and preflight checks for PLY loading --- src/nbl/asset/IAssetManager.cpp | 5 ++- .../asset/interchange/CPLYMeshFileLoader.cpp | 38 +++++++++++++++---- src/nbl/system/ISystem.cpp | 9 ++++- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index cdbeb3f2f1..e2e817567a 100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -234,8 +234,9 @@ SAssetBundle IAssetManager::getAssetInHierarchy_impl(system::IFile* _file, const extensionLoaders.reserve(8u); for (auto& loader : capableLoadersRng) { - extensionLoaders.push_back(loader.second); - if (!(bundle = loader.second->loadAsset(file.get(), params, _override, _hierarchyLevel)).getContents().empty()) + auto* extensionLoader = loader.second; + extensionLoaders.push_back(extensionLoader); + if (extensionLoader->isALoadableFileFormat(file.get()) && !(bundle = extensionLoader->loadAsset(file.get(), params, _override, _hierarchyLevel)).getContents().empty()) 
break; } for (auto loaderItr = std::begin(m_loaders.vector); bundle.getContents().empty() && loaderItr != std::end(m_loaders.vector); ++loaderItr) diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 4824fae10a..602de6161b 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -30,7 +30,7 @@ const char** CPLYMeshFileLoader::getAssociatedFileExtensions() const bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr) const { - char buf[40]; + char buf[128]; system::IFile::success_t success; _file->read(success, buf, 0, sizeof(buf)); @@ -38,17 +38,39 @@ bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste return false; const std::string_view fileHeader(buf, success.getBytesProcessed()); - if (!fileHeader.starts_with("ply\n")) - return false; + auto trimWhitespace = [](std::string_view line) -> std::string_view + { + const auto isWhitespace = [](const char c) -> bool { return c == ' ' || c == '\t' || c == '\r'; }; + while (!line.empty() && isWhitespace(line.front())) + line.remove_prefix(1ull); + while (!line.empty() && isWhitespace(line.back())) + line.remove_suffix(1ull); + return line; + }; - const size_t formatLineBegin = 4ull; - const size_t formatLineEnd = fileHeader.find('\n', formatLineBegin); - if (formatLineEnd == std::string_view::npos) + size_t lineStart = 0ull; + const size_t firstLineEnd = fileHeader.find('\n'); + std::string_view firstLine = fileHeader.substr(0ull, firstLineEnd); + firstLine = trimWhitespace(firstLine); + if (firstLine != "ply") + return false; + if (firstLineEnd == std::string_view::npos) return false; - const std::string_view formatLine = fileHeader.substr(formatLineBegin, formatLineEnd - formatLineBegin); + lineStart = firstLineEnd + 1ull; constexpr std::array headers = { "format ascii 1.0", "format binary_little_endian 1.0", "format 
binary_big_endian 1.0" }; - return std::find(headers.begin(), headers.end(), formatLine) != headers.end(); + while (lineStart < fileHeader.size()) + { + size_t lineEnd = fileHeader.find('\n', lineStart); + if (lineEnd == std::string_view::npos) + lineEnd = fileHeader.size(); + std::string_view line = trimWhitespace(fileHeader.substr(lineStart, lineEnd - lineStart)); + if (line.starts_with("format ")) + return std::find(headers.begin(), headers.end(), line) != headers.end(); + lineStart = lineEnd + 1ull; + } + + return false; } const auto plyByteswap = [](const auto value) diff --git a/src/nbl/system/ISystem.cpp b/src/nbl/system/ISystem.cpp index b2fc0f5117..42717bdb0b 100644 --- a/src/nbl/system/ISystem.cpp +++ b/src/nbl/system/ISystem.cpp @@ -214,7 +214,14 @@ void ISystem::createFile(future_t>& future, std::f // try archives (readonly, for now) if (!writeUsage && !pathExists) { - const auto found = findFileInArchive(filename); + auto found = findFileInArchive(filename); + if (!found.archive && !absoluteInput) + { + fsEc.clear(); + const auto absolute = std::filesystem::absolute(filename, fsEc); + if (!fsEc) + found = findFileInArchive(absolute); + } if (found.archive) { auto file = found.archive->getFile(found.pathRelativeToArchive,flags,accessToken); From 356ab95081eefb7f13bfc5f596b2164f6819fc05 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 14 Feb 2026 13:01:48 +0100 Subject: [PATCH 037/118] Fix OBJ fallback counters and archive path lookup --- .../asset/interchange/COBJMeshFileLoader.cpp | 149 +----------------- src/nbl/system/ISystem.cpp | 72 +++++++-- 2 files changed, 57 insertions(+), 164 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index e6a0bdf9df..f76c424407 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -202,91 +202,6 @@ bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t 
byteCount, co return readFileWithPolicyTimed(file, reinterpret_cast(dst), 0ull, byteCount, ioPlan, nullptr, &ioTelemetry); } -const char* goFirstWord(const char* buf, const char* const bufEnd, bool acrossNewlines = true) -{ - if (acrossNewlines) - while ((buf != bufEnd) && core::isspace(*buf)) - ++buf; - else - while ((buf != bufEnd) && core::isspace(*buf) && (*buf != '\n')) - ++buf; - - return buf; -} - -const char* goNextWord(const char* buf, const char* const bufEnd, bool acrossNewlines = true) -{ - while ((buf != bufEnd) && !core::isspace(*buf)) - ++buf; - - return goFirstWord(buf, bufEnd, acrossNewlines); -} - -const char* goNextLine(const char* buf, const char* const bufEnd) -{ - while (buf != bufEnd) - { - if (*buf == '\n' || *buf == '\r') - break; - ++buf; - } - return goFirstWord(buf, bufEnd); -} - -bool parseFloatToken(const char*& ptr, const char* const end, float& out) -{ - const auto parseResult = fast_float::from_chars(ptr, end, out); - if (parseResult.ec == std::errc() && parseResult.ptr != ptr) - { - ptr = parseResult.ptr; - return true; - } - - char* fallbackEnd = nullptr; - out = std::strtof(ptr, &fallbackEnd); - if (!fallbackEnd || fallbackEnd == ptr) - return false; - ptr = fallbackEnd; - return true; -} - -const char* readVec3(const char* bufPtr, float vec[3], const char* const bufEnd) -{ - bufPtr = goNextWord(bufPtr, bufEnd, false); - for (uint32_t i = 0u; i < 3u; ++i) - { - if (bufPtr >= bufEnd) - return bufPtr; - - if (!parseFloatToken(bufPtr, bufEnd, vec[i])) - return bufPtr; - - while (bufPtr < bufEnd && core::isspace(*bufPtr) && *bufPtr != '\n' && *bufPtr != '\r') - ++bufPtr; - } - - return bufPtr; -} - -const char* readUV(const char* bufPtr, float vec[2], const char* const bufEnd) -{ - bufPtr = goNextWord(bufPtr, bufEnd, false); - for (uint32_t i = 0u; i < 2u; ++i) - { - if (bufPtr >= bufEnd) - return bufPtr; - - if (!parseFloatToken(bufPtr, bufEnd, vec[i])) - return bufPtr; - - while (bufPtr < bufEnd && core::isspace(*bufPtr) && *bufPtr 
!= '\n' && *bufPtr != '\r') - ++bufPtr; - } - - vec[1] = 1.f - vec[1]; - return bufPtr; -} - inline bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint32_t& out) { if (ptr >= end || !isObjDigit(*ptr)) @@ -305,66 +220,6 @@ inline bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint3 return true; } -inline bool parseObjFaceTokenPositiveTriplet(const char*& ptr, const char* const end, int32_t* idx, const size_t posCount, const size_t uvCount, const size_t normalCount) -{ - while (ptr < end && isObjInlineWhitespace(*ptr)) - ++ptr; - if (ptr >= end || !isObjDigit(*ptr)) - return false; - - uint32_t posRaw = 0u; - if (!parseUnsignedObjIndex(ptr, end, posRaw)) - return false; - if (posRaw > posCount) - return false; - - if (ptr >= end || *ptr != '/') - return false; - ++ptr; - - uint32_t uvRaw = 0u; - if (!parseUnsignedObjIndex(ptr, end, uvRaw)) - return false; - if (uvRaw > uvCount) - return false; - - if (ptr >= end || *ptr != '/') - return false; - ++ptr; - - uint32_t normalRaw = 0u; - if (!parseUnsignedObjIndex(ptr, end, normalRaw)) - return false; - if (normalRaw > normalCount) - return false; - - idx[0] = static_cast(posRaw - 1u); - idx[1] = static_cast(uvRaw - 1u); - idx[2] = static_cast(normalRaw - 1u); - return true; -} - -inline bool parseObjPositiveIndexBounded(const char*& ptr, const char* const end, const size_t maxCount, int32_t& out) -{ - if (ptr >= end || !isObjDigit(*ptr)) - return false; - - uint32_t value = 0u; - while (ptr < end && isObjDigit(*ptr)) - { - const uint32_t digit = static_cast(*ptr - '0'); - if (value > 429496729u) - return false; - value = value * 10u + digit; - ++ptr; - } - if (value == 0u || value > maxCount) - return false; - - out = static_cast(value - 1u); - return true; -} - inline bool parseObjTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, int32_t* idx0, int32_t* idx1, int32_t* idx2, const size_t posCount, const size_t uvCount, const size_t normalCount) { 
const char* ptr = lineStart; @@ -1021,7 +876,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return {}; if (!acquireCornerIndex(triIdx2, c2)) return {}; - faceFastTokenCount += 3u; + faceFallbackTokenCount += 3u; if (!appendIndex(c2) || !appendIndex(c1) || !appendIndex(c0)) return {}; firstCorner = c0; @@ -1040,7 +895,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as int32_t idx[3] = { -1, -1, -1 }; if (!parseObjFaceVertexTokenFast(linePtr, lineEnd, idx, posCount, uvCount, normalCount)) return {}; - ++faceFastTokenCount; + ++faceFallbackTokenCount; uint32_t cornerIx = 0u; if (!acquireCornerIndex(idx, cornerIx)) diff --git a/src/nbl/system/ISystem.cpp b/src/nbl/system/ISystem.cpp index 42717bdb0b..6d103035dc 100644 --- a/src/nbl/system/ISystem.cpp +++ b/src/nbl/system/ISystem.cpp @@ -274,31 +274,69 @@ core::smart_refctd_ptr ISystem::openFileArchive(core::smart_refctd ISystem::FoundArchiveFile ISystem::findFileInArchive(const system::path& absolutePath) const { - const auto normalizedAbsolutePath = absolutePath.lexically_normal(); - system::path path = normalizedAbsolutePath.parent_path().lexically_normal(); - // going up the directory tree - while (!path.empty() && path.parent_path()!=path) + std::error_code fsEc; + const system::path normalizedAbsolutePath = absolutePath.lexically_normal(); + system::path normalizedAbsoluteFallback = {}; + bool hasAbsoluteFallback = false; + if (!normalizedAbsolutePath.is_absolute()) { - std::error_code fsEc; - const auto relative = std::filesystem::relative(normalizedAbsolutePath, path, fsEc); - if (fsEc) + const auto absoluteCandidate = std::filesystem::absolute(normalizedAbsolutePath, fsEc); + if (!fsEc) { - path = path.parent_path(); - continue; + normalizedAbsoluteFallback = absoluteCandidate.lexically_normal(); + hasAbsoluteFallback = true; } - const auto archives = m_cachedArchiveFiles.findRange(path); - for (auto& archive : archives) + } + + auto 
tryMatchAtPath = [&](const system::path& archivePath) -> FoundArchiveFile + { + auto tryMatchSingle = [&](const system::path& normalizedPath) -> FoundArchiveFile { - const auto items = static_cast(archive.second->listAssets()); + std::error_code relativeEc; + const auto relative = std::filesystem::relative(normalizedPath, archivePath, relativeEc); + if (relativeEc) + return { nullptr, {} }; + + const auto archives = m_cachedArchiveFiles.findRange(archivePath); + for (auto& archive : archives) + { + const auto items = static_cast(archive.second->listAssets()); + const IFileArchive::SFileList::SEntry itemToFind = { relative }; + auto found = std::lower_bound(items.begin(), items.end(), itemToFind); + if (found != items.end() && found->pathRelativeToArchive == relative) + return { archive.second.get(), relative }; + } + return { nullptr, {} }; + }; + + if (auto found = tryMatchSingle(normalizedAbsolutePath); found.archive) + return found; + if (hasAbsoluteFallback) + return tryMatchSingle(normalizedAbsoluteFallback); + return { nullptr, {} }; + }; - const IFileArchive::SFileList::SEntry itemToFind = { relative }; - auto found = std::lower_bound(items.begin(), items.end(), itemToFind); - if (found!=items.end() && found->pathRelativeToArchive==relative) - return {archive.second.get(),relative}; + system::path path = normalizedAbsolutePath.parent_path().lexically_normal(); + while (!path.empty() && path.parent_path() != path) + { + if (auto found = tryMatchAtPath(path); found.archive) + return found; + + fsEc.clear(); + if (std::filesystem::exists(path, fsEc) && !fsEc) + { + fsEc.clear(); + const auto canonicalPath = std::filesystem::canonical(path, fsEc); + if (!fsEc && canonicalPath != path) + { + if (auto found = tryMatchAtPath(canonicalPath); found.archive) + return found; + } } + path = path.parent_path(); } - return { nullptr,{} }; + return { nullptr, {} }; } From 06789811c1fc1213cfcdf3fa6eb24ba11c4aa358 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 
14 Feb 2026 13:41:26 +0100 Subject: [PATCH 038/118] Remove PLY file-backed buffer aliasing --- .../asset/interchange/CPLYMeshFileLoader.cpp | 66 ------------------- 1 file changed, 66 deletions(-) diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 602de6161b..4c4ffb13a4 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -86,27 +86,6 @@ inline std::string_view plyToStringView(const char* text) return text ? std::string_view{ text } : std::string_view{}; } -class CPLYMappedFileMemoryResource final : public core::refctd_memory_resource -{ - public: - explicit CPLYMappedFileMemoryResource(core::smart_refctd_ptr&& file) : m_file(std::move(file)) - { - } - - inline void* allocate(std::size_t, std::size_t) override - { - assert(false); - return nullptr; - } - - inline void deallocate(void*, std::size_t, std::size_t) override - { - } - - private: - core::smart_refctd_ptr m_file; -}; - inline IGeometry::SDataView plyCreateDataView(core::smart_refctd_ptr&& buffer, const size_t byteCount, const uint32_t stride, const E_FORMAT format) { if (!buffer || byteCount == 0ull) @@ -140,16 +119,6 @@ IGeometry::SDataView plyCreateAdoptedView(core::vector&& return plyCreateDataView(std::move(buffer), byteCount, static_cast(sizeof(ValueType)), Format); } -IGeometry::SDataView plyCreateMappedF32x3View(system::IFile* file, void* ptr, const size_t byteCount) -{ - if (!file || !ptr || byteCount == 0ull) - return {}; - - auto keepAliveResource = core::make_smart_refctd_ptr(core::smart_refctd_ptr(file)); - auto buffer = ICPUBuffer::create({ { byteCount }, ptr, core::smart_refctd_ptr(keepAliveResource), alignof(float) }, core::adopt_memory); - return plyCreateDataView(std::move(buffer), byteCount, static_cast(sizeof(float) * 3ull), EF_R32G32B32_SFLOAT); -} - struct SContext { static constexpr uint64_t ReadWindowPaddingBytes = 1ull; @@ -253,7 +222,6 @@ struct 
SContext Buffer.resize(ioReadWindowSize + ReadWindowPaddingBytes, '\0'); EndPointer = StartPointer = Buffer.data(); LineEndPointer = EndPointer-1; - UsingMappedBinaryWindow = false; fillBuffer(); } @@ -408,7 +376,6 @@ struct SContext LineEndPointer = StartPointer - 1; WordLength = -1; EndOfFile = true; - UsingMappedBinaryWindow = true; fileOffset = inner.mainFile ? inner.mainFile->getSize() : fileOffset; } // skips x bytes in the file, getting more data if required @@ -1577,7 +1544,6 @@ struct SContext int32_t LineLength = 0; int32_t WordLength = -1; // this variable is a misnomer, its really the offset to next word minus one bool IsBinaryFile = false, IsWrongEndian = false, EndOfFile = false; - bool UsingMappedBinaryWindow = false; size_t fileOffset = {}; uint64_t readCallCount = 0ull; uint64_t readBytesTotal = 0ull; @@ -1865,38 +1831,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa _params.logger.log("Multiple `vertex` elements not supported!", system::ILogger::ELL_ERROR); return {}; } - const bool mappedXYZAliasCandidate = - ctx.IsBinaryFile && - (!ctx.IsWrongEndian) && - ctx.UsingMappedBinaryWindow && - el.Properties.size() == 3u && - el.Properties[0].type == EF_R32_SFLOAT && - el.Properties[1].type == EF_R32_SFLOAT && - el.Properties[2].type == EF_R32_SFLOAT && - el.Properties[0].Name == "x" && - el.Properties[1].Name == "y" && - el.Properties[2].Name == "z"; - if (mappedXYZAliasCandidate) - { - if (el.Count > (std::numeric_limits::max() / (sizeof(float) * 3ull))) - return {}; - const size_t mappedBytes = el.Count * sizeof(float) * 3ull; - if (ctx.StartPointer + mappedBytes > ctx.EndPointer) - return {}; - auto mappedPosView = plyCreateMappedF32x3View(_file, ctx.StartPointer, mappedBytes); - if (!mappedPosView) - return {}; - geometry->setPositionView(std::move(mappedPosView)); - const auto* xyz = reinterpret_cast(ctx.StartPointer); - for (size_t v = 0ull; v < el.Count; ++v) - extendAABBAccumulator(parsedAABB, xyz[v * 3ull 
+ 0ull], xyz[v * 3ull + 1ull], xyz[v * 3ull + 2ull]); - hashViewBufferIfNeeded(geometry->getPositionView()); - tryLaunchDeferredHash(geometry->getPositionView(), deferredPositionHashThread); - ctx.StartPointer += mappedBytes; - ++fastVertexElementCount; - verticesProcessed = true; - continue; - } ICPUPolygonGeometry::SDataViewBase posView = {}, normalView = {}, uvView = {}; core::vector extraViews; for (auto& vertexProperty : el.Properties) From 2b8a34d6e478acfae4804b70d3ed6b5fc418af12 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 14 Feb 2026 17:08:29 +0100 Subject: [PATCH 039/118] Fix mesh loader normal handling and PLY scalar typing --- .../asset/interchange/COBJMeshFileLoader.cpp | 172 ++++- src/nbl/asset/interchange/COBJMeshWriter.cpp | 2 +- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 609 +++++++++++++----- 3 files changed, 618 insertions(+), 165 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index f76c424407..5c62cf34b5 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -19,6 +19,7 @@ #include "COBJMeshFileLoader.h" #include +#include #include #include @@ -32,6 +33,7 @@ struct ObjVertexDedupNode { int32_t uv = -1; int32_t normal = -1; + uint32_t smoothingGroup = 0u; uint32_t outIndex = 0u; int32_t next = -1; }; @@ -220,6 +222,66 @@ inline bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint3 return true; } +inline char toObjLowerAscii(const char c) +{ + if (c >= 'A' && c <= 'Z') + return static_cast(c - 'A' + 'a'); + return c; +} + +inline void parseObjSmoothingGroup(const char* linePtr, const char* const lineEnd, uint32_t& outGroup) +{ + while (linePtr < lineEnd && isObjInlineWhitespace(*linePtr)) + ++linePtr; + + if (linePtr >= lineEnd) + { + outGroup = 0u; + return; + } + + const char* const tokenStart = linePtr; + while (linePtr < lineEnd && 
!isObjInlineWhitespace(*linePtr)) + ++linePtr; + const size_t tokenLength = static_cast(linePtr - tokenStart); + + if (tokenLength == 2u && + toObjLowerAscii(tokenStart[0]) == 'o' && + toObjLowerAscii(tokenStart[1]) == 'n') + { + outGroup = 1u; + return; + } + if (tokenLength == 3u && + toObjLowerAscii(tokenStart[0]) == 'o' && + toObjLowerAscii(tokenStart[1]) == 'f' && + toObjLowerAscii(tokenStart[2]) == 'f') + { + outGroup = 0u; + return; + } + + uint64_t value = 0ull; + bool sawDigit = false; + for (const char* it = tokenStart; it < linePtr; ++it) + { + if (!isObjDigit(*it)) + { + outGroup = 0u; + return; + } + sawDigit = true; + value = value * 10ull + static_cast(*it - '0'); + if (value > static_cast(std::numeric_limits::max())) + { + outGroup = 0u; + return; + } + } + + outGroup = sawDigit ? static_cast(value) : 0u; +} + inline bool parseObjTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, int32_t* idx0, int32_t* idx1, int32_t* idx2, const size_t posCount, const size_t uvCount, const size_t normalCount) { const char* ptr = lineStart; @@ -539,6 +601,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector outPositions; core::vector outNormals; + core::vector outNormalNeedsGeneration; core::vector outUVs; core::vector indices; core::vector dedupHeadByPos; @@ -553,6 +616,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const size_t initialOutIndexCapacity = (estimatedOutIndexCount == std::numeric_limits::max()) ? 
3ull : std::max(3ull, estimatedOutIndexCount); outPositions.resize(initialOutVertexCapacity); outNormals.resize(initialOutVertexCapacity); + outNormalNeedsGeneration.resize(initialOutVertexCapacity, 0u); outUVs.resize(initialOutVertexCapacity); indices.resize(initialOutIndexCapacity); dedupHeadByPos.reserve(estimatedAttributeCount); @@ -585,7 +649,8 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector dedupHotCache(dedupHotEntryCount); const size_t dedupHotMask = dedupHotEntryCount - 1ull; - bool hasNormals = false; + bool hasProvidedNormals = false; + bool needsNormalGeneration = false; bool hasUVs = false; SAABBAccumulator3 parsedAABB = {}; auto allocateOutVertex = [&](uint32_t& outIx) -> bool @@ -595,6 +660,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const size_t newCapacity = std::max(outVertexWriteCount + 1ull, outPositions.size() * 2ull); outPositions.resize(newCapacity); outNormals.resize(newCapacity); + outNormalNeedsGeneration.resize(newCapacity, 0u); outUVs.resize(newCapacity); } if (outVertexWriteCount > static_cast(std::numeric_limits::max())) @@ -627,7 +693,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return ix; }; - auto acquireCornerIndex = [&](const int32_t* idx, uint32_t& outIx)->bool + auto acquireCornerIndex = [&](const int32_t* idx, const uint32_t smoothingGroup, uint32_t& outIx)->bool { if (!idx) return false; @@ -635,6 +701,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const int32_t posIx = idx[0]; if (posIx < 0 || static_cast(posIx) >= positions.size()) return false; + const uint32_t dedupSmoothingGroup = (idx[2] >= 0) ? 
0u : smoothingGroup; if (static_cast(posIx) >= dedupHeadByPos.size()) dedupHeadByPos.resize(positions.size(), -1); @@ -642,7 +709,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as while (nodeIx >= 0) { const auto& node = dedupNodes[static_cast(nodeIx)]; - if (node.uv == idx[1] && node.normal == idx[2]) + if (node.uv == idx[1] && node.normal == idx[2] && node.smoothingGroup == dedupSmoothingGroup) { outIx = node.outIndex; return true; @@ -658,6 +725,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as auto& node = dedupNodes[static_cast(newNodeIx)]; node.uv = idx[1]; node.normal = idx[2]; + node.smoothingGroup = dedupSmoothingGroup; node.outIndex = outIx; node.next = dedupHeadByPos[posIx]; dedupHeadByPos[posIx] = newNodeIx; @@ -674,11 +742,17 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } outUVs[static_cast(outIx)] = uv; - Float3 normal(0.f, 0.f, 1.f); + Float3 normal(0.f, 0.f, 0.f); if (idx[2] >= 0 && static_cast(idx[2]) < normals.size()) { normal = normals[idx[2]]; - hasNormals = true; + hasProvidedNormals = true; + outNormalNeedsGeneration[static_cast(outIx)] = 0u; + } + else + { + needsNormalGeneration = true; + outNormalNeedsGeneration[static_cast(outIx)] = 1u; } outNormals[static_cast(outIx)] = normal; return true; @@ -721,6 +795,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as auto& node = dedupNodes[static_cast(newNodeIx)]; node.uv = uvIx; node.normal = normalIx; + node.smoothingGroup = 0u; node.outIndex = outIx; node.next = dedupHeadByPos[static_cast(posIx)]; dedupHeadByPos[static_cast(posIx)] = newNodeIx; @@ -735,10 +810,12 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as hotEntry.normal = normalIx; hotEntry.outIndex = outIx; hasUVs = true; - hasNormals = true; + hasProvidedNormals = true; + outNormalNeedsGeneration[static_cast(outIx)] = 0u; return true; }; + uint32_t 
currentSmoothingGroup = 0u; while (bufPtr < bufEnd) { const char* const lineStart = bufPtr; @@ -805,6 +882,10 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as uvs.push_back(vec); } } + else if (*lineStart == 's' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) + { + parseObjSmoothingGroup(lineStart + 2, lineEnd, currentSmoothingGroup); + } else if (*lineStart == 'f' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) { if (positions.empty()) @@ -870,11 +951,11 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as uint32_t c0 = 0u; uint32_t c1 = 0u; uint32_t c2 = 0u; - if (!acquireCornerIndex(triIdx0, c0)) + if (!acquireCornerIndex(triIdx0, currentSmoothingGroup, c0)) return {}; - if (!acquireCornerIndex(triIdx1, c1)) + if (!acquireCornerIndex(triIdx1, currentSmoothingGroup, c1)) return {}; - if (!acquireCornerIndex(triIdx2, c2)) + if (!acquireCornerIndex(triIdx2, currentSmoothingGroup, c2)) return {}; faceFallbackTokenCount += 3u; if (!appendIndex(c2) || !appendIndex(c1) || !appendIndex(c0)) @@ -898,7 +979,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as ++faceFallbackTokenCount; uint32_t cornerIx = 0u; - if (!acquireCornerIndex(idx, cornerIx)) + if (!acquireCornerIndex(idx, currentSmoothingGroup, cornerIx)) return {}; if (cornerCount == 0u) @@ -936,9 +1017,79 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as outPositions.resize(outVertexWriteCount); outNormals.resize(outVertexWriteCount); + outNormalNeedsGeneration.resize(outVertexWriteCount); outUVs.resize(outVertexWriteCount); indices.resize(outIndexWriteCount); + if (needsNormalGeneration) + { + core::vector generatedNormals(outVertexWriteCount, Float3(0.f, 0.f, 0.f)); + const size_t triangleCount = indices.size() / 3ull; + for (size_t triIx = 0ull; triIx < triangleCount; ++triIx) + { + const uint32_t i0 = indices[triIx * 3ull + 0ull]; + 
const uint32_t i1 = indices[triIx * 3ull + 1ull]; + const uint32_t i2 = indices[triIx * 3ull + 2ull]; + if (i0 >= outVertexWriteCount || i1 >= outVertexWriteCount || i2 >= outVertexWriteCount) + continue; + + const auto& p0 = outPositions[static_cast(i0)]; + const auto& p1 = outPositions[static_cast(i1)]; + const auto& p2 = outPositions[static_cast(i2)]; + + const float e10x = p1.x - p0.x; + const float e10y = p1.y - p0.y; + const float e10z = p1.z - p0.z; + const float e20x = p2.x - p0.x; + const float e20y = p2.y - p0.y; + const float e20z = p2.z - p0.z; + + const Float3 faceNormal( + e10y * e20z - e10z * e20y, + e10z * e20x - e10x * e20z, + e10x * e20y - e10y * e20x); + + const float faceLenSq = faceNormal.x * faceNormal.x + faceNormal.y * faceNormal.y + faceNormal.z * faceNormal.z; + if (faceLenSq <= 1e-20f) + continue; + + auto accumulateIfNeeded = [&](const uint32_t vertexIx)->void + { + if (outNormalNeedsGeneration[static_cast(vertexIx)] == 0u) + return; + auto& dstNormal = generatedNormals[static_cast(vertexIx)]; + dstNormal.x += faceNormal.x; + dstNormal.y += faceNormal.y; + dstNormal.z += faceNormal.z; + }; + + accumulateIfNeeded(i0); + accumulateIfNeeded(i1); + accumulateIfNeeded(i2); + } + + for (size_t i = 0ull; i < outVertexWriteCount; ++i) + { + if (outNormalNeedsGeneration[i] == 0u) + continue; + + auto normal = generatedNormals[i]; + const float lenSq = normal.x * normal.x + normal.y * normal.y + normal.z * normal.z; + if (lenSq > 1e-20f) + { + const float invLen = 1.f / std::sqrt(lenSq); + normal.x *= invLen; + normal.y *= invLen; + normal.z *= invLen; + } + else + { + normal = Float3(0.f, 0.f, 1.f); + } + outNormals[i] = normal; + } + } + const size_t outVertexCount = outPositions.size(); const size_t outIndexCount = indices.size(); auto geometry = core::make_smart_refctd_ptr(); @@ -949,6 +1100,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as geometry->setPositionView(std::move(view)); } + const bool 
hasNormals = hasProvidedNormals || needsNormalGeneration; if (hasNormals) { auto view = createAdoptedView(std::move(outNormals), EF_R32G32B32_SFLOAT); diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index bef07fef50..1e864b685d 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -223,7 +223,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (!view) continue; const auto channels = getFormatChannelCount(view.composed.format); - if (channels >= 2u) + if (channels == 2u) { uvView = &view; break; diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index e74d24a480..bedc0013df 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -12,6 +12,7 @@ #include "nbl/system/IFile.h" #include +#include #include #include #include @@ -51,79 +52,161 @@ namespace ply_writer_detail constexpr size_t ApproxPlyTextBytesPerVertex = 96ull; constexpr size_t ApproxPlyTextBytesPerFace = 32ull; -bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) +enum class EPlyScalarType : uint8_t { - out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); - return view.decodeElement(ix, out); -} + Int8, + UInt8, + Int16, + UInt16, + Int32, + UInt32, + Float32, + Float64 +}; + +struct SPlyScalarMeta +{ + const char* name = "float32"; + uint32_t byteSize = sizeof(float); + bool integer = false; + bool signedType = true; +}; -template -inline bool readVec3(const ICPUPolygonGeometry::SDataView& view, const hlsl::float32_t3* tightView, const size_t ix, ScalarType (&out)[3]) +SPlyScalarMeta getPlyScalarMeta(const EPlyScalarType type) { - if (tightView) + switch (type) { - out[0] = static_cast(tightView[ix].x); - out[1] = static_cast(tightView[ix].y); - out[2] = static_cast(tightView[ix].z); - return true; + case 
EPlyScalarType::Int8: return { "int8", sizeof(int8_t), true, true }; + case EPlyScalarType::UInt8: return { "uint8", sizeof(uint8_t), true, false }; + case EPlyScalarType::Int16: return { "int16", sizeof(int16_t), true, true }; + case EPlyScalarType::UInt16: return { "uint16", sizeof(uint16_t), true, false }; + case EPlyScalarType::Int32: return { "int32", sizeof(int32_t), true, true }; + case EPlyScalarType::UInt32: return { "uint32", sizeof(uint32_t), true, false }; + case EPlyScalarType::Float64: return { "float64", sizeof(double), false, true }; + default: return { "float32", sizeof(float), false, true }; } +} - hlsl::float64_t4 tmp = {}; - if (!decodeVec4(view, ix, tmp)) - return false; - out[0] = static_cast(tmp.x); - out[1] = static_cast(tmp.y); - out[2] = static_cast(tmp.z); - return true; +bool isPlyUnsupportedPackedFormat(const E_FORMAT format) +{ + switch (format) + { + case EF_A2R10G10B10_UINT_PACK32: + case EF_A2R10G10B10_SINT_PACK32: + case EF_A2R10G10B10_UNORM_PACK32: + case EF_A2R10G10B10_SNORM_PACK32: + case EF_A2R10G10B10_USCALED_PACK32: + case EF_A2R10G10B10_SSCALED_PACK32: + case EF_A2B10G10R10_UINT_PACK32: + case EF_A2B10G10R10_SINT_PACK32: + case EF_A2B10G10R10_UNORM_PACK32: + case EF_A2B10G10R10_SNORM_PACK32: + case EF_A2B10G10R10_USCALED_PACK32: + case EF_A2B10G10R10_SSCALED_PACK32: + case EF_B10G11R11_UFLOAT_PACK32: + case EF_E5B9G9R9_UFLOAT_PACK32: + return true; + default: + return false; + } } -template -inline bool readVec2(const ICPUPolygonGeometry::SDataView& view, const hlsl::float32_t2* tightView, const size_t ix, ScalarType (&out)[2]) +EPlyScalarType selectPlyScalarType(const E_FORMAT format) { - if (tightView) + if (format == EF_UNKNOWN || isPlyUnsupportedPackedFormat(format)) + return EPlyScalarType::Float32; + if (isNormalizedFormat(format) || isScaledFormat(format)) + return EPlyScalarType::Float32; + + const uint32_t channels = getFormatChannelCount(format); + if (channels == 0u) + return EPlyScalarType::Float32; + + const 
auto bytesPerPixel = getBytesPerPixel(format); + if (bytesPerPixel.getDenominator() != 1u) + return EPlyScalarType::Float32; + const uint32_t pixelBytes = bytesPerPixel.getNumerator(); + if (pixelBytes == 0u || (pixelBytes % channels) != 0u) + return EPlyScalarType::Float32; + const uint32_t bytesPerChannel = pixelBytes / channels; + + if (isIntegerFormat(format)) { - out[0] = static_cast(tightView[ix].x); - out[1] = static_cast(tightView[ix].y); - return true; + const bool signedType = isSignedFormat(format); + switch (bytesPerChannel) + { + case 1u: return signedType ? EPlyScalarType::Int8 : EPlyScalarType::UInt8; + case 2u: return signedType ? EPlyScalarType::Int16 : EPlyScalarType::UInt16; + case 4u: return signedType ? EPlyScalarType::Int32 : EPlyScalarType::UInt32; + default: return EPlyScalarType::Float64; + } } - hlsl::float64_t4 tmp = {}; - if (!decodeVec4(view, ix, tmp)) - return false; - out[0] = static_cast(tmp.x); - out[1] = static_cast(tmp.y); - return true; + if (isFloatingPointFormat(format)) + return bytesPerChannel >= 8u ? 
EPlyScalarType::Float64 : EPlyScalarType::Float32; + + return EPlyScalarType::Float32; } -struct SExtraAuxView +bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) { - const ICPUPolygonGeometry::SDataView* view = nullptr; - uint32_t components = 0u; - uint32_t auxIndex = 0u; -}; + out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); + if (!view.composed.isFormatted()) + return false; -template -inline bool emitExtraAuxValues(const core::vector& extraAuxViews, const size_t ix, EmitFn&& emit) -{ - hlsl::float64_t4 tmp = {}; - for (const auto& extra : extraAuxViews) + const void* src = view.getPointer(ix); + if (!src) + return false; + + const void* srcArr[4] = { src, nullptr, nullptr, nullptr }; + double tmp[4] = {}; + if (!decodePixels(view.composed.format, srcArr, tmp, 0u, 0u)) + return false; + + const uint32_t channels = std::min(4u, getFormatChannelCount(view.composed.format)); + if (isNormalizedFormat(view.composed.format)) { - if (!extra.view || !decodeVec4(*extra.view, ix, tmp)) - return false; - const ScalarType values[4] = { - static_cast(tmp.x), - static_cast(tmp.y), - static_cast(tmp.z), - static_cast(tmp.w) - }; - emit(values, extra.components); + const auto range = view.composed.getRange>(); + for (uint32_t i = 0u; i < channels; ++i) + (&out.x)[i] = tmp[i] * (range.maxVx[i] - range.minVx[i]) + range.minVx[i]; + } + else + { + for (uint32_t i = 0u; i < channels; ++i) + (&out.x)[i] = tmp[i]; } return true; } -void appendUInt(std::string& out, const uint32_t value) +bool decodeSigned4Raw(const ICPUPolygonGeometry::SDataView& view, const size_t ix, int64_t (&out)[4]) { - std::array buf = {}; + const void* src = view.getPointer(ix); + if (!src) + return false; + const void* srcArr[4] = { src, nullptr, nullptr, nullptr }; + return decodePixels(view.composed.format, srcArr, out, 0u, 0u); +} + +bool decodeUnsigned4Raw(const ICPUPolygonGeometry::SDataView& view, const size_t ix, uint64_t (&out)[4]) +{ + const void* src = 
view.getPointer(ix); + if (!src) + return false; + const void* srcArr[4] = { src, nullptr, nullptr, nullptr }; + return decodePixels(view.composed.format, srcArr, out, 0u, 0u); +} + +void appendUInt(std::string& out, const uint64_t value) +{ + std::array buf = {}; + const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); + if (res.ec == std::errc()) + out.append(buf.data(), static_cast(res.ptr - buf.data())); +} + +void appendInt(std::string& out, const int64_t value) +{ + std::array buf = {}; const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); if (res.ec == std::errc()) out.append(buf.data(), static_cast(res.ptr - buf.data())); @@ -155,8 +238,209 @@ void appendVec(std::string& out, const double* values, size_t count, bool flipVe } } -bool writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, const core::vector& extraAuxViews, size_t extraAuxFloatCount, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors); -bool writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, const core::vector& extraAuxViews, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors); +inline bool writeTypedViewBinary(const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const EPlyScalarType scalarType, const bool flipVectors, uint8_t*& dst) +{ + if (!dst) + return false; + + switch (scalarType) + { + case EPlyScalarType::Float64: + case EPlyScalarType::Float32: + { + hlsl::float64_t4 tmp = {}; + if (!decodeVec4(view, ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + double value = (&tmp.x)[c]; + if (flipVectors && c == 0u) + value = -value; + if (scalarType == EPlyScalarType::Float64) + { + const double typed = value; + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } 
+ else + { + const float typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } + } + return true; + } + case EPlyScalarType::Int8: + case EPlyScalarType::Int16: + case EPlyScalarType::Int32: + { + int64_t tmp[4] = {}; + if (!decodeSigned4Raw(view, ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + int64_t value = tmp[c]; + if (flipVectors && c == 0u) + value = -value; + switch (scalarType) + { + case EPlyScalarType::Int8: + { + const int8_t typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + break; + } + case EPlyScalarType::Int16: + { + const int16_t typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + break; + } + default: + { + const int32_t typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + break; + } + } + } + return true; + } + case EPlyScalarType::UInt8: + case EPlyScalarType::UInt16: + case EPlyScalarType::UInt32: + { + uint64_t tmp[4] = {}; + if (!decodeUnsigned4Raw(view, ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + uint64_t value = tmp[c]; + switch (scalarType) + { + case EPlyScalarType::UInt8: + { + const uint8_t typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + break; + } + case EPlyScalarType::UInt16: + { + const uint16_t typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + break; + } + default: + { + const uint32_t typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + break; + } + } + } + return true; + } + } + return false; +} + +inline bool writeTypedViewText(std::string& output, const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const EPlyScalarType scalarType, const bool flipVectors) +{ + switch (scalarType) + { + case 
EPlyScalarType::Float64: + case EPlyScalarType::Float32: + { + hlsl::float64_t4 tmp = {}; + if (!decodeVec4(view, ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + double value = (&tmp.x)[c]; + if (flipVectors && c == 0u) + value = -value; + appendFloatFixed6(output, value); + output.push_back(' '); + } + return true; + } + case EPlyScalarType::Int8: + case EPlyScalarType::Int16: + case EPlyScalarType::Int32: + { + int64_t tmp[4] = {}; + if (!decodeSigned4Raw(view, ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + int64_t value = tmp[c]; + if (flipVectors && c == 0u) + value = -value; + appendInt(output, value); + output.push_back(' '); + } + return true; + } + case EPlyScalarType::UInt8: + case EPlyScalarType::UInt16: + case EPlyScalarType::UInt32: + { + uint64_t tmp[4] = {}; + if (!decodeUnsigned4Raw(view, ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + appendUInt(output, tmp[c]); + output.push_back(' '); + } + return true; + } + } + return false; +} + +struct SExtraAuxView +{ + const ICPUPolygonGeometry::SDataView* view = nullptr; + uint32_t components = 0u; + uint32_t auxIndex = 0u; + EPlyScalarType scalarType = EPlyScalarType::Float32; +}; + +bool writeBinary( + const ICPUPolygonGeometry* geom, + const EPlyScalarType positionScalarType, + const ICPUPolygonGeometry::SDataView* uvView, + const EPlyScalarType uvScalarType, + const core::vector& extraAuxViews, + const bool writeNormals, + const EPlyScalarType normalScalarType, + const size_t vertexCount, + const uint32_t* indices, + const size_t faceCount, + const bool write16BitIndices, + uint8_t* dst, + const bool flipVectors); +bool writeText( + const ICPUPolygonGeometry* geom, + const EPlyScalarType positionScalarType, + const ICPUPolygonGeometry::SDataView* uvView, + const EPlyScalarType uvScalarType, + const core::vector& extraAuxViews, + const bool writeNormals, + const EPlyScalarType normalScalarType, + const 
size_t vertexCount, + const uint32_t* indices, + const size_t faceCount, + std::string& output, + const bool flipVectors); } // namespace ply_writer_detail @@ -192,7 +476,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (!view) continue; const auto channels = getFormatChannelCount(view.composed.format); - if (channels >= 2u) + if (channels == 2u) { uvView = &view; break; @@ -200,7 +484,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } core::vector extraAuxViews; - size_t extraAuxFloatCount = 0ull; extraAuxViews.reserve(auxViews.size()); for (uint32_t auxIx = 0u; auxIx < static_cast(auxViews.size()); ++auxIx) { @@ -211,8 +494,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (channels == 0u) continue; const uint32_t components = std::min(4u, channels); - extraAuxViews.push_back({ &view, components, auxIx }); - extraAuxFloatCount += components; + extraAuxViews.push_back({ &view, components, auxIx, selectPlyScalarType(view.composed.format) }); } const size_t vertexCount = positionView.getElementCount(); @@ -281,6 +563,25 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool binary = (flags & E_WRITER_FLAGS::EWF_BINARY) != 0u; + const bool flipVectors = !(flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const bool write16BitIndices = vertexCount <= static_cast(std::numeric_limits::max()) + 1ull; + + EPlyScalarType positionScalarType = selectPlyScalarType(positionView.composed.format); + if (flipVectors && getPlyScalarMeta(positionScalarType).integer && !getPlyScalarMeta(positionScalarType).signedType) + positionScalarType = EPlyScalarType::Float32; + EPlyScalarType normalScalarType = selectPlyScalarType(normalView.composed.format); + if (flipVectors && getPlyScalarMeta(normalScalarType).integer && 
!getPlyScalarMeta(normalScalarType).signedType) + normalScalarType = EPlyScalarType::Float32; + const EPlyScalarType uvScalarType = uvView ? selectPlyScalarType(uvView->composed.format) : EPlyScalarType::Float32; + + const auto positionMeta = getPlyScalarMeta(positionScalarType); + const auto normalMeta = getPlyScalarMeta(normalScalarType); + const auto uvMeta = getPlyScalarMeta(uvScalarType); + + size_t extraAuxBytesPerVertex = 0ull; + for (const auto& extra : extraAuxViews) + extraAuxBytesPerVertex += static_cast(extra.components) * getPlyScalarMeta(extra.scalarType).byteSize; + std::string header = "ply\n"; header += binary ? "format binary_little_endian 1.0" : "format ascii 1.0"; header += "\ncomment Nabla "; @@ -290,28 +591,47 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ header += std::to_string(vertexCount); header += "\n"; - header += "property float x\n"; - header += "property float y\n"; - header += "property float z\n"; + header += "property "; + header += positionMeta.name; + header += " x\n"; + header += "property "; + header += positionMeta.name; + header += " y\n"; + header += "property "; + header += positionMeta.name; + header += " z\n"; if (writeNormals) { - header += "property float nx\n"; - header += "property float ny\n"; - header += "property float nz\n"; + header += "property "; + header += normalMeta.name; + header += " nx\n"; + header += "property "; + header += normalMeta.name; + header += " ny\n"; + header += "property "; + header += normalMeta.name; + header += " nz\n"; } if (uvView) { - header += "property float u\n"; - header += "property float v\n"; + header += "property "; + header += uvMeta.name; + header += " u\n"; + header += "property "; + header += uvMeta.name; + header += " v\n"; } for (const auto& extra : extraAuxViews) { + const auto extraMeta = getPlyScalarMeta(extra.scalarType); for (uint32_t component = 0u; component < extra.components; ++component) { - header += "property float aux"; 
+ header += "property "; + header += extraMeta.name; + header += " aux"; header += std::to_string(extra.auxIndex); if (extra.components > 1u) { @@ -324,21 +644,24 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ header += "element face "; header += std::to_string(faceCount); - header += "\nproperty list uchar uint vertex_indices\n"; + header += write16BitIndices ? "\nproperty list uchar uint16 vertex_indices\n" : "\nproperty list uchar uint32 vertex_indices\n"; header += "end_header\n"; - const bool flipVectors = !(flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); bool writeOk = false; size_t outputBytes = 0ull; if (binary) { - const size_t vertexStride = sizeof(float) * (3u + (writeNormals ? 3u : 0u) + (uvView ? 2u : 0u) + extraAuxFloatCount); - const size_t faceStride = sizeof(uint8_t) + sizeof(uint32_t) * 3u; + const size_t vertexStride = + static_cast(positionMeta.byteSize) * 3ull + + (writeNormals ? static_cast(normalMeta.byteSize) * 3ull : 0ull) + + (uvView ? static_cast(uvMeta.byteSize) * 2ull : 0ull) + + extraAuxBytesPerVertex; + const size_t faceStride = sizeof(uint8_t) + (write16BitIndices ? 
sizeof(uint16_t) : sizeof(uint32_t)) * 3u; const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; core::vector body; body.resize(bodySize); - if (!writeBinary(geom, uvView, extraAuxViews, extraAuxFloatCount, writeNormals, vertexCount, indices, faceCount, body.data(), flipVectors)) + if (!writeBinary(geom, positionScalarType, uvView, uvScalarType, extraAuxViews, writeNormals, normalScalarType, vertexCount, indices, faceCount, write16BitIndices, body.data(), flipVectors)) return false; const size_t outputSize = header.size() + body.size(); @@ -390,7 +713,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ std::string body; body.reserve(vertexCount * ApproxPlyTextBytesPerVertex + faceCount * ApproxPlyTextBytesPerFace); - if (!writeText(geom, uvView, extraAuxViews, writeNormals, vertexCount, indices, faceCount, body, flipVectors)) + if (!writeText(geom, positionScalarType, uvView, uvScalarType, extraAuxViews, writeNormals, normalScalarType, vertexCount, indices, faceCount, body, flipVectors)) return false; const size_t outputSize = header.size() + body.size(); @@ -440,80 +763,42 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return writeOk; } -bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, const core::vector& extraAuxViews, size_t extraAuxFloatCount, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, uint8_t* dst, bool flipVectors) +bool ply_writer_detail::writeBinary( + const ICPUPolygonGeometry* geom, + const EPlyScalarType positionScalarType, + const ICPUPolygonGeometry::SDataView* uvView, + const EPlyScalarType uvScalarType, + const core::vector& extraAuxViews, + const bool writeNormals, + const EPlyScalarType normalScalarType, + const size_t vertexCount, + const uint32_t* indices, + const size_t faceCount, + const bool write16BitIndices, + uint8_t* dst, + const bool 
flipVectors) { if (!dst) return false; - constexpr size_t Float3Bytes = sizeof(float) * 3ull; - constexpr size_t Float2Bytes = sizeof(float) * 2ull; const auto& positionView = geom->getPositionView(); const auto& normalView = geom->getNormalView(); - const hlsl::float32_t3* const tightPos = getTightFloat3View(positionView); - const hlsl::float32_t3* const tightNormal = writeNormals ? getTightFloat3View(normalView) : nullptr; - const bool hasUV = uvView != nullptr; - const hlsl::float32_t2* const tightUV = hasUV ? getTightFloat2View(*uvView) : nullptr; - const bool hasExtraAux = extraAuxFloatCount > 0ull; - if (tightPos && (!writeNormals || tightNormal) && (!hasUV || tightUV) && !hasExtraAux && !flipVectors) - { - for (size_t i = 0; i < vertexCount; ++i) - { - std::memcpy(dst, tightPos + i, Float3Bytes); - dst += Float3Bytes; - if (writeNormals) - { - std::memcpy(dst, tightNormal + i, Float3Bytes); - dst += Float3Bytes; - } - if (hasUV) - { - std::memcpy(dst, tightUV + i, Float2Bytes); - dst += Float2Bytes; - } - } - } - else + for (size_t i = 0; i < vertexCount; ++i) { - for (size_t i = 0; i < vertexCount; ++i) - { - float pos[3] = {}; - if (!readVec3(positionView, tightPos, i, pos)) - return false; - if (flipVectors) - pos[0] = -pos[0]; - std::memcpy(dst, pos, Float3Bytes); - dst += Float3Bytes; + if (!writeTypedViewBinary(positionView, i, 3u, positionScalarType, flipVectors, dst)) + return false; - if (writeNormals) - { - float normal[3] = {}; - if (!readVec3(normalView, tightNormal, i, normal)) - return false; - if (flipVectors) - normal[0] = -normal[0]; - std::memcpy(dst, normal, Float3Bytes); - dst += Float3Bytes; - } + if (writeNormals && !writeTypedViewBinary(normalView, i, 3u, normalScalarType, flipVectors, dst)) + return false; - if (hasUV) - { - float uv[2] = {}; - if (!readVec2(*uvView, tightUV, i, uv)) - return false; - std::memcpy(dst, uv, Float2Bytes); - dst += Float2Bytes; - } + if (uvView && !writeTypedViewBinary(*uvView, i, 2u, uvScalarType, false, 
dst)) + return false; - if (hasExtraAux) - { - if (!emitExtraAuxValues(extraAuxViews, i, [&](const float* values, const uint32_t components) - { - std::memcpy(dst, values, sizeof(float) * components); - dst += sizeof(float) * components; - })) - return false; - } + for (const auto& extra : extraAuxViews) + { + if (!extra.view || !writeTypedViewBinary(*extra.view, i, extra.components, extra.scalarType, false, dst)) + return false; } } @@ -523,49 +808,65 @@ bool ply_writer_detail::writeBinary(const ICPUPolygonGeometry* geom, const ICPUP *dst++ = listSize; const uint32_t* tri = indices + (i * 3u); - std::memcpy(dst, tri, sizeof(uint32_t) * 3u); - dst += sizeof(uint32_t) * 3u; + if (write16BitIndices) + { + const uint16_t tri16[3] = { + static_cast(tri[0]), + static_cast(tri[1]), + static_cast(tri[2]) + }; + std::memcpy(dst, tri16, sizeof(tri16)); + dst += sizeof(tri16); + } + else + { + std::memcpy(dst, tri, sizeof(uint32_t) * 3u); + dst += sizeof(uint32_t) * 3u; + } } return true; } -bool ply_writer_detail::writeText(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView* uvView, const core::vector& extraAuxViews, bool writeNormals, size_t vertexCount, const uint32_t* indices, size_t faceCount, std::string& output, bool flipVectors) +bool ply_writer_detail::writeText( + const ICPUPolygonGeometry* geom, + const EPlyScalarType positionScalarType, + const ICPUPolygonGeometry::SDataView* uvView, + const EPlyScalarType uvScalarType, + const core::vector& extraAuxViews, + const bool writeNormals, + const EPlyScalarType normalScalarType, + const size_t vertexCount, + const uint32_t* indices, + const size_t faceCount, + std::string& output, + const bool flipVectors) { const auto& positionView = geom->getPositionView(); const auto& normalView = geom->getNormalView(); - const hlsl::float32_t3* const tightPos = getTightFloat3View(positionView); - const hlsl::float32_t3* const tightNormal = writeNormals ? 
getTightFloat3View(normalView) : nullptr; - const hlsl::float32_t2* const tightUV = uvView ? getTightFloat2View(*uvView) : nullptr; for (size_t i = 0; i < vertexCount; ++i) { - double pos[3] = {}; - if (!readVec3(positionView, tightPos, i, pos)) + if (!writeTypedViewText(output, positionView, i, 3u, positionScalarType, flipVectors)) return false; - appendVec(output, pos, 3u, flipVectors); if (writeNormals) { - double normal[3] = {}; - if (!readVec3(normalView, tightNormal, i, normal)) + if (!writeTypedViewText(output, normalView, i, 3u, normalScalarType, flipVectors)) return false; - appendVec(output, normal, 3u, flipVectors); } if (uvView) { - double uv[2] = {}; - if (!readVec2(*uvView, tightUV, i, uv)) + if (!writeTypedViewText(output, *uvView, i, 2u, uvScalarType, false)) return false; - appendVec(output, uv, 2u, false); } - if (!emitExtraAuxValues(extraAuxViews, i, [&](const double* values, const uint32_t components) + for (const auto& extra : extraAuxViews) { - appendVec(output, values, components, false); - })) - return false; + if (!extra.view || !writeTypedViewText(output, *extra.view, i, extra.components, extra.scalarType, false)) + return false; + } output += "\n"; } From 02d8913303609ef0c28f47adbdd6075f75fba8e2 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 14 Feb 2026 18:00:41 +0100 Subject: [PATCH 040/118] Restore STL color feature using format conversion API --- .../asset/interchange/CSTLMeshFileLoader.cpp | 69 ++++++++- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 136 +++++++++++++++++- 2 files changed, 195 insertions(+), 10 deletions(-) diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 43e7c54b9d..383ee86b31 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -10,6 +10,7 @@ #include "nbl/asset/interchange/SGeometryContentHashCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include 
"nbl/asset/interchange/SLoaderRuntimeTuning.h" +#include "nbl/asset/format/convertColor.h" #include "nbl/asset/utils/SGeometryAABBCommon.h" #include "nbl/asset/asset.h" #include "nbl/asset/metadata/CSTLMetadata.h" @@ -117,6 +118,14 @@ void stlPushTriangleReversed(const hlsl::float32_t3 (&p)[3], core::vector(src, &outColor, 0u, 0u); + return outColor; +} + class CStlSplitBlockMemoryResource final : public core::refctd_memory_resource { public: @@ -185,6 +194,33 @@ ICPUPolygonGeometry::SDataView stlCreateAdoptedFloat3View(core::vector&& values) +{ + if (values.empty()) + return {}; + + auto backer = core::make_smart_refctd_ptr>>(std::move(values)); + auto& payload = backer->getBacker(); + auto* const payloadPtr = payload.data(); + const size_t byteCount = payload.size() * sizeof(uint32_t); + auto buffer = ICPUBuffer::create({ { byteCount }, payloadPtr, core::smart_refctd_ptr(std::move(backer)), alignof(uint32_t) }, core::adopt_memory); + if (!buffer) + return {}; + + ICPUPolygonGeometry::SDataView view = {}; + view.composed = { + .stride = sizeof(uint32_t), + .format = EF_B8G8R8A8_UNORM, + .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_B8G8R8A8_UNORM) + }; + view.src = { + .offset = 0u, + .size = byteCount, + .buffer = std::move(buffer) + }; + return view; +} + CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager*) { } @@ -203,7 +239,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa uint64_t triangleCount = 0u; const char* parsePath = "unknown"; const bool computeContentHashes = (_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0; - bool contentHashesAssigned = false; + bool hasTriangleColors = false; SSTLContext context = { asset::IAssetLoader::SAssetLoadContext{ _params,_file },0ull }; @@ -393,6 +429,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const uint8_t* const end = cursor + dataSize; if (end < cursor || static_cast(end - cursor) < 
static_cast(triangleCount) * SSTLContext::TriangleRecordBytes) return {}; + core::vector faceColors(static_cast(triangleCount), 0u); + std::atomic_bool colorValidForAllFaces = true; const size_t hw = resolveLoaderHardwareThreads(); const size_t hardMaxWorkers = resolveLoaderHardMaxWorkers(hw, _params.ioPolicy.runtimeTuning.workerHeadroom); SLoaderRuntimeTuningRequest parseTuningRequest = {}; @@ -438,6 +476,12 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa localCursor += SSTLContext::TriangleRecordBytes; float triValues[SSTLContext::TriangleFloatCount]; std::memcpy(triValues, triRecord, sizeof(triValues)); + uint16_t packedColor = 0u; + std::memcpy(&packedColor, triRecord + SSTLContext::TriangleFloatBytes, sizeof(packedColor)); + if (packedColor & 0x8000u) + faceColors[static_cast(tri)] = stlDecodeViscamColorToB8G8R8A8(packedColor); + else + colorValidForAllFaces.store(false, std::memory_order_relaxed); float normalX = triValues[0ull]; float normalY = triValues[1ull]; @@ -641,7 +685,6 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa return {}; posView.src.buffer->setContentHash(parsedPositionHash); normalView.src.buffer->setContentHash(parsedNormalHash); - contentHashesAssigned = true; } if constexpr (ComputeAABBInParse) { @@ -655,6 +698,23 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } geometry->setPositionView(std::move(posView)); geometry->setNormalView(std::move(normalView)); + if (colorValidForAllFaces.load(std::memory_order_relaxed)) + { + core::vector vertexColors(vertexCountSizeT); + for (size_t triIx = 0ull; triIx < static_cast(triangleCount); ++triIx) + { + const uint32_t triColor = faceColors[triIx]; + const size_t baseIx = triIx * SSTLContext::VerticesPerTriangle; + vertexColors[baseIx + 0ull] = triColor; + vertexColors[baseIx + 1ull] = triColor; + vertexColors[baseIx + 2ull] = triColor; + } + auto colorView = 
stlCreateAdoptedColorView(std::move(vertexColors)); + if (!colorView) + return {}; + geometry->getAuxAttributeViews()->push_back(std::move(colorView)); + hasTriangleColors = true; + } } else { @@ -738,7 +798,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (vertexCount == 0ull) return {}; - if (computeContentHashes && !contentHashesAssigned) + if (computeContentHashes) { computeMissingGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); } @@ -762,13 +822,14 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa static_cast(ioAvgRead)); } _params.logger.log( - "STL loader stats: file=%s binary=%d parse_path=%s triangles=%llu vertices=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "STL loader stats: file=%s binary=%d parse_path=%s triangles=%llu vertices=%llu colors=%d io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), binary ? 1 : 0, parsePath, static_cast(triangleCount), static_cast(vertexCount), + hasTriangleColors ? 
1 : 0, static_cast(context.ioTelemetry.callCount), static_cast(ioMinRead), static_cast(ioAvgRead), diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index fc4ced51fe..6c12ebb302 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -5,6 +5,7 @@ #include "nbl/system/IFile.h" #include "CSTLMeshWriter.h" +#include "nbl/asset/format/convertColor.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" @@ -57,6 +58,11 @@ bool writeBytes(SContext* context, const void* data, size_t size); bool decodeTriangleIndices(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView& posView, core::vector& indexData, const uint32_t*& outIndices, uint32_t& outFaceCount); bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx); bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal); +double stlNormalizeColorComponentToUnit(double value); +uint16_t stlPackViscamColorFromB8G8R8A8(uint32_t color); +const ICPUPolygonGeometry::SDataView* stlFindColorView(const ICPUPolygonGeometry* geom, size_t vertexCount); +bool stlDecodeColorB8G8R8A8(const ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, uint32_t& outColor); +void stlDecodeColorUnitRGBAFromB8G8R8A8(uint32_t color, double (&out)[4]); bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context); bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context); bool writeFaceText( @@ -395,6 +401,76 @@ bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, cons return true; } +double 
stlNormalizeColorComponentToUnit(double value) +{ + if (!std::isfinite(value)) + return 0.0; + if (value > 1.0) + value /= 255.0; + return std::clamp(value, 0.0, 1.0); +} + +uint16_t stlPackViscamColorFromB8G8R8A8(const uint32_t color) +{ + const void* src[4] = { &color, nullptr, nullptr, nullptr }; + uint16_t packed = 0u; + convertColor(src, &packed, 0u, 0u); + packed |= 0x8000u; + return packed; +} + +const ICPUPolygonGeometry::SDataView* stlFindColorView(const ICPUPolygonGeometry* geom, const size_t vertexCount) +{ + if (!geom) + return nullptr; + + const auto& auxViews = geom->getAuxAttributeViews(); + const ICPUPolygonGeometry::SDataView* fallback = nullptr; + for (const auto& view : auxViews) + { + if (!view || view.getElementCount() != vertexCount) + continue; + const uint32_t channels = getFormatChannelCount(view.composed.format); + if (channels < 3u) + continue; + if (view.composed.format == EF_B8G8R8A8_UNORM) + return &view; + if (!fallback) + fallback = &view; + } + return fallback; +} + +bool stlDecodeColorB8G8R8A8(const ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, uint32_t& outColor) +{ + if (colorView.composed.format == EF_B8G8R8A8_UNORM && colorView.composed.getStride() == sizeof(uint32_t)) + { + const auto* const ptr = reinterpret_cast(colorView.getPointer()); + if (!ptr) + return false; + std::memcpy(&outColor, ptr + static_cast(ix) * sizeof(uint32_t), sizeof(outColor)); + return true; + } + + hlsl::float64_t4 decoded = {}; + if (!colorView.decodeElement(ix, decoded)) + return false; + const double rgbaUnit[4] = { + stlNormalizeColorComponentToUnit(decoded.x), + stlNormalizeColorComponentToUnit(decoded.y), + stlNormalizeColorComponentToUnit(decoded.z), + stlNormalizeColorComponentToUnit(decoded.w) + }; + encodePixels(&outColor, rgbaUnit); + return true; +} + +void stlDecodeColorUnitRGBAFromB8G8R8A8(const uint32_t color, double (&out)[4]) +{ + const void* src[4] = { &color, nullptr, nullptr, nullptr }; + decodePixels(src, out, 0u, 
0u); +} + bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) { if (!geom || !context || !context->writeContext.outputFile) @@ -429,6 +505,7 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) const auto& normalView = geom->getNormalView(); const bool hasNormals = static_cast(normalView); + const auto* const colorView = stlFindColorView(geom, vertexCount); const hlsl::float32_t3* const tightPositions = getTightFloat3View(posView); const hlsl::float32_t3* const tightNormals = hasNormals ? getTightFloat3View(normalView) : nullptr; const float handednessSign = flipHandedness ? -1.f : 1.f; @@ -454,7 +531,36 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) } return normalView.decodeElement(ix, out); }; - auto writeRecord = [&dst](const float nx, const float ny, const float nz, const float v1x, const float v1y, const float v1z, const float v2x, const float v2y, const float v2z, const float v3x, const float v3y, const float v3z)->void + auto computeFaceColor = [&](const uint32_t i0, const uint32_t i1, const uint32_t i2, uint16_t& outColor)->bool + { + outColor = 0u; + if (!colorView) + return true; + uint32_t c0 = 0u, c1 = 0u, c2 = 0u; + if (!stlDecodeColorB8G8R8A8(*colorView, i0, c0)) + return false; + if (!stlDecodeColorB8G8R8A8(*colorView, i1, c1)) + return false; + if (!stlDecodeColorB8G8R8A8(*colorView, i2, c2)) + return false; + double rgba0[4] = {}; + double rgba1[4] = {}; + double rgba2[4] = {}; + stlDecodeColorUnitRGBAFromB8G8R8A8(c0, rgba0); + stlDecodeColorUnitRGBAFromB8G8R8A8(c1, rgba1); + stlDecodeColorUnitRGBAFromB8G8R8A8(c2, rgba2); + const double rgbaAvg[4] = { + (rgba0[0] + rgba1[0] + rgba2[0]) / 3.0, + (rgba0[1] + rgba1[1] + rgba2[1]) / 3.0, + (rgba0[2] + rgba1[2] + rgba2[2]) / 3.0, + 1.0 + }; + uint32_t avgColor = 0u; + encodePixels(&avgColor, rgbaAvg); + outColor = stlPackViscamColorFromB8G8R8A8(avgColor); + return true; + }; + auto writeRecord = [&dst](const 
float nx, const float ny, const float nz, const float v1x, const float v1y, const float v1z, const float v2x, const float v2y, const float v2z, const float v3x, const float v3y, const float v3z, const uint16_t attribute)->void { const float payload[stl_writer_detail::BinaryTriangleFloatCount] = { nx, ny, nz, @@ -464,7 +570,6 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) }; std::memcpy(dst, payload, stl_writer_detail::BinaryTriangleFloatBytes); dst += stl_writer_detail::BinaryTriangleFloatBytes; - const uint16_t attribute = 0u; std::memcpy(dst, &attribute, stl_writer_detail::BinaryTriangleAttributeBytes); dst += stl_writer_detail::BinaryTriangleAttributeBytes; }; @@ -490,6 +595,10 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) { for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u, nrmTri += 3u) { + uint16_t faceColor = 0u; + if (!computeFaceColor(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u, faceColor)) + return false; + const hlsl::float32_t3 vertex1 = posTri[2u]; const hlsl::float32_t3 vertex2 = posTri[1u]; const hlsl::float32_t3 vertex3 = posTri[0u]; @@ -505,13 +614,18 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) attrNormal.x, attrNormal.y, attrNormal.z, vertex1x, vertex1.y, vertex1.z, vertex2x, vertex2.y, vertex2.z, - vertex3x, vertex3.y, vertex3.z); + vertex3x, vertex3.y, vertex3.z, + faceColor); } } else { for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u, nrmTri += 3u) { + uint16_t faceColor = 0u; + if (!computeFaceColor(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u, faceColor)) + return false; + const hlsl::float32_t3 vertex1 = posTri[2u]; const hlsl::float32_t3 vertex2 = posTri[1u]; const hlsl::float32_t3 vertex3 = posTri[0u]; @@ -562,7 +676,8 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) normalX, normalY, normalZ, vertex1x, vertex1.y, vertex1.z, vertex2x, 
vertex2.y, vertex2.z, - vertex3x, vertex3.y, vertex3.z); + vertex3x, vertex3.y, vertex3.z, + faceColor); } } } @@ -571,6 +686,10 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) const hlsl::float32_t3* posTri = tightPositions; for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u) { + uint16_t faceColor = 0u; + if (!computeFaceColor(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u, faceColor)) + return false; + const hlsl::float32_t3 vertex1 = posTri[2u]; const hlsl::float32_t3 vertex2 = posTri[1u]; const hlsl::float32_t3 vertex3 = posTri[0u]; @@ -601,7 +720,8 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) normalX, normalY, normalZ, vertex1x, vertex1.y, vertex1.z, vertex2x, vertex2.y, vertex2.z, - vertex3x, vertex3.y, vertex3.z); + vertex3x, vertex3.y, vertex3.z, + faceColor); } } else @@ -613,6 +733,9 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) const uint32_t i2 = indices ? 
indices[primIx * 3u + 2u] : (primIx * 3u + 2u); if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) return false; + uint16_t faceColor = 0u; + if (!computeFaceColor(i0, i1, i2, faceColor)) + return false; hlsl::float32_t3 p0 = {}; hlsl::float32_t3 p1 = {}; @@ -680,7 +803,8 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) normal.x, normal.y, normal.z, vertex1.x, vertex1.y, vertex1.z, vertex2.x, vertex2.y, vertex2.z, - vertex3.x, vertex3.y, vertex3.z); + vertex3.x, vertex3.y, vertex3.z, + faceColor); } } From be6d48b5a4aeea23a788b7bf5d16baa2e13ebf1f Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 19 Feb 2026 13:58:52 +0100 Subject: [PATCH 041/118] Address PR 1000 review comments --- examples_tests | 2 +- include/nbl/asset/IAssetManager.h | 26 +- include/nbl/asset/ICPUPolygonGeometry.h | 37 +- .../nbl/asset/interchange/COBJMeshWriter.h | 4 +- include/nbl/asset/interchange/IAssetWriter.h | 11 +- include/nbl/asset/interchange/SFileIOPolicy.h | 221 ++++--- .../interchange/SGeometryContentHashCommon.h | 143 +---- .../asset/interchange/SGeometryWriterCommon.h | 76 +-- .../asset/interchange/SInterchangeIOCommon.h | 286 ++++----- .../asset/utils/CPolygonGeometryManipulator.h | 36 +- include/nbl/asset/utils/SGeometryAABBCommon.h | 84 ++- .../builtin/hlsl/math/linalg/transform.hlsl | 13 +- .../builtin/hlsl/shapes/AABBAccumulator.hlsl | 64 +++ include/nbl/system/ISystem.h | 3 +- src/nbl/asset/interchange/CGLIWriter.h | 4 +- src/nbl/asset/interchange/CGLTFWriter.h | 4 +- src/nbl/asset/interchange/CImageWriterJPG.cpp | 8 +- src/nbl/asset/interchange/CImageWriterJPG.h | 4 +- .../asset/interchange/CImageWriterOpenEXR.h | 4 +- src/nbl/asset/interchange/CImageWriterPNG.h | 6 +- src/nbl/asset/interchange/CImageWriterTGA.h | 4 +- .../asset/interchange/COBJMeshFileLoader.cpp | 543 ++++++++++++------ src/nbl/asset/interchange/COBJMeshWriter.cpp | 37 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 19 +- 
src/nbl/asset/interchange/CPLYMeshWriter.cpp | 32 +- src/nbl/asset/interchange/CPLYMeshWriter.h | 4 +- .../asset/interchange/CSTLMeshFileLoader.cpp | 33 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 39 +- src/nbl/asset/interchange/CSTLMeshWriter.h | 4 +- .../utils/CPolygonGeometryManipulator.cpp | 157 ++--- src/nbl/system/CSystemWin32.cpp | 2 + src/nbl/video/utilities/CAssetConverter.cpp | 4 +- 32 files changed, 1127 insertions(+), 787 deletions(-) create mode 100644 include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl diff --git a/examples_tests b/examples_tests index 294a21a2a6..d8227dbab6 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 294a21a2a661566c6548b0ce7bb93c05edd885a6 +Subproject commit d8227dbab6ceafa3462c21041134070c27e97bfa diff --git a/include/nbl/asset/IAssetManager.h b/include/nbl/asset/IAssetManager.h index c320dcb7c9..edc83edd49 100644 --- a/include/nbl/asset/IAssetManager.h +++ b/include/nbl/asset/IAssetManager.h @@ -181,27 +181,29 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted { IAssetLoader::SAssetLoadContext ctx(_params, nullptr); system::ISystem::future_t> future; + const auto tryLoadAssetFromPath = [&](const system::path& path)->SAssetBundle + { + m_system->createFile(future, path, static_cast(system::IFile::ECF_READ | system::IFile::ECF_MAPPABLE)); + if (auto file=future.acquire()) + return getAssetInHierarchy_impl(file->get(), path.string(), ctx.params, _hierarchyLevel, _override); + m_system->createFile(future, path, system::IFile::ECF_READ); + if (auto file=future.acquire()) + return getAssetInHierarchy_impl(file->get(), path.string(), ctx.params, _hierarchyLevel, _override); + return SAssetBundle(0); + }; system::path filePath = _filePath; _override->getLoadFilename(filePath, m_system.get(), ctx, _hierarchyLevel); - m_system->createFile(future, filePath, static_cast(system::IFile::ECF_READ | system::IFile::ECF_MAPPABLE)); - if (auto file=future.acquire()) - return 
getAssetInHierarchy_impl(file->get(), filePath.string(), ctx.params, _hierarchyLevel, _override); - m_system->createFile(future, filePath, system::IFile::ECF_READ); - if (auto file=future.acquire()) - return getAssetInHierarchy_impl(file->get(), filePath.string(), ctx.params, _hierarchyLevel, _override); + if (auto bundle=tryLoadAssetFromPath(filePath); !bundle.getContents().empty()) + return bundle; auto fallbackPath = _params.workingDirectory / filePath; if (fallbackPath != filePath) { filePath = std::move(fallbackPath); _override->getLoadFilename(filePath, m_system.get(), ctx, _hierarchyLevel); - m_system->createFile(future, filePath, static_cast(system::IFile::ECF_READ | system::IFile::ECF_MAPPABLE)); - if (auto file=future.acquire()) - return getAssetInHierarchy_impl(file->get(), filePath.string(), ctx.params, _hierarchyLevel, _override); - m_system->createFile(future, filePath, system::IFile::ECF_READ); - if (auto file=future.acquire()) - return getAssetInHierarchy_impl(file->get(), filePath.string(), ctx.params, _hierarchyLevel, _override); + if (auto bundle=tryLoadAssetFromPath(filePath); !bundle.getContents().empty()) + return bundle; } return SAssetBundle(0); } diff --git a/include/nbl/asset/ICPUPolygonGeometry.h b/include/nbl/asset/ICPUPolygonGeometry.h index 2fb640e02b..b8d97a54a3 100644 --- a/include/nbl/asset/ICPUPolygonGeometry.h +++ b/include/nbl/asset/ICPUPolygonGeometry.h @@ -113,6 +113,41 @@ class NBL_API2 ICPUPolygonGeometry final : public IPolygonGeometry } template inline bool setAABB(const hlsl::shapes::AABB<3,Scalar>& aabb) {return visitAABB([&aabb](auto&& ref)->void{ref=aabb;});} + template + inline bool applyAABB(const hlsl::shapes::AABB<3, Scalar>& aabb) + { + if ( + aabb.minVx.x > aabb.maxVx.x || + aabb.minVx.y > aabb.maxVx.y || + aabb.minVx.z > aabb.maxVx.z) + return false; + return visitAABB([&aabb](auto&& ref)->void + { + if constexpr (requires { ref.minVx.x; ref.minVx.y; ref.minVx.z; ref.maxVx.x; ref.maxVx.y; ref.maxVx.z; }) + { + 
ref.minVx.x = static_cast(aabb.minVx.x); + ref.minVx.y = static_cast(aabb.minVx.y); + ref.minVx.z = static_cast(aabb.minVx.z); + ref.maxVx.x = static_cast(aabb.maxVx.x); + ref.maxVx.y = static_cast(aabb.maxVx.y); + ref.maxVx.z = static_cast(aabb.maxVx.z); + if constexpr (requires { ref.minVx.w; ref.maxVx.w; }) + { + ref.minVx.w = 0; + ref.maxVx.w = 0; + } + } + else + { + ref.minVx[0] = static_cast(aabb.minVx[0]); + ref.minVx[1] = static_cast(aabb.minVx[1]); + ref.minVx[2] = static_cast(aabb.minVx[2]); + ref.maxVx[0] = static_cast(aabb.maxVx[0]); + ref.maxVx[1] = static_cast(aabb.maxVx[1]); + ref.maxVx[2] = static_cast(aabb.maxVx[2]); + } + }); + } // inline bool setJointCount(const uint32_t count) @@ -194,4 +229,4 @@ class NBL_API2 ICPUPolygonGeometry final : public IPolygonGeometry }; } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/interchange/COBJMeshWriter.h b/include/nbl/asset/interchange/COBJMeshWriter.h index fed9898659..d17cbbcf69 100644 --- a/include/nbl/asset/interchange/COBJMeshWriter.h +++ b/include/nbl/asset/interchange/COBJMeshWriter.h @@ -19,9 +19,9 @@ class COBJMeshWriter : public IGeometryWriter const char** getAssociatedFileExtensions() const override; - uint32_t getSupportedFlags() override; + writer_flags_t getSupportedFlags() override; - uint32_t getForcedFlags() override; + writer_flags_t getForcedFlags() override; bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; diff --git a/include/nbl/asset/interchange/IAssetWriter.h b/include/nbl/asset/interchange/IAssetWriter.h index 46055c08a1..4a4734fedb 100644 --- a/include/nbl/asset/interchange/IAssetWriter.h +++ b/include/nbl/asset/interchange/IAssetWriter.h @@ -40,6 +40,7 @@ enum E_WRITER_FLAGS : uint32_t //!< specifies the incoming orientation of loaded mesh we want to write. 
Flipping will be performed if needed in dependency of format extension orientation EWF_MESH_IS_RIGHT_HANDED = 1u << 3u }; +using writer_flags_t = core::bitflag; //! A class that defines rules during Asset-writing (saving) process /** @@ -86,7 +87,7 @@ class IAssetWriter : public virtual core::IReferenceCounted */ struct SAssetWriteParams { - SAssetWriteParams(IAsset* _asset, const E_WRITER_FLAGS& _flags = EWF_NONE, const float& _compressionLevel = 0.f, const size_t& _encryptionKeyLen = 0, const uint8_t* _encryptionKey = nullptr, const void* _userData = nullptr, const system::logger_opt_ptr _logger = nullptr, system::path cwd = "", const SFileIOPolicy& _ioPolicy = {}) : + SAssetWriteParams(IAsset* _asset, const writer_flags_t _flags = EWF_NONE, const float& _compressionLevel = 0.f, const size_t& _encryptionKeyLen = 0, const uint8_t* _encryptionKey = nullptr, const void* _userData = nullptr, const system::logger_opt_ptr _logger = nullptr, system::path cwd = "", const SFileIOPolicy& _ioPolicy = {}) : rootAsset(_asset), flags(_flags), compressionLevel(_compressionLevel), encryptionKeyLen(_encryptionKeyLen), encryptionKey(_encryptionKey), userData(_userData), logger(_logger), workingDirectory(cwd), ioPolicy(_ioPolicy) @@ -94,7 +95,7 @@ class IAssetWriter : public virtual core::IReferenceCounted } const IAsset* rootAsset; //!< An Asset on which entire writing process is based. - E_WRITER_FLAGS flags; //!< Flags set by user that defines rules during writing process. + writer_flags_t flags; //!< Flags set by user that defines rules during writing process. float compressionLevel; //!< The more compression level, the more expensive (slower) compression algorithm is launched. size_t encryptionKeyLen; //!< Stores a size of data in encryptionKey pointer for correct iteration. const uint8_t* encryptionKey; //!< Stores an encryption key used for encryption process. 
@@ -132,10 +133,10 @@ class IAssetWriter : public virtual core::IReferenceCounted virtual uint64_t getSupportedAssetTypesBitfield() const { return 0; } //! Returns which flags are supported for writing modes - virtual uint32_t getSupportedFlags() = 0; + virtual writer_flags_t getSupportedFlags() = 0; //! Returns which flags are forced for writing modes, i.e. a writer can only support binary - virtual uint32_t getForcedFlags() = 0; + virtual writer_flags_t getForcedFlags() = 0; //! Override class to facilitate changing how assets are written, especially the sub-assets /* @@ -148,7 +149,7 @@ class IAssetWriter : public virtual core::IReferenceCounted //! The only reason these functions are not declared static is to allow stateful overrides public: //! To allow the asset writer to write different sub-assets with different flags - inline virtual E_WRITER_FLAGS getAssetWritingFlags(const SAssetWriteContext& ctx, const IAsset* assetToWrite, const uint32_t& hierarchyLevel) + inline virtual writer_flags_t getAssetWritingFlags(const SAssetWriteContext& ctx, const IAsset* assetToWrite, const uint32_t& hierarchyLevel) { return ctx.params.flags; } diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 65a688628a..04ffbff3ba 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -5,8 +5,12 @@ #define _NBL_ASSET_S_FILE_IO_POLICY_H_INCLUDED_ +#include "nbl/core/util/bitflag.h" +#include "nbl/system/to_string.h" + #include #include +#include namespace nbl::asset @@ -36,19 +40,19 @@ struct SFileIOPolicy // Minimum expected gain required to keep extra workers enabled. float minExpectedGainRatio = 0.03f; // Hard cap for worker count. 0 means auto. - uint32_t maxWorkers = 0u; + uint16_t maxWorkers = 0u; // Reserved hardware threads not used by the loader. Prevents full CPU saturation. 
- uint32_t workerHeadroom = 2u; + uint8_t workerHeadroom = 2u; // Maximum number of worker-count candidates tested in hybrid mode. - uint32_t samplingMaxCandidates = 4u; + uint8_t samplingMaxCandidates = 4u; // Number of benchmark passes per candidate in hybrid mode. - uint32_t samplingPasses = 1u; + uint8_t samplingPasses = 1u; // Minimum work units required before hybrid sampling is allowed. 0 means auto. uint64_t samplingMinWorkUnits = 0ull; // Target chunk count assigned to each worker for loader stages. - uint32_t targetChunksPerWorker = 4u; + uint8_t targetChunksPerWorker = 4u; // Target chunk count assigned to each worker for hash stages. - uint32_t hashTaskTargetChunksPerWorker = 1u; + uint8_t hashTaskTargetChunksPerWorker = 1u; // Hash inlining threshold. Inputs up to this size prefer inline hash build. uint64_t hashInlineThresholdBytes = 1ull << 20; // Lower bound for sampled byte count in hybrid mode. @@ -70,24 +74,63 @@ struct SFileIOPolicy { // Pick whole-file or chunked dynamically based on file size and policy limits. Auto, - // Force whole-file path. May fallback when not feasible unless strict=true. + // Force whole-file strategy. May fallback when not feasible unless strict=true. WholeFile, - // Force chunked path. + // Force chunked strategy. Chunked }; + enum E_FLAGS : uint8_t + { + EF_NONE = 0u, + EF_STRICT_BIT = 1u << 0u + }; + + static inline constexpr uint8_t MIN_CHUNK_SIZE_LOG2 = 16u; + static inline constexpr uint8_t MAX_BYTE_SIZE_LOG2 = 63u; + + static inline constexpr uint8_t clampBytesLog2(const uint8_t value, const uint8_t minValue = 0u) + { + return std::clamp(value, minValue, MAX_BYTE_SIZE_LOG2); + } + + static inline constexpr uint64_t bytesFromLog2(const uint8_t value, const uint8_t minValue = 0u) + { + return 1ull << clampBytesLog2(value, minValue); + } + // Requested IO strategy. Strategy strategy = Strategy::Auto; - // If true and requested strategy is not feasible then resolution fails instead of fallback. 
- bool strict = false; + // Resolution flags. + core::bitflag flags = EF_NONE; // Maximum payload size allowed for whole-file strategy in auto mode. - uint64_t wholeFileThresholdBytes = 64ull * 1024ull * 1024ull; - // Chunk size used by chunked strategy. - uint64_t chunkSizeBytes = 4ull * 1024ull * 1024ull; - // Maximum staging allocation allowed for whole-file strategy. - uint64_t maxStagingBytes = 256ull * 1024ull * 1024ull; + uint8_t wholeFileThresholdLog2 = 26u; // 64 MiB + // Chunk size used by chunked strategy encoded as log2(bytes). + uint8_t chunkSizeLog2 = 22u; // 4 MiB + // Maximum staging allocation for whole-file strategy encoded as log2(bytes). + uint8_t maxStagingLog2 = 28u; // 256 MiB // Runtime tuning controls used by loaders and hash stages. SRuntimeTuning runtimeTuning = {}; + + inline bool strict() const + { + return flags.hasAnyFlag(EF_STRICT_BIT); + } + + inline uint64_t wholeFileThresholdBytes() const + { + return bytesFromLog2(wholeFileThresholdLog2, MIN_CHUNK_SIZE_LOG2); + } + + inline uint64_t chunkSizeBytes() const + { + return bytesFromLog2(chunkSizeLog2, MIN_CHUNK_SIZE_LOG2); + } + + inline uint64_t maxStagingBytes() const + { + return bytesFromLog2(maxStagingLog2, MIN_CHUNK_SIZE_LOG2); + } }; struct SResolvedFileIOPolicy @@ -95,82 +138,89 @@ struct SResolvedFileIOPolicy // Strategy selected after resolving SFileIOPolicy against runtime constraints. enum class Strategy : uint8_t { + Invalid = 0u, WholeFile, Chunked }; - // Effective strategy chosen by resolver. - Strategy strategy = Strategy::Chunked; - // Effective chunk size. Also set for whole-file for telemetry consistency. - uint64_t chunkSizeBytes = 0ull; - // False when strict policy cannot be satisfied. - bool valid = true; - // Human-readable resolver reason used in logs and diagnostics. 
- const char* reason = "ok"; -}; + SResolvedFileIOPolicy() = default; + inline SResolvedFileIOPolicy(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) : + SResolvedFileIOPolicy(resolve(policy, byteCount, sizeKnown, fileMappable)) + { + } -inline SResolvedFileIOPolicy resolveFileIOPolicy(const SFileIOPolicy& _policy, const uint64_t byteCount, const bool sizeKnown = true) -{ - constexpr uint64_t MIN_CHUNK_SIZE = 64ull * 1024ull; + // Effective strategy chosen by resolver. Invalid means strict policy resolution failed. + Strategy strategy = Strategy::Invalid; + // Effective chunk size encoded as log2(bytes). Also set for whole-file for telemetry consistency. + uint8_t chunkSizeLog2 = SFileIOPolicy::MIN_CHUNK_SIZE_LOG2; + // Human-readable resolver reason used in logs and diagnostics. + const char* reason = "invalid"; - const uint64_t maxStaging = std::max(_policy.maxStagingBytes, MIN_CHUNK_SIZE); - const uint64_t requestedChunk = std::max(_policy.chunkSizeBytes, MIN_CHUNK_SIZE); - const uint64_t chunkSize = std::min(requestedChunk, maxStaging); + inline bool isValid() const + { + return strategy != Strategy::Invalid; + } - auto makeChunked = [&](const char* reason) -> SResolvedFileIOPolicy + inline uint64_t chunkSizeBytes() const { - return SResolvedFileIOPolicy{ - .strategy = SResolvedFileIOPolicy::Strategy::Chunked, - .chunkSizeBytes = chunkSize, - .valid = true, - .reason = reason - }; - }; - auto makeWhole = [&](const char* reason) -> SResolvedFileIOPolicy + return SFileIOPolicy::bytesFromLog2(chunkSizeLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); + } + + static inline SResolvedFileIOPolicy resolve(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) { - return SResolvedFileIOPolicy{ - .strategy = SResolvedFileIOPolicy::Strategy::WholeFile, - .chunkSizeBytes = chunkSize, - .valid = true, - .reason = reason + const uint8_t maxStagingLog2 = 
SFileIOPolicy::clampBytesLog2(policy.maxStagingLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); + const uint8_t chunkSizeLog2 = std::min( + SFileIOPolicy::clampBytesLog2(policy.chunkSizeLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2), + maxStagingLog2); + const uint64_t maxStaging = SFileIOPolicy::bytesFromLog2(maxStagingLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); + const uint64_t wholeThreshold = policy.wholeFileThresholdBytes(); + + auto makeResolved = [&](const Strategy strategy, const char* const reason) -> SResolvedFileIOPolicy + { + SResolvedFileIOPolicy resolved = {}; + resolved.strategy = strategy; + resolved.chunkSizeLog2 = chunkSizeLog2; + resolved.reason = reason; + return resolved; }; - }; - switch (_policy.strategy) - { - case SFileIOPolicy::Strategy::WholeFile: + switch (policy.strategy) { - if (sizeKnown && byteCount <= maxStaging) - return makeWhole("requested_whole_file"); - if (_policy.strict) + case SFileIOPolicy::Strategy::WholeFile: { - return SResolvedFileIOPolicy{ - .strategy = SResolvedFileIOPolicy::Strategy::WholeFile, - .chunkSizeBytes = chunkSize, - .valid = false, - .reason = "whole_file_not_feasible_strict" - }; + if (fileMappable || (sizeKnown && byteCount <= maxStaging)) + return makeResolved(Strategy::WholeFile, fileMappable ? "requested_whole_file_mappable" : "requested_whole_file"); + if (policy.strict()) + return makeResolved(Strategy::Invalid, "whole_file_not_feasible_strict"); + return makeResolved(Strategy::Chunked, sizeKnown ? "whole_file_not_feasible_fallback_chunked" : "whole_file_unknown_size_fallback_chunked"); + } + case SFileIOPolicy::Strategy::Chunked: + return makeResolved(Strategy::Chunked, "requested_chunked"); + case SFileIOPolicy::Strategy::Auto: + default: + { + if (!sizeKnown) + return makeResolved(fileMappable ? Strategy::WholeFile : Strategy::Chunked, fileMappable ? "auto_unknown_size_mappable_whole_file" : "auto_unknown_size"); + + const uint64_t wholeLimit = fileMappable ? 
+ std::max(wholeThreshold, maxStaging) : + std::min(wholeThreshold, maxStaging); + if (byteCount <= wholeLimit) + return makeResolved(Strategy::WholeFile, fileMappable ? "auto_mappable_prefers_whole_file" : "auto_small_enough_for_whole_file"); + return makeResolved(Strategy::Chunked, "auto_too_large_for_whole_file"); } - return makeChunked(sizeKnown ? "whole_file_not_feasible_fallback_chunked" : "whole_file_unknown_size_fallback_chunked"); - } - case SFileIOPolicy::Strategy::Chunked: - return makeChunked("requested_chunked"); - case SFileIOPolicy::Strategy::Auto: - default: - { - if (!sizeKnown) - return makeChunked("auto_unknown_size"); - const uint64_t wholeThreshold = std::min(_policy.wholeFileThresholdBytes, maxStaging); - if (byteCount <= wholeThreshold) - return makeWhole("auto_small_enough_for_whole_file"); - return makeChunked("auto_too_large_for_whole_file"); } } +}; + +inline SResolvedFileIOPolicy resolveFileIOPolicy(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) +{ + return SResolvedFileIOPolicy(policy, byteCount, sizeKnown, fileMappable); } -inline const char* toString(const SFileIOPolicy::Strategy strategy) +inline const char* toString(const SFileIOPolicy::Strategy value) { - switch (strategy) + switch (value) { case SFileIOPolicy::Strategy::Auto: return "auto"; @@ -183,10 +233,12 @@ inline const char* toString(const SFileIOPolicy::Strategy strategy) } } -inline const char* toString(const SResolvedFileIOPolicy::Strategy strategy) +inline const char* toString(const SResolvedFileIOPolicy::Strategy value) { - switch (strategy) + switch (value) { + case SResolvedFileIOPolicy::Strategy::Invalid: + return "invalid"; case SResolvedFileIOPolicy::Strategy::WholeFile: return "whole"; case SResolvedFileIOPolicy::Strategy::Chunked: @@ -198,4 +250,25 @@ inline const char* toString(const SResolvedFileIOPolicy::Strategy strategy) } +namespace nbl::system::impl +{ +template<> +struct 
to_string_helper +{ + static inline std::string __call(const asset::SFileIOPolicy::Strategy value) + { + return asset::toString(value); + } +}; + +template<> +struct to_string_helper +{ + static inline std::string __call(const asset::SResolvedFileIOPolicy::Strategy value) + { + return asset::toString(value); + } +}; +} + #endif diff --git a/include/nbl/asset/interchange/SGeometryContentHashCommon.h b/include/nbl/asset/interchange/SGeometryContentHashCommon.h index e3577ae461..e9e5a07dfe 100644 --- a/include/nbl/asset/interchange/SGeometryContentHashCommon.h +++ b/include/nbl/asset/interchange/SGeometryContentHashCommon.h @@ -5,142 +5,39 @@ #define _NBL_ASSET_S_GEOMETRY_CONTENT_HASH_COMMON_H_INCLUDED_ -#include "nbl/asset/ICPUPolygonGeometry.h" -#include "nbl/asset/interchange/SLoaderRuntimeTuning.h" - -#include -#include -#include +#include "nbl/asset/utils/CPolygonGeometryManipulator.h" namespace nbl::asset { -enum class EGeometryContentHashMode : uint8_t -{ - MissingOnly, - RecomputeAll -}; - -inline void collectGeometryBuffers( - ICPUPolygonGeometry* geometry, - core::vector>& buffers) +class SPolygonGeometryContentHash { - buffers.clear(); - if (!geometry) - return; + public: + using EMode = CPolygonGeometryManipulator::EContentHashMode; - auto appendViewBuffer = [&buffers](const IGeometry::SDataView& view) -> void - { - if (!view || !view.src.buffer) - return; - for (const auto& existing : buffers) + static inline void collectBuffers( + ICPUPolygonGeometry* geometry, + core::vector>& buffers) { - if (existing.get() == view.src.buffer.get()) - return; + CPolygonGeometryManipulator::collectUniqueBuffers(geometry, buffers); } - buffers.push_back(core::smart_refctd_ptr(view.src.buffer)); - }; - - appendViewBuffer(geometry->getPositionView()); - appendViewBuffer(geometry->getIndexView()); - appendViewBuffer(geometry->getNormalView()); - for (const auto& view : *geometry->getAuxAttributeViews()) - appendViewBuffer(view); - for (const auto& view : 
*geometry->getJointWeightViews()) - { - appendViewBuffer(view.indices); - appendViewBuffer(view.weights); - } - if (auto jointOBB = geometry->getJointOBBView(); jointOBB) - appendViewBuffer(*jointOBB); -} - -inline void computeGeometryContentHashesParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy, const EGeometryContentHashMode mode = EGeometryContentHashMode::MissingOnly) -{ - if (!geometry) - return; - - core::vector> buffers; - collectGeometryBuffers(geometry, buffers); - if (buffers.empty()) - return; - - core::vector pending; - pending.reserve(buffers.size()); - uint64_t totalBytes = 0ull; - for (size_t i = 0ull; i < buffers.size(); ++i) - { - auto& buffer = buffers[i]; - if (!buffer) - continue; - if (mode == EGeometryContentHashMode::MissingOnly && buffer->getContentHash() != IPreHashed::INVALID_HASH) - continue; - totalBytes += static_cast(buffer->getSize()); - pending.push_back(i); - } - if (pending.empty()) - return; - - const size_t hw = resolveLoaderHardwareThreads(); - const uint8_t* hashSampleData = nullptr; - uint64_t hashSampleBytes = 0ull; - for (const auto pendingIx : pending) - { - auto& buffer = buffers[pendingIx]; - const auto* ptr = reinterpret_cast(buffer->getPointer()); - if (!ptr) - continue; - hashSampleData = ptr; - hashSampleBytes = resolveLoaderRuntimeSampleBytes(ioPolicy, static_cast(buffer->getSize())); - if (hashSampleBytes > 0ull) - break; - } - - SLoaderRuntimeTuningRequest tuningRequest = {}; - tuningRequest.inputBytes = totalBytes; - tuningRequest.totalWorkUnits = pending.size(); - tuningRequest.minBytesPerWorker = std::max(1ull, loaderRuntimeCeilDiv(totalBytes, static_cast(pending.size()))); - tuningRequest.hardwareThreads = static_cast(hw); - const size_t hardMaxWorkers = resolveLoaderHardMaxWorkers(hw, ioPolicy.runtimeTuning.workerHeadroom); - tuningRequest.hardMaxWorkers = static_cast(std::min(pending.size(), hardMaxWorkers)); - tuningRequest.targetChunksPerWorker = 
ioPolicy.runtimeTuning.hashTaskTargetChunksPerWorker; - tuningRequest.sampleData = hashSampleData; - tuningRequest.sampleBytes = hashSampleBytes; - const auto tuning = tuneLoaderRuntime(ioPolicy, tuningRequest); - const size_t workerCount = std::min(tuning.workerCount, pending.size()); - if (workerCount > 1ull) - { - loaderRuntimeDispatchWorkers(workerCount, [&](const size_t workerIx) + static inline void computeParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy, const EMode mode = EMode::MissingOnly) { - const size_t beginIx = (pending.size() * workerIx) / workerCount; - const size_t endIx = (pending.size() * (workerIx + 1ull)) / workerCount; - for (size_t i = beginIx; i < endIx; ++i) - { - auto& buffer = buffers[pending[i]]; - buffer->setContentHash(buffer->computeContentHash()); - } - }); - return; - } - - for (const auto pendingIx : pending) - { - auto& buffer = buffers[pendingIx]; - buffer->setContentHash(buffer->computeContentHash()); - } -} + CPolygonGeometryManipulator::computeContentHashesParallel(geometry, ioPolicy, mode); + } -inline void computeMissingGeometryContentHashesParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) -{ - computeGeometryContentHashesParallel(geometry, ioPolicy, EGeometryContentHashMode::MissingOnly); -} + static inline void computeMissingParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) + { + CPolygonGeometryManipulator::computeMissingContentHashesParallel(geometry, ioPolicy); + } -inline void recomputeGeometryContentHashesParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) -{ - computeGeometryContentHashesParallel(geometry, ioPolicy, EGeometryContentHashMode::RecomputeAll); -} + static inline void recomputeParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) + { + CPolygonGeometryManipulator::recomputeContentHashesParallel(geometry, ioPolicy); + } +}; } diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h 
b/include/nbl/asset/interchange/SGeometryWriterCommon.h index 172b72feb2..633c26a6a5 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -15,43 +15,47 @@ namespace nbl::asset { -inline const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) +class SGeometryWriterCommon { - if (!view) - return nullptr; - if (view.composed.format != EF_R32G32B32_SFLOAT) - return nullptr; - if (view.composed.getStride() != sizeof(hlsl::float32_t3)) - return nullptr; - return reinterpret_cast(view.getPointer()); -} - -inline const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& view) -{ - if (!view) - return nullptr; - if (view.composed.format != EF_R32G32_SFLOAT) - return nullptr; - if (view.composed.getStride() != sizeof(hlsl::float32_t2)) - return nullptr; - return reinterpret_cast(view.getPointer()); -} - -inline char* appendFloatFixed6ToBuffer(char* dst, char* const end, const float value) -{ - if (!dst || dst >= end) - return end; - - const auto result = std::to_chars(dst, end, value, std::chars_format::fixed, 6); - if (result.ec == std::errc()) - return result.ptr; - - const int written = std::snprintf(dst, static_cast(end - dst), "%.6f", static_cast(value)); - if (written <= 0) - return dst; - const size_t writeLen = static_cast(written); - return (writeLen < static_cast(end - dst)) ? 
(dst + writeLen) : end; -} + public: + static inline const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) + { + if (!view) + return nullptr; + if (view.composed.format != EF_R32G32B32_SFLOAT) + return nullptr; + if (view.composed.getStride() != sizeof(hlsl::float32_t3)) + return nullptr; + return reinterpret_cast(view.getPointer()); + } + + static inline const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& view) + { + if (!view) + return nullptr; + if (view.composed.format != EF_R32G32_SFLOAT) + return nullptr; + if (view.composed.getStride() != sizeof(hlsl::float32_t2)) + return nullptr; + return reinterpret_cast(view.getPointer()); + } + + static inline char* appendFloatFixed6ToBuffer(char* dst, char* const end, const float value) + { + if (!dst || dst >= end) + return end; + + const auto result = std::to_chars(dst, end, value, std::chars_format::fixed, 6); + if (result.ec == std::errc()) + return result.ptr; + + const int written = std::snprintf(dst, static_cast(end - dst), "%.6f", static_cast(value)); + if (written <= 0) + return dst; + const size_t writeLen = static_cast(written); + return (writeLen < static_cast(end - dst)) ? (dst + writeLen) : end; + } +}; } diff --git a/include/nbl/asset/interchange/SInterchangeIOCommon.h b/include/nbl/asset/interchange/SInterchangeIOCommon.h index a499ae121b..f5fc01ebaf 100644 --- a/include/nbl/asset/interchange/SInterchangeIOCommon.h +++ b/include/nbl/asset/interchange/SInterchangeIOCommon.h @@ -18,163 +18,173 @@ namespace nbl::asset { -struct SFileIOTelemetry +class SInterchangeIOCommon { - uint64_t callCount = 0ull; - uint64_t totalBytes = 0ull; - uint64_t minBytes = std::numeric_limits::max(); - - inline void account(const uint64_t bytes) - { - ++callCount; - totalBytes += bytes; - if (bytes < minBytes) - minBytes = bytes; - } - - inline uint64_t getMinOrZero() const - { - return callCount ? 
minBytes : 0ull; - } - - inline uint64_t getAvgOrZero() const - { - return callCount ? (totalBytes / callCount) : 0ull; - } -}; + public: + struct STelemetry + { + uint64_t callCount = 0ull; + uint64_t totalBytes = 0ull; + uint64_t minBytes = std::numeric_limits::max(); -using SFileReadTelemetry = SFileIOTelemetry; -using SFileWriteTelemetry = SFileIOTelemetry; + inline void account(const uint64_t bytes) + { + ++callCount; + totalBytes += bytes; + if (bytes < minBytes) + minBytes = bytes; + } -inline bool isTinyIOTelemetryLikely( - const SFileIOTelemetry& telemetry, - const uint64_t payloadBytes, - const uint64_t bigPayloadThresholdBytes = (1ull << 20), - const uint64_t lowAvgBytesThreshold = 1024ull, - const uint64_t tinyChunkBytesThreshold = 64ull, - const uint64_t tinyChunkCallsThreshold = 1024ull) -{ - if (payloadBytes <= bigPayloadThresholdBytes) - return false; - - const uint64_t minBytes = telemetry.getMinOrZero(); - const uint64_t avgBytes = telemetry.getAvgOrZero(); - return - avgBytes < lowAvgBytesThreshold || - (minBytes < tinyChunkBytesThreshold && telemetry.callCount > tinyChunkCallsThreshold); -} + inline uint64_t getMinOrZero() const + { + return callCount ? minBytes : 0ull; + } -inline bool isTinyIOTelemetryLikely(const SFileIOTelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) -{ - return isTinyIOTelemetryLikely( - telemetry, - payloadBytes, - ioPolicy.runtimeTuning.tinyIoPayloadThresholdBytes, - ioPolicy.runtimeTuning.tinyIoAvgBytesThreshold, - ioPolicy.runtimeTuning.tinyIoMinBytesThreshold, - ioPolicy.runtimeTuning.tinyIoMinCallCount); -} + inline uint64_t getAvgOrZero() const + { + return callCount ? 
(totalBytes / callCount) : 0ull; + } + }; + + using SReadTelemetry = STelemetry; + using SWriteTelemetry = STelemetry; + + static inline bool isTinyIOTelemetryLikely( + const STelemetry& telemetry, + const uint64_t payloadBytes, + const uint64_t bigPayloadThresholdBytes = (1ull << 20), + const uint64_t lowAvgBytesThreshold = 1024ull, + const uint64_t tinyChunkBytesThreshold = 64ull, + const uint64_t tinyChunkCallsThreshold = 1024ull) + { + if (payloadBytes <= bigPayloadThresholdBytes) + return false; + + const uint64_t minBytes = telemetry.getMinOrZero(); + const uint64_t avgBytes = telemetry.getAvgOrZero(); + return + avgBytes < lowAvgBytesThreshold || + (minBytes < tinyChunkBytesThreshold && telemetry.callCount > tinyChunkCallsThreshold); + } -inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SFileReadTelemetry* ioTelemetry = nullptr) -{ - if (!file || (!dst && bytes != 0ull)) - return false; - if (bytes == 0ull) - return true; - - system::IFile::success_t success; - file->read(success, dst, offset, bytes); - if (success && ioTelemetry) - ioTelemetry->account(success.getBytesProcessed()); - return success && success.getBytesProcessed() == bytes; -} + static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) + { + return isTinyIOTelemetryLikely( + telemetry, + payloadBytes, + ioPolicy.runtimeTuning.tinyIoPayloadThresholdBytes, + ioPolicy.runtimeTuning.tinyIoAvgBytesThreshold, + ioPolicy.runtimeTuning.tinyIoMinBytesThreshold, + ioPolicy.runtimeTuning.tinyIoMinCallCount); + } -inline bool readFileWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr) -{ - if (!file || (!dst && bytes != 0ull)) - return false; - if (bytes == 0ull) - return true; - - switch (ioPlan.strategy) - { - case SResolvedFileIOPolicy::Strategy::WholeFile: - return 
readFileExact(file, dst, offset, bytes, ioTelemetry); - case SResolvedFileIOPolicy::Strategy::Chunked: - default: + static inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SReadTelemetry* ioTelemetry = nullptr) + { + if (!file || (!dst && bytes != 0ull)) + return false; + if (bytes == 0ull) + return true; + + system::IFile::success_t success; + file->read(success, dst, offset, bytes); + if (success && ioTelemetry) + ioTelemetry->account(success.getBytesProcessed()); + return success && success.getBytesProcessed() == bytes; + } + + static inline bool readFileWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SReadTelemetry* ioTelemetry = nullptr) + { + if (!file || (!dst && bytes != 0ull)) + return false; + if (bytes == 0ull) + return true; + + switch (ioPlan.strategy) + { + case SResolvedFileIOPolicy::Strategy::WholeFile: + return readFileExact(file, dst, offset, bytes, ioTelemetry); + case SResolvedFileIOPolicy::Strategy::Chunked: + default: + { + size_t bytesRead = 0ull; + const uint64_t chunkSizeBytes = ioPlan.chunkSizeBytes(); + while (bytesRead < bytes) + { + const size_t toRead = static_cast(std::min(chunkSizeBytes, bytes - bytesRead)); + system::IFile::success_t success; + file->read(success, dst + bytesRead, offset + bytesRead, toRead); + if (!success) + return false; + const size_t processed = success.getBytesProcessed(); + if (processed == 0ull) + return false; + if (ioTelemetry) + ioTelemetry->account(processed); + bytesRead += processed; + } + return true; + } + } + } + + static inline bool readFileWithPolicyTimed(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, double* ioMs = nullptr, SReadTelemetry* ioTelemetry = nullptr) + { + using clock_t = std::chrono::high_resolution_clock; + const auto ioStart = clock_t::now(); + const bool ok = readFileWithPolicy(file, dst, 
offset, bytes, ioPlan, ioTelemetry); + if (ioMs) + *ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); + return ok; + } + + static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { - size_t bytesRead = 0ull; - while (bytesRead < bytes) + if (!file || (!data && byteCount != 0ull)) + return false; + if (byteCount == 0ull) + return true; + + size_t writtenTotal = 0ull; + const uint64_t chunkSizeBytes = ioPlan.chunkSizeBytes(); + while (writtenTotal < byteCount) { - const size_t toRead = static_cast(std::min(ioPlan.chunkSizeBytes, bytes - bytesRead)); + const size_t toWrite = + ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? + (byteCount - writtenTotal) : + static_cast(std::min(chunkSizeBytes, byteCount - writtenTotal)); system::IFile::success_t success; - file->read(success, dst + bytesRead, offset + bytesRead, toRead); + file->write(success, data + writtenTotal, fileOffset + writtenTotal, toWrite); if (!success) return false; - const size_t processed = success.getBytesProcessed(); - if (processed == 0ull) + const size_t written = success.getBytesProcessed(); + if (written == 0ull) return false; if (ioTelemetry) - ioTelemetry->account(processed); - bytesRead += processed; + ioTelemetry->account(written); + writtenTotal += written; } + fileOffset += writtenTotal; return true; } - } -} - -inline bool readFileWithPolicyTimed(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, double* ioMs = nullptr, SFileReadTelemetry* ioTelemetry = nullptr) -{ - using clock_t = std::chrono::high_resolution_clock; - const auto ioStart = clock_t::now(); - const bool ok = readFileWithPolicy(file, dst, offset, bytes, ioPlan, ioTelemetry); - if (ioMs) - *ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); - return ok; -} -inline bool 
writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, size_t& fileOffset, SFileWriteTelemetry* ioTelemetry = nullptr) -{ - if (!file || (!data && byteCount != 0ull)) - return false; - if (byteCount == 0ull) - return true; - - size_t writtenTotal = 0ull; - while (writtenTotal < byteCount) - { - const size_t toWrite = - ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? - (byteCount - writtenTotal) : - static_cast(std::min(ioPlan.chunkSizeBytes, byteCount - writtenTotal)); - system::IFile::success_t success; - file->write(success, data + writtenTotal, fileOffset + writtenTotal, toWrite); - if (!success) - return false; - const size_t written = success.getBytesProcessed(); - if (written == 0ull) - return false; - if (ioTelemetry) - ioTelemetry->account(written); - writtenTotal += written; - } - fileOffset += writtenTotal; - return true; -} + static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) + { + size_t fileOffset = 0ull; + return writeFileWithPolicyAtOffset(file, ioPlan, data, byteCount, fileOffset, ioTelemetry); + } -inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SFileWriteTelemetry* ioTelemetry = nullptr) -{ - size_t fileOffset = 0ull; - return writeFileWithPolicyAtOffset(file, ioPlan, data, byteCount, fileOffset, ioTelemetry); -} + static inline bool writeTwoBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* dataA, size_t byteCountA, const uint8_t* dataB, size_t byteCountB, SWriteTelemetry* ioTelemetry = nullptr) + { + size_t fileOffset = 0ull; + if (!writeFileWithPolicyAtOffset(file, ioPlan, dataA, byteCountA, fileOffset, ioTelemetry)) + return false; + return writeFileWithPolicyAtOffset(file, ioPlan, dataB, byteCountB, fileOffset, 
ioTelemetry); + } +}; -inline bool writeTwoBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* dataA, size_t byteCountA, const uint8_t* dataB, size_t byteCountB, SFileWriteTelemetry* ioTelemetry = nullptr) -{ - size_t fileOffset = 0ull; - if (!writeFileWithPolicyAtOffset(file, ioPlan, dataA, byteCountA, fileOffset, ioTelemetry)) - return false; - return writeFileWithPolicyAtOffset(file, ioPlan, dataB, byteCountB, fileOffset, ioTelemetry); -} +using SFileIOTelemetry = SInterchangeIOCommon::STelemetry; +using SFileReadTelemetry = SInterchangeIOCommon::SReadTelemetry; +using SFileWriteTelemetry = SInterchangeIOCommon::SWriteTelemetry; } diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 0edba1c866..97394a91d7 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -6,9 +6,9 @@ #include "nbl/core/declarations.h" -#include "nbl/core/hash/blake.h" #include "nbl/asset/ICPUPolygonGeometry.h" +#include "nbl/asset/interchange/SFileIOPolicy.h" #include "nbl/asset/utils/CGeometryManipulator.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" #include "nbl/asset/utils/COBBGenerator.h" @@ -22,24 +22,26 @@ namespace nbl::asset class NBL_API2 CPolygonGeometryManipulator { public: - static core::blake3_hash_t computeDeterministicContentHash(const ICPUPolygonGeometry* geo); + enum class EContentHashMode : uint8_t + { + MissingOnly, + RecomputeAll + }; + + static void collectUniqueBuffers(ICPUPolygonGeometry* geo, core::vector>& outBuffers); + static void computeContentHashesParallel(ICPUPolygonGeometry* geo, const SFileIOPolicy& ioPolicy, const EContentHashMode mode = EContentHashMode::MissingOnly); + static inline void computeMissingContentHashesParallel(ICPUPolygonGeometry* geo, const SFileIOPolicy& ioPolicy) + { + computeContentHashesParallel(geo, ioPolicy, EContentHashMode::MissingOnly); 
+ } + static inline void recomputeContentHashesParallel(ICPUPolygonGeometry* geo, const SFileIOPolicy& ioPolicy) + { + computeContentHashesParallel(geo, ioPolicy, EContentHashMode::RecomputeAll); + } static inline void recomputeContentHashes(ICPUPolygonGeometry* geo) { - if (!geo) - return; - CGeometryManipulator::recomputeContentHash(geo->getPositionView()); - CGeometryManipulator::recomputeContentHash(geo->getIndexView()); - CGeometryManipulator::recomputeContentHash(geo->getNormalView()); - for (const auto& view : *geo->getJointWeightViews()) - { - CGeometryManipulator::recomputeContentHash(view.indices); - CGeometryManipulator::recomputeContentHash(view.weights); - } - if (auto pView=geo->getJointOBBView(); pView) - CGeometryManipulator::recomputeContentHash(*pView); - for (const auto& view : *geo->getAuxAttributeViews()) - CGeometryManipulator::recomputeContentHash(view); + recomputeContentHashesParallel(geo, SFileIOPolicy{}); } // @@ -94,7 +96,7 @@ class NBL_API2 CPolygonGeometryManipulator using aabb_t = std::remove_reference_t; using point_t = typename aabb_t::point_t; using component_t = std::remove_cv_t>; - SAABBAccumulator3 parsedAABB = {}; + SAABBAccumulator3 parsedAABB = createAABBAccumulator(); auto addVertexToAABB = [&](const uint32_t vertex_i)->void { point_t pt; diff --git a/include/nbl/asset/utils/SGeometryAABBCommon.h b/include/nbl/asset/utils/SGeometryAABBCommon.h index 79a0d64ba5..6095b70eee 100644 --- a/include/nbl/asset/utils/SGeometryAABBCommon.h +++ b/include/nbl/asset/utils/SGeometryAABBCommon.h @@ -6,8 +6,8 @@ #include "nbl/asset/ICPUPolygonGeometry.h" +#include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" -#include #include @@ -15,60 +15,54 @@ namespace nbl::asset { template -struct SAABBAccumulator3 +using SAABBAccumulator3 = hlsl::shapes::util::AABBAccumulator3; + +template +inline SAABBAccumulator3 createAABBAccumulator() { - bool has = false; - std::array min = {}; - std::array max = {}; -}; + return SAABBAccumulator3::create(); +} 
template inline void extendAABBAccumulator(SAABBAccumulator3& aabb, const Scalar x, const Scalar y, const Scalar z) { - if (!aabb.has) - { - aabb.has = true; - aabb.min[0] = x; - aabb.min[1] = y; - aabb.min[2] = z; - aabb.max[0] = x; - aabb.max[1] = y; - aabb.max[2] = z; - return; - } - - if (x < aabb.min[0]) aabb.min[0] = x; - if (y < aabb.min[1]) aabb.min[1] = y; - if (z < aabb.min[2]) aabb.min[2] = z; - if (x > aabb.max[0]) aabb.max[0] = x; - if (y > aabb.max[1]) aabb.max[1] = y; - if (z > aabb.max[2]) aabb.max[2] = z; + aabb.addXYZ(x, y, z); } template inline void extendAABBAccumulator(SAABBAccumulator3& aabb, const Point& point) { + typename SAABBAccumulator3::point_t converted; if constexpr (requires { point.x; point.y; point.z; }) - extendAABBAccumulator(aabb, static_cast(point.x), static_cast(point.y), static_cast(point.z)); + { + converted.x = static_cast(point.x); + converted.y = static_cast(point.y); + converted.z = static_cast(point.z); + } else - extendAABBAccumulator(aabb, static_cast(point[0]), static_cast(point[1]), static_cast(point[2])); + { + converted.x = static_cast(point[0]); + converted.y = static_cast(point[1]); + converted.z = static_cast(point[2]); + } + aabb.addPoint(converted); } template inline void assignAABBFromAccumulator(AABB& dst, const SAABBAccumulator3& aabb) { - if (!aabb.has) + if (aabb.empty()) return; dst = std::remove_reference_t::create(); if constexpr (requires { dst.minVx.x; dst.minVx.y; dst.minVx.z; dst.maxVx.x; dst.maxVx.y; dst.maxVx.z; }) { - dst.minVx.x = static_cast(aabb.min[0]); - dst.minVx.y = static_cast(aabb.min[1]); - dst.minVx.z = static_cast(aabb.min[2]); - dst.maxVx.x = static_cast(aabb.max[0]); - dst.maxVx.y = static_cast(aabb.max[1]); - dst.maxVx.z = static_cast(aabb.max[2]); + dst.minVx.x = static_cast(aabb.value.minVx.x); + dst.minVx.y = static_cast(aabb.value.minVx.y); + dst.minVx.z = static_cast(aabb.value.minVx.z); + dst.maxVx.x = static_cast(aabb.value.maxVx.x); + dst.maxVx.y = 
static_cast(aabb.value.maxVx.y); + dst.maxVx.z = static_cast(aabb.value.maxVx.z); if constexpr (requires { dst.minVx.w; dst.maxVx.w; }) { dst.minVx.w = 0; @@ -77,27 +71,15 @@ inline void assignAABBFromAccumulator(AABB& dst, const SAABBAccumulator3 } else { - dst.minVx[0] = static_cast(aabb.min[0]); - dst.minVx[1] = static_cast(aabb.min[1]); - dst.minVx[2] = static_cast(aabb.min[2]); - dst.maxVx[0] = static_cast(aabb.max[0]); - dst.maxVx[1] = static_cast(aabb.max[1]); - dst.maxVx[2] = static_cast(aabb.max[2]); + dst.minVx[0] = static_cast(aabb.value.minVx[0]); + dst.minVx[1] = static_cast(aabb.value.minVx[1]); + dst.minVx[2] = static_cast(aabb.value.minVx[2]); + dst.maxVx[0] = static_cast(aabb.value.maxVx[0]); + dst.maxVx[1] = static_cast(aabb.value.maxVx[1]); + dst.maxVx[2] = static_cast(aabb.value.maxVx[2]); } } -template -inline void applyAABBToGeometry(ICPUPolygonGeometry* geometry, const SAABBAccumulator3& aabb) -{ - if (!geometry || !aabb.has) - return; - - geometry->visitAABB([&aabb](auto& ref)->void - { - assignAABBFromAccumulator(ref, aabb); - }); -} - } diff --git a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl index e46dfe997b..99fdc61dfd 100644 --- a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl @@ -8,6 +8,7 @@ #include #include #include +#include namespace nbl { @@ -52,7 +53,17 @@ inline matrix rhLookAt( r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); - return r; + return r; +} + +template) +inline shapes::AABB<3, T> pseudo_mul(NBL_CONST_REF_ARG(matrix) lhs, NBL_CONST_REF_ARG(shapes::AABB<3, T>) rhs) +{ + const auto translation = hlsl::transpose(lhs)[3]; + auto transformed = shapes::util::transform(lhs, rhs); + transformed.minVx += translation; + transformed.maxVx += translation; + return transformed; } } diff --git a/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl 
b/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl new file mode 100644 index 0000000000..2962298d01 --- /dev/null +++ b/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_SHAPES_AABB_ACCUMULATOR_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SHAPES_AABB_ACCUMULATOR_INCLUDED_ + + +#include "nbl/builtin/hlsl/shapes/aabb.hlsl" + + +namespace nbl +{ +namespace hlsl +{ +namespace shapes +{ +namespace util +{ + +template +struct AABBAccumulator3 +{ + using scalar_t = Scalar; + using aabb_t = AABB<3, Scalar>; + using point_t = typename aabb_t::point_t; + + static AABBAccumulator3 create() + { + AABBAccumulator3 retval = {}; + retval.value = aabb_t::create(); + return retval; + } + + bool empty() NBL_CONST_MEMBER_FUNC + { + return + value.minVx.x > value.maxVx.x || + value.minVx.y > value.maxVx.y || + value.minVx.z > value.maxVx.z; + } + + void addPoint(NBL_CONST_REF_ARG(point_t) point) + { + value.addPoint(point); + } + + void addXYZ(const Scalar x, const Scalar y, const Scalar z) + { + point_t point; + point.x = x; + point.y = y; + point.z = z; + value.addPoint(point); + } + + aabb_t value; +}; + +} +} +} +} + +#endif diff --git a/include/nbl/system/ISystem.h b/include/nbl/system/ISystem.h index 65f0351582..0ab163b330 100644 --- a/include/nbl/system/ISystem.h +++ b/include/nbl/system/ISystem.h @@ -105,7 +105,8 @@ class NBL_API2 ISystem : public core::IReferenceCounted void createFile( future_t>& future, // creation may happen on a dedicated thread, so its async path filename, // absolute path within our virtual filesystem - const core::bitflag flags, // access flags (IMPORTANT: files from most archives wont open with ECF_WRITE bit) + const core::bitflag flags, // intended access flags (IMPORTANT: files from most archives wont open with ECF_WRITE 
bit) + // actual file flags may be downgraded when backend/archive cannot honor all requested flags (for example mapping/coherency) const std::string_view& accessToken="" // usually password for archives, but should be SSH key for URL downloads ); diff --git a/src/nbl/asset/interchange/CGLIWriter.h b/src/nbl/asset/interchange/CGLIWriter.h index db88583054..fccde37735 100644 --- a/src/nbl/asset/interchange/CGLIWriter.h +++ b/src/nbl/asset/interchange/CGLIWriter.h @@ -35,9 +35,9 @@ class CGLIWriter final : public asset::IAssetWriter uint64_t getSupportedAssetTypesBitfield() const override { return asset::IAsset::ET_IMAGE_VIEW; } - uint32_t getSupportedFlags() override { return asset::EWF_NONE | asset::EWF_BINARY; } + writer_flags_t getSupportedFlags() override { return asset::EWF_BINARY; } - uint32_t getForcedFlags() override { return asset::EWF_NONE | asset::EWF_BINARY; } + writer_flags_t getForcedFlags() override { return asset::EWF_BINARY; } bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; diff --git a/src/nbl/asset/interchange/CGLTFWriter.h b/src/nbl/asset/interchange/CGLTFWriter.h index 6184bc0be2..7fde5eb319 100644 --- a/src/nbl/asset/interchange/CGLTFWriter.h +++ b/src/nbl/asset/interchange/CGLTFWriter.h @@ -40,9 +40,9 @@ namespace nbl uint64_t getSupportedAssetTypesBitfield() const override { return asset::IAsset::ET_MESH; } - uint32_t getSupportedFlags() override { return asset::EWF_NONE; } + writer_flags_t getSupportedFlags() override { return asset::EWF_NONE; } - uint32_t getForcedFlags() override { return asset::EWF_NONE; } + writer_flags_t getForcedFlags() override { return asset::EWF_NONE; } bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; diff --git a/src/nbl/asset/interchange/CImageWriterJPG.cpp b/src/nbl/asset/interchange/CImageWriterJPG.cpp index 3943e207ed..4557b0e3d4 100644 --- 
a/src/nbl/asset/interchange/CImageWriterJPG.cpp +++ b/src/nbl/asset/interchange/CImageWriterJPG.cpp @@ -198,16 +198,16 @@ bool CImageWriterJPG::writeAsset(system::IFile* _file, const SAssetWriteParams& #else SAssetWriteContext ctx{ _params, _file }; - auto imageView = IAsset::castDown(_params.rootAsset); + auto imageView = IAsset::castDown(_params.rootAsset); system::IFile* file = _override->getOutputFile(_file, ctx, { imageView, 0u}); - const asset::E_WRITER_FLAGS flags = _override->getAssetWritingFlags(ctx, imageView, 0u); + const auto flags = _override->getAssetWritingFlags(ctx, imageView, 0u); const float comprLvl = _override->getAssetCompressionLevel(ctx, imageView, 0u); - return writeJPEGFile(file, m_system.get(), imageView, (!!(flags & asset::EWF_COMPRESSED)) * static_cast((1.f-comprLvl)*100.f), _params.logger); // if quality==0, then it defaults to 75 + return writeJPEGFile(file, m_system.get(), imageView, flags.hasAnyFlag(asset::EWF_COMPRESSED) * static_cast((1.f-comprLvl)*100.f), _params.logger); // if quality==0, then it defaults to 75 #endif//!defined(_NBL_COMPILE_WITH_LIBJPEG_ ) } #undef OUTPUT_BUF_SIZE -#endif \ No newline at end of file +#endif diff --git a/src/nbl/asset/interchange/CImageWriterJPG.h b/src/nbl/asset/interchange/CImageWriterJPG.h index 40157f0bf6..1d2b5f2963 100644 --- a/src/nbl/asset/interchange/CImageWriterJPG.h +++ b/src/nbl/asset/interchange/CImageWriterJPG.h @@ -33,9 +33,9 @@ class CImageWriterJPG : public asset::IAssetWriter virtual uint64_t getSupportedAssetTypesBitfield() const override { return asset::IAsset::ET_IMAGE_VIEW; } - virtual uint32_t getSupportedFlags() override { return asset::EWF_COMPRESSED; } + virtual writer_flags_t getSupportedFlags() override { return asset::EWF_COMPRESSED; } - virtual uint32_t getForcedFlags() { return asset::EWF_BINARY; } + virtual writer_flags_t getForcedFlags() { return asset::EWF_BINARY; } virtual bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, 
IAssetWriterOverride* _override = nullptr) override; }; diff --git a/src/nbl/asset/interchange/CImageWriterOpenEXR.h b/src/nbl/asset/interchange/CImageWriterOpenEXR.h index 37da219c64..5a2e0a1cda 100644 --- a/src/nbl/asset/interchange/CImageWriterOpenEXR.h +++ b/src/nbl/asset/interchange/CImageWriterOpenEXR.h @@ -33,9 +33,9 @@ class CImageWriterOpenEXR final : public IImageWriter uint64_t getSupportedAssetTypesBitfield() const override { return asset::IAsset::ET_IMAGE_VIEW; } - uint32_t getSupportedFlags() override { return asset::EWF_BINARY; } + writer_flags_t getSupportedFlags() override { return asset::EWF_BINARY; } - uint32_t getForcedFlags() { return asset::EWF_BINARY; } + writer_flags_t getForcedFlags() { return asset::EWF_BINARY; } bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; diff --git a/src/nbl/asset/interchange/CImageWriterPNG.h b/src/nbl/asset/interchange/CImageWriterPNG.h index ec2f3b39ef..5111df6ac5 100644 --- a/src/nbl/asset/interchange/CImageWriterPNG.h +++ b/src/nbl/asset/interchange/CImageWriterPNG.h @@ -39,9 +39,9 @@ class CImageWriterPNG : public asset::IAssetWriter virtual uint64_t getSupportedAssetTypesBitfield() const override { return asset::IAsset::ET_IMAGE_VIEW; } - virtual uint32_t getSupportedFlags() override { return 0u; } - - virtual uint32_t getForcedFlags() { return asset::EWF_BINARY; } + virtual writer_flags_t getSupportedFlags() override { return asset::EWF_NONE; } + + virtual writer_flags_t getForcedFlags() { return asset::EWF_BINARY; } virtual bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; diff --git a/src/nbl/asset/interchange/CImageWriterTGA.h b/src/nbl/asset/interchange/CImageWriterTGA.h index 2341d1a910..a741898fbb 100644 --- a/src/nbl/asset/interchange/CImageWriterTGA.h +++ b/src/nbl/asset/interchange/CImageWriterTGA.h @@ -33,9 +33,9 @@ class CImageWriterTGA : public 
asset::IAssetWriter return asset::IAsset::ET_IMAGE_VIEW; } - virtual uint32_t getSupportedFlags() override { return 0u; } + virtual writer_flags_t getSupportedFlags() override { return asset::EWF_NONE; } - virtual uint32_t getForcedFlags() { return asset::EWF_BINARY; } + virtual writer_flags_t getForcedFlags() { return asset::EWF_BINARY; } virtual bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 5c62cf34b5..e7be9ef8ef 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -6,6 +6,7 @@ #include "nbl/core/declarations.h" #include "nbl/asset/IAssetManager.h" +#include "nbl/asset/ICPUGeometryCollection.h" #include "nbl/asset/interchange/SGeometryContentHashCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" @@ -18,9 +19,11 @@ #include "COBJMeshFileLoader.h" +#include #include #include #include +#include #include namespace nbl::asset @@ -201,7 +204,7 @@ const auto createAdoptedView = [](auto&& data, const E_FORMAT format) -> IGeomet bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry& ioTelemetry) { - return readFileWithPolicyTimed(file, reinterpret_cast(dst), 0ull, byteCount, ioPlan, nullptr, &ioTelemetry); + return SInterchangeIOCommon::readFileWithPolicyTimed(file, reinterpret_cast(dst), 0ull, byteCount, ioPlan, nullptr, &ioTelemetry); } inline bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint32_t& out) @@ -282,6 +285,19 @@ inline void parseObjSmoothingGroup(const char* linePtr, const char* const lineEn outGroup = sawDigit ? 
static_cast(value) : 0u; } +inline std::string parseObjIdentifier(const char* linePtr, const char* const lineEnd, const std::string_view fallback) +{ + const char* endPtr = lineEnd; + while (linePtr < lineEnd && isObjInlineWhitespace(*linePtr)) + ++linePtr; + while (endPtr > linePtr && isObjInlineWhitespace(endPtr[-1])) + --endPtr; + + if (linePtr >= endPtr) + return std::string(fallback); + return std::string(linePtr, static_cast(endPtr - linePtr)); +} + inline bool parseObjTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, int32_t* idx0, int32_t* idx1, int32_t* idx2, const size_t posCount, const size_t uvCount, const size_t normalCount) { const char* ptr = lineStart; @@ -540,10 +556,55 @@ bool COBJMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste { if (!_file) return false; + const auto fileSize = _file->getSize(); + if (fileSize <= 0) + return false; + + constexpr size_t ProbeBytes = 4096ull; + const size_t bytesToRead = std::min(ProbeBytes, static_cast(fileSize)); + std::array probe = {}; system::IFile::success_t succ; - char firstChar = 0; - _file->read(succ, &firstChar, 0ull, sizeof(firstChar)); - return succ && (firstChar == '#' || firstChar == 'v'); + _file->read(succ, probe.data(), 0ull, bytesToRead); + if (!succ || bytesToRead == 0ull) + return false; + + const char* ptr = probe.data(); + const char* const end = probe.data() + bytesToRead; + + if ((end - ptr) >= 3 && static_cast(ptr[0]) == 0xEFu && static_cast(ptr[1]) == 0xBBu && static_cast(ptr[2]) == 0xBFu) + ptr += 3; + + while (ptr < end) + { + while (ptr < end && (*ptr == ' ' || *ptr == '\t' || *ptr == '\r' || *ptr == '\n')) + ++ptr; + if (ptr >= end) + break; + + if (*ptr == '#') + { + while (ptr < end && *ptr != '\n') + ++ptr; + continue; + } + + switch (toObjLowerAscii(*ptr)) + { + case 'v': + case 'f': + case 'o': + case 'g': + case 's': + case 'u': + case 'm': + case 'l': + case 'p': + return true; + default: + return false; + } + } + return 
false; } const char** COBJMeshFileLoader::getAssociatedFileExtensions() const @@ -565,8 +626,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const long filesize = _file->getSize(); if (filesize <= 0) return {}; - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(filesize), true); - if (!ioPlan.valid) + const bool fileMappable = core::bitflag(_file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(filesize), true, fileMappable); + if (!ioPlan.isValid()) { _params.logger.log("OBJ loader: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str(), ioPlan.reason); return {}; @@ -598,6 +660,10 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector positions; core::vector normals; core::vector uvs; + const size_t estimatedAttributeCount = std::max(16ull, static_cast(filesize) / 32ull); + positions.reserve(estimatedAttributeCount); + normals.reserve(estimatedAttributeCount); + uvs.reserve(estimatedAttributeCount); core::vector outPositions; core::vector outNormals; @@ -606,21 +672,10 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector indices; core::vector dedupHeadByPos; core::vector dedupNodes; - const size_t estimatedAttributeCount = std::max(16ull, static_cast(filesize) / 32ull); const size_t estimatedOutVertexCount = std::max(estimatedAttributeCount, static_cast(filesize) / 20ull); const size_t estimatedOutIndexCount = (estimatedOutVertexCount <= (std::numeric_limits::max() / 3ull)) ? 
(estimatedOutVertexCount * 3ull) : std::numeric_limits::max(); - positions.reserve(estimatedAttributeCount); - normals.reserve(estimatedAttributeCount); - uvs.reserve(estimatedAttributeCount); const size_t initialOutVertexCapacity = std::max(1ull, estimatedOutVertexCount); const size_t initialOutIndexCapacity = (estimatedOutIndexCount == std::numeric_limits::max()) ? 3ull : std::max(3ull, estimatedOutIndexCount); - outPositions.resize(initialOutVertexCapacity); - outNormals.resize(initialOutVertexCapacity); - outNormalNeedsGeneration.resize(initialOutVertexCapacity, 0u); - outUVs.resize(initialOutVertexCapacity); - indices.resize(initialOutIndexCapacity); - dedupHeadByPos.reserve(estimatedAttributeCount); - dedupNodes.resize(initialOutVertexCapacity); size_t outVertexWriteCount = 0ull; size_t outIndexWriteCount = 0ull; size_t dedupNodeCount = 0ull; @@ -649,10 +704,212 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector dedupHotCache(dedupHotEntryCount); const size_t dedupHotMask = dedupHotEntryCount - 1ull; + struct SLoadedGeometry + { + core::smart_refctd_ptr geometry = {}; + std::string objectName = {}; + std::string groupName = {}; + uint64_t faceCount = 0ull; + uint64_t faceFastTokenCount = 0ull; + uint64_t faceFallbackTokenCount = 0ull; + }; + + core::vector loadedGeometries; + std::string currentObjectName = "default_object"; + std::string currentGroupName = "default_group"; + bool sawObjectDirective = false; + bool sawGroupDirective = false; bool hasProvidedNormals = false; bool needsNormalGeneration = false; bool hasUVs = false; - SAABBAccumulator3 parsedAABB = {}; + SAABBAccumulator3 parsedAABB = createAABBAccumulator(); + uint64_t currentFaceCount = 0ull; + uint64_t currentFaceFastTokenCount = 0ull; + uint64_t currentFaceFallbackTokenCount = 0ull; + + const auto resetBuilderState = [&]() -> void + { + outPositions.clear(); + outNormals.clear(); + outNormalNeedsGeneration.clear(); + outUVs.clear(); + 
indices.clear(); + dedupNodes.clear(); + + outPositions.resize(initialOutVertexCapacity); + outNormals.resize(initialOutVertexCapacity); + outNormalNeedsGeneration.resize(initialOutVertexCapacity, 0u); + outUVs.resize(initialOutVertexCapacity); + indices.resize(initialOutIndexCapacity); + dedupHeadByPos.assign(positions.size(), -1); + dedupNodes.resize(initialOutVertexCapacity); + + outVertexWriteCount = 0ull; + outIndexWriteCount = 0ull; + dedupNodeCount = 0ull; + hasProvidedNormals = false; + needsNormalGeneration = false; + hasUVs = false; + parsedAABB = createAABBAccumulator(); + currentFaceCount = 0ull; + currentFaceFastTokenCount = 0ull; + currentFaceFallbackTokenCount = 0ull; + const SDedupHotEntry emptyHotEntry = {}; + std::fill(dedupHotCache.begin(), dedupHotCache.end(), emptyHotEntry); + }; + + const auto finalizeCurrentGeometry = [&]() -> bool + { + if (outVertexWriteCount == 0ull) + return true; + + outPositions.resize(outVertexWriteCount); + outNormals.resize(outVertexWriteCount); + outNormalNeedsGeneration.resize(outVertexWriteCount); + outUVs.resize(outVertexWriteCount); + indices.resize(outIndexWriteCount); + + if (needsNormalGeneration) + { + core::vector generatedNormals(outVertexWriteCount, Float3(0.f, 0.f, 0.f)); + const size_t triangleCount = indices.size() / 3ull; + for (size_t triIx = 0ull; triIx < triangleCount; ++triIx) + { + const uint32_t i0 = indices[triIx * 3ull + 0ull]; + const uint32_t i1 = indices[triIx * 3ull + 1ull]; + const uint32_t i2 = indices[triIx * 3ull + 2ull]; + if (i0 >= outVertexWriteCount || i1 >= outVertexWriteCount || i2 >= outVertexWriteCount) + continue; + + const auto& p0 = outPositions[static_cast(i0)]; + const auto& p1 = outPositions[static_cast(i1)]; + const auto& p2 = outPositions[static_cast(i2)]; + + const float e10x = p1.x - p0.x; + const float e10y = p1.y - p0.y; + const float e10z = p1.z - p0.z; + const float e20x = p2.x - p0.x; + const float e20y = p2.y - p0.y; + const float e20z = p2.z - p0.z; + + const 
Float3 faceNormal( + e10y * e20z - e10z * e20y, + e10z * e20x - e10x * e20z, + e10x * e20y - e10y * e20x); + + const float faceLenSq = faceNormal.x * faceNormal.x + faceNormal.y * faceNormal.y + faceNormal.z * faceNormal.z; + if (faceLenSq <= 1e-20f) + continue; + + const auto accumulateIfNeeded = [&](const uint32_t vertexIx)->void + { + if (outNormalNeedsGeneration[static_cast(vertexIx)] == 0u) + return; + auto& dstNormal = generatedNormals[static_cast(vertexIx)]; + dstNormal.x += faceNormal.x; + dstNormal.y += faceNormal.y; + dstNormal.z += faceNormal.z; + }; + + accumulateIfNeeded(i0); + accumulateIfNeeded(i1); + accumulateIfNeeded(i2); + } + + for (size_t i = 0ull; i < outVertexWriteCount; ++i) + { + if (outNormalNeedsGeneration[i] == 0u) + continue; + + auto normal = generatedNormals[i]; + const float lenSq = normal.x * normal.x + normal.y * normal.y + normal.z * normal.z; + if (lenSq > 1e-20f) + { + const float invLen = 1.f / std::sqrt(lenSq); + normal.x *= invLen; + normal.y *= invLen; + normal.z *= invLen; + } + else + { + normal = Float3(0.f, 0.f, 1.f); + } + outNormals[i] = normal; + } + } + + const size_t outVertexCount = outPositions.size(); + auto geometry = core::make_smart_refctd_ptr(); + { + auto view = createAdoptedView(std::move(outPositions), EF_R32G32B32_SFLOAT); + if (!view) + return false; + geometry->setPositionView(std::move(view)); + } + + const bool hasNormals = hasProvidedNormals || needsNormalGeneration; + if (hasNormals) + { + auto view = createAdoptedView(std::move(outNormals), EF_R32G32B32_SFLOAT); + if (!view) + return false; + geometry->setNormalView(std::move(view)); + } + + if (hasUVs) + { + auto view = createAdoptedView(std::move(outUVs), EF_R32G32_SFLOAT); + if (!view) + return false; + geometry->getAuxAttributeViews()->push_back(std::move(view)); + } + + if (!indices.empty()) + { + geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + if (outVertexCount <= static_cast(std::numeric_limits::max()) + 1ull) + { + 
core::vector indices16(indices.size()); + for (size_t i = 0u; i < indices.size(); ++i) + indices16[i] = static_cast(indices[i]); + auto view = createAdoptedView(std::move(indices16), EF_R16_UINT); + if (!view) + return false; + geometry->setIndexView(std::move(view)); + } + else + { + auto view = createAdoptedView(std::move(indices), EF_R32_UINT); + if (!view) + return false; + geometry->setIndexView(std::move(view)); + } + } + else + { + geometry->setIndexing(IPolygonGeometryBase::PointList()); + } + + if ((_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0) + SPolygonGeometryContentHash::computeMissingParallel(geometry.get(), _params.ioPolicy); + + if (!parsedAABB.empty()) + geometry->applyAABB(parsedAABB.value); + else + CPolygonGeometryManipulator::recomputeAABB(geometry.get()); + + loadedGeometries.push_back(SLoadedGeometry{ + .geometry = std::move(geometry), + .objectName = currentObjectName, + .groupName = currentGroupName, + .faceCount = currentFaceCount, + .faceFastTokenCount = currentFaceFastTokenCount, + .faceFallbackTokenCount = currentFaceFallbackTokenCount + }); + return true; + }; + + resetBuilderState(); auto allocateOutVertex = [&](uint32_t& outIx) -> bool { if (outVertexWriteCount >= outPositions.size()) @@ -832,9 +1089,11 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (lineStart < lineEnd) { - if (*lineStart == 'v') + const char lineType = toObjLowerAscii(*lineStart); + if (lineType == 'v') { - if ((lineStart + 1) < lineEnd && lineStart[1] == ' ') + const char subType = ((lineStart + 1) < lineEnd) ? 
toObjLowerAscii(lineStart[1]) : '\0'; + if ((lineStart + 1) < lineEnd && subType == ' ') { Float3 vec{}; const char* ptr = lineStart + 2; @@ -850,7 +1109,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as positions.push_back(vec); dedupHeadByPos.push_back(-1); } - else if ((lineStart + 2) < lineEnd && lineStart[1] == 'n' && isObjInlineWhitespace(lineStart[2])) + else if ((lineStart + 2) < lineEnd && subType == 'n' && isObjInlineWhitespace(lineStart[2])) { Float3 vec{}; const char* ptr = lineStart + 3; @@ -865,7 +1124,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } normals.push_back(vec); } - else if ((lineStart + 2) < lineEnd && lineStart[1] == 't' && isObjInlineWhitespace(lineStart[2])) + else if ((lineStart + 2) < lineEnd && subType == 't' && isObjInlineWhitespace(lineStart[2])) { Float2 vec{}; const char* ptr = lineStart + 3; @@ -882,15 +1141,32 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as uvs.push_back(vec); } } - else if (*lineStart == 's' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) + else if (lineType == 'o' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) + { + if (!finalizeCurrentGeometry()) + return {}; + resetBuilderState(); + currentObjectName = parseObjIdentifier(lineStart + 2, lineEnd, "default_object"); + sawObjectDirective = true; + } + else if (lineType == 'g' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) + { + if (!finalizeCurrentGeometry()) + return {}; + resetBuilderState(); + currentGroupName = parseObjIdentifier(lineStart + 2, lineEnd, "default_group"); + sawGroupDirective = true; + } + else if (lineType == 's' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) { parseObjSmoothingGroup(lineStart + 2, lineEnd, currentSmoothingGroup); } - else if (*lineStart == 'f' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) + else if 
(lineType == 'f' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) { if (positions.empty()) return {}; ++faceCount; + ++currentFaceCount; const size_t posCount = positions.size(); const size_t uvCount = uvs.size(); const size_t normalCount = normals.size(); @@ -936,6 +1212,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (!acquireCornerIndexPositiveTriplet(triIdx2[0], triIdx2[1], triIdx2[2], c2)) return {}; faceFastTokenCount += 3u; + currentFaceFastTokenCount += 3u; if (!appendIndex(c2) || !appendIndex(c1) || !appendIndex(c0)) return {}; } @@ -958,6 +1235,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (!acquireCornerIndex(triIdx2, currentSmoothingGroup, c2)) return {}; faceFallbackTokenCount += 3u; + currentFaceFallbackTokenCount += 3u; if (!appendIndex(c2) || !appendIndex(c1) || !appendIndex(c0)) return {}; firstCorner = c0; @@ -977,6 +1255,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (!parseObjFaceVertexTokenFast(linePtr, lineEnd, idx, posCount, uvCount, normalCount)) return {}; ++faceFallbackTokenCount; + ++currentFaceFallbackTokenCount; uint32_t cornerIx = 0u; if (!acquireCornerIndex(idx, currentSmoothingGroup, cornerIx)) @@ -1012,160 +1291,102 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as else bufPtr = lineTerminator + 1; } - if (outVertexWriteCount == 0ull) + if (!finalizeCurrentGeometry()) + return {}; + if (loadedGeometries.empty()) return {}; - outPositions.resize(outVertexWriteCount); - outNormals.resize(outVertexWriteCount); - outNormalNeedsGeneration.resize(outVertexWriteCount); - outUVs.resize(outVertexWriteCount); - indices.resize(outIndexWriteCount); - - if (needsNormalGeneration) + uint64_t outVertexCount = 0ull; + uint64_t outIndexCount = 0ull; + uint64_t faceFastTokenCountSum = 0ull; + uint64_t faceFallbackTokenCountSum = 0ull; + for (const auto& loaded : 
loadedGeometries) { - core::vector generatedNormals(outVertexWriteCount, Float3(0.f, 0.f, 0.f)); - const size_t triangleCount = indices.size() / 3ull; - for (size_t triIx = 0ull; triIx < triangleCount; ++triIx) - { - const uint32_t i0 = indices[triIx * 3ull + 0ull]; - const uint32_t i1 = indices[triIx * 3ull + 1ull]; - const uint32_t i2 = indices[triIx * 3ull + 2ull]; - if (i0 >= outVertexWriteCount || i1 >= outVertexWriteCount || i2 >= outVertexWriteCount) - continue; - - const auto& p0 = outPositions[static_cast(i0)]; - const auto& p1 = outPositions[static_cast(i1)]; - const auto& p2 = outPositions[static_cast(i2)]; - - const float e10x = p1.x - p0.x; - const float e10y = p1.y - p0.y; - const float e10z = p1.z - p0.z; - const float e20x = p2.x - p0.x; - const float e20y = p2.y - p0.y; - const float e20z = p2.z - p0.z; - - const Float3 faceNormal( - e10y * e20z - e10z * e20y, - e10z * e20x - e10x * e20z, - e10x * e20y - e10y * e20x); - - const float faceLenSq = faceNormal.x * faceNormal.x + faceNormal.y * faceNormal.y + faceNormal.z * faceNormal.z; - if (faceLenSq <= 1e-20f) - continue; - - auto accumulateIfNeeded = [&](const uint32_t vertexIx)->void - { - if (outNormalNeedsGeneration[static_cast(vertexIx)] == 0u) - return; - auto& dstNormal = generatedNormals[static_cast(vertexIx)]; - dstNormal.x += faceNormal.x; - dstNormal.y += faceNormal.y; - dstNormal.z += faceNormal.z; - }; - - accumulateIfNeeded(i0); - accumulateIfNeeded(i1); - accumulateIfNeeded(i2); - } - - for (size_t i = 0ull; i < outVertexWriteCount; ++i) - { - if (outNormalNeedsGeneration[i] == 0u) - continue; - - auto normal = generatedNormals[i]; - const float lenSq = normal.x * normal.x + normal.y * normal.y + normal.z * normal.z; - if (lenSq > 1e-20f) - { - const float invLen = 1.f / std::sqrt(lenSq); - normal.x *= invLen; - normal.y *= invLen; - normal.z *= invLen; - } - else - { - normal = Float3(0.f, 0.f, 1.f); - } - outNormals[i] = normal; - } + const auto& posView = 
loaded.geometry->getPositionView(); + outVertexCount += static_cast(posView ? posView.getElementCount() : 0ull); + const auto& indexView = loaded.geometry->getIndexView(); + outIndexCount += static_cast(indexView ? indexView.getElementCount() : 0ull); + faceFastTokenCountSum += loaded.faceFastTokenCount; + faceFallbackTokenCountSum += loaded.faceFallbackTokenCount; } - const size_t outVertexCount = outPositions.size(); - const size_t outIndexCount = indices.size(); - auto geometry = core::make_smart_refctd_ptr(); + if (SInterchangeIOCommon::isTinyIOTelemetryLikely(ioTelemetry, static_cast(filesize), _params.ioPolicy)) { - auto view = createAdoptedView(std::move(outPositions), EF_R32G32B32_SFLOAT); - if (!view) - return {}; - geometry->setPositionView(std::move(view)); + _params.logger.log( + "OBJ loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", + system::ILogger::ELL_WARNING, + _file->getFileName().string().c_str(), + static_cast(ioTelemetry.callCount), + static_cast(ioTelemetry.getMinOrZero()), + static_cast(ioTelemetry.getAvgOrZero())); } - const bool hasNormals = hasProvidedNormals || needsNormalGeneration; - if (hasNormals) + const bool buildCollections = sawObjectDirective || sawGroupDirective || loadedGeometries.size() > 1ull; + if (!buildCollections) { - auto view = createAdoptedView(std::move(outNormals), EF_R32G32B32_SFLOAT); - if (!view) - return {}; - geometry->setNormalView(std::move(view)); - } + _params.logger.log( + "OBJ loader stats: file=%s in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu geometries=%llu objects=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, + _file->getFileName().string().c_str(), + static_cast(positions.size()), + static_cast(normals.size()), + static_cast(uvs.size()), + static_cast(outVertexCount), + static_cast(outIndexCount), + static_cast(faceCount), + 
static_cast(faceFastTokenCountSum), + static_cast(faceFallbackTokenCountSum), + static_cast(loadedGeometries.size()), + 1ull, + static_cast(ioTelemetry.callCount), + static_cast(ioTelemetry.getMinOrZero()), + static_cast(ioTelemetry.getAvgOrZero()), + toString(_params.ioPolicy.strategy), + toString(ioPlan.strategy), + static_cast(ioPlan.chunkSizeBytes()), + ioPlan.reason); - if (hasUVs) - { - auto view = createAdoptedView(std::move(outUVs), EF_R32G32_SFLOAT); - if (!view) - return {}; - geometry->getAuxAttributeViews()->push_back(std::move(view)); + return SAssetBundle(core::smart_refctd_ptr(), { core::smart_refctd_ptr_static_cast(std::move(loadedGeometries.front().geometry)) }); } - if (!indices.empty()) + core::vector objectNames; + core::vector> objectCollections; + for (auto& loaded : loadedGeometries) { - geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - if (outVertexCount <= static_cast(std::numeric_limits::max()) + 1ull) + size_t objectIx = objectNames.size(); + for (size_t i = 0ull; i < objectNames.size(); ++i) { - core::vector indices16(indices.size()); - for (size_t i = 0u; i < indices.size(); ++i) - indices16[i] = static_cast(indices[i]); - auto view = createAdoptedView(std::move(indices16), EF_R16_UINT); - if (!view) - return {}; - geometry->setIndexView(std::move(view)); + if (objectNames[i] == loaded.objectName) + { + objectIx = i; + break; + } } - else + if (objectIx == objectNames.size()) { - auto view = createAdoptedView(std::move(indices), EF_R32_UINT); - if (!view) + objectNames.push_back(loaded.objectName); + auto collection = core::make_smart_refctd_ptr(); + if (!collection) return {}; - geometry->setIndexView(std::move(view)); + objectCollections.push_back(std::move(collection)); } - } - else - { - geometry->setIndexing(IPolygonGeometryBase::PointList()); - } - if ((_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0) - { - computeMissingGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); - } + 
auto* refs = objectCollections[objectIx]->getGeometries(); + if (!refs) + return {}; - if (parsedAABB.has) - applyAABBToGeometry(geometry.get(), parsedAABB); - else - { - CPolygonGeometryManipulator::recomputeAABB(geometry.get()); - } - if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(filesize), _params.ioPolicy)) - { - _params.logger.log( - "OBJ loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", - system::ILogger::ELL_WARNING, - _file->getFileName().string().c_str(), - static_cast(ioTelemetry.callCount), - static_cast(ioTelemetry.getMinOrZero()), - static_cast(ioTelemetry.getAvgOrZero())); + IGeometryCollection::SGeometryReference ref = {}; + ref.geometry = core::smart_refctd_ptr_static_cast>(loaded.geometry); + refs->push_back(std::move(ref)); } + + core::vector> collectionAssets; + collectionAssets.reserve(objectCollections.size()); + for (auto& collection : objectCollections) + collectionAssets.push_back(core::smart_refctd_ptr_static_cast(std::move(collection))); + _params.logger.log( - "OBJ loader stats: file=%s in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "OBJ loader stats: file=%s in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu geometries=%llu objects=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), static_cast(positions.size()), @@ -1174,17 +1395,19 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as static_cast(outVertexCount), static_cast(outIndexCount), static_cast(faceCount), - static_cast(faceFastTokenCount), - static_cast(faceFallbackTokenCount), + static_cast(faceFastTokenCountSum), + static_cast(faceFallbackTokenCountSum), + static_cast(loadedGeometries.size()), + 
static_cast(collectionAssets.size()), static_cast(ioTelemetry.callCount), static_cast(ioTelemetry.getMinOrZero()), static_cast(ioTelemetry.getAvgOrZero()), toString(_params.ioPolicy.strategy), toString(ioPlan.strategy), - static_cast(ioPlan.chunkSizeBytes), + static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); - return SAssetBundle(core::smart_refctd_ptr(), { std::move(geometry) }); + return SAssetBundle(core::smart_refctd_ptr(), std::move(collectionAssets)); } } diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 1e864b685d..322c66b94d 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -33,14 +33,14 @@ const char** COBJMeshWriter::getAssociatedFileExtensions() const return ext; } -uint32_t COBJMeshWriter::getSupportedFlags() +writer_flags_t COBJMeshWriter::getSupportedFlags() { - return 0u; + return EWF_NONE; } -uint32_t COBJMeshWriter::getForcedFlags() +writer_flags_t COBJMeshWriter::getForcedFlags() { - return 0u; + return EWF_NONE; } namespace obj_writer_detail @@ -91,13 +91,13 @@ void appendVec3Line(std::string& out, const char* prefix, const size_t prefixSiz std::memcpy(cursor, prefix, prefixSize); cursor += prefixSize; - cursor = appendFloatFixed6ToBuffer(cursor, lineEnd, x); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, x); if (cursor < lineEnd) *(cursor++) = ' '; - cursor = appendFloatFixed6ToBuffer(cursor, lineEnd, y); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, y); if (cursor < lineEnd) *(cursor++) = ' '; - cursor = appendFloatFixed6ToBuffer(cursor, lineEnd, z); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, z); if (cursor < lineEnd) *(cursor++) = '\n'; @@ -115,10 +115,10 @@ void appendVec2Line(std::string& out, const char* prefix, const size_t prefixSiz std::memcpy(cursor, prefix, prefixSize); cursor += prefixSize; - cursor = 
appendFloatFixed6ToBuffer(cursor, lineEnd, x); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, x); if (cursor < lineEnd) *(cursor++) = ' '; - cursor = appendFloatFixed6ToBuffer(cursor, lineEnd, y); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, y); if (cursor < lineEnd) *(cursor++) = '\n'; @@ -299,16 +299,16 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); - const bool flipHandedness = !(flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const bool flipHandedness = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); std::string output; output.reserve(vertexCount * ApproxObjBytesPerVertex + faceCount * ApproxObjBytesPerFace); output.append("# Nabla OBJ\n"); hlsl::float64_t4 tmp = {}; - const hlsl::float32_t3* const tightPositions = getTightFloat3View(positionView); - const hlsl::float32_t3* const tightNormals = hasNormals ? getTightFloat3View(normalView) : nullptr; - const hlsl::float32_t2* const tightUV = hasUVs ? getTightFloat2View(*uvView) : nullptr; + const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightFloat3View(positionView); + const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightFloat3View(normalView) : nullptr; + const hlsl::float32_t2* const tightUV = hasUVs ? 
SGeometryWriterCommon::getTightFloat2View(*uvView) : nullptr; for (size_t i = 0u; i < vertexCount; ++i) { float x = 0.f; @@ -410,17 +410,18 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ appendFaceLine(output, faceIndexStorage, faceIndexRefs, f0, f1, f2); } - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true); - if (!ioPlan.valid) + const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true, fileMappable); + if (!ioPlan.isValid()) { _params.logger.log("OBJ writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); return false; } - const bool writeOk = writeFileWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size(), &ioTelemetry); + const bool writeOk = SInterchangeIOCommon::writeFileWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size(), &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(output.size()), _params.ioPolicy)) + if (SInterchangeIOCommon::isTinyIOTelemetryLikely(ioTelemetry, static_cast(output.size()), _params.ioPolicy)) { _params.logger.log( "OBJ writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -442,7 +443,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioAvgWrite), toString(_params.ioPolicy.strategy), toString(ioPlan.strategy), - static_cast(ioPlan.chunkSizeBytes), + static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); return writeOk; diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 4c4ffb13a4..ffb9afe5ec 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ 
b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -1567,8 +1567,9 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa core::blake3_hash_t precomputedIndexHash = IPreHashed::INVALID_HASH; const uint64_t fileSize = _file->getSize(); const bool hashInBuild = computeContentHashes && shouldInlineHashBuild(_params.ioPolicy, fileSize); - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, fileSize, true); - if (!ioPlan.valid) + const bool fileMappable = core::bitflag(_file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, fileSize, true, fileMappable); + if (!ioPlan.isValid()) { _params.logger.log("PLY loader: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str(), ioPlan.reason); return {}; @@ -1582,7 +1583,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa _hierarchyLevel, _override }; - uint64_t desiredReadWindow = ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? (fileSize + SContext::ReadWindowPaddingBytes) : ioPlan.chunkSizeBytes; + uint64_t desiredReadWindow = ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? 
(fileSize + SContext::ReadWindowPaddingBytes) : ioPlan.chunkSizeBytes(); if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) { const bool mappedInput = static_cast(_file)->getMappedPointer() != nullptr; @@ -1594,7 +1595,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa // start with empty mesh auto geometry = make_smart_refctd_ptr(); - SAABBAccumulator3 parsedAABB = {}; + SAABBAccumulator3 parsedAABB = createAABBAccumulator(); uint32_t vertCount=0; core::vector> hashedBuffers; std::jthread deferredPositionHashThread; @@ -2119,8 +2120,8 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } } - if (parsedAABB.has) - applyAABBToGeometry(geometry.get(), parsedAABB); + if (!parsedAABB.empty()) + geometry->applyAABB(parsedAABB.value); else CPolygonGeometryManipulator::recomputeAABB(geometry.get()); @@ -2167,7 +2168,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { if (deferredPositionHashThread.joinable()) deferredPositionHashThread.join(); - computeMissingGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); + SPolygonGeometryContentHash::computeMissingParallel(geometry.get(), _params.ioPolicy); } else { @@ -2181,7 +2182,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa .totalBytes = ctx.readBytesTotal, .minBytes = ctx.readMinBytes }; - if (isTinyIOTelemetryLikely(ioTelemetry, fileSize, _params.ioPolicy)) + if (SInterchangeIOCommon::isTinyIOTelemetryLikely(ioTelemetry, fileSize, _params.ioPolicy)) { _params.logger.log( "PLY loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", @@ -2206,7 +2207,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa static_cast(ioAvgRead), toString(_params.ioPolicy.strategy), toString(ioPlan.strategy), - static_cast(ioPlan.chunkSizeBytes), + static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); auto meta = core::make_smart_refctd_ptr(); 
return SAssetBundle(std::move(meta),{std::move(geometry)}); diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index bedc0013df..18a08fbdb8 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -36,14 +36,14 @@ const char** CPLYMeshWriter::getAssociatedFileExtensions() const return ext; } -uint32_t CPLYMeshWriter::getSupportedFlags() +writer_flags_t CPLYMeshWriter::getSupportedFlags() { return asset::EWF_BINARY; } -uint32_t CPLYMeshWriter::getForcedFlags() +writer_flags_t CPLYMeshWriter::getForcedFlags() { - return 0u; + return EWF_NONE; } namespace ply_writer_detail @@ -562,8 +562,8 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ faceCount = vertexCount / 3u; } const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); - const bool binary = (flags & E_WRITER_FLAGS::EWF_BINARY) != 0u; - const bool flipVectors = !(flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const bool binary = flags.hasAnyFlag(E_WRITER_FLAGS::EWF_BINARY); + const bool flipVectors = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); const bool write16BitIndices = vertexCount <= static_cast(std::numeric_limits::max()) + 1ull; EPlyScalarType positionScalarType = selectPlyScalarType(positionView.composed.format); @@ -665,15 +665,16 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return false; const size_t outputSize = header.size() + body.size(); - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true); - if (!ioPlan.valid) + const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true, fileMappable); + if (!ioPlan.isValid()) { _params.logger.log("PLY writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, 
file->getFileName().string().c_str(), ioPlan.reason); return false; } outputBytes = outputSize; - writeOk = writeTwoBuffersWithPolicy( + writeOk = SInterchangeIOCommon::writeTwoBuffersWithPolicy( file, ioPlan, reinterpret_cast(header.data()), @@ -683,7 +684,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) + if (SInterchangeIOCommon::isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) { _params.logger.log( "PLY writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -706,7 +707,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioAvgWrite), toString(_params.ioPolicy.strategy), toString(ioPlan.strategy), - static_cast(ioPlan.chunkSizeBytes), + static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); return writeOk; } @@ -717,15 +718,16 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return false; const size_t outputSize = header.size() + body.size(); - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true); - if (!ioPlan.valid) + const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true, fileMappable); + if (!ioPlan.isValid()) { _params.logger.log("PLY writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); return false; } outputBytes = outputSize; - writeOk = writeTwoBuffersWithPolicy( + writeOk = SInterchangeIOCommon::writeTwoBuffersWithPolicy( file, ioPlan, reinterpret_cast(header.data()), @@ -735,7 +737,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const 
SAssetWriteParams& _ &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if (isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) + if (SInterchangeIOCommon::isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) { _params.logger.log( "PLY writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -758,7 +760,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioAvgWrite), toString(_params.ioPolicy.strategy), toString(ioPlan.strategy), - static_cast(ioPlan.chunkSizeBytes), + static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); return writeOk; } diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.h b/src/nbl/asset/interchange/CPLYMeshWriter.h index 750cd126dd..3331e6bba3 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.h +++ b/src/nbl/asset/interchange/CPLYMeshWriter.h @@ -20,8 +20,8 @@ class CPLYMeshWriter : public IGeometryWriter const char** getAssociatedFileExtensions() const override; - uint32_t getSupportedFlags() override; - uint32_t getForcedFlags() override; + writer_flags_t getSupportedFlags() override; + writer_flags_t getForcedFlags() override; bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 383ee86b31..561123abd2 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -247,8 +247,9 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (filesize < SSTLContext::TextProbeBytes) return {}; - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(filesize), true); - if (!ioPlan.valid) + const bool fileMappable = 
core::bitflag(_file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); + const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(filesize), true, fileMappable); + if (!ioPlan.isValid()) { _params.logger.log("STL loader: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str(), ioPlan.reason); return {}; @@ -270,7 +271,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa else { wholeFilePayload.resize(filesize + 1ull); - if (!readFileExact(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, &context.ioTelemetry)) + if (!SInterchangeIOCommon::readFileExact(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, &context.ioTelemetry)) return {}; wholeFilePayload[filesize] = 0u; wholeFileData = wholeFilePayload.data(); @@ -290,7 +291,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } else { - hasPrefix = filesize >= SSTLContext::BinaryPrefixBytes && readFileExact(context.inner.mainFile, prefix.data(), 0ull, SSTLContext::BinaryPrefixBytes, &context.ioTelemetry); + hasPrefix = filesize >= SSTLContext::BinaryPrefixBytes && SInterchangeIOCommon::readFileExact(context.inner.mainFile, prefix.data(), 0ull, SSTLContext::BinaryPrefixBytes, &context.ioTelemetry); } bool startsWithSolid = false; if (hasPrefix) @@ -302,7 +303,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa char header[SSTLContext::TextProbeBytes] = {}; if (wholeFileData) std::memcpy(header, wholeFileData, sizeof(header)); - else if (!readFileExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) + else if (!SInterchangeIOCommon::readFileExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) return {}; startsWithSolid = (std::strncmp(header, "solid ", SSTLContext::TextProbeBytes) == 0); } @@ -329,7 +330,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* 
_file, const IAssetLoa auto geometry = core::make_smart_refctd_ptr(); geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - SAABBAccumulator3 parsedAABB = {}; + SAABBAccumulator3 parsedAABB = createAABBAccumulator(); uint64_t vertexCount = 0ull; if (!binary && wholeFileDataIsMapped) @@ -350,7 +351,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa uint32_t triangleCount32 = binaryTriCountFromDetect; if (!hasBinaryTriCountFromDetect) { - if (!readFileExact(context.inner.mainFile, &triangleCount32, SSTLContext::BinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) + if (!SInterchangeIOCommon::readFileExact(context.inner.mainFile, &triangleCount32, SSTLContext::BinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) return {}; } @@ -369,7 +370,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { core::vector payload; payload.resize(dataSize); - if (!readFileWithPolicy(context.inner.mainFile, payload.data(), SSTLContext::BinaryPrefixBytes, dataSize, ioPlan, &context.ioTelemetry)) + if (!SInterchangeIOCommon::readFileWithPolicy(context.inner.mainFile, payload.data(), SSTLContext::BinaryPrefixBytes, dataSize, ioPlan, &context.ioTelemetry)) return {}; wholeFilePayload = std::move(payload); payloadData = wholeFilePayload.data(); @@ -722,7 +723,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!wholeFileData) { wholeFilePayload.resize(filesize + 1ull); - if (!readFileWithPolicy(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, ioPlan, &context.ioTelemetry)) + if (!SInterchangeIOCommon::readFileWithPolicy(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, ioPlan, &context.ioTelemetry)) return {}; wholeFilePayload[filesize] = 0u; wholeFileData = wholeFilePayload.data(); @@ -800,18 +801,18 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (computeContentHashes) { - 
computeMissingGeometryContentHashesParallel(geometry.get(), _params.ioPolicy); + SPolygonGeometryContentHash::computeMissingParallel(geometry.get(), _params.ioPolicy); } - if (parsedAABB.has) - applyAABBToGeometry(geometry.get(), parsedAABB); + if (!parsedAABB.empty()) + geometry->applyAABB(parsedAABB.value); else { CPolygonGeometryManipulator::recomputeAABB(geometry.get()); } const uint64_t ioMinRead = context.ioTelemetry.getMinOrZero(); const uint64_t ioAvgRead = context.ioTelemetry.getAvgOrZero(); - if (isTinyIOTelemetryLikely(context.ioTelemetry, static_cast(filesize), _params.ioPolicy)) + if (SInterchangeIOCommon::isTinyIOTelemetryLikely(context.ioTelemetry, static_cast(filesize), _params.ioPolicy)) { _params.logger.log( "STL loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", @@ -835,7 +836,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa static_cast(ioAvgRead), toString(_params.ioPolicy.strategy), toString(ioPlan.strategy), - static_cast(ioPlan.chunkSizeBytes), + static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); auto meta = core::make_smart_refctd_ptr(); return SAssetBundle(std::move(meta), { std::move(geometry) }); @@ -850,13 +851,13 @@ bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste if (fileSize < SSTLContext::BinaryPrefixBytes) { char header[SSTLContext::TextProbeBytes] = {}; - if (!readFileExact(_file, header, 0ull, sizeof(header))) + if (!SInterchangeIOCommon::readFileExact(_file, header, 0ull, sizeof(header))) return false; return std::strncmp(header, "solid ", SSTLContext::TextProbeBytes) == 0; } std::array prefix = {}; - if (!readFileExact(_file, prefix.data(), 0ull, prefix.size())) + if (!SInterchangeIOCommon::readFileExact(_file, prefix.data(), 0ull, prefix.size())) return false; uint32_t triangleCount = 0u; diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 6c12ebb302..11146f1145 100644 --- 
a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -94,14 +94,14 @@ const char** CSTLMeshWriter::getAssociatedFileExtensions() const return ext; } -uint32_t CSTLMeshWriter::getSupportedFlags() +writer_flags_t CSTLMeshWriter::getSupportedFlags() { return asset::EWF_BINARY; } -uint32_t CSTLMeshWriter::getForcedFlags() +writer_flags_t CSTLMeshWriter::getForcedFlags() { - return 0u; + return EWF_NONE; } bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) @@ -128,8 +128,8 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ _params.logger.log("WRITING STL: writing the file %s", system::ILogger::ELL_INFO, file->getFileName().string().c_str()); - const asset::E_WRITER_FLAGS flags = _override->getAssetWritingFlags(context.writeContext, geom, 0u); - const bool binary = (flags & asset::EWF_BINARY) != 0u; + const auto flags = _override->getAssetWritingFlags(context.writeContext, geom, 0u); + const bool binary = flags.hasAnyFlag(asset::EWF_BINARY); uint64_t expectedSize = 0ull; bool sizeKnown = false; @@ -139,8 +139,9 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ sizeKnown = true; } - context.ioPlan = resolveFileIOPolicy(_params.ioPolicy, expectedSize, sizeKnown); - if (!context.ioPlan.valid) + const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); + context.ioPlan = resolveFileIOPolicy(_params.ioPolicy, expectedSize, sizeKnown, fileMappable); + if (!context.ioPlan.isValid()) { _params.logger.log("STL writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), context.ioPlan.reason); return false; @@ -149,7 +150,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (context.ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile && sizeKnown) 
context.ioBuffer.reserve(static_cast(expectedSize)); else - context.ioBuffer.reserve(static_cast(std::min(context.ioPlan.chunkSizeBytes, stl_writer_detail::IoFallbackReserveBytes))); + context.ioBuffer.reserve(static_cast(std::min(context.ioPlan.chunkSizeBytes(), stl_writer_detail::IoFallbackReserveBytes))); const bool written = binary ? writeMeshBinary(geom, &context) : writeMeshASCII(geom, &context); if (!written) @@ -161,7 +162,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const uint64_t ioMinWrite = context.writeTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = context.writeTelemetry.getAvgOrZero(); - if (isTinyIOTelemetryLikely(context.writeTelemetry, context.fileOffset, _params.ioPolicy)) + if (SInterchangeIOCommon::isTinyIOTelemetryLikely(context.writeTelemetry, context.fileOffset, _params.ioPolicy)) { _params.logger.log( "STL writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -182,7 +183,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioAvgWrite), toString(_params.ioPolicy.strategy), toString(context.ioPlan.strategy), - static_cast(context.ioPlan.chunkSizeBytes), + static_cast(context.ioPlan.chunkSizeBytes()), context.ioPlan.reason); return true; @@ -238,7 +239,7 @@ bool writeBytes(SContext* context, const void* data, size_t size) case SResolvedFileIOPolicy::Strategy::Chunked: default: { - const size_t chunkSize = static_cast(context->ioPlan.chunkSizeBytes); + const size_t chunkSize = static_cast(context->ioPlan.chunkSizeBytes()); size_t remaining = size; while (remaining > 0ull) { @@ -272,15 +273,15 @@ bool appendLiteral(char*& cursor, char* const end, const char* text, const size_ bool appendVectorAsAsciiLine(char*& cursor, char* const end, const core::vectorSIMDf& v) { - cursor = appendFloatFixed6ToBuffer(cursor, end, v.X); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, end, v.X); if (cursor >= end) return false; *(cursor++) = 
' '; - cursor = appendFloatFixed6ToBuffer(cursor, end, v.Y); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, end, v.Y); if (cursor >= end) return false; *(cursor++) = ' '; - cursor = appendFloatFixed6ToBuffer(cursor, end, v.Z); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, end, v.Z); if (cursor >= end) return false; *(cursor++) = '\n'; @@ -480,7 +481,7 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) if (!posView) return false; - const bool flipHandedness = !(context->writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); const size_t vertexCount = posView.getElementCount(); if (vertexCount == 0ull) return false; @@ -506,8 +507,8 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) const auto& normalView = geom->getNormalView(); const bool hasNormals = static_cast(normalView); const auto* const colorView = stlFindColorView(geom, vertexCount); - const hlsl::float32_t3* const tightPositions = getTightFloat3View(posView); - const hlsl::float32_t3* const tightNormals = hasNormals ? getTightFloat3View(normalView) : nullptr; + const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightFloat3View(posView); + const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightFloat3View(normalView) : nullptr; const float handednessSign = flipHandedness ? 
-1.f : 1.f; auto decodePosition = [&](const uint32_t ix, hlsl::float32_t3& out)->bool @@ -808,7 +809,7 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) } } - const bool writeOk = writeFileWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, &context->writeTelemetry); + const bool writeOk = SInterchangeIOCommon::writeFileWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, &context->writeTelemetry); if (writeOk) context->fileOffset += outputSize; return writeOk; @@ -827,7 +828,7 @@ bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) if (!posView) return false; const auto& normalView = geom->getNormalView(); - const bool flipHandedness = !(context->writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); const std::string_view solidName = name.empty() ? 
std::string_view(stl_writer_detail::AsciiDefaultName) : std::string_view(name); diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.h b/src/nbl/asset/interchange/CSTLMeshWriter.h index ca8ea62ceb..5841096cec 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.h +++ b/src/nbl/asset/interchange/CSTLMeshWriter.h @@ -21,8 +21,8 @@ class CSTLMeshWriter : public IGeometryWriter const char** getAssociatedFileExtensions() const override; - uint32_t getSupportedFlags() override; - uint32_t getForcedFlags() override; + writer_flags_t getSupportedFlags() override; + writer_flags_t getForcedFlags() override; bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index d591b3c63b..a11d25631c 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -9,6 +9,7 @@ #include #include "nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/utils/CVertexWelder.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" #include "nbl/asset/utils/CForsythVertexCacheOptimizer.h" @@ -20,89 +21,115 @@ namespace nbl::asset { -core::blake3_hash_t CPolygonGeometryManipulator::computeDeterministicContentHash(const ICPUPolygonGeometry* geo) +void CPolygonGeometryManipulator::collectUniqueBuffers(ICPUPolygonGeometry* geo, core::vector>& outBuffers) { if (!geo) - return IPreHashed::INVALID_HASH; - - const auto* indexing = geo->getIndexingCallback(); - if (!indexing) - return IPreHashed::INVALID_HASH; - - // Keep this as a standalone helper instead of an IPreHashed override on geometry. - // A polygon geometry is a composition of shared views over external buffers, not a single owned payload. 
- // Caching a hash inside the geometry object would need global invalidation across external buffer mutations. - core::blake3_hasher hasher; - hasher << indexing->degree(); - hasher << indexing->rate(); - hasher << indexing->knownTopology(); + { + outBuffers.clear(); + return; + } - auto hashView = [&](const IGeometry::SDataView& view)->bool + outBuffers.clear(); + auto appendBuffer = [&outBuffers](const IGeometry::SDataView& view)->void { - if (!view) + if (!view || !view.src.buffer) + return; + for (const auto& existing : outBuffers) { - hasher << false; - return true; + if (existing.get() == view.src.buffer.get()) + return; } + outBuffers.push_back(core::smart_refctd_ptr(view.src.buffer)); + }; - hasher << true; - hasher << view.composed.format; - hasher << view.composed.stride; - hasher << view.composed.getStride(); - hasher << view.composed.rangeFormat; - hasher << view.src.offset; - hasher << view.src.actualSize(); - - const auto* const buffer = view.src.buffer.get(); - if (!buffer || buffer->missingContent()) - return false; + appendBuffer(geo->getPositionView()); + appendBuffer(geo->getIndexView()); + appendBuffer(geo->getNormalView()); + for (const auto& view : *geo->getAuxAttributeViews()) + appendBuffer(view); + for (const auto& view : *geo->getJointWeightViews()) + { + appendBuffer(view.indices); + appendBuffer(view.weights); + } + if (auto jointOBB = geo->getJointOBBView(); jointOBB) + appendBuffer(*jointOBB); +} - const auto* const data = reinterpret_cast(buffer->getPointer()); - if (!data) - return false; +void CPolygonGeometryManipulator::computeContentHashesParallel(ICPUPolygonGeometry* geo, const SFileIOPolicy& ioPolicy, const EContentHashMode mode) +{ + if (!geo) + return; - hasher.update(data + view.src.offset, view.src.actualSize()); - return true; - }; + core::vector> buffers; + collectUniqueBuffers(geo, buffers); + if (buffers.empty()) + return; - if (!hashView(geo->getPositionView())) - return IPreHashed::INVALID_HASH; - if 
(!hashView(geo->getIndexView())) - return IPreHashed::INVALID_HASH; - if (!hashView(geo->getNormalView())) - return IPreHashed::INVALID_HASH; + core::vector pending; + pending.reserve(buffers.size()); + uint64_t totalBytes = 0ull; + for (size_t i = 0ull; i < buffers.size(); ++i) + { + auto& buffer = buffers[i]; + if (!buffer) + continue; + if (mode == EContentHashMode::MissingOnly && buffer->getContentHash() != IPreHashed::INVALID_HASH) + continue; + totalBytes += static_cast(buffer->getSize()); + pending.push_back(i); + } + if (pending.empty()) + return; - hasher << geo->getJointCount(); - if (geo->isSkinned()) + const size_t hw = resolveLoaderHardwareThreads(); + const uint8_t* hashSampleData = nullptr; + uint64_t hashSampleBytes = 0ull; + for (const auto pendingIx : pending) { - if (const auto* jointOBBView = geo->getJointOBBView(); jointOBBView) - { - if (!hashView(*jointOBBView)) - return IPreHashed::INVALID_HASH; - } - else - hasher << false; + auto& buffer = buffers[pendingIx]; + const auto* ptr = reinterpret_cast(buffer->getPointer()); + if (!ptr) + continue; + hashSampleData = ptr; + hashSampleBytes = resolveLoaderRuntimeSampleBytes(ioPolicy, static_cast(buffer->getSize())); + if (hashSampleBytes > 0ull) + break; + } - const auto& jointWeightViews = geo->getJointWeightViews(); - hasher << jointWeightViews.size(); - for (const auto& view : jointWeightViews) + SLoaderRuntimeTuningRequest tuningRequest = {}; + tuningRequest.inputBytes = totalBytes; + tuningRequest.totalWorkUnits = pending.size(); + tuningRequest.minBytesPerWorker = std::max(1ull, loaderRuntimeCeilDiv(totalBytes, static_cast(pending.size()))); + tuningRequest.hardwareThreads = static_cast(hw); + const size_t hardMaxWorkers = resolveLoaderHardMaxWorkers(hw, ioPolicy.runtimeTuning.workerHeadroom); + tuningRequest.hardMaxWorkers = static_cast(std::min(pending.size(), hardMaxWorkers)); + tuningRequest.targetChunksPerWorker = ioPolicy.runtimeTuning.hashTaskTargetChunksPerWorker; + 
tuningRequest.sampleData = hashSampleData; + tuningRequest.sampleBytes = hashSampleBytes; + const auto tuning = tuneLoaderRuntime(ioPolicy, tuningRequest); + const size_t workerCount = std::min(tuning.workerCount, pending.size()); + + if (workerCount > 1ull) + { + loaderRuntimeDispatchWorkers(workerCount, [&](const size_t workerIx) { - if (!hashView(view.indices)) - return IPreHashed::INVALID_HASH; - if (!hashView(view.weights)) - return IPreHashed::INVALID_HASH; - } + const size_t beginIx = (pending.size() * workerIx) / workerCount; + const size_t endIx = (pending.size() * (workerIx + 1ull)) / workerCount; + for (size_t i = beginIx; i < endIx; ++i) + { + auto& buffer = buffers[pending[i]]; + buffer->setContentHash(buffer->computeContentHash()); + } + }); + return; } - const auto& auxAttributeViews = geo->getAuxAttributeViews(); - hasher << auxAttributeViews.size(); - for (const auto& view : auxAttributeViews) + for (const auto pendingIx : pending) { - if (!hashView(view)) - return IPreHashed::INVALID_HASH; + auto& buffer = buffers[pendingIx]; + buffer->setContentHash(buffer->computeContentHash()); } - - return static_cast(hasher); } diff --git a/src/nbl/system/CSystemWin32.cpp b/src/nbl/system/CSystemWin32.cpp index 1982448886..7dcf429603 100644 --- a/src/nbl/system/CSystemWin32.cpp +++ b/src/nbl/system/CSystemWin32.cpp @@ -76,6 +76,7 @@ core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std: _fileMappingObj = CreateFileMappingA(_native,nullptr,writeAccess ? 
PAGE_READWRITE:PAGE_READONLY, 0, 0, nullptr); if (!_fileMappingObj) { + // backend fallback: file opens successfully but mapping-related flags are removed flags.value &= ~(IFile::ECF_COHERENT | IFile::ECF_MAPPABLE); } else @@ -99,6 +100,7 @@ core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std: { CloseHandle(_fileMappingObj); _fileMappingObj = nullptr; + // backend fallback: file opens successfully but mapping-related flags are removed flags.value &= ~(IFile::ECF_COHERENT | IFile::ECF_MAPPABLE); } } diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index d2cfc5f7f1..4be0913ebe 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -1183,9 +1183,9 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t loo auto patchedParams = lookup.asset->getCreationParams(); assert(lookup.patch->usage.hasFlags(patchedParams.usage)); patchedParams.usage = lookup.patch->usage; - auto contentHash = lookup.asset->getContentHash(); + const auto contentHash = lookup.asset->getContentHash(); if (contentHash==NoContentHash) - contentHash = lookup.asset->computeContentHash(); + return false; hasher.update(&patchedParams,sizeof(patchedParams)) << contentHash; return true; } From b32f5662753f387b5e096708fcd4782924d7d465 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 5 Mar 2026 09:59:39 +0100 Subject: [PATCH 042/118] Finalize loader tuning API cleanup --- examples_tests | 2 +- .../asset/interchange/SLoaderRuntimeTuning.h | 24 +++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/examples_tests b/examples_tests index 2c1d70ef5c..48945f7bee 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2c1d70ef5c7e2f7f915a489b6fd182aa771df121 +Subproject commit 48945f7bee9e38de01378432ca7c72d3ba76eb49 diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h 
b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index c1f631b922..dd029c4674 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -61,6 +62,14 @@ struct SLoaderRuntimeTuningResult size_t chunkCount = 1ull; }; +struct SLoaderRuntimeTuner +{ + template + static void dispatchWorkers(const size_t workerCount, Fn&& fn); + + static inline SLoaderRuntimeTuningResult tune(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request); +}; + constexpr uint64_t loaderRuntimeCeilDiv(const uint64_t numerator, const uint64_t denominator) { return (numerator + denominator - 1ull) / denominator; @@ -104,7 +113,7 @@ inline size_t resolveLoaderHardMaxWorkers(const size_t hardwareThreads, const ui } template -inline void loaderRuntimeDispatchWorkers(const size_t workerCount, Fn&& fn) +void SLoaderRuntimeTuner::dispatchWorkers(const size_t workerCount, Fn&& fn) { if (workerCount <= 1ull) { @@ -119,6 +128,12 @@ inline void loaderRuntimeDispatchWorkers(const size_t workerCount, Fn&& fn) fn(0ull); } +template +inline void loaderRuntimeDispatchWorkers(const size_t workerCount, Fn&& fn) +{ + SLoaderRuntimeTuner::dispatchWorkers(workerCount, std::forward(fn)); +} + inline uint64_t loaderRuntimeBenchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes) { if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) @@ -207,7 +222,7 @@ inline void loaderRuntimeAppendCandidate(std::vector& dst, const size_t dst.push_back(candidate); } -inline SLoaderRuntimeTuningResult tuneLoaderRuntime(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request) +SLoaderRuntimeTuningResult SLoaderRuntimeTuner::tune(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request) { using RTMode = SFileIOPolicy::SRuntimeTuning::Mode; SLoaderRuntimeTuningResult 
result = {}; @@ -385,6 +400,11 @@ inline SLoaderRuntimeTuningResult tuneLoaderRuntime(const SFileIOPolicy& ioPolic return result; } +inline SLoaderRuntimeTuningResult tuneLoaderRuntime(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request) +{ + return SLoaderRuntimeTuner::tune(ioPolicy, request); +} + } From 20f56c226a58e208aff4615d054333319c15ea81 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 5 Mar 2026 15:41:57 +0100 Subject: [PATCH 043/118] Unify geometry hash API and add hcp parity tool --- examples_tests | 2 +- include/nbl/asset/interchange/SFileIOPolicy.h | 6 +- .../interchange/SGeometryContentHashCommon.h | 38 ++- .../asset/interchange/SLoaderRuntimeTuning.h | 2 +- .../asset/utils/CPolygonGeometryManipulator.h | 2 +- .../asset/interchange/COBJMeshFileLoader.cpp | 2 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 2 +- .../asset/interchange/CSTLMeshFileLoader.cpp | 2 +- .../utils/CPolygonGeometryManipulator.cpp | 33 +- tools/CMakeLists.txt | 3 +- tools/hcp/CMakeLists.txt | 34 ++ tools/hcp/README.md | 20 ++ tools/hcp/main.cpp | 298 ++++++++++++++++++ 13 files changed, 417 insertions(+), 27 deletions(-) create mode 100644 tools/hcp/CMakeLists.txt create mode 100644 tools/hcp/README.md create mode 100644 tools/hcp/main.cpp diff --git a/examples_tests b/examples_tests index 3b3a359eac..391e4ebd9d 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 3b3a359eac22e037781bfe8c6568228a281e288c +Subproject commit 391e4ebd9d29a3be501a714a1cc340c4b3d8bc04 diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 04ffbff3ba..fc1203395c 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -23,8 +23,10 @@ struct SFileIOPolicy // Runtime tuning strategy for worker/chunk selection. enum class Mode : uint8_t { - // Disable runtime tuning. Use static heuristics only. 
- None, + // Disable runtime tuning and force sequential execution. + Sequential, + // Backward-compatible alias for Sequential. + None = Sequential, // Use deterministic heuristics derived from input size and hardware. Heuristic, // Use heuristics and optionally refine with lightweight sampling. diff --git a/include/nbl/asset/interchange/SGeometryContentHashCommon.h b/include/nbl/asset/interchange/SGeometryContentHashCommon.h index e9e5a07dfe..22c1c845c2 100644 --- a/include/nbl/asset/interchange/SGeometryContentHashCommon.h +++ b/include/nbl/asset/interchange/SGeometryContentHashCommon.h @@ -5,7 +5,9 @@ #define _NBL_ASSET_S_GEOMETRY_CONTENT_HASH_COMMON_H_INCLUDED_ +#include "nbl/asset/IPreHashed.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/core/hash/blake.h" namespace nbl::asset @@ -17,25 +19,51 @@ class SPolygonGeometryContentHash using EMode = CPolygonGeometryManipulator::EContentHashMode; static inline void collectBuffers( - ICPUPolygonGeometry* geometry, + const ICPUPolygonGeometry* geometry, core::vector>& buffers) { CPolygonGeometryManipulator::collectUniqueBuffers(geometry, buffers); } - static inline void computeParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy, const EMode mode = EMode::MissingOnly) + static inline void reset(ICPUPolygonGeometry* geometry) { - CPolygonGeometryManipulator::computeContentHashesParallel(geometry, ioPolicy, mode); + core::vector> buffers; + collectBuffers(geometry, buffers); + for (auto& buffer : buffers) + if (buffer) + buffer->setContentHash(IPreHashed::INVALID_HASH); } - static inline void computeMissingParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) + static inline core::blake3_hash_t getHash(const ICPUPolygonGeometry* geometry) + { + if (!geometry) + return IPreHashed::INVALID_HASH; + + core::blake3_hasher hasher; + if (const auto* indexing = geometry->getIndexingCallback(); indexing) + { + hasher << indexing->degree(); + hasher << 
indexing->rate(); + hasher << indexing->knownTopology(); + } + + core::vector> buffers; + collectBuffers(geometry, buffers); + for (const auto& buffer : buffers) + hasher << (buffer ? buffer->getContentHash() : IPreHashed::INVALID_HASH); + return static_cast(hasher); + } + + static inline core::blake3_hash_t computeMissing(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) { CPolygonGeometryManipulator::computeMissingContentHashesParallel(geometry, ioPolicy); + return getHash(geometry); } - static inline void recomputeParallel(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) + static inline core::blake3_hash_t recompute(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) { CPolygonGeometryManipulator::recomputeContentHashesParallel(geometry, ioPolicy); + return getHash(geometry); } }; diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index dd029c4674..dd979708b0 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -245,7 +245,7 @@ SLoaderRuntimeTuningResult SLoaderRuntimeTuner::tune(const SFileIOPolicy& ioPoli const uint64_t minBytesPerWorker = std::max(1ull, request.minBytesPerWorker); const size_t maxByWork = static_cast(loaderRuntimeCeilDiv(request.totalWorkUnits, minWorkUnitsPerWorker)); const size_t maxByBytes = request.inputBytes ? 
static_cast(loaderRuntimeCeilDiv(request.inputBytes, minBytesPerWorker)) : maxWorkers; - const bool heuristicEnabled = ioPolicy.runtimeTuning.mode != RTMode::None; + const bool heuristicEnabled = ioPolicy.runtimeTuning.mode != RTMode::Sequential; const bool hybridEnabled = ioPolicy.runtimeTuning.mode == RTMode::Hybrid; size_t workerCount = 1ull; diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 9cc935ecdf..9e1548a5bf 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -28,7 +28,7 @@ class NBL_API2 CPolygonGeometryManipulator RecomputeAll }; - static void collectUniqueBuffers(ICPUPolygonGeometry* geo, core::vector>& outBuffers); + static void collectUniqueBuffers(const ICPUPolygonGeometry* geo, core::vector>& outBuffers); static void computeContentHashesParallel(ICPUPolygonGeometry* geo, const SFileIOPolicy& ioPolicy, const EContentHashMode mode = EContentHashMode::MissingOnly); static inline void computeMissingContentHashesParallel(ICPUPolygonGeometry* geo, const SFileIOPolicy& ioPolicy) { diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index e7be9ef8ef..3b7cebd9e7 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -891,7 +891,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } if ((_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0) - SPolygonGeometryContentHash::computeMissingParallel(geometry.get(), _params.ioPolicy); + SPolygonGeometryContentHash::computeMissing(geometry.get(), _params.ioPolicy); if (!parsedAABB.empty()) geometry->applyAABB(parsedAABB.value); diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index ffb9afe5ec..0f3117a48d 100644 --- 
a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -2168,7 +2168,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { if (deferredPositionHashThread.joinable()) deferredPositionHashThread.join(); - SPolygonGeometryContentHash::computeMissingParallel(geometry.get(), _params.ioPolicy); + SPolygonGeometryContentHash::computeMissing(geometry.get(), _params.ioPolicy); } else { diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 561123abd2..e7b58296d4 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -801,7 +801,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (computeContentHashes) { - SPolygonGeometryContentHash::computeMissingParallel(geometry.get(), _params.ioPolicy); + SPolygonGeometryContentHash::computeMissing(geometry.get(), _params.ioPolicy); } if (!parsedAABB.empty()) diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index cb6ad127ce..ab64908e83 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -21,7 +21,7 @@ namespace nbl::asset { -void CPolygonGeometryManipulator::collectUniqueBuffers(ICPUPolygonGeometry* geo, core::vector>& outBuffers) +void CPolygonGeometryManipulator::collectUniqueBuffers(const ICPUPolygonGeometry* geo, core::vector>& outBuffers) { if (!geo) { @@ -45,9 +45,9 @@ void CPolygonGeometryManipulator::collectUniqueBuffers(ICPUPolygonGeometry* geo, appendBuffer(geo->getPositionView()); appendBuffer(geo->getIndexView()); appendBuffer(geo->getNormalView()); - for (const auto& view : *geo->getAuxAttributeViews()) + for (const auto& view : geo->getAuxAttributeViews()) appendBuffer(view); - for (const auto& view : *geo->getJointWeightViews()) + 
for (const auto& view : geo->getJointWeightViews()) { appendBuffer(view.indices); appendBuffer(view.weights); @@ -82,6 +82,21 @@ void CPolygonGeometryManipulator::computeContentHashesParallel(ICPUPolygonGeomet if (pending.empty()) return; + const auto hashPendingRange = [&](const size_t beginIx, const size_t endIx) -> void + { + for (size_t i = beginIx; i < endIx; ++i) + { + auto& buffer = buffers[pending[i]]; + buffer->setContentHash(buffer->computeContentHash()); + } + }; + + if (ioPolicy.runtimeTuning.mode == SFileIOPolicy::SRuntimeTuning::Mode::Sequential) + { + hashPendingRange(0ull, pending.size()); + return; + } + const size_t hw = resolveLoaderHardwareThreads(); const uint8_t* hashSampleData = nullptr; uint64_t hashSampleBytes = 0ull; @@ -116,20 +131,12 @@ void CPolygonGeometryManipulator::computeContentHashesParallel(ICPUPolygonGeomet { const size_t beginIx = (pending.size() * workerIx) / workerCount; const size_t endIx = (pending.size() * (workerIx + 1ull)) / workerCount; - for (size_t i = beginIx; i < endIx; ++i) - { - auto& buffer = buffers[pending[i]]; - buffer->setContentHash(buffer->computeContentHash()); - } + hashPendingRange(beginIx, endIx); }); return; } - for (const auto pendingIx : pending) - { - auto& buffer = buffers[pendingIx]; - buffer->setContentHash(buffer->computeContentHash()); - } + hashPendingRange(0ull, pending.size()); } diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 57f66ad44b..8d78ea75db 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,8 +1,9 @@ add_subdirectory(nsc) add_subdirectory(xxHash256) +add_subdirectory(hcp) if(NBL_BUILD_IMGUI) add_subdirectory(nite EXCLUDE_FROM_ALL) endif() -NBL_ADJUST_FOLDERS(tools) \ No newline at end of file +NBL_ADJUST_FOLDERS(tools) diff --git a/tools/hcp/CMakeLists.txt b/tools/hcp/CMakeLists.txt new file mode 100644 index 0000000000..0d8f5a2033 --- /dev/null +++ b/tools/hcp/CMakeLists.txt @@ -0,0 +1,34 @@ +nbl_create_executable_project("" "" "" "") 
+add_dependencies(${EXECUTABLE_NAME} argparse) +target_include_directories(${EXECUTABLE_NAME} PRIVATE $) + +enable_testing() + +set(NBL_HCP_CI_ARGS + --buffer-bytes 67108864 + --seed 12345 +) + +add_test(NAME NBL_HCP_SEQUENTIAL + COMMAND "$" --runtime-tuning sequential ${NBL_HCP_CI_ARGS} + WORKING_DIRECTORY "$" + COMMAND_EXPAND_LISTS +) +add_test(NAME NBL_HCP_HEURISTIC + COMMAND "$" --runtime-tuning heuristic ${NBL_HCP_CI_ARGS} + WORKING_DIRECTORY "$" + COMMAND_EXPAND_LISTS +) +add_test(NAME NBL_HCP_HYBRID + COMMAND "$" --runtime-tuning hybrid ${NBL_HCP_CI_ARGS} + WORKING_DIRECTORY "$" + COMMAND_EXPAND_LISTS +) + +set_tests_properties( + NBL_HCP_SEQUENTIAL + NBL_HCP_HEURISTIC + NBL_HCP_HYBRID + PROPERTIES + LABELS "hash;ci" +) diff --git a/tools/hcp/README.md b/tools/hcp/README.md new file mode 100644 index 0000000000..81e8713fee --- /dev/null +++ b/tools/hcp/README.md @@ -0,0 +1,20 @@ +# hcp + +Headless parity checker for polygon geometry content hashing. + +## What it checks +- `recompute(..., sequential)` as baseline +- `recompute(..., )` equals baseline hash +- `computeMissing(..., )` preserves pre-set hashes and equals baseline hash +- timing logs for baseline, recompute and computeMissing + +## Args +- `--runtime-tuning ` (alias: `none` -> `sequential`, default: `heuristic`) +- `--buffer-bytes ` (minimum: `2097152`) +- `--seed ` (deterministic payload seed) + +## Example +`./hcp_d.exe --runtime-tuning heuristic --buffer-bytes 67108864 --seed 12345` + +## CTest +`ctest --output-on-failure -C Debug -R NBL_HCP` diff --git a/tools/hcp/main.cpp b/tools/hcp/main.cpp new file mode 100644 index 0000000000..e104b497ba --- /dev/null +++ b/tools/hcp/main.cpp @@ -0,0 +1,298 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nabla.h" +#include "nbl/system/IApplicationFramework.h" +#include "nbl/system/CStdoutLogger.h" + +#include "nbl/asset/interchange/SFileIOPolicy.h" +#include "nbl/asset/interchange/SGeometryContentHashCommon.h" +#include "nbl/core/hash/blake.h" +#include "argparse/argparse.hpp" + +#include +#include +#include +#include +#include +#include + +using namespace nbl; +using namespace nbl::asset; +using namespace nbl::system; + +constexpr size_t kMinBufferBytes = 2ull * 1024ull * 1024ull; +constexpr uint64_t kDefaultSeed = 0x6a09e667f3bcc909ull; + +enum class RuntimeMode : uint8_t +{ + Sequential, + Heuristic, + Hybrid +}; + +struct Options +{ + RuntimeMode mode = RuntimeMode::Heuristic; + size_t bufferBytes = kMinBufferBytes; + uint64_t seed = kDefaultSeed; +}; + +static const char* modeName(RuntimeMode mode) +{ + if (mode == RuntimeMode::Sequential) + return "sequential"; + if (mode == RuntimeMode::Hybrid) + return "hybrid"; + return "heuristic"; +} + +static SFileIOPolicy makePolicy(RuntimeMode mode) +{ + SFileIOPolicy policy = {}; + if (mode == RuntimeMode::Sequential) + policy.runtimeTuning.mode = SFileIOPolicy::SRuntimeTuning::Mode::Sequential; + else if (mode == RuntimeMode::Hybrid) + policy.runtimeTuning.mode = SFileIOPolicy::SRuntimeTuning::Mode::Hybrid; + else + policy.runtimeTuning.mode = SFileIOPolicy::SRuntimeTuning::Mode::Heuristic; + return policy; +} + +static uint64_t nextRand(uint64_t& state) +{ + state ^= state >> 12u; + state ^= state << 25u; + state ^= state >> 27u; + return state * 2685821657736338717ull; +} + +static std::optional parseOptions(const core::vector& args) +{ + argparse::ArgumentParser parser("hcp"); + parser.add_argument("--runtime-tuning").default_value(std::string("heuristic")); + parser.add_argument("--buffer-bytes").default_value(std::to_string(kMinBufferBytes)); + parser.add_argument("--seed").default_value(std::to_string(kDefaultSeed)); + + 
try + { + parser.parse_args({ args.data(), args.data() + args.size() }); + } + catch (const std::exception&) + { + return std::nullopt; + } + + auto parseU64 = [](const std::string& v) -> std::optional + { + try { return std::stoull(v, nullptr, 10); } catch (...) { return std::nullopt; } + }; + auto parseSize = [](const std::string& v) -> std::optional + { + try + { + const auto x = std::stoull(v, nullptr, 10); + if (x > static_cast(std::numeric_limits::max())) + return std::nullopt; + return static_cast(x); + } + catch (...) + { + return std::nullopt; + } + }; + + Options options = {}; + const auto mode = parser.get("--runtime-tuning"); + if (mode == "sequential" || mode == "none") + options.mode = RuntimeMode::Sequential; + else if (mode == "heuristic") + options.mode = RuntimeMode::Heuristic; + else if (mode == "hybrid") + options.mode = RuntimeMode::Hybrid; + else + return std::nullopt; + + const auto bytes = parseSize(parser.get("--buffer-bytes")); + const auto seed = parseU64(parser.get("--seed")); + if (!bytes.has_value() || !seed.has_value() || *bytes < kMinBufferBytes) + return std::nullopt; + + options.bufferBytes = *bytes; + options.seed = *seed; + return options; +} + +static core::smart_refctd_ptr createGeometry(const Options& options) +{ + constexpr E_FORMAT positionFormat = EF_R32G32B32_SFLOAT; + constexpr E_FORMAT normalFormat = EF_R32G32B32_SFLOAT; + constexpr E_FORMAT indexFormat = EF_R32_UINT; + constexpr E_FORMAT colorFormat = EF_R8G8B8A8_UNORM; + + const uint32_t positionStride = getTexelOrBlockBytesize(positionFormat); + const uint32_t normalStride = getTexelOrBlockBytesize(normalFormat); + const uint32_t indexStride = getTexelOrBlockBytesize(indexFormat); + const uint32_t colorStride = getTexelOrBlockBytesize(colorFormat); + const auto alignDown = [&](uint32_t stride) -> size_t { return options.bufferBytes - (options.bufferBytes % stride); }; + + auto makeBuffer = [&](size_t bytes, core::bitflag usage, uint64_t stream) -> 
core::smart_refctd_ptr + { + std::vector data(bytes); + uint64_t state = options.seed ^ (stream * 0x9e3779b97f4a7c15ull); + if (state == 0ull) + state = kDefaultSeed ^ stream; + for (auto& b : data) + b = static_cast(nextRand(state) & 0xffull); + + ICPUBuffer::SCreationParams params = {}; + params.size = data.size(); + params.usage = usage; + params.data = data.data(); + return ICPUBuffer::create(std::move(params)); + }; + + auto makeView = [](const core::smart_refctd_ptr& buffer, E_FORMAT format, uint32_t stride) -> ICPUPolygonGeometry::SDataView + { + ICPUPolygonGeometry::SDataView view = {}; + view.composed.format = format; + view.composed.stride = stride; + view.composed.rangeFormat = IGeometryBase::getMatchingAABBFormat(format); + view.composed.resetRange(); + view.src.offset = 0ull; + view.src.size = buffer ? buffer->getSize() : 0ull; + view.src.buffer = buffer; + return view; + }; + + auto positionBuffer = makeBuffer(alignDown(positionStride), IBuffer::EUF_VERTEX_BUFFER_BIT, 1ull); + auto normalBuffer = makeBuffer(alignDown(normalStride), IBuffer::EUF_VERTEX_BUFFER_BIT, 2ull); + auto indexBuffer = makeBuffer(alignDown(indexStride), IBuffer::EUF_INDEX_BUFFER_BIT, 3ull); + auto colorBuffer = makeBuffer(alignDown(colorStride), IBuffer::EUF_VERTEX_BUFFER_BIT, 4ull); + if (!positionBuffer || !normalBuffer || !indexBuffer || !colorBuffer) + return nullptr; + + auto geometry = core::make_smart_refctd_ptr(); + geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + geometry->setPositionView(makeView(positionBuffer, positionFormat, positionStride)); + geometry->setNormalView(makeView(normalBuffer, normalFormat, normalStride)); + geometry->setIndexView(makeView(indexBuffer, indexFormat, indexStride)); + geometry->getAuxAttributeViews()->push_back(makeView(colorBuffer, colorFormat, colorStride)); + geometry->getAuxAttributeViews()->push_back(makeView(colorBuffer, colorFormat, colorStride)); + return geometry; +} + +static bool runParityCheck(const Options& 
options, ILogger* logger) +{ + using clock_t = std::chrono::high_resolution_clock; + auto toMs = [](clock_t::duration d) { return std::chrono::duration(d).count(); }; + auto toMiB = [](size_t bytes) { return static_cast(bytes) / (1024.0 * 1024.0); }; + auto throughput = [&](size_t bytes, double ms) { return ms > 0.0 ? toMiB(bytes) * 1000.0 / ms : 0.0; }; + + auto geometry = createGeometry(options); + if (!geometry) + { + logger->log("Failed to create dummy geometry.", ILogger::ELL_ERROR); + return false; + } + + core::vector> buffers; + SPolygonGeometryContentHash::collectBuffers(geometry.get(), buffers); + if (buffers.empty()) + { + logger->log("No buffers collected from geometry.", ILogger::ELL_ERROR); + return false; + } + + size_t totalBytes = 0ull; + for (const auto& buffer : buffers) + totalBytes += buffer ? buffer->getSize() : 0ull; + if (totalBytes == 0ull) + { + logger->log("Collected zero-sized buffers.", ILogger::ELL_ERROR); + return false; + } + + const auto legacyPolicy = makePolicy(RuntimeMode::Sequential); + SPolygonGeometryContentHash::reset(geometry.get()); + const auto legacyStart = clock_t::now(); + const auto legacyHash = SPolygonGeometryContentHash::recompute(geometry.get(), legacyPolicy); + const double legacyMs = toMs(clock_t::now() - legacyStart); + + SPolygonGeometryContentHash::reset(geometry.get()); + const auto recomputeStart = clock_t::now(); + const auto recomputeHash = SPolygonGeometryContentHash::recompute(geometry.get(), makePolicy(options.mode)); + const double recomputeMs = toMs(clock_t::now() - recomputeStart); + if (recomputeHash != legacyHash) + { + logger->log("recompute hash mismatch.", ILogger::ELL_ERROR); + return false; + } + + if (!buffers[0]) + { + logger->log("First geometry buffer is null.", ILogger::ELL_ERROR); + return false; + } + const auto preservedHash = buffers[0]->getContentHash(); + const size_t missingBytes = totalBytes - buffers[0]->getSize(); + SPolygonGeometryContentHash::reset(geometry.get()); + 
buffers[0]->setContentHash(preservedHash); + const auto missingStart = clock_t::now(); + const auto missingHash = SPolygonGeometryContentHash::computeMissing(geometry.get(), makePolicy(options.mode)); + const double missingMs = toMs(clock_t::now() - missingStart); + if (buffers[0]->getContentHash() != preservedHash) + { + logger->log("computeMissing overwrote pre-set hash.", ILogger::ELL_ERROR); + return false; + } + if (missingHash != legacyHash) + { + logger->log("computeMissing hash mismatch.", ILogger::ELL_ERROR); + return false; + } + + logger->log("HCP mode=%s buffers=%llu total_mib=%.3f", ILogger::ELL_INFO, modeName(options.mode), static_cast(buffers.size()), toMiB(totalBytes)); + logger->log("HCP legacy ms=%.3f mib_s=%.3f", ILogger::ELL_INFO, legacyMs, throughput(totalBytes, legacyMs)); + logger->log("HCP recompute ms=%.3f mib_s=%.3f", ILogger::ELL_INFO, recomputeMs, throughput(totalBytes, recomputeMs)); + logger->log("HCP computeMissing ms=%.3f mib_s=%.3f missing_mib=%.3f", ILogger::ELL_INFO, missingMs, throughput(missingBytes, missingMs), toMiB(missingBytes)); + return true; +} + +class HashContentParityApp final : public IApplicationFramework +{ +public: + using IApplicationFramework::IApplicationFramework; + + bool onAppInitialized(core::smart_refctd_ptr&&) override + { + m_logger = core::make_smart_refctd_ptr(ILogger::DefaultLogMask()); + if (!isAPILoaded()) + { + m_logger->log("Could not load Nabla API.", ILogger::ELL_ERROR); + return false; + } + + const auto options = parseOptions(argv); + if (!options.has_value()) + { + m_logger->log("Usage: hcp [--runtime-tuning sequential|heuristic|hybrid] [--buffer-bytes N] [--seed U64]", ILogger::ELL_ERROR); + m_logger->log("Constraint: --buffer-bytes must be >= %llu", ILogger::ELL_ERROR, static_cast(kMinBufferBytes)); + return false; + } + + if (!runParityCheck(*options, m_logger.get())) + return false; + m_logger->log("OK", ILogger::ELL_INFO); + return true; + } + + void workLoopBody() override {} + bool 
keepRunning() override { return false; } + +private: + core::smart_refctd_ptr m_logger; +}; + +NBL_MAIN_FUNC(HashContentParityApp) From b286a04af0f2325ea1326d19f126b2fa8ee3dfda Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 5 Mar 2026 15:47:11 +0100 Subject: [PATCH 044/118] Clarify HCP README parity scope --- tools/hcp/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/hcp/README.md b/tools/hcp/README.md index 81e8713fee..e1a11fffb7 100644 --- a/tools/hcp/README.md +++ b/tools/hcp/README.md @@ -3,9 +3,11 @@ Headless parity checker for polygon geometry content hashing. ## What it checks +- input geometry buffers are generated as deterministic dummy blobs from `--seed` - `recompute(..., sequential)` as baseline - `recompute(..., )` equals baseline hash - `computeMissing(..., )` preserves pre-set hashes and equals baseline hash +- confirms `BLAKE3` content hashing parity independent of runtime tuning mode - timing logs for baseline, recompute and computeMissing ## Args From 57a87721f9d5d6791fc7a22df8d3b5c015ee5847 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 6 Mar 2026 06:56:03 +0100 Subject: [PATCH 045/118] Finalize loader and writer cleanup --- examples_tests | 2 +- .../nbl/asset/interchange/COBJMeshWriter.h | 2 + .../asset/interchange/SGeometryLoaderCommon.h | 69 +++ .../asset/interchange/SGeometryWriterCommon.h | 136 +++++ .../CGraphicsPipelineLoaderMTL.cpp | 30 +- .../asset/interchange/COBJMeshFileLoader.cpp | 295 ++-------- src/nbl/asset/interchange/COBJMeshWriter.cpp | 157 ++---- .../asset/interchange/CPLYMeshFileLoader.cpp | 38 +- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 363 +++++------- .../asset/interchange/CSTLMeshFileLoader.cpp | 213 +++---- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 210 +++---- src/nbl/core/hash/blake.cpp | 532 +----------------- 12 files changed, 654 insertions(+), 1393 deletions(-) create mode 100644 include/nbl/asset/interchange/SGeometryLoaderCommon.h diff --git 
a/examples_tests b/examples_tests index 391e4ebd9d..4eb1314632 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 391e4ebd9d29a3be501a714a1cc340c4b3d8bc04 +Subproject commit 4eb131463299c77ea8f3ed0e17ede02271f89bff diff --git a/include/nbl/asset/interchange/COBJMeshWriter.h b/include/nbl/asset/interchange/COBJMeshWriter.h index d17cbbcf69..b08f0dceee 100644 --- a/include/nbl/asset/interchange/COBJMeshWriter.h +++ b/include/nbl/asset/interchange/COBJMeshWriter.h @@ -17,6 +17,8 @@ class COBJMeshWriter : public IGeometryWriter public: COBJMeshWriter(); + uint64_t getSupportedAssetTypesBitfield() const override; + const char** getAssociatedFileExtensions() const override; writer_flags_t getSupportedFlags() override; diff --git a/include/nbl/asset/interchange/SGeometryLoaderCommon.h b/include/nbl/asset/interchange/SGeometryLoaderCommon.h new file mode 100644 index 0000000000..7e45f7b685 --- /dev/null +++ b/include/nbl/asset/interchange/SGeometryLoaderCommon.h @@ -0,0 +1,69 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_GEOMETRY_LOADER_COMMON_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_LOADER_COMMON_H_INCLUDED_ + + +#include "nbl/asset/ICPUPolygonGeometry.h" + + +namespace nbl::asset +{ + +class SGeometryLoaderCommon +{ + public: + static inline IGeometry::SDataView createDataView(core::smart_refctd_ptr&& buffer, const size_t byteCount, const uint32_t stride, const E_FORMAT format) + { + if (!buffer || byteCount == 0ull) + return {}; + + return { + .composed = { + .stride = stride, + .format = format, + .rangeFormat = IGeometryBase::getMatchingAABBFormat(format) + }, + .src = { + .offset = 0ull, + .size = byteCount, + .buffer = std::move(buffer) + } + }; + } + + template + static inline IGeometry::SDataView createAdoptedView(core::vector&& data) + { + if (data.empty()) + return {}; + + auto backer = core::make_smart_refctd_ptr>>(std::move(data)); + auto& storage = backer->getBacker(); + const size_t byteCount = storage.size() * sizeof(ValueType); + auto* const ptr = storage.data(); + auto buffer = ICPUBuffer::create( + { { byteCount }, ptr, core::smart_refctd_ptr(std::move(backer)), alignof(ValueType) }, + core::adopt_memory); + return createDataView(std::move(buffer), byteCount, static_cast(sizeof(ValueType)), Format); + } + + static inline hlsl::float32_t3 normalizeOrZero(const hlsl::float32_t3& v) + { + const float len2 = hlsl::dot(v, v); + if (len2 <= 0.f) + return hlsl::float32_t3(0.f, 0.f, 0.f); + return hlsl::normalize(v); + } + + static inline hlsl::float32_t3 computeFaceNormal(const hlsl::float32_t3& a, const hlsl::float32_t3& b, const hlsl::float32_t3& c) + { + return normalizeOrZero(hlsl::cross(b - a, c - a)); + } +}; + +} + + +#endif diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index 633c26a6a5..bf969fb8c7 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ 
b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -5,10 +5,12 @@ #define _NBL_ASSET_S_GEOMETRY_WRITER_COMMON_H_INCLUDED_ +#include "nbl/asset/ICPUScene.h" #include "nbl/asset/ICPUPolygonGeometry.h" #include #include +#include #include @@ -18,6 +20,124 @@ namespace nbl::asset class SGeometryWriterCommon { public: + static inline const ICPUPolygonGeometry* resolvePolygonGeometry(const IAsset* rootAsset) + { + if (!rootAsset) + return nullptr; + + if (const auto* geometry = IAsset::castDown(rootAsset)) + return geometry; + + const auto* scene = IAsset::castDown(rootAsset); + if (!scene) + return nullptr; + + for (const auto& morphTargetsRef : scene->getInstances().getMorphTargets()) + { + const auto* morphTargets = morphTargetsRef.get(); + if (!morphTargets) + continue; + for (const auto& target : morphTargets->getTargets()) + { + const auto* const collection = target.geoCollection.get(); + if (!collection) + continue; + for (const auto& geoRef : collection->getGeometries()) + { + if (const auto* geometry = IAsset::castDown(geoRef.geometry.get())) + return geometry; + } + } + } + + return nullptr; + } + + static inline const ICPUPolygonGeometry::SDataView* findFirstAuxViewByChannelCount(const ICPUPolygonGeometry* geom, const uint32_t channels, const size_t requiredElementCount = 0ull) + { + if (!geom || channels == 0u) + return nullptr; + + for (const auto& view : geom->getAuxAttributeViews()) + { + if (!view) + continue; + if (requiredElementCount && view.getElementCount() != requiredElementCount) + continue; + if (getFormatChannelCount(view.composed.format) == channels) + return &view; + } + + return nullptr; + } + + static inline bool decodeTriangleIndices(const ICPUPolygonGeometry* geom, core::vector& indexData, const uint32_t*& outIndices, size_t& outFaceCount) + { + outIndices = nullptr; + outFaceCount = 0ull; + if (!geom) + return false; + + const auto& positionView = geom->getPositionView(); + const size_t vertexCount = positionView.getElementCount(); 
+ if (vertexCount == 0ull) + return false; + + const auto& indexView = geom->getIndexView(); + if (indexView) + { + const size_t indexCount = indexView.getElementCount(); + if ((indexCount % 3ull) != 0ull) + return false; + + const void* src = indexView.getPointer(); + if (!src) + return false; + + if (indexView.composed.format == EF_R32_UINT && indexView.composed.getStride() == sizeof(uint32_t)) + { + outIndices = reinterpret_cast(src); + } + else if (indexView.composed.format == EF_R16_UINT && indexView.composed.getStride() == sizeof(uint16_t)) + { + indexData.resize(indexCount); + const auto* src16 = reinterpret_cast(src); + for (size_t i = 0ull; i < indexCount; ++i) + indexData[i] = src16[i]; + outIndices = indexData.data(); + } + else + { + indexData.resize(indexCount); + hlsl::vector decoded = {}; + for (size_t i = 0ull; i < indexCount; ++i) + { + if (!indexView.decodeElement(i, decoded)) + return false; + indexData[i] = decoded.x; + } + outIndices = indexData.data(); + } + + for (size_t i = 0ull; i < indexCount; ++i) + if (outIndices[i] >= vertexCount) + return false; + + outFaceCount = indexCount / 3ull; + return true; + } + + if ((vertexCount % 3ull) != 0ull) + return false; + + indexData.resize(vertexCount); + for (size_t i = 0ull; i < vertexCount; ++i) + indexData[i] = static_cast(i); + outIndices = indexData.data(); + outFaceCount = vertexCount / 3ull; + return true; + } + static inline const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) { if (!view) @@ -55,6 +175,22 @@ class SGeometryWriterCommon const size_t writeLen = static_cast(written); return (writeLen < static_cast(end - dst)) ? 
(dst + writeLen) : end; } + + static inline char* appendUIntToBuffer(char* dst, char* const end, const uint32_t value) + { + if (!dst || dst >= end) + return end; + + const auto result = std::to_chars(dst, end, value); + if (result.ec == std::errc()) + return result.ptr; + + const int written = std::snprintf(dst, static_cast(end - dst), "%u", value); + if (written <= 0) + return dst; + const size_t writeLen = static_cast(written); + return (writeLen < static_cast(end - dst)) ? (dst + writeLen) : end; + } }; } diff --git a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp index b538f75eb3..6521fa9775 100644 --- a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp +++ b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "nbl/system/CFileView.h" @@ -458,10 +460,15 @@ const char* CGraphicsPipelineLoaderMTL::readTexture(const char* _bufPtr, const c mapType = found->second; } } - else if (strncmp(_bufPtr,"-bm",3)==0) + else if (strncmp(_bufPtr,"-bm",3)==0) { _bufPtr = goAndCopyNextWord(tmpbuf, _bufPtr, WORD_BUFFER_LENGTH, _bufEnd); - sscanf(tmpbuf, "%f", &_currMaterial->params.bumpFactor); + const char* tokenEnd = tmpbuf; + while (*tokenEnd != '\0') + ++tokenEnd; + const auto parseResult = fast_float::from_chars(tmpbuf, tokenEnd, _currMaterial->params.bumpFactor); + if (!(parseResult.ec == std::errc() && parseResult.ptr == tokenEnd)) + _currMaterial->params.bumpFactor = 0.f; } else if (strncmp(_bufPtr,"-blendu",7)==0) @@ -763,12 +770,15 @@ auto CGraphicsPipelineLoaderMTL::readMaterials(system::IFile* _file, const syste char tmpbuf[WORD_BUFFER_LENGTH]{}; auto readFloat = [&tmpbuf, &bufPtr, bufEnd] { - float f = 0.f; - bufPtr = goAndCopyNextWord(tmpbuf, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(tmpbuf, "%f", &f); - return f; + const char* tokenEnd = tmpbuf; + while (*tokenEnd != '\0') + ++tokenEnd; + + float f = 0.f; + 
const auto parseResult = fast_float::from_chars(tmpbuf, tokenEnd, f); + return (parseResult.ec == std::errc() && parseResult.ptr == tokenEnd) ? f : 0.f; }; auto readRGB = [&readFloat] { core::vector3df_SIMD rgb(1.f); @@ -817,7 +827,13 @@ auto CGraphicsPipelineLoaderMTL::readMaterials(system::IFile* _file, const syste if (currMaterial) { bufPtr = goAndCopyNextWord(tmpbuf, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - currMaterial->params.extra |= (atol(tmpbuf)&0x0f);//illum values are in range [0;10] + const char* tokenEnd = tmpbuf; + while (*tokenEnd != '\0') + ++tokenEnd; + uint32_t illum = 0u; + const auto parseResult = std::from_chars(tmpbuf, tokenEnd, illum, 10); + if (parseResult.ec == std::errc() && parseResult.ptr == tokenEnd) + currMaterial->params.extra |= (illum & 0x0fu);//illum values are in range [0;10] } break; case 'N': diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 3b7cebd9e7..d76d92c7e0 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -8,6 +8,7 @@ #include "nbl/asset/IAssetManager.h" #include "nbl/asset/ICPUGeometryCollection.h" #include "nbl/asset/interchange/SGeometryContentHashCommon.h" +#include "nbl/asset/interchange/SGeometryLoaderCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/utils/SGeometryAABBCommon.h" @@ -21,6 +22,8 @@ #include #include +#include +#include #include #include #include @@ -54,154 +57,18 @@ inline bool isObjInlineWhitespace(const char c) inline bool isObjDigit(const char c) { - return c >= '0' && c <= '9'; + return std::isdigit(static_cast(c)) != 0; } inline bool parseObjFloat(const char*& ptr, const char* const end, float& out) { - const char* const start = ptr; - if (start >= end) + const auto parseResult = fast_float::from_chars(ptr, end, out); + if (parseResult.ec != std::errc() || 
parseResult.ptr == ptr) return false; - - const char* p = start; - bool negative = false; - if (*p == '-' || *p == '+') - { - negative = (*p == '-'); - ++p; - if (p >= end) - return false; - } - - if (*p == '.' || !isObjDigit(*p)) - { - const auto parseResult = fast_float::from_chars(start, end, out); - if (parseResult.ec == std::errc() && parseResult.ptr != start) - { - ptr = parseResult.ptr; - return true; - } - return false; - } - - uint64_t integerPart = 0ull; - while (p < end && isObjDigit(*p)) - { - integerPart = integerPart * 10ull + static_cast(*p - '0'); - ++p; - } - - double value = static_cast(integerPart); - if (p < end && *p == '.') - { - const char* const dot = p; - if ((dot + 7) <= end) - { - const char d0 = dot[1]; - const char d1 = dot[2]; - const char d2 = dot[3]; - const char d3 = dot[4]; - const char d4 = dot[5]; - const char d5 = dot[6]; - if ( - isObjDigit(d0) && isObjDigit(d1) && isObjDigit(d2) && - isObjDigit(d3) && isObjDigit(d4) && isObjDigit(d5) - ) - { - const bool hasNext = (dot + 7) < end; - const char next = hasNext ? dot[7] : '\0'; - if ((!hasNext || !isObjDigit(next)) && (!hasNext || (next != 'e' && next != 'E'))) - { - const uint32_t frac = - static_cast(d0 - '0') * 100000u + - static_cast(d1 - '0') * 10000u + - static_cast(d2 - '0') * 1000u + - static_cast(d3 - '0') * 100u + - static_cast(d4 - '0') * 10u + - static_cast(d5 - '0'); - value += static_cast(frac) * 1e-6; - p = dot + 7; - out = static_cast(negative ? 
-value : value); - ptr = p; - return true; - } - } - } - - static constexpr double InvPow10[] = { - 1.0, - 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, - 1e-6, 1e-7, 1e-8, 1e-9, 1e-10, - 1e-11, 1e-12, 1e-13, 1e-14, 1e-15, - 1e-16, 1e-17, 1e-18 - }; - ++p; - uint64_t fractionPart = 0ull; - uint32_t fractionDigits = 0u; - while (p < end && isObjDigit(*p)) - { - if (fractionDigits >= (std::size(InvPow10) - 1u)) - { - const auto parseResult = fast_float::from_chars(start, end, out); - if (parseResult.ec == std::errc() && parseResult.ptr != start) - { - ptr = parseResult.ptr; - return true; - } - return false; - } - fractionPart = fractionPart * 10ull + static_cast(*p - '0'); - ++fractionDigits; - ++p; - } - value += static_cast(fractionPart) * InvPow10[fractionDigits]; - } - - if (p < end && (*p == 'e' || *p == 'E')) - { - const auto parseResult = fast_float::from_chars(start, end, out); - if (parseResult.ec == std::errc() && parseResult.ptr != start) - { - ptr = parseResult.ptr; - return true; - } - return false; - } - - out = static_cast(negative ? 
-value : value); - ptr = p; + ptr = parseResult.ptr; return true; } -const auto createAdoptedView = [](auto&& data, const E_FORMAT format) -> IGeometry::SDataView -{ - using T = typename std::decay_t::value_type; - if (data.empty()) - return {}; - - auto backer = core::make_smart_refctd_ptr>>(std::move(data)); - auto& storage = backer->getBacker(); - auto* const ptr = storage.data(); - const size_t byteCount = storage.size() * sizeof(T); - auto buffer = ICPUBuffer::create({ { byteCount }, ptr, core::smart_refctd_ptr(std::move(backer)), alignof(T) }, core::adopt_memory); - if (!buffer) - return {}; - - IGeometry::SDataView view = { - .composed = { - .stride = sizeof(T), - .format = format, - .rangeFormat = IGeometryBase::getMatchingAABBFormat(format) - }, - .src = { - .offset = 0u, - .size = byteCount, - .buffer = std::move(buffer) - } - }; - return view; -}; - bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry& ioTelemetry) { return SInterchangeIOCommon::readFileWithPolicyTimed(file, reinterpret_cast(dst), 0ull, byteCount, ioPlan, nullptr, &ioTelemetry); @@ -209,29 +76,17 @@ bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, co inline bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint32_t& out) { - if (ptr >= end || !isObjDigit(*ptr)) + uint32_t value = 0u; + const auto parseResult = std::from_chars(ptr, end, value); + if (parseResult.ec != std::errc() || parseResult.ptr == ptr) return false; - - uint64_t value = 0ull; - while (ptr < end && isObjDigit(*ptr)) - { - value = value * 10ull + static_cast(*ptr - '0'); - ++ptr; - } - if (value == 0ull || value > static_cast(std::numeric_limits::max())) + if (value == 0u || value > static_cast(std::numeric_limits::max())) return false; - - out = static_cast(value); + ptr = parseResult.ptr; + out = value; return true; } -inline char toObjLowerAscii(const char c) -{ - if (c >= 'A' && c <= 'Z') - 
return static_cast(c - 'A' + 'a'); - return c; -} - inline void parseObjSmoothingGroup(const char* linePtr, const char* const lineEnd, uint32_t& outGroup) { while (linePtr < lineEnd && isObjInlineWhitespace(*linePtr)) @@ -246,43 +101,27 @@ inline void parseObjSmoothingGroup(const char* linePtr, const char* const lineEn const char* const tokenStart = linePtr; while (linePtr < lineEnd && !isObjInlineWhitespace(*linePtr)) ++linePtr; - const size_t tokenLength = static_cast(linePtr - tokenStart); + const std::string_view token(tokenStart, static_cast(linePtr - tokenStart)); - if (tokenLength == 2u && - toObjLowerAscii(tokenStart[0]) == 'o' && - toObjLowerAscii(tokenStart[1]) == 'n') + if (token.size() == 2u && + static_cast(std::tolower(static_cast(token[0]))) == 'o' && + static_cast(std::tolower(static_cast(token[1]))) == 'n') { outGroup = 1u; return; } - if (tokenLength == 3u && - toObjLowerAscii(tokenStart[0]) == 'o' && - toObjLowerAscii(tokenStart[1]) == 'f' && - toObjLowerAscii(tokenStart[2]) == 'f') + if (token.size() == 3u && + static_cast(std::tolower(static_cast(token[0]))) == 'o' && + static_cast(std::tolower(static_cast(token[1]))) == 'f' && + static_cast(std::tolower(static_cast(token[2]))) == 'f') { outGroup = 0u; return; } - uint64_t value = 0ull; - bool sawDigit = false; - for (const char* it = tokenStart; it < linePtr; ++it) - { - if (!isObjDigit(*it)) - { - outGroup = 0u; - return; - } - sawDigit = true; - value = value * 10ull + static_cast(*it - '0'); - if (value > static_cast(std::numeric_limits::max())) - { - outGroup = 0u; - return; - } - } - - outGroup = sawDigit ? static_cast(value) : 0u; + uint32_t value = 0u; + const auto parseResult = std::from_chars(token.data(), token.data() + token.size(), value); + outGroup = (parseResult.ec == std::errc() && parseResult.ptr == token.data() + token.size()) ? 
value : 0u; } inline std::string parseObjIdentifier(const char* linePtr, const char* const lineEnd, const std::string_view fallback) @@ -312,15 +151,9 @@ inline bool parseObjTrianglePositiveTripletLine(const char* const lineStart, con int32_t posIx = -1; { uint32_t value = 0u; - while (ptr < lineEnd && isObjDigit(*ptr)) - { - const uint32_t digit = static_cast(*ptr - '0'); - if (value > 429496729u) - return false; - value = value * 10u + digit; - ++ptr; - } - if (value == 0u || value > posCount) + if (!parseUnsignedObjIndex(ptr, lineEnd, value)) + return false; + if (value > posCount) return false; posIx = static_cast(value - 1u); } @@ -331,17 +164,9 @@ inline bool parseObjTrianglePositiveTripletLine(const char* const lineStart, con int32_t uvIx = -1; { uint32_t value = 0u; - if (ptr >= lineEnd || !isObjDigit(*ptr)) + if (!parseUnsignedObjIndex(ptr, lineEnd, value)) return false; - while (ptr < lineEnd && isObjDigit(*ptr)) - { - const uint32_t digit = static_cast(*ptr - '0'); - if (value > 429496729u) - return false; - value = value * 10u + digit; - ++ptr; - } - if (value == 0u || value > uvCount) + if (value > uvCount) return false; uvIx = static_cast(value - 1u); } @@ -352,17 +177,9 @@ inline bool parseObjTrianglePositiveTripletLine(const char* const lineStart, con int32_t normalIx = -1; { uint32_t value = 0u; - if (ptr >= lineEnd || !isObjDigit(*ptr)) + if (!parseUnsignedObjIndex(ptr, lineEnd, value)) return false; - while (ptr < lineEnd && isObjDigit(*ptr)) - { - const uint32_t digit = static_cast(*ptr - '0'); - if (value > 429496729u) - return false; - value = value * 10u + digit; - ++ptr; - } - if (value == 0u || value > normalCount) + if (value > normalCount) return false; normalIx = static_cast(value - 1u); } @@ -380,38 +197,14 @@ inline bool parseObjTrianglePositiveTripletLine(const char* const lineStart, con inline bool parseSignedObjIndex(const char*& ptr, const char* const end, int32_t& out) { - if (ptr >= end) + int32_t value = 0; + const auto 
parseResult = std::from_chars(ptr, end, value); + if (parseResult.ec != std::errc() || parseResult.ptr == ptr) return false; - - bool negative = false; - if (*ptr == '-') - { - negative = true; - ++ptr; - } - else if (*ptr == '+') - { - ++ptr; - } - - if (ptr >= end || !isObjDigit(*ptr)) - return false; - - int64_t value = 0; - while (ptr < end && isObjDigit(*ptr)) - { - value = value * 10ll + static_cast(*ptr - '0'); - ++ptr; - } - if (negative) - value = -value; - if (value == 0) return false; - if (value < static_cast(std::numeric_limits::min()) || value > static_cast(std::numeric_limits::max())) - return false; - - out = static_cast(value); + ptr = parseResult.ptr; + out = value; return true; } @@ -588,7 +381,7 @@ bool COBJMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste continue; } - switch (toObjLowerAscii(*ptr)) + switch (static_cast(std::tolower(static_cast(*ptr)))) { case 'v': case 'f': @@ -841,7 +634,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const size_t outVertexCount = outPositions.size(); auto geometry = core::make_smart_refctd_ptr(); { - auto view = createAdoptedView(std::move(outPositions), EF_R32G32B32_SFLOAT); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(outPositions)); if (!view) return false; geometry->setPositionView(std::move(view)); @@ -850,7 +643,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const bool hasNormals = hasProvidedNormals || needsNormalGeneration; if (hasNormals) { - auto view = createAdoptedView(std::move(outNormals), EF_R32G32B32_SFLOAT); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(outNormals)); if (!view) return false; geometry->setNormalView(std::move(view)); @@ -858,7 +651,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (hasUVs) { - auto view = createAdoptedView(std::move(outUVs), EF_R32G32_SFLOAT); + auto view = 
SGeometryLoaderCommon::createAdoptedView(std::move(outUVs)); if (!view) return false; geometry->getAuxAttributeViews()->push_back(std::move(view)); @@ -872,14 +665,14 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector indices16(indices.size()); for (size_t i = 0u; i < indices.size(); ++i) indices16[i] = static_cast(indices[i]); - auto view = createAdoptedView(std::move(indices16), EF_R16_UINT); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices16)); if (!view) return false; geometry->setIndexView(std::move(view)); } else { - auto view = createAdoptedView(std::move(indices), EF_R32_UINT); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices)); if (!view) return false; geometry->setIndexView(std::move(view)); @@ -1089,10 +882,10 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (lineStart < lineEnd) { - const char lineType = toObjLowerAscii(*lineStart); + const char lineType = static_cast(std::tolower(static_cast(*lineStart))); if (lineType == 'v') { - const char subType = ((lineStart + 1) < lineEnd) ? toObjLowerAscii(lineStart[1]) : '\0'; + const char subType = ((lineStart + 1) < lineEnd) ? 
static_cast(std::tolower(static_cast(lineStart[1]))) : '\0'; if ((lineStart + 1) < lineEnd && subType == ' ') { Float3 vec{}; diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 322c66b94d..d749c80abf 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -27,6 +27,11 @@ COBJMeshWriter::COBJMeshWriter() #endif } +uint64_t COBJMeshWriter::getSupportedAssetTypesBitfield() const +{ + return IAsset::ET_GEOMETRY | IAsset::ET_SCENE; +} + const char** COBJMeshWriter::getAssociatedFileExtensions() const { static const char* ext[] = { "obj", nullptr }; @@ -48,8 +53,8 @@ namespace obj_writer_detail constexpr size_t ApproxObjBytesPerVertex = 96ull; constexpr size_t ApproxObjBytesPerFace = 48ull; -constexpr size_t MaxUInt32Chars = 10ull; constexpr size_t MaxFloatFixed6Chars = 48ull; +constexpr size_t MaxUInt32Chars = std::numeric_limits::digits10 + 1ull; constexpr size_t MaxIndexTokenBytes = MaxUInt32Chars * 3ull + 2ull; struct SIndexStringRef @@ -64,23 +69,7 @@ bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hls return view.decodeElement(ix, out); } -char* appendUIntToBuffer(char* dst, char* const end, const uint32_t value) -{ - if (!dst || dst >= end) - return end; - - const auto result = std::to_chars(dst, end, value); - if (result.ec == std::errc()) - return result.ptr; - - const int written = std::snprintf(dst, static_cast(end - dst), "%u", value); - if (written <= 0) - return dst; - const size_t writeLen = static_cast(written); - return (writeLen < static_cast(end - dst)) ? 
(dst + writeLen) : end; -} - -void appendVec3Line(std::string& out, const char* prefix, const size_t prefixSize, const float x, const float y, const float z) +void appendVec3Line(std::string& out, const char* prefix, const size_t prefixSize, const hlsl::float32_t3& v) { const size_t oldSize = out.size(); out.resize(oldSize + prefixSize + (3ull * MaxFloatFixed6Chars) + 3ull); @@ -91,20 +80,20 @@ void appendVec3Line(std::string& out, const char* prefix, const size_t prefixSiz std::memcpy(cursor, prefix, prefixSize); cursor += prefixSize; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, x); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, v.x); if (cursor < lineEnd) *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, y); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, v.y); if (cursor < lineEnd) *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, z); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, v.z); if (cursor < lineEnd) *(cursor++) = '\n'; out.resize(oldSize + static_cast(cursor - lineBegin)); } -void appendVec2Line(std::string& out, const char* prefix, const size_t prefixSize, const float x, const float y) +void appendVec2Line(std::string& out, const char* prefix, const size_t prefixSize, const hlsl::float32_t2& v) { const size_t oldSize = out.size(); out.resize(oldSize + prefixSize + (2ull * MaxFloatFixed6Chars) + 2ull); @@ -115,10 +104,10 @@ void appendVec2Line(std::string& out, const char* prefix, const size_t prefixSiz std::memcpy(cursor, prefix, prefixSize); cursor += prefixSize; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, x); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, v.x); if (cursor < lineEnd) *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, 
y); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, v.y); if (cursor < lineEnd) *(cursor++) = '\n'; @@ -157,21 +146,21 @@ void appendIndexTokenToStorage(std::string& storage, core::vector(cursor - token)); } @@ -200,7 +189,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (!_file || !_params.rootAsset) return false; - const auto* geom = IAsset::castDown(_params.rootAsset); + const auto* geom = SGeometryWriterCommon::resolvePolygonGeometry(_params.rootAsset); if (!geom || !geom->valid()) return false; @@ -216,19 +205,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const auto& normalView = geom->getNormalView(); const bool hasNormals = static_cast(normalView); - const auto& auxViews = geom->getAuxAttributeViews(); - const ICPUPolygonGeometry::SDataView* uvView = nullptr; - for (const auto& view : auxViews) - { - if (!view) - continue; - const auto channels = getFormatChannelCount(view.composed.format); - if (channels == 2u) - { - uvView = &view; - break; - } - } + const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::findFirstAuxViewByChannelCount(geom, 2u); const bool hasUVs = uvView != nullptr; const size_t vertexCount = positionView.getElementCount(); @@ -245,58 +222,11 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (indexing->knownTopology() != E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) return false; - const auto& indexView = geom->getIndexView(); core::vector indexData; const uint32_t* indices = nullptr; size_t faceCount = 0; - if (indexView) - { - const size_t indexCount = indexView.getElementCount(); - if (indexCount % 3u != 0u) - return false; - - const void* src = indexView.getPointer(); - if (!src) - return false; - - if (indexView.composed.format == EF_R32_UINT && indexView.composed.getStride() == sizeof(uint32_t)) - { - indices = reinterpret_cast(src); - } - else if (indexView.composed.format == 
EF_R16_UINT && indexView.composed.getStride() == sizeof(uint16_t)) - { - indexData.resize(indexCount); - const uint16_t* src16 = reinterpret_cast(src); - for (size_t i = 0; i < indexCount; ++i) - indexData[i] = src16[i]; - indices = indexData.data(); - } - else - { - indexData.resize(indexCount); - hlsl::vector decoded = {}; - for (size_t i = 0; i < indexCount; ++i) - { - if (!indexView.decodeElement(i, decoded)) - return false; - indexData[i] = decoded.x; - } - indices = indexData.data(); - } - faceCount = indexCount / 3u; - } - else - { - if (vertexCount % 3u != 0u) - return false; - - indexData.resize(vertexCount); - for (size_t i = 0; i < vertexCount; ++i) - indexData[i] = static_cast(i); - - indices = indexData.data(); - faceCount = vertexCount / 3u; - } + if (!SGeometryWriterCommon::decodeTriangleIndices(geom, indexData, indices, faceCount)) + return false; const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool flipHandedness = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); @@ -311,49 +241,40 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const hlsl::float32_t2* const tightUV = hasUVs ? 
SGeometryWriterCommon::getTightFloat2View(*uvView) : nullptr; for (size_t i = 0u; i < vertexCount; ++i) { - float x = 0.f; - float y = 0.f; - float z = 0.f; + hlsl::float32_t3 vertex = {}; if (tightPositions) { - x = tightPositions[i].x; - y = tightPositions[i].y; - z = tightPositions[i].z; + vertex = tightPositions[i]; } else { if (!decodeVec4(positionView, i, tmp)) return false; - x = static_cast(tmp.x); - y = static_cast(tmp.y); - z = static_cast(tmp.z); + vertex = hlsl::float32_t3(static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z)); } if (flipHandedness) - x = -x; + vertex.x = -vertex.x; - appendVec3Line(output, "v ", sizeof("v ") - 1ull, x, y, z); + appendVec3Line(output, "v ", sizeof("v ") - 1ull, vertex); } if (hasUVs) { for (size_t i = 0u; i < vertexCount; ++i) { - float u = 0.f; - float v = 0.f; + hlsl::float32_t2 uv = {}; if (tightUV) { - u = tightUV[i].x; - v = 1.f - tightUV[i].y; + uv = hlsl::float32_t2(tightUV[i].x, 1.f - tightUV[i].y); } else { if (!decodeVec4(*uvView, i, tmp)) return false; - u = static_cast(tmp.x); - v = 1.f - static_cast(tmp.y); + uv = hlsl::float32_t2(static_cast(tmp.x), 1.f - static_cast(tmp.y)); } - appendVec2Line(output, "vt ", sizeof("vt ") - 1ull, u, v); + appendVec2Line(output, "vt ", sizeof("vt ") - 1ull, uv); } } @@ -361,27 +282,21 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ { for (size_t i = 0u; i < vertexCount; ++i) { - float x = 0.f; - float y = 0.f; - float z = 0.f; + hlsl::float32_t3 normal = {}; if (tightNormals) { - x = tightNormals[i].x; - y = tightNormals[i].y; - z = tightNormals[i].z; + normal = tightNormals[i]; } else { if (!decodeVec4(normalView, i, tmp)) return false; - x = static_cast(tmp.x); - y = static_cast(tmp.y); - z = static_cast(tmp.z); + normal = hlsl::float32_t3(static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z)); } if (flipHandedness) - x = -x; + normal.x = -normal.x; - appendVec3Line(output, "vn ", sizeof("vn ") - 1ull, x, y, z); + 
appendVec3Line(output, "vn ", sizeof("vn ") - 1ull, normal); } } diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 0f3117a48d..6b41a259f3 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -6,6 +6,7 @@ #include "CPLYMeshFileLoader.h" #include "nbl/asset/interchange/SGeometryContentHashCommon.h" +#include "nbl/asset/interchange/SGeometryLoaderCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/IAssetManager.h" @@ -86,39 +87,6 @@ inline std::string_view plyToStringView(const char* text) return text ? std::string_view{ text } : std::string_view{}; } -inline IGeometry::SDataView plyCreateDataView(core::smart_refctd_ptr&& buffer, const size_t byteCount, const uint32_t stride, const E_FORMAT format) -{ - if (!buffer || byteCount == 0ull) - return {}; - - return { - .composed = { - .stride = stride, - .format = format, - .rangeFormat = IGeometryBase::getMatchingAABBFormat(format) - }, - .src = { - .offset = 0ull, - .size = byteCount, - .buffer = std::move(buffer) - } - }; -} - -template -IGeometry::SDataView plyCreateAdoptedView(core::vector&& data) -{ - if (data.empty()) - return {}; - - auto backer = core::make_smart_refctd_ptr>>(std::move(data)); - auto& storage = backer->getBacker(); - const size_t byteCount = storage.size() * sizeof(ValueType); - auto* const ptr = storage.data(); - auto buffer = ICPUBuffer::create({ { byteCount }, ptr, core::smart_refctd_ptr(std::move(backer)), alignof(ValueType) }, core::adopt_memory); - return plyCreateDataView(std::move(buffer), byteCount, static_cast(sizeof(ValueType)), Format); -} - struct SContext { static constexpr uint64_t ReadWindowPaddingBytes = 1ull; @@ -2146,7 +2114,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa core::vector indices16(indices.size()); for 
(size_t i = 0u; i < indices.size(); ++i) indices16[i] = static_cast(indices[i]); - auto view = plyCreateAdoptedView(std::move(indices16)); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices16)); if (!view) return {}; geometry->setIndexView(std::move(view)); @@ -2154,7 +2122,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } else { - auto view = plyCreateAdoptedView(std::move(indices)); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices)); if (!view) return {}; if (precomputedIndexHash != IPreHashed::INVALID_HASH) diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 18a08fbdb8..bb099b8277 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -13,11 +13,13 @@ #include #include +#include #include #include #include #include #include +#include #include namespace nbl::asset @@ -49,8 +51,8 @@ writer_flags_t CPLYMeshWriter::getForcedFlags() namespace ply_writer_detail { -constexpr size_t ApproxPlyTextBytesPerVertex = 96ull; -constexpr size_t ApproxPlyTextBytesPerFace = 32ull; +constexpr size_t ApproxPlyTextBytesPerVertex = sizeof("0.000000 0.000000 0.000000 0.000000 0.000000 0.000000\n") - 1ull; +constexpr size_t ApproxPlyTextBytesPerFace = sizeof("3 4294967295 4294967295 4294967295\n") - 1ull; enum class EPlyScalarType : uint8_t { @@ -87,40 +89,42 @@ SPlyScalarMeta getPlyScalarMeta(const EPlyScalarType type) } } -bool isPlyUnsupportedPackedFormat(const E_FORMAT format) +bool isPlySupportedScalarFormat(const E_FORMAT format) { - switch (format) - { - case EF_A2R10G10B10_UINT_PACK32: - case EF_A2R10G10B10_SINT_PACK32: - case EF_A2R10G10B10_UNORM_PACK32: - case EF_A2R10G10B10_SNORM_PACK32: - case EF_A2R10G10B10_USCALED_PACK32: - case EF_A2R10G10B10_SSCALED_PACK32: - case EF_A2B10G10R10_UINT_PACK32: - case EF_A2B10G10R10_SINT_PACK32: - case EF_A2B10G10R10_UNORM_PACK32: - case 
EF_A2B10G10R10_SNORM_PACK32: - case EF_A2B10G10R10_USCALED_PACK32: - case EF_A2B10G10R10_SSCALED_PACK32: - case EF_B10G11R11_UFLOAT_PACK32: - case EF_E5B9G9R9_UFLOAT_PACK32: - return true; - default: - return false; - } + if (format == EF_UNKNOWN) + return false; + + const uint32_t channels = getFormatChannelCount(format); + if (channels == 0u) + return false; + + if (!(isIntegerFormat(format) || isFloatingPointFormat(format) || isNormalizedFormat(format) || isScaledFormat(format))) + return false; + + const auto bytesPerPixel = getBytesPerPixel(format); + if (bytesPerPixel.getDenominator() != 1u) + return false; + const uint32_t pixelBytes = bytesPerPixel.getNumerator(); + if (pixelBytes == 0u || (pixelBytes % channels) != 0u) + return false; + + const uint32_t bytesPerChannel = pixelBytes / channels; + return bytesPerChannel == 1u || bytesPerChannel == 2u || bytesPerChannel == 4u || bytesPerChannel == 8u; } EPlyScalarType selectPlyScalarType(const E_FORMAT format) { - if (format == EF_UNKNOWN || isPlyUnsupportedPackedFormat(format)) + if (!isPlySupportedScalarFormat(format)) return EPlyScalarType::Float32; if (isNormalizedFormat(format) || isScaledFormat(format)) return EPlyScalarType::Float32; const uint32_t channels = getFormatChannelCount(format); if (channels == 0u) + { + assert(format == EF_UNKNOWN); return EPlyScalarType::Float32; + } const auto bytesPerPixel = getBytesPerPixel(format); if (bytesPerPixel.getDenominator() != 1u) @@ -414,33 +418,28 @@ struct SExtraAuxView EPlyScalarType scalarType = EPlyScalarType::Float32; }; +struct SWriteInput +{ + const ICPUPolygonGeometry* geom = nullptr; + EPlyScalarType positionScalarType = EPlyScalarType::Float32; + const ICPUPolygonGeometry::SDataView* uvView = nullptr; + EPlyScalarType uvScalarType = EPlyScalarType::Float32; + const core::vector* extraAuxViews = nullptr; + bool writeNormals = false; + EPlyScalarType normalScalarType = EPlyScalarType::Float32; + size_t vertexCount = 0ull; + const uint32_t* indices = 
nullptr; + size_t faceCount = 0ull; + bool write16BitIndices = false; + bool flipVectors = false; +}; + bool writeBinary( - const ICPUPolygonGeometry* geom, - const EPlyScalarType positionScalarType, - const ICPUPolygonGeometry::SDataView* uvView, - const EPlyScalarType uvScalarType, - const core::vector& extraAuxViews, - const bool writeNormals, - const EPlyScalarType normalScalarType, - const size_t vertexCount, - const uint32_t* indices, - const size_t faceCount, - const bool write16BitIndices, - uint8_t* dst, - const bool flipVectors); + const SWriteInput& input, + uint8_t* dst); bool writeText( - const ICPUPolygonGeometry* geom, - const EPlyScalarType positionScalarType, - const ICPUPolygonGeometry::SDataView* uvView, - const EPlyScalarType uvScalarType, - const core::vector& extraAuxViews, - const bool writeNormals, - const EPlyScalarType normalScalarType, - const size_t vertexCount, - const uint32_t* indices, - const size_t faceCount, - std::string& output, - const bool flipVectors); + const SWriteInput& input, + std::string& output); } // namespace ply_writer_detail @@ -453,43 +452,53 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ getDefaultOverride(_override); if (!_file || !_params.rootAsset) + { + _params.logger.log("PLY writer: missing output file or root asset.", system::ILogger::ELL_ERROR); return false; + } - const auto* geom = IAsset::castDown(_params.rootAsset); + const auto* geom = SGeometryWriterCommon::resolvePolygonGeometry(_params.rootAsset); if (!geom || !geom->valid()) + { + _params.logger.log("PLY writer: root asset is not a valid polygon geometry.", system::ILogger::ELL_ERROR); return false; + } SAssetWriteContext ctx = { _params, _file }; system::IFile* file = _override->getOutputFile(_file, ctx, { geom, 0u }); if (!file) + { + _params.logger.log("PLY writer: output override returned null file.", system::ILogger::ELL_ERROR); return false; + } const auto& positionView = geom->getPositionView(); const 
auto& normalView = geom->getNormalView(); - const auto& auxViews = geom->getAuxAttributeViews(); - + const size_t vertexCount = positionView.getElementCount(); + if (vertexCount == 0ull) + { + _params.logger.log("PLY writer: empty position view.", system::ILogger::ELL_ERROR); + return false; + } const bool writeNormals = static_cast(normalView); - - const ICPUPolygonGeometry::SDataView* uvView = nullptr; - for (const auto& view : auxViews) + if (writeNormals && normalView.getElementCount() != vertexCount) { - if (!view) - continue; - const auto channels = getFormatChannelCount(view.composed.format); - if (channels == 2u) - { - uvView = &view; - break; - } + _params.logger.log("PLY writer: normal vertex count mismatch.", system::ILogger::ELL_ERROR); + return false; } + const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::findFirstAuxViewByChannelCount(geom, 2u, vertexCount); + core::vector extraAuxViews; + const auto& auxViews = geom->getAuxAttributeViews(); extraAuxViews.reserve(auxViews.size()); for (uint32_t auxIx = 0u; auxIx < static_cast(auxViews.size()); ++auxIx) { const auto& view = auxViews[auxIx]; if (!view || (&view == uvView)) continue; + if (view.getElementCount() != vertexCount) + continue; const uint32_t channels = getFormatChannelCount(view.composed.format); if (channels == 0u) continue; @@ -497,70 +506,28 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ extraAuxViews.push_back({ &view, components, auxIx, selectPlyScalarType(view.composed.format) }); } - const size_t vertexCount = positionView.getElementCount(); - if (vertexCount == 0) - return false; - const auto* indexing = geom->getIndexingCallback(); if (!indexing) + { + _params.logger.log("PLY writer: missing indexing callback.", system::ILogger::ELL_ERROR); return false; + } if (indexing->knownTopology() != E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) + { + _params.logger.log("PLY writer: only triangle-list topology is supported.", 
system::ILogger::ELL_ERROR); return false; - - const auto& indexView = geom->getIndexView(); + } core::vector indexData; const uint32_t* indices = nullptr; - size_t faceCount = 0; - if (indexView) + size_t faceCount = 0ull; + if (!SGeometryWriterCommon::decodeTriangleIndices(geom, indexData, indices, faceCount)) { - const size_t indexCount = indexView.getElementCount(); - if (indexCount % 3u != 0u) - return false; - - const void* src = indexView.getPointer(); - if (!src) - return false; - - if (indexView.composed.format == EF_R32_UINT && indexView.composed.getStride() == sizeof(uint32_t)) - { - indices = reinterpret_cast(src); - } - else if (indexView.composed.format == EF_R16_UINT && indexView.composed.getStride() == sizeof(uint16_t)) - { - indexData.resize(indexCount); - const uint16_t* src16 = reinterpret_cast(src); - for (size_t i = 0; i < indexCount; ++i) - indexData[i] = src16[i]; - indices = indexData.data(); - } - else - { - indexData.resize(indexCount); - hlsl::vector decoded = {}; - for (size_t i = 0; i < indexCount; ++i) - { - if (!indexView.decodeElement(i, decoded)) - return false; - indexData[i] = decoded.x; - } - indices = indexData.data(); - } - faceCount = indexCount / 3u; + _params.logger.log("PLY writer: failed to decode triangle indices.", system::ILogger::ELL_ERROR); + return false; } - else - { - if (vertexCount % 3u != 0u) - return false; - indexData.resize(vertexCount); - for (size_t i = 0; i < vertexCount; ++i) - indexData[i] = static_cast(i); - - indices = indexData.data(); - faceCount = vertexCount / 3u; - } const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool binary = flags.hasAnyFlag(E_WRITER_FLAGS::EWF_BINARY); const bool flipVectors = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); @@ -582,46 +549,27 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ for (const auto& extra : extraAuxViews) extraAuxBytesPerVertex += static_cast(extra.components) * 
getPlyScalarMeta(extra.scalarType).byteSize; - std::string header = "ply\n"; - header += binary ? "format binary_little_endian 1.0" : "format ascii 1.0"; - header += "\ncomment Nabla "; - header += NABLA_SDK_VERSION; - - header += "\nelement vertex "; - header += std::to_string(vertexCount); - header += "\n"; - - header += "property "; - header += positionMeta.name; - header += " x\n"; - header += "property "; - header += positionMeta.name; - header += " y\n"; - header += "property "; - header += positionMeta.name; - header += " z\n"; + std::ostringstream headerBuilder; + headerBuilder << "ply\n"; + headerBuilder << (binary ? "format binary_little_endian 1.0" : "format ascii 1.0"); + headerBuilder << "\ncomment Nabla " << NABLA_SDK_VERSION; + headerBuilder << "\nelement vertex " << vertexCount << "\n"; + + headerBuilder << "property " << positionMeta.name << " x\n"; + headerBuilder << "property " << positionMeta.name << " y\n"; + headerBuilder << "property " << positionMeta.name << " z\n"; if (writeNormals) { - header += "property "; - header += normalMeta.name; - header += " nx\n"; - header += "property "; - header += normalMeta.name; - header += " ny\n"; - header += "property "; - header += normalMeta.name; - header += " nz\n"; + headerBuilder << "property " << normalMeta.name << " nx\n"; + headerBuilder << "property " << normalMeta.name << " ny\n"; + headerBuilder << "property " << normalMeta.name << " nz\n"; } if (uvView) { - header += "property "; - header += uvMeta.name; - header += " u\n"; - header += "property "; - header += uvMeta.name; - header += " v\n"; + headerBuilder << "property " << uvMeta.name << " u\n"; + headerBuilder << "property " << uvMeta.name << " v\n"; } for (const auto& extra : extraAuxViews) @@ -629,23 +577,32 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const auto extraMeta = getPlyScalarMeta(extra.scalarType); for (uint32_t component = 0u; component < extra.components; ++component) { - header += 
"property "; - header += extraMeta.name; - header += " aux"; - header += std::to_string(extra.auxIndex); + headerBuilder << "property " << extraMeta.name << " aux" << extra.auxIndex; if (extra.components > 1u) - { - header += "_"; - header += std::to_string(component); - } - header += "\n"; + headerBuilder << "_" << component; + headerBuilder << "\n"; } } - header += "element face "; - header += std::to_string(faceCount); - header += write16BitIndices ? "\nproperty list uchar uint16 vertex_indices\n" : "\nproperty list uchar uint32 vertex_indices\n"; - header += "end_header\n"; + headerBuilder << "element face " << faceCount; + headerBuilder << (write16BitIndices ? "\nproperty list uchar uint16 vertex_indices\n" : "\nproperty list uchar uint32 vertex_indices\n"); + headerBuilder << "end_header\n"; + const std::string header = headerBuilder.str(); + + const SWriteInput input = { + .geom = geom, + .positionScalarType = positionScalarType, + .uvView = uvView, + .uvScalarType = uvScalarType, + .extraAuxViews = &extraAuxViews, + .writeNormals = writeNormals, + .normalScalarType = normalScalarType, + .vertexCount = vertexCount, + .indices = indices, + .faceCount = faceCount, + .write16BitIndices = write16BitIndices, + .flipVectors = flipVectors + }; bool writeOk = false; size_t outputBytes = 0ull; @@ -661,8 +618,11 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ core::vector body; body.resize(bodySize); - if (!writeBinary(geom, positionScalarType, uvView, uvScalarType, extraAuxViews, writeNormals, normalScalarType, vertexCount, indices, faceCount, write16BitIndices, body.data(), flipVectors)) + if (!writeBinary(input, body.data())) + { + _params.logger.log("PLY writer: binary payload generation failed.", system::ILogger::ELL_ERROR); return false; + } const size_t outputSize = header.size() + body.size(); const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); @@ -714,8 +674,11 @@ bool 
CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ std::string body; body.reserve(vertexCount * ApproxPlyTextBytesPerVertex + faceCount * ApproxPlyTextBytesPerFace); - if (!writeText(geom, positionScalarType, uvView, uvScalarType, extraAuxViews, writeNormals, normalScalarType, vertexCount, indices, faceCount, body, flipVectors)) + if (!writeText(input, body)) + { + _params.logger.log("PLY writer: text payload generation failed.", system::ILogger::ELL_ERROR); return false; + } const size_t outputSize = header.size() + body.size(); const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); @@ -766,35 +729,25 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } bool ply_writer_detail::writeBinary( - const ICPUPolygonGeometry* geom, - const EPlyScalarType positionScalarType, - const ICPUPolygonGeometry::SDataView* uvView, - const EPlyScalarType uvScalarType, - const core::vector& extraAuxViews, - const bool writeNormals, - const EPlyScalarType normalScalarType, - const size_t vertexCount, - const uint32_t* indices, - const size_t faceCount, - const bool write16BitIndices, - uint8_t* dst, - const bool flipVectors) + const SWriteInput& input, + uint8_t* dst) { - if (!dst) + if (!input.geom || !input.extraAuxViews || !input.indices || !dst) return false; - const auto& positionView = geom->getPositionView(); - const auto& normalView = geom->getNormalView(); + const auto& positionView = input.geom->getPositionView(); + const auto& normalView = input.geom->getNormalView(); + const auto& extraAuxViews = *input.extraAuxViews; - for (size_t i = 0; i < vertexCount; ++i) + for (size_t i = 0; i < input.vertexCount; ++i) { - if (!writeTypedViewBinary(positionView, i, 3u, positionScalarType, flipVectors, dst)) + if (!writeTypedViewBinary(positionView, i, 3u, input.positionScalarType, input.flipVectors, dst)) return false; - if (writeNormals && !writeTypedViewBinary(normalView, i, 3u, 
normalScalarType, flipVectors, dst)) + if (input.writeNormals && !writeTypedViewBinary(normalView, i, 3u, input.normalScalarType, input.flipVectors, dst)) return false; - if (uvView && !writeTypedViewBinary(*uvView, i, 2u, uvScalarType, false, dst)) + if (input.uvView && !writeTypedViewBinary(*input.uvView, i, 2u, input.uvScalarType, false, dst)) return false; for (const auto& extra : extraAuxViews) @@ -804,13 +757,13 @@ bool ply_writer_detail::writeBinary( } } - for (size_t i = 0; i < faceCount; ++i) + for (size_t i = 0; i < input.faceCount; ++i) { const uint8_t listSize = 3u; *dst++ = listSize; - const uint32_t* tri = indices + (i * 3u); - if (write16BitIndices) + const uint32_t* tri = input.indices + (i * 3u); + if (input.write16BitIndices) { const uint16_t tri16[3] = { static_cast(tri[0]), @@ -831,36 +784,30 @@ bool ply_writer_detail::writeBinary( } bool ply_writer_detail::writeText( - const ICPUPolygonGeometry* geom, - const EPlyScalarType positionScalarType, - const ICPUPolygonGeometry::SDataView* uvView, - const EPlyScalarType uvScalarType, - const core::vector& extraAuxViews, - const bool writeNormals, - const EPlyScalarType normalScalarType, - const size_t vertexCount, - const uint32_t* indices, - const size_t faceCount, - std::string& output, - const bool flipVectors) + const SWriteInput& input, + std::string& output) { - const auto& positionView = geom->getPositionView(); - const auto& normalView = geom->getNormalView(); + if (!input.geom || !input.extraAuxViews || !input.indices) + return false; + + const auto& positionView = input.geom->getPositionView(); + const auto& normalView = input.geom->getNormalView(); + const auto& extraAuxViews = *input.extraAuxViews; - for (size_t i = 0; i < vertexCount; ++i) + for (size_t i = 0; i < input.vertexCount; ++i) { - if (!writeTypedViewText(output, positionView, i, 3u, positionScalarType, flipVectors)) + if (!writeTypedViewText(output, positionView, i, 3u, input.positionScalarType, input.flipVectors)) return 
false; - if (writeNormals) + if (input.writeNormals) { - if (!writeTypedViewText(output, normalView, i, 3u, normalScalarType, flipVectors)) + if (!writeTypedViewText(output, normalView, i, 3u, input.normalScalarType, input.flipVectors)) return false; } - if (uvView) + if (input.uvView) { - if (!writeTypedViewText(output, *uvView, i, 2u, uvScalarType, false)) + if (!writeTypedViewText(output, *input.uvView, i, 2u, input.uvScalarType, false)) return false; } @@ -873,9 +820,9 @@ bool ply_writer_detail::writeText( output += "\n"; } - for (size_t i = 0; i < faceCount; ++i) + for (size_t i = 0; i < input.faceCount; ++i) { - const uint32_t* tri = indices + (i * 3u); + const uint32_t* tri = input.indices + (i * 3u); output.append("3 "); appendUInt(output, tri[0]); output.push_back(' '); diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index e7b58296d4..0e3f11265e 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -8,6 +8,7 @@ #ifdef _NBL_COMPILE_WITH_STL_LOADER_ #include "nbl/asset/interchange/SGeometryContentHashCommon.h" +#include "nbl/asset/interchange/SGeometryLoaderCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/format/convertColor.h" @@ -19,6 +20,7 @@ #include "nbl/system/IFile.h" #include +#include namespace nbl::asset { @@ -39,76 +41,69 @@ struct SSTLContext static constexpr size_t FloatChannelsPerVertex = 3ull; }; -const char* stlSkipWhitespace(const char* ptr, const char* const end) +class SStlAsciiParser { - while (ptr < end && core::isspace(*ptr)) - ++ptr; - return ptr; -} - -bool stlReadTextToken(const char*& ptr, const char* const end, std::string_view& outToken) -{ - ptr = stlSkipWhitespace(ptr, end); - if (ptr >= end) - { - outToken = {}; - return false; - } + public: + inline SStlAsciiParser(const char* begin, const char* end) : 
m_cursor(begin), m_end(end) {} - const char* tokenEnd = ptr; - while (tokenEnd < end && !core::isspace(*tokenEnd)) - ++tokenEnd; + inline std::optional readToken() + { + skipWhitespace(); + if (m_cursor >= m_end) + return std::nullopt; - outToken = std::string_view(ptr, static_cast(tokenEnd - ptr)); - ptr = tokenEnd; - return true; -} + const char* tokenEnd = m_cursor; + while (tokenEnd < m_end && !core::isspace(*tokenEnd)) + ++tokenEnd; -bool stlReadTextFloat(const char*& ptr, const char* const end, float& outValue) -{ - ptr = stlSkipWhitespace(ptr, end); - if (ptr >= end) - return false; + const std::string_view token(m_cursor, static_cast(tokenEnd - m_cursor)); + m_cursor = tokenEnd; + return token; + } - const auto parseResult = fast_float::from_chars(ptr, end, outValue); - if (parseResult.ec == std::errc() && parseResult.ptr != ptr) - { - ptr = parseResult.ptr; - return true; - } + inline std::optional readFloat() + { + skipWhitespace(); + if (m_cursor >= m_end) + return std::nullopt; - char* fallbackEnd = nullptr; - outValue = std::strtof(ptr, &fallbackEnd); - if (!fallbackEnd || fallbackEnd == ptr) - return false; - ptr = fallbackEnd <= end ? 
fallbackEnd : end; - return true; -} + float value = 0.f; + const auto parseResult = fast_float::from_chars(m_cursor, m_end, value); + if (parseResult.ec == std::errc() && parseResult.ptr != m_cursor) + { + m_cursor = parseResult.ptr; + return value; + } + return std::nullopt; + } -bool stlReadTextVec3(const char*& ptr, const char* const end, hlsl::float32_t3& outVec) -{ - return stlReadTextFloat(ptr, end, outVec.x) && stlReadTextFloat(ptr, end, outVec.y) && stlReadTextFloat(ptr, end, outVec.z); -} + inline std::optional readVec3() + { + const auto x = readFloat(); + const auto y = readFloat(); + const auto z = readFloat(); + if (!x.has_value() || !y.has_value() || !z.has_value()) + return std::nullopt; + return hlsl::float32_t3(*x, *y, *z); + } -hlsl::float32_t3 stlNormalizeOrZero(const hlsl::float32_t3& v) -{ - const float len2 = hlsl::dot(v, v); - if (len2 <= 0.f) - return hlsl::float32_t3(0.f, 0.f, 0.f); - return hlsl::normalize(v); -} + private: + inline void skipWhitespace() + { + while (m_cursor < m_end && core::isspace(*m_cursor)) + ++m_cursor; + } -hlsl::float32_t3 stlComputeFaceNormal(const hlsl::float32_t3& a, const hlsl::float32_t3& b, const hlsl::float32_t3& c) -{ - return stlNormalizeOrZero(hlsl::cross(b - a, c - a)); -} + const char* m_cursor = nullptr; + const char* m_end = nullptr; +}; hlsl::float32_t3 stlResolveStoredNormal(const hlsl::float32_t3& fileNormal) { const float fileLen2 = hlsl::dot(fileNormal, fileNormal); if (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) return fileNormal; - return stlNormalizeOrZero(fileNormal); + return SGeometryLoaderCommon::normalizeOrZero(fileNormal); } void stlPushTriangleReversed(const hlsl::float32_t3 (&p)[3], core::vector& positions) @@ -167,60 +162,6 @@ class CStlSplitBlockMemoryResource final : public core::refctd_memory_resource size_t m_alignment = 1ull; }; -ICPUPolygonGeometry::SDataView stlCreateAdoptedFloat3View(core::vector&& values) -{ - if (values.empty()) - return {}; - - auto backer = 
core::make_smart_refctd_ptr>>(std::move(values)); - auto& payload = backer->getBacker(); - auto* const payloadPtr = payload.data(); - const size_t byteCount = payload.size() * sizeof(hlsl::float32_t3); - auto buffer = ICPUBuffer::create({ { byteCount }, payloadPtr, core::smart_refctd_ptr(std::move(backer)), alignof(hlsl::float32_t3) }, core::adopt_memory); - if (!buffer) - return {}; - - ICPUPolygonGeometry::SDataView view = {}; - view.composed = { - .stride = sizeof(hlsl::float32_t3), - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32G32B32_SFLOAT) - }; - view.src = { - .offset = 0u, - .size = byteCount, - .buffer = std::move(buffer) - }; - return view; -} - -ICPUPolygonGeometry::SDataView stlCreateAdoptedColorView(core::vector&& values) -{ - if (values.empty()) - return {}; - - auto backer = core::make_smart_refctd_ptr>>(std::move(values)); - auto& payload = backer->getBacker(); - auto* const payloadPtr = payload.data(); - const size_t byteCount = payload.size() * sizeof(uint32_t); - auto buffer = ICPUBuffer::create({ { byteCount }, payloadPtr, core::smart_refctd_ptr(std::move(backer)), alignof(uint32_t) }, core::adopt_memory); - if (!buffer) - return {}; - - ICPUPolygonGeometry::SDataView view = {}; - view.composed = { - .stride = sizeof(uint32_t), - .format = EF_B8G8R8A8_UNORM, - .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_B8G8R8A8_UNORM) - }; - view.src = { - .offset = 0u, - .size = byteCount, - .buffer = std::move(buffer) - }; - return view; -} - CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager*) { } @@ -710,7 +651,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa vertexColors[baseIx + 1ull] = triColor; vertexColors[baseIx + 2ull] = triColor; } - auto colorView = stlCreateAdoptedColorView(std::move(vertexColors)); + auto colorView = SGeometryLoaderCommon::createAdoptedView(std::move(vertexColors)); if (!colorView) return {}; 
geometry->getAuxAttributeViews()->push_back(std::move(colorView)); @@ -729,47 +670,57 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa wholeFileData = wholeFilePayload.data(); } - const char* cursor = reinterpret_cast(wholeFileData); - const char* const end = cursor + filesize; + const char* const begin = reinterpret_cast(wholeFileData); + const char* const end = begin + filesize; + SStlAsciiParser parser(begin, end); core::vector positions; core::vector normals; - std::string_view textToken = {}; - if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("solid")) + const auto firstToken = parser.readToken(); + if (!firstToken.has_value() || *firstToken != std::string_view("solid")) return {}; - while (stlReadTextToken(cursor, end, textToken)) + for (;;) { + const auto maybeToken = parser.readToken(); + if (!maybeToken.has_value()) + break; + const std::string_view textToken = *maybeToken; if (textToken == std::string_view("endsolid")) break; if (textToken != std::string_view("facet")) - { continue; - } - if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("normal")) + + const auto normalKeyword = parser.readToken(); + if (!normalKeyword.has_value() || *normalKeyword != std::string_view("normal")) return {}; - hlsl::float32_t3 fileNormal = {}; - if (!stlReadTextVec3(cursor, end, fileNormal)) + const auto fileNormal = parser.readVec3(); + if (!fileNormal.has_value()) return {}; - if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("outer")) + const auto outerKeyword = parser.readToken(); + if (!outerKeyword.has_value() || *outerKeyword != std::string_view("outer")) return {}; - if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("loop")) + const auto loopKeyword = parser.readToken(); + if (!loopKeyword.has_value() || *loopKeyword != std::string_view("loop")) return {}; hlsl::float32_t3 p[3] = {}; for (uint32_t i = 0u; i < 3u; 
++i) { - if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("vertex")) + const auto vertexKeyword = parser.readToken(); + if (!vertexKeyword.has_value() || *vertexKeyword != std::string_view("vertex")) return {}; - if (!stlReadTextVec3(cursor, end, p[i])) + const auto vertex = parser.readVec3(); + if (!vertex.has_value()) return {}; + p[i] = *vertex; } stlPushTriangleReversed(p, positions); - hlsl::float32_t3 faceNormal = stlResolveStoredNormal(fileNormal); + hlsl::float32_t3 faceNormal = stlResolveStoredNormal(*fileNormal); if (hlsl::dot(faceNormal, faceNormal) <= 0.f) - faceNormal = stlComputeFaceNormal(p[2u], p[1u], p[0u]); + faceNormal = SGeometryLoaderCommon::computeFaceNormal(p[2u], p[1u], p[0u]); normals.push_back(faceNormal); normals.push_back(faceNormal); normals.push_back(faceNormal); @@ -777,9 +728,11 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa extendAABBAccumulator(parsedAABB, p[1u]); extendAABBAccumulator(parsedAABB, p[0u]); - if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("endloop")) + const auto endLoopKeyword = parser.readToken(); + if (!endLoopKeyword.has_value() || *endLoopKeyword != std::string_view("endloop")) return {}; - if (!stlReadTextToken(cursor, end, textToken) || textToken != std::string_view("endfacet")) + const auto endFacetKeyword = parser.readToken(); + if (!endFacetKeyword.has_value() || *endFacetKeyword != std::string_view("endfacet")) return {}; } if (positions.empty()) @@ -788,8 +741,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa triangleCount = positions.size() / SSTLContext::VerticesPerTriangle; vertexCount = positions.size(); - auto posView = stlCreateAdoptedFloat3View(std::move(positions)); - auto normalView = stlCreateAdoptedFloat3View(std::move(normals)); + auto posView = SGeometryLoaderCommon::createAdoptedView(std::move(positions)); + auto normalView = 
SGeometryLoaderCommon::createAdoptedView(std::move(normals)); if (!posView || !normalView) return {}; geometry->setPositionView(std::move(posView)); diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 11146f1145..d63fddb19e 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -33,6 +33,9 @@ struct SContext core::vector ioBuffer = {}; size_t fileOffset = 0ull; SFileWriteTelemetry writeTelemetry = {}; + + bool flush(); + bool write(const void* data, size_t size); }; constexpr size_t BinaryHeaderBytes = 80ull; @@ -53,11 +56,8 @@ constexpr char AsciiDefaultName[] = "nabla_mesh"; using SContext = stl_writer_detail::SContext; -bool flushBytes(SContext* context); -bool writeBytes(SContext* context, const void* data, size_t size); -bool decodeTriangleIndices(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView& posView, core::vector& indexData, const uint32_t*& outIndices, uint32_t& outFaceCount); -bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx); -bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal); +bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, hlsl::float32_t3& out0, hlsl::float32_t3& out1, hlsl::float32_t3& out2, uint32_t* outIdx); +bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, hlsl::float32_t3& outNormal); double stlNormalizeColorComponentToUnit(double value); uint16_t stlPackViscamColorFromB8G8R8A8(uint32_t color); const ICPUPolygonGeometry::SDataView* 
stlFindColorView(const ICPUPolygonGeometry* geom, size_t vertexCount); @@ -66,16 +66,16 @@ void stlDecodeColorUnitRGBAFromB8G8R8A8(uint32_t color, double (&out)[4]); bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context); bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context); bool writeFaceText( - const core::vectorSIMDf& v1, - const core::vectorSIMDf& v2, - const core::vectorSIMDf& v3, + const hlsl::float32_t3& v1, + const hlsl::float32_t3& v2, + const hlsl::float32_t3& v3, const uint32_t* idx, const asset::ICPUPolygonGeometry::SDataView& normalView, const bool flipHandedness, SContext* context); bool appendLiteral(char*& cursor, char* const end, const char* text, const size_t textSize); -bool appendVectorAsAsciiLine(char*& cursor, char* const end, const core::vectorSIMDf& v); +bool appendVectorAsAsciiLine(char*& cursor, char* const end, const hlsl::float32_t3& v); CSTLMeshWriter::CSTLMeshWriter() { @@ -111,12 +111,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ IAssetWriter::SAssetWriteContext inCtx{_params, _file}; - const asset::ICPUPolygonGeometry* geom = -#ifndef _NBL_DEBUG - static_cast(_params.rootAsset); -#else - dynamic_cast(_params.rootAsset); -#endif + const asset::ICPUPolygonGeometry* geom = IAsset::castDown(_params.rootAsset); if (!geom) return false; @@ -156,7 +151,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (!written) return false; - const bool flushed = flushBytes(&context); + const bool flushed = context.flush(); if (!flushed) return false; @@ -189,71 +184,69 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return true; } -bool flushBytes(SContext* context) +bool stl_writer_detail::SContext::flush() { - if (!context) - return false; - if (context->ioBuffer.empty()) + if (ioBuffer.empty()) return true; size_t bytesWritten = 0ull; - const size_t totalBytes = context->ioBuffer.size(); 
+ const size_t totalBytes = ioBuffer.size(); while (bytesWritten < totalBytes) { system::IFile::success_t success; - context->writeContext.outputFile->write( + writeContext.outputFile->write( success, - context->ioBuffer.data() + bytesWritten, - context->fileOffset + bytesWritten, + ioBuffer.data() + bytesWritten, + fileOffset + bytesWritten, totalBytes - bytesWritten); if (!success) return false; const size_t processed = success.getBytesProcessed(); if (processed == 0ull) return false; - context->writeTelemetry.account(processed); + writeTelemetry.account(processed); bytesWritten += processed; } - context->fileOffset += totalBytes; - context->ioBuffer.clear(); + fileOffset += totalBytes; + ioBuffer.clear(); return true; } -bool writeBytes(SContext* context, const void* data, size_t size) +bool stl_writer_detail::SContext::write(const void* data, size_t size) { - if (!context || (!data && size != 0ull)) + if (!data && size != 0ull) return false; if (size == 0ull) return true; const uint8_t* src = reinterpret_cast(data); - switch (context->ioPlan.strategy) + switch (ioPlan.strategy) { case SResolvedFileIOPolicy::Strategy::WholeFile: { - const size_t oldSize = context->ioBuffer.size(); - context->ioBuffer.resize(oldSize + size); - std::memcpy(context->ioBuffer.data() + oldSize, src, size); + const size_t oldSize = ioBuffer.size(); + ioBuffer.resize(oldSize + size); + std::memcpy(ioBuffer.data() + oldSize, src, size); return true; } case SResolvedFileIOPolicy::Strategy::Chunked: default: { - const size_t chunkSize = static_cast(context->ioPlan.chunkSizeBytes()); + const size_t chunkSize = static_cast(ioPlan.chunkSizeBytes()); size_t remaining = size; while (remaining > 0ull) { - const size_t freeSpace = chunkSize - context->ioBuffer.size(); + const size_t freeSpace = chunkSize - ioBuffer.size(); const size_t toCopy = std::min(freeSpace, remaining); - const size_t oldSize = context->ioBuffer.size(); - context->ioBuffer.resize(oldSize + toCopy); - 
std::memcpy(context->ioBuffer.data() + oldSize, src, toCopy); + const size_t oldSize = ioBuffer.size(); + ioBuffer.resize(oldSize + toCopy); + std::memcpy(ioBuffer.data() + oldSize, src, toCopy); src += toCopy; remaining -= toCopy; - if (context->ioBuffer.size() == chunkSize) + if (ioBuffer.size() == chunkSize) { - if (!flushBytes(context)) + if (!flush()) return false; } } @@ -271,74 +264,24 @@ bool appendLiteral(char*& cursor, char* const end, const char* text, const size_ return true; } -bool appendVectorAsAsciiLine(char*& cursor, char* const end, const core::vectorSIMDf& v) +bool appendVectorAsAsciiLine(char*& cursor, char* const end, const hlsl::float32_t3& v) { - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, end, v.X); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, end, v.x); if (cursor >= end) return false; *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, end, v.Y); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, end, v.y); if (cursor >= end) return false; *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, end, v.Z); + cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, end, v.z); if (cursor >= end) return false; *(cursor++) = '\n'; return true; } -bool decodeTriangleIndices(const ICPUPolygonGeometry* geom, const ICPUPolygonGeometry::SDataView& posView, core::vector& indexData, const uint32_t*& outIndices, uint32_t& outFaceCount) -{ - const auto& indexView = geom->getIndexView(); - if (indexView) - { - const size_t indexCount = indexView.getElementCount(); - if ((indexCount % 3ull) != 0ull) - return false; - - const void* src = indexView.getPointer(); - if (!src) - return false; - - if (indexView.composed.format == EF_R32_UINT && indexView.composed.getStride() == sizeof(uint32_t)) - { - outIndices = reinterpret_cast(src); - } - else if (indexView.composed.format == EF_R16_UINT && indexView.composed.getStride() 
== sizeof(uint16_t)) - { - indexData.resize(indexCount); - const auto* src16 = reinterpret_cast(src); - for (size_t i = 0ull; i < indexCount; ++i) - indexData[i] = src16[i]; - outIndices = indexData.data(); - } - else - { - indexData.resize(indexCount); - hlsl::vector decoded = {}; - for (size_t i = 0ull; i < indexCount; ++i) - { - if (!indexView.decodeElement(i, decoded)) - return false; - indexData[i] = decoded.x; - } - outIndices = indexData.data(); - } - outFaceCount = static_cast(indexCount / 3ull); - return true; - } - - const size_t vertexCount = posView.getElementCount(); - if ((vertexCount % 3ull) != 0ull) - return false; - - outIndices = nullptr; - outFaceCount = static_cast(vertexCount / 3ull); - return true; -} - -bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, core::vectorSIMDf& out0, core::vectorSIMDf& out1, core::vectorSIMDf& out2, uint32_t* outIdx) +bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, hlsl::float32_t3& out0, hlsl::float32_t3& out1, hlsl::float32_t3& out2, uint32_t* outIdx) { uint32_t idx[3] = {}; const auto& indexView = geom->getIndexView(); @@ -369,13 +312,13 @@ bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase: if (!posView.decodeElement(idx[2], p2)) return false; - out0 = core::vectorSIMDf(p0.x, p0.y, p0.z, 1.f); - out1 = core::vectorSIMDf(p1.x, p1.y, p1.z, 1.f); - out2 = core::vectorSIMDf(p2.x, p2.y, p2.z, 1.f); + out0 = p0; + out1 = p1; + out2 = p2; return true; } -bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, core::vectorSIMDf& outNormal) +bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, hlsl::float32_t3& outNormal) { if (!normalView || !idx) return false; 
@@ -390,12 +333,12 @@ bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, cons if (!normalView.decodeElement(idx[2], n2)) return false; - auto normal = core::vectorSIMDf(n0.x, n0.y, n0.z, 0.f); - if ((normal == core::vectorSIMDf(0.f)).all()) - normal = core::vectorSIMDf(n1.x, n1.y, n1.z, 0.f); - if ((normal == core::vectorSIMDf(0.f)).all()) - normal = core::vectorSIMDf(n2.x, n2.y, n2.z, 0.f); - if ((normal == core::vectorSIMDf(0.f)).all()) + auto normal = n0; + if (normal.x == 0.f && normal.y == 0.f && normal.z == 0.f) + normal = n1; + if (normal.x == 0.f && normal.y == 0.f && normal.z == 0.f) + normal = n2; + if (normal.x == 0.f && normal.y == 0.f && normal.z == 0.f) return false; outNormal = normal; @@ -489,8 +432,12 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) core::vector indexData; const uint32_t* indices = nullptr; uint32_t facenum = 0u; - if (!decodeTriangleIndices(geom, posView, indexData, indices, facenum)) + size_t faceCount = 0ull; + if (!SGeometryWriterCommon::decodeTriangleIndices(geom, indexData, indices, faceCount)) + return false; + if (faceCount > static_cast(std::numeric_limits::max())) return false; + facenum = static_cast(faceCount); const size_t outputSize = stl_writer_detail::BinaryPrefixBytes + static_cast(facenum) * stl_writer_detail::BinaryTriangleRecordBytes; std::unique_ptr output(new (std::nothrow) uint8_t[outputSize]); @@ -833,66 +780,71 @@ bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); const std::string_view solidName = name.empty() ? 
std::string_view(stl_writer_detail::AsciiDefaultName) : std::string_view(name); - if (!writeBytes(context, stl_writer_detail::AsciiSolidPrefix, sizeof(stl_writer_detail::AsciiSolidPrefix) - 1ull)) + if (!context->write(stl_writer_detail::AsciiSolidPrefix, sizeof(stl_writer_detail::AsciiSolidPrefix) - 1ull)) return false; - if (!writeBytes(context, solidName.data(), solidName.size())) + if (!context->write(solidName.data(), solidName.size())) return false; - if (!writeBytes(context, "\n", sizeof("\n") - 1ull)) + if (!context->write("\n", sizeof("\n") - 1ull)) return false; const uint32_t faceCount = static_cast(geom->getPrimitiveCount()); for (uint32_t primIx = 0u; primIx < faceCount; ++primIx) { - core::vectorSIMDf v0; - core::vectorSIMDf v1; - core::vectorSIMDf v2; + hlsl::float32_t3 v0 = {}; + hlsl::float32_t3 v1 = {}; + hlsl::float32_t3 v2 = {}; uint32_t idx[3] = {}; if (!decodeTriangle(geom, indexing, posView, primIx, v0, v1, v2, idx)) return false; if (!writeFaceText(v0, v1, v2, idx, normalView, flipHandedness, context)) return false; - if (!writeBytes(context, "\n", sizeof("\n") - 1ull)) + if (!context->write("\n", sizeof("\n") - 1ull)) return false; } - if (!writeBytes(context, stl_writer_detail::AsciiEndSolidPrefix, sizeof(stl_writer_detail::AsciiEndSolidPrefix) - 1ull)) + if (!context->write(stl_writer_detail::AsciiEndSolidPrefix, sizeof(stl_writer_detail::AsciiEndSolidPrefix) - 1ull)) return false; - if (!writeBytes(context, solidName.data(), solidName.size())) + if (!context->write(solidName.data(), solidName.size())) return false; return true; } bool writeFaceText( - const core::vectorSIMDf& v1, - const core::vectorSIMDf& v2, - const core::vectorSIMDf& v3, + const hlsl::float32_t3& v1, + const hlsl::float32_t3& v2, + const hlsl::float32_t3& v3, const uint32_t* idx, const asset::ICPUPolygonGeometry::SDataView& normalView, const bool flipHandedness, SContext* context) { - core::vectorSIMDf vertex1 = v3; - core::vectorSIMDf vertex2 = v2; - 
core::vectorSIMDf vertex3 = v1; + hlsl::float32_t3 vertex1 = v3; + hlsl::float32_t3 vertex2 = v2; + hlsl::float32_t3 vertex3 = v1; if (flipHandedness) { - vertex1.X = -vertex1.X; - vertex2.X = -vertex2.X; - vertex3.X = -vertex3.X; + vertex1.x = -vertex1.x; + vertex2.x = -vertex2.x; + vertex3.x = -vertex3.x; } - core::vectorSIMDf normal = core::plane3dSIMDf(vertex1, vertex2, vertex3).getNormal(); - core::vectorSIMDf attrNormal; + const hlsl::float32_t3 planeNormal = hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); + const float planeNormalLen2 = hlsl::dot(planeNormal, planeNormal); + hlsl::float32_t3 normal = hlsl::float32_t3(0.f, 0.f, 0.f); + if (planeNormalLen2 > 0.f) + normal = hlsl::normalize(planeNormal); + + hlsl::float32_t3 attrNormal = {}; if (decodeTriangleNormal(normalView, idx, attrNormal)) { if (flipHandedness) - attrNormal.X = -attrNormal.X; - if (core::dot(attrNormal, normal).X < 0.f) + attrNormal.x = -attrNormal.x; + if (planeNormalLen2 > 0.f && hlsl::dot(attrNormal, planeNormal) < 0.f) attrNormal = -attrNormal; normal = attrNormal; } @@ -923,7 +875,7 @@ bool writeFaceText( if (!appendLiteral(cursor, end, "endfacet\n", sizeof("endfacet\n") - 1ull)) return false; - return writeBytes(context, faceText.data(), static_cast(cursor - faceText.data())); + return context->write(faceText.data(), static_cast(cursor - faceText.data())); } } diff --git a/src/nbl/core/hash/blake.cpp b/src/nbl/core/hash/blake.cpp index 88fa6d4093..12642dcf79 100644 --- a/src/nbl/core/hash/blake.cpp +++ b/src/nbl/core/hash/blake.cpp @@ -1,486 +1,11 @@ #include "nbl/core/hash/blake.h" -#include -#include +#include #include -#include -#include - -extern "C" -{ -#include "blake3_impl.h" -} - -/* - BLAKE3 is tree-based and explicitly designed for parallel processing. The tree mode - (chunks and parent-node reduction) is part of the specification, so a parallel - implementation can be done without changing hash semantics. 
- - Why this local implementation exists: - - Nabla needs a multithreaded hash path integrated with its own runtime policy and - standard C++ threading. - - Upstream C API exposes a single-threaded update path and an optional oneTBB path - (`blake3_hasher_update_tbb`) which requires building with `BLAKE3_USE_TBB`. - - Here we keep the same algorithmic rules and final digest, while using only C++20 - standard facilities (`std::async`, `std::thread`) and no oneTBB dependency. - - The local helpers below are adapted from upstream tree-processing internals used - in `c/blake3.c` and the oneTBB integration path. - - Primary references: - - BLAKE3 spec repository (paper): https://github.com/BLAKE3-team/BLAKE3-specs - - C2SP BLAKE3 specification: https://c2sp.org/BLAKE3 - - Upstream BLAKE3 C API notes (`update_tbb`): https://github.com/BLAKE3-team/BLAKE3/blob/master/c/README.md -*/ namespace nbl::core { -namespace -{ - -struct output_t -{ - uint32_t input_cv[8]; - uint64_t counter; - uint8_t block[BLAKE3_BLOCK_LEN]; - uint8_t block_len; - uint8_t flags; -}; - -INLINE void chunk_state_init_local(blake3_chunk_state* self, const uint32_t key[8], uint8_t flags) -{ - std::memcpy(self->cv, key, BLAKE3_KEY_LEN); - self->chunk_counter = 0; - std::memset(self->buf, 0, BLAKE3_BLOCK_LEN); - self->buf_len = 0; - self->blocks_compressed = 0; - self->flags = flags; -} - -INLINE void chunk_state_reset_local(blake3_chunk_state* self, const uint32_t key[8], uint64_t chunk_counter) -{ - std::memcpy(self->cv, key, BLAKE3_KEY_LEN); - self->chunk_counter = chunk_counter; - self->blocks_compressed = 0; - std::memset(self->buf, 0, BLAKE3_BLOCK_LEN); - self->buf_len = 0; -} - -INLINE size_t chunk_state_len_local(const blake3_chunk_state* self) -{ - return (BLAKE3_BLOCK_LEN * static_cast(self->blocks_compressed)) + static_cast(self->buf_len); -} - -INLINE size_t chunk_state_fill_buf_local(blake3_chunk_state* self, const uint8_t* input, size_t input_len) -{ - size_t take = BLAKE3_BLOCK_LEN - 
static_cast(self->buf_len); - if (take > input_len) - take = input_len; - auto* const dest = self->buf + static_cast(self->buf_len); - std::memcpy(dest, input, take); - self->buf_len += static_cast(take); - return take; -} - -INLINE uint8_t chunk_state_maybe_start_flag_local(const blake3_chunk_state* self) -{ - return self->blocks_compressed == 0 ? CHUNK_START : 0; -} - -INLINE output_t make_output_local(const uint32_t input_cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags) -{ - output_t ret = {}; - std::memcpy(ret.input_cv, input_cv, 32); - std::memcpy(ret.block, block, BLAKE3_BLOCK_LEN); - ret.block_len = block_len; - ret.counter = counter; - ret.flags = flags; - return ret; -} - -INLINE void output_chaining_value_local(const output_t* self, uint8_t cv[32]) -{ - uint32_t cv_words[8]; - std::memcpy(cv_words, self->input_cv, 32); - blake3_compress_in_place(cv_words, self->block, self->block_len, self->counter, self->flags); - store_cv_words(cv, cv_words); -} - -INLINE void chunk_state_update_local(blake3_chunk_state* self, const uint8_t* input, size_t input_len) -{ - if (self->buf_len > 0) - { - size_t take = chunk_state_fill_buf_local(self, input, input_len); - input += take; - input_len -= take; - if (input_len > 0) - { - blake3_compress_in_place( - self->cv, - self->buf, - BLAKE3_BLOCK_LEN, - self->chunk_counter, - self->flags | chunk_state_maybe_start_flag_local(self)); - self->blocks_compressed += 1; - self->buf_len = 0; - std::memset(self->buf, 0, BLAKE3_BLOCK_LEN); - } - } - - while (input_len > BLAKE3_BLOCK_LEN) - { - blake3_compress_in_place( - self->cv, - input, - BLAKE3_BLOCK_LEN, - self->chunk_counter, - self->flags | chunk_state_maybe_start_flag_local(self)); - self->blocks_compressed += 1; - input += BLAKE3_BLOCK_LEN; - input_len -= BLAKE3_BLOCK_LEN; - } - - (void)chunk_state_fill_buf_local(self, input, input_len); -} - -INLINE output_t chunk_state_output_local(const blake3_chunk_state* self) -{ - const 
uint8_t block_flags = self->flags | chunk_state_maybe_start_flag_local(self) | CHUNK_END; - return make_output_local(self->cv, self->buf, self->buf_len, self->chunk_counter, block_flags); -} - -INLINE output_t parent_output_local(const uint8_t block[BLAKE3_BLOCK_LEN], const uint32_t key[8], uint8_t flags) -{ - return make_output_local(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT); -} - -INLINE size_t left_len_local(size_t content_len) -{ - const size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN; - return round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN; -} - -INLINE size_t compress_chunks_parallel_local( - const uint8_t* input, - size_t input_len, - const uint32_t key[8], - uint64_t chunk_counter, - uint8_t flags, - uint8_t* out) -{ - const uint8_t* chunks_array[MAX_SIMD_DEGREE]; - size_t input_position = 0; - size_t chunks_array_len = 0; - while (input_len - input_position >= BLAKE3_CHUNK_LEN) - { - chunks_array[chunks_array_len] = &input[input_position]; - input_position += BLAKE3_CHUNK_LEN; - chunks_array_len += 1; - } - - blake3_hash_many( - chunks_array, - chunks_array_len, - BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, - key, - chunk_counter, - true, - flags, - CHUNK_START, - CHUNK_END, - out); - - if (input_len > input_position) - { - const uint64_t counter = chunk_counter + static_cast(chunks_array_len); - blake3_chunk_state chunk_state = {}; - chunk_state_init_local(&chunk_state, key, flags); - chunk_state.chunk_counter = counter; - chunk_state_update_local(&chunk_state, &input[input_position], input_len - input_position); - const auto output = chunk_state_output_local(&chunk_state); - output_chaining_value_local(&output, &out[chunks_array_len * BLAKE3_OUT_LEN]); - return chunks_array_len + 1; - } - - return chunks_array_len; -} - -INLINE size_t compress_parents_parallel_local( - const uint8_t* child_chaining_values, - size_t num_chaining_values, - const uint32_t key[8], - uint8_t flags, - uint8_t* out) -{ - const uint8_t* 
parents_array[MAX_SIMD_DEGREE_OR_2]; - size_t parents_array_len = 0; - while (num_chaining_values - (2 * parents_array_len) >= 2) - { - parents_array[parents_array_len] = - &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN]; - parents_array_len += 1; - } - - blake3_hash_many( - parents_array, - parents_array_len, - 1, - key, - 0, - false, - flags | PARENT, - 0, - 0, - out); - - if (num_chaining_values > 2 * parents_array_len) - { - std::memcpy( - &out[parents_array_len * BLAKE3_OUT_LEN], - &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN], - BLAKE3_OUT_LEN); - return parents_array_len + 1; - } - - return parents_array_len; -} - -constexpr size_t ParallelMinInputBytes = 1ull << 20; -constexpr size_t ParallelThreadGranularityBytes = 768ull << 10; -constexpr size_t ParallelSpawnMinSubtreeBytes = 512ull << 10; -constexpr uint32_t ParallelMaxThreads = 8u; -std::atomic_uint32_t g_parallelHashCalls = 0u; - -class SParallelCallGuard final -{ - public: - SParallelCallGuard() : m_active(g_parallelHashCalls.fetch_add(1u, std::memory_order_relaxed) + 1u) - { - } - - ~SParallelCallGuard() - { - g_parallelHashCalls.fetch_sub(1u, std::memory_order_relaxed); - } - - inline uint32_t activeCalls() const - { - return m_active; - } - - private: - uint32_t m_active = 1u; -}; - -size_t compress_subtree_wide_mt( - const uint8_t* input, - size_t input_len, - const uint32_t key[8], - uint64_t chunk_counter, - uint8_t flags, - uint8_t* out, - uint32_t threadBudget); - -INLINE void compress_subtree_to_parent_node_mt( - const uint8_t* input, - size_t input_len, - const uint32_t key[8], - uint64_t chunk_counter, - uint8_t flags, - uint8_t out[2 * BLAKE3_OUT_LEN], - uint32_t threadBudget) -{ - uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; - size_t num_cvs = compress_subtree_wide_mt(input, input_len, key, chunk_counter, flags, cv_array, threadBudget); - assert(num_cvs <= MAX_SIMD_DEGREE_OR_2); - -#if MAX_SIMD_DEGREE_OR_2 > 2 - uint8_t 
out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2]; - while (num_cvs > 2) - { - num_cvs = compress_parents_parallel_local(cv_array, num_cvs, key, flags, out_array); - std::memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN); - } -#endif - - std::memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); -} - -size_t compress_subtree_wide_mt( - const uint8_t* input, - size_t input_len, - const uint32_t key[8], - uint64_t chunk_counter, - uint8_t flags, - uint8_t* out, - uint32_t threadBudget) -{ - if (input_len <= blake3_simd_degree() * BLAKE3_CHUNK_LEN) - return compress_chunks_parallel_local(input, input_len, key, chunk_counter, flags, out); - - const size_t left_input_len = left_len_local(input_len); - const size_t right_input_len = input_len - left_input_len; - const uint8_t* const right_input = &input[left_input_len]; - const uint64_t right_chunk_counter = chunk_counter + static_cast(left_input_len / BLAKE3_CHUNK_LEN); - - uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; - size_t degree = blake3_simd_degree(); - if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) - degree = 2; - uint8_t* const right_cvs = &cv_array[degree * BLAKE3_OUT_LEN]; - - size_t left_n = 0; - size_t right_n = 0; - bool spawned = false; - if ( - threadBudget > 1u && - left_input_len >= ParallelSpawnMinSubtreeBytes && - right_input_len >= ParallelSpawnMinSubtreeBytes) - { - try - { - uint32_t leftBudget = threadBudget / 2u; - if (leftBudget == 0u) - leftBudget = 1u; - uint32_t rightBudget = threadBudget - leftBudget; - if (rightBudget == 0u) - rightBudget = 1u; - - auto rightFuture = std::async(std::launch::async, [right_input, right_input_len, key, right_chunk_counter, flags, right_cvs, rightBudget]() -> size_t - { - return compress_subtree_wide_mt(right_input, right_input_len, key, right_chunk_counter, flags, right_cvs, rightBudget); - }); - left_n = compress_subtree_wide_mt(input, left_input_len, key, chunk_counter, flags, cv_array, leftBudget); - right_n = rightFuture.get(); - spawned 
= true; - } - catch (...) - { - spawned = false; - } - } - - if (!spawned) - { - left_n = compress_subtree_wide_mt(input, left_input_len, key, chunk_counter, flags, cv_array, 1u); - right_n = compress_subtree_wide_mt(right_input, right_input_len, key, right_chunk_counter, flags, right_cvs, 1u); - } - - if (left_n == 1) - { - std::memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); - return 2; - } - - const size_t num_chaining_values = left_n + right_n; - return compress_parents_parallel_local(cv_array, num_chaining_values, key, flags, out); -} - -INLINE void hasher_merge_cv_stack_local(::blake3_hasher* self, uint64_t total_len) -{ - const size_t post_merge_stack_len = static_cast(popcnt(total_len)); - while (self->cv_stack_len > post_merge_stack_len) - { - auto* const parent_node = &self->cv_stack[(self->cv_stack_len - 2) * BLAKE3_OUT_LEN]; - const auto output = parent_output_local(parent_node, self->key, self->chunk.flags); - output_chaining_value_local(&output, parent_node); - self->cv_stack_len -= 1; - } -} - -INLINE void hasher_push_cv_local(::blake3_hasher* self, uint8_t new_cv[BLAKE3_OUT_LEN], uint64_t chunk_counter) -{ - hasher_merge_cv_stack_local(self, chunk_counter); - std::memcpy(&self->cv_stack[self->cv_stack_len * BLAKE3_OUT_LEN], new_cv, BLAKE3_OUT_LEN); - self->cv_stack_len += 1; -} - -void hasher_update_parallel(::blake3_hasher* self, const uint8_t* input_bytes, size_t input_len, uint32_t threadBudget) -{ - if (input_len == 0) - return; - - if (chunk_state_len_local(&self->chunk) > 0) - { - size_t take = BLAKE3_CHUNK_LEN - chunk_state_len_local(&self->chunk); - if (take > input_len) - take = input_len; - chunk_state_update_local(&self->chunk, input_bytes, take); - input_bytes += take; - input_len -= take; - if (input_len > 0) - { - const auto output = chunk_state_output_local(&self->chunk); - uint8_t chunk_cv[BLAKE3_OUT_LEN]; - output_chaining_value_local(&output, chunk_cv); - hasher_push_cv_local(self, chunk_cv, self->chunk.chunk_counter); - 
chunk_state_reset_local(&self->chunk, self->key, self->chunk.chunk_counter + 1); - } - else - { - return; - } - } - - while (input_len > BLAKE3_CHUNK_LEN) - { - size_t subtree_len = round_down_to_power_of_2(input_len); - const uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN; - while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) - subtree_len /= 2; - - const uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN; - if (subtree_len <= BLAKE3_CHUNK_LEN) - { - blake3_chunk_state chunk_state = {}; - chunk_state_init_local(&chunk_state, self->key, self->chunk.flags); - chunk_state.chunk_counter = self->chunk.chunk_counter; - chunk_state_update_local(&chunk_state, input_bytes, subtree_len); - const auto output = chunk_state_output_local(&chunk_state); - uint8_t cv[BLAKE3_OUT_LEN]; - output_chaining_value_local(&output, cv); - hasher_push_cv_local(self, cv, chunk_state.chunk_counter); - } - else - { - uint8_t cv_pair[2 * BLAKE3_OUT_LEN]; - compress_subtree_to_parent_node_mt( - input_bytes, - subtree_len, - self->key, - self->chunk.chunk_counter, - self->chunk.flags, - cv_pair, - threadBudget); - hasher_push_cv_local(self, cv_pair, self->chunk.chunk_counter); - hasher_push_cv_local(self, &cv_pair[BLAKE3_OUT_LEN], self->chunk.chunk_counter + (subtree_chunks / 2)); - } - self->chunk.chunk_counter += subtree_chunks; - input_bytes += subtree_len; - input_len -= subtree_len; - } - - if (input_len > 0) - { - chunk_state_update_local(&self->chunk, input_bytes, input_len); - hasher_merge_cv_stack_local(self, self->chunk.chunk_counter); - } -} - -INLINE uint32_t pick_parallel_budget(const size_t bytes) -{ - const uint32_t hw = std::thread::hardware_concurrency(); - if (hw <= 1u || bytes < ParallelMinInputBytes) - return 1u; - - const uint32_t maxBySize = static_cast(std::max(1ull, bytes / ParallelThreadGranularityBytes)); - uint32_t budget = std::min(hw, ParallelMaxThreads); - budget = std::min(budget, maxBySize); - return std::max(1u, budget); -} - -} 
- blake3_hasher::blake3_hasher() { ::blake3_hasher_init(&m_state); @@ -488,63 +13,48 @@ blake3_hasher::blake3_hasher() blake3_hasher& blake3_hasher::update(const void* data, const size_t bytes) { + if (bytes == 0ull) + return *this; + + assert(data != nullptr); + if (!data) + return *this; + ::blake3_hasher_update(&m_state, data, bytes); return *this; } void blake3_hasher::reset() { - ::blake3_hasher_reset(&m_state); + ::blake3_hasher_init(&m_state); } blake3_hasher::operator blake3_hash_t() const { blake3_hash_t retval = {}; - ::blake3_hasher_finalize(&m_state, retval.data, sizeof(retval)); + ::blake3_hasher stateCopy = m_state; + ::blake3_hasher_finalize(&stateCopy, retval.data, BLAKE3_OUT_LEN); return retval; } -blake3_hash_t blake3_hash_buffer(const void* data, size_t bytes) +blake3_hash_t blake3_hash_buffer_sequential(const void* data, size_t bytes) { - if (!data || bytes == 0ull) - return static_cast(blake3_hasher{}); - - uint32_t threadBudget = pick_parallel_budget(bytes); - if (threadBudget <= 1u) - { - blake3_hasher hasher; - hasher.update(data, bytes); - return static_cast(hasher); - } + if (!data && bytes != 0ull) + return {}; - SParallelCallGuard guard; - const uint32_t activeCalls = std::max(1u, guard.activeCalls()); - const uint32_t hw = std::max(1u, std::thread::hardware_concurrency()); - const uint32_t hwShare = std::max(1u, hw / activeCalls); - threadBudget = std::min(threadBudget, hwShare); - if (threadBudget <= 1u) - { - blake3_hasher hasher; - hasher.update(data, bytes); - return static_cast(hasher); - } + ::blake3_hasher hasher = {}; + ::blake3_hasher_init(&hasher); + if (bytes != 0ull) + ::blake3_hasher_update(&hasher, data, bytes); - ::blake3_hasher hasherState = {}; - ::blake3_hasher_init(&hasherState); - hasher_update_parallel(&hasherState, reinterpret_cast(data), bytes, threadBudget); blake3_hash_t retval = {}; - ::blake3_hasher_finalize(&hasherState, retval.data, sizeof(retval)); + ::blake3_hasher_finalize(&hasher, retval.data, 
BLAKE3_OUT_LEN); return retval; } -blake3_hash_t blake3_hash_buffer_sequential(const void* data, size_t bytes) +blake3_hash_t blake3_hash_buffer(const void* data, size_t bytes) { - if (!data || bytes == 0ull) - return static_cast(blake3_hasher{}); - - blake3_hasher hasher; - hasher.update(data, bytes); - return static_cast(hasher); + return blake3_hash_buffer_sequential(data, bytes); } } From 5b1cd7fc907c717079bca6cb7df5be34949faf08 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 6 Mar 2026 13:00:44 +0100 Subject: [PATCH 046/118] Address remaining review nits --- .../nbl/asset/interchange/COBJMeshWriter.h | 4 +- include/nbl/asset/interchange/ISceneWriter.h | 29 +++++ .../asset/interchange/SLoaderRuntimeTuning.h | 101 ++++++++---------- include/nbl/system/ISystem.h | 4 +- .../asset/interchange/COBJMeshFileLoader.cpp | 8 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 12 +-- .../asset/interchange/CSTLMeshFileLoader.cpp | 12 +-- .../utils/CPolygonGeometryManipulator.cpp | 12 +-- 8 files changed, 101 insertions(+), 81 deletions(-) create mode 100644 include/nbl/asset/interchange/ISceneWriter.h diff --git a/include/nbl/asset/interchange/COBJMeshWriter.h b/include/nbl/asset/interchange/COBJMeshWriter.h index b08f0dceee..a511d294af 100644 --- a/include/nbl/asset/interchange/COBJMeshWriter.h +++ b/include/nbl/asset/interchange/COBJMeshWriter.h @@ -5,14 +5,14 @@ #define _NBL_ASSET_OBJ_MESH_WRITER_H_INCLUDED_ -#include "nbl/asset/interchange/IGeometryWriter.h" +#include "nbl/asset/interchange/ISceneWriter.h" namespace nbl::asset { //! 
class to write OBJ mesh files -class COBJMeshWriter : public IGeometryWriter +class COBJMeshWriter : public ISceneWriter { public: COBJMeshWriter(); diff --git a/include/nbl/asset/interchange/ISceneWriter.h b/include/nbl/asset/interchange/ISceneWriter.h new file mode 100644 index 0000000000..897a592dbc --- /dev/null +++ b/include/nbl/asset/interchange/ISceneWriter.h @@ -0,0 +1,29 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_I_SCENE_WRITER_H_INCLUDED_ +#define _NBL_ASSET_I_SCENE_WRITER_H_INCLUDED_ + + +#include "nbl/core/declarations.h" + +#include "nbl/asset/ICPUScene.h" +#include "nbl/asset/interchange/IAssetWriter.h" + + +namespace nbl::asset +{ + +class ISceneWriter : public IAssetWriter +{ + public: + virtual inline uint64_t getSupportedAssetTypesBitfield() const override { return IAsset::ET_SCENE; } + + protected: + ISceneWriter() = default; + virtual ~ISceneWriter() = default; +}; + +} + +#endif diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index dd979708b0..d83376cccb 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -67,50 +67,50 @@ struct SLoaderRuntimeTuner template static void dispatchWorkers(const size_t workerCount, Fn&& fn); - static inline SLoaderRuntimeTuningResult tune(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request); -}; - -constexpr uint64_t loaderRuntimeCeilDiv(const uint64_t numerator, const uint64_t denominator) -{ - return (numerator + denominator - 1ull) / denominator; -} + static constexpr uint64_t ceilDiv(const uint64_t numerator, const uint64_t denominator) + { + return (numerator + denominator - 1ull) / denominator; + } -inline uint64_t resolveLoaderRuntimeSampleBytes(const SFileIOPolicy& ioPolicy, 
const uint64_t knownInputBytes) -{ - if (knownInputBytes == 0ull) - return 0ull; + static inline uint64_t resolveSampleBytes(const SFileIOPolicy& ioPolicy, const uint64_t knownInputBytes) + { + if (knownInputBytes == 0ull) + return 0ull; + + const uint64_t minSampleBytes = std::max(1ull, ioPolicy.runtimeTuning.minSampleBytes); + const uint64_t maxSampleBytes = std::max(minSampleBytes, ioPolicy.runtimeTuning.maxSampleBytes); + const uint64_t cappedMin = std::min(minSampleBytes, knownInputBytes); + const uint64_t cappedMax = std::min(maxSampleBytes, knownInputBytes); + const uint64_t adaptive = std::max(knownInputBytes / 64ull, cappedMin); + return std::clamp(adaptive, cappedMin, cappedMax); + } - const uint64_t minSampleBytes = std::max(1ull, ioPolicy.runtimeTuning.minSampleBytes); - const uint64_t maxSampleBytes = std::max(minSampleBytes, ioPolicy.runtimeTuning.maxSampleBytes); - const uint64_t cappedMin = std::min(minSampleBytes, knownInputBytes); - const uint64_t cappedMax = std::min(maxSampleBytes, knownInputBytes); - const uint64_t adaptive = std::max(knownInputBytes / 64ull, cappedMin); - return std::clamp(adaptive, cappedMin, cappedMax); -} + static inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) + { + const uint64_t thresholdBytes = std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); + return inputBytes <= thresholdBytes; + } -inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) -{ - const uint64_t thresholdBytes = std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); - return inputBytes <= thresholdBytes; -} + static inline size_t resolveHardwareThreads(const uint32_t requested = 0u) + { + const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); + return hw ? hw : 1ull; + } -inline size_t resolveLoaderHardwareThreads(const uint32_t requested = 0u) -{ - const size_t hw = requested ? 
static_cast(requested) : static_cast(std::thread::hardware_concurrency()); - return hw ? hw : 1ull; -} + static inline size_t resolveHardMaxWorkers(const size_t hardwareThreads, const uint32_t workerHeadroom) + { + const size_t hw = std::max(1ull, hardwareThreads); + const size_t minWorkers = hw >= 2ull ? 2ull : 1ull; + const size_t headroom = static_cast(workerHeadroom); + if (headroom == 0ull) + return hw; + if (hw <= headroom) + return minWorkers; + return std::max(minWorkers, hw - headroom); + } -inline size_t resolveLoaderHardMaxWorkers(const size_t hardwareThreads, const uint32_t workerHeadroom) -{ - const size_t hw = std::max(1ull, hardwareThreads); - const size_t minWorkers = hw >= 2ull ? 2ull : 1ull; - const size_t headroom = static_cast(workerHeadroom); - if (headroom == 0ull) - return hw; - if (hw <= headroom) - return minWorkers; - return std::max(minWorkers, hw - headroom); -} + static inline SLoaderRuntimeTuningResult tune(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request); +}; template void SLoaderRuntimeTuner::dispatchWorkers(const size_t workerCount, Fn&& fn) @@ -128,12 +128,6 @@ void SLoaderRuntimeTuner::dispatchWorkers(const size_t workerCount, Fn&& fn) fn(0ull); } -template -inline void loaderRuntimeDispatchWorkers(const size_t workerCount, Fn&& fn) -{ - SLoaderRuntimeTuner::dispatchWorkers(workerCount, std::forward(fn)); -} - inline uint64_t loaderRuntimeBenchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes) { if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) @@ -146,7 +140,7 @@ inline uint64_t loaderRuntimeBenchmarkSample(const uint8_t* const sampleData, co for (uint32_t passIx = 0u; passIx < passCount; ++passIx) { const auto passStart = clock_t::now(); - loaderRuntimeDispatchWorkers(workerCount, [&](const size_t workerIx) + SLoaderRuntimeTuner::dispatchWorkers(workerCount, [&](const size_t workerIx) { const uint64_t begin = (sampleBytes 
* workerIx) / workerCount; const uint64_t end = (sampleBytes * (workerIx + 1ull)) / workerCount; @@ -233,7 +227,7 @@ SLoaderRuntimeTuningResult SLoaderRuntimeTuner::tune(const SFileIOPolicy& ioPoli return result; } - const size_t hw = resolveLoaderHardwareThreads(request.hardwareThreads); + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(request.hardwareThreads); size_t maxWorkers = hw; if (request.hardMaxWorkers > 0u) maxWorkers = std::min(maxWorkers, static_cast(request.hardMaxWorkers)); @@ -243,8 +237,8 @@ SLoaderRuntimeTuningResult SLoaderRuntimeTuner::tune(const SFileIOPolicy& ioPoli const uint64_t minWorkUnitsPerWorker = std::max(1ull, request.minWorkUnitsPerWorker); const uint64_t minBytesPerWorker = std::max(1ull, request.minBytesPerWorker); - const size_t maxByWork = static_cast(loaderRuntimeCeilDiv(request.totalWorkUnits, minWorkUnitsPerWorker)); - const size_t maxByBytes = request.inputBytes ? static_cast(loaderRuntimeCeilDiv(request.inputBytes, minBytesPerWorker)) : maxWorkers; + const size_t maxByWork = static_cast(SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, minWorkUnitsPerWorker)); + const size_t maxByBytes = request.inputBytes ? 
static_cast(SLoaderRuntimeTuner::ceilDiv(request.inputBytes, minBytesPerWorker)) : maxWorkers; const bool heuristicEnabled = ioPolicy.runtimeTuning.mode != RTMode::Sequential; const bool hybridEnabled = ioPolicy.runtimeTuning.mode == RTMode::Hybrid; @@ -392,19 +386,14 @@ SLoaderRuntimeTuningResult SLoaderRuntimeTuner::tune(const SFileIOPolicy& ioPoli const uint64_t minChunkWorkUnits = std::max(1ull, request.minChunkWorkUnits); uint64_t maxChunkWorkUnits = std::max(minChunkWorkUnits, request.maxChunkWorkUnits); const uint64_t desiredChunkCount = static_cast(std::max(1ull, result.workerCount * targetChunksPerWorker)); - uint64_t chunkWorkUnits = loaderRuntimeCeilDiv(request.totalWorkUnits, desiredChunkCount); + uint64_t chunkWorkUnits = SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, desiredChunkCount); chunkWorkUnits = std::clamp(chunkWorkUnits, minChunkWorkUnits, maxChunkWorkUnits); result.chunkWorkUnits = chunkWorkUnits; - result.chunkCount = static_cast(loaderRuntimeCeilDiv(request.totalWorkUnits, chunkWorkUnits)); + result.chunkCount = static_cast(SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, chunkWorkUnits)); return result; } -inline SLoaderRuntimeTuningResult tuneLoaderRuntime(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request) -{ - return SLoaderRuntimeTuner::tune(ioPolicy, request); -} - } diff --git a/include/nbl/system/ISystem.h b/include/nbl/system/ISystem.h index 0ab163b330..1b31bc9061 100644 --- a/include/nbl/system/ISystem.h +++ b/include/nbl/system/ISystem.h @@ -106,7 +106,8 @@ class NBL_API2 ISystem : public core::IReferenceCounted future_t>& future, // creation may happen on a dedicated thread, so its async path filename, // absolute path within our virtual filesystem const core::bitflag flags, // intended access flags (IMPORTANT: files from most archives wont open with ECF_WRITE bit) - // actual file flags may be downgraded when backend/archive cannot honor all requested flags (for example mapping/coherency) + // 
actual file flags may be downgraded when backend/archive cannot honor all requested flags + // for example a backend may open the file successfully but strip mapping/coherency when it cannot provide them const std::string_view& accessToken="" // usually password for archives, but should be SSH key for URL downloads ); @@ -169,6 +170,7 @@ class NBL_API2 ISystem : public core::IReferenceCounted { public: // each per-platform backend must override this function + // returned files may expose fewer flags than requested if the backend had to fall back virtual core::smart_refctd_ptr createFile(const std::filesystem::path& filename, const core::bitflag flags) = 0; // these contain some hoisted common sense checks diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index d76d92c7e0..68f5937a24 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -479,8 +479,8 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as int32_t normal = -1; uint32_t outIndex = 0u; }; - const size_t hw = resolveLoaderHardwareThreads(); - const size_t hardMaxWorkers = resolveLoaderHardMaxWorkers(hw, _params.ioPolicy.runtimeTuning.workerHeadroom); + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); + const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers(hw, _params.ioPolicy.runtimeTuning.workerHeadroom); SLoaderRuntimeTuningRequest dedupTuningRequest = {}; dedupTuningRequest.inputBytes = static_cast(filesize); dedupTuningRequest.totalWorkUnits = estimatedOutVertexCount; @@ -488,8 +488,8 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as dedupTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); dedupTuningRequest.targetChunksPerWorker = _params.ioPolicy.runtimeTuning.targetChunksPerWorker; dedupTuningRequest.sampleData = reinterpret_cast(buf); - 
dedupTuningRequest.sampleBytes = resolveLoaderRuntimeSampleBytes(_params.ioPolicy, static_cast(filesize)); - const auto dedupTuning = tuneLoaderRuntime(_params.ioPolicy, dedupTuningRequest); + dedupTuningRequest.sampleBytes = SLoaderRuntimeTuner::resolveSampleBytes(_params.ioPolicy, static_cast(filesize)); + const auto dedupTuning = SLoaderRuntimeTuner::tune(_params.ioPolicy, dedupTuningRequest); const size_t dedupHotSeed = std::max( 16ull, estimatedOutVertexCount / std::max(1ull, dedupTuning.workerCount * 8ull)); diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 6b41a259f3..709d7f3849 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -1027,8 +1027,8 @@ struct SContext bool fallbackToGeneric = false; if (is32Bit) { - const size_t hw = resolveLoaderHardwareThreads(); - const size_t hardMaxWorkers = resolveLoaderHardMaxWorkers(hw, inner.params.ioPolicy.runtimeTuning.workerHeadroom); + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); + const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers(hw, inner.params.ioPolicy.runtimeTuning.workerHeadroom); const size_t recordBytes = sizeof(uint8_t) + 3ull * sizeof(uint32_t); SLoaderRuntimeTuningRequest faceTuningRequest = {}; faceTuningRequest.inputBytes = minBytesNeeded; @@ -1038,8 +1038,8 @@ struct SContext faceTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); faceTuningRequest.targetChunksPerWorker = inner.params.ioPolicy.runtimeTuning.targetChunksPerWorker; faceTuningRequest.sampleData = ptr; - faceTuningRequest.sampleBytes = resolveLoaderRuntimeSampleBytes(inner.params.ioPolicy, minBytesNeeded); - const auto faceTuning = tuneLoaderRuntime(inner.params.ioPolicy, faceTuningRequest); + faceTuningRequest.sampleBytes = SLoaderRuntimeTuner::resolveSampleBytes(inner.params.ioPolicy, minBytesNeeded); + const auto faceTuning = 
SLoaderRuntimeTuner::tune(inner.params.ioPolicy, faceTuningRequest); size_t workerCount = std::min(faceTuning.workerCount, element.Count); if (workerCount > 1ull) { @@ -1142,7 +1142,7 @@ struct SContext ready.notify_one(); } }; - loaderRuntimeDispatchWorkers(workerCount, [&](const size_t workerIx) { const size_t begin = (element.Count * workerIx) / workerCount; const size_t end = (element.Count * (workerIx + 1ull)) / workerCount; parseChunk(workerIx, begin, end); }); + SLoaderRuntimeTuner::dispatchWorkers(workerCount, [&](const size_t workerIx) { const size_t begin = (element.Count * workerIx) / workerCount; const size_t end = (element.Count * (workerIx + 1ull)) / workerCount; parseChunk(workerIx, begin, end); }); if (hashThread.joinable()) hashThread.join(); @@ -1534,7 +1534,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa uint32_t maxIndexRead = 0u; core::blake3_hash_t precomputedIndexHash = IPreHashed::INVALID_HASH; const uint64_t fileSize = _file->getSize(); - const bool hashInBuild = computeContentHashes && shouldInlineHashBuild(_params.ioPolicy, fileSize); + const bool hashInBuild = computeContentHashes && SLoaderRuntimeTuner::shouldInlineHashBuild(_params.ioPolicy, fileSize); const bool fileMappable = core::bitflag(_file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, fileSize, true, fileMappable); if (!ioPlan.isValid()) diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 0e3f11265e..075502d283 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -373,8 +373,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa return {}; core::vector faceColors(static_cast(triangleCount), 0u); std::atomic_bool colorValidForAllFaces = true; - const size_t hw = resolveLoaderHardwareThreads(); - const size_t 
hardMaxWorkers = resolveLoaderHardMaxWorkers(hw, _params.ioPolicy.runtimeTuning.workerHeadroom); + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); + const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers(hw, _params.ioPolicy.runtimeTuning.workerHeadroom); SLoaderRuntimeTuningRequest parseTuningRequest = {}; parseTuningRequest.inputBytes = dataSize; parseTuningRequest.totalWorkUnits = triangleCount; @@ -385,8 +385,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa parseTuningRequest.minChunkWorkUnits = 1ull; parseTuningRequest.maxChunkWorkUnits = std::max(1ull, triangleCount); parseTuningRequest.sampleData = payloadData; - parseTuningRequest.sampleBytes = resolveLoaderRuntimeSampleBytes(_params.ioPolicy, dataSize); - const auto parseTuning = tuneLoaderRuntime(_params.ioPolicy, parseTuningRequest); + parseTuningRequest.sampleBytes = SLoaderRuntimeTuner::resolveSampleBytes(_params.ioPolicy, dataSize); + const auto parseTuning = SLoaderRuntimeTuner::tune(_params.ioPolicy, parseTuningRequest); const size_t workerCount = std::max(1ull, std::min(parseTuning.workerCount, static_cast(std::max(1ull, triangleCount)))); static constexpr bool ComputeAABBInParse = true; struct SThreadAABB @@ -401,7 +401,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa }; std::vector threadAABBs(ComputeAABBInParse ? workerCount : 0ull); const uint64_t parseChunkTriangles = std::max(1ull, parseTuning.chunkWorkUnits); - const size_t parseChunkCount = static_cast(loaderRuntimeCeilDiv(triangleCount, parseChunkTriangles)); + const size_t parseChunkCount = static_cast(SLoaderRuntimeTuner::ceilDiv(triangleCount, parseChunkTriangles)); const bool hashInParsePipeline = computeContentHashes; std::vector hashChunkReady(hashInParsePipeline ? 
parseChunkCount : 0ull, 0u); std::atomic_bool hashPipelineOk = true; @@ -616,7 +616,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if constexpr (ComputeAABBInParse) threadAABBs[workerIx] = localAABB; }; - loaderRuntimeDispatchWorkers(workerCount, parseWorker); + SLoaderRuntimeTuner::dispatchWorkers(workerCount, parseWorker); if (positionHashThread.joinable()) positionHashThread.join(); if (normalHashThread.joinable()) diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index ab64908e83..afe0fca59c 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -97,7 +97,7 @@ void CPolygonGeometryManipulator::computeContentHashesParallel(ICPUPolygonGeomet return; } - const size_t hw = resolveLoaderHardwareThreads(); + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); const uint8_t* hashSampleData = nullptr; uint64_t hashSampleBytes = 0ull; for (const auto pendingIx : pending) @@ -107,7 +107,7 @@ void CPolygonGeometryManipulator::computeContentHashesParallel(ICPUPolygonGeomet if (!ptr) continue; hashSampleData = ptr; - hashSampleBytes = resolveLoaderRuntimeSampleBytes(ioPolicy, static_cast(buffer->getSize())); + hashSampleBytes = SLoaderRuntimeTuner::resolveSampleBytes(ioPolicy, static_cast(buffer->getSize())); if (hashSampleBytes > 0ull) break; } @@ -115,19 +115,19 @@ void CPolygonGeometryManipulator::computeContentHashesParallel(ICPUPolygonGeomet SLoaderRuntimeTuningRequest tuningRequest = {}; tuningRequest.inputBytes = totalBytes; tuningRequest.totalWorkUnits = pending.size(); - tuningRequest.minBytesPerWorker = std::max(1ull, loaderRuntimeCeilDiv(totalBytes, static_cast(pending.size()))); + tuningRequest.minBytesPerWorker = std::max(1ull, SLoaderRuntimeTuner::ceilDiv(totalBytes, static_cast(pending.size()))); tuningRequest.hardwareThreads = static_cast(hw); - const size_t 
hardMaxWorkers = resolveLoaderHardMaxWorkers(hw, ioPolicy.runtimeTuning.workerHeadroom); + const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers(hw, ioPolicy.runtimeTuning.workerHeadroom); tuningRequest.hardMaxWorkers = static_cast(std::min(pending.size(), hardMaxWorkers)); tuningRequest.targetChunksPerWorker = ioPolicy.runtimeTuning.hashTaskTargetChunksPerWorker; tuningRequest.sampleData = hashSampleData; tuningRequest.sampleBytes = hashSampleBytes; - const auto tuning = tuneLoaderRuntime(ioPolicy, tuningRequest); + const auto tuning = SLoaderRuntimeTuner::tune(ioPolicy, tuningRequest); const size_t workerCount = std::min(tuning.workerCount, pending.size()); if (workerCount > 1ull) { - loaderRuntimeDispatchWorkers(workerCount, [&](const size_t workerIx) + SLoaderRuntimeTuner::dispatchWorkers(workerCount, [&](const size_t workerIx) { const size_t beginIx = (pending.size() * workerIx) / workerCount; const size_t endIx = (pending.size() * (workerIx + 1ull)) / workerCount; From f1a0721bffc423a7072ee885729e0d23a897af83 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 6 Mar 2026 14:31:53 +0100 Subject: [PATCH 047/118] Finish SIMD cleanup in loaders --- src/nbl/asset/interchange/CGLTFLoader.cpp | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/nbl/asset/interchange/CGLTFLoader.cpp b/src/nbl/asset/interchange/CGLTFLoader.cpp index 01ca108331..c7c4be034b 100644 --- a/src/nbl/asset/interchange/CGLTFLoader.cpp +++ b/src/nbl/asset/interchange/CGLTFLoader.cpp @@ -1274,26 +1274,26 @@ using namespace nbl::asset; auto* packedJointsData = reinterpret_cast(reinterpret_cast(vOverrideRepackedJointsBuffer->getPointer()) + vAttributeIx * repackJointsTexelByteSize); auto* packedWeightsData = reinterpret_cast(reinterpret_cast(vOverrideRepackedWeightsBuffer->getPointer()) + vAttributeIx * repackWeightsTexelByteSize); - auto quantize = [&](const core::vectorSIMDf& input, void* data, const E_FORMAT 
requestQuantizeFormat) + auto quantize = [&](const hlsl::float32_t4& input, void* data, const E_FORMAT requestQuantizeFormat) { - return ICPUMeshBuffer::setAttribute(input, data, requestQuantizeFormat); + return ICPUMeshBuffer::setAttribute(&input[0], data, requestQuantizeFormat); }; auto decodeQuant = [&](void* data, const E_FORMAT requestQuantizeFormat) { - core::vectorSIMDf out; - ICPUMeshBuffer::getAttribute(out, data, requestQuantizeFormat); + hlsl::float32_t4 out = {}; + ICPUMeshBuffer::getAttribute(&out[0], data, requestQuantizeFormat); return out; }; - core::vectorSIMDf packedWeightsStream; //! always go with full vectorSIMDf stream, weights being not used are leaved with default vector's compoment value and are not considered + hlsl::float32_t4 packedWeightsStream = {}; //! always go with full float4 stream, weights being not used are leaved with default vector's compoment value and are not considered for (uint16_t i = 0, vxSkinComponentOffset = 0; i < 4u; ++i) //! packing { if (unpackedWeightsData[i]) { packedJointsData[vxSkinComponentOffset] = unpackedJointsData[i]; - packedWeightsStream.pointer[i] = packedWeightsData[vxSkinComponentOffset] = unpackedWeightsData[i]; + packedWeightsStream[i] = packedWeightsData[vxSkinComponentOffset] = unpackedWeightsData[i]; ++vxSkinComponentOffset; assert(vxSkinComponentOffset <= maxJointsPerVertex); @@ -1309,14 +1309,14 @@ using namespace nbl::asset; const E_FORMAT requestQuantFormat = std::get(encode); quantize(packedWeightsStream, quantBuffer, requestQuantFormat); - core::vectorSIMDf quantsDecoded = decodeQuant(quantBuffer, requestQuantFormat); + hlsl::float32_t4 quantsDecoded = decodeQuant(quantBuffer, requestQuantFormat); for (uint16_t i = 0; i < MAX_INFLUENCE_WEIGHTS_PER_VERTEX; ++i) { - const auto& weightInput = packedWeightsStream.pointer[i]; + const auto weightInput = packedWeightsStream[i]; if (weightInput) { - const typename QuantRequest::ERROR_TYPE& errorComponent = errorBuffer[i] = 
core::abs(quantsDecoded.pointer[i] - weightInput); + const typename QuantRequest::ERROR_TYPE& errorComponent = errorBuffer[i] = core::abs(quantsDecoded[i] - weightInput); if (errorComponent) { @@ -1420,13 +1420,13 @@ using namespace nbl::asset; const size_t quantizedVWeightsOffset = vAttributeIx * weightComponentsByteStride; void* quantizedWeightsData = reinterpret_cast(vOverrideQuantizedWeightsBuffer->getPointer()) + quantizedVWeightsOffset; - core::vectorSIMDf packedWeightsStream; //! always go with full vectorSIMDf stream, weights being not used are leaved with default vector's compoment value and are not considered + hlsl::float32_t4 packedWeightsStream = {}; //! always go with full float4 stream, weights being not used are leaved with default vector's compoment value and are not considered auto* packedWeightsData = reinterpret_cast(reinterpret_cast(vOverrideRepackedWeightsBuffer->getPointer()) + vAttributeIx * repackWeightsTexelByteSize); for (uint16_t i = 0; i < maxJointsPerVertex; ++i) - packedWeightsStream.pointer[i] = packedWeightsData[i]; + packedWeightsStream[i] = packedWeightsData[i]; - ICPUMeshBuffer::setAttribute(packedWeightsStream, quantizedWeightsData, weightsQuantizeFormat); //! quantize + ICPUMeshBuffer::setAttribute(&packedWeightsStream[0], quantizedWeightsData, weightsQuantizeFormat); //! 
quantize } } From 2ae072ad2bc0238cc112c0d97678081efb1dcbf5 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 6 Mar 2026 16:57:24 +0100 Subject: [PATCH 048/118] Loader updates --- include/nbl/asset/interchange/SFileIOPolicy.h | 149 +++++++++--------- .../interchange/SGeometryContentHashCommon.h | 18 +-- .../asset/interchange/SGeometryWriterCommon.h | 38 +---- .../asset/utils/CPolygonGeometryManipulator.h | 7 + src/nbl/CMakeLists.txt | 1 + .../asset/interchange/COBJMeshFileLoader.cpp | 80 ++-------- src/nbl/asset/interchange/COBJMeshWriter.cpp | 26 +-- .../asset/interchange/CPLYMeshFileLoader.cpp | 4 +- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 33 ++-- .../asset/interchange/CSTLMeshFileLoader.cpp | 4 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 14 +- .../interchange/SGeometryWriterCommon.cpp | 63 ++++++++ .../utils/CPolygonGeometryManipulator.cpp | 79 ++++++++++ 13 files changed, 291 insertions(+), 225 deletions(-) create mode 100644 src/nbl/asset/interchange/SGeometryWriterCommon.cpp diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index fc1203395c..4afc51b3b0 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -10,12 +10,44 @@ #include #include +#include #include namespace nbl::asset { +namespace impl +{ +inline constexpr bool hasSingleBit(const uint64_t value) +{ + return value && ((value & (value - 1u)) == 0u); +} + +inline constexpr uint8_t bytesToLog2(uint64_t value) +{ + uint8_t result = 0u; + while (value > 1u) + { + value >>= 1u; + ++result; + } + return result; +} +} + +enum class EFileIOStrategy : uint8_t +{ + // Sentinel used when strategy resolution fails or the value is uninitialized. + Invalid = 0u, + // Pick whole-file or chunked dynamically based on file size and policy limits. + Auto, + // Force whole-file strategy. May fallback when not feasible unless strict=true. + WholeFile, + // Force chunked strategy. 
+ Chunked +}; + struct SFileIOPolicy { struct SRuntimeTuning @@ -71,16 +103,7 @@ struct SFileIOPolicy uint64_t tinyIoMinCallCount = 1024ull; }; - // File IO strategy selection mode. - enum class Strategy : uint8_t - { - // Pick whole-file or chunked dynamically based on file size and policy limits. - Auto, - // Force whole-file strategy. May fallback when not feasible unless strict=true. - WholeFile, - // Force chunked strategy. - Chunked - }; + using Strategy = EFileIOStrategy; enum E_FLAGS : uint8_t { @@ -88,8 +111,17 @@ struct SFileIOPolicy EF_STRICT_BIT = 1u << 0u }; - static inline constexpr uint8_t MIN_CHUNK_SIZE_LOG2 = 16u; - static inline constexpr uint8_t MAX_BYTE_SIZE_LOG2 = 63u; + static inline constexpr uint64_t MIN_CHUNK_SIZE_BYTES = 64ull << 10u; + static inline constexpr uint8_t MIN_CHUNK_SIZE_LOG2 = impl::bytesToLog2(MIN_CHUNK_SIZE_BYTES); + static inline constexpr uint8_t MAX_BYTE_SIZE_LOG2 = std::numeric_limits::digits - 1u; + static inline constexpr uint64_t DEFAULT_WHOLE_FILE_THRESHOLD_BYTES = 64ull << 20u; + static inline constexpr uint64_t DEFAULT_CHUNK_SIZE_BYTES = 4ull << 20u; + static inline constexpr uint64_t DEFAULT_MAX_STAGING_BYTES = 256ull << 20u; + + static_assert(impl::hasSingleBit(MIN_CHUNK_SIZE_BYTES)); + static_assert(impl::hasSingleBit(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES)); + static_assert(impl::hasSingleBit(DEFAULT_CHUNK_SIZE_BYTES)); + static_assert(impl::hasSingleBit(DEFAULT_MAX_STAGING_BYTES)); static inline constexpr uint8_t clampBytesLog2(const uint8_t value, const uint8_t minValue = 0u) { @@ -106,30 +138,30 @@ struct SFileIOPolicy // Resolution flags. core::bitflag flags = EF_NONE; // Maximum payload size allowed for whole-file strategy in auto mode. - uint8_t wholeFileThresholdLog2 = 26u; // 64 MiB + uint8_t wholeFileThresholdLog2 = impl::bytesToLog2(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES); // Chunk size used by chunked strategy encoded as log2(bytes). 
- uint8_t chunkSizeLog2 = 22u; // 4 MiB + uint8_t chunkSizeLog2 = impl::bytesToLog2(DEFAULT_CHUNK_SIZE_BYTES); // Maximum staging allocation for whole-file strategy encoded as log2(bytes). - uint8_t maxStagingLog2 = 28u; // 256 MiB + uint8_t maxStagingLog2 = impl::bytesToLog2(DEFAULT_MAX_STAGING_BYTES); // Runtime tuning controls used by loaders and hash stages. SRuntimeTuning runtimeTuning = {}; - inline bool strict() const + inline constexpr bool strict() const { return flags.hasAnyFlag(EF_STRICT_BIT); } - inline uint64_t wholeFileThresholdBytes() const + inline constexpr uint64_t wholeFileThresholdBytes() const { return bytesFromLog2(wholeFileThresholdLog2, MIN_CHUNK_SIZE_LOG2); } - inline uint64_t chunkSizeBytes() const + inline constexpr uint64_t chunkSizeBytes() const { return bytesFromLog2(chunkSizeLog2, MIN_CHUNK_SIZE_LOG2); } - inline uint64_t maxStagingBytes() const + inline constexpr uint64_t maxStagingBytes() const { return bytesFromLog2(maxStagingLog2, MIN_CHUNK_SIZE_LOG2); } @@ -137,16 +169,10 @@ struct SFileIOPolicy struct SResolvedFileIOPolicy { - // Strategy selected after resolving SFileIOPolicy against runtime constraints. - enum class Strategy : uint8_t - { - Invalid = 0u, - WholeFile, - Chunked - }; + using Strategy = EFileIOStrategy; - SResolvedFileIOPolicy() = default; - inline SResolvedFileIOPolicy(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) : + constexpr SResolvedFileIOPolicy() = default; + inline constexpr SResolvedFileIOPolicy(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) : SResolvedFileIOPolicy(resolve(policy, byteCount, sizeKnown, fileMappable)) { } @@ -158,17 +184,17 @@ struct SResolvedFileIOPolicy // Human-readable resolver reason used in logs and diagnostics. 
const char* reason = "invalid"; - inline bool isValid() const + inline constexpr bool isValid() const { return strategy != Strategy::Invalid; } - inline uint64_t chunkSizeBytes() const + inline constexpr uint64_t chunkSizeBytes() const { return SFileIOPolicy::bytesFromLog2(chunkSizeLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); } - static inline SResolvedFileIOPolicy resolve(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) + static inline constexpr SResolvedFileIOPolicy resolve(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) { const uint8_t maxStagingLog2 = SFileIOPolicy::clampBytesLog2(policy.maxStagingLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); const uint8_t chunkSizeLog2 = std::min( @@ -188,6 +214,8 @@ struct SResolvedFileIOPolicy switch (policy.strategy) { + case SFileIOPolicy::Strategy::Invalid: + return makeResolved(Strategy::Invalid, "invalid_requested_strategy"); case SFileIOPolicy::Strategy::WholeFile: { if (fileMappable || (sizeKnown && byteCount <= maxStaging)) @@ -215,60 +243,33 @@ struct SResolvedFileIOPolicy } }; -inline SResolvedFileIOPolicy resolveFileIOPolicy(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) +inline constexpr SResolvedFileIOPolicy resolveFileIOPolicy(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) { return SResolvedFileIOPolicy(policy, byteCount, sizeKnown, fileMappable); } -inline const char* toString(const SFileIOPolicy::Strategy value) -{ - switch (value) - { - case SFileIOPolicy::Strategy::Auto: - return "auto"; - case SFileIOPolicy::Strategy::WholeFile: - return "whole"; - case SFileIOPolicy::Strategy::Chunked: - return "chunked"; - default: - return "unknown"; - } -} - -inline const char* toString(const SResolvedFileIOPolicy::Strategy value) -{ - switch (value) - 
{ - case SResolvedFileIOPolicy::Strategy::Invalid: - return "invalid"; - case SResolvedFileIOPolicy::Strategy::WholeFile: - return "whole"; - case SResolvedFileIOPolicy::Strategy::Chunked: - return "chunked"; - default: - return "unknown"; - } -} - } namespace nbl::system::impl { template<> -struct to_string_helper +struct to_string_helper { - static inline std::string __call(const asset::SFileIOPolicy::Strategy value) + static inline std::string __call(const asset::EFileIOStrategy value) { - return asset::toString(value); - } -}; - -template<> -struct to_string_helper -{ - static inline std::string __call(const asset::SResolvedFileIOPolicy::Strategy value) - { - return asset::toString(value); + switch (value) + { + case asset::EFileIOStrategy::Invalid: + return "invalid"; + case asset::EFileIOStrategy::Auto: + return "auto"; + case asset::EFileIOStrategy::WholeFile: + return "whole"; + case asset::EFileIOStrategy::Chunked: + return "chunked"; + default: + return "unknown"; + } } }; } diff --git a/include/nbl/asset/interchange/SGeometryContentHashCommon.h b/include/nbl/asset/interchange/SGeometryContentHashCommon.h index 22c1c845c2..f25f8fede4 100644 --- a/include/nbl/asset/interchange/SGeometryContentHashCommon.h +++ b/include/nbl/asset/interchange/SGeometryContentHashCommon.h @@ -34,36 +34,36 @@ class SPolygonGeometryContentHash buffer->setContentHash(IPreHashed::INVALID_HASH); } - static inline core::blake3_hash_t getHash(const ICPUPolygonGeometry* geometry) + static inline core::blake3_hash_t computeHash(const ICPUPolygonGeometry* geometry) { if (!geometry) return IPreHashed::INVALID_HASH; - core::blake3_hasher hasher; + core::blake3_hasher hashBuilder = {}; if (const auto* indexing = geometry->getIndexingCallback(); indexing) { - hasher << indexing->degree(); - hasher << indexing->rate(); - hasher << indexing->knownTopology(); + hashBuilder << indexing->degree(); + hashBuilder << indexing->rate(); + hashBuilder << indexing->knownTopology(); } core::vector> 
buffers; collectBuffers(geometry, buffers); for (const auto& buffer : buffers) - hasher << (buffer ? buffer->getContentHash() : IPreHashed::INVALID_HASH); - return static_cast(hasher); + hashBuilder << (buffer ? buffer->getContentHash() : IPreHashed::INVALID_HASH); + return static_cast(hashBuilder); } static inline core::blake3_hash_t computeMissing(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) { CPolygonGeometryManipulator::computeMissingContentHashesParallel(geometry, ioPolicy); - return getHash(geometry); + return computeHash(geometry); } static inline core::blake3_hash_t recompute(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) { CPolygonGeometryManipulator::recomputeContentHashesParallel(geometry, ioPolicy); - return getHash(geometry); + return computeHash(geometry); } }; diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index bf969fb8c7..5497a892e1 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -10,7 +10,6 @@ #include #include -#include #include @@ -138,43 +137,20 @@ class SGeometryWriterCommon return true; } - static inline const hlsl::float32_t3* getTightFloat3View(const ICPUPolygonGeometry::SDataView& view) + template + static inline const T* getTightView(const ICPUPolygonGeometry::SDataView& view) { if (!view) return nullptr; - if (view.composed.format != EF_R32G32B32_SFLOAT) + if (view.composed.format != ExpectedFormat) return nullptr; - if (view.composed.getStride() != sizeof(hlsl::float32_t3)) + if (view.composed.getStride() != sizeof(T)) return nullptr; - return reinterpret_cast(view.getPointer()); + return reinterpret_cast(view.getPointer()); } - static inline const hlsl::float32_t2* getTightFloat2View(const ICPUPolygonGeometry::SDataView& view) - { - if (!view) - return nullptr; - if (view.composed.format != EF_R32G32_SFLOAT) - return nullptr; - if 
(view.composed.getStride() != sizeof(hlsl::float32_t2)) - return nullptr; - return reinterpret_cast(view.getPointer()); - } - - static inline char* appendFloatFixed6ToBuffer(char* dst, char* const end, const float value) - { - if (!dst || dst >= end) - return end; - - const auto result = std::to_chars(dst, end, value, std::chars_format::fixed, 6); - if (result.ec == std::errc()) - return result.ptr; - - const int written = std::snprintf(dst, static_cast(end - dst), "%.6f", static_cast(value)); - if (written <= 0) - return dst; - const size_t writeLen = static_cast(written); - return (writeLen < static_cast(end - dst)) ? (dst + writeLen) : end; - } + static char* appendFloatToBuffer(char* dst, char* end, float value); + static char* appendFloatToBuffer(char* dst, char* end, double value); static inline char* appendUIntToBuffer(char* dst, char* const end, const uint32_t value) { diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 9e1548a5bf..1490725a9a 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -44,6 +44,13 @@ class NBL_API2 CPolygonGeometryManipulator recomputeContentHashesParallel(geo, SFileIOPolicy{}); } + static bool generateMissingSmoothNormals( + core::vector& normals, + const core::vector& positions, + const core::vector& indices, + const core::vector& normalNeedsGeneration + ); + // static inline void recomputeRanges(ICPUPolygonGeometry* geo, const bool deduceRangeFormats=true) { diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 4f0852c687..7746e20271 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -200,6 +200,7 @@ set(NBL_ASSET_SOURCES asset/interchange/CGLTFLoader.cpp # Mesh writers + asset/interchange/SGeometryWriterCommon.cpp asset/interchange/COBJMeshWriter.cpp asset/interchange/CPLYMeshWriter.cpp asset/interchange/CSTLMeshWriter.cpp diff --git 
a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 68f5937a24..d6119d4d53 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -564,71 +564,13 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (needsNormalGeneration) { - core::vector generatedNormals(outVertexWriteCount, Float3(0.f, 0.f, 0.f)); - const size_t triangleCount = indices.size() / 3ull; - for (size_t triIx = 0ull; triIx < triangleCount; ++triIx) - { - const uint32_t i0 = indices[triIx * 3ull + 0ull]; - const uint32_t i1 = indices[triIx * 3ull + 1ull]; - const uint32_t i2 = indices[triIx * 3ull + 2ull]; - if (i0 >= outVertexWriteCount || i1 >= outVertexWriteCount || i2 >= outVertexWriteCount) - continue; - - const auto& p0 = outPositions[static_cast(i0)]; - const auto& p1 = outPositions[static_cast(i1)]; - const auto& p2 = outPositions[static_cast(i2)]; - - const float e10x = p1.x - p0.x; - const float e10y = p1.y - p0.y; - const float e10z = p1.z - p0.z; - const float e20x = p2.x - p0.x; - const float e20y = p2.y - p0.y; - const float e20z = p2.z - p0.z; - - const Float3 faceNormal( - e10y * e20z - e10z * e20y, - e10z * e20x - e10x * e20z, - e10x * e20y - e10y * e20x); - - const float faceLenSq = faceNormal.x * faceNormal.x + faceNormal.y * faceNormal.y + faceNormal.z * faceNormal.z; - if (faceLenSq <= 1e-20f) - continue; - - const auto accumulateIfNeeded = [&](const uint32_t vertexIx)->void - { - if (outNormalNeedsGeneration[static_cast(vertexIx)] == 0u) - return; - auto& dstNormal = generatedNormals[static_cast(vertexIx)]; - dstNormal.x += faceNormal.x; - dstNormal.y += faceNormal.y; - dstNormal.z += faceNormal.z; - }; - - accumulateIfNeeded(i0); - accumulateIfNeeded(i1); - accumulateIfNeeded(i2); - } - - for (size_t i = 0ull; i < outVertexWriteCount; ++i) - { - if (outNormalNeedsGeneration[i] == 0u) - continue; - - auto normal = 
generatedNormals[i]; - const float lenSq = normal.x * normal.x + normal.y * normal.y + normal.z * normal.z; - if (lenSq > 1e-20f) - { - const float invLen = 1.f / std::sqrt(lenSq); - normal.x *= invLen; - normal.y *= invLen; - normal.z *= invLen; - } - else - { - normal = Float3(0.f, 0.f, 1.f); - } - outNormals[i] = normal; - } + // OBJ smoothing groups are already encoded in the parser-side vertex split + // corners that must stay sharp become different output vertices even if they share position. + // This helper works on that final indexed output and fills only normals missing in the source. + // `createSmoothVertexNormal` is still not enough here even with indexed-view support, + // because it would also need a "missing only" mode and proper OBJ smoothing-group handling. + if (!CPolygonGeometryManipulator::generateMissingSmoothNormals(outNormals, outPositions, indices, outNormalNeedsGeneration)) + return false; } const size_t outVertexCount = outPositions.size(); @@ -1134,8 +1076,8 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as static_cast(ioTelemetry.callCount), static_cast(ioTelemetry.getMinOrZero()), static_cast(ioTelemetry.getAvgOrZero()), - toString(_params.ioPolicy.strategy), - toString(ioPlan.strategy), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(ioPlan.strategy).c_str(), static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); @@ -1195,8 +1137,8 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as static_cast(ioTelemetry.callCount), static_cast(ioTelemetry.getMinOrZero()), static_cast(ioTelemetry.getAvgOrZero()), - toString(_params.ioPolicy.strategy), - toString(ioPlan.strategy), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(ioPlan.strategy).c_str(), static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 
d749c80abf..492ecd7747 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -53,7 +53,7 @@ namespace obj_writer_detail constexpr size_t ApproxObjBytesPerVertex = 96ull; constexpr size_t ApproxObjBytesPerFace = 48ull; -constexpr size_t MaxFloatFixed6Chars = 48ull; +constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 8ull; constexpr size_t MaxUInt32Chars = std::numeric_limits::digits10 + 1ull; constexpr size_t MaxIndexTokenBytes = MaxUInt32Chars * 3ull + 2ull; @@ -72,7 +72,7 @@ bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hls void appendVec3Line(std::string& out, const char* prefix, const size_t prefixSize, const hlsl::float32_t3& v) { const size_t oldSize = out.size(); - out.resize(oldSize + prefixSize + (3ull * MaxFloatFixed6Chars) + 3ull); + out.resize(oldSize + prefixSize + (3ull * MaxFloatTextChars) + 3ull); char* const lineBegin = out.data() + oldSize; char* cursor = lineBegin; char* const lineEnd = out.data() + out.size(); @@ -80,13 +80,13 @@ void appendVec3Line(std::string& out, const char* prefix, const size_t prefixSiz std::memcpy(cursor, prefix, prefixSize); cursor += prefixSize; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, v.x); + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, v.x); if (cursor < lineEnd) *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, v.y); + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, v.y); if (cursor < lineEnd) *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, v.z); + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, v.z); if (cursor < lineEnd) *(cursor++) = '\n'; @@ -96,7 +96,7 @@ void appendVec3Line(std::string& out, const char* prefix, const size_t prefixSiz void appendVec2Line(std::string& out, const char* prefix, const size_t 
prefixSize, const hlsl::float32_t2& v) { const size_t oldSize = out.size(); - out.resize(oldSize + prefixSize + (2ull * MaxFloatFixed6Chars) + 2ull); + out.resize(oldSize + prefixSize + (2ull * MaxFloatTextChars) + 2ull); char* const lineBegin = out.data() + oldSize; char* cursor = lineBegin; char* const lineEnd = out.data() + out.size(); @@ -104,10 +104,10 @@ void appendVec2Line(std::string& out, const char* prefix, const size_t prefixSiz std::memcpy(cursor, prefix, prefixSize); cursor += prefixSize; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, v.x); + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, v.x); if (cursor < lineEnd) *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, lineEnd, v.y); + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, v.y); if (cursor < lineEnd) *(cursor++) = '\n'; @@ -236,9 +236,9 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ output.append("# Nabla OBJ\n"); hlsl::float64_t4 tmp = {}; - const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightFloat3View(positionView); - const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightFloat3View(normalView) : nullptr; - const hlsl::float32_t2* const tightUV = hasUVs ? SGeometryWriterCommon::getTightFloat2View(*uvView) : nullptr; + const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(positionView); + const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; + const hlsl::float32_t2* const tightUV = hasUVs ? 
SGeometryWriterCommon::getTightView(*uvView) : nullptr; for (size_t i = 0u; i < vertexCount; ++i) { hlsl::float32_t3 vertex = {}; @@ -356,8 +356,8 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), - toString(_params.ioPolicy.strategy), - toString(ioPlan.strategy), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(ioPlan.strategy).c_str(), static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 709d7f3849..7cab287dff 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -2173,8 +2173,8 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa static_cast(ctx.readCallCount), static_cast(ioMinRead), static_cast(ioAvgRead), - toString(_params.ioPolicy.strategy), - toString(ioPlan.strategy), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(ioPlan.strategy).c_str(), static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); auto meta = core::make_smart_refctd_ptr(); diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index bb099b8277..efcae14c6e 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -216,19 +216,16 @@ void appendInt(std::string& out, const int64_t value) out.append(buf.data(), static_cast(res.ptr - buf.data())); } -void appendFloatFixed6(std::string& out, double value) -{ - std::array buf = {}; - const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value, std::chars_format::fixed, 6); - if (res.ec == std::errc()) - { - out.append(buf.data(), static_cast(res.ptr - buf.data())); - return; - } +constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 
+ 16ull; - const int written = std::snprintf(buf.data(), buf.size(), "%.6f", value); - if (written > 0) - out.append(buf.data(), static_cast(written)); +void appendFloat(std::string& out, double value) +{ + const size_t oldSize = out.size(); + out.resize(oldSize + MaxFloatTextChars); + char* const begin = out.data() + oldSize; + char* const end = begin + MaxFloatTextChars; + char* const cursor = SGeometryWriterCommon::appendFloatToBuffer(begin, end, value); + out.resize(oldSize + static_cast(cursor - begin)); } void appendVec(std::string& out, const double* values, size_t count, bool flipVectors = false) @@ -237,7 +234,7 @@ void appendVec(std::string& out, const double* values, size_t count, bool flipVe for (size_t i = 0u; i < count; ++i) { const bool flip = flipVectors && i == xID; - appendFloatFixed6(out, flip ? -values[i] : values[i]); + appendFloat(out, flip ? -values[i] : values[i]); out.push_back(' '); } } @@ -370,7 +367,7 @@ inline bool writeTypedViewText(std::string& output, const ICPUPolygonGeometry::S double value = (&tmp.x)[c]; if (flipVectors && c == 0u) value = -value; - appendFloatFixed6(output, value); + appendFloat(output, value); output.push_back(' '); } return true; @@ -665,8 +662,8 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), - toString(_params.ioPolicy.strategy), - toString(ioPlan.strategy), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(ioPlan.strategy).c_str(), static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); return writeOk; @@ -721,8 +718,8 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), - toString(_params.ioPolicy.strategy), - toString(ioPlan.strategy), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(ioPlan.strategy).c_str(), 
static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); return writeOk; diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 075502d283..e9f8337d45 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -787,8 +787,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa static_cast(context.ioTelemetry.callCount), static_cast(ioMinRead), static_cast(ioAvgRead), - toString(_params.ioPolicy.strategy), - toString(ioPlan.strategy), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(ioPlan.strategy).c_str(), static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); auto meta = core::make_smart_refctd_ptr(); diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index d63fddb19e..d3c0817827 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -176,8 +176,8 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(context.writeTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), - toString(_params.ioPolicy.strategy), - toString(context.ioPlan.strategy), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(context.ioPlan.strategy).c_str(), static_cast(context.ioPlan.chunkSizeBytes()), context.ioPlan.reason); @@ -266,15 +266,15 @@ bool appendLiteral(char*& cursor, char* const end, const char* text, const size_ bool appendVectorAsAsciiLine(char*& cursor, char* const end, const hlsl::float32_t3& v) { - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, end, v.x); + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.x); if (cursor >= end) return false; *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, end, v.y); + cursor = 
SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.y); if (cursor >= end) return false; *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatFixed6ToBuffer(cursor, end, v.z); + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.z); if (cursor >= end) return false; *(cursor++) = '\n'; @@ -454,8 +454,8 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) const auto& normalView = geom->getNormalView(); const bool hasNormals = static_cast(normalView); const auto* const colorView = stlFindColorView(geom, vertexCount); - const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightFloat3View(posView); - const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightFloat3View(normalView) : nullptr; + const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(posView); + const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; const float handednessSign = flipHandedness ? -1.f : 1.f; auto decodePosition = [&](const uint32_t ix, hlsl::float32_t3& out)->bool diff --git a/src/nbl/asset/interchange/SGeometryWriterCommon.cpp b/src/nbl/asset/interchange/SGeometryWriterCommon.cpp new file mode 100644 index 0000000000..1ab5cd1ee9 --- /dev/null +++ b/src/nbl/asset/interchange/SGeometryWriterCommon.cpp @@ -0,0 +1,63 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
// Capacity of the temporary buffer used by the snprintf fallback below:
// max_digits10 significant digits plus 9 extra characters of headroom
// (sign, decimal point, exponent marker/sign/digits and the terminating NUL).
template<typename T>
inline constexpr size_t FloatingPointScratchSize = std::numeric_limits<T>::max_digits10 + 9ull;

// Appends the textual form of `value` to the range [dst, end).
//
// Returns the new write cursor (one past the last character written).
// Returns `end` when `dst` is null, when there is no room left, or when the
// formatted text does not fit; callers detect failure with `cursor >= end`.
// Returns `dst` unchanged when the snprintf fallback reports an error or
// produces no characters.
// The output is NOT NUL-terminated.
template<typename T>
char* appendFloatingPointToBuffer(char* dst, char* const end, const T value)
{
	static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>);

	if (!dst || dst >= end)
		return end;

	// Preferred path: locale-independent, non-allocating conversion straight into the destination.
	const auto result = std::to_chars(dst, end, value);
	if (result.ec == std::errc())
		return result.ptr;

	// Fallback: format into a scratch buffer with round-trip precision and copy only if it fits.
	std::array<char, FloatingPointScratchSize<T>> scratch = {};
	constexpr int Precision = std::numeric_limits<T>::max_digits10;
	const int written = std::snprintf(scratch.data(), scratch.size(), "%.*g", Precision, static_cast<double>(value));
	if (written <= 0)
		return dst;

	const size_t writeLen = static_cast<size_t>(written);
	// snprintf returns the length it *wanted* to write. A value >= the scratch size means the
	// text was truncated, so copying `writeLen` bytes would read past the end of `scratch`.
	if (writeLen >= scratch.size())
		return end;
	if (writeLen > static_cast<size_t>(end - dst))
		return end;

	std::memcpy(dst, scratch.data(), writeLen);
	return dst + writeLen;
}
if (normals.size() != positions.size() || normals.size() != normalNeedsGeneration.size()) + return false; + + core::vector generatedNormals(positions.size(), hlsl::float32_t3(0.f, 0.f, 0.f)); + const size_t triangleCount = indices.size() / 3ull; + for (size_t triIx = 0ull; triIx < triangleCount; ++triIx) + { + const uint32_t i0 = indices[triIx * 3ull + 0ull]; + const uint32_t i1 = indices[triIx * 3ull + 1ull]; + const uint32_t i2 = indices[triIx * 3ull + 2ull]; + if (i0 >= positions.size() || i1 >= positions.size() || i2 >= positions.size()) + continue; + + const auto& p0 = positions[static_cast(i0)]; + const auto& p1 = positions[static_cast(i1)]; + const auto& p2 = positions[static_cast(i2)]; + + const float e10x = p1.x - p0.x; + const float e10y = p1.y - p0.y; + const float e10z = p1.z - p0.z; + const float e20x = p2.x - p0.x; + const float e20y = p2.y - p0.y; + const float e20z = p2.z - p0.z; + + const hlsl::float32_t3 faceNormal( + e10y * e20z - e10z * e20y, + e10z * e20x - e10x * e20z, + e10x * e20y - e10y * e20x); + + const float faceLenSq = faceNormal.x * faceNormal.x + faceNormal.y * faceNormal.y + faceNormal.z * faceNormal.z; + if (faceLenSq <= 1e-20f) + continue; + + const auto accumulateIfNeeded = [&](const uint32_t vertexIx)->void + { + if (normalNeedsGeneration[static_cast(vertexIx)] == 0u) + return; + auto& dstNormal = generatedNormals[static_cast(vertexIx)]; + dstNormal.x += faceNormal.x; + dstNormal.y += faceNormal.y; + dstNormal.z += faceNormal.z; + }; + + accumulateIfNeeded(i0); + accumulateIfNeeded(i1); + accumulateIfNeeded(i2); + } + + for (size_t i = 0ull; i < normals.size(); ++i) + { + if (normalNeedsGeneration[i] == 0u) + continue; + + auto normal = generatedNormals[i]; + const float lenSq = normal.x * normal.x + normal.y * normal.y + normal.z * normal.z; + if (lenSq > 1e-20f) + { + const float invLen = 1.f / std::sqrt(lenSq); + normal.x *= invLen; + normal.y *= invLen; + normal.z *= invLen; + } + else + { + normal = hlsl::float32_t3(0.f, 
0.f, 1.f); + } + normals[i] = normal; + } + + return true; +} + core::smart_refctd_ptr CPolygonGeometryManipulator::createUnweldedList(const ICPUPolygonGeometry* inGeo, const bool reverse, const bool recomputeHash) { From 47bcf0040790b73c61da3973a280fa36130e25e4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 6 Mar 2026 17:10:48 +0100 Subject: [PATCH 049/118] Use std bit utilities --- include/nbl/asset/interchange/SFileIOPolicy.h | 56 +++++++------------ 1 file changed, 19 insertions(+), 37 deletions(-) diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 4afc51b3b0..7c78450821 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -9,6 +9,7 @@ #include "nbl/system/to_string.h" #include +#include #include #include #include @@ -17,25 +18,6 @@ namespace nbl::asset { -namespace impl -{ -inline constexpr bool hasSingleBit(const uint64_t value) -{ - return value && ((value & (value - 1u)) == 0u); -} - -inline constexpr uint8_t bytesToLog2(uint64_t value) -{ - uint8_t result = 0u; - while (value > 1u) - { - value >>= 1u; - ++result; - } - return result; -} -} - enum class EFileIOStrategy : uint8_t { // Sentinel used when strategy resolution fails or the value is uninitialized. @@ -88,17 +70,17 @@ struct SFileIOPolicy // Target chunk count assigned to each worker for hash stages. uint8_t hashTaskTargetChunksPerWorker = 1u; // Hash inlining threshold. Inputs up to this size prefer inline hash build. - uint64_t hashInlineThresholdBytes = 1ull << 20; + uint64_t hashInlineThresholdBytes = 1ull << 20; // 1 MiB // Lower bound for sampled byte count in hybrid mode. - uint64_t minSampleBytes = 4ull << 10; + uint64_t minSampleBytes = 4ull << 10; // 4 KiB // Upper bound for sampled byte count in hybrid mode. 
- uint64_t maxSampleBytes = 128ull << 10; + uint64_t maxSampleBytes = 128ull << 10; // 128 KiB // Payload size threshold for tiny-IO anomaly detection. - uint64_t tinyIoPayloadThresholdBytes = 1ull << 20; + uint64_t tinyIoPayloadThresholdBytes = 1ull << 20; // 1 MiB // Average operation size threshold for tiny-IO anomaly detection. - uint64_t tinyIoAvgBytesThreshold = 1024ull; + uint64_t tinyIoAvgBytesThreshold = 1024ull; // 1 KiB // Minimum operation size threshold for tiny-IO anomaly detection. - uint64_t tinyIoMinBytesThreshold = 64ull; + uint64_t tinyIoMinBytesThreshold = 64ull; // 64 B // Minimum operation count required to report tiny-IO anomaly. uint64_t tinyIoMinCallCount = 1024ull; }; @@ -111,17 +93,17 @@ struct SFileIOPolicy EF_STRICT_BIT = 1u << 0u }; - static inline constexpr uint64_t MIN_CHUNK_SIZE_BYTES = 64ull << 10u; - static inline constexpr uint8_t MIN_CHUNK_SIZE_LOG2 = impl::bytesToLog2(MIN_CHUNK_SIZE_BYTES); + static inline constexpr uint64_t MIN_CHUNK_SIZE_BYTES = 64ull << 10u; // 64 KiB + static inline constexpr uint8_t MIN_CHUNK_SIZE_LOG2 = static_cast(std::bit_width(MIN_CHUNK_SIZE_BYTES) - 1u); static inline constexpr uint8_t MAX_BYTE_SIZE_LOG2 = std::numeric_limits::digits - 1u; - static inline constexpr uint64_t DEFAULT_WHOLE_FILE_THRESHOLD_BYTES = 64ull << 20u; - static inline constexpr uint64_t DEFAULT_CHUNK_SIZE_BYTES = 4ull << 20u; - static inline constexpr uint64_t DEFAULT_MAX_STAGING_BYTES = 256ull << 20u; + static inline constexpr uint64_t DEFAULT_WHOLE_FILE_THRESHOLD_BYTES = 64ull << 20u; // 64 MiB + static inline constexpr uint64_t DEFAULT_CHUNK_SIZE_BYTES = 4ull << 20u; // 4 MiB + static inline constexpr uint64_t DEFAULT_MAX_STAGING_BYTES = 256ull << 20u; // 256 MiB - static_assert(impl::hasSingleBit(MIN_CHUNK_SIZE_BYTES)); - static_assert(impl::hasSingleBit(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES)); - static_assert(impl::hasSingleBit(DEFAULT_CHUNK_SIZE_BYTES)); - static_assert(impl::hasSingleBit(DEFAULT_MAX_STAGING_BYTES)); + 
static_assert(std::has_single_bit(MIN_CHUNK_SIZE_BYTES)); + static_assert(std::has_single_bit(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES)); + static_assert(std::has_single_bit(DEFAULT_CHUNK_SIZE_BYTES)); + static_assert(std::has_single_bit(DEFAULT_MAX_STAGING_BYTES)); static inline constexpr uint8_t clampBytesLog2(const uint8_t value, const uint8_t minValue = 0u) { @@ -138,11 +120,11 @@ struct SFileIOPolicy // Resolution flags. core::bitflag flags = EF_NONE; // Maximum payload size allowed for whole-file strategy in auto mode. - uint8_t wholeFileThresholdLog2 = impl::bytesToLog2(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES); + uint8_t wholeFileThresholdLog2 = static_cast(std::bit_width(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES) - 1u); // Chunk size used by chunked strategy encoded as log2(bytes). - uint8_t chunkSizeLog2 = impl::bytesToLog2(DEFAULT_CHUNK_SIZE_BYTES); + uint8_t chunkSizeLog2 = static_cast(std::bit_width(DEFAULT_CHUNK_SIZE_BYTES) - 1u); // Maximum staging allocation for whole-file strategy encoded as log2(bytes). - uint8_t maxStagingLog2 = impl::bytesToLog2(DEFAULT_MAX_STAGING_BYTES); + uint8_t maxStagingLog2 = static_cast(std::bit_width(DEFAULT_MAX_STAGING_BYTES) - 1u); // Runtime tuning controls used by loaders and hash stages. 
SRuntimeTuning runtimeTuning = {}; From ec74bd76e91faf2268803f2ef8c88b8542a8a93e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 6 Mar 2026 17:22:41 +0100 Subject: [PATCH 050/118] Loader flag updates --- include/nbl/asset/interchange/IAssetLoader.h | 7 +++++-- include/nbl/asset/interchange/IGeometryLoader.h | 4 ++-- src/nbl/asset/interchange/COBJMeshFileLoader.cpp | 2 +- src/nbl/asset/interchange/CPLYMeshFileLoader.cpp | 2 +- src/nbl/asset/interchange/CSTLMeshFileLoader.cpp | 2 +- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 18a10f9c4c..58a160322f 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -10,6 +10,8 @@ #include "nbl/system/ISystem.h" #include "nbl/system/ILogger.h" +#include "nbl/core/util/bitflag.h" + #include "nbl/asset/interchange/SAssetBundle.h" #include "nbl/asset/interchange/SFileIOPolicy.h" #include "nbl/asset/utils/CGeometryCreator.h" @@ -95,11 +97,12 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted ELPF_LOAD_METADATA_ONLY = 0x4, //!< it forces the loader to not load the entire scene for performance in special cases to fetch metadata. ELPF_DONT_COMPUTE_CONTENT_HASHES = 0x8 //!< opt-out from computing content hashes of produced buffers before returning. 
}; + using loader_flags_t = core::bitflag; struct SAssetLoadParams { inline SAssetLoadParams(const size_t _decryptionKeyLen = 0u, const uint8_t* const _decryptionKey = nullptr, - const E_CACHING_FLAGS _cacheFlags = ECF_CACHE_EVERYTHING,const E_LOADER_PARAMETER_FLAGS _loaderFlags = ELPF_NONE, + const E_CACHING_FLAGS _cacheFlags = ECF_CACHE_EVERYTHING, const loader_flags_t _loaderFlags = ELPF_NONE, const system::logger_opt_ptr _logger = nullptr, const std::filesystem::path& cwd = "", const SFileIOPolicy& _ioPolicy = {}) : decryptionKeyLen(_decryptionKeyLen), decryptionKey(_decryptionKey), cacheFlags(_cacheFlags), loaderFlags(_loaderFlags), @@ -121,7 +124,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted size_t decryptionKeyLen; const uint8_t* decryptionKey; E_CACHING_FLAGS cacheFlags; - E_LOADER_PARAMETER_FLAGS loaderFlags; //!< Flags having an impact on extraordinary tasks during loading process + loader_flags_t loaderFlags; //!< Flags having an impact on extraordinary tasks during loading process std::filesystem::path workingDirectory = ""; system::logger_opt_ptr logger; SFileIOPolicy ioPolicy = {}; diff --git a/include/nbl/asset/interchange/IGeometryLoader.h b/include/nbl/asset/interchange/IGeometryLoader.h index 52bedd06ee..90d1caa725 100644 --- a/include/nbl/asset/interchange/IGeometryLoader.h +++ b/include/nbl/asset/interchange/IGeometryLoader.h @@ -64,7 +64,7 @@ class IGeometryLoader : public IAssetLoader inline void deallocate(void* p, std::size_t bytes, std::size_t alignment) override { assert(m_file); - auto* const basePtr = reinterpret_cast(static_cast(m_file.get())->getMappedPointer()); + auto* const basePtr = reinterpret_cast(m_file->getMappedPointer()); assert(basePtr && basePtr<=p && p<=basePtr+m_file->getSize()); } @@ -73,7 +73,7 @@ class IGeometryLoader : public IAssetLoader }; static inline IGeometry::SDataView createView(const E_FORMAT format, const size_t elementCount, core::smart_refctd_ptr&& file, const size_t 
offsetInFile) { - if (auto* const basePtr=reinterpret_cast(static_cast(file.get())->getMappedPointer()); basePtr) + if (auto* const basePtr=reinterpret_cast(file->getMappedPointer()); basePtr) { auto resource = core::make_smart_refctd_ptr(std::move(file)); auto* const data = basePtr+offsetInFile; diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index d6119d4d53..9a38d635da 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -625,7 +625,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as geometry->setIndexing(IPolygonGeometryBase::PointList()); } - if ((_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0) + if (!_params.loaderFlags.hasAnyFlag(IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES)) SPolygonGeometryContentHash::computeMissing(geometry.get(), _params.ioPolicy); if (!parsedAABB.empty()) diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 7cab287dff..c969a7c97b 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -1527,7 +1527,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!_file) return {}; - const bool computeContentHashes = (_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0; + const bool computeContentHashes = !_params.loaderFlags.hasAnyFlag(IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES); uint64_t faceCount = 0u; uint64_t fastFaceElementCount = 0u; uint64_t fastVertexElementCount = 0u; diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index e9f8337d45..bc1bf6246d 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -179,7 +179,7 @@ SAssetBundle 
CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa uint64_t triangleCount = 0u; const char* parsePath = "unknown"; - const bool computeContentHashes = (_params.loaderFlags & IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES) == 0; + const bool computeContentHashes = !_params.loaderFlags.hasAnyFlag(IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES); bool hasTriangleColors = false; SSTLContext context = { asset::IAssetLoader::SAssetLoadContext{ _params,_file },0ull }; From a567d1458568e2becdca36c6baf262dcf6013e72 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 6 Mar 2026 17:27:49 +0100 Subject: [PATCH 051/118] Cache flag updates --- include/nbl/asset/interchange/IAssetLoader.h | 29 ++++++++++---------- src/nbl/asset/IAssetManager.cpp | 8 +++--- src/nbl/asset/interchange/IAssetLoader.cpp | 10 +++---- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 58a160322f..42a1ecc855 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -78,6 +78,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted //! meaning identical as to ECF_DUPLICATE_TOP_LEVEL but for any asset in the chain ECF_DUPLICATE_REFERENCES = 0xffffffffffffffffull }; + using caching_flags_t = core::bitflag; //! 
Parameter flags for a loader /** @@ -102,7 +103,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted struct SAssetLoadParams { inline SAssetLoadParams(const size_t _decryptionKeyLen = 0u, const uint8_t* const _decryptionKey = nullptr, - const E_CACHING_FLAGS _cacheFlags = ECF_CACHE_EVERYTHING, const loader_flags_t _loaderFlags = ELPF_NONE, + const caching_flags_t _cacheFlags = ECF_CACHE_EVERYTHING, const loader_flags_t _loaderFlags = ELPF_NONE, const system::logger_opt_ptr _logger = nullptr, const std::filesystem::path& cwd = "", const SFileIOPolicy& _ioPolicy = {}) : decryptionKeyLen(_decryptionKeyLen), decryptionKey(_decryptionKey), cacheFlags(_cacheFlags), loaderFlags(_loaderFlags), @@ -123,7 +124,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted size_t decryptionKeyLen; const uint8_t* decryptionKey; - E_CACHING_FLAGS cacheFlags; + caching_flags_t cacheFlags; loader_flags_t loaderFlags; //!< Flags having an impact on extraordinary tasks during loading process std::filesystem::path workingDirectory = ""; system::logger_opt_ptr logger; @@ -140,37 +141,37 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted }; // following could be inlined - static E_CACHING_FLAGS ECF_DONT_CACHE_LEVEL(uint64_t N) + static caching_flags_t ECF_DONT_CACHE_LEVEL(uint64_t N) { N *= 2ull; - return (E_CACHING_FLAGS)(ECF_DONT_CACHE_TOP_LEVEL << N); + return caching_flags_t(static_cast(ECF_DONT_CACHE_TOP_LEVEL) << N); } - static E_CACHING_FLAGS ECF_DUPLICATE_LEVEL(uint64_t N) + static caching_flags_t ECF_DUPLICATE_LEVEL(uint64_t N) { N *= 2ull; - return (E_CACHING_FLAGS)(ECF_DUPLICATE_TOP_LEVEL << N); + return caching_flags_t(static_cast(ECF_DUPLICATE_TOP_LEVEL) << N); } - static E_CACHING_FLAGS ECF_DONT_CACHE_FROM_LEVEL(uint64_t N) + static caching_flags_t ECF_DONT_CACHE_FROM_LEVEL(uint64_t N) { // (Criss) Shouldn't be set all DONT_CACHE bits from hierarchy numbers N-1 to 32 (64==2*32) ? 
Same for ECF_DUPLICATE_FROM_LEVEL below N *= 2ull; - return (E_CACHING_FLAGS)(ECF_DONT_CACHE_REFERENCES << N); + return caching_flags_t(static_cast(ECF_DONT_CACHE_REFERENCES) << N); } - static E_CACHING_FLAGS ECF_DUPLICATE_FROM_LEVEL(uint64_t N) + static caching_flags_t ECF_DUPLICATE_FROM_LEVEL(uint64_t N) { N *= 2ull; - return (E_CACHING_FLAGS)(ECF_DUPLICATE_REFERENCES << N); + return caching_flags_t(static_cast(ECF_DUPLICATE_REFERENCES) << N); } - static E_CACHING_FLAGS ECF_DONT_CACHE_UNTIL_LEVEL(uint64_t N) + static caching_flags_t ECF_DONT_CACHE_UNTIL_LEVEL(uint64_t N) { // (Criss) is this ok? Shouldn't be set all DONT_CACHE bits from hierarchy numbers 0 to N-1? Same for ECF_DUPLICATE_UNTIL_LEVEL below N = 64ull - N * 2ull; - return (E_CACHING_FLAGS)(ECF_DONT_CACHE_REFERENCES >> N); + return caching_flags_t(static_cast(ECF_DONT_CACHE_REFERENCES) >> N); } - static E_CACHING_FLAGS ECF_DUPLICATE_UNTIL_LEVEL(uint64_t N) + static caching_flags_t ECF_DUPLICATE_UNTIL_LEVEL(uint64_t N) { N = 64ull - N * 2ull; - return (E_CACHING_FLAGS)(ECF_DUPLICATE_REFERENCES >> N); + return caching_flags_t(static_cast(ECF_DUPLICATE_REFERENCES) >> N); } //! 
Override class to facilitate changing how assets are loaded diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index e2e817567a..4d5e762aa3 100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -212,10 +212,10 @@ SAssetBundle IAssetManager::getAssetInHierarchy_impl(system::IFile* _file, const if (params.workingDirectory.empty()) params.workingDirectory = filename.parent_path(); - const uint64_t levelFlags = params.cacheFlags >> ((uint64_t)_hierarchyLevel * 2ull); + const auto levelFlags = IAssetLoader::caching_flags_t(static_cast(params.cacheFlags.value) >> ((uint64_t)_hierarchyLevel * 2ull)); SAssetBundle bundle; - if ((levelFlags & IAssetLoader::ECF_DUPLICATE_TOP_LEVEL) != IAssetLoader::ECF_DUPLICATE_TOP_LEVEL) + if (!levelFlags.hasFlags(IAssetLoader::ECF_DUPLICATE_TOP_LEVEL)) { auto found = findAssets(filenameString); if (found->size()) @@ -249,8 +249,8 @@ SAssetBundle IAssetManager::getAssetInHierarchy_impl(system::IFile* _file, const } if (!bundle.getContents().empty() && - ((levelFlags & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL) != IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL) && - ((levelFlags & IAssetLoader::ECF_DUPLICATE_TOP_LEVEL) != IAssetLoader::ECF_DUPLICATE_TOP_LEVEL)) + !levelFlags.hasFlags(IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL) && + !levelFlags.hasFlags(IAssetLoader::ECF_DUPLICATE_TOP_LEVEL)) { _override->insertAssetIntoCache(bundle, filenameString, ctx.params, _hierarchyLevel); } diff --git a/src/nbl/asset/interchange/IAssetLoader.cpp b/src/nbl/asset/interchange/IAssetLoader.cpp index 4a9a8f0378..98f579257d 100644 --- a/src/nbl/asset/interchange/IAssetLoader.cpp +++ b/src/nbl/asset/interchange/IAssetLoader.cpp @@ -16,8 +16,8 @@ IAssetLoader::IAssetLoaderOverride::IAssetLoaderOverride(SCreationParams&& param SAssetBundle IAssetLoader::IAssetLoaderOverride::findCachedAsset(const std::string& inSearchKey, const IAsset::E_TYPE* inAssetTypes, const SAssetLoadContext& ctx, const uint32_t 
hierarchyLevel) { - auto levelFlag = ctx.params.cacheFlags >> (uint64_t(hierarchyLevel) * 2ull); - if ((levelFlag & ECF_DUPLICATE_TOP_LEVEL) == ECF_DUPLICATE_TOP_LEVEL) + const auto levelFlags = caching_flags_t(static_cast(ctx.params.cacheFlags.value) >> (uint64_t(hierarchyLevel) * 2ull)); + if (levelFlags.hasFlags(ECF_DUPLICATE_TOP_LEVEL)) return {}; auto found = getManager()->findAssets(inSearchKey, inAssetTypes); @@ -30,8 +30,8 @@ void IAssetLoader::IAssetLoaderOverride::insertAssetIntoCache(SAssetBundle& asse { getManager()->changeAssetKey(asset, supposedKey); - auto levelFlag = _params.cacheFlags >> (uint64_t(hierarchyLevel) * 2ull); - if (!(levelFlag&ECF_DONT_CACHE_TOP_LEVEL)) + const auto levelFlags = caching_flags_t(static_cast(_params.cacheFlags.value) >> (uint64_t(hierarchyLevel) * 2ull)); + if (!levelFlags.hasAnyFlag(ECF_DONT_CACHE_TOP_LEVEL)) getManager()->insertAssetIntoCache(asset,ASSET_MUTABILITY_ON_CACHE_INSERT); } @@ -126,4 +126,4 @@ smart_refctd_ptr IAssetLoader::createDefaultImageView(core::smart .viewType = viewType, .format = imageParams.format }); -} \ No newline at end of file +} From 41935c1ff75dbcb8c08ae698487e22ea14952ec3 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 6 Mar 2026 17:57:05 +0100 Subject: [PATCH 052/118] Unify face normal helpers --- .../asset/interchange/SGeometryLoaderCommon.h | 13 ----- .../nbl/asset/utils/SGeometryNormalCommon.h | 43 +++++++++++++++ .../asset/interchange/CSTLMeshFileLoader.cpp | 5 +- src/nbl/asset/utils/CGeometryCreator.cpp | 55 +++---------------- 4 files changed, 55 insertions(+), 61 deletions(-) create mode 100644 include/nbl/asset/utils/SGeometryNormalCommon.h diff --git a/include/nbl/asset/interchange/SGeometryLoaderCommon.h b/include/nbl/asset/interchange/SGeometryLoaderCommon.h index 7e45f7b685..94cee6dd1f 100644 --- a/include/nbl/asset/interchange/SGeometryLoaderCommon.h +++ b/include/nbl/asset/interchange/SGeometryLoaderCommon.h @@ -48,19 +48,6 @@ class SGeometryLoaderCommon 
core::adopt_memory); return createDataView(std::move(buffer), byteCount, static_cast(sizeof(ValueType)), Format); } - - static inline hlsl::float32_t3 normalizeOrZero(const hlsl::float32_t3& v) - { - const float len2 = hlsl::dot(v, v); - if (len2 <= 0.f) - return hlsl::float32_t3(0.f, 0.f, 0.f); - return hlsl::normalize(v); - } - - static inline hlsl::float32_t3 computeFaceNormal(const hlsl::float32_t3& a, const hlsl::float32_t3& b, const hlsl::float32_t3& c) - { - return normalizeOrZero(hlsl::cross(b - a, c - a)); - } }; } diff --git a/include/nbl/asset/utils/SGeometryNormalCommon.h b/include/nbl/asset/utils/SGeometryNormalCommon.h new file mode 100644 index 0000000000..72e0348002 --- /dev/null +++ b/include/nbl/asset/utils/SGeometryNormalCommon.h @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_GEOMETRY_NORMAL_COMMON_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_NORMAL_COMMON_H_INCLUDED_ + + +#include "nbl/builtin/hlsl/tgmath.hlsl" + + +namespace nbl::asset +{ + +class SGeometryNormalCommon +{ + public: + static_assert(sizeof(hlsl::float32_t3) == sizeof(float[3])); + static_assert(alignof(hlsl::float32_t3) == alignof(float)); + + static inline hlsl::float32_t3 normalizeOrZero(const hlsl::float32_t3& v, const float epsilon = 0.f) + { + const float len2 = hlsl::dot(v, v); + const float epsilon2 = epsilon * epsilon; + if (len2 <= epsilon2) + return hlsl::float32_t3(0.f, 0.f, 0.f); + return hlsl::normalize(v); + } + + static inline hlsl::float32_t3 computeFaceNormal(const hlsl::float32_t3& a, const hlsl::float32_t3& b, const hlsl::float32_t3& c, const float epsilon = 0.000001f) + { + return normalizeOrZero(hlsl::cross(b - a, c - a), epsilon); + } + + static inline void computeFaceNormal(const float a[3], const float b[3], const float c[3], float normal[3], const float epsilon = 0.000001f) + { + 
*(hlsl::float32_t3*)normal = computeFaceNormal(*(const hlsl::float32_t3*)a, *(const hlsl::float32_t3*)b, *(const hlsl::float32_t3*)c, epsilon); + } +}; + +} + + +#endif diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index bc1bf6246d..2f754d5b9a 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -13,6 +13,7 @@ #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/format/convertColor.h" #include "nbl/asset/utils/SGeometryAABBCommon.h" +#include "nbl/asset/utils/SGeometryNormalCommon.h" #include "nbl/asset/asset.h" #include "nbl/asset/metadata/CSTLMetadata.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" @@ -103,7 +104,7 @@ hlsl::float32_t3 stlResolveStoredNormal(const hlsl::float32_t3& fileNormal) const float fileLen2 = hlsl::dot(fileNormal, fileNormal); if (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) return fileNormal; - return SGeometryLoaderCommon::normalizeOrZero(fileNormal); + return SGeometryNormalCommon::normalizeOrZero(fileNormal); } void stlPushTriangleReversed(const hlsl::float32_t3 (&p)[3], core::vector& positions) @@ -720,7 +721,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa stlPushTriangleReversed(p, positions); hlsl::float32_t3 faceNormal = stlResolveStoredNormal(*fileNormal); if (hlsl::dot(faceNormal, faceNormal) <= 0.f) - faceNormal = SGeometryLoaderCommon::computeFaceNormal(p[2u], p[1u], p[0u]); + faceNormal = SGeometryNormalCommon::computeFaceNormal(p[2u], p[1u], p[0u]); normals.push_back(faceNormal); normals.push_back(faceNormal); normals.push_back(faceNormal); diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 3750a37a70..9950f8b997 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -4,6 +4,7 @@ #include 
"nbl/asset/utils/CGeometryCreator.h" +#include "nbl/asset/utils/SGeometryNormalCommon.h" #include "nbl/builtin/hlsl/tgmath.hlsl" #include "nbl/builtin/hlsl/math/linalg/transform.hlsl" #include "nbl/builtin/hlsl/math/quaternions.hlsl" @@ -946,44 +947,6 @@ class Icosphere private: - /* - return face normal (4th param) of a triangle v1-v2-v3 - if a triangle has no surface (normal length = 0), then return a zero vector - */ - - static inline void computeFaceNormal(const float v1[3], const float v2[3], const float v3[3], float normal[3]) - { - constexpr float EPSILON = 0.000001f; - - // default return value (0, 0, 0) - normal[0] = normal[1] = normal[2] = 0; - - // find 2 edge vectors: v1-v2, v1-v3 - float ex1 = v2[0] - v1[0]; - float ey1 = v2[1] - v1[1]; - float ez1 = v2[2] - v1[2]; - float ex2 = v3[0] - v1[0]; - float ey2 = v3[1] - v1[1]; - float ez2 = v3[2] - v1[2]; - - // cross product: e1 x e2 - float nx, ny, nz; - nx = ey1 * ez2 - ez1 * ey2; - ny = ez1 * ex2 - ex1 * ez2; - nz = ex1 * ey2 - ey1 * ex2; - - // normalize only if the length is > 0 - float length = sqrtf(nx * nx + ny * ny + nz * nz); - if (length > EPSILON) - { - // normalize - float lengthInv = 1.0f / length; - normal[0] = nx * lengthInv; - normal[1] = ny * lengthInv; - normal[2] = nz * lengthInv; - } - } - /* return vertex normal (2nd param) by mormalizing the vertex vector */ @@ -1229,27 +1192,27 @@ class Icosphere t11[0] = 2 * i * S_STEP; t11[1] = T_STEP * 3; // add a triangle in 1st row - Icosphere::computeFaceNormal(v0, v1, v2, n); + SGeometryNormalCommon::computeFaceNormal(v0, v1, v2, n); addVertices(v0, v1, v2); addNormals(n, n, n); addTexCoords(t0, t1, t2); addIndices(index, index + 1, index + 2); // add 2 triangles in 2nd row - Icosphere::computeFaceNormal(v1, v3, v2, n); + SGeometryNormalCommon::computeFaceNormal(v1, v3, v2, n); addVertices(v1, v3, v2); addNormals(n, n, n); addTexCoords(t1, t3, t2); addIndices(index + 3, index + 4, index + 5); - Icosphere::computeFaceNormal(v2, v3, v4, n); + 
SGeometryNormalCommon::computeFaceNormal(v2, v3, v4, n); addVertices(v2, v3, v4); addNormals(n, n, n); addTexCoords(t2, t3, t4); addIndices(index + 6, index + 7, index + 8); // add a triangle in 3rd row - Icosphere::computeFaceNormal(v3, v11, v4, n); + SGeometryNormalCommon::computeFaceNormal(v3, v11, v4, n); addVertices(v3, v11, v4); addNormals(n, n, n); addTexCoords(t3, t11, t4); @@ -1562,25 +1525,25 @@ class Icosphere // add 4 new triangles addVertices(v1, newV1, newV3); addTexCoords(t1, newT1, newT3); - computeFaceNormal(v1, newV1, newV3, normal); + SGeometryNormalCommon::computeFaceNormal(v1, newV1, newV3, normal); addNormals(normal, normal, normal); addIndices(index, index + 1, index + 2); addVertices(newV1, v2, newV2); addTexCoords(newT1, t2, newT2); - computeFaceNormal(newV1, v2, newV2, normal); + SGeometryNormalCommon::computeFaceNormal(newV1, v2, newV2, normal); addNormals(normal, normal, normal); addIndices(index + 3, index + 4, index + 5); addVertices(newV1, newV2, newV3); addTexCoords(newT1, newT2, newT3); - computeFaceNormal(newV1, newV2, newV3, normal); + SGeometryNormalCommon::computeFaceNormal(newV1, newV2, newV3, normal); addNormals(normal, normal, normal); addIndices(index + 6, index + 7, index + 8); addVertices(newV3, newV2, v3); addTexCoords(newT3, newT2, t3); - computeFaceNormal(newV3, newV2, v3, normal); + SGeometryNormalCommon::computeFaceNormal(newV3, newV2, v3, normal); addNormals(normal, normal, normal); addIndices(index + 9, index + 10, index + 11); From da8acca1c3ca1c626ba329842f537452e1e56261 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 6 Mar 2026 21:15:15 +0100 Subject: [PATCH 053/118] Update OBJ geometry writing --- .../nbl/asset/interchange/COBJMeshWriter.h | 13 +- include/nbl/asset/interchange/SFileIOPolicy.h | 3 +- .../interchange/SGeometryContentHashCommon.h | 8 +- .../asset/interchange/SGeometryLoaderCommon.h | 40 ++- .../asset/interchange/SGeometryWriterCommon.h | 108 +++++-- 
.../asset/interchange/COBJMeshFileLoader.cpp | 14 +- .../asset/interchange/COBJMeshFileLoader.h | 11 +- src/nbl/asset/interchange/COBJMeshWriter.cpp | 298 +++++++++++------- .../asset/interchange/CPLYMeshFileLoader.cpp | 4 +- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 14 +- .../asset/interchange/CSTLMeshFileLoader.cpp | 6 +- 11 files changed, 358 insertions(+), 161 deletions(-) diff --git a/include/nbl/asset/interchange/COBJMeshWriter.h b/include/nbl/asset/interchange/COBJMeshWriter.h index a511d294af..d0a1c0c3dc 100644 --- a/include/nbl/asset/interchange/COBJMeshWriter.h +++ b/include/nbl/asset/interchange/COBJMeshWriter.h @@ -4,14 +4,17 @@ #ifndef _NBL_ASSET_OBJ_MESH_WRITER_H_INCLUDED_ #define _NBL_ASSET_OBJ_MESH_WRITER_H_INCLUDED_ - #include "nbl/asset/interchange/ISceneWriter.h" - namespace nbl::asset { - -//! class to write OBJ mesh files +/* + Writes OBJ from a single polygon geometry, a geometry collection, or a scene. + OBJ itself is still treated here as final flattened geometry data, not as a scene format. + Scene input is accepted only as export input: the writer bakes transforms and serializes all collected polygon geometries into one OBJ stream. + This preserves the final shape but does not try to keep scene-only structure such as hierarchy or instancing. + In other words `ET_SCENE -> OBJ` is supported as flattening, not as round-tripping scene semantics through the OBJ format. 
+*/ class COBJMeshWriter : public ISceneWriter { public: @@ -30,4 +33,4 @@ class COBJMeshWriter : public ISceneWriter } // end namespace -#endif +#endif \ No newline at end of file diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 7c78450821..458f2bc1b8 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -100,6 +100,7 @@ struct SFileIOPolicy static inline constexpr uint64_t DEFAULT_CHUNK_SIZE_BYTES = 4ull << 20u; // 4 MiB static inline constexpr uint64_t DEFAULT_MAX_STAGING_BYTES = 256ull << 20u; // 256 MiB + // These defaults are stored and clamped as log2(byte_count), so the source byte values must stay powers of two. static_assert(std::has_single_bit(MIN_CHUNK_SIZE_BYTES)); static_assert(std::has_single_bit(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES)); static_assert(std::has_single_bit(DEFAULT_CHUNK_SIZE_BYTES)); @@ -163,7 +164,7 @@ struct SResolvedFileIOPolicy Strategy strategy = Strategy::Invalid; // Effective chunk size encoded as log2(bytes). Also set for whole-file for telemetry consistency. uint8_t chunkSizeLog2 = SFileIOPolicy::MIN_CHUNK_SIZE_LOG2; - // Human-readable resolver reason used in logs and diagnostics. + // Resolver reason string used in logs and diagnostics. const char* reason = "invalid"; inline constexpr bool isValid() const diff --git a/include/nbl/asset/interchange/SGeometryContentHashCommon.h b/include/nbl/asset/interchange/SGeometryContentHashCommon.h index f25f8fede4..5eba269949 100644 --- a/include/nbl/asset/interchange/SGeometryContentHashCommon.h +++ b/include/nbl/asset/interchange/SGeometryContentHashCommon.h @@ -34,7 +34,9 @@ class SPolygonGeometryContentHash buffer->setContentHash(IPreHashed::INVALID_HASH); } - static inline core::blake3_hash_t computeHash(const ICPUPolygonGeometry* geometry) + // Composes a geometry hash from indexing metadata and the current content hashes of referenced buffers. 
+ // It does not compute missing buffer content hashes. Any buffer without a content hash contributes INVALID_HASH. + static inline core::blake3_hash_t composeHashFromBufferContentHashes(const ICPUPolygonGeometry* geometry) { if (!geometry) return IPreHashed::INVALID_HASH; @@ -57,13 +59,13 @@ class SPolygonGeometryContentHash static inline core::blake3_hash_t computeMissing(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) { CPolygonGeometryManipulator::computeMissingContentHashesParallel(geometry, ioPolicy); - return computeHash(geometry); + return composeHashFromBufferContentHashes(geometry); } static inline core::blake3_hash_t recompute(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) { CPolygonGeometryManipulator::recomputeContentHashesParallel(geometry, ioPolicy); - return computeHash(geometry); + return composeHashFromBufferContentHashes(geometry); } }; diff --git a/include/nbl/asset/interchange/SGeometryLoaderCommon.h b/include/nbl/asset/interchange/SGeometryLoaderCommon.h index 94cee6dd1f..0ebbfe3b74 100644 --- a/include/nbl/asset/interchange/SGeometryLoaderCommon.h +++ b/include/nbl/asset/interchange/SGeometryLoaderCommon.h @@ -5,12 +5,34 @@ #define _NBL_ASSET_S_GEOMETRY_LOADER_COMMON_H_INCLUDED_ +#include +#include +#include +#include + #include "nbl/asset/ICPUPolygonGeometry.h" namespace nbl::asset { +namespace impl +{ + +// Owns contiguous storage that can be adopted by the buffer. Views like std::span are rejected. 
+template +concept AdoptedViewStorage = + std::ranges::contiguous_range> && + std::ranges::sized_range> && + (!std::ranges::view>) && + requires(std::remove_reference_t& storage) + { + typename std::ranges::range_value_t>; + { std::ranges::data(storage) } -> std::same_as>*>; + }; + +} + class SGeometryLoaderCommon { public: @@ -33,20 +55,22 @@ class SGeometryLoaderCommon }; } - template - static inline IGeometry::SDataView createAdoptedView(core::vector&& data) + template + static inline IGeometry::SDataView createAdoptedView(Storage&& data) { - if (data.empty()) + using storage_t = std::remove_cvref_t; + using value_t = std::ranges::range_value_t; + + if (std::ranges::empty(data)) return {}; - auto backer = core::make_smart_refctd_ptr>>(std::move(data)); + auto backer = core::make_smart_refctd_ptr>(std::forward(data)); auto& storage = backer->getBacker(); - const size_t byteCount = storage.size() * sizeof(ValueType); - auto* const ptr = storage.data(); + const size_t byteCount = std::ranges::size(storage) * sizeof(value_t); auto buffer = ICPUBuffer::create( - { { byteCount }, ptr, core::smart_refctd_ptr(std::move(backer)), alignof(ValueType) }, + { { byteCount }, std::ranges::data(storage), core::smart_refctd_ptr(std::move(backer)), alignof(value_t) }, core::adopt_memory); - return createDataView(std::move(buffer), byteCount, static_cast(sizeof(ValueType)), Format); + return createDataView(std::move(buffer), byteCount, static_cast(sizeof(value_t)), Format); } }; diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index 5497a892e1..3149ff14ae 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -5,8 +5,11 @@ #define _NBL_ASSET_S_GEOMETRY_WRITER_COMMON_H_INCLUDED_ +#include #include "nbl/asset/ICPUScene.h" +#include "nbl/asset/ICPUGeometryCollection.h" #include "nbl/asset/ICPUPolygonGeometry.h" +#include 
"nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include #include @@ -16,40 +19,101 @@ namespace nbl::asset { +namespace impl +{ + +inline hlsl::float32_t3x4 identityAffineTransform() +{ + return hlsl::math::linalg::diagonal(1.f); +} + +template concept PolygonGeometryWriteItemContainer = requires(Container& c, const ICPUPolygonGeometry* geometry, const hlsl::float32_t3x4 transform, const uint32_t instanceIx, const uint32_t targetIx, const uint32_t geometryIx) { c.emplace_back(geometry, transform, instanceIx, targetIx, geometryIx); }; + +template +static inline void appendPolygonGeometryWriteItemsFromCollection(Container& out, const ICPUGeometryCollection* collection, const hlsl::float32_t3x4& parentTransform, const uint32_t instanceIx, const uint32_t targetIx) +{ + if (!collection) + return; + + const auto identity = identityAffineTransform(); + const auto& geometries = collection->getGeometries(); + for (uint32_t geometryIx = 0u; geometryIx < geometries.size(); ++geometryIx) + { + const auto& ref = geometries[geometryIx]; + if (!ref.geometry || ref.geometry->getPrimitiveType() != IGeometryBase::EPrimitiveType::Polygon) + continue; + const auto* geometry = static_cast(ref.geometry.get()); + const auto localTransform = ref.hasTransform() ? 
ref.transform : identity; + out.emplace_back(geometry, hlsl::math::linalg::promoted_mul(parentTransform, localTransform), instanceIx, targetIx, geometryIx); + } +} + +} + class SGeometryWriterCommon { public: - static inline const ICPUPolygonGeometry* resolvePolygonGeometry(const IAsset* rootAsset) + struct SPolygonGeometryWriteItem { + inline SPolygonGeometryWriteItem(const ICPUPolygonGeometry* _geometry, const hlsl::float32_t3x4& _transform, const uint32_t _instanceIx, const uint32_t _targetIx, const uint32_t _geometryIx) : geometry(_geometry), transform(_transform), instanceIx(_instanceIx), targetIx(_targetIx), geometryIx(_geometryIx) {} + + const ICPUPolygonGeometry* geometry = nullptr; + hlsl::float32_t3x4 transform = impl::identityAffineTransform(); + uint32_t instanceIx = ~0u; + uint32_t targetIx = ~0u; + uint32_t geometryIx = 0u; + }; + + template> requires impl::PolygonGeometryWriteItemContainer + static inline Container collectPolygonGeometryWriteItems(const IAsset* rootAsset) + { + Container out = {}; if (!rootAsset) - return nullptr; + return out; - if (const auto* geometry = IAsset::castDown(rootAsset)) - return geometry; + const auto identity = impl::identityAffineTransform(); + if (rootAsset->getAssetType() == IAsset::ET_GEOMETRY) + { + const auto* geometry = static_cast*>(rootAsset); + if (geometry->getPrimitiveType() == IGeometryBase::EPrimitiveType::Polygon) + out.emplace_back(static_cast(rootAsset), identity, ~0u, ~0u, 0u); + return out; + } - const auto* scene = IAsset::castDown(rootAsset); - if (!scene) - return nullptr; + if (rootAsset->getAssetType() == IAsset::ET_GEOMETRY_COLLECTION) + { + impl::appendPolygonGeometryWriteItemsFromCollection(out, static_cast(rootAsset), identity, ~0u, ~0u); + return out; + } + + if (rootAsset->getAssetType() != IAsset::ET_SCENE) + return out; + const auto* scene = static_cast(rootAsset); - for (const auto& morphTargetsRef : scene->getInstances().getMorphTargets()) + const auto& instances = 
scene->getInstances(); + const auto& morphTargets = instances.getMorphTargets(); + const auto& initialTransforms = instances.getInitialTransforms(); + for (uint32_t instanceIx = 0u; instanceIx < morphTargets.size(); ++instanceIx) { - const auto* morphTargets = morphTargetsRef.get(); - if (!morphTargets) + const auto* targets = morphTargets[instanceIx].get(); + if (!targets) continue; - for (const auto& target : morphTargets->getTargets()) - { - const auto* const collection = target.geoCollection.get(); - if (!collection) - continue; - for (const auto& geoRef : collection->getGeometries()) - { - if (const auto* geometry = IAsset::castDown(geoRef.geometry.get())) - return geometry; - } - } + + const auto instanceTransform = initialTransforms.empty() ? identity : initialTransforms[instanceIx]; + const auto& targetList = targets->getTargets(); + for (uint32_t targetIx = 0u; targetIx < targetList.size(); ++targetIx) + impl::appendPolygonGeometryWriteItemsFromCollection(out, targetList[targetIx].geoCollection.get(), instanceTransform, instanceIx, targetIx); } - return nullptr; + return out; + } + + static inline bool isIdentityTransform(const hlsl::float32_t3x4& transform) + { + return + transform[0].x == 1.f && transform[0].y == 0.f && transform[0].z == 0.f && transform[0].w == 0.f && + transform[1].x == 0.f && transform[1].y == 1.f && transform[1].z == 0.f && transform[1].w == 0.f && + transform[2].x == 0.f && transform[2].y == 0.f && transform[2].z == 1.f && transform[2].w == 0.f; } static inline const ICPUPolygonGeometry::SDataView* findFirstAuxViewByChannelCount(const ICPUPolygonGeometry* geom, const uint32_t channels, const size_t requiredElementCount = 0ull) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 9a38d635da..08b2f3402b 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -576,7 +576,7 @@ asset::SAssetBundle 
COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const size_t outVertexCount = outPositions.size(); auto geometry = core::make_smart_refctd_ptr(); { - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(outPositions)); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(outPositions)); if (!view) return false; geometry->setPositionView(std::move(view)); @@ -585,7 +585,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const bool hasNormals = hasProvidedNormals || needsNormalGeneration; if (hasNormals) { - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(outNormals)); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(outNormals)); if (!view) return false; geometry->setNormalView(std::move(view)); @@ -593,7 +593,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (hasUVs) { - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(outUVs)); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(outUVs)); if (!view) return false; geometry->getAuxAttributeViews()->push_back(std::move(view)); @@ -607,14 +607,14 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector indices16(indices.size()); for (size_t i = 0u; i < indices.size(); ++i) indices16[i] = static_cast(indices[i]); - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices16)); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices16)); if (!view) return false; geometry->setIndexView(std::move(view)); } else { - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices)); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices)); if (!view) return false; geometry->setIndexView(std::move(view)); @@ -1081,9 +1081,13 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); + // 
Plain OBJ is still just one polygon geometry here. return SAssetBundle(core::smart_refctd_ptr(), { core::smart_refctd_ptr_static_cast(std::move(loadedGeometries.front().geometry)) }); } + // Plain OBJ can group many polygon geometries with `o` and `g`, but it still does not define + // a real scene graph, instancing, or node transforms. Keep that as geometry collections instead + // of fabricating an ICPUScene on load. core::vector objectNames; core::vector> objectCollections; for (auto& loaded : loadedGeometries) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.h b/src/nbl/asset/interchange/COBJMeshFileLoader.h index d0b169984b..68161da937 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.h +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.h @@ -10,7 +10,16 @@ namespace nbl::asset { -//! Meshloader capable of loading obj meshes. +/* + Loads plain OBJ as polygon geometry or geometry collections. + Multiple `o` and `g` blocks mean multiple geometry pieces in one file, not a real scene. + This loader keeps that split as geometry collections because plain OBJ does not define scene hierarchy, instancing, or node transforms. + OBJ/MTL material data also belongs here and remains TODO, but that still does not turn plain OBJ into a scene format. + A single mesh payload can therefore load as one geometry, while multiple split pieces still load as geometry collections instead of a synthetic scene. 
+ References: + - https://www.loc.gov/preservation/digital/formats/fdd/fdd000507 + - https://www.fileformat.info/format/wavefrontobj/egff.htm +*/ class COBJMeshFileLoader : public IGeometryLoader { public: diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 492ecd7747..60a4dd2c5d 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -29,7 +29,8 @@ COBJMeshWriter::COBJMeshWriter() uint64_t COBJMeshWriter::getSupportedAssetTypesBitfield() const { - return IAsset::ET_GEOMETRY | IAsset::ET_SCENE; + // OBJ can store a single geometry, a geometry collection, or a flattened scene export. + return IAsset::ET_GEOMETRY | IAsset::ET_GEOMETRY_COLLECTION | IAsset::ET_SCENE; } const char** COBJMeshWriter::getAssociatedFileExtensions() const @@ -136,7 +137,7 @@ void appendFaceLine(std::string& out, const std::string& storage, const core::ve *(cursor++) = '\n'; } -void appendIndexTokenToStorage(std::string& storage, core::vector& refs, const uint32_t objIx, const bool hasUVs, const bool hasNormals) +void appendIndexTokenToStorage(std::string& storage, core::vector& refs, const uint32_t positionIx, const bool hasUVs, const uint32_t uvIx, const bool hasNormals, const uint32_t normalIx) { SIndexStringRef ref = {}; ref.offset = static_cast(storage.size()); @@ -146,21 +147,21 @@ void appendIndexTokenToStorage(std::string& storage, core::vector(cursor - token)); } @@ -176,6 +177,54 @@ void appendIndexTokenToStorage(std::string& storage, core::vector normalTransform; +}; + +inline SGeometryTransformState createTransformState(const hlsl::float32_t3x4& transform) +{ + const auto linear = hlsl::float32_t3x3(transform); + return { + .transform = transform, + .linear = linear, + .identity = SGeometryWriterCommon::isIdentityTransform(transform), + .reverseWinding = hlsl::determinant(linear) < 0.f, + .normalTransform = hlsl::math::linalg::cofactors_base::create(linear) + }; +} + 
+inline hlsl::float32_t3 applyPositionTransform(const SGeometryTransformState& state, const hlsl::float32_t3& value) +{ + if (state.identity) + return value; + + return hlsl::float32_t3( + state.transform[0].x * value.x + state.transform[0].y * value.y + state.transform[0].z * value.z + state.transform[0].w, + state.transform[1].x * value.x + state.transform[1].y * value.y + state.transform[1].z * value.z + state.transform[1].w, + state.transform[2].x * value.x + state.transform[2].y * value.y + state.transform[2].z * value.z + state.transform[2].w + ); +} + +inline hlsl::float32_t3 applyNormalTransform(const SGeometryTransformState& state, const hlsl::float32_t3& value) +{ + return state.identity ? value : state.normalTransform.normalTransform(value); +} + } // namespace obj_writer_detail bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) @@ -189,140 +238,168 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (!_file || !_params.rootAsset) return false; - const auto* geom = SGeometryWriterCommon::resolvePolygonGeometry(_params.rootAsset); - if (!geom || !geom->valid()) + // Scene input is flattened here by baking transforms and writing every collected + // polygon geometry as its own OBJ object block. 
+ const auto items = SGeometryWriterCommon::collectPolygonGeometryWriteItems(_params.rootAsset); + if (items.empty()) return false; SAssetWriteContext ctx = { _params, _file }; - system::IFile* file = _override->getOutputFile(_file, ctx, { geom, 0u }); + system::IFile* file = _override->getOutputFile(_file, ctx, { _params.rootAsset, 0u }); if (!file) return false; - const auto& positionView = geom->getPositionView(); - if (!positionView) - return false; - - const auto& normalView = geom->getNormalView(); - const bool hasNormals = static_cast(normalView); - - const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::findFirstAuxViewByChannelCount(geom, 2u); - const bool hasUVs = uvView != nullptr; - - const size_t vertexCount = positionView.getElementCount(); - if (vertexCount == 0) - return false; - if (hasNormals && normalView.getElementCount() != vertexCount) - return false; - if (hasUVs && uvView->getElementCount() != vertexCount) - return false; + std::string output; + output.append("# Nabla OBJ\n"); + uint64_t totalVertexCount = 0ull; + uint64_t totalFaceCount = 0ull; + uint32_t positionBase = 1u; + uint32_t uvBase = 1u; + uint32_t normalBase = 1u; + hlsl::float64_t4 tmp = {}; + for (size_t itemIx = 0u; itemIx < items.size(); ++itemIx) + { + const auto& item = items[itemIx]; + const auto* geom = item.geometry; + if (!geom || !geom->valid()) + return false; - const auto* indexing = geom->getIndexingCallback(); - if (!indexing) - return false; - if (indexing->knownTopology() != E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) - return false; + const auto& positionView = geom->getPositionView(); + if (!positionView) + return false; - core::vector indexData; - const uint32_t* indices = nullptr; - size_t faceCount = 0; - if (!SGeometryWriterCommon::decodeTriangleIndices(geom, indexData, indices, faceCount)) - return false; + const auto& normalView = geom->getNormalView(); + const bool hasNormals = static_cast(normalView); + const ICPUPolygonGeometry::SDataView* 
uvView = SGeometryWriterCommon::findFirstAuxViewByChannelCount(geom, 2u); + const bool hasUVs = uvView != nullptr; + const size_t vertexCount = positionView.getElementCount(); + if (vertexCount == 0ull) + return false; + if (hasNormals && normalView.getElementCount() != vertexCount) + return false; + if (hasUVs && uvView->getElementCount() != vertexCount) + return false; - const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); - const bool flipHandedness = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); - std::string output; - output.reserve(vertexCount * ApproxObjBytesPerVertex + faceCount * ApproxObjBytesPerFace); + const auto* indexing = geom->getIndexingCallback(); + if (!indexing) + return false; + if (indexing->knownTopology() != E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) + return false; - output.append("# Nabla OBJ\n"); + core::vector indexData; + const uint32_t* indices = nullptr; + size_t faceCount = 0ull; + if (!SGeometryWriterCommon::decodeTriangleIndices(geom, indexData, indices, faceCount)) + return false; - hlsl::float64_t4 tmp = {}; - const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(positionView); - const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; - const hlsl::float32_t2* const tightUV = hasUVs ? 
SGeometryWriterCommon::getTightView(*uvView) : nullptr; - for (size_t i = 0u; i < vertexCount; ++i) - { - hlsl::float32_t3 vertex = {}; - if (tightPositions) - { - vertex = tightPositions[i]; - } - else - { - if (!decodeVec4(positionView, i, tmp)) - return false; - vertex = hlsl::float32_t3(static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z)); - } - if (flipHandedness) - vertex.x = -vertex.x; + const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); + const bool flipHandedness = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const auto transformState = createTransformState(item.transform); + const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(positionView); + const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; + const hlsl::float32_t2* const tightUV = hasUVs ? SGeometryWriterCommon::getTightView(*uvView) : nullptr; - appendVec3Line(output, "v ", sizeof("v ") - 1ull, vertex); - } + if (itemIx != 0u) + output.push_back('\n'); + appendObjectHeader(output, item); - if (hasUVs) - { for (size_t i = 0u; i < vertexCount; ++i) { - hlsl::float32_t2 uv = {}; - if (tightUV) + hlsl::float32_t3 vertex = {}; + if (tightPositions) { - uv = hlsl::float32_t2(tightUV[i].x, 1.f - tightUV[i].y); + vertex = tightPositions[i]; } else { - if (!decodeVec4(*uvView, i, tmp)) + if (!decodeVec4(positionView, i, tmp)) return false; - uv = hlsl::float32_t2(static_cast(tmp.x), 1.f - static_cast(tmp.y)); + vertex = hlsl::float32_t3(static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z)); } + vertex = applyPositionTransform(transformState, vertex); + if (flipHandedness) + vertex.x = -vertex.x; - appendVec2Line(output, "vt ", sizeof("vt ") - 1ull, uv); + appendVec3Line(output, "v ", sizeof("v ") - 1ull, vertex); } - } - if (hasNormals) - { - for (size_t i = 0u; i < vertexCount; ++i) + if (hasUVs) { - hlsl::float32_t3 normal = {}; - if (tightNormals) + for (size_t i = 
0u; i < vertexCount; ++i) { - normal = tightNormals[i]; + hlsl::float32_t2 uv = {}; + if (tightUV) + { + uv = hlsl::float32_t2(tightUV[i].x, 1.f - tightUV[i].y); + } + else + { + if (!decodeVec4(*uvView, i, tmp)) + return false; + uv = hlsl::float32_t2(static_cast(tmp.x), 1.f - static_cast(tmp.y)); + } + + appendVec2Line(output, "vt ", sizeof("vt ") - 1ull, uv); } - else + } + + if (hasNormals) + { + for (size_t i = 0u; i < vertexCount; ++i) { - if (!decodeVec4(normalView, i, tmp)) - return false; - normal = hlsl::float32_t3(static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z)); + hlsl::float32_t3 normal = {}; + if (tightNormals) + { + normal = tightNormals[i]; + } + else + { + if (!decodeVec4(normalView, i, tmp)) + return false; + normal = hlsl::float32_t3(static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z)); + } + normal = applyNormalTransform(transformState, normal); + if (flipHandedness) + normal.x = -normal.x; + + appendVec3Line(output, "vn ", sizeof("vn ") - 1ull, normal); } - if (flipHandedness) - normal.x = -normal.x; + } - appendVec3Line(output, "vn ", sizeof("vn ") - 1ull, normal); + core::vector faceIndexRefs; + faceIndexRefs.reserve(vertexCount); + std::string faceIndexStorage; + faceIndexStorage.reserve(vertexCount * 24ull); + for (size_t i = 0u; i < vertexCount; ++i) + { + const uint32_t positionIx = positionBase + static_cast(i); + const uint32_t uvIx = hasUVs ? (uvBase + static_cast(i)) : 0u; + const uint32_t normalIx = hasNormals ? 
(normalBase + static_cast(i)) : 0u; + appendIndexTokenToStorage(faceIndexStorage, faceIndexRefs, positionIx, hasUVs, uvIx, hasNormals, normalIx); } - } - core::vector faceIndexRefs; - faceIndexRefs.reserve(vertexCount); - std::string faceIndexStorage; - faceIndexStorage.reserve(vertexCount * 24ull); - for (size_t i = 0u; i < vertexCount; ++i) - { - const uint32_t objIx = static_cast(i + 1u); - appendIndexTokenToStorage(faceIndexStorage, faceIndexRefs, objIx, hasUVs, hasNormals); - } + for (size_t i = 0u; i < faceCount; ++i) + { + const uint32_t i0 = indices[i * 3u + 0u]; + const uint32_t i1 = indices[i * 3u + 1u]; + const uint32_t i2 = indices[i * 3u + 2u]; + + const uint32_t f0 = transformState.reverseWinding ? i0 : i2; + const uint32_t f1 = i1; + const uint32_t f2 = transformState.reverseWinding ? i2 : i0; + if (f0 >= faceIndexRefs.size() || f1 >= faceIndexRefs.size() || f2 >= faceIndexRefs.size()) + return false; - for (size_t i = 0u; i < faceCount; ++i) - { - const uint32_t i0 = indices[i * 3u + 0u]; - const uint32_t i1 = indices[i * 3u + 1u]; - const uint32_t i2 = indices[i * 3u + 2u]; - - const uint32_t f0 = i2; - const uint32_t f1 = i1; - const uint32_t f2 = i0; - if (f0 >= faceIndexRefs.size() || f1 >= faceIndexRefs.size() || f2 >= faceIndexRefs.size()) - return false; + appendFaceLine(output, faceIndexStorage, faceIndexRefs, f0, f1, f2); + } - appendFaceLine(output, faceIndexStorage, faceIndexRefs, f0, f1, f2); + positionBase += static_cast(vertexCount); + if (hasUVs) + uvBase += static_cast(vertexCount); + if (hasNormals) + normalBase += static_cast(vertexCount); + totalVertexCount += vertexCount; + totalFaceCount += faceCount; } const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); @@ -347,12 +424,13 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioAvgWrite)); } _params.logger.log( - "OBJ writer stats: file=%s bytes=%llu vertices=%llu faces=%llu io_writes=%llu 
io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + "OBJ writer stats: file=%s bytes=%llu vertices=%llu faces=%llu geometries=%llu io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(output.size()), - static_cast(vertexCount), - static_cast(faceCount), + static_cast(totalVertexCount), + static_cast(totalFaceCount), + static_cast(items.size()), static_cast(ioTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index c969a7c97b..b102e08985 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -2114,7 +2114,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa core::vector indices16(indices.size()); for (size_t i = 0u; i < indices.size(); ++i) indices16[i] = static_cast(indices[i]); - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices16)); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices16)); if (!view) return {}; geometry->setIndexView(std::move(view)); @@ -2122,7 +2122,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } else { - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices)); + auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices)); if (!view) return {}; if (precomputedIndexHash != IPreHashed::INVALID_HASH) diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index efcae14c6e..d2a7a951a4 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -454,12 +454,24 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return 
false; } - const auto* geom = SGeometryWriterCommon::resolvePolygonGeometry(_params.rootAsset); + const auto items = SGeometryWriterCommon::collectPolygonGeometryWriteItems(_params.rootAsset); + if (items.size() != 1u) + { + _params.logger.log("PLY writer: expected exactly one polygon geometry to write.", system::ILogger::ELL_ERROR); + return false; + } + const auto& item = items.front(); + const auto* geom = item.geometry; if (!geom || !geom->valid()) { _params.logger.log("PLY writer: root asset is not a valid polygon geometry.", system::ILogger::ELL_ERROR); return false; } + if (!SGeometryWriterCommon::isIdentityTransform(item.transform)) + { + _params.logger.log("PLY writer: transformed scene or collection export is not supported.", system::ILogger::ELL_ERROR); + return false; + } SAssetWriteContext ctx = { _params, _file }; system::IFile* file = _override->getOutputFile(_file, ctx, { geom, 0u }); diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 2f754d5b9a..fe5836038d 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -652,7 +652,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa vertexColors[baseIx + 1ull] = triColor; vertexColors[baseIx + 2ull] = triColor; } - auto colorView = SGeometryLoaderCommon::createAdoptedView(std::move(vertexColors)); + auto colorView = SGeometryLoaderCommon::createAdoptedView(std::move(vertexColors)); if (!colorView) return {}; geometry->getAuxAttributeViews()->push_back(std::move(colorView)); @@ -742,8 +742,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa triangleCount = positions.size() / SSTLContext::VerticesPerTriangle; vertexCount = positions.size(); - auto posView = SGeometryLoaderCommon::createAdoptedView(std::move(positions)); - auto normalView = SGeometryLoaderCommon::createAdoptedView(std::move(normals)); + auto 
posView = SGeometryLoaderCommon::createAdoptedView(std::move(positions)); + auto normalView = SGeometryLoaderCommon::createAdoptedView(std::move(normals)); if (!posView || !normalView) return {}; geometry->setPositionView(std::move(posView)); From 234afe12cc8da93fe1118dfc8394666613f0942a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 6 Mar 2026 21:29:37 +0100 Subject: [PATCH 054/118] Simplify identity transform checks --- .../asset/interchange/SGeometryWriterCommon.h | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index 3149ff14ae..441b1c4991 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -21,12 +21,6 @@ namespace nbl::asset namespace impl { - -inline hlsl::float32_t3x4 identityAffineTransform() -{ - return hlsl::math::linalg::diagonal(1.f); -} - template concept PolygonGeometryWriteItemContainer = requires(Container& c, const ICPUPolygonGeometry* geometry, const hlsl::float32_t3x4 transform, const uint32_t instanceIx, const uint32_t targetIx, const uint32_t geometryIx) { c.emplace_back(geometry, transform, instanceIx, targetIx, geometryIx); }; template @@ -35,7 +29,7 @@ static inline void appendPolygonGeometryWriteItemsFromCollection(Container& out, if (!collection) return; - const auto identity = identityAffineTransform(); + const auto identity = hlsl::math::linalg::identity(); const auto& geometries = collection->getGeometries(); for (uint32_t geometryIx = 0u; geometryIx < geometries.size(); ++geometryIx) { @@ -58,7 +52,7 @@ class SGeometryWriterCommon inline SPolygonGeometryWriteItem(const ICPUPolygonGeometry* _geometry, const hlsl::float32_t3x4& _transform, const uint32_t _instanceIx, const uint32_t _targetIx, const uint32_t _geometryIx) : geometry(_geometry), transform(_transform), instanceIx(_instanceIx), 
targetIx(_targetIx), geometryIx(_geometryIx) {} const ICPUPolygonGeometry* geometry = nullptr; - hlsl::float32_t3x4 transform = impl::identityAffineTransform(); + hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); uint32_t instanceIx = ~0u; uint32_t targetIx = ~0u; uint32_t geometryIx = 0u; @@ -71,7 +65,7 @@ class SGeometryWriterCommon if (!rootAsset) return out; - const auto identity = impl::identityAffineTransform(); + const auto identity = hlsl::math::linalg::identity(); if (rootAsset->getAssetType() == IAsset::ET_GEOMETRY) { const auto* geometry = static_cast*>(rootAsset); @@ -110,10 +104,7 @@ class SGeometryWriterCommon static inline bool isIdentityTransform(const hlsl::float32_t3x4& transform) { - return - transform[0].x == 1.f && transform[0].y == 0.f && transform[0].z == 0.f && transform[0].w == 0.f && - transform[1].x == 0.f && transform[1].y == 1.f && transform[1].z == 0.f && transform[1].w == 0.f && - transform[2].x == 0.f && transform[2].y == 0.f && transform[2].z == 1.f && transform[2].w == 0.f; + return transform == hlsl::math::linalg::identity(); } static inline const ICPUPolygonGeometry::SDataView* findFirstAuxViewByChannelCount(const ICPUPolygonGeometry* geom, const uint32_t channels, const size_t requiredElementCount = 0ull) From c30ac398a11b003f483afa0a5fd966b8d06b0e33 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 6 Mar 2026 21:57:52 +0100 Subject: [PATCH 055/118] Consolidate AABB helpers --- .../asset/utils/CPolygonGeometryManipulator.h | 8 +- include/nbl/asset/utils/SGeometryAABBCommon.h | 86 ------------------- .../builtin/hlsl/math/linalg/transform.hlsl | 2 + .../builtin/hlsl/shapes/AABBAccumulator.hlsl | 46 ++++++++++ .../asset/interchange/COBJMeshFileLoader.cpp | 10 +-- .../asset/interchange/CPLYMeshFileLoader.cpp | 12 +-- .../asset/interchange/CSTLMeshFileLoader.cpp | 14 +-- 7 files changed, 70 insertions(+), 108 deletions(-) delete mode 100644 include/nbl/asset/utils/SGeometryAABBCommon.h diff --git 
a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 1490725a9a..7163cbfbce 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -12,7 +12,7 @@ #include "nbl/asset/utils/CGeometryManipulator.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" #include "nbl/asset/utils/COBBGenerator.h" -#include "nbl/asset/utils/SGeometryAABBCommon.h" +#include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" #include "nbl/builtin/hlsl/shapes/obb.hlsl" namespace nbl::asset @@ -103,12 +103,12 @@ class NBL_API2 CPolygonGeometryManipulator using aabb_t = std::remove_reference_t; using point_t = typename aabb_t::point_t; using component_t = std::remove_cv_t>; - SAABBAccumulator3 parsedAABB = createAABBAccumulator(); + hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); auto addVertexToAABB = [&](const uint32_t vertex_i)->void { point_t pt; geo->getPositionView().decodeElement(vertex_i, pt); - extendAABBAccumulator(parsedAABB, pt); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, pt); }; if (geo->getIndexView()) { @@ -127,7 +127,7 @@ class NBL_API2 CPolygonGeometryManipulator addVertexToAABB(vertex_i); } } - assignAABBFromAccumulator(aabb, parsedAABB); + hlsl::shapes::util::assignAABBFromAccumulator(aabb, parsedAABB); }; IGeometryBase::SDataViewBase tmp = geo->getPositionView().composed; tmp.resetRange(); diff --git a/include/nbl/asset/utils/SGeometryAABBCommon.h b/include/nbl/asset/utils/SGeometryAABBCommon.h deleted file mode 100644 index 6095b70eee..0000000000 --- a/include/nbl/asset/utils/SGeometryAABBCommon.h +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_ASSET_S_GEOMETRY_AABB_COMMON_H_INCLUDED_ -#define _NBL_ASSET_S_GEOMETRY_AABB_COMMON_H_INCLUDED_ - - -#include "nbl/asset/ICPUPolygonGeometry.h" -#include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" - -#include - - -namespace nbl::asset -{ - -template -using SAABBAccumulator3 = hlsl::shapes::util::AABBAccumulator3; - -template -inline SAABBAccumulator3 createAABBAccumulator() -{ - return SAABBAccumulator3::create(); -} - -template -inline void extendAABBAccumulator(SAABBAccumulator3& aabb, const Scalar x, const Scalar y, const Scalar z) -{ - aabb.addXYZ(x, y, z); -} - -template -inline void extendAABBAccumulator(SAABBAccumulator3& aabb, const Point& point) -{ - typename SAABBAccumulator3::point_t converted; - if constexpr (requires { point.x; point.y; point.z; }) - { - converted.x = static_cast(point.x); - converted.y = static_cast(point.y); - converted.z = static_cast(point.z); - } - else - { - converted.x = static_cast(point[0]); - converted.y = static_cast(point[1]); - converted.z = static_cast(point[2]); - } - aabb.addPoint(converted); -} - -template -inline void assignAABBFromAccumulator(AABB& dst, const SAABBAccumulator3& aabb) -{ - if (aabb.empty()) - return; - - dst = std::remove_reference_t::create(); - if constexpr (requires { dst.minVx.x; dst.minVx.y; dst.minVx.z; dst.maxVx.x; dst.maxVx.y; dst.maxVx.z; }) - { - dst.minVx.x = static_cast(aabb.value.minVx.x); - dst.minVx.y = static_cast(aabb.value.minVx.y); - dst.minVx.z = static_cast(aabb.value.minVx.z); - dst.maxVx.x = static_cast(aabb.value.maxVx.x); - dst.maxVx.y = static_cast(aabb.value.maxVx.y); - dst.maxVx.z = static_cast(aabb.value.maxVx.z); - if constexpr (requires { dst.minVx.w; dst.maxVx.w; }) - { - dst.minVx.w = 0; - dst.maxVx.w = 0; - } - } - else - { - dst.minVx[0] = static_cast(aabb.value.minVx[0]); - dst.minVx[1] = static_cast(aabb.value.minVx[1]); - dst.minVx[2] = static_cast(aabb.value.minVx[2]); - 
dst.maxVx[0] = static_cast(aabb.value.maxVx[0]); - dst.maxVx[1] = static_cast(aabb.value.maxVx[1]); - dst.maxVx[2] = static_cast(aabb.value.maxVx[2]); - } -} - -} - - -#endif diff --git a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl index 99fdc61dfd..82001770a1 100644 --- a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl @@ -56,6 +56,8 @@ inline matrix rhLookAt( return r; } +// Transforms an AABB by a full affine 3x4 matrix and returns the enclosing AABB. +// This exists because shapes::util::transform(matrix, AABB) applies only the linear part and leaves translation out. template) inline shapes::AABB<3, T> pseudo_mul(NBL_CONST_REF_ARG(matrix) lhs, NBL_CONST_REF_ARG(shapes::AABB<3, T>) rhs) { diff --git a/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl b/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl index 2962298d01..0621db23b4 100644 --- a/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl @@ -6,6 +6,8 @@ #include "nbl/builtin/hlsl/shapes/aabb.hlsl" +#include "nbl/builtin/hlsl/array_accessors.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" namespace nbl @@ -56,6 +58,50 @@ struct AABBAccumulator3 aabb_t value; }; +template +inline AABBAccumulator3 createAABBAccumulator() +{ + return AABBAccumulator3::create(); +} + +template +inline void extendAABBAccumulator(NBL_REF_ARG(AABBAccumulator3) aabb, const Scalar x, const Scalar y, const Scalar z) +{ + aabb.addXYZ(x, y, z); +} + +template && (vector_traits::Dimension >= 3)) +inline void extendAABBAccumulator(NBL_REF_ARG(AABBAccumulator3) aabb, NBL_CONST_REF_ARG(Point) point) +{ + array_get::scalar_type> getter; + typename AABBAccumulator3::point_t converted; + converted.x = Scalar(getter(point, 0)); + converted.y = Scalar(getter(point, 1)); + converted.z = Scalar(getter(point, 2)); + aabb.addPoint(converted); +} + 
+template= 3) +inline void assignAABBFromAccumulator(NBL_REF_ARG(AABB) dst, NBL_CONST_REF_ARG(AABBAccumulator3) aabb) +{ + if (aabb.empty()) + return; + + dst = AABB::create(); + array_set::point_t, DstScalar> setter; + setter(dst.minVx, 0, DstScalar(aabb.value.minVx.x)); + setter(dst.minVx, 1, DstScalar(aabb.value.minVx.y)); + setter(dst.minVx, 2, DstScalar(aabb.value.minVx.z)); + setter(dst.maxVx, 0, DstScalar(aabb.value.maxVx.x)); + setter(dst.maxVx, 1, DstScalar(aabb.value.maxVx.y)); + setter(dst.maxVx, 2, DstScalar(aabb.value.maxVx.z)); + for (int16_t i = 3; i < D; ++i) + { + setter(dst.minVx, i, DstScalar(0)); + setter(dst.maxVx, i, DstScalar(0)); + } +} + } } } diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 08b2f3402b..47a2d7b948 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -11,8 +11,8 @@ #include "nbl/asset/interchange/SGeometryLoaderCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" -#include "nbl/asset/utils/SGeometryAABBCommon.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" #ifdef _NBL_COMPILE_WITH_OBJ_LOADER_ @@ -515,7 +515,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as bool hasProvidedNormals = false; bool needsNormalGeneration = false; bool hasUVs = false; - SAABBAccumulator3 parsedAABB = createAABBAccumulator(); + hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); uint64_t currentFaceCount = 0ull; uint64_t currentFaceFastTokenCount = 0ull; uint64_t currentFaceFallbackTokenCount = 0ull; @@ -543,7 +543,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as hasProvidedNormals = false; needsNormalGeneration = false; hasUVs = false; - parsedAABB = 
createAABBAccumulator(); + parsedAABB = hlsl::shapes::util::createAABBAccumulator(); currentFaceCount = 0ull; currentFaceFastTokenCount = 0ull; currentFaceFallbackTokenCount = 0ull; @@ -724,7 +724,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const auto& srcPos = positions[idx[0]]; outPositions[static_cast(outIx)] = srcPos; - extendAABBAccumulator(parsedAABB, srcPos); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, srcPos); Float2 uv(0.f, 0.f); if (idx[1] >= 0 && static_cast(idx[1]) < uvs.size()) @@ -794,7 +794,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const auto& srcPos = positions[static_cast(posIx)]; outPositions[static_cast(outIx)] = srcPos; - extendAABBAccumulator(parsedAABB, srcPos); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, srcPos); outUVs[static_cast(outIx)] = uvs[static_cast(uvIx)]; outNormals[static_cast(outIx)] = normals[static_cast(normalIx)]; hotEntry.pos = posIx; diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index b102e08985..8625a17522 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -11,7 +11,7 @@ #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/IAssetManager.h" #include "nbl/asset/metadata/CPLYMetadata.h" -#include "nbl/asset/utils/SGeometryAABBCommon.h" +#include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" #include "nbl/core/hash/blake.h" #include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" @@ -533,7 +533,7 @@ struct SContext Success, Error }; - EFastVertexReadResult readVertexElementFast(const SElement& el, SAABBAccumulator3* parsedAABB) + EFastVertexReadResult readVertexElementFast(const SElement& el, hlsl::shapes::util::AABBAccumulator3* parsedAABB) { if (!IsBinaryFile || el.Name != "vertex") return EFastVertexReadResult::NotApplicable; @@ -701,7 +701,7 @@ struct 
SContext reinterpret_cast(posBase)[1] = y; reinterpret_cast(posBase)[2] = z; if (trackAABB) - extendAABBAccumulator(*parsedAABB, x, y, z); + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, x, y, z); src += 3ull * floatBytes; posBase += posStride; } @@ -719,7 +719,7 @@ struct SContext reinterpret_cast(posBase)[1] = y; reinterpret_cast(posBase)[2] = z; if (trackAABB) - extendAABBAccumulator(*parsedAABB, x, y, z); + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, x, y, z); src += 3ull * floatBytes; posBase += posStride; reinterpret_cast(normalBase)[0] = decodeF32(src + 0ull * floatBytes); @@ -741,7 +741,7 @@ struct SContext reinterpret_cast(posBase)[1] = y; reinterpret_cast(posBase)[2] = z; if (trackAABB) - extendAABBAccumulator(*parsedAABB, x, y, z); + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, x, y, z); src += 3ull * floatBytes; posBase += posStride; reinterpret_cast(normalBase)[0] = decodeF32(src + 0ull * floatBytes); @@ -1563,7 +1563,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa // start with empty mesh auto geometry = make_smart_refctd_ptr(); - SAABBAccumulator3 parsedAABB = createAABBAccumulator(); + hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); uint32_t vertCount=0; core::vector> hashedBuffers; std::jthread deferredPositionHashThread; diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index fe5836038d..5b88ff6c64 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -12,11 +12,11 @@ #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/format/convertColor.h" -#include "nbl/asset/utils/SGeometryAABBCommon.h" #include "nbl/asset/utils/SGeometryNormalCommon.h" #include "nbl/asset/asset.h" #include "nbl/asset/metadata/CSTLMetadata.h" #include 
"nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" #include "nbl/core/hash/blake.h" #include "nbl/system/IFile.h" @@ -272,7 +272,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa auto geometry = core::make_smart_refctd_ptr(); geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - SAABBAccumulator3 parsedAABB = createAABBAccumulator(); + hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); uint64_t vertexCount = 0ull; if (!binary && wholeFileDataIsMapped) @@ -635,8 +635,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { if (!localAABB.has) continue; - extendAABBAccumulator(parsedAABB, localAABB.minX, localAABB.minY, localAABB.minZ); - extendAABBAccumulator(parsedAABB, localAABB.maxX, localAABB.maxY, localAABB.maxZ); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, localAABB.minX, localAABB.minY, localAABB.minZ); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, localAABB.maxX, localAABB.maxY, localAABB.maxZ); } } geometry->setPositionView(std::move(posView)); @@ -725,9 +725,9 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa normals.push_back(faceNormal); normals.push_back(faceNormal); normals.push_back(faceNormal); - extendAABBAccumulator(parsedAABB, p[2u]); - extendAABBAccumulator(parsedAABB, p[1u]); - extendAABBAccumulator(parsedAABB, p[0u]); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[2u]); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[1u]); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[0u]); const auto endLoopKeyword = parser.readToken(); if (!endLoopKeyword.has_value() || *endLoopKeyword != std::string_view("endloop")) From 16eb8ba9c4debc4f1797b4cd325689e759cb3e38 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 12:01:02 +0100 Subject: [PATCH 056/118] Update polygon geometry aux 
layouts --- include/nbl/asset/interchange/SFileIOPolicy.h | 10 +- .../interchange/SGeometryContentHashCommon.h | 2 +- .../asset/interchange/SGeometryWriterCommon.h | 133 +++++++++++------- .../asset/interchange/COBJMeshFileLoader.cpp | 5 +- src/nbl/asset/interchange/COBJMeshWriter.cpp | 21 ++- .../asset/interchange/CPLYMeshFileLoader.cpp | 5 +- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 50 +++---- .../asset/interchange/CSTLMeshFileLoader.cpp | 5 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 45 ++---- .../SOBJPolygonGeometryAuxLayout.h | 18 +++ .../SPLYPolygonGeometryAuxLayout.h | 18 +++ .../SSTLPolygonGeometryAuxLayout.h | 18 +++ 12 files changed, 204 insertions(+), 126 deletions(-) create mode 100644 src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h create mode 100644 src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h create mode 100644 src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 458f2bc1b8..fc9269d990 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -116,15 +116,15 @@ struct SFileIOPolicy return 1ull << clampBytesLog2(value, minValue); } - // Requested IO strategy. + // Requested IO strategy. Defaults to Auto. Strategy strategy = Strategy::Auto; - // Resolution flags. + // Resolution flags. Defaults to none. core::bitflag flags = EF_NONE; - // Maximum payload size allowed for whole-file strategy in auto mode. + // Maximum payload size allowed for whole-file strategy in auto mode. Defaults to 64 MiB. uint8_t wholeFileThresholdLog2 = static_cast(std::bit_width(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES) - 1u); - // Chunk size used by chunked strategy encoded as log2(bytes). + // Chunk size used by chunked strategy encoded as log2(bytes). Defaults to 4 MiB. 
uint8_t chunkSizeLog2 = static_cast(std::bit_width(DEFAULT_CHUNK_SIZE_BYTES) - 1u); - // Maximum staging allocation for whole-file strategy encoded as log2(bytes). + // Maximum staging allocation for whole-file strategy encoded as log2(bytes). Defaults to 256 MiB. uint8_t maxStagingLog2 = static_cast(std::bit_width(DEFAULT_MAX_STAGING_BYTES) - 1u); // Runtime tuning controls used by loaders and hash stages. SRuntimeTuning runtimeTuning = {}; diff --git a/include/nbl/asset/interchange/SGeometryContentHashCommon.h b/include/nbl/asset/interchange/SGeometryContentHashCommon.h index 5eba269949..d216cbd2c0 100644 --- a/include/nbl/asset/interchange/SGeometryContentHashCommon.h +++ b/include/nbl/asset/interchange/SGeometryContentHashCommon.h @@ -16,7 +16,7 @@ namespace nbl::asset class SPolygonGeometryContentHash { public: - using EMode = CPolygonGeometryManipulator::EContentHashMode; + using mode_t = CPolygonGeometryManipulator::EContentHashMode; static inline void collectBuffers( const ICPUPolygonGeometry* geometry, diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index 441b1c4991..6a91ac81a2 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -14,6 +14,7 @@ #include #include #include +#include namespace nbl::asset @@ -24,6 +25,7 @@ namespace impl template concept PolygonGeometryWriteItemContainer = requires(Container& c, const ICPUPolygonGeometry* geometry, const hlsl::float32_t3x4 transform, const uint32_t instanceIx, const uint32_t targetIx, const uint32_t geometryIx) { c.emplace_back(geometry, transform, instanceIx, targetIx, geometryIx); }; template +// Flattens a geometry collection into per-geometry write items and bakes the parent transform into each entry. 
static inline void appendPolygonGeometryWriteItemsFromCollection(Container& out, const ICPUGeometryCollection* collection, const hlsl::float32_t3x4& parentTransform, const uint32_t instanceIx, const uint32_t targetIx) { if (!collection) @@ -59,6 +61,7 @@ class SGeometryWriterCommon }; template> requires impl::PolygonGeometryWriteItemContainer + // Collects every polygon geometry a writer can serialize from a geometry, collection, or flattened scene. static inline Container collectPolygonGeometryWriteItems(const IAsset* rootAsset) { Container out = {}; @@ -107,27 +110,27 @@ class SGeometryWriterCommon return transform == hlsl::math::linalg::identity(); } - static inline const ICPUPolygonGeometry::SDataView* findFirstAuxViewByChannelCount(const ICPUPolygonGeometry* geom, const uint32_t channels, const size_t requiredElementCount = 0ull) + // Returns the aux view stored at a specific semantic slot when it exists. + static inline const ICPUPolygonGeometry::SDataView* getAuxViewAt(const ICPUPolygonGeometry* geom, const uint32_t auxViewIx, const size_t requiredElementCount = 0ull) { - if (!geom || channels == 0u) + if (!geom) return nullptr; - for (const auto& view : geom->getAuxAttributeViews()) - { - if (!view) - continue; - if (requiredElementCount && view.getElementCount() != requiredElementCount) - continue; - if (getFormatChannelCount(view.composed.format) == channels) - return &view; - } + const auto& auxViews = geom->getAuxAttributeViews(); + if (auxViewIx >= auxViews.size()) + return nullptr; - return nullptr; + const auto& view = auxViews[auxViewIx]; + if (!view) + return nullptr; + if (requiredElementCount && view.getElementCount() != requiredElementCount) + return nullptr; + return &view; } - static inline bool decodeTriangleIndices(const ICPUPolygonGeometry* geom, core::vector& indexData, const uint32_t*& outIndices, size_t& outFaceCount) + // Validates triangle-list indexing and returns the number of faces the writer will emit. 
+ static inline bool getTriangleFaceCount(const ICPUPolygonGeometry* geom, size_t& outFaceCount) { - outIndices = nullptr; outFaceCount = 0ull; if (!geom) return false; @@ -143,53 +146,87 @@ class SGeometryWriterCommon const size_t indexCount = indexView.getElementCount(); if ((indexCount % 3ull) != 0ull) return false; + outFaceCount = indexCount / 3ull; + return true; + } + + if ((vertexCount % 3ull) != 0ull) + return false; + + outFaceCount = vertexCount / 3ull; + return true; + } + + // Calls `visitor(i0, i1, i2)` once per triangle after validating indices and normalizing implicit/R16/R32 indexing to uint32_t. + template + static inline bool visitTriangleIndices(const ICPUPolygonGeometry* geom, Visitor&& visitor) + { + if (!geom) + return false; + + const auto& positionView = geom->getPositionView(); + const size_t vertexCount = positionView.getElementCount(); + if (vertexCount == 0ull) + return false; - const void* src = indexView.getPointer(); - if (!src) + auto visit = [&](const IndexT i0, const IndexT i1, const IndexT i2)->bool + { + const uint32_t u0 = static_cast(i0); + const uint32_t u1 = static_cast(i1); + const uint32_t u2 = static_cast(i2); + if (u0 >= vertexCount || u1 >= vertexCount || u2 >= vertexCount) return false; - if (indexView.composed.format == EF_R32_UINT && indexView.composed.getStride() == sizeof(uint32_t)) - { - outIndices = reinterpret_cast(src); - } - else if (indexView.composed.format == EF_R16_UINT && indexView.composed.getStride() == sizeof(uint16_t)) - { - indexData.resize(indexCount); - const auto* src16 = reinterpret_cast(src); - for (size_t i = 0ull; i < indexCount; ++i) - indexData[i] = src16[i]; - outIndices = indexData.data(); - } + if constexpr (std::is_same_v, bool>) + return visitor(u0, u1, u2); else { - indexData.resize(indexCount); - hlsl::vector decoded = {}; - for (size_t i = 0ull; i < indexCount; ++i) - { - if (!indexView.decodeElement(i, decoded)) - return false; - indexData[i] = decoded.x; - } - outIndices = 
indexData.data(); + visitor(u0, u1, u2); + return true; } + }; - for (size_t i = 0ull; i < indexCount; ++i) - if (outIndices[i] >= vertexCount) - return false; + const auto& indexView = geom->getIndexView(); + if (!indexView) + { + if ((vertexCount % 3ull) != 0ull) + return false; - outFaceCount = indexCount / 3ull; + for (uint32_t i = 0u; i < vertexCount; i += 3u) + if (!visit(i + 0u, i + 1u, i + 2u)) + return false; return true; } - if ((vertexCount % 3ull) != 0ull) + const size_t indexCount = indexView.getElementCount(); + if ((indexCount % 3ull) != 0ull) return false; - indexData.resize(vertexCount); - for (size_t i = 0ull; i < vertexCount; ++i) - indexData[i] = static_cast(i); - outIndices = indexData.data(); - outFaceCount = vertexCount / 3ull; - return true; + const void* const src = indexView.getPointer(); + if (!src) + return false; + + switch (geom->getIndexType()) + { + case EIT_32BIT: + { + const auto* indices = reinterpret_cast(src); + for (size_t i = 0ull; i < indexCount; i += 3ull) + if (!visit(indices[i + 0ull], indices[i + 1ull], indices[i + 2ull])) + return false; + return true; + } + case EIT_16BIT: + { + const auto* indices = reinterpret_cast(src); + for (size_t i = 0ull; i < indexCount; i += 3ull) + if (!visit(indices[i + 0ull], indices[i + 1ull], indices[i + 2ull])) + return false; + return true; + } + default: + return false; + } } template diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 47a2d7b948..441fa467c1 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -13,6 +13,7 @@ #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" #include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" +#include "SOBJPolygonGeometryAuxLayout.h" #ifdef _NBL_COMPILE_WITH_OBJ_LOADER_ @@ -596,7 +597,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* 
_file, const as auto view = SGeometryLoaderCommon::createAdoptedView(std::move(outUVs)); if (!view) return false; - geometry->getAuxAttributeViews()->push_back(std::move(view)); + auto* const auxViews = geometry->getAuxAttributeViews(); + auxViews->resize(SOBJPolygonGeometryAuxLayout::UV0 + 1u); + auxViews->operator[](SOBJPolygonGeometryAuxLayout::UV0) = std::move(view); } if (!indices.empty()) diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 60a4dd2c5d..c19db1b386 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -5,6 +5,7 @@ #include "nbl/asset/interchange/COBJMeshWriter.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" +#include "SOBJPolygonGeometryAuxLayout.h" #ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ @@ -270,9 +271,11 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const auto& normalView = geom->getNormalView(); const bool hasNormals = static_cast(normalView); - const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::findFirstAuxViewByChannelCount(geom, 2u); - const bool hasUVs = uvView != nullptr; const size_t vertexCount = positionView.getElementCount(); + const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, SOBJPolygonGeometryAuxLayout::UV0, vertexCount); + if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) + uvView = nullptr; + const bool hasUVs = uvView != nullptr; if (vertexCount == 0ull) return false; if (hasNormals && normalView.getElementCount() != vertexCount) @@ -286,10 +289,8 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (indexing->knownTopology() != E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) return false; - core::vector indexData; - const uint32_t* indices = nullptr; size_t faceCount = 0ull; - if 
(!SGeometryWriterCommon::decodeTriangleIndices(geom, indexData, indices, faceCount)) + if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) return false; const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); @@ -378,12 +379,8 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ appendIndexTokenToStorage(faceIndexStorage, faceIndexRefs, positionIx, hasUVs, uvIx, hasNormals, normalIx); } - for (size_t i = 0u; i < faceCount; ++i) + if (!SGeometryWriterCommon::visitTriangleIndices(geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2)->bool { - const uint32_t i0 = indices[i * 3u + 0u]; - const uint32_t i1 = indices[i * 3u + 1u]; - const uint32_t i2 = indices[i * 3u + 2u]; - const uint32_t f0 = transformState.reverseWinding ? i0 : i2; const uint32_t f1 = i1; const uint32_t f2 = transformState.reverseWinding ? i2 : i0; @@ -391,7 +388,9 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return false; appendFaceLine(output, faceIndexStorage, faceIndexRefs, f0, f1, f2); - } + return true; + })) + return false; positionBase += static_cast(vertexCount); if (hasUVs) diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 8625a17522..15383789ec 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -15,6 +15,7 @@ #include "nbl/core/hash/blake.h" #include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" +#include "SPLYPolygonGeometryAuxLayout.h" #include @@ -2006,7 +2007,9 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa auto view = createView(uvView.format,el.Count); for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; - geometry->getAuxAttributeViews()->push_back(std::move(view)); + auto* const auxViews = 
geometry->getAuxAttributeViews(); + auxViews->resize(SPLYPolygonGeometryAuxLayout::UV0 + 1u); + auxViews->operator[](SPLYPolygonGeometryAuxLayout::UV0) = std::move(view); } // for (auto& view : extraViews) diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index d2a7a951a4..0c6772e12c 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -6,6 +6,7 @@ #include "CPLYMeshWriter.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" +#include "SPLYPolygonGeometryAuxLayout.h" #ifdef _NBL_COMPILE_WITH_PLY_WRITER_ @@ -425,7 +426,6 @@ struct SWriteInput bool writeNormals = false; EPlyScalarType normalScalarType = EPlyScalarType::Float32; size_t vertexCount = 0ull; - const uint32_t* indices = nullptr; size_t faceCount = 0ull; bool write16BitIndices = false; bool flipVectors = false; @@ -496,7 +496,9 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return false; } - const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::findFirstAuxViewByChannelCount(geom, 2u, vertexCount); + const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, SPLYPolygonGeometryAuxLayout::UV0, vertexCount); + if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) + uvView = nullptr; core::vector extraAuxViews; const auto& auxViews = geom->getAuxAttributeViews(); @@ -504,7 +506,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ for (uint32_t auxIx = 0u; auxIx < static_cast(auxViews.size()); ++auxIx) { const auto& view = auxViews[auxIx]; - if (!view || (&view == uvView)) + if (!view || (uvView && auxIx == SPLYPolygonGeometryAuxLayout::UV0)) continue; if (view.getElementCount() != vertexCount) continue; @@ -528,12 +530,10 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ 
return false; } - core::vector indexData; - const uint32_t* indices = nullptr; size_t faceCount = 0ull; - if (!SGeometryWriterCommon::decodeTriangleIndices(geom, indexData, indices, faceCount)) + if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) { - _params.logger.log("PLY writer: failed to decode triangle indices.", system::ILogger::ELL_ERROR); + _params.logger.log("PLY writer: failed to validate triangle indexing.", system::ILogger::ELL_ERROR); return false; } @@ -607,7 +607,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ .writeNormals = writeNormals, .normalScalarType = normalScalarType, .vertexCount = vertexCount, - .indices = indices, .faceCount = faceCount, .write16BitIndices = write16BitIndices, .flipVectors = flipVectors @@ -741,7 +740,7 @@ bool ply_writer_detail::writeBinary( const SWriteInput& input, uint8_t* dst) { - if (!input.geom || !input.extraAuxViews || !input.indices || !dst) + if (!input.geom || !input.extraAuxViews || !dst) return false; const auto& positionView = input.geom->getPositionView(); @@ -766,37 +765,36 @@ bool ply_writer_detail::writeBinary( } } - for (size_t i = 0; i < input.faceCount; ++i) + return SGeometryWriterCommon::visitTriangleIndices(input.geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2)->bool { const uint8_t listSize = 3u; *dst++ = listSize; - const uint32_t* tri = input.indices + (i * 3u); if (input.write16BitIndices) { const uint16_t tri16[3] = { - static_cast(tri[0]), - static_cast(tri[1]), - static_cast(tri[2]) + static_cast(i0), + static_cast(i1), + static_cast(i2) }; std::memcpy(dst, tri16, sizeof(tri16)); dst += sizeof(tri16); } else { - std::memcpy(dst, tri, sizeof(uint32_t) * 3u); - dst += sizeof(uint32_t) * 3u; + const uint32_t tri[3] = { i0, i1, i2 }; + std::memcpy(dst, tri, sizeof(tri)); + dst += sizeof(tri); } - } - - return true; + return true; + }); } bool ply_writer_detail::writeText( const SWriteInput& input, std::string& output) { - 
if (!input.geom || !input.extraAuxViews || !input.indices) + if (!input.geom || !input.extraAuxViews) return false; const auto& positionView = input.geom->getPositionView(); @@ -829,18 +827,16 @@ bool ply_writer_detail::writeText( output += "\n"; } - for (size_t i = 0; i < input.faceCount; ++i) + return SGeometryWriterCommon::visitTriangleIndices(input.geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) { - const uint32_t* tri = input.indices + (i * 3u); output.append("3 "); - appendUInt(output, tri[0]); + appendUInt(output, i0); output.push_back(' '); - appendUInt(output, tri[1]); + appendUInt(output, i1); output.push_back(' '); - appendUInt(output, tri[2]); + appendUInt(output, i2); output.push_back('\n'); - } - return true; + }); } } // namespace nbl::asset diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 5b88ff6c64..2a2637e23c 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -19,6 +19,7 @@ #include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" #include "nbl/core/hash/blake.h" #include "nbl/system/IFile.h" +#include "SSTLPolygonGeometryAuxLayout.h" #include #include @@ -655,7 +656,9 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa auto colorView = SGeometryLoaderCommon::createAdoptedView(std::move(vertexColors)); if (!colorView) return {}; - geometry->getAuxAttributeViews()->push_back(std::move(colorView)); + auto* const auxViews = geometry->getAuxAttributeViews(); + auxViews->resize(SSTLPolygonGeometryAuxLayout::COLOR0 + 1u); + auxViews->operator[](SSTLPolygonGeometryAuxLayout::COLOR0) = std::move(colorView); hasTriangleColors = true; } } diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index d3c0817827..3feb743eb2 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ 
b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -8,6 +8,7 @@ #include "nbl/asset/format/convertColor.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" +#include "SSTLPolygonGeometryAuxLayout.h" #include #include @@ -60,7 +61,7 @@ bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase: bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, hlsl::float32_t3& outNormal); double stlNormalizeColorComponentToUnit(double value); uint16_t stlPackViscamColorFromB8G8R8A8(uint32_t color); -const ICPUPolygonGeometry::SDataView* stlFindColorView(const ICPUPolygonGeometry* geom, size_t vertexCount); +const ICPUPolygonGeometry::SDataView* stlGetColorView(const ICPUPolygonGeometry* geom, size_t vertexCount); bool stlDecodeColorB8G8R8A8(const ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, uint32_t& outColor); void stlDecodeColorUnitRGBAFromB8G8R8A8(uint32_t color, double (&out)[4]); bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context); @@ -363,26 +364,12 @@ uint16_t stlPackViscamColorFromB8G8R8A8(const uint32_t color) return packed; } -const ICPUPolygonGeometry::SDataView* stlFindColorView(const ICPUPolygonGeometry* geom, const size_t vertexCount) +const ICPUPolygonGeometry::SDataView* stlGetColorView(const ICPUPolygonGeometry* geom, const size_t vertexCount) { - if (!geom) + const auto* view = SGeometryWriterCommon::getAuxViewAt(geom, SSTLPolygonGeometryAuxLayout::COLOR0, vertexCount); + if (!view) return nullptr; - - const auto& auxViews = geom->getAuxAttributeViews(); - const ICPUPolygonGeometry::SDataView* fallback = nullptr; - for (const auto& view : auxViews) - { - if (!view || view.getElementCount() != vertexCount) - continue; - const uint32_t channels = getFormatChannelCount(view.composed.format); - if (channels < 3u) - continue; - if (view.composed.format == EF_B8G8R8A8_UNORM) - return &view; - if 
(!fallback) - fallback = &view; - } - return fallback; + return getFormatChannelCount(view->composed.format) >= 3u ? view : nullptr; } bool stlDecodeColorB8G8R8A8(const ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, uint32_t& outColor) @@ -429,11 +416,9 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) if (vertexCount == 0ull) return false; - core::vector indexData; - const uint32_t* indices = nullptr; uint32_t facenum = 0u; size_t faceCount = 0ull; - if (!SGeometryWriterCommon::decodeTriangleIndices(geom, indexData, indices, faceCount)) + if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) return false; if (faceCount > static_cast(std::numeric_limits::max())) return false; @@ -453,9 +438,10 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) const auto& normalView = geom->getNormalView(); const bool hasNormals = static_cast(normalView); - const auto* const colorView = stlFindColorView(geom, vertexCount); + const auto* const colorView = stlGetColorView(geom, vertexCount); const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(posView); const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; + const bool hasImplicitTriangleIndices = !geom->getIndexView(); const float handednessSign = flipHandedness ? 
-1.f : 1.f; auto decodePosition = [&](const uint32_t ix, hlsl::float32_t3& out)->bool @@ -522,7 +508,7 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) dst += stl_writer_detail::BinaryTriangleAttributeBytes; }; - const bool hasFastTightPath = (indices == nullptr) && (tightPositions != nullptr) && (!hasNormals || (tightNormals != nullptr)); + const bool hasFastTightPath = hasImplicitTriangleIndices && (tightPositions != nullptr) && (!hasNormals || (tightNormals != nullptr)); if (hasFastTightPath && hasNormals) { bool allFastNormalsNonZero = true; @@ -674,13 +660,8 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) } else { - for (uint32_t primIx = 0u; primIx < facenum; ++primIx) + if (!SGeometryWriterCommon::visitTriangleIndices(geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2)->bool { - const uint32_t i0 = indices ? indices[primIx * 3u + 0u] : (primIx * 3u + 0u); - const uint32_t i1 = indices ? indices[primIx * 3u + 1u] : (primIx * 3u + 1u); - const uint32_t i2 = indices ? 
indices[primIx * 3u + 2u] : (primIx * 3u + 2u); - if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) - return false; uint16_t faceColor = 0u; if (!computeFaceColor(i0, i1, i2, faceColor)) return false; @@ -753,7 +734,9 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) vertex2.x, vertex2.y, vertex2.z, vertex3.x, vertex3.y, vertex3.z, faceColor); - } + return true; + })) + return false; } const bool writeOk = SInterchangeIOCommon::writeFileWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, &context->writeTelemetry); diff --git a/src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h b/src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h new file mode 100644 index 0000000000..4c7b287f10 --- /dev/null +++ b/src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h @@ -0,0 +1,18 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_OBJ_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +#define _NBL_ASSET_S_OBJ_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ + +namespace nbl::asset +{ + +class SOBJPolygonGeometryAuxLayout +{ + public: + static inline constexpr unsigned int UV0 = 0u; +}; + +} + +#endif diff --git a/src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h b/src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h new file mode 100644 index 0000000000..6077dcda49 --- /dev/null +++ b/src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h @@ -0,0 +1,18 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_PLY_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +#define _NBL_ASSET_S_PLY_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ + +namespace nbl::asset +{ + +class SPLYPolygonGeometryAuxLayout +{ + public: + static inline constexpr unsigned int UV0 = 0u; +}; + +} + +#endif diff --git a/src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h b/src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h new file mode 100644 index 0000000000..7bc0d27a85 --- /dev/null +++ b/src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h @@ -0,0 +1,18 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_STL_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +#define _NBL_ASSET_S_STL_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ + +namespace nbl::asset +{ + +class SSTLPolygonGeometryAuxLayout +{ + public: + static inline constexpr unsigned int COLOR0 = 0u; +}; + +} + +#endif From 79218cf0a36781e66e114f869dcc84f052b0c0e5 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 12:11:54 +0100 Subject: [PATCH 057/118] Deduplicate triangle index visitor --- .../asset/interchange/SGeometryWriterCommon.h | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index 6a91ac81a2..a66c68fe8d 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -24,8 +24,8 @@ namespace impl { template concept PolygonGeometryWriteItemContainer = requires(Container& c, const ICPUPolygonGeometry* geometry, const hlsl::float32_t3x4 transform, const uint32_t instanceIx, const uint32_t targetIx, const uint32_t geometryIx) { c.emplace_back(geometry, transform, 
instanceIx, targetIx, geometryIx); }; -template // Flattens a geometry collection into per-geometry write items and bakes the parent transform into each entry. +template static inline void appendPolygonGeometryWriteItemsFromCollection(Container& out, const ICPUGeometryCollection* collection, const hlsl::float32_t3x4& parentTransform, const uint32_t instanceIx, const uint32_t targetIx) { if (!collection) @@ -60,8 +60,8 @@ class SGeometryWriterCommon uint32_t geometryIx = 0u; }; - template> requires impl::PolygonGeometryWriteItemContainer // Collects every polygon geometry a writer can serialize from a geometry, collection, or flattened scene. + template> requires impl::PolygonGeometryWriteItemContainer static inline Container collectPolygonGeometryWriteItems(const IAsset* rootAsset) { Container out = {}; @@ -206,24 +206,19 @@ class SGeometryWriterCommon if (!src) return false; + auto visitIndexed = [&]()->bool + { + const auto* indices = reinterpret_cast(src); + for (size_t i = 0ull; i < indexCount; i += 3ull) + if (!visit(indices[i + 0ull], indices[i + 1ull], indices[i + 2ull])) + return false; + return true; + }; + switch (geom->getIndexType()) { - case EIT_32BIT: - { - const auto* indices = reinterpret_cast(src); - for (size_t i = 0ull; i < indexCount; i += 3ull) - if (!visit(indices[i + 0ull], indices[i + 1ull], indices[i + 2ull])) - return false; - return true; - } - case EIT_16BIT: - { - const auto* indices = reinterpret_cast(src); - for (size_t i = 0ull; i < indexCount; i += 3ull) - if (!visit(indices[i + 0ull], indices[i + 1ull], indices[i + 2ull])) - return false; - return true; - } + case EIT_32BIT: return visitIndexed.template operator()(); + case EIT_16BIT: return visitIndexed.template operator()(); default: return false; } From 976df29a837a2f2b28094af0ea34c296b9fd3d6a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 12:31:37 +0100 Subject: [PATCH 058/118] Move IO helpers into structs --- 
include/nbl/asset/interchange/SFileIOPolicy.h | 5 -- .../asset/interchange/SGeometryWriterCommon.h | 47 +++++++++-------- .../asset/interchange/SLoaderRuntimeTuning.h | 52 +++++++++++-------- .../asset/interchange/COBJMeshFileLoader.cpp | 2 +- src/nbl/asset/interchange/COBJMeshWriter.cpp | 2 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 2 +- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 4 +- .../asset/interchange/CSTLMeshFileLoader.cpp | 2 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 2 +- 9 files changed, 60 insertions(+), 58 deletions(-) diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index fc9269d990..b4c833c96f 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -226,11 +226,6 @@ struct SResolvedFileIOPolicy } }; -inline constexpr SResolvedFileIOPolicy resolveFileIOPolicy(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) -{ - return SResolvedFileIOPolicy(policy, byteCount, sizeKnown, fileMappable); -} - } namespace nbl::system::impl diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index a66c68fe8d..8d93970dfb 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -23,31 +23,32 @@ namespace nbl::asset namespace impl { template concept PolygonGeometryWriteItemContainer = requires(Container& c, const ICPUPolygonGeometry* geometry, const hlsl::float32_t3x4 transform, const uint32_t instanceIx, const uint32_t targetIx, const uint32_t geometryIx) { c.emplace_back(geometry, transform, instanceIx, targetIx, geometryIx); }; - -// Flattens a geometry collection into per-geometry write items and bakes the parent transform into each entry. 
-template -static inline void appendPolygonGeometryWriteItemsFromCollection(Container& out, const ICPUGeometryCollection* collection, const hlsl::float32_t3x4& parentTransform, const uint32_t instanceIx, const uint32_t targetIx) -{ - if (!collection) - return; - - const auto identity = hlsl::math::linalg::identity(); - const auto& geometries = collection->getGeometries(); - for (uint32_t geometryIx = 0u; geometryIx < geometries.size(); ++geometryIx) - { - const auto& ref = geometries[geometryIx]; - if (!ref.geometry || ref.geometry->getPrimitiveType() != IGeometryBase::EPrimitiveType::Polygon) - continue; - const auto* geometry = static_cast(ref.geometry.get()); - const auto localTransform = ref.hasTransform() ? ref.transform : identity; - out.emplace_back(geometry, hlsl::math::linalg::promoted_mul(parentTransform, localTransform), instanceIx, targetIx, geometryIx); - } -} - } class SGeometryWriterCommon { + template + struct SPolygonGeometryWriteItemCollector + { + static inline void appendFromCollection(Container& out, const ICPUGeometryCollection* collection, const hlsl::float32_t3x4& parentTransform, const uint32_t instanceIx, const uint32_t targetIx) + { + if (!collection) + return; + + const auto identity = hlsl::math::linalg::identity(); + const auto& geometries = collection->getGeometries(); + for (uint32_t geometryIx = 0u; geometryIx < geometries.size(); ++geometryIx) + { + const auto& ref = geometries[geometryIx]; + if (!ref.geometry || ref.geometry->getPrimitiveType() != IGeometryBase::EPrimitiveType::Polygon) + continue; + const auto* geometry = static_cast(ref.geometry.get()); + const auto localTransform = ref.hasTransform() ? 
ref.transform : identity; + out.emplace_back(geometry, hlsl::math::linalg::promoted_mul(parentTransform, localTransform), instanceIx, targetIx, geometryIx); + } + } + }; + public: struct SPolygonGeometryWriteItem { @@ -79,7 +80,7 @@ class SGeometryWriterCommon if (rootAsset->getAssetType() == IAsset::ET_GEOMETRY_COLLECTION) { - impl::appendPolygonGeometryWriteItemsFromCollection(out, static_cast(rootAsset), identity, ~0u, ~0u); + SPolygonGeometryWriteItemCollector::appendFromCollection(out, static_cast(rootAsset), identity, ~0u, ~0u); return out; } @@ -99,7 +100,7 @@ class SGeometryWriterCommon const auto instanceTransform = initialTransforms.empty() ? identity : initialTransforms[instanceIx]; const auto& targetList = targets->getTargets(); for (uint32_t targetIx = 0u; targetIx < targetList.size(); ++targetIx) - impl::appendPolygonGeometryWriteItemsFromCollection(out, targetList[targetIx].geoCollection.get(), instanceTransform, instanceIx, targetIx); + SPolygonGeometryWriteItemCollector::appendFromCollection(out, targetList[targetIx].geoCollection.get(), instanceTransform, instanceIx, targetIx); } return out; diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index d83376cccb..e6d8672d7f 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -64,6 +64,20 @@ struct SLoaderRuntimeTuningResult struct SLoaderRuntimeTuner { + private: + struct SBenchmarkSampleStats + { + uint64_t medianNs = 0ull; + uint64_t minNs = 0ull; + uint64_t maxNs = 0ull; + uint64_t totalNs = 0ull; + }; + + static inline uint64_t benchmarkSample(const uint8_t* sampleData, uint64_t sampleBytes, size_t workerCount, uint32_t passes); + static inline SBenchmarkSampleStats benchmarkSampleStats(const uint8_t* sampleData, uint64_t sampleBytes, size_t workerCount, uint32_t passes, uint32_t observations); + static inline void appendCandidate(std::vector& dst, 
size_t candidate); + + public: template static void dispatchWorkers(const size_t workerCount, Fn&& fn); @@ -128,7 +142,7 @@ void SLoaderRuntimeTuner::dispatchWorkers(const size_t workerCount, Fn&& fn) fn(0ull); } -inline uint64_t loaderRuntimeBenchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes) +inline uint64_t SLoaderRuntimeTuner::benchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes) { if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) return 0ull; @@ -161,15 +175,7 @@ inline uint64_t loaderRuntimeBenchmarkSample(const uint8_t* const sampleData, co return elapsedNs; } -struct SLoaderRuntimeSampleStats -{ - uint64_t medianNs = 0ull; - uint64_t minNs = 0ull; - uint64_t maxNs = 0ull; - uint64_t totalNs = 0ull; -}; - -inline SLoaderRuntimeSampleStats loaderRuntimeBenchmarkSampleStats( +inline SLoaderRuntimeTuner::SBenchmarkSampleStats SLoaderRuntimeTuner::benchmarkSampleStats( const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, @@ -177,7 +183,7 @@ inline SLoaderRuntimeSampleStats loaderRuntimeBenchmarkSampleStats( const uint32_t observations ) { - SLoaderRuntimeSampleStats stats = {}; + SBenchmarkSampleStats stats = {}; if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) return stats; @@ -185,10 +191,10 @@ inline SLoaderRuntimeSampleStats loaderRuntimeBenchmarkSampleStats( std::vector samples; samples.reserve(observationCount); - loaderRuntimeBenchmarkSample(sampleData, sampleBytes, workerCount, 1u); + benchmarkSample(sampleData, sampleBytes, workerCount, 1u); for (uint32_t obsIx = 0u; obsIx < observationCount; ++obsIx) { - const uint64_t elapsedNs = loaderRuntimeBenchmarkSample(sampleData, sampleBytes, workerCount, passes); + const uint64_t elapsedNs = benchmarkSample(sampleData, sampleBytes, workerCount, passes); if (elapsedNs == 0ull) continue; stats.totalNs += 
elapsedNs; @@ -196,7 +202,7 @@ inline SLoaderRuntimeSampleStats loaderRuntimeBenchmarkSampleStats( } if (samples.empty()) - return SLoaderRuntimeSampleStats{}; + return SBenchmarkSampleStats{}; std::sort(samples.begin(), samples.end()); stats.minNs = samples.front(); @@ -208,7 +214,7 @@ inline SLoaderRuntimeSampleStats loaderRuntimeBenchmarkSampleStats( return stats; } -inline void loaderRuntimeAppendCandidate(std::vector& dst, const size_t candidate) +inline void SLoaderRuntimeTuner::appendCandidate(std::vector& dst, const size_t candidate) { if (candidate == 0ull) return; @@ -306,18 +312,18 @@ SLoaderRuntimeTuningResult SLoaderRuntimeTuner::tune(const SFileIOPolicy& ioPoli std::vector candidates; candidates.reserve(maxCandidates); - loaderRuntimeAppendCandidate(candidates, heuristicWorkerCount); - loaderRuntimeAppendCandidate(candidates, heuristicWorkerCount > 1ull ? (heuristicWorkerCount - 1ull) : 1ull); - loaderRuntimeAppendCandidate(candidates, std::min(maxWorkers, heuristicWorkerCount + 1ull)); + appendCandidate(candidates, heuristicWorkerCount); + appendCandidate(candidates, heuristicWorkerCount > 1ull ? 
(heuristicWorkerCount - 1ull) : 1ull); + appendCandidate(candidates, std::min(maxWorkers, heuristicWorkerCount + 1ull)); if (heuristicWorkerCount > 2ull) - loaderRuntimeAppendCandidate(candidates, heuristicWorkerCount - 2ull); + appendCandidate(candidates, heuristicWorkerCount - 2ull); if (heuristicWorkerCount + 2ull <= maxWorkers) - loaderRuntimeAppendCandidate(candidates, heuristicWorkerCount + 2ull); + appendCandidate(candidates, heuristicWorkerCount + 2ull); if (candidates.size() > maxCandidates) candidates.resize(maxCandidates); // probe heuristic first and only continue when budget can amortize additional probes - const auto heuristicStatsProbe = loaderRuntimeBenchmarkSampleStats( + const auto heuristicStatsProbe = benchmarkSampleStats( request.sampleData, effectiveSampleBytes, heuristicWorkerCount, samplePasses, 2u); if (heuristicStatsProbe.medianNs > 0ull) { @@ -338,7 +344,7 @@ SLoaderRuntimeTuningResult SLoaderRuntimeTuner::tune(const SFileIOPolicy& ioPoli 1ull, 3ull)); - SLoaderRuntimeSampleStats bestStats = heuristicStatsProbe; + SBenchmarkSampleStats bestStats = heuristicStatsProbe; size_t bestWorker = heuristicWorkerCount; for (const size_t candidate : candidates) @@ -347,7 +353,7 @@ SLoaderRuntimeTuningResult SLoaderRuntimeTuner::tune(const SFileIOPolicy& ioPoli continue; if (spentNs >= samplingBudgetNs) break; - const auto candidateStats = loaderRuntimeBenchmarkSampleStats( + const auto candidateStats = benchmarkSampleStats( request.sampleData, effectiveSampleBytes, candidate, samplePasses, observations); if (candidateStats.medianNs == 0ull) continue; diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 441fa467c1..e8cd24e380 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -421,7 +421,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as if (filesize <= 0) return {}; const bool 
fileMappable = core::bitflag(_file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(filesize), true, fileMappable); + const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, static_cast(filesize), true, fileMappable); if (!ioPlan.isValid()) { _params.logger.log("OBJ loader: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str(), ioPlan.reason); diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index c19db1b386..14a7dde610 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -402,7 +402,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true, fileMappable); + const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true, fileMappable); if (!ioPlan.isValid()) { _params.logger.log("OBJ writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 15383789ec..63eb2cd1b7 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -1537,7 +1537,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const uint64_t fileSize = _file->getSize(); const bool hashInBuild = computeContentHashes && SLoaderRuntimeTuner::shouldInlineHashBuild(_params.ioPolicy, fileSize); const bool fileMappable = core::bitflag(_file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - const auto ioPlan = 
resolveFileIOPolicy(_params.ioPolicy, fileSize, true, fileMappable); + const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, fileSize, true, fileMappable); if (!ioPlan.isValid()) { _params.logger.log("PLY loader: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str(), ioPlan.reason); diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 0c6772e12c..a5cbeec7ad 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -634,7 +634,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const size_t outputSize = header.size() + body.size(); const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true, fileMappable); + const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true, fileMappable); if (!ioPlan.isValid()) { _params.logger.log("PLY writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); @@ -690,7 +690,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const size_t outputSize = header.size() + body.size(); const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true, fileMappable); + const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true, fileMappable); if (!ioPlan.isValid()) { _params.logger.log("PLY writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 
2a2637e23c..ea6f54235d 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -191,7 +191,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa return {}; const bool fileMappable = core::bitflag(_file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - const auto ioPlan = resolveFileIOPolicy(_params.ioPolicy, static_cast(filesize), true, fileMappable); + const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, static_cast(filesize), true, fileMappable); if (!ioPlan.isValid()) { _params.logger.log("STL loader: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str(), ioPlan.reason); diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 3feb743eb2..b04f39f132 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -136,7 +136,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - context.ioPlan = resolveFileIOPolicy(_params.ioPolicy, expectedSize, sizeKnown, fileMappable); + context.ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, expectedSize, sizeKnown, fileMappable); if (!context.ioPlan.isValid()) { _params.logger.log("STL writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), context.ioPlan.reason); From 91ecd79199c5968889bb1e5405c0e72d118bf4c4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 12:45:47 +0100 Subject: [PATCH 059/118] Extract buffer adoption helpers --- include/nbl/asset/SBufferAdoptionCommon.h | 60 +++++++++++++++++++ .../asset/interchange/SGeometryLoaderCommon.h | 33 ++-------- 2 files changed, 65 insertions(+), 28 deletions(-) create mode 100644 
include/nbl/asset/SBufferAdoptionCommon.h diff --git a/include/nbl/asset/SBufferAdoptionCommon.h b/include/nbl/asset/SBufferAdoptionCommon.h new file mode 100644 index 0000000000..2f64f412d2 --- /dev/null +++ b/include/nbl/asset/SBufferAdoptionCommon.h @@ -0,0 +1,60 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_BUFFER_ADOPTION_COMMON_H_INCLUDED_ +#define _NBL_ASSET_S_BUFFER_ADOPTION_COMMON_H_INCLUDED_ + + +#include +#include +#include +#include + +#include "nbl/asset/ICPUBuffer.h" + + +namespace nbl::asset +{ + +namespace impl +{ + +// Owns contiguous storage that can be adopted by a CPU buffer. Views like std::span are rejected. +template +concept AdoptedBufferStorage = + std::ranges::contiguous_range> && + std::ranges::sized_range> && + (!std::ranges::view>) && + requires(std::remove_reference_t& storage) + { + typename std::ranges::range_value_t>; + { std::ranges::data(storage) } -> std::same_as>*>; + }; + +} + +class SBufferAdoptionCommon +{ + public: + template + static inline core::smart_refctd_ptr createAdoptedBuffer(Storage&& data) + { + using storage_t = std::remove_cvref_t; + using value_t = std::ranges::range_value_t; + + if (std::ranges::empty(data)) + return nullptr; + + auto backer = core::make_smart_refctd_ptr>(std::forward(data)); + auto& storage = backer->getBacker(); + const size_t byteCount = std::ranges::size(storage) * sizeof(value_t); + return ICPUBuffer::create( + { { byteCount }, std::ranges::data(storage), core::smart_refctd_ptr(std::move(backer)), alignof(value_t) }, + core::adopt_memory); + } +}; + +} + + +#endif diff --git a/include/nbl/asset/interchange/SGeometryLoaderCommon.h b/include/nbl/asset/interchange/SGeometryLoaderCommon.h index 0ebbfe3b74..c00b419bae 100644 --- a/include/nbl/asset/interchange/SGeometryLoaderCommon.h +++ 
b/include/nbl/asset/interchange/SGeometryLoaderCommon.h @@ -5,34 +5,16 @@ #define _NBL_ASSET_S_GEOMETRY_LOADER_COMMON_H_INCLUDED_ -#include #include #include -#include +#include "nbl/asset/SBufferAdoptionCommon.h" #include "nbl/asset/ICPUPolygonGeometry.h" namespace nbl::asset { -namespace impl -{ - -// Owns contiguous storage that can be adopted by the buffer. Views like std::span are rejected. -template -concept AdoptedViewStorage = - std::ranges::contiguous_range> && - std::ranges::sized_range> && - (!std::ranges::view>) && - requires(std::remove_reference_t& storage) - { - typename std::ranges::range_value_t>; - { std::ranges::data(storage) } -> std::same_as>*>; - }; - -} - class SGeometryLoaderCommon { public: @@ -55,21 +37,16 @@ class SGeometryLoaderCommon }; } - template + template static inline IGeometry::SDataView createAdoptedView(Storage&& data) { using storage_t = std::remove_cvref_t; using value_t = std::ranges::range_value_t; - if (std::ranges::empty(data)) + auto buffer = SBufferAdoptionCommon::createAdoptedBuffer(std::forward(data)); + if (!buffer) return {}; - - auto backer = core::make_smart_refctd_ptr>(std::forward(data)); - auto& storage = backer->getBacker(); - const size_t byteCount = std::ranges::size(storage) * sizeof(value_t); - auto buffer = ICPUBuffer::create( - { { byteCount }, std::ranges::data(storage), core::smart_refctd_ptr(std::move(backer)), alignof(value_t) }, - core::adopt_memory); + const size_t byteCount = buffer->getSize(); return createDataView(std::move(buffer), byteCount, static_cast(sizeof(value_t)), Format); } }; From 7f017668828a7fd7ef8eb02f9d90109f6f3c17d9 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 13:11:58 +0100 Subject: [PATCH 060/118] Refine writer and buffer adoption helpers --- ...fferAdoptionCommon.h => SBufferAdoption.h} | 8 +-- .../asset/interchange/SGeometryLoaderCommon.h | 4 +- .../asset/interchange/SGeometryWriterCommon.h | 65 ++++++++++++++----- 3 files changed, 55 
insertions(+), 22 deletions(-) rename include/nbl/asset/{SBufferAdoptionCommon.h => SBufferAdoption.h} (88%) diff --git a/include/nbl/asset/SBufferAdoptionCommon.h b/include/nbl/asset/SBufferAdoption.h similarity index 88% rename from include/nbl/asset/SBufferAdoptionCommon.h rename to include/nbl/asset/SBufferAdoption.h index 2f64f412d2..4e80c74235 100644 --- a/include/nbl/asset/SBufferAdoptionCommon.h +++ b/include/nbl/asset/SBufferAdoption.h @@ -1,8 +1,8 @@ // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_ASSET_S_BUFFER_ADOPTION_COMMON_H_INCLUDED_ -#define _NBL_ASSET_S_BUFFER_ADOPTION_COMMON_H_INCLUDED_ +#ifndef _NBL_ASSET_S_BUFFER_ADOPTION_H_INCLUDED_ +#define _NBL_ASSET_S_BUFFER_ADOPTION_H_INCLUDED_ #include @@ -33,11 +33,11 @@ concept AdoptedBufferStorage = } -class SBufferAdoptionCommon +class SBufferAdoption { public: template - static inline core::smart_refctd_ptr createAdoptedBuffer(Storage&& data) + static inline core::smart_refctd_ptr create(Storage&& data) { using storage_t = std::remove_cvref_t; using value_t = std::ranges::range_value_t; diff --git a/include/nbl/asset/interchange/SGeometryLoaderCommon.h b/include/nbl/asset/interchange/SGeometryLoaderCommon.h index c00b419bae..7060e6d404 100644 --- a/include/nbl/asset/interchange/SGeometryLoaderCommon.h +++ b/include/nbl/asset/interchange/SGeometryLoaderCommon.h @@ -8,7 +8,7 @@ #include #include -#include "nbl/asset/SBufferAdoptionCommon.h" +#include "nbl/asset/SBufferAdoption.h" #include "nbl/asset/ICPUPolygonGeometry.h" @@ -43,7 +43,7 @@ class SGeometryLoaderCommon using storage_t = std::remove_cvref_t; using value_t = std::ranges::range_value_t; - auto buffer = SBufferAdoptionCommon::createAdoptedBuffer(std::forward(data)); + auto buffer = SBufferAdoption::create(std::forward(data)); if (!buffer) return {}; const size_t byteCount = buffer->getSize(); 
diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index 8d93970dfb..fc98c2bb54 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -19,24 +19,38 @@ namespace nbl::asset { - -namespace impl -{ -template concept PolygonGeometryWriteItemContainer = requires(Container& c, const ICPUPolygonGeometry* geometry, const hlsl::float32_t3x4 transform, const uint32_t instanceIx, const uint32_t targetIx, const uint32_t geometryIx) { c.emplace_back(geometry, transform, instanceIx, targetIx, geometryIx); }; -} - class SGeometryWriterCommon { + struct SWriteState + { + hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); + uint32_t instanceIx = ~0u; + uint32_t targetIx = ~0u; + uint32_t geometryIx = 0u; + }; + + public: + struct SWriteParams : SWriteState + { + const ICPUPolygonGeometry* geometry = nullptr; + }; + + private: + struct SCollectionParams : SWriteState + { + const ICPUGeometryCollection* collection = nullptr; + }; + template - struct SPolygonGeometryWriteItemCollector + struct SWriteCollector { - static inline void appendFromCollection(Container& out, const ICPUGeometryCollection* collection, const hlsl::float32_t3x4& parentTransform, const uint32_t instanceIx, const uint32_t targetIx) + static inline void appendFromCollection(Container& out, const SCollectionParams& params) { - if (!collection) + if (!params.collection) return; const auto identity = hlsl::math::linalg::identity(); - const auto& geometries = collection->getGeometries(); + const auto& geometries = params.collection->getGeometries(); for (uint32_t geometryIx = 0u; geometryIx < geometries.size(); ++geometryIx) { const auto& ref = geometries[geometryIx]; @@ -44,7 +58,13 @@ class SGeometryWriterCommon continue; const auto* geometry = static_cast(ref.geometry.get()); const auto localTransform = ref.hasTransform() ? 
ref.transform : identity; - out.emplace_back(geometry, hlsl::math::linalg::promoted_mul(parentTransform, localTransform), instanceIx, targetIx, geometryIx); + SWriteParams itemParams = {}; + itemParams.geometry = geometry; + itemParams.transform = hlsl::math::linalg::promoted_mul(params.transform, localTransform); + itemParams.instanceIx = params.instanceIx; + itemParams.targetIx = params.targetIx; + itemParams.geometryIx = geometryIx; + out.emplace_back(itemParams); } } }; @@ -52,7 +72,7 @@ class SGeometryWriterCommon public: struct SPolygonGeometryWriteItem { - inline SPolygonGeometryWriteItem(const ICPUPolygonGeometry* _geometry, const hlsl::float32_t3x4& _transform, const uint32_t _instanceIx, const uint32_t _targetIx, const uint32_t _geometryIx) : geometry(_geometry), transform(_transform), instanceIx(_instanceIx), targetIx(_targetIx), geometryIx(_geometryIx) {} + inline SPolygonGeometryWriteItem(const SWriteParams& params) : geometry(params.geometry), transform(params.transform), instanceIx(params.instanceIx), targetIx(params.targetIx), geometryIx(params.geometryIx) {} const ICPUPolygonGeometry* geometry = nullptr; hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); @@ -62,7 +82,7 @@ class SGeometryWriterCommon }; // Collects every polygon geometry a writer can serialize from a geometry, collection, or flattened scene. 
- template> requires impl::PolygonGeometryWriteItemContainer + template> requires requires(Container& c, const SWriteParams& params) { c.emplace_back(params); } static inline Container collectPolygonGeometryWriteItems(const IAsset* rootAsset) { Container out = {}; @@ -74,13 +94,19 @@ class SGeometryWriterCommon { const auto* geometry = static_cast*>(rootAsset); if (geometry->getPrimitiveType() == IGeometryBase::EPrimitiveType::Polygon) - out.emplace_back(static_cast(rootAsset), identity, ~0u, ~0u, 0u); + { + SWriteParams itemParams = {}; + itemParams.geometry = static_cast(rootAsset); + out.emplace_back(itemParams); + } return out; } if (rootAsset->getAssetType() == IAsset::ET_GEOMETRY_COLLECTION) { - SPolygonGeometryWriteItemCollector::appendFromCollection(out, static_cast(rootAsset), identity, ~0u, ~0u); + SCollectionParams appendParams = {}; + appendParams.collection = static_cast(rootAsset); + SWriteCollector::appendFromCollection(out, appendParams); return out; } @@ -100,7 +126,14 @@ class SGeometryWriterCommon const auto instanceTransform = initialTransforms.empty() ? 
identity : initialTransforms[instanceIx]; const auto& targetList = targets->getTargets(); for (uint32_t targetIx = 0u; targetIx < targetList.size(); ++targetIx) - SPolygonGeometryWriteItemCollector::appendFromCollection(out, targetList[targetIx].geoCollection.get(), instanceTransform, instanceIx, targetIx); + { + SCollectionParams appendParams = {}; + appendParams.collection = targetList[targetIx].geoCollection.get(); + appendParams.transform = instanceTransform; + appendParams.instanceIx = instanceIx; + appendParams.targetIx = targetIx; + SWriteCollector::appendFromCollection(out, appendParams); + } } return out; From dc0c5b225470ba5b601cba909e6c95792212f8af Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 13:37:19 +0100 Subject: [PATCH 061/118] Refine geometry writer helpers --- ...entHashCommon.h => SGeometryContentHash.h} | 4 +- .../asset/interchange/SGeometryWriterCommon.h | 47 +++++++------------ .../asset/interchange/COBJMeshFileLoader.cpp | 2 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 2 +- .../asset/interchange/CSTLMeshFileLoader.cpp | 2 +- tools/hcp/main.cpp | 2 +- 6 files changed, 23 insertions(+), 36 deletions(-) rename include/nbl/asset/interchange/{SGeometryContentHashCommon.h => SGeometryContentHash.h} (95%) diff --git a/include/nbl/asset/interchange/SGeometryContentHashCommon.h b/include/nbl/asset/interchange/SGeometryContentHash.h similarity index 95% rename from include/nbl/asset/interchange/SGeometryContentHashCommon.h rename to include/nbl/asset/interchange/SGeometryContentHash.h index d216cbd2c0..331fd9e1da 100644 --- a/include/nbl/asset/interchange/SGeometryContentHashCommon.h +++ b/include/nbl/asset/interchange/SGeometryContentHash.h @@ -1,8 +1,8 @@ // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_ASSET_S_GEOMETRY_CONTENT_HASH_COMMON_H_INCLUDED_ -#define _NBL_ASSET_S_GEOMETRY_CONTENT_HASH_COMMON_H_INCLUDED_ +#ifndef _NBL_ASSET_S_GEOMETRY_CONTENT_HASH_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_CONTENT_HASH_H_INCLUDED_ #include "nbl/asset/IPreHashed.h" diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index fc98c2bb54..e01dca8b3e 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -21,6 +21,7 @@ namespace nbl::asset { class SGeometryWriterCommon { + public: struct SWriteState { hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); @@ -29,14 +30,12 @@ class SGeometryWriterCommon uint32_t geometryIx = 0u; }; - public: - struct SWriteParams : SWriteState + struct SPolygonGeometryWriteItem : SWriteState { const ICPUPolygonGeometry* geometry = nullptr; }; - private: - struct SCollectionParams : SWriteState + struct SGeometryCollectionWriteParams : SWriteState { const ICPUGeometryCollection* collection = nullptr; }; @@ -44,7 +43,7 @@ class SGeometryWriterCommon template struct SWriteCollector { - static inline void appendFromCollection(Container& out, const SCollectionParams& params) + static inline void appendFromCollection(Container& out, const SGeometryCollectionWriteParams& params) { if (!params.collection) return; @@ -58,31 +57,19 @@ class SGeometryWriterCommon continue; const auto* geometry = static_cast(ref.geometry.get()); const auto localTransform = ref.hasTransform() ? 
ref.transform : identity; - SWriteParams itemParams = {}; - itemParams.geometry = geometry; - itemParams.transform = hlsl::math::linalg::promoted_mul(params.transform, localTransform); - itemParams.instanceIx = params.instanceIx; - itemParams.targetIx = params.targetIx; - itemParams.geometryIx = geometryIx; - out.emplace_back(itemParams); + SPolygonGeometryWriteItem item = {}; + item.geometry = geometry; + item.transform = hlsl::math::linalg::promoted_mul(params.transform, localTransform); + item.instanceIx = params.instanceIx; + item.targetIx = params.targetIx; + item.geometryIx = geometryIx; + out.emplace_back(item); } } }; - public: - struct SPolygonGeometryWriteItem - { - inline SPolygonGeometryWriteItem(const SWriteParams& params) : geometry(params.geometry), transform(params.transform), instanceIx(params.instanceIx), targetIx(params.targetIx), geometryIx(params.geometryIx) {} - - const ICPUPolygonGeometry* geometry = nullptr; - hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); - uint32_t instanceIx = ~0u; - uint32_t targetIx = ~0u; - uint32_t geometryIx = 0u; - }; - // Collects every polygon geometry a writer can serialize from a geometry, collection, or flattened scene. 
- template> requires requires(Container& c, const SWriteParams& params) { c.emplace_back(params); } + template> requires requires(Container& c, const SPolygonGeometryWriteItem& item) { c.emplace_back(item); } static inline Container collectPolygonGeometryWriteItems(const IAsset* rootAsset) { Container out = {}; @@ -95,16 +82,16 @@ class SGeometryWriterCommon const auto* geometry = static_cast*>(rootAsset); if (geometry->getPrimitiveType() == IGeometryBase::EPrimitiveType::Polygon) { - SWriteParams itemParams = {}; - itemParams.geometry = static_cast(rootAsset); - out.emplace_back(itemParams); + SPolygonGeometryWriteItem item = {}; + item.geometry = static_cast(rootAsset); + out.emplace_back(item); } return out; } if (rootAsset->getAssetType() == IAsset::ET_GEOMETRY_COLLECTION) { - SCollectionParams appendParams = {}; + SGeometryCollectionWriteParams appendParams = {}; appendParams.collection = static_cast(rootAsset); SWriteCollector::appendFromCollection(out, appendParams); return out; @@ -127,7 +114,7 @@ class SGeometryWriterCommon const auto& targetList = targets->getTargets(); for (uint32_t targetIx = 0u; targetIx < targetList.size(); ++targetIx) { - SCollectionParams appendParams = {}; + SGeometryCollectionWriteParams appendParams = {}; appendParams.collection = targetList[targetIx].geoCollection.get(); appendParams.transform = instanceTransform; appendParams.instanceIx = instanceIx; diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index e8cd24e380..7c5656cc6f 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -7,7 +7,7 @@ #include "nbl/asset/IAssetManager.h" #include "nbl/asset/ICPUGeometryCollection.h" -#include "nbl/asset/interchange/SGeometryContentHashCommon.h" +#include "nbl/asset/interchange/SGeometryContentHash.h" #include "nbl/asset/interchange/SGeometryLoaderCommon.h" #include 
"nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 63eb2cd1b7..131506670e 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -5,7 +5,7 @@ #ifdef _NBL_COMPILE_WITH_PLY_LOADER_ #include "CPLYMeshFileLoader.h" -#include "nbl/asset/interchange/SGeometryContentHashCommon.h" +#include "nbl/asset/interchange/SGeometryContentHash.h" #include "nbl/asset/interchange/SGeometryLoaderCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index ea6f54235d..fd76921b54 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -7,7 +7,7 @@ #ifdef _NBL_COMPILE_WITH_STL_LOADER_ -#include "nbl/asset/interchange/SGeometryContentHashCommon.h" +#include "nbl/asset/interchange/SGeometryContentHash.h" #include "nbl/asset/interchange/SGeometryLoaderCommon.h" #include "nbl/asset/interchange/SInterchangeIOCommon.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" diff --git a/tools/hcp/main.cpp b/tools/hcp/main.cpp index e104b497ba..24a9c16ba5 100644 --- a/tools/hcp/main.cpp +++ b/tools/hcp/main.cpp @@ -7,7 +7,7 @@ #include "nbl/system/CStdoutLogger.h" #include "nbl/asset/interchange/SFileIOPolicy.h" -#include "nbl/asset/interchange/SGeometryContentHashCommon.h" +#include "nbl/asset/interchange/SGeometryContentHash.h" #include "nbl/core/hash/blake.h" #include "argparse/argparse.hpp" From a87215283982d78371ef19bd0699f4663d2296df Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 14:17:30 +0100 Subject: [PATCH 062/118] Refine interchange IO helpers --- 
.../asset/interchange/SGeometryWriterCommon.h | 2 +- .../asset/interchange/SInterchangeIOCommon.h | 118 +++++++++++------- .../asset/interchange/COBJMeshFileLoader.cpp | 2 +- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 28 ++--- 4 files changed, 88 insertions(+), 62 deletions(-) diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index e01dca8b3e..37dedb2424 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -40,7 +40,7 @@ class SGeometryWriterCommon const ICPUGeometryCollection* collection = nullptr; }; - template + template requires requires(Container& c, const SPolygonGeometryWriteItem& item) { c.emplace_back(item); } struct SWriteCollector { static inline void appendFromCollection(Container& out, const SGeometryCollectionWriteParams& params) diff --git a/include/nbl/asset/interchange/SInterchangeIOCommon.h b/include/nbl/asset/interchange/SInterchangeIOCommon.h index f5fc01ebaf..91a48ef89f 100644 --- a/include/nbl/asset/interchange/SInterchangeIOCommon.h +++ b/include/nbl/asset/interchange/SInterchangeIOCommon.h @@ -10,9 +10,11 @@ #include #include +#include #include #include #include +#include namespace nbl::asset @@ -21,6 +23,7 @@ namespace nbl::asset class SInterchangeIOCommon { public: + // Tracks IO call count and byte distribution for tiny-io diagnostics. struct STelemetry { uint64_t callCount = 0ull; @@ -49,13 +52,14 @@ class SInterchangeIOCommon using SReadTelemetry = STelemetry; using SWriteTelemetry = STelemetry; + // Flags large payloads that were served through suspiciously small IO calls. 
static inline bool isTinyIOTelemetryLikely( const STelemetry& telemetry, const uint64_t payloadBytes, - const uint64_t bigPayloadThresholdBytes = (1ull << 20), - const uint64_t lowAvgBytesThreshold = 1024ull, - const uint64_t tinyChunkBytesThreshold = 64ull, - const uint64_t tinyChunkCallsThreshold = 1024ull) + const uint64_t bigPayloadThresholdBytes = (1ull << 20), // Default 1 MiB. + const uint64_t lowAvgBytesThreshold = 1024ull, // Default 1 KiB. + const uint64_t tinyChunkBytesThreshold = 64ull, // Default 64 B. + const uint64_t tinyChunkCallsThreshold = 1024ull) // Default 1024 calls. { if (payloadBytes <= bigPayloadThresholdBytes) return false; @@ -67,6 +71,7 @@ class SInterchangeIOCommon (minBytes < tinyChunkBytesThreshold && telemetry.callCount > tinyChunkCallsThreshold); } + // Same tiny-io heuristic but pulls thresholds from the resolved IO policy. static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) { return isTinyIOTelemetryLikely( @@ -78,6 +83,7 @@ class SInterchangeIOCommon ioPolicy.runtimeTuning.tinyIoMinCallCount); } + // Issues one read request and verifies that the full byte count was returned. static inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SReadTelemetry* ioTelemetry = nullptr) { if (!file || (!dst && bytes != 0ull)) @@ -92,17 +98,30 @@ class SInterchangeIOCommon return success && success.getBytesProcessed() == bytes; } - static inline bool readFileWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SReadTelemetry* ioTelemetry = nullptr) + // Reads a byte range using the resolved whole-file or chunked strategy. + // When ioTime is non-null it also reports wall time in TimeUnit. Default TimeUnit is milliseconds. 
+ template> + requires std::same_as> + static inline bool readFileWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SReadTelemetry* ioTelemetry = nullptr, TimeUnit* ioTime = nullptr) { + using clock_t = std::chrono::high_resolution_clock; + const auto ioStart = ioTime ? clock_t::now() : clock_t::time_point{}; + auto finalize = [&](const bool ok) -> bool + { + if (ioTime) + *ioTime = std::chrono::duration_cast(clock_t::now() - ioStart); + return ok; + }; + if (!file || (!dst && bytes != 0ull)) - return false; + return finalize(false); if (bytes == 0ull) - return true; + return finalize(true); switch (ioPlan.strategy) { case SResolvedFileIOPolicy::Strategy::WholeFile: - return readFileExact(file, dst, offset, bytes, ioTelemetry); + return finalize(readFileExact(file, dst, offset, bytes, ioTelemetry)); case SResolvedFileIOPolicy::Strategy::Chunked: default: { @@ -122,63 +141,74 @@ class SInterchangeIOCommon ioTelemetry->account(processed); bytesRead += processed; } - return true; + return finalize(true); } } } - static inline bool readFileWithPolicyTimed(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, double* ioMs = nullptr, SReadTelemetry* ioTelemetry = nullptr) + // Describes one contiguous output buffer written as part of a larger stream. 
+ struct SBufferRange { - using clock_t = std::chrono::high_resolution_clock; - const auto ioStart = clock_t::now(); - const bool ok = readFileWithPolicy(file, dst, offset, bytes, ioPlan, ioTelemetry); - if (ioMs) - *ioMs = std::chrono::duration(clock_t::now() - ioStart).count(); - return ok; - } + const uint8_t* data = nullptr; + size_t byteCount = 0ull; + }; - static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) + // Writes one or more buffers sequentially at fileOffset and advances it on success. + static inline bool writeBuffersWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { - if (!file || (!data && byteCount != 0ull)) + if (!file) return false; - if (byteCount == 0ull) - return true; - size_t writtenTotal = 0ull; const uint64_t chunkSizeBytes = ioPlan.chunkSizeBytes(); - while (writtenTotal < byteCount) + for (const auto& buffer : buffers) { - const size_t toWrite = - ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? - (byteCount - writtenTotal) : - static_cast(std::min(chunkSizeBytes, byteCount - writtenTotal)); - system::IFile::success_t success; - file->write(success, data + writtenTotal, fileOffset + writtenTotal, toWrite); - if (!success) - return false; - const size_t written = success.getBytesProcessed(); - if (written == 0ull) + if (!buffer.data && buffer.byteCount != 0ull) return false; - if (ioTelemetry) - ioTelemetry->account(written); - writtenTotal += written; + if (buffer.byteCount == 0ull) + continue; + + size_t writtenTotal = 0ull; + while (writtenTotal < buffer.byteCount) + { + const size_t toWrite = + ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? 
+ (buffer.byteCount - writtenTotal) : + static_cast(std::min(chunkSizeBytes, buffer.byteCount - writtenTotal)); + system::IFile::success_t success; + file->write(success, buffer.data + writtenTotal, fileOffset + writtenTotal, toWrite); + if (!success) + return false; + const size_t written = success.getBytesProcessed(); + if (written == 0ull) + return false; + if (ioTelemetry) + ioTelemetry->account(written); + writtenTotal += written; + } + fileOffset += writtenTotal; } - fileOffset += writtenTotal; return true; } - static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) + // Writes one or more buffers starting from file offset 0. + static inline bool writeBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, SWriteTelemetry* ioTelemetry = nullptr) { size_t fileOffset = 0ull; - return writeFileWithPolicyAtOffset(file, ioPlan, data, byteCount, fileOffset, ioTelemetry); + return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } - static inline bool writeTwoBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* dataA, size_t byteCountA, const uint8_t* dataB, size_t byteCountB, SWriteTelemetry* ioTelemetry = nullptr) + // Single-buffer convenience wrapper over writeBuffersWithPolicyAtOffset. 
+ static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { - size_t fileOffset = 0ull; - if (!writeFileWithPolicyAtOffset(file, ioPlan, dataA, byteCountA, fileOffset, ioTelemetry)) - return false; - return writeFileWithPolicyAtOffset(file, ioPlan, dataB, byteCountB, fileOffset, ioTelemetry); + const SBufferRange buffers[] = { { .data = data, .byteCount = byteCount } }; + return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); + } + + // Single-buffer convenience wrapper over writeBuffersWithPolicy. + static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) + { + const SBufferRange buffers[] = { { .data = data, .byteCount = byteCount } }; + return writeBuffersWithPolicy(file, ioPlan, buffers, ioTelemetry); } }; diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 7c5656cc6f..62a5fd5448 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -72,7 +72,7 @@ inline bool parseObjFloat(const char*& ptr, const char* const end, float& out) bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry& ioTelemetry) { - return SInterchangeIOCommon::readFileWithPolicyTimed(file, reinterpret_cast(dst), 0ull, byteCount, ioPlan, nullptr, &ioTelemetry); + return SInterchangeIOCommon::readFileWithPolicy(file, reinterpret_cast(dst), 0ull, byteCount, ioPlan, &ioTelemetry); } inline bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint32_t& out) diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 
a5cbeec7ad..171840772e 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -642,14 +642,12 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } outputBytes = outputSize; - writeOk = SInterchangeIOCommon::writeTwoBuffersWithPolicy( - file, - ioPlan, - reinterpret_cast(header.data()), - header.size(), - body.data(), - body.size(), - &ioTelemetry); + const SInterchangeIOCommon::SBufferRange writeBuffers[] = + { + { .data = reinterpret_cast(header.data()), .byteCount = header.size() }, + { .data = body.data(), .byteCount = body.size() } + }; + writeOk = SInterchangeIOCommon::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); if (SInterchangeIOCommon::isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) @@ -698,14 +696,12 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } outputBytes = outputSize; - writeOk = SInterchangeIOCommon::writeTwoBuffersWithPolicy( - file, - ioPlan, - reinterpret_cast(header.data()), - header.size(), - reinterpret_cast(body.data()), - body.size(), - &ioTelemetry); + const SInterchangeIOCommon::SBufferRange writeBuffers[] = + { + { .data = reinterpret_cast(header.data()), .byteCount = header.size() }, + { .data = reinterpret_cast(body.data()), .byteCount = body.size() } + }; + writeOk = SInterchangeIOCommon::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); if (SInterchangeIOCommon::isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) From 4a6920eb6d375594423f910a6999c7efd7825154 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 14:23:33 +0100 Subject: [PATCH 063/118] Rename interchange IO 
helper --- ...InterchangeIOCommon.h => SInterchangeIO.h} | 12 +++++------ .../asset/interchange/COBJMeshFileLoader.cpp | 6 +++--- src/nbl/asset/interchange/COBJMeshWriter.cpp | 6 +++--- .../asset/interchange/CPLYMeshFileLoader.cpp | 4 ++-- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 14 ++++++------- .../asset/interchange/CSTLMeshFileLoader.cpp | 20 +++++++++---------- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 6 +++--- 7 files changed, 34 insertions(+), 34 deletions(-) rename include/nbl/asset/interchange/{SInterchangeIOCommon.h => SInterchangeIO.h} (96%) diff --git a/include/nbl/asset/interchange/SInterchangeIOCommon.h b/include/nbl/asset/interchange/SInterchangeIO.h similarity index 96% rename from include/nbl/asset/interchange/SInterchangeIOCommon.h rename to include/nbl/asset/interchange/SInterchangeIO.h index 91a48ef89f..08998e5ea8 100644 --- a/include/nbl/asset/interchange/SInterchangeIOCommon.h +++ b/include/nbl/asset/interchange/SInterchangeIO.h @@ -1,8 +1,8 @@ // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_ASSET_S_INTERCHANGE_IO_COMMON_H_INCLUDED_ -#define _NBL_ASSET_S_INTERCHANGE_IO_COMMON_H_INCLUDED_ +#ifndef _NBL_ASSET_S_INTERCHANGE_IO_H_INCLUDED_ +#define _NBL_ASSET_S_INTERCHANGE_IO_H_INCLUDED_ #include "nbl/asset/interchange/SFileIOPolicy.h" @@ -20,7 +20,7 @@ namespace nbl::asset { -class SInterchangeIOCommon +class SInterchangeIO { public: // Tracks IO call count and byte distribution for tiny-io diagnostics. 
@@ -212,9 +212,9 @@ class SInterchangeIOCommon } }; -using SFileIOTelemetry = SInterchangeIOCommon::STelemetry; -using SFileReadTelemetry = SInterchangeIOCommon::SReadTelemetry; -using SFileWriteTelemetry = SInterchangeIOCommon::SWriteTelemetry; +using SFileIOTelemetry = SInterchangeIO::STelemetry; +using SFileReadTelemetry = SInterchangeIO::SReadTelemetry; +using SFileWriteTelemetry = SInterchangeIO::SWriteTelemetry; } diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 62a5fd5448..851e939b79 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -9,7 +9,7 @@ #include "nbl/asset/ICPUGeometryCollection.h" #include "nbl/asset/interchange/SGeometryContentHash.h" #include "nbl/asset/interchange/SGeometryLoaderCommon.h" -#include "nbl/asset/interchange/SInterchangeIOCommon.h" +#include "nbl/asset/interchange/SInterchangeIO.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" #include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" @@ -72,7 +72,7 @@ inline bool parseObjFloat(const char*& ptr, const char* const end, float& out) bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry& ioTelemetry) { - return SInterchangeIOCommon::readFileWithPolicy(file, reinterpret_cast(dst), 0ull, byteCount, ioPlan, &ioTelemetry); + return SInterchangeIO::readFileWithPolicy(file, reinterpret_cast(dst), 0ull, byteCount, ioPlan, &ioTelemetry); } inline bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint32_t& out) @@ -1048,7 +1048,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as faceFallbackTokenCountSum += loaded.faceFallbackTokenCount; } - if (SInterchangeIOCommon::isTinyIOTelemetryLikely(ioTelemetry, static_cast(filesize), _params.ioPolicy)) + if 
(SInterchangeIO::isTinyIOTelemetryLikely(ioTelemetry, static_cast(filesize), _params.ioPolicy)) { _params.logger.log( "OBJ loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 14a7dde610..ef7eae3eb3 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -4,7 +4,7 @@ #include "nbl/asset/interchange/COBJMeshWriter.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" -#include "nbl/asset/interchange/SInterchangeIOCommon.h" +#include "nbl/asset/interchange/SInterchangeIO.h" #include "SOBJPolygonGeometryAuxLayout.h" #ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ @@ -409,10 +409,10 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return false; } - const bool writeOk = SInterchangeIOCommon::writeFileWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size(), &ioTelemetry); + const bool writeOk = SInterchangeIO::writeFileWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size(), &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if (SInterchangeIOCommon::isTinyIOTelemetryLikely(ioTelemetry, static_cast(output.size()), _params.ioPolicy)) + if (SInterchangeIO::isTinyIOTelemetryLikely(ioTelemetry, static_cast(output.size()), _params.ioPolicy)) { _params.logger.log( "OBJ writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 131506670e..468725fdd1 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -7,7 +7,7 @@ #include "CPLYMeshFileLoader.h" #include "nbl/asset/interchange/SGeometryContentHash.h" #include "nbl/asset/interchange/SGeometryLoaderCommon.h" -#include 
"nbl/asset/interchange/SInterchangeIOCommon.h" +#include "nbl/asset/interchange/SInterchangeIO.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/IAssetManager.h" #include "nbl/asset/metadata/CPLYMetadata.h" @@ -2153,7 +2153,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa .totalBytes = ctx.readBytesTotal, .minBytes = ctx.readMinBytes }; - if (SInterchangeIOCommon::isTinyIOTelemetryLikely(ioTelemetry, fileSize, _params.ioPolicy)) + if (SInterchangeIO::isTinyIOTelemetryLikely(ioTelemetry, fileSize, _params.ioPolicy)) { _params.logger.log( "PLY loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 171840772e..5e569f9466 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -5,7 +5,7 @@ #include "CPLYMeshWriter.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" -#include "nbl/asset/interchange/SInterchangeIOCommon.h" +#include "nbl/asset/interchange/SInterchangeIO.h" #include "SPLYPolygonGeometryAuxLayout.h" #ifdef _NBL_COMPILE_WITH_PLY_WRITER_ @@ -642,15 +642,15 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } outputBytes = outputSize; - const SInterchangeIOCommon::SBufferRange writeBuffers[] = + const SInterchangeIO::SBufferRange writeBuffers[] = { { .data = reinterpret_cast(header.data()), .byteCount = header.size() }, { .data = body.data(), .byteCount = body.size() } }; - writeOk = SInterchangeIOCommon::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); + writeOk = SInterchangeIO::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if (SInterchangeIOCommon::isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) + if 
(SInterchangeIO::isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) { _params.logger.log( "PLY writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -696,15 +696,15 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } outputBytes = outputSize; - const SInterchangeIOCommon::SBufferRange writeBuffers[] = + const SInterchangeIO::SBufferRange writeBuffers[] = { { .data = reinterpret_cast(header.data()), .byteCount = header.size() }, { .data = reinterpret_cast(body.data()), .byteCount = body.size() } }; - writeOk = SInterchangeIOCommon::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); + writeOk = SInterchangeIO::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if (SInterchangeIOCommon::isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) + if (SInterchangeIO::isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) { _params.logger.log( "PLY writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index fd76921b54..3021dc91e1 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -9,7 +9,7 @@ #include "nbl/asset/interchange/SGeometryContentHash.h" #include "nbl/asset/interchange/SGeometryLoaderCommon.h" -#include "nbl/asset/interchange/SInterchangeIOCommon.h" +#include "nbl/asset/interchange/SInterchangeIO.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/format/convertColor.h" #include "nbl/asset/utils/SGeometryNormalCommon.h" @@ -214,7 +214,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa else { wholeFilePayload.resize(filesize + 1ull); - if 
(!SInterchangeIOCommon::readFileExact(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, &context.ioTelemetry)) + if (!SInterchangeIO::readFileExact(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, &context.ioTelemetry)) return {}; wholeFilePayload[filesize] = 0u; wholeFileData = wholeFilePayload.data(); @@ -234,7 +234,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } else { - hasPrefix = filesize >= SSTLContext::BinaryPrefixBytes && SInterchangeIOCommon::readFileExact(context.inner.mainFile, prefix.data(), 0ull, SSTLContext::BinaryPrefixBytes, &context.ioTelemetry); + hasPrefix = filesize >= SSTLContext::BinaryPrefixBytes && SInterchangeIO::readFileExact(context.inner.mainFile, prefix.data(), 0ull, SSTLContext::BinaryPrefixBytes, &context.ioTelemetry); } bool startsWithSolid = false; if (hasPrefix) @@ -246,7 +246,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa char header[SSTLContext::TextProbeBytes] = {}; if (wholeFileData) std::memcpy(header, wholeFileData, sizeof(header)); - else if (!SInterchangeIOCommon::readFileExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) + else if (!SInterchangeIO::readFileExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) return {}; startsWithSolid = (std::strncmp(header, "solid ", SSTLContext::TextProbeBytes) == 0); } @@ -294,7 +294,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa uint32_t triangleCount32 = binaryTriCountFromDetect; if (!hasBinaryTriCountFromDetect) { - if (!SInterchangeIOCommon::readFileExact(context.inner.mainFile, &triangleCount32, SSTLContext::BinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) + if (!SInterchangeIO::readFileExact(context.inner.mainFile, &triangleCount32, SSTLContext::BinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) return {}; } @@ -313,7 +313,7 @@ 
SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa { core::vector payload; payload.resize(dataSize); - if (!SInterchangeIOCommon::readFileWithPolicy(context.inner.mainFile, payload.data(), SSTLContext::BinaryPrefixBytes, dataSize, ioPlan, &context.ioTelemetry)) + if (!SInterchangeIO::readFileWithPolicy(context.inner.mainFile, payload.data(), SSTLContext::BinaryPrefixBytes, dataSize, ioPlan, &context.ioTelemetry)) return {}; wholeFilePayload = std::move(payload); payloadData = wholeFilePayload.data(); @@ -668,7 +668,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!wholeFileData) { wholeFilePayload.resize(filesize + 1ull); - if (!SInterchangeIOCommon::readFileWithPolicy(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, ioPlan, &context.ioTelemetry)) + if (!SInterchangeIO::readFileWithPolicy(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, ioPlan, &context.ioTelemetry)) return {}; wholeFilePayload[filesize] = 0u; wholeFileData = wholeFilePayload.data(); @@ -769,7 +769,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } const uint64_t ioMinRead = context.ioTelemetry.getMinOrZero(); const uint64_t ioAvgRead = context.ioTelemetry.getAvgOrZero(); - if (SInterchangeIOCommon::isTinyIOTelemetryLikely(context.ioTelemetry, static_cast(filesize), _params.ioPolicy)) + if (SInterchangeIO::isTinyIOTelemetryLikely(context.ioTelemetry, static_cast(filesize), _params.ioPolicy)) { _params.logger.log( "STL loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", @@ -808,13 +808,13 @@ bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste if (fileSize < SSTLContext::BinaryPrefixBytes) { char header[SSTLContext::TextProbeBytes] = {}; - if (!SInterchangeIOCommon::readFileExact(_file, header, 0ull, sizeof(header))) + if (!SInterchangeIO::readFileExact(_file, header, 0ull, sizeof(header))) return false; return 
std::strncmp(header, "solid ", SSTLContext::TextProbeBytes) == 0; } std::array prefix = {}; - if (!SInterchangeIOCommon::readFileExact(_file, prefix.data(), 0ull, prefix.size())) + if (!SInterchangeIO::readFileExact(_file, prefix.data(), 0ull, prefix.size())) return false; uint32_t triangleCount = 0u; diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index b04f39f132..b221c75ea2 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -7,7 +7,7 @@ #include "CSTLMeshWriter.h" #include "nbl/asset/format/convertColor.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" -#include "nbl/asset/interchange/SInterchangeIOCommon.h" +#include "nbl/asset/interchange/SInterchangeIO.h" #include "SSTLPolygonGeometryAuxLayout.h" #include @@ -158,7 +158,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const uint64_t ioMinWrite = context.writeTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = context.writeTelemetry.getAvgOrZero(); - if (SInterchangeIOCommon::isTinyIOTelemetryLikely(context.writeTelemetry, context.fileOffset, _params.ioPolicy)) + if (SInterchangeIO::isTinyIOTelemetryLikely(context.writeTelemetry, context.fileOffset, _params.ioPolicy)) { _params.logger.log( "STL writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", @@ -739,7 +739,7 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) return false; } - const bool writeOk = SInterchangeIOCommon::writeFileWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, &context->writeTelemetry); + const bool writeOk = SInterchangeIO::writeFileWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, &context->writeTelemetry); if (writeOk) context->fileOffset += outputSize; return writeOk; From b9dd4fa54b2eefc4b325cc7222047fa1c5544a8c Mon Sep 17 00:00:00 2001 From: Arkadiusz 
Lachowicz Date: Sat, 7 Mar 2026 15:44:28 +0100 Subject: [PATCH 064/118] Refine loader runtime tuning --- .../asset/interchange/SLoaderRuntimeTuning.h | 605 +++++++++--------- 1 file changed, 304 insertions(+), 301 deletions(-) diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index e6d8672d7f..0d2d52bc2a 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -4,19 +4,18 @@ #ifndef _NBL_ASSET_S_LOADER_RUNTIME_TUNING_H_INCLUDED_ #define _NBL_ASSET_S_LOADER_RUNTIME_TUNING_H_INCLUDED_ - #include "nbl/asset/interchange/SFileIOPolicy.h" #include #include #include +#include #include #include #include #include #include - namespace nbl::asset { @@ -65,6 +64,7 @@ struct SLoaderRuntimeTuningResult struct SLoaderRuntimeTuner { private: + // Aggregated timings collected while probing one worker-count candidate. struct SBenchmarkSampleStats { uint64_t medianNs = 0ull; @@ -73,334 +73,337 @@ struct SLoaderRuntimeTuner uint64_t totalNs = 0ull; }; - static inline uint64_t benchmarkSample(const uint8_t* sampleData, uint64_t sampleBytes, size_t workerCount, uint32_t passes); - static inline SBenchmarkSampleStats benchmarkSampleStats(const uint8_t* sampleData, uint64_t sampleBytes, size_t workerCount, uint32_t passes, uint32_t observations); - static inline void appendCandidate(std::vector& dst, size_t candidate); - public: - template - static void dispatchWorkers(const size_t workerCount, Fn&& fn); - - static constexpr uint64_t ceilDiv(const uint64_t numerator, const uint64_t denominator) - { - return (numerator + denominator - 1ull) / denominator; - } - - static inline uint64_t resolveSampleBytes(const SFileIOPolicy& ioPolicy, const uint64_t knownInputBytes) - { - if (knownInputBytes == 0ull) - return 0ull; - - const uint64_t minSampleBytes = std::max(1ull, ioPolicy.runtimeTuning.minSampleBytes); - const uint64_t maxSampleBytes = 
std::max(minSampleBytes, ioPolicy.runtimeTuning.maxSampleBytes); - const uint64_t cappedMin = std::min(minSampleBytes, knownInputBytes); - const uint64_t cappedMax = std::min(maxSampleBytes, knownInputBytes); - const uint64_t adaptive = std::max(knownInputBytes / 64ull, cappedMin); - return std::clamp(adaptive, cappedMin, cappedMax); - } - - static inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) - { - const uint64_t thresholdBytes = std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); - return inputBytes <= thresholdBytes; - } - - static inline size_t resolveHardwareThreads(const uint32_t requested = 0u) - { - const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); - return hw ? hw : 1ull; - } - - static inline size_t resolveHardMaxWorkers(const size_t hardwareThreads, const uint32_t workerHeadroom) - { - const size_t hw = std::max(1ull, hardwareThreads); - const size_t minWorkers = hw >= 2ull ? 2ull : 1ull; - const size_t headroom = static_cast(workerHeadroom); - if (headroom == 0ull) - return hw; - if (hw <= headroom) - return minWorkers; - return std::max(minWorkers, hw - headroom); - } - - static inline SLoaderRuntimeTuningResult tune(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request); -}; + template + requires std::invocable + static void dispatchWorkers(const size_t workerCount, Fn&& fn) + { + if (workerCount <= 1ull) + { + fn(0ull); + return; + } -template -void SLoaderRuntimeTuner::dispatchWorkers(const size_t workerCount, Fn&& fn) -{ - if (workerCount <= 1ull) - { - fn(0ull); - return; - } - - std::vector workers; - workers.reserve(workerCount - 1ull); - for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) - workers.emplace_back([&fn, workerIx]() { fn(workerIx); }); - fn(0ull); -} + // std::jthread starts execution in its constructor, so emplace_back launches workers 1..N-1 immediately. 
+ // The current thread runs worker 0 and std::jthread joins automatically when the local vector is destroyed. + std::vector workers; + workers.reserve(workerCount - 1ull); + for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) + workers.emplace_back([&fn, workerIx]() { fn(workerIx); }); + fn(0ull); + } -inline uint64_t SLoaderRuntimeTuner::benchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes) -{ - if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) - return 0ull; - - const uint32_t passCount = std::max(1u, passes); - std::vector partial(workerCount, 0ull); - uint64_t elapsedNs = 0ull; - using clock_t = std::chrono::steady_clock; - for (uint32_t passIx = 0u; passIx < passCount; ++passIx) - { - const auto passStart = clock_t::now(); - SLoaderRuntimeTuner::dispatchWorkers(workerCount, [&](const size_t workerIx) + // Integer ceil division. Callers must pass a non-zero denominator. + static constexpr uint64_t ceilDiv(const uint64_t numerator, const uint64_t denominator) { - const uint64_t begin = (sampleBytes * workerIx) / workerCount; - const uint64_t end = (sampleBytes * (workerIx + 1ull)) / workerCount; - const uint8_t* ptr = sampleData + begin; - uint64_t local = 0ull; - for (uint64_t i = 0ull, count = end - begin; i < count; ++i) - local += static_cast(ptr[i]); - partial[workerIx] ^= local; - }); - elapsedNs += static_cast(std::chrono::duration_cast(clock_t::now() - passStart).count()); - } - - uint64_t reduced = 0ull; - for (const uint64_t v : partial) - reduced ^= v; - static std::atomic sink = 0ull; - sink.fetch_xor(reduced, std::memory_order_relaxed); - return elapsedNs; -} - -inline SLoaderRuntimeTuner::SBenchmarkSampleStats SLoaderRuntimeTuner::benchmarkSampleStats( - const uint8_t* const sampleData, - const uint64_t sampleBytes, - const size_t workerCount, - const uint32_t passes, - const uint32_t observations -) -{ - SBenchmarkSampleStats stats = {}; - if 
(!sampleData || sampleBytes == 0ull || workerCount == 0ull) - return stats; - - const uint32_t observationCount = std::max(1u, observations); - std::vector samples; - samples.reserve(observationCount); - - benchmarkSample(sampleData, sampleBytes, workerCount, 1u); - for (uint32_t obsIx = 0u; obsIx < observationCount; ++obsIx) - { - const uint64_t elapsedNs = benchmarkSample(sampleData, sampleBytes, workerCount, passes); - if (elapsedNs == 0ull) - continue; - stats.totalNs += elapsedNs; - samples.push_back(elapsedNs); - } - - if (samples.empty()) - return SBenchmarkSampleStats{}; - - std::sort(samples.begin(), samples.end()); - stats.minNs = samples.front(); - stats.maxNs = samples.back(); - if ((samples.size() & 1ull) != 0ull) - stats.medianNs = samples[samples.size() / 2ull]; - else - stats.medianNs = (samples[samples.size() / 2ull - 1ull] + samples[samples.size() / 2ull]) / 2ull; - return stats; -} + return (numerator + denominator - 1ull) / denominator; + } -inline void SLoaderRuntimeTuner::appendCandidate(std::vector& dst, const size_t candidate) -{ - if (candidate == 0ull) - return; - if (std::find(dst.begin(), dst.end(), candidate) == dst.end()) - dst.push_back(candidate); -} + template + requires std::same_as> + // Measures one sampled memory-touch pass configuration and returns aggregate wall time across all passes. 
+ static inline TimeUnit benchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes) + { + if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) + return TimeUnit::zero(); + + const uint32_t passCount = std::max(1u, passes); + std::vector partial(workerCount, 0ull); + uint64_t elapsedNs = 0ull; + using clock_t = std::chrono::steady_clock; + for (uint32_t passIx = 0u; passIx < passCount; ++passIx) + { + const auto passStart = clock_t::now(); + SLoaderRuntimeTuner::dispatchWorkers(workerCount, [&](const size_t workerIx) + { + const uint64_t begin = (sampleBytes * workerIx) / workerCount; + const uint64_t end = (sampleBytes * (workerIx + 1ull)) / workerCount; + const uint8_t* ptr = sampleData + begin; + uint64_t local = 0ull; + for (uint64_t i = 0ull, count = end - begin; i < count; ++i) + local += static_cast(ptr[i]); + partial[workerIx] ^= local; + }); + elapsedNs += static_cast(std::chrono::duration_cast(clock_t::now() - passStart).count()); + } -SLoaderRuntimeTuningResult SLoaderRuntimeTuner::tune(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request) -{ - using RTMode = SFileIOPolicy::SRuntimeTuning::Mode; - SLoaderRuntimeTuningResult result = {}; - if (request.totalWorkUnits == 0ull) - { - result.chunkWorkUnits = 0ull; - result.chunkCount = 0ull; - return result; - } - - const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(request.hardwareThreads); - size_t maxWorkers = hw; - if (request.hardMaxWorkers > 0u) - maxWorkers = std::min(maxWorkers, static_cast(request.hardMaxWorkers)); - if (ioPolicy.runtimeTuning.maxWorkers > 0u) - maxWorkers = std::min(maxWorkers, static_cast(ioPolicy.runtimeTuning.maxWorkers)); - maxWorkers = std::max(1ull, maxWorkers); - - const uint64_t minWorkUnitsPerWorker = std::max(1ull, request.minWorkUnitsPerWorker); - const uint64_t minBytesPerWorker = std::max(1ull, request.minBytesPerWorker); - const size_t maxByWork = 
static_cast(SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, minWorkUnitsPerWorker)); - const size_t maxByBytes = request.inputBytes ? static_cast(SLoaderRuntimeTuner::ceilDiv(request.inputBytes, minBytesPerWorker)) : maxWorkers; - const bool heuristicEnabled = ioPolicy.runtimeTuning.mode != RTMode::Sequential; - const bool hybridEnabled = ioPolicy.runtimeTuning.mode == RTMode::Hybrid; + uint64_t reduced = 0ull; + for (const uint64_t v : partial) + reduced ^= v; + static std::atomic sink = 0ull; + sink.fetch_xor(reduced, std::memory_order_relaxed); + return std::chrono::duration_cast(std::chrono::nanoseconds(elapsedNs)); + } - size_t workerCount = 1ull; - if (heuristicEnabled) - workerCount = std::max(1ull, std::min({ maxWorkers, maxByWork, maxByBytes })); - - const size_t targetChunksPerWorker = std::max( - 1ull, - static_cast(request.targetChunksPerWorker ? request.targetChunksPerWorker : ioPolicy.runtimeTuning.targetChunksPerWorker)); - if (workerCount > 1ull && heuristicEnabled) - { - const double maxOverheadRatio = std::max(0.0, static_cast(ioPolicy.runtimeTuning.maxOverheadRatio)); - const double minExpectedGainRatio = std::clamp(static_cast(ioPolicy.runtimeTuning.minExpectedGainRatio), 0.0, 0.99); - while (workerCount > 1ull) + // Warms up once and then collects timing observations for one worker-count candidate. 
+ static inline SBenchmarkSampleStats benchmarkSampleStats( + const uint8_t* const sampleData, + const uint64_t sampleBytes, + const size_t workerCount, + const uint32_t passes, + const uint32_t observations + ) { - const double idealGain = 1.0 - (1.0 / static_cast(workerCount)); - const double overheadRatio = static_cast(workerCount * targetChunksPerWorker) / static_cast(std::max(1ull, request.totalWorkUnits)); - if (idealGain < minExpectedGainRatio || overheadRatio > maxOverheadRatio) + SBenchmarkSampleStats stats = {}; + if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) + return stats; + + const uint32_t observationCount = std::max(1u, observations); + std::vector samples; + samples.reserve(observationCount); + + benchmarkSample(sampleData, sampleBytes, workerCount, 1u); + for (uint32_t obsIx = 0u; obsIx < observationCount; ++obsIx) { - --workerCount; - continue; + const uint64_t elapsedNs = static_cast(benchmarkSample(sampleData, sampleBytes, workerCount, passes).count()); + if (elapsedNs == 0ull) + continue; + stats.totalNs += elapsedNs; + samples.push_back(elapsedNs); } - break; + + if (samples.empty()) + return SBenchmarkSampleStats{}; + + std::sort(samples.begin(), samples.end()); + stats.minNs = samples.front(); + stats.maxNs = samples.back(); + if ((samples.size() & 1ull) != 0ull) + stats.medianNs = samples[samples.size() / 2ull]; + else + stats.medianNs = (samples[samples.size() / 2ull - 1ull] + samples[samples.size() / 2ull]) / 2ull; + return stats; } - } - - const size_t heuristicWorkerCount = std::max(1ull, workerCount); - if ( - heuristicEnabled && - hybridEnabled && - request.sampleData != nullptr && - request.sampleBytes > 0ull && - heuristicWorkerCount > 1ull && - maxWorkers > 1ull - ) - { - const uint64_t autoMinSamplingWorkUnits = std::max( - static_cast(targetChunksPerWorker) * 8ull, - static_cast(maxWorkers * targetChunksPerWorker)); - const uint64_t minSamplingWorkUnits = request.sampleMinWorkUnits ? 
- request.sampleMinWorkUnits : - (ioPolicy.runtimeTuning.samplingMinWorkUnits ? ioPolicy.runtimeTuning.samplingMinWorkUnits : autoMinSamplingWorkUnits); - if (request.totalWorkUnits >= minSamplingWorkUnits) + + // Keeps the candidate probe list unique while preserving insertion order. + static inline void appendCandidate(std::vector& dst, const size_t candidate) { - const double samplingBudgetRatio = std::clamp(static_cast(ioPolicy.runtimeTuning.samplingBudgetRatio), 0.0, 0.5); - uint64_t effectiveSampleBytes = request.sampleBytes; - if (request.inputBytes) - effectiveSampleBytes = std::min(effectiveSampleBytes, request.inputBytes); - if (effectiveSampleBytes > 0ull && samplingBudgetRatio > 0.0) + if (candidate == 0ull) + return; + if (std::find(dst.begin(), dst.end(), candidate) == dst.end()) + dst.push_back(candidate); + } + + // Chooses the sample byte budget used by hybrid tuning from the known input size and policy clamps. + static inline uint64_t resolveSampleBytes(const SFileIOPolicy& ioPolicy, const uint64_t knownInputBytes) + { + if (knownInputBytes == 0ull) + return 0ull; + + const uint64_t minSampleBytes = std::max(1ull, ioPolicy.runtimeTuning.minSampleBytes); + const uint64_t maxSampleBytes = std::max(minSampleBytes, ioPolicy.runtimeTuning.maxSampleBytes); + const uint64_t cappedMin = std::min(minSampleBytes, knownInputBytes); + const uint64_t cappedMax = std::min(maxSampleBytes, knownInputBytes); + const uint64_t adaptive = std::max(knownInputBytes / 64ull, cappedMin); + return std::clamp(adaptive, cappedMin, cappedMax); + } + + // Returns true when the hash build is small enough to stay on the caller thread. + static inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) + { + const uint64_t thresholdBytes = std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); + return inputBytes <= thresholdBytes; + } + + // Resolves the effective hardware thread count and always returns at least one worker. 
+ static inline size_t resolveHardwareThreads(const uint32_t requested = 0u) + { + const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); + return hw ? hw : 1ull; + } + + // Applies worker headroom while keeping at least two workers when parallel hardware is available. + static inline size_t resolveHardMaxWorkers(const size_t hardwareThreads, const uint32_t workerHeadroom) + { + const size_t hw = std::max(1ull, hardwareThreads); + const size_t minWorkers = hw >= 2ull ? 2ull : 1ull; + const size_t headroom = static_cast(workerHeadroom); + if (headroom == 0ull) + return hw; + if (hw <= headroom) + return minWorkers; + return std::max(minWorkers, hw - headroom); + } + + // Resolves worker and chunk counts for one stage using policy limits plus optional hybrid sampling. + static inline SLoaderRuntimeTuningResult tune(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request) + { + using RTMode = SFileIOPolicy::SRuntimeTuning::Mode; + SLoaderRuntimeTuningResult result = {}; + if (request.totalWorkUnits == 0ull) + { + result.chunkWorkUnits = 0ull; + result.chunkCount = 0ull; + return result; + } + + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(request.hardwareThreads); + size_t maxWorkers = hw; + if (request.hardMaxWorkers > 0u) + maxWorkers = std::min(maxWorkers, static_cast(request.hardMaxWorkers)); + if (ioPolicy.runtimeTuning.maxWorkers > 0u) + maxWorkers = std::min(maxWorkers, static_cast(ioPolicy.runtimeTuning.maxWorkers)); + maxWorkers = std::max(1ull, maxWorkers); + + const uint64_t minWorkUnitsPerWorker = std::max(1ull, request.minWorkUnitsPerWorker); + const uint64_t minBytesPerWorker = std::max(1ull, request.minBytesPerWorker); + const size_t maxByWork = static_cast(SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, minWorkUnitsPerWorker)); + const size_t maxByBytes = request.inputBytes ? 
static_cast(SLoaderRuntimeTuner::ceilDiv(request.inputBytes, minBytesPerWorker)) : maxWorkers; + const bool heuristicEnabled = ioPolicy.runtimeTuning.mode != RTMode::Sequential; + const bool hybridEnabled = ioPolicy.runtimeTuning.mode == RTMode::Hybrid; + + size_t workerCount = 1ull; + if (heuristicEnabled) + workerCount = std::max(1ull, std::min({ maxWorkers, maxByWork, maxByBytes })); + + const size_t targetChunksPerWorker = std::max( + 1ull, + static_cast(request.targetChunksPerWorker ? request.targetChunksPerWorker : ioPolicy.runtimeTuning.targetChunksPerWorker)); + if (workerCount > 1ull && heuristicEnabled) { - // keep probing lightweight: sample fraction scales with input and parallelism - if (request.inputBytes > 0ull) + const double maxOverheadRatio = std::max(0.0, static_cast(ioPolicy.runtimeTuning.maxOverheadRatio)); + const double minExpectedGainRatio = std::clamp(static_cast(ioPolicy.runtimeTuning.minExpectedGainRatio), 0.0, 0.99); + while (workerCount > 1ull) { - const uint64_t sampleDivisor = std::max( - 4ull, - static_cast(heuristicWorkerCount) * static_cast(targetChunksPerWorker)); - const uint64_t adaptiveSampleBytes = std::max(1ull, request.inputBytes / sampleDivisor); - effectiveSampleBytes = std::min(effectiveSampleBytes, adaptiveSampleBytes); + const double idealGain = 1.0 - (1.0 / static_cast(workerCount)); + const double overheadRatio = static_cast(workerCount * targetChunksPerWorker) / static_cast(std::max(1ull, request.totalWorkUnits)); + if (idealGain < minExpectedGainRatio || overheadRatio > maxOverheadRatio) + { + --workerCount; + continue; + } + break; } + } - const uint32_t samplePasses = request.samplePasses ? request.samplePasses : ioPolicy.runtimeTuning.samplingPasses; - uint32_t maxCandidates = request.sampleMaxCandidates ? 
request.sampleMaxCandidates : ioPolicy.runtimeTuning.samplingMaxCandidates; - maxCandidates = std::max(2u, maxCandidates); - - std::vector candidates; - candidates.reserve(maxCandidates); - appendCandidate(candidates, heuristicWorkerCount); - appendCandidate(candidates, heuristicWorkerCount > 1ull ? (heuristicWorkerCount - 1ull) : 1ull); - appendCandidate(candidates, std::min(maxWorkers, heuristicWorkerCount + 1ull)); - if (heuristicWorkerCount > 2ull) - appendCandidate(candidates, heuristicWorkerCount - 2ull); - if (heuristicWorkerCount + 2ull <= maxWorkers) - appendCandidate(candidates, heuristicWorkerCount + 2ull); - if (candidates.size() > maxCandidates) - candidates.resize(maxCandidates); - - // probe heuristic first and only continue when budget can amortize additional probes - const auto heuristicStatsProbe = benchmarkSampleStats( - request.sampleData, effectiveSampleBytes, heuristicWorkerCount, samplePasses, 2u); - if (heuristicStatsProbe.medianNs > 0ull) + const size_t heuristicWorkerCount = std::max(1ull, workerCount); + if ( + heuristicEnabled && + hybridEnabled && + request.sampleData != nullptr && + request.sampleBytes > 0ull && + heuristicWorkerCount > 1ull && + maxWorkers > 1ull + ) + { + const uint64_t autoMinSamplingWorkUnits = std::max( + static_cast(targetChunksPerWorker) * 8ull, + static_cast(maxWorkers * targetChunksPerWorker)); + const uint64_t minSamplingWorkUnits = request.sampleMinWorkUnits ? + request.sampleMinWorkUnits : + (ioPolicy.runtimeTuning.samplingMinWorkUnits ? ioPolicy.runtimeTuning.samplingMinWorkUnits : autoMinSamplingWorkUnits); + if (request.totalWorkUnits >= minSamplingWorkUnits) { - const double scale = request.inputBytes ? 
- (static_cast(request.inputBytes) / static_cast(effectiveSampleBytes)) : - 1.0; - const uint64_t estimatedFullNs = static_cast(static_cast(heuristicStatsProbe.medianNs) * std::max(1.0, scale)); - const uint64_t samplingBudgetNs = static_cast(static_cast(estimatedFullNs) * samplingBudgetRatio); - uint64_t spentNs = heuristicStatsProbe.totalNs; - const size_t alternativeCandidates = (candidates.size() > 0ull) ? (candidates.size() - 1ull) : 0ull; - if (alternativeCandidates > 0ull && spentNs < samplingBudgetNs) + const double samplingBudgetRatio = std::clamp(static_cast(ioPolicy.runtimeTuning.samplingBudgetRatio), 0.0, 0.5); + uint64_t effectiveSampleBytes = request.sampleBytes; + if (request.inputBytes) + effectiveSampleBytes = std::min(effectiveSampleBytes, request.inputBytes); + if (effectiveSampleBytes > 0ull && samplingBudgetRatio > 0.0) { - const uint64_t spareBudgetNs = samplingBudgetNs - spentNs; - const uint64_t estimatedEvalNs = std::max(1ull, heuristicStatsProbe.medianNs); - const uint64_t estimatedEvaluations = std::max(1ull, spareBudgetNs / estimatedEvalNs); - uint32_t observations = static_cast(std::clamp( - estimatedEvaluations / static_cast(alternativeCandidates), - 1ull, - 3ull)); - - SBenchmarkSampleStats bestStats = heuristicStatsProbe; - size_t bestWorker = heuristicWorkerCount; - - for (const size_t candidate : candidates) + // keep probing lightweight: sample fraction scales with input and parallelism + if (request.inputBytes > 0ull) { - if (candidate == heuristicWorkerCount) - continue; - if (spentNs >= samplingBudgetNs) - break; - const auto candidateStats = benchmarkSampleStats( - request.sampleData, effectiveSampleBytes, candidate, samplePasses, observations); - if (candidateStats.medianNs == 0ull) - continue; - spentNs += candidateStats.totalNs; - if (candidateStats.medianNs < bestStats.medianNs) - { - bestStats = candidateStats; - bestWorker = candidate; - } + const uint64_t sampleDivisor = std::max( + 4ull, + 
static_cast(heuristicWorkerCount) * static_cast(targetChunksPerWorker)); + const uint64_t adaptiveSampleBytes = std::max(1ull, request.inputBytes / sampleDivisor); + effectiveSampleBytes = std::min(effectiveSampleBytes, adaptiveSampleBytes); } - if (bestWorker != heuristicWorkerCount) + const uint32_t samplePasses = request.samplePasses ? request.samplePasses : ioPolicy.runtimeTuning.samplingPasses; + uint32_t maxCandidates = request.sampleMaxCandidates ? request.sampleMaxCandidates : ioPolicy.runtimeTuning.samplingMaxCandidates; + maxCandidates = std::max(2u, maxCandidates); + + std::vector candidates; + candidates.reserve(maxCandidates); + appendCandidate(candidates, heuristicWorkerCount); + appendCandidate(candidates, heuristicWorkerCount > 1ull ? (heuristicWorkerCount - 1ull) : 1ull); + appendCandidate(candidates, std::min(maxWorkers, heuristicWorkerCount + 1ull)); + if (heuristicWorkerCount > 2ull) + appendCandidate(candidates, heuristicWorkerCount - 2ull); + if (heuristicWorkerCount + 2ull <= maxWorkers) + appendCandidate(candidates, heuristicWorkerCount + 2ull); + if (candidates.size() > maxCandidates) + candidates.resize(maxCandidates); + + // probe heuristic first and only continue when budget can amortize additional probes + const auto heuristicStatsProbe = benchmarkSampleStats( + request.sampleData, effectiveSampleBytes, heuristicWorkerCount, samplePasses, 2u); + if (heuristicStatsProbe.medianNs > 0ull) { - const double gain = static_cast(heuristicStatsProbe.medianNs - bestStats.medianNs) / - static_cast(heuristicStatsProbe.medianNs); - const uint64_t heuristicSpan = heuristicStatsProbe.maxNs - heuristicStatsProbe.minNs; - const uint64_t bestSpan = bestStats.maxNs - bestStats.minNs; - const double heuristicNoise = static_cast(heuristicSpan) / - static_cast(std::max(1ull, heuristicStatsProbe.medianNs)); - const double bestNoise = static_cast(bestSpan) / - static_cast(std::max(1ull, bestStats.medianNs)); - const double requiredGain = std::max( - 
std::clamp(static_cast(ioPolicy.runtimeTuning.minExpectedGainRatio), 0.0, 0.99), - std::clamp(std::max(heuristicNoise, bestNoise) * 1.25, 0.0, 0.99)); - if (gain >= requiredGain) - workerCount = bestWorker; + const double scale = request.inputBytes ? + (static_cast(request.inputBytes) / static_cast(effectiveSampleBytes)) : + 1.0; + const uint64_t estimatedFullNs = static_cast(static_cast(heuristicStatsProbe.medianNs) * std::max(1.0, scale)); + const uint64_t samplingBudgetNs = static_cast(static_cast(estimatedFullNs) * samplingBudgetRatio); + uint64_t spentNs = heuristicStatsProbe.totalNs; + const size_t alternativeCandidates = (candidates.size() > 0ull) ? (candidates.size() - 1ull) : 0ull; + if (alternativeCandidates > 0ull && spentNs < samplingBudgetNs) + { + const uint64_t spareBudgetNs = samplingBudgetNs - spentNs; + const uint64_t estimatedEvalNs = std::max(1ull, heuristicStatsProbe.medianNs); + const uint64_t estimatedEvaluations = std::max(1ull, spareBudgetNs / estimatedEvalNs); + uint32_t observations = static_cast(std::clamp( + estimatedEvaluations / static_cast(alternativeCandidates), + 1ull, + 3ull)); + + SBenchmarkSampleStats bestStats = heuristicStatsProbe; + size_t bestWorker = heuristicWorkerCount; + + for (const size_t candidate : candidates) + { + if (candidate == heuristicWorkerCount) + continue; + if (spentNs >= samplingBudgetNs) + break; + const auto candidateStats = benchmarkSampleStats( + request.sampleData, effectiveSampleBytes, candidate, samplePasses, observations); + if (candidateStats.medianNs == 0ull) + continue; + spentNs += candidateStats.totalNs; + if (candidateStats.medianNs < bestStats.medianNs) + { + bestStats = candidateStats; + bestWorker = candidate; + } + } + + if (bestWorker != heuristicWorkerCount) + { + const double gain = static_cast(heuristicStatsProbe.medianNs - bestStats.medianNs) / + static_cast(heuristicStatsProbe.medianNs); + const uint64_t heuristicSpan = heuristicStatsProbe.maxNs - heuristicStatsProbe.minNs; + const 
uint64_t bestSpan = bestStats.maxNs - bestStats.minNs; + const double heuristicNoise = static_cast(heuristicSpan) / + static_cast(std::max(1ull, heuristicStatsProbe.medianNs)); + const double bestNoise = static_cast(bestSpan) / + static_cast(std::max(1ull, bestStats.medianNs)); + const double requiredGain = std::max( + std::clamp(static_cast(ioPolicy.runtimeTuning.minExpectedGainRatio), 0.0, 0.99), + std::clamp(std::max(heuristicNoise, bestNoise) * 1.25, 0.0, 0.99)); + if (gain >= requiredGain) + workerCount = bestWorker; + } + } } } } } - } - } - result.workerCount = std::max(1ull, workerCount); + result.workerCount = std::max(1ull, workerCount); - const uint64_t minChunkWorkUnits = std::max(1ull, request.minChunkWorkUnits); - uint64_t maxChunkWorkUnits = std::max(minChunkWorkUnits, request.maxChunkWorkUnits); - const uint64_t desiredChunkCount = static_cast(std::max(1ull, result.workerCount * targetChunksPerWorker)); - uint64_t chunkWorkUnits = SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, desiredChunkCount); - chunkWorkUnits = std::clamp(chunkWorkUnits, minChunkWorkUnits, maxChunkWorkUnits); + const uint64_t minChunkWorkUnits = std::max(1ull, request.minChunkWorkUnits); + uint64_t maxChunkWorkUnits = std::max(minChunkWorkUnits, request.maxChunkWorkUnits); + const uint64_t desiredChunkCount = static_cast(std::max(1ull, result.workerCount * targetChunksPerWorker)); + uint64_t chunkWorkUnits = SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, desiredChunkCount); + chunkWorkUnits = std::clamp(chunkWorkUnits, minChunkWorkUnits, maxChunkWorkUnits); - result.chunkWorkUnits = chunkWorkUnits; - result.chunkCount = static_cast(SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, chunkWorkUnits)); - return result; -} + result.chunkWorkUnits = chunkWorkUnits; + result.chunkCount = static_cast(SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, chunkWorkUnits)); + return result; + } +}; } - - #endif From 1e50ae0d5964ae312f9c36bc475c7d09f42131ff Mon Sep 17 00:00:00 
2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 15:59:54 +0100 Subject: [PATCH 065/118] Document interchange helpers --- include/nbl/asset/SBufferAdoption.h | 1 + include/nbl/asset/interchange/SFileIOPolicy.h | 3 +++ include/nbl/asset/interchange/SGeometryContentHash.h | 1 + include/nbl/asset/interchange/SGeometryLoaderCommon.h | 1 + include/nbl/asset/interchange/SGeometryWriterCommon.h | 5 +++++ include/nbl/asset/interchange/SInterchangeIO.h | 1 + include/nbl/asset/interchange/SLoaderRuntimeTuning.h | 3 +++ include/nbl/asset/utils/SGeometryNormalCommon.h | 1 + src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h | 1 + src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h | 1 + src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h | 1 + 11 files changed, 19 insertions(+) diff --git a/include/nbl/asset/SBufferAdoption.h b/include/nbl/asset/SBufferAdoption.h index 4e80c74235..3ea123a2a4 100644 --- a/include/nbl/asset/SBufferAdoption.h +++ b/include/nbl/asset/SBufferAdoption.h @@ -33,6 +33,7 @@ concept AdoptedBufferStorage = } +// Generic CPU-buffer adoption helper for owning contiguous storage such as std::vector or core::vector. class SBufferAdoption { public: diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index b4c833c96f..49e149bdaa 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -30,8 +30,10 @@ enum class EFileIOStrategy : uint8_t Chunked }; +// Requested IO policy shared by loaders, writers, and hash stages before file constraints are resolved. struct SFileIOPolicy { + // Runtime tuning knobs shared by loader parallelism and IO anomaly diagnostics. struct SRuntimeTuning { // Runtime tuning strategy for worker/chunk selection. @@ -150,6 +152,7 @@ struct SFileIOPolicy } }; +// Resolved IO plan chosen from SFileIOPolicy after considering file size, mapping, and staging limits. 
struct SResolvedFileIOPolicy { using Strategy = EFileIOStrategy; diff --git a/include/nbl/asset/interchange/SGeometryContentHash.h b/include/nbl/asset/interchange/SGeometryContentHash.h index 331fd9e1da..12a5c80567 100644 --- a/include/nbl/asset/interchange/SGeometryContentHash.h +++ b/include/nbl/asset/interchange/SGeometryContentHash.h @@ -13,6 +13,7 @@ namespace nbl::asset { +// Polygon-geometry content-hash helper built on top of CPolygonGeometryManipulator buffer hashing. class SPolygonGeometryContentHash { public: diff --git a/include/nbl/asset/interchange/SGeometryLoaderCommon.h b/include/nbl/asset/interchange/SGeometryLoaderCommon.h index 7060e6d404..a77917aa52 100644 --- a/include/nbl/asset/interchange/SGeometryLoaderCommon.h +++ b/include/nbl/asset/interchange/SGeometryLoaderCommon.h @@ -15,6 +15,7 @@ namespace nbl::asset { +// Loader-side helpers for building polygon-geometry data views backed by adopted CPU buffers. class SGeometryLoaderCommon { public: diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index 37dedb2424..e7d80b5d74 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -19,9 +19,11 @@ namespace nbl::asset { +// Writer-side helpers for flattening scene inputs and serializing polygon geometry views safely. class SGeometryWriterCommon { public: + // Shared write context propagated while flattening geometry collections and scenes. struct SWriteState { hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); @@ -30,17 +32,20 @@ class SGeometryWriterCommon uint32_t geometryIx = 0u; }; + // One polygon geometry scheduled for writing together with the transform and scene indices that produced it. struct SPolygonGeometryWriteItem : SWriteState { const ICPUPolygonGeometry* geometry = nullptr; }; + // Parameters used when expanding one geometry collection into polygon write items. 
struct SGeometryCollectionWriteParams : SWriteState { const ICPUGeometryCollection* collection = nullptr; }; template requires requires(Container& c, const SPolygonGeometryWriteItem& item) { c.emplace_back(item); } + // Collector used by collectPolygonGeometryWriteItems to flatten one collection into a caller-provided container. struct SWriteCollector { static inline void appendFromCollection(Container& out, const SGeometryCollectionWriteParams& params) diff --git a/include/nbl/asset/interchange/SInterchangeIO.h b/include/nbl/asset/interchange/SInterchangeIO.h index 08998e5ea8..ff911d9e6b 100644 --- a/include/nbl/asset/interchange/SInterchangeIO.h +++ b/include/nbl/asset/interchange/SInterchangeIO.h @@ -20,6 +20,7 @@ namespace nbl::asset { +// Shared read/write helpers that execute a resolved IO plan and collect simple telemetry. class SInterchangeIO { public: diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index 0d2d52bc2a..94a623732b 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -19,6 +19,7 @@ namespace nbl::asset { +// Input describing one loader or hash stage that needs worker and chunk sizing. struct SLoaderRuntimeTuningRequest { // Total input bytes for the tuned stage. @@ -51,6 +52,7 @@ struct SLoaderRuntimeTuningRequest uint64_t sampleMinWorkUnits = 0ull; }; +// Final worker and chunk layout selected for one stage. struct SLoaderRuntimeTuningResult { // Selected worker count for the stage. @@ -61,6 +63,7 @@ struct SLoaderRuntimeTuningResult size_t chunkCount = 1ull; }; +// Stateless runtime tuner used by loaders and hash stages to size worker pools and chunking. 
struct SLoaderRuntimeTuner { private: diff --git a/include/nbl/asset/utils/SGeometryNormalCommon.h b/include/nbl/asset/utils/SGeometryNormalCommon.h index 72e0348002..4589b6d40e 100644 --- a/include/nbl/asset/utils/SGeometryNormalCommon.h +++ b/include/nbl/asset/utils/SGeometryNormalCommon.h @@ -11,6 +11,7 @@ namespace nbl::asset { +// Shared normal helpers used by loaders and geometry utilities for simple face-normal generation. class SGeometryNormalCommon { public: diff --git a/src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h b/src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h index 4c7b287f10..074c597624 100644 --- a/src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h +++ b/src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h @@ -7,6 +7,7 @@ namespace nbl::asset { +// Private OBJ loader/writer contract for aux slots stored in ICPUPolygonGeometry. class SOBJPolygonGeometryAuxLayout { public: diff --git a/src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h b/src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h index 6077dcda49..097f6160d8 100644 --- a/src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h +++ b/src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h @@ -7,6 +7,7 @@ namespace nbl::asset { +// Private PLY loader/writer contract for reserved aux slots stored in ICPUPolygonGeometry. class SPLYPolygonGeometryAuxLayout { public: diff --git a/src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h b/src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h index 7bc0d27a85..f2ef0a5027 100644 --- a/src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h +++ b/src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h @@ -7,6 +7,7 @@ namespace nbl::asset { +// Private STL loader/writer contract for aux slots stored in ICPUPolygonGeometry. 
class SSTLPolygonGeometryAuxLayout { public: From 2fc1b1cafd2f6f6ae3ff33748db763bbccecedc4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 16:56:04 +0100 Subject: [PATCH 066/118] Fix AABB helper reuse and HLSL compile --- include/nbl/asset/ICPUPolygonGeometry.h | 33 ++--------- .../builtin/hlsl/shapes/AABBAccumulator.hlsl | 57 ++++++++++--------- include/nbl/builtin/hlsl/shapes/aabb.hlsl | 12 ++-- 3 files changed, 43 insertions(+), 59 deletions(-) diff --git a/include/nbl/asset/ICPUPolygonGeometry.h b/include/nbl/asset/ICPUPolygonGeometry.h index b8d97a54a3..988b5ada4a 100644 --- a/include/nbl/asset/ICPUPolygonGeometry.h +++ b/include/nbl/asset/ICPUPolygonGeometry.h @@ -8,6 +8,7 @@ #include "nbl/asset/IAsset.h" #include "nbl/asset/ICPUBuffer.h" #include "nbl/asset/IPolygonGeometry.h" +#include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" namespace nbl::asset @@ -112,7 +113,10 @@ class NBL_API2 ICPUPolygonGeometry final : public IPolygonGeometry return false; } template - inline bool setAABB(const hlsl::shapes::AABB<3,Scalar>& aabb) {return visitAABB([&aabb](auto&& ref)->void{ref=aabb;});} + inline bool setAABB(const hlsl::shapes::AABB<3,Scalar>& aabb) + { + return visitAABB([&aabb](auto&& ref)->void{ hlsl::shapes::util::assignAABB(ref, aabb); }); + } template inline bool applyAABB(const hlsl::shapes::AABB<3, Scalar>& aabb) { @@ -121,32 +125,7 @@ class NBL_API2 ICPUPolygonGeometry final : public IPolygonGeometry aabb.minVx.y > aabb.maxVx.y || aabb.minVx.z > aabb.maxVx.z) return false; - return visitAABB([&aabb](auto&& ref)->void - { - if constexpr (requires { ref.minVx.x; ref.minVx.y; ref.minVx.z; ref.maxVx.x; ref.maxVx.y; ref.maxVx.z; }) - { - ref.minVx.x = static_cast(aabb.minVx.x); - ref.minVx.y = static_cast(aabb.minVx.y); - ref.minVx.z = static_cast(aabb.minVx.z); - ref.maxVx.x = static_cast(aabb.maxVx.x); - ref.maxVx.y = static_cast(aabb.maxVx.y); - ref.maxVx.z = static_cast(aabb.maxVx.z); - if constexpr (requires { ref.minVx.w; 
ref.maxVx.w; }) - { - ref.minVx.w = 0; - ref.maxVx.w = 0; - } - } - else - { - ref.minVx[0] = static_cast(aabb.minVx[0]); - ref.minVx[1] = static_cast(aabb.minVx[1]); - ref.minVx[2] = static_cast(aabb.minVx[2]); - ref.maxVx[0] = static_cast(aabb.maxVx[0]); - ref.maxVx[1] = static_cast(aabb.maxVx[1]); - ref.maxVx[2] = static_cast(aabb.maxVx[2]); - } - }); + return visitAABB([&aabb](auto&& ref)->void{ hlsl::shapes::util::assignAABB(ref, aabb); }); } // diff --git a/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl b/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl index 0621db23b4..ca787aa87b 100644 --- a/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl @@ -28,7 +28,7 @@ struct AABBAccumulator3 static AABBAccumulator3 create() { - AABBAccumulator3 retval = {}; + AABBAccumulator3 retval; retval.value = aabb_t::create(); return retval; } @@ -41,18 +41,15 @@ struct AABBAccumulator3 value.minVx.z > value.maxVx.z; } - void addPoint(NBL_CONST_REF_ARG(point_t) point) + void addPoint(NBL_CONST_REF_ARG(point_t) pt) { - value.addPoint(point); + value.addPoint(pt); } void addXYZ(const Scalar x, const Scalar y, const Scalar z) { - point_t point; - point.x = x; - point.y = y; - point.z = z; - value.addPoint(point); + point_t pt = point_t(x, y, z); + value.addPoint(pt); } aabb_t value; @@ -71,37 +68,45 @@ inline void extendAABBAccumulator(NBL_REF_ARG(AABBAccumulator3) aabb, co } template && (vector_traits::Dimension >= 3)) -inline void extendAABBAccumulator(NBL_REF_ARG(AABBAccumulator3) aabb, NBL_CONST_REF_ARG(Point) point) +inline void extendAABBAccumulator(NBL_REF_ARG(AABBAccumulator3) aabb, NBL_CONST_REF_ARG(Point) pt) { array_get::scalar_type> getter; - typename AABBAccumulator3::point_t converted; - converted.x = Scalar(getter(point, 0)); - converted.y = Scalar(getter(point, 1)); - converted.z = Scalar(getter(point, 2)); + typename AABBAccumulator3::point_t converted = typename AABBAccumulator3::point_t( + 
Scalar(getter(pt, 0)), + Scalar(getter(pt, 1)), + Scalar(getter(pt, 2)) + ); aabb.addPoint(converted); } -template= 3) -inline void assignAABBFromAccumulator(NBL_REF_ARG(AABB) dst, NBL_CONST_REF_ARG(AABBAccumulator3) aabb) +template= 3 && SrcD >= 3) +inline void assignAABB(NBL_REF_ARG(AABB) dst, NBL_CONST_REF_ARG(AABB) src) { - if (aabb.empty()) - return; + array_set::point_t, DstScalar> setter; + array_get::point_t, SrcScalar> getter; - dst = AABB::create(); - array_set::point_t, DstScalar> setter; - setter(dst.minVx, 0, DstScalar(aabb.value.minVx.x)); - setter(dst.minVx, 1, DstScalar(aabb.value.minVx.y)); - setter(dst.minVx, 2, DstScalar(aabb.value.minVx.z)); - setter(dst.maxVx, 0, DstScalar(aabb.value.maxVx.x)); - setter(dst.maxVx, 1, DstScalar(aabb.value.maxVx.y)); - setter(dst.maxVx, 2, DstScalar(aabb.value.maxVx.z)); - for (int16_t i = 3; i < D; ++i) + dst = AABB::create(); + NBL_UNROLL for (int16_t i = 0; i < 3; ++i) + { + setter(dst.minVx, i, DstScalar(getter(src.minVx, i))); + setter(dst.maxVx, i, DstScalar(getter(src.maxVx, i))); + } + NBL_UNROLL for (int16_t i = 3; i < DstD; ++i) { setter(dst.minVx, i, DstScalar(0)); setter(dst.maxVx, i, DstScalar(0)); } } +template= 3) +inline void assignAABBFromAccumulator(NBL_REF_ARG(AABB) dst, NBL_CONST_REF_ARG(AABBAccumulator3) aabb) +{ + if (aabb.empty()) + return; + + assignAABB(dst, aabb.value); +} + } } } diff --git a/include/nbl/builtin/hlsl/shapes/aabb.hlsl b/include/nbl/builtin/hlsl/shapes/aabb.hlsl index 07219c6687..ec916f2734 100644 --- a/include/nbl/builtin/hlsl/shapes/aabb.hlsl +++ b/include/nbl/builtin/hlsl/shapes/aabb.hlsl @@ -66,28 +66,28 @@ namespace util namespace impl { template -struct intersect_helper> +struct intersect_helper > { using type = AABB; static inline type __call(NBL_CONST_REF_ARG(type) lhs, NBL_CONST_REF_ARG(type) rhs) { type retval; - retval.minVx = hlsl::max(lhs.minVx,rhs.minVx); - retval.maxVx = hlsl::min(lhs.maxVx,rhs.maxVx); + retval.minVx = hlsl::max(lhs.minVx,rhs.minVx); + 
retval.maxVx = hlsl::min(lhs.maxVx,rhs.maxVx); return retval; } }; template -struct union_helper> +struct union_helper > { using type = AABB; static inline type __call(NBL_CONST_REF_ARG(type) lhs, NBL_CONST_REF_ARG(type) rhs) { type retval; - retval.minVx = hlsl::min(lhs.minVx,rhs.minVx); - retval.maxVx = hlsl::max(lhs.maxVx,rhs.maxVx); + retval.minVx = hlsl::min(lhs.minVx,rhs.minVx); + retval.maxVx = hlsl::max(lhs.maxVx,rhs.maxVx); return retval; } }; From da25e8d9e6d16357e9c248bd72a8cf9f0f7257a2 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 17:12:34 +0100 Subject: [PATCH 067/118] Deduplicate AABB assignment paths --- include/nbl/asset/ICPUPolygonGeometry.h | 14 +++++++------- .../nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl | 15 +++++++++++---- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/include/nbl/asset/ICPUPolygonGeometry.h b/include/nbl/asset/ICPUPolygonGeometry.h index 988b5ada4a..e877499443 100644 --- a/include/nbl/asset/ICPUPolygonGeometry.h +++ b/include/nbl/asset/ICPUPolygonGeometry.h @@ -115,17 +115,17 @@ class NBL_API2 ICPUPolygonGeometry final : public IPolygonGeometry template inline bool setAABB(const hlsl::shapes::AABB<3,Scalar>& aabb) { - return visitAABB([&aabb](auto&& ref)->void{ hlsl::shapes::util::assignAABB(ref, aabb); }); + bool assigned = false; + const bool visited = visitAABB([&aabb, &assigned](auto&& ref)->void + { + assigned = hlsl::shapes::util::assignAABB(ref, aabb); + }); + return visited && assigned; } template inline bool applyAABB(const hlsl::shapes::AABB<3, Scalar>& aabb) { - if ( - aabb.minVx.x > aabb.maxVx.x || - aabb.minVx.y > aabb.maxVx.y || - aabb.minVx.z > aabb.maxVx.z) - return false; - return visitAABB([&aabb](auto&& ref)->void{ hlsl::shapes::util::assignAABB(ref, aabb); }); + return setAABB(aabb); } // diff --git a/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl b/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl index ca787aa87b..eda7ef536f 100644 --- 
a/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl @@ -80,11 +80,17 @@ inline void extendAABBAccumulator(NBL_REF_ARG(AABBAccumulator3) aabb, NB } template= 3 && SrcD >= 3) -inline void assignAABB(NBL_REF_ARG(AABB) dst, NBL_CONST_REF_ARG(AABB) src) +inline bool assignAABB(NBL_REF_ARG(AABB) dst, NBL_CONST_REF_ARG(AABB) src) { array_set::point_t, DstScalar> setter; array_get::point_t, SrcScalar> getter; + if ( + getter(src.minVx, 0) > getter(src.maxVx, 0) || + getter(src.minVx, 1) > getter(src.maxVx, 1) || + getter(src.minVx, 2) > getter(src.maxVx, 2)) + return false; + dst = AABB::create(); NBL_UNROLL for (int16_t i = 0; i < 3; ++i) { @@ -96,15 +102,16 @@ inline void assignAABB(NBL_REF_ARG(AABB) dst, NBL_CONST_REF_ARG setter(dst.minVx, i, DstScalar(0)); setter(dst.maxVx, i, DstScalar(0)); } + return true; } template= 3) -inline void assignAABBFromAccumulator(NBL_REF_ARG(AABB) dst, NBL_CONST_REF_ARG(AABBAccumulator3) aabb) +inline bool assignAABBFromAccumulator(NBL_REF_ARG(AABB) dst, NBL_CONST_REF_ARG(AABBAccumulator3) aabb) { if (aabb.empty()) - return; + return false; - assignAABB(dst, aabb.value); + return assignAABB(dst, aabb.value); } } From 6a7fde168fbecb53c61480f094d1352e598bc79c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 17:37:19 +0100 Subject: [PATCH 068/118] Inline geometry writer helpers --- .../asset/interchange/SGeometryWriterCommon.h | 44 ++++++++++++- src/nbl/CMakeLists.txt | 1 - .../interchange/SGeometryWriterCommon.cpp | 63 ------------------- tools/hcp/CMakeLists.txt | 27 ++++---- 4 files changed, 53 insertions(+), 82 deletions(-) delete mode 100644 src/nbl/asset/interchange/SGeometryWriterCommon.cpp diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index e7d80b5d74..40cafca654 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ 
b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -11,8 +11,11 @@ #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include #include #include +#include +#include #include #include @@ -44,8 +47,8 @@ class SGeometryWriterCommon const ICPUGeometryCollection* collection = nullptr; }; - template requires requires(Container& c, const SPolygonGeometryWriteItem& item) { c.emplace_back(item); } // Collector used by collectPolygonGeometryWriteItems to flatten one collection into a caller-provided container. + template requires requires(Container& c, const SPolygonGeometryWriteItem& item) { c.emplace_back(item); } struct SWriteCollector { static inline void appendFromCollection(Container& out, const SGeometryCollectionWriteParams& params) @@ -262,8 +265,15 @@ class SGeometryWriterCommon return reinterpret_cast(view.getPointer()); } - static char* appendFloatToBuffer(char* dst, char* end, float value); - static char* appendFloatToBuffer(char* dst, char* end, double value); + static inline char* appendFloatToBuffer(char* dst, char* end, float value) + { + return appendFloatingPointToBuffer(dst, end, value); + } + + static inline char* appendFloatToBuffer(char* dst, char* end, double value) + { + return appendFloatingPointToBuffer(dst, end, value); + } static inline char* appendUIntToBuffer(char* dst, char* const end, const uint32_t value) { @@ -280,6 +290,34 @@ class SGeometryWriterCommon const size_t writeLen = static_cast(written); return (writeLen < static_cast(end - dst)) ? 
(dst + writeLen) : end; } + + private: + template + static inline char* appendFloatingPointToBuffer(char* dst, char* const end, const T value) + { + static_assert(std::is_same_v || std::is_same_v); + + if (!dst || dst >= end) + return end; + + const auto result = std::to_chars(dst, end, value); + if (result.ec == std::errc()) + return result.ptr; + + constexpr size_t FloatingPointScratchSize = std::numeric_limits::max_digits10 + 9ull; + std::array scratch = {}; + constexpr int Precision = std::numeric_limits::max_digits10; + const int written = std::snprintf(scratch.data(), scratch.size(), "%.*g", Precision, static_cast(value)); + if (written <= 0) + return dst; + + const size_t writeLen = static_cast(written); + if (writeLen > static_cast(end - dst)) + return end; + + std::memcpy(dst, scratch.data(), writeLen); + return dst + writeLen; + } }; } diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 7746e20271..4f0852c687 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -200,7 +200,6 @@ set(NBL_ASSET_SOURCES asset/interchange/CGLTFLoader.cpp # Mesh writers - asset/interchange/SGeometryWriterCommon.cpp asset/interchange/COBJMeshWriter.cpp asset/interchange/CPLYMeshWriter.cpp asset/interchange/CSTLMeshWriter.cpp diff --git a/src/nbl/asset/interchange/SGeometryWriterCommon.cpp b/src/nbl/asset/interchange/SGeometryWriterCommon.cpp deleted file mode 100644 index 1ab5cd1ee9..0000000000 --- a/src/nbl/asset/interchange/SGeometryWriterCommon.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/asset/interchange/SGeometryWriterCommon.h" - -#include -#include -#include -#include -#include -#include -#include - - -namespace nbl::asset -{ - -namespace -{ - -template -inline constexpr size_t FloatingPointScratchSize = std::numeric_limits::max_digits10 + 9ull; - -template -char* appendFloatingPointToBuffer(char* dst, char* const end, const T value) -{ - static_assert(std::is_same_v || std::is_same_v); - - if (!dst || dst >= end) - return end; - - const auto result = std::to_chars(dst, end, value); - if (result.ec == std::errc()) - return result.ptr; - - std::array> scratch = {}; - constexpr int Precision = std::numeric_limits::max_digits10; - const int written = std::snprintf(scratch.data(), scratch.size(), "%.*g", Precision, static_cast(value)); - if (written <= 0) - return dst; - - const size_t writeLen = static_cast(written); - if (writeLen > static_cast(end - dst)) - return end; - - std::memcpy(dst, scratch.data(), writeLen); - return dst + writeLen; -} - -} - -char* SGeometryWriterCommon::appendFloatToBuffer(char* dst, char* end, float value) -{ - return appendFloatingPointToBuffer(dst, end, value); -} - -char* SGeometryWriterCommon::appendFloatToBuffer(char* dst, char* end, double value) -{ - return appendFloatingPointToBuffer(dst, end, value); -} - -} diff --git a/tools/hcp/CMakeLists.txt b/tools/hcp/CMakeLists.txt index 0d8f5a2033..456b0f3e1b 100644 --- a/tools/hcp/CMakeLists.txt +++ b/tools/hcp/CMakeLists.txt @@ -9,21 +9,18 @@ set(NBL_HCP_CI_ARGS --seed 12345 ) -add_test(NAME NBL_HCP_SEQUENTIAL - COMMAND "$" --runtime-tuning sequential ${NBL_HCP_CI_ARGS} - WORKING_DIRECTORY "$" - COMMAND_EXPAND_LISTS -) -add_test(NAME NBL_HCP_HEURISTIC - COMMAND "$" --runtime-tuning heuristic ${NBL_HCP_CI_ARGS} - WORKING_DIRECTORY "$" - COMMAND_EXPAND_LISTS -) -add_test(NAME NBL_HCP_HYBRID - COMMAND "$" --runtime-tuning hybrid ${NBL_HCP_CI_ARGS} - WORKING_DIRECTORY "$" - 
COMMAND_EXPAND_LISTS -) +function(nbl_hcp_add_ci_test mode) + string(TOUPPER "${mode}" mode_upper) + add_test(NAME "NBL_HCP_${mode_upper}" + COMMAND "$" --runtime-tuning "${mode}" ${NBL_HCP_CI_ARGS} + WORKING_DIRECTORY "$" + COMMAND_EXPAND_LISTS + ) +endfunction() + +nbl_hcp_add_ci_test(sequential) +nbl_hcp_add_ci_test(heuristic) +nbl_hcp_add_ci_test(hybrid) set_tests_properties( NBL_HCP_SEQUENTIAL From 31a66969a16428cf8ff40d9b3c865db58f8df93d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 19:29:02 +0100 Subject: [PATCH 069/118] Hide vendor Blake3 from public API --- 3rdparty/CMakeLists.txt | 2 - include/nbl/core/hash/blake.h | 20 +- src/nbl/CMakeLists.txt | 1 - src/nbl/asset/ICPUImage.cpp | 25 +- src/nbl/core/hash/blake.cpp | 532 +++++++++++++++++++++++++++++++++- tools/hcp/main.cpp | 75 ++++- 6 files changed, 618 insertions(+), 37 deletions(-) diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 68e821dfdf..966dac1fe0 100755 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -528,8 +528,6 @@ nbl_install_dir(glm/glm) nbl_install_file_spec(${CMAKE_CURRENT_BINARY_DIR}/imath/config/ImathConfig.h imath) nbl_install_dir(imath/src/Imath) -nbl_install_file(blake/c/blake3.h) - nbl_install_dir(boost/superproject/libs/preprocessor/include/boost) nbl_install_file_spec(renderdoc/renderdoc_app.h renderdoc) diff --git a/include/nbl/core/hash/blake.h b/include/nbl/core/hash/blake.h index a13500ac77..61d1c02d9a 100644 --- a/include/nbl/core/hash/blake.h +++ b/include/nbl/core/hash/blake.h @@ -6,20 +6,25 @@ #include "nbl/config/BuildConfigOptions.h" -#include "blake3.h" +#include +#include #include +#include +#include namespace nbl::core { struct blake3_hash_t final { + static inline constexpr size_t DigestSize = 32ull; + inline bool operator==(const blake3_hash_t&) const = default; // could initialize this to a hash of a zero-length array, // but that requires a .cpp file and a static - uint8_t data[BLAKE3_OUT_LEN]; + uint8_t 
data[DigestSize]; }; class NBL_API2 blake3_hasher final @@ -37,7 +42,12 @@ class NBL_API2 blake3_hasher final } }; - ::blake3_hasher m_state; + static inline constexpr size_t OpaqueStateSize = 1920ull; + static inline constexpr size_t OpaqueStateAlign = 16ull; + + static void validateOpaqueStateLayout(); + + alignas(OpaqueStateAlign) unsigned char m_state[OpaqueStateSize]; public: blake3_hasher(); @@ -58,7 +68,7 @@ class NBL_API2 blake3_hasher final NBL_API2 blake3_hash_t blake3_hash_buffer(const void* data, size_t bytes); NBL_API2 blake3_hash_t blake3_hash_buffer_sequential(const void* data, size_t bytes); -// Useful specializations +// Convenience specializations for common wrapper inputs. template struct blake3_hasher::update_impl { @@ -116,7 +126,7 @@ struct hash { auto* as_p_uint64_t = reinterpret_cast(blake3.data); size_t retval = as_p_uint64_t[0]; - for (auto i=1; i> 2); return retval; } diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 4f0852c687..f0aabc1c95 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -506,7 +506,6 @@ endif() # blake3 add_dependencies(Nabla blake3) -list(APPEND PUBLIC_BUILD_INCLUDE_DIRS $) if(NBL_STATIC_BUILD) target_link_libraries(Nabla INTERFACE blake3) else() diff --git a/src/nbl/asset/ICPUImage.cpp b/src/nbl/asset/ICPUImage.cpp index cd3f884890..1e06f4ccf7 100644 --- a/src/nbl/asset/ICPUImage.cpp +++ b/src/nbl/asset/ICPUImage.cpp @@ -1,4 +1,5 @@ #include +#include #include "nbl/asset/ICPUImage.h" #include "nbl/asset/filters/CMatchedSizeInOutImageFilterCommon.h" #include "nbl/asset/filters/CFlattenRegionsImageFilter.h" @@ -37,7 +38,7 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter const auto product = parameters.mipLevels * parameters.arrayLayers; size_t bufferSize = product * sizeof(CState::outHash); - bufferSize += product * sizeof(blake3_hasher); + bufferSize += product * sizeof(core::blake3_hasher); bufferSize += getFlattenBufferSize(input); return 
bufferSize; @@ -136,9 +137,11 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter const auto product = parameters.mipLevels * parameters.arrayLayers; scratch.hashes = { static_cast(state->scratch.memory), product }; - scratch.hashers = { reinterpret_cast(scratch.hashes.data() + scratch.hashes.size()), product }; + scratch.hashers = { reinterpret_cast(scratch.hashes.data() + scratch.hashes.size()), product }; scratch.flatten = { .offset = scratch.hashes.size_bytes() + scratch.hashers.size_bytes(), .size = state->scratch.size - scratch.hashers.size_bytes() - scratch.hashes.size_bytes(), .buffer = buffer}; } + for (auto& hasher : scratch.hashers) + std::construct_at(&hasher); const auto isFullyFlatten = scratch.flatten.size == 0ull; @@ -225,7 +228,7 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter auto* const hasher = hashers + pOffset; auto* const hash = hashes + pOffset; - blake3_hasher_init(hasher); + hasher->reset(); IImage::SSubresourceLayers subresource = { .aspectMask = static_cast(0u), .mipLevel = miplevel, .baseArrayLayer = layer, .layerCount = 1u }; // stick to given mip level and single layer CMatchedSizeInOutImageFilterCommon::state_type::TexelRange range = { .offset = {}, .extent = { parameters.extent.width, parameters.extent.height, parameters.extent.depth } }; // cover all texels within layer range, take 0th mip level size to not clip anything at all @@ -233,7 +236,7 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter auto executePerTexelOrBlock = [&](uint32_t readBlockArrayOffset, core::vectorSIMDu32 readBlockPos) -> void { - blake3_hasher_update(hasher, inData + readBlockArrayOffset, texelOrBlockByteSize); + hasher->update(inData + readBlockArrayOffset, texelOrBlockByteSize); }; const auto regions = image->getRegions(miplevel); @@ -242,7 +245,7 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter if (!performNullHash) 
CBasicImageFilterCommon::executePerRegion(std::execution::seq, image, executePerTexelOrBlock, regions, clipFunctor); // fire the hasher for a layer, note we forcing seq policy because texels/blocks cannot be handled with par policies when we hash them - blake3_hasher_finalize(hasher, reinterpret_cast(hash), sizeof(CState::hash_t)); // finalize hash for layer + put it to heap for given mip level + *hash = static_cast(*hasher); // finalize hash for layer + put it to heap for given mip level }; std::for_each(policy, layers.begin(), layers.end(), executePerLayer); // fire per layer for given given mip level with specified execution policy, yes you can use parallel policy here if you want at it will work @@ -255,8 +258,8 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter time to use them and compute final hash */ - blake3_hasher hasher; - blake3_hasher_init(&hasher); + core::blake3_hasher hasher; + hasher.reset(); { for (auto miplevel = 0u; miplevel < parameters.mipLevels; ++miplevel) { @@ -265,11 +268,11 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter for (auto layer = 0u; layer < parameters.arrayLayers; ++layer) { auto* hash = hashes + mipOffset + layer; - blake3_hasher_update(&hasher, hash->data, sizeof(CState::hash_t)); + hasher.update(hash->data, sizeof(CState::hash_t)); } } - blake3_hasher_finalize(&hasher, reinterpret_cast(&state->outHash), sizeof(CState::hash_t)); // finalize output hash for whole image given all hashes + state->outHash = static_cast(hasher); // finalize output hash for whole image given all hashes } return true; @@ -284,7 +287,7 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter struct ScratchMap { std::span hashes; // hashes, single hash is obtained from given miplevel & layer, full hash for an image is a hash of this hash buffer - std::span hashers; // hashers, used to produce a hash + std::span hashers; // hashers, used to produce a hash 
asset::SBufferRange flatten; // tightly packed texels from input, no memory gaps }; }; @@ -307,4 +310,4 @@ core::blake3_hash_t ICPUImage::computeContentHash() const assert(passed); // actually this should never fail, leaving in case return state.outHash; -} \ No newline at end of file +} diff --git a/src/nbl/core/hash/blake.cpp b/src/nbl/core/hash/blake.cpp index 12642dcf79..5e6ee253e9 100644 --- a/src/nbl/core/hash/blake.cpp +++ b/src/nbl/core/hash/blake.cpp @@ -1,14 +1,501 @@ #include "nbl/core/hash/blake.h" +#include +#include #include #include +#include +#include + +extern "C" +{ +#include "blake3.h" +#include "blake3_impl.h" +} + +/* + BLAKE3 is tree-based and explicitly designed for parallel processing. The tree mode + (chunks and parent-node reduction) is part of the specification, so a parallel + implementation can be done without changing hash semantics. + + Why this local implementation exists: + - Nabla needs a multithreaded hash path integrated with its own runtime policy and + standard C++ threading. + - Upstream C API exposes a single-threaded update path and an optional oneTBB path + (`blake3_hasher_update_tbb`) which requires building with `BLAKE3_USE_TBB`. + - Here we keep the same algorithmic rules and final digest, while using only C++20 + standard facilities (`std::async`, `std::thread`) and no oneTBB dependency. + - The local helpers below are adapted from upstream tree-processing internals used + in `c/blake3.c` and the oneTBB integration path. 
+ + Primary references: + - BLAKE3 spec repository (paper): https://github.com/BLAKE3-team/BLAKE3-specs + - C2SP BLAKE3 specification: https://c2sp.org/BLAKE3 + - Upstream BLAKE3 C API notes (`update_tbb`): https://github.com/BLAKE3-team/BLAKE3/blob/master/c/README.md +*/ namespace nbl::core { +namespace +{ + +struct output_t +{ + uint32_t input_cv[8]; + uint64_t counter; + uint8_t block[BLAKE3_BLOCK_LEN]; + uint8_t block_len; + uint8_t flags; +}; + +INLINE void chunk_state_init_local(blake3_chunk_state* self, const uint32_t key[8], uint8_t flags) +{ + std::memcpy(self->cv, key, BLAKE3_KEY_LEN); + self->chunk_counter = 0; + std::memset(self->buf, 0, BLAKE3_BLOCK_LEN); + self->buf_len = 0; + self->blocks_compressed = 0; + self->flags = flags; +} + +INLINE void chunk_state_reset_local(blake3_chunk_state* self, const uint32_t key[8], uint64_t chunk_counter) +{ + std::memcpy(self->cv, key, BLAKE3_KEY_LEN); + self->chunk_counter = chunk_counter; + self->blocks_compressed = 0; + std::memset(self->buf, 0, BLAKE3_BLOCK_LEN); + self->buf_len = 0; +} + +INLINE size_t chunk_state_len_local(const blake3_chunk_state* self) +{ + return (BLAKE3_BLOCK_LEN * static_cast(self->blocks_compressed)) + static_cast(self->buf_len); +} + +INLINE size_t chunk_state_fill_buf_local(blake3_chunk_state* self, const uint8_t* input, size_t input_len) +{ + size_t take = BLAKE3_BLOCK_LEN - static_cast(self->buf_len); + if (take > input_len) + take = input_len; + auto* const dest = self->buf + static_cast(self->buf_len); + std::memcpy(dest, input, take); + self->buf_len += static_cast(take); + return take; +} + +INLINE uint8_t chunk_state_maybe_start_flag_local(const blake3_chunk_state* self) +{ + return self->blocks_compressed == 0 ? 
CHUNK_START : 0; +} + +INLINE output_t make_output_local(const uint32_t input_cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags) +{ + output_t ret = {}; + std::memcpy(ret.input_cv, input_cv, 32); + std::memcpy(ret.block, block, BLAKE3_BLOCK_LEN); + ret.block_len = block_len; + ret.counter = counter; + ret.flags = flags; + return ret; +} + +INLINE void output_chaining_value_local(const output_t* self, uint8_t cv[32]) +{ + uint32_t cv_words[8]; + std::memcpy(cv_words, self->input_cv, 32); + blake3_compress_in_place(cv_words, self->block, self->block_len, self->counter, self->flags); + store_cv_words(cv, cv_words); +} + +INLINE void chunk_state_update_local(blake3_chunk_state* self, const uint8_t* input, size_t input_len) +{ + if (self->buf_len > 0) + { + size_t take = chunk_state_fill_buf_local(self, input, input_len); + input += take; + input_len -= take; + if (input_len > 0) + { + blake3_compress_in_place( + self->cv, + self->buf, + BLAKE3_BLOCK_LEN, + self->chunk_counter, + self->flags | chunk_state_maybe_start_flag_local(self)); + self->blocks_compressed += 1; + self->buf_len = 0; + std::memset(self->buf, 0, BLAKE3_BLOCK_LEN); + } + } + + while (input_len > BLAKE3_BLOCK_LEN) + { + blake3_compress_in_place( + self->cv, + input, + BLAKE3_BLOCK_LEN, + self->chunk_counter, + self->flags | chunk_state_maybe_start_flag_local(self)); + self->blocks_compressed += 1; + input += BLAKE3_BLOCK_LEN; + input_len -= BLAKE3_BLOCK_LEN; + } + + (void)chunk_state_fill_buf_local(self, input, input_len); +} + +INLINE output_t chunk_state_output_local(const blake3_chunk_state* self) +{ + const uint8_t block_flags = self->flags | chunk_state_maybe_start_flag_local(self) | CHUNK_END; + return make_output_local(self->cv, self->buf, self->buf_len, self->chunk_counter, block_flags); +} + +INLINE output_t parent_output_local(const uint8_t block[BLAKE3_BLOCK_LEN], const uint32_t key[8], uint8_t flags) +{ + return make_output_local(key, block, 
BLAKE3_BLOCK_LEN, 0, flags | PARENT); +} + +INLINE size_t left_len_local(size_t content_len) +{ + const size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN; + return round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN; +} + +INLINE size_t compress_chunks_parallel_local( + const uint8_t* input, + size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, + uint8_t flags, + uint8_t* out) +{ + const uint8_t* chunks_array[MAX_SIMD_DEGREE]; + size_t input_position = 0; + size_t chunks_array_len = 0; + while (input_len - input_position >= BLAKE3_CHUNK_LEN) + { + chunks_array[chunks_array_len] = &input[input_position]; + input_position += BLAKE3_CHUNK_LEN; + chunks_array_len += 1; + } + + blake3_hash_many( + chunks_array, + chunks_array_len, + BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, + key, + chunk_counter, + true, + flags, + CHUNK_START, + CHUNK_END, + out); + + if (input_len > input_position) + { + const uint64_t counter = chunk_counter + static_cast(chunks_array_len); + blake3_chunk_state chunk_state = {}; + chunk_state_init_local(&chunk_state, key, flags); + chunk_state.chunk_counter = counter; + chunk_state_update_local(&chunk_state, &input[input_position], input_len - input_position); + const auto output = chunk_state_output_local(&chunk_state); + output_chaining_value_local(&output, &out[chunks_array_len * BLAKE3_OUT_LEN]); + return chunks_array_len + 1; + } + + return chunks_array_len; +} + +INLINE size_t compress_parents_parallel_local( + const uint8_t* child_chaining_values, + size_t num_chaining_values, + const uint32_t key[8], + uint8_t flags, + uint8_t* out) +{ + const uint8_t* parents_array[MAX_SIMD_DEGREE_OR_2]; + size_t parents_array_len = 0; + while (num_chaining_values - (2 * parents_array_len) >= 2) + { + parents_array[parents_array_len] = + &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN]; + parents_array_len += 1; + } + + blake3_hash_many( + parents_array, + parents_array_len, + 1, + key, + 0, + false, + flags | PARENT, 
+ 0, + 0, + out); + + if (num_chaining_values > 2 * parents_array_len) + { + std::memcpy( + &out[parents_array_len * BLAKE3_OUT_LEN], + &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN], + BLAKE3_OUT_LEN); + return parents_array_len + 1; + } + + return parents_array_len; +} + +constexpr size_t ParallelMinInputBytes = 1ull << 20; +constexpr size_t ParallelThreadGranularityBytes = 768ull << 10; +constexpr size_t ParallelSpawnMinSubtreeBytes = 512ull << 10; +constexpr uint32_t ParallelMaxThreads = 8u; +std::atomic_uint32_t g_parallelHashCalls = 0u; + +class SParallelCallGuard final +{ + public: + SParallelCallGuard() : m_active(g_parallelHashCalls.fetch_add(1u, std::memory_order_relaxed) + 1u) + { + } + + ~SParallelCallGuard() + { + g_parallelHashCalls.fetch_sub(1u, std::memory_order_relaxed); + } + + inline uint32_t activeCalls() const + { + return m_active; + } + + private: + uint32_t m_active = 1u; +}; + +size_t compress_subtree_wide_mt( + const uint8_t* input, + size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, + uint8_t flags, + uint8_t* out, + uint32_t threadBudget); + +INLINE void compress_subtree_to_parent_node_mt( + const uint8_t* input, + size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, + uint8_t flags, + uint8_t out[2 * BLAKE3_OUT_LEN], + uint32_t threadBudget) +{ + uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; + size_t num_cvs = compress_subtree_wide_mt(input, input_len, key, chunk_counter, flags, cv_array, threadBudget); + assert(num_cvs <= MAX_SIMD_DEGREE_OR_2); + +#if MAX_SIMD_DEGREE_OR_2 > 2 + uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2]; + while (num_cvs > 2) + { + num_cvs = compress_parents_parallel_local(cv_array, num_cvs, key, flags, out_array); + std::memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN); + } +#endif + + std::memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); +} + +size_t compress_subtree_wide_mt( + const uint8_t* input, + size_t input_len, + const uint32_t 
key[8], + uint64_t chunk_counter, + uint8_t flags, + uint8_t* out, + uint32_t threadBudget) +{ + if (input_len <= blake3_simd_degree() * BLAKE3_CHUNK_LEN) + return compress_chunks_parallel_local(input, input_len, key, chunk_counter, flags, out); + + const size_t left_input_len = left_len_local(input_len); + const size_t right_input_len = input_len - left_input_len; + const uint8_t* const right_input = &input[left_input_len]; + const uint64_t right_chunk_counter = chunk_counter + static_cast(left_input_len / BLAKE3_CHUNK_LEN); + + uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; + size_t degree = blake3_simd_degree(); + if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) + degree = 2; + uint8_t* const right_cvs = &cv_array[degree * BLAKE3_OUT_LEN]; + + size_t left_n = 0; + size_t right_n = 0; + bool spawned = false; + if ( + threadBudget > 1u && + left_input_len >= ParallelSpawnMinSubtreeBytes && + right_input_len >= ParallelSpawnMinSubtreeBytes) + { + try + { + uint32_t leftBudget = threadBudget / 2u; + if (leftBudget == 0u) + leftBudget = 1u; + uint32_t rightBudget = threadBudget - leftBudget; + if (rightBudget == 0u) + rightBudget = 1u; + + auto rightFuture = std::async(std::launch::async, [right_input, right_input_len, key, right_chunk_counter, flags, right_cvs, rightBudget]() -> size_t + { + return compress_subtree_wide_mt(right_input, right_input_len, key, right_chunk_counter, flags, right_cvs, rightBudget); + }); + left_n = compress_subtree_wide_mt(input, left_input_len, key, chunk_counter, flags, cv_array, leftBudget); + right_n = rightFuture.get(); + spawned = true; + } + catch (...) 
+ { + spawned = false; + } + } + + if (!spawned) + { + left_n = compress_subtree_wide_mt(input, left_input_len, key, chunk_counter, flags, cv_array, 1u); + right_n = compress_subtree_wide_mt(right_input, right_input_len, key, right_chunk_counter, flags, right_cvs, 1u); + } + + if (left_n == 1) + { + std::memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); + return 2; + } + + const size_t num_chaining_values = left_n + right_n; + return compress_parents_parallel_local(cv_array, num_chaining_values, key, flags, out); +} + +INLINE void hasher_merge_cv_stack_local(::blake3_hasher* self, uint64_t total_len) +{ + const size_t post_merge_stack_len = static_cast(popcnt(total_len)); + while (self->cv_stack_len > post_merge_stack_len) + { + auto* const parent_node = &self->cv_stack[(self->cv_stack_len - 2) * BLAKE3_OUT_LEN]; + const auto output = parent_output_local(parent_node, self->key, self->chunk.flags); + output_chaining_value_local(&output, parent_node); + self->cv_stack_len -= 1; + } +} + +INLINE void hasher_push_cv_local(::blake3_hasher* self, uint8_t new_cv[BLAKE3_OUT_LEN], uint64_t chunk_counter) +{ + hasher_merge_cv_stack_local(self, chunk_counter); + std::memcpy(&self->cv_stack[self->cv_stack_len * BLAKE3_OUT_LEN], new_cv, BLAKE3_OUT_LEN); + self->cv_stack_len += 1; +} + +void hasher_update_parallel(::blake3_hasher* self, const uint8_t* input_bytes, size_t input_len, uint32_t threadBudget) +{ + if (input_len == 0) + return; + + if (chunk_state_len_local(&self->chunk) > 0) + { + size_t take = BLAKE3_CHUNK_LEN - chunk_state_len_local(&self->chunk); + if (take > input_len) + take = input_len; + chunk_state_update_local(&self->chunk, input_bytes, take); + input_bytes += take; + input_len -= take; + if (input_len > 0) + { + const auto output = chunk_state_output_local(&self->chunk); + uint8_t chunk_cv[BLAKE3_OUT_LEN]; + output_chaining_value_local(&output, chunk_cv); + hasher_push_cv_local(self, chunk_cv, self->chunk.chunk_counter); + chunk_state_reset_local(&self->chunk, 
self->key, self->chunk.chunk_counter + 1); + } + else + { + return; + } + } + + while (input_len > BLAKE3_CHUNK_LEN) + { + size_t subtree_len = round_down_to_power_of_2(input_len); + const uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN; + while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) + subtree_len /= 2; + + const uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN; + if (subtree_len <= BLAKE3_CHUNK_LEN) + { + blake3_chunk_state chunk_state = {}; + chunk_state_init_local(&chunk_state, self->key, self->chunk.flags); + chunk_state.chunk_counter = self->chunk.chunk_counter; + chunk_state_update_local(&chunk_state, input_bytes, subtree_len); + const auto output = chunk_state_output_local(&chunk_state); + uint8_t cv[BLAKE3_OUT_LEN]; + output_chaining_value_local(&output, cv); + hasher_push_cv_local(self, cv, chunk_state.chunk_counter); + } + else + { + uint8_t cv_pair[2 * BLAKE3_OUT_LEN]; + compress_subtree_to_parent_node_mt( + input_bytes, + subtree_len, + self->key, + self->chunk.chunk_counter, + self->chunk.flags, + cv_pair, + threadBudget); + hasher_push_cv_local(self, cv_pair, self->chunk.chunk_counter); + hasher_push_cv_local(self, &cv_pair[BLAKE3_OUT_LEN], self->chunk.chunk_counter + (subtree_chunks / 2)); + } + self->chunk.chunk_counter += subtree_chunks; + input_bytes += subtree_len; + input_len -= subtree_len; + } + + if (input_len > 0) + { + chunk_state_update_local(&self->chunk, input_bytes, input_len); + hasher_merge_cv_stack_local(self, self->chunk.chunk_counter); + } +} + +INLINE uint32_t pick_parallel_budget(const size_t bytes) +{ + const uint32_t hw = std::thread::hardware_concurrency(); + if (hw <= 1u || bytes < ParallelMinInputBytes) + return 1u; + + const uint32_t maxBySize = static_cast(std::max(1ull, bytes / ParallelThreadGranularityBytes)); + uint32_t budget = std::min(hw, ParallelMaxThreads); + budget = std::min(budget, maxBySize); + return std::max(1u, budget); +} + +} + +void 
blake3_hasher::validateOpaqueStateLayout() +{ + // The wrapper keeps a small inline storage margin so the real vendor hasher + // stays out of the public API. The margin gives us a safe footprint reserve + // for ABI or platform differences and only increases the wrapper size slightly. + static_assert(sizeof(::blake3_hasher) <= OpaqueStateSize); + static_assert(alignof(::blake3_hasher) <= OpaqueStateAlign); +} + blake3_hasher::blake3_hasher() { - ::blake3_hasher_init(&m_state); + validateOpaqueStateLayout(); + ::blake3_hasher_init(reinterpret_cast<::blake3_hasher*>(m_state)); } blake3_hasher& blake3_hasher::update(const void* data, const size_t bytes) @@ -20,20 +507,48 @@ blake3_hasher& blake3_hasher::update(const void* data, const size_t bytes) if (!data) return *this; - ::blake3_hasher_update(&m_state, data, bytes); + ::blake3_hasher_update(reinterpret_cast<::blake3_hasher*>(m_state), data, bytes); return *this; } void blake3_hasher::reset() { - ::blake3_hasher_init(&m_state); + ::blake3_hasher_init(reinterpret_cast<::blake3_hasher*>(m_state)); } blake3_hasher::operator blake3_hash_t() const { blake3_hash_t retval = {}; - ::blake3_hasher stateCopy = m_state; - ::blake3_hasher_finalize(&stateCopy, retval.data, BLAKE3_OUT_LEN); + const auto* const state = reinterpret_cast(m_state); + ::blake3_hasher stateCopy = *state; + ::blake3_hasher_finalize(&stateCopy, retval.data, blake3_hash_t::DigestSize); + return retval; +} + +blake3_hash_t blake3_hash_buffer(const void* data, size_t bytes) +{ + if (!data && bytes != 0ull) + return {}; + if (bytes == 0ull) + return static_cast(blake3_hasher{}); + + uint32_t threadBudget = pick_parallel_budget(bytes); + if (threadBudget <= 1u) + return blake3_hash_buffer_sequential(data, bytes); + + SParallelCallGuard guard; + const uint32_t activeCalls = std::max(1u, guard.activeCalls()); + const uint32_t hw = std::max(1u, std::thread::hardware_concurrency()); + const uint32_t hwShare = std::max(1u, hw / activeCalls); + threadBudget = 
std::min(threadBudget, hwShare); + if (threadBudget <= 1u) + return blake3_hash_buffer_sequential(data, bytes); + + ::blake3_hasher hasherState = {}; + ::blake3_hasher_init(&hasherState); + hasher_update_parallel(&hasherState, reinterpret_cast(data), bytes, threadBudget); + blake3_hash_t retval = {}; + ::blake3_hasher_finalize(&hasherState, retval.data, blake3_hash_t::DigestSize); return retval; } @@ -48,13 +563,8 @@ blake3_hash_t blake3_hash_buffer_sequential(const void* data, size_t bytes) ::blake3_hasher_update(&hasher, data, bytes); blake3_hash_t retval = {}; - ::blake3_hasher_finalize(&hasher, retval.data, BLAKE3_OUT_LEN); + ::blake3_hasher_finalize(&hasher, retval.data, blake3_hash_t::DigestSize); return retval; } -blake3_hash_t blake3_hash_buffer(const void* data, size_t bytes) -{ - return blake3_hash_buffer_sequential(data, bytes); -} - } diff --git a/tools/hcp/main.cpp b/tools/hcp/main.cpp index 24a9c16ba5..afc8373280 100644 --- a/tools/hcp/main.cpp +++ b/tools/hcp/main.cpp @@ -68,6 +68,17 @@ static uint64_t nextRand(uint64_t& state) return state * 2685821657736338717ull; } +static std::vector makeRandomBytes(const size_t byteCount, const uint64_t seed, const uint64_t stream) +{ + std::vector data(byteCount); + uint64_t state = seed ^ (stream * 0x9e3779b97f4a7c15ull); + if (state == 0ull) + state = kDefaultSeed ^ stream; + for (auto& byte : data) + byte = static_cast(nextRand(state) & 0xffull); + return data; +} + static std::optional parseOptions(const core::vector& args) { argparse::ArgumentParser parser("hcp"); @@ -139,12 +150,7 @@ static core::smart_refctd_ptr createGeometry(const Options& auto makeBuffer = [&](size_t bytes, core::bitflag usage, uint64_t stream) -> core::smart_refctd_ptr { - std::vector data(bytes); - uint64_t state = options.seed ^ (stream * 0x9e3779b97f4a7c15ull); - if (state == 0ull) - state = kDefaultSeed ^ stream; - for (auto& b : data) - b = static_cast(nextRand(state) & 0xffull); + auto data = makeRandomBytes(bytes, 
options.seed, stream); ICPUBuffer::SCreationParams params = {}; params.size = data.size(); @@ -183,7 +189,55 @@ static core::smart_refctd_ptr createGeometry(const Options& return geometry; } -static bool runParityCheck(const Options& options, ILogger* logger) +static bool runStandaloneBufferParityCheck(const Options& options, ILogger* logger) +{ + using clock_t = std::chrono::high_resolution_clock; + auto toMs = [](clock_t::duration d) { return std::chrono::duration(d).count(); }; + auto toMiB = [](size_t bytes) { return static_cast(bytes) / (1024.0 * 1024.0); }; + auto throughput = [&](size_t bytes, double ms) { return ms > 0.0 ? toMiB(bytes) * 1000.0 / ms : 0.0; }; + + auto data = makeRandomBytes(options.bufferBytes, options.seed, 0x11ull); + ICPUBuffer::SCreationParams params = {}; + params.size = data.size(); + params.usage = IBuffer::EUF_TRANSFER_SRC_BIT; + params.data = data.data(); + auto buffer = ICPUBuffer::create(std::move(params)); + if (!buffer) + { + logger->log("Failed to create standalone buffer.", ILogger::ELL_ERROR); + return false; + } + + const auto legacyStart = clock_t::now(); + const auto legacyHash = core::blake3_hash_buffer_sequential(data.data(), data.size()); + const double legacyMs = toMs(clock_t::now() - legacyStart); + + const auto directStart = clock_t::now(); + const auto directHash = core::blake3_hash_buffer(data.data(), data.size()); + const double directMs = toMs(clock_t::now() - directStart); + if (directHash != legacyHash) + { + logger->log("Direct BLAKE3 hash mismatch.", ILogger::ELL_ERROR); + return false; + } + + const auto bufferStart = clock_t::now(); + const auto bufferHash = buffer->computeContentHash(); + const double bufferMs = toMs(clock_t::now() - bufferStart); + if (bufferHash != legacyHash) + { + logger->log("ICPUBuffer::computeContentHash mismatch.", ILogger::ELL_ERROR); + return false; + } + + logger->log("HCP single-buffer bytes=%llu mib=%.3f", ILogger::ELL_INFO, static_cast(data.size()), toMiB(data.size())); + 
logger->log("HCP single-buffer legacy ms=%.3f mib_s=%.3f", ILogger::ELL_INFO, legacyMs, throughput(data.size(), legacyMs)); + logger->log("HCP single-buffer direct ms=%.3f mib_s=%.3f", ILogger::ELL_INFO, directMs, throughput(data.size(), directMs)); + logger->log("HCP single-buffer api ms=%.3f mib_s=%.3f", ILogger::ELL_INFO, bufferMs, throughput(data.size(), bufferMs)); + return true; +} + +static bool runGeometryParityCheck(const Options& options, ILogger* logger) { using clock_t = std::chrono::high_resolution_clock; auto toMs = [](clock_t::duration d) { return std::chrono::duration(d).count(); }; @@ -260,6 +314,13 @@ static bool runParityCheck(const Options& options, ILogger* logger) return true; } +static bool runParityCheck(const Options& options, ILogger* logger) +{ + if (!runStandaloneBufferParityCheck(options, logger)) + return false; + return runGeometryParityCheck(options, logger); +} + class HashContentParityApp final : public IApplicationFramework { public: From 187836b8c2fdb9f50cce4b562a523c0f7cbbf58a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 20:13:50 +0100 Subject: [PATCH 070/118] Clean up OBJ PLY STL code paths --- .../asset/interchange/COBJMeshFileLoader.cpp | 98 +++++++------------ src/nbl/asset/interchange/COBJMeshWriter.cpp | 51 +++------- .../asset/interchange/CPLYMeshFileLoader.cpp | 60 ++++++------ src/nbl/asset/interchange/CPLYMeshWriter.cpp | 21 ++-- .../asset/interchange/CSTLMeshFileLoader.cpp | 94 ++++++------------ src/nbl/asset/interchange/CSTLMeshWriter.cpp | 16 +-- 6 files changed, 129 insertions(+), 211 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 851e939b79..f412527a2f 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -48,9 +48,6 @@ struct ObjVertexDedupNode using Float3 = hlsl::float32_t3; using Float2 = hlsl::float32_t2; 
-static_assert(sizeof(Float3) == sizeof(float) * 3ull); -static_assert(sizeof(Float2) == sizeof(float) * 2ull); - inline bool isObjInlineWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\v' || c == '\f'; @@ -688,59 +685,59 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return ix; }; - auto acquireCornerIndex = [&](const int32_t* idx, const uint32_t smoothingGroup, uint32_t& outIx)->bool + auto findCornerIndex = [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, const uint32_t dedupSmoothingGroup, uint32_t& outIx)->bool { - if (!idx) - return false; - - const int32_t posIx = idx[0]; if (posIx < 0 || static_cast(posIx) >= positions.size()) return false; - const uint32_t dedupSmoothingGroup = (idx[2] >= 0) ? 0u : smoothingGroup; if (static_cast(posIx) >= dedupHeadByPos.size()) dedupHeadByPos.resize(positions.size(), -1); - int32_t nodeIx = dedupHeadByPos[posIx]; + int32_t nodeIx = dedupHeadByPos[static_cast(posIx)]; while (nodeIx >= 0) { const auto& node = dedupNodes[static_cast(nodeIx)]; - if (node.uv == idx[1] && node.normal == idx[2] && node.smoothingGroup == dedupSmoothingGroup) + if (node.uv == uvIx && node.normal == normalIx && node.smoothingGroup == dedupSmoothingGroup) { outIx = node.outIndex; return true; } nodeIx = node.next; } + return false; + }; + auto materializeCornerIndex = [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, const uint32_t dedupSmoothingGroup, uint32_t& outIx)->bool + { if (!allocateOutVertex(outIx)) return false; const int32_t newNodeIx = allocateDedupNode(); if (newNodeIx < 0) return false; + auto& node = dedupNodes[static_cast(newNodeIx)]; - node.uv = idx[1]; - node.normal = idx[2]; + node.uv = uvIx; + node.normal = normalIx; node.smoothingGroup = dedupSmoothingGroup; node.outIndex = outIx; - node.next = dedupHeadByPos[posIx]; - dedupHeadByPos[posIx] = newNodeIx; + node.next = dedupHeadByPos[static_cast(posIx)]; + 
dedupHeadByPos[static_cast(posIx)] = newNodeIx; - const auto& srcPos = positions[idx[0]]; + const auto& srcPos = positions[static_cast(posIx)]; outPositions[static_cast(outIx)] = srcPos; hlsl::shapes::util::extendAABBAccumulator(parsedAABB, srcPos); Float2 uv(0.f, 0.f); - if (idx[1] >= 0 && static_cast(idx[1]) < uvs.size()) + if (uvIx >= 0 && static_cast(uvIx) < uvs.size()) { - uv = uvs[idx[1]]; + uv = uvs[static_cast(uvIx)]; hasUVs = true; } outUVs[static_cast(outIx)] = uv; Float3 normal(0.f, 0.f, 0.f); - if (idx[2] >= 0 && static_cast(idx[2]) < normals.size()) + if (normalIx >= 0 && static_cast(normalIx) < normals.size()) { - normal = normals[idx[2]]; + normal = normals[static_cast(normalIx)]; hasProvidedNormals = true; outNormalNeedsGeneration[static_cast(outIx)] = 0u; } @@ -753,6 +750,20 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return true; }; + auto acquireCornerIndex = [&](const int32_t* idx, const uint32_t smoothingGroup, uint32_t& outIx)->bool + { + if (!idx) + return false; + + const int32_t posIx = idx[0]; + if (posIx < 0 || static_cast(posIx) >= positions.size()) + return false; + const uint32_t dedupSmoothingGroup = (idx[2] >= 0) ? 
0u : smoothingGroup; + if (findCornerIndex(posIx, idx[1], idx[2], dedupSmoothingGroup, outIx)) + return true; + return materializeCornerIndex(posIx, idx[1], idx[2], dedupSmoothingGroup, outIx); + }; + auto acquireCornerIndexPositiveTriplet = [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, uint32_t& outIx)->bool { const uint32_t hotHash = @@ -766,48 +777,15 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return true; } - int32_t nodeIx = dedupHeadByPos[static_cast(posIx)]; - while (nodeIx >= 0) + if (findCornerIndex(posIx, uvIx, normalIx, 0u, outIx) || materializeCornerIndex(posIx, uvIx, normalIx, 0u, outIx)) { - const auto& node = dedupNodes[static_cast(nodeIx)]; - if (node.uv == uvIx && node.normal == normalIx) - { - outIx = node.outIndex; - hotEntry.pos = posIx; - hotEntry.uv = uvIx; - hotEntry.normal = normalIx; - hotEntry.outIndex = outIx; - return true; - } - nodeIx = node.next; + hotEntry.pos = posIx; + hotEntry.uv = uvIx; + hotEntry.normal = normalIx; + hotEntry.outIndex = outIx; + return true; } - - if (!allocateOutVertex(outIx)) - return false; - const int32_t newNodeIx = allocateDedupNode(); - if (newNodeIx < 0) - return false; - auto& node = dedupNodes[static_cast(newNodeIx)]; - node.uv = uvIx; - node.normal = normalIx; - node.smoothingGroup = 0u; - node.outIndex = outIx; - node.next = dedupHeadByPos[static_cast(posIx)]; - dedupHeadByPos[static_cast(posIx)] = newNodeIx; - - const auto& srcPos = positions[static_cast(posIx)]; - outPositions[static_cast(outIx)] = srcPos; - hlsl::shapes::util::extendAABBAccumulator(parsedAABB, srcPos); - outUVs[static_cast(outIx)] = uvs[static_cast(uvIx)]; - outNormals[static_cast(outIx)] = normals[static_cast(normalIx)]; - hotEntry.pos = posIx; - hotEntry.uv = uvIx; - hotEntry.normal = normalIx; - hotEntry.outIndex = outIx; - hasUVs = true; - hasProvidedNormals = true; - outNormalNeedsGeneration[static_cast(outIx)] = 0u; - return true; + return false; }; uint32_t 
currentSmoothingGroup = 0u; diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index ef7eae3eb3..646240b83f 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -5,6 +5,8 @@ #include "nbl/asset/interchange/COBJMeshWriter.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" +#include "nbl/builtin/hlsl/array_accessors.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" #include "SOBJPolygonGeometryAuxLayout.h" #ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ @@ -71,47 +73,26 @@ bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hls return view.decodeElement(ix, out); } -void appendVec3Line(std::string& out, const char* prefix, const size_t prefixSize, const hlsl::float32_t3& v) +template +void appendVecLine(std::string& out, const char* prefix, const size_t prefixSize, const Vec& values) { + constexpr size_t N = hlsl::vector_traits::Dimension; const size_t oldSize = out.size(); - out.resize(oldSize + prefixSize + (3ull * MaxFloatTextChars) + 3ull); + out.resize(oldSize + prefixSize + (N * MaxFloatTextChars) + N); char* const lineBegin = out.data() + oldSize; char* cursor = lineBegin; char* const lineEnd = out.data() + out.size(); + hlsl::array_get getter; std::memcpy(cursor, prefix, prefixSize); cursor += prefixSize; - cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, v.x); - if (cursor < lineEnd) - *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, v.y); - if (cursor < lineEnd) - *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, v.z); - if (cursor < lineEnd) - *(cursor++) = '\n'; - - out.resize(oldSize + static_cast(cursor - lineBegin)); -} - -void appendVec2Line(std::string& out, const char* prefix, const size_t prefixSize, const hlsl::float32_t2& v) -{ - const size_t 
oldSize = out.size(); - out.resize(oldSize + prefixSize + (2ull * MaxFloatTextChars) + 2ull); - char* const lineBegin = out.data() + oldSize; - char* cursor = lineBegin; - char* const lineEnd = out.data() + out.size(); - - std::memcpy(cursor, prefix, prefixSize); - cursor += prefixSize; - - cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, v.x); - if (cursor < lineEnd) - *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, v.y); - if (cursor < lineEnd) - *(cursor++) = '\n'; + for (size_t i = 0ull; i < N; ++i) + { + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, getter(values, static_cast(i))); + if (cursor < lineEnd) + *(cursor++) = (i + 1ull < N) ? ' ' : '\n'; + } out.resize(oldSize + static_cast(cursor - lineBegin)); } @@ -321,7 +302,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (flipHandedness) vertex.x = -vertex.x; - appendVec3Line(output, "v ", sizeof("v ") - 1ull, vertex); + appendVecLine(output, "v ", sizeof("v ") - 1ull, vertex); } if (hasUVs) @@ -340,7 +321,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ uv = hlsl::float32_t2(static_cast(tmp.x), 1.f - static_cast(tmp.y)); } - appendVec2Line(output, "vt ", sizeof("vt ") - 1ull, uv); + appendVecLine(output, "vt ", sizeof("vt ") - 1ull, uv); } } @@ -363,7 +344,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (flipHandedness) normal.x = -normal.x; - appendVec3Line(output, "vn ", sizeof("vn ") - 1ull, normal); + appendVecLine(output, "vn ", sizeof("vn ") - 1ull, normal); } } diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 468725fdd1..e8750dc5f4 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -668,6 +668,25 @@ struct SContext std::memcpy(&value, &bits, sizeof(value)); return 
value; }; + auto decodeFloat3 = [&](const uint8_t* src)->hlsl::float32_t3 + { + return hlsl::float32_t3( + decodeF32(src + 0ull * floatBytes), + decodeF32(src + 1ull * floatBytes), + decodeF32(src + 2ull * floatBytes) + ); + }; + auto storeFloat3 = [](uint8_t* dst, const hlsl::float32_t3& value) -> void + { + reinterpret_cast(dst)[0] = value.x; + reinterpret_cast(dst)[1] = value.y; + reinterpret_cast(dst)[2] = value.z; + }; + auto storeFloat2 = [](uint8_t* dst, const hlsl::float32_t2& value) -> void + { + reinterpret_cast(dst)[0] = value.x; + reinterpret_cast(dst)[1] = value.y; + }; size_t remainingVertices = el.Count; while (remainingVertices > 0ull) @@ -695,14 +714,10 @@ struct SContext { for (size_t v = 0ull; v < batchVertices; ++v) { - const float x = decodeF32(src + 0ull * floatBytes); - const float y = decodeF32(src + 1ull * floatBytes); - const float z = decodeF32(src + 2ull * floatBytes); - reinterpret_cast(posBase)[0] = x; - reinterpret_cast(posBase)[1] = y; - reinterpret_cast(posBase)[2] = z; + const hlsl::float32_t3 position = decodeFloat3(src); + storeFloat3(posBase, position); if (trackAABB) - hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, x, y, z); + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); src += 3ull * floatBytes; posBase += posStride; } @@ -713,19 +728,13 @@ struct SContext { for (size_t v = 0ull; v < batchVertices; ++v) { - const float x = decodeF32(src + 0ull * floatBytes); - const float y = decodeF32(src + 1ull * floatBytes); - const float z = decodeF32(src + 2ull * floatBytes); - reinterpret_cast(posBase)[0] = x; - reinterpret_cast(posBase)[1] = y; - reinterpret_cast(posBase)[2] = z; + const hlsl::float32_t3 position = decodeFloat3(src); + storeFloat3(posBase, position); if (trackAABB) - hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, x, y, z); + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); src += 3ull * floatBytes; posBase += posStride; - reinterpret_cast(normalBase)[0] = 
decodeF32(src + 0ull * floatBytes); - reinterpret_cast(normalBase)[1] = decodeF32(src + 1ull * floatBytes); - reinterpret_cast(normalBase)[2] = decodeF32(src + 2ull * floatBytes); + storeFloat3(normalBase, decodeFloat3(src)); src += 3ull * floatBytes; normalBase += normalStride; } @@ -735,23 +744,16 @@ struct SContext { for (size_t v = 0ull; v < batchVertices; ++v) { - const float x = decodeF32(src + 0ull * floatBytes); - const float y = decodeF32(src + 1ull * floatBytes); - const float z = decodeF32(src + 2ull * floatBytes); - reinterpret_cast(posBase)[0] = x; - reinterpret_cast(posBase)[1] = y; - reinterpret_cast(posBase)[2] = z; + const hlsl::float32_t3 position = decodeFloat3(src); + storeFloat3(posBase, position); if (trackAABB) - hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, x, y, z); + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); src += 3ull * floatBytes; posBase += posStride; - reinterpret_cast(normalBase)[0] = decodeF32(src + 0ull * floatBytes); - reinterpret_cast(normalBase)[1] = decodeF32(src + 1ull * floatBytes); - reinterpret_cast(normalBase)[2] = decodeF32(src + 2ull * floatBytes); + storeFloat3(normalBase, decodeFloat3(src)); src += 3ull * floatBytes; normalBase += normalStride; - reinterpret_cast(uvBase)[0] = decodeF32(src + 0ull * floatBytes); - reinterpret_cast(uvBase)[1] = decodeF32(src + 1ull * floatBytes); + storeFloat2(uvBase, hlsl::float32_t2(decodeF32(src + 0ull * floatBytes), decodeF32(src + 1ull * floatBytes))); src += 2ull * floatBytes; uvBase += uvStride; } diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 5e569f9466..cd87939d2f 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -201,15 +201,8 @@ bool decodeUnsigned4Raw(const ICPUPolygonGeometry::SDataView& view, const size_t return decodePixels(view.composed.format, srcArr, out, 0u, 0u); } -void appendUInt(std::string& out, const 
uint64_t value) -{ - std::array buf = {}; - const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); - if (res.ec == std::errc()) - out.append(buf.data(), static_cast(res.ptr - buf.data())); -} - -void appendInt(std::string& out, const int64_t value) +template +void appendIntegral(std::string& out, const T value) { std::array buf = {}; const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); @@ -385,7 +378,7 @@ inline bool writeTypedViewText(std::string& output, const ICPUPolygonGeometry::S int64_t value = tmp[c]; if (flipVectors && c == 0u) value = -value; - appendInt(output, value); + appendIntegral(output, value); output.push_back(' '); } return true; @@ -399,7 +392,7 @@ inline bool writeTypedViewText(std::string& output, const ICPUPolygonGeometry::S return false; for (uint32_t c = 0u; c < componentCount; ++c) { - appendUInt(output, tmp[c]); + appendIntegral(output, tmp[c]); output.push_back(' '); } return true; @@ -826,11 +819,11 @@ bool ply_writer_detail::writeText( return SGeometryWriterCommon::visitTriangleIndices(input.geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) { output.append("3 "); - appendUInt(output, i0); + appendIntegral(output, i0); output.push_back(' '); - appendUInt(output, i1); + appendIntegral(output, i1); output.push_back(' '); - appendUInt(output, i2); + appendIntegral(output, i2); output.push_back('\n'); }); } diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 3021dc91e1..8dabfa723f 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -525,76 +525,46 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa std::jthread normalHashThread; if (hashInParsePipeline) { - positionHashThread = std::jthread([&]() + auto launchHashThread = [&](const float* srcFloat, core::blake3_hash_t& outHash) -> std::jthread { - try + return 
std::jthread([&, srcFloat, outHashPtr = &outHash]() { - core::blake3_hasher positionHasher; - size_t chunkIx = 0ull; - while (chunkIx < parseChunkCount) + try { - auto ready = std::atomic_ref(hashChunkReady[chunkIx]); - while (ready.load(std::memory_order_acquire) == 0u) - ready.wait(0u, std::memory_order_acquire); - - size_t runEnd = chunkIx + 1ull; - while (runEnd < parseChunkCount) + core::blake3_hasher hasher; + size_t chunkIx = 0ull; + while (chunkIx < parseChunkCount) { - const auto runReady = std::atomic_ref(hashChunkReady[runEnd]).load(std::memory_order_acquire); - if (runReady == 0u) - break; - ++runEnd; + auto ready = std::atomic_ref(hashChunkReady[chunkIx]); + while (ready.load(std::memory_order_acquire) == 0u) + ready.wait(0u, std::memory_order_acquire); + + size_t runEnd = chunkIx + 1ull; + while (runEnd < parseChunkCount) + { + const auto runReady = std::atomic_ref(hashChunkReady[runEnd]).load(std::memory_order_acquire); + if (runReady == 0u) + break; + ++runEnd; + } + + const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; + const uint64_t endTri = std::min(static_cast(runEnd) * parseChunkTriangles, triangleCount); + const size_t runTriangles = static_cast(endTri - begin); + const size_t runBytes = runTriangles * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex * sizeof(float); + hasher.update(srcFloat + begin * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex, runBytes); + chunkIx = runEnd; } - - const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; - const uint64_t endTri = std::min(static_cast(runEnd) * parseChunkTriangles, triangleCount); - const size_t runTriangles = static_cast(endTri - begin); - const size_t runBytes = runTriangles * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex * sizeof(float); - positionHasher.update(posOutFloat + begin * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex, runBytes); - chunkIx = runEnd; + 
*outHashPtr = static_cast(hasher); } - parsedPositionHash = static_cast(positionHasher); - } - catch (...) - { - hashPipelineOk.store(false, std::memory_order_relaxed); - } - }); - normalHashThread = std::jthread([&]() - { - try - { - core::blake3_hasher normalHasher; - size_t chunkIx = 0ull; - while (chunkIx < parseChunkCount) + catch (...) { - auto ready = std::atomic_ref(hashChunkReady[chunkIx]); - while (ready.load(std::memory_order_acquire) == 0u) - ready.wait(0u, std::memory_order_acquire); - - size_t runEnd = chunkIx + 1ull; - while (runEnd < parseChunkCount) - { - const auto runReady = std::atomic_ref(hashChunkReady[runEnd]).load(std::memory_order_acquire); - if (runReady == 0u) - break; - ++runEnd; - } - - const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; - const uint64_t endTri = std::min(static_cast(runEnd) * parseChunkTriangles, triangleCount); - const size_t runTriangles = static_cast(endTri - begin); - const size_t runBytes = runTriangles * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex * sizeof(float); - normalHasher.update(normalOutFloat + begin * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex, runBytes); - chunkIx = runEnd; + hashPipelineOk.store(false, std::memory_order_relaxed); } - parsedNormalHash = static_cast(normalHasher); - } - catch (...) 
- { - hashPipelineOk.store(false, std::memory_order_relaxed); - } - }); + }); + }; + positionHashThread = launchHashThread(posOutFloat, parsedPositionHash); + normalHashThread = launchHashThread(normalOutFloat, parsedNormalHash); } std::atomic_size_t nextChunkIx = 0ull; auto parseWorker = [&](const size_t workerIx) -> void diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index b221c75ea2..e807e69d82 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -835,23 +835,17 @@ bool writeFaceText( std::array faceText = {}; char* cursor = faceText.data(); char* const end = faceText.data() + faceText.size(); + auto appendVertex = [&](const hlsl::float32_t3& vertex) -> bool + { + return appendLiteral(cursor, end, " vertex ", sizeof(" vertex ") - 1ull) && appendVectorAsAsciiLine(cursor, end, vertex); + }; if (!appendLiteral(cursor, end, "facet normal ", sizeof("facet normal ") - 1ull)) return false; if (!appendVectorAsAsciiLine(cursor, end, normal)) return false; if (!appendLiteral(cursor, end, " outer loop\n", sizeof(" outer loop\n") - 1ull)) return false; - if (!appendLiteral(cursor, end, " vertex ", sizeof(" vertex ") - 1ull)) - return false; - if (!appendVectorAsAsciiLine(cursor, end, vertex1)) - return false; - if (!appendLiteral(cursor, end, " vertex ", sizeof(" vertex ") - 1ull)) - return false; - if (!appendVectorAsAsciiLine(cursor, end, vertex2)) - return false; - if (!appendLiteral(cursor, end, " vertex ", sizeof(" vertex ") - 1ull)) - return false; - if (!appendVectorAsAsciiLine(cursor, end, vertex3)) + if (!appendVertex(vertex1) || !appendVertex(vertex2) || !appendVertex(vertex3)) return false; if (!appendLiteral(cursor, end, " endloop\n", sizeof(" endloop\n") - 1ull)) return false; From 14b3c98ffe76b98e537e7a942e8aca54c6754cc5 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 7 Mar 2026 20:18:24 +0100 Subject: [PATCH 071/118] Generalize 
loader cleanup helpers --- .../asset/interchange/COBJMeshFileLoader.cpp | 25 +++--- .../asset/interchange/CPLYMeshFileLoader.cpp | 88 +++++++++---------- 2 files changed, 55 insertions(+), 58 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index f412527a2f..c5d9bb6ae0 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -45,9 +45,6 @@ struct ObjVertexDedupNode int32_t next = -1; }; -using Float3 = hlsl::float32_t3; -using Float2 = hlsl::float32_t2; - inline bool isObjInlineWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\v' || c == '\f'; @@ -448,18 +445,18 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const char* const bufEnd = buf + static_cast(filesize); const char* bufPtr = buf; - core::vector positions; - core::vector normals; - core::vector uvs; + core::vector positions; + core::vector normals; + core::vector uvs; const size_t estimatedAttributeCount = std::max(16ull, static_cast(filesize) / 32ull); positions.reserve(estimatedAttributeCount); normals.reserve(estimatedAttributeCount); uvs.reserve(estimatedAttributeCount); - core::vector outPositions; - core::vector outNormals; + core::vector outPositions; + core::vector outNormals; core::vector outNormalNeedsGeneration; - core::vector outUVs; + core::vector outUVs; core::vector indices; core::vector dedupHeadByPos; core::vector dedupNodes; @@ -726,7 +723,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as outPositions[static_cast(outIx)] = srcPos; hlsl::shapes::util::extendAABBAccumulator(parsedAABB, srcPos); - Float2 uv(0.f, 0.f); + hlsl::float32_t2 uv(0.f, 0.f); if (uvIx >= 0 && static_cast(uvIx) < uvs.size()) { uv = uvs[static_cast(uvIx)]; @@ -734,7 +731,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } outUVs[static_cast(outIx)] = uv; - 
Float3 normal(0.f, 0.f, 0.f); + hlsl::float32_t3 normal(0.f, 0.f, 0.f); if (normalIx >= 0 && static_cast(normalIx) < normals.size()) { normal = normals[static_cast(normalIx)]; @@ -811,7 +808,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const char subType = ((lineStart + 1) < lineEnd) ? static_cast(std::tolower(static_cast(lineStart[1]))) : '\0'; if ((lineStart + 1) < lineEnd && subType == ' ') { - Float3 vec{}; + hlsl::float32_t3 vec{}; const char* ptr = lineStart + 2; for (uint32_t i = 0u; i < 3u; ++i) { @@ -827,7 +824,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } else if ((lineStart + 2) < lineEnd && subType == 'n' && isObjInlineWhitespace(lineStart[2])) { - Float3 vec{}; + hlsl::float32_t3 vec{}; const char* ptr = lineStart + 3; for (uint32_t i = 0u; i < 3u; ++i) { @@ -842,7 +839,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as } else if ((lineStart + 2) < lineEnd && subType == 't' && isObjInlineWhitespace(lineStart[2])) { - Float2 vec{}; + hlsl::float32_t2 vec{}; const char* ptr = lineStart + 3; for (uint32_t i = 0u; i < 2u; ++i) { diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index e8750dc5f4..a65445ecd8 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -11,7 +11,9 @@ #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/IAssetManager.h" #include "nbl/asset/metadata/CPLYMetadata.h" +#include "nbl/builtin/hlsl/array_accessors.hlsl" #include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" #include "nbl/core/hash/blake.h" #include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" @@ -668,25 +670,23 @@ struct SContext std::memcpy(&value, &bits, sizeof(value)); return value; }; - auto decodeFloat3 = [&](const uint8_t* 
src)->hlsl::float32_t3 - { - return hlsl::float32_t3( - decodeF32(src + 0ull * floatBytes), - decodeF32(src + 1ull * floatBytes), - decodeF32(src + 2ull * floatBytes) - ); - }; - auto storeFloat3 = [](uint8_t* dst, const hlsl::float32_t3& value) -> void - { - reinterpret_cast(dst)[0] = value.x; - reinterpret_cast(dst)[1] = value.y; - reinterpret_cast(dst)[2] = value.z; - }; - auto storeFloat2 = [](uint8_t* dst, const hlsl::float32_t2& value) -> void - { - reinterpret_cast(dst)[0] = value.x; - reinterpret_cast(dst)[1] = value.y; - }; + auto decodeVector = [&](const uint8_t* src)->Vec + { + constexpr uint32_t N = hlsl::vector_traits::Dimension; + Vec value{}; + hlsl::array_set setter; + for (uint32_t i = 0u; i < N; ++i) + setter(value, i, decodeF32(src + static_cast(i) * floatBytes)); + return value; + }; + auto storeVector = [](uint8_t* dst, const Vec& value) -> void + { + constexpr uint32_t N = hlsl::vector_traits::Dimension; + hlsl::array_get getter; + auto* const out = reinterpret_cast(dst); + for (uint32_t i = 0u; i < N; ++i) + out[i] = getter(value, i); + }; size_t remainingVertices = el.Count; while (remainingVertices > 0ull) @@ -714,10 +714,10 @@ struct SContext { for (size_t v = 0ull; v < batchVertices; ++v) { - const hlsl::float32_t3 position = decodeFloat3(src); - storeFloat3(posBase, position); - if (trackAABB) - hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); + const hlsl::float32_t3 position = decodeVector.operator()(src); + storeVector.operator()(posBase, position); + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); src += 3ull * floatBytes; posBase += posStride; } @@ -728,15 +728,15 @@ struct SContext { for (size_t v = 0ull; v < batchVertices; ++v) { - const hlsl::float32_t3 position = decodeFloat3(src); - storeFloat3(posBase, position); - if (trackAABB) - hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); - src += 3ull * floatBytes; - posBase += posStride; - storeFloat3(normalBase, 
decodeFloat3(src)); - src += 3ull * floatBytes; - normalBase += normalStride; + const hlsl::float32_t3 position = decodeVector.operator()(src); + storeVector.operator()(posBase, position); + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); + src += 3ull * floatBytes; + posBase += posStride; + storeVector.operator()(normalBase, decodeVector.operator()(src)); + src += 3ull * floatBytes; + normalBase += normalStride; } } break; @@ -744,18 +744,18 @@ struct SContext { for (size_t v = 0ull; v < batchVertices; ++v) { - const hlsl::float32_t3 position = decodeFloat3(src); - storeFloat3(posBase, position); - if (trackAABB) - hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); - src += 3ull * floatBytes; - posBase += posStride; - storeFloat3(normalBase, decodeFloat3(src)); - src += 3ull * floatBytes; - normalBase += normalStride; - storeFloat2(uvBase, hlsl::float32_t2(decodeF32(src + 0ull * floatBytes), decodeF32(src + 1ull * floatBytes))); - src += 2ull * floatBytes; - uvBase += uvStride; + const hlsl::float32_t3 position = decodeVector.operator()(src); + storeVector.operator()(posBase, position); + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); + src += 3ull * floatBytes; + posBase += posStride; + storeVector.operator()(normalBase, decodeVector.operator()(src)); + src += 3ull * floatBytes; + normalBase += normalStride; + storeVector.operator()(uvBase, decodeVector.operator()(src)); + src += 2ull * floatBytes; + uvBase += uvStride; } } break; From ef7c706d30df938443856e80884c67ec4e8f4d0e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 09:25:51 +0100 Subject: [PATCH 072/118] Reduce loader and writer duplication --- .../asset/interchange/COBJMeshFileLoader.cpp | 45 ++-- src/nbl/asset/interchange/COBJMeshWriter.cpp | 28 +-- .../asset/interchange/CPLYMeshFileLoader.cpp | 215 ++++++------------ src/nbl/asset/interchange/CPLYMeshWriter.cpp | 98 ++------ 
src/nbl/asset/interchange/CSTLMeshWriter.cpp | 132 ++++------- 5 files changed, 168 insertions(+), 350 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index c5d9bb6ae0..2baed21f97 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -805,51 +805,38 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const char lineType = static_cast(std::tolower(static_cast(*lineStart))); if (lineType == 'v') { - const char subType = ((lineStart + 1) < lineEnd) ? static_cast(std::tolower(static_cast(lineStart[1]))) : '\0'; - if ((lineStart + 1) < lineEnd && subType == ' ') + auto parseVector = [&](const char* ptr, float* values, const uint32_t count)->bool { - hlsl::float32_t3 vec{}; - const char* ptr = lineStart + 2; - for (uint32_t i = 0u; i < 3u; ++i) + for (uint32_t i = 0u; i < count; ++i) { while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) ++ptr; - if (ptr >= lineEnd) - return {}; - if (!parseObjFloat(ptr, lineEnd, (&vec.x)[i])) - return {}; + if (ptr >= lineEnd || !parseObjFloat(ptr, lineEnd, values[i])) + return false; } + return true; + }; + const char subType = ((lineStart + 1) < lineEnd) ? 
static_cast(std::tolower(static_cast(lineStart[1]))) : '\0'; + if ((lineStart + 1) < lineEnd && subType == ' ') + { + hlsl::float32_t3 vec{}; + if (!parseVector(lineStart + 2, &vec.x, 3u)) + return {}; positions.push_back(vec); dedupHeadByPos.push_back(-1); } else if ((lineStart + 2) < lineEnd && subType == 'n' && isObjInlineWhitespace(lineStart[2])) { hlsl::float32_t3 vec{}; - const char* ptr = lineStart + 3; - for (uint32_t i = 0u; i < 3u; ++i) - { - while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) - ++ptr; - if (ptr >= lineEnd) - return {}; - if (!parseObjFloat(ptr, lineEnd, (&vec.x)[i])) - return {}; - } + if (!parseVector(lineStart + 3, &vec.x, 3u)) + return {}; normals.push_back(vec); } else if ((lineStart + 2) < lineEnd && subType == 't' && isObjInlineWhitespace(lineStart[2])) { hlsl::float32_t2 vec{}; - const char* ptr = lineStart + 3; - for (uint32_t i = 0u; i < 2u; ++i) - { - while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) - ++ptr; - if (ptr >= lineEnd) - return {}; - if (!parseObjFloat(ptr, lineEnd, (&vec.x)[i])) - return {}; - } + if (!parseVector(lineStart + 3, &vec.x, 2u)) + return {}; vec.y = 1.f - vec.y; uvs.push_back(vec); } diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 646240b83f..8e4f1a9f79 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -130,28 +130,18 @@ void appendIndexTokenToStorage(std::string& storage, core::vector(cursor - token)); } diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index a65445ecd8..cdb40e28ab 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -540,12 +540,12 @@ struct SContext { if (!IsBinaryFile || el.Name != "vertex") return EFastVertexReadResult::NotApplicable; - - enum class ELayoutKind : uint8_t + struct SLayoutDesc { - XYZ, - XYZ_N, - XYZ_N_UV + 
uint32_t propertyCount; + uint32_t srcBytesPerVertex; + bool hasNormals; + bool hasUVs; }; auto allF32 = [&el]()->bool @@ -573,24 +573,18 @@ struct SContext } return true; }; - - ELayoutKind layout = ELayoutKind::XYZ; + static constexpr SLayoutDesc xyz = { 3u, sizeof(hlsl::float32_t) * 3u, false, false }; + static constexpr SLayoutDesc xyz_n = { 6u, sizeof(hlsl::float32_t) * 6u, true, false }; + static constexpr SLayoutDesc xyz_n_uv = { 8u, sizeof(hlsl::float32_t) * 8u, true, true }; + const SLayoutDesc* layout = nullptr; if (matchNames({ "x", "y", "z" })) - { - layout = ELayoutKind::XYZ; - } + layout = &xyz; else if (matchNames({ "x", "y", "z", "nx", "ny", "nz" })) - { - layout = ELayoutKind::XYZ_N; - } + layout = &xyz_n; else if (matchNames({ "x", "y", "z", "nx", "ny", "nz", "u", "v" }) || matchNames({ "x", "y", "z", "nx", "ny", "nz", "s", "t" })) - { - layout = ELayoutKind::XYZ_N_UV; - } - else - { + layout = &xyz_n_uv; + if (!layout) return EFastVertexReadResult::NotApplicable; - } const size_t floatBytes = sizeof(hlsl::float32_t); auto validateTuple = [&](const size_t beginIx, const size_t componentCount, uint32_t& outStride, uint8_t*& outBase)->bool @@ -621,41 +615,9 @@ struct SContext uint8_t* posBase = nullptr; uint8_t* normalBase = nullptr; uint8_t* uvBase = nullptr; - switch (layout) - { - case ELayoutKind::XYZ: - if (vertAttrIts.size() != 3u || !validateTuple(0u, 3u, posStride, posBase)) - return EFastVertexReadResult::NotApplicable; - break; - case ELayoutKind::XYZ_N: - if (vertAttrIts.size() != 6u) - return EFastVertexReadResult::NotApplicable; - if (!validateTuple(0u, 3u, posStride, posBase) || !validateTuple(3u, 3u, normalStride, normalBase)) - return EFastVertexReadResult::NotApplicable; - break; - case ELayoutKind::XYZ_N_UV: - if (vertAttrIts.size() != 8u) - return EFastVertexReadResult::NotApplicable; - if (!validateTuple(0u, 3u, posStride, posBase) || !validateTuple(3u, 3u, normalStride, normalBase) || !validateTuple(6u, 2u, uvStride, uvBase)) - 
return EFastVertexReadResult::NotApplicable; - break; - } - - const size_t srcBytesPerVertex = [layout]()->size_t - { - switch (layout) - { - case ELayoutKind::XYZ: - return sizeof(hlsl::float32_t) * 3ull; - case ELayoutKind::XYZ_N: - return sizeof(hlsl::float32_t) * 6ull; - case ELayoutKind::XYZ_N_UV: - return sizeof(hlsl::float32_t) * 8ull; - default: - return 0ull; - } - }(); - if (srcBytesPerVertex == 0ull || el.Count > (std::numeric_limits::max() / srcBytesPerVertex)) + if (vertAttrIts.size() != layout->propertyCount || !validateTuple(0u, 3u, posStride, posBase) || (layout->hasNormals && !validateTuple(3u, 3u, normalStride, normalBase)) || (layout->hasUVs && !validateTuple(6u, 2u, uvStride, uvBase))) + return EFastVertexReadResult::NotApplicable; + if (el.Count > (std::numeric_limits::max() / layout->srcBytesPerVertex)) return EFastVertexReadResult::Error; const bool trackAABB = parsedAABB != nullptr; @@ -670,119 +632,82 @@ struct SContext std::memcpy(&value, &bits, sizeof(value)); return value; }; - auto decodeVector = [&](const uint8_t* src)->Vec - { - constexpr uint32_t N = hlsl::vector_traits::Dimension; - Vec value{}; - hlsl::array_set setter; - for (uint32_t i = 0u; i < N; ++i) - setter(value, i, decodeF32(src + static_cast(i) * floatBytes)); - return value; - }; - auto storeVector = [](uint8_t* dst, const Vec& value) -> void - { - constexpr uint32_t N = hlsl::vector_traits::Dimension; - hlsl::array_get getter; - auto* const out = reinterpret_cast(dst); - for (uint32_t i = 0u; i < N; ++i) - out[i] = getter(value, i); - }; + auto decodeVector = [&](const uint8_t* src)->Vec + { + constexpr uint32_t N = hlsl::vector_traits::Dimension; + Vec value{}; + hlsl::array_set setter; + for (uint32_t i = 0u; i < N; ++i) + setter(value, i, decodeF32(src + static_cast(i) * floatBytes)); + return value; + }; + auto storeVector = [](uint8_t* dst, const Vec& value) -> void + { + constexpr uint32_t N = hlsl::vector_traits::Dimension; + hlsl::array_get getter; + auto* const 
out = reinterpret_cast(dst); + for (uint32_t i = 0u; i < N; ++i) + out[i] = getter(value, i); + }; + auto advanceTuple = [&](const uint32_t beginIx, const uint32_t componentCount, const size_t advance) -> void + { + for (uint32_t i = 0u; i < componentCount; ++i) + vertAttrIts[beginIx + i].ptr += advance; + }; size_t remainingVertices = el.Count; while (remainingVertices > 0ull) { - if (StartPointer + srcBytesPerVertex > EndPointer) + if (StartPointer + layout->srcBytesPerVertex > EndPointer) fillBuffer(); const size_t available = EndPointer > StartPointer ? static_cast(EndPointer - StartPointer) : 0ull; - if (available < srcBytesPerVertex) + if (available < layout->srcBytesPerVertex) return EFastVertexReadResult::Error; - const size_t batchVertices = std::min(remainingVertices, available / srcBytesPerVertex); + const size_t batchVertices = std::min(remainingVertices, available / layout->srcBytesPerVertex); const uint8_t* src = reinterpret_cast(StartPointer); - switch (layout) + if (!layout->hasNormals && !layout->hasUVs && posStride == 3ull * floatBytes && !needsByteSwap && !trackAABB) { - case ELayoutKind::XYZ: - { - if (posStride == 3ull * floatBytes && !needsByteSwap && !trackAABB) - { - const size_t batchBytes = batchVertices * 3ull * floatBytes; - std::memcpy(posBase, src, batchBytes); - src += batchBytes; - posBase += batchBytes; - } - else - { - for (size_t v = 0ull; v < batchVertices; ++v) - { - const hlsl::float32_t3 position = decodeVector.operator()(src); - storeVector.operator()(posBase, position); - if (trackAABB) - hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); - src += 3ull * floatBytes; - posBase += posStride; - } - } - } - break; - case ELayoutKind::XYZ_N: + const size_t batchBytes = batchVertices * 3ull * floatBytes; + std::memcpy(posBase, src, batchBytes); + src += batchBytes; + posBase += batchBytes; + } + else + { + for (size_t v = 0ull; v < batchVertices; ++v) { - for (size_t v = 0ull; v < batchVertices; ++v) + const 
hlsl::float32_t3 position = decodeVector.operator()(src); + storeVector.operator()(posBase, position); + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); + src += 3ull * floatBytes; + posBase += posStride; + if (layout->hasNormals) { - const hlsl::float32_t3 position = decodeVector.operator()(src); - storeVector.operator()(posBase, position); - if (trackAABB) - hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); - src += 3ull * floatBytes; - posBase += posStride; - storeVector.operator()(normalBase, decodeVector.operator()(src)); - src += 3ull * floatBytes; - normalBase += normalStride; + storeVector.operator()(normalBase, decodeVector.operator()(src)); + src += 3ull * floatBytes; + normalBase += normalStride; } - } - break; - case ELayoutKind::XYZ_N_UV: - { - for (size_t v = 0ull; v < batchVertices; ++v) + if (layout->hasUVs) { - const hlsl::float32_t3 position = decodeVector.operator()(src); - storeVector.operator()(posBase, position); - if (trackAABB) - hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); - src += 3ull * floatBytes; - posBase += posStride; - storeVector.operator()(normalBase, decodeVector.operator()(src)); - src += 3ull * floatBytes; - normalBase += normalStride; - storeVector.operator()(uvBase, decodeVector.operator()(src)); - src += 2ull * floatBytes; - uvBase += uvStride; + storeVector.operator()(uvBase, decodeVector.operator()(src)); + src += 2ull * floatBytes; + uvBase += uvStride; } } - break; } - const size_t consumed = batchVertices * srcBytesPerVertex; + const size_t consumed = batchVertices * layout->srcBytesPerVertex; StartPointer += consumed; remainingVertices -= batchVertices; } - const size_t posAdvance = el.Count * posStride; - vertAttrIts[0].ptr += posAdvance; - vertAttrIts[1].ptr += posAdvance; - vertAttrIts[2].ptr += posAdvance; - if (layout == ELayoutKind::XYZ_N || layout == ELayoutKind::XYZ_N_UV) - { - const size_t normalAdvance = el.Count * normalStride; - 
vertAttrIts[3].ptr += normalAdvance; - vertAttrIts[4].ptr += normalAdvance; - vertAttrIts[5].ptr += normalAdvance; - } - if (layout == ELayoutKind::XYZ_N_UV) - { - const size_t uvAdvance = el.Count * uvStride; - vertAttrIts[6].ptr += uvAdvance; - vertAttrIts[7].ptr += uvAdvance; - } + advanceTuple(0u, 3u, el.Count * posStride); + if (layout->hasNormals) + advanceTuple(3u, 3u, el.Count * normalStride); + if (layout->hasUVs) + advanceTuple(6u, 2u, el.Count * uvStride); return EFastVertexReadResult::Success; } void readVertex(const IAssetLoader::SAssetLoadParams& _params, const SElement& el) diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index cd87939d2f..b63f898065 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -222,17 +222,6 @@ void appendFloat(std::string& out, double value) out.resize(oldSize + static_cast(cursor - begin)); } -void appendVec(std::string& out, const double* values, size_t count, bool flipVectors = false) -{ - constexpr size_t xID = 0u; - for (size_t i = 0u; i < count; ++i) - { - const bool flip = flipVectors && i == xID; - appendFloat(out, flip ? -values[i] : values[i]); - out.push_back(' '); - } -} - inline bool writeTypedViewBinary(const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const EPlyScalarType scalarType, const bool flipVectors, uint8_t*& dst) { if (!dst) @@ -607,25 +596,9 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ bool writeOk = false; size_t outputBytes = 0ull; - if (binary) + auto writePayload = [&](const uint8_t* bodyData, const size_t bodySize) -> bool { - const size_t vertexStride = - static_cast(positionMeta.byteSize) * 3ull + - (writeNormals ? static_cast(normalMeta.byteSize) * 3ull : 0ull) + - (uvView ? 
static_cast(uvMeta.byteSize) * 2ull : 0ull) + - extraAuxBytesPerVertex; - const size_t faceStride = sizeof(uint8_t) + (write16BitIndices ? sizeof(uint16_t) : sizeof(uint32_t)) * 3u; - const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; - - core::vector body; - body.resize(bodySize); - if (!writeBinary(input, body.data())) - { - _params.logger.log("PLY writer: binary payload generation failed.", system::ILogger::ELL_ERROR); - return false; - } - - const size_t outputSize = header.size() + body.size(); + const size_t outputSize = header.size() + bodySize; const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true, fileMappable); if (!ioPlan.isValid()) @@ -638,7 +611,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const SInterchangeIO::SBufferRange writeBuffers[] = { { .data = reinterpret_cast(header.data()), .byteCount = header.size() }, - { .data = body.data(), .byteCount = body.size() } + { .data = bodyData, .byteCount = bodySize } }; writeOk = SInterchangeIO::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); @@ -669,6 +642,25 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); return writeOk; + }; + if (binary) + { + const size_t vertexStride = + static_cast(positionMeta.byteSize) * 3ull + + (writeNormals ? static_cast(normalMeta.byteSize) * 3ull : 0ull) + + (uvView ? static_cast(uvMeta.byteSize) * 2ull : 0ull) + + extraAuxBytesPerVertex; + const size_t faceStride = sizeof(uint8_t) + (write16BitIndices ? 
sizeof(uint16_t) : sizeof(uint32_t)) * 3u; + const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; + + core::vector body; + body.resize(bodySize); + if (!writeBinary(input, body.data())) + { + _params.logger.log("PLY writer: binary payload generation failed.", system::ILogger::ELL_ERROR); + return false; + } + return writePayload(body.data(), body.size()); } std::string body; @@ -678,51 +670,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ _params.logger.log("PLY writer: text payload generation failed.", system::ILogger::ELL_ERROR); return false; } - - const size_t outputSize = header.size() + body.size(); - const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true, fileMappable); - if (!ioPlan.isValid()) - { - _params.logger.log("PLY writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); - return false; - } - - outputBytes = outputSize; - const SInterchangeIO::SBufferRange writeBuffers[] = - { - { .data = reinterpret_cast(header.data()), .byteCount = header.size() }, - { .data = reinterpret_cast(body.data()), .byteCount = body.size() } - }; - writeOk = SInterchangeIO::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); - const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); - const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if (SInterchangeIO::isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) - { - _params.logger.log( - "PLY writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", - system::ILogger::ELL_WARNING, - file->getFileName().string().c_str(), - static_cast(ioTelemetry.callCount), - static_cast(ioMinWrite), - static_cast(ioAvgWrite)); - } - _params.logger.log( - "PLY writer stats: file=%s bytes=%llu vertices=%llu faces=%llu binary=%d 
io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", - system::ILogger::ELL_PERFORMANCE, - file->getFileName().string().c_str(), - static_cast(outputBytes), - static_cast(vertexCount), - static_cast(faceCount), - binary ? 1 : 0, - static_cast(ioTelemetry.callCount), - static_cast(ioMinWrite), - static_cast(ioAvgWrite), - system::to_string(_params.ioPolicy.strategy).c_str(), - system::to_string(ioPlan.strategy).c_str(), - static_cast(ioPlan.chunkSizeBytes()), - ioPlan.reason); - return writeOk; + return writePayload(reinterpret_cast(body.data()), body.size()); } bool ply_writer_detail::writeBinary( diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index e807e69d82..d39dc801f6 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -442,7 +442,6 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(posView); const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; const bool hasImplicitTriangleIndices = !geom->getIndexView(); - const float handednessSign = flipHandedness ? 
-1.f : 1.f; auto decodePosition = [&](const uint32_t ix, hlsl::float32_t3& out)->bool { @@ -507,6 +506,24 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) std::memcpy(dst, &attribute, stl_writer_detail::BinaryTriangleAttributeBytes); dst += stl_writer_detail::BinaryTriangleAttributeBytes; }; + auto prepareVertices = [&](const hlsl::float32_t3& p0, const hlsl::float32_t3& p1, const hlsl::float32_t3& p2, hlsl::float32_t3& vertex1, hlsl::float32_t3& vertex2, hlsl::float32_t3& vertex3)->void + { + vertex1 = p2; + vertex2 = p1; + vertex3 = p0; + if (flipHandedness) + { + vertex1.x = -vertex1.x; + vertex2.x = -vertex2.x; + vertex3.x = -vertex3.x; + } + }; + auto computePlaneNormal = [&](const hlsl::float32_t3& vertex1, const hlsl::float32_t3& vertex2, const hlsl::float32_t3& vertex3)->hlsl::float32_t3 + { + const hlsl::float32_t3 planeNormal = hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); + const float planeNormalLen2 = hlsl::dot(planeNormal, planeNormal); + return planeNormalLen2 > 0.f ? 
hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); + }; const bool hasFastTightPath = hasImplicitTriangleIndices && (tightPositions != nullptr) && (!hasNormals || (tightNormals != nullptr)); if (hasFastTightPath && hasNormals) @@ -533,12 +550,10 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) if (!computeFaceColor(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u, faceColor)) return false; - const hlsl::float32_t3 vertex1 = posTri[2u]; - const hlsl::float32_t3 vertex2 = posTri[1u]; - const hlsl::float32_t3 vertex3 = posTri[0u]; - const float vertex1x = vertex1.x * handednessSign; - const float vertex2x = vertex2.x * handednessSign; - const float vertex3x = vertex3.x * handednessSign; + hlsl::float32_t3 vertex1 = {}; + hlsl::float32_t3 vertex2 = {}; + hlsl::float32_t3 vertex3 = {}; + prepareVertices(posTri[0u], posTri[1u], posTri[2u], vertex1, vertex2, vertex3); hlsl::float32_t3 attrNormal = nrmTri[0u]; if (flipHandedness) @@ -546,9 +561,9 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) writeRecord( attrNormal.x, attrNormal.y, attrNormal.z, - vertex1x, vertex1.y, vertex1.z, - vertex2x, vertex2.y, vertex2.z, - vertex3x, vertex3.y, vertex3.z, + vertex1.x, vertex1.y, vertex1.z, + vertex2.x, vertex2.y, vertex2.z, + vertex3.x, vertex3.y, vertex3.z, faceColor); } } @@ -560,16 +575,12 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) if (!computeFaceColor(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u, faceColor)) return false; - const hlsl::float32_t3 vertex1 = posTri[2u]; - const hlsl::float32_t3 vertex2 = posTri[1u]; - const hlsl::float32_t3 vertex3 = posTri[0u]; - const float vertex1x = vertex1.x * handednessSign; - const float vertex2x = vertex2.x * handednessSign; - const float vertex3x = vertex3.x * handednessSign; + hlsl::float32_t3 vertex1 = {}; + hlsl::float32_t3 vertex2 = {}; + hlsl::float32_t3 vertex3 = {}; + prepareVertices(posTri[0u], 
posTri[1u], posTri[2u], vertex1, vertex2, vertex3); - float normalX = 0.f; - float normalY = 0.f; - float normalZ = 0.f; + hlsl::float32_t3 normal = hlsl::float32_t3(0.f, 0.f, 0.f); hlsl::float32_t3 attrNormal = nrmTri[0u]; if (attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) attrNormal = nrmTri[1u]; @@ -579,38 +590,17 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) { if (flipHandedness) attrNormal.x = -attrNormal.x; - normalX = attrNormal.x; - normalY = attrNormal.y; - normalZ = attrNormal.z; + normal = attrNormal; } - if (normalX == 0.f && normalY == 0.f && normalZ == 0.f) - { - const float edge21x = vertex2x - vertex1x; - const float edge21y = vertex2.y - vertex1.y; - const float edge21z = vertex2.z - vertex1.z; - const float edge31x = vertex3x - vertex1x; - const float edge31y = vertex3.y - vertex1.y; - const float edge31z = vertex3.z - vertex1.z; - - normalX = edge21y * edge31z - edge21z * edge31y; - normalY = edge21z * edge31x - edge21x * edge31z; - normalZ = edge21x * edge31y - edge21y * edge31x; - const float planeNormalLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; - if (planeNormalLen2 > 0.f) - { - const float invLen = 1.f / std::sqrt(planeNormalLen2); - normalX *= invLen; - normalY *= invLen; - normalZ *= invLen; - } - } + if (normal.x == 0.f && normal.y == 0.f && normal.z == 0.f) + normal = computePlaneNormal(vertex1, vertex2, vertex3); writeRecord( - normalX, normalY, normalZ, - vertex1x, vertex1.y, vertex1.z, - vertex2x, vertex2.y, vertex2.z, - vertex3x, vertex3.y, vertex3.z, + normal.x, normal.y, normal.z, + vertex1.x, vertex1.y, vertex1.z, + vertex2.x, vertex2.y, vertex2.z, + vertex3.x, vertex3.y, vertex3.z, faceColor); } } @@ -624,37 +614,17 @@ bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) if (!computeFaceColor(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u, faceColor)) return false; - const hlsl::float32_t3 vertex1 = posTri[2u]; - const 
hlsl::float32_t3 vertex2 = posTri[1u]; - const hlsl::float32_t3 vertex3 = posTri[0u]; - const float vertex1x = vertex1.x * handednessSign; - const float vertex2x = vertex2.x * handednessSign; - const float vertex3x = vertex3.x * handednessSign; - - const float edge21x = vertex2x - vertex1x; - const float edge21y = vertex2.y - vertex1.y; - const float edge21z = vertex2.z - vertex1.z; - const float edge31x = vertex3x - vertex1x; - const float edge31y = vertex3.y - vertex1.y; - const float edge31z = vertex3.z - vertex1.z; - - float normalX = edge21y * edge31z - edge21z * edge31y; - float normalY = edge21z * edge31x - edge21x * edge31z; - float normalZ = edge21x * edge31y - edge21y * edge31x; - const float planeNormalLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; - if (planeNormalLen2 > 0.f) - { - const float invLen = 1.f / std::sqrt(planeNormalLen2); - normalX *= invLen; - normalY *= invLen; - normalZ *= invLen; - } + hlsl::float32_t3 vertex1 = {}; + hlsl::float32_t3 vertex2 = {}; + hlsl::float32_t3 vertex3 = {}; + prepareVertices(posTri[0u], posTri[1u], posTri[2u], vertex1, vertex2, vertex3); + const hlsl::float32_t3 normal = computePlaneNormal(vertex1, vertex2, vertex3); writeRecord( - normalX, normalY, normalZ, - vertex1x, vertex1.y, vertex1.z, - vertex2x, vertex2.y, vertex2.z, - vertex3x, vertex3.y, vertex3.z, + normal.x, normal.y, normal.z, + vertex1.x, vertex1.y, vertex1.z, + vertex2.x, vertex2.y, vertex2.z, + vertex3.x, vertex3.y, vertex3.z, faceColor); } } @@ -835,18 +805,16 @@ bool writeFaceText( std::array faceText = {}; char* cursor = faceText.data(); char* const end = faceText.data() + faceText.size(); - auto appendVertex = [&](const hlsl::float32_t3& vertex) -> bool - { - return appendLiteral(cursor, end, " vertex ", sizeof(" vertex ") - 1ull) && appendVectorAsAsciiLine(cursor, end, vertex); - }; + const hlsl::float32_t3 vertices[3] = { vertex1, vertex2, vertex3 }; if (!appendLiteral(cursor, end, "facet normal ", sizeof("facet normal ") 
- 1ull)) return false; if (!appendVectorAsAsciiLine(cursor, end, normal)) return false; if (!appendLiteral(cursor, end, " outer loop\n", sizeof(" outer loop\n") - 1ull)) return false; - if (!appendVertex(vertex1) || !appendVertex(vertex2) || !appendVertex(vertex3)) - return false; + for (const auto& vertex : vertices) + if (!appendLiteral(cursor, end, " vertex ", sizeof(" vertex ") - 1ull) || !appendVectorAsAsciiLine(cursor, end, vertex)) + return false; if (!appendLiteral(cursor, end, " endloop\n", sizeof(" endloop\n") - 1ull)) return false; if (!appendLiteral(cursor, end, "endfacet\n", sizeof("endfacet\n") - 1ull)) From 9bbc3d3bfcd73cf6e0843795b3eda708b12b88f6 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 13:09:57 +0100 Subject: [PATCH 073/118] Refactor mesh interchange helpers --- .../nbl/asset/interchange/SInterchangeIO.h | 16 +- .../asset/interchange/COBJMeshFileLoader.cpp | 1398 +++--- src/nbl/asset/interchange/COBJMeshWriter.cpp | 305 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 3982 ++++++++--------- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 1231 +++-- .../asset/interchange/CSTLMeshFileLoader.cpp | 1440 +++--- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 1383 +++--- .../asset/interchange/SGeometryViewDecode.h | 106 + src/nbl/asset/interchange/impl/SBinaryData.h | 51 + src/nbl/asset/interchange/impl/SFileAccess.h | 60 + .../asset/interchange/impl/SIODiagnostics.h | 40 + src/nbl/asset/interchange/impl/STextParse.h | 171 + 12 files changed, 5020 insertions(+), 5163 deletions(-) create mode 100644 src/nbl/asset/interchange/SGeometryViewDecode.h create mode 100644 src/nbl/asset/interchange/impl/SBinaryData.h create mode 100644 src/nbl/asset/interchange/impl/SFileAccess.h create mode 100644 src/nbl/asset/interchange/impl/SIODiagnostics.h create mode 100644 src/nbl/asset/interchange/impl/STextParse.h diff --git a/include/nbl/asset/interchange/SInterchangeIO.h b/include/nbl/asset/interchange/SInterchangeIO.h index 
ff911d9e6b..0bdf5af8ef 100644 --- a/include/nbl/asset/interchange/SInterchangeIO.h +++ b/include/nbl/asset/interchange/SInterchangeIO.h @@ -103,7 +103,7 @@ class SInterchangeIO // When ioTime is non-null it also reports wall time in TimeUnit. Default TimeUnit is milliseconds. template> requires std::same_as> - static inline bool readFileWithPolicy(system::IFile* file, uint8_t* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SReadTelemetry* ioTelemetry = nullptr, TimeUnit* ioTime = nullptr) + static inline bool readFileWithPolicy(system::IFile* file, void* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SReadTelemetry* ioTelemetry = nullptr, TimeUnit* ioTime = nullptr) { using clock_t = std::chrono::high_resolution_clock; const auto ioStart = ioTime ? clock_t::now() : clock_t::time_point{}; @@ -118,11 +118,12 @@ class SInterchangeIO return finalize(false); if (bytes == 0ull) return finalize(true); + auto* out = reinterpret_cast(dst); switch (ioPlan.strategy) { case SResolvedFileIOPolicy::Strategy::WholeFile: - return finalize(readFileExact(file, dst, offset, bytes, ioTelemetry)); + return finalize(readFileExact(file, out, offset, bytes, ioTelemetry)); case SResolvedFileIOPolicy::Strategy::Chunked: default: { @@ -132,7 +133,7 @@ class SInterchangeIO { const size_t toRead = static_cast(std::min(chunkSizeBytes, bytes - bytesRead)); system::IFile::success_t success; - file->read(success, dst + bytesRead, offset + bytesRead, toRead); + file->read(success, out + bytesRead, offset + bytesRead, toRead); if (!success) return false; const size_t processed = success.getBytesProcessed(); @@ -150,7 +151,7 @@ class SInterchangeIO // Describes one contiguous output buffer written as part of a larger stream. 
struct SBufferRange { - const uint8_t* data = nullptr; + const void* data = nullptr; size_t byteCount = 0ull; }; @@ -167,6 +168,7 @@ class SInterchangeIO return false; if (buffer.byteCount == 0ull) continue; + const auto* data = reinterpret_cast(buffer.data); size_t writtenTotal = 0ull; while (writtenTotal < buffer.byteCount) @@ -176,7 +178,7 @@ class SInterchangeIO (buffer.byteCount - writtenTotal) : static_cast(std::min(chunkSizeBytes, buffer.byteCount - writtenTotal)); system::IFile::success_t success; - file->write(success, buffer.data + writtenTotal, fileOffset + writtenTotal, toWrite); + file->write(success, data + writtenTotal, fileOffset + writtenTotal, toWrite); if (!success) return false; const size_t written = success.getBytesProcessed(); @@ -199,14 +201,14 @@ class SInterchangeIO } // Single-buffer convenience wrapper over writeBuffersWithPolicyAtOffset. - static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) + static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = { { .data = data, .byteCount = byteCount } }; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } // Single-buffer convenience wrapper over writeBuffersWithPolicy. 
- static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const uint8_t* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) + static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = { { .data = data, .byteCount = byteCount } }; return writeBuffersWithPolicy(file, ioPlan, buffers, ioTelemetry); diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 2baed21f97..1983ccbaac 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -1,3 +1,5 @@ +#ifdef _NBL_COMPILE_WITH_OBJ_LOADER_ + // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h @@ -5,6 +7,7 @@ #include "nbl/core/declarations.h" +#include "SOBJPolygonGeometryAuxLayout.h" #include "nbl/asset/IAssetManager.h" #include "nbl/asset/ICPUGeometryCollection.h" #include "nbl/asset/interchange/SGeometryContentHash.h" @@ -13,22 +16,19 @@ #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" #include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" -#include "SOBJPolygonGeometryAuxLayout.h" - -#ifdef _NBL_COMPILE_WITH_OBJ_LOADER_ #include "nbl/system/IFile.h" #include "COBJMeshFileLoader.h" +#include "impl/SFileAccess.h" +#include "impl/SIODiagnostics.h" +#include "impl/STextParse.h" #include #include -#include #include #include -#include #include -#include namespace nbl::asset { @@ -36,302 +36,239 @@ namespace nbl::asset namespace { -struct ObjVertexDedupNode +struct Parse { - int32_t uv = -1; - int32_t normal = -1; - uint32_t smoothingGroup = 0u; - uint32_t outIndex 
= 0u; - int32_t next = -1; + using Common = impl::TextParse; + + struct VertexDedupNode + { + int32_t uv = -1; + int32_t normal = -1; + uint32_t smoothingGroup = 0u; + uint32_t outIndex = 0u; + int32_t next = -1; + }; + + static bool resolveIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) + { + if (rawIndex > 0) + { + const uint64_t oneBased = static_cast(rawIndex); + if (oneBased == 0ull) + return false; + const uint64_t zeroBased = oneBased - 1ull; + if (zeroBased >= elementCount) + return false; + resolved = static_cast(zeroBased); + return true; + } + + const int64_t zeroBased = static_cast(elementCount) + static_cast(rawIndex); + if (zeroBased < 0 || zeroBased >= static_cast(elementCount)) + return false; + resolved = static_cast(zeroBased); + return true; + } + + static void parseSmoothingGroup(const char* linePtr, const char* const lineEnd, uint32_t& outGroup) + { + Common::skipInlineWhitespace(linePtr, lineEnd); + if (linePtr >= lineEnd) + { + outGroup = 0u; + return; + } + + const char* const tokenStart = linePtr; + while (linePtr < lineEnd && !Common::isInlineWhitespace(*linePtr)) + ++linePtr; + const std::string_view token(tokenStart, static_cast(linePtr - tokenStart)); + + if (token.size() == 2u && std::tolower(token[0]) == 'o' && std::tolower(token[1]) == 'n') + { + outGroup = 1u; + return; + } + if (token.size() == 3u && std::tolower(token[0]) == 'o' && std::tolower(token[1]) == 'f' && std::tolower(token[2]) == 'f') + { + outGroup = 0u; + return; + } + + uint32_t value = 0u; + outGroup = Common::parseExactNumber(token, value) ? 
value : 0u; + } + + static std::string parseIdentifier(const char* linePtr, const char* const lineEnd, const std::string_view fallback) + { + const char* endPtr = lineEnd; + Common::skipInlineWhitespace(linePtr, lineEnd); + while (endPtr > linePtr && Common::isInlineWhitespace(endPtr[-1])) + --endPtr; + + if (linePtr >= endPtr) + return std::string(fallback); + return std::string(linePtr, static_cast(endPtr - linePtr)); + } + + static bool parseTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, std::array& out, const size_t posCount, const size_t uvCount, const size_t normalCount) + { + const char* ptr = lineStart; + for (uint32_t corner = 0u; corner < 3u; ++corner) + { + Common::skipInlineWhitespace(ptr, lineEnd); + if (ptr >= lineEnd || !core::isdigit(*ptr)) + return false; + + int32_t posIx = -1; + { + uint32_t value = 0u; + if (!Common::parseNonZeroNumber(ptr, lineEnd, value)) + return false; + if (value > static_cast(std::numeric_limits::max())) + return false; + if (value > posCount) + return false; + posIx = value - 1u; + } + if (ptr >= lineEnd || *ptr != '/') + return false; + ++ptr; + + int32_t uvIx = -1; + { + uint32_t value = 0u; + if (!Common::parseNonZeroNumber(ptr, lineEnd, value)) + return false; + if (value > static_cast(std::numeric_limits::max())) + return false; + if (value > uvCount) + return false; + uvIx = value - 1u; + } + if (ptr >= lineEnd || *ptr != '/') + return false; + ++ptr; + + int32_t normalIx = -1; + { + uint32_t value = 0u; + if (!Common::parseNonZeroNumber(ptr, lineEnd, value)) + return false; + if (value > static_cast(std::numeric_limits::max())) + return false; + if (value > normalCount) + return false; + normalIx = value - 1u; + } + + out[corner] = hlsl::int32_t3(posIx, uvIx, normalIx); + } + + Common::skipInlineWhitespace(ptr, lineEnd); + return ptr == lineEnd; + } + + static bool parseFaceVertexToken(const char*& linePtr, const char* const lineEnd, hlsl::int32_t3& idx, const size_t posCount, 
const size_t uvCount, const size_t normalCount) + { + Common::skipInlineWhitespace(linePtr, lineEnd); + if (linePtr >= lineEnd) + return false; + + idx = hlsl::int32_t3(-1, -1, -1); + + const char* ptr = linePtr; + if (*ptr != '-' && *ptr != '+') + { + uint32_t posRaw = 0u; + if (!Common::parseNonZeroNumber(ptr, lineEnd, posRaw)) + return false; + if (posRaw > static_cast(std::numeric_limits::max())) + return false; + if (posRaw > posCount) + return false; + idx.x = posRaw - 1u; + + if (ptr < lineEnd && *ptr == '/') + { + ++ptr; + if (ptr < lineEnd && *ptr != '/') + { + uint32_t uvRaw = 0u; + if (!Common::parseNonZeroNumber(ptr, lineEnd, uvRaw)) + return false; + if (uvRaw > static_cast(std::numeric_limits::max())) + return false; + if (uvRaw > uvCount) + return false; + idx.y = uvRaw - 1u; + } + + if (ptr < lineEnd && *ptr == '/') + { + ++ptr; + if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) + { + uint32_t normalRaw = 0u; + if (!Common::parseNonZeroNumber(ptr, lineEnd, normalRaw)) + return false; + if (normalRaw > static_cast(std::numeric_limits::max())) + return false; + if (normalRaw > normalCount) + return false; + idx.z = normalRaw - 1u; + } + } + else if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) + return false; + } + else if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) + return false; + } + else + { + int32_t raw = 0; + if (!Common::parseNonZeroNumber(ptr, lineEnd, raw)) + return false; + if (!resolveIndex(raw, posCount, idx.x)) + return false; + + if (ptr < lineEnd && *ptr == '/') + { + ++ptr; + if (ptr < lineEnd && *ptr != '/') + { + if (!Common::parseNonZeroNumber(ptr, lineEnd, raw)) + return false; + if (!resolveIndex(raw, uvCount, idx.y)) + return false; + } + + if (ptr < lineEnd && *ptr == '/') + { + ++ptr; + if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) + { + if (!Common::parseNonZeroNumber(ptr, lineEnd, raw)) + return false; + if (!resolveIndex(raw, normalCount, idx.z)) + return false; + } + } + else if (ptr < 
lineEnd && !Common::isInlineWhitespace(*ptr)) + return false; + } + else if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) + return false; + } + + if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) + return false; + linePtr = ptr; + return true; + } }; -inline bool isObjInlineWhitespace(const char c) -{ - return c == ' ' || c == '\t' || c == '\v' || c == '\f'; -} - -inline bool isObjDigit(const char c) -{ - return std::isdigit(static_cast(c)) != 0; -} - -inline bool parseObjFloat(const char*& ptr, const char* const end, float& out) -{ - const auto parseResult = fast_float::from_chars(ptr, end, out); - if (parseResult.ec != std::errc() || parseResult.ptr == ptr) - return false; - ptr = parseResult.ptr; - return true; -} - -bool readTextFileWithPolicy(system::IFile* file, char* dst, size_t byteCount, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry& ioTelemetry) -{ - return SInterchangeIO::readFileWithPolicy(file, reinterpret_cast(dst), 0ull, byteCount, ioPlan, &ioTelemetry); -} - -inline bool parseUnsignedObjIndex(const char*& ptr, const char* const end, uint32_t& out) -{ - uint32_t value = 0u; - const auto parseResult = std::from_chars(ptr, end, value); - if (parseResult.ec != std::errc() || parseResult.ptr == ptr) - return false; - if (value == 0u || value > static_cast(std::numeric_limits::max())) - return false; - ptr = parseResult.ptr; - out = value; - return true; -} - -inline void parseObjSmoothingGroup(const char* linePtr, const char* const lineEnd, uint32_t& outGroup) -{ - while (linePtr < lineEnd && isObjInlineWhitespace(*linePtr)) - ++linePtr; - - if (linePtr >= lineEnd) - { - outGroup = 0u; - return; - } - - const char* const tokenStart = linePtr; - while (linePtr < lineEnd && !isObjInlineWhitespace(*linePtr)) - ++linePtr; - const std::string_view token(tokenStart, static_cast(linePtr - tokenStart)); - - if (token.size() == 2u && - static_cast(std::tolower(static_cast(token[0]))) == 'o' && - 
static_cast(std::tolower(static_cast(token[1]))) == 'n') - { - outGroup = 1u; - return; - } - if (token.size() == 3u && - static_cast(std::tolower(static_cast(token[0]))) == 'o' && - static_cast(std::tolower(static_cast(token[1]))) == 'f' && - static_cast(std::tolower(static_cast(token[2]))) == 'f') - { - outGroup = 0u; - return; - } - - uint32_t value = 0u; - const auto parseResult = std::from_chars(token.data(), token.data() + token.size(), value); - outGroup = (parseResult.ec == std::errc() && parseResult.ptr == token.data() + token.size()) ? value : 0u; -} - -inline std::string parseObjIdentifier(const char* linePtr, const char* const lineEnd, const std::string_view fallback) -{ - const char* endPtr = lineEnd; - while (linePtr < lineEnd && isObjInlineWhitespace(*linePtr)) - ++linePtr; - while (endPtr > linePtr && isObjInlineWhitespace(endPtr[-1])) - --endPtr; - - if (linePtr >= endPtr) - return std::string(fallback); - return std::string(linePtr, static_cast(endPtr - linePtr)); -} - -inline bool parseObjTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, int32_t* idx0, int32_t* idx1, int32_t* idx2, const size_t posCount, const size_t uvCount, const size_t normalCount) -{ - const char* ptr = lineStart; - int32_t* const out[3] = { idx0, idx1, idx2 }; - for (uint32_t corner = 0u; corner < 3u; ++corner) - { - while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) - ++ptr; - if (ptr >= lineEnd || !isObjDigit(*ptr)) - return false; - - int32_t posIx = -1; - { - uint32_t value = 0u; - if (!parseUnsignedObjIndex(ptr, lineEnd, value)) - return false; - if (value > posCount) - return false; - posIx = static_cast(value - 1u); - } - if (ptr >= lineEnd || *ptr != '/') - return false; - ++ptr; - - int32_t uvIx = -1; - { - uint32_t value = 0u; - if (!parseUnsignedObjIndex(ptr, lineEnd, value)) - return false; - if (value > uvCount) - return false; - uvIx = static_cast(value - 1u); - } - if (ptr >= lineEnd || *ptr != '/') - return false; - ++ptr; 
- - int32_t normalIx = -1; - { - uint32_t value = 0u; - if (!parseUnsignedObjIndex(ptr, lineEnd, value)) - return false; - if (value > normalCount) - return false; - normalIx = static_cast(value - 1u); - } - - int32_t* const dst = out[corner]; - dst[0] = posIx; - dst[1] = uvIx; - dst[2] = normalIx; - } - - while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) - ++ptr; - return ptr == lineEnd; -} - -inline bool parseSignedObjIndex(const char*& ptr, const char* const end, int32_t& out) -{ - int32_t value = 0; - const auto parseResult = std::from_chars(ptr, end, value); - if (parseResult.ec != std::errc() || parseResult.ptr == ptr) - return false; - if (value == 0) - return false; - ptr = parseResult.ptr; - out = value; - return true; -} - -inline bool resolveObjIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) -{ - if (rawIndex > 0) - { - const uint64_t oneBased = static_cast(rawIndex); - if (oneBased == 0ull) - return false; - const uint64_t zeroBased = oneBased - 1ull; - if (zeroBased >= elementCount) - return false; - resolved = static_cast(zeroBased); - return true; - } - - const int64_t zeroBased = static_cast(elementCount) + static_cast(rawIndex); - if (zeroBased < 0 || zeroBased >= static_cast(elementCount)) - return false; - resolved = static_cast(zeroBased); - return true; -} - -inline bool parseObjFaceVertexTokenFast(const char*& linePtr, const char* const lineEnd, int32_t* idx, const size_t posCount, const size_t uvCount, const size_t normalCount) -{ - if (!idx) - return false; - - while (linePtr < lineEnd && isObjInlineWhitespace(*linePtr)) - ++linePtr; - if (linePtr >= lineEnd) - return false; - - idx[0] = -1; - idx[1] = -1; - idx[2] = -1; - - const char* ptr = linePtr; - if (*ptr != '-' && *ptr != '+') - { - uint32_t posRaw = 0u; - if (!parseUnsignedObjIndex(ptr, lineEnd, posRaw)) - return false; - if (posRaw > posCount) - return false; - idx[0] = static_cast(posRaw - 1u); - - if (ptr < lineEnd && *ptr == '/') - { - ++ptr; - if 
(ptr < lineEnd && *ptr != '/') - { - uint32_t uvRaw = 0u; - if (!parseUnsignedObjIndex(ptr, lineEnd, uvRaw)) - return false; - if (uvRaw > uvCount) - return false; - idx[1] = static_cast(uvRaw - 1u); - } - - if (ptr < lineEnd && *ptr == '/') - { - ++ptr; - if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) - { - uint32_t normalRaw = 0u; - if (!parseUnsignedObjIndex(ptr, lineEnd, normalRaw)) - return false; - if (normalRaw > normalCount) - return false; - idx[2] = static_cast(normalRaw - 1u); - } - } - else if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) - { - return false; - } - } - else if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) - { - return false; - } - } - else - { - int32_t raw = 0; - if (!parseSignedObjIndex(ptr, lineEnd, raw)) - return false; - if (!resolveObjIndex(raw, posCount, idx[0])) - return false; - - if (ptr < lineEnd && *ptr == '/') - { - ++ptr; - - if (ptr < lineEnd && *ptr != '/') - { - if (!parseSignedObjIndex(ptr, lineEnd, raw)) - return false; - if (!resolveObjIndex(raw, uvCount, idx[1])) - return false; - } - - if (ptr < lineEnd && *ptr == '/') - { - ++ptr; - if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) - { - if (!parseSignedObjIndex(ptr, lineEnd, raw)) - return false; - if (!resolveObjIndex(raw, normalCount, idx[2])) - return false; - } - } - else if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) - { - return false; - } - } - else if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) - { - return false; - } - } - - if (ptr < lineEnd && !isObjInlineWhitespace(*ptr)) - return false; - linePtr = ptr; - return true; -} - } COBJMeshFileLoader::COBJMeshFileLoader(IAssetManager*) @@ -342,67 +279,68 @@ COBJMeshFileLoader::~COBJMeshFileLoader() = default; bool COBJMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr) const { - if (!_file) - return false; - const auto fileSize = _file->getSize(); - if (fileSize <= 0) - return false; - - constexpr size_t ProbeBytes = 4096ull; - const size_t bytesToRead = 
std::min(ProbeBytes, static_cast(fileSize)); - std::array probe = {}; - system::IFile::success_t succ; - _file->read(succ, probe.data(), 0ull, bytesToRead); - if (!succ || bytesToRead == 0ull) - return false; - - const char* ptr = probe.data(); - const char* const end = probe.data() + bytesToRead; - - if ((end - ptr) >= 3 && static_cast(ptr[0]) == 0xEFu && static_cast(ptr[1]) == 0xBBu && static_cast(ptr[2]) == 0xBFu) - ptr += 3; - - while (ptr < end) - { - while (ptr < end && (*ptr == ' ' || *ptr == '\t' || *ptr == '\r' || *ptr == '\n')) - ++ptr; - if (ptr >= end) - break; - - if (*ptr == '#') - { - while (ptr < end && *ptr != '\n') - ++ptr; - continue; - } - - switch (static_cast(std::tolower(static_cast(*ptr)))) - { - case 'v': - case 'f': - case 'o': - case 'g': - case 's': - case 'u': - case 'm': - case 'l': - case 'p': - return true; - default: - return false; - } - } - return false; + if (!_file) + return false; + const auto fileSize = _file->getSize(); + if (fileSize <= 0) + return false; + + constexpr size_t ProbeBytes = 4096ull; + const size_t bytesToRead = std::min(ProbeBytes, static_cast(fileSize)); + std::array probe = {}; + system::IFile::success_t succ; + _file->read(succ, probe.data(), 0ull, bytesToRead); + if (!succ || bytesToRead == 0ull) + return false; + + const char* ptr = probe.data(); + const char* const end = probe.data() + bytesToRead; + if ((end - ptr) >= 3 && static_cast(ptr[0]) == 0xEFu && static_cast(ptr[1]) == 0xBBu && static_cast(ptr[2]) == 0xBFu) + ptr += 3; + + while (ptr < end) + { + while (ptr < end && (*ptr == ' ' || *ptr == '\t' || *ptr == '\r' || *ptr == '\n')) + ++ptr; + if (ptr >= end) + break; + + if (*ptr == '#') + { + while (ptr < end && *ptr != '\n') + ++ptr; + continue; + } + + switch (std::tolower(*ptr)) + { + case 'v': + case 'f': + case 'o': + case 'g': + case 's': + case 'u': + case 'm': + case 'l': + case 'p': + return true; + default: + return false; + } + } + return false; } const char** 
COBJMeshFileLoader::getAssociatedFileExtensions() const { - static const char* ext[] = { "obj", nullptr }; - return ext; + static const char* ext[] = { "obj", nullptr }; + return ext; } -asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride*, uint32_t) -{ +asset::SAssetBundle COBJMeshFileLoader::loadAsset( + system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, + asset::IAssetLoader::IAssetLoaderOverride* _override [[maybe_unused]], + uint32_t _hierarchyLevel [[maybe_unused]]) { if (!_file) return {}; @@ -414,33 +352,15 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as const long filesize = _file->getSize(); if (filesize <= 0) return {}; - const bool fileMappable = core::bitflag(_file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, static_cast(filesize), true, fileMappable); - if (!ioPlan.isValid()) - { - _params.logger.log("OBJ loader: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str(), ioPlan.reason); + const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(filesize), true, _file); + if (impl::SIODiagnostics::logInvalidPlan(_params.logger, "OBJ loader", _file->getFileName().string().c_str(), ioPlan)) return {}; - } - std::string fileContents = {}; - const char* buf = nullptr; - if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) - { - const auto* constFile = static_cast(_file); - const auto* mapped = reinterpret_cast(constFile->getMappedPointer()); - if (mapped) - { - buf = mapped; - ioTelemetry.account(static_cast(filesize)); - } - } - if (!buf) - { - fileContents.resize(static_cast(filesize)); - if (!readTextFileWithPolicy(_file, fileContents.data(), fileContents.size(), ioPlan, ioTelemetry)) - return {}; - buf = fileContents.data(); - 
} + core::vector fileContents; + const auto* fileData = impl::SFileAccess::mapOrReadWholeFile(_file, static_cast(filesize), fileContents, ioPlan, &ioTelemetry); + if (!fileData) + return {}; + const char* const buf = reinterpret_cast(fileData); const char* const bufEnd = buf + static_cast(filesize); const char* bufPtr = buf; @@ -448,7 +368,8 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector positions; core::vector normals; core::vector uvs; - const size_t estimatedAttributeCount = std::max(16ull, static_cast(filesize) / 32ull); + const size_t estimatedAttributeCount = + std::max(16ull, static_cast(filesize) / 32ull); positions.reserve(estimatedAttributeCount); normals.reserve(estimatedAttributeCount); uvs.reserve(estimatedAttributeCount); @@ -459,41 +380,51 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as core::vector outUVs; core::vector indices; core::vector dedupHeadByPos; - core::vector dedupNodes; - const size_t estimatedOutVertexCount = std::max(estimatedAttributeCount, static_cast(filesize) / 20ull); - const size_t estimatedOutIndexCount = (estimatedOutVertexCount <= (std::numeric_limits::max() / 3ull)) ? (estimatedOutVertexCount * 3ull) : std::numeric_limits::max(); - const size_t initialOutVertexCapacity = std::max(1ull, estimatedOutVertexCount); - const size_t initialOutIndexCapacity = (estimatedOutIndexCount == std::numeric_limits::max()) ? 3ull : std::max(3ull, estimatedOutIndexCount); + core::vector dedupNodes; + const size_t estimatedOutVertexCount = std::max( + estimatedAttributeCount, static_cast(filesize) / 20ull); + const size_t estimatedOutIndexCount = + (estimatedOutVertexCount <= (std::numeric_limits::max() / 3ull)) + ? 
(estimatedOutVertexCount * 3ull) + : std::numeric_limits::max(); + const size_t initialOutVertexCapacity = + std::max(1ull, estimatedOutVertexCount); + const size_t initialOutIndexCapacity = + (estimatedOutIndexCount == std::numeric_limits::max()) + ? 3ull + : std::max(3ull, estimatedOutIndexCount); size_t outVertexWriteCount = 0ull; size_t outIndexWriteCount = 0ull; size_t dedupNodeCount = 0ull; - struct SDedupHotEntry - { + struct SDedupHotEntry { int32_t pos = -1; int32_t uv = -1; int32_t normal = -1; uint32_t outIndex = 0u; }; const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); - const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers(hw, _params.ioPolicy.runtimeTuning.workerHeadroom); + const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers( + hw, _params.ioPolicy.runtimeTuning.workerHeadroom); SLoaderRuntimeTuningRequest dedupTuningRequest = {}; dedupTuningRequest.inputBytes = static_cast(filesize); dedupTuningRequest.totalWorkUnits = estimatedOutVertexCount; dedupTuningRequest.hardwareThreads = static_cast(hw); dedupTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); - dedupTuningRequest.targetChunksPerWorker = _params.ioPolicy.runtimeTuning.targetChunksPerWorker; + dedupTuningRequest.targetChunksPerWorker = + _params.ioPolicy.runtimeTuning.targetChunksPerWorker; dedupTuningRequest.sampleData = reinterpret_cast(buf); - dedupTuningRequest.sampleBytes = SLoaderRuntimeTuner::resolveSampleBytes(_params.ioPolicy, static_cast(filesize)); - const auto dedupTuning = SLoaderRuntimeTuner::tune(_params.ioPolicy, dedupTuningRequest); + dedupTuningRequest.sampleBytes = SLoaderRuntimeTuner::resolveSampleBytes( + _params.ioPolicy, static_cast(filesize)); + const auto dedupTuning = + SLoaderRuntimeTuner::tune(_params.ioPolicy, dedupTuningRequest); const size_t dedupHotSeed = std::max( - 16ull, - estimatedOutVertexCount / std::max(1ull, dedupTuning.workerCount * 8ull)); + 16ull, estimatedOutVertexCount / + 
std::max(1ull, dedupTuning.workerCount * 8ull)); const size_t dedupHotEntryCount = std::bit_ceil(dedupHotSeed); core::vector dedupHotCache(dedupHotEntryCount); const size_t dedupHotMask = dedupHotEntryCount - 1ull; - struct SLoadedGeometry - { + struct SLoadedGeometry { core::smart_refctd_ptr geometry = {}; std::string objectName = {}; std::string groupName = {}; @@ -510,13 +441,13 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as bool hasProvidedNormals = false; bool needsNormalGeneration = false; bool hasUVs = false; - hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); + hlsl::shapes::util::AABBAccumulator3 parsedAABB = + hlsl::shapes::util::createAABBAccumulator(); uint64_t currentFaceCount = 0ull; uint64_t currentFaceFastTokenCount = 0ull; uint64_t currentFaceFallbackTokenCount = 0ull; - const auto resetBuilderState = [&]() -> void - { + const auto resetBuilderState = [&]() -> void { outPositions.clear(); outNormals.clear(); outNormalNeedsGeneration.clear(); @@ -546,8 +477,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as std::fill(dedupHotCache.begin(), dedupHotCache.end(), emptyHotEntry); }; - const auto finalizeCurrentGeometry = [&]() -> bool - { + const auto finalizeCurrentGeometry = [&]() -> bool { if (outVertexWriteCount == 0ull) return true; @@ -557,38 +487,41 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as outUVs.resize(outVertexWriteCount); indices.resize(outIndexWriteCount); - if (needsNormalGeneration) - { - // OBJ smoothing groups are already encoded in the parser-side vertex split - // corners that must stay sharp become different output vertices even if they share position. - // This helper works on that final indexed output and fills only normals missing in the source. 
- // `createSmoothVertexNormal` is still not enough here even with indexed-view support, - // because it would also need a "missing only" mode and proper OBJ smoothing-group handling. - if (!CPolygonGeometryManipulator::generateMissingSmoothNormals(outNormals, outPositions, indices, outNormalNeedsGeneration)) + if (needsNormalGeneration) { + // OBJ smoothing groups are already encoded in the parser-side vertex + // split corners that must stay sharp become different output vertices + // even if they share position. This helper works on that final indexed + // output and fills only normals missing in the source. + // `createSmoothVertexNormal` is still not enough here even with + // indexed-view support, because it would also need a "missing only" mode + // and proper OBJ smoothing-group handling. + if (!CPolygonGeometryManipulator::generateMissingSmoothNormals( + outNormals, outPositions, indices, outNormalNeedsGeneration)) return false; } const size_t outVertexCount = outPositions.size(); auto geometry = core::make_smart_refctd_ptr(); { - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(outPositions)); + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(outPositions)); if (!view) return false; geometry->setPositionView(std::move(view)); } const bool hasNormals = hasProvidedNormals || needsNormalGeneration; - if (hasNormals) - { - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(outNormals)); + if (hasNormals) { + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(outNormals)); if (!view) return false; geometry->setNormalView(std::move(view)); } - if (hasUVs) - { - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(outUVs)); + if (hasUVs) { + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(outUVs)); if (!view) return false; auto* const auxViews = geometry->getAuxAttributeViews(); @@ -596,34 +529,33 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const 
as auxViews->operator[](SOBJPolygonGeometryAuxLayout::UV0) = std::move(view); } - if (!indices.empty()) - { + if (!indices.empty()) { geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - if (outVertexCount <= static_cast(std::numeric_limits::max()) + 1ull) - { + if (outVertexCount <= + static_cast(std::numeric_limits::max()) + 1ull) { core::vector indices16(indices.size()); for (size_t i = 0u; i < indices.size(); ++i) indices16[i] = static_cast(indices[i]); - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices16)); + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(indices16)); if (!view) return false; geometry->setIndexView(std::move(view)); - } - else - { - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices)); + } else { + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(indices)); if (!view) return false; geometry->setIndexView(std::move(view)); } - } - else - { + } else { geometry->setIndexing(IPolygonGeometryBase::PointList()); } - if (!_params.loaderFlags.hasAnyFlag(IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES)) - SPolygonGeometryContentHash::computeMissing(geometry.get(), _params.ioPolicy); + if (!_params.loaderFlags.hasAnyFlag( + IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES)) + SPolygonGeometryContentHash::computeMissing(geometry.get(), + _params.ioPolicy); if (!parsedAABB.empty()) geometry->applyAABB(parsedAABB.value); @@ -636,65 +568,63 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as .groupName = currentGroupName, .faceCount = currentFaceCount, .faceFastTokenCount = currentFaceFastTokenCount, - .faceFallbackTokenCount = currentFaceFallbackTokenCount - }); + .faceFallbackTokenCount = currentFaceFallbackTokenCount}); return true; }; resetBuilderState(); - auto allocateOutVertex = [&](uint32_t& outIx) -> bool - { - if (outVertexWriteCount >= outPositions.size()) - { - const size_t newCapacity = std::max(outVertexWriteCount + 1ull, 
outPositions.size() * 2ull); + auto allocateOutVertex = [&](uint32_t& outIx) -> bool { + if (outVertexWriteCount >= outPositions.size()) { + const size_t newCapacity = std::max(outVertexWriteCount + 1ull, + outPositions.size() * 2ull); outPositions.resize(newCapacity); outNormals.resize(newCapacity); outNormalNeedsGeneration.resize(newCapacity, 0u); outUVs.resize(newCapacity); } - if (outVertexWriteCount > static_cast(std::numeric_limits::max())) + if (outVertexWriteCount > + static_cast(std::numeric_limits::max())) return false; outIx = static_cast(outVertexWriteCount++); return true; }; - auto appendIndex = [&](const uint32_t value) -> bool - { - if (outIndexWriteCount >= indices.size()) - { - const size_t newCapacity = std::max(outIndexWriteCount + 1ull, indices.size() * 2ull); + auto appendIndex = [&](const uint32_t value) -> bool { + if (outIndexWriteCount >= indices.size()) { + const size_t newCapacity = + std::max(outIndexWriteCount + 1ull, indices.size() * 2ull); indices.resize(newCapacity); } indices[outIndexWriteCount++] = value; return true; }; - auto allocateDedupNode = [&]() -> int32_t - { - if (dedupNodeCount >= dedupNodes.size()) - { - const size_t newCapacity = std::max(dedupNodeCount + 1ull, dedupNodes.size() * 2ull); + auto allocateDedupNode = [&]() -> int32_t { + if (dedupNodeCount >= dedupNodes.size()) { + const size_t newCapacity = + std::max(dedupNodeCount + 1ull, dedupNodes.size() * 2ull); dedupNodes.resize(newCapacity); } - if (dedupNodeCount > static_cast(std::numeric_limits::max())) + if (dedupNodeCount > + static_cast(std::numeric_limits::max())) return -1; const int32_t ix = static_cast(dedupNodeCount++); return ix; }; - auto findCornerIndex = [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, const uint32_t dedupSmoothingGroup, uint32_t& outIx)->bool - { + auto findCornerIndex = + [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, + const uint32_t dedupSmoothingGroup, uint32_t& outIx) -> bool { if 
(posIx < 0 || static_cast(posIx) >= positions.size()) return false; if (static_cast(posIx) >= dedupHeadByPos.size()) dedupHeadByPos.resize(positions.size(), -1); int32_t nodeIx = dedupHeadByPos[static_cast(posIx)]; - while (nodeIx >= 0) - { + while (nodeIx >= 0) { const auto& node = dedupNodes[static_cast(nodeIx)]; - if (node.uv == uvIx && node.normal == normalIx && node.smoothingGroup == dedupSmoothingGroup) - { + if (node.uv == uvIx && node.normal == normalIx && + node.smoothingGroup == dedupSmoothingGroup) { outIx = node.outIndex; return true; } @@ -703,8 +633,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return false; }; - auto materializeCornerIndex = [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, const uint32_t dedupSmoothingGroup, uint32_t& outIx)->bool - { + auto materializeCornerIndex = + [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, + const uint32_t dedupSmoothingGroup, uint32_t& outIx) -> bool { if (!allocateOutVertex(outIx)) return false; const int32_t newNodeIx = allocateDedupNode(); @@ -724,22 +655,18 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as hlsl::shapes::util::extendAABBAccumulator(parsedAABB, srcPos); hlsl::float32_t2 uv(0.f, 0.f); - if (uvIx >= 0 && static_cast(uvIx) < uvs.size()) - { + if (uvIx >= 0 && static_cast(uvIx) < uvs.size()) { uv = uvs[static_cast(uvIx)]; hasUVs = true; } outUVs[static_cast(outIx)] = uv; hlsl::float32_t3 normal(0.f, 0.f, 0.f); - if (normalIx >= 0 && static_cast(normalIx) < normals.size()) - { + if (normalIx >= 0 && static_cast(normalIx) < normals.size()) { normal = normals[static_cast(normalIx)]; hasProvidedNormals = true; outNormalNeedsGeneration[static_cast(outIx)] = 0u; - } - else - { + } else { needsNormalGeneration = true; outNormalNeedsGeneration[static_cast(outIx)] = 1u; } @@ -747,38 +674,36 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as return true; }; 
- auto acquireCornerIndex = [&](const int32_t* idx, const uint32_t smoothingGroup, uint32_t& outIx)->bool - { - if (!idx) - return false; - - const int32_t posIx = idx[0]; + auto acquireCornerIndex = [&](const hlsl::int32_t3& idx, + const uint32_t smoothingGroup, + uint32_t& outIx) -> bool { + const int32_t posIx = idx.x; if (posIx < 0 || static_cast(posIx) >= positions.size()) return false; - const uint32_t dedupSmoothingGroup = (idx[2] >= 0) ? 0u : smoothingGroup; - if (findCornerIndex(posIx, idx[1], idx[2], dedupSmoothingGroup, outIx)) + const uint32_t dedupSmoothingGroup = idx.z >= 0 ? 0u : smoothingGroup; + if (findCornerIndex(posIx, idx.y, idx.z, dedupSmoothingGroup, outIx)) return true; - return materializeCornerIndex(posIx, idx[1], idx[2], dedupSmoothingGroup, outIx); + return materializeCornerIndex(posIx, idx.y, idx.z, dedupSmoothingGroup, + outIx); }; - auto acquireCornerIndexPositiveTriplet = [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, uint32_t& outIx)->bool - { - const uint32_t hotHash = - static_cast(posIx) * 73856093u ^ - static_cast(uvIx) * 19349663u ^ - static_cast(normalIx) * 83492791u; + auto acquireCornerIndexPositiveTriplet = [&](const hlsl::int32_t3& idx, + uint32_t& outIx) -> bool { + const uint32_t hotHash = static_cast(idx.x) * 73856093u ^ + static_cast(idx.y) * 19349663u ^ + static_cast(idx.z) * 83492791u; auto& hotEntry = dedupHotCache[static_cast(hotHash) & dedupHotMask]; - if (hotEntry.pos == posIx && hotEntry.uv == uvIx && hotEntry.normal == normalIx) - { + if (hotEntry.pos == idx.x && hotEntry.uv == idx.y && + hotEntry.normal == idx.z) { outIx = hotEntry.outIndex; return true; } - if (findCornerIndex(posIx, uvIx, normalIx, 0u, outIx) || materializeCornerIndex(posIx, uvIx, normalIx, 0u, outIx)) - { - hotEntry.pos = posIx; - hotEntry.uv = uvIx; - hotEntry.normal = normalIx; + if (findCornerIndex(idx.x, idx.y, idx.z, 0u, outIx) || + materializeCornerIndex(idx.x, idx.y, idx.z, 0u, outIx)) { + hotEntry.pos = idx.x; 
+ hotEntry.uv = idx.y; + hotEntry.normal = idx.z; hotEntry.outIndex = outIx; return true; } @@ -786,210 +711,206 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as }; uint32_t currentSmoothingGroup = 0u; - while (bufPtr < bufEnd) - { - const char* const lineStart = bufPtr; - const size_t remaining = static_cast(bufEnd - lineStart); - const char* lineTerminator = static_cast(std::memchr(lineStart, '\n', remaining)); - if (!lineTerminator) - lineTerminator = static_cast(std::memchr(lineStart, '\r', remaining)); - if (!lineTerminator) - lineTerminator = bufEnd; - - const char* lineEnd = lineTerminator; - if (lineEnd > lineStart && lineEnd[-1] == '\r') - --lineEnd; - - if (lineStart < lineEnd) - { - const char lineType = static_cast(std::tolower(static_cast(*lineStart))); - if (lineType == 'v') - { - auto parseVector = [&](const char* ptr, float* values, const uint32_t count)->bool - { - for (uint32_t i = 0u; i < count; ++i) - { - while (ptr < lineEnd && isObjInlineWhitespace(*ptr)) - ++ptr; - if (ptr >= lineEnd || !parseObjFloat(ptr, lineEnd, values[i])) - return false; - } - return true; - }; - const char subType = ((lineStart + 1) < lineEnd) ? 
static_cast(std::tolower(static_cast(lineStart[1]))) : '\0'; - if ((lineStart + 1) < lineEnd && subType == ' ') - { - hlsl::float32_t3 vec{}; - if (!parseVector(lineStart + 2, &vec.x, 3u)) - return {}; - positions.push_back(vec); - dedupHeadByPos.push_back(-1); - } - else if ((lineStart + 2) < lineEnd && subType == 'n' && isObjInlineWhitespace(lineStart[2])) - { - hlsl::float32_t3 vec{}; - if (!parseVector(lineStart + 3, &vec.x, 3u)) - return {}; - normals.push_back(vec); - } - else if ((lineStart + 2) < lineEnd && subType == 't' && isObjInlineWhitespace(lineStart[2])) - { - hlsl::float32_t2 vec{}; - if (!parseVector(lineStart + 3, &vec.x, 2u)) - return {}; - vec.y = 1.f - vec.y; - uvs.push_back(vec); + while (bufPtr < bufEnd) { + const char* const lineStart = bufPtr; + const size_t remaining = static_cast(bufEnd - lineStart); + const char* lineTerminator = + static_cast(std::memchr(lineStart, '\n', remaining)); + if (!lineTerminator) + lineTerminator = + static_cast(std::memchr(lineStart, '\r', remaining)); + if (!lineTerminator) + lineTerminator = bufEnd; + + const char* lineEnd = lineTerminator; + if (lineEnd > lineStart && lineEnd[-1] == '\r') + --lineEnd; + + if (lineStart < lineEnd) { + const char lineType = std::tolower(*lineStart); + if (lineType == 'v') { + auto parseVector = [&](const char* ptr, float* values, + const uint32_t count) -> bool { + for (uint32_t i = 0u; i < count; ++i) { + while (ptr < lineEnd && Parse::Common::isInlineWhitespace(*ptr)) + ++ptr; + if (ptr >= lineEnd || + !Parse::Common::parseNumber(ptr, lineEnd, values[i])) + return false; } - } - else if (lineType == 'o' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) - { - if (!finalizeCurrentGeometry()) + return true; + }; + const char subType = + ((lineStart + 1) < lineEnd) ? 
std::tolower(lineStart[1]) : '\0'; + if ((lineStart + 1) < lineEnd && subType == ' ') { + hlsl::float32_t3 vec{}; + if (!parseVector(lineStart + 2, &vec.x, 3u)) return {}; - resetBuilderState(); - currentObjectName = parseObjIdentifier(lineStart + 2, lineEnd, "default_object"); - sawObjectDirective = true; - } - else if (lineType == 'g' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) - { - if (!finalizeCurrentGeometry()) + positions.push_back(vec); + dedupHeadByPos.push_back(-1); + } else if ((lineStart + 2) < lineEnd && subType == 'n' && + Parse::Common::isInlineWhitespace(lineStart[2])) { + hlsl::float32_t3 vec{}; + if (!parseVector(lineStart + 3, &vec.x, 3u)) + return {}; + normals.push_back(vec); + } else if ((lineStart + 2) < lineEnd && subType == 't' && + Parse::Common::isInlineWhitespace(lineStart[2])) { + hlsl::float32_t2 vec{}; + if (!parseVector(lineStart + 3, &vec.x, 2u)) return {}; - resetBuilderState(); - currentGroupName = parseObjIdentifier(lineStart + 2, lineEnd, "default_group"); - sawGroupDirective = true; + vec.y = 1.f - vec.y; + uvs.push_back(vec); } - else if (lineType == 's' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) - { - parseObjSmoothingGroup(lineStart + 2, lineEnd, currentSmoothingGroup); + } else if (lineType == 'o' && (lineStart + 1) < lineEnd && + Parse::Common::isInlineWhitespace(lineStart[1])) { + if (!finalizeCurrentGeometry()) + return {}; + resetBuilderState(); + currentObjectName = + Parse::parseIdentifier(lineStart + 2, lineEnd, "default_object"); + sawObjectDirective = true; + } else if (lineType == 'g' && (lineStart + 1) < lineEnd && + Parse::Common::isInlineWhitespace(lineStart[1])) { + if (!finalizeCurrentGeometry()) + return {}; + resetBuilderState(); + currentGroupName = + Parse::parseIdentifier(lineStart + 2, lineEnd, "default_group"); + sawGroupDirective = true; + } else if (lineType == 's' && (lineStart + 1) < lineEnd && + Parse::Common::isInlineWhitespace(lineStart[1])) { + 
Parse::parseSmoothingGroup(lineStart + 2, lineEnd, + currentSmoothingGroup); + } else if (lineType == 'f' && (lineStart + 1) < lineEnd && + Parse::Common::isInlineWhitespace(lineStart[1])) { + if (positions.empty()) + return {}; + ++faceCount; + ++currentFaceCount; + const size_t posCount = positions.size(); + const size_t uvCount = uvs.size(); + const size_t normalCount = normals.size(); + const char* triLinePtr = lineStart + 1; + std::array triIdx = {hlsl::int32_t3(-1, -1, -1), + hlsl::int32_t3(-1, -1, -1), + hlsl::int32_t3(-1, -1, -1)}; + bool triangleFastPath = Parse::parseTrianglePositiveTripletLine( + lineStart + 1, lineEnd, triIdx, posCount, uvCount, normalCount); + bool parsedFirstThree = triangleFastPath; + if (!triangleFastPath) { + triLinePtr = lineStart + 1; + parsedFirstThree = + Parse::parseFaceVertexToken(triLinePtr, lineEnd, triIdx[0], + posCount, uvCount, normalCount) && + Parse::parseFaceVertexToken(triLinePtr, lineEnd, triIdx[1], + posCount, uvCount, normalCount) && + Parse::parseFaceVertexToken(triLinePtr, lineEnd, triIdx[2], + posCount, uvCount, normalCount); + triangleFastPath = parsedFirstThree; + if (parsedFirstThree) { + while (triLinePtr < lineEnd && + Parse::Common::isInlineWhitespace(*triLinePtr)) + ++triLinePtr; + triangleFastPath = (triLinePtr == lineEnd); + } } - else if (lineType == 'f' && (lineStart + 1) < lineEnd && isObjInlineWhitespace(lineStart[1])) - { - if (positions.empty()) + if (triangleFastPath) { + const bool fullTriplet = std::all_of( + triIdx.begin(), triIdx.end(), [](const hlsl::int32_t3& idx) { + return hlsl::all(glm::greaterThanEqual(idx, hlsl::int32_t3(0))); + }); + if (!fullTriplet) + triangleFastPath = false; + } + if (triangleFastPath) { + hlsl::uint32_t3 cornerIx = {}; + if (!acquireCornerIndexPositiveTriplet(triIdx[0], cornerIx.x)) return {}; - ++faceCount; - ++currentFaceCount; - const size_t posCount = positions.size(); - const size_t uvCount = uvs.size(); - const size_t normalCount = normals.size(); - const 
char* triLinePtr = lineStart + 1; - int32_t triIdx0[3] = { -1, -1, -1 }; - int32_t triIdx1[3] = { -1, -1, -1 }; - int32_t triIdx2[3] = { -1, -1, -1 }; - bool triangleFastPath = parseObjTrianglePositiveTripletLine(lineStart + 1, lineEnd, triIdx0, triIdx1, triIdx2, posCount, uvCount, normalCount); - bool parsedFirstThree = triangleFastPath; - if (!triangleFastPath) - { - triLinePtr = lineStart + 1; - parsedFirstThree = - parseObjFaceVertexTokenFast(triLinePtr, lineEnd, triIdx0, posCount, uvCount, normalCount) && - parseObjFaceVertexTokenFast(triLinePtr, lineEnd, triIdx1, posCount, uvCount, normalCount) && - parseObjFaceVertexTokenFast(triLinePtr, lineEnd, triIdx2, posCount, uvCount, normalCount); - triangleFastPath = parsedFirstThree; - if (parsedFirstThree) - { - while (triLinePtr < lineEnd && isObjInlineWhitespace(*triLinePtr)) - ++triLinePtr; - triangleFastPath = (triLinePtr == lineEnd); - } - } - if (triangleFastPath) - { - const bool fullTriplet = - triIdx0[0] >= 0 && triIdx0[1] >= 0 && triIdx0[2] >= 0 && - triIdx1[0] >= 0 && triIdx1[1] >= 0 && triIdx1[2] >= 0 && - triIdx2[0] >= 0 && triIdx2[1] >= 0 && triIdx2[2] >= 0; - if (!fullTriplet) - triangleFastPath = false; - } - if (triangleFastPath) - { - uint32_t c0 = 0u; - uint32_t c1 = 0u; - uint32_t c2 = 0u; - if (!acquireCornerIndexPositiveTriplet(triIdx0[0], triIdx0[1], triIdx0[2], c0)) + if (!acquireCornerIndexPositiveTriplet(triIdx[1], cornerIx.y)) + return {}; + if (!acquireCornerIndexPositiveTriplet(triIdx[2], cornerIx.z)) + return {}; + faceFastTokenCount += 3u; + currentFaceFastTokenCount += 3u; + if (!appendIndex(cornerIx.z) || !appendIndex(cornerIx.y) || + !appendIndex(cornerIx.x)) + return {}; + } else { + const char* linePtr = lineStart + 1; + uint32_t firstCorner = 0u; + uint32_t previousCorner = 0u; + uint32_t cornerCount = 0u; + + if (parsedFirstThree) { + hlsl::uint32_t3 cornerIx = {}; + if (!acquireCornerIndex(triIdx[0], currentSmoothingGroup, + cornerIx.x)) return {}; - if 
(!acquireCornerIndexPositiveTriplet(triIdx1[0], triIdx1[1], triIdx1[2], c1)) + if (!acquireCornerIndex(triIdx[1], currentSmoothingGroup, + cornerIx.y)) return {}; - if (!acquireCornerIndexPositiveTriplet(triIdx2[0], triIdx2[1], triIdx2[2], c2)) + if (!acquireCornerIndex(triIdx[2], currentSmoothingGroup, + cornerIx.z)) return {}; - faceFastTokenCount += 3u; - currentFaceFastTokenCount += 3u; - if (!appendIndex(c2) || !appendIndex(c1) || !appendIndex(c0)) + faceFallbackTokenCount += 3u; + currentFaceFallbackTokenCount += 3u; + if (!appendIndex(cornerIx.z) || !appendIndex(cornerIx.y) || + !appendIndex(cornerIx.x)) return {}; + firstCorner = cornerIx.x; + previousCorner = cornerIx.z; + cornerCount = 3u; + linePtr = triLinePtr; } - else - { - const char* linePtr = lineStart + 1; - uint32_t firstCorner = 0u; - uint32_t previousCorner = 0u; - uint32_t cornerCount = 0u; - - if (parsedFirstThree) - { - uint32_t c0 = 0u; - uint32_t c1 = 0u; - uint32_t c2 = 0u; - if (!acquireCornerIndex(triIdx0, currentSmoothingGroup, c0)) - return {}; - if (!acquireCornerIndex(triIdx1, currentSmoothingGroup, c1)) - return {}; - if (!acquireCornerIndex(triIdx2, currentSmoothingGroup, c2)) - return {}; - faceFallbackTokenCount += 3u; - currentFaceFallbackTokenCount += 3u; - if (!appendIndex(c2) || !appendIndex(c1) || !appendIndex(c0)) - return {}; - firstCorner = c0; - previousCorner = c2; - cornerCount = 3u; - linePtr = triLinePtr; + + while (linePtr < lineEnd) { + while (linePtr < lineEnd && + Parse::Common::isInlineWhitespace(*linePtr)) + ++linePtr; + if (linePtr >= lineEnd) + break; + + hlsl::int32_t3 idx(-1, -1, -1); + if (!Parse::parseFaceVertexToken(linePtr, lineEnd, idx, posCount, + uvCount, normalCount)) + return {}; + ++faceFallbackTokenCount; + ++currentFaceFallbackTokenCount; + + uint32_t cornerIx = 0u; + if (!acquireCornerIndex(idx, currentSmoothingGroup, cornerIx)) + return {}; + + if (cornerCount == 0u) { + firstCorner = cornerIx; + ++cornerCount; + continue; } - while (linePtr 
< lineEnd) - { - while (linePtr < lineEnd && isObjInlineWhitespace(*linePtr)) - ++linePtr; - if (linePtr >= lineEnd) - break; - - int32_t idx[3] = { -1, -1, -1 }; - if (!parseObjFaceVertexTokenFast(linePtr, lineEnd, idx, posCount, uvCount, normalCount)) - return {}; - ++faceFallbackTokenCount; - ++currentFaceFallbackTokenCount; - - uint32_t cornerIx = 0u; - if (!acquireCornerIndex(idx, currentSmoothingGroup, cornerIx)) - return {}; - - if (cornerCount == 0u) - { - firstCorner = cornerIx; - ++cornerCount; - continue; - } - - if (cornerCount == 1u) - { - previousCorner = cornerIx; - ++cornerCount; - continue; - } - - if (!appendIndex(cornerIx) || !appendIndex(previousCorner) || !appendIndex(firstCorner)) - return {}; + if (cornerCount == 1u) { previousCorner = cornerIx; ++cornerCount; + continue; } + + if (!appendIndex(cornerIx) || !appendIndex(previousCorner) || + !appendIndex(firstCorner)) + return {}; + previousCorner = cornerIx; + ++cornerCount; } } } + } - if (lineTerminator >= bufEnd) - bufPtr = bufEnd; - else if (*lineTerminator == '\r' && (lineTerminator + 1) < bufEnd && lineTerminator[1] == '\n') - bufPtr = lineTerminator + 2; - else - bufPtr = lineTerminator + 1; + if (lineTerminator >= bufEnd) + bufPtr = bufEnd; + else if (*lineTerminator == '\r' && (lineTerminator + 1) < bufEnd && + lineTerminator[1] == '\n') + bufPtr = lineTerminator + 2; + else + bufPtr = lineTerminator + 1; } if (!finalizeCurrentGeometry()) return {}; @@ -1000,99 +921,74 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as uint64_t outIndexCount = 0ull; uint64_t faceFastTokenCountSum = 0ull; uint64_t faceFallbackTokenCountSum = 0ull; - for (const auto& loaded : loadedGeometries) - { + for (const auto& loaded : loadedGeometries) { const auto& posView = loaded.geometry->getPositionView(); - outVertexCount += static_cast(posView ? posView.getElementCount() : 0ull); + outVertexCount += + static_cast(posView ? 
posView.getElementCount() : 0ull); const auto& indexView = loaded.geometry->getIndexView(); - outIndexCount += static_cast(indexView ? indexView.getElementCount() : 0ull); + outIndexCount += + static_cast(indexView ? indexView.getElementCount() : 0ull); faceFastTokenCountSum += loaded.faceFastTokenCount; faceFallbackTokenCountSum += loaded.faceFallbackTokenCount; } - if (SInterchangeIO::isTinyIOTelemetryLikely(ioTelemetry, static_cast(filesize), _params.ioPolicy)) - { - _params.logger.log( - "OBJ loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", - system::ILogger::ELL_WARNING, - _file->getFileName().string().c_str(), - static_cast(ioTelemetry.callCount), - static_cast(ioTelemetry.getMinOrZero()), - static_cast(ioTelemetry.getAvgOrZero())); - } - - const bool buildCollections = sawObjectDirective || sawGroupDirective || loadedGeometries.size() > 1ull; - if (!buildCollections) - { - _params.logger.log( - "OBJ loader stats: file=%s in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu geometries=%llu objects=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", - system::ILogger::ELL_PERFORMANCE, - _file->getFileName().string().c_str(), - static_cast(positions.size()), - static_cast(normals.size()), - static_cast(uvs.size()), - static_cast(outVertexCount), - static_cast(outIndexCount), - static_cast(faceCount), - static_cast(faceFastTokenCountSum), - static_cast(faceFallbackTokenCountSum), - static_cast(loadedGeometries.size()), - 1ull, - static_cast(ioTelemetry.callCount), - static_cast(ioTelemetry.getMinOrZero()), - static_cast(ioTelemetry.getAvgOrZero()), - system::to_string(_params.ioPolicy.strategy).c_str(), - system::to_string(ioPlan.strategy).c_str(), - static_cast(ioPlan.chunkSizeBytes()), - ioPlan.reason); + impl::SIODiagnostics::logTinyIO(_params.logger, "OBJ loader", _file->getFileName().string().c_str(), ioTelemetry, static_cast(filesize), 
_params.ioPolicy, "reads"); + const bool buildCollections = + sawObjectDirective || sawGroupDirective || loadedGeometries.size() > 1ull; + core::vector> outputAssets; + uint64_t objectCount = 1ull; + if (!buildCollections) { // Plain OBJ is still just one polygon geometry here. - return SAssetBundle(core::smart_refctd_ptr(), { core::smart_refctd_ptr_static_cast(std::move(loadedGeometries.front().geometry)) }); - } - - // Plain OBJ can group many polygon geometries with `o` and `g`, but it still does not define - // a real scene graph, instancing, or node transforms. Keep that as geometry collections instead - // of fabricating an ICPUScene on load. - core::vector objectNames; - core::vector> objectCollections; - for (auto& loaded : loadedGeometries) - { - size_t objectIx = objectNames.size(); - for (size_t i = 0ull; i < objectNames.size(); ++i) - { - if (objectNames[i] == loaded.objectName) - { - objectIx = i; - break; + outputAssets.push_back(core::smart_refctd_ptr_static_cast( + std::move(loadedGeometries.front().geometry))); + } else { + // Plain OBJ can group many polygon geometries with `o` and `g`, but it + // still does not define a real scene graph, instancing, or node transforms. + // Keep that as geometry collections instead of fabricating an ICPUScene on + // load. 
+ core::vector objectNames; + core::vector> + objectCollections; + for (auto& loaded : loadedGeometries) { + size_t objectIx = objectNames.size(); + for (size_t i = 0ull; i < objectNames.size(); ++i) { + if (objectNames[i] == loaded.objectName) { + objectIx = i; + break; + } } - } - if (objectIx == objectNames.size()) - { - objectNames.push_back(loaded.objectName); - auto collection = core::make_smart_refctd_ptr(); - if (!collection) + if (objectIx == objectNames.size()) { + objectNames.push_back(loaded.objectName); + auto collection = core::make_smart_refctd_ptr(); + if (!collection) + return {}; + objectCollections.push_back(std::move(collection)); + } + + auto* refs = objectCollections[objectIx]->getGeometries(); + if (!refs) return {}; - objectCollections.push_back(std::move(collection)); - } - auto* refs = objectCollections[objectIx]->getGeometries(); - if (!refs) - return {}; + IGeometryCollection::SGeometryReference ref = {}; + ref.geometry = core::smart_refctd_ptr_static_cast>( + loaded.geometry); + refs->push_back(std::move(ref)); + } - IGeometryCollection::SGeometryReference ref = {}; - ref.geometry = core::smart_refctd_ptr_static_cast>(loaded.geometry); - refs->push_back(std::move(ref)); + outputAssets.reserve(objectCollections.size()); + for (auto& collection : objectCollections) + outputAssets.push_back( + core::smart_refctd_ptr_static_cast(std::move(collection))); + objectCount = outputAssets.size(); } - core::vector> collectionAssets; - collectionAssets.reserve(objectCollections.size()); - for (auto& collection : objectCollections) - collectionAssets.push_back(core::smart_refctd_ptr_static_cast(std::move(collection))); - _params.logger.log( - "OBJ loader stats: file=%s in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu geometries=%llu objects=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", - system::ILogger::ELL_PERFORMANCE, - 
_file->getFileName().string().c_str(), + "OBJ loader stats: file=%s in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu " + "faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu " + "geometries=%llu objects=%llu io_reads=%llu io_min_read=%llu " + "io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), static_cast(positions.size()), static_cast(normals.size()), static_cast(uvs.size()), @@ -1102,16 +998,16 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const as static_cast(faceFastTokenCountSum), static_cast(faceFallbackTokenCountSum), static_cast(loadedGeometries.size()), - static_cast(collectionAssets.size()), + static_cast(objectCount), static_cast(ioTelemetry.callCount), static_cast(ioTelemetry.getMinOrZero()), static_cast(ioTelemetry.getAvgOrZero()), system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(ioPlan.strategy).c_str(), - static_cast(ioPlan.chunkSizeBytes()), - ioPlan.reason); + static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); - return SAssetBundle(core::smart_refctd_ptr(), std::move(collectionAssets)); + return SAssetBundle(core::smart_refctd_ptr(), + std::move(outputAssets)); } } diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 8e4f1a9f79..1c088a99e0 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -3,20 +3,24 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/asset/interchange/COBJMeshWriter.h" +#include "SOBJPolygonGeometryAuxLayout.h" +#include "nbl/asset/interchange/SGeometryViewDecode.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" +#include "impl/SFileAccess.h" +#include "impl/SIODiagnostics.h" #include "nbl/builtin/hlsl/array_accessors.hlsl" #include 
"nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" -#include "SOBJPolygonGeometryAuxLayout.h" #ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ #include "nbl/system/IFile.h" #include +#include #include -#include #include +#include #include #include @@ -52,78 +56,80 @@ writer_flags_t COBJMeshWriter::getForcedFlags() return EWF_NONE; } -namespace obj_writer_detail +namespace { -constexpr size_t ApproxObjBytesPerVertex = 96ull; -constexpr size_t ApproxObjBytesPerFace = 48ull; -constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 8ull; -constexpr size_t MaxUInt32Chars = std::numeric_limits::digits10 + 1ull; -constexpr size_t MaxIndexTokenBytes = MaxUInt32Chars * 3ull + 2ull; - -struct SIndexStringRef +struct Parse { - uint32_t offset = 0u; - uint16_t length = 0u; -}; + static constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 8ull; + static constexpr size_t MaxUInt32Chars = std::numeric_limits::digits10 + 1ull; + static constexpr size_t MaxIndexTokenBytes = MaxUInt32Chars * 3ull + 2ull; -bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) -{ - out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); - return view.decodeElement(ix, out); -} + struct IndexStringRef + { + uint32_t offset = 0u; + uint16_t length = 0u; + }; -template -void appendVecLine(std::string& out, const char* prefix, const size_t prefixSize, const Vec& values) -{ - constexpr size_t N = hlsl::vector_traits::Dimension; - const size_t oldSize = out.size(); - out.resize(oldSize + prefixSize + (N * MaxFloatTextChars) + N); - char* const lineBegin = out.data() + oldSize; - char* cursor = lineBegin; - char* const lineEnd = out.data() + out.size(); - hlsl::array_get getter; - - std::memcpy(cursor, prefix, prefixSize); - cursor += prefixSize; - - for (size_t i = 0ull; i < N; ++i) + struct GeometryTransformState { - cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, getter(values, static_cast(i))); - if (cursor < lineEnd) - 
*(cursor++) = (i + 1ull < N) ? ' ' : '\n'; - } + hlsl::float32_t3x4 transform; + hlsl::float32_t3x3 linear; + bool identity = true; + bool reverseWinding = false; + hlsl::math::linalg::cofactors_base normalTransform; + }; - out.resize(oldSize + static_cast(cursor - lineBegin)); -} + template + static void appendVecLine(std::string& out, const char* prefix, const size_t prefixSize, const Vec& values) + { + constexpr size_t N = hlsl::vector_traits::Dimension; + const size_t oldSize = out.size(); + out.resize(oldSize + prefixSize + (N * MaxFloatTextChars) + N); + char* const lineBegin = out.data() + oldSize; + char* cursor = lineBegin; + char* const lineEnd = out.data() + out.size(); + hlsl::array_get getter; + + std::memcpy(cursor, prefix, prefixSize); + cursor += prefixSize; + + for (size_t i = 0ull; i < N; ++i) + { + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, getter(values, static_cast(i))); + if (cursor < lineEnd) + *(cursor++) = (i + 1ull < N) ? ' ' : '\n'; + } -void appendFaceLine(std::string& out, const std::string& storage, const core::vector& refs, const uint32_t i0, const uint32_t i1, const uint32_t i2) -{ - const auto& ref0 = refs[i0]; - const auto& ref1 = refs[i1]; - const auto& ref2 = refs[i2]; - const size_t oldSize = out.size(); - const size_t lineSize = 2ull + static_cast(ref0.length) + 1ull + static_cast(ref1.length) + 1ull + static_cast(ref2.length) + 1ull; - out.resize(oldSize + lineSize); - char* cursor = out.data() + oldSize; - *(cursor++) = 'f'; - *(cursor++) = ' '; - std::memcpy(cursor, storage.data() + ref0.offset, ref0.length); - cursor += ref0.length; - *(cursor++) = ' '; - std::memcpy(cursor, storage.data() + ref1.offset, ref1.length); - cursor += ref1.length; - *(cursor++) = ' '; - std::memcpy(cursor, storage.data() + ref2.offset, ref2.length); - cursor += ref2.length; - *(cursor++) = '\n'; -} + out.resize(oldSize + static_cast(cursor - lineBegin)); + } -void appendIndexTokenToStorage(std::string& storage, 
core::vector& refs, const uint32_t positionIx, const bool hasUVs, const uint32_t uvIx, const bool hasNormals, const uint32_t normalIx) -{ - SIndexStringRef ref = {}; - ref.offset = static_cast(storage.size()); + static void appendFaceLine(std::string& out, const std::string& storage, const core::vector& refs, const hlsl::uint32_t3& face) + { + const auto& ref0 = refs[face.x]; + const auto& ref1 = refs[face.y]; + const auto& ref2 = refs[face.z]; + const size_t oldSize = out.size(); + const size_t lineSize = 2ull + static_cast(ref0.length) + 1ull + static_cast(ref1.length) + 1ull + static_cast(ref2.length) + 1ull; + out.resize(oldSize + lineSize); + char* cursor = out.data() + oldSize; + *(cursor++) = 'f'; + *(cursor++) = ' '; + std::memcpy(cursor, storage.data() + ref0.offset, ref0.length); + cursor += ref0.length; + *(cursor++) = ' '; + std::memcpy(cursor, storage.data() + ref1.offset, ref1.length); + cursor += ref1.length; + *(cursor++) = ' '; + std::memcpy(cursor, storage.data() + ref2.offset, ref2.length); + cursor += ref2.length; + *(cursor++) = '\n'; + } + + static void appendIndexToken(std::string& storage, core::vector& refs, const uint32_t positionIx, const bool hasUVs, const uint32_t uvIx, const bool hasNormals, const uint32_t normalIx) { + IndexStringRef ref = {}; + ref.offset = static_cast(storage.size()); const size_t oldSize = storage.size(); storage.resize(oldSize + MaxIndexTokenBytes); char* const token = storage.data() + oldSize; @@ -144,64 +150,43 @@ void appendIndexTokenToStorage(std::string& storage, core::vector(cursor - token)); + ref.length = static_cast(storage.size() - ref.offset); + refs.push_back(ref); } - ref.length = static_cast(storage.size() - ref.offset); - refs.push_back(ref); -} -void appendObjectHeader(std::string& out, const SGeometryWriterCommon::SPolygonGeometryWriteItem& item) -{ - char name[128] = {}; - if (item.instanceIx != ~0u) - std::snprintf(name, sizeof(name), "o instance_%u_target_%u_geometry_%u\n", item.instanceIx, 
item.targetIx, item.geometryIx); - else - std::snprintf(name, sizeof(name), "o geometry_%u\n", item.geometryIx); - out.append(name); -} + static void appendHeader(std::string& out, const SGeometryWriterCommon::SPolygonGeometryWriteItem& item) + { + std::array name = {}; + if (item.instanceIx != ~0u) + std::snprintf(name.data(), name.size(), "o instance_%u_target_%u_geometry_%u\n", item.instanceIx, item.targetIx, item.geometryIx); + else + std::snprintf(name.data(), name.size(), "o geometry_%u\n", item.geometryIx); + out.append(name.data()); + } -struct SGeometryTransformState -{ - hlsl::float32_t3x4 transform; - hlsl::float32_t3x3 linear; - bool identity = true; - bool reverseWinding = false; - hlsl::math::linalg::cofactors_base normalTransform; -}; + static GeometryTransformState createTransformState(const hlsl::float32_t3x4& transform) + { + const auto linear = hlsl::float32_t3x3(transform); + return {.transform = transform, .linear = linear, .identity = SGeometryWriterCommon::isIdentityTransform(transform), .reverseWinding = hlsl::determinant(linear) < 0.f, .normalTransform = hlsl::math::linalg::cofactors_base::create(linear)}; + } -inline SGeometryTransformState createTransformState(const hlsl::float32_t3x4& transform) -{ - const auto linear = hlsl::float32_t3x3(transform); - return { - .transform = transform, - .linear = linear, - .identity = SGeometryWriterCommon::isIdentityTransform(transform), - .reverseWinding = hlsl::determinant(linear) < 0.f, - .normalTransform = hlsl::math::linalg::cofactors_base::create(linear) - }; -} + static hlsl::float32_t3 applyPosition(const GeometryTransformState& state, const hlsl::float32_t3& value) + { + if (state.identity) + return value; + return hlsl::mul(state.transform, hlsl::float32_t4(value.x, value.y, value.z, 1.f)); + } -inline hlsl::float32_t3 applyPositionTransform(const SGeometryTransformState& state, const hlsl::float32_t3& value) -{ - if (state.identity) - return value; - - return hlsl::float32_t3( - 
state.transform[0].x * value.x + state.transform[0].y * value.y + state.transform[0].z * value.z + state.transform[0].w, - state.transform[1].x * value.x + state.transform[1].y * value.y + state.transform[1].z * value.z + state.transform[1].w, - state.transform[2].x * value.x + state.transform[2].y * value.y + state.transform[2].z * value.z + state.transform[2].w - ); -} + static hlsl::float32_t3 applyNormal(const GeometryTransformState& state, const hlsl::float32_t3& value) + { + return state.identity ? value : state.normalTransform.normalTransform(value); + } +}; -inline hlsl::float32_t3 applyNormalTransform(const SGeometryTransformState& state, const hlsl::float32_t3& value) -{ - return state.identity ? value : state.normalTransform.normalTransform(value); } -} // namespace obj_writer_detail - bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { - using namespace obj_writer_detail; SFileWriteTelemetry ioTelemetry = {}; if (!_override) @@ -210,14 +195,13 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (!_file || !_params.rootAsset) return false; - // Scene input is flattened here by baking transforms and writing every collected - // polygon geometry as its own OBJ object block. + // Scene input is flattened here by baking transforms and writing every collected polygon geometry as its own OBJ object block. 
const auto items = SGeometryWriterCommon::collectPolygonGeometryWriteItems(_params.rootAsset); if (items.empty()) return false; - SAssetWriteContext ctx = { _params, _file }; - system::IFile* file = _override->getOutputFile(_file, ctx, { _params.rootAsset, 0u }); + SAssetWriteContext ctx = {_params, _file}; + system::IFile* file = _override->getOutputFile(_file, ctx, {_params.rootAsset, 0u}); if (!file) return false; @@ -266,33 +250,30 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool flipHandedness = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); - const auto transformState = createTransformState(item.transform); + const auto transformState = Parse::createTransformState(item.transform); const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(positionView); const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; const hlsl::float32_t2* const tightUV = hasUVs ? 
SGeometryWriterCommon::getTightView(*uvView) : nullptr; if (itemIx != 0u) output.push_back('\n'); - appendObjectHeader(output, item); + Parse::appendHeader(output, item); for (size_t i = 0u; i < vertexCount; ++i) { hlsl::float32_t3 vertex = {}; if (tightPositions) - { vertex = tightPositions[i]; - } else { - if (!decodeVec4(positionView, i, tmp)) + if (!SGeometryViewDecode::decodeElement(positionView, i, tmp)) return false; vertex = hlsl::float32_t3(static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z)); } - vertex = applyPositionTransform(transformState, vertex); + vertex = Parse::applyPosition(transformState, vertex); if (flipHandedness) vertex.x = -vertex.x; - - appendVecLine(output, "v ", sizeof("v ") - 1ull, vertex); + Parse::appendVecLine(output, "v ", sizeof("v ") - 1ull, vertex); } if (hasUVs) @@ -301,17 +282,14 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ { hlsl::float32_t2 uv = {}; if (tightUV) - { uv = hlsl::float32_t2(tightUV[i].x, 1.f - tightUV[i].y); - } else { - if (!decodeVec4(*uvView, i, tmp)) + if (!SGeometryViewDecode::decodeElement(*uvView, i, tmp)) return false; uv = hlsl::float32_t2(static_cast(tmp.x), 1.f - static_cast(tmp.y)); } - - appendVecLine(output, "vt ", sizeof("vt ") - 1ull, uv); + Parse::appendVecLine(output, "vt ", sizeof("vt ") - 1ull, uv); } } @@ -321,24 +299,21 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ { hlsl::float32_t3 normal = {}; if (tightNormals) - { normal = tightNormals[i]; - } else { - if (!decodeVec4(normalView, i, tmp)) + if (!SGeometryViewDecode::decodeElement(normalView, i, tmp)) return false; normal = hlsl::float32_t3(static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z)); } - normal = applyNormalTransform(transformState, normal); + normal = Parse::applyNormal(transformState, normal); if (flipHandedness) normal.x = -normal.x; - - appendVecLine(output, "vn ", sizeof("vn ") - 1ull, normal); + Parse::appendVecLine(output, "vn ", 
sizeof("vn ") - 1ull, normal); } } - core::vector faceIndexRefs; + core::vector faceIndexRefs; faceIndexRefs.reserve(vertexCount); std::string faceIndexStorage; faceIndexStorage.reserve(vertexCount * 24ull); @@ -347,18 +322,15 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const uint32_t positionIx = positionBase + static_cast(i); const uint32_t uvIx = hasUVs ? (uvBase + static_cast(i)) : 0u; const uint32_t normalIx = hasNormals ? (normalBase + static_cast(i)) : 0u; - appendIndexTokenToStorage(faceIndexStorage, faceIndexRefs, positionIx, hasUVs, uvIx, hasNormals, normalIx); + Parse::appendIndexToken(faceIndexStorage, faceIndexRefs, positionIx, hasUVs, uvIx, hasNormals, normalIx); } + const hlsl::uint32_t3 faceLimit(static_cast(faceIndexRefs.size())); - if (!SGeometryWriterCommon::visitTriangleIndices(geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2)->bool - { - const uint32_t f0 = transformState.reverseWinding ? i0 : i2; - const uint32_t f1 = i1; - const uint32_t f2 = transformState.reverseWinding ? i2 : i0; - if (f0 >= faceIndexRefs.size() || f1 >= faceIndexRefs.size() || f2 >= faceIndexRefs.size()) + if (!SGeometryWriterCommon::visitTriangleIndices(geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) -> bool { + const hlsl::uint32_t3 face(transformState.reverseWinding ? i0 : i2, i1, transformState.reverseWinding ? 
i2 : i0); + if (hlsl::any(glm::greaterThanEqual(face, faceLimit))) return false; - - appendFaceLine(output, faceIndexStorage, faceIndexRefs, f0, f1, f2); + Parse::appendFaceLine(output, faceIndexStorage, faceIndexRefs, face); return true; })) return false; @@ -372,46 +344,23 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ totalFaceCount += faceCount; } - const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, static_cast(output.size()), true, fileMappable); - if (!ioPlan.isValid()) - { - _params.logger.log("OBJ writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); + const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(output.size()), true, file); + if (impl::SIODiagnostics::logInvalidPlan(_params.logger, "OBJ writer", file->getFileName().string().c_str(), ioPlan)) return false; - } - const bool writeOk = SInterchangeIO::writeFileWithPolicy(file, ioPlan, reinterpret_cast(output.data()), output.size(), &ioTelemetry); + const bool writeOk = SInterchangeIO::writeFileWithPolicy(file, ioPlan, output.data(), output.size(), &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if (SInterchangeIO::isTinyIOTelemetryLikely(ioTelemetry, static_cast(output.size()), _params.ioPolicy)) - { - _params.logger.log( - "OBJ writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", - system::ILogger::ELL_WARNING, - file->getFileName().string().c_str(), - static_cast(ioTelemetry.callCount), - static_cast(ioMinWrite), - static_cast(ioAvgWrite)); - } - _params.logger.log( - "OBJ writer stats: file=%s bytes=%llu vertices=%llu faces=%llu geometries=%llu io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", - 
system::ILogger::ELL_PERFORMANCE, - file->getFileName().string().c_str(), - static_cast(output.size()), - static_cast(totalVertexCount), - static_cast(totalFaceCount), - static_cast(items.size()), - static_cast(ioTelemetry.callCount), - static_cast(ioMinWrite), - static_cast(ioAvgWrite), - system::to_string(_params.ioPolicy.strategy).c_str(), - system::to_string(ioPlan.strategy).c_str(), - static_cast(ioPlan.chunkSizeBytes()), - ioPlan.reason); + impl::SIODiagnostics::logTinyIO(_params.logger, "OBJ writer", file->getFileName().string().c_str(), ioTelemetry, static_cast(output.size()), _params.ioPolicy, "writes"); + _params.logger.log("OBJ writer stats: file=%s bytes=%llu vertices=%llu faces=%llu geometries=%llu io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(output.size()), + static_cast(totalVertexCount), static_cast(totalFaceCount), static_cast(items.size()), + static_cast(ioTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), + system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(ioPlan.strategy).c_str(), static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); return writeOk; } -} // namespace nbl::asset +} #endif // _NBL_COMPILE_WITH_OBJ_WRITER_ diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index cdb40e28ab..c5d2f11217 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -5,2112 +5,1994 @@ #ifdef _NBL_COMPILE_WITH_PLY_LOADER_ #include "CPLYMeshFileLoader.h" +#include "SPLYPolygonGeometryAuxLayout.h" +#include "impl/SBinaryData.h" +#include "impl/SFileAccess.h" +#include "impl/SIODiagnostics.h" +#include "impl/STextParse.h" +#include "nbl/asset/IAssetManager.h" #include "nbl/asset/interchange/SGeometryContentHash.h" #include 
"nbl/asset/interchange/SGeometryLoaderCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" -#include "nbl/asset/IAssetManager.h" #include "nbl/asset/metadata/CPLYMetadata.h" #include "nbl/builtin/hlsl/array_accessors.hlsl" #include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" #include "nbl/core/hash/blake.h" -#include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" -#include "SPLYPolygonGeometryAuxLayout.h" - -#include +#include "nbl/system/ISystem.h" namespace nbl::asset { -CPLYMeshFileLoader::CPLYMeshFileLoader() = default; - -const char** CPLYMeshFileLoader::getAssociatedFileExtensions() const -{ - static const char* ext[] = { "ply", nullptr }; - return ext; -} - -bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr) const +namespace { - char buf[128]; - - system::IFile::success_t success; - _file->read(success, buf, 0, sizeof(buf)); - if (!success) - return false; - - const std::string_view fileHeader(buf, success.getBytesProcessed()); - auto trimWhitespace = [](std::string_view line) -> std::string_view - { - const auto isWhitespace = [](const char c) -> bool { return c == ' ' || c == '\t' || c == '\r'; }; - while (!line.empty() && isWhitespace(line.front())) - line.remove_prefix(1ull); - while (!line.empty() && isWhitespace(line.back())) - line.remove_suffix(1ull); - return line; - }; - - size_t lineStart = 0ull; - const size_t firstLineEnd = fileHeader.find('\n'); - std::string_view firstLine = fileHeader.substr(0ull, firstLineEnd); - firstLine = trimWhitespace(firstLine); - if (firstLine != "ply") - return false; - if (firstLineEnd == std::string_view::npos) - return false; - lineStart = firstLineEnd + 1ull; - - constexpr std::array headers = { "format ascii 1.0", "format binary_little_endian 1.0", "format binary_big_endian 1.0" }; - while (lineStart < fileHeader.size()) - { - size_t 
lineEnd = fileHeader.find('\n', lineStart); - if (lineEnd == std::string_view::npos) - lineEnd = fileHeader.size(); - std::string_view line = trimWhitespace(fileHeader.substr(lineStart, lineEnd - lineStart)); - if (line.starts_with("format ")) - return std::find(headers.begin(), headers.end(), line) != headers.end(); - lineStart = lineEnd + 1ull; - } - - return false; -} -const auto plyByteswap = [](const auto value) +struct Parse { - auto retval = value; - const auto* it = reinterpret_cast(&value); - std::reverse_copy(it, it + sizeof(retval), reinterpret_cast(&retval)); - return retval; -}; - -inline std::string_view plyToStringView(const char* text) -{ - return text ? std::string_view{ text } : std::string_view{}; -} - -struct SContext -{ - static constexpr uint64_t ReadWindowPaddingBytes = 1ull; - - // - struct SProperty - { - static E_FORMAT getType(const char* typeString) - { - struct STypeAlias - { - std::string_view name; - E_FORMAT format; - }; - constexpr std::array typeAliases = {{ - { "char", EF_R8_SINT }, - { "int8", EF_R8_SINT }, - { "uchar", EF_R8_UINT }, - { "uint8", EF_R8_UINT }, - { "short", EF_R16_SINT }, - { "int16", EF_R16_SINT }, - { "ushort", EF_R16_UINT }, - { "uint16", EF_R16_UINT }, - { "long", EF_R32_SINT }, - { "int", EF_R32_SINT }, - { "int32", EF_R32_SINT }, - { "ulong", EF_R32_UINT }, - { "uint", EF_R32_UINT }, - { "uint32", EF_R32_UINT }, - { "float", EF_R32_SFLOAT }, - { "float32", EF_R32_SFLOAT } - }}; - const std::string_view typeName = plyToStringView(typeString); - for (const auto& alias : typeAliases) - { - if (alias.name == typeName) - return alias.format; - } - if (typeName == "double" || typeName == "float64") - return EF_R64_SFLOAT; - return EF_UNKNOWN; - } - - bool isList() const {return type==EF_UNKNOWN && asset::isIntegerFormat(list.countType) && asset::isIntegerFormat(list.itemType);} - - void skip(SContext& _ctx) const - { - if (isList()) - { - int32_t count = _ctx.getInt(list.countType); - - for (decltype(count) i=0; i 
Properties; - // The number of elements in the file - size_t Count; - // known size in bytes, 0 if unknown - uint32_t KnownSize; - }; - - static constexpr size_t DefaultIoReadWindowBytes = 50ull << 10; - - void init(size_t _ioReadWindowSize = DefaultIoReadWindowBytes) - { - ioReadWindowSize = std::max(_ioReadWindowSize, DefaultIoReadWindowBytes); - Buffer.resize(ioReadWindowSize + ReadWindowPaddingBytes, '\0'); - EndPointer = StartPointer = Buffer.data(); - LineEndPointer = EndPointer-1; - - fillBuffer(); - } - - // gets more data from the file - void fillBuffer() - { - if (EndOfFile) - return; - else if (fileOffset>=inner.mainFile->getSize()) - { - EndOfFile = true; - return; - } - - const auto length = std::distance(StartPointer,EndPointer); - auto newStart = Buffer.data(); - // copy the remaining data to the start of the buffer - if (length && StartPointer!=newStart) - memmove(newStart,StartPointer,length); - // reset start position - StartPointer = newStart; - EndPointer = newStart+length; - - // read data from the file - const size_t usableBufferSize = Buffer.size() > 0ull ? Buffer.size() - ReadWindowPaddingBytes : 0ull; - if (usableBufferSize <= length) - { - EndOfFile = true; - return; - } - const size_t requestSize = usableBufferSize - length; - system::IFile::success_t success; - inner.mainFile->read(success,EndPointer,fileOffset,requestSize); - const size_t bytesRead = success.getBytesProcessed(); - ++readCallCount; - readBytesTotal += bytesRead; - if (bytesRead < readMinBytes) - readMinBytes = bytesRead; - fileOffset += bytesRead; - EndPointer += bytesRead; - - // if we didn't completely fill the buffer - if (bytesRead!=requestSize) - { - // cauterize the string - *EndPointer = 0; - EndOfFile = true; - } - } - // Split the string data into a line in place by terminating it instead of copying. 
- const char* getNextLine() - { - // move the start pointer along - StartPointer = LineEndPointer+1; - - // crlf split across buffer move - if (*StartPointer=='\n') - *(StartPointer++) = '\0'; - - // begin at the start of the next line - const std::array Terminators = { '\0','\r','\n'}; - auto terminator = std::find_first_of(StartPointer,EndPointer,Terminators.begin(),Terminators.end()); - if (terminator!=EndPointer) - *(terminator++) = '\0'; - - // we have reached the end of the buffer - if (terminator==EndPointer) - { - // get data from the file - if (EndOfFile) - { - StartPointer = EndPointer-1; - *StartPointer = '\0'; - return StartPointer; - } - else - { - fillBuffer(); - // reset line end pointer - LineEndPointer = StartPointer-1; - if (StartPointer!=EndPointer) - return getNextLine(); - else - return StartPointer; - } - } - else - { - LineEndPointer = terminator-1; - WordLength = -1; - // return pointer to the start of the line - return StartPointer; - } - } - // null terminate the next word on the previous line and move the next word pointer along - // since we already have a full line in the buffer, we never need to retrieve more data - const char* getNextWord() - { - // move the start pointer along - StartPointer += WordLength + 1; - if (StartPointer >= EndPointer) - { - if (EndOfFile) - { - WordLength = -1; - return EndPointer; - } - getNextLine(); - } - - if (StartPointer < EndPointer && !*StartPointer) - getNextLine(); - - if (StartPointer >= LineEndPointer) - { - WordLength = -1; // - return StartPointer; - } - // process the next word - { - assert(LineEndPointer<=EndPointer); - const std::array WhiteSpace = {'\0',' ','\t'}; - auto wordEnd = std::find_first_of(StartPointer,LineEndPointer,WhiteSpace.begin(),WhiteSpace.end()); - // null terminate the next word - if (wordEnd!=LineEndPointer) - *(wordEnd++) = '\0'; - // find next word - auto notWhiteSpace = [WhiteSpace](const char c)->bool - { - return 
std::find(WhiteSpace.begin(),WhiteSpace.end(),c)==WhiteSpace.end(); - }; - auto nextWord = std::find_if(wordEnd,LineEndPointer,notWhiteSpace); - WordLength = std::distance(StartPointer,nextWord)-1; - } - // return pointer to the start of current word - return StartPointer; - } - size_t getAbsoluteOffset(const char* ptr) const - { - if (!ptr || ptr > EndPointer) - return fileOffset; - const size_t trailingBytes = static_cast(EndPointer - ptr); - return fileOffset >= trailingBytes ? (fileOffset - trailingBytes) : 0ull; - } - void useMappedBinaryWindow(const char* data, const size_t sizeBytes) - { - if (!data) - return; - StartPointer = const_cast(data); - EndPointer = StartPointer + sizeBytes; - LineEndPointer = StartPointer - 1; - WordLength = -1; - EndOfFile = true; - fileOffset = inner.mainFile ? inner.mainFile->getSize() : fileOffset; - } - // skips x bytes in the file, getting more data if required - void moveForward(const size_t bytes) - { - assert(IsBinaryFile); - size_t remaining = bytes; - if (remaining == 0ull) - return; - - const size_t availableInitially = EndPointer > StartPointer ? static_cast(EndPointer - StartPointer) : 0ull; - if (remaining > availableInitially) - { - remaining -= availableInitially; - StartPointer = EndPointer; - if (remaining > ioReadWindowSize) - { - const size_t fileSize = inner.mainFile->getSize(); - const size_t fileRemaining = fileSize > fileOffset ? 
(fileSize - fileOffset) : 0ull; - const size_t directSkip = std::min(remaining, fileRemaining); - fileOffset += directSkip; - remaining -= directSkip; - } - } - - while (remaining) - { - if (StartPointer >= EndPointer) - { - fillBuffer(); - if (StartPointer >= EndPointer) - return; - } - - const size_t available = static_cast(EndPointer - StartPointer); - const size_t step = std::min(available, remaining); - StartPointer += step; - remaining -= step; - } - } - - // read the next int from the file and move the start pointer along - using widest_int_t = uint32_t; - widest_int_t getInt(const E_FORMAT f) - { - assert(!isFloatingPointFormat(f)); - if (IsBinaryFile) - { - if (StartPointer+sizeof(widest_int_t)>EndPointer) - fillBuffer(); + using Binary = impl::BinaryData; + using Common = impl::TextParse; - switch (getTexelOrBlockBytesize(f)) - { - case 1: - if (StartPointer+sizeof(int8_t)>EndPointer) - break; - return *(StartPointer++); - case 2: - { - if (StartPointer+sizeof(int16_t)>EndPointer) - break; - auto retval = *(reinterpret_cast(StartPointer)++); - if (IsWrongEndian) - retval = plyByteswap(retval); - return retval; - } - case 4: - { - if (StartPointer+sizeof(int32_t)>EndPointer) - break; - auto retval = *(reinterpret_cast(StartPointer)++); - if (IsWrongEndian) - retval = plyByteswap(retval); - return retval; - } - default: - assert(false); - break; - } - return 0; - } - const char* word = getNextWord(); - if (!word) - return 0u; - const size_t tokenLen = WordLength >= 0 ? 
static_cast(WordLength + 1) : std::char_traits::length(word); - const char* const wordEnd = word + tokenLen; - if (word == wordEnd) - return 0u; - - if (isSignedFormat(f)) - { - int64_t value = 0; - const auto parseResult = std::from_chars(word, wordEnd, value, 10); - if (parseResult.ec == std::errc() && parseResult.ptr == wordEnd) - return static_cast(value); - if (parseResult.ptr != word) - return static_cast(value); - return 0u; - } - else - { - uint64_t value = 0u; - const auto parseResult = std::from_chars(word, wordEnd, value, 10); - if (parseResult.ec == std::errc() && parseResult.ptr == wordEnd) - return static_cast(value); - if (parseResult.ptr != word) - return static_cast(value); - return 0u; - } - } - // read the next float from the file and move the start pointer along - hlsl::float64_t getFloat(const E_FORMAT f) - { - assert(isFloatingPointFormat(f)); - if (IsBinaryFile) - { - if (StartPointer+sizeof(hlsl::float64_t)>EndPointer) - fillBuffer(); - - switch (getTexelOrBlockBytesize(f)) - { - case 4: - { - if (StartPointer+sizeof(hlsl::float32_t)>EndPointer) - break; - auto retval = *(reinterpret_cast(StartPointer)++); - if (IsWrongEndian) - retval = plyByteswap(retval); - return retval; - } - case 8: - { - if (StartPointer+sizeof(hlsl::float64_t)>EndPointer) - break; - auto retval = *(reinterpret_cast(StartPointer)++); - if (IsWrongEndian) - retval = plyByteswap(retval); - return retval; - } - default: - assert(false); - break; - } - return 0; - } - const char* word = getNextWord(); - if (!word) - return 0.0; - const size_t tokenLen = WordLength >= 0 ? 
static_cast(WordLength + 1) : std::char_traits::length(word); - const char* const wordEnd = word + tokenLen; - if (word == wordEnd) - return 0.0; - - hlsl::float64_t value = 0.0; - const auto parseResult = fast_float::from_chars(word, wordEnd, value); - if (parseResult.ec == std::errc() && parseResult.ptr == wordEnd) - return value; - if (parseResult.ptr != word) - return value; - return 0.0; - } - // read the next thing from the file and move the start pointer along - void getData(void* dst, const E_FORMAT f) - { - const auto size = getTexelOrBlockBytesize(f); - if (StartPointer+size>EndPointer) - { - fillBuffer(); - if (StartPointer+size>EndPointer) - return; - } - if (IsWrongEndian) - std::reverse_copy(StartPointer,StartPointer+size,reinterpret_cast(dst)); - else - memcpy(dst,StartPointer,size); - StartPointer += size; - } - struct SVertAttrIt - { - uint8_t* ptr; - uint32_t stride; - E_FORMAT dstFmt; - }; - enum class EFastVertexReadResult : uint8_t - { - NotApplicable, - Success, - Error - }; - EFastVertexReadResult readVertexElementFast(const SElement& el, hlsl::shapes::util::AABBAccumulator3* parsedAABB) + static std::string_view toStringView(const char* text) { - if (!IsBinaryFile || el.Name != "vertex") - return EFastVertexReadResult::NotApplicable; - struct SLayoutDesc - { - uint32_t propertyCount; - uint32_t srcBytesPerVertex; - bool hasNormals; - bool hasUVs; - }; - - auto allF32 = [&el]()->bool - { - for (const auto& prop : el.Properties) - { - if (prop.type != EF_R32_SFLOAT) - return false; - } - return true; - }; - if (!allF32()) - return EFastVertexReadResult::NotApplicable; - - auto matchNames = [&el](std::initializer_list names)->bool - { - if (el.Properties.size() != names.size()) - return false; - size_t i = 0ull; - for (const auto* name : names) - { - if (el.Properties[i].Name != name) - return false; - ++i; - } - return true; - }; - static constexpr SLayoutDesc xyz = { 3u, sizeof(hlsl::float32_t) * 3u, false, false }; - static constexpr 
SLayoutDesc xyz_n = { 6u, sizeof(hlsl::float32_t) * 6u, true, false }; - static constexpr SLayoutDesc xyz_n_uv = { 8u, sizeof(hlsl::float32_t) * 8u, true, true }; - const SLayoutDesc* layout = nullptr; - if (matchNames({ "x", "y", "z" })) - layout = &xyz; - else if (matchNames({ "x", "y", "z", "nx", "ny", "nz" })) - layout = &xyz_n; - else if (matchNames({ "x", "y", "z", "nx", "ny", "nz", "u", "v" }) || matchNames({ "x", "y", "z", "nx", "ny", "nz", "s", "t" })) - layout = &xyz_n_uv; - if (!layout) - return EFastVertexReadResult::NotApplicable; - - const size_t floatBytes = sizeof(hlsl::float32_t); - auto validateTuple = [&](const size_t beginIx, const size_t componentCount, uint32_t& outStride, uint8_t*& outBase)->bool - { - if (beginIx + componentCount > vertAttrIts.size()) - return false; - auto& first = vertAttrIts[beginIx]; - if (!first.ptr || first.dstFmt != EF_R32_SFLOAT) - return false; - outStride = first.stride; - outBase = first.ptr; - for (size_t c = 1ull; c < componentCount; ++c) - { - auto& it = vertAttrIts[beginIx + c]; - if (!it.ptr || it.dstFmt != EF_R32_SFLOAT) - return false; - if (it.stride != outStride) - return false; - if (it.ptr != outBase + c * floatBytes) - return false; - } - return true; - }; - - uint32_t posStride = 0u; - uint32_t normalStride = 0u; - uint32_t uvStride = 0u; - uint8_t* posBase = nullptr; - uint8_t* normalBase = nullptr; - uint8_t* uvBase = nullptr; - if (vertAttrIts.size() != layout->propertyCount || !validateTuple(0u, 3u, posStride, posBase) || (layout->hasNormals && !validateTuple(3u, 3u, normalStride, normalBase)) || (layout->hasUVs && !validateTuple(6u, 2u, uvStride, uvBase))) - return EFastVertexReadResult::NotApplicable; - if (el.Count > (std::numeric_limits::max() / layout->srcBytesPerVertex)) - return EFastVertexReadResult::Error; - - const bool trackAABB = parsedAABB != nullptr; - const bool needsByteSwap = IsWrongEndian; - auto decodeF32 = [needsByteSwap](const uint8_t* src)->float - { - uint32_t bits = 0u; - 
std::memcpy(&bits, src, sizeof(bits)); - if (needsByteSwap) - bits = plyByteswap(bits); - float value = 0.f; - std::memcpy(&value, &bits, sizeof(value)); - return value; - }; - auto decodeVector = [&](const uint8_t* src)->Vec - { - constexpr uint32_t N = hlsl::vector_traits::Dimension; - Vec value{}; - hlsl::array_set setter; - for (uint32_t i = 0u; i < N; ++i) - setter(value, i, decodeF32(src + static_cast(i) * floatBytes)); - return value; - }; - auto storeVector = [](uint8_t* dst, const Vec& value) -> void - { - constexpr uint32_t N = hlsl::vector_traits::Dimension; - hlsl::array_get getter; - auto* const out = reinterpret_cast(dst); - for (uint32_t i = 0u; i < N; ++i) - out[i] = getter(value, i); - }; - auto advanceTuple = [&](const uint32_t beginIx, const uint32_t componentCount, const size_t advance) -> void - { - for (uint32_t i = 0u; i < componentCount; ++i) - vertAttrIts[beginIx + i].ptr += advance; - }; - - size_t remainingVertices = el.Count; - while (remainingVertices > 0ull) - { - if (StartPointer + layout->srcBytesPerVertex > EndPointer) - fillBuffer(); - const size_t available = EndPointer > StartPointer ? 
static_cast(EndPointer - StartPointer) : 0ull; - if (available < layout->srcBytesPerVertex) - return EFastVertexReadResult::Error; - - const size_t batchVertices = std::min(remainingVertices, available / layout->srcBytesPerVertex); - const uint8_t* src = reinterpret_cast(StartPointer); - if (!layout->hasNormals && !layout->hasUVs && posStride == 3ull * floatBytes && !needsByteSwap && !trackAABB) - { - const size_t batchBytes = batchVertices * 3ull * floatBytes; - std::memcpy(posBase, src, batchBytes); - src += batchBytes; - posBase += batchBytes; - } - else - { - for (size_t v = 0ull; v < batchVertices; ++v) - { - const hlsl::float32_t3 position = decodeVector.operator()(src); - storeVector.operator()(posBase, position); - if (trackAABB) - hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); - src += 3ull * floatBytes; - posBase += posStride; - if (layout->hasNormals) - { - storeVector.operator()(normalBase, decodeVector.operator()(src)); - src += 3ull * floatBytes; - normalBase += normalStride; - } - if (layout->hasUVs) - { - storeVector.operator()(uvBase, decodeVector.operator()(src)); - src += 2ull * floatBytes; - uvBase += uvStride; - } - } - } - - const size_t consumed = batchVertices * layout->srcBytesPerVertex; - StartPointer += consumed; - remainingVertices -= batchVertices; - } - - advanceTuple(0u, 3u, el.Count * posStride); - if (layout->hasNormals) - advanceTuple(3u, 3u, el.Count * normalStride); - if (layout->hasUVs) - advanceTuple(6u, 2u, el.Count * uvStride); - return EFastVertexReadResult::Success; + return text ? 
std::string_view{text} : std::string_view{}; } - void readVertex(const IAssetLoader::SAssetLoadParams& _params, const SElement& el) - { - assert(el.Name=="vertex"); - assert(el.Properties.size()==vertAttrIts.size()); - if (!IsBinaryFile) - getNextLine(); - for (size_t j=0; j& _outIndices, uint32_t& _maxIndex, const uint32_t vertexCount) + struct Context { - if (!IsBinaryFile) - getNextLine(); - const bool hasVertexCount = vertexCount != 0u; + static constexpr uint64_t ReadWindowPaddingBytes = 1ull; - for (const auto& prop : Element.Properties) + struct SProperty { - if (prop.isList() && (prop.Name=="vertex_indices" || prop.Name == "vertex_index")) + static E_FORMAT getType(const char* typeString) { - const uint32_t count = getInt(prop.list.countType); - const auto srcIndexFmt = prop.list.itemType; - if (count < 3u) + struct STypeAlias { - for (uint32_t j = 0u; j < count; ++j) - getInt(srcIndexFmt); - continue; - } - if (count > 3u) - _outIndices.reserve(_outIndices.size() + static_cast(count - 2u) * 3ull); - auto emitFan = [&_outIndices, &_maxIndex, hasVertexCount, vertexCount](auto&& readIndex, const uint32_t faceVertexCount)->bool - { - uint32_t i0 = readIndex(); - uint32_t i1 = readIndex(); - uint32_t i2 = readIndex(); - if (hasVertexCount) - { - if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) - return false; - } - else - { - _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); - } - _outIndices.push_back(i0); - _outIndices.push_back(i1); - _outIndices.push_back(i2); - uint32_t prev = i2; - for (uint32_t j = 3u; j < faceVertexCount; ++j) - { - const uint32_t idx = readIndex(); - if (hasVertexCount) - { - if (idx >= vertexCount) - return false; - } - else - { - _maxIndex = std::max(_maxIndex, idx); - } - _outIndices.push_back(i0); - _outIndices.push_back(prev); - _outIndices.push_back(idx); - prev = idx; - } - return true; + std::string_view name; + E_FORMAT format; }; - - if (IsBinaryFile && !IsWrongEndian && srcIndexFmt == 
EF_R32_UINT) - { - const size_t bytesNeeded = static_cast(count) * sizeof(uint32_t); - if (StartPointer + bytesNeeded > EndPointer) - fillBuffer(); - if (StartPointer + bytesNeeded <= EndPointer) - { - const uint8_t* ptr = reinterpret_cast(StartPointer); - auto readIndex = [&ptr]() -> uint32_t - { - uint32_t v = 0u; - std::memcpy(&v, ptr, sizeof(v)); - ptr += sizeof(v); - return v; - }; - if (!emitFan(readIndex, count)) - return false; - StartPointer = reinterpret_cast(const_cast(ptr)); - continue; - } - } - else if (IsBinaryFile && !IsWrongEndian && srcIndexFmt == EF_R16_UINT) - { - const size_t bytesNeeded = static_cast(count) * sizeof(uint16_t); - if (StartPointer + bytesNeeded > EndPointer) - fillBuffer(); - if (StartPointer + bytesNeeded <= EndPointer) - { - const uint8_t* ptr = reinterpret_cast(StartPointer); - auto readIndex = [&ptr]() -> uint32_t - { - uint16_t v = 0u; - std::memcpy(&v, ptr, sizeof(v)); - ptr += sizeof(v); - return static_cast(v); - }; - if (!emitFan(readIndex, count)) - return false; - StartPointer = reinterpret_cast(const_cast(ptr)); - continue; - } - } - - auto readIndex = [&]() -> uint32_t - { - return static_cast(getInt(srcIndexFmt)); - }; - if (!emitFan(readIndex, count)) - return false; - } - else if (prop.Name == "intensity") - { - // todo: face intensity - prop.skip(*this); - } - else - prop.skip(*this); - } - return true; - } - - enum class EFastFaceReadResult : uint8_t - { - NotApplicable, - Success, - Error - }; - - EFastFaceReadResult readFaceElementFast( - const SElement& element, - core::vector& _outIndices, - uint32_t& _maxIndex, - uint64_t& _faceCount, - const uint32_t vertexCount, - const bool computeIndexHash, - core::blake3_hash_t& outIndexHash) - { - if (!IsBinaryFile) - return EFastFaceReadResult::NotApplicable; - if (element.Properties.size() != 1u) - return EFastFaceReadResult::NotApplicable; - - const auto& prop = element.Properties[0]; - if (!prop.isList() || (prop.Name != "vertex_indices" && prop.Name != 
"vertex_index")) - return EFastFaceReadResult::NotApplicable; - if (prop.list.countType != EF_R8_UINT) - return EFastFaceReadResult::NotApplicable; - - const E_FORMAT srcIndexFmt = prop.list.itemType; - const bool isSrcU32 = srcIndexFmt == EF_R32_UINT; - const bool isSrcS32 = srcIndexFmt == EF_R32_SINT; - const bool isSrcU16 = srcIndexFmt == EF_R16_UINT; - const bool isSrcS16 = srcIndexFmt == EF_R16_SINT; - if (!isSrcU32 && !isSrcS32 && !isSrcU16 && !isSrcS16) - return EFastFaceReadResult::NotApplicable; - - const bool is32Bit = isSrcU32 || isSrcS32; - const bool needEndianSwap = IsWrongEndian; - const size_t indexSize = is32Bit ? sizeof(uint32_t) : sizeof(uint16_t); - const bool hasVertexCount = vertexCount != 0u; - const bool trackMaxIndex = !hasVertexCount; - outIndexHash = IPreHashed::INVALID_HASH; - const size_t minTriangleRecordSize = sizeof(uint8_t) + indexSize * 3u; - if (element.Count > (std::numeric_limits::max() / minTriangleRecordSize)) - return EFastFaceReadResult::Error; - const size_t minBytesNeeded = element.Count * minTriangleRecordSize; - if (StartPointer + minBytesNeeded <= EndPointer) - { - if (element.Count > (std::numeric_limits::max() / 3u)) - return EFastFaceReadResult::Error; - const size_t triIndices = element.Count * 3u; - if (_outIndices.size() > (std::numeric_limits::max() - triIndices)) - return EFastFaceReadResult::Error; - const size_t oldSize = _outIndices.size(); - const uint32_t oldMaxIndex = _maxIndex; - _outIndices.resize(oldSize + triIndices); - uint32_t* out = _outIndices.data() + oldSize; - const uint8_t* ptr = reinterpret_cast(StartPointer); - auto readU32 = [needEndianSwap](const uint8_t* src)->uint32_t - { - uint32_t value = 0u; - std::memcpy(&value, src, sizeof(value)); - if (needEndianSwap) - value = plyByteswap(value); - return value; - }; - auto readU16 = [needEndianSwap](const uint8_t* src)->uint16_t - { - uint16_t value = 0u; - std::memcpy(&value, src, sizeof(value)); - if (needEndianSwap) - value = 
plyByteswap(value); - return value; - }; - bool fallbackToGeneric = false; - if (is32Bit) - { - const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); - const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers(hw, inner.params.ioPolicy.runtimeTuning.workerHeadroom); - const size_t recordBytes = sizeof(uint8_t) + 3ull * sizeof(uint32_t); - SLoaderRuntimeTuningRequest faceTuningRequest = {}; - faceTuningRequest.inputBytes = minBytesNeeded; - faceTuningRequest.totalWorkUnits = element.Count; - faceTuningRequest.minBytesPerWorker = recordBytes; - faceTuningRequest.hardwareThreads = static_cast(hw); - faceTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); - faceTuningRequest.targetChunksPerWorker = inner.params.ioPolicy.runtimeTuning.targetChunksPerWorker; - faceTuningRequest.sampleData = ptr; - faceTuningRequest.sampleBytes = SLoaderRuntimeTuner::resolveSampleBytes(inner.params.ioPolicy, minBytesNeeded); - const auto faceTuning = SLoaderRuntimeTuner::tune(inner.params.ioPolicy, faceTuningRequest); - size_t workerCount = std::min(faceTuning.workerCount, element.Count); - if (workerCount > 1ull) + constexpr std::array typeAliases = {{ + {"char", EF_R8_SINT}, + {"int8", EF_R8_SINT}, + {"uchar", EF_R8_UINT}, + {"uint8", EF_R8_UINT}, + {"short", EF_R16_SINT}, + {"int16", EF_R16_SINT}, + {"ushort", EF_R16_UINT}, + {"uint16", EF_R16_UINT}, + {"long", EF_R32_SINT}, + {"int", EF_R32_SINT}, + {"int32", EF_R32_SINT}, + {"ulong", EF_R32_UINT}, + {"uint", EF_R32_UINT}, + {"uint32", EF_R32_UINT}, + {"float", EF_R32_SFLOAT}, + {"float32", EF_R32_SFLOAT} + }}; + const std::string_view typeName = Parse::toStringView(typeString); + for (const auto& alias : typeAliases) { - const bool needMax = trackMaxIndex; - const bool validateAgainstVertexCount = hasVertexCount; - std::vector workerNonTriangle(workerCount, 0u); - std::vector workerInvalid(workerCount, 0u); - std::vector workerMax(needMax ? 
workerCount : 0ull, 0u); - const bool hashInParsePipeline = computeIndexHash; - std::vector workerReady(hashInParsePipeline ? workerCount : 0ull, 0u); - std::vector workerHashable(hashInParsePipeline ? workerCount : 0ull, 1u); - std::atomic_bool hashPipelineOk = true; - core::blake3_hash_t parsedIndexHash = IPreHashed::INVALID_HASH; - std::jthread hashThread; - if (hashInParsePipeline) - { - hashThread = std::jthread([&]() - { - try - { - core::blake3_hasher hasher; - for (size_t workerIx = 0ull; workerIx < workerCount; ++workerIx) - { - auto ready = std::atomic_ref(workerReady[workerIx]); - while (ready.load(std::memory_order_acquire) == 0u) - ready.wait(0u, std::memory_order_acquire); - if (workerHashable[workerIx] == 0u) - { - hashPipelineOk.store(false, std::memory_order_relaxed); - return; - } - - const size_t begin = (element.Count * workerIx) / workerCount; - const size_t end = (element.Count * (workerIx + 1ull)) / workerCount; - const size_t faceCount = end - begin; - hasher.update(out + begin * 3ull, faceCount * 3ull * sizeof(uint32_t)); - } - parsedIndexHash = static_cast(hasher); - } - catch (...) 
- { - hashPipelineOk.store(false, std::memory_order_relaxed); - } - }); - } - auto parseChunk = [&](const size_t workerIx, const size_t beginFace, const size_t endFace) -> void - { - const uint8_t* in = ptr + beginFace * recordBytes; - uint32_t* outLocal = out + beginFace * 3ull; - uint32_t localMax = 0u; - for (size_t faceIx = beginFace; faceIx < endFace; ++faceIx) - { - if (*in != 3u) - { - workerNonTriangle[workerIx] = 1u; - if (hashInParsePipeline) - workerHashable[workerIx] = 0u; - break; - } - ++in; - const uint32_t i0 = readU32(in + 0ull * sizeof(uint32_t)); - const uint32_t i1 = readU32(in + 1ull * sizeof(uint32_t)); - const uint32_t i2 = readU32(in + 2ull * sizeof(uint32_t)); - outLocal[0] = i0; - outLocal[1] = i1; - outLocal[2] = i2; - const uint32_t triOr = (i0 | i1 | i2); - if (isSrcS32 && (triOr & 0x80000000u)) - { - workerInvalid[workerIx] = 1u; - if (hashInParsePipeline) - workerHashable[workerIx] = 0u; - break; - } - if (validateAgainstVertexCount) - { - if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) - { - workerInvalid[workerIx] = 1u; - if (hashInParsePipeline) - workerHashable[workerIx] = 0u; - break; - } - } - else if (needMax) - { - if (i0 > localMax) localMax = i0; - if (i1 > localMax) localMax = i1; - if (i2 > localMax) localMax = i2; - } - in += 3ull * sizeof(uint32_t); - outLocal += 3ull; - } - if (needMax) - workerMax[workerIx] = localMax; - if (hashInParsePipeline) - { - auto ready = std::atomic_ref(workerReady[workerIx]); - ready.store(1u, std::memory_order_release); - ready.notify_one(); - } - }; - SLoaderRuntimeTuner::dispatchWorkers(workerCount, [&](const size_t workerIx) { const size_t begin = (element.Count * workerIx) / workerCount; const size_t end = (element.Count * (workerIx + 1ull)) / workerCount; parseChunk(workerIx, begin, end); }); - if (hashThread.joinable()) - hashThread.join(); - - const bool anyNonTriangle = std::any_of(workerNonTriangle.begin(), workerNonTriangle.end(), [](const uint8_t v) { return v != 
0u; }); - if (anyNonTriangle) - { - _outIndices.resize(oldSize); - _maxIndex = oldMaxIndex; - return EFastFaceReadResult::NotApplicable; - } - const bool anyInvalid = std::any_of(workerInvalid.begin(), workerInvalid.end(), [](const uint8_t v) { return v != 0u; }); - if (anyInvalid) - { - _outIndices.resize(oldSize); - _maxIndex = oldMaxIndex; - return EFastFaceReadResult::Error; - } - if (trackMaxIndex) - { - for (const uint32_t local : workerMax) - if (local > _maxIndex) - _maxIndex = local; - } - if (hashInParsePipeline && hashPipelineOk.load(std::memory_order_relaxed)) - outIndexHash = parsedIndexHash; - - StartPointer = reinterpret_cast(const_cast(ptr + element.Count * recordBytes)); - _faceCount += element.Count; - return EFastFaceReadResult::Success; + if (alias.name == typeName) + return alias.format; } + if (typeName == "double" || typeName == "float64") + return EF_R64_SFLOAT; + return EF_UNKNOWN; } - if (is32Bit) - { - if (isSrcU32) - { - if (trackMaxIndex) - { - for (size_t j = 0u; j < element.Count; ++j) - { - const uint8_t c = *ptr++; - if (c != 3u) - { - fallbackToGeneric = true; - break; - } - out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); - out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); - out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); - ptr += 3ull * sizeof(uint32_t); - if (out[0] > _maxIndex) _maxIndex = out[0]; - if (out[1] > _maxIndex) _maxIndex = out[1]; - if (out[2] > _maxIndex) _maxIndex = out[2]; - out += 3; - } - } - else - { - for (size_t j = 0u; j < element.Count; ++j) - { - const uint8_t c = *ptr++; - if (c != 3u) - { - fallbackToGeneric = true; - break; - } - out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); - out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); - out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); - ptr += 3ull * sizeof(uint32_t); - if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) - return EFastFaceReadResult::Error; - out += 3; - } - } - } - else if (trackMaxIndex) - { - for (size_t j = 0u; j 
< element.Count; ++j) - { - const uint8_t c = *ptr++; - if (c != 3u) - { - fallbackToGeneric = true; - break; - } - out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); - out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); - out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); - ptr += 3ull * sizeof(uint32_t); - if ((out[0] | out[1] | out[2]) & 0x80000000u) - return EFastFaceReadResult::Error; - if (out[0] > _maxIndex) _maxIndex = out[0]; - if (out[1] > _maxIndex) _maxIndex = out[1]; - if (out[2] > _maxIndex) _maxIndex = out[2]; - out += 3; - } - } - else - { - for (size_t j = 0u; j < element.Count; ++j) - { - const uint8_t c = *ptr++; - if (c != 3u) - { - fallbackToGeneric = true; - break; - } - out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); - out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); - out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); - ptr += 3ull * sizeof(uint32_t); - const uint32_t triOr = (out[0] | out[1] | out[2]); - if (triOr & 0x80000000u) - return EFastFaceReadResult::Error; - if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) - return EFastFaceReadResult::Error; - out += 3; - } - } - } - else + bool isList() const { - if (isSrcU16) - { - if (trackMaxIndex) - { - for (size_t j = 0u; j < element.Count; ++j) - { - const uint8_t c = *ptr++; - if (c != 3u) - { - fallbackToGeneric = true; - break; - } - out[0] = readU16(ptr + 0ull * sizeof(uint16_t)); - out[1] = readU16(ptr + 1ull * sizeof(uint16_t)); - out[2] = readU16(ptr + 2ull * sizeof(uint16_t)); - ptr += 3ull * sizeof(uint16_t); - if (out[0] > _maxIndex) _maxIndex = out[0]; - if (out[1] > _maxIndex) _maxIndex = out[1]; - if (out[2] > _maxIndex) _maxIndex = out[2]; - out += 3; - } - } - else - { - for (size_t j = 0u; j < element.Count; ++j) - { - const uint8_t c = *ptr++; - if (c != 3u) - { - fallbackToGeneric = true; - break; - } - out[0] = readU16(ptr + 0ull * sizeof(uint16_t)); - out[1] = readU16(ptr + 1ull * sizeof(uint16_t)); - out[2] = readU16(ptr + 2ull * sizeof(uint16_t)); - 
ptr += 3ull * sizeof(uint16_t); - if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) - return EFastFaceReadResult::Error; - out += 3; - } - } - } - else if (trackMaxIndex) - { - for (size_t j = 0u; j < element.Count; ++j) - { - const uint8_t c = *ptr++; - if (c != 3u) - { - fallbackToGeneric = true; - break; - } - const uint16_t t0 = readU16(ptr + 0ull * sizeof(uint16_t)); - const uint16_t t1 = readU16(ptr + 1ull * sizeof(uint16_t)); - const uint16_t t2 = readU16(ptr + 2ull * sizeof(uint16_t)); - ptr += 3ull * sizeof(uint16_t); - if ((t0 | t1 | t2) & 0x8000u) - return EFastFaceReadResult::Error; - out[0] = static_cast(t0); - out[1] = static_cast(t1); - out[2] = static_cast(t2); - if (out[0] > _maxIndex) _maxIndex = out[0]; - if (out[1] > _maxIndex) _maxIndex = out[1]; - if (out[2] > _maxIndex) _maxIndex = out[2]; - out += 3; - } - } - else - { - for (size_t j = 0u; j < element.Count; ++j) - { - const uint8_t c = *ptr++; - if (c != 3u) - { - fallbackToGeneric = true; - break; - } - const uint16_t t0 = readU16(ptr + 0ull * sizeof(uint16_t)); - const uint16_t t1 = readU16(ptr + 1ull * sizeof(uint16_t)); - const uint16_t t2 = readU16(ptr + 2ull * sizeof(uint16_t)); - ptr += 3ull * sizeof(uint16_t); - if ((t0 | t1 | t2) & 0x8000u) - return EFastFaceReadResult::Error; - out[0] = static_cast(t0); - out[1] = static_cast(t1); - out[2] = static_cast(t2); - if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) - return EFastFaceReadResult::Error; - out += 3; - } - } + return type == EF_UNKNOWN && asset::isIntegerFormat(list.countType) && asset::isIntegerFormat(list.itemType); } - if (!fallbackToGeneric) + void skip(Context& _ctx) const { - StartPointer = reinterpret_cast(const_cast(ptr)); - _faceCount += element.Count; - return EFastFaceReadResult::Success; - } - - _outIndices.resize(oldSize); - _maxIndex = oldMaxIndex; - } - - if (element.Count > (std::numeric_limits::max() / 3u)) - return EFastFaceReadResult::Error; - const 
size_t reserveCount = element.Count * 3u; - if (_outIndices.size() > (std::numeric_limits::max() - reserveCount)) - return EFastFaceReadResult::Error; - _outIndices.reserve(_outIndices.size() + reserveCount); - auto ensureBytes = [this](const size_t bytes)->bool - { - if (StartPointer + bytes > EndPointer) - fillBuffer(); - return StartPointer + bytes <= EndPointer; - }; - auto readCount = [&ensureBytes, this](int32_t& outCount)->bool - { - if (!ensureBytes(sizeof(uint8_t))) - return false; - outCount = static_cast(*StartPointer++); - return true; - }; - auto readIndex = [&ensureBytes, this, is32Bit, isSrcU32, isSrcU16, needEndianSwap](uint32_t& out)->bool - { - if (is32Bit) - { - if (!ensureBytes(sizeof(uint32_t))) - return false; - if (isSrcU32) + if (isList()) { - std::memcpy(&out, StartPointer, sizeof(uint32_t)); - if (needEndianSwap) - out = plyByteswap(out); + int32_t count = _ctx.getInt(list.countType); + for (decltype(count) i = 0; i < count; ++i) + _ctx.getInt(list.itemType); } + else if (_ctx.IsBinaryFile) + _ctx.moveForward(getTexelOrBlockBytesize(type)); else - { - int32_t v = 0; - std::memcpy(&v, StartPointer, sizeof(v)); - if (needEndianSwap) - v = plyByteswap(v); - if (v < 0) - return false; - out = static_cast(v); - } - StartPointer += sizeof(uint32_t); - return true; + _ctx.getNextWord(); } - if (!ensureBytes(sizeof(uint16_t))) - return false; - if (isSrcU16) + std::string Name; + E_FORMAT type; + struct SListTypes { - uint16_t v = 0u; - std::memcpy(&v, StartPointer, sizeof(uint16_t)); - if (needEndianSwap) - v = plyByteswap(v); - out = v; - } - else - { - int16_t v = 0; - std::memcpy(&v, StartPointer, sizeof(int16_t)); - if (needEndianSwap) - v = plyByteswap(v); - if (v < 0) - return false; - out = static_cast(v); - } - StartPointer += sizeof(uint16_t); - return true; + E_FORMAT countType; + E_FORMAT itemType; + } list; }; - for (size_t j = 0u; j < element.Count; ++j) - { - int32_t countSigned = 0; - if (!readCount(countSigned)) - return 
EFastFaceReadResult::Error; - const uint32_t count = static_cast(countSigned); - if (count < 3u) - { - uint32_t dummy = 0u; - for (uint32_t k = 0u; k < count; ++k) - { - if (!readIndex(dummy)) - return EFastFaceReadResult::Error; - } - ++_faceCount; - continue; - } - - uint32_t i0 = 0u; - uint32_t i1 = 0u; - uint32_t i2 = 0u; - if (!readIndex(i0) || !readIndex(i1) || !readIndex(i2)) - return EFastFaceReadResult::Error; - - if (trackMaxIndex) - { - _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); - } - else if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) - { - return EFastFaceReadResult::Error; - } - _outIndices.push_back(i0); - _outIndices.push_back(i1); - _outIndices.push_back(i2); - - uint32_t prev = i2; - for (uint32_t k = 3u; k < count; ++k) - { - uint32_t idx = 0u; - if (!readIndex(idx)) - return EFastFaceReadResult::Error; - if (trackMaxIndex) - { - _maxIndex = std::max(_maxIndex, idx); - } - else if (idx >= vertexCount) - { - return EFastFaceReadResult::Error; - } - _outIndices.push_back(i0); - _outIndices.push_back(prev); - _outIndices.push_back(idx); - prev = idx; - } - - ++_faceCount; - } - - return EFastFaceReadResult::Success; - } - - IAssetLoader::SAssetLoadContext inner; - uint32_t topHierarchyLevel; - IAssetLoader::IAssetLoaderOverride* loaderOverride; - // input buffer must be at least twice as long as the longest line in the file - core::vector Buffer; - size_t ioReadWindowSize = DefaultIoReadWindowBytes; - core::vector ElementList = {}; - char* StartPointer = nullptr, *EndPointer = nullptr, *LineEndPointer = nullptr; - int32_t LineLength = 0; - int32_t WordLength = -1; // this variable is a misnomer, its really the offset to next word minus one - bool IsBinaryFile = false, IsWrongEndian = false, EndOfFile = false; - size_t fileOffset = {}; - uint64_t readCallCount = 0ull; - uint64_t readBytesTotal = 0ull; - uint64_t readMinBytes = std::numeric_limits::max(); - // - core::vector vertAttrIts; -}; - -//! 
creates/loads an animated mesh from the file. -SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) -{ - using namespace nbl::core; - if (!_file) - return {}; - - const bool computeContentHashes = !_params.loaderFlags.hasAnyFlag(IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES); - uint64_t faceCount = 0u; - uint64_t fastFaceElementCount = 0u; - uint64_t fastVertexElementCount = 0u; - uint32_t maxIndexRead = 0u; - core::blake3_hash_t precomputedIndexHash = IPreHashed::INVALID_HASH; - const uint64_t fileSize = _file->getSize(); - const bool hashInBuild = computeContentHashes && SLoaderRuntimeTuner::shouldInlineHashBuild(_params.ioPolicy, fileSize); - const bool fileMappable = core::bitflag(_file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, fileSize, true, fileMappable); - if (!ioPlan.isValid()) - { - _params.logger.log("PLY loader: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str(), ioPlan.reason); - return {}; - } - - SContext ctx = { - asset::IAssetLoader::SAssetLoadContext{ - _params, - _file - }, - _hierarchyLevel, - _override - }; - uint64_t desiredReadWindow = ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? 
(fileSize + SContext::ReadWindowPaddingBytes) : ioPlan.chunkSizeBytes(); - if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) - { - const bool mappedInput = static_cast(_file)->getMappedPointer() != nullptr; - if (mappedInput && fileSize > (SContext::DefaultIoReadWindowBytes * 2ull)) - desiredReadWindow = SContext::DefaultIoReadWindowBytes; - } - const uint64_t safeReadWindow = std::min(desiredReadWindow, static_cast(std::numeric_limits::max() - SContext::ReadWindowPaddingBytes)); - ctx.init(static_cast(safeReadWindow)); - - // start with empty mesh - auto geometry = make_smart_refctd_ptr(); - hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); - uint32_t vertCount=0; - core::vector> hashedBuffers; - std::jthread deferredPositionHashThread; - auto hashBufferIfNeeded = [&](ICPUBuffer* buffer)->void - { - if (!hashInBuild || !buffer) - return; - for (const auto& hashed : hashedBuffers) + struct SElement { - if (hashed.get() == buffer) - return; - } - buffer->setContentHash(buffer->computeContentHash()); - hashedBuffers.push_back(core::smart_refctd_ptr(buffer)); - }; - auto tryLaunchDeferredHash = [&](const IGeometry::SDataView& view, std::jthread& deferredThread)->void - { - if (!computeContentHashes || hashInBuild || !view || !view.src.buffer) - return; - if (deferredThread.joinable()) - return; - if (view.src.buffer->getContentHash() != IPreHashed::INVALID_HASH) - return; - auto keepAlive = core::smart_refctd_ptr(view.src.buffer); - deferredThread = std::jthread([buffer = std::move(keepAlive)]() mutable - { - buffer->setContentHash(buffer->computeContentHash()); - }); - }; - auto hashViewBufferIfNeeded = [&](const IGeometry::SDataView& view)->void - { - if (!view || !view.src.buffer) - return; - hashBufferIfNeeded(view.src.buffer.get()); - }; - auto hashRemainingGeometryBuffers = [&]()->void - { - if (!hashInBuild) - return; - hashViewBufferIfNeeded(geometry->getPositionView()); - 
hashViewBufferIfNeeded(geometry->getIndexView()); - hashViewBufferIfNeeded(geometry->getNormalView()); - for (const auto& view : *geometry->getAuxAttributeViews()) - hashViewBufferIfNeeded(view); - for (const auto& view : *geometry->getJointWeightViews()) - { - hashViewBufferIfNeeded(view.indices); - hashViewBufferIfNeeded(view.weights); - } - if (const auto jointObb = geometry->getJointOBBView(); jointObb) - hashViewBufferIfNeeded(*jointObb); - }; - - // Currently only supports ASCII or binary meshes - if (plyToStringView(ctx.getNextLine()) != "ply") - { - _params.logger.log("Not a valid PLY file %s", system::ILogger::ELL_ERROR,ctx.inner.mainFile->getFileName().string().c_str()); - return {}; - } - - // cut the next line out - ctx.getNextLine(); - // grab the word from this line - const char* word = ctx.getNextWord(); - // ignore comments - for (; plyToStringView(word) == "comment"; ctx.getNextLine()) - word = ctx.getNextWord(); - - bool readingHeader = true; - bool continueReading = true; - ctx.IsBinaryFile = false; - ctx.IsWrongEndian= false; - - do - { - const std::string_view wordView = plyToStringView(word); - if (wordView == "property") - { - word = ctx.getNextWord(); - - if (ctx.ElementList.empty()) - { - _params.logger.log("PLY property token found before element %s", system::ILogger::ELL_WARNING, word); - } - else + void skipElement(Context& _ctx) const { - // get element - auto& el = ctx.ElementList.back(); - - // fill property struct - auto& prop = el.Properties.emplace_back(); - prop.type = prop.getType(word); - if (prop.type==EF_UNKNOWN) + if (_ctx.IsBinaryFile) { - el.KnownSize = false; - - word = ctx.getNextWord(); - - prop.list.countType = prop.getType(word); - if (ctx.IsBinaryFile && !isIntegerFormat(prop.list.countType)) - { - _params.logger.log("Cannot read binary PLY file containing data types of unknown or non integer length %s", system::ILogger::ELL_WARNING, word); - continueReading = false; - } + if (KnownSize) + _ctx.moveForward(KnownSize); 
else - { - word = ctx.getNextWord(); - prop.list.itemType = prop.getType(word); - if (ctx.IsBinaryFile && !isIntegerFormat(prop.list.itemType)) - { - _params.logger.log("Cannot read binary PLY file containing data types of unknown or non integer length %s", system::ILogger::ELL_ERROR, word); - continueReading = false; - } - } - } - else if (ctx.IsBinaryFile && prop.type==EF_UNKNOWN) - { - _params.logger.log("Cannot read binary PLY file containing data types of unknown length %s", system::ILogger::ELL_ERROR, word); - continueReading = false; - } - else - el.KnownSize += getTexelOrBlockBytesize(prop.type); - - prop.Name = ctx.getNextWord(); - } - } - else if (wordView == "element") - { - auto& el = ctx.ElementList.emplace_back(); - el.Name = ctx.getNextWord(); - const char* const countWord = ctx.getNextWord(); - uint64_t parsedCount = 0ull; - const std::string_view countWordView = plyToStringView(countWord); - if (!countWordView.empty()) - { - const char* const countWordBegin = countWordView.data(); - const char* const countWordEnd = countWordBegin + countWordView.size(); - const auto parseResult = std::from_chars(countWordBegin, countWordEnd, parsedCount, 10); - if (!(parseResult.ec == std::errc() && parseResult.ptr == countWordEnd)) - parsedCount = 0ull; - } - el.Count = static_cast(parsedCount); - el.KnownSize = 0; - if (el.Name=="vertex") - vertCount = el.Count; - } - else if (wordView == "comment") - { - // ignore line - } - // must be `format {binary_little_endian|binary_big_endian|ascii} 1.0` - else if (wordView == "format") - { - word = ctx.getNextWord(); - const std::string_view formatView = plyToStringView(word); - - if (formatView == "binary_little_endian") - { - ctx.IsBinaryFile = true; - } - else if (formatView == "binary_big_endian") - { - ctx.IsBinaryFile = true; - ctx.IsWrongEndian = true; - } - else if (formatView == "ascii") - { - } - else - { - // abort if this isn't an ascii or a binary mesh - _params.logger.log("Unsupported PLY mesh format %s", 
system::ILogger::ELL_ERROR, word); - continueReading = false; - } - - if (continueReading) - { - word = ctx.getNextWord(); - if (plyToStringView(word) != "1.0") - { - _params.logger.log("Unsupported PLY mesh version %s",system::ILogger::ELL_WARNING,word); - } - } - } - else if (wordView == "end_header") - { - readingHeader = false; - if (ctx.IsBinaryFile) - { - char* const binaryStartInBuffer = ctx.LineEndPointer + 1; - const auto* const mappedBase = reinterpret_cast(static_cast(_file)->getMappedPointer()); - if (mappedBase) - { - const size_t binaryOffset = ctx.getAbsoluteOffset(binaryStartInBuffer); - const size_t remainingBytes = static_cast(binaryOffset < fileSize ? (fileSize - binaryOffset) : 0ull); - ctx.useMappedBinaryWindow(mappedBase + binaryOffset, remainingBytes); + for (auto i = 0u; i < Properties.size(); ++i) + Properties[i].skip(_ctx); } else - { - ctx.StartPointer = binaryStartInBuffer; - } - } - } - else - { - _params.logger.log("Unknown item in PLY file %s", system::ILogger::ELL_WARNING, word); - } - - if (readingHeader && continueReading) - { - ctx.getNextLine(); - word = ctx.getNextWord(); - } - } - while (readingHeader && continueReading); - // - if (!continueReading) - return {}; - - // now to read the actual data from the file - using index_t = uint32_t; - core::vector indices = {}; - - // loop through each of the elements - bool verticesProcessed = false; - - for (uint32_t i=0; i extraViews; - for (auto& vertexProperty : el.Properties) - { - const auto& propertyName = vertexProperty.Name; - // only positions and normals need to be structured/canonicalized in any way - auto negotiateFormat = [&vertexProperty](ICPUPolygonGeometry::SDataViewBase& view, const uint8_t component)->void - { - assert(getFormatChannelCount(vertexProperty.type)!=0); - if (getTexelOrBlockBytesize(vertexProperty.type)>getTexelOrBlockBytesize(view.format)) - view.format = vertexProperty.type; - view.stride = hlsl::max(view.stride,component); - }; - if (propertyName=="x") 
- negotiateFormat(posView,0); - else if (propertyName=="y") - negotiateFormat(posView,1); - else if (propertyName=="z") - negotiateFormat(posView,2); - else if (propertyName=="nx") - negotiateFormat(normalView,0); - else if (propertyName=="ny") - negotiateFormat(normalView,1); - else if (propertyName=="nz") - negotiateFormat(normalView,2); - else if (propertyName=="u" || propertyName=="s") - negotiateFormat(uvView,0); - else if (propertyName=="v" || propertyName=="t") - negotiateFormat(uvView,1); - else - { -// property names for extra channels are currently not persisted in metadata - extraViews.push_back(createView(vertexProperty.type,el.Count)); - } - } - auto setFinalFormat = [&ctx](ICPUPolygonGeometry::SDataViewBase& view)->void - { - const auto componentFormat = view.format; - const auto componentCount = view.stride+1; - // turn single channel format to multiple - view.format = [=]()->E_FORMAT - { - switch (view.format) - { - case EF_R8_SINT: - switch (componentCount) - { - case 1: - return EF_R8_SINT; - case 2: - return EF_R8G8_SINT; - case 3: - return EF_R8G8B8_SINT; - case 4: - return EF_R8G8B8A8_SINT; - default: - break; - } - break; - case EF_R8_UINT: - switch (componentCount) - { - case 1: - return EF_R8_UINT; - case 2: - return EF_R8G8_UINT; - case 3: - return EF_R8G8B8_UINT; - case 4: - return EF_R8G8B8A8_UINT; - default: - break; - } - break; - case EF_R16_SINT: - switch (componentCount) - { - case 1: - return EF_R16_SINT; - case 2: - return EF_R16G16_SINT; - case 3: - return EF_R16G16B16_SINT; - case 4: - return EF_R16G16B16A16_SINT; - default: - break; - } - break; - case EF_R16_UINT: - switch (componentCount) - { - case 1: - return EF_R16_UINT; - case 2: - return EF_R16G16_UINT; - case 3: - return EF_R16G16B16_UINT; - case 4: - return EF_R16G16B16A16_UINT; - default: - break; - } - break; - case EF_R32_SINT: - switch (componentCount) - { - case 1: - return EF_R32_SINT; - case 2: - return EF_R32G32_SINT; - case 3: - return EF_R32G32B32_SINT; - case 
4: - return EF_R32G32B32A32_SINT; - default: - break; - } - break; - case EF_R32_UINT: - switch (componentCount) - { - case 1: - return EF_R32_UINT; - case 2: - return EF_R32G32_UINT; - case 3: - return EF_R32G32B32_UINT; - case 4: - return EF_R32G32B32A32_UINT; - default: - break; - } - break; - case EF_R32_SFLOAT: - switch (componentCount) - { - case 1: - return EF_R32_SFLOAT; - case 2: - return EF_R32G32_SFLOAT; - case 3: - return EF_R32G32B32_SFLOAT; - case 4: - return EF_R32G32B32A32_SFLOAT; - default: - break; - } - break; - case EF_R64_SFLOAT: - switch (componentCount) - { - case 1: - return EF_R64_SFLOAT; - case 2: - return EF_R64G64_SFLOAT; - case 3: - return EF_R64G64B64_SFLOAT; - case 4: - return EF_R64G64B64A64_SFLOAT; - default: - break; - } - break; - default: - break; - } - return EF_UNKNOWN; - }(); - view.stride = getTexelOrBlockBytesize(view.format); - // - for (auto c=0u; c(offset), - .stride = view.stride, - .dstFmt = componentFormat - }); - } - }; - if (posView.format!=EF_UNKNOWN) - { - auto beginIx = ctx.vertAttrIts.size(); - setFinalFormat(posView); - auto view = createView(posView.format,el.Count); - for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) - ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; - geometry->setPositionView(std::move(view)); - } - if (normalView.format!=EF_UNKNOWN) - { - auto beginIx = ctx.vertAttrIts.size(); - setFinalFormat(normalView); - auto view = createView(normalView.format,el.Count); - for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) - ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; - geometry->setNormalView(std::move(view)); + _ctx.getNextLine(); } - if (uvView.format!=EF_UNKNOWN) - { - auto beginIx = ctx.vertAttrIts.size(); - setFinalFormat(uvView); - auto view = createView(uvView.format,el.Count); - for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) - 
ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; - auto* const auxViews = geometry->getAuxAttributeViews(); - auxViews->resize(SPLYPolygonGeometryAuxLayout::UV0 + 1u); - auxViews->operator[](SPLYPolygonGeometryAuxLayout::UV0) = std::move(view); - } - // - for (auto& view : extraViews) - ctx.vertAttrIts.push_back({ - .ptr = reinterpret_cast(view.src.buffer->getPointer())+view.src.offset, - .stride = getTexelOrBlockBytesize(view.composed.format), - .dstFmt = view.composed.format - }); - for (auto& view : extraViews) - geometry->getAuxAttributeViews()->push_back(std::move(view)); - // loop through vertex properties - const auto fastVertexResult = ctx.readVertexElementFast(el, &parsedAABB); - if (fastVertexResult == SContext::EFastVertexReadResult::Success) - { - ++fastVertexElementCount; - } - else if (fastVertexResult == SContext::EFastVertexReadResult::NotApplicable) - { - ctx.readVertex(_params,el); - } - else - { - _params.logger.log("PLY vertex fast path failed on malformed data for %s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str()); - return {}; - } - hashViewBufferIfNeeded(geometry->getPositionView()); - hashViewBufferIfNeeded(geometry->getNormalView()); - for (const auto& view : *geometry->getAuxAttributeViews()) - hashViewBufferIfNeeded(view); - tryLaunchDeferredHash(geometry->getPositionView(), deferredPositionHashThread); - verticesProcessed = true; - } - else if (el.Name=="face") - { - const uint32_t vertexCount32 = vertCount <= static_cast(std::numeric_limits::max()) ? 
static_cast(vertCount) : 0u; - const auto fastFaceResult = ctx.readFaceElementFast( - el, - indices, - maxIndexRead, - faceCount, - vertexCount32, - computeContentHashes && !hashInBuild, - precomputedIndexHash); - if (fastFaceResult == SContext::EFastFaceReadResult::Success) - { - ++fastFaceElementCount; - } - else if (fastFaceResult == SContext::EFastFaceReadResult::NotApplicable) - { - indices.reserve(indices.size() + el.Count * 3u); - for (size_t j=0; jgetFileName().string().c_str()); - return {}; - } - } - else - { - // skip these elements - if (ctx.IsBinaryFile && el.KnownSize) - { - const uint64_t bytesToSkip64 = static_cast(el.KnownSize) * static_cast(el.Count); - if (bytesToSkip64 > static_cast(std::numeric_limits::max())) - return {}; - ctx.moveForward(static_cast(bytesToSkip64)); - } - else - { - for (size_t j=0; japplyAABB(parsedAABB.value); - else - CPolygonGeometryManipulator::recomputeAABB(geometry.get()); + // name of the element. We only want "vertex" and "face" elements + // but we have to parse the others anyway. 
+ std::string Name; + // Properties of this element + core::vector Properties; + // The number of elements in the file + size_t Count; + // known size in bytes, 0 if unknown + uint32_t KnownSize; + }; + + static constexpr size_t DefaultIoReadWindowBytes = 50ull << 10; + + void init(size_t _ioReadWindowSize = DefaultIoReadWindowBytes) { + ioReadWindowSize = + std::max(_ioReadWindowSize, DefaultIoReadWindowBytes); + Buffer.resize(ioReadWindowSize + ReadWindowPaddingBytes, '\0'); + EndPointer = StartPointer = Buffer.data(); + LineEndPointer = EndPointer - 1; + + fillBuffer(); + } + + // gets more data from the file + void fillBuffer() { + if (EndOfFile) + return; + else if (fileOffset >= inner.mainFile->getSize()) { + EndOfFile = true; + return; + } + + const auto length = std::distance(StartPointer, EndPointer); + auto newStart = Buffer.data(); + // copy the remaining data to the start of the buffer + if (length && StartPointer != newStart) + memmove(newStart, StartPointer, length); + // reset start position + StartPointer = newStart; + EndPointer = newStart + length; + + // read data from the file + const size_t usableBufferSize = + Buffer.size() > 0ull ? Buffer.size() - ReadWindowPaddingBytes : 0ull; + if (usableBufferSize <= length) { + EndOfFile = true; + return; + } + const size_t requestSize = usableBufferSize - length; + system::IFile::success_t success; + inner.mainFile->read(success, EndPointer, fileOffset, requestSize); + const size_t bytesRead = success.getBytesProcessed(); + ++readCallCount; + readBytesTotal += bytesRead; + if (bytesRead < readMinBytes) + readMinBytes = bytesRead; + fileOffset += bytesRead; + EndPointer += bytesRead; + + // if we didn't completely fill the buffer + if (bytesRead != requestSize) { + // cauterize the string + *EndPointer = 0; + EndOfFile = true; + } + } + // Split the string data into a line in place by terminating it instead of + // copying. 
+ const char* getNextLine() { + // move the start pointer along + StartPointer = LineEndPointer + 1; + + // crlf split across buffer move + if (*StartPointer == '\n') + *(StartPointer++) = '\0'; + + // begin at the start of the next line + const std::array Terminators = {'\0', '\r', '\n'}; + auto terminator = std::find_first_of( + StartPointer, EndPointer, Terminators.begin(), Terminators.end()); + if (terminator != EndPointer) + *(terminator++) = '\0'; + + // we have reached the end of the buffer + if (terminator == EndPointer) { + // get data from the file + if (EndOfFile) { + StartPointer = EndPointer - 1; + *StartPointer = '\0'; + return StartPointer; + } else { + fillBuffer(); + // reset line end pointer + LineEndPointer = StartPointer - 1; + if (StartPointer != EndPointer) + return getNextLine(); + else + return StartPointer; + } + } else { + LineEndPointer = terminator - 1; + WordLength = -1; + // return pointer to the start of the line + return StartPointer; + } + } + // null terminate the next word on the previous line and move the next word + // pointer along since we already have a full line in the buffer, we never + // need to retrieve more data + const char* getNextWord() { + // move the start pointer along + StartPointer += WordLength + 1; + if (StartPointer >= EndPointer) { + if (EndOfFile) { + WordLength = -1; + return EndPointer; + } + getNextLine(); + } + + if (StartPointer < EndPointer && !*StartPointer) + getNextLine(); + + if (StartPointer >= LineEndPointer) { + WordLength = -1; // + return StartPointer; + } + // process the next word + { + assert(LineEndPointer <= EndPointer); + const std::array WhiteSpace = {'\0', ' ', '\t'}; + auto wordEnd = std::find_first_of(StartPointer, LineEndPointer, + WhiteSpace.begin(), WhiteSpace.end()); + // null terminate the next word + if (wordEnd != LineEndPointer) + *(wordEnd++) = '\0'; + // find next word + auto notWhiteSpace = [WhiteSpace](const char c) -> bool { + return std::find(WhiteSpace.begin(), 
WhiteSpace.end(), c) == + WhiteSpace.end(); + }; + auto nextWord = std::find_if(wordEnd, LineEndPointer, notWhiteSpace); + WordLength = std::distance(StartPointer, nextWord) - 1; + } + // return pointer to the start of current word + return StartPointer; + } + size_t getAbsoluteOffset(const char* ptr) const { + if (!ptr || ptr > EndPointer) + return fileOffset; + const size_t trailingBytes = static_cast(EndPointer - ptr); + return fileOffset >= trailingBytes ? (fileOffset - trailingBytes) : 0ull; + } + void useMappedBinaryWindow(const char* data, const size_t sizeBytes) { + if (!data) + return; + StartPointer = const_cast(data); + EndPointer = StartPointer + sizeBytes; + LineEndPointer = StartPointer - 1; + WordLength = -1; + EndOfFile = true; + fileOffset = inner.mainFile ? inner.mainFile->getSize() : fileOffset; + } + // skips x bytes in the file, getting more data if required + void moveForward(const size_t bytes) { + assert(IsBinaryFile); + size_t remaining = bytes; + if (remaining == 0ull) + return; + + const size_t availableInitially = + EndPointer > StartPointer + ? static_cast(EndPointer - StartPointer) + : 0ull; + if (remaining > availableInitially) { + remaining -= availableInitially; + StartPointer = EndPointer; + if (remaining > ioReadWindowSize) { + const size_t fileSize = inner.mainFile->getSize(); + const size_t fileRemaining = + fileSize > fileOffset ? 
(fileSize - fileOffset) : 0ull; + const size_t directSkip = std::min(remaining, fileRemaining); + fileOffset += directSkip; + remaining -= directSkip; + } + } + + while (remaining) { + if (StartPointer >= EndPointer) { + fillBuffer(); + if (StartPointer >= EndPointer) + return; + } + + const size_t available = static_cast(EndPointer - StartPointer); + const size_t step = std::min(available, remaining); + StartPointer += step; + remaining -= step; + } + } + + // read the next int from the file and move the start pointer along + using widest_int_t = uint32_t; + widest_int_t getInt(const E_FORMAT f) { + assert(!isFloatingPointFormat(f)); + if (IsBinaryFile) { + if (StartPointer + sizeof(widest_int_t) > EndPointer) + fillBuffer(); + + switch (getTexelOrBlockBytesize(f)) { + case 1: + if (StartPointer + sizeof(int8_t) > EndPointer) + break; + return *(StartPointer++); + case 2: { + if (StartPointer + sizeof(int16_t) > EndPointer) + break; + const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); + StartPointer += sizeof(int16_t); + return retval; + } + case 4: { + if (StartPointer + sizeof(int32_t) > EndPointer) + break; + const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); + StartPointer += sizeof(int32_t); + return retval; + } + default: + assert(false); + break; + } + return 0; + } + const char* word = getNextWord(); + if (!word) + return 0u; + const size_t tokenLen = WordLength >= 0 + ? 
static_cast(WordLength + 1) + : std::char_traits::length(word); + const char* const wordEnd = word + tokenLen; + if (word == wordEnd) + return 0u; + + if (isSignedFormat(f)) { + int64_t value = 0; + auto ptr = word; + if (Common::parseNumber(ptr, wordEnd, value) && + ptr == wordEnd) + return static_cast(value); + if (ptr != word) + return static_cast(value); + return 0u; + } else { + uint64_t value = 0u; + auto ptr = word; + if (Common::parseNumber(ptr, wordEnd, value) && + ptr == wordEnd) + return static_cast(value); + if (ptr != word) + return static_cast(value); + return 0u; + } + } + // read the next float from the file and move the start pointer along + hlsl::float64_t getFloat(const E_FORMAT f) { + assert(isFloatingPointFormat(f)); + if (IsBinaryFile) { + if (StartPointer + sizeof(hlsl::float64_t) > EndPointer) + fillBuffer(); + + switch (getTexelOrBlockBytesize(f)) { + case 4: { + if (StartPointer + sizeof(hlsl::float32_t) > EndPointer) + break; + const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); + StartPointer += sizeof(hlsl::float32_t); + return retval; + } + case 8: { + if (StartPointer + sizeof(hlsl::float64_t) > EndPointer) + break; + const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); + StartPointer += sizeof(hlsl::float64_t); + return retval; + } + default: + assert(false); + break; + } + return 0; + } + const char* word = getNextWord(); + if (!word) + return 0.0; + const size_t tokenLen = WordLength >= 0 + ? 
static_cast(WordLength + 1) + : std::char_traits::length(word); + const char* const wordEnd = word + tokenLen; + if (word == wordEnd) + return 0.0; + + hlsl::float64_t value = 0.0; + auto ptr = word; + if (Common::parseNumber(ptr, wordEnd, value) && + ptr == wordEnd) + return value; + if (ptr != word) + return value; + return 0.0; + } + // read the next thing from the file and move the start pointer along + void getData(void* dst, const E_FORMAT f) { + const auto size = getTexelOrBlockBytesize(f); + if (StartPointer + size > EndPointer) { + fillBuffer(); + if (StartPointer + size > EndPointer) + return; + } + if (IsWrongEndian) + std::reverse_copy(StartPointer, StartPointer + size, + reinterpret_cast(dst)); + else + memcpy(dst, StartPointer, size); + StartPointer += size; + } + struct SVertAttrIt { + uint8_t* ptr; + uint32_t stride; + E_FORMAT dstFmt; + }; + enum class EFastVertexReadResult : uint8_t { + NotApplicable, + Success, + Error + }; + EFastVertexReadResult readVertexElementFast( + const SElement& el, + hlsl::shapes::util::AABBAccumulator3* parsedAABB) { + if (!IsBinaryFile || el.Name != "vertex") + return EFastVertexReadResult::NotApplicable; + struct SLayoutDesc { + uint32_t propertyCount; + uint32_t srcBytesPerVertex; + bool hasNormals; + bool hasUVs; + }; + + auto allF32 = [&el]() -> bool { + for (const auto& prop : el.Properties) { + if (prop.type != EF_R32_SFLOAT) + return false; + } + return true; + }; + if (!allF32()) + return EFastVertexReadResult::NotApplicable; + + auto matchNames = + [&el](std::initializer_list names) -> bool { + if (el.Properties.size() != names.size()) + return false; + size_t i = 0ull; + for (const auto* name : names) { + if (el.Properties[i].Name != name) + return false; + ++i; + } + return true; + }; + static constexpr SLayoutDesc xyz = {3u, sizeof(hlsl::float32_t) * 3u, + false, false}; + static constexpr SLayoutDesc xyz_n = {6u, sizeof(hlsl::float32_t) * 6u, + true, false}; + static constexpr SLayoutDesc xyz_n_uv = {8u, 
sizeof(hlsl::float32_t) * 8u, + true, true}; + const SLayoutDesc* layout = nullptr; + if (matchNames({"x", "y", "z"})) + layout = &xyz; + else if (matchNames({"x", "y", "z", "nx", "ny", "nz"})) + layout = &xyz_n; + else if (matchNames({"x", "y", "z", "nx", "ny", "nz", "u", "v"}) || + matchNames({"x", "y", "z", "nx", "ny", "nz", "s", "t"})) + layout = &xyz_n_uv; + if (!layout) + return EFastVertexReadResult::NotApplicable; + + const size_t floatBytes = sizeof(hlsl::float32_t); + auto validateTuple = [&](const size_t beginIx, + const size_t componentCount, uint32_t& outStride, + uint8_t*& outBase) -> bool { + if (beginIx + componentCount > vertAttrIts.size()) + return false; + auto& first = vertAttrIts[beginIx]; + if (!first.ptr || first.dstFmt != EF_R32_SFLOAT) + return false; + outStride = first.stride; + outBase = first.ptr; + for (size_t c = 1ull; c < componentCount; ++c) { + auto& it = vertAttrIts[beginIx + c]; + if (!it.ptr || it.dstFmt != EF_R32_SFLOAT) + return false; + if (it.stride != outStride) + return false; + if (it.ptr != outBase + c * floatBytes) + return false; + } + return true; + }; + + uint32_t posStride = 0u; + uint32_t normalStride = 0u; + uint32_t uvStride = 0u; + uint8_t* posBase = nullptr; + uint8_t* normalBase = nullptr; + uint8_t* uvBase = nullptr; + if (vertAttrIts.size() != layout->propertyCount || + !validateTuple(0u, 3u, posStride, posBase) || + (layout->hasNormals && + !validateTuple(3u, 3u, normalStride, normalBase)) || + (layout->hasUVs && !validateTuple(6u, 2u, uvStride, uvBase))) + return EFastVertexReadResult::NotApplicable; + if (el.Count > + (std::numeric_limits::max() / layout->srcBytesPerVertex)) + return EFastVertexReadResult::Error; + + const bool trackAABB = parsedAABB != nullptr; + const bool needsByteSwap = IsWrongEndian; + auto decodeF32 = [needsByteSwap](const uint8_t* src) -> float { + uint32_t bits = 0u; + std::memcpy(&bits, src, sizeof(bits)); + if (needsByteSwap) + bits = Binary::byteswap(bits); + float value = 0.f; 
+ std::memcpy(&value, &bits, sizeof(value)); + return value; + }; + auto decodeVector = [&](const uint8_t* src) -> Vec { + constexpr uint32_t N = hlsl::vector_traits::Dimension; + Vec value{}; + hlsl::array_set setter; + for (uint32_t i = 0u; i < N; ++i) + setter(value, i, + decodeF32(src + static_cast(i) * floatBytes)); + return value; + }; + auto storeVector = [](uint8_t* dst, + const Vec& value) -> void { + constexpr uint32_t N = hlsl::vector_traits::Dimension; + hlsl::array_get getter; + auto* const out = reinterpret_cast(dst); + for (uint32_t i = 0u; i < N; ++i) + out[i] = getter(value, i); + }; + auto advanceTuple = [&](const uint32_t beginIx, + const uint32_t componentCount, + const size_t advance) -> void { + for (uint32_t i = 0u; i < componentCount; ++i) + vertAttrIts[beginIx + i].ptr += advance; + }; + + size_t remainingVertices = el.Count; + while (remainingVertices > 0ull) { + if (StartPointer + layout->srcBytesPerVertex > EndPointer) + fillBuffer(); + const size_t available = + EndPointer > StartPointer + ? 
static_cast(EndPointer - StartPointer) + : 0ull; + if (available < layout->srcBytesPerVertex) + return EFastVertexReadResult::Error; + + const size_t batchVertices = + std::min(remainingVertices, available / layout->srcBytesPerVertex); + const uint8_t* src = reinterpret_cast(StartPointer); + if (!layout->hasNormals && !layout->hasUVs && + posStride == 3ull * floatBytes && !needsByteSwap && !trackAABB) { + const size_t batchBytes = batchVertices * 3ull * floatBytes; + std::memcpy(posBase, src, batchBytes); + src += batchBytes; + posBase += batchBytes; + } else { + for (size_t v = 0ull; v < batchVertices; ++v) { + const hlsl::float32_t3 position = + decodeVector.operator()(src); + storeVector.operator()(posBase, position); + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); + src += 3ull * floatBytes; + posBase += posStride; + if (layout->hasNormals) { + storeVector.operator()( + normalBase, decodeVector.operator()(src)); + src += 3ull * floatBytes; + normalBase += normalStride; + } + if (layout->hasUVs) { + storeVector.operator()( + uvBase, decodeVector.operator()(src)); + src += 2ull * floatBytes; + uvBase += uvStride; + } + } + } + + const size_t consumed = batchVertices * layout->srcBytesPerVertex; + StartPointer += consumed; + remainingVertices -= batchVertices; + } + + advanceTuple(0u, 3u, el.Count * posStride); + if (layout->hasNormals) + advanceTuple(3u, 3u, el.Count * normalStride); + if (layout->hasUVs) + advanceTuple(6u, 2u, el.Count * uvStride); + return EFastVertexReadResult::Success; + } + void readVertex(const IAssetLoader::SAssetLoadParams& _params, + const SElement& el) { + assert(el.Name == "vertex"); + assert(el.Properties.size() == vertAttrIts.size()); + if (!IsBinaryFile) + getNextLine(); + + for (size_t j = 0; j < el.Count; ++j) + for (auto i = 0u; i < vertAttrIts.size(); i++) { + const auto& prop = el.Properties[i]; + auto& it = vertAttrIts[i]; + if (!it.ptr) { + prop.skip(*this); + continue; + } + if 
(!IsBinaryFile) { + if (isIntegerFormat(prop.type)) { + uint64_t tmp = getInt(prop.type); + encodePixels(it.dstFmt, it.ptr, &tmp); + } else { + hlsl::float64_t tmp = getFloat(prop.type); + encodePixels(it.dstFmt, it.ptr, &tmp); + } + } else if (it.dstFmt != prop.type) { + assert(isIntegerFormat(it.dstFmt) == isIntegerFormat(prop.type)); + if (isIntegerFormat(it.dstFmt)) { + uint64_t tmp = getInt(prop.type); + encodePixels(it.dstFmt, it.ptr, &tmp); + } else { + hlsl::float64_t tmp = getFloat(prop.type); + encodePixels(it.dstFmt, it.ptr, &tmp); + } + } else + getData(it.ptr, prop.type); + // + it.ptr += it.stride; + } + } + bool readFace(const SElement& Element, core::vector& _outIndices, + uint32_t& _maxIndex, const uint32_t vertexCount) { + if (!IsBinaryFile) + getNextLine(); + const bool hasVertexCount = vertexCount != 0u; + + for (const auto& prop : Element.Properties) { + if (prop.isList() && + (prop.Name == "vertex_indices" || prop.Name == "vertex_index")) { + const uint32_t count = getInt(prop.list.countType); + const auto srcIndexFmt = prop.list.itemType; + if (count < 3u) { + for (uint32_t j = 0u; j < count; ++j) + getInt(srcIndexFmt); + continue; + } + if (count > 3u) + _outIndices.reserve(_outIndices.size() + + static_cast(count - 2u) * 3ull); + auto emitFan = [&_outIndices, &_maxIndex, hasVertexCount, + vertexCount](auto&& readIndex, + const uint32_t faceVertexCount) -> bool { + uint32_t i0 = readIndex(); + uint32_t i1 = readIndex(); + uint32_t i2 = readIndex(); + if (hasVertexCount) { + if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) + return false; + } else { + _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); + } + _outIndices.push_back(i0); + _outIndices.push_back(i1); + _outIndices.push_back(i2); + uint32_t prev = i2; + for (uint32_t j = 3u; j < faceVertexCount; ++j) { + const uint32_t idx = readIndex(); + if (hasVertexCount) { + if (idx >= vertexCount) + return false; + } else { + _maxIndex = std::max(_maxIndex, idx); 
+ } + _outIndices.push_back(i0); + _outIndices.push_back(prev); + _outIndices.push_back(idx); + prev = idx; + } + return true; + }; + + if (IsBinaryFile && !IsWrongEndian && srcIndexFmt == EF_R32_UINT) { + const size_t bytesNeeded = + static_cast(count) * sizeof(uint32_t); + if (StartPointer + bytesNeeded > EndPointer) + fillBuffer(); + if (StartPointer + bytesNeeded <= EndPointer) { + const uint8_t* ptr = + reinterpret_cast(StartPointer); + auto readIndex = [&ptr]() -> uint32_t { + uint32_t v = 0u; + std::memcpy(&v, ptr, sizeof(v)); + ptr += sizeof(v); + return v; + }; + if (!emitFan(readIndex, count)) + return false; + StartPointer = + reinterpret_cast(const_cast(ptr)); + continue; + } + } else if (IsBinaryFile && !IsWrongEndian && + srcIndexFmt == EF_R16_UINT) { + const size_t bytesNeeded = + static_cast(count) * sizeof(uint16_t); + if (StartPointer + bytesNeeded > EndPointer) + fillBuffer(); + if (StartPointer + bytesNeeded <= EndPointer) { + const uint8_t* ptr = + reinterpret_cast(StartPointer); + auto readIndex = [&ptr]() -> uint32_t { + uint16_t v = 0u; + std::memcpy(&v, ptr, sizeof(v)); + ptr += sizeof(v); + return static_cast(v); + }; + if (!emitFan(readIndex, count)) + return false; + StartPointer = + reinterpret_cast(const_cast(ptr)); + continue; + } + } + + auto readIndex = [&]() -> uint32_t { + return static_cast(getInt(srcIndexFmt)); + }; + if (!emitFan(readIndex, count)) + return false; + } else if (prop.Name == "intensity") { + // todo: face intensity + prop.skip(*this); + } else + prop.skip(*this); + } + return true; + } + + enum class EFastFaceReadResult : uint8_t { NotApplicable, + Success, + Error }; + + EFastFaceReadResult readFaceElementFast( + const SElement& element, core::vector& _outIndices, + uint32_t& _maxIndex, uint64_t& _faceCount, const uint32_t vertexCount, + const bool computeIndexHash, core::blake3_hash_t& outIndexHash) { + if (!IsBinaryFile) + return EFastFaceReadResult::NotApplicable; + if (element.Properties.size() != 1u) + 
return EFastFaceReadResult::NotApplicable; + + const auto& prop = element.Properties[0]; + if (!prop.isList() || + (prop.Name != "vertex_indices" && prop.Name != "vertex_index")) + return EFastFaceReadResult::NotApplicable; + if (prop.list.countType != EF_R8_UINT) + return EFastFaceReadResult::NotApplicable; + + const E_FORMAT srcIndexFmt = prop.list.itemType; + const bool isSrcU32 = srcIndexFmt == EF_R32_UINT; + const bool isSrcS32 = srcIndexFmt == EF_R32_SINT; + const bool isSrcU16 = srcIndexFmt == EF_R16_UINT; + const bool isSrcS16 = srcIndexFmt == EF_R16_SINT; + if (!isSrcU32 && !isSrcS32 && !isSrcU16 && !isSrcS16) + return EFastFaceReadResult::NotApplicable; + + const bool is32Bit = isSrcU32 || isSrcS32; + const bool needEndianSwap = IsWrongEndian; + const size_t indexSize = is32Bit ? sizeof(uint32_t) : sizeof(uint16_t); + const bool hasVertexCount = vertexCount != 0u; + const bool trackMaxIndex = !hasVertexCount; + const hlsl::uint32_t3 vertexLimit(vertexCount); + const auto triExceedsVertexLimit = + [&vertexLimit](const hlsl::uint32_t3& tri) -> bool { + return hlsl::any(glm::greaterThanEqual(tri, vertexLimit)); + }; + outIndexHash = IPreHashed::INVALID_HASH; + const size_t minTriangleRecordSize = sizeof(uint8_t) + indexSize * 3u; + if (element.Count > + (std::numeric_limits::max() / minTriangleRecordSize)) + return EFastFaceReadResult::Error; + const size_t minBytesNeeded = element.Count * minTriangleRecordSize; + if (StartPointer + minBytesNeeded <= EndPointer) { + if (element.Count > (std::numeric_limits::max() / 3u)) + return EFastFaceReadResult::Error; + const size_t triIndices = element.Count * 3u; + if (_outIndices.size() > + (std::numeric_limits::max() - triIndices)) + return EFastFaceReadResult::Error; + const size_t oldSize = _outIndices.size(); + const uint32_t oldMaxIndex = _maxIndex; + _outIndices.resize(oldSize + triIndices); + uint32_t* out = _outIndices.data() + oldSize; + const uint8_t* ptr = reinterpret_cast(StartPointer); + auto readU32 = 
[needEndianSwap](const uint8_t* src) -> uint32_t { + uint32_t value = 0u; + std::memcpy(&value, src, sizeof(value)); + if (needEndianSwap) + value = Binary::byteswap(value); + return value; + }; + auto readU16 = [needEndianSwap](const uint8_t* src) -> uint16_t { + uint16_t value = 0u; + std::memcpy(&value, src, sizeof(value)); + if (needEndianSwap) + value = Binary::byteswap(value); + return value; + }; + bool fallbackToGeneric = false; + if (is32Bit) { + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); + const size_t hardMaxWorkers = + SLoaderRuntimeTuner::resolveHardMaxWorkers( + hw, inner.params.ioPolicy.runtimeTuning.workerHeadroom); + const size_t recordBytes = sizeof(uint8_t) + 3ull * sizeof(uint32_t); + SLoaderRuntimeTuningRequest faceTuningRequest = {}; + faceTuningRequest.inputBytes = minBytesNeeded; + faceTuningRequest.totalWorkUnits = element.Count; + faceTuningRequest.minBytesPerWorker = recordBytes; + faceTuningRequest.hardwareThreads = static_cast(hw); + faceTuningRequest.hardMaxWorkers = + static_cast(hardMaxWorkers); + faceTuningRequest.targetChunksPerWorker = + inner.params.ioPolicy.runtimeTuning.targetChunksPerWorker; + faceTuningRequest.sampleData = ptr; + faceTuningRequest.sampleBytes = + SLoaderRuntimeTuner::resolveSampleBytes(inner.params.ioPolicy, + minBytesNeeded); + const auto faceTuning = SLoaderRuntimeTuner::tune( + inner.params.ioPolicy, faceTuningRequest); + size_t workerCount = std::min(faceTuning.workerCount, element.Count); + if (workerCount > 1ull) { + const bool needMax = trackMaxIndex; + const bool validateAgainstVertexCount = hasVertexCount; + std::vector workerNonTriangle(workerCount, 0u); + std::vector workerInvalid(workerCount, 0u); + std::vector workerMax(needMax ? workerCount : 0ull, 0u); + const bool hashInParsePipeline = computeIndexHash; + std::vector workerReady( + hashInParsePipeline ? workerCount : 0ull, 0u); + std::vector workerHashable( + hashInParsePipeline ? 
workerCount : 0ull, 1u); + std::atomic_bool hashPipelineOk = true; + core::blake3_hash_t parsedIndexHash = IPreHashed::INVALID_HASH; + std::jthread hashThread; + if (hashInParsePipeline) { + hashThread = std::jthread([&]() { + try { + core::blake3_hasher hasher; + for (size_t workerIx = 0ull; workerIx < workerCount; + ++workerIx) { + auto ready = + std::atomic_ref(workerReady[workerIx]); + while (ready.load(std::memory_order_acquire) == 0u) + ready.wait(0u, std::memory_order_acquire); + if (workerHashable[workerIx] == 0u) { + hashPipelineOk.store(false, std::memory_order_relaxed); + return; + } + + const size_t begin = + (element.Count * workerIx) / workerCount; + const size_t end = + (element.Count * (workerIx + 1ull)) / workerCount; + const size_t faceCount = end - begin; + hasher.update(out + begin * 3ull, + faceCount * 3ull * sizeof(uint32_t)); + } + parsedIndexHash = static_cast(hasher); + } catch (...) { + hashPipelineOk.store(false, std::memory_order_relaxed); + } + }); + } + auto parseChunk = [&](const size_t workerIx, const size_t beginFace, + const size_t endFace) -> void { + const uint8_t* in = ptr + beginFace * recordBytes; + uint32_t* outLocal = out + beginFace * 3ull; + uint32_t localMax = 0u; + for (size_t faceIx = beginFace; faceIx < endFace; ++faceIx) { + if (*in != 3u) { + workerNonTriangle[workerIx] = 1u; + if (hashInParsePipeline) + workerHashable[workerIx] = 0u; + break; + } + ++in; + const hlsl::uint32_t3 tri( + readU32(in + 0ull * sizeof(uint32_t)), + readU32(in + 1ull * sizeof(uint32_t)), + readU32(in + 2ull * sizeof(uint32_t))); + outLocal[0] = tri.x; + outLocal[1] = tri.y; + outLocal[2] = tri.z; + const uint32_t triOr = tri.x | tri.y | tri.z; + if (isSrcS32 && (triOr & 0x80000000u)) { + workerInvalid[workerIx] = 1u; + if (hashInParsePipeline) + workerHashable[workerIx] = 0u; + break; + } + if (validateAgainstVertexCount) { + if (triExceedsVertexLimit(tri)) { + workerInvalid[workerIx] = 1u; + if (hashInParsePipeline) + 
workerHashable[workerIx] = 0u; + break; + } + } else if (needMax) { + const uint32_t triMax = std::max({tri.x, tri.y, tri.z}); + if (triMax > localMax) + localMax = triMax; + } + in += 3ull * sizeof(uint32_t); + outLocal += 3ull; + } + if (needMax) + workerMax[workerIx] = localMax; + if (hashInParsePipeline) { + auto ready = std::atomic_ref(workerReady[workerIx]); + ready.store(1u, std::memory_order_release); + ready.notify_one(); + } + }; + SLoaderRuntimeTuner::dispatchWorkers( + workerCount, [&](const size_t workerIx) { + const size_t begin = (element.Count * workerIx) / workerCount; + const size_t end = + (element.Count * (workerIx + 1ull)) / workerCount; + parseChunk(workerIx, begin, end); + }); + if (hashThread.joinable()) + hashThread.join(); + + const bool anyNonTriangle = + std::any_of(workerNonTriangle.begin(), workerNonTriangle.end(), + [](const uint8_t v) { return v != 0u; }); + if (anyNonTriangle) { + _outIndices.resize(oldSize); + _maxIndex = oldMaxIndex; + return EFastFaceReadResult::NotApplicable; + } + const bool anyInvalid = + std::any_of(workerInvalid.begin(), workerInvalid.end(), + [](const uint8_t v) { return v != 0u; }); + if (anyInvalid) { + _outIndices.resize(oldSize); + _maxIndex = oldMaxIndex; + return EFastFaceReadResult::Error; + } + if (trackMaxIndex) { + for (const uint32_t local : workerMax) + if (local > _maxIndex) + _maxIndex = local; + } + if (hashInParsePipeline && + hashPipelineOk.load(std::memory_order_relaxed)) + outIndexHash = parsedIndexHash; + + StartPointer = reinterpret_cast( + const_cast(ptr + element.Count * recordBytes)); + _faceCount += element.Count; + return EFastFaceReadResult::Success; + } + } + + if (is32Bit) { + if (isSrcU32) { + if (trackMaxIndex) { + for (size_t j = 0u; j < element.Count; ++j) { + const uint8_t c = *ptr++; + if (c != 3u) { + fallbackToGeneric = true; + break; + } + const hlsl::uint32_t3 tri( + readU32(ptr + 0ull * sizeof(uint32_t)), + readU32(ptr + 1ull * sizeof(uint32_t)), + readU32(ptr + 2ull 
* sizeof(uint32_t))); + out[0] = tri.x; + out[1] = tri.y; + out[2] = tri.z; + ptr += 3ull * sizeof(uint32_t); + const uint32_t triMax = std::max({tri.x, tri.y, tri.z}); + if (triMax > _maxIndex) + _maxIndex = triMax; + out += 3u; + } + } else { + for (size_t j = 0u; j < element.Count; ++j) { + const uint8_t c = *ptr++; + if (c != 3u) { + fallbackToGeneric = true; + break; + } + const hlsl::uint32_t3 tri( + readU32(ptr + 0ull * sizeof(uint32_t)), + readU32(ptr + 1ull * sizeof(uint32_t)), + readU32(ptr + 2ull * sizeof(uint32_t))); + out[0] = tri.x; + out[1] = tri.y; + out[2] = tri.z; + ptr += 3ull * sizeof(uint32_t); + if (triExceedsVertexLimit(tri)) + return EFastFaceReadResult::Error; + out += 3u; + } + } + } else if (trackMaxIndex) { + for (size_t j = 0u; j < element.Count; ++j) { + const uint8_t c = *ptr++; + if (c != 3u) { + fallbackToGeneric = true; + break; + } + const hlsl::uint32_t3 tri(readU32(ptr + 0ull * sizeof(uint32_t)), + readU32(ptr + 1ull * sizeof(uint32_t)), + readU32(ptr + 2ull * sizeof(uint32_t))); + out[0] = tri.x; + out[1] = tri.y; + out[2] = tri.z; + ptr += 3ull * sizeof(uint32_t); + if ((tri.x | tri.y | tri.z) & 0x80000000u) + return EFastFaceReadResult::Error; + const uint32_t triMax = std::max({tri.x, tri.y, tri.z}); + if (triMax > _maxIndex) + _maxIndex = triMax; + out += 3u; + } + } else { + for (size_t j = 0u; j < element.Count; ++j) { + const uint8_t c = *ptr++; + if (c != 3u) { + fallbackToGeneric = true; + break; + } + const hlsl::uint32_t3 tri(readU32(ptr + 0ull * sizeof(uint32_t)), + readU32(ptr + 1ull * sizeof(uint32_t)), + readU32(ptr + 2ull * sizeof(uint32_t))); + out[0] = tri.x; + out[1] = tri.y; + out[2] = tri.z; + ptr += 3ull * sizeof(uint32_t); + const uint32_t triOr = tri.x | tri.y | tri.z; + if (triOr & 0x80000000u) + return EFastFaceReadResult::Error; + if (triExceedsVertexLimit(tri)) + return EFastFaceReadResult::Error; + out += 3u; + } + } + } else { + if (isSrcU16) { + if (trackMaxIndex) { + for (size_t j = 0u; j < 
element.Count; ++j) { + const uint8_t c = *ptr++; + if (c != 3u) { + fallbackToGeneric = true; + break; + } + const hlsl::uint32_t3 tri( + readU16(ptr + 0ull * sizeof(uint16_t)), + readU16(ptr + 1ull * sizeof(uint16_t)), + readU16(ptr + 2ull * sizeof(uint16_t))); + out[0] = tri.x; + out[1] = tri.y; + out[2] = tri.z; + ptr += 3ull * sizeof(uint16_t); + const uint32_t triMax = std::max({tri.x, tri.y, tri.z}); + if (triMax > _maxIndex) + _maxIndex = triMax; + out += 3u; + } + } else { + for (size_t j = 0u; j < element.Count; ++j) { + const uint8_t c = *ptr++; + if (c != 3u) { + fallbackToGeneric = true; + break; + } + const hlsl::uint32_t3 tri( + readU16(ptr + 0ull * sizeof(uint16_t)), + readU16(ptr + 1ull * sizeof(uint16_t)), + readU16(ptr + 2ull * sizeof(uint16_t))); + out[0] = tri.x; + out[1] = tri.y; + out[2] = tri.z; + ptr += 3ull * sizeof(uint16_t); + if (triExceedsVertexLimit(tri)) + return EFastFaceReadResult::Error; + out += 3u; + } + } + } else if (trackMaxIndex) { + for (size_t j = 0u; j < element.Count; ++j) { + const uint8_t c = *ptr++; + if (c != 3u) { + fallbackToGeneric = true; + break; + } + const hlsl::uint32_t3 tri(readU16(ptr + 0ull * sizeof(uint16_t)), + readU16(ptr + 1ull * sizeof(uint16_t)), + readU16(ptr + 2ull * sizeof(uint16_t))); + ptr += 3ull * sizeof(uint16_t); + if ((tri.x | tri.y | tri.z) & 0x8000u) + return EFastFaceReadResult::Error; + out[0] = tri.x; + out[1] = tri.y; + out[2] = tri.z; + const uint32_t triMax = std::max({tri.x, tri.y, tri.z}); + if (triMax > _maxIndex) + _maxIndex = triMax; + out += 3u; + } + } else { + for (size_t j = 0u; j < element.Count; ++j) { + const uint8_t c = *ptr++; + if (c != 3u) { + fallbackToGeneric = true; + break; + } + const hlsl::uint32_t3 tri(readU16(ptr + 0ull * sizeof(uint16_t)), + readU16(ptr + 1ull * sizeof(uint16_t)), + readU16(ptr + 2ull * sizeof(uint16_t))); + ptr += 3ull * sizeof(uint16_t); + if ((tri.x | tri.y | tri.z) & 0x8000u) + return EFastFaceReadResult::Error; + out[0] = tri.x; + 
out[1] = tri.y; + out[2] = tri.z; + if (triExceedsVertexLimit(tri)) + return EFastFaceReadResult::Error; + out += 3u; + } + } + } + + if (!fallbackToGeneric) { + StartPointer = reinterpret_cast(const_cast(ptr)); + _faceCount += element.Count; + return EFastFaceReadResult::Success; + } + + _outIndices.resize(oldSize); + _maxIndex = oldMaxIndex; + } + + if (element.Count > (std::numeric_limits::max() / 3u)) + return EFastFaceReadResult::Error; + const size_t reserveCount = element.Count * 3u; + if (_outIndices.size() > + (std::numeric_limits::max() - reserveCount)) + return EFastFaceReadResult::Error; + _outIndices.reserve(_outIndices.size() + reserveCount); + auto ensureBytes = [this](const size_t bytes) -> bool { + if (StartPointer + bytes > EndPointer) + fillBuffer(); + return StartPointer + bytes <= EndPointer; + }; + auto readCount = [&ensureBytes, this](int32_t& outCount) -> bool { + if (!ensureBytes(sizeof(uint8_t))) + return false; + outCount = static_cast(*StartPointer++); + return true; + }; + auto readIndex = [&ensureBytes, this, is32Bit, isSrcU32, isSrcU16, + needEndianSwap](uint32_t& out) -> bool { + if (is32Bit) { + if (!ensureBytes(sizeof(uint32_t))) + return false; + if (isSrcU32) { + std::memcpy(&out, StartPointer, sizeof(uint32_t)); + if (needEndianSwap) + out = Binary::byteswap(out); + } else { + int32_t v = 0; + std::memcpy(&v, StartPointer, sizeof(v)); + if (needEndianSwap) + v = Binary::byteswap(v); + if (v < 0) + return false; + out = static_cast(v); + } + StartPointer += sizeof(uint32_t); + return true; + } + + if (!ensureBytes(sizeof(uint16_t))) + return false; + if (isSrcU16) { + uint16_t v = 0u; + std::memcpy(&v, StartPointer, sizeof(uint16_t)); + if (needEndianSwap) + v = Binary::byteswap(v); + out = v; + } else { + int16_t v = 0; + std::memcpy(&v, StartPointer, sizeof(int16_t)); + if (needEndianSwap) + v = Binary::byteswap(v); + if (v < 0) + return false; + out = static_cast(v); + } + StartPointer += sizeof(uint16_t); + return true; + }; 
+ + for (size_t j = 0u; j < element.Count; ++j) { + int32_t countSigned = 0; + if (!readCount(countSigned)) + return EFastFaceReadResult::Error; + const uint32_t count = static_cast(countSigned); + if (count < 3u) { + uint32_t dummy = 0u; + for (uint32_t k = 0u; k < count; ++k) { + if (!readIndex(dummy)) + return EFastFaceReadResult::Error; + } + ++_faceCount; + continue; + } + + uint32_t i0 = 0u; + uint32_t i1 = 0u; + uint32_t i2 = 0u; + if (!readIndex(i0) || !readIndex(i1) || !readIndex(i2)) + return EFastFaceReadResult::Error; + + if (trackMaxIndex) { + _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); + } else if (i0 >= vertexCount || i1 >= vertexCount || + i2 >= vertexCount) { + return EFastFaceReadResult::Error; + } + _outIndices.push_back(i0); + _outIndices.push_back(i1); + _outIndices.push_back(i2); + + uint32_t prev = i2; + for (uint32_t k = 3u; k < count; ++k) { + uint32_t idx = 0u; + if (!readIndex(idx)) + return EFastFaceReadResult::Error; + if (trackMaxIndex) { + _maxIndex = std::max(_maxIndex, idx); + } else if (idx >= vertexCount) { + return EFastFaceReadResult::Error; + } + _outIndices.push_back(i0); + _outIndices.push_back(prev); + _outIndices.push_back(idx); + prev = idx; + } + + ++_faceCount; + } + + return EFastFaceReadResult::Success; + } + + IAssetLoader::SAssetLoadContext inner; + uint32_t topHierarchyLevel; + IAssetLoader::IAssetLoaderOverride* loaderOverride; + // input buffer must be at least twice as long as the longest line in the + // file + core::vector Buffer; + size_t ioReadWindowSize = DefaultIoReadWindowBytes; + core::vector ElementList = {}; + char *StartPointer = nullptr, *EndPointer = nullptr, + *LineEndPointer = nullptr; + int32_t LineLength = 0; + int32_t WordLength = -1; // this variable is a misnomer, its really the + // offset to next word minus one + bool IsBinaryFile = false, IsWrongEndian = false, EndOfFile = false; + size_t fileOffset = {}; + uint64_t readCallCount = 0ull; + uint64_t readBytesTotal = 
0ull; + uint64_t readMinBytes = std::numeric_limits::max(); + // + core::vector vertAttrIts; + }; +}; - const uint64_t indexCount = static_cast(indices.size()); - if (indices.empty()) - { - // no index buffer means point cloud - geometry->setIndexing(IPolygonGeometryBase::PointList()); - } - else - { - if (vertCount != 0u && maxIndexRead >= vertCount) - { - _params.logger.log("PLY indices out of range for %s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str()); - return {}; - } +} - geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - const bool canUseU16 = (vertCount != 0u) ? (vertCount <= std::numeric_limits::max()) : (maxIndexRead <= std::numeric_limits::max()); - if (canUseU16) - { - core::vector indices16(indices.size()); - for (size_t i = 0u; i < indices.size(); ++i) - indices16[i] = static_cast(indices[i]); - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices16)); - if (!view) - return {}; - geometry->setIndexView(std::move(view)); - hashViewBufferIfNeeded(geometry->getIndexView()); - } - else - { - auto view = SGeometryLoaderCommon::createAdoptedView(std::move(indices)); - if (!view) - return {}; - if (precomputedIndexHash != IPreHashed::INVALID_HASH) - view.src.buffer->setContentHash(precomputedIndexHash); - geometry->setIndexView(std::move(view)); - hashViewBufferIfNeeded(geometry->getIndexView()); - } - } +CPLYMeshFileLoader::CPLYMeshFileLoader() = default; - if (computeContentHashes && !hashInBuild) - { - if (deferredPositionHashThread.joinable()) - deferredPositionHashThread.join(); - SPolygonGeometryContentHash::computeMissing(geometry.get(), _params.ioPolicy); - } - else - { - hashRemainingGeometryBuffers(); - } +const char** CPLYMeshFileLoader::getAssociatedFileExtensions() const +{ + static const char* ext[] = { "ply", nullptr }; + return ext; +} - const uint64_t ioMinRead = ctx.readCallCount ? ctx.readMinBytes : 0ull; - const uint64_t ioAvgRead = ctx.readCallCount ? 
(ctx.readBytesTotal / ctx.readCallCount) : 0ull; - const SFileReadTelemetry ioTelemetry = { - .callCount = ctx.readCallCount, - .totalBytes = ctx.readBytesTotal, - .minBytes = ctx.readMinBytes - }; - if (SInterchangeIO::isTinyIOTelemetryLikely(ioTelemetry, fileSize, _params.ioPolicy)) - { - _params.logger.log( - "PLY loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", - system::ILogger::ELL_WARNING, - _file->getFileName().string().c_str(), - static_cast(ctx.readCallCount), - static_cast(ioMinRead), - static_cast(ioAvgRead)); - } - _params.logger.log( - "PLY loader stats: file=%s binary=%d verts=%llu faces=%llu idx=%llu vertex_fast=%llu face_fast=%llu io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", - system::ILogger::ELL_PERFORMANCE, - _file->getFileName().string().c_str(), - ctx.IsBinaryFile ? 1 : 0, - static_cast(vertCount), - static_cast(faceCount), - static_cast(indexCount), - static_cast(fastVertexElementCount), - static_cast(fastFaceElementCount), - static_cast(ctx.readCallCount), - static_cast(ioMinRead), - static_cast(ioAvgRead), - system::to_string(_params.ioPolicy.strategy).c_str(), - system::to_string(ioPlan.strategy).c_str(), - static_cast(ioPlan.chunkSizeBytes()), - ioPlan.reason); - auto meta = core::make_smart_refctd_ptr(); - return SAssetBundle(std::move(meta),{std::move(geometry)}); +bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr) const { + std::array buf = {}; + + system::IFile::success_t success; + _file->read(success, buf.data(), 0, buf.size()); + if (!success) + return false; + + const std::string_view fileHeader(buf.data(), success.getBytesProcessed()); + auto trimWhitespace = [](std::string_view line) -> std::string_view { + const auto isWhitespace = [](const char c) -> bool { + return c == ' ' || c == '\t' || c == '\r'; + }; + while (!line.empty() && isWhitespace(line.front())) + line.remove_prefix(1ull); + while (!line.empty() && 
isWhitespace(line.back())) + line.remove_suffix(1ull); + return line; + }; + + size_t lineStart = 0ull; + const size_t firstLineEnd = fileHeader.find('\n'); + std::string_view firstLine = fileHeader.substr(0ull, firstLineEnd); + firstLine = trimWhitespace(firstLine); + if (firstLine != "ply") + return false; + if (firstLineEnd == std::string_view::npos) + return false; + lineStart = firstLineEnd + 1ull; + + constexpr std::array headers = { + "format ascii 1.0", "format binary_little_endian 1.0", + "format binary_big_endian 1.0"}; + while (lineStart < fileHeader.size()) { + size_t lineEnd = fileHeader.find('\n', lineStart); + if (lineEnd == std::string_view::npos) + lineEnd = fileHeader.size(); + std::string_view line = + trimWhitespace(fileHeader.substr(lineStart, lineEnd - lineStart)); + if (line.starts_with("format ")) + return std::find(headers.begin(), headers.end(), line) != headers.end(); + lineStart = lineEnd + 1ull; + } + + return false; } +//! creates/loads an animated mesh from the file. 
+SAssetBundle CPLYMeshFileLoader::loadAsset( + system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, + IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) { + using namespace nbl::core; + if (!_file) + return {}; + + const bool computeContentHashes = !_params.loaderFlags.hasAnyFlag( + IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES); + uint64_t faceCount = 0u; + uint64_t fastFaceElementCount = 0u; + uint64_t fastVertexElementCount = 0u; + uint32_t maxIndexRead = 0u; + core::blake3_hash_t precomputedIndexHash = IPreHashed::INVALID_HASH; + const uint64_t fileSize = _file->getSize(); + const bool hashInBuild = + computeContentHashes && + SLoaderRuntimeTuner::shouldInlineHashBuild(_params.ioPolicy, fileSize); + const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, fileSize, true, _file); + if (impl::SIODiagnostics::logInvalidPlan(_params.logger, "PLY loader", _file->getFileName().string().c_str(), ioPlan)) + return {}; + + Parse::Context ctx = {asset::IAssetLoader::SAssetLoadContext{_params, _file}, + _hierarchyLevel, _override}; + uint64_t desiredReadWindow = + ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile + ? 
(fileSize + Parse::Context::ReadWindowPaddingBytes) + : ioPlan.chunkSizeBytes(); + if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) { + const bool mappedInput = + static_cast(_file)->getMappedPointer() != + nullptr; + if (mappedInput && + fileSize > (Parse::Context::DefaultIoReadWindowBytes * 2ull)) + desiredReadWindow = Parse::Context::DefaultIoReadWindowBytes; + } + const uint64_t safeReadWindow = std::min( + desiredReadWindow, + static_cast(std::numeric_limits::max() - + Parse::Context::ReadWindowPaddingBytes)); + ctx.init(static_cast(safeReadWindow)); + + // start with empty mesh + auto geometry = make_smart_refctd_ptr(); + hlsl::shapes::util::AABBAccumulator3 parsedAABB = + hlsl::shapes::util::createAABBAccumulator(); + uint32_t vertCount = 0; + core::vector> hashedBuffers; + std::jthread deferredPositionHashThread; + auto hashBufferIfNeeded = [&](ICPUBuffer* buffer) -> void { + if (!hashInBuild || !buffer) + return; + for (const auto& hashed : hashedBuffers) { + if (hashed.get() == buffer) + return; + } + buffer->setContentHash(buffer->computeContentHash()); + hashedBuffers.push_back(core::smart_refctd_ptr(buffer)); + }; + auto tryLaunchDeferredHash = [&](const IGeometry::SDataView& view, + std::jthread& deferredThread) -> void { + if (!computeContentHashes || hashInBuild || !view || !view.src.buffer) + return; + if (deferredThread.joinable()) + return; + if (view.src.buffer->getContentHash() != IPreHashed::INVALID_HASH) + return; + auto keepAlive = core::smart_refctd_ptr(view.src.buffer); + deferredThread = std::jthread([buffer = std::move(keepAlive)]() mutable { + buffer->setContentHash(buffer->computeContentHash()); + }); + }; + auto hashViewBufferIfNeeded = + [&](const IGeometry::SDataView& view) -> void { + if (!view || !view.src.buffer) + return; + hashBufferIfNeeded(view.src.buffer.get()); + }; + auto hashRemainingGeometryBuffers = [&]() -> void { + if (!hashInBuild) + return; + hashViewBufferIfNeeded(geometry->getPositionView()); + 
hashViewBufferIfNeeded(geometry->getIndexView()); + hashViewBufferIfNeeded(geometry->getNormalView()); + for (const auto& view : *geometry->getAuxAttributeViews()) + hashViewBufferIfNeeded(view); + for (const auto& view : *geometry->getJointWeightViews()) { + hashViewBufferIfNeeded(view.indices); + hashViewBufferIfNeeded(view.weights); + } + if (const auto jointObb = geometry->getJointOBBView(); jointObb) + hashViewBufferIfNeeded(*jointObb); + }; + + // Currently only supports ASCII or binary meshes + if (Parse::toStringView(ctx.getNextLine()) != "ply") { + _params.logger.log("Not a valid PLY file %s", system::ILogger::ELL_ERROR, + ctx.inner.mainFile->getFileName().string().c_str()); + return {}; + } + + // cut the next line out + ctx.getNextLine(); + // grab the word from this line + const char* word = ctx.getNextWord(); + // ignore comments + for (; Parse::toStringView(word) == "comment"; ctx.getNextLine()) + word = ctx.getNextWord(); + + bool readingHeader = true; + bool continueReading = true; + ctx.IsBinaryFile = false; + ctx.IsWrongEndian = false; + + do { + const std::string_view wordView = Parse::toStringView(word); + if (wordView == "property") { + word = ctx.getNextWord(); + + if (ctx.ElementList.empty()) { + _params.logger.log("PLY property token found before element %s", + system::ILogger::ELL_WARNING, word); + } else { + // get element + auto& el = ctx.ElementList.back(); + + // fill property struct + auto& prop = el.Properties.emplace_back(); + prop.type = prop.getType(word); + if (prop.type == EF_UNKNOWN) { + el.KnownSize = false; + + word = ctx.getNextWord(); + + prop.list.countType = prop.getType(word); + if (ctx.IsBinaryFile && !isIntegerFormat(prop.list.countType)) { + _params.logger.log("Cannot read binary PLY file containing data " + "types of unknown or non integer length %s", + system::ILogger::ELL_WARNING, word); + continueReading = false; + } else { + word = ctx.getNextWord(); + prop.list.itemType = prop.getType(word); + if 
(ctx.IsBinaryFile && !isIntegerFormat(prop.list.itemType)) { + _params.logger.log("Cannot read binary PLY file containing data " + "types of unknown or non integer length %s", + system::ILogger::ELL_ERROR, word); + continueReading = false; + } + } + } else if (ctx.IsBinaryFile && prop.type == EF_UNKNOWN) { + _params.logger.log("Cannot read binary PLY file containing data " + "types of unknown length %s", + system::ILogger::ELL_ERROR, word); + continueReading = false; + } else + el.KnownSize += getTexelOrBlockBytesize(prop.type); + + prop.Name = ctx.getNextWord(); + } + } else if (wordView == "element") { + auto& el = ctx.ElementList.emplace_back(); + el.Name = ctx.getNextWord(); + const char* const countWord = ctx.getNextWord(); + uint64_t parsedCount = 0ull; + const std::string_view countWordView = Parse::toStringView(countWord); + if (!countWordView.empty()) { + if (!Parse::Common::parseExactNumber(countWordView, parsedCount)) + parsedCount = 0ull; + } + el.Count = static_cast(parsedCount); + el.KnownSize = 0; + if (el.Name == "vertex") + vertCount = el.Count; + } else if (wordView == "comment") { + // ignore line + } + // must be `format {binary_little_endian|binary_big_endian|ascii} 1.0` + else if (wordView == "format") { + word = ctx.getNextWord(); + const std::string_view formatView = Parse::toStringView(word); + + if (formatView == "binary_little_endian") { + ctx.IsBinaryFile = true; + } else if (formatView == "binary_big_endian") { + ctx.IsBinaryFile = true; + ctx.IsWrongEndian = true; + } else if (formatView == "ascii") { + } else { + // abort if this isn't an ascii or a binary mesh + _params.logger.log("Unsupported PLY mesh format %s", + system::ILogger::ELL_ERROR, word); + continueReading = false; + } + + if (continueReading) { + word = ctx.getNextWord(); + if (Parse::toStringView(word) != "1.0") { + _params.logger.log("Unsupported PLY mesh version %s", + system::ILogger::ELL_WARNING, word); + } + } + } else if (wordView == "end_header") { + 
readingHeader = false; + if (ctx.IsBinaryFile) { + char* const binaryStartInBuffer = ctx.LineEndPointer + 1; + const auto* const mappedBase = reinterpret_cast( + static_cast(_file)->getMappedPointer()); + if (mappedBase) { + const size_t binaryOffset = + ctx.getAbsoluteOffset(binaryStartInBuffer); + const size_t remainingBytes = static_cast( + binaryOffset < fileSize ? (fileSize - binaryOffset) : 0ull); + ctx.useMappedBinaryWindow(mappedBase + binaryOffset, remainingBytes); + } else { + ctx.StartPointer = binaryStartInBuffer; + } + } + } else { + _params.logger.log("Unknown item in PLY file %s", + system::ILogger::ELL_WARNING, word); + } + + if (readingHeader && continueReading) { + ctx.getNextLine(); + word = ctx.getNextWord(); + } + } while (readingHeader && continueReading); + // + if (!continueReading) + return {}; + + // now to read the actual data from the file + using index_t = uint32_t; + core::vector indices = {}; + + // loop through each of the elements + bool verticesProcessed = false; + + for (uint32_t i = 0; i < ctx.ElementList.size(); ++i) { + auto& el = ctx.ElementList[i]; + if (el.Name == "vertex") // multiple vertex elements are currently treated + // as unsupported + { + if (verticesProcessed) { + _params.logger.log("Multiple `vertex` elements not supported!", + system::ILogger::ELL_ERROR); + return {}; + } + ICPUPolygonGeometry::SDataViewBase posView = {}, normalView = {}, + uvView = {}; + core::vector extraViews; + for (auto& vertexProperty : el.Properties) { + const auto& propertyName = vertexProperty.Name; + // only positions and normals need to be structured/canonicalized in any + // way + auto negotiateFormat = + [&vertexProperty](ICPUPolygonGeometry::SDataViewBase& view, + const uint8_t component) -> void { + assert(getFormatChannelCount(vertexProperty.type) != 0); + if (getTexelOrBlockBytesize(vertexProperty.type) > + getTexelOrBlockBytesize(view.format)) + view.format = vertexProperty.type; + view.stride = hlsl::max(view.stride, 
component); + }; + if (propertyName == "x") + negotiateFormat(posView, 0); + else if (propertyName == "y") + negotiateFormat(posView, 1); + else if (propertyName == "z") + negotiateFormat(posView, 2); + else if (propertyName == "nx") + negotiateFormat(normalView, 0); + else if (propertyName == "ny") + negotiateFormat(normalView, 1); + else if (propertyName == "nz") + negotiateFormat(normalView, 2); + else if (propertyName == "u" || propertyName == "s") + negotiateFormat(uvView, 0); + else if (propertyName == "v" || propertyName == "t") + negotiateFormat(uvView, 1); + else { + // property names for extra channels are currently not persisted in + // metadata + extraViews.push_back(createView(vertexProperty.type, el.Count)); + } + } + auto setFinalFormat = + [&ctx](ICPUPolygonGeometry::SDataViewBase& view) -> void { + const auto componentFormat = view.format; + const auto componentCount = view.stride + 1; + // turn single channel format to multiple + view.format = [=]() -> E_FORMAT { + switch (view.format) { + case EF_R8_SINT: + switch (componentCount) { + case 1: + return EF_R8_SINT; + case 2: + return EF_R8G8_SINT; + case 3: + return EF_R8G8B8_SINT; + case 4: + return EF_R8G8B8A8_SINT; + default: + break; + } + break; + case EF_R8_UINT: + switch (componentCount) { + case 1: + return EF_R8_UINT; + case 2: + return EF_R8G8_UINT; + case 3: + return EF_R8G8B8_UINT; + case 4: + return EF_R8G8B8A8_UINT; + default: + break; + } + break; + case EF_R16_SINT: + switch (componentCount) { + case 1: + return EF_R16_SINT; + case 2: + return EF_R16G16_SINT; + case 3: + return EF_R16G16B16_SINT; + case 4: + return EF_R16G16B16A16_SINT; + default: + break; + } + break; + case EF_R16_UINT: + switch (componentCount) { + case 1: + return EF_R16_UINT; + case 2: + return EF_R16G16_UINT; + case 3: + return EF_R16G16B16_UINT; + case 4: + return EF_R16G16B16A16_UINT; + default: + break; + } + break; + case EF_R32_SINT: + switch (componentCount) { + case 1: + return EF_R32_SINT; + case 2: + 
return EF_R32G32_SINT; + case 3: + return EF_R32G32B32_SINT; + case 4: + return EF_R32G32B32A32_SINT; + default: + break; + } + break; + case EF_R32_UINT: + switch (componentCount) { + case 1: + return EF_R32_UINT; + case 2: + return EF_R32G32_UINT; + case 3: + return EF_R32G32B32_UINT; + case 4: + return EF_R32G32B32A32_UINT; + default: + break; + } + break; + case EF_R32_SFLOAT: + switch (componentCount) { + case 1: + return EF_R32_SFLOAT; + case 2: + return EF_R32G32_SFLOAT; + case 3: + return EF_R32G32B32_SFLOAT; + case 4: + return EF_R32G32B32A32_SFLOAT; + default: + break; + } + break; + case EF_R64_SFLOAT: + switch (componentCount) { + case 1: + return EF_R64_SFLOAT; + case 2: + return EF_R64G64_SFLOAT; + case 3: + return EF_R64G64B64_SFLOAT; + case 4: + return EF_R64G64B64A64_SFLOAT; + default: + break; + } + break; + default: + break; + } + return EF_UNKNOWN; + }(); + view.stride = getTexelOrBlockBytesize(view.format); + // + for (auto c = 0u; c < componentCount; c++) { + size_t offset = getTexelOrBlockBytesize(componentFormat) * c; + ctx.vertAttrIts.push_back({.ptr = reinterpret_cast(offset), + .stride = view.stride, + .dstFmt = componentFormat}); + } + }; + if (posView.format != EF_UNKNOWN) { + auto beginIx = ctx.vertAttrIts.size(); + setFinalFormat(posView); + auto view = createView(posView.format, el.Count); + for (const auto size = ctx.vertAttrIts.size(); beginIx != size; + beginIx++) + ctx.vertAttrIts[beginIx].ptr += + ptrdiff_t(view.src.buffer->getPointer()) + view.src.offset; + geometry->setPositionView(std::move(view)); + } + if (normalView.format != EF_UNKNOWN) { + auto beginIx = ctx.vertAttrIts.size(); + setFinalFormat(normalView); + auto view = createView(normalView.format, el.Count); + for (const auto size = ctx.vertAttrIts.size(); beginIx != size; + beginIx++) + ctx.vertAttrIts[beginIx].ptr += + ptrdiff_t(view.src.buffer->getPointer()) + view.src.offset; + geometry->setNormalView(std::move(view)); + } + if (uvView.format != EF_UNKNOWN) { + 
auto beginIx = ctx.vertAttrIts.size(); + setFinalFormat(uvView); + auto view = createView(uvView.format, el.Count); + for (const auto size = ctx.vertAttrIts.size(); beginIx != size; + beginIx++) + ctx.vertAttrIts[beginIx].ptr += + ptrdiff_t(view.src.buffer->getPointer()) + view.src.offset; + auto* const auxViews = geometry->getAuxAttributeViews(); + auxViews->resize(SPLYPolygonGeometryAuxLayout::UV0 + 1u); + auxViews->operator[](SPLYPolygonGeometryAuxLayout::UV0) = + std::move(view); + } + // + for (auto& view : extraViews) + ctx.vertAttrIts.push_back( + {.ptr = reinterpret_cast(view.src.buffer->getPointer()) + + view.src.offset, + .stride = getTexelOrBlockBytesize(view.composed.format), + .dstFmt = view.composed.format}); + for (auto& view : extraViews) + geometry->getAuxAttributeViews()->push_back(std::move(view)); + // loop through vertex properties + const auto fastVertexResult = ctx.readVertexElementFast(el, &parsedAABB); + if (fastVertexResult == Parse::Context::EFastVertexReadResult::Success) { + ++fastVertexElementCount; + } else if (fastVertexResult == + Parse::Context::EFastVertexReadResult::NotApplicable) { + ctx.readVertex(_params, el); + } else { + _params.logger.log( + "PLY vertex fast path failed on malformed data for %s", + system::ILogger::ELL_ERROR, _file->getFileName().string().c_str()); + return {}; + } + hashViewBufferIfNeeded(geometry->getPositionView()); + hashViewBufferIfNeeded(geometry->getNormalView()); + for (const auto& view : *geometry->getAuxAttributeViews()) + hashViewBufferIfNeeded(view); + tryLaunchDeferredHash(geometry->getPositionView(), + deferredPositionHashThread); + verticesProcessed = true; + } else if (el.Name == "face") { + const uint32_t vertexCount32 = + vertCount <= static_cast(std::numeric_limits::max()) + ? 
static_cast(vertCount) + : 0u; + const auto fastFaceResult = ctx.readFaceElementFast( + el, indices, maxIndexRead, faceCount, vertexCount32, + computeContentHashes && !hashInBuild, precomputedIndexHash); + if (fastFaceResult == Parse::Context::EFastFaceReadResult::Success) { + ++fastFaceElementCount; + } else if (fastFaceResult == + Parse::Context::EFastFaceReadResult::NotApplicable) { + indices.reserve(indices.size() + el.Count * 3u); + for (size_t j = 0; j < el.Count; ++j) { + if (!ctx.readFace(el, indices, maxIndexRead, vertexCount32)) + return {}; + ++faceCount; + } + } else { + _params.logger.log("PLY face fast path failed on malformed data for %s", + system::ILogger::ELL_ERROR, + _file->getFileName().string().c_str()); + return {}; + } + } else { + // skip these elements + if (ctx.IsBinaryFile && el.KnownSize) { + const uint64_t bytesToSkip64 = static_cast(el.KnownSize) * + static_cast(el.Count); + if (bytesToSkip64 > + static_cast(std::numeric_limits::max())) + return {}; + ctx.moveForward(static_cast(bytesToSkip64)); + } else { + for (size_t j = 0; j < el.Count; ++j) + el.skipElement(ctx); + } + } + } + + if (!parsedAABB.empty()) + geometry->applyAABB(parsedAABB.value); + else + CPolygonGeometryManipulator::recomputeAABB(geometry.get()); + + const uint64_t indexCount = static_cast(indices.size()); + if (indices.empty()) { + // no index buffer means point cloud + geometry->setIndexing(IPolygonGeometryBase::PointList()); + } else { + if (vertCount != 0u && maxIndexRead >= vertCount) { + _params.logger.log("PLY indices out of range for %s", + system::ILogger::ELL_ERROR, + _file->getFileName().string().c_str()); + return {}; + } + + geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + const bool canUseU16 = + (vertCount != 0u) + ? 
(vertCount <= std::numeric_limits::max()) + : (maxIndexRead <= std::numeric_limits::max()); + if (canUseU16) { + core::vector indices16(indices.size()); + for (size_t i = 0u; i < indices.size(); ++i) + indices16[i] = static_cast(indices[i]); + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(indices16)); + if (!view) + return {}; + geometry->setIndexView(std::move(view)); + hashViewBufferIfNeeded(geometry->getIndexView()); + } else { + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(indices)); + if (!view) + return {}; + if (precomputedIndexHash != IPreHashed::INVALID_HASH) + view.src.buffer->setContentHash(precomputedIndexHash); + geometry->setIndexView(std::move(view)); + hashViewBufferIfNeeded(geometry->getIndexView()); + } + } + + if (computeContentHashes && !hashInBuild) { + if (deferredPositionHashThread.joinable()) + deferredPositionHashThread.join(); + SPolygonGeometryContentHash::computeMissing(geometry.get(), + _params.ioPolicy); + } else { + hashRemainingGeometryBuffers(); + } + + const uint64_t ioMinRead = ctx.readCallCount ? ctx.readMinBytes : 0ull; + const uint64_t ioAvgRead = + ctx.readCallCount ? (ctx.readBytesTotal / ctx.readCallCount) : 0ull; + const SFileReadTelemetry ioTelemetry = {.callCount = ctx.readCallCount, + .totalBytes = ctx.readBytesTotal, + .minBytes = ctx.readMinBytes}; + impl::SIODiagnostics::logTinyIO(_params.logger, "PLY loader", _file->getFileName().string().c_str(), ioTelemetry, fileSize, _params.ioPolicy, "reads"); + _params.logger.log( + "PLY loader stats: file=%s binary=%d verts=%llu faces=%llu idx=%llu " + "vertex_fast=%llu face_fast=%llu io_reads=%llu io_min_read=%llu " + "io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), + ctx.IsBinaryFile ? 
1 : 0, static_cast(vertCount), + static_cast(faceCount), + static_cast(indexCount), + static_cast(fastVertexElementCount), + static_cast(fastFaceElementCount), + static_cast(ctx.readCallCount), + static_cast(ioMinRead), + static_cast(ioAvgRead), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(ioPlan.strategy).c_str(), + static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); + auto meta = core::make_smart_refctd_ptr(); + return SAssetBundle(std::move(meta), {std::move(geometry)}); +} -} // end namespace nbl::asset +} #endif // _NBL_COMPILE_WITH_PLY_LOADER_ diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index b63f898065..4f8cbbf866 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -4,21 +4,25 @@ // See the original file in irrlicht source for authors #include "CPLYMeshWriter.h" +#include "SPLYPolygonGeometryAuxLayout.h" +#include "nbl/asset/interchange/SGeometryViewDecode.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" -#include "SPLYPolygonGeometryAuxLayout.h" +#include "impl/SBinaryData.h" +#include "impl/SFileAccess.h" +#include "impl/SIODiagnostics.h" #ifdef _NBL_COMPILE_WITH_PLY_WRITER_ #include "nbl/system/IFile.h" #include -#include -#include -#include #include +#include #include +#include #include +#include #include #include #include @@ -28,755 +32,526 @@ namespace nbl::asset CPLYMeshWriter::CPLYMeshWriter() { - #ifdef _NBL_DEBUG - setDebugName("CPLYMeshWriter"); - #endif + #ifdef _NBL_DEBUG + setDebugName("CPLYMeshWriter"); + #endif } const char** CPLYMeshWriter::getAssociatedFileExtensions() const { - static const char* ext[] = { "ply", nullptr }; - return ext; + static const char* ext[] = { "ply", nullptr }; + return ext; } writer_flags_t CPLYMeshWriter::getSupportedFlags() { - return asset::EWF_BINARY; + return asset::EWF_BINARY; } writer_flags_t 
CPLYMeshWriter::getForcedFlags() { - return EWF_NONE; + return EWF_NONE; } -namespace ply_writer_detail +namespace { -constexpr size_t ApproxPlyTextBytesPerVertex = sizeof("0.000000 0.000000 0.000000 0.000000 0.000000 0.000000\n") - 1ull; -constexpr size_t ApproxPlyTextBytesPerFace = sizeof("3 4294967295 4294967295 4294967295\n") - 1ull; - -enum class EPlyScalarType : uint8_t -{ - Int8, - UInt8, - Int16, - UInt16, - Int32, - UInt32, - Float32, - Float64 -}; - -struct SPlyScalarMeta +struct Parse { - const char* name = "float32"; - uint32_t byteSize = sizeof(float); - bool integer = false; - bool signedType = true; + using Binary = impl::BinaryData; + + enum class ScalarType : uint8_t + { + Int8, + UInt8, + Int16, + UInt16, + Int32, + UInt32, + Float32, + Float64 + }; + + struct ScalarMeta + { + const char* name = "float32"; + uint32_t byteSize = sizeof(float); + bool integer = false; + bool signedType = true; + }; + + struct ExtraAuxView + { + const ICPUPolygonGeometry::SDataView* view = nullptr; + uint32_t components = 0u; + uint32_t auxIndex = 0u; + ScalarType scalarType = ScalarType::Float32; + }; + + struct WriteInput + { + const ICPUPolygonGeometry* geom = nullptr; + ScalarType positionScalarType = ScalarType::Float32; + const ICPUPolygonGeometry::SDataView* uvView = nullptr; + ScalarType uvScalarType = ScalarType::Float32; + const core::vector* extraAuxViews = nullptr; + bool writeNormals = false; + ScalarType normalScalarType = ScalarType::Float32; + size_t vertexCount = 0ull; + size_t faceCount = 0ull; + bool write16BitIndices = false; + bool flipVectors = false; + }; + + static constexpr size_t ApproxTextBytesPerVertex = sizeof("0.000000 0.000000 0.000000 0.000000 0.000000 0.000000\n") - 1ull; + static constexpr size_t ApproxTextBytesPerFace = sizeof("3 4294967295 4294967295 4294967295\n") - 1ull; + static constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 16ull; + + template + static void appendIntegral(std::string& out, const T 
value) + { + std::array buf = {}; + const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); + if (res.ec == std::errc()) + out.append(buf.data(), static_cast(res.ptr - buf.data())); + } + + static void appendFloat(std::string& out, double value) + { + const size_t oldSize = out.size(); + out.resize(oldSize + MaxFloatTextChars); + char* const begin = out.data() + oldSize; + char* const end = begin + MaxFloatTextChars; + char* const cursor = SGeometryWriterCommon::appendFloatToBuffer(begin, end, value); + out.resize(oldSize + static_cast(cursor - begin)); + } + + static ScalarMeta getScalarMeta(const ScalarType type) + { + switch (type) + { + case ScalarType::Int8: return {"int8", sizeof(int8_t), true, true}; + case ScalarType::UInt8: return {"uint8", sizeof(uint8_t), true, false}; + case ScalarType::Int16: return {"int16", sizeof(int16_t), true, true}; + case ScalarType::UInt16: return {"uint16", sizeof(uint16_t), true, false}; + case ScalarType::Int32: return {"int32", sizeof(int32_t), true, true}; + case ScalarType::UInt32: return {"uint32", sizeof(uint32_t), true, false}; + case ScalarType::Float64: return {"float64", sizeof(double), false, true}; + default: return {"float32", sizeof(float), false, true}; + } + } + + static bool isSupportedScalarFormat(const E_FORMAT format) + { + if (format == EF_UNKNOWN) + return false; + + const uint32_t channels = getFormatChannelCount(format); + if (channels == 0u) + return false; + + if (!(isIntegerFormat(format) || isFloatingPointFormat(format) || isNormalizedFormat(format) || isScaledFormat(format))) + return false; + + const auto bytesPerPixel = getBytesPerPixel(format); + if (bytesPerPixel.getDenominator() != 1u) + return false; + const uint32_t pixelBytes = bytesPerPixel.getNumerator(); + if (pixelBytes == 0u || (pixelBytes % channels) != 0u) + return false; + + const uint32_t bytesPerChannel = pixelBytes / channels; + return bytesPerChannel == 1u || bytesPerChannel == 2u || bytesPerChannel == 4u || 
bytesPerChannel == 8u; + } + + static ScalarType selectScalarType(const E_FORMAT format) + { + if (!isSupportedScalarFormat(format)) + return ScalarType::Float32; + if (isNormalizedFormat(format) || isScaledFormat(format)) + return ScalarType::Float32; + + const uint32_t channels = getFormatChannelCount(format); + if (channels == 0u) + { + assert(format == EF_UNKNOWN); + return ScalarType::Float32; + } + + const auto bytesPerPixel = getBytesPerPixel(format); + if (bytesPerPixel.getDenominator() != 1u) + return ScalarType::Float32; + const uint32_t pixelBytes = bytesPerPixel.getNumerator(); + if (pixelBytes == 0u || (pixelBytes % channels) != 0u) + return ScalarType::Float32; + const uint32_t bytesPerChannel = pixelBytes / channels; + + if (isIntegerFormat(format)) + { + const bool signedType = isSignedFormat(format); + switch (bytesPerChannel) + { + case 1u: return signedType ? ScalarType::Int8 : ScalarType::UInt8; + case 2u: return signedType ? ScalarType::Int16 : ScalarType::UInt16; + case 4u: return signedType ? ScalarType::Int32 : ScalarType::UInt32; + default: return ScalarType::Float64; + } + } + + if (isFloatingPointFormat(format)) + return bytesPerChannel >= 8u ? 
ScalarType::Float64 : ScalarType::Float32; + + return ScalarType::Float32; + } + + struct BinarySink + { + uint8_t* cursor = nullptr; + + template + inline bool append(const T value) + { + if (!cursor) + return false; + Binary::storeUnalignedAdvance(cursor, value); + return true; + } + + inline bool finishVertex() + { + return true; + } + }; + + struct TextSink + { + std::string& output; + + template + inline bool append(const T value) + { + if constexpr (std::is_floating_point_v) + appendFloat(output, static_cast(value)); + else + appendIntegral(output, value); + output.push_back(' '); + return true; + } + + inline bool finishVertex() + { + output.push_back('\n'); + return true; + } + }; + + template + static bool emitDecodedView(Sink& sink, const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const bool flipVectors) + { + std::array decoded = {}; + if (!SGeometryViewDecode::decodeElement, Mode>(view, ix, decoded)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + OutT value = static_cast(decoded[c]); + if constexpr (std::is_signed_v || std::is_floating_point_v) + { + if (flipVectors && c == 0u) + value = -value; + } + if (!sink.append(value)) + return false; + } + return true; + } + + template + static bool emitView(Sink& sink, const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const ScalarType scalarType, const bool flipVectors) + { + switch (scalarType) + { + case ScalarType::Float64: return emitDecodedView(sink, view, ix, componentCount, flipVectors); + case ScalarType::Float32: return emitDecodedView(sink, view, ix, componentCount, flipVectors); + case ScalarType::Int8: return emitDecodedView(sink, view, ix, componentCount, flipVectors); + case ScalarType::UInt8: return emitDecodedView(sink, view, ix, componentCount, false); + case ScalarType::Int16: return emitDecodedView(sink, view, ix, componentCount, flipVectors); + case ScalarType::UInt16: return 
emitDecodedView(sink, view, ix, componentCount, false); + case ScalarType::Int32: return emitDecodedView(sink, view, ix, componentCount, flipVectors); + case ScalarType::UInt32: return emitDecodedView(sink, view, ix, componentCount, false); + } + return false; + } + + template + static bool emitVertices(const WriteInput& input, Sink& sink) + { + if (!input.geom || !input.extraAuxViews) + return false; + + const auto& positionView = input.geom->getPositionView(); + const auto& normalView = input.geom->getNormalView(); + const auto& extraAuxViews = *input.extraAuxViews; + for (size_t i = 0u; i < input.vertexCount; ++i) + { + if (!emitView(sink, positionView, i, 3u, input.positionScalarType, input.flipVectors)) + return false; + if (input.writeNormals && !emitView(sink, normalView, i, 3u, input.normalScalarType, input.flipVectors)) + return false; + if (input.uvView && !emitView(sink, *input.uvView, i, 2u, input.uvScalarType, false)) + return false; + for (const auto& extra : extraAuxViews) + { + if (!extra.view || !emitView(sink, *extra.view, i, extra.components, extra.scalarType, false)) + return false; + } + if (!sink.finishVertex()) + return false; + } + return true; + } + + static bool writeBinary(const WriteInput& input, uint8_t* dst) + { + BinarySink sink = {.cursor = dst}; + if (!emitVertices(input, sink)) + return false; + + return SGeometryWriterCommon::visitTriangleIndices(input.geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) -> bool { + if (!sink.append(static_cast(3u))) + return false; + if (input.write16BitIndices) + { + if (!sink.append(static_cast(i0)) || !sink.append(static_cast(i1)) || !sink.append(static_cast(i2))) + return false; + } + else if (!sink.append(i0) || !sink.append(i1) || !sink.append(i2)) + return false; + return true; + }); + } + + static bool writeText(const WriteInput& input, std::string& output) + { + TextSink sink = {.output = output}; + if (!emitVertices(input, sink)) + return false; + + return 
SGeometryWriterCommon::visitTriangleIndices(input.geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) { + output.append("3 "); + appendIntegral(output, i0); + output.push_back(' '); + appendIntegral(output, i1); + output.push_back(' '); + appendIntegral(output, i2); + output.push_back('\n'); + }); + } }; -SPlyScalarMeta getPlyScalarMeta(const EPlyScalarType type) -{ - switch (type) - { - case EPlyScalarType::Int8: return { "int8", sizeof(int8_t), true, true }; - case EPlyScalarType::UInt8: return { "uint8", sizeof(uint8_t), true, false }; - case EPlyScalarType::Int16: return { "int16", sizeof(int16_t), true, true }; - case EPlyScalarType::UInt16: return { "uint16", sizeof(uint16_t), true, false }; - case EPlyScalarType::Int32: return { "int32", sizeof(int32_t), true, true }; - case EPlyScalarType::UInt32: return { "uint32", sizeof(uint32_t), true, false }; - case EPlyScalarType::Float64: return { "float64", sizeof(double), false, true }; - default: return { "float32", sizeof(float), false, true }; - } -} - -bool isPlySupportedScalarFormat(const E_FORMAT format) -{ - if (format == EF_UNKNOWN) - return false; - - const uint32_t channels = getFormatChannelCount(format); - if (channels == 0u) - return false; - - if (!(isIntegerFormat(format) || isFloatingPointFormat(format) || isNormalizedFormat(format) || isScaledFormat(format))) - return false; - - const auto bytesPerPixel = getBytesPerPixel(format); - if (bytesPerPixel.getDenominator() != 1u) - return false; - const uint32_t pixelBytes = bytesPerPixel.getNumerator(); - if (pixelBytes == 0u || (pixelBytes % channels) != 0u) - return false; - - const uint32_t bytesPerChannel = pixelBytes / channels; - return bytesPerChannel == 1u || bytesPerChannel == 2u || bytesPerChannel == 4u || bytesPerChannel == 8u; -} - -EPlyScalarType selectPlyScalarType(const E_FORMAT format) -{ - if (!isPlySupportedScalarFormat(format)) - return EPlyScalarType::Float32; - if (isNormalizedFormat(format) || 
isScaledFormat(format)) - return EPlyScalarType::Float32; - - const uint32_t channels = getFormatChannelCount(format); - if (channels == 0u) - { - assert(format == EF_UNKNOWN); - return EPlyScalarType::Float32; - } - - const auto bytesPerPixel = getBytesPerPixel(format); - if (bytesPerPixel.getDenominator() != 1u) - return EPlyScalarType::Float32; - const uint32_t pixelBytes = bytesPerPixel.getNumerator(); - if (pixelBytes == 0u || (pixelBytes % channels) != 0u) - return EPlyScalarType::Float32; - const uint32_t bytesPerChannel = pixelBytes / channels; - - if (isIntegerFormat(format)) - { - const bool signedType = isSignedFormat(format); - switch (bytesPerChannel) - { - case 1u: return signedType ? EPlyScalarType::Int8 : EPlyScalarType::UInt8; - case 2u: return signedType ? EPlyScalarType::Int16 : EPlyScalarType::UInt16; - case 4u: return signedType ? EPlyScalarType::Int32 : EPlyScalarType::UInt32; - default: return EPlyScalarType::Float64; - } - } - - if (isFloatingPointFormat(format)) - return bytesPerChannel >= 8u ? 
EPlyScalarType::Float64 : EPlyScalarType::Float32; - - return EPlyScalarType::Float32; } -bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) -{ - out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); - if (!view.composed.isFormatted()) - return false; - - const void* src = view.getPointer(ix); - if (!src) - return false; - - const void* srcArr[4] = { src, nullptr, nullptr, nullptr }; - double tmp[4] = {}; - if (!decodePixels(view.composed.format, srcArr, tmp, 0u, 0u)) - return false; - - const uint32_t channels = std::min(4u, getFormatChannelCount(view.composed.format)); - if (isNormalizedFormat(view.composed.format)) - { - const auto range = view.composed.getRange>(); - for (uint32_t i = 0u; i < channels; ++i) - (&out.x)[i] = tmp[i] * (range.maxVx[i] - range.minVx[i]) + range.minVx[i]; - } - else - { - for (uint32_t i = 0u; i < channels; ++i) - (&out.x)[i] = tmp[i]; - } - return true; -} - -bool decodeSigned4Raw(const ICPUPolygonGeometry::SDataView& view, const size_t ix, int64_t (&out)[4]) -{ - const void* src = view.getPointer(ix); - if (!src) - return false; - const void* srcArr[4] = { src, nullptr, nullptr, nullptr }; - return decodePixels(view.composed.format, srcArr, out, 0u, 0u); -} - -bool decodeUnsigned4Raw(const ICPUPolygonGeometry::SDataView& view, const size_t ix, uint64_t (&out)[4]) -{ - const void* src = view.getPointer(ix); - if (!src) - return false; - const void* srcArr[4] = { src, nullptr, nullptr, nullptr }; - return decodePixels(view.composed.format, srcArr, out, 0u, 0u); -} - -template -void appendIntegral(std::string& out, const T value) -{ - std::array buf = {}; - const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); - if (res.ec == std::errc()) - out.append(buf.data(), static_cast(res.ptr - buf.data())); -} - -constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 16ull; - -void appendFloat(std::string& out, double value) -{ - const size_t oldSize = out.size(); - 
out.resize(oldSize + MaxFloatTextChars); - char* const begin = out.data() + oldSize; - char* const end = begin + MaxFloatTextChars; - char* const cursor = SGeometryWriterCommon::appendFloatToBuffer(begin, end, value); - out.resize(oldSize + static_cast(cursor - begin)); -} - -inline bool writeTypedViewBinary(const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const EPlyScalarType scalarType, const bool flipVectors, uint8_t*& dst) -{ - if (!dst) - return false; - - switch (scalarType) - { - case EPlyScalarType::Float64: - case EPlyScalarType::Float32: - { - hlsl::float64_t4 tmp = {}; - if (!decodeVec4(view, ix, tmp)) - return false; - for (uint32_t c = 0u; c < componentCount; ++c) - { - double value = (&tmp.x)[c]; - if (flipVectors && c == 0u) - value = -value; - if (scalarType == EPlyScalarType::Float64) - { - const double typed = value; - std::memcpy(dst, &typed, sizeof(typed)); - dst += sizeof(typed); - } - else - { - const float typed = static_cast(value); - std::memcpy(dst, &typed, sizeof(typed)); - dst += sizeof(typed); - } - } - return true; - } - case EPlyScalarType::Int8: - case EPlyScalarType::Int16: - case EPlyScalarType::Int32: - { - int64_t tmp[4] = {}; - if (!decodeSigned4Raw(view, ix, tmp)) - return false; - for (uint32_t c = 0u; c < componentCount; ++c) - { - int64_t value = tmp[c]; - if (flipVectors && c == 0u) - value = -value; - switch (scalarType) - { - case EPlyScalarType::Int8: - { - const int8_t typed = static_cast(value); - std::memcpy(dst, &typed, sizeof(typed)); - dst += sizeof(typed); - break; - } - case EPlyScalarType::Int16: - { - const int16_t typed = static_cast(value); - std::memcpy(dst, &typed, sizeof(typed)); - dst += sizeof(typed); - break; - } - default: - { - const int32_t typed = static_cast(value); - std::memcpy(dst, &typed, sizeof(typed)); - dst += sizeof(typed); - break; - } - } - } - return true; - } - case EPlyScalarType::UInt8: - case EPlyScalarType::UInt16: - case 
EPlyScalarType::UInt32: - { - uint64_t tmp[4] = {}; - if (!decodeUnsigned4Raw(view, ix, tmp)) - return false; - for (uint32_t c = 0u; c < componentCount; ++c) - { - uint64_t value = tmp[c]; - switch (scalarType) - { - case EPlyScalarType::UInt8: - { - const uint8_t typed = static_cast(value); - std::memcpy(dst, &typed, sizeof(typed)); - dst += sizeof(typed); - break; - } - case EPlyScalarType::UInt16: - { - const uint16_t typed = static_cast(value); - std::memcpy(dst, &typed, sizeof(typed)); - dst += sizeof(typed); - break; - } - default: - { - const uint32_t typed = static_cast(value); - std::memcpy(dst, &typed, sizeof(typed)); - dst += sizeof(typed); - break; - } - } - } - return true; - } - } - return false; -} - -inline bool writeTypedViewText(std::string& output, const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const EPlyScalarType scalarType, const bool flipVectors) -{ - switch (scalarType) - { - case EPlyScalarType::Float64: - case EPlyScalarType::Float32: - { - hlsl::float64_t4 tmp = {}; - if (!decodeVec4(view, ix, tmp)) - return false; - for (uint32_t c = 0u; c < componentCount; ++c) - { - double value = (&tmp.x)[c]; - if (flipVectors && c == 0u) - value = -value; - appendFloat(output, value); - output.push_back(' '); - } - return true; - } - case EPlyScalarType::Int8: - case EPlyScalarType::Int16: - case EPlyScalarType::Int32: - { - int64_t tmp[4] = {}; - if (!decodeSigned4Raw(view, ix, tmp)) - return false; - for (uint32_t c = 0u; c < componentCount; ++c) - { - int64_t value = tmp[c]; - if (flipVectors && c == 0u) - value = -value; - appendIntegral(output, value); - output.push_back(' '); - } - return true; - } - case EPlyScalarType::UInt8: - case EPlyScalarType::UInt16: - case EPlyScalarType::UInt32: - { - uint64_t tmp[4] = {}; - if (!decodeUnsigned4Raw(view, ix, tmp)) - return false; - for (uint32_t c = 0u; c < componentCount; ++c) - { - appendIntegral(output, tmp[c]); - output.push_back(' '); - } - return 
true; - } - } - return false; -} - -struct SExtraAuxView -{ - const ICPUPolygonGeometry::SDataView* view = nullptr; - uint32_t components = 0u; - uint32_t auxIndex = 0u; - EPlyScalarType scalarType = EPlyScalarType::Float32; -}; - -struct SWriteInput -{ - const ICPUPolygonGeometry* geom = nullptr; - EPlyScalarType positionScalarType = EPlyScalarType::Float32; - const ICPUPolygonGeometry::SDataView* uvView = nullptr; - EPlyScalarType uvScalarType = EPlyScalarType::Float32; - const core::vector* extraAuxViews = nullptr; - bool writeNormals = false; - EPlyScalarType normalScalarType = EPlyScalarType::Float32; - size_t vertexCount = 0ull; - size_t faceCount = 0ull; - bool write16BitIndices = false; - bool flipVectors = false; -}; - -bool writeBinary( - const SWriteInput& input, - uint8_t* dst); -bool writeText( - const SWriteInput& input, - std::string& output); - -} // namespace ply_writer_detail - bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { - using namespace ply_writer_detail; - SFileWriteTelemetry ioTelemetry = {}; - - if (!_override) - getDefaultOverride(_override); - - if (!_file || !_params.rootAsset) - { - _params.logger.log("PLY writer: missing output file or root asset.", system::ILogger::ELL_ERROR); - return false; - } - - const auto items = SGeometryWriterCommon::collectPolygonGeometryWriteItems(_params.rootAsset); - if (items.size() != 1u) - { - _params.logger.log("PLY writer: expected exactly one polygon geometry to write.", system::ILogger::ELL_ERROR); - return false; - } - const auto& item = items.front(); - const auto* geom = item.geometry; - if (!geom || !geom->valid()) - { - _params.logger.log("PLY writer: root asset is not a valid polygon geometry.", system::ILogger::ELL_ERROR); - return false; - } - if (!SGeometryWriterCommon::isIdentityTransform(item.transform)) - { - _params.logger.log("PLY writer: transformed scene or collection export is not supported.", 
system::ILogger::ELL_ERROR); - return false; - } - - SAssetWriteContext ctx = { _params, _file }; - system::IFile* file = _override->getOutputFile(_file, ctx, { geom, 0u }); - if (!file) - { - _params.logger.log("PLY writer: output override returned null file.", system::ILogger::ELL_ERROR); - return false; - } - - const auto& positionView = geom->getPositionView(); - const auto& normalView = geom->getNormalView(); - const size_t vertexCount = positionView.getElementCount(); - if (vertexCount == 0ull) - { - _params.logger.log("PLY writer: empty position view.", system::ILogger::ELL_ERROR); - return false; - } - const bool writeNormals = static_cast(normalView); - if (writeNormals && normalView.getElementCount() != vertexCount) - { - _params.logger.log("PLY writer: normal vertex count mismatch.", system::ILogger::ELL_ERROR); - return false; - } - - const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, SPLYPolygonGeometryAuxLayout::UV0, vertexCount); - if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) - uvView = nullptr; - - core::vector extraAuxViews; - const auto& auxViews = geom->getAuxAttributeViews(); - extraAuxViews.reserve(auxViews.size()); - for (uint32_t auxIx = 0u; auxIx < static_cast(auxViews.size()); ++auxIx) - { - const auto& view = auxViews[auxIx]; - if (!view || (uvView && auxIx == SPLYPolygonGeometryAuxLayout::UV0)) - continue; - if (view.getElementCount() != vertexCount) - continue; - const uint32_t channels = getFormatChannelCount(view.composed.format); - if (channels == 0u) - continue; - const uint32_t components = std::min(4u, channels); - extraAuxViews.push_back({ &view, components, auxIx, selectPlyScalarType(view.composed.format) }); - } - - const auto* indexing = geom->getIndexingCallback(); - if (!indexing) - { - _params.logger.log("PLY writer: missing indexing callback.", system::ILogger::ELL_ERROR); - return false; - } - - if (indexing->knownTopology() != 
E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) - { - _params.logger.log("PLY writer: only triangle-list topology is supported.", system::ILogger::ELL_ERROR); - return false; - } - - size_t faceCount = 0ull; - if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) - { - _params.logger.log("PLY writer: failed to validate triangle indexing.", system::ILogger::ELL_ERROR); - return false; - } - - const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); - const bool binary = flags.hasAnyFlag(E_WRITER_FLAGS::EWF_BINARY); - const bool flipVectors = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); - const bool write16BitIndices = vertexCount <= static_cast(std::numeric_limits::max()) + 1ull; - - EPlyScalarType positionScalarType = selectPlyScalarType(positionView.composed.format); - if (flipVectors && getPlyScalarMeta(positionScalarType).integer && !getPlyScalarMeta(positionScalarType).signedType) - positionScalarType = EPlyScalarType::Float32; - EPlyScalarType normalScalarType = selectPlyScalarType(normalView.composed.format); - if (flipVectors && getPlyScalarMeta(normalScalarType).integer && !getPlyScalarMeta(normalScalarType).signedType) - normalScalarType = EPlyScalarType::Float32; - const EPlyScalarType uvScalarType = uvView ? selectPlyScalarType(uvView->composed.format) : EPlyScalarType::Float32; - - const auto positionMeta = getPlyScalarMeta(positionScalarType); - const auto normalMeta = getPlyScalarMeta(normalScalarType); - const auto uvMeta = getPlyScalarMeta(uvScalarType); - - size_t extraAuxBytesPerVertex = 0ull; - for (const auto& extra : extraAuxViews) - extraAuxBytesPerVertex += static_cast(extra.components) * getPlyScalarMeta(extra.scalarType).byteSize; - - std::ostringstream headerBuilder; - headerBuilder << "ply\n"; - headerBuilder << (binary ? 
"format binary_little_endian 1.0" : "format ascii 1.0"); - headerBuilder << "\ncomment Nabla " << NABLA_SDK_VERSION; - headerBuilder << "\nelement vertex " << vertexCount << "\n"; - - headerBuilder << "property " << positionMeta.name << " x\n"; - headerBuilder << "property " << positionMeta.name << " y\n"; - headerBuilder << "property " << positionMeta.name << " z\n"; - - if (writeNormals) - { - headerBuilder << "property " << normalMeta.name << " nx\n"; - headerBuilder << "property " << normalMeta.name << " ny\n"; - headerBuilder << "property " << normalMeta.name << " nz\n"; - } - - if (uvView) - { - headerBuilder << "property " << uvMeta.name << " u\n"; - headerBuilder << "property " << uvMeta.name << " v\n"; - } - - for (const auto& extra : extraAuxViews) - { - const auto extraMeta = getPlyScalarMeta(extra.scalarType); - for (uint32_t component = 0u; component < extra.components; ++component) - { - headerBuilder << "property " << extraMeta.name << " aux" << extra.auxIndex; - if (extra.components > 1u) - headerBuilder << "_" << component; - headerBuilder << "\n"; - } - } - - headerBuilder << "element face " << faceCount; - headerBuilder << (write16BitIndices ? 
"\nproperty list uchar uint16 vertex_indices\n" : "\nproperty list uchar uint32 vertex_indices\n"); - headerBuilder << "end_header\n"; - const std::string header = headerBuilder.str(); - - const SWriteInput input = { - .geom = geom, - .positionScalarType = positionScalarType, - .uvView = uvView, - .uvScalarType = uvScalarType, - .extraAuxViews = &extraAuxViews, - .writeNormals = writeNormals, - .normalScalarType = normalScalarType, - .vertexCount = vertexCount, - .faceCount = faceCount, - .write16BitIndices = write16BitIndices, - .flipVectors = flipVectors - }; - - bool writeOk = false; - size_t outputBytes = 0ull; - auto writePayload = [&](const uint8_t* bodyData, const size_t bodySize) -> bool - { - const size_t outputSize = header.size() + bodySize; - const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, static_cast(outputSize), true, fileMappable); - if (!ioPlan.isValid()) - { - _params.logger.log("PLY writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), ioPlan.reason); - return false; - } - - outputBytes = outputSize; - const SInterchangeIO::SBufferRange writeBuffers[] = - { - { .data = reinterpret_cast(header.data()), .byteCount = header.size() }, - { .data = bodyData, .byteCount = bodySize } - }; - writeOk = SInterchangeIO::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); - const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); - const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - if (SInterchangeIO::isTinyIOTelemetryLikely(ioTelemetry, static_cast(outputBytes), _params.ioPolicy)) - { - _params.logger.log( - "PLY writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", - system::ILogger::ELL_WARNING, - file->getFileName().string().c_str(), - static_cast(ioTelemetry.callCount), - static_cast(ioMinWrite), - static_cast(ioAvgWrite)); - } - _params.logger.log( - "PLY 
writer stats: file=%s bytes=%llu vertices=%llu faces=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", - system::ILogger::ELL_PERFORMANCE, - file->getFileName().string().c_str(), - static_cast(outputBytes), - static_cast(vertexCount), - static_cast(faceCount), - binary ? 1 : 0, - static_cast(ioTelemetry.callCount), - static_cast(ioMinWrite), - static_cast(ioAvgWrite), - system::to_string(_params.ioPolicy.strategy).c_str(), - system::to_string(ioPlan.strategy).c_str(), - static_cast(ioPlan.chunkSizeBytes()), - ioPlan.reason); - return writeOk; - }; - if (binary) - { - const size_t vertexStride = - static_cast(positionMeta.byteSize) * 3ull + - (writeNormals ? static_cast(normalMeta.byteSize) * 3ull : 0ull) + - (uvView ? static_cast(uvMeta.byteSize) * 2ull : 0ull) + - extraAuxBytesPerVertex; - const size_t faceStride = sizeof(uint8_t) + (write16BitIndices ? sizeof(uint16_t) : sizeof(uint32_t)) * 3u; - const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; - - core::vector body; - body.resize(bodySize); - if (!writeBinary(input, body.data())) - { - _params.logger.log("PLY writer: binary payload generation failed.", system::ILogger::ELL_ERROR); - return false; - } - return writePayload(body.data(), body.size()); - } - - std::string body; - body.reserve(vertexCount * ApproxPlyTextBytesPerVertex + faceCount * ApproxPlyTextBytesPerFace); - if (!writeText(input, body)) - { - _params.logger.log("PLY writer: text payload generation failed.", system::ILogger::ELL_ERROR); - return false; - } - return writePayload(reinterpret_cast(body.data()), body.size()); + using ScalarType = Parse::ScalarType; + SFileWriteTelemetry ioTelemetry = {}; + + if (!_override) + getDefaultOverride(_override); + + if (!_file || !_params.rootAsset) + { + _params.logger.log("PLY writer: missing output file or root asset.", system::ILogger::ELL_ERROR); + return false; + } + + const auto items = 
SGeometryWriterCommon::collectPolygonGeometryWriteItems(_params.rootAsset); + if (items.size() != 1u) + { + _params.logger.log("PLY writer: expected exactly one polygon geometry to write.", system::ILogger::ELL_ERROR); + return false; + } + const auto& item = items.front(); + const auto* geom = item.geometry; + if (!geom || !geom->valid()) + { + _params.logger.log("PLY writer: root asset is not a valid polygon geometry.", system::ILogger::ELL_ERROR); + return false; + } + if (!SGeometryWriterCommon::isIdentityTransform(item.transform)) + { + _params.logger.log("PLY writer: transformed scene or collection export is not supported.", system::ILogger::ELL_ERROR); + return false; + } + + SAssetWriteContext ctx = {_params, _file}; + system::IFile* file = _override->getOutputFile(_file, ctx, {geom, 0u}); + if (!file) + { + _params.logger.log("PLY writer: output override returned null file.", system::ILogger::ELL_ERROR); + return false; + } + + const auto& positionView = geom->getPositionView(); + const auto& normalView = geom->getNormalView(); + const size_t vertexCount = positionView.getElementCount(); + if (vertexCount == 0ull) + { + _params.logger.log("PLY writer: empty position view.", system::ILogger::ELL_ERROR); + return false; + } + const bool writeNormals = static_cast(normalView); + if (writeNormals && normalView.getElementCount() != vertexCount) + { + _params.logger.log("PLY writer: normal vertex count mismatch.", system::ILogger::ELL_ERROR); + return false; + } + + const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, SPLYPolygonGeometryAuxLayout::UV0, vertexCount); + if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) + uvView = nullptr; + + core::vector extraAuxViews; + const auto& auxViews = geom->getAuxAttributeViews(); + extraAuxViews.reserve(auxViews.size()); + for (uint32_t auxIx = 0u; auxIx < static_cast(auxViews.size()); ++auxIx) + { + const auto& view = auxViews[auxIx]; + if (!view || (uvView && auxIx 
== SPLYPolygonGeometryAuxLayout::UV0)) + continue; + if (view.getElementCount() != vertexCount) + continue; + const uint32_t channels = getFormatChannelCount(view.composed.format); + if (channels == 0u) + continue; + const uint32_t components = std::min(4u, channels); + extraAuxViews.push_back({&view, components, auxIx, Parse::selectScalarType(view.composed.format)}); + } + + const auto* indexing = geom->getIndexingCallback(); + if (!indexing) + { + _params.logger.log("PLY writer: missing indexing callback.", system::ILogger::ELL_ERROR); + return false; + } + if (indexing->knownTopology() != E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) + { + _params.logger.log("PLY writer: only triangle-list topology is supported.", system::ILogger::ELL_ERROR); + return false; + } + + size_t faceCount = 0ull; + if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) + { + _params.logger.log("PLY writer: failed to validate triangle indexing.", system::ILogger::ELL_ERROR); + return false; + } + + const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); + const bool binary = flags.hasAnyFlag(E_WRITER_FLAGS::EWF_BINARY); + const bool flipVectors = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const bool write16BitIndices = vertexCount <= static_cast(std::numeric_limits::max()) + 1ull; + + ScalarType positionScalarType = Parse::selectScalarType(positionView.composed.format); + if (flipVectors && Parse::getScalarMeta(positionScalarType).integer && !Parse::getScalarMeta(positionScalarType).signedType) + positionScalarType = ScalarType::Float32; + ScalarType normalScalarType = Parse::selectScalarType(normalView.composed.format); + if (flipVectors && Parse::getScalarMeta(normalScalarType).integer && !Parse::getScalarMeta(normalScalarType).signedType) + normalScalarType = ScalarType::Float32; + const ScalarType uvScalarType = uvView ? 
Parse::selectScalarType(uvView->composed.format) : ScalarType::Float32; + + const auto positionMeta = Parse::getScalarMeta(positionScalarType); + const auto normalMeta = Parse::getScalarMeta(normalScalarType); + const auto uvMeta = Parse::getScalarMeta(uvScalarType); + + size_t extraAuxBytesPerVertex = 0ull; + for (const auto& extra : extraAuxViews) + extraAuxBytesPerVertex += static_cast(extra.components) * Parse::getScalarMeta(extra.scalarType).byteSize; + + std::ostringstream headerBuilder; + headerBuilder << "ply\n"; + headerBuilder << (binary ? "format binary_little_endian 1.0" : "format ascii 1.0"); + headerBuilder << "\ncomment Nabla " << NABLA_SDK_VERSION; + headerBuilder << "\nelement vertex " << vertexCount << "\n"; + headerBuilder << "property " << positionMeta.name << " x\n"; + headerBuilder << "property " << positionMeta.name << " y\n"; + headerBuilder << "property " << positionMeta.name << " z\n"; + if (writeNormals) + { + headerBuilder << "property " << normalMeta.name << " nx\n"; + headerBuilder << "property " << normalMeta.name << " ny\n"; + headerBuilder << "property " << normalMeta.name << " nz\n"; + } + if (uvView) + { + headerBuilder << "property " << uvMeta.name << " u\n"; + headerBuilder << "property " << uvMeta.name << " v\n"; + } + for (const auto& extra : extraAuxViews) + { + const auto extraMeta = Parse::getScalarMeta(extra.scalarType); + for (uint32_t component = 0u; component < extra.components; ++component) + { + headerBuilder << "property " << extraMeta.name << " aux" << extra.auxIndex; + if (extra.components > 1u) + headerBuilder << "_" << component; + headerBuilder << "\n"; + } + } + headerBuilder << "element face " << faceCount; + headerBuilder << (write16BitIndices ? 
"\nproperty list uchar uint16 vertex_indices\n" : "\nproperty list uchar uint32 vertex_indices\n"); + headerBuilder << "end_header\n"; + const std::string header = headerBuilder.str(); + + const Parse::WriteInput input = {.geom = geom, .positionScalarType = positionScalarType, .uvView = uvView, .uvScalarType = uvScalarType, .extraAuxViews = &extraAuxViews, .writeNormals = writeNormals, .normalScalarType = normalScalarType, .vertexCount = vertexCount, .faceCount = faceCount, .write16BitIndices = write16BitIndices, .flipVectors = flipVectors}; + + bool writeOk = false; + size_t outputBytes = 0ull; + auto writePayload = [&](const void* bodyData, const size_t bodySize) -> bool { + const size_t outputSize = header.size() + bodySize; + const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(outputSize), true, file); + if (impl::SIODiagnostics::logInvalidPlan(_params.logger, "PLY writer", file->getFileName().string().c_str(), ioPlan)) + return false; + + outputBytes = outputSize; + const SInterchangeIO::SBufferRange writeBuffers[] = {{.data = header.data(), .byteCount = header.size()}, {.data = bodyData, .byteCount = bodySize}}; + writeOk = SInterchangeIO::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); + const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); + const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); + impl::SIODiagnostics::logTinyIO(_params.logger, "PLY writer", file->getFileName().string().c_str(), ioTelemetry, static_cast(outputBytes), _params.ioPolicy, "writes"); + _params.logger.log("PLY writer stats: file=%s bytes=%llu vertices=%llu faces=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(outputBytes), + static_cast(vertexCount), static_cast(faceCount), binary ? 
1 : 0, + static_cast(ioTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), + system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(ioPlan.strategy).c_str(), static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); + return writeOk; + }; + + if (binary) + { + const size_t vertexStride = static_cast(positionMeta.byteSize) * 3ull + (writeNormals ? static_cast(normalMeta.byteSize) * 3ull : 0ull) + (uvView ? static_cast(uvMeta.byteSize) * 2ull : 0ull) + extraAuxBytesPerVertex; + const size_t faceStride = sizeof(uint8_t) + (write16BitIndices ? sizeof(uint16_t) : sizeof(uint32_t)) * 3u; + const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; + + core::vector body; + body.resize(bodySize); + if (!Parse::writeBinary(input, body.data())) + { + _params.logger.log("PLY writer: binary payload generation failed.", system::ILogger::ELL_ERROR); + return false; + } + return writePayload(body.data(), body.size()); + } + + std::string body; + body.reserve(vertexCount * Parse::ApproxTextBytesPerVertex + faceCount * Parse::ApproxTextBytesPerFace); + if (!Parse::writeText(input, body)) + { + _params.logger.log("PLY writer: text payload generation failed.", system::ILogger::ELL_ERROR); + return false; + } + return writePayload(body.data(), body.size()); } -bool ply_writer_detail::writeBinary( - const SWriteInput& input, - uint8_t* dst) -{ - if (!input.geom || !input.extraAuxViews || !dst) - return false; - - const auto& positionView = input.geom->getPositionView(); - const auto& normalView = input.geom->getNormalView(); - const auto& extraAuxViews = *input.extraAuxViews; - - for (size_t i = 0; i < input.vertexCount; ++i) - { - if (!writeTypedViewBinary(positionView, i, 3u, input.positionScalarType, input.flipVectors, dst)) - return false; - - if (input.writeNormals && !writeTypedViewBinary(normalView, i, 3u, input.normalScalarType, input.flipVectors, dst)) - return false; - - if (input.uvView && 
!writeTypedViewBinary(*input.uvView, i, 2u, input.uvScalarType, false, dst)) - return false; - - for (const auto& extra : extraAuxViews) - { - if (!extra.view || !writeTypedViewBinary(*extra.view, i, extra.components, extra.scalarType, false, dst)) - return false; - } - } - - return SGeometryWriterCommon::visitTriangleIndices(input.geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2)->bool - { - const uint8_t listSize = 3u; - *dst++ = listSize; - - if (input.write16BitIndices) - { - const uint16_t tri16[3] = { - static_cast(i0), - static_cast(i1), - static_cast(i2) - }; - std::memcpy(dst, tri16, sizeof(tri16)); - dst += sizeof(tri16); - } - else - { - const uint32_t tri[3] = { i0, i1, i2 }; - std::memcpy(dst, tri, sizeof(tri)); - dst += sizeof(tri); - } - return true; - }); } -bool ply_writer_detail::writeText( - const SWriteInput& input, - std::string& output) -{ - if (!input.geom || !input.extraAuxViews) - return false; - - const auto& positionView = input.geom->getPositionView(); - const auto& normalView = input.geom->getNormalView(); - const auto& extraAuxViews = *input.extraAuxViews; - - for (size_t i = 0; i < input.vertexCount; ++i) - { - if (!writeTypedViewText(output, positionView, i, 3u, input.positionScalarType, input.flipVectors)) - return false; - - if (input.writeNormals) - { - if (!writeTypedViewText(output, normalView, i, 3u, input.normalScalarType, input.flipVectors)) - return false; - } - - if (input.uvView) - { - if (!writeTypedViewText(output, *input.uvView, i, 2u, input.uvScalarType, false)) - return false; - } - - for (const auto& extra : extraAuxViews) - { - if (!extra.view || !writeTypedViewText(output, *extra.view, i, extra.components, extra.scalarType, false)) - return false; - } - - output += "\n"; - } - - return SGeometryWriterCommon::visitTriangleIndices(input.geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) - { - output.append("3 "); - appendIntegral(output, i0); - output.push_back(' '); - 
appendIntegral(output, i1); - output.push_back(' '); - appendIntegral(output, i2); - output.push_back('\n'); - }); -} - -} // namespace nbl::asset - #endif // _NBL_COMPILE_WITH_PLY_WRITER_ - diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 8dabfa723f..2b4edd25e6 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -7,163 +7,145 @@ #ifdef _NBL_COMPILE_WITH_STL_LOADER_ +#include "SSTLPolygonGeometryAuxLayout.h" +#include "impl/SFileAccess.h" +#include "impl/SIODiagnostics.h" +#include "impl/STextParse.h" +#include "nbl/asset/asset.h" +#include "nbl/asset/format/convertColor.h" #include "nbl/asset/interchange/SGeometryContentHash.h" #include "nbl/asset/interchange/SGeometryLoaderCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" -#include "nbl/asset/format/convertColor.h" -#include "nbl/asset/utils/SGeometryNormalCommon.h" -#include "nbl/asset/asset.h" #include "nbl/asset/metadata/CSTLMetadata.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/asset/utils/SGeometryNormalCommon.h" #include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" #include "nbl/core/hash/blake.h" #include "nbl/system/IFile.h" -#include "SSTLPolygonGeometryAuxLayout.h" -#include #include namespace nbl::asset { -struct SSTLContext +namespace { - IAssetLoader::SAssetLoadContext inner; - SFileReadTelemetry ioTelemetry = {}; - static constexpr size_t TextProbeBytes = 6ull; - static constexpr size_t BinaryHeaderBytes = 80ull; - static constexpr size_t TriangleCountBytes = sizeof(uint32_t); - static constexpr size_t BinaryPrefixBytes = BinaryHeaderBytes + TriangleCountBytes; - static constexpr size_t TriangleFloatCount = 12ull; - static constexpr size_t TriangleFloatBytes = sizeof(float) * TriangleFloatCount; - static constexpr size_t TriangleAttributeBytes = sizeof(uint16_t); - 
static constexpr size_t TriangleRecordBytes = TriangleFloatBytes + TriangleAttributeBytes; - static constexpr size_t VerticesPerTriangle = 3ull; - static constexpr size_t FloatChannelsPerVertex = 3ull; -}; -class SStlAsciiParser +struct Parse { - public: - inline SStlAsciiParser(const char* begin, const char* end) : m_cursor(begin), m_end(end) {} - - inline std::optional readToken() - { - skipWhitespace(); - if (m_cursor >= m_end) - return std::nullopt; - - const char* tokenEnd = m_cursor; - while (tokenEnd < m_end && !core::isspace(*tokenEnd)) - ++tokenEnd; - - const std::string_view token(m_cursor, static_cast(tokenEnd - m_cursor)); - m_cursor = tokenEnd; - return token; - } - - inline std::optional readFloat() - { - skipWhitespace(); - if (m_cursor >= m_end) - return std::nullopt; - - float value = 0.f; - const auto parseResult = fast_float::from_chars(m_cursor, m_end, value); - if (parseResult.ec == std::errc() && parseResult.ptr != m_cursor) + using Common = impl::TextParse; + + static hlsl::float32_t3 resolveStoredNormal(const hlsl::float32_t3& fileNormal) + { + const float fileLen2 = hlsl::dot(fileNormal, fileNormal); + if (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) + return fileNormal; + return SGeometryNormalCommon::normalizeOrZero(fileNormal); + } + + static void pushTriangleReversed(const std::array& p, core::vector& positions) + { + positions.push_back(p[2u]); + positions.push_back(p[1u]); + positions.push_back(p[0u]); + } + + static uint32_t decodeViscamColorToB8G8R8A8(const uint16_t packedColor) + { + std::array src = {&packedColor}; + uint32_t outColor = 0u; + convertColor(src.data(), &outColor, 0u, 0u); + return outColor; + } + + struct Context + { + IAssetLoader::SAssetLoadContext inner; + SFileReadTelemetry ioTelemetry = {}; + static constexpr size_t TextProbeBytes = 6ull; + static constexpr size_t BinaryHeaderBytes = 80ull; + static constexpr size_t TriangleCountBytes = sizeof(uint32_t); + static constexpr size_t BinaryPrefixBytes = 
BinaryHeaderBytes + TriangleCountBytes; + static constexpr size_t TriangleFloatCount = 12ull; + static constexpr size_t TriangleFloatBytes = sizeof(float) * TriangleFloatCount; + static constexpr size_t TriangleAttributeBytes = sizeof(uint16_t); + static constexpr size_t TriangleRecordBytes = TriangleFloatBytes + TriangleAttributeBytes; + static constexpr size_t VerticesPerTriangle = 3ull; + static constexpr size_t FloatChannelsPerVertex = 3ull; + }; + + class AsciiParser + { + public: + inline AsciiParser(const char* begin, const char* end) : m_cursor(begin), m_end(end) {} + + inline std::optional readToken() + { + return Common::readToken(m_cursor, m_end); + } + + inline std::optional readFloat() { - m_cursor = parseResult.ptr; + float value = 0.f; + if (!Common::parseNumber(m_cursor, m_end, value)) + return std::nullopt; return value; } - return std::nullopt; - } - - inline std::optional readVec3() - { - const auto x = readFloat(); - const auto y = readFloat(); - const auto z = readFloat(); - if (!x.has_value() || !y.has_value() || !z.has_value()) - return std::nullopt; - return hlsl::float32_t3(*x, *y, *z); - } - - private: - inline void skipWhitespace() - { - while (m_cursor < m_end && core::isspace(*m_cursor)) - ++m_cursor; - } - - const char* m_cursor = nullptr; - const char* m_end = nullptr; -}; -hlsl::float32_t3 stlResolveStoredNormal(const hlsl::float32_t3& fileNormal) -{ - const float fileLen2 = hlsl::dot(fileNormal, fileNormal); - if (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) - return fileNormal; - return SGeometryNormalCommon::normalizeOrZero(fileNormal); -} + inline std::optional readVec3() + { + const auto x = readFloat(); + const auto y = readFloat(); + const auto z = readFloat(); + if (!x.has_value() || !y.has_value() || !z.has_value()) + return std::nullopt; + return hlsl::float32_t3(*x, *y, *z); + } -void stlPushTriangleReversed(const hlsl::float32_t3 (&p)[3], core::vector& positions) -{ - positions.push_back(p[2u]); - 
positions.push_back(p[1u]); - positions.push_back(p[0u]); -} + private: + const char* m_cursor = nullptr; + const char* m_end = nullptr; + }; -inline uint32_t stlDecodeViscamColorToB8G8R8A8(const uint16_t packedColor) -{ - const void* src[4] = { &packedColor, nullptr, nullptr, nullptr }; - uint32_t outColor = 0u; - convertColor(src, &outColor, 0u, 0u); - return outColor; -} + class SplitBlockMemoryResource final : public core::refctd_memory_resource + { + public: + inline SplitBlockMemoryResource(core::smart_refctd_ptr&& upstream, void* block, const size_t blockBytes, const size_t alignment) + : m_upstream(std::move(upstream)), m_block(block), m_blockBytes(blockBytes), m_alignment(alignment) {} -class CStlSplitBlockMemoryResource final : public core::refctd_memory_resource -{ - public: - inline CStlSplitBlockMemoryResource( - core::smart_refctd_ptr&& upstream, - void* block, - const size_t blockBytes, - const size_t alignment - ) : m_upstream(std::move(upstream)), m_block(block), m_blockBytes(blockBytes), m_alignment(alignment) - { - } - - inline void* allocate(std::size_t, std::size_t) override - { - assert(false); - return nullptr; - } - - inline void deallocate(void* p, std::size_t bytes, std::size_t) override - { - const auto* const begin = reinterpret_cast(m_block); - const auto* const end = begin + m_blockBytes; - const auto* const ptr = reinterpret_cast(p); - assert(ptr >= begin && ptr <= end); - assert(ptr + bytes <= end); - } - - protected: - inline ~CStlSplitBlockMemoryResource() override - { - if (m_upstream && m_block) - m_upstream->deallocate(m_block, m_blockBytes, m_alignment); - } - - private: - core::smart_refctd_ptr m_upstream; - void* m_block = nullptr; - size_t m_blockBytes = 0ull; - size_t m_alignment = 1ull; + inline void* allocate(std::size_t, std::size_t) override + { + assert(false); + return nullptr; + } + + inline void deallocate(void* p, std::size_t bytes, std::size_t) override + { + const auto* const begin = reinterpret_cast(m_block); + 
const auto* const end = begin + m_blockBytes; + const auto* const ptr = reinterpret_cast(p); + assert(ptr >= begin && ptr <= end); + assert(ptr + bytes <= end); + } + + protected: + inline ~SplitBlockMemoryResource() override + { + if (m_upstream && m_block) + m_upstream->deallocate(m_block, m_blockBytes, m_alignment); + } + + private: + core::smart_refctd_ptr m_upstream; + void* m_block = nullptr; + size_t m_blockBytes = 0ull; + size_t m_alignment = 1ull; + }; }; +} + CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager*) { } @@ -174,8 +156,12 @@ const char** CSTLMeshFileLoader::getAssociatedFileExtensions() const return ext; } -SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride*, uint32_t) +SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override [[maybe_unused]], uint32_t _hierarchyLevel [[maybe_unused]]) { + using Context = Parse::Context; + using AsciiParser = Parse::AsciiParser; + using SplitBlockMemoryResource = Parse::SplitBlockMemoryResource; + if (!_file) return {}; @@ -184,615 +170,619 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa const bool computeContentHashes = !_params.loaderFlags.hasAnyFlag(IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES); bool hasTriangleColors = false; - SSTLContext context = { asset::IAssetLoader::SAssetLoadContext{ _params,_file },0ull }; - + Context context = {asset::IAssetLoader::SAssetLoadContext{_params, _file}, 0ull}; const size_t filesize = context.inner.mainFile->getSize(); - if (filesize < SSTLContext::TextProbeBytes) + if (filesize < Context::TextProbeBytes) return {}; - const bool fileMappable = core::bitflag(_file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - const auto ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, static_cast(filesize), true, fileMappable); - if 
(!ioPlan.isValid()) - { - _params.logger.log("STL loader: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str(), ioPlan.reason); + const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(filesize), true, _file); + if (impl::SIODiagnostics::logInvalidPlan(_params.logger, "STL loader", _file->getFileName().string().c_str(), ioPlan)) return {}; - } core::vector wholeFilePayload; const uint8_t* wholeFileData = nullptr; - bool wholeFileDataIsMapped = false; if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) { - const auto* constFile = static_cast(context.inner.mainFile); - const auto* mapped = reinterpret_cast(constFile->getMappedPointer()); - if (mapped) - { - wholeFileData = mapped; - wholeFileDataIsMapped = true; - context.ioTelemetry.account(filesize); - } - else - { - wholeFilePayload.resize(filesize + 1ull); - if (!SInterchangeIO::readFileExact(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, &context.ioTelemetry)) - return {}; - wholeFilePayload[filesize] = 0u; - wholeFileData = wholeFilePayload.data(); - } - } - - bool binary = false; - bool hasBinaryTriCountFromDetect = false; - uint32_t binaryTriCountFromDetect = 0u; - { - std::array prefix = {}; - bool hasPrefix = false; - if (wholeFileData && filesize >= SSTLContext::BinaryPrefixBytes) - { - std::memcpy(prefix.data(), wholeFileData, SSTLContext::BinaryPrefixBytes); - hasPrefix = true; - } - else - { - hasPrefix = filesize >= SSTLContext::BinaryPrefixBytes && SInterchangeIO::readFileExact(context.inner.mainFile, prefix.data(), 0ull, SSTLContext::BinaryPrefixBytes, &context.ioTelemetry); - } - bool startsWithSolid = false; - if (hasPrefix) - { - startsWithSolid = (std::memcmp(prefix.data(), "solid ", SSTLContext::TextProbeBytes) == 0); - } - else - { - char header[SSTLContext::TextProbeBytes] = {}; - if (wholeFileData) - std::memcpy(header, wholeFileData, sizeof(header)); - else if 
(!SInterchangeIO::readFileExact(context.inner.mainFile, header, 0ull, sizeof(header), &context.ioTelemetry)) - return {}; - startsWithSolid = (std::strncmp(header, "solid ", SSTLContext::TextProbeBytes) == 0); - } - - bool binaryBySize = false; - if (hasPrefix) - { - uint32_t triCount = 0u; - std::memcpy(&triCount, prefix.data() + SSTLContext::BinaryHeaderBytes, sizeof(triCount)); - binaryTriCountFromDetect = triCount; - hasBinaryTriCountFromDetect = true; - const uint64_t expectedSize = SSTLContext::BinaryPrefixBytes + static_cast(triCount) * SSTLContext::TriangleRecordBytes; - binaryBySize = (expectedSize == filesize); - } - - if (binaryBySize) - binary = true; - else if (!startsWithSolid) - binary = true; - else - binary = false; - - } - - auto geometry = core::make_smart_refctd_ptr(); - geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); - uint64_t vertexCount = 0ull; - - if (!binary && wholeFileDataIsMapped) - { - wholeFilePayload.resize(filesize + 1ull); - std::memcpy(wholeFilePayload.data(), wholeFileData, filesize); - wholeFilePayload[filesize] = 0u; - wholeFileData = wholeFilePayload.data(); - wholeFileDataIsMapped = false; - } - - if (binary) - { - parsePath = "binary_fast"; - if (filesize < SSTLContext::BinaryPrefixBytes) - return {}; - - uint32_t triangleCount32 = binaryTriCountFromDetect; - if (!hasBinaryTriCountFromDetect) - { - if (!SInterchangeIO::readFileExact(context.inner.mainFile, &triangleCount32, SSTLContext::BinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) - return {}; - } - - triangleCount = triangleCount32; - const size_t dataSize = static_cast(triangleCount) * SSTLContext::TriangleRecordBytes; - const size_t expectedSize = SSTLContext::BinaryPrefixBytes + dataSize; - if (filesize < expectedSize) - return {}; - - const uint8_t* payloadData = nullptr; - if (wholeFileData) - { - payloadData = wholeFileData + 
SSTLContext::BinaryPrefixBytes; - } - else - { - core::vector payload; - payload.resize(dataSize); - if (!SInterchangeIO::readFileWithPolicy(context.inner.mainFile, payload.data(), SSTLContext::BinaryPrefixBytes, dataSize, ioPlan, &context.ioTelemetry)) - return {}; - wholeFilePayload = std::move(payload); - payloadData = wholeFilePayload.data(); - } - - vertexCount = triangleCount * SSTLContext::VerticesPerTriangle; - const size_t vertexCountSizeT = static_cast(vertexCount); - if (vertexCountSizeT > (std::numeric_limits::max() / sizeof(hlsl::float32_t3))) - return {}; - const size_t viewByteSize = vertexCountSizeT * sizeof(hlsl::float32_t3); - if (viewByteSize > (std::numeric_limits::max() - viewByteSize)) - return {}; - const size_t blockBytes = viewByteSize * 2ull; - auto upstream = core::getDefaultMemoryResource(); - if (!upstream) - return {}; - void* block = upstream->allocate(blockBytes, alignof(float)); - if (!block) - return {}; - auto blockResource = core::make_smart_refctd_ptr( - core::smart_refctd_ptr(std::move(upstream)), - block, - blockBytes, - alignof(float)); - auto posBuffer = ICPUBuffer::create({ { viewByteSize },block,core::smart_refctd_ptr(blockResource),alignof(float) }, core::adopt_memory); - auto normalBuffer = ICPUBuffer::create({ { viewByteSize },reinterpret_cast(block) + viewByteSize,core::smart_refctd_ptr(blockResource),alignof(float) }, core::adopt_memory); - if (!posBuffer || !normalBuffer) - return {}; - ICPUPolygonGeometry::SDataView posView = {}; - posView.composed = { - .stride = sizeof(hlsl::float32_t3), - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32G32B32_SFLOAT) - }; - posView.src = { - .offset = 0ull, - .size = viewByteSize, - .buffer = std::move(posBuffer) - }; - ICPUPolygonGeometry::SDataView normalView = {}; - normalView.composed = { - .stride = sizeof(hlsl::float32_t3), - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32G32B32_SFLOAT) - 
}; - normalView.src = { - .offset = 0ull, - .size = viewByteSize, - .buffer = std::move(normalBuffer) - }; - auto* posOutFloat = reinterpret_cast(posView.getPointer()); - auto* normalOutFloat = reinterpret_cast(normalView.getPointer()); - if (!posOutFloat || !normalOutFloat) - return {}; - - const uint8_t* cursor = payloadData; - const uint8_t* const end = cursor + dataSize; - if (end < cursor || static_cast(end - cursor) < static_cast(triangleCount) * SSTLContext::TriangleRecordBytes) - return {}; - core::vector faceColors(static_cast(triangleCount), 0u); - std::atomic_bool colorValidForAllFaces = true; - const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); - const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers(hw, _params.ioPolicy.runtimeTuning.workerHeadroom); - SLoaderRuntimeTuningRequest parseTuningRequest = {}; - parseTuningRequest.inputBytes = dataSize; - parseTuningRequest.totalWorkUnits = triangleCount; - parseTuningRequest.minBytesPerWorker = SSTLContext::TriangleRecordBytes; - parseTuningRequest.hardwareThreads = static_cast(hw); - parseTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); - parseTuningRequest.targetChunksPerWorker = _params.ioPolicy.runtimeTuning.targetChunksPerWorker; - parseTuningRequest.minChunkWorkUnits = 1ull; - parseTuningRequest.maxChunkWorkUnits = std::max(1ull, triangleCount); - parseTuningRequest.sampleData = payloadData; - parseTuningRequest.sampleBytes = SLoaderRuntimeTuner::resolveSampleBytes(_params.ioPolicy, dataSize); - const auto parseTuning = SLoaderRuntimeTuner::tune(_params.ioPolicy, parseTuningRequest); - const size_t workerCount = std::max(1ull, std::min(parseTuning.workerCount, static_cast(std::max(1ull, triangleCount)))); - static constexpr bool ComputeAABBInParse = true; - struct SThreadAABB - { - bool has = false; - float minX = 0.f; - float minY = 0.f; - float minZ = 0.f; - float maxX = 0.f; - float maxY = 0.f; - float maxZ = 0.f; - }; - std::vector 
threadAABBs(ComputeAABBInParse ? workerCount : 0ull); - const uint64_t parseChunkTriangles = std::max(1ull, parseTuning.chunkWorkUnits); - const size_t parseChunkCount = static_cast(SLoaderRuntimeTuner::ceilDiv(triangleCount, parseChunkTriangles)); - const bool hashInParsePipeline = computeContentHashes; - std::vector hashChunkReady(hashInParsePipeline ? parseChunkCount : 0ull, 0u); - std::atomic_bool hashPipelineOk = true; - core::blake3_hash_t parsedPositionHash = static_cast(core::blake3_hasher{}); - core::blake3_hash_t parsedNormalHash = static_cast(core::blake3_hasher{}); - auto parseRange = [&](const uint64_t beginTri, const uint64_t endTri, SThreadAABB& localAABB) -> void - { - const uint8_t* localCursor = payloadData + beginTri * SSTLContext::TriangleRecordBytes; - float* posCursor = posOutFloat + beginTri * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex; - float* normalCursor = normalOutFloat + beginTri * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex; - for (uint64_t tri = beginTri; tri < endTri; ++tri) - { - const uint8_t* const triRecord = localCursor; - localCursor += SSTLContext::TriangleRecordBytes; - float triValues[SSTLContext::TriangleFloatCount]; - std::memcpy(triValues, triRecord, sizeof(triValues)); - uint16_t packedColor = 0u; - std::memcpy(&packedColor, triRecord + SSTLContext::TriangleFloatBytes, sizeof(packedColor)); - if (packedColor & 0x8000u) - faceColors[static_cast(tri)] = stlDecodeViscamColorToB8G8R8A8(packedColor); - else - colorValidForAllFaces.store(false, std::memory_order_relaxed); - - float normalX = triValues[0ull]; - float normalY = triValues[1ull]; - float normalZ = triValues[2ull]; - - const float vertex0x = triValues[9ull]; - const float vertex0y = triValues[10ull]; - const float vertex0z = triValues[11ull]; - const float vertex1x = triValues[6ull]; - const float vertex1y = triValues[7ull]; - const float vertex1z = triValues[8ull]; - const float vertex2x = triValues[3ull]; - 
const float vertex2y = triValues[4ull]; - const float vertex2z = triValues[5ull]; - - posCursor[0ull] = vertex0x; - posCursor[1ull] = vertex0y; - posCursor[2ull] = vertex0z; - posCursor[3ull] = vertex1x; - posCursor[4ull] = vertex1y; - posCursor[5ull] = vertex1z; - posCursor[6ull] = vertex2x; - posCursor[7ull] = vertex2y; - posCursor[8ull] = vertex2z; - if constexpr (ComputeAABBInParse) - { - if (!localAABB.has) - { - localAABB.has = true; - localAABB.minX = vertex0x; - localAABB.minY = vertex0y; - localAABB.minZ = vertex0z; - localAABB.maxX = vertex0x; - localAABB.maxY = vertex0y; - localAABB.maxZ = vertex0z; - } - if (vertex0x < localAABB.minX) localAABB.minX = vertex0x; - if (vertex0y < localAABB.minY) localAABB.minY = vertex0y; - if (vertex0z < localAABB.minZ) localAABB.minZ = vertex0z; - if (vertex0x > localAABB.maxX) localAABB.maxX = vertex0x; - if (vertex0y > localAABB.maxY) localAABB.maxY = vertex0y; - if (vertex0z > localAABB.maxZ) localAABB.maxZ = vertex0z; - if (vertex1x < localAABB.minX) localAABB.minX = vertex1x; - if (vertex1y < localAABB.minY) localAABB.minY = vertex1y; - if (vertex1z < localAABB.minZ) localAABB.minZ = vertex1z; - if (vertex1x > localAABB.maxX) localAABB.maxX = vertex1x; - if (vertex1y > localAABB.maxY) localAABB.maxY = vertex1y; - if (vertex1z > localAABB.maxZ) localAABB.maxZ = vertex1z; - if (vertex2x < localAABB.minX) localAABB.minX = vertex2x; - if (vertex2y < localAABB.minY) localAABB.minY = vertex2y; - if (vertex2z < localAABB.minZ) localAABB.minZ = vertex2z; - if (vertex2x > localAABB.maxX) localAABB.maxX = vertex2x; - if (vertex2y > localAABB.maxY) localAABB.maxY = vertex2y; - if (vertex2z > localAABB.maxZ) localAABB.maxZ = vertex2z; - } - if (normalX == 0.f && normalY == 0.f && normalZ == 0.f) - { - const float edge10x = vertex1x - vertex0x; - const float edge10y = vertex1y - vertex0y; - const float edge10z = vertex1z - vertex0z; - const float edge20x = vertex2x - vertex0x; - const float edge20y = vertex2y - vertex0y; - 
const float edge20z = vertex2z - vertex0z; - - normalX = edge10y * edge20z - edge10z * edge20y; - normalY = edge10z * edge20x - edge10x * edge20z; - normalZ = edge10x * edge20y - edge10y * edge20x; - const float planeLen2 = normalX * normalX + normalY * normalY + normalZ * normalZ; - if (planeLen2 > 0.f) - { - const float invLen = 1.f / std::sqrt(planeLen2); - normalX *= invLen; - normalY *= invLen; - normalZ *= invLen; - } - else - { - normalX = 0.f; - normalY = 0.f; - normalZ = 0.f; - } - } - normalCursor[0ull] = normalX; - normalCursor[1ull] = normalY; - normalCursor[2ull] = normalZ; - normalCursor[3ull] = normalX; - normalCursor[4ull] = normalY; - normalCursor[5ull] = normalZ; - normalCursor[6ull] = normalX; - normalCursor[7ull] = normalY; - normalCursor[8ull] = normalZ; - posCursor += SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex; - normalCursor += SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex; - } - }; - std::jthread positionHashThread; - std::jthread normalHashThread; - if (hashInParsePipeline) - { - auto launchHashThread = [&](const float* srcFloat, core::blake3_hash_t& outHash) -> std::jthread - { - return std::jthread([&, srcFloat, outHashPtr = &outHash]() - { - try - { - core::blake3_hasher hasher; - size_t chunkIx = 0ull; - while (chunkIx < parseChunkCount) - { - auto ready = std::atomic_ref(hashChunkReady[chunkIx]); - while (ready.load(std::memory_order_acquire) == 0u) - ready.wait(0u, std::memory_order_acquire); - - size_t runEnd = chunkIx + 1ull; - while (runEnd < parseChunkCount) - { - const auto runReady = std::atomic_ref(hashChunkReady[runEnd]).load(std::memory_order_acquire); - if (runReady == 0u) - break; - ++runEnd; - } - - const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; - const uint64_t endTri = std::min(static_cast(runEnd) * parseChunkTriangles, triangleCount); - const size_t runTriangles = static_cast(endTri - begin); - const size_t runBytes = runTriangles * 
SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex * sizeof(float); - hasher.update(srcFloat + begin * SSTLContext::VerticesPerTriangle * SSTLContext::FloatChannelsPerVertex, runBytes); - chunkIx = runEnd; - } - *outHashPtr = static_cast(hasher); - } - catch (...) - { - hashPipelineOk.store(false, std::memory_order_relaxed); - } - }); - }; - positionHashThread = launchHashThread(posOutFloat, parsedPositionHash); - normalHashThread = launchHashThread(normalOutFloat, parsedNormalHash); - } - std::atomic_size_t nextChunkIx = 0ull; - auto parseWorker = [&](const size_t workerIx) -> void - { - SThreadAABB localAABB = {}; - while (true) - { - const size_t chunkIx = nextChunkIx.fetch_add(1ull, std::memory_order_relaxed); - if (chunkIx >= parseChunkCount) - break; - const uint64_t begin = static_cast(chunkIx) * parseChunkTriangles; - const uint64_t endTri = std::min(begin + parseChunkTriangles, triangleCount); - parseRange(begin, endTri, localAABB); - if (hashInParsePipeline) - { - auto ready = std::atomic_ref(hashChunkReady[chunkIx]); - ready.store(1u, std::memory_order_release); - ready.notify_all(); - } - } - if constexpr (ComputeAABBInParse) - threadAABBs[workerIx] = localAABB; - }; - SLoaderRuntimeTuner::dispatchWorkers(workerCount, parseWorker); - if (positionHashThread.joinable()) - positionHashThread.join(); - if (normalHashThread.joinable()) - normalHashThread.join(); - if (hashInParsePipeline) - { - if (!hashPipelineOk.load(std::memory_order_relaxed)) - return {}; - posView.src.buffer->setContentHash(parsedPositionHash); - normalView.src.buffer->setContentHash(parsedNormalHash); - } - if constexpr (ComputeAABBInParse) - { - for (const auto& localAABB : threadAABBs) - { - if (!localAABB.has) - continue; - hlsl::shapes::util::extendAABBAccumulator(parsedAABB, localAABB.minX, localAABB.minY, localAABB.minZ); - hlsl::shapes::util::extendAABBAccumulator(parsedAABB, localAABB.maxX, localAABB.maxY, localAABB.maxZ); - } - } - 
geometry->setPositionView(std::move(posView)); - geometry->setNormalView(std::move(normalView)); - if (colorValidForAllFaces.load(std::memory_order_relaxed)) - { - core::vector vertexColors(vertexCountSizeT); - for (size_t triIx = 0ull; triIx < static_cast(triangleCount); ++triIx) - { - const uint32_t triColor = faceColors[triIx]; - const size_t baseIx = triIx * SSTLContext::VerticesPerTriangle; - vertexColors[baseIx + 0ull] = triColor; - vertexColors[baseIx + 1ull] = triColor; - vertexColors[baseIx + 2ull] = triColor; - } - auto colorView = SGeometryLoaderCommon::createAdoptedView(std::move(vertexColors)); - if (!colorView) - return {}; - auto* const auxViews = geometry->getAuxAttributeViews(); - auxViews->resize(SSTLPolygonGeometryAuxLayout::COLOR0 + 1u); - auxViews->operator[](SSTLPolygonGeometryAuxLayout::COLOR0) = std::move(colorView); - hasTriangleColors = true; - } - } - else - { - parsePath = "ascii_fallback"; + wholeFileData = impl::SFileAccess::mapOrReadWholeFile(context.inner.mainFile, filesize, wholeFilePayload, ioPlan, &context.ioTelemetry); if (!wholeFileData) - { - wholeFilePayload.resize(filesize + 1ull); - if (!SInterchangeIO::readFileWithPolicy(context.inner.mainFile, wholeFilePayload.data(), 0ull, filesize, ioPlan, &context.ioTelemetry)) - return {}; - wholeFilePayload[filesize] = 0u; - wholeFileData = wholeFilePayload.data(); - } - - const char* const begin = reinterpret_cast(wholeFileData); - const char* const end = begin + filesize; - SStlAsciiParser parser(begin, end); - core::vector positions; - core::vector normals; - const auto firstToken = parser.readToken(); - if (!firstToken.has_value() || *firstToken != std::string_view("solid")) - return {}; - - for (;;) - { - const auto maybeToken = parser.readToken(); - if (!maybeToken.has_value()) - break; - const std::string_view textToken = *maybeToken; - if (textToken == std::string_view("endsolid")) - break; - if (textToken != std::string_view("facet")) - continue; - - const auto normalKeyword 
= parser.readToken(); - if (!normalKeyword.has_value() || *normalKeyword != std::string_view("normal")) - return {}; - - const auto fileNormal = parser.readVec3(); - if (!fileNormal.has_value()) - return {}; - - const auto outerKeyword = parser.readToken(); - if (!outerKeyword.has_value() || *outerKeyword != std::string_view("outer")) - return {}; - const auto loopKeyword = parser.readToken(); - if (!loopKeyword.has_value() || *loopKeyword != std::string_view("loop")) - return {}; - - hlsl::float32_t3 p[3] = {}; - for (uint32_t i = 0u; i < 3u; ++i) - { - const auto vertexKeyword = parser.readToken(); - if (!vertexKeyword.has_value() || *vertexKeyword != std::string_view("vertex")) - return {}; - const auto vertex = parser.readVec3(); - if (!vertex.has_value()) - return {}; - p[i] = *vertex; - } - - stlPushTriangleReversed(p, positions); - hlsl::float32_t3 faceNormal = stlResolveStoredNormal(*fileNormal); - if (hlsl::dot(faceNormal, faceNormal) <= 0.f) - faceNormal = SGeometryNormalCommon::computeFaceNormal(p[2u], p[1u], p[0u]); - normals.push_back(faceNormal); - normals.push_back(faceNormal); - normals.push_back(faceNormal); - hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[2u]); - hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[1u]); - hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[0u]); - - const auto endLoopKeyword = parser.readToken(); - if (!endLoopKeyword.has_value() || *endLoopKeyword != std::string_view("endloop")) - return {}; - const auto endFacetKeyword = parser.readToken(); - if (!endFacetKeyword.has_value() || *endFacetKeyword != std::string_view("endfacet")) - return {}; - } - if (positions.empty()) - return {}; - - triangleCount = positions.size() / SSTLContext::VerticesPerTriangle; - vertexCount = positions.size(); - - auto posView = SGeometryLoaderCommon::createAdoptedView(std::move(positions)); - auto normalView = SGeometryLoaderCommon::createAdoptedView(std::move(normals)); - if (!posView || !normalView) return {}; - 
geometry->setPositionView(std::move(posView)); - geometry->setNormalView(std::move(normalView)); } - if (vertexCount == 0ull) - return {}; - - if (computeContentHashes) - { - SPolygonGeometryContentHash::computeMissing(geometry.get(), _params.ioPolicy); - } - - if (!parsedAABB.empty()) - geometry->applyAABB(parsedAABB.value); - else - { - CPolygonGeometryManipulator::recomputeAABB(geometry.get()); - } - const uint64_t ioMinRead = context.ioTelemetry.getMinOrZero(); - const uint64_t ioAvgRead = context.ioTelemetry.getAvgOrZero(); - if (SInterchangeIO::isTinyIOTelemetryLikely(context.ioTelemetry, static_cast(filesize), _params.ioPolicy)) - { - _params.logger.log( - "STL loader tiny-io guard: file=%s reads=%llu min=%llu avg=%llu", - system::ILogger::ELL_WARNING, - _file->getFileName().string().c_str(), - static_cast(context.ioTelemetry.callCount), - static_cast(ioMinRead), - static_cast(ioAvgRead)); - } - _params.logger.log( - "STL loader stats: file=%s binary=%d parse_path=%s triangles=%llu vertices=%llu colors=%d io_reads=%llu io_min_read=%llu io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", - system::ILogger::ELL_PERFORMANCE, - _file->getFileName().string().c_str(), - binary ? 1 : 0, - parsePath, - static_cast(triangleCount), - static_cast(vertexCount), - hasTriangleColors ? 
1 : 0, - static_cast(context.ioTelemetry.callCount), - static_cast(ioMinRead), - static_cast(ioAvgRead), - system::to_string(_params.ioPolicy.strategy).c_str(), - system::to_string(ioPlan.strategy).c_str(), - static_cast(ioPlan.chunkSizeBytes()), - ioPlan.reason); - auto meta = core::make_smart_refctd_ptr(); - return SAssetBundle(std::move(meta), { std::move(geometry) }); + bool binary = false; + bool hasBinaryTriCountFromDetect = false; + uint32_t binaryTriCountFromDetect = 0u; + { + std::array prefix = {}; + bool hasPrefix = false; + if (wholeFileData && filesize >= Context::BinaryPrefixBytes) { + std::memcpy(prefix.data(), wholeFileData, Context::BinaryPrefixBytes); + hasPrefix = true; + } else { + hasPrefix = filesize >= Context::BinaryPrefixBytes && + SInterchangeIO::readFileExact( + context.inner.mainFile, prefix.data(), 0ull, + Context::BinaryPrefixBytes, &context.ioTelemetry); + } + bool startsWithSolid = false; + if (hasPrefix) { + startsWithSolid = + (std::memcmp(prefix.data(), "solid ", Context::TextProbeBytes) == 0); + } else { + char header[Context::TextProbeBytes] = {}; + if (wholeFileData) + std::memcpy(header, wholeFileData, sizeof(header)); + else if (!SInterchangeIO::readFileExact(context.inner.mainFile, header, + 0ull, sizeof(header), + &context.ioTelemetry)) + return {}; + startsWithSolid = + (std::strncmp(header, "solid ", Context::TextProbeBytes) == 0); + } + + bool binaryBySize = false; + if (hasPrefix) { + uint32_t triCount = 0u; + std::memcpy(&triCount, prefix.data() + Context::BinaryHeaderBytes, + sizeof(triCount)); + binaryTriCountFromDetect = triCount; + hasBinaryTriCountFromDetect = true; + const uint64_t expectedSize = + Context::BinaryPrefixBytes + + static_cast(triCount) * Context::TriangleRecordBytes; + binaryBySize = (expectedSize == filesize); + } + + if (binaryBySize) + binary = true; + else if (!startsWithSolid) + binary = true; + else + binary = false; + } + + auto geometry = core::make_smart_refctd_ptr(); + 
geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + hlsl::shapes::util::AABBAccumulator3 parsedAABB = + hlsl::shapes::util::createAABBAccumulator(); + uint64_t vertexCount = 0ull; + + if (binary) { + parsePath = "binary_fast"; + if (filesize < Context::BinaryPrefixBytes) + return {}; + + uint32_t triangleCount32 = binaryTriCountFromDetect; + if (!hasBinaryTriCountFromDetect) { + if (!SInterchangeIO::readFileExact( + context.inner.mainFile, &triangleCount32, + Context::BinaryHeaderBytes, sizeof(triangleCount32), + &context.ioTelemetry)) + return {}; + } + + triangleCount = triangleCount32; + const size_t dataSize = + static_cast(triangleCount) * Context::TriangleRecordBytes; + const size_t expectedSize = Context::BinaryPrefixBytes + dataSize; + if (filesize < expectedSize) + return {}; + + const uint8_t* payloadData = wholeFileData ? (wholeFileData + Context::BinaryPrefixBytes) : impl::SFileAccess::readRange(context.inner.mainFile, Context::BinaryPrefixBytes, dataSize, wholeFilePayload, ioPlan, &context.ioTelemetry); + if (!payloadData) + return {}; + + vertexCount = triangleCount * Context::VerticesPerTriangle; + const size_t vertexCountSizeT = static_cast(vertexCount); + if (vertexCountSizeT > + (std::numeric_limits::max() / sizeof(hlsl::float32_t3))) + return {}; + const size_t viewByteSize = vertexCountSizeT * sizeof(hlsl::float32_t3); + if (viewByteSize > (std::numeric_limits::max() - viewByteSize)) + return {}; + const size_t blockBytes = viewByteSize * 2ull; + auto upstream = core::getDefaultMemoryResource(); + if (!upstream) + return {}; + void* block = upstream->allocate(blockBytes, alignof(float)); + if (!block) + return {}; + auto blockResource = core::make_smart_refctd_ptr( + core::smart_refctd_ptr( + std::move(upstream)), + block, blockBytes, alignof(float)); + auto posBuffer = ICPUBuffer::create( + {{viewByteSize}, + block, + core::smart_refctd_ptr(blockResource), + alignof(float)}, + core::adopt_memory); + auto normalBuffer = 
ICPUBuffer::create( + {{viewByteSize}, + reinterpret_cast(block) + viewByteSize, + core::smart_refctd_ptr(blockResource), + alignof(float)}, + core::adopt_memory); + if (!posBuffer || !normalBuffer) + return {}; + ICPUPolygonGeometry::SDataView posView = {}; + posView.composed = {.stride = sizeof(hlsl::float32_t3), + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::getMatchingAABBFormat( + EF_R32G32B32_SFLOAT)}; + posView.src = { + .offset = 0ull, + .size = viewByteSize, + .buffer = std::move(posBuffer)}; + ICPUPolygonGeometry::SDataView normalView = {}; + normalView.composed = {.stride = sizeof(hlsl::float32_t3), + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::getMatchingAABBFormat( + EF_R32G32B32_SFLOAT)}; + normalView.src = {.offset = 0ull, + .size = viewByteSize, + .buffer = std::move(normalBuffer)}; + auto* posOutFloat = reinterpret_cast(posView.getPointer()); + auto* normalOutFloat = reinterpret_cast(normalView.getPointer()); + if (!posOutFloat || !normalOutFloat) + return {}; + + const uint8_t* cursor = payloadData; + const uint8_t* const end = cursor + dataSize; + if (end < cursor || + static_cast(end - cursor) < + static_cast(triangleCount) * Context::TriangleRecordBytes) + return {}; + core::vector faceColors(static_cast(triangleCount), 0u); + std::atomic_bool colorValidForAllFaces = true; + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); + const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers( + hw, _params.ioPolicy.runtimeTuning.workerHeadroom); + SLoaderRuntimeTuningRequest parseTuningRequest = {}; + parseTuningRequest.inputBytes = dataSize; + parseTuningRequest.totalWorkUnits = triangleCount; + parseTuningRequest.minBytesPerWorker = Context::TriangleRecordBytes; + parseTuningRequest.hardwareThreads = static_cast(hw); + parseTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); + parseTuningRequest.targetChunksPerWorker = + _params.ioPolicy.runtimeTuning.targetChunksPerWorker; + 
parseTuningRequest.minChunkWorkUnits = 1ull; + parseTuningRequest.maxChunkWorkUnits = + std::max(1ull, triangleCount); + parseTuningRequest.sampleData = payloadData; + parseTuningRequest.sampleBytes = + SLoaderRuntimeTuner::resolveSampleBytes(_params.ioPolicy, dataSize); + const auto parseTuning = + SLoaderRuntimeTuner::tune(_params.ioPolicy, parseTuningRequest); + const size_t workerCount = std::max( + 1ull, + std::min(parseTuning.workerCount, + static_cast(std::max(1ull, triangleCount)))); + static constexpr bool ComputeAABBInParse = true; + struct SThreadAABB { + bool has = false; + float minX = 0.f; + float minY = 0.f; + float minZ = 0.f; + float maxX = 0.f; + float maxY = 0.f; + float maxZ = 0.f; + }; + std::vector threadAABBs(ComputeAABBInParse ? workerCount + : 0ull); + const uint64_t parseChunkTriangles = + std::max(1ull, parseTuning.chunkWorkUnits); + const size_t parseChunkCount = static_cast( + SLoaderRuntimeTuner::ceilDiv(triangleCount, parseChunkTriangles)); + const bool hashInParsePipeline = computeContentHashes; + std::vector hashChunkReady( + hashInParsePipeline ? 
parseChunkCount : 0ull, 0u); + std::atomic_bool hashPipelineOk = true; + core::blake3_hash_t parsedPositionHash = + static_cast(core::blake3_hasher{}); + core::blake3_hash_t parsedNormalHash = + static_cast(core::blake3_hasher{}); + auto parseRange = [&](const uint64_t beginTri, const uint64_t endTri, + SThreadAABB& localAABB) -> void { + const uint8_t* localCursor = + payloadData + beginTri * Context::TriangleRecordBytes; + float* posCursor = posOutFloat + beginTri * Context::VerticesPerTriangle * + Context::FloatChannelsPerVertex; + float* normalCursor = + normalOutFloat + beginTri * Context::VerticesPerTriangle * + Context::FloatChannelsPerVertex; + for (uint64_t tri = beginTri; tri < endTri; ++tri) { + const uint8_t* const triRecord = localCursor; + localCursor += Context::TriangleRecordBytes; + std::array triValues = {}; + std::memcpy(triValues.data(), triRecord, sizeof(triValues)); + uint16_t packedColor = 0u; + std::memcpy(&packedColor, triRecord + Context::TriangleFloatBytes, + sizeof(packedColor)); + if (packedColor & 0x8000u) + faceColors[static_cast(tri)] = + Parse::decodeViscamColorToB8G8R8A8(packedColor); + else + colorValidForAllFaces.store(false, std::memory_order_relaxed); + + float normalX = triValues[0ull]; + float normalY = triValues[1ull]; + float normalZ = triValues[2ull]; + + const float vertex0x = triValues[9ull]; + const float vertex0y = triValues[10ull]; + const float vertex0z = triValues[11ull]; + const float vertex1x = triValues[6ull]; + const float vertex1y = triValues[7ull]; + const float vertex1z = triValues[8ull]; + const float vertex2x = triValues[3ull]; + const float vertex2y = triValues[4ull]; + const float vertex2z = triValues[5ull]; + + posCursor[0ull] = vertex0x; + posCursor[1ull] = vertex0y; + posCursor[2ull] = vertex0z; + posCursor[3ull] = vertex1x; + posCursor[4ull] = vertex1y; + posCursor[5ull] = vertex1z; + posCursor[6ull] = vertex2x; + posCursor[7ull] = vertex2y; + posCursor[8ull] = vertex2z; + if constexpr 
(ComputeAABBInParse) { + if (!localAABB.has) { + localAABB.has = true; + localAABB.minX = vertex0x; + localAABB.minY = vertex0y; + localAABB.minZ = vertex0z; + localAABB.maxX = vertex0x; + localAABB.maxY = vertex0y; + localAABB.maxZ = vertex0z; + } + if (vertex0x < localAABB.minX) + localAABB.minX = vertex0x; + if (vertex0y < localAABB.minY) + localAABB.minY = vertex0y; + if (vertex0z < localAABB.minZ) + localAABB.minZ = vertex0z; + if (vertex0x > localAABB.maxX) + localAABB.maxX = vertex0x; + if (vertex0y > localAABB.maxY) + localAABB.maxY = vertex0y; + if (vertex0z > localAABB.maxZ) + localAABB.maxZ = vertex0z; + if (vertex1x < localAABB.minX) + localAABB.minX = vertex1x; + if (vertex1y < localAABB.minY) + localAABB.minY = vertex1y; + if (vertex1z < localAABB.minZ) + localAABB.minZ = vertex1z; + if (vertex1x > localAABB.maxX) + localAABB.maxX = vertex1x; + if (vertex1y > localAABB.maxY) + localAABB.maxY = vertex1y; + if (vertex1z > localAABB.maxZ) + localAABB.maxZ = vertex1z; + if (vertex2x < localAABB.minX) + localAABB.minX = vertex2x; + if (vertex2y < localAABB.minY) + localAABB.minY = vertex2y; + if (vertex2z < localAABB.minZ) + localAABB.minZ = vertex2z; + if (vertex2x > localAABB.maxX) + localAABB.maxX = vertex2x; + if (vertex2y > localAABB.maxY) + localAABB.maxY = vertex2y; + if (vertex2z > localAABB.maxZ) + localAABB.maxZ = vertex2z; + } + if (normalX == 0.f && normalY == 0.f && normalZ == 0.f) { + const float edge10x = vertex1x - vertex0x; + const float edge10y = vertex1y - vertex0y; + const float edge10z = vertex1z - vertex0z; + const float edge20x = vertex2x - vertex0x; + const float edge20y = vertex2y - vertex0y; + const float edge20z = vertex2z - vertex0z; + + normalX = edge10y * edge20z - edge10z * edge20y; + normalY = edge10z * edge20x - edge10x * edge20z; + normalZ = edge10x * edge20y - edge10y * edge20x; + const float planeLen2 = + normalX * normalX + normalY * normalY + normalZ * normalZ; + if (planeLen2 > 0.f) { + const float invLen = 1.f / 
std::sqrt(planeLen2); + normalX *= invLen; + normalY *= invLen; + normalZ *= invLen; + } else { + normalX = 0.f; + normalY = 0.f; + normalZ = 0.f; + } + } + normalCursor[0ull] = normalX; + normalCursor[1ull] = normalY; + normalCursor[2ull] = normalZ; + normalCursor[3ull] = normalX; + normalCursor[4ull] = normalY; + normalCursor[5ull] = normalZ; + normalCursor[6ull] = normalX; + normalCursor[7ull] = normalY; + normalCursor[8ull] = normalZ; + posCursor += + Context::VerticesPerTriangle * Context::FloatChannelsPerVertex; + normalCursor += + Context::VerticesPerTriangle * Context::FloatChannelsPerVertex; + } + }; + std::jthread positionHashThread; + std::jthread normalHashThread; + if (hashInParsePipeline) { + auto launchHashThread = + [&](const float* srcFloat, + core::blake3_hash_t& outHash) -> std::jthread { + return std::jthread([&, srcFloat, outHashPtr = &outHash]() { + try { + core::blake3_hasher hasher; + size_t chunkIx = 0ull; + while (chunkIx < parseChunkCount) { + auto ready = std::atomic_ref(hashChunkReady[chunkIx]); + while (ready.load(std::memory_order_acquire) == 0u) + ready.wait(0u, std::memory_order_acquire); + + size_t runEnd = chunkIx + 1ull; + while (runEnd < parseChunkCount) { + const auto runReady = + std::atomic_ref(hashChunkReady[runEnd]) + .load(std::memory_order_acquire); + if (runReady == 0u) + break; + ++runEnd; + } + + const uint64_t begin = + static_cast(chunkIx) * parseChunkTriangles; + const uint64_t endTri = std::min( + static_cast(runEnd) * parseChunkTriangles, + triangleCount); + const size_t runTriangles = static_cast(endTri - begin); + const size_t runBytes = + runTriangles * Context::VerticesPerTriangle * + Context::FloatChannelsPerVertex * sizeof(float); + hasher.update(srcFloat + begin * Context::VerticesPerTriangle * + Context::FloatChannelsPerVertex, + runBytes); + chunkIx = runEnd; + } + *outHashPtr = static_cast(hasher); + } catch (...) 
{ + hashPipelineOk.store(false, std::memory_order_relaxed); + } + }); + }; + positionHashThread = launchHashThread(posOutFloat, parsedPositionHash); + normalHashThread = launchHashThread(normalOutFloat, parsedNormalHash); + } + std::atomic_size_t nextChunkIx = 0ull; + auto parseWorker = [&](const size_t workerIx) -> void { + SThreadAABB localAABB = {}; + while (true) { + const size_t chunkIx = + nextChunkIx.fetch_add(1ull, std::memory_order_relaxed); + if (chunkIx >= parseChunkCount) + break; + const uint64_t begin = + static_cast(chunkIx) * parseChunkTriangles; + const uint64_t endTri = + std::min(begin + parseChunkTriangles, triangleCount); + parseRange(begin, endTri, localAABB); + if (hashInParsePipeline) { + auto ready = std::atomic_ref(hashChunkReady[chunkIx]); + ready.store(1u, std::memory_order_release); + ready.notify_all(); + } + } + if constexpr (ComputeAABBInParse) + threadAABBs[workerIx] = localAABB; + }; + SLoaderRuntimeTuner::dispatchWorkers(workerCount, parseWorker); + if (positionHashThread.joinable()) + positionHashThread.join(); + if (normalHashThread.joinable()) + normalHashThread.join(); + if (hashInParsePipeline) { + if (!hashPipelineOk.load(std::memory_order_relaxed)) + return {}; + posView.src.buffer->setContentHash(parsedPositionHash); + normalView.src.buffer->setContentHash(parsedNormalHash); + } + if constexpr (ComputeAABBInParse) { + for (const auto& localAABB : threadAABBs) { + if (!localAABB.has) + continue; + hlsl::shapes::util::extendAABBAccumulator( + parsedAABB, localAABB.minX, localAABB.minY, localAABB.minZ); + hlsl::shapes::util::extendAABBAccumulator( + parsedAABB, localAABB.maxX, localAABB.maxY, localAABB.maxZ); + } + } + geometry->setPositionView(std::move(posView)); + geometry->setNormalView(std::move(normalView)); + if (colorValidForAllFaces.load(std::memory_order_relaxed)) { + core::vector vertexColors(vertexCountSizeT); + for (size_t triIx = 0ull; triIx < static_cast(triangleCount); + ++triIx) { + const uint32_t triColor = 
faceColors[triIx]; + const size_t baseIx = triIx * Context::VerticesPerTriangle; + vertexColors[baseIx + 0ull] = triColor; + vertexColors[baseIx + 1ull] = triColor; + vertexColors[baseIx + 2ull] = triColor; + } + auto colorView = + SGeometryLoaderCommon::createAdoptedView( + std::move(vertexColors)); + if (!colorView) + return {}; + auto* const auxViews = geometry->getAuxAttributeViews(); + auxViews->resize(SSTLPolygonGeometryAuxLayout::COLOR0 + 1u); + auxViews->operator[](SSTLPolygonGeometryAuxLayout::COLOR0) = + std::move(colorView); + hasTriangleColors = true; + } + } else { + parsePath = "ascii_fallback"; + if (!wholeFileData) + { + wholeFileData = impl::SFileAccess::mapOrReadWholeFile(context.inner.mainFile, filesize, wholeFilePayload, ioPlan, &context.ioTelemetry); + if (!wholeFileData) + return {}; + } + + const char* const begin = reinterpret_cast(wholeFileData); + const char* const end = begin + filesize; + AsciiParser parser(begin, end); + core::vector positions; + core::vector normals; + const auto firstToken = parser.readToken(); + if (!firstToken.has_value() || *firstToken != std::string_view("solid")) + return {}; + + for (;;) { + const auto maybeToken = parser.readToken(); + if (!maybeToken.has_value()) + break; + const std::string_view textToken = *maybeToken; + if (textToken == std::string_view("endsolid")) + break; + if (textToken != std::string_view("facet")) + continue; + + const auto normalKeyword = parser.readToken(); + if (!normalKeyword.has_value() || + *normalKeyword != std::string_view("normal")) + return {}; + + const auto fileNormal = parser.readVec3(); + if (!fileNormal.has_value()) + return {}; + + const auto outerKeyword = parser.readToken(); + if (!outerKeyword.has_value() || + *outerKeyword != std::string_view("outer")) + return {}; + const auto loopKeyword = parser.readToken(); + if (!loopKeyword.has_value() || *loopKeyword != std::string_view("loop")) + return {}; + + std::array p = {}; + for (uint32_t i = 0u; i < 3u; ++i) { + 
const auto vertexKeyword = parser.readToken(); + if (!vertexKeyword.has_value() || + *vertexKeyword != std::string_view("vertex")) + return {}; + const auto vertex = parser.readVec3(); + if (!vertex.has_value()) + return {}; + p[i] = *vertex; + } + + Parse::pushTriangleReversed(p, positions); + hlsl::float32_t3 faceNormal = Parse::resolveStoredNormal(*fileNormal); + if (hlsl::dot(faceNormal, faceNormal) <= 0.f) + faceNormal = + SGeometryNormalCommon::computeFaceNormal(p[2u], p[1u], p[0u]); + normals.push_back(faceNormal); + normals.push_back(faceNormal); + normals.push_back(faceNormal); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[2u]); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[1u]); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[0u]); + + const auto endLoopKeyword = parser.readToken(); + if (!endLoopKeyword.has_value() || + *endLoopKeyword != std::string_view("endloop")) + return {}; + const auto endFacetKeyword = parser.readToken(); + if (!endFacetKeyword.has_value() || + *endFacetKeyword != std::string_view("endfacet")) + return {}; + } + if (positions.empty()) + return {}; + + triangleCount = positions.size() / Context::VerticesPerTriangle; + vertexCount = positions.size(); + + auto posView = + SGeometryLoaderCommon::createAdoptedView( + std::move(positions)); + auto normalView = + SGeometryLoaderCommon::createAdoptedView( + std::move(normals)); + if (!posView || !normalView) + return {}; + geometry->setPositionView(std::move(posView)); + geometry->setNormalView(std::move(normalView)); + } + + if (vertexCount == 0ull) + return {}; + + if (computeContentHashes) { + SPolygonGeometryContentHash::computeMissing(geometry.get(), + _params.ioPolicy); + } + + if (!parsedAABB.empty()) + geometry->applyAABB(parsedAABB.value); + else { + CPolygonGeometryManipulator::recomputeAABB(geometry.get()); + } + const uint64_t ioMinRead = context.ioTelemetry.getMinOrZero(); + const uint64_t ioAvgRead = context.ioTelemetry.getAvgOrZero(); + 
impl::SIODiagnostics::logTinyIO(_params.logger, "STL loader", _file->getFileName().string().c_str(), context.ioTelemetry, static_cast(filesize), _params.ioPolicy, "reads"); + _params.logger.log( + "STL loader stats: file=%s binary=%d parse_path=%s triangles=%llu " + "vertices=%llu colors=%d io_reads=%llu io_min_read=%llu io_avg_read=%llu " + "io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), + binary ? 1 : 0, parsePath, static_cast(triangleCount), + static_cast(vertexCount), hasTriangleColors ? 1 : 0, + static_cast(context.ioTelemetry.callCount), + static_cast(ioMinRead), + static_cast(ioAvgRead), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(ioPlan.strategy).c_str(), + static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); + auto meta = core::make_smart_refctd_ptr(); + return SAssetBundle(std::move(meta), {std::move(geometry)}); } -bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr) const -{ - if (!_file || _file->getSize() <= SSTLContext::TextProbeBytes) - return false; - - const size_t fileSize = _file->getSize(); - if (fileSize < SSTLContext::BinaryPrefixBytes) - { - char header[SSTLContext::TextProbeBytes] = {}; - if (!SInterchangeIO::readFileExact(_file, header, 0ull, sizeof(header))) - return false; - return std::strncmp(header, "solid ", SSTLContext::TextProbeBytes) == 0; - } - - std::array prefix = {}; - if (!SInterchangeIO::readFileExact(_file, prefix.data(), 0ull, prefix.size())) - return false; - - uint32_t triangleCount = 0u; - std::memcpy(&triangleCount, prefix.data() + SSTLContext::BinaryHeaderBytes, sizeof(triangleCount)); - if (std::memcmp(prefix.data(), "solid ", SSTLContext::TextProbeBytes) == 0) - return true; - - return fileSize == (SSTLContext::TriangleRecordBytes * triangleCount + SSTLContext::BinaryPrefixBytes); +bool CSTLMeshFileLoader::isALoadableFileFormat( + system::IFile* _file, const 
system::logger_opt_ptr) const { + using Context = Parse::Context; + + if (!_file || _file->getSize() <= Context::TextProbeBytes) + return false; + + const size_t fileSize = _file->getSize(); + if (fileSize < Context::BinaryPrefixBytes) { + char header[Context::TextProbeBytes] = {}; + if (!SInterchangeIO::readFileExact(_file, header, 0ull, sizeof(header))) + return false; + return std::strncmp(header, "solid ", Context::TextProbeBytes) == 0; + } + + std::array prefix = {}; + if (!SInterchangeIO::readFileExact(_file, prefix.data(), 0ull, prefix.size())) + return false; + + uint32_t triangleCount = 0u; + std::memcpy(&triangleCount, prefix.data() + Context::BinaryHeaderBytes, + sizeof(triangleCount)); + if (std::memcmp(prefix.data(), "solid ", Context::TextProbeBytes) == 0) + return true; + + return fileSize == (Context::TriangleRecordBytes * triangleCount + + Context::BinaryPrefixBytes); } } diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index d39dc801f6..6d531e55fb 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -5,10 +5,12 @@ #include "nbl/system/IFile.h" #include "CSTLMeshWriter.h" +#include "SSTLPolygonGeometryAuxLayout.h" +#include "impl/SFileAccess.h" +#include "impl/SIODiagnostics.h" #include "nbl/asset/format/convertColor.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" -#include "SSTLPolygonGeometryAuxLayout.h" #include #include @@ -24,59 +26,654 @@ namespace nbl::asset { -namespace stl_writer_detail +namespace { -struct SContext +struct Parse { - IAssetWriter::SAssetWriteContext writeContext; - SResolvedFileIOPolicy ioPlan = {}; - core::vector ioBuffer = {}; - size_t fileOffset = 0ull; - SFileWriteTelemetry writeTelemetry = {}; - - bool flush(); - bool write(const void* data, size_t size); -}; + struct Context + { + IAssetWriter::SAssetWriteContext writeContext; + 
SResolvedFileIOPolicy ioPlan = {}; + core::vector ioBuffer = {}; + size_t fileOffset = 0ull; + SFileWriteTelemetry writeTelemetry = {}; -constexpr size_t BinaryHeaderBytes = 80ull; -constexpr size_t BinaryTriangleCountBytes = sizeof(uint32_t); -constexpr size_t BinaryTriangleFloatCount = 12ull; -constexpr size_t BinaryTriangleFloatBytes = sizeof(float) * BinaryTriangleFloatCount; -constexpr size_t BinaryTriangleAttributeBytes = sizeof(uint16_t); -constexpr size_t BinaryTriangleRecordBytes = BinaryTriangleFloatBytes + BinaryTriangleAttributeBytes; -static_assert(BinaryTriangleRecordBytes == 50ull); -constexpr size_t BinaryPrefixBytes = BinaryHeaderBytes + BinaryTriangleCountBytes; -constexpr size_t IoFallbackReserveBytes = 1ull << 20; -constexpr size_t AsciiFaceTextMaxBytes = 1024ull; -constexpr char AsciiSolidPrefix[] = "solid "; -constexpr char AsciiEndSolidPrefix[] = "endsolid "; -constexpr char AsciiDefaultName[] = "nabla_mesh"; + bool flush() + { + if (ioBuffer.empty()) + return true; -} + size_t bytesWritten = 0ull; + const size_t totalBytes = ioBuffer.size(); + while (bytesWritten < totalBytes) + { + system::IFile::success_t success; + writeContext.outputFile->write(success, ioBuffer.data() + bytesWritten, fileOffset + bytesWritten, totalBytes - bytesWritten); + if (!success) + return false; + const size_t processed = success.getBytesProcessed(); + if (processed == 0ull) + return false; + writeTelemetry.account(processed); + bytesWritten += processed; + } + fileOffset += totalBytes; + ioBuffer.clear(); + return true; + } + + bool write(const void* data, size_t size) + { + if (!data && size != 0ull) + return false; + if (size == 0ull) + return true; + + const uint8_t* src = reinterpret_cast(data); + switch (ioPlan.strategy) + { + case SResolvedFileIOPolicy::Strategy::WholeFile: + { + const size_t oldSize = ioBuffer.size(); + ioBuffer.resize(oldSize + size); + std::memcpy(ioBuffer.data() + oldSize, src, size); + return true; + } + case 
SResolvedFileIOPolicy::Strategy::Chunked: + default: + { + const size_t chunkSize = static_cast(ioPlan.chunkSizeBytes()); + size_t remaining = size; + while (remaining > 0ull) + { + const size_t freeSpace = chunkSize - ioBuffer.size(); + const size_t toCopy = std::min(freeSpace, remaining); + const size_t oldSize = ioBuffer.size(); + ioBuffer.resize(oldSize + toCopy); + std::memcpy(ioBuffer.data() + oldSize, src, toCopy); + src += toCopy; + remaining -= toCopy; + if (ioBuffer.size() == chunkSize && !flush()) + return false; + } + return true; + } + } + } + }; + + static constexpr size_t BinaryHeaderBytes = 80ull; + static constexpr size_t BinaryTriangleCountBytes = sizeof(uint32_t); + static constexpr size_t BinaryTriangleFloatCount = 12ull; + static constexpr size_t BinaryTriangleFloatBytes = sizeof(float) * BinaryTriangleFloatCount; + static constexpr size_t BinaryTriangleAttributeBytes = sizeof(uint16_t); + static constexpr size_t BinaryTriangleRecordBytes = BinaryTriangleFloatBytes + BinaryTriangleAttributeBytes; + static constexpr size_t BinaryPrefixBytes = BinaryHeaderBytes + BinaryTriangleCountBytes; + static constexpr size_t IoFallbackReserveBytes = 1ull << 20; + static constexpr size_t AsciiFaceTextMaxBytes = 1024ull; + static constexpr char AsciiSolidPrefix[] = "solid "; + static constexpr char AsciiEndSolidPrefix[] = "endsolid "; + static constexpr char AsciiDefaultName[] = "nabla_mesh"; + static_assert(BinaryTriangleRecordBytes == 50ull); + + static bool appendLiteral(char*& cursor, char* const end, const char* text, const size_t textSize) + { + if (!cursor || cursor + textSize > end) + return false; + std::memcpy(cursor, text, textSize); + cursor += textSize; + return true; + } + + static bool appendVectorAsAsciiLine(char*& cursor, char* const end, const hlsl::float32_t3& v) + { + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.x); + if (cursor >= end) + return false; + *(cursor++) = ' '; + cursor = 
SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.y); + if (cursor >= end) + return false; + *(cursor++) = ' '; + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.z); + if (cursor >= end) + return false; + *(cursor++) = '\n'; + return true; + } + + static bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, hlsl::float32_t3& out0, hlsl::float32_t3& out1, hlsl::float32_t3& out2, hlsl::uint32_t3* outIdx) + { + hlsl::uint32_t3 idx(0u); + const auto& indexView = geom->getIndexView(); + const void* indexBuffer = indexView ? indexView.getPointer() : nullptr; + const uint64_t indexSize = indexView ? indexView.composed.getStride() : 0u; + IPolygonGeometryBase::IIndexingCallback::SContext ctx = {.indexBuffer = indexBuffer, .indexSize = indexSize, .beginPrimitive = primIx, .endPrimitive = primIx + 1u, .out = &idx.x}; + indexing->operator()(ctx); + if (outIdx) + *outIdx = idx; + + hlsl::float32_t3 p0 = {}; + hlsl::float32_t3 p1 = {}; + hlsl::float32_t3 p2 = {}; + if (!posView.decodeElement(idx.x, p0)) + return false; + if (!posView.decodeElement(idx.y, p1)) + return false; + if (!posView.decodeElement(idx.z, p2)) + return false; -using SContext = stl_writer_detail::SContext; - -bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, hlsl::float32_t3& out0, hlsl::float32_t3& out1, hlsl::float32_t3& out2, uint32_t* outIdx); -bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, hlsl::float32_t3& outNormal); -double stlNormalizeColorComponentToUnit(double value); -uint16_t stlPackViscamColorFromB8G8R8A8(uint32_t color); -const ICPUPolygonGeometry::SDataView* stlGetColorView(const ICPUPolygonGeometry* geom, size_t vertexCount); -bool stlDecodeColorB8G8R8A8(const 
ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, uint32_t& outColor); -void stlDecodeColorUnitRGBAFromB8G8R8A8(uint32_t color, double (&out)[4]); -bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context); -bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context); -bool writeFaceText( - const hlsl::float32_t3& v1, - const hlsl::float32_t3& v2, - const hlsl::float32_t3& v3, - const uint32_t* idx, - const asset::ICPUPolygonGeometry::SDataView& normalView, - const bool flipHandedness, - SContext* context); - -bool appendLiteral(char*& cursor, char* const end, const char* text, const size_t textSize); -bool appendVectorAsAsciiLine(char*& cursor, char* const end, const hlsl::float32_t3& v); + out0 = p0; + out1 = p1; + out2 = p2; + return true; + } + + static bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const hlsl::uint32_t3& idx, hlsl::float32_t3& outNormal) + { + if (!normalView) + return false; + + hlsl::float32_t3 n0 = {}; + hlsl::float32_t3 n1 = {}; + hlsl::float32_t3 n2 = {}; + if (!normalView.decodeElement(idx.x, n0)) + return false; + if (!normalView.decodeElement(idx.y, n1)) + return false; + if (!normalView.decodeElement(idx.z, n2)) + return false; + + auto normal = n0; + if (hlsl::dot(normal, normal) <= 0.f) + normal = n1; + if (hlsl::dot(normal, normal) <= 0.f) + normal = n2; + if (hlsl::dot(normal, normal) <= 0.f) + return false; + + outNormal = normal; + return true; + } + + static double normalizeColorComponentToUnit(double value) + { + if (!std::isfinite(value)) + return 0.0; + if (value > 1.0) + value /= 255.0; + return std::clamp(value, 0.0, 1.0); + } + + static uint16_t packViscamColorFromB8G8R8A8(const uint32_t color) + { + const void* src[4] = {&color, nullptr, nullptr, nullptr}; + uint16_t packed = 0u; + convertColor(src, &packed, 0u, 0u); + packed |= 0x8000u; + return packed; + } + + static const ICPUPolygonGeometry::SDataView* getColorView(const 
ICPUPolygonGeometry* geom, const size_t vertexCount) + { + const auto* view = SGeometryWriterCommon::getAuxViewAt(geom, SSTLPolygonGeometryAuxLayout::COLOR0, vertexCount); + if (!view) + return nullptr; + return getFormatChannelCount(view->composed.format) >= 3u ? view : nullptr; + } + + static bool decodeColorB8G8R8A8(const ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, uint32_t& outColor) + { + if (colorView.composed.format == EF_B8G8R8A8_UNORM && colorView.composed.getStride() == sizeof(uint32_t)) + { + const auto* const ptr = reinterpret_cast(colorView.getPointer()); + if (!ptr) + return false; + std::memcpy(&outColor, ptr + static_cast(ix) * sizeof(uint32_t), sizeof(outColor)); + return true; + } + + hlsl::float64_t4 decoded = {}; + if (!colorView.decodeElement(ix, decoded)) + return false; + const double rgbaUnit[4] = {normalizeColorComponentToUnit(decoded.x), normalizeColorComponentToUnit(decoded.y), normalizeColorComponentToUnit(decoded.z), normalizeColorComponentToUnit(decoded.w)}; + encodePixels(&outColor, rgbaUnit); + return true; + } + + static void decodeColorUnitRGBAFromB8G8R8A8(const uint32_t color, double* out) + { + const void* src[4] = {&color, nullptr, nullptr, nullptr}; + decodePixels(src, out, 0u, 0u); + } + + static bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, + Context* context) { + if (!geom || !context || !context->writeContext.outputFile) + return false; + + const auto& posView = geom->getPositionView(); + if (!posView) + return false; + + const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag( + E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const size_t vertexCount = posView.getElementCount(); + if (vertexCount == 0ull) + return false; + + uint32_t facenum = 0u; + size_t faceCount = 0ull; + if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) + return false; + if (faceCount > static_cast(std::numeric_limits::max())) + return false; + facenum = static_cast(faceCount); + + 
const size_t outputSize = BinaryPrefixBytes + static_cast(facenum) * + BinaryTriangleRecordBytes; + std::unique_ptr output(new (std::nothrow) uint8_t[outputSize]); + if (!output) + return false; + uint8_t* dst = output.get(); + + std::memset(dst, 0, BinaryHeaderBytes); + dst += BinaryHeaderBytes; + + std::memcpy(dst, &facenum, sizeof(facenum)); + dst += sizeof(facenum); + + const auto& normalView = geom->getNormalView(); + const bool hasNormals = static_cast(normalView); + const auto* const colorView = getColorView(geom, vertexCount); + const hlsl::float32_t3* const tightPositions = + SGeometryWriterCommon::getTightView(posView); + const hlsl::float32_t3* const tightNormals = + hasNormals ? SGeometryWriterCommon::getTightView( + normalView) + : nullptr; + const bool hasImplicitTriangleIndices = !geom->getIndexView(); + + auto decodePosition = [&](const uint32_t ix, + hlsl::float32_t3& out) -> bool { + if (tightPositions) { + out = tightPositions[ix]; + return true; + } + return posView.decodeElement(ix, out); + }; + + auto decodeNormal = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { + if (!hasNormals) + return false; + if (tightNormals) { + out = tightNormals[ix]; + return true; + } + return normalView.decodeElement(ix, out); + }; + auto computeFaceColor = [&](const hlsl::uint32_t3& idx, + uint16_t& outColor) -> bool { + outColor = 0u; + if (!colorView) + return true; + hlsl::uint32_t3 color(0u); + if (!decodeColorB8G8R8A8(*colorView, idx.x, color.x)) + return false; + if (!decodeColorB8G8R8A8(*colorView, idx.y, color.y)) + return false; + if (!decodeColorB8G8R8A8(*colorView, idx.z, color.z)) + return false; + std::array, 3> rgba = {}; + decodeColorUnitRGBAFromB8G8R8A8(color.x, rgba[0].data()); + decodeColorUnitRGBAFromB8G8R8A8(color.y, rgba[1].data()); + decodeColorUnitRGBAFromB8G8R8A8(color.z, rgba[2].data()); + const std::array rgbaAvg = { + (rgba[0][0] + rgba[1][0] + rgba[2][0]) / 3.0, + (rgba[0][1] + rgba[1][1] + rgba[2][1]) / 3.0, + (rgba[0][2] + 
rgba[1][2] + rgba[2][2]) / 3.0, 1.0}; + uint32_t avgColor = 0u; + encodePixels(&avgColor, rgbaAvg.data()); + outColor = packViscamColorFromB8G8R8A8(avgColor); + return true; + }; + auto writeRecord = [&dst](const float nx, const float ny, const float nz, + const float v1x, const float v1y, const float v1z, + const float v2x, const float v2y, const float v2z, + const float v3x, const float v3y, const float v3z, + const uint16_t attribute) -> void { + const float payload[BinaryTriangleFloatCount] = { + nx, ny, nz, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z}; + std::memcpy(dst, payload, BinaryTriangleFloatBytes); + dst += BinaryTriangleFloatBytes; + std::memcpy(dst, &attribute, BinaryTriangleAttributeBytes); + dst += BinaryTriangleAttributeBytes; + }; + auto prepareVertices = + [&](const hlsl::float32_t3& p0, const hlsl::float32_t3& p1, + const hlsl::float32_t3& p2, hlsl::float32_t3& vertex1, + hlsl::float32_t3& vertex2, hlsl::float32_t3& vertex3) -> void { + vertex1 = p2; + vertex2 = p1; + vertex3 = p0; + if (flipHandedness) { + vertex1.x = -vertex1.x; + vertex2.x = -vertex2.x; + vertex3.x = -vertex3.x; + } + }; + auto computePlaneNormal = + [&](const hlsl::float32_t3& vertex1, const hlsl::float32_t3& vertex2, + const hlsl::float32_t3& vertex3) -> hlsl::float32_t3 { + const hlsl::float32_t3 planeNormal = + hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); + const float planeNormalLen2 = hlsl::dot(planeNormal, planeNormal); + return planeNormalLen2 > 0.f ? 
hlsl::normalize(planeNormal) + : hlsl::float32_t3(0.f, 0.f, 0.f); + }; + + const bool hasFastTightPath = hasImplicitTriangleIndices && + (tightPositions != nullptr) && + (!hasNormals || (tightNormals != nullptr)); + if (hasFastTightPath && hasNormals) { + bool allFastNormalsNonZero = true; + const size_t normalCount = static_cast(facenum) * 3ull; + for (size_t i = 0ull; i < normalCount; ++i) { + const auto& n = tightNormals[i]; + if (hlsl::dot(n, n) <= 0.f) { + allFastNormalsNonZero = false; + break; + } + } + + const hlsl::float32_t3* posTri = tightPositions; + const hlsl::float32_t3* nrmTri = tightNormals; + if (allFastNormalsNonZero) { + for (uint32_t primIx = 0u; primIx < facenum; + ++primIx, posTri += 3u, nrmTri += 3u) { + uint16_t faceColor = 0u; + if (!computeFaceColor(hlsl::uint32_t3(primIx * 3u + 0u, + primIx * 3u + 1u, + primIx * 3u + 2u), + faceColor)) + return false; + + hlsl::float32_t3 vertex1 = {}; + hlsl::float32_t3 vertex2 = {}; + hlsl::float32_t3 vertex3 = {}; + prepareVertices(posTri[0u], posTri[1u], posTri[2u], vertex1, vertex2, + vertex3); + + hlsl::float32_t3 attrNormal = nrmTri[0u]; + if (flipHandedness) + attrNormal.x = -attrNormal.x; + + writeRecord(attrNormal.x, attrNormal.y, attrNormal.z, vertex1.x, + vertex1.y, vertex1.z, vertex2.x, vertex2.y, vertex2.z, + vertex3.x, vertex3.y, vertex3.z, faceColor); + } + } else { + for (uint32_t primIx = 0u; primIx < facenum; + ++primIx, posTri += 3u, nrmTri += 3u) { + uint16_t faceColor = 0u; + if (!computeFaceColor(hlsl::uint32_t3(primIx * 3u + 0u, + primIx * 3u + 1u, + primIx * 3u + 2u), + faceColor)) + return false; + + hlsl::float32_t3 vertex1 = {}; + hlsl::float32_t3 vertex2 = {}; + hlsl::float32_t3 vertex3 = {}; + prepareVertices(posTri[0u], posTri[1u], posTri[2u], vertex1, vertex2, + vertex3); + + hlsl::float32_t3 normal = hlsl::float32_t3(0.f, 0.f, 0.f); + hlsl::float32_t3 attrNormal = nrmTri[0u]; + if (hlsl::dot(attrNormal, attrNormal) <= 0.f) + attrNormal = nrmTri[1u]; + if 
(hlsl::dot(attrNormal, attrNormal) <= 0.f) + attrNormal = nrmTri[2u]; + if (hlsl::dot(attrNormal, attrNormal) > 0.f) { + if (flipHandedness) + attrNormal.x = -attrNormal.x; + normal = attrNormal; + } + + if (hlsl::dot(normal, normal) <= 0.f) + normal = computePlaneNormal(vertex1, vertex2, vertex3); + + writeRecord(normal.x, normal.y, normal.z, vertex1.x, vertex1.y, + vertex1.z, vertex2.x, vertex2.y, vertex2.z, vertex3.x, + vertex3.y, vertex3.z, faceColor); + } + } + } else if (hasFastTightPath) { + const hlsl::float32_t3* posTri = tightPositions; + for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u) { + uint16_t faceColor = 0u; + if (!computeFaceColor(hlsl::uint32_t3(primIx * 3u + 0u, + primIx * 3u + 1u, + primIx * 3u + 2u), + faceColor)) + return false; + + hlsl::float32_t3 vertex1 = {}; + hlsl::float32_t3 vertex2 = {}; + hlsl::float32_t3 vertex3 = {}; + prepareVertices(posTri[0u], posTri[1u], posTri[2u], vertex1, vertex2, + vertex3); + const hlsl::float32_t3 normal = + computePlaneNormal(vertex1, vertex2, vertex3); + + writeRecord(normal.x, normal.y, normal.z, vertex1.x, vertex1.y, + vertex1.z, vertex2.x, vertex2.y, vertex2.z, vertex3.x, + vertex3.y, vertex3.z, faceColor); + } + } else { + if (!SGeometryWriterCommon::visitTriangleIndices( + geom, + [&](const uint32_t i0, const uint32_t i1, + const uint32_t i2) -> bool { + const hlsl::uint32_t3 idx(i0, i1, i2); + uint16_t faceColor = 0u; + if (!computeFaceColor(idx, faceColor)) + return false; + + hlsl::float32_t3 p0 = {}; + hlsl::float32_t3 p1 = {}; + hlsl::float32_t3 p2 = {}; + if (!decodePosition(idx.x, p0) || !decodePosition(idx.y, p1) || + !decodePosition(idx.z, p2)) + return false; + + hlsl::float32_t3 vertex1 = p2; + hlsl::float32_t3 vertex2 = p1; + hlsl::float32_t3 vertex3 = p0; + + if (flipHandedness) { + vertex1.x = -vertex1.x; + vertex2.x = -vertex2.x; + vertex3.x = -vertex3.x; + } + + const hlsl::float32_t3 planeNormal = + hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); + const 
float planeNormalLen2 = + hlsl::dot(planeNormal, planeNormal); + hlsl::float32_t3 normal = hlsl::float32_t3(0.f, 0.f, 0.f); + if (!hasNormals) { + if (planeNormalLen2 > 0.f) + normal = hlsl::normalize(planeNormal); + } + + if (hasNormals) { + hlsl::float32_t3 n0 = {}; + if (!decodeNormal(idx.x, n0)) + return false; + + hlsl::float32_t3 attrNormal = n0; + if (hlsl::dot(attrNormal, attrNormal) <= 0.f) { + hlsl::float32_t3 n1 = {}; + if (!decodeNormal(idx.y, n1)) + return false; + attrNormal = n1; + } + if (hlsl::dot(attrNormal, attrNormal) <= 0.f) { + hlsl::float32_t3 n2 = {}; + if (!decodeNormal(idx.z, n2)) + return false; + attrNormal = n2; + } + + if (hlsl::dot(attrNormal, attrNormal) > 0.f) { + if (flipHandedness) + attrNormal.x = -attrNormal.x; + if (planeNormalLen2 > 0.f && + hlsl::dot(attrNormal, planeNormal) < 0.f) + attrNormal = -attrNormal; + normal = attrNormal; + } else if (planeNormalLen2 > 0.f) { + normal = hlsl::normalize(planeNormal); + } + } + + writeRecord(normal.x, normal.y, normal.z, vertex1.x, vertex1.y, + vertex1.z, vertex2.x, vertex2.y, vertex2.z, + vertex3.x, vertex3.y, vertex3.z, faceColor); + return true; + })) + return false; + } + + const bool writeOk = SInterchangeIO::writeFileWithPolicy( + context->writeContext.outputFile, context->ioPlan, output.get(), + outputSize, &context->writeTelemetry); + if (writeOk) + context->fileOffset += outputSize; + return writeOk; + } + + static bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, + Context* context) { + if (!geom) + return false; + + const auto* indexing = geom->getIndexingCallback(); + if (!indexing || indexing->degree() != 3u) + return false; + + const auto& posView = geom->getPositionView(); + if (!posView) + return false; + const auto& normalView = geom->getNormalView(); + const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag( + E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + + const std::string name = context->writeContext.outputFile->getFileName() + 
.filename() + .replace_extension() + .string(); + const std::string_view solidName = name.empty() + ? std::string_view(AsciiDefaultName) + : std::string_view(name); + + if (!context->write(AsciiSolidPrefix, sizeof(AsciiSolidPrefix) - 1ull)) + return false; + + if (!context->write(solidName.data(), solidName.size())) + return false; + + if (!context->write("\n", sizeof("\n") - 1ull)) + return false; + + const uint32_t faceCount = static_cast(geom->getPrimitiveCount()); + for (uint32_t primIx = 0u; primIx < faceCount; ++primIx) { + hlsl::float32_t3 v0 = {}; + hlsl::float32_t3 v1 = {}; + hlsl::float32_t3 v2 = {}; + hlsl::uint32_t3 idx(0u); + if (!decodeTriangle(geom, indexing, posView, primIx, v0, v1, v2, &idx)) + return false; + if (!writeFaceText(v0, v1, v2, idx, normalView, flipHandedness, context)) + return false; + if (!context->write("\n", sizeof("\n") - 1ull)) + return false; + } + + if (!context->write(AsciiEndSolidPrefix, + sizeof(AsciiEndSolidPrefix) - 1ull)) + return false; + + if (!context->write(solidName.data(), solidName.size())) + return false; + + return true; + } + + static bool + writeFaceText(const hlsl::float32_t3& v1, const hlsl::float32_t3& v2, + const hlsl::float32_t3& v3, const hlsl::uint32_t3& idx, + const asset::ICPUPolygonGeometry::SDataView& normalView, + const bool flipHandedness, Context* context) { + hlsl::float32_t3 vertex1 = v3; + hlsl::float32_t3 vertex2 = v2; + hlsl::float32_t3 vertex3 = v1; + + if (flipHandedness) { + vertex1.x = -vertex1.x; + vertex2.x = -vertex2.x; + vertex3.x = -vertex3.x; + } + + const hlsl::float32_t3 planeNormal = + hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); + const float planeNormalLen2 = hlsl::dot(planeNormal, planeNormal); + hlsl::float32_t3 normal = hlsl::float32_t3(0.f, 0.f, 0.f); + if (planeNormalLen2 > 0.f) + normal = hlsl::normalize(planeNormal); + + hlsl::float32_t3 attrNormal = {}; + if (decodeTriangleNormal(normalView, idx, attrNormal)) { + if (flipHandedness) + attrNormal.x = 
-attrNormal.x; + if (planeNormalLen2 > 0.f && hlsl::dot(attrNormal, planeNormal) < 0.f) + attrNormal = -attrNormal; + normal = attrNormal; + } + + std::array faceText = {}; + char* cursor = faceText.data(); + char* const end = faceText.data() + faceText.size(); + const std::array vertices = {vertex1, vertex2, vertex3}; + if (!appendLiteral(cursor, end, "facet normal ", + sizeof("facet normal ") - 1ull)) + return false; + if (!appendVectorAsAsciiLine(cursor, end, normal)) + return false; + if (!appendLiteral(cursor, end, " outer loop\n", + sizeof(" outer loop\n") - 1ull)) + return false; + for (const auto& vertex : vertices) + if (!appendLiteral(cursor, end, " vertex ", + sizeof(" vertex ") - 1ull) || + !appendVectorAsAsciiLine(cursor, end, vertex)) + return false; + if (!appendLiteral(cursor, end, " endloop\n", + sizeof(" endloop\n") - 1ull)) + return false; + if (!appendLiteral(cursor, end, "endfacet\n", sizeof("endfacet\n") - 1ull)) + return false; + + return context->write(faceText.data(), + static_cast(cursor - faceText.data())); + } +}; + +} CSTLMeshWriter::CSTLMeshWriter() { @@ -107,11 +704,12 @@ writer_flags_t CSTLMeshWriter::getForcedFlags() bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { + using Context = Parse::Context; + if (!_override) getDefaultOverride(_override); IAssetWriter::SAssetWriteContext inCtx{_params, _file}; - const asset::ICPUPolygonGeometry* geom = IAsset::castDown(_params.rootAsset); if (!geom) return false; @@ -120,8 +718,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (!file) return false; - SContext context = { IAssetWriter::SAssetWriteContext{ inCtx.params, file} }; - + Context context = {IAssetWriter::SAssetWriteContext{inCtx.params, file}}; _params.logger.log("WRITING STL: writing the file %s", system::ILogger::ELL_INFO, file->getFileName().string().c_str()); const auto flags = 
_override->getAssetWritingFlags(context.writeContext, geom, 0u); @@ -131,698 +728,36 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ bool sizeKnown = false; if (binary) { - expectedSize = stl_writer_detail::BinaryPrefixBytes + static_cast(geom->getPrimitiveCount()) * stl_writer_detail::BinaryTriangleRecordBytes; + expectedSize = Parse::BinaryPrefixBytes + static_cast(geom->getPrimitiveCount()) * Parse::BinaryTriangleRecordBytes; sizeKnown = true; } - const bool fileMappable = core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); - context.ioPlan = SResolvedFileIOPolicy(_params.ioPolicy, expectedSize, sizeKnown, fileMappable); - if (!context.ioPlan.isValid()) - { - _params.logger.log("STL writer: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, file->getFileName().string().c_str(), context.ioPlan.reason); + context.ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, expectedSize, sizeKnown, file); + if (impl::SIODiagnostics::logInvalidPlan(_params.logger, "STL writer", file->getFileName().string().c_str(), context.ioPlan)) return false; - } if (context.ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile && sizeKnown) context.ioBuffer.reserve(static_cast(expectedSize)); else - context.ioBuffer.reserve(static_cast(std::min(context.ioPlan.chunkSizeBytes(), stl_writer_detail::IoFallbackReserveBytes))); + context.ioBuffer.reserve(static_cast(std::min(context.ioPlan.chunkSizeBytes(), Parse::IoFallbackReserveBytes))); - const bool written = binary ? writeMeshBinary(geom, &context) : writeMeshASCII(geom, &context); + const bool written = binary ? 
Parse::writeMeshBinary(geom, &context) : Parse::writeMeshASCII(geom, &context); if (!written) return false; - - const bool flushed = context.flush(); - if (!flushed) + if (!context.flush()) return false; const uint64_t ioMinWrite = context.writeTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = context.writeTelemetry.getAvgOrZero(); - if (SInterchangeIO::isTinyIOTelemetryLikely(context.writeTelemetry, context.fileOffset, _params.ioPolicy)) - { - _params.logger.log( - "STL writer tiny-io guard: file=%s writes=%llu min=%llu avg=%llu", - system::ILogger::ELL_WARNING, - file->getFileName().string().c_str(), - static_cast(context.writeTelemetry.callCount), - static_cast(ioMinWrite), - static_cast(ioAvgWrite)); - } - _params.logger.log( - "STL writer stats: file=%s bytes=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", - system::ILogger::ELL_PERFORMANCE, - file->getFileName().string().c_str(), - static_cast(context.fileOffset), - binary ? 
1 : 0, - static_cast(context.writeTelemetry.callCount), - static_cast(ioMinWrite), - static_cast(ioAvgWrite), - system::to_string(_params.ioPolicy.strategy).c_str(), - system::to_string(context.ioPlan.strategy).c_str(), - static_cast(context.ioPlan.chunkSizeBytes()), - context.ioPlan.reason); - - return true; -} - -bool stl_writer_detail::SContext::flush() -{ - if (ioBuffer.empty()) - return true; - - size_t bytesWritten = 0ull; - const size_t totalBytes = ioBuffer.size(); - while (bytesWritten < totalBytes) - { - system::IFile::success_t success; - writeContext.outputFile->write( - success, - ioBuffer.data() + bytesWritten, - fileOffset + bytesWritten, - totalBytes - bytesWritten); - if (!success) - return false; - const size_t processed = success.getBytesProcessed(); - if (processed == 0ull) - return false; - writeTelemetry.account(processed); - bytesWritten += processed; - } - fileOffset += totalBytes; - ioBuffer.clear(); - return true; -} - -bool stl_writer_detail::SContext::write(const void* data, size_t size) -{ - if (!data && size != 0ull) - return false; - if (size == 0ull) - return true; - - const uint8_t* src = reinterpret_cast(data); - switch (ioPlan.strategy) - { - case SResolvedFileIOPolicy::Strategy::WholeFile: - { - const size_t oldSize = ioBuffer.size(); - ioBuffer.resize(oldSize + size); - std::memcpy(ioBuffer.data() + oldSize, src, size); - return true; - } - case SResolvedFileIOPolicy::Strategy::Chunked: - default: - { - const size_t chunkSize = static_cast(ioPlan.chunkSizeBytes()); - size_t remaining = size; - while (remaining > 0ull) - { - const size_t freeSpace = chunkSize - ioBuffer.size(); - const size_t toCopy = std::min(freeSpace, remaining); - const size_t oldSize = ioBuffer.size(); - ioBuffer.resize(oldSize + toCopy); - std::memcpy(ioBuffer.data() + oldSize, src, toCopy); - src += toCopy; - remaining -= toCopy; - - if (ioBuffer.size() == chunkSize) - { - if (!flush()) - return false; - } - } - return true; - } - } -} - -bool 
appendLiteral(char*& cursor, char* const end, const char* text, const size_t textSize) -{ - if (!cursor || cursor + textSize > end) - return false; - std::memcpy(cursor, text, textSize); - cursor += textSize; - return true; -} - -bool appendVectorAsAsciiLine(char*& cursor, char* const end, const hlsl::float32_t3& v) -{ - cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.x); - if (cursor >= end) - return false; - *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.y); - if (cursor >= end) - return false; - *(cursor++) = ' '; - cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.z); - if (cursor >= end) - return false; - *(cursor++) = '\n'; - return true; -} - -bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, hlsl::float32_t3& out0, hlsl::float32_t3& out1, hlsl::float32_t3& out2, uint32_t* outIdx) -{ - uint32_t idx[3] = {}; - const auto& indexView = geom->getIndexView(); - const void* indexBuffer = indexView ? indexView.getPointer() : nullptr; - const uint64_t indexSize = indexView ? 
indexView.composed.getStride() : 0u; - IPolygonGeometryBase::IIndexingCallback::SContext ctx = { - .indexBuffer = indexBuffer, - .indexSize = indexSize, - .beginPrimitive = primIx, - .endPrimitive = primIx + 1u, - .out = idx - }; - indexing->operator()(ctx); - if (outIdx) - { - outIdx[0] = idx[0]; - outIdx[1] = idx[1]; - outIdx[2] = idx[2]; - } - - hlsl::float32_t3 p0 = {}; - hlsl::float32_t3 p1 = {}; - hlsl::float32_t3 p2 = {}; - if (!posView.decodeElement(idx[0], p0)) - return false; - if (!posView.decodeElement(idx[1], p1)) - return false; - if (!posView.decodeElement(idx[2], p2)) - return false; - - out0 = p0; - out1 = p1; - out2 = p2; - return true; -} - -bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const uint32_t* idx, hlsl::float32_t3& outNormal) -{ - if (!normalView || !idx) - return false; - - hlsl::float32_t3 n0 = {}; - hlsl::float32_t3 n1 = {}; - hlsl::float32_t3 n2 = {}; - if (!normalView.decodeElement(idx[0], n0)) - return false; - if (!normalView.decodeElement(idx[1], n1)) - return false; - if (!normalView.decodeElement(idx[2], n2)) - return false; - - auto normal = n0; - if (normal.x == 0.f && normal.y == 0.f && normal.z == 0.f) - normal = n1; - if (normal.x == 0.f && normal.y == 0.f && normal.z == 0.f) - normal = n2; - if (normal.x == 0.f && normal.y == 0.f && normal.z == 0.f) - return false; - - outNormal = normal; - return true; -} - -double stlNormalizeColorComponentToUnit(double value) -{ - if (!std::isfinite(value)) - return 0.0; - if (value > 1.0) - value /= 255.0; - return std::clamp(value, 0.0, 1.0); -} - -uint16_t stlPackViscamColorFromB8G8R8A8(const uint32_t color) -{ - const void* src[4] = { &color, nullptr, nullptr, nullptr }; - uint16_t packed = 0u; - convertColor(src, &packed, 0u, 0u); - packed |= 0x8000u; - return packed; -} - -const ICPUPolygonGeometry::SDataView* stlGetColorView(const ICPUPolygonGeometry* geom, const size_t vertexCount) -{ - const auto* view = SGeometryWriterCommon::getAuxViewAt(geom, 
SSTLPolygonGeometryAuxLayout::COLOR0, vertexCount); - if (!view) - return nullptr; - return getFormatChannelCount(view->composed.format) >= 3u ? view : nullptr; -} - -bool stlDecodeColorB8G8R8A8(const ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, uint32_t& outColor) -{ - if (colorView.composed.format == EF_B8G8R8A8_UNORM && colorView.composed.getStride() == sizeof(uint32_t)) - { - const auto* const ptr = reinterpret_cast(colorView.getPointer()); - if (!ptr) - return false; - std::memcpy(&outColor, ptr + static_cast(ix) * sizeof(uint32_t), sizeof(outColor)); - return true; - } - - hlsl::float64_t4 decoded = {}; - if (!colorView.decodeElement(ix, decoded)) - return false; - const double rgbaUnit[4] = { - stlNormalizeColorComponentToUnit(decoded.x), - stlNormalizeColorComponentToUnit(decoded.y), - stlNormalizeColorComponentToUnit(decoded.z), - stlNormalizeColorComponentToUnit(decoded.w) - }; - encodePixels(&outColor, rgbaUnit); - return true; -} - -void stlDecodeColorUnitRGBAFromB8G8R8A8(const uint32_t color, double (&out)[4]) -{ - const void* src[4] = { &color, nullptr, nullptr, nullptr }; - decodePixels(src, out, 0u, 0u); -} - -bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, SContext* context) -{ - if (!geom || !context || !context->writeContext.outputFile) - return false; - - const auto& posView = geom->getPositionView(); - if (!posView) - return false; - - const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); - const size_t vertexCount = posView.getElementCount(); - if (vertexCount == 0ull) - return false; - - uint32_t facenum = 0u; - size_t faceCount = 0ull; - if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) - return false; - if (faceCount > static_cast(std::numeric_limits::max())) - return false; - facenum = static_cast(faceCount); - - const size_t outputSize = stl_writer_detail::BinaryPrefixBytes + static_cast(facenum) * 
stl_writer_detail::BinaryTriangleRecordBytes; - std::unique_ptr output(new (std::nothrow) uint8_t[outputSize]); - if (!output) - return false; - uint8_t* dst = output.get(); - - std::memset(dst, 0, stl_writer_detail::BinaryHeaderBytes); - dst += stl_writer_detail::BinaryHeaderBytes; - - std::memcpy(dst, &facenum, sizeof(facenum)); - dst += sizeof(facenum); - - const auto& normalView = geom->getNormalView(); - const bool hasNormals = static_cast(normalView); - const auto* const colorView = stlGetColorView(geom, vertexCount); - const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(posView); - const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; - const bool hasImplicitTriangleIndices = !geom->getIndexView(); - - auto decodePosition = [&](const uint32_t ix, hlsl::float32_t3& out)->bool - { - if (tightPositions) - { - out = tightPositions[ix]; - return true; - } - return posView.decodeElement(ix, out); - }; - - auto decodeNormal = [&](const uint32_t ix, hlsl::float32_t3& out)->bool - { - if (!hasNormals) - return false; - if (tightNormals) - { - out = tightNormals[ix]; - return true; - } - return normalView.decodeElement(ix, out); - }; - auto computeFaceColor = [&](const uint32_t i0, const uint32_t i1, const uint32_t i2, uint16_t& outColor)->bool - { - outColor = 0u; - if (!colorView) - return true; - uint32_t c0 = 0u, c1 = 0u, c2 = 0u; - if (!stlDecodeColorB8G8R8A8(*colorView, i0, c0)) - return false; - if (!stlDecodeColorB8G8R8A8(*colorView, i1, c1)) - return false; - if (!stlDecodeColorB8G8R8A8(*colorView, i2, c2)) - return false; - double rgba0[4] = {}; - double rgba1[4] = {}; - double rgba2[4] = {}; - stlDecodeColorUnitRGBAFromB8G8R8A8(c0, rgba0); - stlDecodeColorUnitRGBAFromB8G8R8A8(c1, rgba1); - stlDecodeColorUnitRGBAFromB8G8R8A8(c2, rgba2); - const double rgbaAvg[4] = { - (rgba0[0] + rgba1[0] + rgba2[0]) / 3.0, - (rgba0[1] + rgba1[1] + rgba2[1]) / 3.0, - (rgba0[2] + 
rgba1[2] + rgba2[2]) / 3.0, - 1.0 - }; - uint32_t avgColor = 0u; - encodePixels(&avgColor, rgbaAvg); - outColor = stlPackViscamColorFromB8G8R8A8(avgColor); - return true; - }; - auto writeRecord = [&dst](const float nx, const float ny, const float nz, const float v1x, const float v1y, const float v1z, const float v2x, const float v2y, const float v2z, const float v3x, const float v3y, const float v3z, const uint16_t attribute)->void - { - const float payload[stl_writer_detail::BinaryTriangleFloatCount] = { - nx, ny, nz, - v1x, v1y, v1z, - v2x, v2y, v2z, - v3x, v3y, v3z - }; - std::memcpy(dst, payload, stl_writer_detail::BinaryTriangleFloatBytes); - dst += stl_writer_detail::BinaryTriangleFloatBytes; - std::memcpy(dst, &attribute, stl_writer_detail::BinaryTriangleAttributeBytes); - dst += stl_writer_detail::BinaryTriangleAttributeBytes; - }; - auto prepareVertices = [&](const hlsl::float32_t3& p0, const hlsl::float32_t3& p1, const hlsl::float32_t3& p2, hlsl::float32_t3& vertex1, hlsl::float32_t3& vertex2, hlsl::float32_t3& vertex3)->void - { - vertex1 = p2; - vertex2 = p1; - vertex3 = p0; - if (flipHandedness) - { - vertex1.x = -vertex1.x; - vertex2.x = -vertex2.x; - vertex3.x = -vertex3.x; - } - }; - auto computePlaneNormal = [&](const hlsl::float32_t3& vertex1, const hlsl::float32_t3& vertex2, const hlsl::float32_t3& vertex3)->hlsl::float32_t3 - { - const hlsl::float32_t3 planeNormal = hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); - const float planeNormalLen2 = hlsl::dot(planeNormal, planeNormal); - return planeNormalLen2 > 0.f ? 
hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); - }; - - const bool hasFastTightPath = hasImplicitTriangleIndices && (tightPositions != nullptr) && (!hasNormals || (tightNormals != nullptr)); - if (hasFastTightPath && hasNormals) - { - bool allFastNormalsNonZero = true; - const size_t normalCount = static_cast(facenum) * 3ull; - for (size_t i = 0ull; i < normalCount; ++i) - { - const auto& n = tightNormals[i]; - if (n.x == 0.f && n.y == 0.f && n.z == 0.f) - { - allFastNormalsNonZero = false; - break; - } - } - - const hlsl::float32_t3* posTri = tightPositions; - const hlsl::float32_t3* nrmTri = tightNormals; - if (allFastNormalsNonZero) - { - for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u, nrmTri += 3u) - { - uint16_t faceColor = 0u; - if (!computeFaceColor(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u, faceColor)) - return false; - - hlsl::float32_t3 vertex1 = {}; - hlsl::float32_t3 vertex2 = {}; - hlsl::float32_t3 vertex3 = {}; - prepareVertices(posTri[0u], posTri[1u], posTri[2u], vertex1, vertex2, vertex3); - - hlsl::float32_t3 attrNormal = nrmTri[0u]; - if (flipHandedness) - attrNormal.x = -attrNormal.x; - - writeRecord( - attrNormal.x, attrNormal.y, attrNormal.z, - vertex1.x, vertex1.y, vertex1.z, - vertex2.x, vertex2.y, vertex2.z, - vertex3.x, vertex3.y, vertex3.z, - faceColor); - } - } - else - { - for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u, nrmTri += 3u) - { - uint16_t faceColor = 0u; - if (!computeFaceColor(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u, faceColor)) - return false; - - hlsl::float32_t3 vertex1 = {}; - hlsl::float32_t3 vertex2 = {}; - hlsl::float32_t3 vertex3 = {}; - prepareVertices(posTri[0u], posTri[1u], posTri[2u], vertex1, vertex2, vertex3); - - hlsl::float32_t3 normal = hlsl::float32_t3(0.f, 0.f, 0.f); - hlsl::float32_t3 attrNormal = nrmTri[0u]; - if (attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) - attrNormal = nrmTri[1u]; - if (attrNormal.x 
== 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f) - attrNormal = nrmTri[2u]; - if (!(attrNormal.x == 0.f && attrNormal.y == 0.f && attrNormal.z == 0.f)) - { - if (flipHandedness) - attrNormal.x = -attrNormal.x; - normal = attrNormal; - } - - if (normal.x == 0.f && normal.y == 0.f && normal.z == 0.f) - normal = computePlaneNormal(vertex1, vertex2, vertex3); - - writeRecord( - normal.x, normal.y, normal.z, - vertex1.x, vertex1.y, vertex1.z, - vertex2.x, vertex2.y, vertex2.z, - vertex3.x, vertex3.y, vertex3.z, - faceColor); - } - } - } - else if (hasFastTightPath) - { - const hlsl::float32_t3* posTri = tightPositions; - for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u) - { - uint16_t faceColor = 0u; - if (!computeFaceColor(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u, faceColor)) - return false; - - hlsl::float32_t3 vertex1 = {}; - hlsl::float32_t3 vertex2 = {}; - hlsl::float32_t3 vertex3 = {}; - prepareVertices(posTri[0u], posTri[1u], posTri[2u], vertex1, vertex2, vertex3); - const hlsl::float32_t3 normal = computePlaneNormal(vertex1, vertex2, vertex3); - - writeRecord( - normal.x, normal.y, normal.z, - vertex1.x, vertex1.y, vertex1.z, - vertex2.x, vertex2.y, vertex2.z, - vertex3.x, vertex3.y, vertex3.z, - faceColor); - } - } - else - { - if (!SGeometryWriterCommon::visitTriangleIndices(geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2)->bool - { - uint16_t faceColor = 0u; - if (!computeFaceColor(i0, i1, i2, faceColor)) - return false; - - hlsl::float32_t3 p0 = {}; - hlsl::float32_t3 p1 = {}; - hlsl::float32_t3 p2 = {}; - if (!decodePosition(i0, p0) || !decodePosition(i1, p1) || !decodePosition(i2, p2)) - return false; - - hlsl::float32_t3 vertex1 = p2; - hlsl::float32_t3 vertex2 = p1; - hlsl::float32_t3 vertex3 = p0; - - if (flipHandedness) - { - vertex1.x = -vertex1.x; - vertex2.x = -vertex2.x; - vertex3.x = -vertex3.x; - } - - const hlsl::float32_t3 planeNormal = hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); - 
const float planeNormalLen2 = hlsl::dot(planeNormal, planeNormal); - hlsl::float32_t3 normal = hlsl::float32_t3(0.f, 0.f, 0.f); - if (!hasNormals) - { - if (planeNormalLen2 > 0.f) - normal = hlsl::normalize(planeNormal); - } - - if (hasNormals) - { - hlsl::float32_t3 n0 = {}; - if (!decodeNormal(i0, n0)) - return false; - - hlsl::float32_t3 attrNormal = n0; - if (hlsl::dot(attrNormal, attrNormal) <= 0.f) - { - hlsl::float32_t3 n1 = {}; - if (!decodeNormal(i1, n1)) - return false; - attrNormal = n1; - } - if (hlsl::dot(attrNormal, attrNormal) <= 0.f) - { - hlsl::float32_t3 n2 = {}; - if (!decodeNormal(i2, n2)) - return false; - attrNormal = n2; - } - - if (hlsl::dot(attrNormal, attrNormal) > 0.f) - { - if (flipHandedness) - attrNormal.x = -attrNormal.x; - if (planeNormalLen2 > 0.f && hlsl::dot(attrNormal, planeNormal) < 0.f) - attrNormal = -attrNormal; - normal = attrNormal; - } - else if (planeNormalLen2 > 0.f) - { - normal = hlsl::normalize(planeNormal); - } - } - - writeRecord( - normal.x, normal.y, normal.z, - vertex1.x, vertex1.y, vertex1.z, - vertex2.x, vertex2.y, vertex2.z, - vertex3.x, vertex3.y, vertex3.z, - faceColor); - return true; - })) - return false; - } - - const bool writeOk = SInterchangeIO::writeFileWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, &context->writeTelemetry); - if (writeOk) - context->fileOffset += outputSize; - return writeOk; -} - -bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, SContext* context) -{ - if (!geom) - return false; - - const auto* indexing = geom->getIndexingCallback(); - if (!indexing || indexing->degree() != 3u) - return false; - - const auto& posView = geom->getPositionView(); - if (!posView) - return false; - const auto& normalView = geom->getNormalView(); - const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); - - const std::string name = 
context->writeContext.outputFile->getFileName().filename().replace_extension().string(); - const std::string_view solidName = name.empty() ? std::string_view(stl_writer_detail::AsciiDefaultName) : std::string_view(name); - - if (!context->write(stl_writer_detail::AsciiSolidPrefix, sizeof(stl_writer_detail::AsciiSolidPrefix) - 1ull)) - return false; - - if (!context->write(solidName.data(), solidName.size())) - return false; - - if (!context->write("\n", sizeof("\n") - 1ull)) - return false; - - const uint32_t faceCount = static_cast(geom->getPrimitiveCount()); - for (uint32_t primIx = 0u; primIx < faceCount; ++primIx) - { - hlsl::float32_t3 v0 = {}; - hlsl::float32_t3 v1 = {}; - hlsl::float32_t3 v2 = {}; - uint32_t idx[3] = {}; - if (!decodeTriangle(geom, indexing, posView, primIx, v0, v1, v2, idx)) - return false; - if (!writeFaceText(v0, v1, v2, idx, normalView, flipHandedness, context)) - return false; - if (!context->write("\n", sizeof("\n") - 1ull)) - return false; - } - - if (!context->write(stl_writer_detail::AsciiEndSolidPrefix, sizeof(stl_writer_detail::AsciiEndSolidPrefix) - 1ull)) - return false; - - if (!context->write(solidName.data(), solidName.size())) - return false; + impl::SIODiagnostics::logTinyIO(_params.logger, "STL writer", file->getFileName().string().c_str(), context.writeTelemetry, context.fileOffset, _params.ioPolicy, "writes"); + _params.logger.log("STL writer stats: file=%s bytes=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(context.fileOffset), binary ? 
1 : 0, + static_cast(context.writeTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), + system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(context.ioPlan.strategy).c_str(), static_cast(context.ioPlan.chunkSizeBytes()), context.ioPlan.reason); return true; } -bool writeFaceText( - const hlsl::float32_t3& v1, - const hlsl::float32_t3& v2, - const hlsl::float32_t3& v3, - const uint32_t* idx, - const asset::ICPUPolygonGeometry::SDataView& normalView, - const bool flipHandedness, - SContext* context) -{ - hlsl::float32_t3 vertex1 = v3; - hlsl::float32_t3 vertex2 = v2; - hlsl::float32_t3 vertex3 = v1; - - if (flipHandedness) - { - vertex1.x = -vertex1.x; - vertex2.x = -vertex2.x; - vertex3.x = -vertex3.x; - } - - const hlsl::float32_t3 planeNormal = hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); - const float planeNormalLen2 = hlsl::dot(planeNormal, planeNormal); - hlsl::float32_t3 normal = hlsl::float32_t3(0.f, 0.f, 0.f); - if (planeNormalLen2 > 0.f) - normal = hlsl::normalize(planeNormal); - - hlsl::float32_t3 attrNormal = {}; - if (decodeTriangleNormal(normalView, idx, attrNormal)) - { - if (flipHandedness) - attrNormal.x = -attrNormal.x; - if (planeNormalLen2 > 0.f && hlsl::dot(attrNormal, planeNormal) < 0.f) - attrNormal = -attrNormal; - normal = attrNormal; - } - - std::array faceText = {}; - char* cursor = faceText.data(); - char* const end = faceText.data() + faceText.size(); - const hlsl::float32_t3 vertices[3] = { vertex1, vertex2, vertex3 }; - if (!appendLiteral(cursor, end, "facet normal ", sizeof("facet normal ") - 1ull)) - return false; - if (!appendVectorAsAsciiLine(cursor, end, normal)) - return false; - if (!appendLiteral(cursor, end, " outer loop\n", sizeof(" outer loop\n") - 1ull)) - return false; - for (const auto& vertex : vertices) - if (!appendLiteral(cursor, end, " vertex ", sizeof(" vertex ") - 1ull) || !appendVectorAsAsciiLine(cursor, end, vertex)) - return false; - if (!appendLiteral(cursor, end, " 
endloop\n", sizeof(" endloop\n") - 1ull)) - return false; - if (!appendLiteral(cursor, end, "endfacet\n", sizeof("endfacet\n") - 1ull)) - return false; - - return context->write(faceText.data(), static_cast(cursor - faceText.data())); -} - } #endif diff --git a/src/nbl/asset/interchange/SGeometryViewDecode.h b/src/nbl/asset/interchange/SGeometryViewDecode.h new file mode 100644 index 0000000000..5847f3ea7f --- /dev/null +++ b/src/nbl/asset/interchange/SGeometryViewDecode.h @@ -0,0 +1,106 @@ +// Internal src-only header. +// Do not include from public headers. +#ifndef _NBL_ASSET_S_GEOMETRY_VIEW_DECODE_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_VIEW_DECODE_H_INCLUDED_ + +#include "nbl/asset/ICPUPolygonGeometry.h" +#include "nbl/asset/format/decodePixels.h" +#include "nbl/builtin/hlsl/array_accessors.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" + +#include +#include +#include + + +namespace nbl::asset +{ + +class SGeometryViewDecode +{ + public: + enum class EMode : uint8_t + { + Cooked, + Raw + }; + + template + static inline bool decodeElement(const ICPUPolygonGeometry::SDataView& view, const size_t ix, Out& out) + { + using scalar_t = typename STraits::scalar_type; + + out = {}; + if (!view.composed.isFormatted()) + return false; + + const void* const src = view.getPointer(ix); + if (!src) + return false; + + std::array srcArr = {src}; + std::array tmp = {}; + if (!decodePixels(view.composed.format, srcArr.data(), tmp.data(), 0u, 0u)) + return false; + + const uint32_t channels = std::min(STraits::Dimension, getFormatChannelCount(view.composed.format)); + if constexpr (Mode == EMode::Cooked && std::is_floating_point_v) + { + if (isNormalizedFormat(view.composed.format)) + { + const auto range = view.composed.getRange>(); + for (uint32_t i = 0u; i < channels; ++i) + tmp[i] = static_cast(tmp[i] * (range.maxVx[i] - range.minVx[i]) + range.minVx[i]); + } + } + + for (uint32_t i = 0u; i < channels; ++i) + STraits::set(out, i, tmp[i]); + return 
true; + } + + private: + template + struct SIsStdArray : std::false_type {}; + + template + struct SIsStdArray> : std::true_type {}; + + template + struct SHasVectorTraits : std::false_type {}; + + template + struct SHasVectorTraits::scalar_type>> : std::true_type {}; + + template::value, bool IsVector = (!IsStdArray && SHasVectorTraits::value)> + struct STraits; + + template + struct STraits + { + using scalar_type = typename Out::value_type; + static constexpr uint32_t Dimension = std::tuple_size_v; + + static inline void set(Out& out, const uint32_t ix, const scalar_type value) + { + out[ix] = value; + } + }; + + template + struct STraits + { + using scalar_type = typename hlsl::vector_traits::scalar_type; + static constexpr uint32_t Dimension = hlsl::vector_traits::Dimension; + + static inline void set(Out& out, const uint32_t ix, const scalar_type value) + { + hlsl::array_set setter; + setter(out, ix, value); + } + }; +}; + +} + +#endif diff --git a/src/nbl/asset/interchange/impl/SBinaryData.h b/src/nbl/asset/interchange/impl/SBinaryData.h new file mode 100644 index 0000000000..76107f8714 --- /dev/null +++ b/src/nbl/asset/interchange/impl/SBinaryData.h @@ -0,0 +1,51 @@ +// Internal src-only header. +// Do not include from public headers. +#ifndef _NBL_ASSET_IMPL_S_BINARY_DATA_H_INCLUDED_ +#define _NBL_ASSET_IMPL_S_BINARY_DATA_H_INCLUDED_ + +#include +#include +#include + + +namespace nbl::asset::impl +{ + +struct BinaryData +{ + template + static inline T byteswap(const T value) + { + auto retval = value; + const auto* it = reinterpret_cast(&value); + std::reverse_copy(it, it + sizeof(retval), reinterpret_cast(&retval)); + return retval; + } + + template + static inline T loadUnaligned(const void* src, const bool swapEndian = false) + { + T value = {}; + if (!src) + return value; + std::memcpy(&value, src, sizeof(value)); + return swapEndian ? 
byteswap(value) : value; + } + + template + static inline void storeUnaligned(void* dst, const T& value) + { + std::memcpy(dst, &value, sizeof(value)); + } + + template + static inline void storeUnalignedAdvance(uint8_t*& dst, const T& value) + { + storeUnaligned(dst, value); + dst += sizeof(value); + } +}; + +} + +#endif diff --git a/src/nbl/asset/interchange/impl/SFileAccess.h b/src/nbl/asset/interchange/impl/SFileAccess.h new file mode 100644 index 0000000000..540d2b3459 --- /dev/null +++ b/src/nbl/asset/interchange/impl/SFileAccess.h @@ -0,0 +1,60 @@ +// Internal src-only header. +// Do not include from public headers. +#ifndef _NBL_ASSET_IMPL_S_FILE_ACCESS_H_INCLUDED_ +#define _NBL_ASSET_IMPL_S_FILE_ACCESS_H_INCLUDED_ + +#include "nbl/core/declarations.h" +#include "nbl/asset/interchange/SInterchangeIO.h" + + +namespace nbl::asset::impl +{ + +class SFileAccess +{ + public: + static inline bool isMappable(const system::IFile* file) + { + return file && core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); + } + + static inline SResolvedFileIOPolicy resolvePlan(const SFileIOPolicy& ioPolicy, const uint64_t payloadBytes, const bool sizeKnown, const system::IFile* file) + { + return SResolvedFileIOPolicy(ioPolicy, payloadBytes, sizeKnown, isMappable(file)); + } + + static inline const uint8_t* readRange(system::IFile* file, const size_t offset, const size_t bytes, core::vector& storage, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr, const bool zeroTerminate = false) + { + storage.resize(bytes + (zeroTerminate ? 
1ull : 0ull), 0u); + if (!SInterchangeIO::readFileWithPolicy(file, storage.data(), offset, bytes, ioPlan, ioTelemetry)) + return nullptr; + if (zeroTerminate) + storage[bytes] = 0u; + return storage.data(); + } + + static inline const uint8_t* mapOrReadWholeFile(system::IFile* file, const size_t bytes, core::vector& storage, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr, bool* wasMapped = nullptr, const bool zeroTerminate = false) + { + if (wasMapped) + *wasMapped = false; + + if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) + { + const auto* mapped = reinterpret_cast(static_cast(file)->getMappedPointer()); + if (mapped) + { + if (ioTelemetry) + ioTelemetry->account(bytes); + if (wasMapped) + *wasMapped = true; + return mapped; + } + } + + return readRange(file, 0ull, bytes, storage, ioPlan, ioTelemetry, zeroTerminate); + } +}; + +} + +#endif diff --git a/src/nbl/asset/interchange/impl/SIODiagnostics.h b/src/nbl/asset/interchange/impl/SIODiagnostics.h new file mode 100644 index 0000000000..eede03dae3 --- /dev/null +++ b/src/nbl/asset/interchange/impl/SIODiagnostics.h @@ -0,0 +1,40 @@ +// Internal src-only header. +// Do not include from public headers. 
+#ifndef _NBL_ASSET_IMPL_S_IO_DIAGNOSTICS_H_INCLUDED_ +#define _NBL_ASSET_IMPL_S_IO_DIAGNOSTICS_H_INCLUDED_ + +#include "nbl/asset/interchange/SInterchangeIO.h" +#include "nbl/system/ILogger.h" + + +namespace nbl::asset::impl +{ + +class SIODiagnostics +{ + public: + template + static inline bool logInvalidPlan(Logger& logger, const char* const owner, const char* const fileName, const SResolvedFileIOPolicy& ioPlan) + { + if (ioPlan.isValid()) + return false; + logger.log("%s: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, owner, fileName, ioPlan.reason); + return true; + } + + template + static inline void logTinyIO(Logger& logger, const char* const owner, const char* const fileName, const SInterchangeIO::STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy, const char* const opName) + { + if (!SInterchangeIO::isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy)) + return; + logger.log("%s tiny-io guard: file=%s %s=%llu min=%llu avg=%llu", + system::ILogger::ELL_WARNING, owner, fileName, opName, + static_cast(telemetry.callCount), + static_cast(telemetry.getMinOrZero()), + static_cast(telemetry.getAvgOrZero())); + } +}; + +} + +#endif diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h new file mode 100644 index 0000000000..45e8bd405d --- /dev/null +++ b/src/nbl/asset/interchange/impl/STextParse.h @@ -0,0 +1,171 @@ +// Internal src-only header. +// Do not include from public headers. 
+#ifndef _NBL_ASSET_IMPL_S_TEXT_PARSE_H_INCLUDED_ +#define _NBL_ASSET_IMPL_S_TEXT_PARSE_H_INCLUDED_ + +#include "nbl/core/string/stringutil.h" + +#include +#include +#include +#include +#include +#include + +#include + + +namespace nbl::asset::impl +{ + +struct TextParse +{ + template + static inline bool parseNumber(const char*& ptr, const char* const end, T& out) + { + static_assert(std::is_arithmetic_v); + if constexpr (std::is_floating_point_v) + { + const auto parseResult = fast_float::from_chars(ptr, end, out); + if (parseResult.ec != std::errc() || parseResult.ptr == ptr) + return false; + ptr = parseResult.ptr; + return true; + } + else + { + const auto parseResult = std::from_chars(ptr, end, out); + if (parseResult.ec != std::errc() || parseResult.ptr == ptr) + return false; + ptr = parseResult.ptr; + return true; + } + } + + template + static inline bool parseExactNumber(const char* const begin, const char* const end, T& out) + { + auto ptr = begin; + return parseNumber(ptr, end, out) && ptr == end; + } + + template + static inline bool parseExactNumber(const std::string_view token, T& out) + { + return parseExactNumber(token.data(), token.data() + token.size(), out); + } + + template + static inline bool parseNonZeroNumber(const char*& ptr, const char* const end, T& out) + { + return parseNumber(ptr, end, out) && out != static_cast(0); + } + + static inline bool isInlineWhitespace(const char c) + { + return c == ' ' || c == '\t' || c == '\v' || c == '\f'; + } + + static inline void skipInlineWhitespace(const char*& ptr, const char* const end) + { + while (ptr < end && isInlineWhitespace(*ptr)) + ++ptr; + } + + static inline void skipWhitespace(const char*& ptr, const char* const end) + { + while (ptr < end && core::isspace(*ptr)) + ++ptr; + } + + static inline std::string_view trimInlineWhitespace(std::string_view token) + { + while (!token.empty() && isInlineWhitespace(token.front())) + token.remove_prefix(1ull); + while (!token.empty() && 
isInlineWhitespace(token.back())) + token.remove_suffix(1ull); + return token; + } + + static inline std::string_view trimWhitespace(std::string_view token) + { + while (!token.empty() && core::isspace(token.front())) + token.remove_prefix(1ull); + while (!token.empty() && core::isspace(token.back())) + token.remove_suffix(1ull); + return token; + } + + static inline bool tokenEqualsIgnoreCase(const std::string_view lhs, const std::string_view rhs) + { + if (lhs.size() != rhs.size()) + return false; + for (size_t i = 0ull; i < lhs.size(); ++i) + { + const auto l = static_cast(std::tolower(static_cast(lhs[i]))); + const auto r = static_cast(std::tolower(static_cast(rhs[i]))); + if (l != r) + return false; + } + return true; + } + + static inline std::optional readToken(const char*& cursor, const char* const end) + { + skipWhitespace(cursor, end); + if (cursor >= end) + return std::nullopt; + + const auto* tokenEnd = cursor; + while (tokenEnd < end && !core::isspace(*tokenEnd)) + ++tokenEnd; + + const std::string_view token(cursor, static_cast(tokenEnd - cursor)); + cursor = tokenEnd; + return token; + } + + struct Cursor + { + const char* current = nullptr; + const char* end = nullptr; + + inline Cursor() = default; + inline Cursor(const char* begin, const char* endPtr) : current(begin), end(endPtr) {} + + inline void skipWhitespace() + { + TextParse::skipWhitespace(current, end); + } + + inline void skipInlineWhitespace() + { + TextParse::skipInlineWhitespace(current, end); + } + + inline std::optional readToken() + { + return TextParse::readToken(current, end); + } + + template + inline bool parseNumber(T& out) + { + TextParse::skipWhitespace(current, end); + return TextParse::parseNumber(current, end, out); + } + + template + inline std::optional readNumber() + { + T value = {}; + if (!parseNumber(value)) + return std::nullopt; + return value; + } + }; +}; + +} + +#endif From 68cb7e65cd7ed8056ad1a87da43b57b3a2d78f94 Mon Sep 17 00:00:00 2001 From: Arkadiusz 
Lachowicz Date: Sun, 8 Mar 2026 13:49:09 +0100 Subject: [PATCH 074/118] Refine mesh interchange cleanup --- src/nbl/asset/interchange/COBJMeshWriter.cpp | 34 ++-- .../asset/interchange/CPLYMeshFileLoader.cpp | 2 +- .../asset/interchange/CPLYMeshFileLoader.h | 2 - src/nbl/asset/interchange/CPLYMeshWriter.cpp | 82 ++++++--- .../asset/interchange/CSTLMeshFileLoader.cpp | 3 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 3 +- .../asset/interchange/SGeometryViewDecode.h | 160 +++++++++++------- src/nbl/asset/interchange/impl/SBinaryData.h | 1 + src/nbl/asset/interchange/impl/SFileAccess.h | 1 + .../asset/interchange/impl/SIODiagnostics.h | 1 + src/nbl/asset/interchange/impl/STextParse.h | 2 + 11 files changed, 184 insertions(+), 107 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 1c088a99e0..5b2822c34b 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -1,3 +1,4 @@ +#ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h @@ -12,8 +13,6 @@ #include "nbl/builtin/hlsl/array_accessors.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" -#ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ - #include "nbl/system/IFile.h" #include @@ -212,7 +211,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ uint32_t positionBase = 1u; uint32_t uvBase = 1u; uint32_t normalBase = 1u; - hlsl::float64_t4 tmp = {}; + using SemanticDecode = SGeometryViewDecode::Prepared; for (size_t itemIx = 0u; itemIx < items.size(); ++itemIx) { const auto& item = items[itemIx]; @@ -254,6 +253,9 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(positionView); const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; const hlsl::float32_t2* const tightUV = hasUVs ? SGeometryWriterCommon::getTightView(*uvView) : nullptr; + const SemanticDecode positionDecode = tightPositions ? SemanticDecode{} : SGeometryViewDecode::prepare(positionView); + const SemanticDecode uvDecode = (!hasUVs || tightUV) ? SemanticDecode{} : SGeometryViewDecode::prepare(*uvView); + const SemanticDecode normalDecode = (!hasNormals || tightNormals) ? 
SemanticDecode{} : SGeometryViewDecode::prepare(normalView); if (itemIx != 0u) output.push_back('\n'); @@ -264,12 +266,8 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ hlsl::float32_t3 vertex = {}; if (tightPositions) vertex = tightPositions[i]; - else - { - if (!SGeometryViewDecode::decodeElement(positionView, i, tmp)) - return false; - vertex = hlsl::float32_t3(static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z)); - } + else if (!positionDecode.decode(i, vertex)) + return false; vertex = Parse::applyPosition(transformState, vertex); if (flipHandedness) vertex.x = -vertex.x; @@ -283,12 +281,10 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ hlsl::float32_t2 uv = {}; if (tightUV) uv = hlsl::float32_t2(tightUV[i].x, 1.f - tightUV[i].y); - else - { - if (!SGeometryViewDecode::decodeElement(*uvView, i, tmp)) - return false; - uv = hlsl::float32_t2(static_cast(tmp.x), 1.f - static_cast(tmp.y)); - } + else if (!uvDecode.decode(i, uv)) + return false; + if (!tightUV) + uv.y = 1.f - uv.y; Parse::appendVecLine(output, "vt ", sizeof("vt ") - 1ull, uv); } } @@ -300,12 +296,8 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ hlsl::float32_t3 normal = {}; if (tightNormals) normal = tightNormals[i]; - else - { - if (!SGeometryViewDecode::decodeElement(normalView, i, tmp)) - return false; - normal = hlsl::float32_t3(static_cast(tmp.x), static_cast(tmp.y), static_cast(tmp.z)); - } + else if (!normalDecode.decode(i, normal)) + return false; normal = Parse::applyNormal(transformState, normal); if (flipHandedness) normal.x = -normal.x; diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index c5d2f11217..73784e58ec 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -1,8 +1,8 @@ +#ifdef _NBL_COMPILE_WITH_PLY_LOADER_ // Copyright (C) 2018-2025 - 
DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors -#ifdef _NBL_COMPILE_WITH_PLY_LOADER_ #include "CPLYMeshFileLoader.h" #include "SPLYPolygonGeometryAuxLayout.h" diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.h b/src/nbl/asset/interchange/CPLYMeshFileLoader.h index 4d7b849e2e..50ecf06555 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.h +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.h @@ -4,7 +4,6 @@ // See the original file in irrlicht source for authors #ifndef _NBL_ASSET_C_PLY_MESH_FILE_LOADER_H_INCLUDED_ #define _NBL_ASSET_C_PLY_MESH_FILE_LOADER_H_INCLUDED_ -#ifdef _NBL_COMPILE_WITH_PLY_LOADER_ #include "nbl/core/declarations.h" @@ -29,4 +28,3 @@ class CPLYMeshFileLoader final : public IGeometryLoader } // end namespace nbl::asset #endif -#endif diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 4f8cbbf866..9a69e5a1c7 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -1,3 +1,4 @@ +#ifdef _NBL_COMPILE_WITH_PLY_WRITER_ // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h @@ -12,8 +13,6 @@ #include "impl/SFileAccess.h" #include "impl/SIODiagnostics.h" -#ifdef _NBL_COMPILE_WITH_PLY_WRITER_ - #include "nbl/system/IFile.h" #include @@ -59,6 +58,8 @@ namespace struct Parse { using Binary = impl::BinaryData; + using SemanticDecode = SGeometryViewDecode::Prepared; + using StoredDecode = SGeometryViewDecode::Prepared; enum class ScalarType : uint8_t { @@ -103,6 +104,34 @@ struct Parse bool flipVectors = false; }; + struct PreparedView + { + // Per-attribute emission state cached once before the vertex loop. + // Floats use semantic decode, integer payloads use stored decode. + uint32_t components = 0u; + ScalarType scalarType = ScalarType::Float32; + bool flipVectors = false; + SemanticDecode semantic = {}; + StoredDecode stored = {}; + + inline explicit operator bool() const + { + return getScalarMeta(scalarType).integer ? 
static_cast(stored) : static_cast(semantic); + } + + static PreparedView create(const ICPUPolygonGeometry::SDataView* view, const uint32_t components, const ScalarType scalarType, const bool flipVectors) + { + PreparedView retval = {.components = components, .scalarType = scalarType, .flipVectors = flipVectors}; + if (!view) + return retval; + if (getScalarMeta(scalarType).integer) + retval.stored = SGeometryViewDecode::prepare(*view); + else + retval.semantic = SGeometryViewDecode::prepare(*view); + return retval; + } + }; + static constexpr size_t ApproxTextBytesPerVertex = sizeof("0.000000 0.000000 0.000000 0.000000 0.000000 0.000000\n") - 1ull; static constexpr size_t ApproxTextBytesPerFace = sizeof("3 4294967295 4294967295 4294967295\n") - 1ull; static constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 16ull; @@ -245,15 +274,15 @@ struct Parse } }; - template - static bool emitDecodedView(Sink& sink, const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const bool flipVectors) + template + static bool emitDecodedView(Sink& sink, const SGeometryViewDecode::Prepared& view, const size_t ix, const uint32_t componentCount, const bool flipVectors) { - std::array decoded = {}; - if (!SGeometryViewDecode::decodeElement, Mode>(view, ix, decoded)) + std::array decoded = {}; + if (!view.decode(ix, decoded)) return false; for (uint32_t c = 0u; c < componentCount; ++c) { - OutT value = static_cast(decoded[c]); + OutT value = decoded[c]; if constexpr (std::is_signed_v || std::is_floating_point_v) { if (flipVectors && c == 0u) @@ -266,18 +295,20 @@ struct Parse } template - static bool emitView(Sink& sink, const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const ScalarType scalarType, const bool flipVectors) + static bool emitView(Sink& sink, const PreparedView& view, const size_t ix) { - switch (scalarType) + if (!view) + return false; + switch (view.scalarType) { - case 
ScalarType::Float64: return emitDecodedView(sink, view, ix, componentCount, flipVectors); - case ScalarType::Float32: return emitDecodedView(sink, view, ix, componentCount, flipVectors); - case ScalarType::Int8: return emitDecodedView(sink, view, ix, componentCount, flipVectors); - case ScalarType::UInt8: return emitDecodedView(sink, view, ix, componentCount, false); - case ScalarType::Int16: return emitDecodedView(sink, view, ix, componentCount, flipVectors); - case ScalarType::UInt16: return emitDecodedView(sink, view, ix, componentCount, false); - case ScalarType::Int32: return emitDecodedView(sink, view, ix, componentCount, flipVectors); - case ScalarType::UInt32: return emitDecodedView(sink, view, ix, componentCount, false); + case ScalarType::Float64: return emitDecodedView(sink, view.semantic, ix, view.components, view.flipVectors); + case ScalarType::Float32: return emitDecodedView(sink, view.semantic, ix, view.components, view.flipVectors); + case ScalarType::Int8: return emitDecodedView(sink, view.stored, ix, view.components, view.flipVectors); + case ScalarType::UInt8: return emitDecodedView(sink, view.stored, ix, view.components, false); + case ScalarType::Int16: return emitDecodedView(sink, view.stored, ix, view.components, view.flipVectors); + case ScalarType::UInt16: return emitDecodedView(sink, view.stored, ix, view.components, false); + case ScalarType::Int32: return emitDecodedView(sink, view.stored, ix, view.components, view.flipVectors); + case ScalarType::UInt32: return emitDecodedView(sink, view.stored, ix, view.components, false); } return false; } @@ -291,17 +322,24 @@ struct Parse const auto& positionView = input.geom->getPositionView(); const auto& normalView = input.geom->getNormalView(); const auto& extraAuxViews = *input.extraAuxViews; + const PreparedView preparedPosition = PreparedView::create(&positionView, 3u, input.positionScalarType, input.flipVectors); + const PreparedView preparedNormal = input.writeNormals ? 
PreparedView::create(&normalView, 3u, input.normalScalarType, input.flipVectors) : PreparedView{}; + const PreparedView preparedUV = input.uvView ? PreparedView::create(input.uvView, 2u, input.uvScalarType, false) : PreparedView{}; + core::vector preparedExtraAuxViews; + preparedExtraAuxViews.reserve(extraAuxViews.size()); + for (const auto& extra : extraAuxViews) + preparedExtraAuxViews.push_back(extra.view ? PreparedView::create(extra.view, extra.components, extra.scalarType, false) : PreparedView{}); for (size_t i = 0u; i < input.vertexCount; ++i) { - if (!emitView(sink, positionView, i, 3u, input.positionScalarType, input.flipVectors)) + if (!emitView(sink, preparedPosition, i)) return false; - if (input.writeNormals && !emitView(sink, normalView, i, 3u, input.normalScalarType, input.flipVectors)) + if (input.writeNormals && !emitView(sink, preparedNormal, i)) return false; - if (input.uvView && !emitView(sink, *input.uvView, i, 2u, input.uvScalarType, false)) + if (input.uvView && !emitView(sink, preparedUV, i)) return false; - for (const auto& extra : extraAuxViews) + for (size_t extraIx = 0u; extraIx < extraAuxViews.size(); ++extraIx) { - if (!extra.view || !emitView(sink, *extra.view, i, extra.components, extra.scalarType, false)) + if (!extraAuxViews[extraIx].view || !emitView(sink, preparedExtraAuxViews[extraIx], i)) return false; } if (!sink.finishVertex()) diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 2b4edd25e6..8d3b906848 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -1,3 +1,4 @@ +#ifdef _NBL_COMPILE_WITH_STL_LOADER_ // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h @@ -5,8 +6,6 @@ #include "CSTLMeshFileLoader.h" -#ifdef _NBL_COMPILE_WITH_STL_LOADER_ - #include "SSTLPolygonGeometryAuxLayout.h" #include "impl/SFileAccess.h" #include "impl/SIODiagnostics.h" diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 6d531e55fb..459bad9854 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -1,3 +1,4 @@ +#ifdef _NBL_COMPILE_WITH_STL_WRITER_ // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h @@ -21,8 +22,6 @@ #include #include -#ifdef _NBL_COMPILE_WITH_STL_WRITER_ - namespace nbl::asset { diff --git a/src/nbl/asset/interchange/SGeometryViewDecode.h b/src/nbl/asset/interchange/SGeometryViewDecode.h index 5847f3ea7f..6c4c2c024e 100644 --- a/src/nbl/asset/interchange/SGeometryViewDecode.h +++ b/src/nbl/asset/interchange/SGeometryViewDecode.h @@ -5,11 +5,11 @@ #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/format/decodePixels.h" -#include "nbl/builtin/hlsl/array_accessors.hlsl" +#include "nbl/builtin/hlsl/concepts.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" +#include #include -#include #include @@ -21,84 +21,130 @@ class SGeometryViewDecode public: enum class EMode : uint8_t { - Cooked, - Raw + // Semantic values ready for writer-side math and text/binary emission. + Semantic, + // Stored values preserved in the original integer storage domain. 
+ Stored }; - template - static inline bool decodeElement(const ICPUPolygonGeometry::SDataView& view, const size_t ix, Out& out) + template + struct Prepared { - using scalar_t = typename STraits::scalar_type; + // Cached per-view decode state prepared once and reused inside tight loops. + const uint8_t* data = nullptr; + uint32_t stride = 0u; + E_FORMAT format = EF_UNKNOWN; + uint32_t channels = 0u; + bool normalized = false; + hlsl::shapes::AABB<4, hlsl::float64_t> range = hlsl::shapes::AABB<4, hlsl::float64_t>::create(); + + inline explicit operator bool() const + { + return data != nullptr && stride != 0u && format != EF_UNKNOWN && channels != 0u; + } + + template + inline bool decode(const size_t ix, std::array& out) const + { + out.fill(T{}); + return SGeometryViewDecode::template decodePrepared(*this, ix, out.data(), static_cast(N)); + } - out = {}; + template requires hlsl::concepts::Vector + inline bool decode(const size_t ix, V& out) const + { + out = V{}; + return SGeometryViewDecode::template decodePrepared(*this, ix, out); + } + }; + + template + static inline Prepared prepare(const ICPUPolygonGeometry::SDataView& view) + { + // Hoist view invariants out of the per-element decode path. + Prepared retval = {}; if (!view.composed.isFormatted()) - return false; + return retval; + + retval.data = reinterpret_cast(view.getPointer()); + if (!retval.data) + return {}; + + retval.stride = view.composed.getStride(); + retval.format = view.composed.format; + retval.channels = getFormatChannelCount(retval.format); + if constexpr (Mode == EMode::Semantic) + { + retval.normalized = isNormalizedFormat(retval.format); + if (retval.normalized) + retval.range = view.composed.getRange>(); + } + return retval; + } + + template + static inline bool decodeElement(const ICPUPolygonGeometry::SDataView& view, const size_t ix, Out& out) + { + // Convenience wrapper for one-off decode sites that do not keep prepared state. 
+ return prepare(view).decode(ix, out); + } - const void* const src = view.getPointer(ix); - if (!src) + private: + template requires hlsl::concepts::Vector + static inline bool decodePrepared(const Prepared& prepared, const size_t ix, V& out) + { + using scalar_t = typename hlsl::vector_traits::scalar_type; + constexpr uint32_t Dimension = hlsl::vector_traits::Dimension; + if (!prepared || Dimension == 0u) return false; - std::array srcArr = {src}; - std::array tmp = {}; - if (!decodePixels(view.composed.format, srcArr.data(), tmp.data(), 0u, 0u)) + using storage_t = std::conditional_t, hlsl::float64_t, std::conditional_t, int64_t, uint64_t>>; + std::array tmp = {}; + const void* srcArr[4] = {prepared.data + ix * prepared.stride, nullptr}; + if (!decodePixels(prepared.format, srcArr, tmp.data(), 0u, 0u)) return false; - const uint32_t channels = std::min(STraits::Dimension, getFormatChannelCount(view.composed.format)); - if constexpr (Mode == EMode::Cooked && std::is_floating_point_v) + const uint32_t componentCount = std::min({prepared.channels, Dimension, 4u}); + if constexpr (Mode == EMode::Semantic && std::is_floating_point_v) { - if (isNormalizedFormat(view.composed.format)) + if (prepared.normalized) { - const auto range = view.composed.getRange>(); - for (uint32_t i = 0u; i < channels; ++i) - tmp[i] = static_cast(tmp[i] * (range.maxVx[i] - range.minVx[i]) + range.minVx[i]); + for (uint32_t i = 0u; i < componentCount; ++i) + tmp[i] = static_cast(tmp[i] * (prepared.range.maxVx[i] - prepared.range.minVx[i]) + prepared.range.minVx[i]); } } - for (uint32_t i = 0u; i < channels; ++i) - STraits::set(out, i, tmp[i]); + for (uint32_t i = 0u; i < componentCount; ++i) + out[i] = static_cast(tmp[i]); return true; } - private: - template - struct SIsStdArray : std::false_type {}; - - template - struct SIsStdArray> : std::true_type {}; - - template - struct SHasVectorTraits : std::false_type {}; - - template - struct SHasVectorTraits::scalar_type>> : std::true_type {}; - 
- template::value, bool IsVector = (!IsStdArray && SHasVectorTraits::value)> - struct STraits; - - template - struct STraits + template + static inline bool decodePrepared(const Prepared& prepared, const size_t ix, T* out, const uint32_t outDim) { - using scalar_type = typename Out::value_type; - static constexpr uint32_t Dimension = std::tuple_size_v; - - static inline void set(Out& out, const uint32_t ix, const scalar_type value) - { - out[ix] = value; - } - }; + if (!prepared || !out || outDim == 0u) + return false; - template - struct STraits - { - using scalar_type = typename hlsl::vector_traits::scalar_type; - static constexpr uint32_t Dimension = hlsl::vector_traits::Dimension; + using storage_t = std::conditional_t, hlsl::float64_t, std::conditional_t, int64_t, uint64_t>>; + std::array tmp = {}; + const void* srcArr[4] = {prepared.data + ix * prepared.stride, nullptr}; + if (!decodePixels(prepared.format, srcArr, tmp.data(), 0u, 0u)) + return false; - static inline void set(Out& out, const uint32_t ix, const scalar_type value) + const uint32_t componentCount = std::min({prepared.channels, outDim, 4u}); + if constexpr (Mode == EMode::Semantic && std::is_floating_point_v) { - hlsl::array_set setter; - setter(out, ix, value); + if (prepared.normalized) + { + for (uint32_t i = 0u; i < componentCount; ++i) + tmp[i] = static_cast(tmp[i] * (prepared.range.maxVx[i] - prepared.range.minVx[i]) + prepared.range.minVx[i]); + } } - }; + + for (uint32_t i = 0u; i < componentCount; ++i) + out[i] = static_cast(tmp[i]); + return true; + } }; } diff --git a/src/nbl/asset/interchange/impl/SBinaryData.h b/src/nbl/asset/interchange/impl/SBinaryData.h index 76107f8714..1eeba4f764 100644 --- a/src/nbl/asset/interchange/impl/SBinaryData.h +++ b/src/nbl/asset/interchange/impl/SBinaryData.h @@ -13,6 +13,7 @@ namespace nbl::asset::impl struct BinaryData { + // Minimal unaligned binary primitives reused by binary interchange paths. 
template static inline T byteswap(const T value) { diff --git a/src/nbl/asset/interchange/impl/SFileAccess.h b/src/nbl/asset/interchange/impl/SFileAccess.h index 540d2b3459..1373c6b5e3 100644 --- a/src/nbl/asset/interchange/impl/SFileAccess.h +++ b/src/nbl/asset/interchange/impl/SFileAccess.h @@ -13,6 +13,7 @@ namespace nbl::asset::impl class SFileAccess { public: + // Common file access policy glue for mapped whole-file and buffered fallback paths. static inline bool isMappable(const system::IFile* file) { return file && core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); diff --git a/src/nbl/asset/interchange/impl/SIODiagnostics.h b/src/nbl/asset/interchange/impl/SIODiagnostics.h index eede03dae3..687c61f2c7 100644 --- a/src/nbl/asset/interchange/impl/SIODiagnostics.h +++ b/src/nbl/asset/interchange/impl/SIODiagnostics.h @@ -13,6 +13,7 @@ namespace nbl::asset::impl class SIODiagnostics { public: + // Shared logging helpers for IO policy failures and suspicious tiny-IO telemetry. template static inline bool logInvalidPlan(Logger& logger, const char* const owner, const char* const fileName, const SResolvedFileIOPolicy& ioPlan) { diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h index 45e8bd405d..15d18e18cd 100644 --- a/src/nbl/asset/interchange/impl/STextParse.h +++ b/src/nbl/asset/interchange/impl/STextParse.h @@ -20,6 +20,7 @@ namespace nbl::asset::impl struct TextParse { + // Small asset-agnostic text parsing helpers shared by src-only interchange code. template static inline bool parseNumber(const char*& ptr, const char* const end, T& out) { @@ -127,6 +128,7 @@ struct TextParse struct Cursor { + // Lightweight stateful wrapper for sequential token/number reads. 
const char* current = nullptr; const char* end = nullptr; From 690d799254791f702a8f2b73a2b2db65dc595d86 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 14:18:36 +0100 Subject: [PATCH 075/118] Reduce mesh loader writer duplication --- .../asset/interchange/COBJMeshFileLoader.cpp | 15 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 103 +-- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 152 ++-- .../asset/interchange/CSTLMeshFileLoader.cpp | 21 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 717 +++++++----------- .../interchange/SGeometryAttributeEmit.h | 41 + .../interchange/impl/SContentHashBuild.h | 72 ++ src/nbl/asset/interchange/impl/SLoadSession.h | 74 ++ 8 files changed, 607 insertions(+), 588 deletions(-) create mode 100644 src/nbl/asset/interchange/SGeometryAttributeEmit.h create mode 100644 src/nbl/asset/interchange/impl/SContentHashBuild.h create mode 100644 src/nbl/asset/interchange/impl/SLoadSession.h diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 1983ccbaac..26a56a1bc5 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -20,8 +20,7 @@ #include "nbl/system/IFile.h" #include "COBJMeshFileLoader.h" -#include "impl/SFileAccess.h" -#include "impl/SIODiagnostics.h" +#include "impl/SLoadSession.h" #include "impl/STextParse.h" #include @@ -352,12 +351,12 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( const long filesize = _file->getSize(); if (filesize <= 0) return {}; - const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(filesize), true, _file); - if (impl::SIODiagnostics::logInvalidPlan(_params.logger, "OBJ loader", _file->getFileName().string().c_str(), ioPlan)) + impl::SLoadSession loadSession = {}; + if (!impl::SLoadSession::begin(_params.logger, "OBJ loader", _file, _params.ioPolicy, static_cast(filesize), true, loadSession)) return {}; core::vector 
fileContents; - const auto* fileData = impl::SFileAccess::mapOrReadWholeFile(_file, static_cast(filesize), fileContents, ioPlan, &ioTelemetry); + const auto* fileData = loadSession.mapOrReadWholeFile(fileContents, &ioTelemetry); if (!fileData) return {}; const char* const buf = reinterpret_cast(fileData); @@ -932,7 +931,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( faceFallbackTokenCountSum += loaded.faceFallbackTokenCount; } - impl::SIODiagnostics::logTinyIO(_params.logger, "OBJ loader", _file->getFileName().string().c_str(), ioTelemetry, static_cast(filesize), _params.ioPolicy, "reads"); + loadSession.logTinyIO(_params.logger, ioTelemetry); const bool buildCollections = sawObjectDirective || sawGroupDirective || loadedGeometries.size() > 1ull; @@ -1003,8 +1002,8 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( static_cast(ioTelemetry.getMinOrZero()), static_cast(ioTelemetry.getAvgOrZero()), system::to_string(_params.ioPolicy.strategy).c_str(), - system::to_string(ioPlan.strategy).c_str(), - static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); + system::to_string(loadSession.ioPlan.strategy).c_str(), + static_cast(loadSession.ioPlan.chunkSizeBytes()), loadSession.ioPlan.reason); return SAssetBundle(core::smart_refctd_ptr(), std::move(outputAssets)); diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 73784e58ec..e696e1f698 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -7,8 +7,8 @@ #include "CPLYMeshFileLoader.h" #include "SPLYPolygonGeometryAuxLayout.h" #include "impl/SBinaryData.h" -#include "impl/SFileAccess.h" -#include "impl/SIODiagnostics.h" +#include "impl/SContentHashBuild.h" +#include "impl/SLoadSession.h" #include "impl/STextParse.h" #include "nbl/asset/IAssetManager.h" #include "nbl/asset/interchange/SGeometryContentHash.h" @@ -1410,20 +1410,18 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( const 
bool hashInBuild = computeContentHashes && SLoaderRuntimeTuner::shouldInlineHashBuild(_params.ioPolicy, fileSize); - const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, fileSize, true, _file); - if (impl::SIODiagnostics::logInvalidPlan(_params.logger, "PLY loader", _file->getFileName().string().c_str(), ioPlan)) + impl::SLoadSession loadSession = {}; + if (!impl::SLoadSession::begin(_params.logger, "PLY loader", _file, _params.ioPolicy, fileSize, true, loadSession)) return {}; Parse::Context ctx = {asset::IAssetLoader::SAssetLoadContext{_params, _file}, _hierarchyLevel, _override}; uint64_t desiredReadWindow = - ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile + loadSession.isWholeFile() ? (fileSize + Parse::Context::ReadWindowPaddingBytes) - : ioPlan.chunkSizeBytes(); - if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) { - const bool mappedInput = - static_cast(_file)->getMappedPointer() != - nullptr; + : loadSession.ioPlan.chunkSizeBytes(); + if (loadSession.isWholeFile()) { + const bool mappedInput = loadSession.mappedPointer() != nullptr; if (mappedInput && fileSize > (Parse::Context::DefaultIoReadWindowBytes * 2ull)) desiredReadWindow = Parse::Context::DefaultIoReadWindowBytes; @@ -1439,51 +1437,36 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); uint32_t vertCount = 0; - core::vector> hashedBuffers; - std::jthread deferredPositionHashThread; - auto hashBufferIfNeeded = [&](ICPUBuffer* buffer) -> void { - if (!hashInBuild || !buffer) - return; - for (const auto& hashed : hashedBuffers) { - if (hashed.get() == buffer) - return; - } - buffer->setContentHash(buffer->computeContentHash()); - hashedBuffers.push_back(core::smart_refctd_ptr(buffer)); + impl::SContentHashBuild contentHashBuild = impl::SContentHashBuild::create(computeContentHashes, hashInBuild); + auto visitVertexAttributeViews = [&](auto&& visitor) -> void { + 
visitor(geometry->getPositionView()); + visitor(geometry->getNormalView()); + for (const auto& view : *geometry->getAuxAttributeViews()) + visitor(view); }; - auto tryLaunchDeferredHash = [&](const IGeometry::SDataView& view, - std::jthread& deferredThread) -> void { - if (!computeContentHashes || hashInBuild || !view || !view.src.buffer) - return; - if (deferredThread.joinable()) - return; - if (view.src.buffer->getContentHash() != IPreHashed::INVALID_HASH) - return; - auto keepAlive = core::smart_refctd_ptr(view.src.buffer); - deferredThread = std::jthread([buffer = std::move(keepAlive)]() mutable { - buffer->setContentHash(buffer->computeContentHash()); - }); + auto visitGeometryViews = [&](auto&& visitor) -> void { + visitVertexAttributeViews(visitor); + visitor(geometry->getIndexView()); + for (const auto& view : *geometry->getJointWeightViews()) { + visitor(view.indices); + visitor(view.weights); + } + if (const auto jointObb = geometry->getJointOBBView(); jointObb) + visitor(*jointObb); }; - auto hashViewBufferIfNeeded = - [&](const IGeometry::SDataView& view) -> void { + auto hashViewBufferIfNeeded = [&](const IGeometry::SDataView& view) -> void { if (!view || !view.src.buffer) return; - hashBufferIfNeeded(view.src.buffer.get()); + contentHashBuild.hashNow(view.src.buffer.get()); }; auto hashRemainingGeometryBuffers = [&]() -> void { - if (!hashInBuild) + if (contentHashBuild.hashesInline()) + visitGeometryViews(hashViewBufferIfNeeded); + }; + auto tryLaunchDeferredHash = [&](const IGeometry::SDataView& view) -> void { + if (!view || !view.src.buffer) return; - hashViewBufferIfNeeded(geometry->getPositionView()); - hashViewBufferIfNeeded(geometry->getIndexView()); - hashViewBufferIfNeeded(geometry->getNormalView()); - for (const auto& view : *geometry->getAuxAttributeViews()) - hashViewBufferIfNeeded(view); - for (const auto& view : *geometry->getJointWeightViews()) { - hashViewBufferIfNeeded(view.indices); - hashViewBufferIfNeeded(view.weights); - } - if 
(const auto jointObb = geometry->getJointOBBView(); jointObb) - hashViewBufferIfNeeded(*jointObb); + contentHashBuild.tryDefer(view.src.buffer.get()); }; // Currently only supports ASCII or binary meshes @@ -1598,8 +1581,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( readingHeader = false; if (ctx.IsBinaryFile) { char* const binaryStartInBuffer = ctx.LineEndPointer + 1; - const auto* const mappedBase = reinterpret_cast( - static_cast(_file)->getMappedPointer()); + const auto* const mappedBase = reinterpret_cast(loadSession.mappedPointer()); if (mappedBase) { const size_t binaryOffset = ctx.getAbsoluteOffset(binaryStartInBuffer); @@ -1867,12 +1849,8 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( system::ILogger::ELL_ERROR, _file->getFileName().string().c_str()); return {}; } - hashViewBufferIfNeeded(geometry->getPositionView()); - hashViewBufferIfNeeded(geometry->getNormalView()); - for (const auto& view : *geometry->getAuxAttributeViews()) - hashViewBufferIfNeeded(view); - tryLaunchDeferredHash(geometry->getPositionView(), - deferredPositionHashThread); + visitVertexAttributeViews(hashViewBufferIfNeeded); + tryLaunchDeferredHash(geometry->getPositionView()); verticesProcessed = true; } else if (el.Name == "face") { const uint32_t vertexCount32 = @@ -1881,7 +1859,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( : 0u; const auto fastFaceResult = ctx.readFaceElementFast( el, indices, maxIndexRead, faceCount, vertexCount32, - computeContentHashes && !hashInBuild, precomputedIndexHash); + contentHashBuild.hashesDeferred(), precomputedIndexHash); if (fastFaceResult == Parse::Context::EFastFaceReadResult::Success) { ++fastFaceElementCount; } else if (fastFaceResult == @@ -1958,9 +1936,8 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( } } - if (computeContentHashes && !hashInBuild) { - if (deferredPositionHashThread.joinable()) - deferredPositionHashThread.join(); + if (contentHashBuild.hashesDeferred()) { + contentHashBuild.wait(); 
SPolygonGeometryContentHash::computeMissing(geometry.get(), _params.ioPolicy); } else { @@ -1973,7 +1950,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( const SFileReadTelemetry ioTelemetry = {.callCount = ctx.readCallCount, .totalBytes = ctx.readBytesTotal, .minBytes = ctx.readMinBytes}; - impl::SIODiagnostics::logTinyIO(_params.logger, "PLY loader", _file->getFileName().string().c_str(), ioTelemetry, fileSize, _params.ioPolicy, "reads"); + loadSession.logTinyIO(_params.logger, ioTelemetry); _params.logger.log( "PLY loader stats: file=%s binary=%d verts=%llu faces=%llu idx=%llu " "vertex_fast=%llu face_fast=%llu io_reads=%llu io_min_read=%llu " @@ -1988,8 +1965,8 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( static_cast(ioMinRead), static_cast(ioAvgRead), system::to_string(_params.ioPolicy.strategy).c_str(), - system::to_string(ioPlan.strategy).c_str(), - static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); + system::to_string(loadSession.ioPlan.strategy).c_str(), + static_cast(loadSession.ioPlan.chunkSizeBytes()), loadSession.ioPlan.reason); auto meta = core::make_smart_refctd_ptr(); return SAssetBundle(std::move(meta), {std::move(geometry)}); } diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 9a69e5a1c7..25ca52e45d 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -6,6 +6,7 @@ #include "CPLYMeshWriter.h" #include "SPLYPolygonGeometryAuxLayout.h" +#include "nbl/asset/interchange/SGeometryAttributeEmit.h" #include "nbl/asset/interchange/SGeometryViewDecode.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" @@ -104,34 +105,6 @@ struct Parse bool flipVectors = false; }; - struct PreparedView - { - // Per-attribute emission state cached once before the vertex loop. - // Floats use semantic decode, integer payloads use stored decode. 
- uint32_t components = 0u; - ScalarType scalarType = ScalarType::Float32; - bool flipVectors = false; - SemanticDecode semantic = {}; - StoredDecode stored = {}; - - inline explicit operator bool() const - { - return getScalarMeta(scalarType).integer ? static_cast(stored) : static_cast(semantic); - } - - static PreparedView create(const ICPUPolygonGeometry::SDataView* view, const uint32_t components, const ScalarType scalarType, const bool flipVectors) - { - PreparedView retval = {.components = components, .scalarType = scalarType, .flipVectors = flipVectors}; - if (!view) - return retval; - if (getScalarMeta(scalarType).integer) - retval.stored = SGeometryViewDecode::prepare(*view); - else - retval.semantic = SGeometryViewDecode::prepare(*view); - return retval; - } - }; - static constexpr size_t ApproxTextBytesPerVertex = sizeof("0.000000 0.000000 0.000000 0.000000 0.000000 0.000000\n") - 1ull; static constexpr size_t ApproxTextBytesPerFace = sizeof("3 4294967295 4294967295 4294967295\n") - 1ull; static constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 16ull; @@ -274,44 +247,85 @@ struct Parse } }; - template - static bool emitDecodedView(Sink& sink, const SGeometryViewDecode::Prepared& view, const size_t ix, const uint32_t componentCount, const bool flipVectors) + template + struct PreparedView { - std::array decoded = {}; - if (!view.decode(ix, decoded)) - return false; - for (uint32_t c = 0u; c < componentCount; ++c) + using EmitFn = bool(*)(Sink&, const PreparedView&, size_t); + + uint32_t components = 0u; + bool flipVectors = false; + SemanticDecode semantic = {}; + StoredDecode stored = {}; + EmitFn emit = nullptr; + + inline explicit operator bool() const { - OutT value = decoded[c]; - if constexpr (std::is_signed_v || std::is_floating_point_v) - { - if (flipVectors && c == 0u) - value = -value; - } - if (!sink.append(value)) - return false; + return emit != nullptr && (static_cast(semantic) || static_cast(stored)); } - return true; 
- } - template - static bool emitView(Sink& sink, const PreparedView& view, const size_t ix) - { - if (!view) - return false; - switch (view.scalarType) + inline bool operator()(Sink& sink, const size_t ix) const { - case ScalarType::Float64: return emitDecodedView(sink, view.semantic, ix, view.components, view.flipVectors); - case ScalarType::Float32: return emitDecodedView(sink, view.semantic, ix, view.components, view.flipVectors); - case ScalarType::Int8: return emitDecodedView(sink, view.stored, ix, view.components, view.flipVectors); - case ScalarType::UInt8: return emitDecodedView(sink, view.stored, ix, view.components, false); - case ScalarType::Int16: return emitDecodedView(sink, view.stored, ix, view.components, view.flipVectors); - case ScalarType::UInt16: return emitDecodedView(sink, view.stored, ix, view.components, false); - case ScalarType::Int32: return emitDecodedView(sink, view.stored, ix, view.components, view.flipVectors); - case ScalarType::UInt32: return emitDecodedView(sink, view.stored, ix, view.components, false); + return static_cast(*this) && emit(sink, *this, ix); } - return false; - } + + template + static bool emitPrepared(Sink& sink, const PreparedView& view, const size_t ix) + { + if constexpr (Mode == SGeometryViewDecode::EMode::Semantic) + return SGeometryAttributeEmit::emit(sink, view.semantic, ix, view.components, view.flipVectors); + else + return SGeometryAttributeEmit::emit(sink, view.stored, ix, view.components, view.flipVectors); + } + + static PreparedView create(const ICPUPolygonGeometry::SDataView* view, const uint32_t components, const ScalarType scalarType, const bool flipVectors) + { + PreparedView retval = {.components = components}; + if (!view) + return retval; + + switch (scalarType) + { + case ScalarType::Float64: + retval.flipVectors = flipVectors; + retval.semantic = SGeometryViewDecode::prepare(*view); + retval.emit = &emitPrepared; + break; + case ScalarType::Float32: + retval.flipVectors = flipVectors; + 
retval.semantic = SGeometryViewDecode::prepare(*view); + retval.emit = &emitPrepared; + break; + case ScalarType::Int8: + retval.flipVectors = flipVectors; + retval.stored = SGeometryViewDecode::prepare(*view); + retval.emit = &emitPrepared; + break; + case ScalarType::UInt8: + retval.stored = SGeometryViewDecode::prepare(*view); + retval.emit = &emitPrepared; + break; + case ScalarType::Int16: + retval.flipVectors = flipVectors; + retval.stored = SGeometryViewDecode::prepare(*view); + retval.emit = &emitPrepared; + break; + case ScalarType::UInt16: + retval.stored = SGeometryViewDecode::prepare(*view); + retval.emit = &emitPrepared; + break; + case ScalarType::Int32: + retval.flipVectors = flipVectors; + retval.stored = SGeometryViewDecode::prepare(*view); + retval.emit = &emitPrepared; + break; + case ScalarType::UInt32: + retval.stored = SGeometryViewDecode::prepare(*view); + retval.emit = &emitPrepared; + break; + } + return retval; + } + }; template static bool emitVertices(const WriteInput& input, Sink& sink) @@ -322,24 +336,24 @@ struct Parse const auto& positionView = input.geom->getPositionView(); const auto& normalView = input.geom->getNormalView(); const auto& extraAuxViews = *input.extraAuxViews; - const PreparedView preparedPosition = PreparedView::create(&positionView, 3u, input.positionScalarType, input.flipVectors); - const PreparedView preparedNormal = input.writeNormals ? PreparedView::create(&normalView, 3u, input.normalScalarType, input.flipVectors) : PreparedView{}; - const PreparedView preparedUV = input.uvView ? PreparedView::create(input.uvView, 2u, input.uvScalarType, false) : PreparedView{}; - core::vector preparedExtraAuxViews; + const PreparedView preparedPosition = PreparedView::create(&positionView, 3u, input.positionScalarType, input.flipVectors); + const PreparedView preparedNormal = input.writeNormals ? 
PreparedView::create(&normalView, 3u, input.normalScalarType, input.flipVectors) : PreparedView{}; + const PreparedView preparedUV = input.uvView ? PreparedView::create(input.uvView, 2u, input.uvScalarType, false) : PreparedView{}; + core::vector> preparedExtraAuxViews; preparedExtraAuxViews.reserve(extraAuxViews.size()); for (const auto& extra : extraAuxViews) - preparedExtraAuxViews.push_back(extra.view ? PreparedView::create(extra.view, extra.components, extra.scalarType, false) : PreparedView{}); + preparedExtraAuxViews.push_back(extra.view ? PreparedView::create(extra.view, extra.components, extra.scalarType, false) : PreparedView{}); for (size_t i = 0u; i < input.vertexCount; ++i) { - if (!emitView(sink, preparedPosition, i)) + if (!preparedPosition(sink, i)) return false; - if (input.writeNormals && !emitView(sink, preparedNormal, i)) + if (input.writeNormals && !preparedNormal(sink, i)) return false; - if (input.uvView && !emitView(sink, preparedUV, i)) + if (input.uvView && !preparedUV(sink, i)) return false; for (size_t extraIx = 0u; extraIx < extraAuxViews.size(); ++extraIx) { - if (!extraAuxViews[extraIx].view || !emitView(sink, preparedExtraAuxViews[extraIx], i)) + if (!extraAuxViews[extraIx].view || !preparedExtraAuxViews[extraIx](sink, i)) return false; } if (!sink.finishVertex()) diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 8d3b906848..0bd1e9c112 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -7,8 +7,7 @@ #include "CSTLMeshFileLoader.h" #include "SSTLPolygonGeometryAuxLayout.h" -#include "impl/SFileAccess.h" -#include "impl/SIODiagnostics.h" +#include "impl/SLoadSession.h" #include "impl/STextParse.h" #include "nbl/asset/asset.h" #include "nbl/asset/format/convertColor.h" @@ -174,15 +173,15 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (filesize < 
Context::TextProbeBytes) return {}; - const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(filesize), true, _file); - if (impl::SIODiagnostics::logInvalidPlan(_params.logger, "STL loader", _file->getFileName().string().c_str(), ioPlan)) + impl::SLoadSession loadSession = {}; + if (!impl::SLoadSession::begin(_params.logger, "STL loader", _file, _params.ioPolicy, static_cast(filesize), true, loadSession)) return {}; core::vector wholeFilePayload; const uint8_t* wholeFileData = nullptr; - if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) + if (loadSession.isWholeFile()) { - wholeFileData = impl::SFileAccess::mapOrReadWholeFile(context.inner.mainFile, filesize, wholeFilePayload, ioPlan, &context.ioTelemetry); + wholeFileData = loadSession.mapOrReadWholeFile(wholeFilePayload, &context.ioTelemetry); if (!wholeFileData) return {}; } @@ -266,7 +265,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (filesize < expectedSize) return {}; - const uint8_t* payloadData = wholeFileData ? (wholeFileData + Context::BinaryPrefixBytes) : impl::SFileAccess::readRange(context.inner.mainFile, Context::BinaryPrefixBytes, dataSize, wholeFilePayload, ioPlan, &context.ioTelemetry); + const uint8_t* payloadData = wholeFileData ? 
(wholeFileData + Context::BinaryPrefixBytes) : loadSession.readRange(Context::BinaryPrefixBytes, dataSize, wholeFilePayload, &context.ioTelemetry); if (!payloadData) return {}; @@ -630,7 +629,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa parsePath = "ascii_fallback"; if (!wholeFileData) { - wholeFileData = impl::SFileAccess::mapOrReadWholeFile(context.inner.mainFile, filesize, wholeFilePayload, ioPlan, &context.ioTelemetry); + wholeFileData = loadSession.mapOrReadWholeFile(wholeFilePayload, &context.ioTelemetry); if (!wholeFileData) return {}; } @@ -737,7 +736,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa } const uint64_t ioMinRead = context.ioTelemetry.getMinOrZero(); const uint64_t ioAvgRead = context.ioTelemetry.getAvgOrZero(); - impl::SIODiagnostics::logTinyIO(_params.logger, "STL loader", _file->getFileName().string().c_str(), context.ioTelemetry, static_cast(filesize), _params.ioPolicy, "reads"); + loadSession.logTinyIO(_params.logger, context.ioTelemetry); _params.logger.log( "STL loader stats: file=%s binary=%d parse_path=%s triangles=%llu " "vertices=%llu colors=%d io_reads=%llu io_min_read=%llu io_avg_read=%llu " @@ -749,8 +748,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa static_cast(ioMinRead), static_cast(ioAvgRead), system::to_string(_params.ioPolicy.strategy).c_str(), - system::to_string(ioPlan.strategy).c_str(), - static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); + system::to_string(loadSession.ioPlan.strategy).c_str(), + static_cast(loadSession.ioPlan.chunkSizeBytes()), loadSession.ioPlan.reason); auto meta = core::make_smart_refctd_ptr(); return SAssetBundle(std::move(meta), {std::move(geometry)}); } diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 459bad9854..6ab92f18d7 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ 
b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -102,6 +102,14 @@ struct Parse } }; + struct TriangleData + { + hlsl::float32_t3 normal = {}; + hlsl::float32_t3 vertex1 = {}; + hlsl::float32_t3 vertex2 = {}; + hlsl::float32_t3 vertex3 = {}; + }; + static constexpr size_t BinaryHeaderBytes = 80ull; static constexpr size_t BinaryTriangleCountBytes = sizeof(uint32_t); static constexpr size_t BinaryTriangleFloatCount = 12ull; @@ -174,26 +182,77 @@ struct Parse if (!normalView) return false; - hlsl::float32_t3 n0 = {}; - hlsl::float32_t3 n1 = {}; - hlsl::float32_t3 n2 = {}; - if (!normalView.decodeElement(idx.x, n0)) + std::array normals = {}; + if (!normalView.decodeElement(idx.x, normals[0])) return false; - if (!normalView.decodeElement(idx.y, n1)) + if (!normalView.decodeElement(idx.y, normals[1])) return false; - if (!normalView.decodeElement(idx.z, n2)) + if (!normalView.decodeElement(idx.z, normals[2])) return false; - auto normal = n0; - if (hlsl::dot(normal, normal) <= 0.f) - normal = n1; - if (hlsl::dot(normal, normal) <= 0.f) - normal = n2; - if (hlsl::dot(normal, normal) <= 0.f) + return selectFirstValidNormal(normals.data(), static_cast(normals.size()), outNormal); + } + + static bool selectFirstValidNormal(const hlsl::float32_t3* const normals, const uint32_t count, hlsl::float32_t3& outNormal) + { + if (!normals || count == 0u) return false; + for (uint32_t i = 0u; i < count; ++i) + { + if (hlsl::dot(normals[i], normals[i]) > 0.f) + { + outNormal = normals[i]; + return true; + } + } + return false; + } - outNormal = normal; - return true; + static void prepareVertices(const hlsl::float32_t3& p0, const hlsl::float32_t3& p1, const hlsl::float32_t3& p2, const bool flipHandedness, hlsl::float32_t3& vertex1, hlsl::float32_t3& vertex2, hlsl::float32_t3& vertex3) + { + vertex1 = p2; + vertex2 = p1; + vertex3 = p0; + if (flipHandedness) + { + vertex1.x = -vertex1.x; + vertex2.x = -vertex2.x; + vertex3.x = -vertex3.x; + } + } + + static hlsl::float32_t3 
computePlaneNormal(const hlsl::float32_t3& vertex1, const hlsl::float32_t3& vertex2, const hlsl::float32_t3& vertex3, float* const planeNormalLen2 = nullptr) + { + const hlsl::float32_t3 planeNormal = hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); + const float len2 = hlsl::dot(planeNormal, planeNormal); + if (planeNormalLen2) + { + *planeNormalLen2 = len2; + return planeNormal; + } + return len2 > 0.f ? hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); + } + + static hlsl::float32_t3 resolveTriangleNormal(const hlsl::float32_t3& planeNormal, const float planeNormalLen2, const hlsl::float32_t3* const attrNormals, const uint32_t attrNormalCount, const bool flipHandedness, const bool alignToPlane) + { + hlsl::float32_t3 attrNormal = {}; + if (selectFirstValidNormal(attrNormals, attrNormalCount, attrNormal)) + { + if (flipHandedness) + attrNormal.x = -attrNormal.x; + if (alignToPlane && planeNormalLen2 > 0.f && hlsl::dot(attrNormal, planeNormal) < 0.f) + attrNormal = -attrNormal; + return attrNormal; + } + return planeNormalLen2 > 0.f ? 
hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); + } + + static void buildTriangle(const hlsl::float32_t3& p0, const hlsl::float32_t3& p1, const hlsl::float32_t3& p2, const hlsl::float32_t3* const attrNormals, const uint32_t attrNormalCount, const bool flipHandedness, const bool alignToPlane, TriangleData& triangle) + { + prepareVertices(p0, p1, p2, flipHandedness, triangle.vertex1, triangle.vertex2, triangle.vertex3); + float planeNormalLen2 = 0.f; + const hlsl::float32_t3 planeNormal = computePlaneNormal(triangle.vertex1, triangle.vertex2, triangle.vertex3, &planeNormalLen2); + triangle.normal = resolveTriangleNormal(planeNormal, planeNormalLen2, attrNormals, attrNormalCount, flipHandedness, alignToPlane); } static double normalizeColorComponentToUnit(double value) @@ -247,429 +306,213 @@ struct Parse decodePixels(src, out, 0u, 0u); } - static bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, - Context* context) { - if (!geom || !context || !context->writeContext.outputFile) - return false; - - const auto& posView = geom->getPositionView(); - if (!posView) - return false; - - const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag( - E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); - const size_t vertexCount = posView.getElementCount(); - if (vertexCount == 0ull) - return false; - - uint32_t facenum = 0u; - size_t faceCount = 0ull; - if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) - return false; - if (faceCount > static_cast(std::numeric_limits::max())) - return false; - facenum = static_cast(faceCount); - - const size_t outputSize = BinaryPrefixBytes + static_cast(facenum) * - BinaryTriangleRecordBytes; - std::unique_ptr output(new (std::nothrow) uint8_t[outputSize]); - if (!output) - return false; - uint8_t* dst = output.get(); - - std::memset(dst, 0, BinaryHeaderBytes); - dst += BinaryHeaderBytes; - - std::memcpy(dst, &facenum, sizeof(facenum)); - dst += sizeof(facenum); - - const auto& normalView = 
geom->getNormalView(); - const bool hasNormals = static_cast(normalView); - const auto* const colorView = getColorView(geom, vertexCount); - const hlsl::float32_t3* const tightPositions = - SGeometryWriterCommon::getTightView(posView); - const hlsl::float32_t3* const tightNormals = - hasNormals ? SGeometryWriterCommon::getTightView( - normalView) - : nullptr; - const bool hasImplicitTriangleIndices = !geom->getIndexView(); - - auto decodePosition = [&](const uint32_t ix, - hlsl::float32_t3& out) -> bool { - if (tightPositions) { - out = tightPositions[ix]; - return true; - } - return posView.decodeElement(ix, out); - }; - - auto decodeNormal = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { - if (!hasNormals) - return false; - if (tightNormals) { - out = tightNormals[ix]; - return true; - } - return normalView.decodeElement(ix, out); - }; - auto computeFaceColor = [&](const hlsl::uint32_t3& idx, - uint16_t& outColor) -> bool { - outColor = 0u; - if (!colorView) - return true; - hlsl::uint32_t3 color(0u); - if (!decodeColorB8G8R8A8(*colorView, idx.x, color.x)) - return false; - if (!decodeColorB8G8R8A8(*colorView, idx.y, color.y)) - return false; - if (!decodeColorB8G8R8A8(*colorView, idx.z, color.z)) - return false; - std::array, 3> rgba = {}; - decodeColorUnitRGBAFromB8G8R8A8(color.x, rgba[0].data()); - decodeColorUnitRGBAFromB8G8R8A8(color.y, rgba[1].data()); - decodeColorUnitRGBAFromB8G8R8A8(color.z, rgba[2].data()); - const std::array rgbaAvg = { - (rgba[0][0] + rgba[1][0] + rgba[2][0]) / 3.0, - (rgba[0][1] + rgba[1][1] + rgba[2][1]) / 3.0, - (rgba[0][2] + rgba[1][2] + rgba[2][2]) / 3.0, 1.0}; - uint32_t avgColor = 0u; - encodePixels(&avgColor, rgbaAvg.data()); - outColor = packViscamColorFromB8G8R8A8(avgColor); - return true; - }; - auto writeRecord = [&dst](const float nx, const float ny, const float nz, - const float v1x, const float v1y, const float v1z, - const float v2x, const float v2y, const float v2z, - const float v3x, const float v3y, const 
float v3z, - const uint16_t attribute) -> void { - const float payload[BinaryTriangleFloatCount] = { - nx, ny, nz, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z}; - std::memcpy(dst, payload, BinaryTriangleFloatBytes); - dst += BinaryTriangleFloatBytes; - std::memcpy(dst, &attribute, BinaryTriangleAttributeBytes); - dst += BinaryTriangleAttributeBytes; - }; - auto prepareVertices = - [&](const hlsl::float32_t3& p0, const hlsl::float32_t3& p1, - const hlsl::float32_t3& p2, hlsl::float32_t3& vertex1, - hlsl::float32_t3& vertex2, hlsl::float32_t3& vertex3) -> void { - vertex1 = p2; - vertex2 = p1; - vertex3 = p0; - if (flipHandedness) { - vertex1.x = -vertex1.x; - vertex2.x = -vertex2.x; - vertex3.x = -vertex3.x; - } - }; - auto computePlaneNormal = - [&](const hlsl::float32_t3& vertex1, const hlsl::float32_t3& vertex2, - const hlsl::float32_t3& vertex3) -> hlsl::float32_t3 { - const hlsl::float32_t3 planeNormal = - hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); - const float planeNormalLen2 = hlsl::dot(planeNormal, planeNormal); - return planeNormalLen2 > 0.f ? 
hlsl::normalize(planeNormal) - : hlsl::float32_t3(0.f, 0.f, 0.f); - }; - - const bool hasFastTightPath = hasImplicitTriangleIndices && - (tightPositions != nullptr) && - (!hasNormals || (tightNormals != nullptr)); - if (hasFastTightPath && hasNormals) { - bool allFastNormalsNonZero = true; - const size_t normalCount = static_cast(facenum) * 3ull; - for (size_t i = 0ull; i < normalCount; ++i) { - const auto& n = tightNormals[i]; - if (hlsl::dot(n, n) <= 0.f) { - allFastNormalsNonZero = false; - break; - } - } - - const hlsl::float32_t3* posTri = tightPositions; - const hlsl::float32_t3* nrmTri = tightNormals; - if (allFastNormalsNonZero) { - for (uint32_t primIx = 0u; primIx < facenum; - ++primIx, posTri += 3u, nrmTri += 3u) { - uint16_t faceColor = 0u; - if (!computeFaceColor(hlsl::uint32_t3(primIx * 3u + 0u, - primIx * 3u + 1u, - primIx * 3u + 2u), - faceColor)) - return false; - - hlsl::float32_t3 vertex1 = {}; - hlsl::float32_t3 vertex2 = {}; - hlsl::float32_t3 vertex3 = {}; - prepareVertices(posTri[0u], posTri[1u], posTri[2u], vertex1, vertex2, - vertex3); - - hlsl::float32_t3 attrNormal = nrmTri[0u]; - if (flipHandedness) - attrNormal.x = -attrNormal.x; - - writeRecord(attrNormal.x, attrNormal.y, attrNormal.z, vertex1.x, - vertex1.y, vertex1.z, vertex2.x, vertex2.y, vertex2.z, - vertex3.x, vertex3.y, vertex3.z, faceColor); - } - } else { - for (uint32_t primIx = 0u; primIx < facenum; - ++primIx, posTri += 3u, nrmTri += 3u) { - uint16_t faceColor = 0u; - if (!computeFaceColor(hlsl::uint32_t3(primIx * 3u + 0u, - primIx * 3u + 1u, - primIx * 3u + 2u), - faceColor)) - return false; - - hlsl::float32_t3 vertex1 = {}; - hlsl::float32_t3 vertex2 = {}; - hlsl::float32_t3 vertex3 = {}; - prepareVertices(posTri[0u], posTri[1u], posTri[2u], vertex1, vertex2, - vertex3); - - hlsl::float32_t3 normal = hlsl::float32_t3(0.f, 0.f, 0.f); - hlsl::float32_t3 attrNormal = nrmTri[0u]; - if (hlsl::dot(attrNormal, attrNormal) <= 0.f) - attrNormal = nrmTri[1u]; - if 
(hlsl::dot(attrNormal, attrNormal) <= 0.f) - attrNormal = nrmTri[2u]; - if (hlsl::dot(attrNormal, attrNormal) > 0.f) { - if (flipHandedness) - attrNormal.x = -attrNormal.x; - normal = attrNormal; - } - - if (hlsl::dot(normal, normal) <= 0.f) - normal = computePlaneNormal(vertex1, vertex2, vertex3); - - writeRecord(normal.x, normal.y, normal.z, vertex1.x, vertex1.y, - vertex1.z, vertex2.x, vertex2.y, vertex2.z, vertex3.x, - vertex3.y, vertex3.z, faceColor); - } - } - } else if (hasFastTightPath) { - const hlsl::float32_t3* posTri = tightPositions; - for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u) { - uint16_t faceColor = 0u; - if (!computeFaceColor(hlsl::uint32_t3(primIx * 3u + 0u, - primIx * 3u + 1u, - primIx * 3u + 2u), - faceColor)) - return false; - - hlsl::float32_t3 vertex1 = {}; - hlsl::float32_t3 vertex2 = {}; - hlsl::float32_t3 vertex3 = {}; - prepareVertices(posTri[0u], posTri[1u], posTri[2u], vertex1, vertex2, - vertex3); - const hlsl::float32_t3 normal = - computePlaneNormal(vertex1, vertex2, vertex3); - - writeRecord(normal.x, normal.y, normal.z, vertex1.x, vertex1.y, - vertex1.z, vertex2.x, vertex2.y, vertex2.z, vertex3.x, - vertex3.y, vertex3.z, faceColor); - } - } else { - if (!SGeometryWriterCommon::visitTriangleIndices( - geom, - [&](const uint32_t i0, const uint32_t i1, - const uint32_t i2) -> bool { - const hlsl::uint32_t3 idx(i0, i1, i2); - uint16_t faceColor = 0u; - if (!computeFaceColor(idx, faceColor)) - return false; - - hlsl::float32_t3 p0 = {}; - hlsl::float32_t3 p1 = {}; - hlsl::float32_t3 p2 = {}; - if (!decodePosition(idx.x, p0) || !decodePosition(idx.y, p1) || - !decodePosition(idx.z, p2)) - return false; - - hlsl::float32_t3 vertex1 = p2; - hlsl::float32_t3 vertex2 = p1; - hlsl::float32_t3 vertex3 = p0; - - if (flipHandedness) { - vertex1.x = -vertex1.x; - vertex2.x = -vertex2.x; - vertex3.x = -vertex3.x; - } - - const hlsl::float32_t3 planeNormal = - hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); - const 
float planeNormalLen2 = - hlsl::dot(planeNormal, planeNormal); - hlsl::float32_t3 normal = hlsl::float32_t3(0.f, 0.f, 0.f); - if (!hasNormals) { - if (planeNormalLen2 > 0.f) - normal = hlsl::normalize(planeNormal); - } - - if (hasNormals) { - hlsl::float32_t3 n0 = {}; - if (!decodeNormal(idx.x, n0)) - return false; - - hlsl::float32_t3 attrNormal = n0; - if (hlsl::dot(attrNormal, attrNormal) <= 0.f) { - hlsl::float32_t3 n1 = {}; - if (!decodeNormal(idx.y, n1)) - return false; - attrNormal = n1; - } - if (hlsl::dot(attrNormal, attrNormal) <= 0.f) { - hlsl::float32_t3 n2 = {}; - if (!decodeNormal(idx.z, n2)) - return false; - attrNormal = n2; - } - - if (hlsl::dot(attrNormal, attrNormal) > 0.f) { - if (flipHandedness) - attrNormal.x = -attrNormal.x; - if (planeNormalLen2 > 0.f && - hlsl::dot(attrNormal, planeNormal) < 0.f) - attrNormal = -attrNormal; - normal = attrNormal; - } else if (planeNormalLen2 > 0.f) { - normal = hlsl::normalize(planeNormal); - } - } - - writeRecord(normal.x, normal.y, normal.z, vertex1.x, vertex1.y, - vertex1.z, vertex2.x, vertex2.y, vertex2.z, - vertex3.x, vertex3.y, vertex3.z, faceColor); - return true; - })) - return false; - } - - const bool writeOk = SInterchangeIO::writeFileWithPolicy( - context->writeContext.outputFile, context->ioPlan, output.get(), - outputSize, &context->writeTelemetry); - if (writeOk) - context->fileOffset += outputSize; - return writeOk; - } - - static bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, - Context* context) { - if (!geom) - return false; - - const auto* indexing = geom->getIndexingCallback(); - if (!indexing || indexing->degree() != 3u) - return false; - - const auto& posView = geom->getPositionView(); - if (!posView) - return false; - const auto& normalView = geom->getNormalView(); - const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag( - E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); - - const std::string name = context->writeContext.outputFile->getFileName() - 
.filename() - .replace_extension() - .string(); - const std::string_view solidName = name.empty() - ? std::string_view(AsciiDefaultName) - : std::string_view(name); - - if (!context->write(AsciiSolidPrefix, sizeof(AsciiSolidPrefix) - 1ull)) - return false; - - if (!context->write(solidName.data(), solidName.size())) - return false; - - if (!context->write("\n", sizeof("\n") - 1ull)) - return false; - - const uint32_t faceCount = static_cast(geom->getPrimitiveCount()); - for (uint32_t primIx = 0u; primIx < faceCount; ++primIx) { - hlsl::float32_t3 v0 = {}; - hlsl::float32_t3 v1 = {}; - hlsl::float32_t3 v2 = {}; - hlsl::uint32_t3 idx(0u); - if (!decodeTriangle(geom, indexing, posView, primIx, v0, v1, v2, &idx)) - return false; - if (!writeFaceText(v0, v1, v2, idx, normalView, flipHandedness, context)) - return false; - if (!context->write("\n", sizeof("\n") - 1ull)) - return false; - } - - if (!context->write(AsciiEndSolidPrefix, - sizeof(AsciiEndSolidPrefix) - 1ull)) - return false; - - if (!context->write(solidName.data(), solidName.size())) - return false; - - return true; - } - - static bool - writeFaceText(const hlsl::float32_t3& v1, const hlsl::float32_t3& v2, - const hlsl::float32_t3& v3, const hlsl::uint32_t3& idx, - const asset::ICPUPolygonGeometry::SDataView& normalView, - const bool flipHandedness, Context* context) { - hlsl::float32_t3 vertex1 = v3; - hlsl::float32_t3 vertex2 = v2; - hlsl::float32_t3 vertex3 = v1; - - if (flipHandedness) { - vertex1.x = -vertex1.x; - vertex2.x = -vertex2.x; - vertex3.x = -vertex3.x; - } - - const hlsl::float32_t3 planeNormal = - hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); - const float planeNormalLen2 = hlsl::dot(planeNormal, planeNormal); - hlsl::float32_t3 normal = hlsl::float32_t3(0.f, 0.f, 0.f); - if (planeNormalLen2 > 0.f) - normal = hlsl::normalize(planeNormal); - - hlsl::float32_t3 attrNormal = {}; - if (decodeTriangleNormal(normalView, idx, attrNormal)) { - if (flipHandedness) - attrNormal.x = 
-attrNormal.x; - if (planeNormalLen2 > 0.f && hlsl::dot(attrNormal, planeNormal) < 0.f) - attrNormal = -attrNormal; - normal = attrNormal; - } - - std::array faceText = {}; - char* cursor = faceText.data(); - char* const end = faceText.data() + faceText.size(); - const std::array vertices = {vertex1, vertex2, vertex3}; - if (!appendLiteral(cursor, end, "facet normal ", - sizeof("facet normal ") - 1ull)) - return false; - if (!appendVectorAsAsciiLine(cursor, end, normal)) - return false; - if (!appendLiteral(cursor, end, " outer loop\n", - sizeof(" outer loop\n") - 1ull)) - return false; - for (const auto& vertex : vertices) - if (!appendLiteral(cursor, end, " vertex ", - sizeof(" vertex ") - 1ull) || - !appendVectorAsAsciiLine(cursor, end, vertex)) - return false; - if (!appendLiteral(cursor, end, " endloop\n", - sizeof(" endloop\n") - 1ull)) - return false; - if (!appendLiteral(cursor, end, "endfacet\n", sizeof("endfacet\n") - 1ull)) - return false; - - return context->write(faceText.data(), - static_cast(cursor - faceText.data())); - } + static bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, Context* context) + { + if (!geom || !context || !context->writeContext.outputFile) + return false; + + const auto& posView = geom->getPositionView(); + if (!posView) + return false; + + const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const size_t vertexCount = posView.getElementCount(); + if (vertexCount == 0ull) + return false; + + size_t faceCount = 0ull; + if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) + return false; + if (faceCount > static_cast(std::numeric_limits::max())) + return false; + const uint32_t facenum = static_cast(faceCount); + + const size_t outputSize = BinaryPrefixBytes + static_cast(facenum) * BinaryTriangleRecordBytes; + std::unique_ptr output(new (std::nothrow) uint8_t[outputSize]); + if (!output) + return false; + uint8_t* dst = output.get(); + 
std::memset(dst, 0, BinaryHeaderBytes); + dst += BinaryHeaderBytes; + std::memcpy(dst, &facenum, sizeof(facenum)); + dst += sizeof(facenum); + + const auto& normalView = geom->getNormalView(); + const bool hasNormals = static_cast(normalView); + const auto* const colorView = getColorView(geom, vertexCount); + const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(posView); + const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; + const bool hasFastTightPath = !geom->getIndexView() && tightPositions && (!hasNormals || tightNormals); + + auto decodePosition = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { + if (tightPositions) + { + out = tightPositions[ix]; + return true; + } + return posView.decodeElement(ix, out); + }; + auto decodeNormal = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { + if (!hasNormals) + return false; + if (tightNormals) + { + out = tightNormals[ix]; + return true; + } + return normalView.decodeElement(ix, out); + }; + auto computeFaceColor = [&](const hlsl::uint32_t3& idx, uint16_t& outColor) -> bool { + outColor = 0u; + if (!colorView) + return true; + hlsl::uint32_t3 color(0u); + if (!decodeColorB8G8R8A8(*colorView, idx.x, color.x)) + return false; + if (!decodeColorB8G8R8A8(*colorView, idx.y, color.y)) + return false; + if (!decodeColorB8G8R8A8(*colorView, idx.z, color.z)) + return false; + std::array, 3> rgba = {}; + decodeColorUnitRGBAFromB8G8R8A8(color.x, rgba[0].data()); + decodeColorUnitRGBAFromB8G8R8A8(color.y, rgba[1].data()); + decodeColorUnitRGBAFromB8G8R8A8(color.z, rgba[2].data()); + const std::array rgbaAvg = {(rgba[0][0] + rgba[1][0] + rgba[2][0]) / 3.0, (rgba[0][1] + rgba[1][1] + rgba[2][1]) / 3.0, (rgba[0][2] + rgba[1][2] + rgba[2][2]) / 3.0, 1.0}; + uint32_t avgColor = 0u; + encodePixels(&avgColor, rgbaAvg.data()); + outColor = packViscamColorFromB8G8R8A8(avgColor); + return true; + }; + auto writeRecord = 
[&dst](const hlsl::float32_t3& normal, const hlsl::float32_t3& vertex1, const hlsl::float32_t3& vertex2, const hlsl::float32_t3& vertex3, const uint16_t attribute) -> void { + const float payload[BinaryTriangleFloatCount] = {normal.x, normal.y, normal.z, vertex1.x, vertex1.y, vertex1.z, vertex2.x, vertex2.y, vertex2.z, vertex3.x, vertex3.y, vertex3.z}; + std::memcpy(dst, payload, BinaryTriangleFloatBytes); + dst += BinaryTriangleFloatBytes; + std::memcpy(dst, &attribute, BinaryTriangleAttributeBytes); + dst += BinaryTriangleAttributeBytes; + }; + auto emitTriangle = [&](const hlsl::float32_t3& p0, const hlsl::float32_t3& p1, const hlsl::float32_t3& p2, const hlsl::uint32_t3& idx, const hlsl::float32_t3* const attrNormals, const uint32_t attrNormalCount, const bool alignToPlane) -> bool { + uint16_t faceColor = 0u; + if (!computeFaceColor(idx, faceColor)) + return false; + TriangleData triangle = {}; + buildTriangle(p0, p1, p2, attrNormals, attrNormalCount, flipHandedness, alignToPlane, triangle); + writeRecord(triangle.normal, triangle.vertex1, triangle.vertex2, triangle.vertex3, faceColor); + return true; + }; + + if (hasFastTightPath) + { + const hlsl::float32_t3* posTri = tightPositions; + const hlsl::float32_t3* nrmTri = tightNormals; + for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u) + { + const hlsl::uint32_t3 idx(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u); + if (!emitTriangle(posTri[0u], posTri[1u], posTri[2u], idx, nrmTri, hasNormals ? 
3u : 0u, false)) + return false; + if (nrmTri) + nrmTri += 3u; + } + } + else if (!SGeometryWriterCommon::visitTriangleIndices(geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) -> bool { + const hlsl::uint32_t3 idx(i0, i1, i2); + hlsl::float32_t3 p0 = {}; + hlsl::float32_t3 p1 = {}; + hlsl::float32_t3 p2 = {}; + if (!decodePosition(idx.x, p0) || !decodePosition(idx.y, p1) || !decodePosition(idx.z, p2)) + return false; + + hlsl::float32_t3 normals[3] = {}; + if (hasNormals) + { + if (!decodeNormal(idx.x, normals[0]) || !decodeNormal(idx.y, normals[1]) || !decodeNormal(idx.z, normals[2])) + return false; + } + return emitTriangle(p0, p1, p2, idx, hasNormals ? normals : nullptr, hasNormals ? 3u : 0u, true); + })) + return false; + + const bool writeOk = SInterchangeIO::writeFileWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, &context->writeTelemetry); + if (writeOk) + context->fileOffset += outputSize; + return writeOk; + } + + static bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, Context* context) + { + if (!geom) + return false; + + const auto* indexing = geom->getIndexingCallback(); + if (!indexing || indexing->degree() != 3u) + return false; + + const auto& posView = geom->getPositionView(); + if (!posView) + return false; + const auto& normalView = geom->getNormalView(); + const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + + const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); + const std::string_view solidName = name.empty() ? 
std::string_view(AsciiDefaultName) : std::string_view(name); + if (!context->write(AsciiSolidPrefix, sizeof(AsciiSolidPrefix) - 1ull)) + return false; + if (!context->write(solidName.data(), solidName.size())) + return false; + if (!context->write("\n", sizeof("\n") - 1ull)) + return false; + + const uint32_t faceCount = static_cast(geom->getPrimitiveCount()); + for (uint32_t primIx = 0u; primIx < faceCount; ++primIx) + { + hlsl::float32_t3 v0 = {}; + hlsl::float32_t3 v1 = {}; + hlsl::float32_t3 v2 = {}; + hlsl::uint32_t3 idx(0u); + if (!decodeTriangle(geom, indexing, posView, primIx, v0, v1, v2, &idx)) + return false; + if (!writeFaceText(v0, v1, v2, idx, normalView, flipHandedness, context)) + return false; + if (!context->write("\n", sizeof("\n") - 1ull)) + return false; + } + + if (!context->write(AsciiEndSolidPrefix, sizeof(AsciiEndSolidPrefix) - 1ull)) + return false; + if (!context->write(solidName.data(), solidName.size())) + return false; + return true; + } + + static bool writeFaceText(const hlsl::float32_t3& v1, const hlsl::float32_t3& v2, const hlsl::float32_t3& v3, const hlsl::uint32_t3& idx, const asset::ICPUPolygonGeometry::SDataView& normalView, const bool flipHandedness, Context* context) + { + hlsl::float32_t3 attrNormal = {}; + TriangleData triangle = {}; + const hlsl::float32_t3* const attrNormalPtr = decodeTriangleNormal(normalView, idx, attrNormal) ? &attrNormal : nullptr; + buildTriangle(v1, v2, v3, attrNormalPtr, attrNormalPtr ? 
1u : 0u, flipHandedness, true, triangle); + + std::array faceText = {}; + char* cursor = faceText.data(); + char* const end = faceText.data() + faceText.size(); + const std::array vertices = {triangle.vertex1, triangle.vertex2, triangle.vertex3}; + if (!appendLiteral(cursor, end, "facet normal ", sizeof("facet normal ") - 1ull)) + return false; + if (!appendVectorAsAsciiLine(cursor, end, triangle.normal)) + return false; + if (!appendLiteral(cursor, end, " outer loop\n", sizeof(" outer loop\n") - 1ull)) + return false; + for (const auto& vertex : vertices) + { + if (!appendLiteral(cursor, end, " vertex ", sizeof(" vertex ") - 1ull)) + return false; + if (!appendVectorAsAsciiLine(cursor, end, vertex)) + return false; + } + if (!appendLiteral(cursor, end, " endloop\n", sizeof(" endloop\n") - 1ull)) + return false; + if (!appendLiteral(cursor, end, "endfacet\n", sizeof("endfacet\n") - 1ull)) + return false; + return context->write(faceText.data(), static_cast(cursor - faceText.data())); + } }; } diff --git a/src/nbl/asset/interchange/SGeometryAttributeEmit.h b/src/nbl/asset/interchange/SGeometryAttributeEmit.h new file mode 100644 index 0000000000..a326ad8ea8 --- /dev/null +++ b/src/nbl/asset/interchange/SGeometryAttributeEmit.h @@ -0,0 +1,41 @@ +// Internal src-only header. +// Do not include from public headers. 
+#ifndef _NBL_ASSET_S_GEOMETRY_ATTRIBUTE_EMIT_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_ATTRIBUTE_EMIT_H_INCLUDED_ + +#include "nbl/asset/interchange/SGeometryViewDecode.h" + +#include +#include + + +namespace nbl::asset +{ + +class SGeometryAttributeEmit +{ + public: + template + static inline bool emit(Sink& sink, const SGeometryViewDecode::Prepared& view, const size_t ix, const uint32_t componentCount, const bool flipVectors) + { + std::array decoded = {}; + if (!view.decode(ix, decoded)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + OutT value = decoded[c]; + if constexpr (std::is_signed_v || std::is_floating_point_v) + { + if (flipVectors && c == 0u) + value = -value; + } + if (!sink.append(value)) + return false; + } + return true; + } +}; + +} + +#endif diff --git a/src/nbl/asset/interchange/impl/SContentHashBuild.h b/src/nbl/asset/interchange/impl/SContentHashBuild.h new file mode 100644 index 0000000000..a9d77b3943 --- /dev/null +++ b/src/nbl/asset/interchange/impl/SContentHashBuild.h @@ -0,0 +1,72 @@ +// Internal src-only header. +// Do not include from public headers. 
+#ifndef _NBL_ASSET_IMPL_S_CONTENT_HASH_BUILD_H_INCLUDED_ +#define _NBL_ASSET_IMPL_S_CONTENT_HASH_BUILD_H_INCLUDED_ + +#include "nbl/core/declarations.h" +#include "nbl/asset/ICPUBuffer.h" + +#include + + +namespace nbl::asset::impl +{ + + class SContentHashBuild +{ + public: + bool enabled = false; + bool inlineHash = false; + core::vector> hashedBuffers = {}; + std::jthread deferredThread = {}; + + static inline SContentHashBuild create(const bool enabled, const bool inlineHash) + { + return {.enabled = enabled, .inlineHash = inlineHash}; + } + + inline bool hashesInline() const + { + return enabled && inlineHash; + } + + inline bool hashesDeferred() const + { + return enabled && !inlineHash; + } + + inline void hashNow(ICPUBuffer* const buffer) + { + if (!hashesInline() || !buffer) + return; + if (buffer->getContentHash() != IPreHashed::INVALID_HASH) + return; + for (const auto& hashed : hashedBuffers) + if (hashed.get() == buffer) + return; + buffer->setContentHash(buffer->computeContentHash()); + hashedBuffers.push_back(core::smart_refctd_ptr(buffer)); + } + + inline void tryDefer(ICPUBuffer* const buffer) + { + if (!hashesDeferred() || !buffer) + return; + if (deferredThread.joinable()) + return; + if (buffer->getContentHash() != IPreHashed::INVALID_HASH) + return; + auto keepAlive = core::smart_refctd_ptr(buffer); + deferredThread = std::jthread([buffer = std::move(keepAlive)]() mutable {buffer->setContentHash(buffer->computeContentHash());}); + } + + inline void wait() + { + if (deferredThread.joinable()) + deferredThread.join(); + } +}; + +} + +#endif diff --git a/src/nbl/asset/interchange/impl/SLoadSession.h b/src/nbl/asset/interchange/impl/SLoadSession.h new file mode 100644 index 0000000000..0d368e0119 --- /dev/null +++ b/src/nbl/asset/interchange/impl/SLoadSession.h @@ -0,0 +1,74 @@ +// Internal src-only header. +// Do not include from public headers. 
+#ifndef _NBL_ASSET_IMPL_S_LOAD_SESSION_H_INCLUDED_ +#define _NBL_ASSET_IMPL_S_LOAD_SESSION_H_INCLUDED_ + +#include "SFileAccess.h" +#include "SIODiagnostics.h" + +#include + + +namespace nbl::asset::impl +{ + +class SLoadSession +{ + public: + system::IFile* file = nullptr; + const SFileIOPolicy* requestedPolicy = nullptr; + SResolvedFileIOPolicy ioPlan = {}; + uint64_t payloadBytes = 0ull; + const char* owner = nullptr; + std::string fileName = {}; + + template + static inline bool begin(Logger& logger, const char* const owner, system::IFile* file, const SFileIOPolicy& ioPolicy, const uint64_t payloadBytes, const bool sizeKnown, SLoadSession& out) + { + out = {}; + if (!file) + return false; + + out.file = file; + out.requestedPolicy = &ioPolicy; + out.ioPlan = SFileAccess::resolvePlan(ioPolicy, payloadBytes, sizeKnown, file); + out.payloadBytes = payloadBytes; + out.owner = owner; + out.fileName = file->getFileName().string(); + return !SIODiagnostics::logInvalidPlan(logger, owner, out.fileName.c_str(), out.ioPlan); + } + + inline bool isWholeFile() const + { + return ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile; + } + + inline const uint8_t* mappedPointer() const + { + if (!file || !isWholeFile()) + return nullptr; + return reinterpret_cast(static_cast(file)->getMappedPointer()); + } + + inline const uint8_t* readRange(const size_t offset, const size_t bytes, core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, const bool zeroTerminate = false) const + { + return SFileAccess::readRange(file, offset, bytes, storage, ioPlan, ioTelemetry, zeroTerminate); + } + + inline const uint8_t* mapOrReadWholeFile(core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, bool* const wasMapped = nullptr, const bool zeroTerminate = false) const + { + return SFileAccess::mapOrReadWholeFile(file, static_cast(payloadBytes), storage, ioPlan, ioTelemetry, wasMapped, zeroTerminate); + } + + template + inline void logTinyIO(Logger& 
logger, const Telemetry& telemetry, const char* const opName = "reads") const + { + if (!requestedPolicy) + return; + SIODiagnostics::logTinyIO(logger, owner, fileName.c_str(), telemetry, payloadBytes, *requestedPolicy, opName); + } +}; + +} + +#endif From dc7282e7509055fce6b94e48b3453bc41f13b99c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 14:35:35 +0100 Subject: [PATCH 076/118] Reduce remaining loader duplication --- .../asset/interchange/CPLYMeshFileLoader.cpp | 120 ++++++------ src/nbl/asset/interchange/CPLYMeshWriter.cpp | 61 +++--- .../asset/interchange/CSTLMeshFileLoader.cpp | 180 ++++++++---------- .../asset/interchange/SGeometryViewDecode.h | 50 ++--- 4 files changed, 174 insertions(+), 237 deletions(-) diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index e696e1f698..afa8a3bb60 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -1612,6 +1612,41 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( // loop through each of the elements bool verticesProcessed = false; + const std::string fileName = _file->getFileName().string(); + auto logMalformedElement = [&](const char* const elementName) -> void { + _params.logger.log("PLY %s fast path failed on malformed data for %s", system::ILogger::ELL_ERROR, elementName, fileName.c_str()); + }; + auto skipUnknownElement = [&](const Parse::Context::SElement& el) -> bool { + if (ctx.IsBinaryFile && el.KnownSize) { + const uint64_t bytesToSkip64 = static_cast(el.KnownSize) * static_cast(el.Count); + if (bytesToSkip64 > static_cast(std::numeric_limits::max())) + return false; + ctx.moveForward(static_cast(bytesToSkip64)); + } else { + for (size_t j = 0; j < el.Count; ++j) + el.skipElement(ctx); + } + return true; + }; + auto readFaceElement = [&](const Parse::Context::SElement& el) -> bool { + const uint32_t vertexCount32 = vertCount <= 
static_cast(std::numeric_limits::max()) ? static_cast(vertCount) : 0u; + const auto fastFaceResult = ctx.readFaceElementFast(el, indices, maxIndexRead, faceCount, vertexCount32, contentHashBuild.hashesDeferred(), precomputedIndexHash); + if (fastFaceResult == Parse::Context::EFastFaceReadResult::Success) { + ++fastFaceElementCount; + return true; + } + if (fastFaceResult == Parse::Context::EFastFaceReadResult::NotApplicable) { + indices.reserve(indices.size() + el.Count * 3u); + for (size_t j = 0; j < el.Count; ++j) { + if (!ctx.readFace(el, indices, maxIndexRead, vertexCount32)) + return false; + ++faceCount; + } + return true; + } + logMalformedElement("face"); + return false; + }; for (uint32_t i = 0; i < ctx.ElementList.size(); ++i) { auto& el = ctx.ElementList[i]; @@ -1794,39 +1829,23 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( .dstFmt = componentFormat}); } }; - if (posView.format != EF_UNKNOWN) { - auto beginIx = ctx.vertAttrIts.size(); - setFinalFormat(posView); - auto view = createView(posView.format, el.Count); - for (const auto size = ctx.vertAttrIts.size(); beginIx != size; - beginIx++) - ctx.vertAttrIts[beginIx].ptr += - ptrdiff_t(view.src.buffer->getPointer()) + view.src.offset; - geometry->setPositionView(std::move(view)); - } - if (normalView.format != EF_UNKNOWN) { - auto beginIx = ctx.vertAttrIts.size(); - setFinalFormat(normalView); - auto view = createView(normalView.format, el.Count); - for (const auto size = ctx.vertAttrIts.size(); beginIx != size; - beginIx++) - ctx.vertAttrIts[beginIx].ptr += - ptrdiff_t(view.src.buffer->getPointer()) + view.src.offset; - geometry->setNormalView(std::move(view)); - } - if (uvView.format != EF_UNKNOWN) { + auto attachStructuredView = [&](ICPUPolygonGeometry::SDataViewBase& baseView, auto&& setter) -> void { + if (baseView.format == EF_UNKNOWN) + return; auto beginIx = ctx.vertAttrIts.size(); - setFinalFormat(uvView); - auto view = createView(uvView.format, el.Count); - for (const auto size = 
ctx.vertAttrIts.size(); beginIx != size; - beginIx++) - ctx.vertAttrIts[beginIx].ptr += - ptrdiff_t(view.src.buffer->getPointer()) + view.src.offset; + setFinalFormat(baseView); + auto view = createView(baseView.format, el.Count); + for (const auto size = ctx.vertAttrIts.size(); beginIx != size; ++beginIx) + ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer()) + view.src.offset; + setter(std::move(view)); + }; + attachStructuredView(posView, [&](auto view) { geometry->setPositionView(std::move(view)); }); + attachStructuredView(normalView, [&](auto view) { geometry->setNormalView(std::move(view)); }); + attachStructuredView(uvView, [&](auto view) { auto* const auxViews = geometry->getAuxAttributeViews(); auxViews->resize(SPLYPolygonGeometryAuxLayout::UV0 + 1u); - auxViews->operator[](SPLYPolygonGeometryAuxLayout::UV0) = - std::move(view); - } + auxViews->operator[](SPLYPolygonGeometryAuxLayout::UV0) = std::move(view); + }); // for (auto& view : extraViews) ctx.vertAttrIts.push_back( @@ -1844,51 +1863,18 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( Parse::Context::EFastVertexReadResult::NotApplicable) { ctx.readVertex(_params, el); } else { - _params.logger.log( - "PLY vertex fast path failed on malformed data for %s", - system::ILogger::ELL_ERROR, _file->getFileName().string().c_str()); + logMalformedElement("vertex"); return {}; } visitVertexAttributeViews(hashViewBufferIfNeeded); tryLaunchDeferredHash(geometry->getPositionView()); verticesProcessed = true; } else if (el.Name == "face") { - const uint32_t vertexCount32 = - vertCount <= static_cast(std::numeric_limits::max()) - ? 
static_cast(vertCount) - : 0u; - const auto fastFaceResult = ctx.readFaceElementFast( - el, indices, maxIndexRead, faceCount, vertexCount32, - contentHashBuild.hashesDeferred(), precomputedIndexHash); - if (fastFaceResult == Parse::Context::EFastFaceReadResult::Success) { - ++fastFaceElementCount; - } else if (fastFaceResult == - Parse::Context::EFastFaceReadResult::NotApplicable) { - indices.reserve(indices.size() + el.Count * 3u); - for (size_t j = 0; j < el.Count; ++j) { - if (!ctx.readFace(el, indices, maxIndexRead, vertexCount32)) - return {}; - ++faceCount; - } - } else { - _params.logger.log("PLY face fast path failed on malformed data for %s", - system::ILogger::ELL_ERROR, - _file->getFileName().string().c_str()); + if (!readFaceElement(el)) return {}; - } } else { - // skip these elements - if (ctx.IsBinaryFile && el.KnownSize) { - const uint64_t bytesToSkip64 = static_cast(el.KnownSize) * - static_cast(el.Count); - if (bytesToSkip64 > - static_cast(std::numeric_limits::max())) - return {}; - ctx.moveForward(static_cast(bytesToSkip64)); - } else { - for (size_t j = 0; j < el.Count; ++j) - el.skipElement(ctx); - } + if (!skipUnknownElement(el)) + return {}; } } diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 25ca52e45d..556a8e5d0a 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -277,6 +277,22 @@ struct Parse return SGeometryAttributeEmit::emit(sink, view.stored, ix, view.components, view.flipVectors); } + template + static inline void prepareSemantic(PreparedView& view, const ICPUPolygonGeometry::SDataView& src, const bool flipVectors) + { + view.flipVectors = flipVectors; + view.semantic = SGeometryViewDecode::prepare(src); + view.emit = &emitPrepared; + } + + template + static inline void prepareStored(PreparedView& view, const ICPUPolygonGeometry::SDataView& src, const bool flipVectors) + { + view.flipVectors = flipVectors; + 
view.stored = SGeometryViewDecode::prepare(src); + view.emit = &emitPrepared; + } + static PreparedView create(const ICPUPolygonGeometry::SDataView* view, const uint32_t components, const ScalarType scalarType, const bool flipVectors) { PreparedView retval = {.components = components}; @@ -285,43 +301,14 @@ struct Parse switch (scalarType) { - case ScalarType::Float64: - retval.flipVectors = flipVectors; - retval.semantic = SGeometryViewDecode::prepare(*view); - retval.emit = &emitPrepared; - break; - case ScalarType::Float32: - retval.flipVectors = flipVectors; - retval.semantic = SGeometryViewDecode::prepare(*view); - retval.emit = &emitPrepared; - break; - case ScalarType::Int8: - retval.flipVectors = flipVectors; - retval.stored = SGeometryViewDecode::prepare(*view); - retval.emit = &emitPrepared; - break; - case ScalarType::UInt8: - retval.stored = SGeometryViewDecode::prepare(*view); - retval.emit = &emitPrepared; - break; - case ScalarType::Int16: - retval.flipVectors = flipVectors; - retval.stored = SGeometryViewDecode::prepare(*view); - retval.emit = &emitPrepared; - break; - case ScalarType::UInt16: - retval.stored = SGeometryViewDecode::prepare(*view); - retval.emit = &emitPrepared; - break; - case ScalarType::Int32: - retval.flipVectors = flipVectors; - retval.stored = SGeometryViewDecode::prepare(*view); - retval.emit = &emitPrepared; - break; - case ScalarType::UInt32: - retval.stored = SGeometryViewDecode::prepare(*view); - retval.emit = &emitPrepared; - break; + case ScalarType::Float64: prepareSemantic(retval, *view, flipVectors); break; + case ScalarType::Float32: prepareSemantic(retval, *view, flipVectors); break; + case ScalarType::Int8: prepareStored(retval, *view, flipVectors); break; + case ScalarType::UInt8: prepareStored(retval, *view, false); break; + case ScalarType::Int16: prepareStored(retval, *view, flipVectors); break; + case ScalarType::UInt16: prepareStored(retval, *view, false); break; + case ScalarType::Int32: 
prepareStored(retval, *view, flipVectors); break; + case ScalarType::UInt32: prepareStored(retval, *view, false); break; } return retval; } diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 0bd1e9c112..3241e56380 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -34,6 +34,14 @@ struct Parse { using Common = impl::TextParse; + struct LayoutProbe + { + bool hasPrefix = false; + bool startsWithSolid = false; + bool binaryBySize = false; + uint32_t triangleCount = 0u; + }; + static hlsl::float32_t3 resolveStoredNormal(const hlsl::float32_t3& fileNormal) { const float fileLen2 = hlsl::dot(fileNormal, fileNormal); @@ -73,6 +81,37 @@ struct Parse static constexpr size_t FloatChannelsPerVertex = 3ull; }; + static bool probeLayout(system::IFile* file, const size_t fileSize, const uint8_t* const wholeFileData, SFileReadTelemetry* const ioTelemetry, LayoutProbe& out) + { + out = {}; + if (!file || fileSize < Context::TextProbeBytes) + return false; + + if (fileSize >= Context::BinaryPrefixBytes) + { + std::array prefix = {}; + out.hasPrefix = wholeFileData ? 
true : SInterchangeIO::readFileExact(file, prefix.data(), 0ull, Context::BinaryPrefixBytes, ioTelemetry); + if (out.hasPrefix) + { + if (wholeFileData) + std::memcpy(prefix.data(), wholeFileData, Context::BinaryPrefixBytes); + out.startsWithSolid = (std::memcmp(prefix.data(), "solid ", Context::TextProbeBytes) == 0); + std::memcpy(&out.triangleCount, prefix.data() + Context::BinaryHeaderBytes, sizeof(out.triangleCount)); + const uint64_t expectedSize = Context::BinaryPrefixBytes + static_cast(out.triangleCount) * Context::TriangleRecordBytes; + out.binaryBySize = (expectedSize == fileSize); + return true; + } + } + + char header[Context::TextProbeBytes] = {}; + if (wholeFileData) + std::memcpy(header, wholeFileData, sizeof(header)); + else if (!SInterchangeIO::readFileExact(file, header, 0ull, sizeof(header), ioTelemetry)) + return false; + out.startsWithSolid = (std::strncmp(header, "solid ", Context::TextProbeBytes) == 0); + return true; + } + class AsciiParser { public: @@ -186,88 +225,39 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa return {}; } - bool binary = false; - bool hasBinaryTriCountFromDetect = false; - uint32_t binaryTriCountFromDetect = 0u; - { - std::array prefix = {}; - bool hasPrefix = false; - if (wholeFileData && filesize >= Context::BinaryPrefixBytes) { - std::memcpy(prefix.data(), wholeFileData, Context::BinaryPrefixBytes); - hasPrefix = true; - } else { - hasPrefix = filesize >= Context::BinaryPrefixBytes && - SInterchangeIO::readFileExact( - context.inner.mainFile, prefix.data(), 0ull, - Context::BinaryPrefixBytes, &context.ioTelemetry); - } - bool startsWithSolid = false; - if (hasPrefix) { - startsWithSolid = - (std::memcmp(prefix.data(), "solid ", Context::TextProbeBytes) == 0); - } else { - char header[Context::TextProbeBytes] = {}; - if (wholeFileData) - std::memcpy(header, wholeFileData, sizeof(header)); - else if (!SInterchangeIO::readFileExact(context.inner.mainFile, header, - 0ull, 
sizeof(header), - &context.ioTelemetry)) - return {}; - startsWithSolid = - (std::strncmp(header, "solid ", Context::TextProbeBytes) == 0); - } - - bool binaryBySize = false; - if (hasPrefix) { - uint32_t triCount = 0u; - std::memcpy(&triCount, prefix.data() + Context::BinaryHeaderBytes, - sizeof(triCount)); - binaryTriCountFromDetect = triCount; - hasBinaryTriCountFromDetect = true; - const uint64_t expectedSize = - Context::BinaryPrefixBytes + - static_cast(triCount) * Context::TriangleRecordBytes; - binaryBySize = (expectedSize == filesize); - } - - if (binaryBySize) - binary = true; - else if (!startsWithSolid) - binary = true; - else - binary = false; - } - - auto geometry = core::make_smart_refctd_ptr(); - geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - hlsl::shapes::util::AABBAccumulator3 parsedAABB = - hlsl::shapes::util::createAABBAccumulator(); - uint64_t vertexCount = 0ull; - - if (binary) { - parsePath = "binary_fast"; - if (filesize < Context::BinaryPrefixBytes) - return {}; - - uint32_t triangleCount32 = binaryTriCountFromDetect; - if (!hasBinaryTriCountFromDetect) { - if (!SInterchangeIO::readFileExact( - context.inner.mainFile, &triangleCount32, - Context::BinaryHeaderBytes, sizeof(triangleCount32), - &context.ioTelemetry)) - return {}; - } + Parse::LayoutProbe layout = {}; + if (!Parse::probeLayout(context.inner.mainFile, filesize, wholeFileData, &context.ioTelemetry, layout)) + return {}; + const bool binary = layout.binaryBySize || !layout.startsWithSolid; + const bool hasBinaryTriCountFromDetect = layout.hasPrefix; + const uint32_t binaryTriCountFromDetect = layout.triangleCount; + + auto geometry = core::make_smart_refctd_ptr(); + geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); + uint64_t vertexCount = 0ull; + + if (binary) { + parsePath = "binary_fast"; + if (filesize < Context::BinaryPrefixBytes) + return {}; - 
triangleCount = triangleCount32; - const size_t dataSize = - static_cast(triangleCount) * Context::TriangleRecordBytes; - const size_t expectedSize = Context::BinaryPrefixBytes + dataSize; - if (filesize < expectedSize) - return {}; + uint32_t triangleCount32 = binaryTriCountFromDetect; + if (!hasBinaryTriCountFromDetect) + { + if (!SInterchangeIO::readFileExact(context.inner.mainFile, &triangleCount32, Context::BinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) + return {}; + } + + triangleCount = triangleCount32; + const size_t dataSize = static_cast(triangleCount) * Context::TriangleRecordBytes; + const size_t expectedSize = Context::BinaryPrefixBytes + dataSize; + if (filesize < expectedSize) + return {}; - const uint8_t* payloadData = wholeFileData ? (wholeFileData + Context::BinaryPrefixBytes) : loadSession.readRange(Context::BinaryPrefixBytes, dataSize, wholeFilePayload, &context.ioTelemetry); - if (!payloadData) - return {}; + const uint8_t* payloadData = wholeFileData ? 
(wholeFileData + Context::BinaryPrefixBytes) : loadSession.readRange(Context::BinaryPrefixBytes, dataSize, wholeFilePayload, &context.ioTelemetry); + if (!payloadData) + return {}; vertexCount = triangleCount * Context::VerticesPerTriangle; const size_t vertexCountSizeT = static_cast(vertexCount); @@ -756,31 +746,15 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa bool CSTLMeshFileLoader::isALoadableFileFormat( system::IFile* _file, const system::logger_opt_ptr) const { - using Context = Parse::Context; - - if (!_file || _file->getSize() <= Context::TextProbeBytes) - return false; - - const size_t fileSize = _file->getSize(); - if (fileSize < Context::BinaryPrefixBytes) { - char header[Context::TextProbeBytes] = {}; - if (!SInterchangeIO::readFileExact(_file, header, 0ull, sizeof(header))) - return false; - return std::strncmp(header, "solid ", Context::TextProbeBytes) == 0; - } - - std::array prefix = {}; - if (!SInterchangeIO::readFileExact(_file, prefix.data(), 0ull, prefix.size())) - return false; + using Context = Parse::Context; - uint32_t triangleCount = 0u; - std::memcpy(&triangleCount, prefix.data() + Context::BinaryHeaderBytes, - sizeof(triangleCount)); - if (std::memcmp(prefix.data(), "solid ", Context::TextProbeBytes) == 0) - return true; + if (!_file || _file->getSize() <= Context::TextProbeBytes) + return false; - return fileSize == (Context::TriangleRecordBytes * triangleCount + - Context::BinaryPrefixBytes); + Parse::LayoutProbe layout = {}; + if (!Parse::probeLayout(_file, _file->getSize(), nullptr, nullptr, layout)) + return false; + return layout.startsWithSolid || layout.binaryBySize; } } diff --git a/src/nbl/asset/interchange/SGeometryViewDecode.h b/src/nbl/asset/interchange/SGeometryViewDecode.h index 6c4c2c024e..1cec082565 100644 --- a/src/nbl/asset/interchange/SGeometryViewDecode.h +++ b/src/nbl/asset/interchange/SGeometryViewDecode.h @@ -90,37 +90,8 @@ class SGeometryViewDecode } private: - template 
requires hlsl::concepts::Vector - static inline bool decodePrepared(const Prepared& prepared, const size_t ix, V& out) - { - using scalar_t = typename hlsl::vector_traits::scalar_type; - constexpr uint32_t Dimension = hlsl::vector_traits::Dimension; - if (!prepared || Dimension == 0u) - return false; - - using storage_t = std::conditional_t, hlsl::float64_t, std::conditional_t, int64_t, uint64_t>>; - std::array tmp = {}; - const void* srcArr[4] = {prepared.data + ix * prepared.stride, nullptr}; - if (!decodePixels(prepared.format, srcArr, tmp.data(), 0u, 0u)) - return false; - - const uint32_t componentCount = std::min({prepared.channels, Dimension, 4u}); - if constexpr (Mode == EMode::Semantic && std::is_floating_point_v) - { - if (prepared.normalized) - { - for (uint32_t i = 0u; i < componentCount; ++i) - tmp[i] = static_cast(tmp[i] * (prepared.range.maxVx[i] - prepared.range.minVx[i]) + prepared.range.minVx[i]); - } - } - - for (uint32_t i = 0u; i < componentCount; ++i) - out[i] = static_cast(tmp[i]); - return true; - } - template - static inline bool decodePrepared(const Prepared& prepared, const size_t ix, T* out, const uint32_t outDim) + static inline bool decodePreparedComponents(const Prepared& prepared, const size_t ix, T* out, const uint32_t outDim) { if (!prepared || !out || outDim == 0u) return false; @@ -145,6 +116,25 @@ class SGeometryViewDecode out[i] = static_cast(tmp[i]); return true; } + + template requires hlsl::concepts::Vector + static inline bool decodePrepared(const Prepared& prepared, const size_t ix, V& out) + { + using scalar_t = typename hlsl::vector_traits::scalar_type; + constexpr uint32_t Dimension = hlsl::vector_traits::Dimension; + std::array tmp = {}; + if (!decodePreparedComponents(prepared, ix, tmp.data(), Dimension)) + return false; + for (uint32_t i = 0u; i < Dimension; ++i) + out[i] = tmp[i]; + return true; + } + + template + static inline bool decodePrepared(const Prepared& prepared, const size_t ix, T* out, const uint32_t 
outDim) + { + return decodePreparedComponents(prepared, ix, out, outDim); + } }; } From 60e9b5c94f648af450b017eac9bfa5bb8516aee9 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 14:39:17 +0100 Subject: [PATCH 077/118] Trim parser boilerplate --- .../asset/interchange/COBJMeshFileLoader.cpp | 27 ++-- .../asset/interchange/CPLYMeshFileLoader.cpp | 143 +++--------------- 2 files changed, 33 insertions(+), 137 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 26a56a1bc5..e32e89cce4 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -708,6 +708,12 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( } return false; }; + auto acquireTriangleCorners = [&](auto&& acquire, const std::array& triIdx, hlsl::uint32_t3& cornerIx) -> bool { + return acquire(triIdx[0], cornerIx.x) && acquire(triIdx[1], cornerIx.y) && acquire(triIdx[2], cornerIx.z); + }; + auto appendTriangle = [&](const hlsl::uint32_t3& cornerIx) -> bool { + return appendIndex(cornerIx.z) && appendIndex(cornerIx.y) && appendIndex(cornerIx.x); + }; uint32_t currentSmoothingGroup = 0u; while (bufPtr < bufEnd) { @@ -824,16 +830,11 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( } if (triangleFastPath) { hlsl::uint32_t3 cornerIx = {}; - if (!acquireCornerIndexPositiveTriplet(triIdx[0], cornerIx.x)) - return {}; - if (!acquireCornerIndexPositiveTriplet(triIdx[1], cornerIx.y)) - return {}; - if (!acquireCornerIndexPositiveTriplet(triIdx[2], cornerIx.z)) + if (!acquireTriangleCorners(acquireCornerIndexPositiveTriplet, triIdx, cornerIx)) return {}; faceFastTokenCount += 3u; currentFaceFastTokenCount += 3u; - if (!appendIndex(cornerIx.z) || !appendIndex(cornerIx.y) || - !appendIndex(cornerIx.x)) + if (!appendTriangle(cornerIx)) return {}; } else { const char* linePtr = lineStart + 1; @@ -843,19 +844,11 @@ asset::SAssetBundle 
COBJMeshFileLoader::loadAsset( if (parsedFirstThree) { hlsl::uint32_t3 cornerIx = {}; - if (!acquireCornerIndex(triIdx[0], currentSmoothingGroup, - cornerIx.x)) - return {}; - if (!acquireCornerIndex(triIdx[1], currentSmoothingGroup, - cornerIx.y)) - return {}; - if (!acquireCornerIndex(triIdx[2], currentSmoothingGroup, - cornerIx.z)) + if (!acquireTriangleCorners([&](const hlsl::int32_t3& idx, uint32_t& outIx) { return acquireCornerIndex(idx, currentSmoothingGroup, outIx); }, triIdx, cornerIx)) return {}; faceFallbackTokenCount += 3u; currentFaceFallbackTokenCount += 3u; - if (!appendIndex(cornerIx.z) || !appendIndex(cornerIx.y) || - !appendIndex(cornerIx.x)) + if (!appendTriangle(cornerIx)) return {}; firstCorner = cornerIx.x; previousCorner = cornerIx.z; diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index afa8a3bb60..cb4b713ee8 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -39,6 +39,28 @@ struct Parse return text ? std::string_view{text} : std::string_view{}; } + template + static E_FORMAT selectStructuredFormat(const std::array& formats, const uint32_t componentCount) + { + return componentCount > 0u && componentCount <= N ? 
formats[componentCount - 1u] : EF_UNKNOWN; + } + + static E_FORMAT expandStructuredFormat(const E_FORMAT componentFormat, const uint32_t componentCount) + { + switch (componentFormat) + { + case EF_R8_SINT: return selectStructuredFormat(std::to_array({EF_R8_SINT, EF_R8G8_SINT, EF_R8G8B8_SINT, EF_R8G8B8A8_SINT}), componentCount); + case EF_R8_UINT: return selectStructuredFormat(std::to_array({EF_R8_UINT, EF_R8G8_UINT, EF_R8G8B8_UINT, EF_R8G8B8A8_UINT}), componentCount); + case EF_R16_SINT: return selectStructuredFormat(std::to_array({EF_R16_SINT, EF_R16G16_SINT, EF_R16G16B16_SINT, EF_R16G16B16A16_SINT}), componentCount); + case EF_R16_UINT: return selectStructuredFormat(std::to_array({EF_R16_UINT, EF_R16G16_UINT, EF_R16G16B16_UINT, EF_R16G16B16A16_UINT}), componentCount); + case EF_R32_SINT: return selectStructuredFormat(std::to_array({EF_R32_SINT, EF_R32G32_SINT, EF_R32G32B32_SINT, EF_R32G32B32A32_SINT}), componentCount); + case EF_R32_UINT: return selectStructuredFormat(std::to_array({EF_R32_UINT, EF_R32G32_UINT, EF_R32G32B32_UINT, EF_R32G32B32A32_UINT}), componentCount); + case EF_R32_SFLOAT: return selectStructuredFormat(std::to_array({EF_R32_SFLOAT, EF_R32G32_SFLOAT, EF_R32G32B32_SFLOAT, EF_R32G32B32A32_SFLOAT}), componentCount); + case EF_R64_SFLOAT: return selectStructuredFormat(std::to_array({EF_R64_SFLOAT, EF_R64G64_SFLOAT, EF_R64G64B64_SFLOAT, EF_R64G64B64A64_SFLOAT}), componentCount); + default: return EF_UNKNOWN; + } + } + struct Context { static constexpr uint64_t ReadWindowPaddingBytes = 1ull; @@ -1700,126 +1722,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( [&ctx](ICPUPolygonGeometry::SDataViewBase& view) -> void { const auto componentFormat = view.format; const auto componentCount = view.stride + 1; - // turn single channel format to multiple - view.format = [=]() -> E_FORMAT { - switch (view.format) { - case EF_R8_SINT: - switch (componentCount) { - case 1: - return EF_R8_SINT; - case 2: - return EF_R8G8_SINT; - case 3: - return EF_R8G8B8_SINT; - 
case 4: - return EF_R8G8B8A8_SINT; - default: - break; - } - break; - case EF_R8_UINT: - switch (componentCount) { - case 1: - return EF_R8_UINT; - case 2: - return EF_R8G8_UINT; - case 3: - return EF_R8G8B8_UINT; - case 4: - return EF_R8G8B8A8_UINT; - default: - break; - } - break; - case EF_R16_SINT: - switch (componentCount) { - case 1: - return EF_R16_SINT; - case 2: - return EF_R16G16_SINT; - case 3: - return EF_R16G16B16_SINT; - case 4: - return EF_R16G16B16A16_SINT; - default: - break; - } - break; - case EF_R16_UINT: - switch (componentCount) { - case 1: - return EF_R16_UINT; - case 2: - return EF_R16G16_UINT; - case 3: - return EF_R16G16B16_UINT; - case 4: - return EF_R16G16B16A16_UINT; - default: - break; - } - break; - case EF_R32_SINT: - switch (componentCount) { - case 1: - return EF_R32_SINT; - case 2: - return EF_R32G32_SINT; - case 3: - return EF_R32G32B32_SINT; - case 4: - return EF_R32G32B32A32_SINT; - default: - break; - } - break; - case EF_R32_UINT: - switch (componentCount) { - case 1: - return EF_R32_UINT; - case 2: - return EF_R32G32_UINT; - case 3: - return EF_R32G32B32_UINT; - case 4: - return EF_R32G32B32A32_UINT; - default: - break; - } - break; - case EF_R32_SFLOAT: - switch (componentCount) { - case 1: - return EF_R32_SFLOAT; - case 2: - return EF_R32G32_SFLOAT; - case 3: - return EF_R32G32B32_SFLOAT; - case 4: - return EF_R32G32B32A32_SFLOAT; - default: - break; - } - break; - case EF_R64_SFLOAT: - switch (componentCount) { - case 1: - return EF_R64_SFLOAT; - case 2: - return EF_R64G64_SFLOAT; - case 3: - return EF_R64G64B64_SFLOAT; - case 4: - return EF_R64G64B64A64_SFLOAT; - default: - break; - } - break; - default: - break; - } - return EF_UNKNOWN; - }(); + view.format = Parse::expandStructuredFormat(view.format, componentCount); view.stride = getTexelOrBlockBytesize(view.format); // for (auto c = 0u; c < componentCount; c++) { From e5fc4accde2e02b973eb6092b05c73848b020f73 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: 
Sun, 8 Mar 2026 14:52:40 +0100 Subject: [PATCH 078/118] Reduce mesh writer parser boilerplate --- .../asset/interchange/CPLYMeshFileLoader.cpp | 199 +++--------------- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 35 ++- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 31 +-- 3 files changed, 68 insertions(+), 197 deletions(-) diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index cb4b713ee8..35a39e7764 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -1038,176 +1038,47 @@ struct Parse } } - if (is32Bit) { - if (isSrcU32) { - if (trackMaxIndex) { - for (size_t j = 0u; j < element.Count; ++j) { - const uint8_t c = *ptr++; - if (c != 3u) { - fallbackToGeneric = true; - break; - } - const hlsl::uint32_t3 tri( - readU32(ptr + 0ull * sizeof(uint32_t)), - readU32(ptr + 1ull * sizeof(uint32_t)), - readU32(ptr + 2ull * sizeof(uint32_t))); - out[0] = tri.x; - out[1] = tri.y; - out[2] = tri.z; - ptr += 3ull * sizeof(uint32_t); - const uint32_t triMax = std::max({tri.x, tri.y, tri.z}); - if (triMax > _maxIndex) - _maxIndex = triMax; - out += 3u; - } - } else { - for (size_t j = 0u; j < element.Count; ++j) { - const uint8_t c = *ptr++; - if (c != 3u) { - fallbackToGeneric = true; - break; - } - const hlsl::uint32_t3 tri( - readU32(ptr + 0ull * sizeof(uint32_t)), - readU32(ptr + 1ull * sizeof(uint32_t)), - readU32(ptr + 2ull * sizeof(uint32_t))); - out[0] = tri.x; - out[1] = tri.y; - out[2] = tri.z; - ptr += 3ull * sizeof(uint32_t); - if (triExceedsVertexLimit(tri)) - return EFastFaceReadResult::Error; - out += 3u; - } - } - } else if (trackMaxIndex) { - for (size_t j = 0u; j < element.Count; ++j) { - const uint8_t c = *ptr++; - if (c != 3u) { - fallbackToGeneric = true; - break; - } - const hlsl::uint32_t3 tri(readU32(ptr + 0ull * sizeof(uint32_t)), - readU32(ptr + 1ull * sizeof(uint32_t)), - readU32(ptr + 2ull * sizeof(uint32_t))); - out[0] = 
tri.x; - out[1] = tri.y; - out[2] = tri.z; - ptr += 3ull * sizeof(uint32_t); - if ((tri.x | tri.y | tri.z) & 0x80000000u) - return EFastFaceReadResult::Error; - const uint32_t triMax = std::max({tri.x, tri.y, tri.z}); - if (triMax > _maxIndex) - _maxIndex = triMax; - out += 3u; - } - } else { - for (size_t j = 0u; j < element.Count; ++j) { - const uint8_t c = *ptr++; - if (c != 3u) { - fallbackToGeneric = true; - break; - } - const hlsl::uint32_t3 tri(readU32(ptr + 0ull * sizeof(uint32_t)), - readU32(ptr + 1ull * sizeof(uint32_t)), - readU32(ptr + 2ull * sizeof(uint32_t))); - out[0] = tri.x; - out[1] = tri.y; - out[2] = tri.z; - ptr += 3ull * sizeof(uint32_t); - const uint32_t triOr = tri.x | tri.y | tri.z; - if (triOr & 0x80000000u) - return EFastFaceReadResult::Error; - if (triExceedsVertexLimit(tri)) - return EFastFaceReadResult::Error; - out += 3u; + auto consumeTriangles = [&](const size_t indexBytes, const uint32_t signedMask, auto readTri) -> EFastFaceReadResult { + for (size_t j = 0u; j < element.Count; ++j) { + if (*ptr++ != 3u) { + fallbackToGeneric = true; + return EFastFaceReadResult::NotApplicable; } - } - } else { - if (isSrcU16) { + + const hlsl::uint32_t3 tri = readTri(ptr); + ptr += 3ull * indexBytes; + const uint32_t triOr = tri.x | tri.y | tri.z; + if (signedMask && (triOr & signedMask)) + return EFastFaceReadResult::Error; + + out[0] = tri.x; + out[1] = tri.y; + out[2] = tri.z; if (trackMaxIndex) { - for (size_t j = 0u; j < element.Count; ++j) { - const uint8_t c = *ptr++; - if (c != 3u) { - fallbackToGeneric = true; - break; - } - const hlsl::uint32_t3 tri( - readU16(ptr + 0ull * sizeof(uint16_t)), - readU16(ptr + 1ull * sizeof(uint16_t)), - readU16(ptr + 2ull * sizeof(uint16_t))); - out[0] = tri.x; - out[1] = tri.y; - out[2] = tri.z; - ptr += 3ull * sizeof(uint16_t); - const uint32_t triMax = std::max({tri.x, tri.y, tri.z}); - if (triMax > _maxIndex) - _maxIndex = triMax; - out += 3u; - } - } else { - for (size_t j = 0u; j < element.Count; 
++j) { - const uint8_t c = *ptr++; - if (c != 3u) { - fallbackToGeneric = true; - break; - } - const hlsl::uint32_t3 tri( - readU16(ptr + 0ull * sizeof(uint16_t)), - readU16(ptr + 1ull * sizeof(uint16_t)), - readU16(ptr + 2ull * sizeof(uint16_t))); - out[0] = tri.x; - out[1] = tri.y; - out[2] = tri.z; - ptr += 3ull * sizeof(uint16_t); - if (triExceedsVertexLimit(tri)) - return EFastFaceReadResult::Error; - out += 3u; - } - } - } else if (trackMaxIndex) { - for (size_t j = 0u; j < element.Count; ++j) { - const uint8_t c = *ptr++; - if (c != 3u) { - fallbackToGeneric = true; - break; - } - const hlsl::uint32_t3 tri(readU16(ptr + 0ull * sizeof(uint16_t)), - readU16(ptr + 1ull * sizeof(uint16_t)), - readU16(ptr + 2ull * sizeof(uint16_t))); - ptr += 3ull * sizeof(uint16_t); - if ((tri.x | tri.y | tri.z) & 0x8000u) - return EFastFaceReadResult::Error; - out[0] = tri.x; - out[1] = tri.y; - out[2] = tri.z; const uint32_t triMax = std::max({tri.x, tri.y, tri.z}); if (triMax > _maxIndex) _maxIndex = triMax; - out += 3u; - } - } else { - for (size_t j = 0u; j < element.Count; ++j) { - const uint8_t c = *ptr++; - if (c != 3u) { - fallbackToGeneric = true; - break; - } - const hlsl::uint32_t3 tri(readU16(ptr + 0ull * sizeof(uint16_t)), - readU16(ptr + 1ull * sizeof(uint16_t)), - readU16(ptr + 2ull * sizeof(uint16_t))); - ptr += 3ull * sizeof(uint16_t); - if ((tri.x | tri.y | tri.z) & 0x8000u) - return EFastFaceReadResult::Error; - out[0] = tri.x; - out[1] = tri.y; - out[2] = tri.z; - if (triExceedsVertexLimit(tri)) - return EFastFaceReadResult::Error; - out += 3u; - } + } else if (triExceedsVertexLimit(tri)) + return EFastFaceReadResult::Error; + out += 3u; } - } + return EFastFaceReadResult::Success; + }; + const auto fastReadResult = is32Bit ? + consumeTriangles(sizeof(uint32_t), isSrcS32 ? 
0x80000000u : 0u, + [&](const uint8_t* const src) -> hlsl::uint32_t3 { + return hlsl::uint32_t3(readU32(src + 0ull * sizeof(uint32_t)), + readU32(src + 1ull * sizeof(uint32_t)), + readU32(src + 2ull * sizeof(uint32_t))); + }) : + consumeTriangles(sizeof(uint16_t), isSrcS16 ? 0x8000u : 0u, + [&](const uint8_t* const src) -> hlsl::uint32_t3 { + return hlsl::uint32_t3(readU16(src + 0ull * sizeof(uint16_t)), + readU16(src + 1ull * sizeof(uint16_t)), + readU16(src + 2ull * sizeof(uint16_t))); + }); + if (fastReadResult == EFastFaceReadResult::Error) + return EFastFaceReadResult::Error; if (!fallbackToGeneric) { StartPointer = reinterpret_cast(const_cast(ptr)); diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 556a8e5d0a..266c741a51 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -277,20 +277,15 @@ struct Parse return SGeometryAttributeEmit::emit(sink, view.stored, ix, view.components, view.flipVectors); } - template - static inline void prepareSemantic(PreparedView& view, const ICPUPolygonGeometry::SDataView& src, const bool flipVectors) - { - view.flipVectors = flipVectors; - view.semantic = SGeometryViewDecode::prepare(src); - view.emit = &emitPrepared; - } - - template - static inline void prepareStored(PreparedView& view, const ICPUPolygonGeometry::SDataView& src, const bool flipVectors) + template + static inline void prepareDecode(PreparedView& view, const ICPUPolygonGeometry::SDataView& src, const bool flipVectors) { view.flipVectors = flipVectors; - view.stored = SGeometryViewDecode::prepare(src); - view.emit = &emitPrepared; + if constexpr (Mode == SGeometryViewDecode::EMode::Semantic) + view.semantic = SGeometryViewDecode::prepare(src); + else + view.stored = SGeometryViewDecode::prepare(src); + view.emit = &emitPrepared; } static PreparedView create(const ICPUPolygonGeometry::SDataView* view, const uint32_t components, const ScalarType 
scalarType, const bool flipVectors) @@ -301,14 +296,14 @@ struct Parse switch (scalarType) { - case ScalarType::Float64: prepareSemantic(retval, *view, flipVectors); break; - case ScalarType::Float32: prepareSemantic(retval, *view, flipVectors); break; - case ScalarType::Int8: prepareStored(retval, *view, flipVectors); break; - case ScalarType::UInt8: prepareStored(retval, *view, false); break; - case ScalarType::Int16: prepareStored(retval, *view, flipVectors); break; - case ScalarType::UInt16: prepareStored(retval, *view, false); break; - case ScalarType::Int32: prepareStored(retval, *view, flipVectors); break; - case ScalarType::UInt32: prepareStored(retval, *view, false); break; + case ScalarType::Float64: prepareDecode(retval, *view, flipVectors); break; + case ScalarType::Float32: prepareDecode(retval, *view, flipVectors); break; + case ScalarType::Int8: prepareDecode(retval, *view, flipVectors); break; + case ScalarType::UInt8: prepareDecode(retval, *view, false); break; + case ScalarType::Int16: prepareDecode(retval, *view, flipVectors); break; + case ScalarType::UInt16: prepareDecode(retval, *view, false); break; + case ScalarType::Int32: prepareDecode(retval, *view, flipVectors); break; + case ScalarType::UInt32: prepareDecode(retval, *view, false); break; } return retval; } diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 6ab92f18d7..c60bb98b19 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -292,7 +292,7 @@ struct Parse return true; } - hlsl::float64_t4 decoded = {}; + hlsl::float32_t4 decoded = {}; if (!colorView.decodeElement(ix, decoded)) return false; const double rgbaUnit[4] = {normalizeColorComponentToUnit(decoded.x), normalizeColorComponentToUnit(decoded.y), normalizeColorComponentToUnit(decoded.z), normalizeColorComponentToUnit(decoded.w)}; @@ -366,18 +366,23 @@ struct Parse outColor = 0u; if (!colorView) return true; - 
hlsl::uint32_t3 color(0u); - if (!decodeColorB8G8R8A8(*colorView, idx.x, color.x)) - return false; - if (!decodeColorB8G8R8A8(*colorView, idx.y, color.y)) - return false; - if (!decodeColorB8G8R8A8(*colorView, idx.z, color.z)) - return false; - std::array, 3> rgba = {}; - decodeColorUnitRGBAFromB8G8R8A8(color.x, rgba[0].data()); - decodeColorUnitRGBAFromB8G8R8A8(color.y, rgba[1].data()); - decodeColorUnitRGBAFromB8G8R8A8(color.z, rgba[2].data()); - const std::array rgbaAvg = {(rgba[0][0] + rgba[1][0] + rgba[2][0]) / 3.0, (rgba[0][1] + rgba[1][1] + rgba[2][1]) / 3.0, (rgba[0][2] + rgba[1][2] + rgba[2][2]) / 3.0, 1.0}; + const std::array vertexIx = {idx.x, idx.y, idx.z}; + std::array rgbaAvg = {}; + for (uint32_t corner = 0u; corner < vertexIx.size(); ++corner) + { + uint32_t color = 0u; + if (!decodeColorB8G8R8A8(*colorView, vertexIx[corner], color)) + return false; + std::array rgba = {}; + decodeColorUnitRGBAFromB8G8R8A8(color, rgba.data()); + rgbaAvg[0] += rgba[0]; + rgbaAvg[1] += rgba[1]; + rgbaAvg[2] += rgba[2]; + } + rgbaAvg[0] /= 3.0; + rgbaAvg[1] /= 3.0; + rgbaAvg[2] /= 3.0; + rgbaAvg[3] = 1.0; uint32_t avgColor = 0u; encodePixels(&avgColor, rgbaAvg.data()); outColor = packViscamColorFromB8G8R8A8(avgColor); From 4eb9d160f9c897480b29aabf13ac42746d0c6775 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 15:31:09 +0100 Subject: [PATCH 079/118] Trim interchange helper boilerplate --- include/nbl/asset/interchange/SFileIOPolicy.h | 128 +++----------- .../asset/interchange/SGeometryContentHash.h | 29 +--- .../asset/interchange/SGeometryLoaderCommon.h | 27 +-- .../asset/interchange/SGeometryWriterCommon.h | 118 +++---------- .../nbl/asset/interchange/SInterchangeIO.h | 85 ++-------- .../asset/interchange/SLoaderRuntimeTuning.h | 72 +------- .../nbl/asset/utils/SGeometryNormalCommon.h | 28 +--- .../asset/interchange/COBJMeshFileLoader.cpp | 8 +- src/nbl/asset/interchange/COBJMeshWriter.cpp | 10 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 
158 ++++++++++-------- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 36 +++- .../asset/interchange/CSTLMeshFileLoader.cpp | 8 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 55 +++--- .../interchange/SGeometryAttributeEmit.h | 41 ----- .../asset/interchange/SGeometryViewDecode.h | 12 -- .../SOBJPolygonGeometryAuxLayout.h | 19 --- .../SPLYPolygonGeometryAuxLayout.h | 19 --- .../SSTLPolygonGeometryAuxLayout.h | 19 --- src/nbl/asset/interchange/impl/SBinaryData.h | 7 - .../interchange/impl/SContentHashBuild.h | 72 -------- src/nbl/asset/interchange/impl/SFileAccess.h | 70 ++++++-- .../asset/interchange/impl/SIODiagnostics.h | 41 ----- src/nbl/asset/interchange/impl/SLoadSession.h | 74 -------- src/nbl/asset/interchange/impl/STextParse.h | 85 +--------- 24 files changed, 277 insertions(+), 944 deletions(-) delete mode 100644 src/nbl/asset/interchange/SGeometryAttributeEmit.h delete mode 100644 src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h delete mode 100644 src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h delete mode 100644 src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h delete mode 100644 src/nbl/asset/interchange/impl/SContentHashBuild.h delete mode 100644 src/nbl/asset/interchange/impl/SIODiagnostics.h delete mode 100644 src/nbl/asset/interchange/impl/SLoadSession.h diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 49e149bdaa..0e59986c0b 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -3,87 +3,50 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_S_FILE_IO_POLICY_H_INCLUDED_ #define _NBL_ASSET_S_FILE_IO_POLICY_H_INCLUDED_ - - #include "nbl/core/util/bitflag.h" #include "nbl/system/to_string.h" - #include #include #include #include #include - - namespace nbl::asset { - enum class EFileIOStrategy : uint8_t { - // Sentinel used when strategy resolution fails or the 
value is uninitialized. Invalid = 0u, - // Pick whole-file or chunked dynamically based on file size and policy limits. Auto, - // Force whole-file strategy. May fallback when not feasible unless strict=true. WholeFile, - // Force chunked strategy. Chunked }; - -// Requested IO policy shared by loaders, writers, and hash stages before file constraints are resolved. struct SFileIOPolicy { - // Runtime tuning knobs shared by loader parallelism and IO anomaly diagnostics. struct SRuntimeTuning { - // Runtime tuning strategy for worker/chunk selection. enum class Mode : uint8_t { - // Disable runtime tuning and force sequential execution. Sequential, - // Backward-compatible alias for Sequential. None = Sequential, - // Use deterministic heuristics derived from input size and hardware. Heuristic, - // Use heuristics and optionally refine with lightweight sampling. Hybrid }; - - // Runtime tuning mode. Mode mode = Mode::Heuristic; - // Maximum acceptable tuning overhead as a fraction of estimated full workload time. float maxOverheadRatio = 0.05f; - // Maximum sampling budget as a fraction of estimated full workload time. float samplingBudgetRatio = 0.05f; - // Minimum expected gain required to keep extra workers enabled. float minExpectedGainRatio = 0.03f; - // Hard cap for worker count. 0 means auto. uint16_t maxWorkers = 0u; - // Reserved hardware threads not used by the loader. Prevents full CPU saturation. uint8_t workerHeadroom = 2u; - // Maximum number of worker-count candidates tested in hybrid mode. uint8_t samplingMaxCandidates = 4u; - // Number of benchmark passes per candidate in hybrid mode. uint8_t samplingPasses = 1u; - // Minimum work units required before hybrid sampling is allowed. 0 means auto. uint64_t samplingMinWorkUnits = 0ull; - // Target chunk count assigned to each worker for loader stages. uint8_t targetChunksPerWorker = 4u; - // Target chunk count assigned to each worker for hash stages. 
uint8_t hashTaskTargetChunksPerWorker = 1u; - // Hash inlining threshold. Inputs up to this size prefer inline hash build. - uint64_t hashInlineThresholdBytes = 1ull << 20; // 1 MiB - // Lower bound for sampled byte count in hybrid mode. - uint64_t minSampleBytes = 4ull << 10; // 4 KiB - // Upper bound for sampled byte count in hybrid mode. - uint64_t maxSampleBytes = 128ull << 10; // 128 KiB - // Payload size threshold for tiny-IO anomaly detection. - uint64_t tinyIoPayloadThresholdBytes = 1ull << 20; // 1 MiB - // Average operation size threshold for tiny-IO anomaly detection. - uint64_t tinyIoAvgBytesThreshold = 1024ull; // 1 KiB - // Minimum operation size threshold for tiny-IO anomaly detection. - uint64_t tinyIoMinBytesThreshold = 64ull; // 64 B - // Minimum operation count required to report tiny-IO anomaly. + uint64_t hashInlineThresholdBytes = 1ull << 20; + uint64_t minSampleBytes = 4ull << 10; + uint64_t maxSampleBytes = 128ull << 10; + uint64_t tinyIoPayloadThresholdBytes = 1ull << 20; + uint64_t tinyIoAvgBytesThreshold = 1024ull; + uint64_t tinyIoMinBytesThreshold = 64ull; uint64_t tinyIoMinCallCount = 1024ull; }; @@ -95,90 +58,49 @@ struct SFileIOPolicy EF_STRICT_BIT = 1u << 0u }; - static inline constexpr uint64_t MIN_CHUNK_SIZE_BYTES = 64ull << 10u; // 64 KiB + static inline constexpr uint64_t MIN_CHUNK_SIZE_BYTES = 64ull << 10u; static inline constexpr uint8_t MIN_CHUNK_SIZE_LOG2 = static_cast(std::bit_width(MIN_CHUNK_SIZE_BYTES) - 1u); static inline constexpr uint8_t MAX_BYTE_SIZE_LOG2 = std::numeric_limits::digits - 1u; - static inline constexpr uint64_t DEFAULT_WHOLE_FILE_THRESHOLD_BYTES = 64ull << 20u; // 64 MiB - static inline constexpr uint64_t DEFAULT_CHUNK_SIZE_BYTES = 4ull << 20u; // 4 MiB - static inline constexpr uint64_t DEFAULT_MAX_STAGING_BYTES = 256ull << 20u; // 256 MiB + static inline constexpr uint64_t DEFAULT_WHOLE_FILE_THRESHOLD_BYTES = 64ull << 20u; + static inline constexpr uint64_t DEFAULT_CHUNK_SIZE_BYTES = 4ull << 20u; + 
static inline constexpr uint64_t DEFAULT_MAX_STAGING_BYTES = 256ull << 20u; - // These defaults are stored and clamped as log2(byte_count), so the source byte values must stay powers of two. static_assert(std::has_single_bit(MIN_CHUNK_SIZE_BYTES)); static_assert(std::has_single_bit(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES)); static_assert(std::has_single_bit(DEFAULT_CHUNK_SIZE_BYTES)); static_assert(std::has_single_bit(DEFAULT_MAX_STAGING_BYTES)); - static inline constexpr uint8_t clampBytesLog2(const uint8_t value, const uint8_t minValue = 0u) - { - return std::clamp(value, minValue, MAX_BYTE_SIZE_LOG2); - } - - static inline constexpr uint64_t bytesFromLog2(const uint8_t value, const uint8_t minValue = 0u) - { - return 1ull << clampBytesLog2(value, minValue); - } + static inline constexpr uint8_t clampBytesLog2(const uint8_t value, const uint8_t minValue = 0u) { return std::clamp(value, minValue, MAX_BYTE_SIZE_LOG2); } - // Requested IO strategy. Defaults to Auto. + static inline constexpr uint64_t bytesFromLog2(const uint8_t value, const uint8_t minValue = 0u) { return 1ull << clampBytesLog2(value, minValue); } Strategy strategy = Strategy::Auto; - // Resolution flags. Defaults to none. core::bitflag flags = EF_NONE; - // Maximum payload size allowed for whole-file strategy in auto mode. Defaults to 64 MiB. uint8_t wholeFileThresholdLog2 = static_cast(std::bit_width(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES) - 1u); - // Chunk size used by chunked strategy encoded as log2(bytes). Defaults to 4 MiB. uint8_t chunkSizeLog2 = static_cast(std::bit_width(DEFAULT_CHUNK_SIZE_BYTES) - 1u); - // Maximum staging allocation for whole-file strategy encoded as log2(bytes). Defaults to 256 MiB. uint8_t maxStagingLog2 = static_cast(std::bit_width(DEFAULT_MAX_STAGING_BYTES) - 1u); - // Runtime tuning controls used by loaders and hash stages. 
SRuntimeTuning runtimeTuning = {}; - inline constexpr bool strict() const - { - return flags.hasAnyFlag(EF_STRICT_BIT); - } + inline constexpr bool strict() const { return flags.hasAnyFlag(EF_STRICT_BIT); } - inline constexpr uint64_t wholeFileThresholdBytes() const - { - return bytesFromLog2(wholeFileThresholdLog2, MIN_CHUNK_SIZE_LOG2); - } + inline constexpr uint64_t wholeFileThresholdBytes() const { return bytesFromLog2(wholeFileThresholdLog2, MIN_CHUNK_SIZE_LOG2); } - inline constexpr uint64_t chunkSizeBytes() const - { - return bytesFromLog2(chunkSizeLog2, MIN_CHUNK_SIZE_LOG2); - } + inline constexpr uint64_t chunkSizeBytes() const { return bytesFromLog2(chunkSizeLog2, MIN_CHUNK_SIZE_LOG2); } - inline constexpr uint64_t maxStagingBytes() const - { - return bytesFromLog2(maxStagingLog2, MIN_CHUNK_SIZE_LOG2); - } + inline constexpr uint64_t maxStagingBytes() const { return bytesFromLog2(maxStagingLog2, MIN_CHUNK_SIZE_LOG2); } }; - -// Resolved IO plan chosen from SFileIOPolicy after considering file size, mapping, and staging limits. struct SResolvedFileIOPolicy { using Strategy = EFileIOStrategy; constexpr SResolvedFileIOPolicy() = default; - inline constexpr SResolvedFileIOPolicy(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) : - SResolvedFileIOPolicy(resolve(policy, byteCount, sizeKnown, fileMappable)) - { - } - - // Effective strategy chosen by resolver. Invalid means strict policy resolution failed. + inline constexpr SResolvedFileIOPolicy(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) : SResolvedFileIOPolicy(resolve(policy, byteCount, sizeKnown, fileMappable)) {} Strategy strategy = Strategy::Invalid; - // Effective chunk size encoded as log2(bytes). Also set for whole-file for telemetry consistency. uint8_t chunkSizeLog2 = SFileIOPolicy::MIN_CHUNK_SIZE_LOG2; - // Resolver reason string used in logs and diagnostics. 
const char* reason = "invalid"; - inline constexpr bool isValid() const - { - return strategy != Strategy::Invalid; - } + inline constexpr bool isValid() const { return strategy != Strategy::Invalid; } - inline constexpr uint64_t chunkSizeBytes() const - { - return SFileIOPolicy::bytesFromLog2(chunkSizeLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); - } + inline constexpr uint64_t chunkSizeBytes() const { return SFileIOPolicy::bytesFromLog2(chunkSizeLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); } static inline constexpr SResolvedFileIOPolicy resolve(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) { @@ -189,14 +111,7 @@ struct SResolvedFileIOPolicy const uint64_t maxStaging = SFileIOPolicy::bytesFromLog2(maxStagingLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); const uint64_t wholeThreshold = policy.wholeFileThresholdBytes(); - auto makeResolved = [&](const Strategy strategy, const char* const reason) -> SResolvedFileIOPolicy - { - SResolvedFileIOPolicy resolved = {}; - resolved.strategy = strategy; - resolved.chunkSizeLog2 = chunkSizeLog2; - resolved.reason = reason; - return resolved; - }; + auto makeResolved = [&](const Strategy strategy, const char* const reason) -> SResolvedFileIOPolicy { SResolvedFileIOPolicy resolved = {}; resolved.strategy = strategy; resolved.chunkSizeLog2 = chunkSizeLog2; resolved.reason = reason; return resolved; }; switch (policy.strategy) { @@ -228,9 +143,7 @@ struct SResolvedFileIOPolicy } } }; - } - namespace nbl::system::impl { template<> @@ -254,5 +167,4 @@ struct to_string_helper } }; } - #endif diff --git a/include/nbl/asset/interchange/SGeometryContentHash.h b/include/nbl/asset/interchange/SGeometryContentHash.h index 12a5c80567..a8ddf4d3ce 100644 --- a/include/nbl/asset/interchange/SGeometryContentHash.h +++ b/include/nbl/asset/interchange/SGeometryContentHash.h @@ -3,28 +3,17 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef 
_NBL_ASSET_S_GEOMETRY_CONTENT_HASH_H_INCLUDED_ #define _NBL_ASSET_S_GEOMETRY_CONTENT_HASH_H_INCLUDED_ - - #include "nbl/asset/IPreHashed.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" #include "nbl/core/hash/blake.h" - - namespace nbl::asset { - -// Polygon-geometry content-hash helper built on top of CPolygonGeometryManipulator buffer hashing. class SPolygonGeometryContentHash { public: using mode_t = CPolygonGeometryManipulator::EContentHashMode; - static inline void collectBuffers( - const ICPUPolygonGeometry* geometry, - core::vector>& buffers) - { - CPolygonGeometryManipulator::collectUniqueBuffers(geometry, buffers); - } + static inline void collectBuffers(const ICPUPolygonGeometry* geometry, core::vector>& buffers) { CPolygonGeometryManipulator::collectUniqueBuffers(geometry, buffers); } static inline void reset(ICPUPolygonGeometry* geometry) { @@ -35,8 +24,6 @@ class SPolygonGeometryContentHash buffer->setContentHash(IPreHashed::INVALID_HASH); } - // Composes a geometry hash from indexing metadata and the current content hashes of referenced buffers. - // It does not compute missing buffer content hashes. Any buffer without a content hash contributes INVALID_HASH. 
static inline core::blake3_hash_t composeHashFromBufferContentHashes(const ICPUPolygonGeometry* geometry) { if (!geometry) @@ -57,19 +44,9 @@ class SPolygonGeometryContentHash return static_cast(hashBuilder); } - static inline core::blake3_hash_t computeMissing(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) - { - CPolygonGeometryManipulator::computeMissingContentHashesParallel(geometry, ioPolicy); - return composeHashFromBufferContentHashes(geometry); - } + static inline core::blake3_hash_t computeMissing(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) { CPolygonGeometryManipulator::computeMissingContentHashesParallel(geometry, ioPolicy); return composeHashFromBufferContentHashes(geometry); } - static inline core::blake3_hash_t recompute(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) - { - CPolygonGeometryManipulator::recomputeContentHashesParallel(geometry, ioPolicy); - return composeHashFromBufferContentHashes(geometry); - } + static inline core::blake3_hash_t recompute(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) { CPolygonGeometryManipulator::recomputeContentHashesParallel(geometry, ioPolicy); return composeHashFromBufferContentHashes(geometry); } }; - } - #endif diff --git a/include/nbl/asset/interchange/SGeometryLoaderCommon.h b/include/nbl/asset/interchange/SGeometryLoaderCommon.h index a77917aa52..c81aab95db 100644 --- a/include/nbl/asset/interchange/SGeometryLoaderCommon.h +++ b/include/nbl/asset/interchange/SGeometryLoaderCommon.h @@ -3,19 +3,12 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_S_GEOMETRY_LOADER_COMMON_H_INCLUDED_ #define _NBL_ASSET_S_GEOMETRY_LOADER_COMMON_H_INCLUDED_ - - #include #include - #include "nbl/asset/SBufferAdoption.h" #include "nbl/asset/ICPUPolygonGeometry.h" - - namespace nbl::asset { - -// Loader-side helpers for building polygon-geometry data views backed by adopted CPU buffers. 
class SGeometryLoaderCommon { public: @@ -23,19 +16,7 @@ class SGeometryLoaderCommon { if (!buffer || byteCount == 0ull) return {}; - - return { - .composed = { - .stride = stride, - .format = format, - .rangeFormat = IGeometryBase::getMatchingAABBFormat(format) - }, - .src = { - .offset = 0ull, - .size = byteCount, - .buffer = std::move(buffer) - } - }; + return {.composed = {.stride = stride, .format = format, .rangeFormat = IGeometryBase::getMatchingAABBFormat(format)}, .src = {.offset = 0ull, .size = byteCount, .buffer = std::move(buffer)}}; } template @@ -47,12 +28,8 @@ class SGeometryLoaderCommon auto buffer = SBufferAdoption::create(std::forward(data)); if (!buffer) return {}; - const size_t byteCount = buffer->getSize(); - return createDataView(std::move(buffer), byteCount, static_cast(sizeof(value_t)), Format); + return createDataView(std::move(buffer), buffer->getSize(), static_cast(sizeof(value_t)), Format); } }; - } - - #endif diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index 40cafca654..fa6c15a5b9 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -3,8 +3,6 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_S_GEOMETRY_WRITER_COMMON_H_INCLUDED_ #define _NBL_ASSET_S_GEOMETRY_WRITER_COMMON_H_INCLUDED_ - - #include #include "nbl/asset/ICPUScene.h" #include "nbl/asset/ICPUGeometryCollection.h" @@ -18,73 +16,40 @@ #include #include #include - - namespace nbl::asset { -// Writer-side helpers for flattening scene inputs and serializing polygon geometry views safely. class SGeometryWriterCommon { public: - // Shared write context propagated while flattening geometry collections and scenes. 
- struct SWriteState - { - hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); - uint32_t instanceIx = ~0u; - uint32_t targetIx = ~0u; - uint32_t geometryIx = 0u; - }; - - // One polygon geometry scheduled for writing together with the transform and scene indices that produced it. - struct SPolygonGeometryWriteItem : SWriteState - { - const ICPUPolygonGeometry* geometry = nullptr; - }; + struct SWriteState { hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); uint32_t instanceIx = ~0u; uint32_t targetIx = ~0u; uint32_t geometryIx = 0u; }; + struct SPolygonGeometryWriteItem : SWriteState { const ICPUPolygonGeometry* geometry = nullptr; }; - // Parameters used when expanding one geometry collection into polygon write items. - struct SGeometryCollectionWriteParams : SWriteState + template> requires requires(Container& c, const SPolygonGeometryWriteItem& item) { c.emplace_back(item); } + static inline Container collectPolygonGeometryWriteItems(const IAsset* rootAsset) { - const ICPUGeometryCollection* collection = nullptr; - }; + Container out = {}; + if (!rootAsset) + return out; - // Collector used by collectPolygonGeometryWriteItems to flatten one collection into a caller-provided container. 
- template requires requires(Container& c, const SPolygonGeometryWriteItem& item) { c.emplace_back(item); } - struct SWriteCollector - { - static inline void appendFromCollection(Container& out, const SGeometryCollectionWriteParams& params) - { - if (!params.collection) + const auto identity = hlsl::math::linalg::identity(); + auto appendFromCollection = [&](const ICPUGeometryCollection* collection, const hlsl::float32_t3x4& transform, const uint32_t instanceIx, const uint32_t targetIx) -> void { + if (!collection) return; - - const auto identity = hlsl::math::linalg::identity(); - const auto& geometries = params.collection->getGeometries(); + const auto& geometries = collection->getGeometries(); for (uint32_t geometryIx = 0u; geometryIx < geometries.size(); ++geometryIx) { const auto& ref = geometries[geometryIx]; if (!ref.geometry || ref.geometry->getPrimitiveType() != IGeometryBase::EPrimitiveType::Polygon) continue; - const auto* geometry = static_cast(ref.geometry.get()); - const auto localTransform = ref.hasTransform() ? ref.transform : identity; SPolygonGeometryWriteItem item = {}; - item.geometry = geometry; - item.transform = hlsl::math::linalg::promoted_mul(params.transform, localTransform); - item.instanceIx = params.instanceIx; - item.targetIx = params.targetIx; + item.geometry = static_cast(ref.geometry.get()); + item.transform = hlsl::math::linalg::promoted_mul(transform, ref.hasTransform() ? ref.transform : identity); + item.instanceIx = instanceIx; + item.targetIx = targetIx; item.geometryIx = geometryIx; out.emplace_back(item); } - } - }; - - // Collects every polygon geometry a writer can serialize from a geometry, collection, or flattened scene. 
- template> requires requires(Container& c, const SPolygonGeometryWriteItem& item) { c.emplace_back(item); } - static inline Container collectPolygonGeometryWriteItems(const IAsset* rootAsset) - { - Container out = {}; - if (!rootAsset) - return out; - - const auto identity = hlsl::math::linalg::identity(); + }; if (rootAsset->getAssetType() == IAsset::ET_GEOMETRY) { const auto* geometry = static_cast*>(rootAsset); @@ -99,9 +64,7 @@ class SGeometryWriterCommon if (rootAsset->getAssetType() == IAsset::ET_GEOMETRY_COLLECTION) { - SGeometryCollectionWriteParams appendParams = {}; - appendParams.collection = static_cast(rootAsset); - SWriteCollector::appendFromCollection(out, appendParams); + appendFromCollection(static_cast(rootAsset), identity, ~0u, ~0u); return out; } @@ -121,34 +84,21 @@ class SGeometryWriterCommon const auto instanceTransform = initialTransforms.empty() ? identity : initialTransforms[instanceIx]; const auto& targetList = targets->getTargets(); for (uint32_t targetIx = 0u; targetIx < targetList.size(); ++targetIx) - { - SGeometryCollectionWriteParams appendParams = {}; - appendParams.collection = targetList[targetIx].geoCollection.get(); - appendParams.transform = instanceTransform; - appendParams.instanceIx = instanceIx; - appendParams.targetIx = targetIx; - SWriteCollector::appendFromCollection(out, appendParams); - } + appendFromCollection(targetList[targetIx].geoCollection.get(), instanceTransform, instanceIx, targetIx); } return out; } - static inline bool isIdentityTransform(const hlsl::float32_t3x4& transform) - { - return transform == hlsl::math::linalg::identity(); - } + static inline bool isIdentityTransform(const hlsl::float32_t3x4& transform) { return transform == hlsl::math::linalg::identity(); } - // Returns the aux view stored at a specific semantic slot when it exists. 
static inline const ICPUPolygonGeometry::SDataView* getAuxViewAt(const ICPUPolygonGeometry* geom, const uint32_t auxViewIx, const size_t requiredElementCount = 0ull) { if (!geom) return nullptr; - const auto& auxViews = geom->getAuxAttributeViews(); if (auxViewIx >= auxViews.size()) return nullptr; - const auto& view = auxViews[auxViewIx]; if (!view) return nullptr; @@ -157,18 +107,15 @@ class SGeometryWriterCommon return &view; } - // Validates triangle-list indexing and returns the number of faces the writer will emit. static inline bool getTriangleFaceCount(const ICPUPolygonGeometry* geom, size_t& outFaceCount) { outFaceCount = 0ull; if (!geom) return false; - const auto& positionView = geom->getPositionView(); const size_t vertexCount = positionView.getElementCount(); if (vertexCount == 0ull) return false; - const auto& indexView = geom->getIndexView(); if (indexView) { @@ -181,12 +128,10 @@ class SGeometryWriterCommon if ((vertexCount % 3ull) != 0ull) return false; - outFaceCount = vertexCount / 3ull; return true; } - // Calls `visitor(i0, i1, i2)` once per triangle after validating indices and normalizing implicit/R16/R32 indexing to uint32_t. template static inline bool visitTriangleIndices(const ICPUPolygonGeometry* geom, Visitor&& visitor) { @@ -254,26 +199,10 @@ class SGeometryWriterCommon } template - static inline const T* getTightView(const ICPUPolygonGeometry::SDataView& view) - { - if (!view) - return nullptr; - if (view.composed.format != ExpectedFormat) - return nullptr; - if (view.composed.getStride() != sizeof(T)) - return nullptr; - return reinterpret_cast(view.getPointer()); - } - - static inline char* appendFloatToBuffer(char* dst, char* end, float value) - { - return appendFloatingPointToBuffer(dst, end, value); - } + static inline const T* getTightView(const ICPUPolygonGeometry::SDataView& view) { return view && view.composed.format == ExpectedFormat && view.composed.getStride() == sizeof(T) ? 
reinterpret_cast(view.getPointer()) : nullptr; } - static inline char* appendFloatToBuffer(char* dst, char* end, double value) - { - return appendFloatingPointToBuffer(dst, end, value); - } + static inline char* appendFloatToBuffer(char* dst, char* end, float value) { return appendFloatingPointToBuffer(dst, end, value); } + static inline char* appendFloatToBuffer(char* dst, char* end, double value) { return appendFloatingPointToBuffer(dst, end, value); } static inline char* appendUIntToBuffer(char* dst, char* const end, const uint32_t value) { @@ -319,8 +248,5 @@ class SGeometryWriterCommon return dst + writeLen; } }; - } - - #endif diff --git a/include/nbl/asset/interchange/SInterchangeIO.h b/include/nbl/asset/interchange/SInterchangeIO.h index 0bdf5af8ef..484be7d09f 100644 --- a/include/nbl/asset/interchange/SInterchangeIO.h +++ b/include/nbl/asset/interchange/SInterchangeIO.h @@ -3,11 +3,8 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_S_INTERCHANGE_IO_H_INCLUDED_ #define _NBL_ASSET_S_INTERCHANGE_IO_H_INCLUDED_ - - #include "nbl/asset/interchange/SFileIOPolicy.h" #include "nbl/system/IFile.h" - #include #include #include @@ -15,52 +12,32 @@ #include #include #include - - namespace nbl::asset { - -// Shared read/write helpers that execute a resolved IO plan and collect simple telemetry. class SInterchangeIO { public: - // Tracks IO call count and byte distribution for tiny-io diagnostics. struct STelemetry { uint64_t callCount = 0ull; uint64_t totalBytes = 0ull; uint64_t minBytes = std::numeric_limits::max(); - inline void account(const uint64_t bytes) - { - ++callCount; - totalBytes += bytes; - if (bytes < minBytes) - minBytes = bytes; - } - - inline uint64_t getMinOrZero() const - { - return callCount ? minBytes : 0ull; - } - - inline uint64_t getAvgOrZero() const - { - return callCount ? 
(totalBytes / callCount) : 0ull; - } + inline void account(const uint64_t bytes) { ++callCount; totalBytes += bytes; if (bytes < minBytes) minBytes = bytes; } + inline uint64_t getMinOrZero() const { return callCount ? minBytes : 0ull; } + inline uint64_t getAvgOrZero() const { return callCount ? (totalBytes / callCount) : 0ull; } }; using SReadTelemetry = STelemetry; using SWriteTelemetry = STelemetry; - // Flags large payloads that were served through suspiciously small IO calls. static inline bool isTinyIOTelemetryLikely( const STelemetry& telemetry, const uint64_t payloadBytes, - const uint64_t bigPayloadThresholdBytes = (1ull << 20), // Default 1 MiB. - const uint64_t lowAvgBytesThreshold = 1024ull, // Default 1 KiB. - const uint64_t tinyChunkBytesThreshold = 64ull, // Default 64 B. - const uint64_t tinyChunkCallsThreshold = 1024ull) // Default 1024 calls. + const uint64_t bigPayloadThresholdBytes = (1ull << 20), + const uint64_t lowAvgBytesThreshold = 1024ull, + const uint64_t tinyChunkBytesThreshold = 64ull, + const uint64_t tinyChunkCallsThreshold = 1024ull) { if (payloadBytes <= bigPayloadThresholdBytes) return false; @@ -71,20 +48,7 @@ class SInterchangeIO avgBytes < lowAvgBytesThreshold || (minBytes < tinyChunkBytesThreshold && telemetry.callCount > tinyChunkCallsThreshold); } - - // Same tiny-io heuristic but pulls thresholds from the resolved IO policy. - static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) - { - return isTinyIOTelemetryLikely( - telemetry, - payloadBytes, - ioPolicy.runtimeTuning.tinyIoPayloadThresholdBytes, - ioPolicy.runtimeTuning.tinyIoAvgBytesThreshold, - ioPolicy.runtimeTuning.tinyIoMinBytesThreshold, - ioPolicy.runtimeTuning.tinyIoMinCallCount); - } - - // Issues one read request and verifies that the full byte count was returned. 
+ static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) { return isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy.runtimeTuning.tinyIoPayloadThresholdBytes, ioPolicy.runtimeTuning.tinyIoAvgBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinCallCount); } static inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SReadTelemetry* ioTelemetry = nullptr) { if (!file || (!dst && bytes != 0ull)) @@ -98,9 +62,6 @@ class SInterchangeIO ioTelemetry->account(success.getBytesProcessed()); return success && success.getBytesProcessed() == bytes; } - - // Reads a byte range using the resolved whole-file or chunked strategy. - // When ioTime is non-null it also reports wall time in TimeUnit. Default TimeUnit is milliseconds. template> requires std::same_as> static inline bool readFileWithPolicy(system::IFile* file, void* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SReadTelemetry* ioTelemetry = nullptr, TimeUnit* ioTime = nullptr) @@ -147,15 +108,11 @@ class SInterchangeIO } } } - - // Describes one contiguous output buffer written as part of a larger stream. struct SBufferRange { const void* data = nullptr; size_t byteCount = 0ull; }; - - // Writes one or more buffers sequentially at fileOffset and advances it on success. static inline bool writeBuffersWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { if (!file) @@ -192,33 +149,13 @@ class SInterchangeIO } return true; } - - // Writes one or more buffers starting from file offset 0. 
- static inline bool writeBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, SWriteTelemetry* ioTelemetry = nullptr) - { - size_t fileOffset = 0ull; - return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); - } - - // Single-buffer convenience wrapper over writeBuffersWithPolicyAtOffset. - static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) - { - const SBufferRange buffers[] = { { .data = data, .byteCount = byteCount } }; - return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); - } - - // Single-buffer convenience wrapper over writeBuffersWithPolicy. - static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) - { - const SBufferRange buffers[] = { { .data = data, .byteCount = byteCount } }; - return writeBuffersWithPolicy(file, ioPlan, buffers, ioTelemetry); - } + static inline bool writeBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, SWriteTelemetry* ioTelemetry = nullptr) { size_t fileOffset = 0ull; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } + static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } + static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange 
buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicy(file, ioPlan, buffers, ioTelemetry); } }; using SFileIOTelemetry = SInterchangeIO::STelemetry; using SFileReadTelemetry = SInterchangeIO::SReadTelemetry; using SFileWriteTelemetry = SInterchangeIO::SWriteTelemetry; - } - #endif diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index 94a623732b..7aea972713 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -18,56 +18,34 @@ namespace nbl::asset { - -// Input describing one loader or hash stage that needs worker and chunk sizing. struct SLoaderRuntimeTuningRequest { - // Total input bytes for the tuned stage. uint64_t inputBytes = 0ull; - // Total amount of stage work in logical units. uint64_t totalWorkUnits = 0ull; - // Minimum work units assigned to one worker. uint64_t minWorkUnitsPerWorker = 1ull; - // Minimum input bytes assigned to one worker. uint64_t minBytesPerWorker = 1ull; - // Hardware thread count override. 0 means auto-detect. uint32_t hardwareThreads = 0u; - // Hard cap for workers for this request. 0 means no extra cap. uint32_t hardMaxWorkers = 0u; - // Preferred chunk count per worker for this stage. 0 means policy default. uint32_t targetChunksPerWorker = 0u; - // Minimum work units in one chunk. uint64_t minChunkWorkUnits = 1ull; - // Maximum work units in one chunk. uint64_t maxChunkWorkUnits = std::numeric_limits::max(); - // Pointer to representative sample bytes for hybrid sampling. const uint8_t* sampleData = nullptr; - // Number of sample bytes available at sampleData. uint64_t sampleBytes = 0ull; - // Sampling pass count override. 0 means policy default. uint32_t samplePasses = 0u; - // Sampling candidate count override. 0 means policy default. uint32_t sampleMaxCandidates = 0u; - // Minimum work units required to allow sampling. 0 means policy or auto value. 
uint64_t sampleMinWorkUnits = 0ull; }; -// Final worker and chunk layout selected for one stage. struct SLoaderRuntimeTuningResult { - // Selected worker count for the stage. size_t workerCount = 1ull; - // Work units per chunk assigned by tuner. uint64_t chunkWorkUnits = 1ull; - // Total chunk count for the stage. size_t chunkCount = 1ull; }; -// Stateless runtime tuner used by loaders and hash stages to size worker pools and chunking. struct SLoaderRuntimeTuner { private: - // Aggregated timings collected while probing one worker-count candidate. struct SBenchmarkSampleStats { uint64_t medianNs = 0ull; @@ -87,8 +65,6 @@ struct SLoaderRuntimeTuner return; } - // std::jthread starts execution in its constructor, so emplace_back launches workers 1..N-1 immediately. - // The current thread runs worker 0 and std::jthread joins automatically when the local vector is destroyed. std::vector workers; workers.reserve(workerCount - 1ull); for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) @@ -96,15 +72,10 @@ struct SLoaderRuntimeTuner fn(0ull); } - // Integer ceil division. Callers must pass a non-zero denominator. - static constexpr uint64_t ceilDiv(const uint64_t numerator, const uint64_t denominator) - { - return (numerator + denominator - 1ull) / denominator; - } + static constexpr uint64_t ceilDiv(const uint64_t numerator, const uint64_t denominator) { return (numerator + denominator - 1ull) / denominator; } template requires std::same_as> - // Measures one sampled memory-touch pass configuration and returns aggregate wall time across all passes. 
static inline TimeUnit benchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes) { if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) @@ -138,14 +109,7 @@ struct SLoaderRuntimeTuner return std::chrono::duration_cast(std::chrono::nanoseconds(elapsedNs)); } - // Warms up once and then collects timing observations for one worker-count candidate. - static inline SBenchmarkSampleStats benchmarkSampleStats( - const uint8_t* const sampleData, - const uint64_t sampleBytes, - const size_t workerCount, - const uint32_t passes, - const uint32_t observations - ) + static inline SBenchmarkSampleStats benchmarkSampleStats(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes, const uint32_t observations) { SBenchmarkSampleStats stats = {}; if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) @@ -178,16 +142,8 @@ struct SLoaderRuntimeTuner return stats; } - // Keeps the candidate probe list unique while preserving insertion order. - static inline void appendCandidate(std::vector& dst, const size_t candidate) - { - if (candidate == 0ull) - return; - if (std::find(dst.begin(), dst.end(), candidate) == dst.end()) - dst.push_back(candidate); - } + static inline void appendCandidate(std::vector& dst, const size_t candidate) { if (candidate != 0ull && std::find(dst.begin(), dst.end(), candidate) == dst.end()) dst.push_back(candidate); } - // Chooses the sample byte budget used by hybrid tuning from the known input size and policy clamps. static inline uint64_t resolveSampleBytes(const SFileIOPolicy& ioPolicy, const uint64_t knownInputBytes) { if (knownInputBytes == 0ull) @@ -201,26 +157,13 @@ struct SLoaderRuntimeTuner return std::clamp(adaptive, cappedMin, cappedMax); } - // Returns true when the hash build is small enough to stay on the caller thread. 
- static inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) - { - const uint64_t thresholdBytes = std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); - return inputBytes <= thresholdBytes; - } + static inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) { return inputBytes <= std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); } - // Resolves the effective hardware thread count and always returns at least one worker. - static inline size_t resolveHardwareThreads(const uint32_t requested = 0u) - { - const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); - return hw ? hw : 1ull; - } + static inline size_t resolveHardwareThreads(const uint32_t requested = 0u) { const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); return hw ? hw : 1ull; } - // Applies worker headroom while keeping at least two workers when parallel hardware is available. static inline size_t resolveHardMaxWorkers(const size_t hardwareThreads, const uint32_t workerHeadroom) { - const size_t hw = std::max(1ull, hardwareThreads); - const size_t minWorkers = hw >= 2ull ? 2ull : 1ull; - const size_t headroom = static_cast(workerHeadroom); + const size_t hw = std::max(1ull, hardwareThreads), minWorkers = hw >= 2ull ? 2ull : 1ull, headroom = static_cast(workerHeadroom); if (headroom == 0ull) return hw; if (hw <= headroom) @@ -228,7 +171,6 @@ struct SLoaderRuntimeTuner return std::max(minWorkers, hw - headroom); } - // Resolves worker and chunk counts for one stage using policy limits plus optional hybrid sampling. 
static inline SLoaderRuntimeTuningResult tune(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request) { using RTMode = SFileIOPolicy::SRuntimeTuning::Mode; @@ -303,7 +245,6 @@ struct SLoaderRuntimeTuner effectiveSampleBytes = std::min(effectiveSampleBytes, request.inputBytes); if (effectiveSampleBytes > 0ull && samplingBudgetRatio > 0.0) { - // keep probing lightweight: sample fraction scales with input and parallelism if (request.inputBytes > 0ull) { const uint64_t sampleDivisor = std::max( @@ -329,7 +270,6 @@ struct SLoaderRuntimeTuner if (candidates.size() > maxCandidates) candidates.resize(maxCandidates); - // probe heuristic first and only continue when budget can amortize additional probes const auto heuristicStatsProbe = benchmarkSampleStats( request.sampleData, effectiveSampleBytes, heuristicWorkerCount, samplePasses, 2u); if (heuristicStatsProbe.medianNs > 0ull) diff --git a/include/nbl/asset/utils/SGeometryNormalCommon.h b/include/nbl/asset/utils/SGeometryNormalCommon.h index 4589b6d40e..8900559421 100644 --- a/include/nbl/asset/utils/SGeometryNormalCommon.h +++ b/include/nbl/asset/utils/SGeometryNormalCommon.h @@ -3,42 +3,20 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_S_GEOMETRY_NORMAL_COMMON_H_INCLUDED_ #define _NBL_ASSET_S_GEOMETRY_NORMAL_COMMON_H_INCLUDED_ - - #include "nbl/builtin/hlsl/tgmath.hlsl" - - namespace nbl::asset { - -// Shared normal helpers used by loaders and geometry utilities for simple face-normal generation. 
class SGeometryNormalCommon { public: static_assert(sizeof(hlsl::float32_t3) == sizeof(float[3])); static_assert(alignof(hlsl::float32_t3) == alignof(float)); - static inline hlsl::float32_t3 normalizeOrZero(const hlsl::float32_t3& v, const float epsilon = 0.f) - { - const float len2 = hlsl::dot(v, v); - const float epsilon2 = epsilon * epsilon; - if (len2 <= epsilon2) - return hlsl::float32_t3(0.f, 0.f, 0.f); - return hlsl::normalize(v); - } + static inline hlsl::float32_t3 normalizeOrZero(const hlsl::float32_t3& v, const float epsilon = 0.f) { const float len2 = hlsl::dot(v, v), epsilon2 = epsilon * epsilon; return len2 <= epsilon2 ? hlsl::float32_t3(0.f, 0.f, 0.f) : hlsl::normalize(v); } - static inline hlsl::float32_t3 computeFaceNormal(const hlsl::float32_t3& a, const hlsl::float32_t3& b, const hlsl::float32_t3& c, const float epsilon = 0.000001f) - { - return normalizeOrZero(hlsl::cross(b - a, c - a), epsilon); - } + static inline hlsl::float32_t3 computeFaceNormal(const hlsl::float32_t3& a, const hlsl::float32_t3& b, const hlsl::float32_t3& c, const float epsilon = 0.000001f) { return normalizeOrZero(hlsl::cross(b - a, c - a), epsilon); } - static inline void computeFaceNormal(const float a[3], const float b[3], const float c[3], float normal[3], const float epsilon = 0.000001f) - { - *(hlsl::float32_t3*)normal = computeFaceNormal(*(const hlsl::float32_t3*)a, *(const hlsl::float32_t3*)b, *(const hlsl::float32_t3*)c, epsilon); - } + static inline void computeFaceNormal(const float a[3], const float b[3], const float c[3], float normal[3], const float epsilon = 0.000001f) { *(hlsl::float32_t3*)normal = computeFaceNormal(*(const hlsl::float32_t3*)a, *(const hlsl::float32_t3*)b, *(const hlsl::float32_t3*)c, epsilon); } }; - } - - #endif diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index e32e89cce4..318dbb1697 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ 
b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -7,7 +7,6 @@ #include "nbl/core/declarations.h" -#include "SOBJPolygonGeometryAuxLayout.h" #include "nbl/asset/IAssetManager.h" #include "nbl/asset/ICPUGeometryCollection.h" #include "nbl/asset/interchange/SGeometryContentHash.h" @@ -20,7 +19,7 @@ #include "nbl/system/IFile.h" #include "COBJMeshFileLoader.h" -#include "impl/SLoadSession.h" +#include "impl/SFileAccess.h" #include "impl/STextParse.h" #include @@ -37,6 +36,7 @@ namespace struct Parse { + static constexpr uint32_t UV0 = 0u; using Common = impl::TextParse; struct VertexDedupNode @@ -524,8 +524,8 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( if (!view) return false; auto* const auxViews = geometry->getAuxAttributeViews(); - auxViews->resize(SOBJPolygonGeometryAuxLayout::UV0 + 1u); - auxViews->operator[](SOBJPolygonGeometryAuxLayout::UV0) = std::move(view); + auxViews->resize(Parse::UV0 + 1u); + auxViews->operator[](Parse::UV0) = std::move(view); } if (!indices.empty()) { diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 5b2822c34b..43c4e3d98b 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -4,12 +4,10 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/asset/interchange/COBJMeshWriter.h" -#include "SOBJPolygonGeometryAuxLayout.h" #include "nbl/asset/interchange/SGeometryViewDecode.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" #include "impl/SFileAccess.h" -#include "impl/SIODiagnostics.h" #include "nbl/builtin/hlsl/array_accessors.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" @@ -35,7 +33,6 @@ COBJMeshWriter::COBJMeshWriter() uint64_t COBJMeshWriter::getSupportedAssetTypesBitfield() const { - // OBJ can store a single geometry, a geometry collection, or a flattened scene export. 
return IAsset::ET_GEOMETRY | IAsset::ET_GEOMETRY_COLLECTION | IAsset::ET_SCENE; } @@ -60,6 +57,7 @@ namespace struct Parse { + static constexpr uint32_t UV0 = 0u; static constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 8ull; static constexpr size_t MaxUInt32Chars = std::numeric_limits::digits10 + 1ull; static constexpr size_t MaxIndexTokenBytes = MaxUInt32Chars * 3ull + 2ull; @@ -226,7 +224,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const auto& normalView = geom->getNormalView(); const bool hasNormals = static_cast(normalView); const size_t vertexCount = positionView.getElementCount(); - const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, SOBJPolygonGeometryAuxLayout::UV0, vertexCount); + const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, Parse::UV0, vertexCount); if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) uvView = nullptr; const bool hasUVs = uvView != nullptr; @@ -337,13 +335,13 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(output.size()), true, file); - if (impl::SIODiagnostics::logInvalidPlan(_params.logger, "OBJ writer", file->getFileName().string().c_str(), ioPlan)) + if (impl::SFileAccess::logInvalidPlan(_params.logger, "OBJ writer", file->getFileName().string().c_str(), ioPlan)) return false; const bool writeOk = SInterchangeIO::writeFileWithPolicy(file, ioPlan, output.data(), output.size(), &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); - impl::SIODiagnostics::logTinyIO(_params.logger, "OBJ writer", file->getFileName().string().c_str(), ioTelemetry, static_cast(output.size()), _params.ioPolicy, "writes"); + impl::SFileAccess::logTinyIO(_params.logger, "OBJ writer", 
file->getFileName().string().c_str(), ioTelemetry, static_cast(output.size()), _params.ioPolicy, "writes"); _params.logger.log("OBJ writer stats: file=%s bytes=%llu vertices=%llu faces=%llu geometries=%llu io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(output.size()), static_cast(totalVertexCount), static_cast(totalFaceCount), static_cast(items.size()), diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 35a39e7764..a302bd9c01 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -5,10 +5,8 @@ // See the original file in irrlicht source for authors #include "CPLYMeshFileLoader.h" -#include "SPLYPolygonGeometryAuxLayout.h" #include "impl/SBinaryData.h" -#include "impl/SContentHashBuild.h" -#include "impl/SLoadSession.h" +#include "impl/SFileAccess.h" #include "impl/STextParse.h" #include "nbl/asset/IAssetManager.h" #include "nbl/asset/interchange/SGeometryContentHash.h" @@ -23,6 +21,8 @@ #include "nbl/system/IFile.h" #include "nbl/system/ISystem.h" +#include + namespace nbl::asset { @@ -31,9 +31,43 @@ namespace struct Parse { - using Binary = impl::BinaryData; + static constexpr uint32_t UV0 = 0u; + using Binary = impl::BinaryData; using Common = impl::TextParse; + struct ContentHashBuild + { + bool enabled = false; + bool inlineHash = false; + core::vector> hashedBuffers = {}; + std::jthread deferredThread = {}; + + static inline ContentHashBuild create(const bool enabled, const bool inlineHash) { return {.enabled = enabled, .inlineHash = inlineHash}; } + inline bool hashesInline() const { return enabled && inlineHash; } + inline bool hashesDeferred() const { return enabled && !inlineHash; } + + inline void hashNow(ICPUBuffer* const buffer) + { + if (!hashesInline() || !buffer || 
buffer->getContentHash() != IPreHashed::INVALID_HASH) + return; + for (const auto& hashed : hashedBuffers) + if (hashed.get() == buffer) + return; + buffer->setContentHash(buffer->computeContentHash()); + hashedBuffers.push_back(core::smart_refctd_ptr(buffer)); + } + + inline void tryDefer(ICPUBuffer* const buffer) + { + if (!hashesDeferred() || !buffer || deferredThread.joinable() || buffer->getContentHash() != IPreHashed::INVALID_HASH) + return; + auto keepAlive = core::smart_refctd_ptr(buffer); + deferredThread = std::jthread([buffer = std::move(keepAlive)]() mutable {buffer->setContentHash(buffer->computeContentHash());}); + } + + inline void wait() { if (deferredThread.joinable()) deferredThread.join(); } + }; + static std::string_view toStringView(const char* text) { return text ? std::string_view{text} : std::string_view{}; @@ -535,40 +569,46 @@ struct Parse return EFastVertexReadResult::NotApplicable; const size_t floatBytes = sizeof(hlsl::float32_t); - auto validateTuple = [&](const size_t beginIx, - const size_t componentCount, uint32_t& outStride, - uint8_t*& outBase) -> bool { - if (beginIx + componentCount > vertAttrIts.size()) + struct STupleDesc { + uint32_t beginIx; + uint32_t componentCount; + uint32_t stride = 0u; + uint8_t* base = nullptr; + }; + std::array tuples = {STupleDesc{0u, 3u}, + STupleDesc{3u, 3u}, + STupleDesc{6u, 2u}}; + const uint32_t tupleCount = + 1u + static_cast(layout->hasNormals) + + static_cast(layout->hasUVs); + auto validateTuple = [&](STupleDesc& tuple) -> bool { + if (tuple.beginIx + tuple.componentCount > vertAttrIts.size()) return false; - auto& first = vertAttrIts[beginIx]; + auto& first = vertAttrIts[tuple.beginIx]; if (!first.ptr || first.dstFmt != EF_R32_SFLOAT) return false; - outStride = first.stride; - outBase = first.ptr; - for (size_t c = 1ull; c < componentCount; ++c) { - auto& it = vertAttrIts[beginIx + c]; + tuple.stride = first.stride; + tuple.base = first.ptr; + for (uint32_t c = 1u; c < 
tuple.componentCount; ++c) { + auto& it = vertAttrIts[tuple.beginIx + c]; if (!it.ptr || it.dstFmt != EF_R32_SFLOAT) return false; - if (it.stride != outStride) + if (it.stride != tuple.stride) return false; - if (it.ptr != outBase + c * floatBytes) + if (it.ptr != tuple.base + c * floatBytes) return false; } return true; }; - - uint32_t posStride = 0u; - uint32_t normalStride = 0u; - uint32_t uvStride = 0u; - uint8_t* posBase = nullptr; - uint8_t* normalBase = nullptr; - uint8_t* uvBase = nullptr; - if (vertAttrIts.size() != layout->propertyCount || - !validateTuple(0u, 3u, posStride, posBase) || - (layout->hasNormals && - !validateTuple(3u, 3u, normalStride, normalBase)) || - (layout->hasUVs && !validateTuple(6u, 2u, uvStride, uvBase))) + auto commitTuple = [&](const STupleDesc& tuple) -> void { + for (uint32_t c = 0u; c < tuple.componentCount; ++c) + vertAttrIts[tuple.beginIx + c].ptr = tuple.base + c * floatBytes; + }; + if (vertAttrIts.size() != layout->propertyCount) return EFastVertexReadResult::NotApplicable; + for (uint32_t tupleIx = 0u; tupleIx < tupleCount; ++tupleIx) + if (!validateTuple(tuples[tupleIx])) + return EFastVertexReadResult::NotApplicable; if (el.Count > (std::numeric_limits::max() / layout->srcBytesPerVertex)) return EFastVertexReadResult::Error; @@ -601,11 +641,14 @@ struct Parse for (uint32_t i = 0u; i < N; ++i) out[i] = getter(value, i); }; - auto advanceTuple = [&](const uint32_t beginIx, - const uint32_t componentCount, - const size_t advance) -> void { - for (uint32_t i = 0u; i < componentCount; ++i) - vertAttrIts[beginIx + i].ptr += advance; + auto decodeStore = [&](STupleDesc& tuple, + const uint8_t*& src) -> Vec { + Vec value = decodeVector.operator()(src); + storeVector.operator()(tuple.base, value); + src += static_cast(hlsl::vector_traits::Dimension) * + floatBytes; + tuple.base += tuple.stride; + return value; }; size_t remainingVertices = el.Count; @@ -623,31 +666,23 @@ struct Parse std::min(remainingVertices, available / 
layout->srcBytesPerVertex); const uint8_t* src = reinterpret_cast(StartPointer); if (!layout->hasNormals && !layout->hasUVs && - posStride == 3ull * floatBytes && !needsByteSwap && !trackAABB) { + tuples[0].stride == 3ull * floatBytes && !needsByteSwap && + !trackAABB) { const size_t batchBytes = batchVertices * 3ull * floatBytes; - std::memcpy(posBase, src, batchBytes); + std::memcpy(tuples[0].base, src, batchBytes); src += batchBytes; - posBase += batchBytes; + tuples[0].base += batchBytes; } else { for (size_t v = 0ull; v < batchVertices; ++v) { const hlsl::float32_t3 position = - decodeVector.operator()(src); - storeVector.operator()(posBase, position); + decodeStore.operator()(tuples[0], src); if (trackAABB) hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); - src += 3ull * floatBytes; - posBase += posStride; if (layout->hasNormals) { - storeVector.operator()( - normalBase, decodeVector.operator()(src)); - src += 3ull * floatBytes; - normalBase += normalStride; + decodeStore.operator()(tuples[1], src); } if (layout->hasUVs) { - storeVector.operator()( - uvBase, decodeVector.operator()(src)); - src += 2ull * floatBytes; - uvBase += uvStride; + decodeStore.operator()(tuples[2], src); } } } @@ -657,11 +692,8 @@ struct Parse remainingVertices -= batchVertices; } - advanceTuple(0u, 3u, el.Count * posStride); - if (layout->hasNormals) - advanceTuple(3u, 3u, el.Count * normalStride); - if (layout->hasUVs) - advanceTuple(6u, 2u, el.Count * uvStride); + for (uint32_t tupleIx = 0u; tupleIx < tupleCount; ++tupleIx) + commitTuple(tuples[tupleIx]); return EFastVertexReadResult::Success; } void readVertex(const IAssetLoader::SAssetLoadParams& _params, @@ -1246,21 +1278,10 @@ bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste return false; const std::string_view fileHeader(buf.data(), success.getBytesProcessed()); - auto trimWhitespace = [](std::string_view line) -> std::string_view { - const auto isWhitespace = [](const char c) 
-> bool { - return c == ' ' || c == '\t' || c == '\r'; - }; - while (!line.empty() && isWhitespace(line.front())) - line.remove_prefix(1ull); - while (!line.empty() && isWhitespace(line.back())) - line.remove_suffix(1ull); - return line; - }; - size_t lineStart = 0ull; const size_t firstLineEnd = fileHeader.find('\n'); std::string_view firstLine = fileHeader.substr(0ull, firstLineEnd); - firstLine = trimWhitespace(firstLine); + firstLine = Parse::Common::trimWhitespace(firstLine); if (firstLine != "ply") return false; if (firstLineEnd == std::string_view::npos) @@ -1274,8 +1295,7 @@ bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste size_t lineEnd = fileHeader.find('\n', lineStart); if (lineEnd == std::string_view::npos) lineEnd = fileHeader.size(); - std::string_view line = - trimWhitespace(fileHeader.substr(lineStart, lineEnd - lineStart)); + std::string_view line = Parse::Common::trimWhitespace(fileHeader.substr(lineStart, lineEnd - lineStart)); if (line.starts_with("format ")) return std::find(headers.begin(), headers.end(), line) != headers.end(); lineStart = lineEnd + 1ull; @@ -1330,7 +1350,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); uint32_t vertCount = 0; - impl::SContentHashBuild contentHashBuild = impl::SContentHashBuild::create(computeContentHashes, hashInBuild); + Parse::ContentHashBuild contentHashBuild = Parse::ContentHashBuild::create(computeContentHashes, hashInBuild); auto visitVertexAttributeViews = [&](auto&& visitor) -> void { visitor(geometry->getPositionView()); visitor(geometry->getNormalView()); @@ -1617,8 +1637,8 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( attachStructuredView(normalView, [&](auto view) { geometry->setNormalView(std::move(view)); }); attachStructuredView(uvView, [&](auto view) { auto* const auxViews = geometry->getAuxAttributeViews(); - auxViews->resize(SPLYPolygonGeometryAuxLayout::UV0 + 1u); 
- auxViews->operator[](SPLYPolygonGeometryAuxLayout::UV0) = std::move(view); + auxViews->resize(Parse::UV0 + 1u); + auxViews->operator[](Parse::UV0) = std::move(view); }); // for (auto& view : extraViews) diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 266c741a51..2c1a768797 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -5,14 +5,11 @@ // See the original file in irrlicht source for authors #include "CPLYMeshWriter.h" -#include "SPLYPolygonGeometryAuxLayout.h" -#include "nbl/asset/interchange/SGeometryAttributeEmit.h" #include "nbl/asset/interchange/SGeometryViewDecode.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" #include "impl/SBinaryData.h" #include "impl/SFileAccess.h" -#include "impl/SIODiagnostics.h" #include "nbl/system/IFile.h" @@ -58,6 +55,7 @@ namespace struct Parse { + static constexpr uint32_t UV0 = 0u; using Binary = impl::BinaryData; using SemanticDecode = SGeometryViewDecode::Prepared; using StoredDecode = SGeometryViewDecode::Prepared; @@ -268,13 +266,33 @@ struct Parse return static_cast(*this) && emit(sink, *this, ix); } + template + static bool emitDecode(Sink& sink, const auto& decode, const size_t ix, const uint32_t components, const bool flipVectors) + { + std::array decoded = {}; + if (!decode.decode(ix, decoded)) + return false; + for (uint32_t c = 0u; c < components; ++c) + { + OutT value = decoded[c]; + if constexpr (std::is_signed_v || std::is_floating_point_v) + { + if (flipVectors && c == 0u) + value = -value; + } + if (!sink.append(value)) + return false; + } + return true; + } + template static bool emitPrepared(Sink& sink, const PreparedView& view, const size_t ix) { if constexpr (Mode == SGeometryViewDecode::EMode::Semantic) - return SGeometryAttributeEmit::emit(sink, view.semantic, ix, view.components, view.flipVectors); + return 
emitDecode(sink, view.semantic, ix, view.components, view.flipVectors); else - return SGeometryAttributeEmit::emit(sink, view.stored, ix, view.components, view.flipVectors); + return emitDecode(sink, view.stored, ix, view.components, view.flipVectors); } template @@ -440,7 +458,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return false; } - const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, SPLYPolygonGeometryAuxLayout::UV0, vertexCount); + const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, Parse::UV0, vertexCount); if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) uvView = nullptr; @@ -450,7 +468,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ for (uint32_t auxIx = 0u; auxIx < static_cast(auxViews.size()); ++auxIx) { const auto& view = auxViews[auxIx]; - if (!view || (uvView && auxIx == SPLYPolygonGeometryAuxLayout::UV0)) + if (!view || (uvView && auxIx == Parse::UV0)) continue; if (view.getElementCount() != vertexCount) continue; @@ -543,7 +561,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ auto writePayload = [&](const void* bodyData, const size_t bodySize) -> bool { const size_t outputSize = header.size() + bodySize; const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(outputSize), true, file); - if (impl::SIODiagnostics::logInvalidPlan(_params.logger, "PLY writer", file->getFileName().string().c_str(), ioPlan)) + if (impl::SFileAccess::logInvalidPlan(_params.logger, "PLY writer", file->getFileName().string().c_str(), ioPlan)) return false; outputBytes = outputSize; @@ -551,7 +569,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ writeOk = SInterchangeIO::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t 
ioAvgWrite = ioTelemetry.getAvgOrZero(); - impl::SIODiagnostics::logTinyIO(_params.logger, "PLY writer", file->getFileName().string().c_str(), ioTelemetry, static_cast(outputBytes), _params.ioPolicy, "writes"); + impl::SFileAccess::logTinyIO(_params.logger, "PLY writer", file->getFileName().string().c_str(), ioTelemetry, static_cast(outputBytes), _params.ioPolicy, "writes"); _params.logger.log("PLY writer stats: file=%s bytes=%llu vertices=%llu faces=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(outputBytes), static_cast(vertexCount), static_cast(faceCount), binary ? 1 : 0, diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 3241e56380..8f54500a95 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -6,8 +6,7 @@ #include "CSTLMeshFileLoader.h" -#include "SSTLPolygonGeometryAuxLayout.h" -#include "impl/SLoadSession.h" +#include "impl/SFileAccess.h" #include "impl/STextParse.h" #include "nbl/asset/asset.h" #include "nbl/asset/format/convertColor.h" @@ -32,6 +31,7 @@ namespace struct Parse { + static constexpr uint32_t COLOR0 = 0u; using Common = impl::TextParse; struct LayoutProbe @@ -610,8 +610,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!colorView) return {}; auto* const auxViews = geometry->getAuxAttributeViews(); - auxViews->resize(SSTLPolygonGeometryAuxLayout::COLOR0 + 1u); - auxViews->operator[](SSTLPolygonGeometryAuxLayout::COLOR0) = + auxViews->resize(Parse::COLOR0 + 1u); + auxViews->operator[](Parse::COLOR0) = std::move(colorView); hasTriangleColors = true; } diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index c60bb98b19..ed9ea8d2fa 100644 --- 
a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -6,9 +6,7 @@ #include "nbl/system/IFile.h" #include "CSTLMeshWriter.h" -#include "SSTLPolygonGeometryAuxLayout.h" #include "impl/SFileAccess.h" -#include "impl/SIODiagnostics.h" #include "nbl/asset/format/convertColor.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" @@ -30,6 +28,7 @@ namespace struct Parse { + static constexpr uint32_t COLOR0 = 0u; struct Context { IAssetWriter::SAssetWriteContext writeContext; @@ -161,33 +160,28 @@ struct Parse if (outIdx) *outIdx = idx; - hlsl::float32_t3 p0 = {}; - hlsl::float32_t3 p1 = {}; - hlsl::float32_t3 p2 = {}; - if (!posView.decodeElement(idx.x, p0)) + std::array positions = {}; + if (!decodeIndexedTriple(idx, [&posView](const uint32_t vertexIx, hlsl::float32_t3& out) -> bool { return posView.decodeElement(vertexIx, out); }, positions.data())) return false; - if (!posView.decodeElement(idx.y, p1)) - return false; - if (!posView.decodeElement(idx.z, p2)) - return false; - - out0 = p0; - out1 = p1; - out2 = p2; + out0 = positions[0]; + out1 = positions[1]; + out2 = positions[2]; return true; } + template + static bool decodeIndexedTriple(const hlsl::uint32_t3& idx, DecodeFn&& decode, T* out) + { + return out && decode(idx.x, out[0]) && decode(idx.y, out[1]) && decode(idx.z, out[2]); + } + static bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const hlsl::uint32_t3& idx, hlsl::float32_t3& outNormal) { if (!normalView) return false; std::array normals = {}; - if (!normalView.decodeElement(idx.x, normals[0])) - return false; - if (!normalView.decodeElement(idx.y, normals[1])) - return false; - if (!normalView.decodeElement(idx.z, normals[2])) + if (!decodeIndexedTriple(idx, [&normalView](const uint32_t vertexIx, hlsl::float32_t3& out) -> bool { return normalView.decodeElement(vertexIx, out); }, normals.data())) return false; return 
selectFirstValidNormal(normals.data(), static_cast(normals.size()), outNormal); @@ -275,7 +269,7 @@ struct Parse static const ICPUPolygonGeometry::SDataView* getColorView(const ICPUPolygonGeometry* geom, const size_t vertexCount) { - const auto* view = SGeometryWriterCommon::getAuxViewAt(geom, SSTLPolygonGeometryAuxLayout::COLOR0, vertexCount); + const auto* view = SGeometryWriterCommon::getAuxViewAt(geom, Parse::COLOR0, vertexCount); if (!view) return nullptr; return getFormatChannelCount(view->composed.format) >= 3u ? view : nullptr; @@ -420,19 +414,14 @@ struct Parse } else if (!SGeometryWriterCommon::visitTriangleIndices(geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) -> bool { const hlsl::uint32_t3 idx(i0, i1, i2); - hlsl::float32_t3 p0 = {}; - hlsl::float32_t3 p1 = {}; - hlsl::float32_t3 p2 = {}; - if (!decodePosition(idx.x, p0) || !decodePosition(idx.y, p1) || !decodePosition(idx.z, p2)) + std::array positions = {}; + if (!decodeIndexedTriple(idx, decodePosition, positions.data())) return false; - hlsl::float32_t3 normals[3] = {}; - if (hasNormals) - { - if (!decodeNormal(idx.x, normals[0]) || !decodeNormal(idx.y, normals[1]) || !decodeNormal(idx.z, normals[2])) - return false; - } - return emitTriangle(p0, p1, p2, idx, hasNormals ? normals : nullptr, hasNormals ? 3u : 0u, true); + std::array normals = {}; + if (hasNormals && !decodeIndexedTriple(idx, decodeNormal, normals.data())) + return false; + return emitTriangle(positions[0], positions[1], positions[2], idx, hasNormals ? normals.data() : nullptr, hasNormals ? 
3u : 0u, true); })) return false; @@ -580,7 +569,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } context.ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, expectedSize, sizeKnown, file); - if (impl::SIODiagnostics::logInvalidPlan(_params.logger, "STL writer", file->getFileName().string().c_str(), context.ioPlan)) + if (impl::SFileAccess::logInvalidPlan(_params.logger, "STL writer", file->getFileName().string().c_str(), context.ioPlan)) return false; if (context.ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile && sizeKnown) @@ -596,7 +585,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const uint64_t ioMinWrite = context.writeTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = context.writeTelemetry.getAvgOrZero(); - impl::SIODiagnostics::logTinyIO(_params.logger, "STL writer", file->getFileName().string().c_str(), context.writeTelemetry, context.fileOffset, _params.ioPolicy, "writes"); + impl::SFileAccess::logTinyIO(_params.logger, "STL writer", file->getFileName().string().c_str(), context.writeTelemetry, context.fileOffset, _params.ioPolicy, "writes"); _params.logger.log("STL writer stats: file=%s bytes=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(context.fileOffset), binary ? 1 : 0, static_cast(context.writeTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), diff --git a/src/nbl/asset/interchange/SGeometryAttributeEmit.h b/src/nbl/asset/interchange/SGeometryAttributeEmit.h deleted file mode 100644 index a326ad8ea8..0000000000 --- a/src/nbl/asset/interchange/SGeometryAttributeEmit.h +++ /dev/null @@ -1,41 +0,0 @@ -// Internal src-only header. -// Do not include from public headers. 
-#ifndef _NBL_ASSET_S_GEOMETRY_ATTRIBUTE_EMIT_H_INCLUDED_ -#define _NBL_ASSET_S_GEOMETRY_ATTRIBUTE_EMIT_H_INCLUDED_ - -#include "nbl/asset/interchange/SGeometryViewDecode.h" - -#include -#include - - -namespace nbl::asset -{ - -class SGeometryAttributeEmit -{ - public: - template - static inline bool emit(Sink& sink, const SGeometryViewDecode::Prepared& view, const size_t ix, const uint32_t componentCount, const bool flipVectors) - { - std::array decoded = {}; - if (!view.decode(ix, decoded)) - return false; - for (uint32_t c = 0u; c < componentCount; ++c) - { - OutT value = decoded[c]; - if constexpr (std::is_signed_v || std::is_floating_point_v) - { - if (flipVectors && c == 0u) - value = -value; - } - if (!sink.append(value)) - return false; - } - return true; - } -}; - -} - -#endif diff --git a/src/nbl/asset/interchange/SGeometryViewDecode.h b/src/nbl/asset/interchange/SGeometryViewDecode.h index 1cec082565..68cc616f87 100644 --- a/src/nbl/asset/interchange/SGeometryViewDecode.h +++ b/src/nbl/asset/interchange/SGeometryViewDecode.h @@ -2,35 +2,27 @@ // Do not include from public headers. #ifndef _NBL_ASSET_S_GEOMETRY_VIEW_DECODE_H_INCLUDED_ #define _NBL_ASSET_S_GEOMETRY_VIEW_DECODE_H_INCLUDED_ - #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/format/decodePixels.h" #include "nbl/builtin/hlsl/concepts.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" - #include #include #include - - namespace nbl::asset { - class SGeometryViewDecode { public: enum class EMode : uint8_t { - // Semantic values ready for writer-side math and text/binary emission. Semantic, - // Stored values preserved in the original integer storage domain. Stored }; template struct Prepared { - // Cached per-view decode state prepared once and reused inside tight loops. 
const uint8_t* data = nullptr; uint32_t stride = 0u; E_FORMAT format = EF_UNKNOWN; @@ -61,7 +53,6 @@ class SGeometryViewDecode template static inline Prepared prepare(const ICPUPolygonGeometry::SDataView& view) { - // Hoist view invariants out of the per-element decode path. Prepared retval = {}; if (!view.composed.isFormatted()) return retval; @@ -85,7 +76,6 @@ class SGeometryViewDecode template static inline bool decodeElement(const ICPUPolygonGeometry::SDataView& view, const size_t ix, Out& out) { - // Convenience wrapper for one-off decode sites that do not keep prepared state. return prepare(view).decode(ix, out); } @@ -136,7 +126,5 @@ class SGeometryViewDecode return decodePreparedComponents(prepared, ix, out, outDim); } }; - } - #endif diff --git a/src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h b/src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h deleted file mode 100644 index 074c597624..0000000000 --- a/src/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_ASSET_S_OBJ_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ -#define _NBL_ASSET_S_OBJ_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ - -namespace nbl::asset -{ - -// Private OBJ loader/writer contract for aux slots stored in ICPUPolygonGeometry. -class SOBJPolygonGeometryAuxLayout -{ - public: - static inline constexpr unsigned int UV0 = 0u; -}; - -} - -#endif diff --git a/src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h b/src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h deleted file mode 100644 index 097f6160d8..0000000000 --- a/src/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_ASSET_S_PLY_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ -#define _NBL_ASSET_S_PLY_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ - -namespace nbl::asset -{ - -// Private PLY loader/writer contract for reserved aux slots stored in ICPUPolygonGeometry. -class SPLYPolygonGeometryAuxLayout -{ - public: - static inline constexpr unsigned int UV0 = 0u; -}; - -} - -#endif diff --git a/src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h b/src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h deleted file mode 100644 index f2ef0a5027..0000000000 --- a/src/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_ASSET_S_STL_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ -#define _NBL_ASSET_S_STL_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ - -namespace nbl::asset -{ - -// Private STL loader/writer contract for aux slots stored in ICPUPolygonGeometry. -class SSTLPolygonGeometryAuxLayout -{ - public: - static inline constexpr unsigned int COLOR0 = 0u; -}; - -} - -#endif diff --git a/src/nbl/asset/interchange/impl/SBinaryData.h b/src/nbl/asset/interchange/impl/SBinaryData.h index 1eeba4f764..9b44cf5297 100644 --- a/src/nbl/asset/interchange/impl/SBinaryData.h +++ b/src/nbl/asset/interchange/impl/SBinaryData.h @@ -2,18 +2,13 @@ // Do not include from public headers. #ifndef _NBL_ASSET_IMPL_S_BINARY_DATA_H_INCLUDED_ #define _NBL_ASSET_IMPL_S_BINARY_DATA_H_INCLUDED_ - #include #include #include - - namespace nbl::asset::impl { - struct BinaryData { - // Minimal unaligned binary primitives reused by binary interchange paths. 
template static inline T byteswap(const T value) { @@ -46,7 +41,5 @@ struct BinaryData dst += sizeof(value); } }; - } - #endif diff --git a/src/nbl/asset/interchange/impl/SContentHashBuild.h b/src/nbl/asset/interchange/impl/SContentHashBuild.h deleted file mode 100644 index a9d77b3943..0000000000 --- a/src/nbl/asset/interchange/impl/SContentHashBuild.h +++ /dev/null @@ -1,72 +0,0 @@ -// Internal src-only header. -// Do not include from public headers. -#ifndef _NBL_ASSET_IMPL_S_CONTENT_HASH_BUILD_H_INCLUDED_ -#define _NBL_ASSET_IMPL_S_CONTENT_HASH_BUILD_H_INCLUDED_ - -#include "nbl/core/declarations.h" -#include "nbl/asset/ICPUBuffer.h" - -#include - - -namespace nbl::asset::impl -{ - - class SContentHashBuild -{ - public: - bool enabled = false; - bool inlineHash = false; - core::vector> hashedBuffers = {}; - std::jthread deferredThread = {}; - - static inline SContentHashBuild create(const bool enabled, const bool inlineHash) - { - return {.enabled = enabled, .inlineHash = inlineHash}; - } - - inline bool hashesInline() const - { - return enabled && inlineHash; - } - - inline bool hashesDeferred() const - { - return enabled && !inlineHash; - } - - inline void hashNow(ICPUBuffer* const buffer) - { - if (!hashesInline() || !buffer) - return; - if (buffer->getContentHash() != IPreHashed::INVALID_HASH) - return; - for (const auto& hashed : hashedBuffers) - if (hashed.get() == buffer) - return; - buffer->setContentHash(buffer->computeContentHash()); - hashedBuffers.push_back(core::smart_refctd_ptr(buffer)); - } - - inline void tryDefer(ICPUBuffer* const buffer) - { - if (!hashesDeferred() || !buffer) - return; - if (deferredThread.joinable()) - return; - if (buffer->getContentHash() != IPreHashed::INVALID_HASH) - return; - auto keepAlive = core::smart_refctd_ptr(buffer); - deferredThread = std::jthread([buffer = std::move(keepAlive)]() mutable {buffer->setContentHash(buffer->computeContentHash());}); - } - - inline void wait() - { - if (deferredThread.joinable()) - 
deferredThread.join(); - } -}; - -} - -#endif diff --git a/src/nbl/asset/interchange/impl/SFileAccess.h b/src/nbl/asset/interchange/impl/SFileAccess.h index 1373c6b5e3..671b5371c8 100644 --- a/src/nbl/asset/interchange/impl/SFileAccess.h +++ b/src/nbl/asset/interchange/impl/SFileAccess.h @@ -2,26 +2,37 @@ // Do not include from public headers. #ifndef _NBL_ASSET_IMPL_S_FILE_ACCESS_H_INCLUDED_ #define _NBL_ASSET_IMPL_S_FILE_ACCESS_H_INCLUDED_ - #include "nbl/core/declarations.h" #include "nbl/asset/interchange/SInterchangeIO.h" - - +#include "nbl/system/ILogger.h" +#include namespace nbl::asset::impl { - class SFileAccess { public: - // Common file access policy glue for mapped whole-file and buffered fallback paths. - static inline bool isMappable(const system::IFile* file) + static inline bool isMappable(const system::IFile* file) { return file && core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); } + static inline SResolvedFileIOPolicy resolvePlan(const SFileIOPolicy& ioPolicy, const uint64_t payloadBytes, const bool sizeKnown, const system::IFile* file) { return SResolvedFileIOPolicy(ioPolicy, payloadBytes, sizeKnown, isMappable(file)); } + + template + static inline bool logInvalidPlan(Logger& logger, const char* const owner, const char* const fileName, const SResolvedFileIOPolicy& ioPlan) { - return file && core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); + if (ioPlan.isValid()) + return false; + logger.log("%s: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, owner, fileName, ioPlan.reason); + return true; } - static inline SResolvedFileIOPolicy resolvePlan(const SFileIOPolicy& ioPolicy, const uint64_t payloadBytes, const bool sizeKnown, const system::IFile* file) + template + static inline void logTinyIO(Logger& logger, const char* const owner, const char* const fileName, const SInterchangeIO::STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy, const char* const 
opName) { - return SResolvedFileIOPolicy(ioPolicy, payloadBytes, sizeKnown, isMappable(file)); + if (!SInterchangeIO::isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy)) + return; + logger.log("%s tiny-io guard: file=%s %s=%llu min=%llu avg=%llu", + system::ILogger::ELL_WARNING, owner, fileName, opName, + static_cast(telemetry.callCount), + static_cast(telemetry.getMinOrZero()), + static_cast(telemetry.getAvgOrZero())); } static inline const uint8_t* readRange(system::IFile* file, const size_t offset, const size_t bytes, core::vector& storage, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr, const bool zeroTerminate = false) @@ -38,7 +49,6 @@ class SFileAccess { if (wasMapped) *wasMapped = false; - if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) { const auto* mapped = reinterpret_cast(static_cast(file)->getMappedPointer()); @@ -51,11 +61,47 @@ class SFileAccess return mapped; } } - return readRange(file, 0ull, bytes, storage, ioPlan, ioTelemetry, zeroTerminate); } }; -} +class SLoadSession +{ + public: + system::IFile* file = nullptr; + const SFileIOPolicy* requestedPolicy = nullptr; + SResolvedFileIOPolicy ioPlan = {}; + uint64_t payloadBytes = 0ull; + const char* owner = nullptr; + std::string fileName = {}; + + template + static inline bool begin(Logger& logger, const char* const owner, system::IFile* file, const SFileIOPolicy& ioPolicy, const uint64_t payloadBytes, const bool sizeKnown, SLoadSession& out) + { + out = {}; + if (!file) + return false; + out.file = file; + out.requestedPolicy = &ioPolicy; + out.ioPlan = SFileAccess::resolvePlan(ioPolicy, payloadBytes, sizeKnown, file); + out.payloadBytes = payloadBytes; + out.owner = owner; + out.fileName = file->getFileName().string(); + return !SFileAccess::logInvalidPlan(logger, owner, out.fileName.c_str(), out.ioPlan); + } + + inline bool isWholeFile() const { return ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile; } + inline const 
uint8_t* mappedPointer() const { return file && isWholeFile() ? reinterpret_cast(static_cast(file)->getMappedPointer()) : nullptr; } + inline const uint8_t* readRange(const size_t offset, const size_t bytes, core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, const bool zeroTerminate = false) const { return SFileAccess::readRange(file, offset, bytes, storage, ioPlan, ioTelemetry, zeroTerminate); } + inline const uint8_t* mapOrReadWholeFile(core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, bool* const wasMapped = nullptr, const bool zeroTerminate = false) const { return SFileAccess::mapOrReadWholeFile(file, static_cast(payloadBytes), storage, ioPlan, ioTelemetry, wasMapped, zeroTerminate); } + template + inline void logTinyIO(Logger& logger, const Telemetry& telemetry, const char* const opName = "reads") const + { + if (!requestedPolicy) + return; + SFileAccess::logTinyIO(logger, owner, fileName.c_str(), telemetry, payloadBytes, *requestedPolicy, opName); + } +}; +} #endif diff --git a/src/nbl/asset/interchange/impl/SIODiagnostics.h b/src/nbl/asset/interchange/impl/SIODiagnostics.h deleted file mode 100644 index 687c61f2c7..0000000000 --- a/src/nbl/asset/interchange/impl/SIODiagnostics.h +++ /dev/null @@ -1,41 +0,0 @@ -// Internal src-only header. -// Do not include from public headers. -#ifndef _NBL_ASSET_IMPL_S_IO_DIAGNOSTICS_H_INCLUDED_ -#define _NBL_ASSET_IMPL_S_IO_DIAGNOSTICS_H_INCLUDED_ - -#include "nbl/asset/interchange/SInterchangeIO.h" -#include "nbl/system/ILogger.h" - - -namespace nbl::asset::impl -{ - -class SIODiagnostics -{ - public: - // Shared logging helpers for IO policy failures and suspicious tiny-IO telemetry. 
- template - static inline bool logInvalidPlan(Logger& logger, const char* const owner, const char* const fileName, const SResolvedFileIOPolicy& ioPlan) - { - if (ioPlan.isValid()) - return false; - logger.log("%s: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, owner, fileName, ioPlan.reason); - return true; - } - - template - static inline void logTinyIO(Logger& logger, const char* const owner, const char* const fileName, const SInterchangeIO::STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy, const char* const opName) - { - if (!SInterchangeIO::isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy)) - return; - logger.log("%s tiny-io guard: file=%s %s=%llu min=%llu avg=%llu", - system::ILogger::ELL_WARNING, owner, fileName, opName, - static_cast(telemetry.callCount), - static_cast(telemetry.getMinOrZero()), - static_cast(telemetry.getAvgOrZero())); - } -}; - -} - -#endif diff --git a/src/nbl/asset/interchange/impl/SLoadSession.h b/src/nbl/asset/interchange/impl/SLoadSession.h deleted file mode 100644 index 0d368e0119..0000000000 --- a/src/nbl/asset/interchange/impl/SLoadSession.h +++ /dev/null @@ -1,74 +0,0 @@ -// Internal src-only header. -// Do not include from public headers. 
-#ifndef _NBL_ASSET_IMPL_S_LOAD_SESSION_H_INCLUDED_ -#define _NBL_ASSET_IMPL_S_LOAD_SESSION_H_INCLUDED_ - -#include "SFileAccess.h" -#include "SIODiagnostics.h" - -#include - - -namespace nbl::asset::impl -{ - -class SLoadSession -{ - public: - system::IFile* file = nullptr; - const SFileIOPolicy* requestedPolicy = nullptr; - SResolvedFileIOPolicy ioPlan = {}; - uint64_t payloadBytes = 0ull; - const char* owner = nullptr; - std::string fileName = {}; - - template - static inline bool begin(Logger& logger, const char* const owner, system::IFile* file, const SFileIOPolicy& ioPolicy, const uint64_t payloadBytes, const bool sizeKnown, SLoadSession& out) - { - out = {}; - if (!file) - return false; - - out.file = file; - out.requestedPolicy = &ioPolicy; - out.ioPlan = SFileAccess::resolvePlan(ioPolicy, payloadBytes, sizeKnown, file); - out.payloadBytes = payloadBytes; - out.owner = owner; - out.fileName = file->getFileName().string(); - return !SIODiagnostics::logInvalidPlan(logger, owner, out.fileName.c_str(), out.ioPlan); - } - - inline bool isWholeFile() const - { - return ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile; - } - - inline const uint8_t* mappedPointer() const - { - if (!file || !isWholeFile()) - return nullptr; - return reinterpret_cast(static_cast(file)->getMappedPointer()); - } - - inline const uint8_t* readRange(const size_t offset, const size_t bytes, core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, const bool zeroTerminate = false) const - { - return SFileAccess::readRange(file, offset, bytes, storage, ioPlan, ioTelemetry, zeroTerminate); - } - - inline const uint8_t* mapOrReadWholeFile(core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, bool* const wasMapped = nullptr, const bool zeroTerminate = false) const - { - return SFileAccess::mapOrReadWholeFile(file, static_cast(payloadBytes), storage, ioPlan, ioTelemetry, wasMapped, zeroTerminate); - } - - template - inline void logTinyIO(Logger& 
logger, const Telemetry& telemetry, const char* const opName = "reads") const - { - if (!requestedPolicy) - return; - SIODiagnostics::logTinyIO(logger, owner, fileName.c_str(), telemetry, payloadBytes, *requestedPolicy, opName); - } -}; - -} - -#endif diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h index 15d18e18cd..a560a04219 100644 --- a/src/nbl/asset/interchange/impl/STextParse.h +++ b/src/nbl/asset/interchange/impl/STextParse.h @@ -2,25 +2,17 @@ // Do not include from public headers. #ifndef _NBL_ASSET_IMPL_S_TEXT_PARSE_H_INCLUDED_ #define _NBL_ASSET_IMPL_S_TEXT_PARSE_H_INCLUDED_ - #include "nbl/core/string/stringutil.h" - -#include #include #include #include #include #include - #include - - namespace nbl::asset::impl { - struct TextParse { - // Small asset-agnostic text parsing helpers shared by src-only interchange code. template static inline bool parseNumber(const char*& ptr, const char* const end, T& out) { @@ -44,17 +36,10 @@ struct TextParse } template - static inline bool parseExactNumber(const char* const begin, const char* const end, T& out) - { - auto ptr = begin; - return parseNumber(ptr, end, out) && ptr == end; - } + static inline bool parseExactNumber(const char* const begin, const char* const end, T& out) { auto ptr = begin; return parseNumber(ptr, end, out) && ptr == end; } template - static inline bool parseExactNumber(const std::string_view token, T& out) - { - return parseExactNumber(token.data(), token.data() + token.size(), out); - } + static inline bool parseExactNumber(const std::string_view token, T& out) { return parseExactNumber(token.data(), token.data() + token.size(), out); } template static inline bool parseNonZeroNumber(const char*& ptr, const char* const end, T& out) @@ -79,15 +64,6 @@ struct TextParse ++ptr; } - static inline std::string_view trimInlineWhitespace(std::string_view token) - { - while (!token.empty() && isInlineWhitespace(token.front())) - 
token.remove_prefix(1ull); - while (!token.empty() && isInlineWhitespace(token.back())) - token.remove_suffix(1ull); - return token; - } - static inline std::string_view trimWhitespace(std::string_view token) { while (!token.empty() && core::isspace(token.front())) @@ -97,20 +73,6 @@ struct TextParse return token; } - static inline bool tokenEqualsIgnoreCase(const std::string_view lhs, const std::string_view rhs) - { - if (lhs.size() != rhs.size()) - return false; - for (size_t i = 0ull; i < lhs.size(); ++i) - { - const auto l = static_cast(std::tolower(static_cast(lhs[i]))); - const auto r = static_cast(std::tolower(static_cast(rhs[i]))); - if (l != r) - return false; - } - return true; - } - static inline std::optional readToken(const char*& cursor, const char* const end) { skipWhitespace(cursor, end); @@ -125,49 +87,6 @@ struct TextParse cursor = tokenEnd; return token; } - - struct Cursor - { - // Lightweight stateful wrapper for sequential token/number reads. - const char* current = nullptr; - const char* end = nullptr; - - inline Cursor() = default; - inline Cursor(const char* begin, const char* endPtr) : current(begin), end(endPtr) {} - - inline void skipWhitespace() - { - TextParse::skipWhitespace(current, end); - } - - inline void skipInlineWhitespace() - { - TextParse::skipInlineWhitespace(current, end); - } - - inline std::optional readToken() - { - return TextParse::readToken(current, end); - } - - template - inline bool parseNumber(T& out) - { - TextParse::skipWhitespace(current, end); - return TextParse::parseNumber(current, end, out); - } - - template - inline std::optional readNumber() - { - T value = {}; - if (!parseNumber(value)) - return std::nullopt; - return value; - } - }; }; - } - #endif From bd6f96cf1f3690c167c38fdc0c1968deeac4eca8 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 16:20:10 +0100 Subject: [PATCH 080/118] Reduce loader writer boilerplate --- .../asset/interchange/SGeometryWriterCommon.h | 18 - 
.../nbl/asset/interchange/SInterchangeIO.h | 13 +- .../asset/interchange/SLoaderRuntimeTuning.h | 65 +- .../asset/interchange/COBJMeshFileLoader.cpp | 5 - src/nbl/asset/interchange/COBJMeshWriter.cpp | 1 - .../asset/interchange/CPLYMeshFileLoader.cpp | 659 ++++++++---------- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 44 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 18 +- .../asset/interchange/SGeometryViewDecode.h | 10 +- 9 files changed, 309 insertions(+), 524 deletions(-) diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index fa6c15a5b9..6b88534a09 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -30,7 +30,6 @@ class SGeometryWriterCommon Container out = {}; if (!rootAsset) return out; - const auto identity = hlsl::math::linalg::identity(); auto appendFromCollection = [&](const ICPUGeometryCollection* collection, const hlsl::float32_t3x4& transform, const uint32_t instanceIx, const uint32_t targetIx) -> void { if (!collection) @@ -61,17 +60,14 @@ class SGeometryWriterCommon } return out; } - if (rootAsset->getAssetType() == IAsset::ET_GEOMETRY_COLLECTION) { appendFromCollection(static_cast(rootAsset), identity, ~0u, ~0u); return out; } - if (rootAsset->getAssetType() != IAsset::ET_SCENE) return out; const auto* scene = static_cast(rootAsset); - const auto& instances = scene->getInstances(); const auto& morphTargets = instances.getMorphTargets(); const auto& initialTransforms = instances.getInitialTransforms(); @@ -137,12 +133,10 @@ class SGeometryWriterCommon { if (!geom) return false; - const auto& positionView = geom->getPositionView(); const size_t vertexCount = positionView.getElementCount(); if (vertexCount == 0ull) return false; - auto visit = [&](const IndexT i0, const IndexT i1, const IndexT i2)->bool { const uint32_t u0 = static_cast(i0); @@ -150,7 +144,6 @@ class SGeometryWriterCommon const 
uint32_t u2 = static_cast(i2); if (u0 >= vertexCount || u1 >= vertexCount || u2 >= vertexCount) return false; - if constexpr (std::is_same_v, bool>) return visitor(u0, u1, u2); else @@ -165,21 +158,17 @@ class SGeometryWriterCommon { if ((vertexCount % 3ull) != 0ull) return false; - for (uint32_t i = 0u; i < vertexCount; i += 3u) if (!visit(i + 0u, i + 1u, i + 2u)) return false; return true; } - const size_t indexCount = indexView.getElementCount(); if ((indexCount % 3ull) != 0ull) return false; - const void* const src = indexView.getPointer(); if (!src) return false; - auto visitIndexed = [&]()->bool { const auto* indices = reinterpret_cast(src); @@ -208,11 +197,9 @@ class SGeometryWriterCommon { if (!dst || dst >= end) return end; - const auto result = std::to_chars(dst, end, value); if (result.ec == std::errc()) return result.ptr; - const int written = std::snprintf(dst, static_cast(end - dst), "%u", value); if (written <= 0) return dst; @@ -225,25 +212,20 @@ class SGeometryWriterCommon static inline char* appendFloatingPointToBuffer(char* dst, char* const end, const T value) { static_assert(std::is_same_v || std::is_same_v); - if (!dst || dst >= end) return end; - const auto result = std::to_chars(dst, end, value); if (result.ec == std::errc()) return result.ptr; - constexpr size_t FloatingPointScratchSize = std::numeric_limits::max_digits10 + 9ull; std::array scratch = {}; constexpr int Precision = std::numeric_limits::max_digits10; const int written = std::snprintf(scratch.data(), scratch.size(), "%.*g", Precision, static_cast(value)); if (written <= 0) return dst; - const size_t writeLen = static_cast(written); if (writeLen > static_cast(end - dst)) return end; - std::memcpy(dst, scratch.data(), writeLen); return dst + writeLen; } diff --git a/include/nbl/asset/interchange/SInterchangeIO.h b/include/nbl/asset/interchange/SInterchangeIO.h index 484be7d09f..aec1bf596d 100644 --- a/include/nbl/asset/interchange/SInterchangeIO.h +++ 
b/include/nbl/asset/interchange/SInterchangeIO.h @@ -41,12 +41,9 @@ class SInterchangeIO { if (payloadBytes <= bigPayloadThresholdBytes) return false; - const uint64_t minBytes = telemetry.getMinOrZero(); const uint64_t avgBytes = telemetry.getAvgOrZero(); - return - avgBytes < lowAvgBytesThreshold || - (minBytes < tinyChunkBytesThreshold && telemetry.callCount > tinyChunkCallsThreshold); + return avgBytes < lowAvgBytesThreshold || (minBytes < tinyChunkBytesThreshold && telemetry.callCount > tinyChunkCallsThreshold); } static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) { return isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy.runtimeTuning.tinyIoPayloadThresholdBytes, ioPolicy.runtimeTuning.tinyIoAvgBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinCallCount); } static inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SReadTelemetry* ioTelemetry = nullptr) @@ -74,7 +71,6 @@ class SInterchangeIO *ioTime = std::chrono::duration_cast(clock_t::now() - ioStart); return ok; }; - if (!file || (!dst && bytes != 0ull)) return finalize(false); if (bytes == 0ull) @@ -117,7 +113,6 @@ class SInterchangeIO { if (!file) return false; - const uint64_t chunkSizeBytes = ioPlan.chunkSizeBytes(); for (const auto& buffer : buffers) { @@ -126,14 +121,10 @@ class SInterchangeIO if (buffer.byteCount == 0ull) continue; const auto* data = reinterpret_cast(buffer.data); - size_t writtenTotal = 0ull; while (writtenTotal < buffer.byteCount) { - const size_t toWrite = - ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? - (buffer.byteCount - writtenTotal) : - static_cast(std::min(chunkSizeBytes, buffer.byteCount - writtenTotal)); + const size_t toWrite = ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? 
(buffer.byteCount - writtenTotal) : static_cast(std::min(chunkSizeBytes, buffer.byteCount - writtenTotal)); system::IFile::success_t success; file->write(success, data + writtenTotal, fileOffset + writtenTotal, toWrite); if (!success) diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index 7aea972713..31c3fac3b4 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -71,7 +71,6 @@ struct SLoaderRuntimeTuner workers.emplace_back([&fn, workerIx]() { fn(workerIx); }); fn(0ull); } - static constexpr uint64_t ceilDiv(const uint64_t numerator, const uint64_t denominator) { return (numerator + denominator - 1ull) / denominator; } template @@ -80,7 +79,6 @@ struct SLoaderRuntimeTuner { if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) return TimeUnit::zero(); - const uint32_t passCount = std::max(1u, passes); std::vector partial(workerCount, 0ull); uint64_t elapsedNs = 0ull; @@ -100,7 +98,6 @@ struct SLoaderRuntimeTuner }); elapsedNs += static_cast(std::chrono::duration_cast(clock_t::now() - passStart).count()); } - uint64_t reduced = 0ull; for (const uint64_t v : partial) reduced ^= v; @@ -114,11 +111,9 @@ struct SLoaderRuntimeTuner SBenchmarkSampleStats stats = {}; if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) return stats; - const uint32_t observationCount = std::max(1u, observations); std::vector samples; samples.reserve(observationCount); - benchmarkSample(sampleData, sampleBytes, workerCount, 1u); for (uint32_t obsIx = 0u; obsIx < observationCount; ++obsIx) { @@ -128,10 +123,8 @@ struct SLoaderRuntimeTuner stats.totalNs += elapsedNs; samples.push_back(elapsedNs); } - if (samples.empty()) return SBenchmarkSampleStats{}; - std::sort(samples.begin(), samples.end()); stats.minNs = samples.front(); stats.maxNs = samples.back(); @@ -158,7 +151,6 @@ struct SLoaderRuntimeTuner } static inline bool 
shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) { return inputBytes <= std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); } - static inline size_t resolveHardwareThreads(const uint32_t requested = 0u) { const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); return hw ? hw : 1ull; } static inline size_t resolveHardMaxWorkers(const size_t hardwareThreads, const uint32_t workerHeadroom) @@ -181,7 +173,6 @@ struct SLoaderRuntimeTuner result.chunkCount = 0ull; return result; } - const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(request.hardwareThreads); size_t maxWorkers = hw; if (request.hardMaxWorkers > 0u) @@ -189,21 +180,16 @@ struct SLoaderRuntimeTuner if (ioPolicy.runtimeTuning.maxWorkers > 0u) maxWorkers = std::min(maxWorkers, static_cast(ioPolicy.runtimeTuning.maxWorkers)); maxWorkers = std::max(1ull, maxWorkers); - const uint64_t minWorkUnitsPerWorker = std::max(1ull, request.minWorkUnitsPerWorker); const uint64_t minBytesPerWorker = std::max(1ull, request.minBytesPerWorker); const size_t maxByWork = static_cast(SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, minWorkUnitsPerWorker)); const size_t maxByBytes = request.inputBytes ? static_cast(SLoaderRuntimeTuner::ceilDiv(request.inputBytes, minBytesPerWorker)) : maxWorkers; const bool heuristicEnabled = ioPolicy.runtimeTuning.mode != RTMode::Sequential; const bool hybridEnabled = ioPolicy.runtimeTuning.mode == RTMode::Hybrid; - size_t workerCount = 1ull; if (heuristicEnabled) workerCount = std::max(1ull, std::min({ maxWorkers, maxByWork, maxByBytes })); - - const size_t targetChunksPerWorker = std::max( - 1ull, - static_cast(request.targetChunksPerWorker ? request.targetChunksPerWorker : ioPolicy.runtimeTuning.targetChunksPerWorker)); + const size_t targetChunksPerWorker = std::max(1ull, static_cast(request.targetChunksPerWorker ? 
request.targetChunksPerWorker : ioPolicy.runtimeTuning.targetChunksPerWorker)); if (workerCount > 1ull && heuristicEnabled) { const double maxOverheadRatio = std::max(0.0, static_cast(ioPolicy.runtimeTuning.maxOverheadRatio)); @@ -220,23 +206,11 @@ struct SLoaderRuntimeTuner break; } } - const size_t heuristicWorkerCount = std::max(1ull, workerCount); - if ( - heuristicEnabled && - hybridEnabled && - request.sampleData != nullptr && - request.sampleBytes > 0ull && - heuristicWorkerCount > 1ull && - maxWorkers > 1ull - ) + if (heuristicEnabled && hybridEnabled && request.sampleData != nullptr && request.sampleBytes > 0ull && heuristicWorkerCount > 1ull && maxWorkers > 1ull) { - const uint64_t autoMinSamplingWorkUnits = std::max( - static_cast(targetChunksPerWorker) * 8ull, - static_cast(maxWorkers * targetChunksPerWorker)); - const uint64_t minSamplingWorkUnits = request.sampleMinWorkUnits ? - request.sampleMinWorkUnits : - (ioPolicy.runtimeTuning.samplingMinWorkUnits ? ioPolicy.runtimeTuning.samplingMinWorkUnits : autoMinSamplingWorkUnits); + const uint64_t autoMinSamplingWorkUnits = std::max(static_cast(targetChunksPerWorker) * 8ull, static_cast(maxWorkers * targetChunksPerWorker)); + const uint64_t minSamplingWorkUnits = request.sampleMinWorkUnits ? request.sampleMinWorkUnits : (ioPolicy.runtimeTuning.samplingMinWorkUnits ? 
ioPolicy.runtimeTuning.samplingMinWorkUnits : autoMinSamplingWorkUnits); if (request.totalWorkUnits >= minSamplingWorkUnits) { const double samplingBudgetRatio = std::clamp(static_cast(ioPolicy.runtimeTuning.samplingBudgetRatio), 0.0, 0.5); @@ -247,17 +221,13 @@ struct SLoaderRuntimeTuner { if (request.inputBytes > 0ull) { - const uint64_t sampleDivisor = std::max( - 4ull, - static_cast(heuristicWorkerCount) * static_cast(targetChunksPerWorker)); + const uint64_t sampleDivisor = std::max(4ull, static_cast(heuristicWorkerCount) * static_cast(targetChunksPerWorker)); const uint64_t adaptiveSampleBytes = std::max(1ull, request.inputBytes / sampleDivisor); effectiveSampleBytes = std::min(effectiveSampleBytes, adaptiveSampleBytes); } - const uint32_t samplePasses = request.samplePasses ? request.samplePasses : ioPolicy.runtimeTuning.samplingPasses; uint32_t maxCandidates = request.sampleMaxCandidates ? request.sampleMaxCandidates : ioPolicy.runtimeTuning.samplingMaxCandidates; maxCandidates = std::max(2u, maxCandidates); - std::vector candidates; candidates.reserve(maxCandidates); appendCandidate(candidates, heuristicWorkerCount); @@ -269,14 +239,10 @@ struct SLoaderRuntimeTuner appendCandidate(candidates, heuristicWorkerCount + 2ull); if (candidates.size() > maxCandidates) candidates.resize(maxCandidates); - - const auto heuristicStatsProbe = benchmarkSampleStats( - request.sampleData, effectiveSampleBytes, heuristicWorkerCount, samplePasses, 2u); + const auto heuristicStatsProbe = benchmarkSampleStats(request.sampleData, effectiveSampleBytes, heuristicWorkerCount, samplePasses, 2u); if (heuristicStatsProbe.medianNs > 0ull) { - const double scale = request.inputBytes ? - (static_cast(request.inputBytes) / static_cast(effectiveSampleBytes)) : - 1.0; + const double scale = request.inputBytes ? 
(static_cast(request.inputBytes) / static_cast(effectiveSampleBytes)) : 1.0; const uint64_t estimatedFullNs = static_cast(static_cast(heuristicStatsProbe.medianNs) * std::max(1.0, scale)); const uint64_t samplingBudgetNs = static_cast(static_cast(estimatedFullNs) * samplingBudgetRatio); uint64_t spentNs = heuristicStatsProbe.totalNs; @@ -293,7 +259,6 @@ struct SLoaderRuntimeTuner SBenchmarkSampleStats bestStats = heuristicStatsProbe; size_t bestWorker = heuristicWorkerCount; - for (const size_t candidate : candidates) { if (candidate == heuristicWorkerCount) @@ -311,20 +276,14 @@ struct SLoaderRuntimeTuner bestWorker = candidate; } } - if (bestWorker != heuristicWorkerCount) { - const double gain = static_cast(heuristicStatsProbe.medianNs - bestStats.medianNs) / - static_cast(heuristicStatsProbe.medianNs); + const double gain = static_cast(heuristicStatsProbe.medianNs - bestStats.medianNs) / static_cast(heuristicStatsProbe.medianNs); const uint64_t heuristicSpan = heuristicStatsProbe.maxNs - heuristicStatsProbe.minNs; const uint64_t bestSpan = bestStats.maxNs - bestStats.minNs; - const double heuristicNoise = static_cast(heuristicSpan) / - static_cast(std::max(1ull, heuristicStatsProbe.medianNs)); - const double bestNoise = static_cast(bestSpan) / - static_cast(std::max(1ull, bestStats.medianNs)); - const double requiredGain = std::max( - std::clamp(static_cast(ioPolicy.runtimeTuning.minExpectedGainRatio), 0.0, 0.99), - std::clamp(std::max(heuristicNoise, bestNoise) * 1.25, 0.0, 0.99)); + const double heuristicNoise = static_cast(heuristicSpan) / static_cast(std::max(1ull, heuristicStatsProbe.medianNs)); + const double bestNoise = static_cast(bestSpan) / static_cast(std::max(1ull, bestStats.medianNs)); + const double requiredGain = std::max(std::clamp(static_cast(ioPolicy.runtimeTuning.minExpectedGainRatio), 0.0, 0.99), std::clamp(std::max(heuristicNoise, bestNoise) * 1.25, 0.0, 0.99)); if (gain >= requiredGain) workerCount = bestWorker; } @@ -333,9 +292,7 @@ 
struct SLoaderRuntimeTuner } } } - result.workerCount = std::max(1ull, workerCount); - const uint64_t minChunkWorkUnits = std::max(1ull, request.minChunkWorkUnits); uint64_t maxChunkWorkUnits = std::max(minChunkWorkUnits, request.maxChunkWorkUnits); const uint64_t desiredChunkCount = static_cast(std::max(1ull, result.workerCount * targetChunksPerWorker)); diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 318dbb1697..cc4f57c248 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -931,14 +931,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( core::vector> outputAssets; uint64_t objectCount = 1ull; if (!buildCollections) { - // Plain OBJ is still just one polygon geometry here. outputAssets.push_back(core::smart_refctd_ptr_static_cast( std::move(loadedGeometries.front().geometry))); } else { - // Plain OBJ can group many polygon geometries with `o` and `g`, but it - // still does not define a real scene graph, instancing, or node transforms. - // Keep that as geometry collections instead of fabricating an ICPUScene on - // load. core::vector objectNames; core::vector> objectCollections; diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 43c4e3d98b..05bc4f5fad 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -192,7 +192,6 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (!_file || !_params.rootAsset) return false; - // Scene input is flattened here by baking transforms and writing every collected polygon geometry as its own OBJ object block. 
const auto items = SGeometryWriterCommon::collectPolygonGeometryWriteItems(_params.rootAsset); if (items.empty()) return false; diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index a302bd9c01..3376c76082 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -181,332 +181,295 @@ struct Parse _ctx.getNextLine(); } - // name of the element. We only want "vertex" and "face" elements - // but we have to parse the others anyway. - std::string Name; - // Properties of this element - core::vector Properties; - // The number of elements in the file - size_t Count; - // known size in bytes, 0 if unknown - uint32_t KnownSize; - }; + std::string Name; + core::vector Properties; + size_t Count; + uint32_t KnownSize; + }; - static constexpr size_t DefaultIoReadWindowBytes = 50ull << 10; + static constexpr size_t DefaultIoReadWindowBytes = 50ull << 10; - void init(size_t _ioReadWindowSize = DefaultIoReadWindowBytes) { - ioReadWindowSize = - std::max(_ioReadWindowSize, DefaultIoReadWindowBytes); - Buffer.resize(ioReadWindowSize + ReadWindowPaddingBytes, '\0'); - EndPointer = StartPointer = Buffer.data(); - LineEndPointer = EndPointer - 1; + void init(size_t _ioReadWindowSize = DefaultIoReadWindowBytes) + { + ioReadWindowSize = std::max(_ioReadWindowSize, DefaultIoReadWindowBytes); + Buffer.resize(ioReadWindowSize + ReadWindowPaddingBytes, '\0'); + EndPointer = StartPointer = Buffer.data(); + LineEndPointer = EndPointer - 1; + fillBuffer(); + } - fillBuffer(); - } + void fillBuffer() + { + if (EndOfFile) + return; + if (fileOffset >= inner.mainFile->getSize()) + { + EndOfFile = true; + return; + } + const auto length = std::distance(StartPointer, EndPointer); + auto newStart = Buffer.data(); + if (length && StartPointer != newStart) + memmove(newStart, StartPointer, length); + StartPointer = newStart; + EndPointer = newStart + length; + const size_t 
usableBufferSize = Buffer.size() > 0ull ? Buffer.size() - ReadWindowPaddingBytes : 0ull; + if (usableBufferSize <= length) + { + EndOfFile = true; + return; + } + const size_t requestSize = usableBufferSize - length; + system::IFile::success_t success; + inner.mainFile->read(success, EndPointer, fileOffset, requestSize); + const size_t bytesRead = success.getBytesProcessed(); + ++readCallCount; + readBytesTotal += bytesRead; + if (bytesRead < readMinBytes) + readMinBytes = bytesRead; + fileOffset += bytesRead; + EndPointer += bytesRead; + if (bytesRead != requestSize) + { + *EndPointer = 0; + EndOfFile = true; + } + } - // gets more data from the file - void fillBuffer() { - if (EndOfFile) - return; - else if (fileOffset >= inner.mainFile->getSize()) { - EndOfFile = true; - return; - } + const char* getNextLine() + { + StartPointer = LineEndPointer + 1; + if (*StartPointer == '\n') + *(StartPointer++) = '\0'; + const std::array Terminators = {'\0', '\r', '\n'}; + auto terminator = std::find_first_of(StartPointer, EndPointer, Terminators.begin(), Terminators.end()); + if (terminator != EndPointer) + *(terminator++) = '\0'; + if (terminator == EndPointer) + { + if (EndOfFile) + { + StartPointer = EndPointer - 1; + *StartPointer = '\0'; + return StartPointer; + } + fillBuffer(); + LineEndPointer = StartPointer - 1; + return StartPointer != EndPointer ? getNextLine() : StartPointer; + } + LineEndPointer = terminator - 1; + WordLength = -1; + return StartPointer; + } - const auto length = std::distance(StartPointer, EndPointer); - auto newStart = Buffer.data(); - // copy the remaining data to the start of the buffer - if (length && StartPointer != newStart) - memmove(newStart, StartPointer, length); - // reset start position - StartPointer = newStart; - EndPointer = newStart + length; - - // read data from the file - const size_t usableBufferSize = - Buffer.size() > 0ull ? 
Buffer.size() - ReadWindowPaddingBytes : 0ull; - if (usableBufferSize <= length) { - EndOfFile = true; - return; - } - const size_t requestSize = usableBufferSize - length; - system::IFile::success_t success; - inner.mainFile->read(success, EndPointer, fileOffset, requestSize); - const size_t bytesRead = success.getBytesProcessed(); - ++readCallCount; - readBytesTotal += bytesRead; - if (bytesRead < readMinBytes) - readMinBytes = bytesRead; - fileOffset += bytesRead; - EndPointer += bytesRead; - - // if we didn't completely fill the buffer - if (bytesRead != requestSize) { - // cauterize the string - *EndPointer = 0; - EndOfFile = true; - } - } - // Split the string data into a line in place by terminating it instead of - // copying. - const char* getNextLine() { - // move the start pointer along - StartPointer = LineEndPointer + 1; - - // crlf split across buffer move - if (*StartPointer == '\n') - *(StartPointer++) = '\0'; - - // begin at the start of the next line - const std::array Terminators = {'\0', '\r', '\n'}; - auto terminator = std::find_first_of( - StartPointer, EndPointer, Terminators.begin(), Terminators.end()); - if (terminator != EndPointer) - *(terminator++) = '\0'; - - // we have reached the end of the buffer - if (terminator == EndPointer) { - // get data from the file - if (EndOfFile) { - StartPointer = EndPointer - 1; - *StartPointer = '\0'; - return StartPointer; - } else { - fillBuffer(); - // reset line end pointer - LineEndPointer = StartPointer - 1; - if (StartPointer != EndPointer) - return getNextLine(); - else - return StartPointer; - } - } else { - LineEndPointer = terminator - 1; - WordLength = -1; - // return pointer to the start of the line - return StartPointer; - } - } - // null terminate the next word on the previous line and move the next word - // pointer along since we already have a full line in the buffer, we never - // need to retrieve more data - const char* getNextWord() { - // move the start pointer along - StartPointer 
+= WordLength + 1; - if (StartPointer >= EndPointer) { - if (EndOfFile) { - WordLength = -1; - return EndPointer; - } - getNextLine(); - } + const char* getNextWord() + { + StartPointer += WordLength + 1; + if (StartPointer >= EndPointer) + { + if (EndOfFile) + { + WordLength = -1; + return EndPointer; + } + getNextLine(); + } + if (StartPointer < EndPointer && !*StartPointer) + getNextLine(); + if (StartPointer >= LineEndPointer) + { + WordLength = -1; + return StartPointer; + } + assert(LineEndPointer <= EndPointer); + const std::array WhiteSpace = {'\0', ' ', '\t'}; + auto wordEnd = std::find_first_of(StartPointer, LineEndPointer, WhiteSpace.begin(), WhiteSpace.end()); + if (wordEnd != LineEndPointer) + *(wordEnd++) = '\0'; + auto nextWord = std::find_if(wordEnd, LineEndPointer, [WhiteSpace](const char c) -> bool { return std::find(WhiteSpace.begin(), WhiteSpace.end(), c) == WhiteSpace.end(); }); + WordLength = std::distance(StartPointer, nextWord) - 1; + return StartPointer; + } - if (StartPointer < EndPointer && !*StartPointer) - getNextLine(); + size_t getAbsoluteOffset(const char* ptr) const + { + if (!ptr || ptr > EndPointer) + return fileOffset; + const size_t trailingBytes = static_cast(EndPointer - ptr); + return fileOffset >= trailingBytes ? 
(fileOffset - trailingBytes) : 0ull; + } - if (StartPointer >= LineEndPointer) { - WordLength = -1; // - return StartPointer; - } - // process the next word - { - assert(LineEndPointer <= EndPointer); - const std::array WhiteSpace = {'\0', ' ', '\t'}; - auto wordEnd = std::find_first_of(StartPointer, LineEndPointer, - WhiteSpace.begin(), WhiteSpace.end()); - // null terminate the next word - if (wordEnd != LineEndPointer) - *(wordEnd++) = '\0'; - // find next word - auto notWhiteSpace = [WhiteSpace](const char c) -> bool { - return std::find(WhiteSpace.begin(), WhiteSpace.end(), c) == - WhiteSpace.end(); - }; - auto nextWord = std::find_if(wordEnd, LineEndPointer, notWhiteSpace); - WordLength = std::distance(StartPointer, nextWord) - 1; - } - // return pointer to the start of current word - return StartPointer; - } - size_t getAbsoluteOffset(const char* ptr) const { - if (!ptr || ptr > EndPointer) - return fileOffset; - const size_t trailingBytes = static_cast(EndPointer - ptr); - return fileOffset >= trailingBytes ? (fileOffset - trailingBytes) : 0ull; - } - void useMappedBinaryWindow(const char* data, const size_t sizeBytes) { - if (!data) - return; - StartPointer = const_cast(data); - EndPointer = StartPointer + sizeBytes; - LineEndPointer = StartPointer - 1; - WordLength = -1; - EndOfFile = true; - fileOffset = inner.mainFile ? inner.mainFile->getSize() : fileOffset; - } - // skips x bytes in the file, getting more data if required - void moveForward(const size_t bytes) { - assert(IsBinaryFile); - size_t remaining = bytes; - if (remaining == 0ull) - return; - - const size_t availableInitially = - EndPointer > StartPointer - ? static_cast(EndPointer - StartPointer) - : 0ull; - if (remaining > availableInitially) { - remaining -= availableInitially; - StartPointer = EndPointer; - if (remaining > ioReadWindowSize) { - const size_t fileSize = inner.mainFile->getSize(); - const size_t fileRemaining = - fileSize > fileOffset ? 
(fileSize - fileOffset) : 0ull; - const size_t directSkip = std::min(remaining, fileRemaining); - fileOffset += directSkip; - remaining -= directSkip; - } - } + void useMappedBinaryWindow(const char* data, const size_t sizeBytes) + { + if (!data) + return; + StartPointer = const_cast(data); + EndPointer = StartPointer + sizeBytes; + LineEndPointer = StartPointer - 1; + WordLength = -1; + EndOfFile = true; + fileOffset = inner.mainFile ? inner.mainFile->getSize() : fileOffset; + } - while (remaining) { - if (StartPointer >= EndPointer) { - fillBuffer(); - if (StartPointer >= EndPointer) - return; - } + void moveForward(const size_t bytes) + { + assert(IsBinaryFile); + size_t remaining = bytes; + if (remaining == 0ull) + return; + const size_t availableInitially = EndPointer > StartPointer ? static_cast(EndPointer - StartPointer) : 0ull; + if (remaining > availableInitially) + { + remaining -= availableInitially; + StartPointer = EndPointer; + if (remaining > ioReadWindowSize) + { + const size_t fileSize = inner.mainFile->getSize(); + const size_t fileRemaining = fileSize > fileOffset ? 
(fileSize - fileOffset) : 0ull; + const size_t directSkip = std::min(remaining, fileRemaining); + fileOffset += directSkip; + remaining -= directSkip; + } + } + while (remaining) + { + if (StartPointer >= EndPointer) + { + fillBuffer(); + if (StartPointer >= EndPointer) + return; + } + const size_t available = static_cast(EndPointer - StartPointer); + const size_t step = std::min(available, remaining); + StartPointer += step; + remaining -= step; + } + } - const size_t available = static_cast(EndPointer - StartPointer); - const size_t step = std::min(available, remaining); - StartPointer += step; - remaining -= step; - } - } + using widest_int_t = uint32_t; - // read the next int from the file and move the start pointer along - using widest_int_t = uint32_t; - widest_int_t getInt(const E_FORMAT f) { - assert(!isFloatingPointFormat(f)); - if (IsBinaryFile) { - if (StartPointer + sizeof(widest_int_t) > EndPointer) - fillBuffer(); + const char* getCurrentWordEnd(const char* word) const + { + const size_t tokenLen = WordLength >= 0 ? static_cast(WordLength + 1) : std::char_traits::length(word); + return word + tokenLen; + } - switch (getTexelOrBlockBytesize(f)) { - case 1: - if (StartPointer + sizeof(int8_t) > EndPointer) - break; - return *(StartPointer++); - case 2: { - if (StartPointer + sizeof(int16_t) > EndPointer) - break; - const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); - StartPointer += sizeof(int16_t); - return retval; - } - case 4: { - if (StartPointer + sizeof(int32_t) > EndPointer) - break; - const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); - StartPointer += sizeof(int32_t); - return retval; - } - default: - assert(false); - break; - } - return 0; - } - const char* word = getNextWord(); - if (!word) - return 0u; - const size_t tokenLen = WordLength >= 0 - ? 
static_cast(WordLength + 1) - : std::char_traits::length(word); - const char* const wordEnd = word + tokenLen; - if (word == wordEnd) - return 0u; - - if (isSignedFormat(f)) { - int64_t value = 0; - auto ptr = word; - if (Common::parseNumber(ptr, wordEnd, value) && - ptr == wordEnd) - return static_cast(value); - if (ptr != word) - return static_cast(value); - return 0u; - } else { - uint64_t value = 0u; - auto ptr = word; - if (Common::parseNumber(ptr, wordEnd, value) && - ptr == wordEnd) - return static_cast(value); - if (ptr != word) - return static_cast(value); - return 0u; - } - } - // read the next float from the file and move the start pointer along - hlsl::float64_t getFloat(const E_FORMAT f) { - assert(isFloatingPointFormat(f)); - if (IsBinaryFile) { - if (StartPointer + sizeof(hlsl::float64_t) > EndPointer) - fillBuffer(); + widest_int_t getInt(const E_FORMAT f) + { + assert(!isFloatingPointFormat(f)); + if (IsBinaryFile) + { + if (StartPointer + sizeof(widest_int_t) > EndPointer) + fillBuffer(); + switch (getTexelOrBlockBytesize(f)) + { + case 1: + if (StartPointer + sizeof(int8_t) <= EndPointer) + return *(StartPointer++); + break; + case 2: + if (StartPointer + sizeof(int16_t) <= EndPointer) + { + const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); + StartPointer += sizeof(int16_t); + return retval; + } + break; + case 4: + if (StartPointer + sizeof(int32_t) <= EndPointer) + { + const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); + StartPointer += sizeof(int32_t); + return retval; + } + break; + default: + assert(false); + break; + } + return 0u; + } + const char* word = getNextWord(); + if (!word) + return 0u; + const char* const wordEnd = getCurrentWordEnd(word); + if (word == wordEnd) + return 0u; + auto parseInt = [&](auto& value) -> widest_int_t + { + auto ptr = word; + if (Common::parseNumber(ptr, wordEnd, value) && ptr == wordEnd) + return static_cast(value); + return ptr != word ? 
static_cast(value) : 0u; + }; + if (isSignedFormat(f)) + { + int64_t value = 0; + return parseInt(value); + } + uint64_t value = 0u; + return parseInt(value); + } - switch (getTexelOrBlockBytesize(f)) { - case 4: { - if (StartPointer + sizeof(hlsl::float32_t) > EndPointer) - break; - const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); - StartPointer += sizeof(hlsl::float32_t); - return retval; - } - case 8: { - if (StartPointer + sizeof(hlsl::float64_t) > EndPointer) - break; - const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); - StartPointer += sizeof(hlsl::float64_t); - return retval; - } - default: - assert(false); - break; - } - return 0; - } - const char* word = getNextWord(); - if (!word) - return 0.0; - const size_t tokenLen = WordLength >= 0 - ? static_cast(WordLength + 1) - : std::char_traits::length(word); - const char* const wordEnd = word + tokenLen; - if (word == wordEnd) - return 0.0; - - hlsl::float64_t value = 0.0; - auto ptr = word; - if (Common::parseNumber(ptr, wordEnd, value) && - ptr == wordEnd) - return value; - if (ptr != word) - return value; - return 0.0; - } - // read the next thing from the file and move the start pointer along - void getData(void* dst, const E_FORMAT f) { - const auto size = getTexelOrBlockBytesize(f); - if (StartPointer + size > EndPointer) { - fillBuffer(); - if (StartPointer + size > EndPointer) - return; - } - if (IsWrongEndian) - std::reverse_copy(StartPointer, StartPointer + size, - reinterpret_cast(dst)); - else - memcpy(dst, StartPointer, size); - StartPointer += size; - } + hlsl::float64_t getFloat(const E_FORMAT f) + { + assert(isFloatingPointFormat(f)); + if (IsBinaryFile) + { + if (StartPointer + sizeof(hlsl::float64_t) > EndPointer) + fillBuffer(); + switch (getTexelOrBlockBytesize(f)) + { + case 4: + if (StartPointer + sizeof(hlsl::float32_t) <= EndPointer) + { + const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); + StartPointer += 
sizeof(hlsl::float32_t); + return retval; + } + break; + case 8: + if (StartPointer + sizeof(hlsl::float64_t) <= EndPointer) + { + const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); + StartPointer += sizeof(hlsl::float64_t); + return retval; + } + break; + default: + assert(false); + break; + } + return 0.0; + } + const char* word = getNextWord(); + if (!word) + return 0.0; + const char* const wordEnd = getCurrentWordEnd(word); + if (word == wordEnd) + return 0.0; + hlsl::float64_t value = 0.0; + auto ptr = word; + if (Common::parseNumber(ptr, wordEnd, value) && ptr == wordEnd) + return value; + return ptr != word ? value : 0.0; + } + + void getData(void* dst, const E_FORMAT f) + { + const auto size = getTexelOrBlockBytesize(f); + if (StartPointer + size > EndPointer) + { + fillBuffer(); + if (StartPointer + size > EndPointer) + return; + } + if (IsWrongEndian) + std::reverse_copy(StartPointer, StartPointer + size, reinterpret_cast(dst)); + else + memcpy(dst, StartPointer, size); + StartPointer += size; + } struct SVertAttrIt { uint8_t* ptr; uint32_t stride; @@ -1239,22 +1202,18 @@ struct Parse IAssetLoader::SAssetLoadContext inner; uint32_t topHierarchyLevel; IAssetLoader::IAssetLoaderOverride* loaderOverride; - // input buffer must be at least twice as long as the longest line in the - // file core::vector Buffer; size_t ioReadWindowSize = DefaultIoReadWindowBytes; core::vector ElementList = {}; char *StartPointer = nullptr, *EndPointer = nullptr, *LineEndPointer = nullptr; int32_t LineLength = 0; - int32_t WordLength = -1; // this variable is a misnomer, its really the - // offset to next word minus one + int32_t WordLength = -1; bool IsBinaryFile = false, IsWrongEndian = false, EndOfFile = false; size_t fileOffset = {}; uint64_t readCallCount = 0ull; uint64_t readBytesTotal = 0ull; uint64_t readMinBytes = std::numeric_limits::max(); - // core::vector vertAttrIts; }; }; @@ -1339,16 +1298,10 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( 
fileSize > (Parse::Context::DefaultIoReadWindowBytes * 2ull)) desiredReadWindow = Parse::Context::DefaultIoReadWindowBytes; } - const uint64_t safeReadWindow = std::min( - desiredReadWindow, - static_cast(std::numeric_limits::max() - - Parse::Context::ReadWindowPaddingBytes)); + const uint64_t safeReadWindow = std::min(desiredReadWindow, static_cast(std::numeric_limits::max() - Parse::Context::ReadWindowPaddingBytes)); ctx.init(static_cast(safeReadWindow)); - - // start with empty mesh auto geometry = make_smart_refctd_ptr(); - hlsl::shapes::util::AABBAccumulator3 parsedAABB = - hlsl::shapes::util::createAABBAccumulator(); + hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); uint32_t vertCount = 0; Parse::ContentHashBuild contentHashBuild = Parse::ContentHashBuild::create(computeContentHashes, hashInBuild); auto visitVertexAttributeViews = [&](auto&& visitor) -> void { @@ -1382,18 +1335,13 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( contentHashBuild.tryDefer(view.src.buffer.get()); }; - // Currently only supports ASCII or binary meshes if (Parse::toStringView(ctx.getNextLine()) != "ply") { _params.logger.log("Not a valid PLY file %s", system::ILogger::ELL_ERROR, ctx.inner.mainFile->getFileName().string().c_str()); return {}; } - - // cut the next line out ctx.getNextLine(); - // grab the word from this line const char* word = ctx.getNextWord(); - // ignore comments for (; Parse::toStringView(word) == "comment"; ctx.getNextLine()) word = ctx.getNextWord(); @@ -1411,10 +1359,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( _params.logger.log("PLY property token found before element %s", system::ILogger::ELL_WARNING, word); } else { - // get element auto& el = ctx.ElementList.back(); - - // fill property struct auto& prop = el.Properties.emplace_back(); prop.type = prop.getType(word); if (prop.type == EF_UNKNOWN) { @@ -1463,10 +1408,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( if (el.Name == "vertex") vertCount = 
el.Count; } else if (wordView == "comment") { - // ignore line - } - // must be `format {binary_little_endian|binary_big_endian|ascii} 1.0` - else if (wordView == "format") { + } else if (wordView == "format") { word = ctx.getNextWord(); const std::string_view formatView = Parse::toStringView(word); @@ -1477,7 +1419,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( ctx.IsWrongEndian = true; } else if (formatView == "ascii") { } else { - // abort if this isn't an ascii or a binary mesh _params.logger.log("Unsupported PLY mesh format %s", system::ILogger::ELL_ERROR, word); continueReading = false; @@ -1515,15 +1456,10 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( word = ctx.getNextWord(); } } while (readingHeader && continueReading); - // if (!continueReading) return {}; - - // now to read the actual data from the file using index_t = uint32_t; core::vector indices = {}; - - // loop through each of the elements bool verticesProcessed = false; const std::string fileName = _file->getFileName().string(); auto logMalformedElement = [&](const char* const elementName) -> void { @@ -1563,9 +1499,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( for (uint32_t i = 0; i < ctx.ElementList.size(); ++i) { auto& el = ctx.ElementList[i]; - if (el.Name == "vertex") // multiple vertex elements are currently treated - // as unsupported - { + if (el.Name == "vertex") { if (verticesProcessed) { _params.logger.log("Multiple `vertex` elements not supported!", system::ILogger::ELL_ERROR); @@ -1576,14 +1510,9 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( core::vector extraViews; for (auto& vertexProperty : el.Properties) { const auto& propertyName = vertexProperty.Name; - // only positions and normals need to be structured/canonicalized in any - // way - auto negotiateFormat = - [&vertexProperty](ICPUPolygonGeometry::SDataViewBase& view, - const uint8_t component) -> void { + auto negotiateFormat = [&vertexProperty](ICPUPolygonGeometry::SDataViewBase& view, const uint8_t component) -> void 
{ assert(getFormatChannelCount(vertexProperty.type) != 0); - if (getTexelOrBlockBytesize(vertexProperty.type) > - getTexelOrBlockBytesize(view.format)) + if (getTexelOrBlockBytesize(vertexProperty.type) > getTexelOrBlockBytesize(view.format)) view.format = vertexProperty.type; view.stride = hlsl::max(view.stride, component); }; @@ -1603,19 +1532,14 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( negotiateFormat(uvView, 0); else if (propertyName == "v" || propertyName == "t") negotiateFormat(uvView, 1); - else { - // property names for extra channels are currently not persisted in - // metadata + else extraViews.push_back(createView(vertexProperty.type, el.Count)); - } } - auto setFinalFormat = - [&ctx](ICPUPolygonGeometry::SDataViewBase& view) -> void { + auto setFinalFormat = [&ctx](ICPUPolygonGeometry::SDataViewBase& view) -> void { const auto componentFormat = view.format; const auto componentCount = view.stride + 1; view.format = Parse::expandStructuredFormat(view.format, componentCount); view.stride = getTexelOrBlockBytesize(view.format); - // for (auto c = 0u; c < componentCount; c++) { size_t offset = getTexelOrBlockBytesize(componentFormat) * c; ctx.vertAttrIts.push_back({.ptr = reinterpret_cast(offset), @@ -1640,16 +1564,12 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( auxViews->resize(Parse::UV0 + 1u); auxViews->operator[](Parse::UV0) = std::move(view); }); - // for (auto& view : extraViews) - ctx.vertAttrIts.push_back( - {.ptr = reinterpret_cast(view.src.buffer->getPointer()) + - view.src.offset, - .stride = getTexelOrBlockBytesize(view.composed.format), - .dstFmt = view.composed.format}); + ctx.vertAttrIts.push_back({.ptr = reinterpret_cast(view.src.buffer->getPointer()) + view.src.offset, + .stride = getTexelOrBlockBytesize(view.composed.format), + .dstFmt = view.composed.format}); for (auto& view : extraViews) geometry->getAuxAttributeViews()->push_back(std::move(view)); - // loop through vertex properties const auto fastVertexResult = 
ctx.readVertexElementFast(el, &parsedAABB); if (fastVertexResult == Parse::Context::EFastVertexReadResult::Success) { ++fastVertexElementCount; @@ -1679,7 +1599,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( const uint64_t indexCount = static_cast(indices.size()); if (indices.empty()) { - // no index buffer means point cloud geometry->setIndexing(IPolygonGeometryBase::PointList()); } else { if (vertCount != 0u && maxIndexRead >= vertCount) { diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 2c1a768797..833f636f43 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -207,7 +207,6 @@ struct Parse struct BinarySink { uint8_t* cursor = nullptr; - template inline bool append(const T value) { @@ -216,17 +215,12 @@ struct Parse Binary::storeUnalignedAdvance(cursor, value); return true; } - - inline bool finishVertex() - { - return true; - } + inline bool finishVertex() { return true; } }; struct TextSink { std::string& output; - template inline bool append(const T value) { @@ -237,12 +231,7 @@ struct Parse output.push_back(' '); return true; } - - inline bool finishVertex() - { - output.push_back('\n'); - return true; - } + inline bool finishVertex() { output.push_back('\n'); return true; } }; template @@ -255,16 +244,8 @@ struct Parse SemanticDecode semantic = {}; StoredDecode stored = {}; EmitFn emit = nullptr; - - inline explicit operator bool() const - { - return emit != nullptr && (static_cast(semantic) || static_cast(stored)); - } - - inline bool operator()(Sink& sink, const size_t ix) const - { - return static_cast(*this) && emit(sink, *this, ix); - } + inline explicit operator bool() const { return emit != nullptr && (static_cast(semantic) || static_cast(stored)); } + inline bool operator()(Sink& sink, const size_t ix) const { return static_cast(*this) && emit(sink, *this, ix); } template static bool emitDecode(Sink& sink, const auto& decode, 
const size_t ix, const uint32_t components, const bool flipVectors) @@ -291,8 +272,7 @@ struct Parse { if constexpr (Mode == SGeometryViewDecode::EMode::Semantic) return emitDecode(sink, view.semantic, ix, view.components, view.flipVectors); - else - return emitDecode(sink, view.stored, ix, view.components, view.flipVectors); + return emitDecode(sink, view.stored, ix, view.components, view.flipVectors); } template @@ -311,7 +291,6 @@ struct Parse PreparedView retval = {.components = components}; if (!view) return retval; - switch (scalarType) { case ScalarType::Float64: prepareDecode(retval, *view, flipVectors); break; @@ -332,7 +311,6 @@ struct Parse { if (!input.geom || !input.extraAuxViews) return false; - const auto& positionView = input.geom->getPositionView(); const auto& normalView = input.geom->getNormalView(); const auto& extraAuxViews = *input.extraAuxViews; @@ -367,7 +345,6 @@ struct Parse BinarySink sink = {.cursor = dst}; if (!emitVertices(input, sink)) return false; - return SGeometryWriterCommon::visitTriangleIndices(input.geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) -> bool { if (!sink.append(static_cast(3u))) return false; @@ -387,7 +364,6 @@ struct Parse TextSink sink = {.output = output}; if (!emitVertices(input, sink)) return false; - return SGeometryWriterCommon::visitTriangleIndices(input.geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) { output.append("3 "); appendIntegral(output, i0); @@ -415,7 +391,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ _params.logger.log("PLY writer: missing output file or root asset.", system::ILogger::ELL_ERROR); return false; } - const auto items = SGeometryWriterCommon::collectPolygonGeometryWriteItems(_params.rootAsset); if (items.size() != 1u) { @@ -434,7 +409,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ _params.logger.log("PLY writer: transformed scene or collection export is not 
supported.", system::ILogger::ELL_ERROR); return false; } - SAssetWriteContext ctx = {_params, _file}; system::IFile* file = _override->getOutputFile(_file, ctx, {geom, 0u}); if (!file) @@ -457,7 +431,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ _params.logger.log("PLY writer: normal vertex count mismatch.", system::ILogger::ELL_ERROR); return false; } - const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, Parse::UV0, vertexCount); if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) uvView = nullptr; @@ -490,7 +463,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ _params.logger.log("PLY writer: only triangle-list topology is supported.", system::ILogger::ELL_ERROR); return false; } - size_t faceCount = 0ull; if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) { @@ -502,7 +474,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const bool binary = flags.hasAnyFlag(E_WRITER_FLAGS::EWF_BINARY); const bool flipVectors = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); const bool write16BitIndices = vertexCount <= static_cast(std::numeric_limits::max()) + 1ull; - ScalarType positionScalarType = Parse::selectScalarType(positionView.composed.format); if (flipVectors && Parse::getScalarMeta(positionScalarType).integer && !Parse::getScalarMeta(positionScalarType).signedType) positionScalarType = ScalarType::Float32; @@ -518,7 +489,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ size_t extraAuxBytesPerVertex = 0ull; for (const auto& extra : extraAuxViews) extraAuxBytesPerVertex += static_cast(extra.components) * Parse::getScalarMeta(extra.scalarType).byteSize; - std::ostringstream headerBuilder; headerBuilder << "ply\n"; headerBuilder << (binary ? 
"format binary_little_endian 1.0" : "format ascii 1.0"); @@ -553,9 +523,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ headerBuilder << (write16BitIndices ? "\nproperty list uchar uint16 vertex_indices\n" : "\nproperty list uchar uint32 vertex_indices\n"); headerBuilder << "end_header\n"; const std::string header = headerBuilder.str(); - const Parse::WriteInput input = {.geom = geom, .positionScalarType = positionScalarType, .uvView = uvView, .uvScalarType = uvScalarType, .extraAuxViews = &extraAuxViews, .writeNormals = writeNormals, .normalScalarType = normalScalarType, .vertexCount = vertexCount, .faceCount = faceCount, .write16BitIndices = write16BitIndices, .flipVectors = flipVectors}; - bool writeOk = false; size_t outputBytes = 0ull; auto writePayload = [&](const void* bodyData, const size_t bodySize) -> bool { @@ -563,7 +531,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(outputSize), true, file); if (impl::SFileAccess::logInvalidPlan(_params.logger, "PLY writer", file->getFileName().string().c_str(), ioPlan)) return false; - outputBytes = outputSize; const SInterchangeIO::SBufferRange writeBuffers[] = {{.data = header.data(), .byteCount = header.size()}, {.data = bodyData, .byteCount = bodySize}}; writeOk = SInterchangeIO::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); @@ -583,7 +550,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const size_t vertexStride = static_cast(positionMeta.byteSize) * 3ull + (writeNormals ? static_cast(normalMeta.byteSize) * 3ull : 0ull) + (uvView ? static_cast(uvMeta.byteSize) * 2ull : 0ull) + extraAuxBytesPerVertex; const size_t faceStride = sizeof(uint8_t) + (write16BitIndices ? 
sizeof(uint16_t) : sizeof(uint32_t)) * 3u; const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; - core::vector body; body.resize(bodySize); if (!Parse::writeBinary(input, body.data())) diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index ed9ea8d2fa..e1cde54efa 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -41,7 +41,6 @@ struct Parse { if (ioBuffer.empty()) return true; - size_t bytesWritten = 0ull; const size_t totalBytes = ioBuffer.size(); while (bytesWritten < totalBytes) @@ -60,14 +59,12 @@ struct Parse ioBuffer.clear(); return true; } - bool write(const void* data, size_t size) { if (!data && size != 0ull) return false; if (size == 0ull) return true; - const uint8_t* src = reinterpret_cast(data); switch (ioPlan.strategy) { @@ -179,11 +176,9 @@ struct Parse { if (!normalView) return false; - std::array normals = {}; if (!decodeIndexedTriple(idx, [&normalView](const uint32_t vertexIx, hlsl::float32_t3& out) -> bool { return normalView.decodeElement(vertexIx, out); }, normals.data())) return false; - return selectFirstValidNormal(normals.data(), static_cast(normals.size()), outNormal); } @@ -270,9 +265,7 @@ struct Parse static const ICPUPolygonGeometry::SDataView* getColorView(const ICPUPolygonGeometry* geom, const size_t vertexCount) { const auto* view = SGeometryWriterCommon::getAuxViewAt(geom, Parse::COLOR0, vertexCount); - if (!view) - return nullptr; - return getFormatChannelCount(view->composed.format) >= 3u ? view : nullptr; + return view && getFormatChannelCount(view->composed.format) >= 3u ? 
view : nullptr; } static bool decodeColorB8G8R8A8(const ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, uint32_t& outColor) @@ -285,7 +278,6 @@ struct Parse std::memcpy(&outColor, ptr + static_cast(ix) * sizeof(uint32_t), sizeof(outColor)); return true; } - hlsl::float32_t4 decoded = {}; if (!colorView.decodeElement(ix, decoded)) return false; @@ -304,16 +296,13 @@ struct Parse { if (!geom || !context || !context->writeContext.outputFile) return false; - const auto& posView = geom->getPositionView(); if (!posView) return false; - const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); const size_t vertexCount = posView.getElementCount(); if (vertexCount == 0ull) return false; - size_t faceCount = 0ull; if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) return false; @@ -417,7 +406,6 @@ struct Parse std::array positions = {}; if (!decodeIndexedTriple(idx, decodePosition, positions.data())) return false; - std::array normals = {}; if (hasNormals && !decodeIndexedTriple(idx, decodeNormal, normals.data())) return false; @@ -435,11 +423,9 @@ struct Parse { if (!geom) return false; - const auto* indexing = geom->getIndexingCallback(); if (!indexing || indexing->degree() != 3u) return false; - const auto& posView = geom->getPositionView(); if (!posView) return false; @@ -454,7 +440,6 @@ struct Parse return false; if (!context->write("\n", sizeof("\n") - 1ull)) return false; - const uint32_t faceCount = static_cast(geom->getPrimitiveCount()); for (uint32_t primIx = 0u; primIx < faceCount; ++primIx) { @@ -483,7 +468,6 @@ struct Parse TriangleData triangle = {}; const hlsl::float32_t3* const attrNormalPtr = decodeTriangleNormal(normalView, idx, attrNormal) ? &attrNormal : nullptr; buildTriangle(v1, v2, v3, attrNormalPtr, attrNormalPtr ? 
1u : 0u, flipHandedness, true, triangle); - std::array faceText = {}; char* cursor = faceText.data(); char* const end = faceText.data() + faceText.size(); diff --git a/src/nbl/asset/interchange/SGeometryViewDecode.h b/src/nbl/asset/interchange/SGeometryViewDecode.h index 68cc616f87..c8d9ae43cc 100644 --- a/src/nbl/asset/interchange/SGeometryViewDecode.h +++ b/src/nbl/asset/interchange/SGeometryViewDecode.h @@ -1,5 +1,4 @@ -// Internal src-only header. -// Do not include from public headers. +// Internal src-only header. Do not include from public headers. #ifndef _NBL_ASSET_S_GEOMETRY_VIEW_DECODE_H_INCLUDED_ #define _NBL_ASSET_S_GEOMETRY_VIEW_DECODE_H_INCLUDED_ #include "nbl/asset/ICPUPolygonGeometry.h" @@ -34,14 +33,12 @@ class SGeometryViewDecode { return data != nullptr && stride != 0u && format != EF_UNKNOWN && channels != 0u; } - template inline bool decode(const size_t ix, std::array& out) const { out.fill(T{}); return SGeometryViewDecode::template decodePrepared(*this, ix, out.data(), static_cast(N)); } - template requires hlsl::concepts::Vector inline bool decode(const size_t ix, V& out) const { @@ -56,11 +53,9 @@ class SGeometryViewDecode Prepared retval = {}; if (!view.composed.isFormatted()) return retval; - retval.data = reinterpret_cast(view.getPointer()); if (!retval.data) return {}; - retval.stride = view.composed.getStride(); retval.format = view.composed.format; retval.channels = getFormatChannelCount(retval.format); @@ -85,13 +80,11 @@ class SGeometryViewDecode { if (!prepared || !out || outDim == 0u) return false; - using storage_t = std::conditional_t, hlsl::float64_t, std::conditional_t, int64_t, uint64_t>>; std::array tmp = {}; const void* srcArr[4] = {prepared.data + ix * prepared.stride, nullptr}; if (!decodePixels(prepared.format, srcArr, tmp.data(), 0u, 0u)) return false; - const uint32_t componentCount = std::min({prepared.channels, outDim, 4u}); if constexpr (Mode == EMode::Semantic && std::is_floating_point_v) { @@ -101,7 +94,6 @@ class 
SGeometryViewDecode tmp[i] = static_cast(tmp[i] * (prepared.range.maxVx[i] - prepared.range.minVx[i]) + prepared.range.minVx[i]); } } - for (uint32_t i = 0u; i < componentCount; ++i) out[i] = static_cast(tmp[i]); return true; From 0a4316b9aecb4070ec403de50948943bf5387a3f Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 16:23:31 +0100 Subject: [PATCH 081/118] Compact interchange whitespace --- .../asset/interchange/SGeometryWriterCommon.h | 15 --- .../nbl/asset/interchange/SInterchangeIO.h | 6 -- .../asset/interchange/SLoaderRuntimeTuning.h | 18 ---- .../asset/interchange/COBJMeshFileLoader.cpp | 98 ------------------- .../asset/interchange/CPLYMeshFileLoader.cpp | 95 ------------------ src/nbl/asset/interchange/CPLYMeshWriter.cpp | 54 ---------- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 54 ---------- .../asset/interchange/SGeometryViewDecode.h | 7 -- src/nbl/asset/interchange/impl/SBinaryData.h | 3 - src/nbl/asset/interchange/impl/SFileAccess.h | 8 -- src/nbl/asset/interchange/impl/STextParse.h | 10 -- 11 files changed, 368 deletions(-) diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index 6b88534a09..55422aa02c 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -8,7 +8,6 @@ #include "nbl/asset/ICPUGeometryCollection.h" #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" - #include #include #include @@ -23,7 +22,6 @@ class SGeometryWriterCommon public: struct SWriteState { hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); uint32_t instanceIx = ~0u; uint32_t targetIx = ~0u; uint32_t geometryIx = 0u; }; struct SPolygonGeometryWriteItem : SWriteState { const ICPUPolygonGeometry* geometry = nullptr; }; - template> requires requires(Container& c, const SPolygonGeometryWriteItem& item) { c.emplace_back(item); } static inline 
Container collectPolygonGeometryWriteItems(const IAsset* rootAsset) { @@ -76,18 +74,14 @@ class SGeometryWriterCommon const auto* targets = morphTargets[instanceIx].get(); if (!targets) continue; - const auto instanceTransform = initialTransforms.empty() ? identity : initialTransforms[instanceIx]; const auto& targetList = targets->getTargets(); for (uint32_t targetIx = 0u; targetIx < targetList.size(); ++targetIx) appendFromCollection(targetList[targetIx].geoCollection.get(), instanceTransform, instanceIx, targetIx); } - return out; } - static inline bool isIdentityTransform(const hlsl::float32_t3x4& transform) { return transform == hlsl::math::linalg::identity(); } - static inline const ICPUPolygonGeometry::SDataView* getAuxViewAt(const ICPUPolygonGeometry* geom, const uint32_t auxViewIx, const size_t requiredElementCount = 0ull) { if (!geom) @@ -102,7 +96,6 @@ class SGeometryWriterCommon return nullptr; return &view; } - static inline bool getTriangleFaceCount(const ICPUPolygonGeometry* geom, size_t& outFaceCount) { outFaceCount = 0ull; @@ -121,13 +114,11 @@ class SGeometryWriterCommon outFaceCount = indexCount / 3ull; return true; } - if ((vertexCount % 3ull) != 0ull) return false; outFaceCount = vertexCount / 3ull; return true; } - template static inline bool visitTriangleIndices(const ICPUPolygonGeometry* geom, Visitor&& visitor) { @@ -152,7 +143,6 @@ class SGeometryWriterCommon return true; } }; - const auto& indexView = geom->getIndexView(); if (!indexView) { @@ -177,7 +167,6 @@ class SGeometryWriterCommon return false; return true; }; - switch (geom->getIndexType()) { case EIT_32BIT: return visitIndexed.template operator()(); @@ -186,13 +175,10 @@ class SGeometryWriterCommon return false; } } - template static inline const T* getTightView(const ICPUPolygonGeometry::SDataView& view) { return view && view.composed.format == ExpectedFormat && view.composed.getStride() == sizeof(T) ? 
reinterpret_cast(view.getPointer()) : nullptr; } - static inline char* appendFloatToBuffer(char* dst, char* end, float value) { return appendFloatingPointToBuffer(dst, end, value); } static inline char* appendFloatToBuffer(char* dst, char* end, double value) { return appendFloatingPointToBuffer(dst, end, value); } - static inline char* appendUIntToBuffer(char* dst, char* const end, const uint32_t value) { if (!dst || dst >= end) @@ -206,7 +192,6 @@ class SGeometryWriterCommon const size_t writeLen = static_cast(written); return (writeLen < static_cast(end - dst)) ? (dst + writeLen) : end; } - private: template static inline char* appendFloatingPointToBuffer(char* dst, char* const end, const T value) diff --git a/include/nbl/asset/interchange/SInterchangeIO.h b/include/nbl/asset/interchange/SInterchangeIO.h index aec1bf596d..0ae55deedb 100644 --- a/include/nbl/asset/interchange/SInterchangeIO.h +++ b/include/nbl/asset/interchange/SInterchangeIO.h @@ -22,15 +22,12 @@ class SInterchangeIO uint64_t callCount = 0ull; uint64_t totalBytes = 0ull; uint64_t minBytes = std::numeric_limits::max(); - inline void account(const uint64_t bytes) { ++callCount; totalBytes += bytes; if (bytes < minBytes) minBytes = bytes; } inline uint64_t getMinOrZero() const { return callCount ? minBytes : 0ull; } inline uint64_t getAvgOrZero() const { return callCount ? 
(totalBytes / callCount) : 0ull; } }; - using SReadTelemetry = STelemetry; using SWriteTelemetry = STelemetry; - static inline bool isTinyIOTelemetryLikely( const STelemetry& telemetry, const uint64_t payloadBytes, @@ -52,7 +49,6 @@ class SInterchangeIO return false; if (bytes == 0ull) return true; - system::IFile::success_t success; file->read(success, dst, offset, bytes); if (success && ioTelemetry) @@ -76,7 +72,6 @@ class SInterchangeIO if (bytes == 0ull) return finalize(true); auto* out = reinterpret_cast(dst); - switch (ioPlan.strategy) { case SResolvedFileIOPolicy::Strategy::WholeFile: @@ -144,7 +139,6 @@ class SInterchangeIO static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicy(file, ioPlan, buffers, ioTelemetry); } }; - using SFileIOTelemetry = SInterchangeIO::STelemetry; using SFileReadTelemetry = SInterchangeIO::SReadTelemetry; using SFileWriteTelemetry = SInterchangeIO::SWriteTelemetry; diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index 31c3fac3b4..67eddef32f 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -3,9 +3,7 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_S_LOADER_RUNTIME_TUNING_H_INCLUDED_ #define _NBL_ASSET_S_LOADER_RUNTIME_TUNING_H_INCLUDED_ - #include 
"nbl/asset/interchange/SFileIOPolicy.h" - #include #include #include @@ -15,7 +13,6 @@ #include #include #include - namespace nbl::asset { struct SLoaderRuntimeTuningRequest @@ -35,14 +32,12 @@ struct SLoaderRuntimeTuningRequest uint32_t sampleMaxCandidates = 0u; uint64_t sampleMinWorkUnits = 0ull; }; - struct SLoaderRuntimeTuningResult { size_t workerCount = 1ull; uint64_t chunkWorkUnits = 1ull; size_t chunkCount = 1ull; }; - struct SLoaderRuntimeTuner { private: @@ -53,7 +48,6 @@ struct SLoaderRuntimeTuner uint64_t maxNs = 0ull; uint64_t totalNs = 0ull; }; - public: template requires std::invocable @@ -64,7 +58,6 @@ struct SLoaderRuntimeTuner fn(0ull); return; } - std::vector workers; workers.reserve(workerCount - 1ull); for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) @@ -72,7 +65,6 @@ struct SLoaderRuntimeTuner fn(0ull); } static constexpr uint64_t ceilDiv(const uint64_t numerator, const uint64_t denominator) { return (numerator + denominator - 1ull) / denominator; } - template requires std::same_as> static inline TimeUnit benchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes) @@ -105,7 +97,6 @@ struct SLoaderRuntimeTuner sink.fetch_xor(reduced, std::memory_order_relaxed); return std::chrono::duration_cast(std::chrono::nanoseconds(elapsedNs)); } - static inline SBenchmarkSampleStats benchmarkSampleStats(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes, const uint32_t observations) { SBenchmarkSampleStats stats = {}; @@ -134,14 +125,11 @@ struct SLoaderRuntimeTuner stats.medianNs = (samples[samples.size() / 2ull - 1ull] + samples[samples.size() / 2ull]) / 2ull; return stats; } - static inline void appendCandidate(std::vector& dst, const size_t candidate) { if (candidate != 0ull && std::find(dst.begin(), dst.end(), candidate) == dst.end()) dst.push_back(candidate); } - static inline uint64_t resolveSampleBytes(const 
SFileIOPolicy& ioPolicy, const uint64_t knownInputBytes) { if (knownInputBytes == 0ull) return 0ull; - const uint64_t minSampleBytes = std::max(1ull, ioPolicy.runtimeTuning.minSampleBytes); const uint64_t maxSampleBytes = std::max(minSampleBytes, ioPolicy.runtimeTuning.maxSampleBytes); const uint64_t cappedMin = std::min(minSampleBytes, knownInputBytes); @@ -149,10 +137,8 @@ struct SLoaderRuntimeTuner const uint64_t adaptive = std::max(knownInputBytes / 64ull, cappedMin); return std::clamp(adaptive, cappedMin, cappedMax); } - static inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) { return inputBytes <= std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); } static inline size_t resolveHardwareThreads(const uint32_t requested = 0u) { const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); return hw ? hw : 1ull; } - static inline size_t resolveHardMaxWorkers(const size_t hardwareThreads, const uint32_t workerHeadroom) { const size_t hw = std::max(1ull, hardwareThreads), minWorkers = hw >= 2ull ? 
2ull : 1ull, headroom = static_cast(workerHeadroom); @@ -162,7 +148,6 @@ struct SLoaderRuntimeTuner return minWorkers; return std::max(minWorkers, hw - headroom); } - static inline SLoaderRuntimeTuningResult tune(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request) { using RTMode = SFileIOPolicy::SRuntimeTuning::Mode; @@ -256,7 +241,6 @@ struct SLoaderRuntimeTuner estimatedEvaluations / static_cast(alternativeCandidates), 1ull, 3ull)); - SBenchmarkSampleStats bestStats = heuristicStatsProbe; size_t bestWorker = heuristicWorkerCount; for (const size_t candidate : candidates) @@ -298,12 +282,10 @@ struct SLoaderRuntimeTuner const uint64_t desiredChunkCount = static_cast(std::max(1ull, result.workerCount * targetChunksPerWorker)); uint64_t chunkWorkUnits = SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, desiredChunkCount); chunkWorkUnits = std::clamp(chunkWorkUnits, minChunkWorkUnits, maxChunkWorkUnits); - result.chunkWorkUnits = chunkWorkUnits; result.chunkCount = static_cast(SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, chunkWorkUnits)); return result; } }; - } #endif diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index cc4f57c248..46222d0f05 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -1,12 +1,9 @@ #ifdef _NBL_COMPILE_WITH_OBJ_LOADER_ - // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors - #include "nbl/core/declarations.h" - #include "nbl/asset/IAssetManager.h" #include "nbl/asset/ICPUGeometryCollection.h" #include "nbl/asset/interchange/SGeometryContentHash.h" @@ -15,30 +12,23 @@ #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" #include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" - #include "nbl/system/IFile.h" - #include "COBJMeshFileLoader.h" #include "impl/SFileAccess.h" #include "impl/STextParse.h" - #include #include #include #include #include - namespace nbl::asset { - namespace { - struct Parse { static constexpr uint32_t UV0 = 0u; using Common = impl::TextParse; - struct VertexDedupNode { int32_t uv = -1; @@ -47,7 +37,6 @@ struct Parse uint32_t outIndex = 0u; int32_t next = -1; }; - static bool resolveIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) { if (rawIndex > 0) @@ -61,14 +50,12 @@ struct Parse resolved = static_cast(zeroBased); return true; } - const int64_t zeroBased = static_cast(elementCount) + static_cast(rawIndex); if (zeroBased < 0 || zeroBased >= static_cast(elementCount)) return false; resolved = static_cast(zeroBased); return true; } - static void parseSmoothingGroup(const char* linePtr, const char* const lineEnd, uint32_t& outGroup) { Common::skipInlineWhitespace(linePtr, lineEnd); @@ -77,12 +64,10 @@ struct Parse outGroup = 0u; return; } - const char* const tokenStart = linePtr; while (linePtr < lineEnd && !Common::isInlineWhitespace(*linePtr)) ++linePtr; const std::string_view token(tokenStart, static_cast(linePtr - tokenStart)); - if (token.size() == 2u && std::tolower(token[0]) == 'o' && std::tolower(token[1]) == 'n') { outGroup = 1u; @@ -93,23 +78,19 @@ struct Parse outGroup = 0u; return; } - uint32_t 
value = 0u; outGroup = Common::parseExactNumber(token, value) ? value : 0u; } - static std::string parseIdentifier(const char* linePtr, const char* const lineEnd, const std::string_view fallback) { const char* endPtr = lineEnd; Common::skipInlineWhitespace(linePtr, lineEnd); while (endPtr > linePtr && Common::isInlineWhitespace(endPtr[-1])) --endPtr; - if (linePtr >= endPtr) return std::string(fallback); return std::string(linePtr, static_cast(endPtr - linePtr)); } - static bool parseTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, std::array& out, const size_t posCount, const size_t uvCount, const size_t normalCount) { const char* ptr = lineStart; @@ -118,7 +99,6 @@ struct Parse Common::skipInlineWhitespace(ptr, lineEnd); if (ptr >= lineEnd || !core::isdigit(*ptr)) return false; - int32_t posIx = -1; { uint32_t value = 0u; @@ -133,7 +113,6 @@ struct Parse if (ptr >= lineEnd || *ptr != '/') return false; ++ptr; - int32_t uvIx = -1; { uint32_t value = 0u; @@ -148,7 +127,6 @@ struct Parse if (ptr >= lineEnd || *ptr != '/') return false; ++ptr; - int32_t normalIx = -1; { uint32_t value = 0u; @@ -160,22 +138,17 @@ struct Parse return false; normalIx = value - 1u; } - out[corner] = hlsl::int32_t3(posIx, uvIx, normalIx); } - Common::skipInlineWhitespace(ptr, lineEnd); return ptr == lineEnd; } - static bool parseFaceVertexToken(const char*& linePtr, const char* const lineEnd, hlsl::int32_t3& idx, const size_t posCount, const size_t uvCount, const size_t normalCount) { Common::skipInlineWhitespace(linePtr, lineEnd); if (linePtr >= lineEnd) return false; - idx = hlsl::int32_t3(-1, -1, -1); - const char* ptr = linePtr; if (*ptr != '-' && *ptr != '+') { @@ -187,7 +160,6 @@ struct Parse if (posRaw > posCount) return false; idx.x = posRaw - 1u; - if (ptr < lineEnd && *ptr == '/') { ++ptr; @@ -202,7 +174,6 @@ struct Parse return false; idx.y = uvRaw - 1u; } - if (ptr < lineEnd && *ptr == '/') { ++ptr; @@ -231,7 +202,6 @@ struct Parse return 
false; if (!resolveIndex(raw, posCount, idx.x)) return false; - if (ptr < lineEnd && *ptr == '/') { ++ptr; @@ -242,7 +212,6 @@ struct Parse if (!resolveIndex(raw, uvCount, idx.y)) return false; } - if (ptr < lineEnd && *ptr == '/') { ++ptr; @@ -260,22 +229,17 @@ struct Parse else if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) return false; } - if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) return false; linePtr = ptr; return true; } }; - } - COBJMeshFileLoader::COBJMeshFileLoader(IAssetManager*) { } - COBJMeshFileLoader::~COBJMeshFileLoader() = default; - bool COBJMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr) const { if (!_file) @@ -283,7 +247,6 @@ bool COBJMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste const auto fileSize = _file->getSize(); if (fileSize <= 0) return false; - constexpr size_t ProbeBytes = 4096ull; const size_t bytesToRead = std::min(ProbeBytes, static_cast(fileSize)); std::array probe = {}; @@ -291,26 +254,22 @@ bool COBJMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste _file->read(succ, probe.data(), 0ull, bytesToRead); if (!succ || bytesToRead == 0ull) return false; - const char* ptr = probe.data(); const char* const end = probe.data() + bytesToRead; if ((end - ptr) >= 3 && static_cast(ptr[0]) == 0xEFu && static_cast(ptr[1]) == 0xBBu && static_cast(ptr[2]) == 0xBFu) ptr += 3; - while (ptr < end) { while (ptr < end && (*ptr == ' ' || *ptr == '\t' || *ptr == '\r' || *ptr == '\n')) ++ptr; if (ptr >= end) break; - if (*ptr == '#') { while (ptr < end && *ptr != '\n') ++ptr; continue; } - switch (std::tolower(*ptr)) { case 'v': @@ -329,41 +288,34 @@ bool COBJMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste } return false; } - const char** COBJMeshFileLoader::getAssociatedFileExtensions() const { static const char* ext[] = { "obj", nullptr }; return ext; } - asset::SAssetBundle COBJMeshFileLoader::loadAsset( system::IFile* 
_file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override [[maybe_unused]], uint32_t _hierarchyLevel [[maybe_unused]]) { if (!_file) return {}; - uint64_t faceCount = 0u; uint64_t faceFastTokenCount = 0u; uint64_t faceFallbackTokenCount = 0u; SFileReadTelemetry ioTelemetry = {}; - const long filesize = _file->getSize(); if (filesize <= 0) return {}; impl::SLoadSession loadSession = {}; if (!impl::SLoadSession::begin(_params.logger, "OBJ loader", _file, _params.ioPolicy, static_cast(filesize), true, loadSession)) return {}; - core::vector fileContents; const auto* fileData = loadSession.mapOrReadWholeFile(fileContents, &ioTelemetry); if (!fileData) return {}; const char* const buf = reinterpret_cast(fileData); - const char* const bufEnd = buf + static_cast(filesize); const char* bufPtr = buf; - core::vector positions; core::vector normals; core::vector uvs; @@ -372,7 +324,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( positions.reserve(estimatedAttributeCount); normals.reserve(estimatedAttributeCount); uvs.reserve(estimatedAttributeCount); - core::vector outPositions; core::vector outNormals; core::vector outNormalNeedsGeneration; @@ -422,7 +373,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( const size_t dedupHotEntryCount = std::bit_ceil(dedupHotSeed); core::vector dedupHotCache(dedupHotEntryCount); const size_t dedupHotMask = dedupHotEntryCount - 1ull; - struct SLoadedGeometry { core::smart_refctd_ptr geometry = {}; std::string objectName = {}; @@ -431,7 +381,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( uint64_t faceFastTokenCount = 0ull; uint64_t faceFallbackTokenCount = 0ull; }; - core::vector loadedGeometries; std::string currentObjectName = "default_object"; std::string currentGroupName = "default_group"; @@ -445,7 +394,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( uint64_t currentFaceCount = 0ull; uint64_t currentFaceFastTokenCount = 0ull; uint64_t 
currentFaceFallbackTokenCount = 0ull; - const auto resetBuilderState = [&]() -> void { outPositions.clear(); outNormals.clear(); @@ -453,7 +401,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( outUVs.clear(); indices.clear(); dedupNodes.clear(); - outPositions.resize(initialOutVertexCapacity); outNormals.resize(initialOutVertexCapacity); outNormalNeedsGeneration.resize(initialOutVertexCapacity, 0u); @@ -461,7 +408,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( indices.resize(initialOutIndexCapacity); dedupHeadByPos.assign(positions.size(), -1); dedupNodes.resize(initialOutVertexCapacity); - outVertexWriteCount = 0ull; outIndexWriteCount = 0ull; dedupNodeCount = 0ull; @@ -475,17 +421,14 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( const SDedupHotEntry emptyHotEntry = {}; std::fill(dedupHotCache.begin(), dedupHotCache.end(), emptyHotEntry); }; - const auto finalizeCurrentGeometry = [&]() -> bool { if (outVertexWriteCount == 0ull) return true; - outPositions.resize(outVertexWriteCount); outNormals.resize(outVertexWriteCount); outNormalNeedsGeneration.resize(outVertexWriteCount); outUVs.resize(outVertexWriteCount); indices.resize(outIndexWriteCount); - if (needsNormalGeneration) { // OBJ smoothing groups are already encoded in the parser-side vertex // split corners that must stay sharp become different output vertices @@ -498,7 +441,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( outNormals, outPositions, indices, outNormalNeedsGeneration)) return false; } - const size_t outVertexCount = outPositions.size(); auto geometry = core::make_smart_refctd_ptr(); { @@ -508,7 +450,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( return false; geometry->setPositionView(std::move(view)); } - const bool hasNormals = hasProvidedNormals || needsNormalGeneration; if (hasNormals) { auto view = SGeometryLoaderCommon::createAdoptedView( @@ -517,7 +458,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( return false; 
geometry->setNormalView(std::move(view)); } - if (hasUVs) { auto view = SGeometryLoaderCommon::createAdoptedView( std::move(outUVs)); @@ -527,7 +467,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( auxViews->resize(Parse::UV0 + 1u); auxViews->operator[](Parse::UV0) = std::move(view); } - if (!indices.empty()) { geometry->setIndexing(IPolygonGeometryBase::TriangleList()); if (outVertexCount <= @@ -550,17 +489,14 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( } else { geometry->setIndexing(IPolygonGeometryBase::PointList()); } - if (!_params.loaderFlags.hasAnyFlag( IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES)) SPolygonGeometryContentHash::computeMissing(geometry.get(), _params.ioPolicy); - if (!parsedAABB.empty()) geometry->applyAABB(parsedAABB.value); else CPolygonGeometryManipulator::recomputeAABB(geometry.get()); - loadedGeometries.push_back(SLoadedGeometry{ .geometry = std::move(geometry), .objectName = currentObjectName, @@ -570,7 +506,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( .faceFallbackTokenCount = currentFaceFallbackTokenCount}); return true; }; - resetBuilderState(); auto allocateOutVertex = [&](uint32_t& outIx) -> bool { if (outVertexWriteCount >= outPositions.size()) { @@ -587,7 +522,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( outIx = static_cast(outVertexWriteCount++); return true; }; - auto appendIndex = [&](const uint32_t value) -> bool { if (outIndexWriteCount >= indices.size()) { const size_t newCapacity = @@ -597,7 +531,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( indices[outIndexWriteCount++] = value; return true; }; - auto allocateDedupNode = [&]() -> int32_t { if (dedupNodeCount >= dedupNodes.size()) { const size_t newCapacity = @@ -610,7 +543,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( const int32_t ix = static_cast(dedupNodeCount++); return ix; }; - auto findCornerIndex = [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, const uint32_t dedupSmoothingGroup, 
uint32_t& outIx) -> bool { @@ -618,7 +550,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( return false; if (static_cast(posIx) >= dedupHeadByPos.size()) dedupHeadByPos.resize(positions.size(), -1); - int32_t nodeIx = dedupHeadByPos[static_cast(posIx)]; while (nodeIx >= 0) { const auto& node = dedupNodes[static_cast(nodeIx)]; @@ -631,7 +562,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( } return false; }; - auto materializeCornerIndex = [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, const uint32_t dedupSmoothingGroup, uint32_t& outIx) -> bool { @@ -640,7 +570,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( const int32_t newNodeIx = allocateDedupNode(); if (newNodeIx < 0) return false; - auto& node = dedupNodes[static_cast(newNodeIx)]; node.uv = uvIx; node.normal = normalIx; @@ -648,18 +577,15 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( node.outIndex = outIx; node.next = dedupHeadByPos[static_cast(posIx)]; dedupHeadByPos[static_cast(posIx)] = newNodeIx; - const auto& srcPos = positions[static_cast(posIx)]; outPositions[static_cast(outIx)] = srcPos; hlsl::shapes::util::extendAABBAccumulator(parsedAABB, srcPos); - hlsl::float32_t2 uv(0.f, 0.f); if (uvIx >= 0 && static_cast(uvIx) < uvs.size()) { uv = uvs[static_cast(uvIx)]; hasUVs = true; } outUVs[static_cast(outIx)] = uv; - hlsl::float32_t3 normal(0.f, 0.f, 0.f); if (normalIx >= 0 && static_cast(normalIx) < normals.size()) { normal = normals[static_cast(normalIx)]; @@ -672,7 +598,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( outNormals[static_cast(outIx)] = normal; return true; }; - auto acquireCornerIndex = [&](const hlsl::int32_t3& idx, const uint32_t smoothingGroup, uint32_t& outIx) -> bool { @@ -685,7 +610,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( return materializeCornerIndex(posIx, idx.y, idx.z, dedupSmoothingGroup, outIx); }; - auto acquireCornerIndexPositiveTriplet = [&](const hlsl::int32_t3& idx, uint32_t& outIx) -> bool { const 
uint32_t hotHash = static_cast(idx.x) * 73856093u ^ @@ -697,7 +621,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( outIx = hotEntry.outIndex; return true; } - if (findCornerIndex(idx.x, idx.y, idx.z, 0u, outIx) || materializeCornerIndex(idx.x, idx.y, idx.z, 0u, outIx)) { hotEntry.pos = idx.x; @@ -714,7 +637,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( auto appendTriangle = [&](const hlsl::uint32_t3& cornerIx) -> bool { return appendIndex(cornerIx.z) && appendIndex(cornerIx.y) && appendIndex(cornerIx.x); }; - uint32_t currentSmoothingGroup = 0u; while (bufPtr < bufEnd) { const char* const lineStart = bufPtr; @@ -726,11 +648,9 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( static_cast(std::memchr(lineStart, '\r', remaining)); if (!lineTerminator) lineTerminator = bufEnd; - const char* lineEnd = lineTerminator; if (lineEnd > lineStart && lineEnd[-1] == '\r') --lineEnd; - if (lineStart < lineEnd) { const char lineType = std::tolower(*lineStart); if (lineType == 'v') { @@ -841,7 +761,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( uint32_t firstCorner = 0u; uint32_t previousCorner = 0u; uint32_t cornerCount = 0u; - if (parsedFirstThree) { hlsl::uint32_t3 cornerIx = {}; if (!acquireTriangleCorners([&](const hlsl::int32_t3& idx, uint32_t& outIx) { return acquireCornerIndex(idx, currentSmoothingGroup, outIx); }, triIdx, cornerIx)) @@ -855,37 +774,31 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( cornerCount = 3u; linePtr = triLinePtr; } - while (linePtr < lineEnd) { while (linePtr < lineEnd && Parse::Common::isInlineWhitespace(*linePtr)) ++linePtr; if (linePtr >= lineEnd) break; - hlsl::int32_t3 idx(-1, -1, -1); if (!Parse::parseFaceVertexToken(linePtr, lineEnd, idx, posCount, uvCount, normalCount)) return {}; ++faceFallbackTokenCount; ++currentFaceFallbackTokenCount; - uint32_t cornerIx = 0u; if (!acquireCornerIndex(idx, currentSmoothingGroup, cornerIx)) return {}; - if (cornerCount == 0u) { firstCorner = cornerIx; ++cornerCount; 
continue; } - if (cornerCount == 1u) { previousCorner = cornerIx; ++cornerCount; continue; } - if (!appendIndex(cornerIx) || !appendIndex(previousCorner) || !appendIndex(firstCorner)) return {}; @@ -895,7 +808,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( } } } - if (lineTerminator >= bufEnd) bufPtr = bufEnd; else if (*lineTerminator == '\r' && (lineTerminator + 1) < bufEnd && @@ -908,7 +820,6 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( return {}; if (loadedGeometries.empty()) return {}; - uint64_t outVertexCount = 0ull; uint64_t outIndexCount = 0ull; uint64_t faceFastTokenCountSum = 0ull; @@ -923,9 +834,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( faceFastTokenCountSum += loaded.faceFastTokenCount; faceFallbackTokenCountSum += loaded.faceFallbackTokenCount; } - loadSession.logTinyIO(_params.logger, ioTelemetry); - const bool buildCollections = sawObjectDirective || sawGroupDirective || loadedGeometries.size() > 1ull; core::vector> outputAssets; @@ -952,24 +861,20 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( return {}; objectCollections.push_back(std::move(collection)); } - auto* refs = objectCollections[objectIx]->getGeometries(); if (!refs) return {}; - IGeometryCollection::SGeometryReference ref = {}; ref.geometry = core::smart_refctd_ptr_static_cast>( loaded.geometry); refs->push_back(std::move(ref)); } - outputAssets.reserve(objectCollections.size()); for (auto& collection : objectCollections) outputAssets.push_back( core::smart_refctd_ptr_static_cast(std::move(collection))); objectCount = outputAssets.size(); } - _params.logger.log( "OBJ loader stats: file=%s in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu " "faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu " @@ -992,11 +897,8 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(loadSession.ioPlan.strategy).c_str(), static_cast(loadSession.ioPlan.chunkSizeBytes()), 
loadSession.ioPlan.reason); - return SAssetBundle(core::smart_refctd_ptr(), std::move(outputAssets)); } - } - #endif // _NBL_COMPILE_WITH_OBJ_LOADER_ diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 3376c76082..4ebf2c9f95 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -3,7 +3,6 @@ // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors - #include "CPLYMeshFileLoader.h" #include "impl/SBinaryData.h" #include "impl/SFileAccess.h" @@ -20,32 +19,25 @@ #include "nbl/core/hash/blake.h" #include "nbl/system/IFile.h" #include "nbl/system/ISystem.h" - #include - namespace nbl::asset { - namespace { - struct Parse { static constexpr uint32_t UV0 = 0u; using Binary = impl::BinaryData; using Common = impl::TextParse; - struct ContentHashBuild { bool enabled = false; bool inlineHash = false; core::vector> hashedBuffers = {}; std::jthread deferredThread = {}; - static inline ContentHashBuild create(const bool enabled, const bool inlineHash) { return {.enabled = enabled, .inlineHash = inlineHash}; } inline bool hashesInline() const { return enabled && inlineHash; } inline bool hashesDeferred() const { return enabled && !inlineHash; } - inline void hashNow(ICPUBuffer* const buffer) { if (!hashesInline() || !buffer || buffer->getContentHash() != IPreHashed::INVALID_HASH) @@ -56,7 +48,6 @@ struct Parse buffer->setContentHash(buffer->computeContentHash()); hashedBuffers.push_back(core::smart_refctd_ptr(buffer)); } - inline void tryDefer(ICPUBuffer* const buffer) { if (!hashesDeferred() || !buffer || deferredThread.joinable() || buffer->getContentHash() != IPreHashed::INVALID_HASH) @@ -64,21 +55,17 @@ struct Parse auto keepAlive = core::smart_refctd_ptr(buffer); deferredThread = 
std::jthread([buffer = std::move(keepAlive)]() mutable {buffer->setContentHash(buffer->computeContentHash());}); } - inline void wait() { if (deferredThread.joinable()) deferredThread.join(); } }; - static std::string_view toStringView(const char* text) { return text ? std::string_view{text} : std::string_view{}; } - template static E_FORMAT selectStructuredFormat(const std::array& formats, const uint32_t componentCount) { return componentCount > 0u && componentCount <= N ? formats[componentCount - 1u] : EF_UNKNOWN; } - static E_FORMAT expandStructuredFormat(const E_FORMAT componentFormat, const uint32_t componentCount) { switch (componentFormat) @@ -94,11 +81,9 @@ struct Parse default: return EF_UNKNOWN; } } - struct Context { static constexpr uint64_t ReadWindowPaddingBytes = 1ull; - struct SProperty { static E_FORMAT getType(const char* typeString) @@ -136,12 +121,10 @@ struct Parse return EF_R64_SFLOAT; return EF_UNKNOWN; } - bool isList() const { return type == EF_UNKNOWN && asset::isIntegerFormat(list.countType) && asset::isIntegerFormat(list.itemType); } - void skip(Context& _ctx) const { if (isList()) @@ -155,7 +138,6 @@ struct Parse else _ctx.getNextWord(); } - std::string Name; E_FORMAT type; struct SListTypes @@ -164,7 +146,6 @@ struct Parse E_FORMAT itemType; } list; }; - struct SElement { void skipElement(Context& _ctx) const @@ -180,15 +161,12 @@ struct Parse else _ctx.getNextLine(); } - std::string Name; core::vector Properties; size_t Count; uint32_t KnownSize; }; - static constexpr size_t DefaultIoReadWindowBytes = 50ull << 10; - void init(size_t _ioReadWindowSize = DefaultIoReadWindowBytes) { ioReadWindowSize = std::max(_ioReadWindowSize, DefaultIoReadWindowBytes); @@ -197,7 +175,6 @@ struct Parse LineEndPointer = EndPointer - 1; fillBuffer(); } - void fillBuffer() { if (EndOfFile) @@ -235,7 +212,6 @@ struct Parse EndOfFile = true; } } - const char* getNextLine() { StartPointer = LineEndPointer + 1; @@ -261,7 +237,6 @@ struct Parse WordLength = 
-1; return StartPointer; } - const char* getNextWord() { StartPointer += WordLength + 1; @@ -290,7 +265,6 @@ struct Parse WordLength = std::distance(StartPointer, nextWord) - 1; return StartPointer; } - size_t getAbsoluteOffset(const char* ptr) const { if (!ptr || ptr > EndPointer) @@ -298,7 +272,6 @@ struct Parse const size_t trailingBytes = static_cast(EndPointer - ptr); return fileOffset >= trailingBytes ? (fileOffset - trailingBytes) : 0ull; } - void useMappedBinaryWindow(const char* data, const size_t sizeBytes) { if (!data) @@ -310,7 +283,6 @@ struct Parse EndOfFile = true; fileOffset = inner.mainFile ? inner.mainFile->getSize() : fileOffset; } - void moveForward(const size_t bytes) { assert(IsBinaryFile); @@ -345,15 +317,12 @@ struct Parse remaining -= step; } } - using widest_int_t = uint32_t; - const char* getCurrentWordEnd(const char* word) const { const size_t tokenLen = WordLength >= 0 ? static_cast(WordLength + 1) : std::char_traits::length(word); return word + tokenLen; } - widest_int_t getInt(const E_FORMAT f) { assert(!isFloatingPointFormat(f)); @@ -410,7 +379,6 @@ struct Parse uint64_t value = 0u; return parseInt(value); } - hlsl::float64_t getFloat(const E_FORMAT f) { assert(isFloatingPointFormat(f)); @@ -454,7 +422,6 @@ struct Parse return value; return ptr != word ? 
value : 0.0; } - void getData(void* dst, const E_FORMAT f) { const auto size = getTexelOrBlockBytesize(f); @@ -491,7 +458,6 @@ struct Parse bool hasNormals; bool hasUVs; }; - auto allF32 = [&el]() -> bool { for (const auto& prop : el.Properties) { if (prop.type != EF_R32_SFLOAT) @@ -501,7 +467,6 @@ struct Parse }; if (!allF32()) return EFastVertexReadResult::NotApplicable; - auto matchNames = [&el](std::initializer_list names) -> bool { if (el.Properties.size() != names.size()) @@ -530,7 +495,6 @@ struct Parse layout = &xyz_n_uv; if (!layout) return EFastVertexReadResult::NotApplicable; - const size_t floatBytes = sizeof(hlsl::float32_t); struct STupleDesc { uint32_t beginIx; @@ -575,7 +539,6 @@ struct Parse if (el.Count > (std::numeric_limits::max() / layout->srcBytesPerVertex)) return EFastVertexReadResult::Error; - const bool trackAABB = parsedAABB != nullptr; const bool needsByteSwap = IsWrongEndian; auto decodeF32 = [needsByteSwap](const uint8_t* src) -> float { @@ -613,7 +576,6 @@ struct Parse tuple.base += tuple.stride; return value; }; - size_t remainingVertices = el.Count; while (remainingVertices > 0ull) { if (StartPointer + layout->srcBytesPerVertex > EndPointer) @@ -624,7 +586,6 @@ struct Parse : 0ull; if (available < layout->srcBytesPerVertex) return EFastVertexReadResult::Error; - const size_t batchVertices = std::min(remainingVertices, available / layout->srcBytesPerVertex); const uint8_t* src = reinterpret_cast(StartPointer); @@ -649,12 +610,10 @@ struct Parse } } } - const size_t consumed = batchVertices * layout->srcBytesPerVertex; StartPointer += consumed; remainingVertices -= batchVertices; } - for (uint32_t tupleIx = 0u; tupleIx < tupleCount; ++tupleIx) commitTuple(tuples[tupleIx]); return EFastVertexReadResult::Success; @@ -665,7 +624,6 @@ struct Parse assert(el.Properties.size() == vertAttrIts.size()); if (!IsBinaryFile) getNextLine(); - for (size_t j = 0; j < el.Count; ++j) for (auto i = 0u; i < vertAttrIts.size(); i++) { const auto& prop = 
el.Properties[i]; @@ -702,7 +660,6 @@ struct Parse if (!IsBinaryFile) getNextLine(); const bool hasVertexCount = vertexCount != 0u; - for (const auto& prop : Element.Properties) { if (prop.isList() && (prop.Name == "vertex_indices" || prop.Name == "vertex_index")) { @@ -747,7 +704,6 @@ struct Parse } return true; }; - if (IsBinaryFile && !IsWrongEndian && srcIndexFmt == EF_R32_UINT) { const size_t bytesNeeded = static_cast(count) * sizeof(uint32_t); @@ -790,7 +746,6 @@ struct Parse continue; } } - auto readIndex = [&]() -> uint32_t { return static_cast(getInt(srcIndexFmt)); }; @@ -804,11 +759,9 @@ struct Parse } return true; } - enum class EFastFaceReadResult : uint8_t { NotApplicable, Success, Error }; - EFastFaceReadResult readFaceElementFast( const SElement& element, core::vector& _outIndices, uint32_t& _maxIndex, uint64_t& _faceCount, const uint32_t vertexCount, @@ -817,14 +770,12 @@ struct Parse return EFastFaceReadResult::NotApplicable; if (element.Properties.size() != 1u) return EFastFaceReadResult::NotApplicable; - const auto& prop = element.Properties[0]; if (!prop.isList() || (prop.Name != "vertex_indices" && prop.Name != "vertex_index")) return EFastFaceReadResult::NotApplicable; if (prop.list.countType != EF_R8_UINT) return EFastFaceReadResult::NotApplicable; - const E_FORMAT srcIndexFmt = prop.list.itemType; const bool isSrcU32 = srcIndexFmt == EF_R32_UINT; const bool isSrcS32 = srcIndexFmt == EF_R32_SINT; @@ -832,7 +783,6 @@ struct Parse const bool isSrcS16 = srcIndexFmt == EF_R16_SINT; if (!isSrcU32 && !isSrcS32 && !isSrcU16 && !isSrcS16) return EFastFaceReadResult::NotApplicable; - const bool is32Bit = isSrcU32 || isSrcS32; const bool needEndianSwap = IsWrongEndian; const size_t indexSize = is32Bit ? 
sizeof(uint32_t) : sizeof(uint16_t); @@ -926,7 +876,6 @@ struct Parse hashPipelineOk.store(false, std::memory_order_relaxed); return; } - const size_t begin = (element.Count * workerIx) / workerCount; const size_t end = @@ -1000,7 +949,6 @@ struct Parse }); if (hashThread.joinable()) hashThread.join(); - const bool anyNonTriangle = std::any_of(workerNonTriangle.begin(), workerNonTriangle.end(), [](const uint8_t v) { return v != 0u; }); @@ -1025,27 +973,23 @@ struct Parse if (hashInParsePipeline && hashPipelineOk.load(std::memory_order_relaxed)) outIndexHash = parsedIndexHash; - StartPointer = reinterpret_cast( const_cast(ptr + element.Count * recordBytes)); _faceCount += element.Count; return EFastFaceReadResult::Success; } } - auto consumeTriangles = [&](const size_t indexBytes, const uint32_t signedMask, auto readTri) -> EFastFaceReadResult { for (size_t j = 0u; j < element.Count; ++j) { if (*ptr++ != 3u) { fallbackToGeneric = true; return EFastFaceReadResult::NotApplicable; } - const hlsl::uint32_t3 tri = readTri(ptr); ptr += 3ull * indexBytes; const uint32_t triOr = tri.x | tri.y | tri.z; if (signedMask && (triOr & signedMask)) return EFastFaceReadResult::Error; - out[0] = tri.x; out[1] = tri.y; out[2] = tri.z; @@ -1074,17 +1018,14 @@ struct Parse }); if (fastReadResult == EFastFaceReadResult::Error) return EFastFaceReadResult::Error; - if (!fallbackToGeneric) { StartPointer = reinterpret_cast(const_cast(ptr)); _faceCount += element.Count; return EFastFaceReadResult::Success; } - _outIndices.resize(oldSize); _maxIndex = oldMaxIndex; } - if (element.Count > (std::numeric_limits::max() / 3u)) return EFastFaceReadResult::Error; const size_t reserveCount = element.Count * 3u; @@ -1124,7 +1065,6 @@ struct Parse StartPointer += sizeof(uint32_t); return true; } - if (!ensureBytes(sizeof(uint16_t))) return false; if (isSrcU16) { @@ -1145,7 +1085,6 @@ struct Parse StartPointer += sizeof(uint16_t); return true; }; - for (size_t j = 0u; j < element.Count; ++j) { int32_t 
countSigned = 0; if (!readCount(countSigned)) @@ -1160,13 +1099,11 @@ struct Parse ++_faceCount; continue; } - uint32_t i0 = 0u; uint32_t i1 = 0u; uint32_t i2 = 0u; if (!readIndex(i0) || !readIndex(i1) || !readIndex(i2)) return EFastFaceReadResult::Error; - if (trackMaxIndex) { _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); } else if (i0 >= vertexCount || i1 >= vertexCount || @@ -1176,7 +1113,6 @@ struct Parse _outIndices.push_back(i0); _outIndices.push_back(i1); _outIndices.push_back(i2); - uint32_t prev = i2; for (uint32_t k = 3u; k < count; ++k) { uint32_t idx = 0u; @@ -1192,13 +1128,10 @@ struct Parse _outIndices.push_back(idx); prev = idx; } - ++_faceCount; } - return EFastFaceReadResult::Success; } - IAssetLoader::SAssetLoadContext inner; uint32_t topHierarchyLevel; IAssetLoader::IAssetLoaderOverride* loaderOverride; @@ -1217,25 +1150,19 @@ struct Parse core::vector vertAttrIts; }; }; - } - CPLYMeshFileLoader::CPLYMeshFileLoader() = default; - const char** CPLYMeshFileLoader::getAssociatedFileExtensions() const { static const char* ext[] = { "ply", nullptr }; return ext; } - bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr) const { std::array buf = {}; - system::IFile::success_t success; _file->read(success, buf.data(), 0, buf.size()); if (!success) return false; - const std::string_view fileHeader(buf.data(), success.getBytesProcessed()); size_t lineStart = 0ull; const size_t firstLineEnd = fileHeader.find('\n'); @@ -1246,7 +1173,6 @@ bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste if (firstLineEnd == std::string_view::npos) return false; lineStart = firstLineEnd + 1ull; - constexpr std::array headers = { "format ascii 1.0", "format binary_little_endian 1.0", "format binary_big_endian 1.0"}; @@ -1259,10 +1185,8 @@ bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste return std::find(headers.begin(), headers.end(), line) != 
headers.end(); lineStart = lineEnd + 1ull; } - return false; } - //! creates/loads an animated mesh from the file. SAssetBundle CPLYMeshFileLoader::loadAsset( system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, @@ -1270,7 +1194,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( using namespace nbl::core; if (!_file) return {}; - const bool computeContentHashes = !_params.loaderFlags.hasAnyFlag( IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES); uint64_t faceCount = 0u; @@ -1285,7 +1208,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( impl::SLoadSession loadSession = {}; if (!impl::SLoadSession::begin(_params.logger, "PLY loader", _file, _params.ioPolicy, fileSize, true, loadSession)) return {}; - Parse::Context ctx = {asset::IAssetLoader::SAssetLoadContext{_params, _file}, _hierarchyLevel, _override}; uint64_t desiredReadWindow = @@ -1334,7 +1256,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( return; contentHashBuild.tryDefer(view.src.buffer.get()); }; - if (Parse::toStringView(ctx.getNextLine()) != "ply") { _params.logger.log("Not a valid PLY file %s", system::ILogger::ELL_ERROR, ctx.inner.mainFile->getFileName().string().c_str()); @@ -1344,17 +1265,14 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( const char* word = ctx.getNextWord(); for (; Parse::toStringView(word) == "comment"; ctx.getNextLine()) word = ctx.getNextWord(); - bool readingHeader = true; bool continueReading = true; ctx.IsBinaryFile = false; ctx.IsWrongEndian = false; - do { const std::string_view wordView = Parse::toStringView(word); if (wordView == "property") { word = ctx.getNextWord(); - if (ctx.ElementList.empty()) { _params.logger.log("PLY property token found before element %s", system::ILogger::ELL_WARNING, word); @@ -1364,9 +1282,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( prop.type = prop.getType(word); if (prop.type == EF_UNKNOWN) { el.KnownSize = false; - word = ctx.getNextWord(); - prop.list.countType = prop.getType(word); if (ctx.IsBinaryFile && 
!isIntegerFormat(prop.list.countType)) { _params.logger.log("Cannot read binary PLY file containing data " @@ -1390,7 +1306,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( continueReading = false; } else el.KnownSize += getTexelOrBlockBytesize(prop.type); - prop.Name = ctx.getNextWord(); } } else if (wordView == "element") { @@ -1411,7 +1326,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( } else if (wordView == "format") { word = ctx.getNextWord(); const std::string_view formatView = Parse::toStringView(word); - if (formatView == "binary_little_endian") { ctx.IsBinaryFile = true; } else if (formatView == "binary_big_endian") { @@ -1423,7 +1337,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( system::ILogger::ELL_ERROR, word); continueReading = false; } - if (continueReading) { word = ctx.getNextWord(); if (Parse::toStringView(word) != "1.0") { @@ -1450,7 +1363,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( _params.logger.log("Unknown item in PLY file %s", system::ILogger::ELL_WARNING, word); } - if (readingHeader && continueReading) { ctx.getNextLine(); word = ctx.getNextWord(); @@ -1496,7 +1408,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( logMalformedElement("face"); return false; }; - for (uint32_t i = 0; i < ctx.ElementList.size(); ++i) { auto& el = ctx.ElementList[i]; if (el.Name == "vertex") { @@ -1591,12 +1502,10 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( return {}; } } - if (!parsedAABB.empty()) geometry->applyAABB(parsedAABB.value); else CPolygonGeometryManipulator::recomputeAABB(geometry.get()); - const uint64_t indexCount = static_cast(indices.size()); if (indices.empty()) { geometry->setIndexing(IPolygonGeometryBase::PointList()); @@ -1607,7 +1516,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( _file->getFileName().string().c_str()); return {}; } - geometry->setIndexing(IPolygonGeometryBase::TriangleList()); const bool canUseU16 = (vertCount != 0u) @@ -1634,7 +1542,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( 
hashViewBufferIfNeeded(geometry->getIndexView()); } } - if (contentHashBuild.hashesDeferred()) { contentHashBuild.wait(); SPolygonGeometryContentHash::computeMissing(geometry.get(), @@ -1642,7 +1549,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( } else { hashRemainingGeometryBuffers(); } - const uint64_t ioMinRead = ctx.readCallCount ? ctx.readMinBytes : 0ull; const uint64_t ioAvgRead = ctx.readCallCount ? (ctx.readBytesTotal / ctx.readCallCount) : 0ull; @@ -1669,6 +1575,5 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( auto meta = core::make_smart_refctd_ptr(); return SAssetBundle(std::move(meta), {std::move(geometry)}); } - } #endif // _NBL_COMPILE_WITH_PLY_LOADER_ diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 833f636f43..23b4251c4e 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -3,16 +3,13 @@ // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors - #include "CPLYMeshWriter.h" #include "nbl/asset/interchange/SGeometryViewDecode.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" #include "impl/SBinaryData.h" #include "impl/SFileAccess.h" - #include "nbl/system/IFile.h" - #include #include #include @@ -23,43 +20,35 @@ #include #include #include - namespace nbl::asset { - CPLYMeshWriter::CPLYMeshWriter() { #ifdef _NBL_DEBUG setDebugName("CPLYMeshWriter"); #endif } - const char** CPLYMeshWriter::getAssociatedFileExtensions() const { static const char* ext[] = { "ply", nullptr }; return ext; } - writer_flags_t CPLYMeshWriter::getSupportedFlags() { return asset::EWF_BINARY; } - writer_flags_t CPLYMeshWriter::getForcedFlags() { return EWF_NONE; } - namespace { - struct Parse { static constexpr uint32_t UV0 = 0u; using 
Binary = impl::BinaryData; using SemanticDecode = SGeometryViewDecode::Prepared; using StoredDecode = SGeometryViewDecode::Prepared; - enum class ScalarType : uint8_t { Int8, @@ -71,7 +60,6 @@ struct Parse Float32, Float64 }; - struct ScalarMeta { const char* name = "float32"; @@ -79,7 +67,6 @@ struct Parse bool integer = false; bool signedType = true; }; - struct ExtraAuxView { const ICPUPolygonGeometry::SDataView* view = nullptr; @@ -87,7 +74,6 @@ struct Parse uint32_t auxIndex = 0u; ScalarType scalarType = ScalarType::Float32; }; - struct WriteInput { const ICPUPolygonGeometry* geom = nullptr; @@ -102,11 +88,9 @@ struct Parse bool write16BitIndices = false; bool flipVectors = false; }; - static constexpr size_t ApproxTextBytesPerVertex = sizeof("0.000000 0.000000 0.000000 0.000000 0.000000 0.000000\n") - 1ull; static constexpr size_t ApproxTextBytesPerFace = sizeof("3 4294967295 4294967295 4294967295\n") - 1ull; static constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 16ull; - template static void appendIntegral(std::string& out, const T value) { @@ -115,7 +99,6 @@ struct Parse if (res.ec == std::errc()) out.append(buf.data(), static_cast(res.ptr - buf.data())); } - static void appendFloat(std::string& out, double value) { const size_t oldSize = out.size(); @@ -125,7 +108,6 @@ struct Parse char* const cursor = SGeometryWriterCommon::appendFloatToBuffer(begin, end, value); out.resize(oldSize + static_cast(cursor - begin)); } - static ScalarMeta getScalarMeta(const ScalarType type) { switch (type) @@ -140,44 +122,36 @@ struct Parse default: return {"float32", sizeof(float), false, true}; } } - static bool isSupportedScalarFormat(const E_FORMAT format) { if (format == EF_UNKNOWN) return false; - const uint32_t channels = getFormatChannelCount(format); if (channels == 0u) return false; - if (!(isIntegerFormat(format) || isFloatingPointFormat(format) || isNormalizedFormat(format) || isScaledFormat(format))) return false; - const auto 
bytesPerPixel = getBytesPerPixel(format); if (bytesPerPixel.getDenominator() != 1u) return false; const uint32_t pixelBytes = bytesPerPixel.getNumerator(); if (pixelBytes == 0u || (pixelBytes % channels) != 0u) return false; - const uint32_t bytesPerChannel = pixelBytes / channels; return bytesPerChannel == 1u || bytesPerChannel == 2u || bytesPerChannel == 4u || bytesPerChannel == 8u; } - static ScalarType selectScalarType(const E_FORMAT format) { if (!isSupportedScalarFormat(format)) return ScalarType::Float32; if (isNormalizedFormat(format) || isScaledFormat(format)) return ScalarType::Float32; - const uint32_t channels = getFormatChannelCount(format); if (channels == 0u) { assert(format == EF_UNKNOWN); return ScalarType::Float32; } - const auto bytesPerPixel = getBytesPerPixel(format); if (bytesPerPixel.getDenominator() != 1u) return ScalarType::Float32; @@ -185,7 +159,6 @@ struct Parse if (pixelBytes == 0u || (pixelBytes % channels) != 0u) return ScalarType::Float32; const uint32_t bytesPerChannel = pixelBytes / channels; - if (isIntegerFormat(format)) { const bool signedType = isSignedFormat(format); @@ -197,13 +170,10 @@ struct Parse default: return ScalarType::Float64; } } - if (isFloatingPointFormat(format)) return bytesPerChannel >= 8u ? 
ScalarType::Float64 : ScalarType::Float32; - return ScalarType::Float32; } - struct BinarySink { uint8_t* cursor = nullptr; @@ -217,7 +187,6 @@ struct Parse } inline bool finishVertex() { return true; } }; - struct TextSink { std::string& output; @@ -233,12 +202,10 @@ struct Parse } inline bool finishVertex() { output.push_back('\n'); return true; } }; - template struct PreparedView { using EmitFn = bool(*)(Sink&, const PreparedView&, size_t); - uint32_t components = 0u; bool flipVectors = false; SemanticDecode semantic = {}; @@ -246,7 +213,6 @@ struct Parse EmitFn emit = nullptr; inline explicit operator bool() const { return emit != nullptr && (static_cast(semantic) || static_cast(stored)); } inline bool operator()(Sink& sink, const size_t ix) const { return static_cast(*this) && emit(sink, *this, ix); } - template static bool emitDecode(Sink& sink, const auto& decode, const size_t ix, const uint32_t components, const bool flipVectors) { @@ -266,7 +232,6 @@ struct Parse } return true; } - template static bool emitPrepared(Sink& sink, const PreparedView& view, const size_t ix) { @@ -274,7 +239,6 @@ struct Parse return emitDecode(sink, view.semantic, ix, view.components, view.flipVectors); return emitDecode(sink, view.stored, ix, view.components, view.flipVectors); } - template static inline void prepareDecode(PreparedView& view, const ICPUPolygonGeometry::SDataView& src, const bool flipVectors) { @@ -285,7 +249,6 @@ struct Parse view.stored = SGeometryViewDecode::prepare(src); view.emit = &emitPrepared; } - static PreparedView create(const ICPUPolygonGeometry::SDataView* view, const uint32_t components, const ScalarType scalarType, const bool flipVectors) { PreparedView retval = {.components = components}; @@ -305,7 +268,6 @@ struct Parse return retval; } }; - template static bool emitVertices(const WriteInput& input, Sink& sink) { @@ -339,7 +301,6 @@ struct Parse } return true; } - static bool writeBinary(const WriteInput& input, uint8_t* dst) { BinarySink sink = 
{.cursor = dst}; @@ -358,7 +319,6 @@ struct Parse return true; }); } - static bool writeText(const WriteInput& input, std::string& output) { TextSink sink = {.output = output}; @@ -375,17 +335,13 @@ struct Parse }); } }; - } - bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { using ScalarType = Parse::ScalarType; SFileWriteTelemetry ioTelemetry = {}; - if (!_override) getDefaultOverride(_override); - if (!_file || !_params.rootAsset) { _params.logger.log("PLY writer: missing output file or root asset.", system::ILogger::ELL_ERROR); @@ -416,7 +372,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ _params.logger.log("PLY writer: output override returned null file.", system::ILogger::ELL_ERROR); return false; } - const auto& positionView = geom->getPositionView(); const auto& normalView = geom->getNormalView(); const size_t vertexCount = positionView.getElementCount(); @@ -434,7 +389,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, Parse::UV0, vertexCount); if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) uvView = nullptr; - core::vector extraAuxViews; const auto& auxViews = geom->getAuxAttributeViews(); extraAuxViews.reserve(auxViews.size()); @@ -451,7 +405,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const uint32_t components = std::min(4u, channels); extraAuxViews.push_back({&view, components, auxIx, Parse::selectScalarType(view.composed.format)}); } - const auto* indexing = geom->getIndexingCallback(); if (!indexing) { @@ -469,7 +422,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ _params.logger.log("PLY writer: failed to validate triangle indexing.", system::ILogger::ELL_ERROR); return false; } - const auto flags = 
_override->getAssetWritingFlags(ctx, geom, 0u); const bool binary = flags.hasAnyFlag(E_WRITER_FLAGS::EWF_BINARY); const bool flipVectors = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); @@ -481,11 +433,9 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (flipVectors && Parse::getScalarMeta(normalScalarType).integer && !Parse::getScalarMeta(normalScalarType).signedType) normalScalarType = ScalarType::Float32; const ScalarType uvScalarType = uvView ? Parse::selectScalarType(uvView->composed.format) : ScalarType::Float32; - const auto positionMeta = Parse::getScalarMeta(positionScalarType); const auto normalMeta = Parse::getScalarMeta(normalScalarType); const auto uvMeta = Parse::getScalarMeta(uvScalarType); - size_t extraAuxBytesPerVertex = 0ull; for (const auto& extra : extraAuxViews) extraAuxBytesPerVertex += static_cast(extra.components) * Parse::getScalarMeta(extra.scalarType).byteSize; @@ -544,7 +494,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(ioPlan.strategy).c_str(), static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); return writeOk; }; - if (binary) { const size_t vertexStride = static_cast(positionMeta.byteSize) * 3ull + (writeNormals ? static_cast(normalMeta.byteSize) * 3ull : 0ull) + (uvView ? 
static_cast(uvMeta.byteSize) * 2ull : 0ull) + extraAuxBytesPerVertex; @@ -559,7 +508,6 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } return writePayload(body.data(), body.size()); } - std::string body; body.reserve(vertexCount * Parse::ApproxTextBytesPerVertex + faceCount * Parse::ApproxTextBytesPerFace); if (!Parse::writeText(input, body)) @@ -569,7 +517,5 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } return writePayload(body.data(), body.size()); } - } - #endif // _NBL_COMPILE_WITH_PLY_WRITER_ diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index e1cde54efa..f7155e5b43 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -4,13 +4,11 @@ // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors #include "nbl/system/IFile.h" - #include "CSTLMeshWriter.h" #include "impl/SFileAccess.h" #include "nbl/asset/format/convertColor.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" - #include #include #include @@ -19,13 +17,10 @@ #include #include #include - namespace nbl::asset { - namespace { - struct Parse { static constexpr uint32_t COLOR0 = 0u; @@ -36,7 +31,6 @@ struct Parse core::vector ioBuffer = {}; size_t fileOffset = 0ull; SFileWriteTelemetry writeTelemetry = {}; - bool flush() { if (ioBuffer.empty()) @@ -97,7 +91,6 @@ struct Parse } } }; - struct TriangleData { hlsl::float32_t3 normal = {}; @@ -105,7 +98,6 @@ struct Parse hlsl::float32_t3 vertex2 = {}; hlsl::float32_t3 vertex3 = {}; }; - static constexpr size_t BinaryHeaderBytes = 80ull; static constexpr size_t BinaryTriangleCountBytes = sizeof(uint32_t); static constexpr size_t BinaryTriangleFloatCount = 12ull; @@ -119,7 +111,6 @@ struct Parse static constexpr char AsciiEndSolidPrefix[] = 
"endsolid "; static constexpr char AsciiDefaultName[] = "nabla_mesh"; static_assert(BinaryTriangleRecordBytes == 50ull); - static bool appendLiteral(char*& cursor, char* const end, const char* text, const size_t textSize) { if (!cursor || cursor + textSize > end) @@ -128,7 +119,6 @@ struct Parse cursor += textSize; return true; } - static bool appendVectorAsAsciiLine(char*& cursor, char* const end, const hlsl::float32_t3& v) { cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.x); @@ -145,7 +135,6 @@ struct Parse *(cursor++) = '\n'; return true; } - static bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, hlsl::float32_t3& out0, hlsl::float32_t3& out1, hlsl::float32_t3& out2, hlsl::uint32_t3* outIdx) { hlsl::uint32_t3 idx(0u); @@ -156,7 +145,6 @@ struct Parse indexing->operator()(ctx); if (outIdx) *outIdx = idx; - std::array positions = {}; if (!decodeIndexedTriple(idx, [&posView](const uint32_t vertexIx, hlsl::float32_t3& out) -> bool { return posView.decodeElement(vertexIx, out); }, positions.data())) return false; @@ -165,13 +153,11 @@ struct Parse out2 = positions[2]; return true; } - template static bool decodeIndexedTriple(const hlsl::uint32_t3& idx, DecodeFn&& decode, T* out) { return out && decode(idx.x, out[0]) && decode(idx.y, out[1]) && decode(idx.z, out[2]); } - static bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const hlsl::uint32_t3& idx, hlsl::float32_t3& outNormal) { if (!normalView) @@ -181,7 +167,6 @@ struct Parse return false; return selectFirstValidNormal(normals.data(), static_cast(normals.size()), outNormal); } - static bool selectFirstValidNormal(const hlsl::float32_t3* const normals, const uint32_t count, hlsl::float32_t3& outNormal) { if (!normals || count == 0u) @@ -196,7 +181,6 @@ struct Parse } return false; } - static void prepareVertices(const hlsl::float32_t3& p0, 
const hlsl::float32_t3& p1, const hlsl::float32_t3& p2, const bool flipHandedness, hlsl::float32_t3& vertex1, hlsl::float32_t3& vertex2, hlsl::float32_t3& vertex3) { vertex1 = p2; @@ -209,7 +193,6 @@ struct Parse vertex3.x = -vertex3.x; } } - static hlsl::float32_t3 computePlaneNormal(const hlsl::float32_t3& vertex1, const hlsl::float32_t3& vertex2, const hlsl::float32_t3& vertex3, float* const planeNormalLen2 = nullptr) { const hlsl::float32_t3 planeNormal = hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); @@ -221,7 +204,6 @@ struct Parse } return len2 > 0.f ? hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); } - static hlsl::float32_t3 resolveTriangleNormal(const hlsl::float32_t3& planeNormal, const float planeNormalLen2, const hlsl::float32_t3* const attrNormals, const uint32_t attrNormalCount, const bool flipHandedness, const bool alignToPlane) { hlsl::float32_t3 attrNormal = {}; @@ -235,7 +217,6 @@ struct Parse } return planeNormalLen2 > 0.f ? hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); } - static void buildTriangle(const hlsl::float32_t3& p0, const hlsl::float32_t3& p1, const hlsl::float32_t3& p2, const hlsl::float32_t3* const attrNormals, const uint32_t attrNormalCount, const bool flipHandedness, const bool alignToPlane, TriangleData& triangle) { prepareVertices(p0, p1, p2, flipHandedness, triangle.vertex1, triangle.vertex2, triangle.vertex3); @@ -243,7 +224,6 @@ struct Parse const hlsl::float32_t3 planeNormal = computePlaneNormal(triangle.vertex1, triangle.vertex2, triangle.vertex3, &planeNormalLen2); triangle.normal = resolveTriangleNormal(planeNormal, planeNormalLen2, attrNormals, attrNormalCount, flipHandedness, alignToPlane); } - static double normalizeColorComponentToUnit(double value) { if (!std::isfinite(value)) @@ -252,7 +232,6 @@ struct Parse value /= 255.0; return std::clamp(value, 0.0, 1.0); } - static uint16_t packViscamColorFromB8G8R8A8(const uint32_t color) { const void* src[4] = {&color, nullptr, nullptr, 
nullptr}; @@ -261,13 +240,11 @@ struct Parse packed |= 0x8000u; return packed; } - static const ICPUPolygonGeometry::SDataView* getColorView(const ICPUPolygonGeometry* geom, const size_t vertexCount) { const auto* view = SGeometryWriterCommon::getAuxViewAt(geom, Parse::COLOR0, vertexCount); return view && getFormatChannelCount(view->composed.format) >= 3u ? view : nullptr; } - static bool decodeColorB8G8R8A8(const ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, uint32_t& outColor) { if (colorView.composed.format == EF_B8G8R8A8_UNORM && colorView.composed.getStride() == sizeof(uint32_t)) @@ -285,13 +262,11 @@ struct Parse encodePixels(&outColor, rgbaUnit); return true; } - static void decodeColorUnitRGBAFromB8G8R8A8(const uint32_t color, double* out) { const void* src[4] = {&color, nullptr, nullptr, nullptr}; decodePixels(src, out, 0u, 0u); } - static bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, Context* context) { if (!geom || !context || !context->writeContext.outputFile) @@ -309,7 +284,6 @@ struct Parse if (faceCount > static_cast(std::numeric_limits::max())) return false; const uint32_t facenum = static_cast(faceCount); - const size_t outputSize = BinaryPrefixBytes + static_cast(facenum) * BinaryTriangleRecordBytes; std::unique_ptr output(new (std::nothrow) uint8_t[outputSize]); if (!output) @@ -319,14 +293,12 @@ struct Parse dst += BinaryHeaderBytes; std::memcpy(dst, &facenum, sizeof(facenum)); dst += sizeof(facenum); - const auto& normalView = geom->getNormalView(); const bool hasNormals = static_cast(normalView); const auto* const colorView = getColorView(geom, vertexCount); const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(posView); const hlsl::float32_t3* const tightNormals = hasNormals ? 
SGeometryWriterCommon::getTightView(normalView) : nullptr; const bool hasFastTightPath = !geom->getIndexView() && tightPositions && (!hasNormals || tightNormals); - auto decodePosition = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { if (tightPositions) { @@ -387,7 +359,6 @@ struct Parse writeRecord(triangle.normal, triangle.vertex1, triangle.vertex2, triangle.vertex3, faceColor); return true; }; - if (hasFastTightPath) { const hlsl::float32_t3* posTri = tightPositions; @@ -412,13 +383,11 @@ struct Parse return emitTriangle(positions[0], positions[1], positions[2], idx, hasNormals ? normals.data() : nullptr, hasNormals ? 3u : 0u, true); })) return false; - const bool writeOk = SInterchangeIO::writeFileWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, &context->writeTelemetry); if (writeOk) context->fileOffset += outputSize; return writeOk; } - static bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, Context* context) { if (!geom) @@ -431,7 +400,6 @@ struct Parse return false; const auto& normalView = geom->getNormalView(); const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); - const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); const std::string_view solidName = name.empty() ? 
std::string_view(AsciiDefaultName) : std::string_view(name); if (!context->write(AsciiSolidPrefix, sizeof(AsciiSolidPrefix) - 1ull)) @@ -454,14 +422,12 @@ struct Parse if (!context->write("\n", sizeof("\n") - 1ull)) return false; } - if (!context->write(AsciiEndSolidPrefix, sizeof(AsciiEndSolidPrefix) - 1ull)) return false; if (!context->write(solidName.data(), solidName.size())) return false; return true; } - static bool writeFaceText(const hlsl::float32_t3& v1, const hlsl::float32_t3& v2, const hlsl::float32_t3& v3, const hlsl::uint32_t3& idx, const asset::ICPUPolygonGeometry::SDataView& normalView, const bool flipHandedness, Context* context) { hlsl::float32_t3 attrNormal = {}; @@ -492,58 +458,45 @@ struct Parse return context->write(faceText.data(), static_cast(cursor - faceText.data())); } }; - } - CSTLMeshWriter::CSTLMeshWriter() { #ifdef _NBL_DEBUG setDebugName("CSTLMeshWriter"); #endif } - CSTLMeshWriter::~CSTLMeshWriter() { } - const char** CSTLMeshWriter::getAssociatedFileExtensions() const { static const char* ext[] = { "stl", nullptr }; return ext; } - writer_flags_t CSTLMeshWriter::getSupportedFlags() { return asset::EWF_BINARY; } - writer_flags_t CSTLMeshWriter::getForcedFlags() { return EWF_NONE; } - bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { using Context = Parse::Context; - if (!_override) getDefaultOverride(_override); - IAssetWriter::SAssetWriteContext inCtx{_params, _file}; const asset::ICPUPolygonGeometry* geom = IAsset::castDown(_params.rootAsset); if (!geom) return false; - system::IFile* file = _override->getOutputFile(_file, inCtx, {geom, 0u}); if (!file) return false; - Context context = {IAssetWriter::SAssetWriteContext{inCtx.params, file}}; _params.logger.log("WRITING STL: writing the file %s", system::ILogger::ELL_INFO, file->getFileName().string().c_str()); - const auto flags = _override->getAssetWritingFlags(context.writeContext, geom, 0u); const bool 
binary = flags.hasAnyFlag(asset::EWF_BINARY); - uint64_t expectedSize = 0ull; bool sizeKnown = false; if (binary) @@ -551,22 +504,18 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ expectedSize = Parse::BinaryPrefixBytes + static_cast(geom->getPrimitiveCount()) * Parse::BinaryTriangleRecordBytes; sizeKnown = true; } - context.ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, expectedSize, sizeKnown, file); if (impl::SFileAccess::logInvalidPlan(_params.logger, "STL writer", file->getFileName().string().c_str(), context.ioPlan)) return false; - if (context.ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile && sizeKnown) context.ioBuffer.reserve(static_cast(expectedSize)); else context.ioBuffer.reserve(static_cast(std::min(context.ioPlan.chunkSizeBytes(), Parse::IoFallbackReserveBytes))); - const bool written = binary ? Parse::writeMeshBinary(geom, &context) : Parse::writeMeshASCII(geom, &context); if (!written) return false; if (!context.flush()) return false; - const uint64_t ioMinWrite = context.writeTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = context.writeTelemetry.getAvgOrZero(); impl::SFileAccess::logTinyIO(_params.logger, "STL writer", file->getFileName().string().c_str(), context.writeTelemetry, context.fileOffset, _params.ioPolicy, "writes"); @@ -574,10 +523,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(context.fileOffset), binary ? 
1 : 0, static_cast(context.writeTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(context.ioPlan.strategy).c_str(), static_cast(context.ioPlan.chunkSizeBytes()), context.ioPlan.reason); - return true; } - } - #endif diff --git a/src/nbl/asset/interchange/SGeometryViewDecode.h b/src/nbl/asset/interchange/SGeometryViewDecode.h index c8d9ae43cc..46e82dfbd4 100644 --- a/src/nbl/asset/interchange/SGeometryViewDecode.h +++ b/src/nbl/asset/interchange/SGeometryViewDecode.h @@ -18,7 +18,6 @@ class SGeometryViewDecode Semantic, Stored }; - template struct Prepared { @@ -28,7 +27,6 @@ class SGeometryViewDecode uint32_t channels = 0u; bool normalized = false; hlsl::shapes::AABB<4, hlsl::float64_t> range = hlsl::shapes::AABB<4, hlsl::float64_t>::create(); - inline explicit operator bool() const { return data != nullptr && stride != 0u && format != EF_UNKNOWN && channels != 0u; @@ -46,7 +44,6 @@ class SGeometryViewDecode return SGeometryViewDecode::template decodePrepared(*this, ix, out); } }; - template static inline Prepared prepare(const ICPUPolygonGeometry::SDataView& view) { @@ -67,13 +64,11 @@ class SGeometryViewDecode } return retval; } - template static inline bool decodeElement(const ICPUPolygonGeometry::SDataView& view, const size_t ix, Out& out) { return prepare(view).decode(ix, out); } - private: template static inline bool decodePreparedComponents(const Prepared& prepared, const size_t ix, T* out, const uint32_t outDim) @@ -98,7 +93,6 @@ class SGeometryViewDecode out[i] = static_cast(tmp[i]); return true; } - template requires hlsl::concepts::Vector static inline bool decodePrepared(const Prepared& prepared, const size_t ix, V& out) { @@ -111,7 +105,6 @@ class SGeometryViewDecode out[i] = tmp[i]; return true; } - template static inline bool decodePrepared(const Prepared& prepared, const size_t ix, T* out, const uint32_t outDim) { diff --git 
a/src/nbl/asset/interchange/impl/SBinaryData.h b/src/nbl/asset/interchange/impl/SBinaryData.h index 9b44cf5297..29e19b5c87 100644 --- a/src/nbl/asset/interchange/impl/SBinaryData.h +++ b/src/nbl/asset/interchange/impl/SBinaryData.h @@ -17,7 +17,6 @@ struct BinaryData std::reverse_copy(it, it + sizeof(retval), reinterpret_cast(&retval)); return retval; } - template static inline T loadUnaligned(const void* src, const bool swapEndian = false) { @@ -27,13 +26,11 @@ struct BinaryData std::memcpy(&value, src, sizeof(value)); return swapEndian ? byteswap(value) : value; } - template static inline void storeUnaligned(void* dst, const T& value) { std::memcpy(dst, &value, sizeof(value)); } - template static inline void storeUnalignedAdvance(uint8_t*& dst, const T& value) { diff --git a/src/nbl/asset/interchange/impl/SFileAccess.h b/src/nbl/asset/interchange/impl/SFileAccess.h index 671b5371c8..274ceb90d0 100644 --- a/src/nbl/asset/interchange/impl/SFileAccess.h +++ b/src/nbl/asset/interchange/impl/SFileAccess.h @@ -13,7 +13,6 @@ class SFileAccess public: static inline bool isMappable(const system::IFile* file) { return file && core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); } static inline SResolvedFileIOPolicy resolvePlan(const SFileIOPolicy& ioPolicy, const uint64_t payloadBytes, const bool sizeKnown, const system::IFile* file) { return SResolvedFileIOPolicy(ioPolicy, payloadBytes, sizeKnown, isMappable(file)); } - template static inline bool logInvalidPlan(Logger& logger, const char* const owner, const char* const fileName, const SResolvedFileIOPolicy& ioPlan) { @@ -22,7 +21,6 @@ class SFileAccess logger.log("%s: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, owner, fileName, ioPlan.reason); return true; } - template static inline void logTinyIO(Logger& logger, const char* const owner, const char* const fileName, const SInterchangeIO::STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy, const char* 
const opName) { @@ -34,7 +32,6 @@ class SFileAccess static_cast(telemetry.getMinOrZero()), static_cast(telemetry.getAvgOrZero())); } - static inline const uint8_t* readRange(system::IFile* file, const size_t offset, const size_t bytes, core::vector& storage, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr, const bool zeroTerminate = false) { storage.resize(bytes + (zeroTerminate ? 1ull : 0ull), 0u); @@ -44,7 +41,6 @@ class SFileAccess storage[bytes] = 0u; return storage.data(); } - static inline const uint8_t* mapOrReadWholeFile(system::IFile* file, const size_t bytes, core::vector& storage, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr, bool* wasMapped = nullptr, const bool zeroTerminate = false) { if (wasMapped) @@ -64,7 +60,6 @@ class SFileAccess return readRange(file, 0ull, bytes, storage, ioPlan, ioTelemetry, zeroTerminate); } }; - class SLoadSession { public: @@ -74,7 +69,6 @@ class SLoadSession uint64_t payloadBytes = 0ull; const char* owner = nullptr; std::string fileName = {}; - template static inline bool begin(Logger& logger, const char* const owner, system::IFile* file, const SFileIOPolicy& ioPolicy, const uint64_t payloadBytes, const bool sizeKnown, SLoadSession& out) { @@ -89,12 +83,10 @@ class SLoadSession out.fileName = file->getFileName().string(); return !SFileAccess::logInvalidPlan(logger, owner, out.fileName.c_str(), out.ioPlan); } - inline bool isWholeFile() const { return ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile; } inline const uint8_t* mappedPointer() const { return file && isWholeFile() ? 
reinterpret_cast(static_cast(file)->getMappedPointer()) : nullptr; } inline const uint8_t* readRange(const size_t offset, const size_t bytes, core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, const bool zeroTerminate = false) const { return SFileAccess::readRange(file, offset, bytes, storage, ioPlan, ioTelemetry, zeroTerminate); } inline const uint8_t* mapOrReadWholeFile(core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, bool* const wasMapped = nullptr, const bool zeroTerminate = false) const { return SFileAccess::mapOrReadWholeFile(file, static_cast(payloadBytes), storage, ioPlan, ioTelemetry, wasMapped, zeroTerminate); } - template inline void logTinyIO(Logger& logger, const Telemetry& telemetry, const char* const opName = "reads") const { diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h index a560a04219..88bc46696c 100644 --- a/src/nbl/asset/interchange/impl/STextParse.h +++ b/src/nbl/asset/interchange/impl/STextParse.h @@ -34,36 +34,29 @@ struct TextParse return true; } } - template static inline bool parseExactNumber(const char* const begin, const char* const end, T& out) { auto ptr = begin; return parseNumber(ptr, end, out) && ptr == end; } - template static inline bool parseExactNumber(const std::string_view token, T& out) { return parseExactNumber(token.data(), token.data() + token.size(), out); } - template static inline bool parseNonZeroNumber(const char*& ptr, const char* const end, T& out) { return parseNumber(ptr, end, out) && out != static_cast(0); } - static inline bool isInlineWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\v' || c == '\f'; } - static inline void skipInlineWhitespace(const char*& ptr, const char* const end) { while (ptr < end && isInlineWhitespace(*ptr)) ++ptr; } - static inline void skipWhitespace(const char*& ptr, const char* const end) { while (ptr < end && core::isspace(*ptr)) ++ptr; } - static inline 
std::string_view trimWhitespace(std::string_view token) { while (!token.empty() && core::isspace(token.front())) @@ -72,17 +65,14 @@ struct TextParse token.remove_suffix(1ull); return token; } - static inline std::optional readToken(const char*& cursor, const char* const end) { skipWhitespace(cursor, end); if (cursor >= end) return std::nullopt; - const auto* tokenEnd = cursor; while (tokenEnd < end && !core::isspace(*tokenEnd)) ++tokenEnd; - const std::string_view token(cursor, static_cast(tokenEnd - cursor)); cursor = tokenEnd; return token; From b1fe5fc885ec7e2285df1f3d532bbd7f8e03261c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 16:26:14 +0100 Subject: [PATCH 082/118] Tighten helper declarations --- .../asset/interchange/SLoaderRuntimeTuning.h | 26 +++++-------------- src/nbl/asset/interchange/impl/SBinaryData.h | 3 +-- src/nbl/asset/interchange/impl/SFileAccess.h | 3 +-- src/nbl/asset/interchange/impl/STextParse.h | 3 +-- 4 files changed, 10 insertions(+), 25 deletions(-) diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index 67eddef32f..6d51a60e9f 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -17,36 +17,24 @@ namespace nbl::asset { struct SLoaderRuntimeTuningRequest { - uint64_t inputBytes = 0ull; - uint64_t totalWorkUnits = 0ull; - uint64_t minWorkUnitsPerWorker = 1ull; - uint64_t minBytesPerWorker = 1ull; - uint32_t hardwareThreads = 0u; - uint32_t hardMaxWorkers = 0u; - uint32_t targetChunksPerWorker = 0u; - uint64_t minChunkWorkUnits = 1ull; - uint64_t maxChunkWorkUnits = std::numeric_limits::max(); + uint64_t inputBytes = 0ull, totalWorkUnits = 0ull, minWorkUnitsPerWorker = 1ull, minBytesPerWorker = 1ull; + uint32_t hardwareThreads = 0u, hardMaxWorkers = 0u, targetChunksPerWorker = 0u; + uint64_t minChunkWorkUnits = 1ull, maxChunkWorkUnits = std::numeric_limits::max(); const 
uint8_t* sampleData = nullptr; - uint64_t sampleBytes = 0ull; - uint32_t samplePasses = 0u; - uint32_t sampleMaxCandidates = 0u; - uint64_t sampleMinWorkUnits = 0ull; + uint64_t sampleBytes = 0ull, sampleMinWorkUnits = 0ull; + uint32_t samplePasses = 0u, sampleMaxCandidates = 0u; }; struct SLoaderRuntimeTuningResult { - size_t workerCount = 1ull; + size_t workerCount = 1ull, chunkCount = 1ull; uint64_t chunkWorkUnits = 1ull; - size_t chunkCount = 1ull; }; struct SLoaderRuntimeTuner { private: struct SBenchmarkSampleStats { - uint64_t medianNs = 0ull; - uint64_t minNs = 0ull; - uint64_t maxNs = 0ull; - uint64_t totalNs = 0ull; + uint64_t medianNs = 0ull, minNs = 0ull, maxNs = 0ull, totalNs = 0ull; }; public: template diff --git a/src/nbl/asset/interchange/impl/SBinaryData.h b/src/nbl/asset/interchange/impl/SBinaryData.h index 29e19b5c87..13e64a84df 100644 --- a/src/nbl/asset/interchange/impl/SBinaryData.h +++ b/src/nbl/asset/interchange/impl/SBinaryData.h @@ -1,5 +1,4 @@ -// Internal src-only header. -// Do not include from public headers. +// Internal src-only header. Do not include from public headers. #ifndef _NBL_ASSET_IMPL_S_BINARY_DATA_H_INCLUDED_ #define _NBL_ASSET_IMPL_S_BINARY_DATA_H_INCLUDED_ #include diff --git a/src/nbl/asset/interchange/impl/SFileAccess.h b/src/nbl/asset/interchange/impl/SFileAccess.h index 274ceb90d0..2dfa24f65b 100644 --- a/src/nbl/asset/interchange/impl/SFileAccess.h +++ b/src/nbl/asset/interchange/impl/SFileAccess.h @@ -1,5 +1,4 @@ -// Internal src-only header. -// Do not include from public headers. +// Internal src-only header. Do not include from public headers. 
#ifndef _NBL_ASSET_IMPL_S_FILE_ACCESS_H_INCLUDED_ #define _NBL_ASSET_IMPL_S_FILE_ACCESS_H_INCLUDED_ #include "nbl/core/declarations.h" diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h index 88bc46696c..21e5ee4e30 100644 --- a/src/nbl/asset/interchange/impl/STextParse.h +++ b/src/nbl/asset/interchange/impl/STextParse.h @@ -1,5 +1,4 @@ -// Internal src-only header. -// Do not include from public headers. +// Internal src-only header. Do not include from public headers. #ifndef _NBL_ASSET_IMPL_S_TEXT_PARSE_H_INCLUDED_ #define _NBL_ASSET_IMPL_S_TEXT_PARSE_H_INCLUDED_ #include "nbl/core/string/stringutil.h" From 503a9511ae81d09da41c8dd8a96fe21673e21b0a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 17:03:58 +0100 Subject: [PATCH 083/118] Restore comments and trim boilerplate --- include/nbl/asset/interchange/SFileIOPolicy.h | 40 +-- .../asset/interchange/SGeometryWriterCommon.h | 25 +- .../nbl/asset/interchange/SInterchangeIO.h | 35 +-- .../asset/interchange/SLoaderRuntimeTuning.h | 26 +- .../asset/interchange/COBJMeshFileLoader.cpp | 124 +++------ src/nbl/asset/interchange/COBJMeshWriter.cpp | 64 +---- .../asset/interchange/CPLYMeshFileLoader.cpp | 252 ++++++++---------- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 142 ++-------- .../asset/interchange/CSTLMeshFileLoader.cpp | 245 ++++------------- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 57 +--- .../asset/interchange/SGeometryViewDecode.h | 21 +- src/nbl/asset/interchange/impl/SBinaryData.h | 19 +- src/nbl/asset/interchange/impl/SFileAccess.h | 13 +- src/nbl/asset/interchange/impl/STextParse.h | 22 +- 14 files changed, 288 insertions(+), 797 deletions(-) diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 0e59986c0b..25d6fb2b0e 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -23,13 +23,7 @@ struct 
SFileIOPolicy { struct SRuntimeTuning { - enum class Mode : uint8_t - { - Sequential, - None = Sequential, - Heuristic, - Hybrid - }; + enum class Mode : uint8_t { Sequential, None = Sequential, Heuristic, Hybrid }; Mode mode = Mode::Heuristic; float maxOverheadRatio = 0.05f; float samplingBudgetRatio = 0.05f; @@ -52,11 +46,7 @@ struct SFileIOPolicy using Strategy = EFileIOStrategy; - enum E_FLAGS : uint8_t - { - EF_NONE = 0u, - EF_STRICT_BIT = 1u << 0u - }; + enum E_FLAGS : uint8_t { EF_NONE = 0u, EF_STRICT_BIT = 1u << 0u }; static inline constexpr uint64_t MIN_CHUNK_SIZE_BYTES = 64ull << 10u; static inline constexpr uint8_t MIN_CHUNK_SIZE_LOG2 = static_cast(std::bit_width(MIN_CHUNK_SIZE_BYTES) - 1u); @@ -105,14 +95,10 @@ struct SResolvedFileIOPolicy static inline constexpr SResolvedFileIOPolicy resolve(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) { const uint8_t maxStagingLog2 = SFileIOPolicy::clampBytesLog2(policy.maxStagingLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); - const uint8_t chunkSizeLog2 = std::min( - SFileIOPolicy::clampBytesLog2(policy.chunkSizeLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2), - maxStagingLog2); + const uint8_t chunkSizeLog2 = std::min(SFileIOPolicy::clampBytesLog2(policy.chunkSizeLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2), maxStagingLog2); const uint64_t maxStaging = SFileIOPolicy::bytesFromLog2(maxStagingLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); const uint64_t wholeThreshold = policy.wholeFileThresholdBytes(); - auto makeResolved = [&](const Strategy strategy, const char* const reason) -> SResolvedFileIOPolicy { SResolvedFileIOPolicy resolved = {}; resolved.strategy = strategy; resolved.chunkSizeLog2 = chunkSizeLog2; resolved.reason = reason; return resolved; }; - switch (policy.strategy) { case SFileIOPolicy::Strategy::Invalid: @@ -132,10 +118,7 @@ struct SResolvedFileIOPolicy { if (!sizeKnown) return makeResolved(fileMappable ? 
Strategy::WholeFile : Strategy::Chunked, fileMappable ? "auto_unknown_size_mappable_whole_file" : "auto_unknown_size"); - - const uint64_t wholeLimit = fileMappable ? - std::max(wholeThreshold, maxStaging) : - std::min(wholeThreshold, maxStaging); + const uint64_t wholeLimit = fileMappable ? std::max(wholeThreshold, maxStaging) : std::min(wholeThreshold, maxStaging); if (byteCount <= wholeLimit) return makeResolved(Strategy::WholeFile, fileMappable ? "auto_mappable_prefers_whole_file" : "auto_small_enough_for_whole_file"); return makeResolved(Strategy::Chunked, "auto_too_large_for_whole_file"); @@ -153,16 +136,11 @@ struct to_string_helper { switch (value) { - case asset::EFileIOStrategy::Invalid: - return "invalid"; - case asset::EFileIOStrategy::Auto: - return "auto"; - case asset::EFileIOStrategy::WholeFile: - return "whole"; - case asset::EFileIOStrategy::Chunked: - return "chunked"; - default: - return "unknown"; + case asset::EFileIOStrategy::Invalid: return "invalid"; + case asset::EFileIOStrategy::Auto: return "auto"; + case asset::EFileIOStrategy::WholeFile: return "whole"; + case asset::EFileIOStrategy::Chunked: return "chunked"; + default: return "unknown"; } } }; diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index 55422aa02c..9ba55cc6aa 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -41,9 +41,7 @@ class SGeometryWriterCommon SPolygonGeometryWriteItem item = {}; item.geometry = static_cast(ref.geometry.get()); item.transform = hlsl::math::linalg::promoted_mul(transform, ref.hasTransform() ? 
ref.transform : identity); - item.instanceIx = instanceIx; - item.targetIx = targetIx; - item.geometryIx = geometryIx; + item.instanceIx = instanceIx; item.targetIx = targetIx; item.geometryIx = geometryIx; out.emplace_back(item); } }; @@ -51,11 +49,7 @@ class SGeometryWriterCommon { const auto* geometry = static_cast*>(rootAsset); if (geometry->getPrimitiveType() == IGeometryBase::EPrimitiveType::Polygon) - { - SPolygonGeometryWriteItem item = {}; - item.geometry = static_cast(rootAsset); - out.emplace_back(item); - } + out.emplace_back(SPolygonGeometryWriteItem{.geometry = static_cast(rootAsset)}); return out; } if (rootAsset->getAssetType() == IAsset::ET_GEOMETRY_COLLECTION) @@ -111,13 +105,11 @@ class SGeometryWriterCommon const size_t indexCount = indexView.getElementCount(); if ((indexCount % 3ull) != 0ull) return false; - outFaceCount = indexCount / 3ull; - return true; + return (outFaceCount = indexCount / 3ull), true; } if ((vertexCount % 3ull) != 0ull) return false; - outFaceCount = vertexCount / 3ull; - return true; + return (outFaceCount = vertexCount / 3ull), true; } template static inline bool visitTriangleIndices(const ICPUPolygonGeometry* geom, Visitor&& visitor) @@ -137,11 +129,7 @@ class SGeometryWriterCommon return false; if constexpr (std::is_same_v, bool>) return visitor(u0, u1, u2); - else - { - visitor(u0, u1, u2); - return true; - } + else { visitor(u0, u1, u2); return true; } }; const auto& indexView = geom->getIndexView(); if (!indexView) @@ -171,8 +159,7 @@ class SGeometryWriterCommon { case EIT_32BIT: return visitIndexed.template operator()(); case EIT_16BIT: return visitIndexed.template operator()(); - default: - return false; + default: return false; } } template diff --git a/include/nbl/asset/interchange/SInterchangeIO.h b/include/nbl/asset/interchange/SInterchangeIO.h index 0ae55deedb..c853be23fd 100644 --- a/include/nbl/asset/interchange/SInterchangeIO.h +++ b/include/nbl/asset/interchange/SInterchangeIO.h @@ -17,24 +17,10 @@ 
namespace nbl::asset class SInterchangeIO { public: - struct STelemetry - { - uint64_t callCount = 0ull; - uint64_t totalBytes = 0ull; - uint64_t minBytes = std::numeric_limits::max(); - inline void account(const uint64_t bytes) { ++callCount; totalBytes += bytes; if (bytes < minBytes) minBytes = bytes; } - inline uint64_t getMinOrZero() const { return callCount ? minBytes : 0ull; } - inline uint64_t getAvgOrZero() const { return callCount ? (totalBytes / callCount) : 0ull; } - }; + struct STelemetry { uint64_t callCount = 0ull, totalBytes = 0ull, minBytes = std::numeric_limits::max(); inline void account(const uint64_t bytes) { ++callCount; totalBytes += bytes; if (bytes < minBytes) minBytes = bytes; } inline uint64_t getMinOrZero() const { return callCount ? minBytes : 0ull; } inline uint64_t getAvgOrZero() const { return callCount ? (totalBytes / callCount) : 0ull; } }; using SReadTelemetry = STelemetry; using SWriteTelemetry = STelemetry; - static inline bool isTinyIOTelemetryLikely( - const STelemetry& telemetry, - const uint64_t payloadBytes, - const uint64_t bigPayloadThresholdBytes = (1ull << 20), - const uint64_t lowAvgBytesThreshold = 1024ull, - const uint64_t tinyChunkBytesThreshold = 64ull, - const uint64_t tinyChunkCallsThreshold = 1024ull) + static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const uint64_t bigPayloadThresholdBytes = (1ull << 20), const uint64_t lowAvgBytesThreshold = 1024ull, const uint64_t tinyChunkBytesThreshold = 64ull, const uint64_t tinyChunkCallsThreshold = 1024ull) { if (payloadBytes <= bigPayloadThresholdBytes) return false; @@ -61,12 +47,7 @@ class SInterchangeIO { using clock_t = std::chrono::high_resolution_clock; const auto ioStart = ioTime ? 
clock_t::now() : clock_t::time_point{}; - auto finalize = [&](const bool ok) -> bool - { - if (ioTime) - *ioTime = std::chrono::duration_cast(clock_t::now() - ioStart); - return ok; - }; + auto finalize = [&](const bool ok) -> bool { if (ioTime) *ioTime = std::chrono::duration_cast(clock_t::now() - ioStart); return ok; }; if (!file || (!dst && bytes != 0ull)) return finalize(false); if (bytes == 0ull) @@ -87,10 +68,10 @@ class SInterchangeIO system::IFile::success_t success; file->read(success, out + bytesRead, offset + bytesRead, toRead); if (!success) - return false; + return finalize(false); const size_t processed = success.getBytesProcessed(); if (processed == 0ull) - return false; + return finalize(false); if (ioTelemetry) ioTelemetry->account(processed); bytesRead += processed; @@ -99,11 +80,7 @@ class SInterchangeIO } } } - struct SBufferRange - { - const void* data = nullptr; - size_t byteCount = 0ull; - }; + struct SBufferRange { const void* data = nullptr; size_t byteCount = 0ull; }; static inline bool writeBuffersWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { if (!file) diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index 6d51a60e9f..74a6f668ce 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -32,20 +32,14 @@ struct SLoaderRuntimeTuningResult struct SLoaderRuntimeTuner { private: - struct SBenchmarkSampleStats - { - uint64_t medianNs = 0ull, minNs = 0ull, maxNs = 0ull, totalNs = 0ull; - }; + struct SBenchmarkSampleStats { uint64_t medianNs = 0ull, minNs = 0ull, maxNs = 0ull, totalNs = 0ull; }; public: template requires std::invocable static void dispatchWorkers(const size_t workerCount, Fn&& fn) { if (workerCount <= 1ull) - { - fn(0ull); - return; - } + return fn(0ull); std::vector 
workers; workers.reserve(workerCount - 1ull); for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) @@ -141,11 +135,7 @@ struct SLoaderRuntimeTuner using RTMode = SFileIOPolicy::SRuntimeTuning::Mode; SLoaderRuntimeTuningResult result = {}; if (request.totalWorkUnits == 0ull) - { - result.chunkWorkUnits = 0ull; - result.chunkCount = 0ull; - return result; - } + return (result.chunkWorkUnits = 0ull), (result.chunkCount = 0ull), result; const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(request.hardwareThreads); size_t maxWorkers = hw; if (request.hardMaxWorkers > 0u) @@ -225,10 +215,7 @@ struct SLoaderRuntimeTuner const uint64_t spareBudgetNs = samplingBudgetNs - spentNs; const uint64_t estimatedEvalNs = std::max(1ull, heuristicStatsProbe.medianNs); const uint64_t estimatedEvaluations = std::max(1ull, spareBudgetNs / estimatedEvalNs); - uint32_t observations = static_cast(std::clamp( - estimatedEvaluations / static_cast(alternativeCandidates), - 1ull, - 3ull)); + const uint32_t observations = static_cast(std::clamp(estimatedEvaluations / static_cast(alternativeCandidates), 1ull, 3ull)); SBenchmarkSampleStats bestStats = heuristicStatsProbe; size_t bestWorker = heuristicWorkerCount; for (const size_t candidate : candidates) @@ -243,10 +230,7 @@ struct SLoaderRuntimeTuner continue; spentNs += candidateStats.totalNs; if (candidateStats.medianNs < bestStats.medianNs) - { - bestStats = candidateStats; - bestWorker = candidate; - } + bestStats = candidateStats, bestWorker = candidate; } if (bestWorker != heuristicWorkerCount) { diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 46222d0f05..b1df506062 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -29,14 +29,7 @@ struct Parse { static constexpr uint32_t UV0 = 0u; using Common = impl::TextParse; - struct VertexDedupNode - { - int32_t uv = -1; - int32_t normal = -1; 
- uint32_t smoothingGroup = 0u; - uint32_t outIndex = 0u; - int32_t next = -1; - }; + struct VertexDedupNode { int32_t uv = -1; int32_t normal = -1; uint32_t smoothingGroup = 0u; uint32_t outIndex = 0u; int32_t next = -1; }; static bool resolveIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) { if (rawIndex > 0) @@ -60,24 +53,15 @@ struct Parse { Common::skipInlineWhitespace(linePtr, lineEnd); if (linePtr >= lineEnd) - { - outGroup = 0u; - return; - } + return void(outGroup = 0u); const char* const tokenStart = linePtr; while (linePtr < lineEnd && !Common::isInlineWhitespace(*linePtr)) ++linePtr; const std::string_view token(tokenStart, static_cast(linePtr - tokenStart)); if (token.size() == 2u && std::tolower(token[0]) == 'o' && std::tolower(token[1]) == 'n') - { - outGroup = 1u; - return; - } + return void(outGroup = 1u); if (token.size() == 3u && std::tolower(token[0]) == 'o' && std::tolower(token[1]) == 'f' && std::tolower(token[2]) == 'f') - { - outGroup = 0u; - return; - } + return void(outGroup = 0u); uint32_t value = 0u; outGroup = Common::parseExactNumber(token, value) ? 
value : 0u; } @@ -94,50 +78,35 @@ struct Parse static bool parseTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, std::array& out, const size_t posCount, const size_t uvCount, const size_t normalCount) { const char* ptr = lineStart; + auto parsePositive = [&](const size_t count, int32_t& outIx) -> bool { + uint32_t value = 0u; + if (!Common::parseNonZeroNumber(ptr, lineEnd, value)) + return false; + if (value > static_cast(std::numeric_limits::max()) || value > count) + return false; + outIx = value - 1u; + return true; + }; for (uint32_t corner = 0u; corner < 3u; ++corner) { Common::skipInlineWhitespace(ptr, lineEnd); if (ptr >= lineEnd || !core::isdigit(*ptr)) return false; int32_t posIx = -1; - { - uint32_t value = 0u; - if (!Common::parseNonZeroNumber(ptr, lineEnd, value)) - return false; - if (value > static_cast(std::numeric_limits::max())) - return false; - if (value > posCount) - return false; - posIx = value - 1u; - } + if (!parsePositive(posCount, posIx)) + return false; if (ptr >= lineEnd || *ptr != '/') return false; ++ptr; int32_t uvIx = -1; - { - uint32_t value = 0u; - if (!Common::parseNonZeroNumber(ptr, lineEnd, value)) - return false; - if (value > static_cast(std::numeric_limits::max())) - return false; - if (value > uvCount) - return false; - uvIx = value - 1u; - } + if (!parsePositive(uvCount, uvIx)) + return false; if (ptr >= lineEnd || *ptr != '/') return false; ++ptr; int32_t normalIx = -1; - { - uint32_t value = 0u; - if (!Common::parseNonZeroNumber(ptr, lineEnd, value)) - return false; - if (value > static_cast(std::numeric_limits::max())) - return false; - if (value > normalCount) - return false; - normalIx = value - 1u; - } + if (!parsePositive(normalCount, normalIx)) + return false; out[corner] = hlsl::int32_t3(posIx, uvIx, normalIx); } Common::skipInlineWhitespace(ptr, lineEnd); @@ -150,43 +119,38 @@ struct Parse return false; idx = hlsl::int32_t3(-1, -1, -1); const char* ptr = linePtr; - if (*ptr != 
'-' && *ptr != '+') - { - uint32_t posRaw = 0u; - if (!Common::parseNonZeroNumber(ptr, lineEnd, posRaw)) + auto parsePositive = [&](const size_t count, int32_t& outIx) -> bool { + uint32_t raw = 0u; + if (!Common::parseNonZeroNumber(ptr, lineEnd, raw)) return false; - if (posRaw > static_cast(std::numeric_limits::max())) + if (raw > static_cast(std::numeric_limits::max()) || raw > count) return false; - if (posRaw > posCount) + outIx = raw - 1u; + return true; + }; + auto parseResolved = [&](const size_t count, int32_t& outIx) -> bool { + int32_t raw = 0; + return Common::parseNonZeroNumber(ptr, lineEnd, raw) && resolveIndex(raw, count, outIx); + }; + if (*ptr != '-' && *ptr != '+') + { + if (!parsePositive(posCount, idx.x)) return false; - idx.x = posRaw - 1u; if (ptr < lineEnd && *ptr == '/') { ++ptr; if (ptr < lineEnd && *ptr != '/') { - uint32_t uvRaw = 0u; - if (!Common::parseNonZeroNumber(ptr, lineEnd, uvRaw)) - return false; - if (uvRaw > static_cast(std::numeric_limits::max())) + if (!parsePositive(uvCount, idx.y)) return false; - if (uvRaw > uvCount) - return false; - idx.y = uvRaw - 1u; } if (ptr < lineEnd && *ptr == '/') { ++ptr; if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) { - uint32_t normalRaw = 0u; - if (!Common::parseNonZeroNumber(ptr, lineEnd, normalRaw)) - return false; - if (normalRaw > static_cast(std::numeric_limits::max())) - return false; - if (normalRaw > normalCount) + if (!parsePositive(normalCount, idx.z)) return false; - idx.z = normalRaw - 1u; } } else if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) @@ -197,19 +161,14 @@ struct Parse } else { - int32_t raw = 0; - if (!Common::parseNonZeroNumber(ptr, lineEnd, raw)) - return false; - if (!resolveIndex(raw, posCount, idx.x)) + if (!parseResolved(posCount, idx.x)) return false; if (ptr < lineEnd && *ptr == '/') { ++ptr; if (ptr < lineEnd && *ptr != '/') { - if (!Common::parseNonZeroNumber(ptr, lineEnd, raw)) - return false; - if (!resolveIndex(raw, uvCount, idx.y)) + if 
(!parseResolved(uvCount, idx.y)) return false; } if (ptr < lineEnd && *ptr == '/') @@ -217,9 +176,7 @@ struct Parse ++ptr; if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) { - if (!Common::parseNonZeroNumber(ptr, lineEnd, raw)) - return false; - if (!resolveIndex(raw, normalCount, idx.z)) + if (!parseResolved(normalCount, idx.z)) return false; } } @@ -840,9 +797,14 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( core::vector> outputAssets; uint64_t objectCount = 1ull; if (!buildCollections) { + // Plain OBJ is still just one polygon geometry here. outputAssets.push_back(core::smart_refctd_ptr_static_cast( std::move(loadedGeometries.front().geometry))); } else { + // Plain OBJ can group many polygon geometries with `o` and `g`, but it + // still does not define a real scene graph, instancing, or node transforms. + // Keep that as geometry collections instead of fabricating an ICPUScene on + // load. core::vector objectNames; core::vector> objectCollections; diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 05bc4f5fad..4b9ed20cd6 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -30,52 +30,33 @@ COBJMeshWriter::COBJMeshWriter() setDebugName("COBJMeshWriter"); #endif } - uint64_t COBJMeshWriter::getSupportedAssetTypesBitfield() const { return IAsset::ET_GEOMETRY | IAsset::ET_GEOMETRY_COLLECTION | IAsset::ET_SCENE; } - const char** COBJMeshWriter::getAssociatedFileExtensions() const { static const char* ext[] = { "obj", nullptr }; return ext; } - writer_flags_t COBJMeshWriter::getSupportedFlags() { return EWF_NONE; } - writer_flags_t COBJMeshWriter::getForcedFlags() { return EWF_NONE; } - namespace { - struct Parse { static constexpr uint32_t UV0 = 0u; static constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 8ull; static constexpr size_t MaxUInt32Chars = std::numeric_limits::digits10 + 1ull; static constexpr 
size_t MaxIndexTokenBytes = MaxUInt32Chars * 3ull + 2ull; - - struct IndexStringRef - { - uint32_t offset = 0u; - uint16_t length = 0u; - }; - - struct GeometryTransformState - { - hlsl::float32_t3x4 transform; - hlsl::float32_t3x3 linear; - bool identity = true; - bool reverseWinding = false; - hlsl::math::linalg::cofactors_base normalTransform; - }; + struct IndexStringRef { uint32_t offset = 0u; uint16_t length = 0u; }; + struct GeometryTransformState { hlsl::float32_t3x4 transform; hlsl::float32_t3x3 linear; bool identity = true; bool reverseWinding = false; hlsl::math::linalg::cofactors_base normalTransform; }; template static void appendVecLine(std::string& out, const char* prefix, const size_t prefixSize, const Vec& values) @@ -87,17 +68,14 @@ struct Parse char* cursor = lineBegin; char* const lineEnd = out.data() + out.size(); hlsl::array_get getter; - std::memcpy(cursor, prefix, prefixSize); cursor += prefixSize; - for (size_t i = 0ull; i < N; ++i) { cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, getter(values, static_cast(i))); if (cursor < lineEnd) *(cursor++) = (i + 1ull < N) ? ' ' : '\n'; } - out.resize(oldSize + static_cast(cursor - lineBegin)); } @@ -166,41 +144,24 @@ struct Parse const auto linear = hlsl::float32_t3x3(transform); return {.transform = transform, .linear = linear, .identity = SGeometryWriterCommon::isIdentityTransform(transform), .reverseWinding = hlsl::determinant(linear) < 0.f, .normalTransform = hlsl::math::linalg::cofactors_base::create(linear)}; } - - static hlsl::float32_t3 applyPosition(const GeometryTransformState& state, const hlsl::float32_t3& value) - { - if (state.identity) - return value; - return hlsl::mul(state.transform, hlsl::float32_t4(value.x, value.y, value.z, 1.f)); - } - - static hlsl::float32_t3 applyNormal(const GeometryTransformState& state, const hlsl::float32_t3& value) - { - return state.identity ? 
value : state.normalTransform.normalTransform(value); - } + static hlsl::float32_t3 applyPosition(const GeometryTransformState& state, const hlsl::float32_t3& value) { return state.identity ? value : hlsl::mul(state.transform, hlsl::float32_t4(value.x, value.y, value.z, 1.f)); } + static hlsl::float32_t3 applyNormal(const GeometryTransformState& state, const hlsl::float32_t3& value) { return state.identity ? value : state.normalTransform.normalTransform(value); } }; - } - bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { SFileWriteTelemetry ioTelemetry = {}; - if (!_override) getDefaultOverride(_override); - if (!_file || !_params.rootAsset) return false; - const auto items = SGeometryWriterCommon::collectPolygonGeometryWriteItems(_params.rootAsset); if (items.empty()) return false; - SAssetWriteContext ctx = {_params, _file}; system::IFile* file = _override->getOutputFile(_file, ctx, {_params.rootAsset, 0u}); if (!file) return false; - std::string output; output.append("# Nabla OBJ\n"); uint64_t totalVertexCount = 0ull; @@ -215,11 +176,9 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const auto* geom = item.geometry; if (!geom || !geom->valid()) return false; - const auto& positionView = geom->getPositionView(); if (!positionView) return false; - const auto& normalView = geom->getNormalView(); const bool hasNormals = static_cast(normalView); const size_t vertexCount = positionView.getElementCount(); @@ -233,19 +192,17 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ return false; if (hasUVs && uvView->getElementCount() != vertexCount) return false; - const auto* indexing = geom->getIndexingCallback(); if (!indexing) return false; if (indexing->knownTopology() != E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) return false; - size_t faceCount = 0ull; if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) return false; 
- const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool flipHandedness = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + // Scene input is flattened here by baking transforms and writing every collected polygon geometry as its own OBJ object block. const auto transformState = Parse::createTransformState(item.transform); const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(positionView); const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; @@ -253,11 +210,9 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const SemanticDecode positionDecode = tightPositions ? SemanticDecode{} : SGeometryViewDecode::prepare(positionView); const SemanticDecode uvDecode = (!hasUVs || tightUV) ? SemanticDecode{} : SGeometryViewDecode::prepare(*uvView); const SemanticDecode normalDecode = (!hasNormals || tightNormals) ? SemanticDecode{} : SGeometryViewDecode::prepare(normalView); - if (itemIx != 0u) output.push_back('\n'); Parse::appendHeader(output, item); - for (size_t i = 0u; i < vertexCount; ++i) { hlsl::float32_t3 vertex = {}; @@ -270,7 +225,6 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ vertex.x = -vertex.x; Parse::appendVecLine(output, "v ", sizeof("v ") - 1ull, vertex); } - if (hasUVs) { for (size_t i = 0u; i < vertexCount; ++i) @@ -285,7 +239,6 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ Parse::appendVecLine(output, "vt ", sizeof("vt ") - 1ull, uv); } } - if (hasNormals) { for (size_t i = 0u; i < vertexCount; ++i) @@ -301,7 +254,6 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ Parse::appendVecLine(output, "vn ", sizeof("vn ") - 1ull, normal); } } - core::vector faceIndexRefs; faceIndexRefs.reserve(vertexCount); std::string faceIndexStorage; @@ -314,7 +266,6 @@ bool 
COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ Parse::appendIndexToken(faceIndexStorage, faceIndexRefs, positionIx, hasUVs, uvIx, hasNormals, normalIx); } const hlsl::uint32_t3 faceLimit(static_cast(faceIndexRefs.size())); - if (!SGeometryWriterCommon::visitTriangleIndices(geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) -> bool { const hlsl::uint32_t3 face(transformState.reverseWinding ? i0 : i2, i1, transformState.reverseWinding ? i2 : i0); if (hlsl::any(glm::greaterThanEqual(face, faceLimit))) @@ -332,11 +283,9 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ totalVertexCount += vertexCount; totalFaceCount += faceCount; } - const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(output.size()), true, file); if (impl::SFileAccess::logInvalidPlan(_params.logger, "OBJ writer", file->getFileName().string().c_str(), ioPlan)) return false; - const bool writeOk = SInterchangeIO::writeFileWithPolicy(file, ioPlan, output.data(), output.size(), &ioTelemetry); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); @@ -346,10 +295,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ static_cast(totalVertexCount), static_cast(totalFaceCount), static_cast(items.size()), static_cast(ioTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(ioPlan.strategy).c_str(), static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); - return writeOk; } - } - #endif // _NBL_COMPILE_WITH_OBJ_WRITER_ diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 4ebf2c9f95..68d178f004 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -161,10 +161,11 @@ struct Parse else _ctx.getNextLine(); } 
- std::string Name; - core::vector Properties; - size_t Count; - uint32_t KnownSize; + std::string Name; // name of the element. We only want "vertex" and "face" elements + // but we have to parse the others anyway. + core::vector Properties; // Properties of this element + size_t Count; // The number of elements in the file + uint32_t KnownSize; // known size in bytes, 0 if unknown }; static constexpr size_t DefaultIoReadWindowBytes = 50ull << 10; void init(size_t _ioReadWindowSize = DefaultIoReadWindowBytes) @@ -175,7 +176,7 @@ struct Parse LineEndPointer = EndPointer - 1; fillBuffer(); } - void fillBuffer() + void fillBuffer() // gets more data from the file { if (EndOfFile) return; @@ -186,8 +187,10 @@ struct Parse } const auto length = std::distance(StartPointer, EndPointer); auto newStart = Buffer.data(); + // copy the remaining data to the start of the buffer if (length && StartPointer != newStart) memmove(newStart, StartPointer, length); + // reset start position StartPointer = newStart; EndPointer = newStart + length; const size_t usableBufferSize = Buffer.size() > 0ull ? Buffer.size() - ReadWindowPaddingBytes : 0ull; @@ -198,6 +201,7 @@ struct Parse } const size_t requestSize = usableBufferSize - length; system::IFile::success_t success; + // read data from the file inner.mainFile->read(success, EndPointer, fileOffset, requestSize); const size_t bytesRead = success.getBytesProcessed(); ++readCallCount; @@ -206,21 +210,27 @@ struct Parse readMinBytes = bytesRead; fileOffset += bytesRead; EndPointer += bytesRead; + // if we didn't completely fill the buffer if (bytesRead != requestSize) { + // cauterize the string *EndPointer = 0; EndOfFile = true; } } - const char* getNextLine() + const char* getNextLine() // Split the string data into a line in place by terminating it instead of copying. 
{ + // move the start pointer along StartPointer = LineEndPointer + 1; + // crlf split across buffer move if (*StartPointer == '\n') *(StartPointer++) = '\0'; + // begin at the start of the next line const std::array Terminators = {'\0', '\r', '\n'}; auto terminator = std::find_first_of(StartPointer, EndPointer, Terminators.begin(), Terminators.end()); if (terminator != EndPointer) *(terminator++) = '\0'; + // we have reached the end of the buffer if (terminator == EndPointer) { if (EndOfFile) @@ -229,16 +239,20 @@ struct Parse *StartPointer = '\0'; return StartPointer; } + // get data from the file fillBuffer(); + // reset line end pointer LineEndPointer = StartPointer - 1; return StartPointer != EndPointer ? getNextLine() : StartPointer; } LineEndPointer = terminator - 1; WordLength = -1; + // return pointer to the start of the line return StartPointer; } - const char* getNextWord() + const char* getNextWord() // null terminate the next word on the previous line and move the next word pointer along since we already have a full line in the buffer, we never need to retrieve more data { + // move the start pointer along StartPointer += WordLength + 1; if (StartPointer >= EndPointer) { @@ -257,12 +271,16 @@ struct Parse return StartPointer; } assert(LineEndPointer <= EndPointer); + // process the next word const std::array WhiteSpace = {'\0', ' ', '\t'}; auto wordEnd = std::find_first_of(StartPointer, LineEndPointer, WhiteSpace.begin(), WhiteSpace.end()); + // null terminate the next word if (wordEnd != LineEndPointer) *(wordEnd++) = '\0'; + // find next word auto nextWord = std::find_if(wordEnd, LineEndPointer, [WhiteSpace](const char c) -> bool { return std::find(WhiteSpace.begin(), WhiteSpace.end(), c) == WhiteSpace.end(); }); WordLength = std::distance(StartPointer, nextWord) - 1; + // return pointer to the start of current word return StartPointer; } size_t getAbsoluteOffset(const char* ptr) const @@ -283,7 +301,7 @@ struct Parse EndOfFile = true; fileOffset = 
inner.mainFile ? inner.mainFile->getSize() : fileOffset; } - void moveForward(const size_t bytes) + void moveForward(const size_t bytes) // skips x bytes in the file, getting more data if required { assert(IsBinaryFile); size_t remaining = bytes; @@ -323,114 +341,79 @@ struct Parse const size_t tokenLen = WordLength >= 0 ? static_cast(WordLength + 1) : std::char_traits::length(word); return word + tokenLen; } - widest_int_t getInt(const E_FORMAT f) + inline bool ensureBytes(const size_t bytes) + { + if (StartPointer + bytes > EndPointer) + fillBuffer(); + return StartPointer + bytes <= EndPointer; + } + template + inline T loadBinaryScalar() + { + if (!ensureBytes(sizeof(T))) + return T{}; + const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); + StartPointer += sizeof(T); + return retval; + } + template + inline T parseCurrentWordValue() + { + const char* word = getNextWord(); + if (!word) + return T{}; + const char* const wordEnd = getCurrentWordEnd(word); + if (word == wordEnd) + return T{}; + T value = {}; + auto ptr = word; + if (Common::parseNumber(ptr, wordEnd, value) && ptr == wordEnd) + return value; + return ptr != word ? 
value : T{}; + } + widest_int_t getInt(const E_FORMAT f) // read the next int from the file and move the start pointer along { assert(!isFloatingPointFormat(f)); if (IsBinaryFile) { - if (StartPointer + sizeof(widest_int_t) > EndPointer) - fillBuffer(); switch (getTexelOrBlockBytesize(f)) { case 1: - if (StartPointer + sizeof(int8_t) <= EndPointer) + if (ensureBytes(sizeof(int8_t))) return *(StartPointer++); break; - case 2: - if (StartPointer + sizeof(int16_t) <= EndPointer) - { - const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); - StartPointer += sizeof(int16_t); - return retval; - } - break; - case 4: - if (StartPointer + sizeof(int32_t) <= EndPointer) - { - const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); - StartPointer += sizeof(int32_t); - return retval; - } - break; + case 2: return static_cast(loadBinaryScalar()); + case 4: return static_cast(loadBinaryScalar()); default: assert(false); break; } return 0u; } - const char* word = getNextWord(); - if (!word) - return 0u; - const char* const wordEnd = getCurrentWordEnd(word); - if (word == wordEnd) - return 0u; - auto parseInt = [&](auto& value) -> widest_int_t - { - auto ptr = word; - if (Common::parseNumber(ptr, wordEnd, value) && ptr == wordEnd) - return static_cast(value); - return ptr != word ? static_cast(value) : 0u; - }; - if (isSignedFormat(f)) - { - int64_t value = 0; - return parseInt(value); - } - uint64_t value = 0u; - return parseInt(value); + return isSignedFormat(f) ? 
static_cast(parseCurrentWordValue()) : static_cast(parseCurrentWordValue()); } - hlsl::float64_t getFloat(const E_FORMAT f) + hlsl::float64_t getFloat(const E_FORMAT f) // read the next float from the file and move the start pointer along { assert(isFloatingPointFormat(f)); if (IsBinaryFile) { - if (StartPointer + sizeof(hlsl::float64_t) > EndPointer) - fillBuffer(); switch (getTexelOrBlockBytesize(f)) { - case 4: - if (StartPointer + sizeof(hlsl::float32_t) <= EndPointer) - { - const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); - StartPointer += sizeof(hlsl::float32_t); - return retval; - } - break; - case 8: - if (StartPointer + sizeof(hlsl::float64_t) <= EndPointer) - { - const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); - StartPointer += sizeof(hlsl::float64_t); - return retval; - } - break; + case 4: return loadBinaryScalar(); + case 8: return loadBinaryScalar(); default: assert(false); break; } return 0.0; } - const char* word = getNextWord(); - if (!word) - return 0.0; - const char* const wordEnd = getCurrentWordEnd(word); - if (word == wordEnd) - return 0.0; - hlsl::float64_t value = 0.0; - auto ptr = word; - if (Common::parseNumber(ptr, wordEnd, value) && ptr == wordEnd) - return value; - return ptr != word ? 
value : 0.0; + return parseCurrentWordValue(); } - void getData(void* dst, const E_FORMAT f) + void getData(void* dst, const E_FORMAT f) // read the next thing from the file and move the start pointer along { const auto size = getTexelOrBlockBytesize(f); - if (StartPointer + size > EndPointer) - { - fillBuffer(); - if (StartPointer + size > EndPointer) - return; - } + if (!ensureBytes(size)) + return; if (IsWrongEndian) std::reverse_copy(StartPointer, StartPointer + size, reinterpret_cast(dst)); else @@ -699,56 +682,34 @@ struct Parse } _outIndices.push_back(i0); _outIndices.push_back(prev); - _outIndices.push_back(idx); - prev = idx; - } - return true; - }; - if (IsBinaryFile && !IsWrongEndian && srcIndexFmt == EF_R32_UINT) { - const size_t bytesNeeded = - static_cast(count) * sizeof(uint32_t); - if (StartPointer + bytesNeeded > EndPointer) - fillBuffer(); - if (StartPointer + bytesNeeded <= EndPointer) { - const uint8_t* ptr = - reinterpret_cast(StartPointer); - auto readIndex = [&ptr]() -> uint32_t { - uint32_t v = 0u; - std::memcpy(&v, ptr, sizeof(v)); - ptr += sizeof(v); - return v; - }; - if (!emitFan(readIndex, count)) - return false; - StartPointer = - reinterpret_cast(const_cast(ptr)); - continue; - } - } else if (IsBinaryFile && !IsWrongEndian && - srcIndexFmt == EF_R16_UINT) { - const size_t bytesNeeded = - static_cast(count) * sizeof(uint16_t); - if (StartPointer + bytesNeeded > EndPointer) - fillBuffer(); - if (StartPointer + bytesNeeded <= EndPointer) { - const uint8_t* ptr = - reinterpret_cast(StartPointer); - auto readIndex = [&ptr]() -> uint32_t { - uint16_t v = 0u; - std::memcpy(&v, ptr, sizeof(v)); - ptr += sizeof(v); - return static_cast(v); - }; - if (!emitFan(readIndex, count)) - return false; - StartPointer = - reinterpret_cast(const_cast(ptr)); - continue; - } - } - auto readIndex = [&]() -> uint32_t { - return static_cast(getInt(srcIndexFmt)); - }; + _outIndices.push_back(idx); + prev = idx; + } + return true; + }; + auto 
tryReadContiguousFan = [&]() -> bool { + const size_t bytesNeeded = static_cast(count) * sizeof(T); + if (!ensureBytes(bytesNeeded)) + return false; + const uint8_t* ptr = reinterpret_cast(StartPointer); + auto readIndex = [&ptr]() -> uint32_t { + T v = {}; + std::memcpy(&v, ptr, sizeof(v)); + ptr += sizeof(v); + return static_cast(v); + }; + if (!emitFan(readIndex, count)) + return false; + StartPointer = reinterpret_cast(const_cast(ptr)); + return true; + }; + if (IsBinaryFile && !IsWrongEndian && srcIndexFmt == EF_R32_UINT && tryReadContiguousFan.template operator()()) + continue; + if (IsBinaryFile && !IsWrongEndian && srcIndexFmt == EF_R16_UINT && tryReadContiguousFan.template operator()()) + continue; + auto readIndex = [&]() -> uint32_t { + return static_cast(getInt(srcIndexFmt)); + }; if (!emitFan(readIndex, count)) return false; } else if (prop.Name == "intensity") { @@ -1135,13 +1096,13 @@ struct Parse IAssetLoader::SAssetLoadContext inner; uint32_t topHierarchyLevel; IAssetLoader::IAssetLoaderOverride* loaderOverride; - core::vector Buffer; + core::vector Buffer; // input buffer must be at least twice as long as the longest line in the file size_t ioReadWindowSize = DefaultIoReadWindowBytes; core::vector ElementList = {}; char *StartPointer = nullptr, *EndPointer = nullptr, *LineEndPointer = nullptr; int32_t LineLength = 0; - int32_t WordLength = -1; + int32_t WordLength = -1; // this variable is a misnomer, its really the offset to next word minus one bool IsBinaryFile = false, IsWrongEndian = false, EndOfFile = false; size_t fileOffset = {}; uint64_t readCallCount = 0ull; @@ -1222,6 +1183,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( } const uint64_t safeReadWindow = std::min(desiredReadWindow, static_cast(std::numeric_limits::max() - Parse::Context::ReadWindowPaddingBytes)); ctx.init(static_cast(safeReadWindow)); + // start with empty mesh auto geometry = make_smart_refctd_ptr(); hlsl::shapes::util::AABBAccumulator3 parsedAABB = 
hlsl::shapes::util::createAABBAccumulator(); uint32_t vertCount = 0; @@ -1256,13 +1218,17 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( return; contentHashBuild.tryDefer(view.src.buffer.get()); }; + // Currently only supports ASCII or binary meshes if (Parse::toStringView(ctx.getNextLine()) != "ply") { _params.logger.log("Not a valid PLY file %s", system::ILogger::ELL_ERROR, ctx.inner.mainFile->getFileName().string().c_str()); return {}; } + // cut the next line out ctx.getNextLine(); + // grab the word from this line const char* word = ctx.getNextWord(); + // ignore comments for (; Parse::toStringView(word) == "comment"; ctx.getNextLine()) word = ctx.getNextWord(); bool readingHeader = true; @@ -1277,7 +1243,9 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( _params.logger.log("PLY property token found before element %s", system::ILogger::ELL_WARNING, word); } else { + // get element auto& el = ctx.ElementList.back(); + // fill property struct auto& prop = el.Properties.emplace_back(); prop.type = prop.getType(word); if (prop.type == EF_UNKNOWN) { @@ -1323,7 +1291,9 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( if (el.Name == "vertex") vertCount = el.Count; } else if (wordView == "comment") { + // ignore line } else if (wordView == "format") { + // must be `format {binary_little_endian|binary_big_endian|ascii} 1.0` word = ctx.getNextWord(); const std::string_view formatView = Parse::toStringView(word); if (formatView == "binary_little_endian") { @@ -1333,6 +1303,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( ctx.IsWrongEndian = true; } else if (formatView == "ascii") { } else { + // abort if this isn't an ascii or a binary mesh _params.logger.log("Unsupported PLY mesh format %s", system::ILogger::ELL_ERROR, word); continueReading = false; @@ -1370,6 +1341,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( } while (readingHeader && continueReading); if (!continueReading) return {}; + // now to read the actual data from the file using index_t = uint32_t; 
core::vector indices = {}; bool verticesProcessed = false; @@ -1408,10 +1380,12 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( logMalformedElement("face"); return false; }; + // loop through each of the elements for (uint32_t i = 0; i < ctx.ElementList.size(); ++i) { auto& el = ctx.ElementList[i]; if (el.Name == "vertex") { if (verticesProcessed) { + // multiple vertex elements are currently treated as unsupported _params.logger.log("Multiple `vertex` elements not supported!", system::ILogger::ELL_ERROR); return {}; @@ -1421,6 +1395,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( core::vector extraViews; for (auto& vertexProperty : el.Properties) { const auto& propertyName = vertexProperty.Name; + // only positions and normals need to be structured/canonicalized in any way auto negotiateFormat = [&vertexProperty](ICPUPolygonGeometry::SDataViewBase& view, const uint8_t component) -> void { assert(getFormatChannelCount(vertexProperty.type) != 0); if (getTexelOrBlockBytesize(vertexProperty.type) > getTexelOrBlockBytesize(view.format)) @@ -1444,6 +1419,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( else if (propertyName == "v" || propertyName == "t") negotiateFormat(uvView, 1); else + // property names for extra channels are currently not persisted in metadata extraViews.push_back(createView(vertexProperty.type, el.Count)); } auto setFinalFormat = [&ctx](ICPUPolygonGeometry::SDataViewBase& view) -> void { @@ -1481,6 +1457,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( .dstFmt = view.composed.format}); for (auto& view : extraViews) geometry->getAuxAttributeViews()->push_back(std::move(view)); + // loop through vertex properties const auto fastVertexResult = ctx.readVertexElementFast(el, &parsedAABB); if (fastVertexResult == Parse::Context::EFastVertexReadResult::Success) { ++fastVertexElementCount; @@ -1508,6 +1485,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( CPolygonGeometryManipulator::recomputeAABB(geometry.get()); const uint64_t indexCount = 
static_cast(indices.size()); if (indices.empty()) { + // no index buffer means point cloud geometry->setIndexing(IPolygonGeometryBase::PointList()); } else { if (vertCount != 0u && maxIndexRead >= vertCount) { diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 23b4251c4e..dd39637458 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -49,56 +49,15 @@ struct Parse using Binary = impl::BinaryData; using SemanticDecode = SGeometryViewDecode::Prepared; using StoredDecode = SGeometryViewDecode::Prepared; - enum class ScalarType : uint8_t - { - Int8, - UInt8, - Int16, - UInt16, - Int32, - UInt32, - Float32, - Float64 - }; - struct ScalarMeta - { - const char* name = "float32"; - uint32_t byteSize = sizeof(float); - bool integer = false; - bool signedType = true; - }; - struct ExtraAuxView - { - const ICPUPolygonGeometry::SDataView* view = nullptr; - uint32_t components = 0u; - uint32_t auxIndex = 0u; - ScalarType scalarType = ScalarType::Float32; - }; - struct WriteInput - { - const ICPUPolygonGeometry* geom = nullptr; - ScalarType positionScalarType = ScalarType::Float32; - const ICPUPolygonGeometry::SDataView* uvView = nullptr; - ScalarType uvScalarType = ScalarType::Float32; - const core::vector* extraAuxViews = nullptr; - bool writeNormals = false; - ScalarType normalScalarType = ScalarType::Float32; - size_t vertexCount = 0ull; - size_t faceCount = 0ull; - bool write16BitIndices = false; - bool flipVectors = false; - }; + enum class ScalarType : uint8_t { Int8, UInt8, Int16, UInt16, Int32, UInt32, Float32, Float64 }; + struct ScalarMeta { const char* name = "float32"; uint32_t byteSize = sizeof(float); bool integer = false; bool signedType = true; }; + struct ExtraAuxView { const ICPUPolygonGeometry::SDataView* view = nullptr; uint32_t components = 0u; uint32_t auxIndex = 0u; ScalarType scalarType = ScalarType::Float32; }; + struct WriteInput { 
const ICPUPolygonGeometry* geom = nullptr; ScalarType positionScalarType = ScalarType::Float32; const ICPUPolygonGeometry::SDataView* uvView = nullptr; ScalarType uvScalarType = ScalarType::Float32; const core::vector* extraAuxViews = nullptr; bool writeNormals = false; ScalarType normalScalarType = ScalarType::Float32; size_t vertexCount = 0ull; size_t faceCount = 0ull; bool write16BitIndices = false; bool flipVectors = false; }; static constexpr size_t ApproxTextBytesPerVertex = sizeof("0.000000 0.000000 0.000000 0.000000 0.000000 0.000000\n") - 1ull; static constexpr size_t ApproxTextBytesPerFace = sizeof("3 4294967295 4294967295 4294967295\n") - 1ull; static constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 16ull; template - static void appendIntegral(std::string& out, const T value) - { - std::array buf = {}; - const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); - if (res.ec == std::errc()) - out.append(buf.data(), static_cast(res.ptr - buf.data())); - } + static void appendIntegral(std::string& out, const T value) { std::array buf = {}; const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); if (res.ec == std::errc()) out.append(buf.data(), static_cast(res.ptr - buf.data())); } static void appendFloat(std::string& out, double value) { const size_t oldSize = out.size(); @@ -178,13 +137,7 @@ struct Parse { uint8_t* cursor = nullptr; template - inline bool append(const T value) - { - if (!cursor) - return false; - Binary::storeUnalignedAdvance(cursor, value); - return true; - } + inline bool append(const T value) { if (!cursor) return false; Binary::storeUnalignedAdvance(cursor, value); return true; } inline bool finishVertex() { return true; } }; struct TextSink @@ -193,10 +146,8 @@ struct Parse template inline bool append(const T value) { - if constexpr (std::is_floating_point_v) - appendFloat(output, static_cast(value)); - else - appendIntegral(output, value); + if constexpr 
(std::is_floating_point_v) appendFloat(output, static_cast(value)); + else appendIntegral(output, value); output.push_back(' '); return true; } @@ -233,22 +184,9 @@ struct Parse return true; } template - static bool emitPrepared(Sink& sink, const PreparedView& view, const size_t ix) - { - if constexpr (Mode == SGeometryViewDecode::EMode::Semantic) - return emitDecode(sink, view.semantic, ix, view.components, view.flipVectors); - return emitDecode(sink, view.stored, ix, view.components, view.flipVectors); - } + static bool emitPrepared(Sink& sink, const PreparedView& view, const size_t ix) { if constexpr (Mode == SGeometryViewDecode::EMode::Semantic) return emitDecode(sink, view.semantic, ix, view.components, view.flipVectors); return emitDecode(sink, view.stored, ix, view.components, view.flipVectors); } template - static inline void prepareDecode(PreparedView& view, const ICPUPolygonGeometry::SDataView& src, const bool flipVectors) - { - view.flipVectors = flipVectors; - if constexpr (Mode == SGeometryViewDecode::EMode::Semantic) - view.semantic = SGeometryViewDecode::prepare(src); - else - view.stored = SGeometryViewDecode::prepare(src); - view.emit = &emitPrepared; - } + static inline void prepareDecode(PreparedView& view, const ICPUPolygonGeometry::SDataView& src, const bool flipVectors) { view.flipVectors = flipVectors; if constexpr (Mode == SGeometryViewDecode::EMode::Semantic) view.semantic = SGeometryViewDecode::prepare(src); else view.stored = SGeometryViewDecode::prepare(src); view.emit = &emitPrepared; } static PreparedView create(const ICPUPolygonGeometry::SDataView* view, const uint32_t components, const ScalarType scalarType, const bool flipVectors) { PreparedView retval = {.components = components}; @@ -343,49 +281,28 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ if (!_override) getDefaultOverride(_override); if (!_file || !_params.rootAsset) - { - _params.logger.log("PLY writer: missing output file or root 
asset.", system::ILogger::ELL_ERROR); - return false; - } + return _params.logger.log("PLY writer: missing output file or root asset.", system::ILogger::ELL_ERROR), false; const auto items = SGeometryWriterCommon::collectPolygonGeometryWriteItems(_params.rootAsset); if (items.size() != 1u) - { - _params.logger.log("PLY writer: expected exactly one polygon geometry to write.", system::ILogger::ELL_ERROR); - return false; - } + return _params.logger.log("PLY writer: expected exactly one polygon geometry to write.", system::ILogger::ELL_ERROR), false; const auto& item = items.front(); const auto* geom = item.geometry; if (!geom || !geom->valid()) - { - _params.logger.log("PLY writer: root asset is not a valid polygon geometry.", system::ILogger::ELL_ERROR); - return false; - } + return _params.logger.log("PLY writer: root asset is not a valid polygon geometry.", system::ILogger::ELL_ERROR), false; if (!SGeometryWriterCommon::isIdentityTransform(item.transform)) - { - _params.logger.log("PLY writer: transformed scene or collection export is not supported.", system::ILogger::ELL_ERROR); - return false; - } + return _params.logger.log("PLY writer: transformed scene or collection export is not supported.", system::ILogger::ELL_ERROR), false; SAssetWriteContext ctx = {_params, _file}; system::IFile* file = _override->getOutputFile(_file, ctx, {geom, 0u}); if (!file) - { - _params.logger.log("PLY writer: output override returned null file.", system::ILogger::ELL_ERROR); - return false; - } + return _params.logger.log("PLY writer: output override returned null file.", system::ILogger::ELL_ERROR), false; const auto& positionView = geom->getPositionView(); const auto& normalView = geom->getNormalView(); const size_t vertexCount = positionView.getElementCount(); if (vertexCount == 0ull) - { - _params.logger.log("PLY writer: empty position view.", system::ILogger::ELL_ERROR); - return false; - } + return _params.logger.log("PLY writer: empty position view.", 
system::ILogger::ELL_ERROR), false; const bool writeNormals = static_cast(normalView); if (writeNormals && normalView.getElementCount() != vertexCount) - { - _params.logger.log("PLY writer: normal vertex count mismatch.", system::ILogger::ELL_ERROR); - return false; - } + return _params.logger.log("PLY writer: normal vertex count mismatch.", system::ILogger::ELL_ERROR), false; const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, Parse::UV0, vertexCount); if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) uvView = nullptr; @@ -407,21 +324,12 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ } const auto* indexing = geom->getIndexingCallback(); if (!indexing) - { - _params.logger.log("PLY writer: missing indexing callback.", system::ILogger::ELL_ERROR); - return false; - } + return _params.logger.log("PLY writer: missing indexing callback.", system::ILogger::ELL_ERROR), false; if (indexing->knownTopology() != E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) - { - _params.logger.log("PLY writer: only triangle-list topology is supported.", system::ILogger::ELL_ERROR); - return false; - } + return _params.logger.log("PLY writer: only triangle-list topology is supported.", system::ILogger::ELL_ERROR), false; size_t faceCount = 0ull; if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) - { - _params.logger.log("PLY writer: failed to validate triangle indexing.", system::ILogger::ELL_ERROR); - return false; - } + return _params.logger.log("PLY writer: failed to validate triangle indexing.", system::ILogger::ELL_ERROR), false; const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool binary = flags.hasAnyFlag(E_WRITER_FLAGS::EWF_BINARY); const bool flipVectors = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); @@ -502,19 +410,13 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ core::vector body; 
body.resize(bodySize); if (!Parse::writeBinary(input, body.data())) - { - _params.logger.log("PLY writer: binary payload generation failed.", system::ILogger::ELL_ERROR); - return false; - } + return _params.logger.log("PLY writer: binary payload generation failed.", system::ILogger::ELL_ERROR), false; return writePayload(body.data(), body.size()); } std::string body; body.reserve(vertexCount * Parse::ApproxTextBytesPerVertex + faceCount * Parse::ApproxTextBytesPerFace); if (!Parse::writeText(input, body)) - { - _params.logger.log("PLY writer: text payload generation failed.", system::ILogger::ELL_ERROR); - return false; - } + return _params.logger.log("PLY writer: text payload generation failed.", system::ILogger::ELL_ERROR), false; return writePayload(body.data(), body.size()); } } diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 8f54500a95..23e7860af3 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -5,7 +5,6 @@ // See the original file in irrlicht source for authors #include "CSTLMeshFileLoader.h" - #include "impl/SFileAccess.h" #include "impl/STextParse.h" #include "nbl/asset/asset.h" @@ -22,48 +21,18 @@ #include "nbl/system/IFile.h" #include - namespace nbl::asset { - namespace { - struct Parse { static constexpr uint32_t COLOR0 = 0u; using Common = impl::TextParse; - - struct LayoutProbe - { - bool hasPrefix = false; - bool startsWithSolid = false; - bool binaryBySize = false; - uint32_t triangleCount = 0u; - }; - - static hlsl::float32_t3 resolveStoredNormal(const hlsl::float32_t3& fileNormal) - { - const float fileLen2 = hlsl::dot(fileNormal, fileNormal); - if (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) - return fileNormal; - return SGeometryNormalCommon::normalizeOrZero(fileNormal); - } - - static void pushTriangleReversed(const std::array& p, core::vector& positions) - { - positions.push_back(p[2u]); - 
positions.push_back(p[1u]); - positions.push_back(p[0u]); - } - - static uint32_t decodeViscamColorToB8G8R8A8(const uint16_t packedColor) - { - std::array src = {&packedColor}; - uint32_t outColor = 0u; - convertColor(src.data(), &outColor, 0u, 0u); - return outColor; - } + struct LayoutProbe { bool hasPrefix = false; bool startsWithSolid = false; bool binaryBySize = false; uint32_t triangleCount = 0u; }; + static hlsl::float32_t3 resolveStoredNormal(const hlsl::float32_t3& fileNormal) { const float fileLen2 = hlsl::dot(fileNormal, fileNormal); return (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) ? fileNormal : SGeometryNormalCommon::normalizeOrZero(fileNormal); } + static void pushTriangleReversed(const std::array& p, core::vector& positions) { positions.push_back(p[2u]); positions.push_back(p[1u]); positions.push_back(p[0u]); } + static uint32_t decodeViscamColorToB8G8R8A8(const uint16_t packedColor) { std::array src = {&packedColor}; uint32_t outColor = 0u; convertColor(src.data(), &outColor, 0u, 0u); return outColor; } struct Context { @@ -116,30 +85,13 @@ struct Parse { public: inline AsciiParser(const char* begin, const char* end) : m_cursor(begin), m_end(end) {} - - inline std::optional readToken() - { - return Common::readToken(m_cursor, m_end); - } - - inline std::optional readFloat() - { - float value = 0.f; - if (!Common::parseNumber(m_cursor, m_end, value)) - return std::nullopt; - return value; - } - + inline std::optional readToken() { return Common::readToken(m_cursor, m_end); } + inline std::optional readFloat() { float value = 0.f; return Common::parseNumber(m_cursor, m_end, value) ? 
std::optional(value) : std::nullopt; } inline std::optional readVec3() { - const auto x = readFloat(); - const auto y = readFloat(); - const auto z = readFloat(); - if (!x.has_value() || !y.has_value() || !z.has_value()) - return std::nullopt; - return hlsl::float32_t3(*x, *y, *z); + const auto x = readFloat(), y = readFloat(), z = readFloat(); + return x.has_value() && y.has_value() && z.has_value() ? std::optional(hlsl::float32_t3(*x, *y, *z)) : std::nullopt; } - private: const char* m_cursor = nullptr; const char* m_end = nullptr; @@ -148,14 +100,8 @@ struct Parse class SplitBlockMemoryResource final : public core::refctd_memory_resource { public: - inline SplitBlockMemoryResource(core::smart_refctd_ptr&& upstream, void* block, const size_t blockBytes, const size_t alignment) - : m_upstream(std::move(upstream)), m_block(block), m_blockBytes(blockBytes), m_alignment(alignment) {} - - inline void* allocate(std::size_t, std::size_t) override - { - assert(false); - return nullptr; - } + inline SplitBlockMemoryResource(core::smart_refctd_ptr&& upstream, void* block, const size_t blockBytes, const size_t alignment) : m_upstream(std::move(upstream)), m_block(block), m_blockBytes(blockBytes), m_alignment(alignment) {} + inline void* allocate(std::size_t, std::size_t) override { assert(false); return nullptr; } inline void deallocate(void* p, std::size_t bytes, std::size_t) override { @@ -167,11 +113,7 @@ struct Parse } protected: - inline ~SplitBlockMemoryResource() override - { - if (m_upstream && m_block) - m_upstream->deallocate(m_block, m_blockBytes, m_alignment); - } + inline ~SplitBlockMemoryResource() override { if (m_upstream && m_block) m_upstream->deallocate(m_block, m_blockBytes, m_alignment); } private: core::smart_refctd_ptr m_upstream; @@ -180,9 +122,7 @@ struct Parse size_t m_alignment = 1ull; }; }; - } - CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager*) { } @@ -242,27 +182,20 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, 
const IAssetLoa if (filesize < Context::BinaryPrefixBytes) return {}; - uint32_t triangleCount32 = binaryTriCountFromDetect; - if (!hasBinaryTriCountFromDetect) - { - if (!SInterchangeIO::readFileExact(context.inner.mainFile, &triangleCount32, Context::BinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) - return {}; - } - - triangleCount = triangleCount32; - const size_t dataSize = static_cast(triangleCount) * Context::TriangleRecordBytes; - const size_t expectedSize = Context::BinaryPrefixBytes + dataSize; - if (filesize < expectedSize) - return {}; - - const uint8_t* payloadData = wholeFileData ? (wholeFileData + Context::BinaryPrefixBytes) : loadSession.readRange(Context::BinaryPrefixBytes, dataSize, wholeFilePayload, &context.ioTelemetry); - if (!payloadData) - return {}; - + uint32_t triangleCount32 = binaryTriCountFromDetect; + if (!hasBinaryTriCountFromDetect && !SInterchangeIO::readFileExact(context.inner.mainFile, &triangleCount32, Context::BinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) + return {}; + triangleCount = triangleCount32; + const size_t dataSize = static_cast(triangleCount) * Context::TriangleRecordBytes; + const size_t expectedSize = Context::BinaryPrefixBytes + dataSize; + if (filesize < expectedSize) + return {}; + const uint8_t* payloadData = wholeFileData ? 
(wholeFileData + Context::BinaryPrefixBytes) : loadSession.readRange(Context::BinaryPrefixBytes, dataSize, wholeFilePayload, &context.ioTelemetry); + if (!payloadData) + return {}; vertexCount = triangleCount * Context::VerticesPerTriangle; const size_t vertexCountSizeT = static_cast(vertexCount); - if (vertexCountSizeT > - (std::numeric_limits::max() / sizeof(hlsl::float32_t3))) + if (vertexCountSizeT > (std::numeric_limits::max() / sizeof(hlsl::float32_t3))) return {}; const size_t viewByteSize = vertexCountSizeT * sizeof(hlsl::float32_t3); if (viewByteSize > (std::numeric_limits::max() - viewByteSize)) @@ -274,41 +207,17 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa void* block = upstream->allocate(blockBytes, alignof(float)); if (!block) return {}; - auto blockResource = core::make_smart_refctd_ptr( - core::smart_refctd_ptr( - std::move(upstream)), - block, blockBytes, alignof(float)); - auto posBuffer = ICPUBuffer::create( - {{viewByteSize}, - block, - core::smart_refctd_ptr(blockResource), - alignof(float)}, - core::adopt_memory); - auto normalBuffer = ICPUBuffer::create( - {{viewByteSize}, - reinterpret_cast(block) + viewByteSize, - core::smart_refctd_ptr(blockResource), - alignof(float)}, - core::adopt_memory); + auto blockResource = core::make_smart_refctd_ptr(core::smart_refctd_ptr(std::move(upstream)), block, blockBytes, alignof(float)); + auto posBuffer = ICPUBuffer::create({{viewByteSize}, block, core::smart_refctd_ptr(blockResource), alignof(float)}, core::adopt_memory); + auto normalBuffer = ICPUBuffer::create({{viewByteSize}, reinterpret_cast(block) + viewByteSize, core::smart_refctd_ptr(blockResource), alignof(float)}, core::adopt_memory); if (!posBuffer || !normalBuffer) return {}; ICPUPolygonGeometry::SDataView posView = {}; - posView.composed = {.stride = sizeof(hlsl::float32_t3), - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::getMatchingAABBFormat( - EF_R32G32B32_SFLOAT)}; - posView.src = 
{ - .offset = 0ull, - .size = viewByteSize, - .buffer = std::move(posBuffer)}; + posView.composed = {.stride = sizeof(hlsl::float32_t3), .format = EF_R32G32B32_SFLOAT, .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32G32B32_SFLOAT)}; + posView.src = {.offset = 0ull, .size = viewByteSize, .buffer = std::move(posBuffer)}; ICPUPolygonGeometry::SDataView normalView = {}; - normalView.composed = {.stride = sizeof(hlsl::float32_t3), - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::getMatchingAABBFormat( - EF_R32G32B32_SFLOAT)}; - normalView.src = {.offset = 0ull, - .size = viewByteSize, - .buffer = std::move(normalBuffer)}; + normalView.composed = {.stride = sizeof(hlsl::float32_t3), .format = EF_R32G32B32_SFLOAT, .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32G32B32_SFLOAT)}; + normalView.src = {.offset = 0ull, .size = viewByteSize, .buffer = std::move(normalBuffer)}; auto* posOutFloat = reinterpret_cast(posView.getPointer()); auto* normalOutFloat = reinterpret_cast(normalView.getPointer()); if (!posOutFloat || !normalOutFloat) @@ -331,64 +240,36 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa parseTuningRequest.minBytesPerWorker = Context::TriangleRecordBytes; parseTuningRequest.hardwareThreads = static_cast(hw); parseTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); - parseTuningRequest.targetChunksPerWorker = - _params.ioPolicy.runtimeTuning.targetChunksPerWorker; + parseTuningRequest.targetChunksPerWorker = _params.ioPolicy.runtimeTuning.targetChunksPerWorker; parseTuningRequest.minChunkWorkUnits = 1ull; - parseTuningRequest.maxChunkWorkUnits = - std::max(1ull, triangleCount); + parseTuningRequest.maxChunkWorkUnits = std::max(1ull, triangleCount); parseTuningRequest.sampleData = payloadData; - parseTuningRequest.sampleBytes = - SLoaderRuntimeTuner::resolveSampleBytes(_params.ioPolicy, dataSize); - const auto parseTuning = - SLoaderRuntimeTuner::tune(_params.ioPolicy, 
parseTuningRequest); - const size_t workerCount = std::max( - 1ull, - std::min(parseTuning.workerCount, - static_cast(std::max(1ull, triangleCount)))); + parseTuningRequest.sampleBytes = SLoaderRuntimeTuner::resolveSampleBytes(_params.ioPolicy, dataSize); + const auto parseTuning = SLoaderRuntimeTuner::tune(_params.ioPolicy, parseTuningRequest); + const size_t workerCount = std::max(1ull, std::min(parseTuning.workerCount, static_cast(std::max(1ull, triangleCount)))); static constexpr bool ComputeAABBInParse = true; - struct SThreadAABB { - bool has = false; - float minX = 0.f; - float minY = 0.f; - float minZ = 0.f; - float maxX = 0.f; - float maxY = 0.f; - float maxZ = 0.f; - }; - std::vector threadAABBs(ComputeAABBInParse ? workerCount - : 0ull); - const uint64_t parseChunkTriangles = - std::max(1ull, parseTuning.chunkWorkUnits); - const size_t parseChunkCount = static_cast( - SLoaderRuntimeTuner::ceilDiv(triangleCount, parseChunkTriangles)); + struct SThreadAABB { bool has = false; float minX = 0.f; float minY = 0.f; float minZ = 0.f; float maxX = 0.f; float maxY = 0.f; float maxZ = 0.f; }; + std::vector threadAABBs(ComputeAABBInParse ? workerCount : 0ull); + const uint64_t parseChunkTriangles = std::max(1ull, parseTuning.chunkWorkUnits); + const size_t parseChunkCount = static_cast(SLoaderRuntimeTuner::ceilDiv(triangleCount, parseChunkTriangles)); const bool hashInParsePipeline = computeContentHashes; - std::vector hashChunkReady( - hashInParsePipeline ? parseChunkCount : 0ull, 0u); + std::vector hashChunkReady(hashInParsePipeline ? 
parseChunkCount : 0ull, 0u); std::atomic_bool hashPipelineOk = true; - core::blake3_hash_t parsedPositionHash = - static_cast(core::blake3_hasher{}); - core::blake3_hash_t parsedNormalHash = - static_cast(core::blake3_hasher{}); - auto parseRange = [&](const uint64_t beginTri, const uint64_t endTri, - SThreadAABB& localAABB) -> void { - const uint8_t* localCursor = - payloadData + beginTri * Context::TriangleRecordBytes; - float* posCursor = posOutFloat + beginTri * Context::VerticesPerTriangle * - Context::FloatChannelsPerVertex; - float* normalCursor = - normalOutFloat + beginTri * Context::VerticesPerTriangle * - Context::FloatChannelsPerVertex; + core::blake3_hash_t parsedPositionHash = static_cast(core::blake3_hasher{}); + core::blake3_hash_t parsedNormalHash = static_cast(core::blake3_hasher{}); + auto parseRange = [&](const uint64_t beginTri, const uint64_t endTri, SThreadAABB& localAABB) -> void { + const uint8_t* localCursor = payloadData + beginTri * Context::TriangleRecordBytes; + float* posCursor = posOutFloat + beginTri * Context::VerticesPerTriangle * Context::FloatChannelsPerVertex; + float* normalCursor = normalOutFloat + beginTri * Context::VerticesPerTriangle * Context::FloatChannelsPerVertex; for (uint64_t tri = beginTri; tri < endTri; ++tri) { const uint8_t* const triRecord = localCursor; localCursor += Context::TriangleRecordBytes; std::array triValues = {}; std::memcpy(triValues.data(), triRecord, sizeof(triValues)); uint16_t packedColor = 0u; - std::memcpy(&packedColor, triRecord + Context::TriangleFloatBytes, - sizeof(packedColor)); + std::memcpy(&packedColor, triRecord + Context::TriangleFloatBytes, sizeof(packedColor)); if (packedColor & 0x8000u) - faceColors[static_cast(tri)] = - Parse::decodeViscamColorToB8G8R8A8(packedColor); + faceColors[static_cast(tri)] = Parse::decodeViscamColorToB8G8R8A8(packedColor); else colorValidForAllFaces.store(false, std::memory_order_relaxed); @@ -685,12 +566,10 @@ SAssetBundle 
CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[0u]); const auto endLoopKeyword = parser.readToken(); - if (!endLoopKeyword.has_value() || - *endLoopKeyword != std::string_view("endloop")) + if (!endLoopKeyword.has_value() || *endLoopKeyword != std::string_view("endloop")) return {}; const auto endFacetKeyword = parser.readToken(); - if (!endFacetKeyword.has_value() || - *endFacetKeyword != std::string_view("endfacet")) + if (!endFacetKeyword.has_value() || *endFacetKeyword != std::string_view("endfacet")) return {}; } if (positions.empty()) @@ -698,13 +577,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa triangleCount = positions.size() / Context::VerticesPerTriangle; vertexCount = positions.size(); - - auto posView = - SGeometryLoaderCommon::createAdoptedView( - std::move(positions)); - auto normalView = - SGeometryLoaderCommon::createAdoptedView( - std::move(normals)); + auto posView = SGeometryLoaderCommon::createAdoptedView(std::move(positions)); + auto normalView = SGeometryLoaderCommon::createAdoptedView(std::move(normals)); if (!posView || !normalView) return {}; geometry->setPositionView(std::move(posView)); @@ -713,17 +587,12 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (vertexCount == 0ull) return {}; - - if (computeContentHashes) { - SPolygonGeometryContentHash::computeMissing(geometry.get(), - _params.ioPolicy); - } - + if (computeContentHashes) + SPolygonGeometryContentHash::computeMissing(geometry.get(), _params.ioPolicy); if (!parsedAABB.empty()) geometry->applyAABB(parsedAABB.value); - else { + else CPolygonGeometryManipulator::recomputeAABB(geometry.get()); - } const uint64_t ioMinRead = context.ioTelemetry.getMinOrZero(); const uint64_t ioAvgRead = context.ioTelemetry.getAvgOrZero(); loadSession.logTinyIO(_params.logger, context.ioTelemetry); @@ -747,16 +616,12 @@ SAssetBundle 
CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa bool CSTLMeshFileLoader::isALoadableFileFormat( system::IFile* _file, const system::logger_opt_ptr) const { using Context = Parse::Context; - if (!_file || _file->getSize() <= Context::TextProbeBytes) return false; - Parse::LayoutProbe layout = {}; if (!Parse::probeLayout(_file, _file->getSize(), nullptr, nullptr, layout)) return false; return layout.startsWithSolid || layout.binaryBySize; } - } - #endif // _NBL_COMPILE_WITH_STL_LOADER_ diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index f7155e5b43..e93be03ed5 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -91,13 +91,7 @@ struct Parse } } }; - struct TriangleData - { - hlsl::float32_t3 normal = {}; - hlsl::float32_t3 vertex1 = {}; - hlsl::float32_t3 vertex2 = {}; - hlsl::float32_t3 vertex3 = {}; - }; + struct TriangleData { hlsl::float32_t3 normal = {}; hlsl::float32_t3 vertex1 = {}; hlsl::float32_t3 vertex2 = {}; hlsl::float32_t3 vertex3 = {}; }; static constexpr size_t BinaryHeaderBytes = 80ull; static constexpr size_t BinaryTriangleCountBytes = sizeof(uint32_t); static constexpr size_t BinaryTriangleFloatCount = 12ull; @@ -172,13 +166,8 @@ struct Parse if (!normals || count == 0u) return false; for (uint32_t i = 0u; i < count; ++i) - { if (hlsl::dot(normals[i], normals[i]) > 0.f) - { - outNormal = normals[i]; - return true; - } - } + return outNormal = normals[i], true; return false; } static void prepareVertices(const hlsl::float32_t3& p0, const hlsl::float32_t3& p1, const hlsl::float32_t3& p2, const bool flipHandedness, hlsl::float32_t3& vertex1, hlsl::float32_t3& vertex2, hlsl::float32_t3& vertex3) @@ -198,10 +187,7 @@ struct Parse const hlsl::float32_t3 planeNormal = hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); const float len2 = hlsl::dot(planeNormal, planeNormal); if (planeNormalLen2) - { - *planeNormalLen2 = 
len2; - return planeNormal; - } + return *planeNormalLen2 = len2, planeNormal; return len2 > 0.f ? hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); } static hlsl::float32_t3 resolveTriangleNormal(const hlsl::float32_t3& planeNormal, const float planeNormalLen2, const hlsl::float32_t3* const attrNormals, const uint32_t attrNormalCount, const bool flipHandedness, const bool alignToPlane) @@ -299,24 +285,8 @@ struct Parse const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(posView); const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; const bool hasFastTightPath = !geom->getIndexView() && tightPositions && (!hasNormals || tightNormals); - auto decodePosition = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { - if (tightPositions) - { - out = tightPositions[ix]; - return true; - } - return posView.decodeElement(ix, out); - }; - auto decodeNormal = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { - if (!hasNormals) - return false; - if (tightNormals) - { - out = tightNormals[ix]; - return true; - } - return normalView.decodeElement(ix, out); - }; + auto decodePosition = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { return tightPositions ? (out = tightPositions[ix], true) : posView.decodeElement(ix, out); }; + auto decodeNormal = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { return hasNormals && (tightNormals ? (out = tightNormals[ix], true) : normalView.decodeElement(ix, out)); }; auto computeFaceColor = [&](const hlsl::uint32_t3& idx, uint16_t& outColor) -> bool { outColor = 0u; if (!colorView) @@ -402,11 +372,7 @@ struct Parse const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); const std::string_view solidName = name.empty() ? 
std::string_view(AsciiDefaultName) : std::string_view(name); - if (!context->write(AsciiSolidPrefix, sizeof(AsciiSolidPrefix) - 1ull)) - return false; - if (!context->write(solidName.data(), solidName.size())) - return false; - if (!context->write("\n", sizeof("\n") - 1ull)) + if (!context->write(AsciiSolidPrefix, sizeof(AsciiSolidPrefix) - 1ull) || !context->write(solidName.data(), solidName.size()) || !context->write("\n", sizeof("\n") - 1ull)) return false; const uint32_t faceCount = static_cast(geom->getPrimitiveCount()); for (uint32_t primIx = 0u; primIx < faceCount; ++primIx) @@ -422,11 +388,7 @@ struct Parse if (!context->write("\n", sizeof("\n") - 1ull)) return false; } - if (!context->write(AsciiEndSolidPrefix, sizeof(AsciiEndSolidPrefix) - 1ull)) - return false; - if (!context->write(solidName.data(), solidName.size())) - return false; - return true; + return context->write(AsciiEndSolidPrefix, sizeof(AsciiEndSolidPrefix) - 1ull) && context->write(solidName.data(), solidName.size()); } static bool writeFaceText(const hlsl::float32_t3& v1, const hlsl::float32_t3& v2, const hlsl::float32_t3& v3, const hlsl::uint32_t3& idx, const asset::ICPUPolygonGeometry::SDataView& normalView, const bool flipHandedness, Context* context) { @@ -507,10 +469,7 @@ bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ context.ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, expectedSize, sizeKnown, file); if (impl::SFileAccess::logInvalidPlan(_params.logger, "STL writer", file->getFileName().string().c_str(), context.ioPlan)) return false; - if (context.ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile && sizeKnown) - context.ioBuffer.reserve(static_cast(expectedSize)); - else - context.ioBuffer.reserve(static_cast(std::min(context.ioPlan.chunkSizeBytes(), Parse::IoFallbackReserveBytes))); + context.ioBuffer.reserve(static_cast(context.ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile && sizeKnown ? 
expectedSize : std::min(context.ioPlan.chunkSizeBytes(), Parse::IoFallbackReserveBytes))); const bool written = binary ? Parse::writeMeshBinary(geom, &context) : Parse::writeMeshASCII(geom, &context); if (!written) return false; diff --git a/src/nbl/asset/interchange/SGeometryViewDecode.h b/src/nbl/asset/interchange/SGeometryViewDecode.h index 46e82dfbd4..5522605dfd 100644 --- a/src/nbl/asset/interchange/SGeometryViewDecode.h +++ b/src/nbl/asset/interchange/SGeometryViewDecode.h @@ -13,11 +13,7 @@ namespace nbl::asset class SGeometryViewDecode { public: - enum class EMode : uint8_t - { - Semantic, - Stored - }; + enum class EMode : uint8_t { Semantic, Stored }; template struct Prepared { @@ -27,10 +23,7 @@ class SGeometryViewDecode uint32_t channels = 0u; bool normalized = false; hlsl::shapes::AABB<4, hlsl::float64_t> range = hlsl::shapes::AABB<4, hlsl::float64_t>::create(); - inline explicit operator bool() const - { - return data != nullptr && stride != 0u && format != EF_UNKNOWN && channels != 0u; - } + inline explicit operator bool() const { return data != nullptr && stride != 0u && format != EF_UNKNOWN && channels != 0u; } template inline bool decode(const size_t ix, std::array& out) const { @@ -57,11 +50,8 @@ class SGeometryViewDecode retval.format = view.composed.format; retval.channels = getFormatChannelCount(retval.format); if constexpr (Mode == EMode::Semantic) - { - retval.normalized = isNormalizedFormat(retval.format); - if (retval.normalized) + if (retval.normalized = isNormalizedFormat(retval.format); retval.normalized) retval.range = view.composed.getRange>(); - } return retval; } template @@ -106,10 +96,7 @@ class SGeometryViewDecode return true; } template - static inline bool decodePrepared(const Prepared& prepared, const size_t ix, T* out, const uint32_t outDim) - { - return decodePreparedComponents(prepared, ix, out, outDim); - } + static inline bool decodePrepared(const Prepared& prepared, const size_t ix, T* out, const uint32_t outDim) { 
return decodePreparedComponents(prepared, ix, out, outDim); } }; } #endif diff --git a/src/nbl/asset/interchange/impl/SBinaryData.h b/src/nbl/asset/interchange/impl/SBinaryData.h index 13e64a84df..2a0259bc87 100644 --- a/src/nbl/asset/interchange/impl/SBinaryData.h +++ b/src/nbl/asset/interchange/impl/SBinaryData.h @@ -9,13 +9,7 @@ namespace nbl::asset::impl struct BinaryData { template - static inline T byteswap(const T value) - { - auto retval = value; - const auto* it = reinterpret_cast(&value); - std::reverse_copy(it, it + sizeof(retval), reinterpret_cast(&retval)); - return retval; - } + static inline T byteswap(const T value) { auto retval = value; const auto* it = reinterpret_cast(&value); std::reverse_copy(it, it + sizeof(retval), reinterpret_cast(&retval)); return retval; } template static inline T loadUnaligned(const void* src, const bool swapEndian = false) { @@ -26,16 +20,9 @@ struct BinaryData return swapEndian ? byteswap(value) : value; } template - static inline void storeUnaligned(void* dst, const T& value) - { - std::memcpy(dst, &value, sizeof(value)); - } + static inline void storeUnaligned(void* dst, const T& value) { std::memcpy(dst, &value, sizeof(value)); } template - static inline void storeUnalignedAdvance(uint8_t*& dst, const T& value) - { - storeUnaligned(dst, value); - dst += sizeof(value); - } + static inline void storeUnalignedAdvance(uint8_t*& dst, const T& value) { storeUnaligned(dst, value); dst += sizeof(value); } }; } #endif diff --git a/src/nbl/asset/interchange/impl/SFileAccess.h b/src/nbl/asset/interchange/impl/SFileAccess.h index 2dfa24f65b..2c2d990e12 100644 --- a/src/nbl/asset/interchange/impl/SFileAccess.h +++ b/src/nbl/asset/interchange/impl/SFileAccess.h @@ -44,11 +44,9 @@ class SFileAccess { if (wasMapped) *wasMapped = false; - if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) - { + if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) { const auto* mapped = 
reinterpret_cast(static_cast(file)->getMappedPointer()); - if (mapped) - { + if (mapped) { if (ioTelemetry) ioTelemetry->account(bytes); if (wasMapped) @@ -87,12 +85,7 @@ class SLoadSession inline const uint8_t* readRange(const size_t offset, const size_t bytes, core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, const bool zeroTerminate = false) const { return SFileAccess::readRange(file, offset, bytes, storage, ioPlan, ioTelemetry, zeroTerminate); } inline const uint8_t* mapOrReadWholeFile(core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, bool* const wasMapped = nullptr, const bool zeroTerminate = false) const { return SFileAccess::mapOrReadWholeFile(file, static_cast(payloadBytes), storage, ioPlan, ioTelemetry, wasMapped, zeroTerminate); } template - inline void logTinyIO(Logger& logger, const Telemetry& telemetry, const char* const opName = "reads") const - { - if (!requestedPolicy) - return; - SFileAccess::logTinyIO(logger, owner, fileName.c_str(), telemetry, payloadBytes, *requestedPolicy, opName); - } + inline void logTinyIO(Logger& logger, const Telemetry& telemetry, const char* const opName = "reads") const { if (requestedPolicy) SFileAccess::logTinyIO(logger, owner, fileName.c_str(), telemetry, payloadBytes, *requestedPolicy, opName); } }; } #endif diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h index 21e5ee4e30..0ff935e7b7 100644 --- a/src/nbl/asset/interchange/impl/STextParse.h +++ b/src/nbl/asset/interchange/impl/STextParse.h @@ -38,24 +38,10 @@ struct TextParse template static inline bool parseExactNumber(const std::string_view token, T& out) { return parseExactNumber(token.data(), token.data() + token.size(), out); } template - static inline bool parseNonZeroNumber(const char*& ptr, const char* const end, T& out) - { - return parseNumber(ptr, end, out) && out != static_cast(0); - } - static inline bool isInlineWhitespace(const char c) - { - return c == ' 
' || c == '\t' || c == '\v' || c == '\f'; - } - static inline void skipInlineWhitespace(const char*& ptr, const char* const end) - { - while (ptr < end && isInlineWhitespace(*ptr)) - ++ptr; - } - static inline void skipWhitespace(const char*& ptr, const char* const end) - { - while (ptr < end && core::isspace(*ptr)) - ++ptr; - } + static inline bool parseNonZeroNumber(const char*& ptr, const char* const end, T& out) { return parseNumber(ptr, end, out) && out != static_cast(0); } + static inline bool isInlineWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\v' || c == '\f'; } + static inline void skipInlineWhitespace(const char*& ptr, const char* const end) { while (ptr < end && isInlineWhitespace(*ptr)) ++ptr; } + static inline void skipWhitespace(const char*& ptr, const char* const end) { while (ptr < end && core::isspace(*ptr)) ++ptr; } static inline std::string_view trimWhitespace(std::string_view token) { while (!token.empty() && core::isspace(token.front())) From 6c01eaf162a951bca5d7e2808d14eaef0b3330e5 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 17:12:33 +0100 Subject: [PATCH 084/118] Tighten shared interchange helpers --- include/nbl/asset/interchange/SFileIOPolicy.h | 4 ---- .../nbl/asset/interchange/SInterchangeIO.h | 15 +++++-------- .../asset/interchange/SLoaderRuntimeTuning.h | 2 +- .../asset/interchange/SGeometryViewDecode.h | 22 +++++-------------- src/nbl/asset/interchange/impl/SFileAccess.h | 18 ++++++--------- src/nbl/asset/interchange/impl/STextParse.h | 9 +++----- 6 files changed, 21 insertions(+), 49 deletions(-) diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 25d6fb2b0e..760890710f 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -69,13 +69,9 @@ struct SFileIOPolicy uint8_t chunkSizeLog2 = static_cast(std::bit_width(DEFAULT_CHUNK_SIZE_BYTES) - 1u); uint8_t maxStagingLog2 = 
static_cast(std::bit_width(DEFAULT_MAX_STAGING_BYTES) - 1u); SRuntimeTuning runtimeTuning = {}; - inline constexpr bool strict() const { return flags.hasAnyFlag(EF_STRICT_BIT); } - inline constexpr uint64_t wholeFileThresholdBytes() const { return bytesFromLog2(wholeFileThresholdLog2, MIN_CHUNK_SIZE_LOG2); } - inline constexpr uint64_t chunkSizeBytes() const { return bytesFromLog2(chunkSizeLog2, MIN_CHUNK_SIZE_LOG2); } - inline constexpr uint64_t maxStagingBytes() const { return bytesFromLog2(maxStagingLog2, MIN_CHUNK_SIZE_LOG2); } }; struct SResolvedFileIOPolicy diff --git a/include/nbl/asset/interchange/SInterchangeIO.h b/include/nbl/asset/interchange/SInterchangeIO.h index c853be23fd..9203c3eca6 100644 --- a/include/nbl/asset/interchange/SInterchangeIO.h +++ b/include/nbl/asset/interchange/SInterchangeIO.h @@ -31,14 +31,11 @@ class SInterchangeIO static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) { return isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy.runtimeTuning.tinyIoPayloadThresholdBytes, ioPolicy.runtimeTuning.tinyIoAvgBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinCallCount); } static inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SReadTelemetry* ioTelemetry = nullptr) { - if (!file || (!dst && bytes != 0ull)) - return false; - if (bytes == 0ull) - return true; + if (!file || (!dst && bytes != 0ull)) return false; + if (bytes == 0ull) return true; system::IFile::success_t success; file->read(success, dst, offset, bytes); - if (success && ioTelemetry) - ioTelemetry->account(success.getBytesProcessed()); + if (success && ioTelemetry) ioTelemetry->account(success.getBytesProcessed()); return success && success.getBytesProcessed() == bytes; } template> @@ -83,13 +80,11 @@ class SInterchangeIO struct SBufferRange { const void* data = nullptr; size_t byteCount = 0ull; }; 
static inline bool writeBuffersWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { - if (!file) - return false; + if (!file) return false; const uint64_t chunkSizeBytes = ioPlan.chunkSizeBytes(); for (const auto& buffer : buffers) { - if (!buffer.data && buffer.byteCount != 0ull) - return false; + if (!buffer.data && buffer.byteCount != 0ull) return false; if (buffer.byteCount == 0ull) continue; const auto* data = reinterpret_cast(buffer.data); diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index 74a6f668ce..09b2ea9f9e 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -97,7 +97,7 @@ struct SLoaderRuntimeTuner samples.push_back(elapsedNs); } if (samples.empty()) - return SBenchmarkSampleStats{}; + return {}; std::sort(samples.begin(), samples.end()); stats.minNs = samples.front(); stats.maxNs = samples.back(); diff --git a/src/nbl/asset/interchange/SGeometryViewDecode.h b/src/nbl/asset/interchange/SGeometryViewDecode.h index 5522605dfd..c596c0d78e 100644 --- a/src/nbl/asset/interchange/SGeometryViewDecode.h +++ b/src/nbl/asset/interchange/SGeometryViewDecode.h @@ -25,26 +25,17 @@ class SGeometryViewDecode hlsl::shapes::AABB<4, hlsl::float64_t> range = hlsl::shapes::AABB<4, hlsl::float64_t>::create(); inline explicit operator bool() const { return data != nullptr && stride != 0u && format != EF_UNKNOWN && channels != 0u; } template - inline bool decode(const size_t ix, std::array& out) const - { - out.fill(T{}); - return SGeometryViewDecode::template decodePrepared(*this, ix, out.data(), static_cast(N)); - } + inline bool decode(const size_t ix, std::array& out) const { out.fill(T{}); return SGeometryViewDecode::template decodePrepared(*this, ix, out.data(), static_cast(N)); } template requires 
hlsl::concepts::Vector - inline bool decode(const size_t ix, V& out) const - { - out = V{}; - return SGeometryViewDecode::template decodePrepared(*this, ix, out); - } + inline bool decode(const size_t ix, V& out) const { out = V{}; return SGeometryViewDecode::template decodePrepared(*this, ix, out); } }; template static inline Prepared prepare(const ICPUPolygonGeometry::SDataView& view) { Prepared retval = {}; if (!view.composed.isFormatted()) - return retval; - retval.data = reinterpret_cast(view.getPointer()); - if (!retval.data) + return {}; + if (!(retval.data = reinterpret_cast(view.getPointer()))) return {}; retval.stride = view.composed.getStride(); retval.format = view.composed.format; @@ -55,10 +46,7 @@ class SGeometryViewDecode return retval; } template - static inline bool decodeElement(const ICPUPolygonGeometry::SDataView& view, const size_t ix, Out& out) - { - return prepare(view).decode(ix, out); - } + static inline bool decodeElement(const ICPUPolygonGeometry::SDataView& view, const size_t ix, Out& out) { return prepare(view).decode(ix, out); } private: template static inline bool decodePreparedComponents(const Prepared& prepared, const size_t ix, T* out, const uint32_t outDim) diff --git a/src/nbl/asset/interchange/impl/SFileAccess.h b/src/nbl/asset/interchange/impl/SFileAccess.h index 2c2d990e12..239cd557ec 100644 --- a/src/nbl/asset/interchange/impl/SFileAccess.h +++ b/src/nbl/asset/interchange/impl/SFileAccess.h @@ -25,11 +25,7 @@ class SFileAccess { if (!SInterchangeIO::isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy)) return; - logger.log("%s tiny-io guard: file=%s %s=%llu min=%llu avg=%llu", - system::ILogger::ELL_WARNING, owner, fileName, opName, - static_cast(telemetry.callCount), - static_cast(telemetry.getMinOrZero()), - static_cast(telemetry.getAvgOrZero())); + logger.log("%s tiny-io guard: file=%s %s=%llu min=%llu avg=%llu", system::ILogger::ELL_WARNING, owner, fileName, opName, static_cast(telemetry.callCount), 
static_cast(telemetry.getMinOrZero()), static_cast(telemetry.getAvgOrZero())); } static inline const uint8_t* readRange(system::IFile* file, const size_t offset, const size_t bytes, core::vector& storage, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr, const bool zeroTerminate = false) { @@ -44,13 +40,13 @@ class SFileAccess { if (wasMapped) *wasMapped = false; - if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) { + if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) + { const auto* mapped = reinterpret_cast(static_cast(file)->getMappedPointer()); - if (mapped) { - if (ioTelemetry) - ioTelemetry->account(bytes); - if (wasMapped) - *wasMapped = true; + if (mapped) + { + if (ioTelemetry) ioTelemetry->account(bytes); + if (wasMapped) *wasMapped = true; return mapped; } } diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h index 0ff935e7b7..434c5adbf4 100644 --- a/src/nbl/asset/interchange/impl/STextParse.h +++ b/src/nbl/asset/interchange/impl/STextParse.h @@ -44,10 +44,8 @@ struct TextParse static inline void skipWhitespace(const char*& ptr, const char* const end) { while (ptr < end && core::isspace(*ptr)) ++ptr; } static inline std::string_view trimWhitespace(std::string_view token) { - while (!token.empty() && core::isspace(token.front())) - token.remove_prefix(1ull); - while (!token.empty() && core::isspace(token.back())) - token.remove_suffix(1ull); + while (!token.empty() && core::isspace(token.front())) token.remove_prefix(1ull); + while (!token.empty() && core::isspace(token.back())) token.remove_suffix(1ull); return token; } static inline std::optional readToken(const char*& cursor, const char* const end) @@ -59,8 +57,7 @@ struct TextParse while (tokenEnd < end && !core::isspace(*tokenEnd)) ++tokenEnd; const std::string_view token(cursor, static_cast(tokenEnd - cursor)); - cursor = tokenEnd; - return token; + return cursor = tokenEnd, token; } }; 
} From e319b7522652765aee089f0aa072497270f87760 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 17:38:46 +0100 Subject: [PATCH 085/118] Restore helper comments and spacing --- include/nbl/asset/interchange/IAssetLoader.h | 9 --- include/nbl/asset/interchange/IAssetWriter.h | 8 -- .../nbl/asset/interchange/IGeometryLoader.h | 8 -- include/nbl/asset/interchange/ISceneWriter.h | 9 --- include/nbl/asset/interchange/SFileIOPolicy.h | 80 ++++++++++--------- .../nbl/asset/interchange/SInterchangeIO.h | 23 +++--- .../asset/interchange/SLoaderRuntimeTuning.h | 25 ++++-- 7 files changed, 70 insertions(+), 92 deletions(-) diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 42a1ecc855..9ba1e5e14a 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -3,23 +3,15 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_I_ASSET_LOADER_H_INCLUDED_ #define _NBL_ASSET_I_ASSET_LOADER_H_INCLUDED_ - - #include "nbl/system/declarations.h" - #include "nbl/system/ISystem.h" #include "nbl/system/ILogger.h" - #include "nbl/core/util/bitflag.h" - #include "nbl/asset/interchange/SAssetBundle.h" #include "nbl/asset/interchange/SFileIOPolicy.h" #include "nbl/asset/utils/CGeometryCreator.h" - - namespace nbl::asset { - class CPolygonGeometryManipulator; //! A class automating process of loading Assets from resources, eg. 
files @@ -62,7 +54,6 @@ class CPolygonGeometryManipulator; @see IAssetManager @see IAssetWriter */ - class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted { public: diff --git a/include/nbl/asset/interchange/IAssetWriter.h b/include/nbl/asset/interchange/IAssetWriter.h index 4a4734fedb..2c4ff6bddf 100644 --- a/include/nbl/asset/interchange/IAssetWriter.h +++ b/include/nbl/asset/interchange/IAssetWriter.h @@ -3,18 +3,12 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_I_ASSET_WRITER_H_INCLUDED_ #define _NBL_ASSET_I_ASSET_WRITER_H_INCLUDED_ - - #include "nbl/system/IFile.h" #include "nbl/system/ILogger.h" - #include "nbl/asset/IAsset.h" #include "nbl/asset/interchange/SFileIOPolicy.h" - - namespace nbl::asset { - //! Writing flags /** They have an impact on writing (saving) an Asset. @@ -119,9 +113,7 @@ class IAssetWriter : public virtual core::IReferenceCounted const SAssetWriteParams params; system::IFile* outputFile; }; - public: - //! 
Returns an array of string literals terminated by nullptr virtual const char** getAssociatedFileExtensions() const = 0; diff --git a/include/nbl/asset/interchange/IGeometryLoader.h b/include/nbl/asset/interchange/IGeometryLoader.h index 90d1caa725..e51f8fb629 100644 --- a/include/nbl/asset/interchange/IGeometryLoader.h +++ b/include/nbl/asset/interchange/IGeometryLoader.h @@ -3,19 +3,13 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_I_GEOMETRY_LOADER_H_INCLUDED_ #define _NBL_ASSET_I_GEOMETRY_LOADER_H_INCLUDED_ - - #include "nbl/core/declarations.h" - #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/interchange/IAssetLoader.h" #include "nbl/asset/interchange/IImageAssetHandlerBase.h" #include "nbl/asset/utils/CGeometryManipulator.h" - - namespace nbl::asset { - class IGeometryLoader : public IAssetLoader { public: @@ -96,7 +90,5 @@ class IGeometryLoader : public IAssetLoader private: }; - } - #endif diff --git a/include/nbl/asset/interchange/ISceneWriter.h b/include/nbl/asset/interchange/ISceneWriter.h index 897a592dbc..4ca658c64e 100644 --- a/include/nbl/asset/interchange/ISceneWriter.h +++ b/include/nbl/asset/interchange/ISceneWriter.h @@ -3,27 +3,18 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_I_SCENE_WRITER_H_INCLUDED_ #define _NBL_ASSET_I_SCENE_WRITER_H_INCLUDED_ - - #include "nbl/core/declarations.h" - #include "nbl/asset/ICPUScene.h" #include "nbl/asset/interchange/IAssetWriter.h" - - namespace nbl::asset { - class ISceneWriter : public IAssetWriter { public: virtual inline uint64_t getSupportedAssetTypesBitfield() const override { return IAsset::ET_SCENE; } - protected: ISceneWriter() = default; virtual ~ISceneWriter() = default; }; - } - #endif diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 760890710f..8131210156 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ 
b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -14,47 +14,49 @@ namespace nbl::asset { enum class EFileIOStrategy : uint8_t { - Invalid = 0u, - Auto, - WholeFile, - Chunked + Invalid = 0u, // Sentinel used when strategy resolution fails or the value is uninitialized. + Auto, // Pick whole-file or chunked dynamically based on file size and policy limits. + WholeFile, // Force whole-file strategy. May fallback when not feasible unless strict=true. + Chunked // Force chunked strategy. }; -struct SFileIOPolicy +struct SFileIOPolicy // Requested IO policy shared by loaders, writers, and hash stages before file constraints are resolved. { - struct SRuntimeTuning + struct SRuntimeTuning // Runtime tuning knobs shared by loader parallelism and IO anomaly diagnostics. { - enum class Mode : uint8_t { Sequential, None = Sequential, Heuristic, Hybrid }; - Mode mode = Mode::Heuristic; - float maxOverheadRatio = 0.05f; - float samplingBudgetRatio = 0.05f; - float minExpectedGainRatio = 0.03f; - uint16_t maxWorkers = 0u; - uint8_t workerHeadroom = 2u; - uint8_t samplingMaxCandidates = 4u; - uint8_t samplingPasses = 1u; - uint64_t samplingMinWorkUnits = 0ull; - uint8_t targetChunksPerWorker = 4u; - uint8_t hashTaskTargetChunksPerWorker = 1u; - uint64_t hashInlineThresholdBytes = 1ull << 20; - uint64_t minSampleBytes = 4ull << 10; - uint64_t maxSampleBytes = 128ull << 10; - uint64_t tinyIoPayloadThresholdBytes = 1ull << 20; - uint64_t tinyIoAvgBytesThreshold = 1024ull; - uint64_t tinyIoMinBytesThreshold = 64ull; - uint64_t tinyIoMinCallCount = 1024ull; + /* Disable runtime tuning and force sequential execution. Backward-compatible alias for Sequential. Use deterministic heuristics derived from input size and hardware. Use heuristics and optionally refine with lightweight sampling. */ + enum class Mode : uint8_t { Sequential, None = Sequential, Heuristic, Hybrid }; // Runtime tuning strategy for worker/chunk selection. + Mode mode = Mode::Heuristic; // Runtime tuning mode. 
+ float maxOverheadRatio = 0.05f; // Maximum acceptable tuning overhead as a fraction of estimated full workload time. + float samplingBudgetRatio = 0.05f; // Maximum sampling budget as a fraction of estimated full workload time. + float minExpectedGainRatio = 0.03f; // Minimum expected gain required to keep extra workers enabled. + uint16_t maxWorkers = 0u; // Hard cap for worker count. 0 means auto. + uint8_t workerHeadroom = 2u; // Reserved hardware threads not used by the loader. Prevents full CPU saturation. + uint8_t samplingMaxCandidates = 4u; // Maximum number of worker-count candidates tested in hybrid mode. + uint8_t samplingPasses = 1u; // Number of benchmark passes per candidate in hybrid mode. + uint64_t samplingMinWorkUnits = 0ull; // Minimum work units required before hybrid sampling is allowed. 0 means auto. + uint8_t targetChunksPerWorker = 4u; // Target chunk count assigned to each worker for loader stages. + uint8_t hashTaskTargetChunksPerWorker = 1u; // Target chunk count assigned to each worker for hash stages. + uint64_t hashInlineThresholdBytes = 1ull << 20; // Hash inlining threshold. Inputs up to this size prefer inline hash build. + uint64_t minSampleBytes = 4ull << 10; // Lower bound for sampled byte count in hybrid mode. + uint64_t maxSampleBytes = 128ull << 10; // Upper bound for sampled byte count in hybrid mode. + uint64_t tinyIoPayloadThresholdBytes = 1ull << 20; // Payload size threshold for tiny-IO anomaly detection. + uint64_t tinyIoAvgBytesThreshold = 1024ull; // Average operation size threshold for tiny-IO anomaly detection. + uint64_t tinyIoMinBytesThreshold = 64ull; // Minimum operation size threshold for tiny-IO anomaly detection. + uint64_t tinyIoMinCallCount = 1024ull; // Minimum operation count required to report tiny-IO anomaly. 
}; using Strategy = EFileIOStrategy; enum E_FLAGS : uint8_t { EF_NONE = 0u, EF_STRICT_BIT = 1u << 0u }; - static inline constexpr uint64_t MIN_CHUNK_SIZE_BYTES = 64ull << 10u; + static inline constexpr uint64_t MIN_CHUNK_SIZE_BYTES = 64ull << 10u; // 64 KiB static inline constexpr uint8_t MIN_CHUNK_SIZE_LOG2 = static_cast(std::bit_width(MIN_CHUNK_SIZE_BYTES) - 1u); static inline constexpr uint8_t MAX_BYTE_SIZE_LOG2 = std::numeric_limits::digits - 1u; - static inline constexpr uint64_t DEFAULT_WHOLE_FILE_THRESHOLD_BYTES = 64ull << 20u; - static inline constexpr uint64_t DEFAULT_CHUNK_SIZE_BYTES = 4ull << 20u; - static inline constexpr uint64_t DEFAULT_MAX_STAGING_BYTES = 256ull << 20u; + static inline constexpr uint64_t DEFAULT_WHOLE_FILE_THRESHOLD_BYTES = 64ull << 20u; // 64 MiB + static inline constexpr uint64_t DEFAULT_CHUNK_SIZE_BYTES = 4ull << 20u; // 4 MiB + static inline constexpr uint64_t DEFAULT_MAX_STAGING_BYTES = 256ull << 20u; // 256 MiB + // These defaults are stored and clamped as log2(byte_count), so the source byte values must stay powers of two. 
static_assert(std::has_single_bit(MIN_CHUNK_SIZE_BYTES)); static_assert(std::has_single_bit(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES)); static_assert(std::has_single_bit(DEFAULT_CHUNK_SIZE_BYTES)); @@ -63,26 +65,26 @@ struct SFileIOPolicy static inline constexpr uint8_t clampBytesLog2(const uint8_t value, const uint8_t minValue = 0u) { return std::clamp(value, minValue, MAX_BYTE_SIZE_LOG2); } static inline constexpr uint64_t bytesFromLog2(const uint8_t value, const uint8_t minValue = 0u) { return 1ull << clampBytesLog2(value, minValue); } - Strategy strategy = Strategy::Auto; - core::bitflag flags = EF_NONE; - uint8_t wholeFileThresholdLog2 = static_cast(std::bit_width(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES) - 1u); - uint8_t chunkSizeLog2 = static_cast(std::bit_width(DEFAULT_CHUNK_SIZE_BYTES) - 1u); - uint8_t maxStagingLog2 = static_cast(std::bit_width(DEFAULT_MAX_STAGING_BYTES) - 1u); - SRuntimeTuning runtimeTuning = {}; + Strategy strategy = Strategy::Auto; // Requested IO strategy. Defaults to Auto. + core::bitflag flags = EF_NONE; // Resolution flags. Defaults to none. + uint8_t wholeFileThresholdLog2 = static_cast(std::bit_width(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES) - 1u); // Maximum payload size allowed for whole-file strategy in auto mode. Defaults to 64 MiB. + uint8_t chunkSizeLog2 = static_cast(std::bit_width(DEFAULT_CHUNK_SIZE_BYTES) - 1u); // Chunk size used by chunked strategy encoded as log2(bytes). Defaults to 4 MiB. + uint8_t maxStagingLog2 = static_cast(std::bit_width(DEFAULT_MAX_STAGING_BYTES) - 1u); // Maximum staging allocation for whole-file strategy encoded as log2(bytes). Defaults to 256 MiB. + SRuntimeTuning runtimeTuning = {}; // Runtime tuning controls used by loaders and hash stages. 
inline constexpr bool strict() const { return flags.hasAnyFlag(EF_STRICT_BIT); } inline constexpr uint64_t wholeFileThresholdBytes() const { return bytesFromLog2(wholeFileThresholdLog2, MIN_CHUNK_SIZE_LOG2); } inline constexpr uint64_t chunkSizeBytes() const { return bytesFromLog2(chunkSizeLog2, MIN_CHUNK_SIZE_LOG2); } inline constexpr uint64_t maxStagingBytes() const { return bytesFromLog2(maxStagingLog2, MIN_CHUNK_SIZE_LOG2); } }; -struct SResolvedFileIOPolicy +struct SResolvedFileIOPolicy // Resolved IO plan chosen from SFileIOPolicy after considering file size, mapping, and staging limits. { using Strategy = EFileIOStrategy; constexpr SResolvedFileIOPolicy() = default; inline constexpr SResolvedFileIOPolicy(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) : SResolvedFileIOPolicy(resolve(policy, byteCount, sizeKnown, fileMappable)) {} - Strategy strategy = Strategy::Invalid; - uint8_t chunkSizeLog2 = SFileIOPolicy::MIN_CHUNK_SIZE_LOG2; - const char* reason = "invalid"; + Strategy strategy = Strategy::Invalid; // Effective strategy chosen by resolver. Invalid means strict policy resolution failed. + uint8_t chunkSizeLog2 = SFileIOPolicy::MIN_CHUNK_SIZE_LOG2; // Effective chunk size encoded as log2(bytes). Also set for whole-file for telemetry consistency. + const char* reason = "invalid"; // Resolver reason string used in logs and diagnostics. inline constexpr bool isValid() const { return strategy != Strategy::Invalid; } diff --git a/include/nbl/asset/interchange/SInterchangeIO.h b/include/nbl/asset/interchange/SInterchangeIO.h index 9203c3eca6..ff6426b56a 100644 --- a/include/nbl/asset/interchange/SInterchangeIO.h +++ b/include/nbl/asset/interchange/SInterchangeIO.h @@ -14,13 +14,14 @@ #include namespace nbl::asset { -class SInterchangeIO +class SInterchangeIO // Shared read/write helpers that execute a resolved IO plan and collect simple telemetry. 
{ public: - struct STelemetry { uint64_t callCount = 0ull, totalBytes = 0ull, minBytes = std::numeric_limits::max(); inline void account(const uint64_t bytes) { ++callCount; totalBytes += bytes; if (bytes < minBytes) minBytes = bytes; } inline uint64_t getMinOrZero() const { return callCount ? minBytes : 0ull; } inline uint64_t getAvgOrZero() const { return callCount ? (totalBytes / callCount) : 0ull; } }; + struct STelemetry { uint64_t callCount = 0ull, totalBytes = 0ull, minBytes = std::numeric_limits::max(); inline void account(const uint64_t bytes) { ++callCount; totalBytes += bytes; if (bytes < minBytes) minBytes = bytes; } inline uint64_t getMinOrZero() const { return callCount ? minBytes : 0ull; } inline uint64_t getAvgOrZero() const { return callCount ? (totalBytes / callCount) : 0ull; } }; // Tracks IO call count and byte distribution for tiny-io diagnostics. using SReadTelemetry = STelemetry; using SWriteTelemetry = STelemetry; - static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const uint64_t bigPayloadThresholdBytes = (1ull << 20), const uint64_t lowAvgBytesThreshold = 1024ull, const uint64_t tinyChunkBytesThreshold = 64ull, const uint64_t tinyChunkCallsThreshold = 1024ull) + /* Default 1 MiB. Default 1 KiB. Default 64 B. Default 1024 calls. */ + static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const uint64_t bigPayloadThresholdBytes = (1ull << 20), const uint64_t lowAvgBytesThreshold = 1024ull, const uint64_t tinyChunkBytesThreshold = 64ull, const uint64_t tinyChunkCallsThreshold = 1024ull) // Flags large payloads that were served through suspiciously small IO calls. 
{ if (payloadBytes <= bigPayloadThresholdBytes) return false; @@ -28,8 +29,8 @@ class SInterchangeIO const uint64_t avgBytes = telemetry.getAvgOrZero(); return avgBytes < lowAvgBytesThreshold || (minBytes < tinyChunkBytesThreshold && telemetry.callCount > tinyChunkCallsThreshold); } - static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) { return isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy.runtimeTuning.tinyIoPayloadThresholdBytes, ioPolicy.runtimeTuning.tinyIoAvgBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinCallCount); } - static inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SReadTelemetry* ioTelemetry = nullptr) + static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) { return isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy.runtimeTuning.tinyIoPayloadThresholdBytes, ioPolicy.runtimeTuning.tinyIoAvgBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinCallCount); } // Same tiny-io heuristic but pulls thresholds from the resolved IO policy. + static inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SReadTelemetry* ioTelemetry = nullptr) // Issues one read request and verifies that the full byte count was returned. 
{ if (!file || (!dst && bytes != 0ull)) return false; if (bytes == 0ull) return true; @@ -40,7 +41,7 @@ class SInterchangeIO } template> requires std::same_as> - static inline bool readFileWithPolicy(system::IFile* file, void* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SReadTelemetry* ioTelemetry = nullptr, TimeUnit* ioTime = nullptr) + static inline bool readFileWithPolicy(system::IFile* file, void* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SReadTelemetry* ioTelemetry = nullptr, TimeUnit* ioTime = nullptr) // Reads a byte range using the resolved whole-file or chunked strategy. When ioTime is non-null it also reports wall time in TimeUnit. Default TimeUnit is milliseconds. { using clock_t = std::chrono::high_resolution_clock; const auto ioStart = ioTime ? clock_t::now() : clock_t::time_point{}; @@ -77,8 +78,8 @@ class SInterchangeIO } } } - struct SBufferRange { const void* data = nullptr; size_t byteCount = 0ull; }; - static inline bool writeBuffersWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) + struct SBufferRange { const void* data = nullptr; size_t byteCount = 0ull; }; // Describes one contiguous output buffer written as part of a larger stream. + static inline bool writeBuffersWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) // Writes one or more buffers sequentially at fileOffset and advances it on success. 
{ if (!file) return false; const uint64_t chunkSizeBytes = ioPlan.chunkSizeBytes(); @@ -107,9 +108,9 @@ class SInterchangeIO } return true; } - static inline bool writeBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, SWriteTelemetry* ioTelemetry = nullptr) { size_t fileOffset = 0ull; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } - static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } - static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicy(file, ioPlan, buffers, ioTelemetry); } + static inline bool writeBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, SWriteTelemetry* ioTelemetry = nullptr) { size_t fileOffset = 0ull; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } // Writes one or more buffers starting from file offset 0. + static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } // Single-buffer convenience wrapper over writeBuffersWithPolicyAtOffset. 
+ static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicy(file, ioPlan, buffers, ioTelemetry); } // Single-buffer convenience wrapper over writeBuffersWithPolicy. }; using SFileIOTelemetry = SInterchangeIO::STelemetry; using SFileReadTelemetry = SInterchangeIO::SReadTelemetry; diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index 09b2ea9f9e..b87938a262 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -17,6 +17,7 @@ namespace nbl::asset { struct SLoaderRuntimeTuningRequest { + /* Input describing one loader or hash stage that needs worker and chunk sizing. Total input bytes for the tuned stage. Total amount of stage work in logical units. Minimum work units assigned to one worker. Minimum input bytes assigned to one worker. Hardware thread count override. 0 means auto-detect. Hard cap for workers for this request. 0 means no extra cap. Preferred chunk count per worker for this stage. 0 means policy default. Minimum work units in one chunk. Maximum work units in one chunk. Pointer to representative sample bytes for hybrid sampling. Number of sample bytes available at sampleData. Sampling pass count override. 0 means policy default. Sampling candidate count override. 0 means policy default. Minimum work units required to allow sampling. 0 means policy or auto value. 
*/ uint64_t inputBytes = 0ull, totalWorkUnits = 0ull, minWorkUnitsPerWorker = 1ull, minBytesPerWorker = 1ull; uint32_t hardwareThreads = 0u, hardMaxWorkers = 0u, targetChunksPerWorker = 0u; uint64_t minChunkWorkUnits = 1ull, maxChunkWorkUnits = std::numeric_limits::max(); @@ -24,20 +25,22 @@ struct SLoaderRuntimeTuningRequest uint64_t sampleBytes = 0ull, sampleMinWorkUnits = 0ull; uint32_t samplePasses = 0u, sampleMaxCandidates = 0u; }; -struct SLoaderRuntimeTuningResult +struct SLoaderRuntimeTuningResult /* Final worker and chunk layout selected for one stage. */ { + /* Selected worker count for the stage. Work units per chunk assigned by tuner. Total chunk count for the stage. */ size_t workerCount = 1ull, chunkCount = 1ull; uint64_t chunkWorkUnits = 1ull; }; -struct SLoaderRuntimeTuner +struct SLoaderRuntimeTuner /* Stateless runtime tuner used by loaders and hash stages to size worker pools and chunking. */ { private: - struct SBenchmarkSampleStats { uint64_t medianNs = 0ull, minNs = 0ull, maxNs = 0ull, totalNs = 0ull; }; + struct SBenchmarkSampleStats { uint64_t medianNs = 0ull, minNs = 0ull, maxNs = 0ull, totalNs = 0ull; }; // Aggregated timings collected while probing one worker-count candidate. public: template requires std::invocable static void dispatchWorkers(const size_t workerCount, Fn&& fn) { + // std::jthread starts execution in its constructor, so emplace_back launches workers 1..N-1 immediately. The current thread runs worker 0 and std::jthread joins automatically when the local vector is destroyed. 
if (workerCount <= 1ull) return fn(0ull); std::vector workers; @@ -46,11 +49,12 @@ struct SLoaderRuntimeTuner workers.emplace_back([&fn, workerIx]() { fn(workerIx); }); fn(0ull); } - static constexpr uint64_t ceilDiv(const uint64_t numerator, const uint64_t denominator) { return (numerator + denominator - 1ull) / denominator; } + static constexpr uint64_t ceilDiv(const uint64_t numerator, const uint64_t denominator) { return (numerator + denominator - 1ull) / denominator; } // Integer ceil division. Callers must pass a non-zero denominator. template requires std::same_as> static inline TimeUnit benchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes) { + // Measures one sampled memory-touch pass configuration and returns aggregate wall time across all passes. if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) return TimeUnit::zero(); const uint32_t passCount = std::max(1u, passes); @@ -81,6 +85,7 @@ struct SLoaderRuntimeTuner } static inline SBenchmarkSampleStats benchmarkSampleStats(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes, const uint32_t observations) { + // Warms up once and then collects timing observations for one worker-count candidate. 
SBenchmarkSampleStats stats = {}; if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) return stats; @@ -107,9 +112,10 @@ struct SLoaderRuntimeTuner stats.medianNs = (samples[samples.size() / 2ull - 1ull] + samples[samples.size() / 2ull]) / 2ull; return stats; } - static inline void appendCandidate(std::vector& dst, const size_t candidate) { if (candidate != 0ull && std::find(dst.begin(), dst.end(), candidate) == dst.end()) dst.push_back(candidate); } + static inline void appendCandidate(std::vector& dst, const size_t candidate) { if (candidate != 0ull && std::find(dst.begin(), dst.end(), candidate) == dst.end()) dst.push_back(candidate); } // Keeps the candidate probe list unique while preserving insertion order. static inline uint64_t resolveSampleBytes(const SFileIOPolicy& ioPolicy, const uint64_t knownInputBytes) { + // Chooses the sample byte budget used by hybrid tuning from the known input size and policy clamps. if (knownInputBytes == 0ull) return 0ull; const uint64_t minSampleBytes = std::max(1ull, ioPolicy.runtimeTuning.minSampleBytes); @@ -119,10 +125,11 @@ struct SLoaderRuntimeTuner const uint64_t adaptive = std::max(knownInputBytes / 64ull, cappedMin); return std::clamp(adaptive, cappedMin, cappedMax); } - static inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) { return inputBytes <= std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); } - static inline size_t resolveHardwareThreads(const uint32_t requested = 0u) { const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); return hw ? hw : 1ull; } + static inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) { return inputBytes <= std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); } // Returns true when the hash build is small enough to stay on the caller thread. 
+ static inline size_t resolveHardwareThreads(const uint32_t requested = 0u) { const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); return hw ? hw : 1ull; } // Resolves the effective hardware thread count and always returns at least one worker. static inline size_t resolveHardMaxWorkers(const size_t hardwareThreads, const uint32_t workerHeadroom) { + // Applies worker headroom while keeping at least two workers when parallel hardware is available. const size_t hw = std::max(1ull, hardwareThreads), minWorkers = hw >= 2ull ? 2ull : 1ull, headroom = static_cast(workerHeadroom); if (headroom == 0ull) return hw; @@ -132,6 +139,7 @@ struct SLoaderRuntimeTuner } static inline SLoaderRuntimeTuningResult tune(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request) { + // Resolves worker and chunk counts for one stage using policy limits plus optional hybrid sampling. using RTMode = SFileIOPolicy::SRuntimeTuning::Mode; SLoaderRuntimeTuningResult result = {}; if (request.totalWorkUnits == 0ull) @@ -184,6 +192,7 @@ struct SLoaderRuntimeTuner { if (request.inputBytes > 0ull) { + // keep probing lightweight: sample fraction scales with input and parallelism const uint64_t sampleDivisor = std::max(4ull, static_cast(heuristicWorkerCount) * static_cast(targetChunksPerWorker)); const uint64_t adaptiveSampleBytes = std::max(1ull, request.inputBytes / sampleDivisor); effectiveSampleBytes = std::min(effectiveSampleBytes, adaptiveSampleBytes); @@ -202,7 +211,7 @@ struct SLoaderRuntimeTuner appendCandidate(candidates, heuristicWorkerCount + 2ull); if (candidates.size() > maxCandidates) candidates.resize(maxCandidates); - const auto heuristicStatsProbe = benchmarkSampleStats(request.sampleData, effectiveSampleBytes, heuristicWorkerCount, samplePasses, 2u); + const auto heuristicStatsProbe = benchmarkSampleStats(request.sampleData, effectiveSampleBytes, heuristicWorkerCount, samplePasses, 2u); // probe heuristic first 
and only continue when budget can amortize additional probes if (heuristicStatsProbe.medianNs > 0ull) { const double scale = request.inputBytes ? (static_cast(request.inputBytes) / static_cast(effectiveSampleBytes)) : 1.0; From e4278de3b5416b490cae406477d993d42d258f3f Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 18:08:12 +0100 Subject: [PATCH 086/118] Improve interchange comment formatting --- .../nbl/asset/interchange/COBJMeshWriter.h | 16 +-- include/nbl/asset/interchange/IAssetLoader.h | 52 ++++++--- include/nbl/asset/interchange/IAssetWriter.h | 14 ++- .../nbl/asset/interchange/IGeometryLoader.h | 7 +- include/nbl/asset/interchange/ISceneWriter.h | 1 + include/nbl/asset/interchange/SFileIOPolicy.h | 108 +++++++++++------- .../asset/interchange/SGeometryContentHash.h | 6 + .../asset/interchange/SGeometryLoaderCommon.h | 3 + .../asset/interchange/SGeometryWriterCommon.h | 23 +++- .../nbl/asset/interchange/SInterchangeIO.h | 60 ++++++++-- .../asset/interchange/SLoaderRuntimeTuning.h | 81 +++++++++---- .../asset/interchange/COBJMeshFileLoader.h | 20 ++-- .../asset/interchange/CPLYMeshFileLoader.h | 8 +- src/nbl/asset/interchange/CPLYMeshWriter.h | 7 +- .../asset/interchange/CSTLMeshFileLoader.h | 11 +- src/nbl/asset/interchange/CSTLMeshWriter.h | 7 +- .../asset/interchange/SGeometryViewDecode.h | 35 +++++- src/nbl/asset/interchange/impl/SBinaryData.h | 8 ++ src/nbl/asset/interchange/impl/SFileAccess.h | 30 ++++- src/nbl/asset/interchange/impl/STextParse.h | 14 +++ 20 files changed, 355 insertions(+), 156 deletions(-) diff --git a/include/nbl/asset/interchange/COBJMeshWriter.h b/include/nbl/asset/interchange/COBJMeshWriter.h index d0a1c0c3dc..7159f7f21c 100644 --- a/include/nbl/asset/interchange/COBJMeshWriter.h +++ b/include/nbl/asset/interchange/COBJMeshWriter.h @@ -3,17 +3,18 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_OBJ_MESH_WRITER_H_INCLUDED_ #define 
_NBL_ASSET_OBJ_MESH_WRITER_H_INCLUDED_ - #include "nbl/asset/interchange/ISceneWriter.h" - namespace nbl::asset { -/* +/** Writes OBJ from a single polygon geometry, a geometry collection, or a scene. OBJ itself is still treated here as final flattened geometry data, not as a scene format. - Scene input is accepted only as export input: the writer bakes transforms and serializes all collected polygon geometries into one OBJ stream. - This preserves the final shape but does not try to keep scene-only structure such as hierarchy or instancing. - In other words `ET_SCENE -> OBJ` is supported as flattening, not as round-tripping scene semantics through the OBJ format. + Scene input is accepted only as export input: the writer bakes transforms + and serializes all collected polygon geometries into one OBJ stream. + This preserves the final shape but does not try to keep scene-only structure + such as hierarchy or instancing. + In other words `ET_SCENE -> OBJ` is supported as flattening, + not as round-tripping scene semantics through the OBJ format. */ class COBJMeshWriter : public ISceneWriter { @@ -32,5 +33,4 @@ class COBJMeshWriter : public ISceneWriter }; } // end namespace - -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 9ba1e5e14a..548e020267 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -38,7 +38,9 @@ class CPolygonGeometryManipulator; where 2 bits represent one single level, so we've been on second level). Notice that loading process can be seen as a chain. When you're loading a mesh, it can references a submesh. Submesh can reference graphics pipeline and descriptor set. Descriptor set can reference, for example, textures. 
- Hierarchy level is distance in such chain/tree from Root Asset (the one you asked for by calling IAssetManager::getAsset()) and the currently loaded Asset (needed by Root Asset). + Hierarchy level is distance in such chain/tree from Root Asset + (the one you asked for by calling IAssetManager::getAsset()) + and the currently loaded Asset (needed by Root Asset). When the class derived from IAssetLoader is added, its put once on an vector and once on an multimap @@ -84,9 +86,14 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted enum E_LOADER_PARAMETER_FLAGS : uint64_t { ELPF_NONE = 0, //!< default value, it doesn't do anything -//[[deprecated]] ELPF_RIGHT_HANDED_MESHES = 0x1, //!< specifies that a mesh will be flipped in such a way that it'll look correctly in right-handed camera system -//[[deprecated]] ELPF_DONT_COMPILE_GLSL = 0x2, //!< it states that GLSL won't be compiled to SPIR-V if it is loaded or generated - ELPF_LOAD_METADATA_ONLY = 0x4, //!< it forces the loader to not load the entire scene for performance in special cases to fetch metadata. + // [[deprecated]] ELPF_RIGHT_HANDED_MESHES = 0x1 + // specifies that a mesh will be flipped in such a way + // that it'll look correctly in right-handed camera system + // [[deprecated]] ELPF_DONT_COMPILE_GLSL = 0x2 + // it states that GLSL won't be compiled to SPIR-V if it is loaded or generated + + //! it forces the loader to not load the entire scene for performance in special cases to fetch metadata. + ELPF_LOAD_METADATA_ONLY = 0x4, ELPF_DONT_COMPUTE_CONTENT_HASHES = 0x8 //!< opt-out from computing content hashes of produced buffers before returning. }; using loader_flags_t = core::bitflag; @@ -201,7 +208,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted return m_creationParams.polyGeoManip.get(); }*/ - //! + //! Typed convenience wrapper over the untyped `findDefaultAsset` overload. 
template inline std::pair,const IAssetMetadata*> findDefaultAsset(const std::string& inSearchKey, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { @@ -211,7 +218,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted // The only reason these functions are not declared static is to allow stateful overrides - //! + //! Finds one default asset for a key and asset type after cache lookup. inline virtual std::pair,const IAssetMetadata*> findDefaultAsset(const std::string& inSearchKey, const IAsset::E_TYPE assetType, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { size_t storageSz = 1ull; @@ -225,7 +232,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted return { chooseDefaultAsset(bundle,ctx),bundle.getMetadata() }; } - //! + //! Chooses one default asset from a bundle returned by cache or load flow. inline virtual core::smart_refctd_ptr chooseDefaultAsset(const SAssetBundle& bundle, const SAssetLoadContext& ctx) { auto contents = bundle.getContents(); @@ -234,11 +241,15 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted return *contents.begin(); } - //! The most imporant overrides are the ones for caching + //! The most imporant overrides are the ones for caching. virtual SAssetBundle findCachedAsset(const std::string& inSearchKey, const IAsset::E_TYPE* inAssetTypes, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel); - //! Since more then one asset of the same key of the same type can exist, this function is called right after search for cached assets (if anything was found) and decides which of them is relevant. - //! Note: this function can assume that `found` is never empty. + /** + Since more then one asset of the same key of the same type can exist, + this function is called right after search for cached assets + (if anything was found) and decides which of them is relevant. + Note: this function can assume that `found` is never empty. 
+ */ inline virtual SAssetBundle chooseRelevantFromFound(const SAssetBundle* foundBegin, const SAssetBundle* foundEnd, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { return *foundBegin; @@ -267,18 +278,27 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted // otherwise it was already absolute } - //! This function can be used to swap out the actually opened (or unknown unopened file if `inFile` is nullptr) file for a different one. - /** Especially useful if you've used some sort of a fake path and the file won't load from that path just via `io::IFileSystem` . */ + /** + This function can be used to swap out the actually opened + (or unknown unopened file if `inFile` is nullptr) file for a different one. + Especially useful if you've used some sort of a fake path + and the file won't load from that path just via `io::IFileSystem`. + */ inline virtual core::smart_refctd_ptr getLoadFile(system::IFile* inFile, const std::string& supposedFilename, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { return core::smart_refctd_ptr(inFile); } //! When you sometimes have different passwords for different assets - /** \param inOutDecrKeyLen expects length of buffer `outDecrKey`, then function writes into it length of actual key. - Write to `outDecrKey` happens only if output value of `inOutDecrKeyLen` is less or equal to input value of `inOutDecrKeyLen`. - \param supposedFilename is the string after modification by getLoadFilename. - \param attempt if decryption or validation algorithm supports reporting failure, you can try different key*/ + /** + \param inOutDecrKeyLen expects length of buffer `outDecrKey`, + then function writes into it length of actual key. + Write to `outDecrKey` happens only if output value of `inOutDecrKeyLen` + is less or equal to input value of `inOutDecrKeyLen`. + \param supposedFilename is the string after modification by getLoadFilename. 
+ \param attempt if decryption or validation algorithm supports reporting failure, + you can try different key + */ inline virtual bool getDecryptionKey(uint8_t* outDecrKey, size_t& inOutDecrKeyLen, const uint32_t attempt, const system::IFile* assetsFile, const std::string& supposedFilename, const std::string& cacheKey, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { if (ctx.params.decryptionKeyLen <= inOutDecrKeyLen) diff --git a/include/nbl/asset/interchange/IAssetWriter.h b/include/nbl/asset/interchange/IAssetWriter.h index 2c4ff6bddf..8e02aa7bc7 100644 --- a/include/nbl/asset/interchange/IAssetWriter.h +++ b/include/nbl/asset/interchange/IAssetWriter.h @@ -28,10 +28,11 @@ enum E_WRITER_FLAGS : uint32_t EWF_NONE = 0u, //!< No writer flags (default writer settings) EWF_COMPRESSED = 1u<<0u, //!< Write in a way that consumes less disk space if possible EWF_ENCRYPTED = 1u<<1u, //!< Write encrypted if possible - //! write in binary format rather than text if possible - EWF_BINARY = 1u << 2u, - - //!< specifies the incoming orientation of loaded mesh we want to write. Flipping will be performed if needed in dependency of format extension orientation + EWF_BINARY = 1u << 2u, //!< Write in binary format rather than text if possible + /** + Specifies the incoming orientation of loaded mesh we want to write. + Flipping will be performed if needed in dependency of format extension orientation. + */ EWF_MESH_IS_RIGHT_HANDED = 1u << 3u }; using writer_flags_t = core::bitflag; @@ -160,16 +161,17 @@ class IAssetWriter : public virtual core::IReferenceCounted return ctx.params.encryptionKeyLen; } - //! If the writer has to output multiple files (e.g. write out textures) + //! If the writer has to output multiple files (e.g. write out textures). 
inline virtual void getExtraFilePaths(std::string& inOutAbsoluteFileWritePath, std::string& inOutPathToRecord, const SAssetWriteContext& ctx, std::pair assetsToWriteAndTheirLevel) {} // do absolutely nothing, no changes to paths + //! Lets the override replace the seeked destination file for one sub-asset. inline virtual system::IFile* getOutputFile(system::IFile* origIntendedOutput, const SAssetWriteContext& ctx, std::pair assetsToWriteAndTheirLeve) { // if you want to return something else, better drop origIntendedOutput return origIntendedOutput; } - //!This function is supposed to give an already seeked file the IAssetWriter can write to + //! This function is supposed to give an already seeked file the IAssetWriter can write to. inline virtual system::IFile* handleWriteError(system::IFile* failingFile, const uint32_t& failedPos, const SAssetWriteContext& ctx, const IAsset* assetToWrite, const uint32_t& hierarchyLevel) { return nullptr; // no handling of fail diff --git a/include/nbl/asset/interchange/IGeometryLoader.h b/include/nbl/asset/interchange/IGeometryLoader.h index e51f8fb629..4f6321d7bc 100644 --- a/include/nbl/asset/interchange/IGeometryLoader.h +++ b/include/nbl/asset/interchange/IGeometryLoader.h @@ -10,6 +10,7 @@ #include "nbl/asset/utils/CGeometryManipulator.h" namespace nbl::asset { +//! Geometry loader base shared by mesh-style interchange formats. class IGeometryLoader : public IAssetLoader { public: @@ -18,6 +19,7 @@ class IGeometryLoader : public IAssetLoader protected: inline IGeometryLoader() {} + //! Creates one geometry data view from caller-owned memory or copied storage. template static inline IGeometry::SDataView createView( const E_FORMAT format, const size_t elementCount, const void* data=nullptr, @@ -45,7 +47,8 @@ class IGeometryLoader : public IAssetLoader } return retval; } - // creates a View from a mapped file + + //! Memory resource that keeps a mapped file alive while adopted geometry views reference it. 
class CFileMemoryResource final : public core::refctd_memory_resource { public: @@ -65,6 +68,8 @@ class IGeometryLoader : public IAssetLoader protected: core::smart_refctd_ptr m_file; }; + + //! Creates one geometry data view backed directly by a mapped file or by copied file contents. static inline IGeometry::SDataView createView(const E_FORMAT format, const size_t elementCount, core::smart_refctd_ptr&& file, const size_t offsetInFile) { if (auto* const basePtr=reinterpret_cast(file->getMappedPointer()); basePtr) diff --git a/include/nbl/asset/interchange/ISceneWriter.h b/include/nbl/asset/interchange/ISceneWriter.h index 4ca658c64e..94e4548270 100644 --- a/include/nbl/asset/interchange/ISceneWriter.h +++ b/include/nbl/asset/interchange/ISceneWriter.h @@ -8,6 +8,7 @@ #include "nbl/asset/interchange/IAssetWriter.h" namespace nbl::asset { +//! Writer base for exporters whose root asset type is `ET_SCENE`. class ISceneWriter : public IAssetWriter { public: diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 8131210156..3525f51b5f 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -12,51 +12,63 @@ #include namespace nbl::asset { +//! Requested IO strategy selected before file size and mapping constraints are resolved. enum class EFileIOStrategy : uint8_t { - Invalid = 0u, // Sentinel used when strategy resolution fails or the value is uninitialized. - Auto, // Pick whole-file or chunked dynamically based on file size and policy limits. - WholeFile, // Force whole-file strategy. May fallback when not feasible unless strict=true. - Chunked // Force chunked strategy. + Invalid = 0u, //!< Sentinel used when strategy resolution fails or the value is uninitialized. + Auto, //!< Pick whole-file or chunked dynamically based on file size and policy limits. + WholeFile, //!< Force whole-file strategy. May fallback when not feasible unless strict=true. 
+ Chunked //!< Force chunked strategy. }; -struct SFileIOPolicy // Requested IO policy shared by loaders, writers, and hash stages before file constraints are resolved. + +//! Requested IO policy shared by loaders, writers, and hash stages before file constraints are resolved. +struct SFileIOPolicy { - struct SRuntimeTuning // Runtime tuning knobs shared by loader parallelism and IO anomaly diagnostics. + //! Runtime tuning knobs shared by loader parallelism and IO anomaly diagnostics. + struct SRuntimeTuning { - /* Disable runtime tuning and force sequential execution. Backward-compatible alias for Sequential. Use deterministic heuristics derived from input size and hardware. Use heuristics and optionally refine with lightweight sampling. */ - enum class Mode : uint8_t { Sequential, None = Sequential, Heuristic, Hybrid }; // Runtime tuning strategy for worker/chunk selection. - Mode mode = Mode::Heuristic; // Runtime tuning mode. - float maxOverheadRatio = 0.05f; // Maximum acceptable tuning overhead as a fraction of estimated full workload time. - float samplingBudgetRatio = 0.05f; // Maximum sampling budget as a fraction of estimated full workload time. - float minExpectedGainRatio = 0.03f; // Minimum expected gain required to keep extra workers enabled. - uint16_t maxWorkers = 0u; // Hard cap for worker count. 0 means auto. - uint8_t workerHeadroom = 2u; // Reserved hardware threads not used by the loader. Prevents full CPU saturation. - uint8_t samplingMaxCandidates = 4u; // Maximum number of worker-count candidates tested in hybrid mode. - uint8_t samplingPasses = 1u; // Number of benchmark passes per candidate in hybrid mode. - uint64_t samplingMinWorkUnits = 0ull; // Minimum work units required before hybrid sampling is allowed. 0 means auto. - uint8_t targetChunksPerWorker = 4u; // Target chunk count assigned to each worker for loader stages. - uint8_t hashTaskTargetChunksPerWorker = 1u; // Target chunk count assigned to each worker for hash stages. 
- uint64_t hashInlineThresholdBytes = 1ull << 20; // Hash inlining threshold. Inputs up to this size prefer inline hash build. - uint64_t minSampleBytes = 4ull << 10; // Lower bound for sampled byte count in hybrid mode. - uint64_t maxSampleBytes = 128ull << 10; // Upper bound for sampled byte count in hybrid mode. - uint64_t tinyIoPayloadThresholdBytes = 1ull << 20; // Payload size threshold for tiny-IO anomaly detection. - uint64_t tinyIoAvgBytesThreshold = 1024ull; // Average operation size threshold for tiny-IO anomaly detection. - uint64_t tinyIoMinBytesThreshold = 64ull; // Minimum operation size threshold for tiny-IO anomaly detection. - uint64_t tinyIoMinCallCount = 1024ull; // Minimum operation count required to report tiny-IO anomaly. + //! Runtime tuning strategy for worker/chunk selection. + enum class Mode : uint8_t + { + Sequential, //!< Disable runtime tuning and force sequential execution. + None = Sequential, //!< Backward-compatible alias for Sequential. + Heuristic, //!< Use deterministic heuristics derived from input size and hardware. + Hybrid //!< Use heuristics and optionally refine with lightweight sampling. + }; + + Mode mode = Mode::Heuristic; //!< Runtime tuning mode. + float maxOverheadRatio = 0.05f; //!< Maximum acceptable tuning overhead as a fraction of estimated full workload time. + float samplingBudgetRatio = 0.05f; //!< Maximum sampling budget as a fraction of estimated full workload time. + float minExpectedGainRatio = 0.03f; //!< Minimum expected gain required to keep extra workers enabled. + uint16_t maxWorkers = 0u; //!< Hard cap for worker count. 0 means auto. + uint8_t workerHeadroom = 2u; //!< Reserved hardware threads not used by the loader. Prevents full CPU saturation. + uint8_t samplingMaxCandidates = 4u; //!< Maximum number of worker-count candidates tested in hybrid mode. + uint8_t samplingPasses = 1u; //!< Number of benchmark passes per candidate in hybrid mode. 
+ uint64_t samplingMinWorkUnits = 0ull; //!< Minimum work units required before hybrid sampling is allowed. 0 means auto. + uint8_t targetChunksPerWorker = 4u; //!< Target chunk count assigned to each worker for loader stages. + uint8_t hashTaskTargetChunksPerWorker = 1u; //!< Target chunk count assigned to each worker for hash stages. + uint64_t hashInlineThresholdBytes = 1ull << 20; //!< Hash inlining threshold. Inputs up to this size prefer inline hash build. + uint64_t minSampleBytes = 4ull << 10; //!< Lower bound for sampled byte count in hybrid mode. + uint64_t maxSampleBytes = 128ull << 10; //!< Upper bound for sampled byte count in hybrid mode. + uint64_t tinyIoPayloadThresholdBytes = 1ull << 20; //!< Payload size threshold for tiny-IO anomaly detection. + uint64_t tinyIoAvgBytesThreshold = 1024ull; //!< Average operation size threshold for tiny-IO anomaly detection. + uint64_t tinyIoMinBytesThreshold = 64ull; //!< Minimum operation size threshold for tiny-IO anomaly detection. + uint64_t tinyIoMinCallCount = 1024ull; //!< Minimum operation count required to report tiny-IO anomaly. }; using Strategy = EFileIOStrategy; + //! Extra resolution flags affecting fallback behavior. enum E_FLAGS : uint8_t { EF_NONE = 0u, EF_STRICT_BIT = 1u << 0u }; - static inline constexpr uint64_t MIN_CHUNK_SIZE_BYTES = 64ull << 10u; // 64 KiB + static inline constexpr uint64_t MIN_CHUNK_SIZE_BYTES = 64ull << 10u; //!< 64 KiB. 
static inline constexpr uint8_t MIN_CHUNK_SIZE_LOG2 = static_cast(std::bit_width(MIN_CHUNK_SIZE_BYTES) - 1u); static inline constexpr uint8_t MAX_BYTE_SIZE_LOG2 = std::numeric_limits::digits - 1u; - static inline constexpr uint64_t DEFAULT_WHOLE_FILE_THRESHOLD_BYTES = 64ull << 20u; // 64 MiB - static inline constexpr uint64_t DEFAULT_CHUNK_SIZE_BYTES = 4ull << 20u; // 4 MiB - static inline constexpr uint64_t DEFAULT_MAX_STAGING_BYTES = 256ull << 20u; // 256 MiB + static inline constexpr uint64_t DEFAULT_WHOLE_FILE_THRESHOLD_BYTES = 64ull << 20u; //!< 64 MiB. + static inline constexpr uint64_t DEFAULT_CHUNK_SIZE_BYTES = 4ull << 20u; //!< 4 MiB. + static inline constexpr uint64_t DEFAULT_MAX_STAGING_BYTES = 256ull << 20u; //!< 256 MiB. - // These defaults are stored and clamped as log2(byte_count), so the source byte values must stay powers of two. + //! These defaults are stored and clamped as log2(byte_count), so the source byte values must stay powers of two. static_assert(std::has_single_bit(MIN_CHUNK_SIZE_BYTES)); static_assert(std::has_single_bit(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES)); static_assert(std::has_single_bit(DEFAULT_CHUNK_SIZE_BYTES)); @@ -65,26 +77,40 @@ struct SFileIOPolicy // Requested IO policy shared by loaders, writers, and hash static inline constexpr uint8_t clampBytesLog2(const uint8_t value, const uint8_t minValue = 0u) { return std::clamp(value, minValue, MAX_BYTE_SIZE_LOG2); } static inline constexpr uint64_t bytesFromLog2(const uint8_t value, const uint8_t minValue = 0u) { return 1ull << clampBytesLog2(value, minValue); } - Strategy strategy = Strategy::Auto; // Requested IO strategy. Defaults to Auto. - core::bitflag flags = EF_NONE; // Resolution flags. Defaults to none. - uint8_t wholeFileThresholdLog2 = static_cast(std::bit_width(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES) - 1u); // Maximum payload size allowed for whole-file strategy in auto mode. Defaults to 64 MiB. 
- uint8_t chunkSizeLog2 = static_cast(std::bit_width(DEFAULT_CHUNK_SIZE_BYTES) - 1u); // Chunk size used by chunked strategy encoded as log2(bytes). Defaults to 4 MiB. - uint8_t maxStagingLog2 = static_cast(std::bit_width(DEFAULT_MAX_STAGING_BYTES) - 1u); // Maximum staging allocation for whole-file strategy encoded as log2(bytes). Defaults to 256 MiB. - SRuntimeTuning runtimeTuning = {}; // Runtime tuning controls used by loaders and hash stages. + + Strategy strategy = Strategy::Auto; //!< Requested IO strategy. Defaults to Auto. + core::bitflag flags = EF_NONE; //!< Resolution flags. Defaults to none. + + //! Maximum payload size allowed for whole-file strategy in auto mode. Defaults to 64 MiB. + uint8_t wholeFileThresholdLog2 = static_cast(std::bit_width(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES) - 1u); + + //! Chunk size used by chunked strategy encoded as log2(bytes). Defaults to 4 MiB. + uint8_t chunkSizeLog2 = static_cast(std::bit_width(DEFAULT_CHUNK_SIZE_BYTES) - 1u); + + //! Maximum staging allocation for whole-file strategy encoded as log2(bytes). Defaults to 256 MiB. + uint8_t maxStagingLog2 = static_cast(std::bit_width(DEFAULT_MAX_STAGING_BYTES) - 1u); + + SRuntimeTuning runtimeTuning = {}; //!< Runtime tuning controls used by loaders and hash stages. + inline constexpr bool strict() const { return flags.hasAnyFlag(EF_STRICT_BIT); } inline constexpr uint64_t wholeFileThresholdBytes() const { return bytesFromLog2(wholeFileThresholdLog2, MIN_CHUNK_SIZE_LOG2); } inline constexpr uint64_t chunkSizeBytes() const { return bytesFromLog2(chunkSizeLog2, MIN_CHUNK_SIZE_LOG2); } inline constexpr uint64_t maxStagingBytes() const { return bytesFromLog2(maxStagingLog2, MIN_CHUNK_SIZE_LOG2); } }; -struct SResolvedFileIOPolicy // Resolved IO plan chosen from SFileIOPolicy after considering file size, mapping, and staging limits. + +//! Resolved IO plan chosen from SFileIOPolicy after considering file size, mapping, and staging limits. 
+struct SResolvedFileIOPolicy { using Strategy = EFileIOStrategy; constexpr SResolvedFileIOPolicy() = default; inline constexpr SResolvedFileIOPolicy(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) : SResolvedFileIOPolicy(resolve(policy, byteCount, sizeKnown, fileMappable)) {} - Strategy strategy = Strategy::Invalid; // Effective strategy chosen by resolver. Invalid means strict policy resolution failed. - uint8_t chunkSizeLog2 = SFileIOPolicy::MIN_CHUNK_SIZE_LOG2; // Effective chunk size encoded as log2(bytes). Also set for whole-file for telemetry consistency. - const char* reason = "invalid"; // Resolver reason string used in logs and diagnostics. + Strategy strategy = Strategy::Invalid; //!< Effective strategy chosen by resolver. Invalid means strict policy resolution failed. + + //! Effective chunk size encoded as log2(bytes). Also set for whole-file for telemetry consistency. + uint8_t chunkSizeLog2 = SFileIOPolicy::MIN_CHUNK_SIZE_LOG2; + + const char* reason = "invalid"; //!< Resolver reason string used in logs and diagnostics. inline constexpr bool isValid() const { return strategy != Strategy::Invalid; } diff --git a/include/nbl/asset/interchange/SGeometryContentHash.h b/include/nbl/asset/interchange/SGeometryContentHash.h index a8ddf4d3ce..c7353dea9b 100644 --- a/include/nbl/asset/interchange/SGeometryContentHash.h +++ b/include/nbl/asset/interchange/SGeometryContentHash.h @@ -8,13 +8,16 @@ #include "nbl/core/hash/blake.h" namespace nbl::asset { +//! Geometry-content-hash helper operating on all unique buffers referenced by one polygon geometry. class SPolygonGeometryContentHash { public: using mode_t = CPolygonGeometryManipulator::EContentHashMode; + //! Collects all unique buffers contributing to the geometry content hash. 
static inline void collectBuffers(const ICPUPolygonGeometry* geometry, core::vector>& buffers) { CPolygonGeometryManipulator::collectUniqueBuffers(geometry, buffers); } + //! Resets all referenced buffer hashes to `INVALID_HASH`. static inline void reset(ICPUPolygonGeometry* geometry) { core::vector> buffers; @@ -24,6 +27,7 @@ class SPolygonGeometryContentHash buffer->setContentHash(IPreHashed::INVALID_HASH); } + //! Composes the geometry hash from the current content hashes of all referenced buffers. static inline core::blake3_hash_t composeHashFromBufferContentHashes(const ICPUPolygonGeometry* geometry) { if (!geometry) @@ -44,8 +48,10 @@ class SPolygonGeometryContentHash return static_cast(hashBuilder); } + //! Computes missing buffer hashes and returns the composed geometry hash. static inline core::blake3_hash_t computeMissing(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) { CPolygonGeometryManipulator::computeMissingContentHashesParallel(geometry, ioPolicy); return composeHashFromBufferContentHashes(geometry); } + //! Recomputes all buffer hashes and returns the composed geometry hash. static inline core::blake3_hash_t recompute(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) { CPolygonGeometryManipulator::recomputeContentHashesParallel(geometry, ioPolicy); return composeHashFromBufferContentHashes(geometry); } }; } diff --git a/include/nbl/asset/interchange/SGeometryLoaderCommon.h b/include/nbl/asset/interchange/SGeometryLoaderCommon.h index c81aab95db..57ec8abded 100644 --- a/include/nbl/asset/interchange/SGeometryLoaderCommon.h +++ b/include/nbl/asset/interchange/SGeometryLoaderCommon.h @@ -9,9 +9,11 @@ #include "nbl/asset/ICPUPolygonGeometry.h" namespace nbl::asset { +//! Shared geometry-loader helpers for adopting buffers and assembling formatted data views. class SGeometryLoaderCommon { public: + //! Creates one formatted data view over an existing CPU buffer. 
static inline IGeometry::SDataView createDataView(core::smart_refctd_ptr&& buffer, const size_t byteCount, const uint32_t stride, const E_FORMAT format) { if (!buffer || byteCount == 0ull) @@ -19,6 +21,7 @@ class SGeometryLoaderCommon return {.composed = {.stride = stride, .format = format, .rangeFormat = IGeometryBase::getMatchingAABBFormat(format)}, .src = {.offset = 0ull, .size = byteCount, .buffer = std::move(buffer)}}; } + //! Adopts contiguous caller-owned storage into a CPU buffer and exposes it as a formatted data view. template static inline IGeometry::SDataView createAdoptedView(Storage&& data) { diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h index 9ba55cc6aa..5c2055a5e5 100644 --- a/include/nbl/asset/interchange/SGeometryWriterCommon.h +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -17,11 +17,23 @@ #include namespace nbl::asset { +//! Shared writer-side helpers used by geometry exporters. class SGeometryWriterCommon { public: - struct SWriteState { hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); uint32_t instanceIx = ~0u; uint32_t targetIx = ~0u; uint32_t geometryIx = 0u; }; + //! Common scene/collection context propagated to one emitted geometry item. + struct SWriteState + { + //! World transform accumulated up to the emitted geometry. + hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); + uint32_t instanceIx = ~0u; //!< Scene instance index or `~0u` when not applicable. + uint32_t targetIx = ~0u; //!< Morph-target index or `~0u` when not applicable. + uint32_t geometryIx = 0u; //!< Geometry index inside the current collection. + }; + //! One polygon geometry together with the scene context needed by writers. struct SPolygonGeometryWriteItem : SWriteState { const ICPUPolygonGeometry* geometry = nullptr; }; + + //! Collects polygon geometry items from a geometry, geometry collection, or scene root asset. 
template> requires requires(Container& c, const SPolygonGeometryWriteItem& item) { c.emplace_back(item); } static inline Container collectPolygonGeometryWriteItems(const IAsset* rootAsset) { @@ -75,7 +87,9 @@ class SGeometryWriterCommon } return out; } + //! Returns true when the transform equals the writer identity matrix. static inline bool isIdentityTransform(const hlsl::float32_t3x4& transform) { return transform == hlsl::math::linalg::identity(); } + //! Returns one auxiliary view when it exists and optionally matches `requiredElementCount`. static inline const ICPUPolygonGeometry::SDataView* getAuxViewAt(const ICPUPolygonGeometry* geom, const uint32_t auxViewIx, const size_t requiredElementCount = 0ull) { if (!geom) @@ -90,6 +104,7 @@ class SGeometryWriterCommon return nullptr; return &view; } + //! Resolves the triangle face count for indexed or non-indexed polygon geometry. static inline bool getTriangleFaceCount(const ICPUPolygonGeometry* geom, size_t& outFaceCount) { outFaceCount = 0ull; @@ -111,6 +126,7 @@ class SGeometryWriterCommon return false; return (outFaceCount = vertexCount / 3ull), true; } + //! Visits triangle indices as validated `uint32_t` triplets. template static inline bool visitTriangleIndices(const ICPUPolygonGeometry* geom, Visitor&& visitor) { @@ -162,10 +178,14 @@ class SGeometryWriterCommon default: return false; } } + //! Returns a direct pointer for tightly packed views that already match `ExpectedFormat`. template static inline const T* getTightView(const ICPUPolygonGeometry::SDataView& view) { return view && view.composed.format == ExpectedFormat && view.composed.getStride() == sizeof(T) ? reinterpret_cast(view.getPointer()) : nullptr; } + //! Appends one floating-point value to a caller-provided character buffer. static inline char* appendFloatToBuffer(char* dst, char* end, float value) { return appendFloatingPointToBuffer(dst, end, value); } + //! Appends one double-precision value to a caller-provided character buffer. 
static inline char* appendFloatToBuffer(char* dst, char* end, double value) { return appendFloatingPointToBuffer(dst, end, value); } + //! Appends one unsigned integer value to a caller-provided character buffer. static inline char* appendUIntToBuffer(char* dst, char* const end, const uint32_t value) { if (!dst || dst >= end) @@ -180,6 +200,7 @@ class SGeometryWriterCommon return (writeLen < static_cast(end - dst)) ? (dst + writeLen) : end; } private: + //! Shared floating-point backend for the `appendFloatToBuffer` overload set. template static inline char* appendFloatingPointToBuffer(char* dst, char* const end, const T value) { diff --git a/include/nbl/asset/interchange/SInterchangeIO.h b/include/nbl/asset/interchange/SInterchangeIO.h index ff6426b56a..c95bc88608 100644 --- a/include/nbl/asset/interchange/SInterchangeIO.h +++ b/include/nbl/asset/interchange/SInterchangeIO.h @@ -14,14 +14,33 @@ #include namespace nbl::asset { -class SInterchangeIO // Shared read/write helpers that execute a resolved IO plan and collect simple telemetry. +//! Shared read/write helpers that execute a resolved IO plan and collect simple telemetry. +class SInterchangeIO { public: - struct STelemetry { uint64_t callCount = 0ull, totalBytes = 0ull, minBytes = std::numeric_limits::max(); inline void account(const uint64_t bytes) { ++callCount; totalBytes += bytes; if (bytes < minBytes) minBytes = bytes; } inline uint64_t getMinOrZero() const { return callCount ? minBytes : 0ull; } inline uint64_t getAvgOrZero() const { return callCount ? (totalBytes / callCount) : 0ull; } }; // Tracks IO call count and byte distribution for tiny-io diagnostics. + //! Tracks IO call count and byte distribution for tiny-io diagnostics. + struct STelemetry + { + uint64_t callCount = 0ull; //!< Number of IO calls recorded. + uint64_t totalBytes = 0ull; //!< Sum of processed bytes across all calls. + uint64_t minBytes = std::numeric_limits::max(); //!< Smallest processed byte count observed so far. 
+ + inline void account(const uint64_t bytes) + { + ++callCount; + totalBytes += bytes; + if (bytes < minBytes) + minBytes = bytes; + } + + inline uint64_t getMinOrZero() const { return callCount ? minBytes : 0ull; } + inline uint64_t getAvgOrZero() const { return callCount ? (totalBytes / callCount) : 0ull; } + }; using SReadTelemetry = STelemetry; using SWriteTelemetry = STelemetry; - /* Default 1 MiB. Default 1 KiB. Default 64 B. Default 1024 calls. */ - static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const uint64_t bigPayloadThresholdBytes = (1ull << 20), const uint64_t lowAvgBytesThreshold = 1024ull, const uint64_t tinyChunkBytesThreshold = 64ull, const uint64_t tinyChunkCallsThreshold = 1024ull) // Flags large payloads that were served through suspiciously small IO calls. + //! Flags large payloads that were served through suspiciously small IO calls. + //! Defaults are 1 MiB, 1 KiB, 64 B, and 1024 calls. + static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const uint64_t bigPayloadThresholdBytes = (1ull << 20), const uint64_t lowAvgBytesThreshold = 1024ull, const uint64_t tinyChunkBytesThreshold = 64ull, const uint64_t tinyChunkCallsThreshold = 1024ull) { if (payloadBytes <= bigPayloadThresholdBytes) return false; @@ -29,8 +48,10 @@ class SInterchangeIO // Shared read/write helpers that execute a resolved IO pla const uint64_t avgBytes = telemetry.getAvgOrZero(); return avgBytes < lowAvgBytesThreshold || (minBytes < tinyChunkBytesThreshold && telemetry.callCount > tinyChunkCallsThreshold); } - static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) { return isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy.runtimeTuning.tinyIoPayloadThresholdBytes, ioPolicy.runtimeTuning.tinyIoAvgBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinBytesThreshold, 
ioPolicy.runtimeTuning.tinyIoMinCallCount); } // Same tiny-io heuristic but pulls thresholds from the resolved IO policy. - static inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SReadTelemetry* ioTelemetry = nullptr) // Issues one read request and verifies that the full byte count was returned. + //! Same tiny-io heuristic but pulls thresholds from the resolved IO policy. + static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) { return isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy.runtimeTuning.tinyIoPayloadThresholdBytes, ioPolicy.runtimeTuning.tinyIoAvgBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinCallCount); } + //! Issues one read request and verifies that the full byte count was returned. + static inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SReadTelemetry* ioTelemetry = nullptr) { if (!file || (!dst && bytes != 0ull)) return false; if (bytes == 0ull) return true; @@ -39,9 +60,15 @@ class SInterchangeIO // Shared read/write helpers that execute a resolved IO pla if (success && ioTelemetry) ioTelemetry->account(success.getBytesProcessed()); return success && success.getBytesProcessed() == bytes; } + + /** + Reads a byte range using the resolved whole-file or chunked strategy. + When `ioTime` is non-null it also reports wall time in `TimeUnit`. + Default `TimeUnit` is milliseconds. + */ template> requires std::same_as> - static inline bool readFileWithPolicy(system::IFile* file, void* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SReadTelemetry* ioTelemetry = nullptr, TimeUnit* ioTime = nullptr) // Reads a byte range using the resolved whole-file or chunked strategy. When ioTime is non-null it also reports wall time in TimeUnit. Default TimeUnit is milliseconds. 
+ static inline bool readFileWithPolicy(system::IFile* file, void* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SReadTelemetry* ioTelemetry = nullptr, TimeUnit* ioTime = nullptr) { using clock_t = std::chrono::high_resolution_clock; const auto ioStart = ioTime ? clock_t::now() : clock_t::time_point{}; @@ -78,8 +105,14 @@ class SInterchangeIO // Shared read/write helpers that execute a resolved IO pla } } } - struct SBufferRange { const void* data = nullptr; size_t byteCount = 0ull; }; // Describes one contiguous output buffer written as part of a larger stream. - static inline bool writeBuffersWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) // Writes one or more buffers sequentially at fileOffset and advances it on success. + //! Describes one contiguous output buffer written as part of a larger stream. + struct SBufferRange + { + const void* data = nullptr; //!< Start of the contiguous byte range. + size_t byteCount = 0ull; //!< Number of bytes to write from `data`. + }; + //! Writes one or more buffers sequentially at `fileOffset` and advances it on success. + static inline bool writeBuffersWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { if (!file) return false; const uint64_t chunkSizeBytes = ioPlan.chunkSizeBytes(); @@ -108,9 +141,12 @@ class SInterchangeIO // Shared read/write helpers that execute a resolved IO pla } return true; } - static inline bool writeBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, SWriteTelemetry* ioTelemetry = nullptr) { size_t fileOffset = 0ull; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } // Writes one or more buffers starting from file offset 0. 
- static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } // Single-buffer convenience wrapper over writeBuffersWithPolicyAtOffset. - static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicy(file, ioPlan, buffers, ioTelemetry); } // Single-buffer convenience wrapper over writeBuffersWithPolicy. + //! Writes one or more buffers starting from file offset `0`. + static inline bool writeBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, SWriteTelemetry* ioTelemetry = nullptr) { size_t fileOffset = 0ull; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } + //! Single-buffer convenience wrapper over `writeBuffersWithPolicyAtOffset`. + static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } + //! Single-buffer convenience wrapper over `writeBuffersWithPolicy`. 
+ static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicy(file, ioPlan, buffers, ioTelemetry); } }; using SFileIOTelemetry = SInterchangeIO::STelemetry; using SFileReadTelemetry = SInterchangeIO::SReadTelemetry; diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h index b87938a262..e180325606 100644 --- a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -15,32 +15,52 @@ #include namespace nbl::asset { +//! Input describing one loader or hash stage that needs worker and chunk sizing. struct SLoaderRuntimeTuningRequest { - /* Input describing one loader or hash stage that needs worker and chunk sizing. Total input bytes for the tuned stage. Total amount of stage work in logical units. Minimum work units assigned to one worker. Minimum input bytes assigned to one worker. Hardware thread count override. 0 means auto-detect. Hard cap for workers for this request. 0 means no extra cap. Preferred chunk count per worker for this stage. 0 means policy default. Minimum work units in one chunk. Maximum work units in one chunk. Pointer to representative sample bytes for hybrid sampling. Number of sample bytes available at sampleData. Sampling pass count override. 0 means policy default. Sampling candidate count override. 0 means policy default. Minimum work units required to allow sampling. 0 means policy or auto value. 
*/ - uint64_t inputBytes = 0ull, totalWorkUnits = 0ull, minWorkUnitsPerWorker = 1ull, minBytesPerWorker = 1ull; - uint32_t hardwareThreads = 0u, hardMaxWorkers = 0u, targetChunksPerWorker = 0u; - uint64_t minChunkWorkUnits = 1ull, maxChunkWorkUnits = std::numeric_limits::max(); - const uint8_t* sampleData = nullptr; - uint64_t sampleBytes = 0ull, sampleMinWorkUnits = 0ull; - uint32_t samplePasses = 0u, sampleMaxCandidates = 0u; + uint64_t inputBytes = 0ull; //!< Total input bytes for the tuned stage. + uint64_t totalWorkUnits = 0ull; //!< Total amount of stage work in logical units. + uint64_t minWorkUnitsPerWorker = 1ull; //!< Minimum work units assigned to one worker. + uint64_t minBytesPerWorker = 1ull; //!< Minimum input bytes assigned to one worker. + uint32_t hardwareThreads = 0u; //!< Hardware thread count override. 0 means auto-detect. + uint32_t hardMaxWorkers = 0u; //!< Hard cap for workers for this request. 0 means no extra cap. + uint32_t targetChunksPerWorker = 0u; //!< Preferred chunk count per worker for this stage. 0 means policy default. + uint64_t minChunkWorkUnits = 1ull; //!< Minimum work units in one chunk. + uint64_t maxChunkWorkUnits = std::numeric_limits::max(); //!< Maximum work units in one chunk. + const uint8_t* sampleData = nullptr; //!< Pointer to representative sample bytes for hybrid sampling. + uint64_t sampleBytes = 0ull; //!< Number of sample bytes available at sampleData. + uint64_t sampleMinWorkUnits = 0ull; //!< Minimum work units required to allow sampling. 0 means policy or auto value. + uint32_t samplePasses = 0u; //!< Sampling pass count override. 0 means policy default. + uint32_t sampleMaxCandidates = 0u; //!< Sampling candidate count override. 0 means policy default. }; -struct SLoaderRuntimeTuningResult /* Final worker and chunk layout selected for one stage. */ +//! Final worker and chunk layout selected for one stage. +struct SLoaderRuntimeTuningResult { - /* Selected worker count for the stage. 
Work units per chunk assigned by tuner. Total chunk count for the stage. */ - size_t workerCount = 1ull, chunkCount = 1ull; - uint64_t chunkWorkUnits = 1ull; + size_t workerCount = 1ull; //!< Selected worker count for the stage. + size_t chunkCount = 1ull; //!< Total chunk count for the stage. + uint64_t chunkWorkUnits = 1ull; //!< Work units per chunk assigned by tuner. }; -struct SLoaderRuntimeTuner /* Stateless runtime tuner used by loaders and hash stages to size worker pools and chunking. */ +//! Stateless runtime tuner used by loaders and hash stages to size worker pools and chunking. +struct SLoaderRuntimeTuner { private: - struct SBenchmarkSampleStats { uint64_t medianNs = 0ull, minNs = 0ull, maxNs = 0ull, totalNs = 0ull; }; // Aggregated timings collected while probing one worker-count candidate. + //! Aggregated timings collected while probing one worker-count candidate. + struct SBenchmarkSampleStats + { + uint64_t medianNs = 0ull; + uint64_t minNs = 0ull; + uint64_t maxNs = 0ull; + uint64_t totalNs = 0ull; + }; public: + /** + Dispatches workers `1..N-1` on `std::jthread` + and runs worker `0` on the caller thread. + */ template requires std::invocable static void dispatchWorkers(const size_t workerCount, Fn&& fn) { - // std::jthread starts execution in its constructor, so emplace_back launches workers 1..N-1 immediately. The current thread runs worker 0 and std::jthread joins automatically when the local vector is destroyed. if (workerCount <= 1ull) return fn(0ull); std::vector workers; @@ -49,12 +69,18 @@ struct SLoaderRuntimeTuner /* Stateless runtime tuner used by loaders and hash s workers.emplace_back([&fn, workerIx]() { fn(workerIx); }); fn(0ull); } - static constexpr uint64_t ceilDiv(const uint64_t numerator, const uint64_t denominator) { return (numerator + denominator - 1ull) / denominator; } // Integer ceil division. Callers must pass a non-zero denominator. + + //! Integer ceil division. Callers must pass a non-zero denominator. 
+ static constexpr uint64_t ceilDiv(const uint64_t numerator, const uint64_t denominator) { return (numerator + denominator - 1ull) / denominator; } + + /** + Measures one sampled memory-touch pass configuration + and returns aggregate wall time across all passes. + */ template requires std::same_as> static inline TimeUnit benchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes) { - // Measures one sampled memory-touch pass configuration and returns aggregate wall time across all passes. if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) return TimeUnit::zero(); const uint32_t passCount = std::max(1u, passes); @@ -83,9 +109,10 @@ struct SLoaderRuntimeTuner /* Stateless runtime tuner used by loaders and hash s sink.fetch_xor(reduced, std::memory_order_relaxed); return std::chrono::duration_cast(std::chrono::nanoseconds(elapsedNs)); } + + //! Warms up once and then collects timing observations for one worker-count candidate. static inline SBenchmarkSampleStats benchmarkSampleStats(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes, const uint32_t observations) { - // Warms up once and then collects timing observations for one worker-count candidate. SBenchmarkSampleStats stats = {}; if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) return stats; @@ -112,10 +139,11 @@ struct SLoaderRuntimeTuner /* Stateless runtime tuner used by loaders and hash s stats.medianNs = (samples[samples.size() / 2ull - 1ull] + samples[samples.size() / 2ull]) / 2ull; return stats; } - static inline void appendCandidate(std::vector& dst, const size_t candidate) { if (candidate != 0ull && std::find(dst.begin(), dst.end(), candidate) == dst.end()) dst.push_back(candidate); } // Keeps the candidate probe list unique while preserving insertion order. + //! Keeps the candidate probe list unique while preserving insertion order. 
+ static inline void appendCandidate(std::vector& dst, const size_t candidate) { if (candidate != 0ull && std::find(dst.begin(), dst.end(), candidate) == dst.end()) dst.push_back(candidate); } + //! Chooses the sample byte budget used by hybrid tuning from the known input size and policy clamps. static inline uint64_t resolveSampleBytes(const SFileIOPolicy& ioPolicy, const uint64_t knownInputBytes) { - // Chooses the sample byte budget used by hybrid tuning from the known input size and policy clamps. if (knownInputBytes == 0ull) return 0ull; const uint64_t minSampleBytes = std::max(1ull, ioPolicy.runtimeTuning.minSampleBytes); @@ -125,11 +153,13 @@ struct SLoaderRuntimeTuner /* Stateless runtime tuner used by loaders and hash s const uint64_t adaptive = std::max(knownInputBytes / 64ull, cappedMin); return std::clamp(adaptive, cappedMin, cappedMax); } - static inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) { return inputBytes <= std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); } // Returns true when the hash build is small enough to stay on the caller thread. - static inline size_t resolveHardwareThreads(const uint32_t requested = 0u) { const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); return hw ? hw : 1ull; } // Resolves the effective hardware thread count and always returns at least one worker. + //! Returns true when the hash build is small enough to stay on the caller thread. + static inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) { return inputBytes <= std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); } + //! Resolves the effective hardware thread count and always returns at least one worker. + static inline size_t resolveHardwareThreads(const uint32_t requested = 0u) { const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); return hw ? 
hw : 1ull; } + //! Applies worker headroom while keeping at least two workers when parallel hardware is available. static inline size_t resolveHardMaxWorkers(const size_t hardwareThreads, const uint32_t workerHeadroom) { - // Applies worker headroom while keeping at least two workers when parallel hardware is available. const size_t hw = std::max(1ull, hardwareThreads), minWorkers = hw >= 2ull ? 2ull : 1ull, headroom = static_cast(workerHeadroom); if (headroom == 0ull) return hw; @@ -137,9 +167,9 @@ struct SLoaderRuntimeTuner /* Stateless runtime tuner used by loaders and hash s return minWorkers; return std::max(minWorkers, hw - headroom); } + //! Resolves worker and chunk counts for one stage using policy limits plus optional hybrid sampling. static inline SLoaderRuntimeTuningResult tune(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request) { - // Resolves worker and chunk counts for one stage using policy limits plus optional hybrid sampling. using RTMode = SFileIOPolicy::SRuntimeTuning::Mode; SLoaderRuntimeTuningResult result = {}; if (request.totalWorkUnits == 0ull) @@ -192,7 +222,7 @@ struct SLoaderRuntimeTuner /* Stateless runtime tuner used by loaders and hash s { if (request.inputBytes > 0ull) { - // keep probing lightweight: sample fraction scales with input and parallelism + // Keep probing lightweight: sample fraction scales with input and parallelism. 
const uint64_t sampleDivisor = std::max(4ull, static_cast(heuristicWorkerCount) * static_cast(targetChunksPerWorker)); const uint64_t adaptiveSampleBytes = std::max(1ull, request.inputBytes / sampleDivisor); effectiveSampleBytes = std::min(effectiveSampleBytes, adaptiveSampleBytes); @@ -211,7 +241,8 @@ struct SLoaderRuntimeTuner /* Stateless runtime tuner used by loaders and hash s appendCandidate(candidates, heuristicWorkerCount + 2ull); if (candidates.size() > maxCandidates) candidates.resize(maxCandidates); - const auto heuristicStatsProbe = benchmarkSampleStats(request.sampleData, effectiveSampleBytes, heuristicWorkerCount, samplePasses, 2u); // probe heuristic first and only continue when budget can amortize additional probes + // Probe heuristic first and only continue when budget can amortize additional probes. + const auto heuristicStatsProbe = benchmarkSampleStats(request.sampleData, effectiveSampleBytes, heuristicWorkerCount, samplePasses, 2u); if (heuristicStatsProbe.medianNs > 0ull) { const double scale = request.inputBytes ? (static_cast(request.inputBytes) / static_cast(effectiveSampleBytes)) : 1.0; diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.h b/src/nbl/asset/interchange/COBJMeshFileLoader.h index 68161da937..78b88c84db 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.h +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.h @@ -4,18 +4,22 @@ // See the original file in irrlicht source for authors #ifndef _NBL_ASSET_C_OBJ_MESH_FILE_LOADER_H_INCLUDED_ #define _NBL_ASSET_C_OBJ_MESH_FILE_LOADER_H_INCLUDED_ - #include "nbl/core/declarations.h" #include "nbl/asset/interchange/IGeometryLoader.h" - namespace nbl::asset { -/* +/** Loads plain OBJ as polygon geometry or geometry collections. - Multiple `o` and `g` blocks mean multiple geometry pieces in one file, not a real scene. - This loader keeps that split as geometry collections because plain OBJ does not define scene hierarchy, instancing, or node transforms. 
- OBJ/MTL material data also belongs here and remains TODO, but that still does not turn plain OBJ into a scene format. - A single mesh payload can therefore load as one geometry, while multiple split pieces still load as geometry collections instead of a synthetic scene. + Multiple `o` and `g` blocks mean multiple geometry pieces in one file, + not a real scene. + This loader keeps that split as geometry collections because plain OBJ + does not define scene hierarchy, instancing, or node transforms. + OBJ/MTL material data also belongs here and remains TODO, + but that still does not turn plain OBJ into a scene format. + A single mesh payload can therefore load as one geometry, + while multiple split pieces still load as geometry collections + instead of a synthetic scene. + References: - https://www.loc.gov/preservation/digital/formats/fdd/fdd000507 - https://www.fileformat.info/format/wavefrontobj/egff.htm @@ -32,9 +36,9 @@ class COBJMeshFileLoader : public IGeometryLoader const char** getAssociatedFileExtensions() const override; + //! Loads one OBJ asset bundle from an already opened file. asset::SAssetBundle loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; }; } // end namespace nbl::asset - #endif diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.h b/src/nbl/asset/interchange/CPLYMeshFileLoader.h index 50ecf06555..c4aaf1c22c 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.h +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.h @@ -4,15 +4,11 @@ // See the original file in irrlicht source for authors #ifndef _NBL_ASSET_C_PLY_MESH_FILE_LOADER_H_INCLUDED_ #define _NBL_ASSET_C_PLY_MESH_FILE_LOADER_H_INCLUDED_ - #include "nbl/core/declarations.h" - #include "nbl/asset/interchange/IGeometryLoader.h" - namespace nbl::asset { - -//! Meshloader capable of loading obj meshes. +//! 
Mesh loader capable of loading PLY meshes. class CPLYMeshFileLoader final : public IGeometryLoader { public: @@ -22,7 +18,7 @@ class CPLYMeshFileLoader final : public IGeometryLoader const char** getAssociatedFileExtensions() const override; - //! creates/loads an animated mesh from the file. + //! Loads one PLY asset bundle from an already opened file. SAssetBundle loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; }; diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.h b/src/nbl/asset/interchange/CPLYMeshWriter.h index 3331e6bba3..05ac6a2011 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.h +++ b/src/nbl/asset/interchange/CPLYMeshWriter.h @@ -4,15 +4,10 @@ // See the original file in irrlicht source for authors #ifndef _NBL_ASSET_PLY_MESH_WRITER_H_INCLUDED_ #define _NBL_ASSET_PLY_MESH_WRITER_H_INCLUDED_ - - #include "nbl/asset/interchange/IGeometryWriter.h" - - namespace nbl::asset { - -//! class to write PLY mesh files +//! Geometry writer capable of emitting PLY mesh files. class CPLYMeshWriter : public IGeometryWriter { public: diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.h b/src/nbl/asset/interchange/CSTLMeshFileLoader.h index c5c982800d..42317a129a 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.h +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.h @@ -4,22 +4,17 @@ // See the original file in irrlicht source for authors #ifndef _NBL_ASSET_C_STL_MESH_FILE_LOADER_H_INCLUDED_ #define _NBL_ASSET_C_STL_MESH_FILE_LOADER_H_INCLUDED_ - - #include "nbl/core/declarations.h" - #include "nbl/asset/interchange/IGeometryLoader.h" - - namespace nbl::asset { - -//! Meshloader capable of loading STL meshes. +//! Mesh loader capable of loading STL meshes. class CSTLMeshFileLoader final : public IGeometryLoader { public: explicit CSTLMeshFileLoader(asset::IAssetManager* _assetManager); + //! 
Loads one STL asset bundle from an already opened file. asset::SAssetBundle loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const override; @@ -27,6 +22,6 @@ class CSTLMeshFileLoader final : public IGeometryLoader const char** getAssociatedFileExtensions() const override; }; -} // end namespace nbl::scene +} // end namespace nbl::asset #endif diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.h b/src/nbl/asset/interchange/CSTLMeshWriter.h index 5841096cec..30f8ff4e24 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.h +++ b/src/nbl/asset/interchange/CSTLMeshWriter.h @@ -4,15 +4,10 @@ // See the original file in irrlicht source for authors #ifndef _NBL_ASSET_STL_MESH_WRITER_H_INCLUDED_ #define _NBL_ASSET_STL_MESH_WRITER_H_INCLUDED_ - - #include "nbl/asset/interchange/IGeometryWriter.h" - - namespace nbl::asset { - -//! class to write meshes, implementing a STL writer +//! Geometry writer capable of emitting STL mesh files. class CSTLMeshWriter : public IGeometryWriter { public: diff --git a/src/nbl/asset/interchange/SGeometryViewDecode.h b/src/nbl/asset/interchange/SGeometryViewDecode.h index c596c0d78e..3e8d72adba 100644 --- a/src/nbl/asset/interchange/SGeometryViewDecode.h +++ b/src/nbl/asset/interchange/SGeometryViewDecode.h @@ -10,25 +10,41 @@ #include namespace nbl::asset { +//! Shared decode helper for geometry `SDataView` read paths used by writers. class SGeometryViewDecode { public: - enum class EMode : uint8_t { Semantic, Stored }; + //! Selects whether the output should be in logical attribute space or storage space. + enum class EMode : uint8_t + { + Semantic, //!< Decode values ready for writer-side math and text/binary emission. + Stored //!< Decode values in storage-domain form for raw integer emission. + }; + + //! 
Prepared decode state hoisted out of inner loops for one formatted view. template struct Prepared { - const uint8_t* data = nullptr; - uint32_t stride = 0u; - E_FORMAT format = EF_UNKNOWN; - uint32_t channels = 0u; - bool normalized = false; + const uint8_t* data = nullptr; //!< First byte of the view payload. + uint32_t stride = 0u; //!< Byte stride between consecutive elements. + E_FORMAT format = EF_UNKNOWN; //!< Source format used by `decodePixels`. + uint32_t channels = 0u; //!< Channel count cached from `format`. + bool normalized = false; //!< True when semantic decode must apply `range`. + + //! Decoded attribute range used for normalized semantic outputs. hlsl::shapes::AABB<4, hlsl::float64_t> range = hlsl::shapes::AABB<4, hlsl::float64_t>::create(); inline explicit operator bool() const { return data != nullptr && stride != 0u && format != EF_UNKNOWN && channels != 0u; } + + //! Decodes one element into a fixed-size `std::array`. template inline bool decode(const size_t ix, std::array& out) const { out.fill(T{}); return SGeometryViewDecode::template decodePrepared(*this, ix, out.data(), static_cast(N)); } + + //! Decodes one element into an HLSL vector type. template requires hlsl::concepts::Vector inline bool decode(const size_t ix, V& out) const { out = V{}; return SGeometryViewDecode::template decodePrepared(*this, ix, out); } }; + + //! Prepares one decode state that can be reused across many elements of the same view. template static inline Prepared prepare(const ICPUPolygonGeometry::SDataView& view) { @@ -45,9 +61,12 @@ class SGeometryViewDecode retval.range = view.composed.getRange>(); return retval; } + + //! One-shot convenience wrapper over `prepare(...).decode(...)`. template static inline bool decodeElement(const ICPUPolygonGeometry::SDataView& view, const size_t ix, Out& out) { return prepare(view).decode(ix, out); } private: + //! Shared scalar/vector backend that decodes one prepared element into plain components. 
template static inline bool decodePreparedComponents(const Prepared& prepared, const size_t ix, T* out, const uint32_t outDim) { @@ -71,6 +90,8 @@ class SGeometryViewDecode out[i] = static_cast(tmp[i]); return true; } + + //! Vector overload built on top of `decodePreparedComponents`. template requires hlsl::concepts::Vector static inline bool decodePrepared(const Prepared& prepared, const size_t ix, V& out) { @@ -83,6 +104,8 @@ class SGeometryViewDecode out[i] = tmp[i]; return true; } + + //! Pointer overload used by `std::array` and internal scratch storage. template static inline bool decodePrepared(const Prepared& prepared, const size_t ix, T* out, const uint32_t outDim) { return decodePreparedComponents(prepared, ix, out, outDim); } }; diff --git a/src/nbl/asset/interchange/impl/SBinaryData.h b/src/nbl/asset/interchange/impl/SBinaryData.h index 2a0259bc87..370f1a383d 100644 --- a/src/nbl/asset/interchange/impl/SBinaryData.h +++ b/src/nbl/asset/interchange/impl/SBinaryData.h @@ -6,10 +6,14 @@ #include namespace nbl::asset::impl { +//! Binary helpers for endian conversion and unaligned loads/stores. struct BinaryData { + //! Returns `value` with byte order reversed. template static inline T byteswap(const T value) { auto retval = value; const auto* it = reinterpret_cast(&value); std::reverse_copy(it, it + sizeof(retval), reinterpret_cast(&retval)); return retval; } + + //! Loads one trivially copyable value from unaligned memory and optionally byte-swaps it. template static inline T loadUnaligned(const void* src, const bool swapEndian = false) { @@ -19,8 +23,12 @@ struct BinaryData std::memcpy(&value, src, sizeof(value)); return swapEndian ? byteswap(value) : value; } + + //! Stores one trivially copyable value into unaligned memory. template static inline void storeUnaligned(void* dst, const T& value) { std::memcpy(dst, &value, sizeof(value)); } + + //! Stores one value and advances the destination pointer by `sizeof(T)`. 
template static inline void storeUnalignedAdvance(uint8_t*& dst, const T& value) { storeUnaligned(dst, value); dst += sizeof(value); } }; diff --git a/src/nbl/asset/interchange/impl/SFileAccess.h b/src/nbl/asset/interchange/impl/SFileAccess.h index 239cd557ec..b1e15010f7 100644 --- a/src/nbl/asset/interchange/impl/SFileAccess.h +++ b/src/nbl/asset/interchange/impl/SFileAccess.h @@ -7,11 +7,16 @@ #include namespace nbl::asset::impl { +//! Small file access helper shared by interchange loaders. class SFileAccess { public: + //! Returns true when the file exposes a mapped pointer. static inline bool isMappable(const system::IFile* file) { return file && core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); } + //! Resolves the final IO plan after considering payload size and mapping support. static inline SResolvedFileIOPolicy resolvePlan(const SFileIOPolicy& ioPolicy, const uint64_t payloadBytes, const bool sizeKnown, const system::IFile* file) { return SResolvedFileIOPolicy(ioPolicy, payloadBytes, sizeKnown, isMappable(file)); } + + //! Logs an invalid plan and returns true when the caller should abort. template static inline bool logInvalidPlan(Logger& logger, const char* const owner, const char* const fileName, const SResolvedFileIOPolicy& ioPlan) { @@ -20,6 +25,8 @@ class SFileAccess logger.log("%s: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, owner, fileName, ioPlan.reason); return true; } + + //! Emits the shared tiny-IO warning when telemetry indicates suspiciously small reads. 
template static inline void logTinyIO(Logger& logger, const char* const owner, const char* const fileName, const SInterchangeIO::STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy, const char* const opName) { @@ -27,6 +34,7 @@ class SFileAccess return; logger.log("%s tiny-io guard: file=%s %s=%llu min=%llu avg=%llu", system::ILogger::ELL_WARNING, owner, fileName, opName, static_cast(telemetry.callCount), static_cast(telemetry.getMinOrZero()), static_cast(telemetry.getAvgOrZero())); } + //! Reads one byte range using the already resolved IO plan. static inline const uint8_t* readRange(system::IFile* file, const size_t offset, const size_t bytes, core::vector& storage, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr, const bool zeroTerminate = false) { storage.resize(bytes + (zeroTerminate ? 1ull : 0ull), 0u); @@ -36,6 +44,7 @@ class SFileAccess storage[bytes] = 0u; return storage.data(); } + //! Uses the mapped pointer for whole-file mode when available, otherwise falls back to `readRange`. static inline const uint8_t* mapOrReadWholeFile(system::IFile* file, const size_t bytes, core::vector& storage, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr, bool* wasMapped = nullptr, const bool zeroTerminate = false) { if (wasMapped) @@ -53,15 +62,18 @@ class SFileAccess return readRange(file, 0ull, bytes, storage, ioPlan, ioTelemetry, zeroTerminate); } }; +//! Per-load session state shared across the loader entry points. class SLoadSession { public: - system::IFile* file = nullptr; - const SFileIOPolicy* requestedPolicy = nullptr; - SResolvedFileIOPolicy ioPlan = {}; - uint64_t payloadBytes = 0ull; - const char* owner = nullptr; - std::string fileName = {}; + system::IFile* file = nullptr; //!< File being processed by the loader. + const SFileIOPolicy* requestedPolicy = nullptr; //!< Original policy requested by the caller. 
+ SResolvedFileIOPolicy ioPlan = {}; //!< Final plan chosen for this payload. + uint64_t payloadBytes = 0ull; //!< Logical payload size covered by `ioPlan`. + const char* owner = nullptr; //!< Human-readable loader name used in logs. + std::string fileName = {}; //!< Cached file name used in diagnostics. + + //! Initializes the session and resolves the IO plan. template static inline bool begin(Logger& logger, const char* const owner, system::IFile* file, const SFileIOPolicy& ioPolicy, const uint64_t payloadBytes, const bool sizeKnown, SLoadSession& out) { @@ -76,10 +88,16 @@ class SLoadSession out.fileName = file->getFileName().string(); return !SFileAccess::logInvalidPlan(logger, owner, out.fileName.c_str(), out.ioPlan); } + //! Returns true when the resolved plan prefers whole-file access. inline bool isWholeFile() const { return ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile; } + //! Returns the mapped pointer for whole-file mode or `nullptr` when unavailable. inline const uint8_t* mappedPointer() const { return file && isWholeFile() ? reinterpret_cast(static_cast(file)->getMappedPointer()) : nullptr; } + //! Convenience wrapper over `SFileAccess::readRange` bound to this session. inline const uint8_t* readRange(const size_t offset, const size_t bytes, core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, const bool zeroTerminate = false) const { return SFileAccess::readRange(file, offset, bytes, storage, ioPlan, ioTelemetry, zeroTerminate); } + //! Convenience wrapper over `SFileAccess::mapOrReadWholeFile` bound to this session. inline const uint8_t* mapOrReadWholeFile(core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, bool* const wasMapped = nullptr, const bool zeroTerminate = false) const { return SFileAccess::mapOrReadWholeFile(file, static_cast(payloadBytes), storage, ioPlan, ioTelemetry, wasMapped, zeroTerminate); } + + //! Emits the shared tiny-IO diagnostic for this session. 
template inline void logTinyIO(Logger& logger, const Telemetry& telemetry, const char* const opName = "reads") const { if (requestedPolicy) SFileAccess::logTinyIO(logger, owner, fileName.c_str(), telemetry, payloadBytes, *requestedPolicy, opName); } }; diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h index 434c5adbf4..4afbd631ed 100644 --- a/src/nbl/asset/interchange/impl/STextParse.h +++ b/src/nbl/asset/interchange/impl/STextParse.h @@ -10,8 +10,10 @@ #include namespace nbl::asset::impl { +//! Text token and numeric parsing helpers shared by interchange text formats. struct TextParse { + //! Parses one arithmetic token and advances `ptr` on success. template static inline bool parseNumber(const char*& ptr, const char* const end, T& out) { @@ -33,21 +35,33 @@ struct TextParse return true; } } + + //! Parses one arithmetic token and succeeds only if the whole range was consumed. template static inline bool parseExactNumber(const char* const begin, const char* const end, T& out) { auto ptr = begin; return parseNumber(ptr, end, out) && ptr == end; } + + //! `std::string_view` convenience wrapper over `parseExactNumber(begin,end,...)`. template static inline bool parseExactNumber(const std::string_view token, T& out) { return parseExactNumber(token.data(), token.data() + token.size(), out); } + + //! Parses one arithmetic token and rejects zero. template static inline bool parseNonZeroNumber(const char*& ptr, const char* const end, T& out) { return parseNumber(ptr, end, out) && out != static_cast(0); } + + //! Returns true for inline whitespace accepted inside tokenized text formats. static inline bool isInlineWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\v' || c == '\f'; } + //! Skips spaces and tabs that stay within the current logical line. static inline void skipInlineWhitespace(const char*& ptr, const char* const end) { while (ptr < end && isInlineWhitespace(*ptr)) ++ptr; } + //! 
Skips generic whitespace according to `core::isspace`. static inline void skipWhitespace(const char*& ptr, const char* const end) { while (ptr < end && core::isspace(*ptr)) ++ptr; } + //! Trims leading and trailing whitespace from a token view. static inline std::string_view trimWhitespace(std::string_view token) { while (!token.empty() && core::isspace(token.front())) token.remove_prefix(1ull); while (!token.empty() && core::isspace(token.back())) token.remove_suffix(1ull); return token; } + //! Reads one whitespace-delimited token and advances `cursor` past it. static inline std::optional readToken(const char*& cursor, const char* const end) { skipWhitespace(cursor, end); From 943d4c42e555f6d1734625e67f115363863a58ee Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 18:15:17 +0100 Subject: [PATCH 087/118] Tighten added interchange file layout --- include/nbl/asset/SBufferAdoption.h | 12 ------------ include/nbl/asset/interchange/COBJMeshWriter.h | 1 - src/nbl/asset/interchange/COBJMeshFileLoader.h | 1 - src/nbl/asset/interchange/COBJMeshWriter.cpp | 5 ----- src/nbl/asset/interchange/CPLYMeshFileLoader.h | 1 - src/nbl/asset/interchange/CPLYMeshWriter.h | 1 - src/nbl/asset/interchange/CSTLMeshFileLoader.cpp | 2 -- src/nbl/asset/interchange/CSTLMeshFileLoader.h | 1 - src/nbl/asset/interchange/CSTLMeshWriter.h | 1 - src/nbl/asset/interchange/IGeometryWriter.cpp | 4 ---- 10 files changed, 29 deletions(-) diff --git a/include/nbl/asset/SBufferAdoption.h b/include/nbl/asset/SBufferAdoption.h index 3ea123a2a4..d31e5ff95b 100644 --- a/include/nbl/asset/SBufferAdoption.h +++ b/include/nbl/asset/SBufferAdoption.h @@ -3,22 +3,15 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_S_BUFFER_ADOPTION_H_INCLUDED_ #define _NBL_ASSET_S_BUFFER_ADOPTION_H_INCLUDED_ - - #include #include #include #include - #include "nbl/asset/ICPUBuffer.h" - - namespace nbl::asset { - namespace impl { - // Owns contiguous storage 
that can be adopted by a CPU buffer. Views like std::span are rejected. template concept AdoptedBufferStorage = @@ -30,9 +23,7 @@ concept AdoptedBufferStorage = typename std::ranges::range_value_t>; { std::ranges::data(storage) } -> std::same_as>*>; }; - } - // Generic CPU-buffer adoption helper for owning contiguous storage such as std::vector or core::vector. class SBufferAdoption { @@ -54,8 +45,5 @@ class SBufferAdoption core::adopt_memory); } }; - } - - #endif diff --git a/include/nbl/asset/interchange/COBJMeshWriter.h b/include/nbl/asset/interchange/COBJMeshWriter.h index 7159f7f21c..5446118246 100644 --- a/include/nbl/asset/interchange/COBJMeshWriter.h +++ b/include/nbl/asset/interchange/COBJMeshWriter.h @@ -31,6 +31,5 @@ class COBJMeshWriter : public ISceneWriter bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; - } // end namespace #endif diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.h b/src/nbl/asset/interchange/COBJMeshFileLoader.h index 78b88c84db..e25981e854 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.h +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.h @@ -39,6 +39,5 @@ class COBJMeshFileLoader : public IGeometryLoader //! Loads one OBJ asset bundle from an already opened file. asset::SAssetBundle loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; }; - } // end namespace nbl::asset #endif diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 4b9ed20cd6..265f29f7d0 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -2,7 +2,6 @@ // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - #include "nbl/asset/interchange/COBJMeshWriter.h" #include "nbl/asset/interchange/SGeometryViewDecode.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" @@ -10,9 +9,7 @@ #include "impl/SFileAccess.h" #include "nbl/builtin/hlsl/array_accessors.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" - #include "nbl/system/IFile.h" - #include #include #include @@ -20,10 +17,8 @@ #include #include #include - namespace nbl::asset { - COBJMeshWriter::COBJMeshWriter() { #ifdef _NBL_DEBUG diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.h b/src/nbl/asset/interchange/CPLYMeshFileLoader.h index c4aaf1c22c..43d57e74d7 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.h +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.h @@ -21,6 +21,5 @@ class CPLYMeshFileLoader final : public IGeometryLoader //! Loads one PLY asset bundle from an already opened file. SAssetBundle loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; }; - } // end namespace nbl::asset #endif diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.h b/src/nbl/asset/interchange/CPLYMeshWriter.h index 05ac6a2011..4adacc4c68 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.h +++ b/src/nbl/asset/interchange/CPLYMeshWriter.h @@ -20,6 +20,5 @@ class CPLYMeshWriter : public IGeometryWriter bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; - } // end namespace #endif diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 23e7860af3..1ccf55e5ca 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -3,7 +3,6 @@ // This file is part of the "Nabla Engine" and was originally part of 
the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors - #include "CSTLMeshFileLoader.h" #include "impl/SFileAccess.h" #include "impl/STextParse.h" @@ -19,7 +18,6 @@ #include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" #include "nbl/core/hash/blake.h" #include "nbl/system/IFile.h" - #include namespace nbl::asset { diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.h b/src/nbl/asset/interchange/CSTLMeshFileLoader.h index 42317a129a..dadfb1ca7f 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.h +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.h @@ -21,7 +21,6 @@ class CSTLMeshFileLoader final : public IGeometryLoader const char** getAssociatedFileExtensions() const override; }; - } // end namespace nbl::asset #endif diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.h b/src/nbl/asset/interchange/CSTLMeshWriter.h index 30f8ff4e24..e06e5c5b65 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.h +++ b/src/nbl/asset/interchange/CSTLMeshWriter.h @@ -21,6 +21,5 @@ class CSTLMeshWriter : public IGeometryWriter bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; - } // end namespace #endif diff --git a/src/nbl/asset/interchange/IGeometryWriter.cpp b/src/nbl/asset/interchange/IGeometryWriter.cpp index c66b7096bc..795241e539 100644 --- a/src/nbl/asset/interchange/IGeometryWriter.cpp +++ b/src/nbl/asset/interchange/IGeometryWriter.cpp @@ -1,12 +1,8 @@ // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - #include "nbl/asset/interchange/IGeometryWriter.h" - namespace nbl::asset { - IGeometryWriter::~IGeometryWriter() = default; - } From 64560b4d76496cabd6d262f08552815ff8ba153e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 18:18:48 +0100 Subject: [PATCH 088/118] Use relative fast_float submodule URL --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 17be178b94..8c03de482d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -131,4 +131,4 @@ url = git@github.com:Devsh-Graphics-Programming/Vulkan-Tools.git [submodule "3rdparty/fast_float"] path = 3rdparty/fast_float - url = https://github.com/fastfloat/fast_float.git + url = ../fast_float.git From a54d4afb0399dc41ec7a873ceb7fbbf1fd48be35 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 18:31:48 +0100 Subject: [PATCH 089/118] Restore inherited comment layout --- include/nbl/asset/interchange/IAssetLoader.h | 52 ++++++-------------- include/nbl/asset/interchange/IAssetWriter.h | 14 +++--- 2 files changed, 22 insertions(+), 44 deletions(-) diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 548e020267..9ba1e5e14a 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -38,9 +38,7 @@ class CPolygonGeometryManipulator; where 2 bits represent one single level, so we've been on second level). Notice that loading process can be seen as a chain. When you're loading a mesh, it can references a submesh. Submesh can reference graphics pipeline and descriptor set. Descriptor set can reference, for example, textures. - Hierarchy level is distance in such chain/tree from Root Asset - (the one you asked for by calling IAssetManager::getAsset()) - and the currently loaded Asset (needed by Root Asset). 
+ Hierarchy level is distance in such chain/tree from Root Asset (the one you asked for by calling IAssetManager::getAsset()) and the currently loaded Asset (needed by Root Asset). When the class derived from IAssetLoader is added, its put once on an vector and once on an multimap @@ -86,14 +84,9 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted enum E_LOADER_PARAMETER_FLAGS : uint64_t { ELPF_NONE = 0, //!< default value, it doesn't do anything - // [[deprecated]] ELPF_RIGHT_HANDED_MESHES = 0x1 - // specifies that a mesh will be flipped in such a way - // that it'll look correctly in right-handed camera system - // [[deprecated]] ELPF_DONT_COMPILE_GLSL = 0x2 - // it states that GLSL won't be compiled to SPIR-V if it is loaded or generated - - //! it forces the loader to not load the entire scene for performance in special cases to fetch metadata. - ELPF_LOAD_METADATA_ONLY = 0x4, +//[[deprecated]] ELPF_RIGHT_HANDED_MESHES = 0x1, //!< specifies that a mesh will be flipped in such a way that it'll look correctly in right-handed camera system +//[[deprecated]] ELPF_DONT_COMPILE_GLSL = 0x2, //!< it states that GLSL won't be compiled to SPIR-V if it is loaded or generated + ELPF_LOAD_METADATA_ONLY = 0x4, //!< it forces the loader to not load the entire scene for performance in special cases to fetch metadata. ELPF_DONT_COMPUTE_CONTENT_HASHES = 0x8 //!< opt-out from computing content hashes of produced buffers before returning. }; using loader_flags_t = core::bitflag; @@ -208,7 +201,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted return m_creationParams.polyGeoManip.get(); }*/ - //! Typed convenience wrapper over the untyped `findDefaultAsset` overload. + //! 
template inline std::pair,const IAssetMetadata*> findDefaultAsset(const std::string& inSearchKey, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { @@ -218,7 +211,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted // The only reason these functions are not declared static is to allow stateful overrides - //! Finds one default asset for a key and asset type after cache lookup. + //! inline virtual std::pair,const IAssetMetadata*> findDefaultAsset(const std::string& inSearchKey, const IAsset::E_TYPE assetType, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { size_t storageSz = 1ull; @@ -232,7 +225,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted return { chooseDefaultAsset(bundle,ctx),bundle.getMetadata() }; } - //! Chooses one default asset from a bundle returned by cache or load flow. + //! inline virtual core::smart_refctd_ptr chooseDefaultAsset(const SAssetBundle& bundle, const SAssetLoadContext& ctx) { auto contents = bundle.getContents(); @@ -241,15 +234,11 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted return *contents.begin(); } - //! The most imporant overrides are the ones for caching. + //! The most imporant overrides are the ones for caching virtual SAssetBundle findCachedAsset(const std::string& inSearchKey, const IAsset::E_TYPE* inAssetTypes, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel); - /** - Since more then one asset of the same key of the same type can exist, - this function is called right after search for cached assets - (if anything was found) and decides which of them is relevant. - Note: this function can assume that `found` is never empty. - */ + //! Since more then one asset of the same key of the same type can exist, this function is called right after search for cached assets (if anything was found) and decides which of them is relevant. + //! Note: this function can assume that `found` is never empty. 
inline virtual SAssetBundle chooseRelevantFromFound(const SAssetBundle* foundBegin, const SAssetBundle* foundEnd, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { return *foundBegin; @@ -278,27 +267,18 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted // otherwise it was already absolute } - /** - This function can be used to swap out the actually opened - (or unknown unopened file if `inFile` is nullptr) file for a different one. - Especially useful if you've used some sort of a fake path - and the file won't load from that path just via `io::IFileSystem`. - */ + //! This function can be used to swap out the actually opened (or unknown unopened file if `inFile` is nullptr) file for a different one. + /** Especially useful if you've used some sort of a fake path and the file won't load from that path just via `io::IFileSystem` . */ inline virtual core::smart_refctd_ptr getLoadFile(system::IFile* inFile, const std::string& supposedFilename, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { return core::smart_refctd_ptr(inFile); } //! When you sometimes have different passwords for different assets - /** - \param inOutDecrKeyLen expects length of buffer `outDecrKey`, - then function writes into it length of actual key. - Write to `outDecrKey` happens only if output value of `inOutDecrKeyLen` - is less or equal to input value of `inOutDecrKeyLen`. - \param supposedFilename is the string after modification by getLoadFilename. - \param attempt if decryption or validation algorithm supports reporting failure, - you can try different key - */ + /** \param inOutDecrKeyLen expects length of buffer `outDecrKey`, then function writes into it length of actual key. + Write to `outDecrKey` happens only if output value of `inOutDecrKeyLen` is less or equal to input value of `inOutDecrKeyLen`. + \param supposedFilename is the string after modification by getLoadFilename. 
+ \param attempt if decryption or validation algorithm supports reporting failure, you can try different key*/ inline virtual bool getDecryptionKey(uint8_t* outDecrKey, size_t& inOutDecrKeyLen, const uint32_t attempt, const system::IFile* assetsFile, const std::string& supposedFilename, const std::string& cacheKey, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { if (ctx.params.decryptionKeyLen <= inOutDecrKeyLen) diff --git a/include/nbl/asset/interchange/IAssetWriter.h b/include/nbl/asset/interchange/IAssetWriter.h index 8e02aa7bc7..fca8e24124 100644 --- a/include/nbl/asset/interchange/IAssetWriter.h +++ b/include/nbl/asset/interchange/IAssetWriter.h @@ -28,11 +28,10 @@ enum E_WRITER_FLAGS : uint32_t EWF_NONE = 0u, //!< No writer flags (default writer settings) EWF_COMPRESSED = 1u<<0u, //!< Write in a way that consumes less disk space if possible EWF_ENCRYPTED = 1u<<1u, //!< Write encrypted if possible - EWF_BINARY = 1u << 2u, //!< Write in binary format rather than text if possible - /** - Specifies the incoming orientation of loaded mesh we want to write. - Flipping will be performed if needed in dependency of format extension orientation. - */ + //! write in binary format rather than text if possible + EWF_BINARY = 1u << 2u, + + //!< specifies the incoming orientation of loaded mesh we want to write. Flipping will be performed if needed in dependency of format extension orientation EWF_MESH_IS_RIGHT_HANDED = 1u << 3u }; using writer_flags_t = core::bitflag; @@ -161,17 +160,16 @@ class IAssetWriter : public virtual core::IReferenceCounted return ctx.params.encryptionKeyLen; } - //! If the writer has to output multiple files (e.g. write out textures). + //! If the writer has to output multiple files (e.g. 
write out textures) inline virtual void getExtraFilePaths(std::string& inOutAbsoluteFileWritePath, std::string& inOutPathToRecord, const SAssetWriteContext& ctx, std::pair assetsToWriteAndTheirLevel) {} // do absolutely nothing, no changes to paths - //! Lets the override replace the seeked destination file for one sub-asset. inline virtual system::IFile* getOutputFile(system::IFile* origIntendedOutput, const SAssetWriteContext& ctx, std::pair assetsToWriteAndTheirLeve) { // if you want to return something else, better drop origIntendedOutput return origIntendedOutput; } - //! This function is supposed to give an already seeked file the IAssetWriter can write to. + //!This function is supposed to give an already seeked file the IAssetWriter can write to inline virtual system::IFile* handleWriteError(system::IFile* failingFile, const uint32_t& failedPos, const SAssetWriteContext& ctx, const IAsset* assetToWrite, const uint32_t& hierarchyLevel) { return nullptr; // no handling of fail From 00fa8bb03a4ff85333fee7d5fc3ba5e5916c0d84 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 22:05:18 +0100 Subject: [PATCH 090/118] Restore mesh loader benchmark performance --- include/nbl/asset/interchange/SFileIOPolicy.h | 8 +- .../asset/interchange/COBJMeshFileLoader.cpp | 245 ++++++- .../asset/interchange/CPLYMeshFileLoader.cpp | 691 +++++++++++++----- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 499 ++++++++++--- .../asset/interchange/CSTLMeshFileLoader.cpp | 7 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 64 +- 6 files changed, 1203 insertions(+), 311 deletions(-) diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 3525f51b5f..372539cb7d 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -140,11 +140,13 @@ struct SResolvedFileIOPolicy case SFileIOPolicy::Strategy::Auto: default: { + if (fileMappable) + return 
makeResolved(Strategy::WholeFile, sizeKnown ? "auto_mappable_prefers_whole_file" : "auto_unknown_size_mappable_whole_file"); if (!sizeKnown) - return makeResolved(fileMappable ? Strategy::WholeFile : Strategy::Chunked, fileMappable ? "auto_unknown_size_mappable_whole_file" : "auto_unknown_size"); - const uint64_t wholeLimit = fileMappable ? std::max(wholeThreshold, maxStaging) : std::min(wholeThreshold, maxStaging); + return makeResolved(Strategy::Chunked, "auto_unknown_size"); + const uint64_t wholeLimit = std::min(wholeThreshold, maxStaging); if (byteCount <= wholeLimit) - return makeResolved(Strategy::WholeFile, fileMappable ? "auto_mappable_prefers_whole_file" : "auto_small_enough_for_whole_file"); + return makeResolved(Strategy::WholeFile, "auto_small_enough_for_whole_file"); return makeResolved(Strategy::Chunked, "auto_too_large_for_whole_file"); } } diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index b1df506062..6f945e6878 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include namespace nbl::asset { @@ -30,6 +31,154 @@ struct Parse static constexpr uint32_t UV0 = 0u; using Common = impl::TextParse; struct VertexDedupNode { int32_t uv = -1; int32_t normal = -1; uint32_t smoothingGroup = 0u; uint32_t outIndex = 0u; int32_t next = -1; }; + static inline bool isDigit(const char c) { return c >= '0' && c <= '9'; } + static bool parseFloat(const char*& ptr, const char* const end, float& out) + { + const char* const start = ptr; + if (start >= end) + return false; + const char* p = start; + bool negative = false; + if (*p == '-' || *p == '+') + { + negative = (*p == '-'); + ++p; + if (p >= end) + return false; + } + if (*p == '.' 
|| !isDigit(*p)) + { + const auto parseResult = fast_float::from_chars(start, end, out); + if (parseResult.ec == std::errc() && parseResult.ptr != start) + { + ptr = parseResult.ptr; + return true; + } + return false; + } + uint64_t integerPart = 0ull; + while (p < end && isDigit(*p)) + { + integerPart = integerPart * 10ull + static_cast(*p - '0'); + ++p; + } + double value = static_cast(integerPart); + if (p < end && *p == '.') + { + const char* const dot = p; + if ((dot + 7) <= end) + { + const char d0 = dot[1]; + const char d1 = dot[2]; + const char d2 = dot[3]; + const char d3 = dot[4]; + const char d4 = dot[5]; + const char d5 = dot[6]; + if (isDigit(d0) && isDigit(d1) && isDigit(d2) && isDigit(d3) && isDigit(d4) && isDigit(d5)) + { + const bool hasNext = (dot + 7) < end; + const char next = hasNext ? dot[7] : '\0'; + if ((!hasNext || !isDigit(next)) && (!hasNext || (next != 'e' && next != 'E'))) + { + const uint32_t frac = + static_cast(d0 - '0') * 100000u + + static_cast(d1 - '0') * 10000u + + static_cast(d2 - '0') * 1000u + + static_cast(d3 - '0') * 100u + + static_cast(d4 - '0') * 10u + + static_cast(d5 - '0'); + value += static_cast(frac) * 1e-6; + p = dot + 7; + out = static_cast(negative ? 
-value : value); + ptr = p; + return true; + } + } + } + static constexpr double InvPow10[] = { + 1.0, + 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, + 1e-6, 1e-7, 1e-8, 1e-9, 1e-10, + 1e-11, 1e-12, 1e-13, 1e-14, 1e-15, + 1e-16, 1e-17, 1e-18 + }; + ++p; + uint64_t fractionPart = 0ull; + uint32_t fractionDigits = 0u; + while (p < end && isDigit(*p)) + { + if (fractionDigits >= (std::size(InvPow10) - 1u)) + { + const auto parseResult = fast_float::from_chars(start, end, out); + if (parseResult.ec == std::errc() && parseResult.ptr != start) + { + ptr = parseResult.ptr; + return true; + } + return false; + } + fractionPart = fractionPart * 10ull + static_cast(*p - '0'); + ++fractionDigits; + ++p; + } + value += static_cast(fractionPart) * InvPow10[fractionDigits]; + } + if (p < end && (*p == 'e' || *p == 'E')) + { + const auto parseResult = fast_float::from_chars(start, end, out); + if (parseResult.ec == std::errc() && parseResult.ptr != start) + { + ptr = parseResult.ptr; + return true; + } + return false; + } + out = static_cast(negative ? 
-value : value); + ptr = p; + return true; + } + static bool parseUnsignedIndex(const char*& ptr, const char* const end, uint32_t& out) + { + if (ptr >= end || !isDigit(*ptr)) + return false; + uint64_t value = 0ull; + while (ptr < end && isDigit(*ptr)) + { + value = value * 10ull + static_cast(*ptr - '0'); + ++ptr; + } + if (value == 0ull || value > static_cast(std::numeric_limits::max())) + return false; + out = static_cast(value); + return true; + } + static bool parseSignedIndex(const char*& ptr, const char* const end, int32_t& out) + { + if (ptr >= end) + return false; + bool negative = false; + if (*ptr == '-') + { + negative = true; + ++ptr; + } + else if (*ptr == '+') + ++ptr; + if (ptr >= end || !isDigit(*ptr)) + return false; + int64_t value = 0; + while (ptr < end && isDigit(*ptr)) + { + value = value * 10ll + static_cast(*ptr - '0'); + ++ptr; + } + if (negative) + value = -value; + if (value == 0 || value < static_cast(std::numeric_limits::min()) || value > static_cast(std::numeric_limits::max())) + return false; + out = static_cast(value); + return true; + } static bool resolveIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) { if (rawIndex > 0) @@ -80,9 +229,9 @@ struct Parse const char* ptr = lineStart; auto parsePositive = [&](const size_t count, int32_t& outIx) -> bool { uint32_t value = 0u; - if (!Common::parseNonZeroNumber(ptr, lineEnd, value)) + if (!parseUnsignedIndex(ptr, lineEnd, value)) return false; - if (value > static_cast(std::numeric_limits::max()) || value > count) + if (value > count) return false; outIx = value - 1u; return true; @@ -90,7 +239,7 @@ struct Parse for (uint32_t corner = 0u; corner < 3u; ++corner) { Common::skipInlineWhitespace(ptr, lineEnd); - if (ptr >= lineEnd || !core::isdigit(*ptr)) + if (ptr >= lineEnd || !isDigit(*ptr)) return false; int32_t posIx = -1; if (!parsePositive(posCount, posIx)) @@ -112,6 +261,37 @@ struct Parse Common::skipInlineWhitespace(ptr, lineEnd); return ptr == lineEnd; 
} + static bool parseTrianglePositivePositionNormalLine(const char* const lineStart, const char* const lineEnd, std::array& out, const size_t posCount, const size_t normalCount) + { + const char* ptr = lineStart; + auto parsePositive = [&](const size_t count, int32_t& outIx) -> bool { + uint32_t value = 0u; + if (!parseUnsignedIndex(ptr, lineEnd, value)) + return false; + if (value > count) + return false; + outIx = value - 1u; + return true; + }; + for (uint32_t corner = 0u; corner < 3u; ++corner) + { + Common::skipInlineWhitespace(ptr, lineEnd); + if (ptr >= lineEnd || !isDigit(*ptr)) + return false; + int32_t posIx = -1; + if (!parsePositive(posCount, posIx)) + return false; + if ((ptr + 1) >= lineEnd || ptr[0] != '/' || ptr[1] != '/') + return false; + ptr += 2; + int32_t normalIx = -1; + if (!parsePositive(normalCount, normalIx)) + return false; + out[corner] = hlsl::int32_t3(posIx, -1, normalIx); + } + Common::skipInlineWhitespace(ptr, lineEnd); + return ptr == lineEnd; + } static bool parseFaceVertexToken(const char*& linePtr, const char* const lineEnd, hlsl::int32_t3& idx, const size_t posCount, const size_t uvCount, const size_t normalCount) { Common::skipInlineWhitespace(linePtr, lineEnd); @@ -121,16 +301,16 @@ struct Parse const char* ptr = linePtr; auto parsePositive = [&](const size_t count, int32_t& outIx) -> bool { uint32_t raw = 0u; - if (!Common::parseNonZeroNumber(ptr, lineEnd, raw)) + if (!parseUnsignedIndex(ptr, lineEnd, raw)) return false; - if (raw > static_cast(std::numeric_limits::max()) || raw > count) + if (raw > count) return false; outIx = raw - 1u; return true; }; auto parseResolved = [&](const size_t count, int32_t& outIx) -> bool { int32_t raw = 0; - return Common::parseNonZeroNumber(ptr, lineEnd, raw) && resolveIndex(raw, count, outIx); + return parseSignedIndex(ptr, lineEnd, raw) && resolveIndex(raw, count, outIx); }; if (*ptr != '-' && *ptr != '+') { @@ -588,6 +768,27 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( } return 
false; }; + auto acquireCornerIndexPositiveNormal = [&](const hlsl::int32_t3& idx, + uint32_t& outIx) -> bool { + const uint32_t hotHash = static_cast(idx.x) * 73856093u ^ + static_cast(idx.z) * 83492791u ^ + 0x9e3779b9u; + auto& hotEntry = dedupHotCache[static_cast(hotHash) & dedupHotMask]; + if (hotEntry.pos == idx.x && hotEntry.uv == -1 && + hotEntry.normal == idx.z) { + outIx = hotEntry.outIndex; + return true; + } + if (findCornerIndex(idx.x, -1, idx.z, 0u, outIx) || + materializeCornerIndex(idx.x, -1, idx.z, 0u, outIx)) { + hotEntry.pos = idx.x; + hotEntry.uv = -1; + hotEntry.normal = idx.z; + hotEntry.outIndex = outIx; + return true; + } + return false; + }; auto acquireTriangleCorners = [&](auto&& acquire, const std::array& triIdx, hlsl::uint32_t3& cornerIx) -> bool { return acquire(triIdx[0], cornerIx.x) && acquire(triIdx[1], cornerIx.y) && acquire(triIdx[2], cornerIx.z); }; @@ -611,16 +812,15 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( if (lineStart < lineEnd) { const char lineType = std::tolower(*lineStart); if (lineType == 'v') { - auto parseVector = [&](const char* ptr, float* values, - const uint32_t count) -> bool { - for (uint32_t i = 0u; i < count; ++i) { - while (ptr < lineEnd && Parse::Common::isInlineWhitespace(*ptr)) - ++ptr; - if (ptr >= lineEnd || - !Parse::Common::parseNumber(ptr, lineEnd, values[i])) - return false; - } - return true; + auto parseVector = [&](const char* ptr, float* values, + const uint32_t count) -> bool { + for (uint32_t i = 0u; i < count; ++i) { + while (ptr < lineEnd && Parse::Common::isInlineWhitespace(*ptr)) + ++ptr; + if (ptr >= lineEnd || !Parse::parseFloat(ptr, lineEnd, values[i])) + return false; + } + return true; }; const char subType = ((lineStart + 1) < lineEnd) ? 
std::tolower(lineStart[1]) : '\0'; @@ -679,6 +879,12 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( hlsl::int32_t3(-1, -1, -1)}; bool triangleFastPath = Parse::parseTrianglePositiveTripletLine( lineStart + 1, lineEnd, triIdx, posCount, uvCount, normalCount); + bool positiveNormalOnlyFastPath = false; + if (!triangleFastPath && uvCount == 0u && normalCount != 0u) { + triangleFastPath = Parse::parseTrianglePositivePositionNormalLine( + lineStart + 1, lineEnd, triIdx, posCount, normalCount); + positiveNormalOnlyFastPath = triangleFastPath; + } bool parsedFirstThree = triangleFastPath; if (!triangleFastPath) { triLinePtr = lineStart + 1; @@ -697,7 +903,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( triangleFastPath = (triLinePtr == lineEnd); } } - if (triangleFastPath) { + if (triangleFastPath && !positiveNormalOnlyFastPath) { const bool fullTriplet = std::all_of( triIdx.begin(), triIdx.end(), [](const hlsl::int32_t3& idx) { return hlsl::all(glm::greaterThanEqual(idx, hlsl::int32_t3(0))); @@ -707,7 +913,10 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( } if (triangleFastPath) { hlsl::uint32_t3 cornerIx = {}; - if (!acquireTriangleCorners(acquireCornerIndexPositiveTriplet, triIdx, cornerIx)) + if (positiveNormalOnlyFastPath) { + if (!acquireTriangleCorners(acquireCornerIndexPositiveNormal, triIdx, cornerIx)) + return {}; + } else if (!acquireTriangleCorners(acquireCornerIndexPositiveTriplet, triIdx, cornerIx)) return {}; faceFastTokenCount += 3u; currentFaceFastTokenCount += 3u; diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 68d178f004..93107fbcde 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -19,6 +19,7 @@ #include "nbl/core/hash/blake.h" #include "nbl/system/IFile.h" #include "nbl/system/ISystem.h" +#include #include namespace nbl::asset { @@ -435,96 +436,95 @@ struct Parse 
hlsl::shapes::util::AABBAccumulator3* parsedAABB) { if (!IsBinaryFile || el.Name != "vertex") return EFastVertexReadResult::NotApplicable; - struct SLayoutDesc { - uint32_t propertyCount; - uint32_t srcBytesPerVertex; - bool hasNormals; - bool hasUVs; - }; - auto allF32 = [&el]() -> bool { - for (const auto& prop : el.Properties) { + enum class ELayoutKind : uint8_t { XYZ, XYZ_N, XYZ_N_UV }; + auto allF32 = [&el]()->bool { + for (const auto& prop : el.Properties) if (prop.type != EF_R32_SFLOAT) return false; - } return true; }; if (!allF32()) return EFastVertexReadResult::NotApplicable; - auto matchNames = - [&el](std::initializer_list names) -> bool { + auto matchNames = [&el](std::initializer_list names)->bool { if (el.Properties.size() != names.size()) return false; size_t i = 0ull; - for (const auto* name : names) { + for (const auto* name : names) + { if (el.Properties[i].Name != name) return false; ++i; } return true; }; - static constexpr SLayoutDesc xyz = {3u, sizeof(hlsl::float32_t) * 3u, - false, false}; - static constexpr SLayoutDesc xyz_n = {6u, sizeof(hlsl::float32_t) * 6u, - true, false}; - static constexpr SLayoutDesc xyz_n_uv = {8u, sizeof(hlsl::float32_t) * 8u, - true, true}; - const SLayoutDesc* layout = nullptr; + ELayoutKind layout = ELayoutKind::XYZ; if (matchNames({"x", "y", "z"})) - layout = &xyz; + layout = ELayoutKind::XYZ; else if (matchNames({"x", "y", "z", "nx", "ny", "nz"})) - layout = &xyz_n; + layout = ELayoutKind::XYZ_N; else if (matchNames({"x", "y", "z", "nx", "ny", "nz", "u", "v"}) || matchNames({"x", "y", "z", "nx", "ny", "nz", "s", "t"})) - layout = &xyz_n_uv; - if (!layout) + layout = ELayoutKind::XYZ_N_UV; + else return EFastVertexReadResult::NotApplicable; const size_t floatBytes = sizeof(hlsl::float32_t); - struct STupleDesc { - uint32_t beginIx; - uint32_t componentCount; - uint32_t stride = 0u; - uint8_t* base = nullptr; - }; - std::array tuples = {STupleDesc{0u, 3u}, - STupleDesc{3u, 3u}, - STupleDesc{6u, 2u}}; - const 
uint32_t tupleCount = - 1u + static_cast(layout->hasNormals) + - static_cast(layout->hasUVs); - auto validateTuple = [&](STupleDesc& tuple) -> bool { - if (tuple.beginIx + tuple.componentCount > vertAttrIts.size()) + auto validateTuple = [&](const size_t beginIx, const size_t componentCount, uint32_t& outStride, uint8_t*& outBase)->bool { + if (beginIx + componentCount > vertAttrIts.size()) return false; - auto& first = vertAttrIts[tuple.beginIx]; + auto& first = vertAttrIts[beginIx]; if (!first.ptr || first.dstFmt != EF_R32_SFLOAT) return false; - tuple.stride = first.stride; - tuple.base = first.ptr; - for (uint32_t c = 1u; c < tuple.componentCount; ++c) { - auto& it = vertAttrIts[tuple.beginIx + c]; + outStride = first.stride; + outBase = first.ptr; + for (size_t c = 1ull; c < componentCount; ++c) + { + auto& it = vertAttrIts[beginIx + c]; if (!it.ptr || it.dstFmt != EF_R32_SFLOAT) return false; - if (it.stride != tuple.stride) + if (it.stride != outStride) return false; - if (it.ptr != tuple.base + c * floatBytes) + if (it.ptr != outBase + c * floatBytes) return false; } return true; }; - auto commitTuple = [&](const STupleDesc& tuple) -> void { - for (uint32_t c = 0u; c < tuple.componentCount; ++c) - vertAttrIts[tuple.beginIx + c].ptr = tuple.base + c * floatBytes; - }; - if (vertAttrIts.size() != layout->propertyCount) - return EFastVertexReadResult::NotApplicable; - for (uint32_t tupleIx = 0u; tupleIx < tupleCount; ++tupleIx) - if (!validateTuple(tuples[tupleIx])) - return EFastVertexReadResult::NotApplicable; - if (el.Count > - (std::numeric_limits::max() / layout->srcBytesPerVertex)) + uint32_t posStride = 0u, normalStride = 0u, uvStride = 0u; + uint8_t* posBase = nullptr; + uint8_t* normalBase = nullptr; + uint8_t* uvBase = nullptr; + switch (layout) + { + case ELayoutKind::XYZ: + if (vertAttrIts.size() != 3u || !validateTuple(0u, 3u, posStride, posBase)) + return EFastVertexReadResult::NotApplicable; + break; + case ELayoutKind::XYZ_N: + if 
(vertAttrIts.size() != 6u) + return EFastVertexReadResult::NotApplicable; + if (!validateTuple(0u, 3u, posStride, posBase) || !validateTuple(3u, 3u, normalStride, normalBase)) + return EFastVertexReadResult::NotApplicable; + break; + case ELayoutKind::XYZ_N_UV: + if (vertAttrIts.size() != 8u) + return EFastVertexReadResult::NotApplicable; + if (!validateTuple(0u, 3u, posStride, posBase) || !validateTuple(3u, 3u, normalStride, normalBase) || !validateTuple(6u, 2u, uvStride, uvBase)) + return EFastVertexReadResult::NotApplicable; + break; + } + const size_t srcBytesPerVertex = [layout]()->size_t { + switch (layout) + { + case ELayoutKind::XYZ: return sizeof(hlsl::float32_t) * 3ull; + case ELayoutKind::XYZ_N: return sizeof(hlsl::float32_t) * 6ull; + case ELayoutKind::XYZ_N_UV: return sizeof(hlsl::float32_t) * 8ull; + default: return 0ull; + } + }(); + if (srcBytesPerVertex == 0ull || el.Count > (std::numeric_limits::max() / srcBytesPerVertex)) return EFastVertexReadResult::Error; const bool trackAABB = parsedAABB != nullptr; const bool needsByteSwap = IsWrongEndian; - auto decodeF32 = [needsByteSwap](const uint8_t* src) -> float { + auto decodeF32 = [needsByteSwap](const uint8_t* src)->float { uint32_t bits = 0u; std::memcpy(&bits, src, sizeof(bits)); if (needsByteSwap) @@ -533,72 +533,240 @@ struct Parse std::memcpy(&value, &bits, sizeof(value)); return value; }; - auto decodeVector = [&](const uint8_t* src) -> Vec { - constexpr uint32_t N = hlsl::vector_traits::Dimension; - Vec value{}; - hlsl::array_set setter; - for (uint32_t i = 0u; i < N; ++i) - setter(value, i, - decodeF32(src + static_cast(i) * floatBytes)); - return value; - }; - auto storeVector = [](uint8_t* dst, - const Vec& value) -> void { - constexpr uint32_t N = hlsl::vector_traits::Dimension; - hlsl::array_get getter; - auto* const out = reinterpret_cast(dst); - for (uint32_t i = 0u; i < N; ++i) - out[i] = getter(value, i); - }; - auto decodeStore = [&](STupleDesc& tuple, - const uint8_t*& src) -> Vec 
{ - Vec value = decodeVector.operator()(src); - storeVector.operator()(tuple.base, value); - src += static_cast(hlsl::vector_traits::Dimension) * - floatBytes; - tuple.base += tuple.stride; - return value; - }; size_t remainingVertices = el.Count; - while (remainingVertices > 0ull) { - if (StartPointer + layout->srcBytesPerVertex > EndPointer) + while (remainingVertices > 0ull) + { + if (StartPointer + srcBytesPerVertex > EndPointer) fillBuffer(); - const size_t available = - EndPointer > StartPointer - ? static_cast(EndPointer - StartPointer) - : 0ull; - if (available < layout->srcBytesPerVertex) + const size_t available = EndPointer > StartPointer ? static_cast(EndPointer - StartPointer) : 0ull; + if (available < srcBytesPerVertex) return EFastVertexReadResult::Error; - const size_t batchVertices = - std::min(remainingVertices, available / layout->srcBytesPerVertex); + const size_t batchVertices = std::min(remainingVertices, available / srcBytesPerVertex); const uint8_t* src = reinterpret_cast(StartPointer); - if (!layout->hasNormals && !layout->hasUVs && - tuples[0].stride == 3ull * floatBytes && !needsByteSwap && - !trackAABB) { - const size_t batchBytes = batchVertices * 3ull * floatBytes; - std::memcpy(tuples[0].base, src, batchBytes); - src += batchBytes; - tuples[0].base += batchBytes; - } else { - for (size_t v = 0ull; v < batchVertices; ++v) { - const hlsl::float32_t3 position = - decodeStore.operator()(tuples[0], src); - if (trackAABB) - hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, position); - if (layout->hasNormals) { - decodeStore.operator()(tuples[1], src); + switch (layout) + { + case ELayoutKind::XYZ: + { + if (posStride == 3ull * floatBytes) + { + const size_t batchBytes = batchVertices * 3ull * floatBytes; + if (trackAABB && batchVertices >= (1ull << 20)) + { + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); + const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers(hw, 
inner.params.ioPolicy.runtimeTuning.workerHeadroom); + SLoaderRuntimeTuningRequest vertexTuningRequest = {}; + vertexTuningRequest.inputBytes = batchBytes; + vertexTuningRequest.totalWorkUnits = batchVertices; + vertexTuningRequest.minBytesPerWorker = 3ull * floatBytes; + vertexTuningRequest.hardwareThreads = static_cast(hw); + vertexTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); + vertexTuningRequest.targetChunksPerWorker = inner.params.ioPolicy.runtimeTuning.targetChunksPerWorker; + vertexTuningRequest.sampleData = reinterpret_cast(src); + vertexTuningRequest.sampleBytes = SLoaderRuntimeTuner::resolveSampleBytes(inner.params.ioPolicy, batchBytes); + const auto vertexTuning = SLoaderRuntimeTuner::tune(inner.params.ioPolicy, vertexTuningRequest); + const size_t workerCount = std::min(vertexTuning.workerCount, batchVertices); + if (workerCount > 1ull) + { + struct SAABBRange { float minX = std::numeric_limits::max(); float minY = std::numeric_limits::max(); float minZ = std::numeric_limits::max(); float maxX = std::numeric_limits::lowest(); float maxY = std::numeric_limits::lowest(); float maxZ = std::numeric_limits::lowest(); }; + std::vector workerRanges(workerCount); + uint8_t* dstBase = posBase; + SLoaderRuntimeTuner::dispatchWorkers(workerCount, [&](const size_t workerIx) { + const size_t begin = (batchVertices * workerIx) / workerCount; + const size_t end = (batchVertices * (workerIx + 1ull)) / workerCount; + const size_t count = end - begin; + if (count == 0ull) + return; + auto& range = workerRanges[workerIx]; + const uint8_t* inBytes = src + begin * 3ull * floatBytes; + float* outFloats = reinterpret_cast(dstBase + begin * 3ull * floatBytes); + if (!needsByteSwap) + { + std::memcpy(outFloats, inBytes, count * 3ull * floatBytes); + const float* xyz = reinterpret_cast(inBytes); + for (size_t v = 0ull; v < count; ++v) + { + const float x = xyz[v * 3ull + 0ull]; + const float y = xyz[v * 3ull + 1ull]; + const float z = xyz[v * 3ull + 2ull]; + if (x 
< range.minX) range.minX = x; + if (y < range.minY) range.minY = y; + if (z < range.minZ) range.minZ = z; + if (x > range.maxX) range.maxX = x; + if (y > range.maxY) range.maxY = y; + if (z > range.maxZ) range.maxZ = z; + } + } + else + { + for (size_t v = 0ull; v < count; ++v) + { + uint32_t xb = 0u, yb = 0u, zb = 0u; + std::memcpy(&xb, inBytes + 0ull * floatBytes, sizeof(xb)); + std::memcpy(&yb, inBytes + 1ull * floatBytes, sizeof(yb)); + std::memcpy(&zb, inBytes + 2ull * floatBytes, sizeof(zb)); + xb = Binary::byteswap(xb); + yb = Binary::byteswap(yb); + zb = Binary::byteswap(zb); + float x = 0.f, y = 0.f, z = 0.f; + std::memcpy(&x, &xb, sizeof(x)); + std::memcpy(&y, &yb, sizeof(y)); + std::memcpy(&z, &zb, sizeof(z)); + outFloats[0] = x; + outFloats[1] = y; + outFloats[2] = z; + if (x < range.minX) range.minX = x; + if (y < range.minY) range.minY = y; + if (z < range.minZ) range.minZ = z; + if (x > range.maxX) range.maxX = x; + if (y > range.maxY) range.maxY = y; + if (z > range.maxZ) range.maxZ = z; + inBytes += 3ull * floatBytes; + outFloats += 3ull; + } + } + }); + auto& aabb = parsedAABB->value; + for (const auto& range : workerRanges) + { + if (range.minX < aabb.minVx.x) aabb.minVx.x = range.minX; + if (range.minY < aabb.minVx.y) aabb.minVx.y = range.minY; + if (range.minZ < aabb.minVx.z) aabb.minVx.z = range.minZ; + if (range.maxX > aabb.maxVx.x) aabb.maxVx.x = range.maxX; + if (range.maxY > aabb.maxVx.y) aabb.maxVx.y = range.maxY; + if (range.maxZ > aabb.maxVx.z) aabb.maxVx.z = range.maxZ; + } + src += batchBytes; + posBase += batchBytes; + break; + } + } + if (!needsByteSwap) + { + std::memcpy(posBase, src, batchBytes); + if (trackAABB) + { + const float* xyz = reinterpret_cast(src); + auto& aabb = parsedAABB->value; + for (size_t v = 0ull; v < batchVertices; ++v) + { + const float x = xyz[v * 3ull + 0ull]; + const float y = xyz[v * 3ull + 1ull]; + const float z = xyz[v * 3ull + 2ull]; + if (x < aabb.minVx.x) aabb.minVx.x = x; + if (y < aabb.minVx.y) 
aabb.minVx.y = y; + if (z < aabb.minVx.z) aabb.minVx.z = z; + if (x > aabb.maxVx.x) aabb.maxVx.x = x; + if (y > aabb.maxVx.y) aabb.maxVx.y = y; + if (z > aabb.maxVx.z) aabb.maxVx.z = z; + } + } + src += batchBytes; + posBase += batchBytes; + } + else + { + for (size_t v = 0ull; v < batchVertices; ++v) + { + const float x = decodeF32(src + 0ull * floatBytes); + const float y = decodeF32(src + 1ull * floatBytes); + const float z = decodeF32(src + 2ull * floatBytes); + reinterpret_cast(posBase)[0] = x; + reinterpret_cast(posBase)[1] = y; + reinterpret_cast(posBase)[2] = z; + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, x, y, z); + src += 3ull * floatBytes; + posBase += posStride; + } + } + } + else + { + for (size_t v = 0ull; v < batchVertices; ++v) + { + const float x = decodeF32(src + 0ull * floatBytes); + const float y = decodeF32(src + 1ull * floatBytes); + const float z = decodeF32(src + 2ull * floatBytes); + reinterpret_cast(posBase)[0] = x; + reinterpret_cast(posBase)[1] = y; + reinterpret_cast(posBase)[2] = z; + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, x, y, z); + src += 3ull * floatBytes; + posBase += posStride; + } } - if (layout->hasUVs) { - decodeStore.operator()(tuples[2], src); + } + break; + case ELayoutKind::XYZ_N: + { + for (size_t v = 0ull; v < batchVertices; ++v) + { + const float x = decodeF32(src + 0ull * floatBytes); + const float y = decodeF32(src + 1ull * floatBytes); + const float z = decodeF32(src + 2ull * floatBytes); + reinterpret_cast(posBase)[0] = x; + reinterpret_cast(posBase)[1] = y; + reinterpret_cast(posBase)[2] = z; + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, hlsl::float32_t3(x, y, z)); + src += 3ull * floatBytes; + posBase += posStride; + reinterpret_cast(normalBase)[0] = decodeF32(src + 0ull * floatBytes); + reinterpret_cast(normalBase)[1] = decodeF32(src + 1ull * floatBytes); + reinterpret_cast(normalBase)[2] = decodeF32(src + 2ull * 
floatBytes); + src += 3ull * floatBytes; + normalBase += normalStride; + } + } + break; + case ELayoutKind::XYZ_N_UV: + { + for (size_t v = 0ull; v < batchVertices; ++v) + { + const float x = decodeF32(src + 0ull * floatBytes); + const float y = decodeF32(src + 1ull * floatBytes); + const float z = decodeF32(src + 2ull * floatBytes); + reinterpret_cast(posBase)[0] = x; + reinterpret_cast(posBase)[1] = y; + reinterpret_cast(posBase)[2] = z; + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, hlsl::float32_t3(x, y, z)); + src += 3ull * floatBytes; + posBase += posStride; + reinterpret_cast(normalBase)[0] = decodeF32(src + 0ull * floatBytes); + reinterpret_cast(normalBase)[1] = decodeF32(src + 1ull * floatBytes); + reinterpret_cast(normalBase)[2] = decodeF32(src + 2ull * floatBytes); + src += 3ull * floatBytes; + normalBase += normalStride; + reinterpret_cast(uvBase)[0] = decodeF32(src + 0ull * floatBytes); + reinterpret_cast(uvBase)[1] = decodeF32(src + 1ull * floatBytes); + src += 2ull * floatBytes; + uvBase += uvStride; } } } - const size_t consumed = batchVertices * layout->srcBytesPerVertex; + const size_t consumed = batchVertices * srcBytesPerVertex; StartPointer += consumed; remainingVertices -= batchVertices; } - for (uint32_t tupleIx = 0u; tupleIx < tupleCount; ++tupleIx) - commitTuple(tuples[tupleIx]); + const size_t posAdvance = el.Count * posStride; + vertAttrIts[0].ptr += posAdvance; + vertAttrIts[1].ptr += posAdvance; + vertAttrIts[2].ptr += posAdvance; + if (layout == ELayoutKind::XYZ_N || layout == ELayoutKind::XYZ_N_UV) + { + const size_t normalAdvance = el.Count * normalStride; + vertAttrIts[3].ptr += normalAdvance; + vertAttrIts[4].ptr += normalAdvance; + vertAttrIts[5].ptr += normalAdvance; + } + if (layout == ELayoutKind::XYZ_N_UV) + { + const size_t uvAdvance = el.Count * uvStride; + vertAttrIts[6].ptr += uvAdvance; + vertAttrIts[7].ptr += uvAdvance; + } return EFastVertexReadResult::Success; } void readVertex(const 
IAssetLoader::SAssetLoadParams& _params, @@ -749,11 +917,6 @@ struct Parse const size_t indexSize = is32Bit ? sizeof(uint32_t) : sizeof(uint16_t); const bool hasVertexCount = vertexCount != 0u; const bool trackMaxIndex = !hasVertexCount; - const hlsl::uint32_t3 vertexLimit(vertexCount); - const auto triExceedsVertexLimit = - [&vertexLimit](const hlsl::uint32_t3& tri) -> bool { - return hlsl::any(glm::greaterThanEqual(tri, vertexLimit)); - }; outIndexHash = IPreHashed::INVALID_HASH; const size_t minTriangleRecordSize = sizeof(uint8_t) + indexSize * 3u; if (element.Count > @@ -786,7 +949,6 @@ struct Parse value = Binary::byteswap(value); return value; }; - bool fallbackToGeneric = false; if (is32Bit) { const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); const size_t hardMaxWorkers = @@ -864,14 +1026,13 @@ struct Parse break; } ++in; - const hlsl::uint32_t3 tri( - readU32(in + 0ull * sizeof(uint32_t)), - readU32(in + 1ull * sizeof(uint32_t)), - readU32(in + 2ull * sizeof(uint32_t))); - outLocal[0] = tri.x; - outLocal[1] = tri.y; - outLocal[2] = tri.z; - const uint32_t triOr = tri.x | tri.y | tri.z; + const uint32_t i0 = readU32(in + 0ull * sizeof(uint32_t)); + const uint32_t i1 = readU32(in + 1ull * sizeof(uint32_t)); + const uint32_t i2 = readU32(in + 2ull * sizeof(uint32_t)); + outLocal[0] = i0; + outLocal[1] = i1; + outLocal[2] = i2; + const uint32_t triOr = i0 | i1 | i2; if (isSrcS32 && (triOr & 0x80000000u)) { workerInvalid[workerIx] = 1u; if (hashInParsePipeline) @@ -879,16 +1040,16 @@ struct Parse break; } if (validateAgainstVertexCount) { - if (triExceedsVertexLimit(tri)) { + if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) { workerInvalid[workerIx] = 1u; if (hashInParsePipeline) workerHashable[workerIx] = 0u; break; } } else if (needMax) { - const uint32_t triMax = std::max({tri.x, tri.y, tri.z}); - if (triMax > localMax) - localMax = triMax; + if (i0 > localMax) localMax = i0; + if (i1 > localMax) localMax = i1; + if (i2 > 
localMax) localMax = i2; } in += 3ull * sizeof(uint32_t); outLocal += 3ull; @@ -940,52 +1101,135 @@ struct Parse return EFastFaceReadResult::Success; } } - auto consumeTriangles = [&](const size_t indexBytes, const uint32_t signedMask, auto readTri) -> EFastFaceReadResult { - for (size_t j = 0u; j < element.Count; ++j) { - if (*ptr++ != 3u) { - fallbackToGeneric = true; - return EFastFaceReadResult::NotApplicable; + if (is32Bit) + { + if (isSrcU32) + { + if (trackMaxIndex) + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + return EFastFaceReadResult::NotApplicable; + out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); + out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); + out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); + ptr += 3ull * sizeof(uint32_t); + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; + out += 3u; + } + } + else + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + return EFastFaceReadResult::NotApplicable; + out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); + out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); + out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); + ptr += 3ull * sizeof(uint32_t); + if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + return EFastFaceReadResult::Error; + out += 3u; + } + } + } + else + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + return EFastFaceReadResult::NotApplicable; + out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); + out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); + out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); + ptr += 3ull * sizeof(uint32_t); + if ((out[0] | out[1] | out[2]) & 0x80000000u) + return EFastFaceReadResult::Error; + if (trackMaxIndex) + { + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > 
_maxIndex) _maxIndex = out[2]; + } + else if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + return EFastFaceReadResult::Error; + out += 3u; + } + } + } + else + { + if (isSrcU16) + { + if (trackMaxIndex) + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + return EFastFaceReadResult::NotApplicable; + out[0] = readU16(ptr + 0ull * sizeof(uint16_t)); + out[1] = readU16(ptr + 1ull * sizeof(uint16_t)); + out[2] = readU16(ptr + 2ull * sizeof(uint16_t)); + ptr += 3ull * sizeof(uint16_t); + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; + out += 3u; + } + } + else + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + return EFastFaceReadResult::NotApplicable; + out[0] = readU16(ptr + 0ull * sizeof(uint16_t)); + out[1] = readU16(ptr + 1ull * sizeof(uint16_t)); + out[2] = readU16(ptr + 2ull * sizeof(uint16_t)); + ptr += 3ull * sizeof(uint16_t); + if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + return EFastFaceReadResult::Error; + out += 3u; + } + } + } + else + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + return EFastFaceReadResult::NotApplicable; + out[0] = readU16(ptr + 0ull * sizeof(uint16_t)); + out[1] = readU16(ptr + 1ull * sizeof(uint16_t)); + out[2] = readU16(ptr + 2ull * sizeof(uint16_t)); + ptr += 3ull * sizeof(uint16_t); + if ((out[0] | out[1] | out[2]) & 0x8000u) + return EFastFaceReadResult::Error; + if (trackMaxIndex) + { + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; + } + else if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + return EFastFaceReadResult::Error; + out += 3u; } - const hlsl::uint32_t3 tri = readTri(ptr); - ptr += 3ull * indexBytes; - const 
uint32_t triOr = tri.x | tri.y | tri.z; - if (signedMask && (triOr & signedMask)) - return EFastFaceReadResult::Error; - out[0] = tri.x; - out[1] = tri.y; - out[2] = tri.z; - if (trackMaxIndex) { - const uint32_t triMax = std::max({tri.x, tri.y, tri.z}); - if (triMax > _maxIndex) - _maxIndex = triMax; - } else if (triExceedsVertexLimit(tri)) - return EFastFaceReadResult::Error; - out += 3u; } - return EFastFaceReadResult::Success; - }; - const auto fastReadResult = is32Bit ? - consumeTriangles(sizeof(uint32_t), isSrcS32 ? 0x80000000u : 0u, - [&](const uint8_t* const src) -> hlsl::uint32_t3 { - return hlsl::uint32_t3(readU32(src + 0ull * sizeof(uint32_t)), - readU32(src + 1ull * sizeof(uint32_t)), - readU32(src + 2ull * sizeof(uint32_t))); - }) : - consumeTriangles(sizeof(uint16_t), isSrcS16 ? 0x8000u : 0u, - [&](const uint8_t* const src) -> hlsl::uint32_t3 { - return hlsl::uint32_t3(readU16(src + 0ull * sizeof(uint16_t)), - readU16(src + 1ull * sizeof(uint16_t)), - readU16(src + 2ull * sizeof(uint16_t))); - }); - if (fastReadResult == EFastFaceReadResult::Error) - return EFastFaceReadResult::Error; - if (!fallbackToGeneric) { - StartPointer = reinterpret_cast(const_cast(ptr)); - _faceCount += element.Count; - return EFastFaceReadResult::Success; } - _outIndices.resize(oldSize); - _maxIndex = oldMaxIndex; + StartPointer = reinterpret_cast(const_cast(ptr)); + _faceCount += element.Count; + return EFastFaceReadResult::Success; } if (element.Count > (std::numeric_limits::max() / 3u)) return EFastFaceReadResult::Error; @@ -993,7 +1237,10 @@ struct Parse if (_outIndices.size() > (std::numeric_limits::max() - reserveCount)) return EFastFaceReadResult::Error; - _outIndices.reserve(_outIndices.size() + reserveCount); + const size_t oldSize = _outIndices.size(); + _outIndices.resize(oldSize + reserveCount); + uint32_t* out = _outIndices.data() + oldSize; + size_t written = 0ull; auto ensureBytes = [this](const size_t bytes) -> bool { if (StartPointer + bytes > EndPointer) 
fillBuffer(); @@ -1046,7 +1293,71 @@ struct Parse StartPointer += sizeof(uint16_t); return true; }; + auto readPackedU32 = [needEndianSwap](const uint8_t* src) -> uint32_t { + uint32_t value = 0u; + std::memcpy(&value, src, sizeof(value)); + if (needEndianSwap) + value = Binary::byteswap(value); + return value; + }; + auto readPackedU16 = [needEndianSwap](const uint8_t* src) -> uint32_t { + uint16_t value = 0u; + std::memcpy(&value, src, sizeof(value)); + if (needEndianSwap) + value = Binary::byteswap(value); + return value; + }; for (size_t j = 0u; j < element.Count; ++j) { + if (is32Bit && ensureBytes(sizeof(uint8_t) + sizeof(uint32_t) * 3ull) && static_cast(*StartPointer) == 3u) + { + ++StartPointer; + const uint32_t i0 = readPackedU32(reinterpret_cast(StartPointer) + 0ull * sizeof(uint32_t)); + const uint32_t i1 = readPackedU32(reinterpret_cast(StartPointer) + 1ull * sizeof(uint32_t)); + const uint32_t i2 = readPackedU32(reinterpret_cast(StartPointer) + 2ull * sizeof(uint32_t)); + StartPointer += 3ull * sizeof(uint32_t); + if (isSrcS32 && ((i0 | i1 | i2) & 0x80000000u)) + return EFastFaceReadResult::Error; + if (trackMaxIndex) + { + if (i0 > _maxIndex) _maxIndex = i0; + if (i1 > _maxIndex) _maxIndex = i1; + if (i2 > _maxIndex) _maxIndex = i2; + } + else if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) + return EFastFaceReadResult::Error; + out[0] = i0; + out[1] = i1; + out[2] = i2; + out += 3u; + written += 3ull; + ++_faceCount; + continue; + } + if (!is32Bit && ensureBytes(sizeof(uint8_t) + sizeof(uint16_t) * 3ull) && static_cast(*StartPointer) == 3u) + { + ++StartPointer; + const uint32_t i0 = readPackedU16(reinterpret_cast(StartPointer) + 0ull * sizeof(uint16_t)); + const uint32_t i1 = readPackedU16(reinterpret_cast(StartPointer) + 1ull * sizeof(uint16_t)); + const uint32_t i2 = readPackedU16(reinterpret_cast(StartPointer) + 2ull * sizeof(uint16_t)); + StartPointer += 3ull * sizeof(uint16_t); + if (isSrcS16 && ((i0 | i1 | i2) & 0x8000u)) + 
return EFastFaceReadResult::Error; + if (trackMaxIndex) + { + if (i0 > _maxIndex) _maxIndex = i0; + if (i1 > _maxIndex) _maxIndex = i1; + if (i2 > _maxIndex) _maxIndex = i2; + } + else if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) + return EFastFaceReadResult::Error; + out[0] = i0; + out[1] = i1; + out[2] = i2; + out += 3u; + written += 3ull; + ++_faceCount; + continue; + } int32_t countSigned = 0; if (!readCount(countSigned)) return EFastFaceReadResult::Error; @@ -1071,9 +1382,11 @@ struct Parse i2 >= vertexCount) { return EFastFaceReadResult::Error; } - _outIndices.push_back(i0); - _outIndices.push_back(i1); - _outIndices.push_back(i2); + out[0] = i0; + out[1] = i1; + out[2] = i2; + out += 3u; + written += 3ull; uint32_t prev = i2; for (uint32_t k = 3u; k < count; ++k) { uint32_t idx = 0u; @@ -1084,13 +1397,22 @@ struct Parse } else if (idx >= vertexCount) { return EFastFaceReadResult::Error; } - _outIndices.push_back(i0); - _outIndices.push_back(prev); - _outIndices.push_back(idx); + if (_outIndices.size() < oldSize + written + 3ull) + { + const size_t outOffset = static_cast(out - _outIndices.data()); + _outIndices.resize(oldSize + written + 3ull); + out = _outIndices.data() + outOffset; + } + out[0] = i0; + out[1] = prev; + out[2] = idx; + out += 3u; + written += 3ull; prev = idx; } ++_faceCount; } + _outIndices.resize(oldSize + written); return EFastFaceReadResult::Success; } IAssetLoader::SAssetLoadContext inner; @@ -1153,6 +1475,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) { using namespace nbl::core; + using clock_t = std::chrono::high_resolution_clock; if (!_file) return {}; const bool computeContentHashes = !_params.loaderFlags.hasAnyFlag( @@ -1188,6 +1511,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( hlsl::shapes::util::AABBAccumulator3 parsedAABB = 
hlsl::shapes::util::createAABBAccumulator(); uint32_t vertCount = 0; Parse::ContentHashBuild contentHashBuild = Parse::ContentHashBuild::create(computeContentHashes, hashInBuild); + double headerMs = 0.0, vertexMs = 0.0, faceMs = 0.0, finalizeMs = 0.0; auto visitVertexAttributeViews = [&](auto&& visitor) -> void { visitor(geometry->getPositionView()); visitor(geometry->getNormalView()); @@ -1235,6 +1559,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( bool continueReading = true; ctx.IsBinaryFile = false; ctx.IsWrongEndian = false; + const auto headerStart = clock_t::now(); do { const std::string_view wordView = Parse::toStringView(word); if (wordView == "property") { @@ -1339,6 +1664,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( word = ctx.getNextWord(); } } while (readingHeader && continueReading); + headerMs = std::chrono::duration(clock_t::now() - headerStart).count(); if (!continueReading) return {}; // now to read the actual data from the file @@ -1384,6 +1710,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( for (uint32_t i = 0; i < ctx.ElementList.size(); ++i) { auto& el = ctx.ElementList[i]; if (el.Name == "vertex") { + const auto vertexStart = clock_t::now(); if (verticesProcessed) { // multiple vertex elements are currently treated as unsupported _params.logger.log("Multiple `vertex` elements not supported!", @@ -1471,9 +1798,12 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( visitVertexAttributeViews(hashViewBufferIfNeeded); tryLaunchDeferredHash(geometry->getPositionView()); verticesProcessed = true; + vertexMs += std::chrono::duration(clock_t::now() - vertexStart).count(); } else if (el.Name == "face") { + const auto faceStart = clock_t::now(); if (!readFaceElement(el)) return {}; + faceMs += std::chrono::duration(clock_t::now() - faceStart).count(); } else { if (!skipUnknownElement(el)) return {}; @@ -1520,6 +1850,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( hashViewBufferIfNeeded(geometry->getIndexView()); } } + const auto finalizeStart = 
clock_t::now(); if (contentHashBuild.hashesDeferred()) { contentHashBuild.wait(); SPolygonGeometryContentHash::computeMissing(geometry.get(), @@ -1527,6 +1858,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( } else { hashRemainingGeometryBuffers(); } + finalizeMs = std::chrono::duration(clock_t::now() - finalizeStart).count(); const uint64_t ioMinRead = ctx.readCallCount ? ctx.readMinBytes : 0ull; const uint64_t ioAvgRead = ctx.readCallCount ? (ctx.readBytesTotal / ctx.readCallCount) : 0ull; @@ -1550,6 +1882,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(loadSession.ioPlan.strategy).c_str(), static_cast(loadSession.ioPlan.chunkSizeBytes()), loadSession.ioPlan.reason); + _params.logger.log("PLY loader stages: file=%s header=%.3f ms vertex=%.3f ms face=%.3f ms finalize=%.3f ms", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), headerMs, vertexMs, faceMs, finalizeMs); auto meta = core::make_smart_refctd_ptr(); return SAssetBundle(std::move(meta), {std::move(geometry)}); } diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index dd39637458..24d6aa9967 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -4,16 +4,15 @@ // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors #include "CPLYMeshWriter.h" -#include "nbl/asset/interchange/SGeometryViewDecode.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" -#include "impl/SBinaryData.h" #include "impl/SFileAccess.h" #include "nbl/system/IFile.h" #include #include #include #include +#include #include #include #include @@ -46,13 +45,10 @@ namespace struct Parse { static constexpr uint32_t UV0 = 0u; - using Binary = impl::BinaryData; - using SemanticDecode = 
SGeometryViewDecode::Prepared; - using StoredDecode = SGeometryViewDecode::Prepared; enum class ScalarType : uint8_t { Int8, UInt8, Int16, UInt16, Int32, UInt32, Float32, Float64 }; struct ScalarMeta { const char* name = "float32"; uint32_t byteSize = sizeof(float); bool integer = false; bool signedType = true; }; struct ExtraAuxView { const ICPUPolygonGeometry::SDataView* view = nullptr; uint32_t components = 0u; uint32_t auxIndex = 0u; ScalarType scalarType = ScalarType::Float32; }; - struct WriteInput { const ICPUPolygonGeometry* geom = nullptr; ScalarType positionScalarType = ScalarType::Float32; const ICPUPolygonGeometry::SDataView* uvView = nullptr; ScalarType uvScalarType = ScalarType::Float32; const core::vector* extraAuxViews = nullptr; bool writeNormals = false; ScalarType normalScalarType = ScalarType::Float32; size_t vertexCount = 0ull; size_t faceCount = 0ull; bool write16BitIndices = false; bool flipVectors = false; }; + struct WriteInput { const ICPUPolygonGeometry* geom = nullptr; ScalarType positionScalarType = ScalarType::Float32; const ICPUPolygonGeometry::SDataView* uvView = nullptr; ScalarType uvScalarType = ScalarType::Float32; const core::vector* extraAuxViews = nullptr; bool writeNormals = false; ScalarType normalScalarType = ScalarType::Float32; size_t vertexCount = 0ull; const uint32_t* indices = nullptr; size_t faceCount = 0ull; bool write16BitIndices = false; bool flipVectors = false; }; static constexpr size_t ApproxTextBytesPerVertex = sizeof("0.000000 0.000000 0.000000 0.000000 0.000000 0.000000\n") - 1ull; static constexpr size_t ApproxTextBytesPerFace = sizeof("3 4294967295 4294967295 4294967295\n") - 1ull; static constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 16ull; @@ -133,150 +129,382 @@ struct Parse return bytesPerChannel >= 8u ? 
ScalarType::Float64 : ScalarType::Float32; return ScalarType::Float32; } - struct BinarySink + static bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) { - uint8_t* cursor = nullptr; - template - inline bool append(const T value) { if (!cursor) return false; Binary::storeUnalignedAdvance(cursor, value); return true; } - inline bool finishVertex() { return true; } - }; - struct TextSink + out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); + if (!view.composed.isFormatted()) + return false; + const void* src = view.getPointer(ix); + if (!src) + return false; + const void* srcArr[4] = {src, nullptr, nullptr, nullptr}; + double tmp[4] = {}; + if (!decodePixels(view.composed.format, srcArr, tmp, 0u, 0u)) + return false; + const uint32_t channels = std::min(4u, getFormatChannelCount(view.composed.format)); + if (isNormalizedFormat(view.composed.format)) + { + const auto range = view.composed.getRange>(); + for (uint32_t i = 0u; i < channels; ++i) + (&out.x)[i] = tmp[i] * (range.maxVx[i] - range.minVx[i]) + range.minVx[i]; + } + else + { + for (uint32_t i = 0u; i < channels; ++i) + (&out.x)[i] = tmp[i]; + } + return true; + } + static bool decodeSigned4Raw(const ICPUPolygonGeometry::SDataView& view, const size_t ix, int64_t (&out)[4]) { - std::string& output; - template - inline bool append(const T value) + const void* src = view.getPointer(ix); + if (!src) + return false; + const void* srcArr[4] = {src, nullptr, nullptr, nullptr}; + return decodePixels(view.composed.format, srcArr, out, 0u, 0u); + } + static bool decodeUnsigned4Raw(const ICPUPolygonGeometry::SDataView& view, const size_t ix, uint64_t (&out)[4]) + { + const void* src = view.getPointer(ix); + if (!src) + return false; + const void* srcArr[4] = {src, nullptr, nullptr, nullptr}; + return decodePixels(view.composed.format, srcArr, out, 0u, 0u); + } + static bool isDirectScalarFormat(const E_FORMAT format, const ScalarType scalarType, const uint32_t componentCount, 
uint32_t& outByteSize) + { + outByteSize = 0u; + if (format == EF_UNKNOWN || componentCount == 0u) + return false; + if (isNormalizedFormat(format) || isScaledFormat(format)) + return false; + const uint32_t channels = getFormatChannelCount(format); + if (channels < componentCount) + return false; + const auto bytesPerPixel = getBytesPerPixel(format); + if (bytesPerPixel.getDenominator() != 1u) + return false; + const uint32_t pixelBytes = bytesPerPixel.getNumerator(); + if (pixelBytes == 0u || (pixelBytes % channels) != 0u) + return false; + const uint32_t byteSize = pixelBytes / channels; + const auto meta = getScalarMeta(scalarType); + if (byteSize != meta.byteSize) + return false; + switch (scalarType) { - if constexpr (std::is_floating_point_v) appendFloat(output, static_cast(value)); - else appendIntegral(output, value); - output.push_back(' '); + case ScalarType::Float32: + case ScalarType::Float64: + if (!isFloatingPointFormat(format)) + return false; + break; + case ScalarType::Int8: + case ScalarType::Int16: + case ScalarType::Int32: + if (!isIntegerFormat(format) || !isSignedFormat(format)) + return false; + break; + case ScalarType::UInt8: + case ScalarType::UInt16: + case ScalarType::UInt32: + if (!isIntegerFormat(format) || isSignedFormat(format)) + return false; + break; + } + outByteSize = byteSize; + return true; + } + static bool writeDirectBinaryView(const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const ScalarType scalarType, const bool flipVectors, uint8_t*& dst) + { + if (flipVectors || !dst || !view.composed.isFormatted()) + return false; + uint32_t byteSize = 0u; + if (!isDirectScalarFormat(view.composed.format, scalarType, componentCount, byteSize)) + return false; + const uint32_t pixelBytes = getBytesPerPixel(view.composed.format).getNumerator(); + if (view.composed.getStride() != pixelBytes) + return false; + const void* src = view.getPointer(ix); + if (!src) + return false; + const size_t 
copyBytes = static_cast(componentCount) * byteSize; + std::memcpy(dst, src, copyBytes); + dst += copyBytes; + return true; + } + static bool writeTypedViewBinary(const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const ScalarType scalarType, const bool flipVectors, uint8_t*& dst) + { + if (!dst) + return false; + if (writeDirectBinaryView(view, ix, componentCount, scalarType, flipVectors, dst)) return true; + switch (scalarType) + { + case ScalarType::Float64: + case ScalarType::Float32: + { + hlsl::float64_t4 tmp = {}; + if (!decodeVec4(view, ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + double value = (&tmp.x)[c]; + if (flipVectors && c == 0u) + value = -value; + if (scalarType == ScalarType::Float64) + { + std::memcpy(dst, &value, sizeof(value)); + dst += sizeof(value); + } + else + { + const float typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } + } + return true; + } + case ScalarType::Int8: + case ScalarType::Int16: + case ScalarType::Int32: + { + int64_t tmp[4] = {}; + if (!decodeSigned4Raw(view, ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + int64_t value = tmp[c]; + if (flipVectors && c == 0u) + value = -value; + switch (scalarType) + { + case ScalarType::Int8: + { + const int8_t typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } break; + case ScalarType::Int16: + { + const int16_t typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } break; + default: + { + const int32_t typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } break; + } + } + return true; + } + case ScalarType::UInt8: + case ScalarType::UInt16: + case ScalarType::UInt32: + { + uint64_t tmp[4] = {}; + if (!decodeUnsigned4Raw(view, ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; 
++c) + { + switch (scalarType) + { + case ScalarType::UInt8: + { + const uint8_t typed = static_cast(tmp[c]); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } break; + case ScalarType::UInt16: + { + const uint16_t typed = static_cast(tmp[c]); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } break; + default: + { + const uint32_t typed = static_cast(tmp[c]); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } break; + } + } + return true; + } } - inline bool finishVertex() { output.push_back('\n'); return true; } - }; - template - struct PreparedView + return false; + } + static bool writeTypedViewText(std::string& output, const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const ScalarType scalarType, const bool flipVectors) { - using EmitFn = bool(*)(Sink&, const PreparedView&, size_t); - uint32_t components = 0u; - bool flipVectors = false; - SemanticDecode semantic = {}; - StoredDecode stored = {}; - EmitFn emit = nullptr; - inline explicit operator bool() const { return emit != nullptr && (static_cast(semantic) || static_cast(stored)); } - inline bool operator()(Sink& sink, const size_t ix) const { return static_cast(*this) && emit(sink, *this, ix); } - template - static bool emitDecode(Sink& sink, const auto& decode, const size_t ix, const uint32_t components, const bool flipVectors) + switch (scalarType) { - std::array decoded = {}; - if (!decode.decode(ix, decoded)) - return false; - for (uint32_t c = 0u; c < components; ++c) + case ScalarType::Float64: + case ScalarType::Float32: { - OutT value = decoded[c]; - if constexpr (std::is_signed_v || std::is_floating_point_v) + hlsl::float64_t4 tmp = {}; + if (!decodeVec4(view, ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) { + double value = (&tmp.x)[c]; if (flipVectors && c == 0u) value = -value; + appendFloat(output, value); + output.push_back(' '); } - if (!sink.append(value)) + 
return true; + } + case ScalarType::Int8: + case ScalarType::Int16: + case ScalarType::Int32: + { + int64_t tmp[4] = {}; + if (!decodeSigned4Raw(view, ix, tmp)) return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + int64_t value = tmp[c]; + if (flipVectors && c == 0u) + value = -value; + appendIntegral(output, value); + output.push_back(' '); + } + return true; + } + case ScalarType::UInt8: + case ScalarType::UInt16: + case ScalarType::UInt32: + { + uint64_t tmp[4] = {}; + if (!decodeUnsigned4Raw(view, ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + appendIntegral(output, tmp[c]); + output.push_back(' '); + } + return true; } - return true; } - template - static bool emitPrepared(Sink& sink, const PreparedView& view, const size_t ix) { if constexpr (Mode == SGeometryViewDecode::EMode::Semantic) return emitDecode(sink, view.semantic, ix, view.components, view.flipVectors); return emitDecode(sink, view.stored, ix, view.components, view.flipVectors); } - template - static inline void prepareDecode(PreparedView& view, const ICPUPolygonGeometry::SDataView& src, const bool flipVectors) { view.flipVectors = flipVectors; if constexpr (Mode == SGeometryViewDecode::EMode::Semantic) view.semantic = SGeometryViewDecode::prepare(src); else view.stored = SGeometryViewDecode::prepare(src); view.emit = &emitPrepared; } - static PreparedView create(const ICPUPolygonGeometry::SDataView* view, const uint32_t components, const ScalarType scalarType, const bool flipVectors) + return false; + } + static bool writeBinaryFast(const WriteInput& input, uint8_t*& dst) + { + if (!input.geom || !input.indices || !input.extraAuxViews || !dst || input.flipVectors || input.writeNormals || input.uvView || !input.extraAuxViews->empty() || input.positionScalarType != ScalarType::Float32) + return false; + const auto& positionView = input.geom->getPositionView(); + if (!positionView.composed.isFormatted() || positionView.composed.format != 
EF_R32G32B32_SFLOAT || positionView.composed.getStride() != sizeof(hlsl::float32_t3)) + return false; + const void* src = positionView.getPointer(); + if (!src) + return false; + const size_t vertexBytes = input.vertexCount * sizeof(hlsl::float32_t3); + std::memcpy(dst, src, vertexBytes); + dst += vertexBytes; + for (size_t i = 0u; i < input.faceCount; ++i) { - PreparedView retval = {.components = components}; - if (!view) - return retval; - switch (scalarType) + *dst++ = 3u; + const uint32_t* tri = input.indices + i * 3u; + if (input.write16BitIndices) + { + const uint16_t tri16[3] = {static_cast(tri[0]), static_cast(tri[1]), static_cast(tri[2])}; + std::memcpy(dst, tri16, sizeof(tri16)); + dst += sizeof(tri16); + } + else { - case ScalarType::Float64: prepareDecode(retval, *view, flipVectors); break; - case ScalarType::Float32: prepareDecode(retval, *view, flipVectors); break; - case ScalarType::Int8: prepareDecode(retval, *view, flipVectors); break; - case ScalarType::UInt8: prepareDecode(retval, *view, false); break; - case ScalarType::Int16: prepareDecode(retval, *view, flipVectors); break; - case ScalarType::UInt16: prepareDecode(retval, *view, false); break; - case ScalarType::Int32: prepareDecode(retval, *view, flipVectors); break; - case ScalarType::UInt32: prepareDecode(retval, *view, false); break; + std::memcpy(dst, tri, sizeof(uint32_t) * 3u); + dst += sizeof(uint32_t) * 3u; } - return retval; } - }; - template - static bool emitVertices(const WriteInput& input, Sink& sink) + return true; + } + static bool writeBinary(const WriteInput& input, uint8_t* dst) { - if (!input.geom || !input.extraAuxViews) + if (!input.geom || !input.extraAuxViews || !dst) return false; + if (writeBinaryFast(input, dst)) + return true; const auto& positionView = input.geom->getPositionView(); const auto& normalView = input.geom->getNormalView(); const auto& extraAuxViews = *input.extraAuxViews; - const PreparedView preparedPosition = PreparedView::create(&positionView, 3u, 
input.positionScalarType, input.flipVectors); - const PreparedView preparedNormal = input.writeNormals ? PreparedView::create(&normalView, 3u, input.normalScalarType, input.flipVectors) : PreparedView{}; - const PreparedView preparedUV = input.uvView ? PreparedView::create(input.uvView, 2u, input.uvScalarType, false) : PreparedView{}; - core::vector> preparedExtraAuxViews; - preparedExtraAuxViews.reserve(extraAuxViews.size()); - for (const auto& extra : extraAuxViews) - preparedExtraAuxViews.push_back(extra.view ? PreparedView::create(extra.view, extra.components, extra.scalarType, false) : PreparedView{}); for (size_t i = 0u; i < input.vertexCount; ++i) { - if (!preparedPosition(sink, i)) + if (!writeTypedViewBinary(positionView, i, 3u, input.positionScalarType, input.flipVectors, dst)) return false; - if (input.writeNormals && !preparedNormal(sink, i)) + if (input.writeNormals && !writeTypedViewBinary(normalView, i, 3u, input.normalScalarType, input.flipVectors, dst)) return false; - if (input.uvView && !preparedUV(sink, i)) + if (input.uvView && !writeTypedViewBinary(*input.uvView, i, 2u, input.uvScalarType, false, dst)) return false; - for (size_t extraIx = 0u; extraIx < extraAuxViews.size(); ++extraIx) - { - if (!extraAuxViews[extraIx].view || !preparedExtraAuxViews[extraIx](sink, i)) + for (const auto& extra : extraAuxViews) + if (!extra.view || !writeTypedViewBinary(*extra.view, i, extra.components, extra.scalarType, false, dst)) return false; - } - if (!sink.finishVertex()) - return false; } - return true; - } - static bool writeBinary(const WriteInput& input, uint8_t* dst) - { - BinarySink sink = {.cursor = dst}; - if (!emitVertices(input, sink)) + if (!input.indices) return false; - return SGeometryWriterCommon::visitTriangleIndices(input.geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) -> bool { - if (!sink.append(static_cast(3u))) - return false; + for (size_t i = 0u; i < input.faceCount; ++i) + { + const uint8_t listSize = 3u; + 
*dst++ = listSize; + const uint32_t* tri = input.indices + i * 3u; if (input.write16BitIndices) { - if (!sink.append(static_cast(i0)) || !sink.append(static_cast(i1)) || !sink.append(static_cast(i2))) - return false; + const uint16_t tri16[3] = {static_cast(tri[0]), static_cast(tri[1]), static_cast(tri[2])}; + std::memcpy(dst, tri16, sizeof(tri16)); + dst += sizeof(tri16); } - else if (!sink.append(i0) || !sink.append(i1) || !sink.append(i2)) - return false; - return true; - }); + else + { + std::memcpy(dst, tri, sizeof(uint32_t) * 3u); + dst += sizeof(uint32_t) * 3u; + } + } + return true; } static bool writeText(const WriteInput& input, std::string& output) { - TextSink sink = {.output = output}; - if (!emitVertices(input, sink)) + if (!input.geom || !input.extraAuxViews) + return false; + const auto& positionView = input.geom->getPositionView(); + const auto& normalView = input.geom->getNormalView(); + const auto& extraAuxViews = *input.extraAuxViews; + for (size_t i = 0u; i < input.vertexCount; ++i) + { + if (!writeTypedViewText(output, positionView, i, 3u, input.positionScalarType, input.flipVectors)) + return false; + if (input.writeNormals && !writeTypedViewText(output, normalView, i, 3u, input.normalScalarType, input.flipVectors)) + return false; + if (input.uvView && !writeTypedViewText(output, *input.uvView, i, 2u, input.uvScalarType, false)) + return false; + for (const auto& extra : extraAuxViews) + if (!extra.view || !writeTypedViewText(output, *extra.view, i, extra.components, extra.scalarType, false)) + return false; + output.push_back('\n'); + } + if (!input.indices) return false; - return SGeometryWriterCommon::visitTriangleIndices(input.geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) { + for (size_t i = 0u; i < input.faceCount; ++i) + { + const uint32_t* tri = input.indices + i * 3u; output.append("3 "); - appendIntegral(output, i0); + appendIntegral(output, tri[0]); output.push_back(' '); - appendIntegral(output, i1); + 
appendIntegral(output, tri[1]); output.push_back(' '); - appendIntegral(output, i2); + appendIntegral(output, tri[2]); output.push_back('\n'); - }); + } + return true; } }; } bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { using ScalarType = Parse::ScalarType; + using clock_t = std::chrono::high_resolution_clock; SFileWriteTelemetry ioTelemetry = {}; if (!_override) getDefaultOverride(_override); @@ -322,14 +550,62 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const uint32_t components = std::min(4u, channels); extraAuxViews.push_back({&view, components, auxIx, Parse::selectScalarType(view.composed.format)}); } + _params.logger.log("PLY writer input: file=%s pos_fmt=%u pos_stride=%u pos_count=%llu normal_fmt=%u normal_stride=%u normal_count=%llu uv_fmt=%u uv_stride=%u uv_count=%llu aux=%u", + system::ILogger::ELL_INFO, file->getFileName().string().c_str(), static_cast(positionView.composed.format), positionView.composed.getStride(), + static_cast(positionView.getElementCount()), static_cast(normalView.composed.format), normalView.composed.getStride(), + static_cast(normalView.getElementCount()), uvView ? static_cast(uvView->composed.format) : static_cast(EF_UNKNOWN), + uvView ? uvView->composed.getStride() : 0u, uvView ? 
static_cast(uvView->getElementCount()) : 0ull, static_cast(extraAuxViews.size())); const auto* indexing = geom->getIndexingCallback(); if (!indexing) return _params.logger.log("PLY writer: missing indexing callback.", system::ILogger::ELL_ERROR), false; if (indexing->knownTopology() != E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) return _params.logger.log("PLY writer: only triangle-list topology is supported.", system::ILogger::ELL_ERROR), false; + const auto& indexView = geom->getIndexView(); + core::vector indexData; + const uint32_t* indices = nullptr; size_t faceCount = 0ull; - if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) - return _params.logger.log("PLY writer: failed to validate triangle indexing.", system::ILogger::ELL_ERROR), false; + if (indexView) + { + const size_t indexCount = indexView.getElementCount(); + if ((indexCount % 3u) != 0u) + return _params.logger.log("PLY writer: failed to validate triangle indexing.", system::ILogger::ELL_ERROR), false; + const void* src = indexView.getPointer(); + if (!src) + return _params.logger.log("PLY writer: missing index buffer pointer.", system::ILogger::ELL_ERROR), false; + if (indexView.composed.format == EF_R32_UINT && indexView.composed.getStride() == sizeof(uint32_t)) + indices = reinterpret_cast(src); + else if (indexView.composed.format == EF_R16_UINT && indexView.composed.getStride() == sizeof(uint16_t)) + { + const auto* src16 = reinterpret_cast(src); + indexData.resize(indexCount); + for (size_t i = 0u; i < indexCount; ++i) + indexData[i] = src16[i]; + indices = indexData.data(); + } + else + { + indexData.resize(indexCount); + for (size_t i = 0u; i < indexCount; ++i) + { + hlsl::uint32_t4 decoded = {}; + if (!indexView.decodeElement(i, decoded)) + return _params.logger.log("PLY writer: failed to decode index view.", system::ILogger::ELL_ERROR), false; + indexData[i] = decoded.x; + } + indices = indexData.data(); + } + faceCount = indexCount / 3u; + } + else + { + if ((vertexCount % 3u) 
!= 0u) + return _params.logger.log("PLY writer: failed to derive triangle indexing from positions.", system::ILogger::ELL_ERROR), false; + indexData.resize(vertexCount); + for (size_t i = 0u; i < vertexCount; ++i) + indexData[i] = static_cast(i); + indices = indexData.data(); + faceCount = vertexCount / 3u; + } const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); const bool binary = flags.hasAnyFlag(E_WRITER_FLAGS::EWF_BINARY); const bool flipVectors = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); @@ -381,9 +657,10 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ headerBuilder << (write16BitIndices ? "\nproperty list uchar uint16 vertex_indices\n" : "\nproperty list uchar uint32 vertex_indices\n"); headerBuilder << "end_header\n"; const std::string header = headerBuilder.str(); - const Parse::WriteInput input = {.geom = geom, .positionScalarType = positionScalarType, .uvView = uvView, .uvScalarType = uvScalarType, .extraAuxViews = &extraAuxViews, .writeNormals = writeNormals, .normalScalarType = normalScalarType, .vertexCount = vertexCount, .faceCount = faceCount, .write16BitIndices = write16BitIndices, .flipVectors = flipVectors}; + const Parse::WriteInput input = {.geom = geom, .positionScalarType = positionScalarType, .uvView = uvView, .uvScalarType = uvScalarType, .extraAuxViews = &extraAuxViews, .writeNormals = writeNormals, .normalScalarType = normalScalarType, .vertexCount = vertexCount, .indices = indices, .faceCount = faceCount, .write16BitIndices = write16BitIndices, .flipVectors = flipVectors}; bool writeOk = false; size_t outputBytes = 0ull; + double writeIoMs = 0.0; auto writePayload = [&](const void* bodyData, const size_t bodySize) -> bool { const size_t outputSize = header.size() + bodySize; const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(outputSize), true, file); @@ -391,7 +668,9 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const 
SAssetWriteParams& _ return false; outputBytes = outputSize; const SInterchangeIO::SBufferRange writeBuffers[] = {{.data = header.data(), .byteCount = header.size()}, {.data = bodyData, .byteCount = bodySize}}; + const auto ioStart = clock_t::now(); writeOk = SInterchangeIO::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); + writeIoMs = std::chrono::duration(clock_t::now() - ioStart).count(); const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); impl::SFileAccess::logTinyIO(_params.logger, "PLY writer", file->getFileName().string().c_str(), ioTelemetry, static_cast(outputBytes), _params.ioPolicy, "writes"); @@ -408,16 +687,24 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const size_t faceStride = sizeof(uint8_t) + (write16BitIndices ? sizeof(uint16_t) : sizeof(uint32_t)) * 3u; const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; core::vector body; + const auto fillStart = clock_t::now(); body.resize(bodySize); if (!Parse::writeBinary(input, body.data())) return _params.logger.log("PLY writer: binary payload generation failed.", system::ILogger::ELL_ERROR), false; - return writePayload(body.data(), body.size()); + const auto fillMs = std::chrono::duration(clock_t::now() - fillStart).count(); + const bool ok = writePayload(body.data(), body.size()); + _params.logger.log("PLY writer stages: file=%s header=%llu body=%llu fill=%.3f ms io=%.3f ms", system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(header.size()), static_cast(body.size()), fillMs, writeIoMs); + return ok; } std::string body; body.reserve(vertexCount * Parse::ApproxTextBytesPerVertex + faceCount * Parse::ApproxTextBytesPerFace); + const auto fillStart = clock_t::now(); if (!Parse::writeText(input, body)) return _params.logger.log("PLY writer: text payload generation failed.", system::ILogger::ELL_ERROR), false; - return 
writePayload(body.data(), body.size()); + const auto fillMs = std::chrono::duration(clock_t::now() - fillStart).count(); + const bool ok = writePayload(body.data(), body.size()); + _params.logger.log("PLY writer stages: file=%s header=%llu body=%llu fill=%.3f ms io=%.3f ms", system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(header.size()), static_cast(body.size()), fillMs, writeIoMs); + return ok; } } #endif // _NBL_COMPILE_WITH_PLY_WRITER_ diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 1ccf55e5ca..66ed992575 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -84,7 +84,12 @@ struct Parse public: inline AsciiParser(const char* begin, const char* end) : m_cursor(begin), m_end(end) {} inline std::optional readToken() { return Common::readToken(m_cursor, m_end); } - inline std::optional readFloat() { float value = 0.f; return Common::parseNumber(m_cursor, m_end, value) ? std::optional(value) : std::nullopt; } + inline std::optional readFloat() + { + Common::skipWhitespace(m_cursor, m_end); + float value = 0.f; + return Common::parseNumber(m_cursor, m_end, value) ? std::optional(value) : std::nullopt; + } inline std::optional readVec3() { const auto x = readFloat(), y = readFloat(), z = readFloat(); diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index e93be03ed5..37c7ed6b82 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -285,6 +285,7 @@ struct Parse const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(posView); const hlsl::float32_t3* const tightNormals = hasNormals ? 
SGeometryWriterCommon::getTightView(normalView) : nullptr; const bool hasFastTightPath = !geom->getIndexView() && tightPositions && (!hasNormals || tightNormals); + const float handednessSign = flipHandedness ? -1.f : 1.f; auto decodePosition = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { return tightPositions ? (out = tightPositions[ix], true) : posView.decodeElement(ix, out); }; auto decodeNormal = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { return hasNormals && (tightNormals ? (out = tightNormals[ix], true) : normalView.decodeElement(ix, out)); }; auto computeFaceColor = [&](const hlsl::uint32_t3& idx, uint16_t& outColor) -> bool { @@ -329,17 +330,72 @@ struct Parse writeRecord(triangle.normal, triangle.vertex1, triangle.vertex2, triangle.vertex3, faceColor); return true; }; - if (hasFastTightPath) + if (hasFastTightPath && hasNormals) { const hlsl::float32_t3* posTri = tightPositions; const hlsl::float32_t3* nrmTri = tightNormals; + bool allFastNormalsNonZero = true; + for (size_t i = 0ull, normalCount = static_cast(facenum) * 3ull; i < normalCount; ++i) + { + const auto& n = tightNormals[i]; + if (n.x == 0.f && n.y == 0.f && n.z == 0.f) + { + allFastNormalsNonZero = false; + break; + } + } + for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u, nrmTri += 3u) + { + const hlsl::uint32_t3 idx(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u); + uint16_t faceColor = 0u; + if (!computeFaceColor(idx, faceColor)) + return false; + hlsl::float32_t3 vertex1 = posTri[2u]; + hlsl::float32_t3 vertex2 = posTri[1u]; + hlsl::float32_t3 vertex3 = posTri[0u]; + vertex1.x *= handednessSign; + vertex2.x *= handednessSign; + vertex3.x *= handednessSign; + hlsl::float32_t3 normal = {}; + if (allFastNormalsNonZero) + { + normal = nrmTri[0u]; + if (flipHandedness) + normal.x = -normal.x; + } + else if (selectFirstValidNormal(nrmTri, 3u, normal)) + { + if (flipHandedness) + normal.x = -normal.x; + } + else + { + float planeNormalLen2 = 0.f; + 
const hlsl::float32_t3 planeNormal = computePlaneNormal(vertex1, vertex2, vertex3, &planeNormalLen2); + normal = planeNormalLen2 > 0.f ? hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); + } + writeRecord(normal, vertex1, vertex2, vertex3, faceColor); + } + } + else if (hasFastTightPath) + { + const hlsl::float32_t3* posTri = tightPositions; for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u) { const hlsl::uint32_t3 idx(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u); - if (!emitTriangle(posTri[0u], posTri[1u], posTri[2u], idx, nrmTri, hasNormals ? 3u : 0u, false)) + uint16_t faceColor = 0u; + if (!computeFaceColor(idx, faceColor)) return false; - if (nrmTri) - nrmTri += 3u; + hlsl::float32_t3 vertex1 = posTri[2u]; + hlsl::float32_t3 vertex2 = posTri[1u]; + hlsl::float32_t3 vertex3 = posTri[0u]; + vertex1.x *= handednessSign; + vertex2.x *= handednessSign; + vertex3.x *= handednessSign; + float planeNormalLen2 = 0.f; + const hlsl::float32_t3 planeNormal = computePlaneNormal(vertex1, vertex2, vertex3, &planeNormalLen2); + const hlsl::float32_t3 normal = planeNormalLen2 > 0.f ? 
hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); + writeRecord(normal, vertex1, vertex2, vertex3, faceColor); } } else if (!SGeometryWriterCommon::visitTriangleIndices(geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) -> bool { From 618681d4300c4d753a038817c0eb822ea9bd4c32 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 8 Mar 2026 22:30:32 +0100 Subject: [PATCH 091/118] Share OBJ perf parsing helpers --- .../asset/interchange/COBJMeshFileLoader.cpp | 164 +----------------- src/nbl/asset/interchange/impl/STextParse.h | 96 ++++++++++ 2 files changed, 103 insertions(+), 157 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 6f945e6878..0ea69ee44c 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -19,8 +19,6 @@ #include #include #include -#include -#include #include namespace nbl::asset { @@ -31,154 +29,6 @@ struct Parse static constexpr uint32_t UV0 = 0u; using Common = impl::TextParse; struct VertexDedupNode { int32_t uv = -1; int32_t normal = -1; uint32_t smoothingGroup = 0u; uint32_t outIndex = 0u; int32_t next = -1; }; - static inline bool isDigit(const char c) { return c >= '0' && c <= '9'; } - static bool parseFloat(const char*& ptr, const char* const end, float& out) - { - const char* const start = ptr; - if (start >= end) - return false; - const char* p = start; - bool negative = false; - if (*p == '-' || *p == '+') - { - negative = (*p == '-'); - ++p; - if (p >= end) - return false; - } - if (*p == '.' 
|| !isDigit(*p)) - { - const auto parseResult = fast_float::from_chars(start, end, out); - if (parseResult.ec == std::errc() && parseResult.ptr != start) - { - ptr = parseResult.ptr; - return true; - } - return false; - } - uint64_t integerPart = 0ull; - while (p < end && isDigit(*p)) - { - integerPart = integerPart * 10ull + static_cast(*p - '0'); - ++p; - } - double value = static_cast(integerPart); - if (p < end && *p == '.') - { - const char* const dot = p; - if ((dot + 7) <= end) - { - const char d0 = dot[1]; - const char d1 = dot[2]; - const char d2 = dot[3]; - const char d3 = dot[4]; - const char d4 = dot[5]; - const char d5 = dot[6]; - if (isDigit(d0) && isDigit(d1) && isDigit(d2) && isDigit(d3) && isDigit(d4) && isDigit(d5)) - { - const bool hasNext = (dot + 7) < end; - const char next = hasNext ? dot[7] : '\0'; - if ((!hasNext || !isDigit(next)) && (!hasNext || (next != 'e' && next != 'E'))) - { - const uint32_t frac = - static_cast(d0 - '0') * 100000u + - static_cast(d1 - '0') * 10000u + - static_cast(d2 - '0') * 1000u + - static_cast(d3 - '0') * 100u + - static_cast(d4 - '0') * 10u + - static_cast(d5 - '0'); - value += static_cast(frac) * 1e-6; - p = dot + 7; - out = static_cast(negative ? 
-value : value); - ptr = p; - return true; - } - } - } - static constexpr double InvPow10[] = { - 1.0, - 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, - 1e-6, 1e-7, 1e-8, 1e-9, 1e-10, - 1e-11, 1e-12, 1e-13, 1e-14, 1e-15, - 1e-16, 1e-17, 1e-18 - }; - ++p; - uint64_t fractionPart = 0ull; - uint32_t fractionDigits = 0u; - while (p < end && isDigit(*p)) - { - if (fractionDigits >= (std::size(InvPow10) - 1u)) - { - const auto parseResult = fast_float::from_chars(start, end, out); - if (parseResult.ec == std::errc() && parseResult.ptr != start) - { - ptr = parseResult.ptr; - return true; - } - return false; - } - fractionPart = fractionPart * 10ull + static_cast(*p - '0'); - ++fractionDigits; - ++p; - } - value += static_cast(fractionPart) * InvPow10[fractionDigits]; - } - if (p < end && (*p == 'e' || *p == 'E')) - { - const auto parseResult = fast_float::from_chars(start, end, out); - if (parseResult.ec == std::errc() && parseResult.ptr != start) - { - ptr = parseResult.ptr; - return true; - } - return false; - } - out = static_cast(negative ? 
-value : value); - ptr = p; - return true; - } - static bool parseUnsignedIndex(const char*& ptr, const char* const end, uint32_t& out) - { - if (ptr >= end || !isDigit(*ptr)) - return false; - uint64_t value = 0ull; - while (ptr < end && isDigit(*ptr)) - { - value = value * 10ull + static_cast(*ptr - '0'); - ++ptr; - } - if (value == 0ull || value > static_cast(std::numeric_limits::max())) - return false; - out = static_cast(value); - return true; - } - static bool parseSignedIndex(const char*& ptr, const char* const end, int32_t& out) - { - if (ptr >= end) - return false; - bool negative = false; - if (*ptr == '-') - { - negative = true; - ++ptr; - } - else if (*ptr == '+') - ++ptr; - if (ptr >= end || !isDigit(*ptr)) - return false; - int64_t value = 0; - while (ptr < end && isDigit(*ptr)) - { - value = value * 10ll + static_cast(*ptr - '0'); - ++ptr; - } - if (negative) - value = -value; - if (value == 0 || value < static_cast(std::numeric_limits::min()) || value > static_cast(std::numeric_limits::max())) - return false; - out = static_cast(value); - return true; - } static bool resolveIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) { if (rawIndex > 0) @@ -229,7 +79,7 @@ struct Parse const char* ptr = lineStart; auto parsePositive = [&](const size_t count, int32_t& outIx) -> bool { uint32_t value = 0u; - if (!parseUnsignedIndex(ptr, lineEnd, value)) + if (!Common::parseNonZeroNumber(ptr, lineEnd, value)) return false; if (value > count) return false; @@ -239,7 +89,7 @@ struct Parse for (uint32_t corner = 0u; corner < 3u; ++corner) { Common::skipInlineWhitespace(ptr, lineEnd); - if (ptr >= lineEnd || !isDigit(*ptr)) + if (ptr >= lineEnd || !Common::isDigit(*ptr)) return false; int32_t posIx = -1; if (!parsePositive(posCount, posIx)) @@ -266,7 +116,7 @@ struct Parse const char* ptr = lineStart; auto parsePositive = [&](const size_t count, int32_t& outIx) -> bool { uint32_t value = 0u; - if (!parseUnsignedIndex(ptr, lineEnd, value)) + 
if (!Common::parseNonZeroNumber(ptr, lineEnd, value)) return false; if (value > count) return false; @@ -276,7 +126,7 @@ struct Parse for (uint32_t corner = 0u; corner < 3u; ++corner) { Common::skipInlineWhitespace(ptr, lineEnd); - if (ptr >= lineEnd || !isDigit(*ptr)) + if (ptr >= lineEnd || !Common::isDigit(*ptr)) return false; int32_t posIx = -1; if (!parsePositive(posCount, posIx)) @@ -301,7 +151,7 @@ struct Parse const char* ptr = linePtr; auto parsePositive = [&](const size_t count, int32_t& outIx) -> bool { uint32_t raw = 0u; - if (!parseUnsignedIndex(ptr, lineEnd, raw)) + if (!Common::parseNonZeroNumber(ptr, lineEnd, raw)) return false; if (raw > count) return false; @@ -310,7 +160,7 @@ struct Parse }; auto parseResolved = [&](const size_t count, int32_t& outIx) -> bool { int32_t raw = 0; - return parseSignedIndex(ptr, lineEnd, raw) && resolveIndex(raw, count, outIx); + return Common::parseNonZeroNumber(ptr, lineEnd, raw) && resolveIndex(raw, count, outIx); }; if (*ptr != '-' && *ptr != '+') { @@ -817,7 +667,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( for (uint32_t i = 0u; i < count; ++i) { while (ptr < lineEnd && Parse::Common::isInlineWhitespace(*ptr)) ++ptr; - if (ptr >= lineEnd || !Parse::parseFloat(ptr, lineEnd, values[i])) + if (ptr >= lineEnd || !Parse::Common::parseNumber(ptr, lineEnd, values[i])) return false; } return true; diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h index 4afbd631ed..ccfeaa2c4a 100644 --- a/src/nbl/asset/interchange/impl/STextParse.h +++ b/src/nbl/asset/interchange/impl/STextParse.h @@ -3,6 +3,8 @@ #define _NBL_ASSET_IMPL_S_TEXT_PARSE_H_INCLUDED_ #include "nbl/core/string/stringutil.h" #include +#include +#include #include #include #include @@ -13,6 +15,7 @@ namespace nbl::asset::impl //! Text token and numeric parsing helpers shared by interchange text formats. struct TextParse { + static inline bool isDigit(const char c) { return c >= '0' && c <= '9'; } //! 
Parses one arithmetic token and advances `ptr` on success. template static inline bool parseNumber(const char*& ptr, const char* const end, T& out) @@ -20,6 +23,99 @@ struct TextParse static_assert(std::is_arithmetic_v); if constexpr (std::is_floating_point_v) { + const char* const start = ptr; + if (start >= end) + return false; + const char* p = start; + bool negative = false; + if (*p == '-' || *p == '+') + { + negative = (*p == '-'); + ++p; + if (p >= end) + return false; + } + if (*p != '.' && isDigit(*p)) + { + uint64_t integerPart = 0ull; + while (p < end && isDigit(*p)) + { + integerPart = integerPart * 10ull + static_cast(*p - '0'); + ++p; + } + double value = static_cast(integerPart); + if (p < end && *p == '.') + { + const char* const dot = p; + if ((dot + 7) <= end) + { + const char d0 = dot[1]; + const char d1 = dot[2]; + const char d2 = dot[3]; + const char d3 = dot[4]; + const char d4 = dot[5]; + const char d5 = dot[6]; + if (isDigit(d0) && isDigit(d1) && isDigit(d2) && isDigit(d3) && isDigit(d4) && isDigit(d5)) + { + const bool hasNext = (dot + 7) < end; + const char next = hasNext ? dot[7] : '\0'; + if ((!hasNext || !isDigit(next)) && (!hasNext || (next != 'e' && next != 'E'))) + { + const uint32_t frac = + static_cast(d0 - '0') * 100000u + + static_cast(d1 - '0') * 10000u + + static_cast(d2 - '0') * 1000u + + static_cast(d3 - '0') * 100u + + static_cast(d4 - '0') * 10u + + static_cast(d5 - '0'); + value += static_cast(frac) * 1e-6; + ptr = dot + 7; + out = static_cast(negative ? 
-value : value); + return true; + } + } + } + static constexpr double InvPow10[] = { + 1.0, + 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, + 1e-6, 1e-7, 1e-8, 1e-9, 1e-10, + 1e-11, 1e-12, 1e-13, 1e-14, 1e-15, + 1e-16, 1e-17, 1e-18 + }; + ++p; + uint64_t fractionPart = 0ull; + uint32_t fractionDigits = 0u; + while (p < end && isDigit(*p)) + { + if (fractionDigits >= (std::size(InvPow10) - 1u)) + break; + fractionPart = fractionPart * 10ull + static_cast(*p - '0'); + ++fractionDigits; + ++p; + } + if (fractionDigits) + value += static_cast(fractionPart) * InvPow10[fractionDigits]; + if (p < end && isDigit(*p)) + { + const auto parseResult = fast_float::from_chars(start, end, out); + if (parseResult.ec != std::errc() || parseResult.ptr == start) + return false; + ptr = parseResult.ptr; + return true; + } + } + if (p < end && (*p == 'e' || *p == 'E')) + { + const auto parseResult = fast_float::from_chars(start, end, out); + if (parseResult.ec != std::errc() || parseResult.ptr == start) + return false; + ptr = parseResult.ptr; + return true; + } + ptr = p; + out = static_cast(negative ? 
-value : value); + return true; + } const auto parseResult = fast_float::from_chars(ptr, end, out); if (parseResult.ec != std::errc() || parseResult.ptr == ptr) return false; From dce1c4cd284f8a83977677ebe9a2cd84b4a0e37a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 9 Mar 2026 08:37:17 +0100 Subject: [PATCH 092/118] Refine geometry interchange helpers --- include/nbl/asset/format/EFormat.h | 61 ++++++- .../asset/interchange/SGeometryLoaderCommon.h | 62 ++++--- .../SOBJPolygonGeometryAuxLayout.h | 14 ++ .../SPLYPolygonGeometryAuxLayout.h | 14 ++ .../SSTLPolygonGeometryAuxLayout.h | 14 ++ include/nbl/asset/metadata/CPLYMetadata.h | 34 +++- .../asset/interchange/COBJMeshFileLoader.cpp | 6 +- src/nbl/asset/interchange/COBJMeshWriter.cpp | 4 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 128 ++++++-------- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 157 ++++++++++-------- .../asset/interchange/CSTLMeshFileLoader.cpp | 6 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 4 +- src/nbl/asset/interchange/impl/STextParse.h | 30 ++++ 13 files changed, 351 insertions(+), 183 deletions(-) create mode 100644 include/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h create mode 100644 include/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h create mode 100644 include/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h diff --git a/include/nbl/asset/format/EFormat.h b/include/nbl/asset/format/EFormat.h index 62ce71555e..7daf5ae45c 100644 --- a/include/nbl/asset/format/EFormat.h +++ b/include/nbl/asset/format/EFormat.h @@ -5,6 +5,7 @@ #ifndef __NBL_ASSET_E_FORMAT_H_INCLUDED__ #define __NBL_ASSET_E_FORMAT_H_INCLUDED__ +#include #include #include #include "BuildConfigOptions.h" @@ -574,6 +575,64 @@ constexpr uint32_t getFormatChannelCount() { #include "nbl/asset/format/impl/EFormat_getFormatChannelCount.h" } +namespace impl +{ +struct SStructuredFormatVariants +{ + E_FORMAT base; + std::array variants; +}; +static inline constexpr uint32_t 
StructuredFormatChannelVariantCount = 4u; +static inline constexpr auto StructuredFormatVariants = std::to_array({ + {EF_R8_SINT, {EF_R8_SINT, EF_R8G8_SINT, EF_R8G8B8_SINT, EF_R8G8B8A8_SINT}}, + {EF_R8_UINT, {EF_R8_UINT, EF_R8G8_UINT, EF_R8G8B8_UINT, EF_R8G8B8A8_UINT}}, + {EF_R16_SINT, {EF_R16_SINT, EF_R16G16_SINT, EF_R16G16B16_SINT, EF_R16G16B16A16_SINT}}, + {EF_R16_UINT, {EF_R16_UINT, EF_R16G16_UINT, EF_R16G16B16_UINT, EF_R16G16B16A16_UINT}}, + {EF_R32_SINT, {EF_R32_SINT, EF_R32G32_SINT, EF_R32G32B32_SINT, EF_R32G32B32A32_SINT}}, + {EF_R32_UINT, {EF_R32_UINT, EF_R32G32_UINT, EF_R32G32B32_UINT, EF_R32G32B32A32_UINT}}, + {EF_R32_SFLOAT, {EF_R32_SFLOAT, EF_R32G32_SFLOAT, EF_R32G32B32_SFLOAT, EF_R32G32B32A32_SFLOAT}}, + {EF_R64_SFLOAT, {EF_R64_SFLOAT, EF_R64G64_SFLOAT, EF_R64G64B64_SFLOAT, EF_R64G64B64A64_SFLOAT}} + }); + inline constexpr uint32_t getStructuredFormatVariantIndex(const E_FORMAT _fmt) + { + for (uint32_t i = 0u; i < StructuredFormatVariants.size(); ++i) + if (StructuredFormatVariants[i].base == _fmt) + return i; + return StructuredFormatVariants.size(); + } + template + inline constexpr uint32_t getStructuredFormatVariantIndex() + { + return getStructuredFormatVariantIndex(_fmt); + } + inline constexpr E_FORMAT getStructuredFormatVariant(const uint32_t _variantIndex, const uint32_t _channelCount) + { + return _variantIndex < StructuredFormatVariants.size() && _channelCount > 0u && _channelCount <= StructuredFormatChannelVariantCount ? + StructuredFormatVariants[_variantIndex].variants[_channelCount - 1u] : EF_UNKNOWN; + } + template + inline constexpr E_FORMAT getStructuredFormatVariant(const uint32_t _variantIndex) + { + if constexpr (_channelCount > 0u && _channelCount <= StructuredFormatChannelVariantCount) + return _variantIndex < StructuredFormatVariants.size() ? 
StructuredFormatVariants[_variantIndex].variants[_channelCount - 1u] : EF_UNKNOWN; + else + return EF_UNKNOWN; + } +} +template +inline constexpr E_FORMAT getFormatWithChannelCount(const uint32_t _channelCount) +{ + return impl::getStructuredFormatVariant(impl::getStructuredFormatVariantIndex<_fmt>(), _channelCount); +} +template +inline constexpr E_FORMAT getFormatWithChannelCount() +{ + return impl::getStructuredFormatVariant<_channelCount>(impl::getStructuredFormatVariantIndex<_fmt>()); +} +inline constexpr E_FORMAT getFormatWithChannelCount(const E_FORMAT _fmt, const uint32_t _channelCount) +{ + return impl::getStructuredFormatVariant(impl::getStructuredFormatVariantIndex(_fmt), _channelCount); +} /* inline uint32_t getBitsPerChannel(asset::E_FORMAT _fmt, uint8_t _channel) @@ -1987,4 +2046,4 @@ namespace std }; } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/interchange/SGeometryLoaderCommon.h b/include/nbl/asset/interchange/SGeometryLoaderCommon.h index 57ec8abded..27434b01e9 100644 --- a/include/nbl/asset/interchange/SGeometryLoaderCommon.h +++ b/include/nbl/asset/interchange/SGeometryLoaderCommon.h @@ -3,6 +3,8 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_S_GEOMETRY_LOADER_COMMON_H_INCLUDED_ #define _NBL_ASSET_S_GEOMETRY_LOADER_COMMON_H_INCLUDED_ +#include +#include #include #include #include "nbl/asset/SBufferAdoption.h" @@ -12,27 +14,47 @@ namespace nbl::asset //! Shared geometry-loader helpers for adopting buffers and assembling formatted data views. class SGeometryLoaderCommon { - public: - //! Creates one formatted data view over an existing CPU buffer. 
- static inline IGeometry::SDataView createDataView(core::smart_refctd_ptr&& buffer, const size_t byteCount, const uint32_t stride, const E_FORMAT format) - { - if (!buffer || byteCount == 0ull) - return {}; - return {.composed = {.stride = stride, .format = format, .rangeFormat = IGeometryBase::getMatchingAABBFormat(format)}, .src = {.offset = 0ull, .size = byteCount, .buffer = std::move(buffer)}}; - } + public: + //! Creates one formatted data view over an existing CPU buffer. + static inline IGeometry::SDataView createDataView(core::smart_refctd_ptr&& buffer, const size_t byteCount, const uint32_t stride, const E_FORMAT format) + { + if (!buffer || byteCount == 0ull) + return {}; + return {.composed = {.stride = stride, .format = format, .rangeFormat = IGeometryBase::getMatchingAABBFormat(format)}, .src = {.offset = 0ull, .size = byteCount, .buffer = std::move(buffer)}}; + } + //! Tracks the widest scalar component format and highest component index seen for one structured attribute. + static inline void negotiateStructuredComponent(IGeometry::SDataViewBase& view, const E_FORMAT componentFormat, const uint8_t component) + { + assert(getFormatChannelCount(componentFormat) != 0u); + if (getTexelOrBlockBytesize(componentFormat) > getTexelOrBlockBytesize(view.format)) + view.format = componentFormat; + view.stride = std::max(view.stride, component); + } + //! Finalizes one structured base view and invokes `onComponent(offset,stride,componentFormat)` per component slot. 
+ template + static inline void finalizeStructuredBaseView(IGeometry::SDataViewBase& view, Fn&& onComponent) + { + if (view.format == EF_UNKNOWN) + return; + const auto componentFormat = view.format; + const auto componentCount = view.stride + 1u; + view.format = getFormatWithChannelCount(componentFormat, componentCount); + view.stride = getTexelOrBlockBytesize(view.format); + for (uint32_t c = 0u; c < componentCount; ++c) + onComponent(getTexelOrBlockBytesize(componentFormat) * c, view.stride, componentFormat); + } - //! Adopts contiguous caller-owned storage into a CPU buffer and exposes it as a formatted data view. - template - static inline IGeometry::SDataView createAdoptedView(Storage&& data) - { - using storage_t = std::remove_cvref_t; - using value_t = std::ranges::range_value_t; - - auto buffer = SBufferAdoption::create(std::forward(data)); - if (!buffer) - return {}; - return createDataView(std::move(buffer), buffer->getSize(), static_cast(sizeof(value_t)), Format); - } + //! Adopts contiguous caller-owned storage into a CPU buffer and exposes it as a formatted data view. + template + static inline IGeometry::SDataView createAdoptedView(Storage&& data) + { + using storage_t = std::remove_cvref_t; + using value_t = std::ranges::range_value_t; + auto buffer = SBufferAdoption::create(std::forward(data)); + if (!buffer) + return {}; + return createDataView(std::move(buffer), buffer->getSize(), static_cast(sizeof(value_t)), Format); + } }; } #endif diff --git a/include/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h b/include/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h new file mode 100644 index 0000000000..ed2743e493 --- /dev/null +++ b/include/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_OBJ_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +#define _NBL_ASSET_S_OBJ_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +namespace nbl::asset +{ +//! Public OBJ aux-view slot ids shared by loader and writer code. +struct SOBJPolygonGeometryAuxLayout +{ + static inline constexpr uint32_t UV0 = 0u; +}; +} +#endif diff --git a/include/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h b/include/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h new file mode 100644 index 0000000000..66a1f3d692 --- /dev/null +++ b/include/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_PLY_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +#define _NBL_ASSET_S_PLY_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +namespace nbl::asset +{ +//! Public PLY aux-view slot ids shared by loader and writer code. +struct SPLYPolygonGeometryAuxLayout +{ + static inline constexpr uint32_t UV0 = 0u; +}; +} +#endif diff --git a/include/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h b/include/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h new file mode 100644 index 0000000000..a49b16b6ee --- /dev/null +++ b/include/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_STL_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +#define _NBL_ASSET_S_STL_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +namespace nbl::asset +{ +//! Public STL aux-view slot ids shared by loader and writer code. 
+struct SSTLPolygonGeometryAuxLayout +{ + static inline constexpr uint32_t COLOR0 = 0u; +}; +} +#endif diff --git a/include/nbl/asset/metadata/CPLYMetadata.h b/include/nbl/asset/metadata/CPLYMetadata.h index 39ad07561a..d9afd4c3fe 100644 --- a/include/nbl/asset/metadata/CPLYMetadata.h +++ b/include/nbl/asset/metadata/CPLYMetadata.h @@ -6,6 +6,8 @@ #include "nbl/asset/metadata/IAssetMetadata.h" +#include +#include namespace nbl::asset @@ -13,12 +15,38 @@ namespace nbl::asset class CPLYMetadata final : public IAssetMetadata { - public: - CPLYMetadata() : IAssetMetadata() {} + public: + class CPolygonGeometry : public IPolygonGeometryMetadata + { + public: + using IPolygonGeometryMetadata::IPolygonGeometryMetadata; + inline CPolygonGeometry& operator=(CPolygonGeometry&& other) + { + IPolygonGeometryMetadata::operator=(std::move(other)); + std::swap(m_auxAttributeNames, other.m_auxAttributeNames); + return *this; + } + inline std::string_view getAuxAttributeName(const uint32_t auxViewIx) const + { + return auxViewIx < m_auxAttributeNames.size() ? 
std::string_view(m_auxAttributeNames[auxViewIx]) : std::string_view{}; + } + core::vector m_auxAttributeNames; + }; + CPLYMetadata(const uint32_t geometryCount = 0u) : IAssetMetadata(), m_geometryMetaStorage(createContainer(geometryCount)) {} _NBL_STATIC_INLINE_CONSTEXPR const char* LoaderName = "CPLYMeshFileLoader"; const char* getLoaderName() const override { return LoaderName; } + private: + meta_container_t m_geometryMetaStorage; + friend class CPLYMeshFileLoader; + inline void placeMeta(const uint32_t offset, const ICPUPolygonGeometry* geometry, core::vector&& auxAttributeNames) + { + auto& meta = m_geometryMetaStorage->operator[](offset); + meta = CPolygonGeometry{}; + meta.m_auxAttributeNames = std::move(auxAttributeNames); + IAssetMetadata::insertAssetSpecificMetadata(geometry, &meta); + } }; } -#endif \ No newline at end of file +#endif diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 0ea69ee44c..e95467f67b 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -8,6 +8,7 @@ #include "nbl/asset/ICPUGeometryCollection.h" #include "nbl/asset/interchange/SGeometryContentHash.h" #include "nbl/asset/interchange/SGeometryLoaderCommon.h" +#include "nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h" #include "nbl/asset/interchange/SInterchangeIO.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" @@ -26,7 +27,6 @@ namespace { struct Parse { - static constexpr uint32_t UV0 = 0u; using Common = impl::TextParse; struct VertexDedupNode { int32_t uv = -1; int32_t normal = -1; uint32_t smoothingGroup = 0u; uint32_t outIndex = 0u; int32_t next = -1; }; static bool resolveIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) @@ -451,8 +451,8 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( if (!view) return false; auto* const auxViews = 
geometry->getAuxAttributeViews(); - auxViews->resize(Parse::UV0 + 1u); - auxViews->operator[](Parse::UV0) = std::move(view); + auxViews->resize(SOBJPolygonGeometryAuxLayout::UV0 + 1u); + auxViews->operator[](SOBJPolygonGeometryAuxLayout::UV0) = std::move(view); } if (!indices.empty()) { geometry->setIndexing(IPolygonGeometryBase::TriangleList()); diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 265f29f7d0..358d7cfd29 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -5,6 +5,7 @@ #include "nbl/asset/interchange/COBJMeshWriter.h" #include "nbl/asset/interchange/SGeometryViewDecode.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" +#include "nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h" #include "nbl/asset/interchange/SInterchangeIO.h" #include "impl/SFileAccess.h" #include "nbl/builtin/hlsl/array_accessors.hlsl" @@ -46,7 +47,6 @@ namespace { struct Parse { - static constexpr uint32_t UV0 = 0u; static constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 8ull; static constexpr size_t MaxUInt32Chars = std::numeric_limits::digits10 + 1ull; static constexpr size_t MaxIndexTokenBytes = MaxUInt32Chars * 3ull + 2ull; @@ -177,7 +177,7 @@ bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const auto& normalView = geom->getNormalView(); const bool hasNormals = static_cast(normalView); const size_t vertexCount = positionView.getElementCount(); - const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, Parse::UV0, vertexCount); + const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, SOBJPolygonGeometryAuxLayout::UV0, vertexCount); if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) uvView = nullptr; const bool hasUVs = uvView != nullptr; diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp 
b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 93107fbcde..61ebdfb6b2 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -10,6 +10,7 @@ #include "nbl/asset/IAssetManager.h" #include "nbl/asset/interchange/SGeometryContentHash.h" #include "nbl/asset/interchange/SGeometryLoaderCommon.h" +#include "nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h" #include "nbl/asset/interchange/SInterchangeIO.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/metadata/CPLYMetadata.h" @@ -27,7 +28,6 @@ namespace { struct Parse { - static constexpr uint32_t UV0 = 0u; using Binary = impl::BinaryData; using Common = impl::TextParse; struct ContentHashBuild @@ -62,26 +62,6 @@ struct Parse { return text ? std::string_view{text} : std::string_view{}; } - template - static E_FORMAT selectStructuredFormat(const std::array& formats, const uint32_t componentCount) - { - return componentCount > 0u && componentCount <= N ? 
formats[componentCount - 1u] : EF_UNKNOWN; - } - static E_FORMAT expandStructuredFormat(const E_FORMAT componentFormat, const uint32_t componentCount) - { - switch (componentFormat) - { - case EF_R8_SINT: return selectStructuredFormat(std::to_array({EF_R8_SINT, EF_R8G8_SINT, EF_R8G8B8_SINT, EF_R8G8B8A8_SINT}), componentCount); - case EF_R8_UINT: return selectStructuredFormat(std::to_array({EF_R8_UINT, EF_R8G8_UINT, EF_R8G8B8_UINT, EF_R8G8B8A8_UINT}), componentCount); - case EF_R16_SINT: return selectStructuredFormat(std::to_array({EF_R16_SINT, EF_R16G16_SINT, EF_R16G16B16_SINT, EF_R16G16B16A16_SINT}), componentCount); - case EF_R16_UINT: return selectStructuredFormat(std::to_array({EF_R16_UINT, EF_R16G16_UINT, EF_R16G16B16_UINT, EF_R16G16B16A16_UINT}), componentCount); - case EF_R32_SINT: return selectStructuredFormat(std::to_array({EF_R32_SINT, EF_R32G32_SINT, EF_R32G32B32_SINT, EF_R32G32B32A32_SINT}), componentCount); - case EF_R32_UINT: return selectStructuredFormat(std::to_array({EF_R32_UINT, EF_R32G32_UINT, EF_R32G32B32_UINT, EF_R32G32B32A32_UINT}), componentCount); - case EF_R32_SFLOAT: return selectStructuredFormat(std::to_array({EF_R32_SFLOAT, EF_R32G32_SFLOAT, EF_R32G32B32_SFLOAT, EF_R32G32B32A32_SFLOAT}), componentCount); - case EF_R64_SFLOAT: return selectStructuredFormat(std::to_array({EF_R64_SFLOAT, EF_R64G64_SFLOAT, EF_R64G64B64_SFLOAT, EF_R64G64B64A64_SFLOAT}), componentCount); - default: return EF_UNKNOWN; - } - } struct Context { static constexpr uint64_t ReadWindowPaddingBytes = 1ull; @@ -219,18 +199,25 @@ struct Parse EndOfFile = true; } } - const char* getNextLine() // Split the string data into a line in place by terminating it instead of copying. + std::string_view getNextLine() // Split the string data into a line in place by terminating it instead of copying. 
{ // move the start pointer along StartPointer = LineEndPointer + 1; // crlf split across buffer move - if (*StartPointer == '\n') + if (StartPointer < EndPointer && *StartPointer == '\n') *(StartPointer++) = '\0'; + const char* const lineStart = StartPointer; // begin at the start of the next line const std::array Terminators = {'\0', '\r', '\n'}; auto terminator = std::find_first_of(StartPointer, EndPointer, Terminators.begin(), Terminators.end()); if (terminator != EndPointer) + { + const char* const lineEnd = terminator; *(terminator++) = '\0'; + LineEndPointer = terminator - 1; + WordLength = -1; + return std::string_view(lineStart, static_cast(lineEnd - lineStart)); + } // we have reached the end of the buffer if (terminator == EndPointer) { @@ -238,18 +225,15 @@ struct Parse { StartPointer = EndPointer - 1; *StartPointer = '\0'; - return StartPointer; + return {}; } // get data from the file fillBuffer(); // reset line end pointer LineEndPointer = StartPointer - 1; - return StartPointer != EndPointer ? getNextLine() : StartPointer; + return StartPointer != EndPointer ? 
getNextLine() : std::string_view{}; } - LineEndPointer = terminator - 1; - WordLength = -1; - // return pointer to the start of the line - return StartPointer; + return {}; } const char* getNextWord() // null terminate the next word on the previous line and move the next word pointer along since we already have a full line in the buffer, we never need to retrieve more data { @@ -1447,26 +1431,17 @@ bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const syste if (!success) return false; const std::string_view fileHeader(buf.data(), success.getBytesProcessed()); - size_t lineStart = 0ull; - const size_t firstLineEnd = fileHeader.find('\n'); - std::string_view firstLine = fileHeader.substr(0ull, firstLineEnd); - firstLine = Parse::Common::trimWhitespace(firstLine); - if (firstLine != "ply") + Parse::Common::LineCursor lineCursor = {.cursor = fileHeader.data(), .end = fileHeader.data() + fileHeader.size()}; + const auto firstLineOpt = lineCursor.readLine(); + if (!firstLineOpt.has_value() || Parse::Common::trimWhitespace(*firstLineOpt) != "ply") return false; - if (firstLineEnd == std::string_view::npos) - return false; - lineStart = firstLineEnd + 1ull; constexpr std::array headers = { "format ascii 1.0", "format binary_little_endian 1.0", "format binary_big_endian 1.0"}; - while (lineStart < fileHeader.size()) { - size_t lineEnd = fileHeader.find('\n', lineStart); - if (lineEnd == std::string_view::npos) - lineEnd = fileHeader.size(); - std::string_view line = Parse::Common::trimWhitespace(fileHeader.substr(lineStart, lineEnd - lineStart)); + while (const auto lineOpt = lineCursor.readLine()) { + const std::string_view line = Parse::Common::trimWhitespace(*lineOpt); if (line.starts_with("format ")) return std::find(headers.begin(), headers.end(), line) != headers.end(); - lineStart = lineEnd + 1ull; } return false; } @@ -1508,6 +1483,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( ctx.init(static_cast(safeReadWindow)); // start with empty mesh 
auto geometry = make_smart_refctd_ptr(); + std::optional> geometryMetadata = std::nullopt; hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); uint32_t vertCount = 0; Parse::ContentHashBuild contentHashBuild = Parse::ContentHashBuild::create(computeContentHashes, hashInBuild); @@ -1543,7 +1519,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( contentHashBuild.tryDefer(view.src.buffer.get()); }; // Currently only supports ASCII or binary meshes - if (Parse::toStringView(ctx.getNextLine()) != "ply") { + if (Parse::Common::trimWhitespace(ctx.getNextLine()) != "ply") { _params.logger.log("Not a valid PLY file %s", system::ILogger::ELL_ERROR, ctx.inner.mainFile->getFileName().string().c_str()); return {}; @@ -1720,52 +1696,38 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( ICPUPolygonGeometry::SDataViewBase posView = {}, normalView = {}, uvView = {}; core::vector extraViews; + core::vector extraViewNames; for (auto& vertexProperty : el.Properties) { const auto& propertyName = vertexProperty.Name; - // only positions and normals need to be structured/canonicalized in any way - auto negotiateFormat = [&vertexProperty](ICPUPolygonGeometry::SDataViewBase& view, const uint8_t component) -> void { - assert(getFormatChannelCount(vertexProperty.type) != 0); - if (getTexelOrBlockBytesize(vertexProperty.type) > getTexelOrBlockBytesize(view.format)) - view.format = vertexProperty.type; - view.stride = hlsl::max(view.stride, component); - }; if (propertyName == "x") - negotiateFormat(posView, 0); + SGeometryLoaderCommon::negotiateStructuredComponent(posView, vertexProperty.type, 0); else if (propertyName == "y") - negotiateFormat(posView, 1); + SGeometryLoaderCommon::negotiateStructuredComponent(posView, vertexProperty.type, 1); else if (propertyName == "z") - negotiateFormat(posView, 2); + SGeometryLoaderCommon::negotiateStructuredComponent(posView, vertexProperty.type, 2); else if (propertyName == "nx") - negotiateFormat(normalView, 0); + 
SGeometryLoaderCommon::negotiateStructuredComponent(normalView, vertexProperty.type, 0); else if (propertyName == "ny") - negotiateFormat(normalView, 1); + SGeometryLoaderCommon::negotiateStructuredComponent(normalView, vertexProperty.type, 1); else if (propertyName == "nz") - negotiateFormat(normalView, 2); + SGeometryLoaderCommon::negotiateStructuredComponent(normalView, vertexProperty.type, 2); else if (propertyName == "u" || propertyName == "s") - negotiateFormat(uvView, 0); + SGeometryLoaderCommon::negotiateStructuredComponent(uvView, vertexProperty.type, 0); else if (propertyName == "v" || propertyName == "t") - negotiateFormat(uvView, 1); + SGeometryLoaderCommon::negotiateStructuredComponent(uvView, vertexProperty.type, 1); else - // property names for extra channels are currently not persisted in metadata + { extraViews.push_back(createView(vertexProperty.type, el.Count)); - } - auto setFinalFormat = [&ctx](ICPUPolygonGeometry::SDataViewBase& view) -> void { - const auto componentFormat = view.format; - const auto componentCount = view.stride + 1; - view.format = Parse::expandStructuredFormat(view.format, componentCount); - view.stride = getTexelOrBlockBytesize(view.format); - for (auto c = 0u; c < componentCount; c++) { - size_t offset = getTexelOrBlockBytesize(componentFormat) * c; - ctx.vertAttrIts.push_back({.ptr = reinterpret_cast(offset), - .stride = view.stride, - .dstFmt = componentFormat}); + extraViewNames.push_back(propertyName); } - }; + } auto attachStructuredView = [&](ICPUPolygonGeometry::SDataViewBase& baseView, auto&& setter) -> void { if (baseView.format == EF_UNKNOWN) return; auto beginIx = ctx.vertAttrIts.size(); - setFinalFormat(baseView); + SGeometryLoaderCommon::finalizeStructuredBaseView(baseView, [&](const size_t offset, const uint32_t stride, const E_FORMAT componentFormat) -> void { + ctx.vertAttrIts.push_back({.ptr = reinterpret_cast(offset), .stride = stride, .dstFmt = componentFormat}); + }); auto view = 
createView(baseView.format, el.Count); for (const auto size = ctx.vertAttrIts.size(); beginIx != size; ++beginIx) ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer()) + view.src.offset; @@ -1775,15 +1737,23 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( attachStructuredView(normalView, [&](auto view) { geometry->setNormalView(std::move(view)); }); attachStructuredView(uvView, [&](auto view) { auto* const auxViews = geometry->getAuxAttributeViews(); - auxViews->resize(Parse::UV0 + 1u); - auxViews->operator[](Parse::UV0) = std::move(view); + auxViews->resize(SPLYPolygonGeometryAuxLayout::UV0 + 1u); + auxViews->operator[](SPLYPolygonGeometryAuxLayout::UV0) = std::move(view); }); + core::vector auxAttributeNames; + const size_t extraNameOffset = geometry->getAuxAttributeViews()->size(); for (auto& view : extraViews) ctx.vertAttrIts.push_back({.ptr = reinterpret_cast(view.src.buffer->getPointer()) + view.src.offset, .stride = getTexelOrBlockBytesize(view.composed.format), .dstFmt = view.composed.format}); for (auto& view : extraViews) geometry->getAuxAttributeViews()->push_back(std::move(view)); + if (!extraViewNames.empty()) + { + auxAttributeNames.resize(geometry->getAuxAttributeViews()->size()); + for (size_t extraIx = 0ull; extraIx < extraViewNames.size(); ++extraIx) + auxAttributeNames[extraNameOffset + extraIx] = std::move(extraViewNames[extraIx]); + } // loop through vertex properties const auto fastVertexResult = ctx.readVertexElementFast(el, &parsedAABB); if (fastVertexResult == Parse::Context::EFastVertexReadResult::Success) { @@ -1798,6 +1768,10 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( visitVertexAttributeViews(hashViewBufferIfNeeded); tryLaunchDeferredHash(geometry->getPositionView()); verticesProcessed = true; + if (!auxAttributeNames.empty()) + { + geometryMetadata = std::move(auxAttributeNames); + } vertexMs += std::chrono::duration(clock_t::now() - vertexStart).count(); } else if (el.Name == "face") { const auto faceStart = 
clock_t::now(); @@ -1883,7 +1857,9 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( system::to_string(loadSession.ioPlan.strategy).c_str(), static_cast(loadSession.ioPlan.chunkSizeBytes()), loadSession.ioPlan.reason); _params.logger.log("PLY loader stages: file=%s header=%.3f ms vertex=%.3f ms face=%.3f ms finalize=%.3f ms", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), headerMs, vertexMs, faceMs, finalizeMs); - auto meta = core::make_smart_refctd_ptr(); + auto meta = core::make_smart_refctd_ptr(1u); + if (geometryMetadata) + meta->placeMeta(0u, geometry.get(), std::move(*geometryMetadata)); return SAssetBundle(std::move(meta), {std::move(geometry)}); } } diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 24d6aa9967..3fc671d82f 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -4,6 +4,8 @@ // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors #include "CPLYMeshWriter.h" +#include "nbl/asset/interchange/SGeometryViewDecode.h" +#include "nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" #include "impl/SFileAccess.h" @@ -44,8 +46,9 @@ namespace { struct Parse { - static constexpr uint32_t UV0 = 0u; enum class ScalarType : uint8_t { Int8, UInt8, Int16, UInt16, Int32, UInt32, Float32, Float64 }; + using SemanticDecode = SGeometryViewDecode::Prepared; + using StoredDecode = SGeometryViewDecode::Prepared; struct ScalarMeta { const char* name = "float32"; uint32_t byteSize = sizeof(float); bool integer = false; bool signedType = true; }; struct ExtraAuxView { const ICPUPolygonGeometry::SDataView* view = nullptr; uint32_t components = 0u; uint32_t auxIndex = 0u; ScalarType scalarType = ScalarType::Float32; }; struct WriteInput { const 
ICPUPolygonGeometry* geom = nullptr; ScalarType positionScalarType = ScalarType::Float32; const ICPUPolygonGeometry::SDataView* uvView = nullptr; ScalarType uvScalarType = ScalarType::Float32; const core::vector* extraAuxViews = nullptr; bool writeNormals = false; ScalarType normalScalarType = ScalarType::Float32; size_t vertexCount = 0ull; const uint32_t* indices = nullptr; size_t faceCount = 0ull; bool write16BitIndices = false; bool flipVectors = false; }; @@ -77,6 +80,25 @@ struct Parse default: return {"float32", sizeof(float), false, true}; } } + struct PreparedView + { + const ICPUPolygonGeometry::SDataView* view = nullptr; + uint32_t componentCount = 0u; + ScalarType scalarType = ScalarType::Float32; + bool flipVectors = false; + SemanticDecode semantic = {}; + StoredDecode stored = {}; + static inline PreparedView create(const ICPUPolygonGeometry::SDataView& view, const uint32_t componentCount, const ScalarType scalarType, const bool flipVectors) + { + PreparedView retval = {.view = &view, .componentCount = componentCount, .scalarType = scalarType, .flipVectors = flipVectors}; + const auto meta = getScalarMeta(scalarType); + if (meta.integer) + retval.stored = SGeometryViewDecode::prepare(view); + else + retval.semantic = SGeometryViewDecode::prepare(view); + return retval; + } + }; static bool isSupportedScalarFormat(const E_FORMAT format) { if (format == EF_UNKNOWN) @@ -129,48 +151,6 @@ struct Parse return bytesPerChannel >= 8u ? 
ScalarType::Float64 : ScalarType::Float32; return ScalarType::Float32; } - static bool decodeVec4(const ICPUPolygonGeometry::SDataView& view, const size_t ix, hlsl::float64_t4& out) - { - out = hlsl::float64_t4(0.0, 0.0, 0.0, 0.0); - if (!view.composed.isFormatted()) - return false; - const void* src = view.getPointer(ix); - if (!src) - return false; - const void* srcArr[4] = {src, nullptr, nullptr, nullptr}; - double tmp[4] = {}; - if (!decodePixels(view.composed.format, srcArr, tmp, 0u, 0u)) - return false; - const uint32_t channels = std::min(4u, getFormatChannelCount(view.composed.format)); - if (isNormalizedFormat(view.composed.format)) - { - const auto range = view.composed.getRange>(); - for (uint32_t i = 0u; i < channels; ++i) - (&out.x)[i] = tmp[i] * (range.maxVx[i] - range.minVx[i]) + range.minVx[i]; - } - else - { - for (uint32_t i = 0u; i < channels; ++i) - (&out.x)[i] = tmp[i]; - } - return true; - } - static bool decodeSigned4Raw(const ICPUPolygonGeometry::SDataView& view, const size_t ix, int64_t (&out)[4]) - { - const void* src = view.getPointer(ix); - if (!src) - return false; - const void* srcArr[4] = {src, nullptr, nullptr, nullptr}; - return decodePixels(view.composed.format, srcArr, out, 0u, 0u); - } - static bool decodeUnsigned4Raw(const ICPUPolygonGeometry::SDataView& view, const size_t ix, uint64_t (&out)[4]) - { - const void* src = view.getPointer(ix); - if (!src) - return false; - const void* srcArr[4] = {src, nullptr, nullptr, nullptr}; - return decodePixels(view.composed.format, srcArr, out, 0u, 0u); - } static bool isDirectScalarFormat(const E_FORMAT format, const ScalarType scalarType, const uint32_t componentCount, uint32_t& outByteSize) { outByteSize = 0u; @@ -232,8 +212,14 @@ struct Parse dst += copyBytes; return true; } - static bool writeTypedViewBinary(const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const ScalarType scalarType, const bool flipVectors, uint8_t*& dst) + static bool 
writeTypedViewBinary(const PreparedView& prepared, const size_t ix, uint8_t*& dst) { + if (!prepared.view || !dst) + return false; + const auto& view = *prepared.view; + const auto componentCount = prepared.componentCount; + const auto scalarType = prepared.scalarType; + const auto flipVectors = prepared.flipVectors; if (!dst) return false; if (writeDirectBinaryView(view, ix, componentCount, scalarType, flipVectors, dst)) @@ -243,12 +229,12 @@ struct Parse case ScalarType::Float64: case ScalarType::Float32: { - hlsl::float64_t4 tmp = {}; - if (!decodeVec4(view, ix, tmp)) + std::array tmp = {}; + if (!prepared.semantic.decode(ix, tmp)) return false; for (uint32_t c = 0u; c < componentCount; ++c) { - double value = (&tmp.x)[c]; + double value = tmp[c]; if (flipVectors && c == 0u) value = -value; if (scalarType == ScalarType::Float64) @@ -269,8 +255,8 @@ struct Parse case ScalarType::Int16: case ScalarType::Int32: { - int64_t tmp[4] = {}; - if (!decodeSigned4Raw(view, ix, tmp)) + std::array tmp = {}; + if (!prepared.stored.decode(ix, tmp)) return false; for (uint32_t c = 0u; c < componentCount; ++c) { @@ -305,8 +291,8 @@ struct Parse case ScalarType::UInt16: case ScalarType::UInt32: { - uint64_t tmp[4] = {}; - if (!decodeUnsigned4Raw(view, ix, tmp)) + std::array tmp = {}; + if (!prepared.stored.decode(ix, tmp)) return false; for (uint32_t c = 0u; c < componentCount; ++c) { @@ -337,19 +323,24 @@ struct Parse } return false; } - static bool writeTypedViewText(std::string& output, const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const ScalarType scalarType, const bool flipVectors) + static bool writeTypedViewText(std::string& output, const PreparedView& prepared, const size_t ix) { + if (!prepared.view) + return false; + const auto componentCount = prepared.componentCount; + const auto scalarType = prepared.scalarType; + const auto flipVectors = prepared.flipVectors; switch (scalarType) { case ScalarType::Float64: case 
ScalarType::Float32: { - hlsl::float64_t4 tmp = {}; - if (!decodeVec4(view, ix, tmp)) + std::array tmp = {}; + if (!prepared.semantic.decode(ix, tmp)) return false; for (uint32_t c = 0u; c < componentCount; ++c) { - double value = (&tmp.x)[c]; + double value = tmp[c]; if (flipVectors && c == 0u) value = -value; appendFloat(output, value); @@ -361,8 +352,8 @@ struct Parse case ScalarType::Int16: case ScalarType::Int32: { - int64_t tmp[4] = {}; - if (!decodeSigned4Raw(view, ix, tmp)) + std::array tmp = {}; + if (!prepared.stored.decode(ix, tmp)) return false; for (uint32_t c = 0u; c < componentCount; ++c) { @@ -378,8 +369,8 @@ struct Parse case ScalarType::UInt16: case ScalarType::UInt32: { - uint64_t tmp[4] = {}; - if (!decodeUnsigned4Raw(view, ix, tmp)) + std::array tmp = {}; + if (!prepared.stored.decode(ix, tmp)) return false; for (uint32_t c = 0u; c < componentCount; ++c) { @@ -431,16 +422,27 @@ struct Parse const auto& positionView = input.geom->getPositionView(); const auto& normalView = input.geom->getNormalView(); const auto& extraAuxViews = *input.extraAuxViews; + const PreparedView preparedPosition = PreparedView::create(positionView, 3u, input.positionScalarType, input.flipVectors); + const PreparedView preparedNormal = input.writeNormals ? PreparedView::create(normalView, 3u, input.normalScalarType, input.flipVectors) : PreparedView{}; + const PreparedView preparedUV = input.uvView ? 
PreparedView::create(*input.uvView, 2u, input.uvScalarType, false) : PreparedView{}; + core::vector preparedExtraAuxViews; + preparedExtraAuxViews.reserve(extraAuxViews.size()); + for (const auto& extra : extraAuxViews) + { + if (!extra.view) + return false; + preparedExtraAuxViews.push_back(PreparedView::create(*extra.view, extra.components, extra.scalarType, false)); + } for (size_t i = 0u; i < input.vertexCount; ++i) { - if (!writeTypedViewBinary(positionView, i, 3u, input.positionScalarType, input.flipVectors, dst)) + if (!writeTypedViewBinary(preparedPosition, i, dst)) return false; - if (input.writeNormals && !writeTypedViewBinary(normalView, i, 3u, input.normalScalarType, input.flipVectors, dst)) + if (input.writeNormals && !writeTypedViewBinary(preparedNormal, i, dst)) return false; - if (input.uvView && !writeTypedViewBinary(*input.uvView, i, 2u, input.uvScalarType, false, dst)) + if (input.uvView && !writeTypedViewBinary(preparedUV, i, dst)) return false; - for (const auto& extra : extraAuxViews) - if (!extra.view || !writeTypedViewBinary(*extra.view, i, extra.components, extra.scalarType, false, dst)) + for (const auto& extra : preparedExtraAuxViews) + if (!writeTypedViewBinary(extra, i, dst)) return false; } if (!input.indices) @@ -468,19 +470,28 @@ struct Parse { if (!input.geom || !input.extraAuxViews) return false; - const auto& positionView = input.geom->getPositionView(); - const auto& normalView = input.geom->getNormalView(); const auto& extraAuxViews = *input.extraAuxViews; + const PreparedView preparedPosition = PreparedView::create(input.geom->getPositionView(), 3u, input.positionScalarType, input.flipVectors); + const PreparedView preparedNormal = input.writeNormals ? PreparedView::create(input.geom->getNormalView(), 3u, input.normalScalarType, input.flipVectors) : PreparedView{}; + const PreparedView preparedUV = input.uvView ? 
PreparedView::create(*input.uvView, 2u, input.uvScalarType, false) : PreparedView{}; + core::vector preparedExtraAuxViews; + preparedExtraAuxViews.reserve(extraAuxViews.size()); + for (const auto& extra : extraAuxViews) + { + if (!extra.view) + return false; + preparedExtraAuxViews.push_back(PreparedView::create(*extra.view, extra.components, extra.scalarType, false)); + } for (size_t i = 0u; i < input.vertexCount; ++i) { - if (!writeTypedViewText(output, positionView, i, 3u, input.positionScalarType, input.flipVectors)) + if (!writeTypedViewText(output, preparedPosition, i)) return false; - if (input.writeNormals && !writeTypedViewText(output, normalView, i, 3u, input.normalScalarType, input.flipVectors)) + if (input.writeNormals && !writeTypedViewText(output, preparedNormal, i)) return false; - if (input.uvView && !writeTypedViewText(output, *input.uvView, i, 2u, input.uvScalarType, false)) + if (input.uvView && !writeTypedViewText(output, preparedUV, i)) return false; - for (const auto& extra : extraAuxViews) - if (!extra.view || !writeTypedViewText(output, *extra.view, i, extra.components, extra.scalarType, false)) + for (const auto& extra : preparedExtraAuxViews) + if (!writeTypedViewText(output, extra, i)) return false; output.push_back('\n'); } @@ -531,7 +542,7 @@ bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ const bool writeNormals = static_cast(normalView); if (writeNormals && normalView.getElementCount() != vertexCount) return _params.logger.log("PLY writer: normal vertex count mismatch.", system::ILogger::ELL_ERROR), false; - const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, Parse::UV0, vertexCount); + const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, SPLYPolygonGeometryAuxLayout::UV0, vertexCount); if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) uvView = nullptr; core::vector extraAuxViews; @@ -540,7 +551,7 @@ bool 
CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _ for (uint32_t auxIx = 0u; auxIx < static_cast(auxViews.size()); ++auxIx) { const auto& view = auxViews[auxIx]; - if (!view || (uvView && auxIx == Parse::UV0)) + if (!view || (uvView && auxIx == SPLYPolygonGeometryAuxLayout::UV0)) continue; if (view.getElementCount() != vertexCount) continue; diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index 66ed992575..da59154a00 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -10,6 +10,7 @@ #include "nbl/asset/format/convertColor.h" #include "nbl/asset/interchange/SGeometryContentHash.h" #include "nbl/asset/interchange/SGeometryLoaderCommon.h" +#include "nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h" #include "nbl/asset/interchange/SInterchangeIO.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/metadata/CSTLMetadata.h" @@ -25,7 +26,6 @@ namespace { struct Parse { - static constexpr uint32_t COLOR0 = 0u; using Common = impl::TextParse; struct LayoutProbe { bool hasPrefix = false; bool startsWithSolid = false; bool binaryBySize = false; uint32_t triangleCount = 0u; }; static hlsl::float32_t3 resolveStoredNormal(const hlsl::float32_t3& fileNormal) { const float fileLen2 = hlsl::dot(fileNormal, fileNormal); return (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) ? 
fileNormal : SGeometryNormalCommon::normalizeOrZero(fileNormal); } @@ -494,8 +494,8 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa if (!colorView) return {}; auto* const auxViews = geometry->getAuxAttributeViews(); - auxViews->resize(Parse::COLOR0 + 1u); - auxViews->operator[](Parse::COLOR0) = + auxViews->resize(SSTLPolygonGeometryAuxLayout::COLOR0 + 1u); + auxViews->operator[](SSTLPolygonGeometryAuxLayout::COLOR0) = std::move(colorView); hasTriangleColors = true; } diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 37c7ed6b82..8e8fde722b 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -7,6 +7,7 @@ #include "CSTLMeshWriter.h" #include "impl/SFileAccess.h" #include "nbl/asset/format/convertColor.h" +#include "nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h" #include "nbl/asset/interchange/SGeometryWriterCommon.h" #include "nbl/asset/interchange/SInterchangeIO.h" #include @@ -23,7 +24,6 @@ namespace { struct Parse { - static constexpr uint32_t COLOR0 = 0u; struct Context { IAssetWriter::SAssetWriteContext writeContext; @@ -228,7 +228,7 @@ struct Parse } static const ICPUPolygonGeometry::SDataView* getColorView(const ICPUPolygonGeometry* geom, const size_t vertexCount) { - const auto* view = SGeometryWriterCommon::getAuxViewAt(geom, Parse::COLOR0, vertexCount); + const auto* view = SGeometryWriterCommon::getAuxViewAt(geom, SSTLPolygonGeometryAuxLayout::COLOR0, vertexCount); return view && getFormatChannelCount(view->composed.format) >= 3u ? 
view : nullptr; } static bool decodeColorB8G8R8A8(const ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, uint32_t& outColor) diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h index ccfeaa2c4a..371d730f82 100644 --- a/src/nbl/asset/interchange/impl/STextParse.h +++ b/src/nbl/asset/interchange/impl/STextParse.h @@ -15,6 +15,28 @@ namespace nbl::asset::impl //! Text token and numeric parsing helpers shared by interchange text formats. struct TextParse { + struct LineCursor + { + const char* cursor = nullptr; + const char* end = nullptr; + inline std::optional readLine() + { + if (!cursor || cursor >= end) + return std::nullopt; + const char* lineEnd = cursor; + while (lineEnd < end && *lineEnd != '\0' && *lineEnd != '\r' && *lineEnd != '\n') + ++lineEnd; + const std::string_view line(cursor, static_cast(lineEnd - cursor)); + if (lineEnd < end && *lineEnd == '\r') + ++lineEnd; + if (lineEnd < end && *lineEnd == '\n') + ++lineEnd; + else if (lineEnd < end && *lineEnd == '\0') + ++lineEnd; + cursor = lineEnd; + return line; + } + }; static inline bool isDigit(const char c) { return c >= '0' && c <= '9'; } //! Parses one arithmetic token and advances `ptr` on success. template @@ -169,6 +191,14 @@ struct TextParse const std::string_view token(cursor, static_cast(tokenEnd - cursor)); return cursor = tokenEnd, token; } + //! Reads one line view from a contiguous text buffer and advances `cursor`. 
+ static inline std::optional readLine(const char*& cursor, const char* const end) + { + LineCursor lineCursor = {.cursor = cursor, .end = end}; + auto line = lineCursor.readLine(); + cursor = lineCursor.cursor; + return line; + } }; } #endif From bd84fe318c0bafd7e3de802fd598dad652ec640d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 9 Mar 2026 17:21:48 +0100 Subject: [PATCH 093/118] Commonize loader view helpers --- .../asset/interchange/SGeometryLoaderCommon.h | 68 +++++++++++++++++++ .../asset/interchange/COBJMeshFileLoader.cpp | 2 +- .../asset/interchange/CPLYMeshFileLoader.cpp | 46 +++---------- .../asset/interchange/CSTLMeshFileLoader.cpp | 3 +- 4 files changed, 80 insertions(+), 39 deletions(-) diff --git a/include/nbl/asset/interchange/SGeometryLoaderCommon.h b/include/nbl/asset/interchange/SGeometryLoaderCommon.h index 27434b01e9..dd2e1cf72e 100644 --- a/include/nbl/asset/interchange/SGeometryLoaderCommon.h +++ b/include/nbl/asset/interchange/SGeometryLoaderCommon.h @@ -43,6 +43,74 @@ class SGeometryLoaderCommon for (uint32_t c = 0u; c < componentCount; ++c) onComponent(getTexelOrBlockBytesize(componentFormat) * c, view.stride, componentFormat); } + //! Creates one owned data view with storage sized for `elementCount` items in `format`. + static inline IGeometry::SDataView createOwnedView(const E_FORMAT format, const size_t elementCount) + { + if (format == EF_UNKNOWN || elementCount == 0ull) + return {}; + const auto stride = getTexelOrBlockBytesize(format); + auto buffer = ICPUBuffer::create({stride * elementCount}); + return buffer ? createDataView(std::move(buffer), stride * elementCount, stride, format) : IGeometry::SDataView{}; + } + //! Finalizes one structured base view, calls `onComponent`, and allocates the resulting owned data view. 
+ template + static inline IGeometry::SDataView createStructuredView(IGeometry::SDataViewBase& view, const size_t elementCount, Fn&& onComponent) + { + if (view.format == EF_UNKNOWN) + return {}; + finalizeStructuredBaseView(view, std::forward(onComponent)); + return createOwnedView(view.format, elementCount); + } + //! Finalizes one structured view, appends per-component iterator bindings, rebases them against the allocated buffer, and passes the created view to `setter`. + template + static inline void attachStructuredView(IGeometry::SDataViewBase& baseView, const size_t elementCount, IteratorContainer& iterators, PushComponent&& pushComponent, RebaseComponent&& rebaseComponent, Setter&& setter) + { + auto beginIx = iterators.size(); + auto view = createStructuredView(baseView, elementCount, [&](const size_t offset, const uint32_t stride, const E_FORMAT componentFormat) -> void { pushComponent(iterators, offset, stride, componentFormat); }); + if (!view) + return; + const auto basePtr = ptrdiff_t(view.src.buffer->getPointer()) + view.src.offset; + for (const auto endIx = iterators.size(); beginIx != endIx; ++beginIx) + rebaseComponent(iterators[beginIx], basePtr); + setter(std::move(view)); + } + //! Visits position, normal, and auxiliary attribute views for one polygon geometry. + template + static inline void visitVertexAttributeViews(const ICPUPolygonGeometry* geometry, Visitor&& visitor) + { + if (!geometry) + return; + visitor(geometry->getPositionView()); + visitor(geometry->getNormalView()); + for (const auto& view : geometry->getAuxAttributeViews()) + visitor(view); + } + //! Visits all views owned by one polygon geometry, including index and skeletal data. 
+ template + static inline void visitGeometryViews(const ICPUPolygonGeometry* geometry, Visitor&& visitor) + { + if (!geometry) + return; + visitVertexAttributeViews(geometry, visitor); + visitor(geometry->getIndexView()); + for (const auto& view : geometry->getJointWeightViews()) + { + visitor(view.indices); + visitor(view.weights); + } + if (const auto jointObb = geometry->getJointOBBView(); jointObb) + visitor(*jointObb); + } + //! Stores one auxiliary view at `slot`, resizing the aux array as needed. + static inline void setAuxViewAt(ICPUPolygonGeometry* geometry, const uint32_t slot, IGeometry::SDataView&& view) + { + if (!geometry || !view) + return; + auto* const auxViews = geometry->getAuxAttributeViews(); + if (auxViews->size() <= slot) + auxViews->resize(slot + 1u); + (*auxViews)[slot] = std::move(view); + } //! Adopts contiguous caller-owned storage into a CPU buffer and exposes it as a formatted data view. template diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index e95467f67b..5b463729cf 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -452,7 +452,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( return false; auto* const auxViews = geometry->getAuxAttributeViews(); auxViews->resize(SOBJPolygonGeometryAuxLayout::UV0 + 1u); - auxViews->operator[](SOBJPolygonGeometryAuxLayout::UV0) = std::move(view); + (*auxViews)[SOBJPolygonGeometryAuxLayout::UV0] = std::move(view); } if (!indices.empty()) { geometry->setIndexing(IPolygonGeometryBase::TriangleList()); diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 61ebdfb6b2..3e009207a6 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -1488,22 +1488,6 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( uint32_t vertCount = 0; 
Parse::ContentHashBuild contentHashBuild = Parse::ContentHashBuild::create(computeContentHashes, hashInBuild); double headerMs = 0.0, vertexMs = 0.0, faceMs = 0.0, finalizeMs = 0.0; - auto visitVertexAttributeViews = [&](auto&& visitor) -> void { - visitor(geometry->getPositionView()); - visitor(geometry->getNormalView()); - for (const auto& view : *geometry->getAuxAttributeViews()) - visitor(view); - }; - auto visitGeometryViews = [&](auto&& visitor) -> void { - visitVertexAttributeViews(visitor); - visitor(geometry->getIndexView()); - for (const auto& view : *geometry->getJointWeightViews()) { - visitor(view.indices); - visitor(view.weights); - } - if (const auto jointObb = geometry->getJointOBBView(); jointObb) - visitor(*jointObb); - }; auto hashViewBufferIfNeeded = [&](const IGeometry::SDataView& view) -> void { if (!view || !view.src.buffer) return; @@ -1511,7 +1495,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( }; auto hashRemainingGeometryBuffers = [&]() -> void { if (contentHashBuild.hashesInline()) - visitGeometryViews(hashViewBufferIfNeeded); + SGeometryLoaderCommon::visitGeometryViews(geometry.get(), hashViewBufferIfNeeded); }; auto tryLaunchDeferredHash = [&](const IGeometry::SDataView& view) -> void { if (!view || !view.src.buffer) @@ -1721,25 +1705,15 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( extraViewNames.push_back(propertyName); } } - auto attachStructuredView = [&](ICPUPolygonGeometry::SDataViewBase& baseView, auto&& setter) -> void { - if (baseView.format == EF_UNKNOWN) - return; - auto beginIx = ctx.vertAttrIts.size(); - SGeometryLoaderCommon::finalizeStructuredBaseView(baseView, [&](const size_t offset, const uint32_t stride, const E_FORMAT componentFormat) -> void { - ctx.vertAttrIts.push_back({.ptr = reinterpret_cast(offset), .stride = stride, .dstFmt = componentFormat}); - }); - auto view = createView(baseView.format, el.Count); - for (const auto size = ctx.vertAttrIts.size(); beginIx != size; ++beginIx) - 
ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer()) + view.src.offset; - setter(std::move(view)); + auto pushStructuredAttr = [](auto& iterators, const size_t offset, const uint32_t stride, const E_FORMAT componentFormat) -> void { + iterators.push_back({.ptr = reinterpret_cast(offset), .stride = stride, .dstFmt = componentFormat}); + }; + auto rebaseStructuredAttr = [](auto& iter, const ptrdiff_t basePtr) -> void { + iter.ptr += basePtr; }; - attachStructuredView(posView, [&](auto view) { geometry->setPositionView(std::move(view)); }); - attachStructuredView(normalView, [&](auto view) { geometry->setNormalView(std::move(view)); }); - attachStructuredView(uvView, [&](auto view) { - auto* const auxViews = geometry->getAuxAttributeViews(); - auxViews->resize(SPLYPolygonGeometryAuxLayout::UV0 + 1u); - auxViews->operator[](SPLYPolygonGeometryAuxLayout::UV0) = std::move(view); - }); + SGeometryLoaderCommon::attachStructuredView(posView, el.Count, ctx.vertAttrIts, pushStructuredAttr, rebaseStructuredAttr, [&](auto view) { geometry->setPositionView(std::move(view)); }); + SGeometryLoaderCommon::attachStructuredView(normalView, el.Count, ctx.vertAttrIts, pushStructuredAttr, rebaseStructuredAttr, [&](auto view) { geometry->setNormalView(std::move(view)); }); + SGeometryLoaderCommon::attachStructuredView(uvView, el.Count, ctx.vertAttrIts, pushStructuredAttr, rebaseStructuredAttr, [&](auto view) { SGeometryLoaderCommon::setAuxViewAt(geometry.get(), SPLYPolygonGeometryAuxLayout::UV0, std::move(view)); }); core::vector auxAttributeNames; const size_t extraNameOffset = geometry->getAuxAttributeViews()->size(); for (auto& view : extraViews) @@ -1765,7 +1739,7 @@ SAssetBundle CPLYMeshFileLoader::loadAsset( logMalformedElement("vertex"); return {}; } - visitVertexAttributeViews(hashViewBufferIfNeeded); + SGeometryLoaderCommon::visitVertexAttributeViews(geometry.get(), hashViewBufferIfNeeded); tryLaunchDeferredHash(geometry->getPositionView()); verticesProcessed 
= true; if (!auxAttributeNames.empty()) diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index da59154a00..a92b86f839 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -495,8 +495,7 @@ SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoa return {}; auto* const auxViews = geometry->getAuxAttributeViews(); auxViews->resize(SSTLPolygonGeometryAuxLayout::COLOR0 + 1u); - auxViews->operator[](SSTLPolygonGeometryAuxLayout::COLOR0) = - std::move(colorView); + (*auxViews)[SSTLPolygonGeometryAuxLayout::COLOR0] = std::move(colorView); hasTriangleColors = true; } } else { From 0b8265589774e3a09b60ff21c43949bc30d7e245 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 9 Mar 2026 17:49:52 +0100 Subject: [PATCH 094/118] Clarify STL writer color decoding --- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 31 +++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 8e8fde722b..543106d502 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -218,40 +218,49 @@ struct Parse value /= 255.0; return std::clamp(value, 0.0, 1.0); } + struct PackedColor + { + uint32_t value = 0u; + E_FORMAT format = EF_B8G8R8A8_UNORM; + }; static uint16_t packViscamColorFromB8G8R8A8(const uint32_t color) { const void* src[4] = {&color, nullptr, nullptr, nullptr}; uint16_t packed = 0u; convertColor(src, &packed, 0u, 0u); - packed |= 0x8000u; - return packed; + return packed | 0x8000u; } static const ICPUPolygonGeometry::SDataView* getColorView(const ICPUPolygonGeometry* geom, const size_t vertexCount) { const auto* view = SGeometryWriterCommon::getAuxViewAt(geom, SSTLPolygonGeometryAuxLayout::COLOR0, vertexCount); return view && 
getFormatChannelCount(view->composed.format) >= 3u ? view : nullptr; } - static bool decodeColorB8G8R8A8(const ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, uint32_t& outColor) + static bool decodeColorB8G8R8A8(const ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, PackedColor& outColor) { - if (colorView.composed.format == EF_B8G8R8A8_UNORM && colorView.composed.getStride() == sizeof(uint32_t)) + if ((colorView.composed.format == EF_B8G8R8A8_UNORM || colorView.composed.format == EF_B8G8R8A8_SRGB) && colorView.composed.getStride() == sizeof(uint32_t)) { const auto* const ptr = reinterpret_cast(colorView.getPointer()); if (!ptr) return false; - std::memcpy(&outColor, ptr + static_cast(ix) * sizeof(uint32_t), sizeof(outColor)); + std::memcpy(&outColor.value, ptr + static_cast(ix) * sizeof(uint32_t), sizeof(outColor.value)); + outColor.format = colorView.composed.format; return true; } hlsl::float32_t4 decoded = {}; if (!colorView.decodeElement(ix, decoded)) return false; - const double rgbaUnit[4] = {normalizeColorComponentToUnit(decoded.x), normalizeColorComponentToUnit(decoded.y), normalizeColorComponentToUnit(decoded.z), normalizeColorComponentToUnit(decoded.w)}; - encodePixels(&outColor, rgbaUnit); + const double rgbaUnit[4] = {normalizeColorComponentToUnit(decoded.x), normalizeColorComponentToUnit(decoded.y), normalizeColorComponentToUnit(decoded.z), getFormatChannelCount(colorView.composed.format) >= 4u ? 
normalizeColorComponentToUnit(decoded.w) : 1.0}; + encodePixels(&outColor.value, rgbaUnit); + outColor.format = EF_B8G8R8A8_UNORM; return true; } - static void decodeColorUnitRGBAFromB8G8R8A8(const uint32_t color, double* out) + static void decodeColorUnitRGBAFromB8G8R8A8(const PackedColor& color, double* const outRGBA) { - const void* src[4] = {&color, nullptr, nullptr, nullptr}; - decodePixels(src, out, 0u, 0u); + const void* src[4] = {&color.value, nullptr, nullptr, nullptr}; + if (color.format == EF_B8G8R8A8_SRGB) + decodePixels(src, outRGBA, 0u, 0u); + else + decodePixels(src, outRGBA, 0u, 0u); } static bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, Context* context) { @@ -296,7 +305,7 @@ struct Parse std::array rgbaAvg = {}; for (uint32_t corner = 0u; corner < vertexIx.size(); ++corner) { - uint32_t color = 0u; + PackedColor color = {}; if (!decodeColorB8G8R8A8(*colorView, vertexIx[corner], color)) return false; std::array rgba = {}; From 833f14469c2df7e8b58fe0ab64bdf335ac908980 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 9 Mar 2026 20:27:52 +0100 Subject: [PATCH 095/118] Pipeline chunked interchange IO --- include/nbl/asset/interchange/SFileIOPolicy.h | 17 ++- .../nbl/asset/interchange/SInterchangeIO.h | 110 +++++++++++++++--- include/nbl/system/IFile.h | 5 + src/nbl/system/CFilePOSIX.cpp | 10 +- src/nbl/system/CFileWin32.cpp | 16 +-- 5 files changed, 130 insertions(+), 28 deletions(-) diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h index 372539cb7d..108f35addc 100644 --- a/include/nbl/asset/interchange/SFileIOPolicy.h +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace nbl::asset { //! Requested IO strategy selected before file size and mapping constraints are resolved. 
@@ -54,6 +55,7 @@ struct SFileIOPolicy uint64_t tinyIoAvgBytesThreshold = 1024ull; //!< Average operation size threshold for tiny-IO anomaly detection. uint64_t tinyIoMinBytesThreshold = 64ull; //!< Minimum operation size threshold for tiny-IO anomaly detection. uint64_t tinyIoMinCallCount = 1024ull; //!< Minimum operation count required to report tiny-IO anomaly. + uint8_t chunkedInFlightDepth = 0u; //!< Chunked IO requests allowed in flight. 0 means auto, 1 disables pipelining. }; using Strategy = EFileIOStrategy; @@ -109,6 +111,7 @@ struct SResolvedFileIOPolicy //! Effective chunk size encoded as log2(bytes). Also set for whole-file for telemetry consistency. uint8_t chunkSizeLog2 = SFileIOPolicy::MIN_CHUNK_SIZE_LOG2; + uint8_t chunkedInFlightDepth = 1u; //!< Resolved chunked in-flight depth. Non-chunked strategies always keep this at 1. const char* reason = "invalid"; //!< Resolver reason string used in logs and diagnostics. @@ -122,7 +125,19 @@ struct SResolvedFileIOPolicy const uint8_t chunkSizeLog2 = std::min(SFileIOPolicy::clampBytesLog2(policy.chunkSizeLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2), maxStagingLog2); const uint64_t maxStaging = SFileIOPolicy::bytesFromLog2(maxStagingLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); const uint64_t wholeThreshold = policy.wholeFileThresholdBytes(); - auto makeResolved = [&](const Strategy strategy, const char* const reason) -> SResolvedFileIOPolicy { SResolvedFileIOPolicy resolved = {}; resolved.strategy = strategy; resolved.chunkSizeLog2 = chunkSizeLog2; resolved.reason = reason; return resolved; }; + const uint64_t chunkSizeBytes = SFileIOPolicy::bytesFromLog2(chunkSizeLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); + const uint64_t chunkCount = chunkSizeBytes ? 
std::max(1ull, (byteCount + chunkSizeBytes - 1ull) / chunkSizeBytes) : 1ull; + auto resolveChunkedInFlightDepth = [&](const Strategy strategy) -> uint8_t + { + if (strategy != Strategy::Chunked || chunkCount <= 1ull) + return 1u; + if (policy.runtimeTuning.chunkedInFlightDepth > 0u) + return static_cast(std::min(policy.runtimeTuning.chunkedInFlightDepth, chunkCount)); + const uint32_t hardwareThreads = policy.runtimeTuning.maxWorkers ? policy.runtimeTuning.maxWorkers : std::thread::hardware_concurrency(); + const uint32_t usableThreads = hardwareThreads > policy.runtimeTuning.workerHeadroom ? (hardwareThreads - policy.runtimeTuning.workerHeadroom) : 1u; + return static_cast(std::clamp(usableThreads, 1ull, std::min(chunkCount, std::numeric_limits::max()))); + }; + auto makeResolved = [&](const Strategy strategy, const char* const reason) -> SResolvedFileIOPolicy { SResolvedFileIOPolicy resolved = {}; resolved.strategy = strategy; resolved.chunkSizeLog2 = chunkSizeLog2; resolved.chunkedInFlightDepth = resolveChunkedInFlightDepth(strategy); resolved.reason = reason; return resolved; }; switch (policy.strategy) { case SFileIOPolicy::Strategy::Invalid: diff --git a/include/nbl/asset/interchange/SInterchangeIO.h b/include/nbl/asset/interchange/SInterchangeIO.h index c95bc88608..953e3142d5 100644 --- a/include/nbl/asset/interchange/SInterchangeIO.h +++ b/include/nbl/asset/interchange/SInterchangeIO.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include namespace nbl::asset { @@ -85,21 +87,41 @@ class SInterchangeIO case SResolvedFileIOPolicy::Strategy::Chunked: default: { - size_t bytesRead = 0ull; + const size_t inFlightDepth = ioPlan.chunkedInFlightDepth; + auto inFlight = std::make_unique(inFlightDepth); + size_t submitOffset = 0ull; + size_t activeCount = 0ull; + size_t submitIndex = 0ull; + size_t drainIndex = 0ull; const uint64_t chunkSizeBytes = ioPlan.chunkSizeBytes(); - while (bytesRead < bytes) + auto submitChunk = [&]() -> bool { + if 
(submitOffset >= bytes || activeCount >= inFlightDepth) + return false; + auto& request = inFlight[submitIndex]; + const size_t toRead = static_cast(std::min(chunkSizeBytes, bytes - submitOffset)); + request.success.emplace(); + file->read(*request.success, out + submitOffset, offset + submitOffset, toRead); + request.bytes = toRead; + request.active = true; + submitOffset += toRead; + submitIndex = (submitIndex + 1ull) % inFlightDepth; + ++activeCount; + return true; + }; + auto drainChunk = [&]() -> bool { + auto& request = inFlight[drainIndex]; + if (!request.active) + return false; + const bool ok = drainChunkedRequest(request, ioTelemetry); + drainIndex = (drainIndex + 1ull) % inFlightDepth; + --activeCount; + return ok; + }; + while (submitOffset < bytes || activeCount) { - const size_t toRead = static_cast(std::min(chunkSizeBytes, bytes - bytesRead)); - system::IFile::success_t success; - file->read(success, out + bytesRead, offset + bytesRead, toRead); - if (!success) + while (submitChunk()) {} + if (activeCount && !drainChunk()) return finalize(false); - const size_t processed = success.getBytesProcessed(); - if (processed == 0ull) - return finalize(false); - if (ioTelemetry) - ioTelemetry->account(processed); - bytesRead += processed; } return finalize(true); } @@ -123,11 +145,11 @@ class SInterchangeIO continue; const auto* data = reinterpret_cast(buffer.data); size_t writtenTotal = 0ull; - while (writtenTotal < buffer.byteCount) + if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) { - const size_t toWrite = ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile ? 
(buffer.byteCount - writtenTotal) : static_cast(std::min(chunkSizeBytes, buffer.byteCount - writtenTotal)); + const size_t toWrite = buffer.byteCount; system::IFile::success_t success; - file->write(success, data + writtenTotal, fileOffset + writtenTotal, toWrite); + file->write(success, data, fileOffset, toWrite); if (!success) return false; const size_t written = success.getBytesProcessed(); @@ -137,6 +159,46 @@ class SInterchangeIO ioTelemetry->account(written); writtenTotal += written; } + else + { + const size_t inFlightDepth = ioPlan.chunkedInFlightDepth; + auto inFlight = std::make_unique(inFlightDepth); + size_t submitOffset = 0ull; + size_t activeCount = 0ull; + size_t submitIndex = 0ull; + size_t drainIndex = 0ull; + auto submitChunk = [&]() -> bool { + if (submitOffset >= buffer.byteCount || activeCount >= inFlightDepth) + return false; + auto& request = inFlight[submitIndex]; + const size_t toWrite = static_cast(std::min(chunkSizeBytes, buffer.byteCount - submitOffset)); + request.success.emplace(); + file->write(*request.success, data + submitOffset, fileOffset + submitOffset, toWrite); + request.bytes = toWrite; + request.active = true; + submitOffset += toWrite; + submitIndex = (submitIndex + 1ull) % inFlightDepth; + ++activeCount; + return true; + }; + auto drainChunk = [&]() -> bool { + auto& request = inFlight[drainIndex]; + if (!request.active) + return false; + const bool ok = drainChunkedRequest(request, ioTelemetry); + if (ok) + writtenTotal += request.bytes; + drainIndex = (drainIndex + 1ull) % inFlightDepth; + --activeCount; + return ok; + }; + while (submitOffset < buffer.byteCount || activeCount) + { + while (submitChunk()) {} + if (activeCount && !drainChunk()) + return false; + } + } fileOffset += writtenTotal; } return true; @@ -147,6 +209,24 @@ class SInterchangeIO static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, size_t& fileOffset, 
SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } //! Single-buffer convenience wrapper over `writeBuffersWithPolicy`. static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicy(file, ioPlan, buffers, ioTelemetry); } + private: + struct SChunkedRequest + { + std::optional success = std::nullopt; + size_t bytes = 0ull; + bool active = false; + }; + static inline bool drainChunkedRequest(SChunkedRequest& request, STelemetry* ioTelemetry) + { + const size_t processed = request.success ? request.success->getBytesProcessed():0ull; + request.success.reset(); + request.active = false; + if (processed != request.bytes || processed == 0ull) + return false; + if (ioTelemetry) + ioTelemetry->account(processed); + return true; + } }; using SFileIOTelemetry = SInterchangeIO::STelemetry; using SFileReadTelemetry = SInterchangeIO::SReadTelemetry; diff --git a/include/nbl/system/IFile.h b/include/nbl/system/IFile.h index 0ab739ba4a..f2c615c311 100644 --- a/include/nbl/system/IFile.h +++ b/include/nbl/system/IFile.h @@ -81,11 +81,16 @@ class IFile : public IFileBase, private ISystem::IFutureManipulator }; void read(success_t& fut, void* buffer, size_t offset, size_t sizeToRead) { + // The higher-level IO helpers may queue multiple chunked operations before waiting on the futures. + // Backends therefore need to treat `offset` as the request-local byte position rather than relying on + // a mutable shared file pointer hidden inside the OS file handle. 
read(fut.m_internalFuture,buffer,offset,sizeToRead); fut.sizeToProcess = sizeToRead; } void write(success_t& fut, const void* buffer, size_t offset, size_t sizeToWrite) { + // Same requirement as `read(...)`: writes are logically positional requests and must honor the explicit + // byte offset even when multiple operations are submitted before the caller drains their futures. write(fut.m_internalFuture,buffer,offset,sizeToWrite); fut.sizeToProcess = sizeToWrite; } diff --git a/src/nbl/system/CFilePOSIX.cpp b/src/nbl/system/CFilePOSIX.cpp index 1f78d5befa..2eb9e62ed2 100644 --- a/src/nbl/system/CFilePOSIX.cpp +++ b/src/nbl/system/CFilePOSIX.cpp @@ -28,13 +28,13 @@ CFilePOSIX::~CFilePOSIX() size_t CFilePOSIX::asyncRead(void* buffer, size_t offset, size_t sizeToRead) { - lseek(m_native, offset, SEEK_SET); - return ::read(m_native, buffer, sizeToRead); + const auto processed = pread(m_native, buffer, sizeToRead, static_cast(offset)); + return processed > 0 ? static_cast(processed):0ull; } size_t CFilePOSIX::asyncWrite(const void* buffer, size_t offset, size_t sizeToWrite) { - lseek(m_native, offset, SEEK_SET); - return ::write(m_native, buffer, sizeToWrite); + const auto processed = pwrite(m_native, buffer, sizeToWrite, static_cast(offset)); + return processed > 0 ? 
static_cast(processed):0ull; } -#endif \ No newline at end of file +#endif diff --git a/src/nbl/system/CFileWin32.cpp b/src/nbl/system/CFileWin32.cpp index ae888e0d9b..ffe9d9c6b0 100644 --- a/src/nbl/system/CFileWin32.cpp +++ b/src/nbl/system/CFileWin32.cpp @@ -57,17 +57,19 @@ inline size_t CFileWin32::getSize() const size_t CFileWin32::asyncRead(void* buffer, size_t offset, size_t sizeToRead) { - seek(offset); + OVERLAPPED overlapped = {}; + overlapped.Offset = LODWORD(offset); + overlapped.OffsetHigh = HIDWORD(offset); DWORD numOfBytesRead; - ReadFile(m_native, buffer, sizeToRead, &numOfBytesRead, nullptr); - return numOfBytesRead; + return ReadFile(m_native, buffer, sizeToRead, &numOfBytesRead, &overlapped) ? numOfBytesRead:0ull; } size_t CFileWin32::asyncWrite(const void* buffer, size_t offset, size_t sizeToWrite) { - seek(offset); + OVERLAPPED overlapped = {}; + overlapped.Offset = LODWORD(offset); + overlapped.OffsetHigh = HIDWORD(offset); DWORD numOfBytesWritten; - WriteFile(m_native, buffer, sizeToWrite, &numOfBytesWritten, nullptr); - return numOfBytesWritten; + return WriteFile(m_native, buffer, sizeToWrite, &numOfBytesWritten, &overlapped) ? 
numOfBytesWritten:0ull; } @@ -76,4 +78,4 @@ void CFileWin32::seek(size_t position) LONG hiDword = HIDWORD(position); SetFilePointer(m_native,position,&hiDword,FILE_BEGIN); } -#endif \ No newline at end of file +#endif From 9bccfeae48c1edfc10e8da3f2ce1eb6158962f88 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 9 Mar 2026 20:54:36 +0100 Subject: [PATCH 096/118] Tighten metadata access and document float fast path --- include/nbl/asset/metadata/CPLYMetadata.h | 2 +- src/nbl/asset/interchange/impl/STextParse.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/nbl/asset/metadata/CPLYMetadata.h b/include/nbl/asset/metadata/CPLYMetadata.h index d9afd4c3fe..ec7112bd0a 100644 --- a/include/nbl/asset/metadata/CPLYMetadata.h +++ b/include/nbl/asset/metadata/CPLYMetadata.h @@ -41,7 +41,7 @@ class CPLYMetadata final : public IAssetMetadata friend class CPLYMeshFileLoader; inline void placeMeta(const uint32_t offset, const ICPUPolygonGeometry* geometry, core::vector&& auxAttributeNames) { - auto& meta = m_geometryMetaStorage->operator[](offset); + auto& meta = (*m_geometryMetaStorage)[offset]; meta = CPolygonGeometry{}; meta.m_auxAttributeNames = std::move(auxAttributeNames); IAssetMetadata::insertAssetSpecificMetadata(geometry, &meta); diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h index 371d730f82..387833dfa0 100644 --- a/src/nbl/asset/interchange/impl/STextParse.h +++ b/src/nbl/asset/interchange/impl/STextParse.h @@ -57,6 +57,9 @@ struct TextParse if (p >= end) return false; } + // Fast path for the plain decimal tokens dominating large text interchange datasets. + // It exists because routing every float through fast_float regresses benchmarked load times noticeably. + // This is not a standalone general-purpose parser: exponent or otherwise non-trivial spellings still fall back to fast_float. if (*p != '.' 
&& isDigit(*p)) { uint64_t integerPart = 0ull; From 42258531c8bace66f4115fa94df6aa222f24ce46 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 9 Mar 2026 21:01:14 +0100 Subject: [PATCH 097/118] Clarify shared decimal fast path --- src/nbl/asset/interchange/impl/STextParse.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h index 387833dfa0..ac4ed2d9b2 100644 --- a/src/nbl/asset/interchange/impl/STextParse.h +++ b/src/nbl/asset/interchange/impl/STextParse.h @@ -57,9 +57,10 @@ struct TextParse if (p >= end) return false; } - // Fast path for the plain decimal tokens dominating large text interchange datasets. - // It exists because routing every float through fast_float regresses benchmarked load times noticeably. - // This is not a standalone general-purpose parser: exponent or otherwise non-trivial spellings still fall back to fast_float. + // Fast path for the common plain-decimal subset: optional sign, digits, and an optional fractional part, but no exponent. + // This follows the same broad idea as RapidJSON's StrtodFast: cheaply handle the dominant simple spellings before delegating + // harder cases to the full parser. This is not a standalone general-purpose parser. Tokens with exponents or otherwise + // non-trivial spellings still fall back to fast_float. if (*p != '.' 
&& isDigit(*p)) { uint64_t integerPart = 0ull; From c4733d0849befe508bb1ed66dce7b39341f8ed37 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 9 Mar 2026 21:08:08 +0100 Subject: [PATCH 098/118] Update examples tests pointer --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 4eb1314632..6ceae5f010 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 4eb131463299c77ea8f3ed0e17ede02271f89bff +Subproject commit 6ceae5f01095f1e04ee274a094622c5ffd0b1ba7 From b83cfc5cd39d0c7273cf9bde81d67d2b0a24ea96 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Mar 2026 11:17:15 +0100 Subject: [PATCH 099/118] Refine smooth normal accumulation --- .../asset/utils/CPolygonGeometryManipulator.h | 4 + .../asset/interchange/COBJMeshFileLoader.cpp | 45 +++- .../utils/CPolygonGeometryManipulator.cpp | 70 +---- src/nbl/asset/utils/CSmoothNormalGenerator.h | 243 +++++++++++++++++- 4 files changed, 290 insertions(+), 72 deletions(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 7163cbfbce..3faed25913 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -44,6 +44,10 @@ class NBL_API2 CPolygonGeometryManipulator recomputeContentHashesParallel(geo, SFileIOPolicy{}); } + using ESmoothNormalAccumulationMode = CSmoothNormalGenerator::EAccumulationMode; + using SSmoothNormalCorner = CSmoothNormalGenerator::SAccumulatedCorner<>; + using CSmoothNormalAccumulator = CSmoothNormalGenerator::CAccumulatedNormals<>; + static bool generateMissingSmoothNormals( core::vector& normals, const core::vector& positions, diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 5b463729cf..147dd6b0fe 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ 
b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include namespace nbl::asset { @@ -315,6 +317,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( core::vector outNormals; core::vector outNormalNeedsGeneration; core::vector outUVs; + std::optional smoothNormalAccumulator; core::vector indices; core::vector dedupHeadByPos; core::vector dedupNodes; @@ -386,6 +389,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( outNormals.clear(); outNormalNeedsGeneration.clear(); outUVs.clear(); + smoothNormalAccumulator.reset(); indices.clear(); dedupNodes.clear(); outPositions.resize(initialOutVertexCapacity); @@ -419,13 +423,15 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( if (needsNormalGeneration) { // OBJ smoothing groups are already encoded in the parser-side vertex // split corners that must stay sharp become different output vertices - // even if they share position. This helper works on that final indexed - // output and fills only normals missing in the source. - // `createSmoothVertexNormal` is still not enough here even with - // indexed-view support, because it would also need a "missing only" mode - // and proper OBJ smoothing-group handling. - if (!CPolygonGeometryManipulator::generateMissingSmoothNormals( - outNormals, outPositions, indices, outNormalNeedsGeneration)) + // even if they share position. We therefore feed the parser-final + // indexed triangles into a smoothing accumulator and finalize only + // the normals that were missing in the source. 
+ if (!smoothNormalAccumulator) + return false; + smoothNormalAccumulator->reserveVertices(outVertexWriteCount); + if (!smoothNormalAccumulator->finalize( + std::span(outNormals.data(), outNormals.size()), + std::span(outNormalNeedsGeneration.data(), outNormalNeedsGeneration.size()))) return false; } const size_t outVertexCount = outPositions.size(); @@ -502,6 +508,10 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( outNormals.resize(newCapacity); outNormalNeedsGeneration.resize(newCapacity, 0u); outUVs.resize(newCapacity); + if (smoothNormalAccumulator) { + smoothNormalAccumulator->reserveVertices(newCapacity); + smoothNormalAccumulator->prepareIdentityGroups(newCapacity); + } } if (outVertexWriteCount > static_cast(std::numeric_limits::max())) @@ -643,7 +653,23 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( return acquire(triIdx[0], cornerIx.x) && acquire(triIdx[1], cornerIx.y) && acquire(triIdx[2], cornerIx.z); }; auto appendTriangle = [&](const hlsl::uint32_t3& cornerIx) -> bool { - return appendIndex(cornerIx.z) && appendIndex(cornerIx.y) && appendIndex(cornerIx.x); + if (!(appendIndex(cornerIx.z) && appendIndex(cornerIx.y) && appendIndex(cornerIx.x))) + return false; + if (!needsNormalGeneration) + return true; + if (!smoothNormalAccumulator) { + smoothNormalAccumulator.emplace(CPolygonGeometryManipulator::ESmoothNormalAccumulationMode::AreaWeighted); + smoothNormalAccumulator->reserveVertices(outVertexWriteCount); + smoothNormalAccumulator->prepareIdentityGroups(outPositions.size()); + } + if (outNormalNeedsGeneration[static_cast(cornerIx.x)] == 0u && + outNormalNeedsGeneration[static_cast(cornerIx.y)] == 0u && + outNormalNeedsGeneration[static_cast(cornerIx.z)] == 0u) + return true; + return smoothNormalAccumulator->addPreparedIdentityTriangle( + cornerIx.z, outPositions[static_cast(cornerIx.z)], + cornerIx.y, outPositions[static_cast(cornerIx.y)], + cornerIx.x, outPositions[static_cast(cornerIx.x)]); }; uint32_t currentSmoothingGroup = 0u; 
while (bufPtr < bufEnd) { @@ -815,8 +841,7 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( ++cornerCount; continue; } - if (!appendIndex(cornerIx) || !appendIndex(previousCorner) || - !appendIndex(firstCorner)) + if (!appendTriangle(hlsl::uint32_t3(firstCorner, previousCorner, cornerIx))) return {}; previousCorner = cornerIx; ++cornerCount; diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 1ac97716ba..f83fb3c3e0 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -7,6 +7,7 @@ #include #include +#include #include "nbl/asset/utils/CPolygonGeometryManipulator.h" #include "nbl/asset/interchange/SLoaderRuntimeTuning.h" @@ -149,7 +150,9 @@ bool CPolygonGeometryManipulator::generateMissingSmoothNormals( if (normals.size() != positions.size() || normals.size() != normalNeedsGeneration.size()) return false; - core::vector generatedNormals(positions.size(), hlsl::float32_t3(0.f, 0.f, 0.f)); + CSmoothNormalAccumulator accumulator(ESmoothNormalAccumulationMode::AreaWeighted); + accumulator.reserveVertices(positions.size()); + accumulator.prepareIdentityGroups(positions.size()); const size_t triangleCount = indices.size() / 3ull; for (size_t triIx = 0ull; triIx < triangleCount; ++triIx) { @@ -158,64 +161,15 @@ bool CPolygonGeometryManipulator::generateMissingSmoothNormals( const uint32_t i2 = indices[triIx * 3ull + 2ull]; if (i0 >= positions.size() || i1 >= positions.size() || i2 >= positions.size()) continue; - - const auto& p0 = positions[static_cast(i0)]; - const auto& p1 = positions[static_cast(i1)]; - const auto& p2 = positions[static_cast(i2)]; - - const float e10x = p1.x - p0.x; - const float e10y = p1.y - p0.y; - const float e10z = p1.z - p0.z; - const float e20x = p2.x - p0.x; - const float e20y = p2.y - p0.y; - const float e20z = p2.z - p0.z; - - const hlsl::float32_t3 faceNormal( - e10y * e20z - e10z * e20y, - e10z 
* e20x - e10x * e20z, - e10x * e20y - e10y * e20x); - - const float faceLenSq = faceNormal.x * faceNormal.x + faceNormal.y * faceNormal.y + faceNormal.z * faceNormal.z; - if (faceLenSq <= 1e-20f) - continue; - - const auto accumulateIfNeeded = [&](const uint32_t vertexIx)->void - { - if (normalNeedsGeneration[static_cast(vertexIx)] == 0u) - return; - auto& dstNormal = generatedNormals[static_cast(vertexIx)]; - dstNormal.x += faceNormal.x; - dstNormal.y += faceNormal.y; - dstNormal.z += faceNormal.z; - }; - - accumulateIfNeeded(i0); - accumulateIfNeeded(i1); - accumulateIfNeeded(i2); - } - - for (size_t i = 0ull; i < normals.size(); ++i) - { - if (normalNeedsGeneration[i] == 0u) - continue; - - auto normal = generatedNormals[i]; - const float lenSq = normal.x * normal.x + normal.y * normal.y + normal.z * normal.z; - if (lenSq > 1e-20f) - { - const float invLen = 1.f / std::sqrt(lenSq); - normal.x *= invLen; - normal.y *= invLen; - normal.z *= invLen; - } - else - { - normal = hlsl::float32_t3(0.f, 0.f, 1.f); - } - normals[i] = normal; + if (!accumulator.addPreparedIdentityTriangle( + i0, positions[static_cast(i0)], + i1, positions[static_cast(i1)], + i2, positions[static_cast(i2)])) + return false; } - - return true; + return accumulator.finalize( + std::span(normals.data(), normals.size()), + std::span(normalNeedsGeneration.data(), normalNeedsGeneration.size())); } diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index 7c9bf5358f..65be349e4a 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -5,18 +5,253 @@ #define _NBL_ASSET_C_SMOOTH_NORMAL_GENERATOR_H_INCLUDED_ #include "nbl/asset/utils/CVertexHashGrid.h" +#include "nbl/builtin/hlsl/shapes/triangle.hlsl" + +#include +#include +#include +#include namespace nbl::asset { -// TODO: implement a class template that take position type(either float32_t3 or float64_t3 as template argument +template +concept 
SmoothNormalPosition = std::same_as || std::same_as; + class CSmoothNormalGenerator final { public: CSmoothNormalGenerator() = delete; ~CSmoothNormalGenerator() = delete; + enum class EAccumulationMode : uint8_t + { + AreaWeighted, + AngleWeighted + }; + + template + struct SAccumulatedCorner + { + uint32_t vertexIx = 0u; + uint32_t accumulationGroup = 0u; + PositionT position = PositionT(0.f, 0.f, 0.f); + }; + + template + class CAccumulatedNormals final + { + public: + using vector_t = PositionT; + + explicit CAccumulatedNormals(const EAccumulationMode mode = EAccumulationMode::AreaWeighted) : m_mode(mode) {} + + NBL_FORCE_INLINE void reserveVertices(const size_t count) + { + if (count > m_vertexCount) + m_vertexCount = count; + if (count > m_groupsByVertex.capacity() && !m_groupsByVertex.empty()) + m_groupsByVertex.reserve(growSize(count)); + } + + NBL_FORCE_INLINE void reserveGroups(const size_t count) + { + if (count > m_accumulatedNormals.capacity()) + m_accumulatedNormals.reserve(growSize(count)); + } + + NBL_FORCE_INLINE void prepareIdentityGroups(const size_t count) + { + if (!m_groupsByVertex.empty()) + return; + ensureGroupStorage(count); + } + + NBL_FORCE_INLINE bool addTriangle(const std::array, 3>& corners) + { + if (canUseIdentityFastPath(corners)) + return addTriangle(corners[0].vertexIx, corners[0].position, corners[1].vertexIx, corners[1].position, corners[2].vertexIx, corners[2].position); + for (const auto& corner : corners) + { + if (!registerCorner(corner)) + return false; + } + return accumulateTriangle(corners, [](const SAccumulatedCorner& corner) { return corner.accumulationGroup; }); + } + + NBL_FORCE_INLINE bool addTriangle(const uint32_t i0, const PositionT& p0, const uint32_t i1, const PositionT& p1, const uint32_t i2, const PositionT& p2) + { + const size_t maxIx = std::max(static_cast(i0), std::max(static_cast(i1), static_cast(i2))); + const size_t requiredCount = maxIx + 1ull; + if (requiredCount > m_vertexCount) + m_vertexCount = 
requiredCount; + ensureGroupStorage(requiredCount); + if (m_groupsByVertex.empty()) + return accumulateTriangle(p0, p1, p2, i0, i1, i2); + return addTriangle({{ + {.vertexIx = i0, .accumulationGroup = i0, .position = p0}, + {.vertexIx = i1, .accumulationGroup = i1, .position = p1}, + {.vertexIx = i2, .accumulationGroup = i2, .position = p2} + }}); + } + + NBL_FORCE_INLINE bool addPreparedIdentityTriangle(const uint32_t i0, const PositionT& p0, const uint32_t i1, const PositionT& p1, const uint32_t i2, const PositionT& p2) + { + if (!m_groupsByVertex.empty()) + return false; + const size_t requiredCount = std::max(static_cast(i0), std::max(static_cast(i1), static_cast(i2))) + 1ull; + if (requiredCount > m_vertexCount) + m_vertexCount = requiredCount; + if (requiredCount > m_accumulatedNormals.size()) + return false; + return accumulateTriangle(p0, p1, p2, i0, i1, i2); + } + + template + NBL_FORCE_INLINE bool finalize(const std::span normals, const std::span normalNeedsGeneration = {}, const NormalT& fallback = NormalT(0.f, 0.f, 1.f)) const + { + if (!normalNeedsGeneration.empty() && normalNeedsGeneration.size() != normals.size()) + return false; + if (normals.size() < m_vertexCount) + return false; + + if (m_groupsByVertex.empty()) + { + for (size_t vertexIx = 0ull; vertexIx < m_vertexCount; ++vertexIx) + { + if (!normalNeedsGeneration.empty() && normalNeedsGeneration[vertexIx] == 0u) + continue; + const auto normal = vertexIx < m_accumulatedNormals.size() ? m_accumulatedNormals[vertexIx] : vector_t(0.f, 0.f, 0.f); + const auto lenSq = hlsl::dot(normal, normal); + normals[vertexIx] = (lenSq > 1e-20f) ? 
(normal * hlsl::rsqrt(lenSq)) : fallback; + } + return true; + } + + for (size_t vertexIx = 0ull; vertexIx < m_vertexCount; ++vertexIx) + { + if (!normalNeedsGeneration.empty() && normalNeedsGeneration[vertexIx] == 0u) + continue; + const uint32_t group = resolveGroup(static_cast(vertexIx)); + if (group == InvalidGroup) + return false; + + const auto normal = group < m_accumulatedNormals.size() ? m_accumulatedNormals[group] : vector_t(0.f, 0.f, 0.f); + const auto lenSq = hlsl::dot(normal, normal); + normals[vertexIx] = (lenSq > 1e-20f) ? (normal * hlsl::rsqrt(lenSq)) : fallback; + } + return true; + } + + private: + static inline constexpr uint32_t InvalidGroup = std::numeric_limits::max(); + + static NBL_FORCE_INLINE size_t growSize(const size_t required) + { + return required > 1ull ? std::bit_ceil(required) : 1ull; + } + + template + NBL_FORCE_INLINE bool accumulateTriangle(const std::array, 3>& corners, GroupFn&& groupFn) + { + return accumulateTriangle( + corners[0].position, corners[1].position, corners[2].position, + groupFn(corners[0]), groupFn(corners[1]), groupFn(corners[2]) + ); + } + + NBL_FORCE_INLINE void ensureGroupStorage(const size_t requiredCount) + { + if (requiredCount <= m_accumulatedNormals.size()) + return; + const size_t grownCount = growSize(requiredCount); + if (requiredCount > m_accumulatedNormals.capacity()) + m_accumulatedNormals.reserve(grownCount); + m_accumulatedNormals.resize(grownCount, vector_t(0.f, 0.f, 0.f)); + } + + NBL_FORCE_INLINE bool accumulateTriangle(const PositionT& p0, const PositionT& p1, const PositionT& p2, const uint32_t g0, const uint32_t g1, const uint32_t g2) + { + const auto edge10 = p1 - p0; + const auto edge20 = p2 - p0; + const auto faceNormal = hlsl::cross(edge10, edge20); + const auto faceLenSq = hlsl::dot(faceNormal, faceNormal); + if (faceLenSq <= 1e-20f) + return true; + + if (m_mode == EAccumulationMode::AreaWeighted) + { + m_accumulatedNormals[g0] += faceNormal; + m_accumulatedNormals[g1] += 
faceNormal; + m_accumulatedNormals[g2] += faceNormal; + return true; + } + + const auto weights = hlsl::shapes::util::anglesFromTriangleEdges(p2 - p1, p0 - p2, p1 - p0); + const auto unitNormal = faceNormal * hlsl::rsqrt(faceLenSq); + m_accumulatedNormals[g0] += unitNormal * weights.x; + m_accumulatedNormals[g1] += unitNormal * weights.y; + m_accumulatedNormals[g2] += unitNormal * weights.z; + return true; + } + + NBL_FORCE_INLINE bool canUseIdentityFastPath(const std::array, 3>& corners) const + { + if (!m_groupsByVertex.empty()) + return false; + for (const auto& corner : corners) + { + if (corner.vertexIx != corner.accumulationGroup) + return false; + } + return true; + } + + NBL_FORCE_INLINE uint32_t resolveGroup(const uint32_t vertexIx) const + { + if (vertexIx >= m_vertexCount) + return InvalidGroup; + if (m_groupsByVertex.empty()) + return vertexIx; + if (vertexIx >= m_groupsByVertex.size()) + return vertexIx; + const uint32_t mapped = m_groupsByVertex[vertexIx]; + return mapped == InvalidGroup ? 
vertexIx : mapped; + } + + NBL_FORCE_INLINE bool registerCorner(const SAccumulatedCorner& corner) + { + if ((static_cast(corner.vertexIx) + 1ull) > m_vertexCount) + m_vertexCount = static_cast(corner.vertexIx) + 1ull; + ensureGroupStorage(static_cast(corner.accumulationGroup) + 1ull); + if (m_groupsByVertex.empty()) + { + if (corner.vertexIx == corner.accumulationGroup) + return true; + m_groupsByVertex.reserve(growSize(m_vertexCount)); + } + else if (corner.vertexIx >= m_groupsByVertex.size()) + m_groupsByVertex.reserve(growSize(m_vertexCount)); + if (corner.vertexIx >= m_groupsByVertex.size()) + m_groupsByVertex.resize(growSize(static_cast(corner.vertexIx) + 1ull), InvalidGroup); + auto& group = m_groupsByVertex[corner.vertexIx]; + if (group == InvalidGroup) + { + if (corner.vertexIx == corner.accumulationGroup) + return true; + group = corner.accumulationGroup; + return true; + } + return group == corner.accumulationGroup; + } + + EAccumulationMode m_mode; + size_t m_vertexCount = 0ull; + core::vector m_groupsByVertex; + core::vector m_accumulatedNormals; + }; + struct VertexData { //offset of the vertex into index buffer @@ -31,9 +266,9 @@ class CSmoothNormalGenerator final return position; } - void setHash(uint32_t hash) + void setHash(uint32_t newHash) { - this->hash = hash; + hash = newHash; } uint32_t getHash() const @@ -60,4 +295,4 @@ class CSmoothNormalGenerator final }; } -#endif \ No newline at end of file +#endif From c4e9665e7dbeeb0b34527928bdeb27e406e5274f Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Mar 2026 11:28:41 +0100 Subject: [PATCH 100/118] Document smooth normal accumulation --- .../asset/utils/CPolygonGeometryManipulator.h | 5 ++++ src/nbl/asset/utils/CSmoothNormalGenerator.h | 30 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 3faed25913..1d971776ec 100644 --- 
a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -44,10 +44,15 @@ class NBL_API2 CPolygonGeometryManipulator recomputeContentHashesParallel(geo, SFileIOPolicy{}); } + //! Public aliases for the generic smooth-normal accumulation core. + //! The default path keeps float32 positions to match current geometry storage. using ESmoothNormalAccumulationMode = CSmoothNormalGenerator::EAccumulationMode; using SSmoothNormalCorner = CSmoothNormalGenerator::SAccumulatedCorner<>; using CSmoothNormalAccumulator = CSmoothNormalGenerator::CAccumulatedNormals<>; + //! Convenience wrapper over the incremental smooth-normal accumulator for the common + //! "indexed positions + generate only missing normals" case. This keeps the existing + //! area-weighted behaviour while reusing the generic accumulator implementation. static bool generateMissingSmoothNormals( core::vector& normals, const core::vector& positions, diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index 65be349e4a..8bce90f864 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -19,18 +19,28 @@ namespace nbl::asset template concept SmoothNormalPosition = std::same_as || std::same_as; +//! Generic smooth-normal accumulation utilities. The core accepts triangles incrementally, +//! supports indexed inputs, optional caller-defined grouping, and finalizes into a caller-owned +//! normal buffer. Parsing and authoring of any format-specific grouping rules stay outside. class CSmoothNormalGenerator final { public: CSmoothNormalGenerator() = delete; ~CSmoothNormalGenerator() = delete; + //! AreaWeighted matches the existing behaviour used by current loaders. AngleWeighted + //! is available for future callers that need angle-based smoothing without changing the API. enum class EAccumulationMode : uint8_t { AreaWeighted, AngleWeighted }; + //! 
One triangle corner to be accumulated. `vertexIx` points at the output vertex whose + //! normal will be written on finalize. `accumulationGroup` controls which corners smooth + //! together. This is the generic equivalent of format-specific smoothing-group semantics. + //! Callers can keep it equal to `vertexIx` for identity grouping or map it to any other + //! stable grouping key when corners that share a position must stay sharp. template struct SAccumulatedCorner { @@ -39,6 +49,9 @@ class CSmoothNormalGenerator final PositionT position = PositionT(0.f, 0.f, 0.f); }; + //! Incremental smooth-normal accumulator. Callers feed triangles through `addTriangle(...)` + //! and then materialize results with `finalize(...)`. Grouping is provided entirely by + //! the caller through `accumulationGroup`. template class CAccumulatedNormals final { @@ -47,6 +60,8 @@ class CSmoothNormalGenerator final explicit CAccumulatedNormals(const EAccumulationMode mode = EAccumulationMode::AreaWeighted) : m_mode(mode) {} + //! Records how many output vertices may need normals. This affects finalize-time + //! validation and may reserve group storage if non-identity grouping is already active. NBL_FORCE_INLINE void reserveVertices(const size_t count) { if (count > m_vertexCount) @@ -55,12 +70,17 @@ class CSmoothNormalGenerator final m_groupsByVertex.reserve(growSize(count)); } + //! Reserves accumulation storage for explicit grouping. Callers that know they will + //! feed many non-identity groups can use this to avoid repeated reallocations. NBL_FORCE_INLINE void reserveGroups(const size_t count) { if (count > m_accumulatedNormals.capacity()) m_accumulatedNormals.reserve(growSize(count)); } + //! Prepares the common identity-group case (`accumulationGroup == vertexIx`) up front. + //! This enables a lighter hot path where `addPreparedIdentityTriangle(...)` can skip + //! per-corner registration and write straight into pre-sized accumulation slots. 
NBL_FORCE_INLINE void prepareIdentityGroups(const size_t count) { if (!m_groupsByVertex.empty()) @@ -68,6 +88,10 @@ class CSmoothNormalGenerator final ensureGroupStorage(count); } + //! Generic triangle submission path. Use this when the caller needs custom grouping. + //! In particular, callers can encode smoothing-group-like semantics by assigning + //! the same `accumulationGroup` to corners that should share a smooth normal and a + //! different one to corners that must stay sharp. NBL_FORCE_INLINE bool addTriangle(const std::array, 3>& corners) { if (canUseIdentityFastPath(corners)) @@ -96,6 +120,9 @@ class CSmoothNormalGenerator final }}); } + //! Hot path for already-prepared identity grouping. This is still triangle accumulation, + //! not a separate algorithm. It simply avoids the generic registration overhead once the + //! caller has committed to `vertexIx == accumulationGroup`. NBL_FORCE_INLINE bool addPreparedIdentityTriangle(const uint32_t i0, const PositionT& p0, const uint32_t i1, const PositionT& p1, const uint32_t i2, const PositionT& p2) { if (!m_groupsByVertex.empty()) @@ -108,6 +135,9 @@ class CSmoothNormalGenerator final return accumulateTriangle(p0, p1, p2, i0, i1, i2); } + //! Writes accumulated normals into the caller-owned output buffer. If `normalNeedsGeneration` + //! is supplied, only those entries marked non-zero are overwritten. This supports the + //! common "preserve existing normals and fill only the missing ones" workflow. 
template NBL_FORCE_INLINE bool finalize(const std::span normals, const std::span normalNeedsGeneration = {}, const NormalT& fallback = NormalT(0.f, 0.f, 1.f)) const { From a32ff117b8de9b44ed232175a0258a6fc5403bcf Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Mar 2026 14:17:19 +0100 Subject: [PATCH 101/118] Load OBJ assets as scenes --- examples_tests | 2 +- .../asset/interchange/COBJMeshFileLoader.cpp | 128 ++++++++++-------- .../asset/interchange/COBJMeshFileLoader.h | 21 ++- 3 files changed, 78 insertions(+), 73 deletions(-) diff --git a/examples_tests b/examples_tests index f060676b31..540fc1ab50 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f060676b3161b10041a53e8bba4224b806926f57 +Subproject commit 540fc1ab50442be3009a3ba6611e1f6d36ceb740 diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 147dd6b0fe..0662628799 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -6,6 +6,8 @@ #include "nbl/core/declarations.h" #include "nbl/asset/IAssetManager.h" #include "nbl/asset/ICPUGeometryCollection.h" +#include "nbl/asset/ICPUMorphTargets.h" +#include "nbl/asset/ICPUScene.h" #include "nbl/asset/interchange/SGeometryContentHash.h" #include "nbl/asset/interchange/SGeometryLoaderCommon.h" #include "nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h" @@ -23,6 +25,7 @@ #include #include #include +#include namespace nbl::asset { namespace @@ -875,70 +878,75 @@ asset::SAssetBundle COBJMeshFileLoader::loadAsset( faceFastTokenCountSum += loaded.faceFastTokenCount; faceFallbackTokenCountSum += loaded.faceFallbackTokenCount; } - loadSession.logTinyIO(_params.logger, ioTelemetry); - const bool buildCollections = - sawObjectDirective || sawGroupDirective || loadedGeometries.size() > 1ull; - core::vector> outputAssets; - uint64_t objectCount = 1ull; - if (!buildCollections) { - // Plain OBJ is 
still just one polygon geometry here. - outputAssets.push_back(core::smart_refctd_ptr_static_cast( - std::move(loadedGeometries.front().geometry))); - } else { - // Plain OBJ can group many polygon geometries with `o` and `g`, but it - // still does not define a real scene graph, instancing, or node transforms. - // Keep that as geometry collections instead of fabricating an ICPUScene on - // load. - core::vector objectNames; - core::vector> - objectCollections; - for (auto& loaded : loadedGeometries) { - size_t objectIx = objectNames.size(); - for (size_t i = 0ull; i < objectNames.size(); ++i) { - if (objectNames[i] == loaded.objectName) { - objectIx = i; - break; - } - } - if (objectIx == objectNames.size()) { - objectNames.push_back(loaded.objectName); - auto collection = core::make_smart_refctd_ptr(); - if (!collection) - return {}; - objectCollections.push_back(std::move(collection)); - } - auto* refs = objectCollections[objectIx]->getGeometries(); - if (!refs) - return {}; - IGeometryCollection::SGeometryReference ref = {}; - ref.geometry = core::smart_refctd_ptr_static_cast>( - loaded.geometry); - refs->push_back(std::move(ref)); - } - outputAssets.reserve(objectCollections.size()); - for (auto& collection : objectCollections) - outputAssets.push_back( - core::smart_refctd_ptr_static_cast(std::move(collection))); - objectCount = outputAssets.size(); + loadSession.logTinyIO(_params.logger, ioTelemetry); + core::vector> objectCollections; + objectCollections.reserve(loadedGeometries.size()); + std::unordered_map objectIndices; + objectIndices.reserve(loadedGeometries.size()); + size_t currentObjectIx = ~size_t(0ull); + std::string_view currentCollectionObjectName; + for (auto& loaded : loadedGeometries) { + const std::string_view objectName(loaded.objectName); + size_t objectIx = currentObjectIx; + if (objectIx == ~size_t(0ull) || currentCollectionObjectName != objectName) { + auto [it, inserted] = objectIndices.try_emplace(objectName, 
objectCollections.size()); + if (inserted) { + auto collection = core::make_smart_refctd_ptr(); + if (!collection) + return {}; + objectCollections.push_back(std::move(collection)); + } + objectIx = it->second; + currentObjectIx = objectIx; + currentCollectionObjectName = objectName; + } + auto* refs = objectCollections[objectIx]->getGeometries(); + if (!refs) + return {}; + IGeometryCollection::SGeometryReference ref = {}; + ref.geometry = core::smart_refctd_ptr_static_cast>(loaded.geometry); + refs->push_back(std::move(ref)); + } + auto scene = ICPUScene::create(nullptr); + if (!scene) + return {}; + auto& instances = scene->getInstances(); + instances.resize(objectCollections.size()); + auto morphTargets = instances.getMorphTargets(); + for (size_t i = 0ull; i < objectCollections.size(); ++i) { + auto targets = core::make_smart_refctd_ptr(); + if (!targets) + return {}; + auto* targetList = targets->getTargets(); + if (!targetList) + return {}; + targetList->push_back({.geoCollection = std::move(objectCollections[i])}); + morphTargets[i] = std::move(targets); } - _params.logger.log( - "OBJ loader stats: file=%s in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu " - "faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu " - "geometries=%llu objects=%llu io_reads=%llu io_min_read=%llu " - "io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", - system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), - static_cast(positions.size()), + // Plain OBJ now loads as a flat scene so later material pairing can attach + // to scene instances. We keep identity transforms here and leave material + // tables invalid until `MTL` support lands. 
+ core::vector> outputAssets; + outputAssets.push_back(core::smart_refctd_ptr_static_cast(std::move(scene))); + const uint64_t objectCount = objectCollections.size(); + _params.logger.log( + "OBJ loader stats: file=%s in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu " + "faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu " + "geometries=%llu objects=%llu io_reads=%llu io_min_read=%llu " + "io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), + static_cast(positions.size()), static_cast(normals.size()), static_cast(uvs.size()), static_cast(outVertexCount), - static_cast(outIndexCount), - static_cast(faceCount), - static_cast(faceFastTokenCountSum), - static_cast(faceFallbackTokenCountSum), - static_cast(loadedGeometries.size()), - static_cast(objectCount), - static_cast(ioTelemetry.callCount), - static_cast(ioTelemetry.getMinOrZero()), + static_cast(outIndexCount), + static_cast(faceCount), + static_cast(faceFastTokenCountSum), + static_cast(faceFallbackTokenCountSum), + static_cast(loadedGeometries.size()), + static_cast(objectCount), + static_cast(ioTelemetry.callCount), + static_cast(ioTelemetry.getMinOrZero()), static_cast(ioTelemetry.getAvgOrZero()), system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(loadSession.ioPlan.strategy).c_str(), diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.h b/src/nbl/asset/interchange/COBJMeshFileLoader.h index e25981e854..339166f7ac 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.h +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.h @@ -5,26 +5,23 @@ #ifndef _NBL_ASSET_C_OBJ_MESH_FILE_LOADER_H_INCLUDED_ #define _NBL_ASSET_C_OBJ_MESH_FILE_LOADER_H_INCLUDED_ #include "nbl/core/declarations.h" -#include "nbl/asset/interchange/IGeometryLoader.h" +#include "nbl/asset/interchange/ISceneLoader.h" namespace nbl::asset { /** - Loads plain OBJ as polygon geometry or geometry collections. 
- Multiple `o` and `g` blocks mean multiple geometry pieces in one file, - not a real scene. - This loader keeps that split as geometry collections because plain OBJ - does not define scene hierarchy, instancing, or node transforms. - OBJ/MTL material data also belongs here and remains TODO, - but that still does not turn plain OBJ into a scene format. - A single mesh payload can therefore load as one geometry, - while multiple split pieces still load as geometry collections - instead of a synthetic scene. + Loads plain OBJ into a flat `ICPUScene`. + Multiple `o` and `g` blocks become separate scene instances backed by + geometry collections. + All instance transforms stay identity here. + Material tables stay invalid until `MTL` support is implemented. + This keeps the geometry parsing logic unchanged while making the top-level + asset shape match how Nabla pairs geometry with materials. References: - https://www.loc.gov/preservation/digital/formats/fdd/fdd000507 - https://www.fileformat.info/format/wavefrontobj/egff.htm */ -class COBJMeshFileLoader : public IGeometryLoader +class COBJMeshFileLoader : public ISceneLoader { public: ~COBJMeshFileLoader() override; From f19123a21137433c304425c964ac1188f901f8c9 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Mar 2026 14:21:40 +0100 Subject: [PATCH 102/118] Trim OBJ scene loader docs --- src/nbl/asset/interchange/COBJMeshFileLoader.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.h b/src/nbl/asset/interchange/COBJMeshFileLoader.h index 339166f7ac..2af6f62bd7 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.h +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.h @@ -14,8 +14,6 @@ namespace nbl::asset geometry collections. All instance transforms stay identity here. Material tables stay invalid until `MTL` support is implemented. 
- This keeps the geometry parsing logic unchanged while making the top-level - asset shape match how Nabla pairs geometry with materials. References: - https://www.loc.gov/preservation/digital/formats/fdd/fdd000507 From 46b9b0ca9433f0ce23548a3a9d9b65d6c7535be8 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Mar 2026 16:12:56 +0100 Subject: [PATCH 103/118] Update examples_tests loaders pointer --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 540fc1ab50..653bbdc171 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 540fc1ab50442be3009a3ba6611e1f6d36ceb740 +Subproject commit 653bbdc171fe7ab7891f6c29e063bc22d327c244 From c0d42efd9fa900c660144b2ace8b07986dc53460 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Mar 2026 17:56:20 +0100 Subject: [PATCH 104/118] Advance examples_tests loaders pointer --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 653bbdc171..328c4fbfc7 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 653bbdc171fe7ab7891f6c29e063bc22d327c244 +Subproject commit 328c4fbfc7ea73178d17814dc430b573726be3ed From d8ad511aeba1e7c4bbb9342a5a2de980087e3c83 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Mar 2026 18:14:45 +0100 Subject: [PATCH 105/118] Advance examples_tests loaders pointer --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 328c4fbfc7..db245df406 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 328c4fbfc7ea73178d17814dc430b573726be3ed +Subproject commit db245df40685e30c5ea0e958fcc104be837aa54a From 82905e7af7f30f4b97bba359e39ae960e10da0e0 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Mar 2026 19:50:37 +0100 Subject: [PATCH 106/118] Advance examples_tests loaders pointer --- 
examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index db245df406..9fe02df82c 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit db245df40685e30c5ea0e958fcc104be837aa54a +Subproject commit 9fe02df82cc2a63153f634bb6f45c6289d8389f3 From 93eddc412cdf1e9682c8b33957fa3a7d4e55ed5e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Mar 2026 20:46:41 +0100 Subject: [PATCH 107/118] Advance examples_tests loaders pointer --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 9fe02df82c..9d1103fe35 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 9fe02df82cc2a63153f634bb6f45c6289d8389f3 +Subproject commit 9d1103fe35fda109e0c659dc2675d133b8f4b170 From 8c1b8ae65cd83dd99c51be1cd25c0ff5276b282e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 10 Mar 2026 22:14:07 +0100 Subject: [PATCH 108/118] Advance examples_tests loaders pointer --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 9d1103fe35..b38a4f75b6 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 9d1103fe35fda109e0c659dc2675d133b8f4b170 +Subproject commit b38a4f75b684a046f26fce00c64dab54453f95cd From e9f19c8c4bf534adfc548d117cec4a4502c0bfb3 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Mar 2026 08:59:05 +0100 Subject: [PATCH 109/118] Add growable memory file utility --- include/nbl/asset/IAssetManager.h | 19 ++ include/nbl/system/CGrowableMemoryFile.h | 325 +++++++++++++++++++ src/nbl/asset/interchange/COBJMeshWriter.cpp | 2 +- src/nbl/asset/interchange/CPLYMeshWriter.cpp | 2 +- src/nbl/asset/interchange/CSTLMeshWriter.cpp | 2 +- 5 files changed, 347 insertions(+), 3 deletions(-) create mode 100644 include/nbl/system/CGrowableMemoryFile.h diff --git a/include/nbl/asset/IAssetManager.h 
b/include/nbl/asset/IAssetManager.h index 07257bbaf5..557aff64dc 100644 --- a/include/nbl/asset/IAssetManager.h +++ b/include/nbl/asset/IAssetManager.h @@ -5,6 +5,7 @@ #define _NBL_ASSET_I_ASSET_MANAGER_H_INCLUDED_ #include +#include #include #include "nbl/core/declarations.h" @@ -51,6 +52,12 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted friend std::function makeAssetDisposeFunc(const IAssetManager* const _mgr); public: + struct SWriterFlagInfo + { + writer_flags_t supported = EWF_NONE; + writer_flags_t forced = EWF_NONE; + }; + #ifdef USE_MAPS_FOR_PATH_BASED_CACHE using AssetCacheType = core::CConcurrentMultiObjectCache; #else @@ -397,6 +404,18 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted return writeAsset(_file, _params, nullptr); } + inline std::optional getAssetWriterFlagInfo(const IAsset::E_TYPE assetType, const std::string_view extension) const + { + const auto capableWritersRng = m_writers.perTypeAndFileExt.findRange({assetType, std::string(extension)}); + if (capableWritersRng.empty()) + return std::nullopt; + auto* const writer = capableWritersRng.begin()->second; + return SWriterFlagInfo{ + .supported = writer->getSupportedFlags(), + .forced = writer->getForcedFlags() + }; + } + // Asset Loaders [FOLLOWING ARE NOT THREAD SAFE] uint32_t getAssetLoaderCount() { return static_cast(m_loaders.vector.size()); } diff --git a/include/nbl/system/CGrowableMemoryFile.h b/include/nbl/system/CGrowableMemoryFile.h new file mode 100644 index 0000000000..aea3e60a15 --- /dev/null +++ b/include/nbl/system/CGrowableMemoryFile.h @@ -0,0 +1,325 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_SYSTEM_C_GROWABLE_MEMORY_FILE_H_INCLUDED_ +#define _NBL_SYSTEM_C_GROWABLE_MEMORY_FILE_H_INCLUDED_ + +#include "nbl/system/IFile.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nbl::system +{ + +namespace impl +{ + +struct CImmediateFileIoResultSetter final : ISystem::IFutureManipulator +{ + using ISystem::IFutureManipulator::set_result; +}; + +struct CNoopMutex +{ + inline void lock() {} + inline void unlock() {} +}; + +class CGrowableMemoryFileStorage +{ + public: + constexpr static inline size_t InitialGrowthBytes = 1ull << 20; // 1 MiB + + inline size_t size() const + { + return m_storage.size(); + } + + inline size_t capacity() const + { + return m_storage.capacity(); + } + + inline void reserve(const size_t reservedSize) + { + m_storage.reserve(reservedSize); + } + + inline void clear() + { + m_storage.clear(); + } + + inline const std::byte* data() const + { + return m_storage.empty() ? nullptr : m_storage.data(); + } + + inline std::byte* data() + { + return m_storage.empty() ? 
nullptr : m_storage.data(); + } + + inline std::vector copyData() const + { + return m_storage; + } + + inline size_t read(void* const buffer, const size_t offset, const size_t sizeToRead) const + { + if (offset >= m_storage.size()) + return 0ull; + + const size_t clampedRead = std::min(sizeToRead, m_storage.size() - offset); + std::memcpy(buffer, m_storage.data() + offset, clampedRead); + return clampedRead; + } + + inline size_t write(const void* const buffer, const size_t offset, const size_t sizeToWrite) + { + const size_t requiredSize = offset + sizeToWrite; + if (requiredSize > m_storage.capacity()) + reserve(growCapacity(requiredSize)); + if (requiredSize > m_storage.size()) + m_storage.resize(requiredSize); + std::memcpy(m_storage.data() + offset, buffer, sizeToWrite); + return sizeToWrite; + } + + private: + inline size_t growCapacity(const size_t requiredSize) const + { + size_t currentCapacity = m_storage.capacity(); + if (currentCapacity == 0ull) + currentCapacity = InitialGrowthBytes; + + size_t nextCapacity = currentCapacity; + while (nextCapacity < requiredSize) + { + const size_t growth = std::max(nextCapacity, InitialGrowthBytes); + if (nextCapacity > std::numeric_limits::max() - growth) + return requiredSize; + nextCapacity += growth; + } + return nextCapacity; + } + + std::vector m_storage; +}; + +template +class IGrowableMemoryFile : public IFile +{ + protected: + using mutex_t = MutexType; + + inline explicit IGrowableMemoryFile(path&& filename, const size_t reservedSize = 0ull, const time_point_t initialModified = std::chrono::utc_clock::now()) + : IFile(std::move(filename), core::bitflag(E_CREATE_FLAGS::ECF_READ_WRITE), initialModified) + { + reserve(reservedSize); + } + + template + inline decltype(auto) withLockedStorage(Fn&& fn) + { + std::lock_guard lock(m_mutex); + return std::forward(fn)(m_storage); + } + + template + inline decltype(auto) withLockedStorage(Fn&& fn) const + { + std::lock_guard lock(m_mutex); + return 
std::forward(fn)(m_storage); + } + + public: + inline size_t getSize() const override + { + return withLockedStorage([](const CGrowableMemoryFileStorage& storage) { + return storage.size(); + }); + } + + inline size_t capacity() const + { + return withLockedStorage([](const CGrowableMemoryFileStorage& storage) { + return storage.capacity(); + }); + } + + //! Optional capacity hint for callers that can estimate the final serialized size. + /** The internal storage already uses an adaptive growth policy, so this is only a performance hint. */ + inline void reserve(const size_t reservedSize) + { + withLockedStorage([reservedSize](CGrowableMemoryFileStorage& storage) { + storage.reserve(reservedSize); + }); + } + + inline void clear() + { + withLockedStorage([](CGrowableMemoryFileStorage& storage) { + storage.clear(); + }); + setLastWriteTime(); + } + + inline std::vector copyData() const + { + return withLockedStorage([](const CGrowableMemoryFileStorage& storage) { + return storage.copyData(); + }); + } + + protected: + inline void* getMappedPointer_impl() override + { + return nullptr; + } + + inline const void* getMappedPointer_impl() const override + { + return nullptr; + } + + inline void unmappedRead(ISystem::future_t& fut, void* buffer, size_t offset, size_t sizeToRead) override + { + static const CImmediateFileIoResultSetter resultSetter = {}; + const size_t processed = withLockedStorage([buffer, offset, sizeToRead](const CGrowableMemoryFileStorage& storage) { + return storage.read(buffer, offset, sizeToRead); + }); + resultSetter.set_result(fut, processed); + } + + inline void unmappedWrite(ISystem::future_t& fut, const void* buffer, size_t offset, size_t sizeToWrite) override + { + static const CImmediateFileIoResultSetter resultSetter = {}; + const size_t processed = withLockedStorage([buffer, offset, sizeToWrite](CGrowableMemoryFileStorage& storage) { + return storage.write(buffer, offset, sizeToWrite); + }); + resultSetter.set_result(fut, processed); + } + 
+ private: + mutable mutex_t m_mutex; + CGrowableMemoryFileStorage m_storage; +}; + +} + +//! A lightweight growable in-memory implementation of `system::IFile`. +/** + This class stores file contents in a dynamically growing byte buffer while preserving the regular + Nabla file-oriented API. It is useful in flows that want `IFile*` interoperability without + forcing an obligatory round-trip through the host filesystem. + + Representative use-cases include: + - serialization roundtrip validation + - benchmark or profiling harnesses that want to separate codec work from storage latency + - tool pipelines that need a temporary serialized representation but do not require a persistent file + + The object grows on demand during writes and can later be consumed by APIs that read from + `system::IFile*`, for example `IAssetManager::getAsset(system::IFile*, supposedFilename, ...)`. + + Allocation policy: + - storage growth is handled internally + - capacity expansion is geometric rather than exact-size-only + - the first growth step uses a minimum allocation quantum of `1 MiB` + - callers may still provide an explicit `reserve(...)` hint if they already know the likely output size + + This keeps the common case simple for callers while reducing the amount of repeated reallocation + and copying that would otherwise happen during long sequential write streams. 
+ + Important notes: + - reads and writes are positional and operate on the current logical size + - `getMappedPointer()` intentionally returns `nullptr` + The storage is growable, so exposing a stable mapped pointer would be misleading + - this class is not thread-safe + Concurrent read, write, reserve, clear, or direct `data()` access on the same object requires external synchronization +*/ +class CGrowableMemoryFile final : public impl::IGrowableMemoryFile +{ + using base_t = impl::IGrowableMemoryFile; + + public: + using base_t::capacity; + using base_t::clear; + using base_t::copyData; + using base_t::reserve; + + inline explicit CGrowableMemoryFile(path&& filename, const size_t reservedSize = 0ull, const time_point_t initialModified = std::chrono::utc_clock::now()) + : base_t(std::move(filename), reservedSize, initialModified) + { + } + + inline const std::byte* data() const + { + return withLockedStorage([](const impl::CGrowableMemoryFileStorage& storage) { + return storage.data(); + }); + } + + inline std::byte* data() + { + return withLockedStorage([](impl::CGrowableMemoryFileStorage& storage) { + return storage.data(); + }); + } +}; + +//! A synchronized growable in-memory implementation of `system::IFile`. +/** + This variant serializes internal operations with a mutex. It is intended for cases where the same + memory-backed file object may be touched from multiple threads and external synchronization is not + desirable or not available. + + The synchronized variant intentionally does not expose raw `data()` accessors. A raw pointer would + not carry any lifetime relationship to the internal lock and would therefore invite accidental use + after another thread mutates or reallocates the storage. 
Callers that need to inspect the contents + can either: + - take a snapshot with `copyData()` + - use `withLockedData(...)` and keep any pointer or span-like view strictly inside the callback +*/ +class CSynchronizedGrowableMemoryFile final : public impl::IGrowableMemoryFile +{ + using base_t = impl::IGrowableMemoryFile; + + public: + using base_t::capacity; + using base_t::clear; + using base_t::copyData; + using base_t::reserve; + + inline explicit CSynchronizedGrowableMemoryFile(path&& filename, const size_t reservedSize = 0ull, const time_point_t initialModified = std::chrono::utc_clock::now()) + : base_t(std::move(filename), reservedSize, initialModified) + { + } + + template + inline decltype(auto) withLockedData(Fn&& fn) + { + return withLockedStorage([&fn](impl::CGrowableMemoryFileStorage& storage) -> decltype(auto) { + return std::forward(fn)(storage.data(), storage.size()); + }); + } + + template + inline decltype(auto) withLockedData(Fn&& fn) const + { + return withLockedStorage([&fn](const impl::CGrowableMemoryFileStorage& storage) -> decltype(auto) { + return std::forward(fn)(storage.data(), storage.size()); + }); + } +}; + +} + +#endif diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp index 358d7cfd29..ccd48e599d 100644 --- a/src/nbl/asset/interchange/COBJMeshWriter.cpp +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -37,7 +37,7 @@ const char** COBJMeshWriter::getAssociatedFileExtensions() const } writer_flags_t COBJMeshWriter::getSupportedFlags() { - return EWF_NONE; + return EWF_MESH_IS_RIGHT_HANDED; } writer_flags_t COBJMeshWriter::getForcedFlags() { diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index 3fc671d82f..0d6f1e7b92 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -36,7 +36,7 @@ const char** CPLYMeshWriter::getAssociatedFileExtensions() const } writer_flags_t 
CPLYMeshWriter::getSupportedFlags() { - return asset::EWF_BINARY; + return writer_flags_t(asset::EWF_BINARY | asset::EWF_MESH_IS_RIGHT_HANDED); } writer_flags_t CPLYMeshWriter::getForcedFlags() { diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 543106d502..db2b70aef4 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -502,7 +502,7 @@ const char** CSTLMeshWriter::getAssociatedFileExtensions() const } writer_flags_t CSTLMeshWriter::getSupportedFlags() { - return asset::EWF_BINARY; + return writer_flags_t(asset::EWF_BINARY | asset::EWF_MESH_IS_RIGHT_HANDED); } writer_flags_t CSTLMeshWriter::getForcedFlags() { From ece07faabe477ad0efe3e60c56e408a36892468e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Mar 2026 08:59:12 +0100 Subject: [PATCH 110/118] Advance examples_tests loaders pointer --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index b38a4f75b6..ec33704c3c 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit b38a4f75b684a046f26fce00c64dab54453f95cd +Subproject commit ec33704c3c3a9771373ef09be0a61f1b655d40d6 From 0a1308b8840f0fcd727c2d6b3bf35ce37a81d2b0 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Mar 2026 11:26:07 +0100 Subject: [PATCH 111/118] Expose CPU system info --- examples_tests | 2 +- include/nbl/system/ISystem.h | 2 + src/nbl/system/CSystemLinux.cpp | 68 ++++++++++++++++++++++++++++++++- src/nbl/system/CSystemWin32.cpp | 61 +++++++++++++++++++++++++++++ 4 files changed, 131 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index ec33704c3c..d18b6d9d60 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit ec33704c3c3a9771373ef09be0a61f1b655d40d6 +Subproject commit d18b6d9d60351ca9806ff0dc60d472525843d61c diff --git a/include/nbl/system/ISystem.h 
b/include/nbl/system/ISystem.h index 1b31bc9061..658604febd 100644 --- a/include/nbl/system/ISystem.h +++ b/include/nbl/system/ISystem.h @@ -150,6 +150,7 @@ class NBL_API2 ISystem : public core::IReferenceCounted struct SystemInfo { uint64_t cpuFrequencyHz = 0u; + uint32_t physicalCoreCount = 0u; // in bytes uint64_t totalMemory = 0u; @@ -158,6 +159,7 @@ class NBL_API2 ISystem : public core::IReferenceCounted uint32_t desktopResX = 0u; uint32_t desktopResY = 0u; + std::string cpuName = "Unknown"; std::string OSFullName = "Unknown"; }; virtual SystemInfo getSystemInfo() const = 0; diff --git a/src/nbl/system/CSystemLinux.cpp b/src/nbl/system/CSystemLinux.cpp index 58aaeeb51b..8a47ac09b3 100644 --- a/src/nbl/system/CSystemLinux.cpp +++ b/src/nbl/system/CSystemLinux.cpp @@ -5,10 +5,29 @@ using namespace nbl::system; #ifdef _NBL_PLATFORM_LINUX_ +#include +#include +#include +#include +#include #include #include #include #include + +namespace +{ + +std::string trimCopy(std::string value) +{ + auto notSpace = [](unsigned char ch) { return !std::isspace(ch); }; + value.erase(value.begin(), std::find_if(value.begin(), value.end(), notSpace)); + value.erase(std::find_if(value.rbegin(), value.rend(), notSpace).base(), value.end()); + return value; +} + +} + ISystem::SystemInfo CSystemLinux::getSystemInfo() const { SystemInfo info; @@ -27,6 +46,53 @@ ISystem::SystemInfo CSystemLinux::getSystemInfo() const info.desktopResX = 0xdeadbeefu; info.desktopResY = 0xdeadbeefu; + std::ifstream cpuInfo("/proc/cpuinfo"); + std::unordered_set uniquePhysicalCores; + std::string currentPhysicalId; + std::string currentCoreId; + auto flushCurrentCore = [&]() + { + if (!currentPhysicalId.empty() || !currentCoreId.empty()) + uniquePhysicalCores.insert(currentPhysicalId + ":" + currentCoreId); + currentPhysicalId.clear(); + currentCoreId.clear(); + }; + + for (std::string line; std::getline(cpuInfo, line);) + { + if (line.empty()) + { + flushCurrentCore(); + continue; + } + + if 
(line.starts_with("model name")) + { + const auto separator = line.find(':'); + if (separator != std::string::npos && info.cpuName == "Unknown") + info.cpuName = trimCopy(line.substr(separator + 1u)); + continue; + } + + if (line.starts_with("physical id")) + { + const auto separator = line.find(':'); + if (separator != std::string::npos) + currentPhysicalId = trimCopy(line.substr(separator + 1u)); + continue; + } + + if (line.starts_with("core id")) + { + const auto separator = line.find(':'); + if (separator != std::string::npos) + currentCoreId = trimCopy(line.substr(separator + 1u)); + continue; + } + } + flushCurrentCore(); + info.physicalCoreCount = static_cast(uniquePhysicalCores.size()); + return info; } @@ -62,4 +128,4 @@ bool isDebuggerAttached() return false; } -#endif \ No newline at end of file +#endif diff --git a/src/nbl/system/CSystemWin32.cpp b/src/nbl/system/CSystemWin32.cpp index 7dcf429603..603d92b4fb 100644 --- a/src/nbl/system/CSystemWin32.cpp +++ b/src/nbl/system/CSystemWin32.cpp @@ -5,7 +5,66 @@ using namespace nbl; using namespace nbl::system; #ifdef _NBL_PLATFORM_WINDOWS_ +#include +#include #include +#include +#include +#include + +namespace +{ + +std::string queryCpuName() +{ + int cpuInfo[4] = {}; + __cpuid(cpuInfo, 0x80000000); + const auto maxExtendedLeaf = static_cast(cpuInfo[0]); + if (maxExtendedLeaf < 0x80000004u) + return "Unknown"; + + std::array brandString = {}; + auto* cursor = reinterpret_cast(brandString.data()); + for (auto leaf = 0x80000002; leaf <= 0x80000004; ++leaf) + { + __cpuid(cpuInfo, leaf); + std::memcpy(cursor, cpuInfo, sizeof(cpuInfo)); + cursor += sizeof(cpuInfo) / sizeof(int); + } + + std::string result = brandString.data(); + auto notSpace = [](unsigned char ch) { return !std::isspace(ch); }; + result.erase(result.begin(), std::find_if(result.begin(), result.end(), notSpace)); + result.erase(std::find_if(result.rbegin(), result.rend(), notSpace).base(), result.end()); + return result.empty() ? 
std::string("Unknown") : result; +} + +uint32_t queryPhysicalCoreCount() +{ + DWORD bufferSize = 0u; + GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &bufferSize); + if (bufferSize == 0u) + return 0u; + + std::vector buffer(bufferSize); + auto* info = reinterpret_cast(buffer.data()); + if (!GetLogicalProcessorInformationEx(RelationProcessorCore, info, &bufferSize)) + return 0u; + + uint32_t coreCount = 0u; + auto* current = reinterpret_cast(info); + const auto* end = current + bufferSize; + while (current < end) + { + auto* entry = reinterpret_cast(current); + if (entry->Relationship == RelationProcessorCore) + ++coreCount; + current += entry->Size; + } + return coreCount; +} + +} //LOL the struct definition wasn't added to winapi headers do they ask to declare them yourself typedef struct _PROCESSOR_POWER_INFORMATION { @@ -34,6 +93,8 @@ ISystem::SystemInfo CSystemWin32::getSystemInfo() const info.desktopResX = GetSystemMetrics(SM_CXSCREEN); info.desktopResY = GetSystemMetrics(SM_CYSCREEN); + info.cpuName = queryCpuName(); + info.physicalCoreCount = queryPhysicalCoreCount(); return info; } From a2c26395fabd695cbf8921e91ada3235b350b32c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Mar 2026 11:49:48 +0100 Subject: [PATCH 112/118] Track examples git metadata --- 3rdparty/CMakeLists.txt | 4 ++++ examples_tests | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 966dac1fe0..6335406824 100755 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -284,6 +284,10 @@ option(GIT_EXCLUDE_IS_DIRTY "Exclude IS_DIRTY from git tracking checks, will inc add_subdirectory(git-version-tracking EXCLUDE_FROM_ALL) NBL_ADD_GIT_TRACKING_META_LIBRARY(nabla "${NBL_ROOT_PATH}") NBL_ADD_GIT_TRACKING_META_LIBRARY(dxc "${CMAKE_CURRENT_SOURCE_DIR}/dxc/dxc") +if (EXISTS "${NBL_ROOT_PATH}/examples_tests/.git") + NBL_ADD_GIT_TRACKING_META_LIBRARY(examples 
"${NBL_ROOT_PATH}/examples_tests") + nbl_install_file("${CMAKE_CURRENT_BINARY_DIR}/git-version-tracking/examples_git_info.json") +endif() nbl_install_file("${CMAKE_CURRENT_BINARY_DIR}/git-version-tracking/nabla_git_info.json") nbl_install_file("${CMAKE_CURRENT_BINARY_DIR}/git-version-tracking/dxc_git_info.json") diff --git a/examples_tests b/examples_tests index d18b6d9d60..530edd61f9 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit d18b6d9d60351ca9806ff0dc60d472525843d61c +Subproject commit 530edd61f9634b66fb7059e2cb908639d36d40b8 From 2fc8438c206ec56158a0dc23c25b42ff4a3f029a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Mar 2026 12:39:27 +0100 Subject: [PATCH 113/118] Move examples git tracking to examples project --- 3rdparty/CMakeLists.txt | 4 ---- 3rdparty/git-version-tracking | 2 +- examples_tests | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 6335406824..966dac1fe0 100755 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -284,10 +284,6 @@ option(GIT_EXCLUDE_IS_DIRTY "Exclude IS_DIRTY from git tracking checks, will inc add_subdirectory(git-version-tracking EXCLUDE_FROM_ALL) NBL_ADD_GIT_TRACKING_META_LIBRARY(nabla "${NBL_ROOT_PATH}") NBL_ADD_GIT_TRACKING_META_LIBRARY(dxc "${CMAKE_CURRENT_SOURCE_DIR}/dxc/dxc") -if (EXISTS "${NBL_ROOT_PATH}/examples_tests/.git") - NBL_ADD_GIT_TRACKING_META_LIBRARY(examples "${NBL_ROOT_PATH}/examples_tests") - nbl_install_file("${CMAKE_CURRENT_BINARY_DIR}/git-version-tracking/examples_git_info.json") -endif() nbl_install_file("${CMAKE_CURRENT_BINARY_DIR}/git-version-tracking/nabla_git_info.json") nbl_install_file("${CMAKE_CURRENT_BINARY_DIR}/git-version-tracking/dxc_git_info.json") diff --git a/3rdparty/git-version-tracking b/3rdparty/git-version-tracking index 6c3ecac5f0..74d60c21dc 160000 --- a/3rdparty/git-version-tracking +++ b/3rdparty/git-version-tracking @@ -1 +1 @@ -Subproject commit 
6c3ecac5f0297877d1573ef4e3cdb537c5feeb62 +Subproject commit 74d60c21dccd9efcaf14e02acf40cc8780b6059f diff --git a/examples_tests b/examples_tests index 530edd61f9..2aa2d14491 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 530edd61f9634b66fb7059e2cb908639d36d40b8 +Subproject commit 2aa2d144915aefd46fa7d9bfe7f68b9c3d37f7b7 From ea6b9dab3ade56d657f277d82e3e60c9f0360efd Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Mar 2026 13:00:56 +0100 Subject: [PATCH 114/118] Update examples git tracking hooks --- 3rdparty/git-version-tracking | 2 +- examples_tests | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/3rdparty/git-version-tracking b/3rdparty/git-version-tracking index 74d60c21dc..dc6031ec3b 160000 --- a/3rdparty/git-version-tracking +++ b/3rdparty/git-version-tracking @@ -1 +1 @@ -Subproject commit 74d60c21dccd9efcaf14e02acf40cc8780b6059f +Subproject commit dc6031ec3b6bf5f1421d6452188368e66f813f66 diff --git a/examples_tests b/examples_tests index 2aa2d14491..348eef714f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2aa2d144915aefd46fa7d9bfe7f68b9c3d37f7b7 +Subproject commit 348eef714febe90da0195d89bc364df7a45320d5 From cc3952accd0367d64169f8354cea1283824202a1 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Mar 2026 17:25:57 +0100 Subject: [PATCH 115/118] Adopt scoped git metadata targets --- 3rdparty/CMakeLists.txt | 35 +++++++++++++++++++++------ 3rdparty/git-version-tracking | 2 +- examples_tests | 2 +- include/nabla.h | 6 ++--- include/nbl/logging_macros.h | 8 +++--- include/nbl/video/IGPUCommandBuffer.h | 2 +- include/nbl/video/ILogicalDevice.h | 4 +-- src/nbl/gtml.cpp | 8 +++--- src/nbl/video/ILogicalDevice.cpp | 4 +-- src/nbl/video/IQueue.cpp | 4 +-- tools/nsc/main.cpp | 24 +++--------------- 11 files changed, 50 insertions(+), 49 deletions(-) diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 966dac1fe0..826bfa4cad 100755 
--- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -282,12 +282,31 @@ target_compile_definitions(spirv_cross PUBLIC SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIO # note that checking if a repository is dirty may cost build time (especially a lot if like us you have a lot of submodules) - by default we run with all checks but if you want to increase build time iterations I recommend to exclude this check option(GIT_EXCLUDE_IS_DIRTY "Exclude IS_DIRTY from git tracking checks, will increase build time iterations at the expense of the meta information loss" OFF) add_subdirectory(git-version-tracking EXCLUDE_FROM_ALL) -NBL_ADD_GIT_TRACKING_META_LIBRARY(nabla "${NBL_ROOT_PATH}") -NBL_ADD_GIT_TRACKING_META_LIBRARY(dxc "${CMAKE_CURRENT_SOURCE_DIR}/dxc/dxc") -nbl_install_file("${CMAKE_CURRENT_BINARY_DIR}/git-version-tracking/nabla_git_info.json") -nbl_install_file("${CMAKE_CURRENT_BINARY_DIR}/git-version-tracking/dxc_git_info.json") - -NBL_GENERATE_GIT_TRACKING_META() +NBL_CONFIGURE_GIT_TRACKING_META_RUNTIME( + TARGET gtml_core + NAMESPACE gtml + IGITINFO_HEADER_PATH nbl/gtml/IGitInfo.h + JSON_FORMATTER_HEADER_PATH nbl/gtml/SJsonFormatter.h +) +NBL_ADD_GIT_TRACKING_META_LIBRARY( + TARGET gtml + NAMESPACE nbl::gtml + HEADER_PATH nbl/git/info.h + REPOS + nabla "${NBL_ROOT_PATH}" + dxc "${CMAKE_CURRENT_SOURCE_DIR}/dxc/dxc" +) +NBL_GET_GIT_TRACKING_META_RUNTIME_OUTPUTS( + IGITINFO_HEADER_OUTPUT_VAR _NBL_GTML_IGITINFO_HEADER + JSON_FORMATTER_HEADER_OUTPUT_VAR _NBL_GTML_JSON_FORMATTER_HEADER +) +NBL_GET_GIT_TRACKING_META_OUTPUTS(TARGET gtml HEADER_OUTPUT_VAR _NBL_GTML_HEADER JSON_OUTPUTS_VAR _NBL_GTML_JSONS) +nbl_install_file_spec("${_NBL_GTML_IGITINFO_HEADER}" "nbl/gtml") +nbl_install_file_spec("${_NBL_GTML_JSON_FORMATTER_HEADER}" "nbl/gtml") +nbl_install_file_spec("${_NBL_GTML_HEADER}" "nbl/git") +foreach(_NBL_GTML_JSON IN LISTS _NBL_GTML_JSONS) + nbl_install_file("${_NBL_GTML_JSON}") +endforeach() # NGFX include(ngfx/ngfx.cmake) @@ -456,6 +475,8 @@ set(NBL_3RDPARTY_TARGETS 
lz4 aesGladman spirv_cross + gtml_core + gtml png_static zlibstatic shaderc_util @@ -532,8 +553,6 @@ nbl_install_dir(boost/superproject/libs/preprocessor/include/boost) nbl_install_file_spec(renderdoc/renderdoc_app.h renderdoc) -nbl_install_file(${CMAKE_CURRENT_BINARY_DIR}/git-version-tracking/git_info.h) - # parent scope exports, must be at the end of the file set(_NBL_3RDPARTY_TARGETS_ ${NBL_3RDPARTY_TARGETS} diff --git a/3rdparty/git-version-tracking b/3rdparty/git-version-tracking index dc6031ec3b..b0a7450c14 160000 --- a/3rdparty/git-version-tracking +++ b/3rdparty/git-version-tracking @@ -1 +1 @@ -Subproject commit dc6031ec3b6bf5f1421d6452188368e66f813f66 +Subproject commit b0a7450c141e8520c0225370d7408ed9a18e8efb diff --git a/examples_tests b/examples_tests index 348eef714f..87b1305d65 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 348eef714febe90da0195d89bc364df7a45320d5 +Subproject commit 87b1305d6504dade7d7c901d7ab5f9e71f1fbaa3 diff --git a/include/nabla.h b/include/nabla.h index fa231e3db7..cedf6b0ebf 100644 --- a/include/nabla.h +++ b/include/nabla.h @@ -64,11 +64,11 @@ #include "SColor.h" // meta info -#include "git_info.h" +#include "nbl/git/info.h" namespace nbl { - const NBL_API2 gtml::GitInfo& getGitInfo(gtml::E_GIT_REPO_META repo); + const NBL_API2 ::gtml::IGitInfo& getGitInfo(gtml::E_GIT_REPO_META repo); } -#endif // __NABLA_H_INCLUDED__ \ No newline at end of file +#endif // __NABLA_H_INCLUDED__ diff --git a/include/nbl/logging_macros.h b/include/nbl/logging_macros.h index cf4f63f9bc..97cbdcc0c2 100644 --- a/include/nbl/logging_macros.h +++ b/include/nbl/logging_macros.h @@ -1,8 +1,8 @@ #if defined(NBL_LOG) || defined(NBL_LOG_ERROR) #error redefinition of NBL_LOG/NBL_LOG_ERROR. did you forgot to undefine logging macros somewhere? 
#include "nbl/undefine_logging_macros.h" -#elif !defined(_GIT_INFO_H_INCLUDED_) - #error logging macros require git meta info, include "git_info.h" +#elif !defined(_NBL_GIT_INFO_H_INCLUDED_) + #error logging macros require git meta info, include "nbl/git/info.h" #else - #define NBL_LOG(SEVERITY, FORMAT, ...) NBL_LOG_FUNCTION(FORMAT" [%s][%s - %s:%d]", SEVERITY __VA_OPT__(,) __VA_ARGS__, nbl::gtml::nabla_git_info.commitShortHash, __FUNCTION__, __FILE__, __LINE__); + #define NBL_LOG(SEVERITY, FORMAT, ...) NBL_LOG_FUNCTION(FORMAT" [%s][%s - %s:%d]", SEVERITY __VA_OPT__(,) __VA_ARGS__, nbl::gtml::nabla_git_info.commitShortHash().data(), __FUNCTION__, __FILE__, __LINE__); #define NBL_LOG_ERROR(FORMAT, ...) NBL_LOG(nbl::system::ILogger::ELL_ERROR, FORMAT __VA_OPT__(,) __VA_ARGS__) -#endif \ No newline at end of file +#endif diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 8f0f1fce30..b863de3030 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -10,7 +10,7 @@ #include "nbl/video/IGPUCommandPool.h" #include "nbl/video/IQueue.h" -#include "git_info.h" +#include "nbl/git/info.h" #define NBL_LOG_FUNCTION m_logger.log #include "nbl/logging_macros.h" diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index ae351fdecd..983f6c6b5a 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -15,7 +15,7 @@ #include "nbl/video/CThreadSafeQueueAdapter.h" #include "nbl/video/CJITIncludeLoader.h" -#include "git_info.h" +#include "nbl/git/info.h" #define NBL_LOG_FUNCTION m_logger.log #include "nbl/logging_macros.h" @@ -1606,4 +1606,4 @@ inline bool ILogicalDevice::validateMemoryBarrier(const uint32_t queueFamilyInde } // namespace nbl::video #include "nbl/undef_logging_macros.h" -#endif //_NBL_VIDEO_I_LOGICAL_DEVICE_H_INCLUDED_ \ No newline at end of file +#endif //_NBL_VIDEO_I_LOGICAL_DEVICE_H_INCLUDED_ diff --git 
a/src/nbl/gtml.cpp b/src/nbl/gtml.cpp index 2829c03c07..f1f9b1d0fe 100644 --- a/src/nbl/gtml.cpp +++ b/src/nbl/gtml.cpp @@ -1,7 +1,7 @@ -#include "git_info.h" +#include "nbl/git/info.h" namespace nbl { - const gtml::GitInfo& getGitInfo(gtml::E_GIT_REPO_META repo) { - return gtml::gitMeta[repo]; + const ::gtml::IGitInfo& getGitInfo(gtml::E_GIT_REPO_META repo) { + return *gtml::gitMeta[repo]; } -} \ No newline at end of file +} diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index a98deff5c7..c24f7c1950 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -1,6 +1,6 @@ #include "nbl/video/IPhysicalDevice.h" -#include "git_info.h" +#include "nbl/git/info.h" #define NBL_LOG_FUNCTION m_logger.log #include "nbl/logging_macros.h" @@ -1147,4 +1147,4 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline } return retval; } -#include "nbl/undef_logging_macros.h" \ No newline at end of file +#include "nbl/undef_logging_macros.h" diff --git a/src/nbl/video/IQueue.cpp b/src/nbl/video/IQueue.cpp index 70acecffca..4e24a0e7e9 100644 --- a/src/nbl/video/IQueue.cpp +++ b/src/nbl/video/IQueue.cpp @@ -3,7 +3,7 @@ #include "nbl/video/ILogicalDevice.h" #include "nbl/video/TimelineEventHandlers.h" -#include "git_info.h" +#include "nbl/git/info.h" #define NBL_LOG_FUNCTION logger->log #include "nbl/logging_macros.h" @@ -245,4 +245,4 @@ void IQueue::DeferredSubmitCallback::operator()() } // namespace nbl::video -#include "nbl/undef_logging_macros.h" \ No newline at end of file +#include "nbl/undef_logging_macros.h" diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 203aa6ce8c..9745a17299 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -1,4 +1,5 @@ #include "nabla.h" +#include "nbl/gtml/SJsonFormatter.h" #include "nbl/system/IApplicationFramework.h" #include #include @@ -418,27 +419,8 @@ class ShaderCompiler final : public IApplicationFramework { ::json j; auto& modules = 
j["modules"]; - - auto serialize = [&](const gtml::GitInfo& info, std::string_view target) - { - auto& s = modules[target.data()]; - s["isPopulated"] = info.isPopulated; - s["hasUncommittedChanges"] = info.hasUncommittedChanges.has_value() ? ::json(info.hasUncommittedChanges.value()) : ::json("UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"); - s["commitAuthorName"] = info.commitAuthorName; - s["commitAuthorEmail"] = info.commitAuthorEmail; - s["commitHash"] = info.commitHash; - s["commitShortHash"] = info.commitShortHash; - s["commitDate"] = info.commitDate; - s["commitSubject"] = info.commitSubject; - s["commitBody"] = info.commitBody; - s["describe"] = info.describe; - s["branchName"] = info.branchName; - s["latestTag"] = info.latestTag; - s["latestTagName"] = info.latestTagName; - }; - - serialize(gtml::nabla_git_info, "nabla"); - serialize(gtml::dxc_git_info, "dxc"); + modules["nabla"] = ::json::parse(::gtml::SJsonFormatter::toString(nbl::gtml::nabla_git_info)); + modules["dxc"] = ::json::parse(::gtml::SJsonFormatter::toString(nbl::gtml::dxc_git_info)); const auto pretty = j.dump(4); std::cout << pretty << std::endl; From f2a9685d7a6ce58f54f5587d74073ab9191ccf96 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Mar 2026 18:24:12 +0100 Subject: [PATCH 116/118] Enable long paths for perf reference updates --- examples_tests | 2 +- include/nbl/system/ISystem.h | 2 +- src/nbl/system/CSystemWin32.cpp | 29 ++++++++++++++++++++++++++--- src/nbl/system/ISystem.cpp | 13 +------------ 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/examples_tests b/examples_tests index 87b1305d65..a41b88c0d1 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 87b1305d6504dade7d7c901d7ab5f9e71f1fbaa3 +Subproject commit a41b88c0d14536c299680290250880770156ffd0 diff --git a/include/nbl/system/ISystem.h b/include/nbl/system/ISystem.h index 658604febd..9ee5f0bb83 100644 --- a/include/nbl/system/ISystem.h +++ 
b/include/nbl/system/ISystem.h @@ -224,7 +224,7 @@ class NBL_API2 ISystem : public core::IReferenceCounted using retval_t = core::smart_refctd_ptr; void operator()(core::StorageTrivializer* retval, ICaller* _caller); - char filename[MAX_FILENAME_LENGTH] {}; + std::filesystem::path filename; IFileBase::E_CREATE_FLAGS flags; }; struct SRequestParams_READ diff --git a/src/nbl/system/CSystemWin32.cpp b/src/nbl/system/CSystemWin32.cpp index 603d92b4fb..c9d26b33af 100644 --- a/src/nbl/system/CSystemWin32.cpp +++ b/src/nbl/system/CSystemWin32.cpp @@ -15,6 +15,30 @@ using namespace nbl::system; namespace { +std::wstring makeLongPathAwareWindowsPath(std::filesystem::path path) +{ + path = path.lexically_normal(); + if (!path.is_absolute()) + { + std::error_code ec; + auto absolutePath = std::filesystem::absolute(path, ec); + if (!ec) + path = absolutePath.lexically_normal(); + } + path.make_preferred(); + + std::wstring native = path.native(); + constexpr std::wstring_view ExtendedPrefix = LR"(\\?\)"; + constexpr std::wstring_view UncPrefix = LR"(\\)"; + constexpr std::wstring_view ExtendedUncPrefix = LR"(\\?\UNC\)"; + + if (native.rfind(ExtendedPrefix.data(), 0u) == 0u) + return native; + if (native.rfind(UncPrefix.data(), 0u) == 0u) + return std::wstring(ExtendedUncPrefix) + native.substr(2u); + return std::wstring(ExtendedPrefix) + native; +} + std::string queryCpuName() { int cpuInfo[4] = {}; @@ -113,12 +137,11 @@ core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std: SECURITY_ATTRIBUTES secAttribs{ sizeof(SECURITY_ATTRIBUTES), nullptr, FALSE }; system::path p = filename; - if (p.is_absolute()) - p.make_preferred(); // Replace "/" separators with "\" + const auto nativePath = makeLongPathAwareWindowsPath(p); // only write access should create new files if they don't exist const auto creationDisposition = writeAccess ? 
OPEN_ALWAYS : OPEN_EXISTING; - HANDLE _native = CreateFileA(p.string().data(), fileAccess, shareMode, &secAttribs, creationDisposition, FILE_ATTRIBUTE_NORMAL, nullptr); + HANDLE _native = CreateFileW(nativePath.c_str(), fileAccess, shareMode, &secAttribs, creationDisposition, FILE_ATTRIBUTE_NORMAL, nullptr); if (_native==INVALID_HANDLE_VALUE) { auto e = GetLastError(); diff --git a/src/nbl/system/ISystem.cpp b/src/nbl/system/ISystem.cpp index 6d103035dc..ebdb63ac87 100644 --- a/src/nbl/system/ISystem.cpp +++ b/src/nbl/system/ISystem.cpp @@ -37,10 +37,6 @@ ISystem::ISystem(core::smart_refctd_ptr&& caller) : m_dispatch bool ISystem::exists(const system::path& filename, const core::bitflag flags) const { const bool writeUsage = flags.value&IFile::ECF_WRITE; - - // filename too long - if (filename.string().size() >= sizeof(SRequestParams_CREATE_FILE::filename)) - return false; // regular file std::error_code fsEc; if (std::filesystem::exists(filename, fsEc) && !fsEc) @@ -234,15 +230,8 @@ void ISystem::createFile(future_t>& future, std::f } // - if (filename.string().size()>=MAX_FILENAME_LENGTH) - { - future.set_result(nullptr); - return; - } - - SRequestParams_CREATE_FILE params; - strcpy(params.filename,filename.string().c_str()); + params.filename = std::move(filename); params.flags = flags.value; m_dispatcher.request(&future,params); } From 3e5ac16366792c77ea83a217fbb0aee3b26da1e7 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Mar 2026 20:05:20 +0100 Subject: [PATCH 117/118] Support long path perf reference updates --- examples_tests | 2 +- src/nbl/system/ISystem.cpp | 46 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index a41b88c0d1..187aebeaf3 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit a41b88c0d14536c299680290250880770156ffd0 +Subproject commit 187aebeaf3287ba5ba3a6e872bd682af988d8e85 diff --git a/src/nbl/system/ISystem.cpp 
b/src/nbl/system/ISystem.cpp index ebdb63ac87..4c11de4266 100644 --- a/src/nbl/system/ISystem.cpp +++ b/src/nbl/system/ISystem.cpp @@ -12,9 +12,47 @@ #include "nbl/system/CArchiveLoaderTar.h" #include "nbl/system/CMountDirectoryArchive.h" +#ifdef _NBL_PLATFORM_WINDOWS_ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include +#endif + using namespace nbl; using namespace nbl::system; +namespace +{ + +#ifdef _NBL_PLATFORM_WINDOWS_ +std::wstring makeLongPathAwareWindowsPath(std::filesystem::path path) +{ + path = path.lexically_normal(); + if (!path.is_absolute()) + { + std::error_code ec; + const auto absolutePath = std::filesystem::absolute(path, ec); + if (!ec) + path = absolutePath.lexically_normal(); + } + path.make_preferred(); + + std::wstring native = path.native(); + constexpr std::wstring_view ExtendedPrefix = LR"(\\?\)"; + constexpr std::wstring_view UncPrefix = LR"(\\)"; + constexpr std::wstring_view ExtendedUncPrefix = LR"(\\?\UNC\)"; + + if (native.rfind(ExtendedPrefix.data(), 0u) == 0u) + return native; + if (native.rfind(UncPrefix.data(), 0u) == 0u) + return std::wstring(ExtendedUncPrefix) + native.substr(2u); + return std::wstring(ExtendedPrefix) + native; +} +#endif + +} + ISystem::ISystem(core::smart_refctd_ptr&& caller) : m_dispatcher(std::move(caller)) { addArchiveLoader(core::make_smart_refctd_ptr(nullptr)); @@ -119,10 +157,18 @@ bool ISystem::deleteDirectory(const system::path& p) bool nbl::system::ISystem::deleteFile(const system::path& p) { +#ifdef _NBL_PLATFORM_WINDOWS_ + const auto nativePath = makeLongPathAwareWindowsPath(std::filesystem::path(p.string())); + const DWORD attributes = GetFileAttributesW(nativePath.c_str()); + if (attributes == INVALID_FILE_ATTRIBUTES || (attributes & FILE_ATTRIBUTE_DIRECTORY)) + return false; + return DeleteFileW(nativePath.c_str()); +#else if (std::filesystem::exists(p) && !std::filesystem::is_directory(p)) return std::filesystem::remove(p); else return false; +#endif } std::error_code 
ISystem::moveFileOrDirectory(const system::path& oldPath, const system::path& newPath) From 3a80686d45c5e9dc518635a6906c8d76fc60df5e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 11 Mar 2026 20:12:29 +0100 Subject: [PATCH 118/118] Deduplicate Win32 long path helper --- src/nbl/system/CSystemWin32.cpp | 28 ++---------------- src/nbl/system/ISystem.cpp | 34 ++-------------------- src/nbl/system/SWin32PathUtilities.h | 43 ++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 58 deletions(-) create mode 100644 src/nbl/system/SWin32PathUtilities.h diff --git a/src/nbl/system/CSystemWin32.cpp b/src/nbl/system/CSystemWin32.cpp index c9d26b33af..49aaf2e3ac 100644 --- a/src/nbl/system/CSystemWin32.cpp +++ b/src/nbl/system/CSystemWin32.cpp @@ -1,5 +1,6 @@ #include "nbl/system/CSystemWin32.h" #include "nbl/system/CFileWin32.h" +#include "nbl/system/SWin32PathUtilities.h" using namespace nbl; using namespace nbl::system; @@ -14,31 +15,6 @@ using namespace nbl::system; namespace { - -std::wstring makeLongPathAwareWindowsPath(std::filesystem::path path) -{ - path = path.lexically_normal(); - if (!path.is_absolute()) - { - std::error_code ec; - auto absolutePath = std::filesystem::absolute(path, ec); - if (!ec) - path = absolutePath.lexically_normal(); - } - path.make_preferred(); - - std::wstring native = path.native(); - constexpr std::wstring_view ExtendedPrefix = LR"(\\?\)"; - constexpr std::wstring_view UncPrefix = LR"(\\)"; - constexpr std::wstring_view ExtendedUncPrefix = LR"(\\?\UNC\)"; - - if (native.rfind(ExtendedPrefix.data(), 0u) == 0u) - return native; - if (native.rfind(UncPrefix.data(), 0u) == 0u) - return std::wstring(ExtendedUncPrefix) + native.substr(2u); - return std::wstring(ExtendedPrefix) + native; -} - std::string queryCpuName() { int cpuInfo[4] = {}; @@ -137,7 +113,7 @@ core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std: SECURITY_ATTRIBUTES secAttribs{ sizeof(SECURITY_ATTRIBUTES), nullptr, FALSE }; system::path p = 
filename; - const auto nativePath = makeLongPathAwareWindowsPath(p); + const auto nativePath = impl::makeLongPathAwareWindowsPath(p); // only write access should create new files if they don't exist const auto creationDisposition = writeAccess ? OPEN_ALWAYS : OPEN_EXISTING; diff --git a/src/nbl/system/ISystem.cpp b/src/nbl/system/ISystem.cpp index 4c11de4266..f1b3dec85e 100644 --- a/src/nbl/system/ISystem.cpp +++ b/src/nbl/system/ISystem.cpp @@ -11,6 +11,7 @@ #include "nbl/system/CArchiveLoaderZip.h" #include "nbl/system/CArchiveLoaderTar.h" #include "nbl/system/CMountDirectoryArchive.h" +#include "nbl/system/SWin32PathUtilities.h" #ifdef _NBL_PLATFORM_WINDOWS_ #ifndef WIN32_LEAN_AND_MEAN @@ -22,37 +23,6 @@ using namespace nbl; using namespace nbl::system; -namespace -{ - -#ifdef _NBL_PLATFORM_WINDOWS_ -std::wstring makeLongPathAwareWindowsPath(std::filesystem::path path) -{ - path = path.lexically_normal(); - if (!path.is_absolute()) - { - std::error_code ec; - const auto absolutePath = std::filesystem::absolute(path, ec); - if (!ec) - path = absolutePath.lexically_normal(); - } - path.make_preferred(); - - std::wstring native = path.native(); - constexpr std::wstring_view ExtendedPrefix = LR"(\\?\)"; - constexpr std::wstring_view UncPrefix = LR"(\\)"; - constexpr std::wstring_view ExtendedUncPrefix = LR"(\\?\UNC\)"; - - if (native.rfind(ExtendedPrefix.data(), 0u) == 0u) - return native; - if (native.rfind(UncPrefix.data(), 0u) == 0u) - return std::wstring(ExtendedUncPrefix) + native.substr(2u); - return std::wstring(ExtendedPrefix) + native; -} -#endif - -} - ISystem::ISystem(core::smart_refctd_ptr&& caller) : m_dispatcher(std::move(caller)) { addArchiveLoader(core::make_smart_refctd_ptr(nullptr)); @@ -158,7 +128,7 @@ bool ISystem::deleteDirectory(const system::path& p) bool nbl::system::ISystem::deleteFile(const system::path& p) { #ifdef _NBL_PLATFORM_WINDOWS_ - const auto nativePath = makeLongPathAwareWindowsPath(std::filesystem::path(p.string())); + const auto 
nativePath = impl::makeLongPathAwareWindowsPath(std::filesystem::path(p.string())); const DWORD attributes = GetFileAttributesW(nativePath.c_str()); if (attributes == INVALID_FILE_ATTRIBUTES || (attributes & FILE_ATTRIBUTE_DIRECTORY)) return false; diff --git a/src/nbl/system/SWin32PathUtilities.h b/src/nbl/system/SWin32PathUtilities.h new file mode 100644 index 0000000000..0f2ae33f24 --- /dev/null +++ b/src/nbl/system/SWin32PathUtilities.h @@ -0,0 +1,43 @@ +// Internal src-only header. Do not include from public headers. +#ifndef _NBL_SYSTEM_S_WIN32_PATH_UTILITIES_H_INCLUDED_ +#define _NBL_SYSTEM_S_WIN32_PATH_UTILITIES_H_INCLUDED_ + +#ifdef _NBL_PLATFORM_WINDOWS_ + +#include +#include +#include +#include + +namespace nbl::system::impl +{ + +inline std::wstring makeLongPathAwareWindowsPath(std::filesystem::path path) +{ + path = path.lexically_normal(); + if (!path.is_absolute()) + { + std::error_code ec; + const auto absolutePath = std::filesystem::absolute(path, ec); + if (!ec) + path = absolutePath.lexically_normal(); + } + path.make_preferred(); + + std::wstring native = path.native(); + constexpr std::wstring_view ExtendedPrefix = LR"(\\?\)"; + constexpr std::wstring_view UncPrefix = LR"(\\)"; + constexpr std::wstring_view ExtendedUncPrefix = LR"(\\?\UNC\)"; + + if (native.rfind(ExtendedPrefix.data(), 0u) == 0u) + return native; + if (native.rfind(UncPrefix.data(), 0u) == 0u) + return std::wstring(ExtendedUncPrefix) + native.substr(2u); + return std::wstring(ExtendedPrefix) + native; +} + +} + +#endif + +#endif