diff --git a/.gitignore b/.gitignore index c7bbb2808e..48ce7cc770 100644 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,4 @@ tools/nsc/bin/* */__pycache__/* __pycache__/* *.pyc - +tmp/* diff --git a/.gitmodules b/.gitmodules index 8a04f82d9d..8c03de482d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -129,3 +129,6 @@ [submodule "3rdparty/Vulkan-Tools"] path = 3rdparty/Vulkan-Tools url = git@github.com:Devsh-Graphics-Programming/Vulkan-Tools.git +[submodule "3rdparty/fast_float"] + path = 3rdparty/fast_float + url = ../fast_float.git diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 68e821dfdf..826bfa4cad 100755 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -282,12 +282,31 @@ target_compile_definitions(spirv_cross PUBLIC SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIO # note that checking if a repository is dirty may cost build time (especially a lot if like us you have a lot of submodules) - by default we run with all checks but if you want to increase build time iterations I recommend to exclude this check option(GIT_EXCLUDE_IS_DIRTY "Exclude IS_DIRTY from git tracking checks, will increase build time iterations at the expense of the meta information loss" OFF) add_subdirectory(git-version-tracking EXCLUDE_FROM_ALL) -NBL_ADD_GIT_TRACKING_META_LIBRARY(nabla "${NBL_ROOT_PATH}") -NBL_ADD_GIT_TRACKING_META_LIBRARY(dxc "${CMAKE_CURRENT_SOURCE_DIR}/dxc/dxc") -nbl_install_file("${CMAKE_CURRENT_BINARY_DIR}/git-version-tracking/nabla_git_info.json") -nbl_install_file("${CMAKE_CURRENT_BINARY_DIR}/git-version-tracking/dxc_git_info.json") - -NBL_GENERATE_GIT_TRACKING_META() +NBL_CONFIGURE_GIT_TRACKING_META_RUNTIME( + TARGET gtml_core + NAMESPACE gtml + IGITINFO_HEADER_PATH nbl/gtml/IGitInfo.h + JSON_FORMATTER_HEADER_PATH nbl/gtml/SJsonFormatter.h +) +NBL_ADD_GIT_TRACKING_META_LIBRARY( + TARGET gtml + NAMESPACE nbl::gtml + HEADER_PATH nbl/git/info.h + REPOS + nabla "${NBL_ROOT_PATH}" + dxc "${CMAKE_CURRENT_SOURCE_DIR}/dxc/dxc" +) 
+NBL_GET_GIT_TRACKING_META_RUNTIME_OUTPUTS( + IGITINFO_HEADER_OUTPUT_VAR _NBL_GTML_IGITINFO_HEADER + JSON_FORMATTER_HEADER_OUTPUT_VAR _NBL_GTML_JSON_FORMATTER_HEADER +) +NBL_GET_GIT_TRACKING_META_OUTPUTS(TARGET gtml HEADER_OUTPUT_VAR _NBL_GTML_HEADER JSON_OUTPUTS_VAR _NBL_GTML_JSONS) +nbl_install_file_spec("${_NBL_GTML_IGITINFO_HEADER}" "nbl/gtml") +nbl_install_file_spec("${_NBL_GTML_JSON_FORMATTER_HEADER}" "nbl/gtml") +nbl_install_file_spec("${_NBL_GTML_HEADER}" "nbl/git") +foreach(_NBL_GTML_JSON IN LISTS _NBL_GTML_JSONS) + nbl_install_file("${_NBL_GTML_JSON}") +endforeach() # NGFX include(ngfx/ngfx.cmake) @@ -456,6 +475,8 @@ set(NBL_3RDPARTY_TARGETS lz4 aesGladman spirv_cross + gtml_core + gtml png_static zlibstatic shaderc_util @@ -528,14 +549,10 @@ nbl_install_dir(glm/glm) nbl_install_file_spec(${CMAKE_CURRENT_BINARY_DIR}/imath/config/ImathConfig.h imath) nbl_install_dir(imath/src/Imath) -nbl_install_file(blake/c/blake3.h) - nbl_install_dir(boost/superproject/libs/preprocessor/include/boost) nbl_install_file_spec(renderdoc/renderdoc_app.h renderdoc) -nbl_install_file(${CMAKE_CURRENT_BINARY_DIR}/git-version-tracking/git_info.h) - # parent scope exports, must be at the end of the file set(_NBL_3RDPARTY_TARGETS_ ${NBL_3RDPARTY_TARGETS} diff --git a/3rdparty/fast_float b/3rdparty/fast_float new file mode 160000 index 0000000000..221a4920db --- /dev/null +++ b/3rdparty/fast_float @@ -0,0 +1 @@ +Subproject commit 221a4920db7d68d33ab9794af602daef19667351 diff --git a/3rdparty/git-version-tracking b/3rdparty/git-version-tracking index 6c3ecac5f0..b0a7450c14 160000 --- a/3rdparty/git-version-tracking +++ b/3rdparty/git-version-tracking @@ -1 +1 @@ -Subproject commit 6c3ecac5f0297877d1573ef4e3cdb537c5feeb62 +Subproject commit b0a7450c141e8520c0225370d7408ed9a18e8efb diff --git a/examples_tests b/examples_tests index 77f4b77500..187aebeaf3 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 77f4b775008a50cda066af5d611e6147a886f52e +Subproject 
commit 187aebeaf3287ba5ba3a6e872bd682af988d8e85 diff --git a/include/nabla.h b/include/nabla.h index fa231e3db7..cedf6b0ebf 100644 --- a/include/nabla.h +++ b/include/nabla.h @@ -64,11 +64,11 @@ #include "SColor.h" // meta info -#include "git_info.h" +#include "nbl/git/info.h" namespace nbl { - const NBL_API2 gtml::GitInfo& getGitInfo(gtml::E_GIT_REPO_META repo); + const NBL_API2 ::gtml::IGitInfo& getGitInfo(gtml::E_GIT_REPO_META repo); } -#endif // __NABLA_H_INCLUDED__ \ No newline at end of file +#endif // __NABLA_H_INCLUDED__ diff --git a/include/nbl/application_templates/MonoDeviceApplication.hpp b/include/nbl/application_templates/MonoDeviceApplication.hpp index c7a94fe332..a3399ac8f0 100644 --- a/include/nbl/application_templates/MonoDeviceApplication.hpp +++ b/include/nbl/application_templates/MonoDeviceApplication.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2023 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_APPLICATION_TEMPLATES_MONO_DEVICE_APPLICATION_HPP_INCLUDED_ @@ -280,4 +280,4 @@ class MonoDeviceApplication : public virtual MonoSystemMonoLoggerApplication }; } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/IAssetManager.h b/include/nbl/asset/IAssetManager.h index 45b32b7c61..557aff64dc 100644 --- a/include/nbl/asset/IAssetManager.h +++ b/include/nbl/asset/IAssetManager.h @@ -5,6 +5,7 @@ #define _NBL_ASSET_I_ASSET_MANAGER_H_INCLUDED_ #include +#include #include #include "nbl/core/declarations.h" @@ -51,6 +52,12 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted friend std::function makeAssetDisposeFunc(const IAssetManager* const _mgr); public: + struct SWriterFlagInfo + { + writer_flags_t supported = EWF_NONE; + writer_flags_t forced = EWF_NONE; + }; + #ifdef USE_MAPS_FOR_PATH_BASED_CACHE using AssetCacheType = core::CConcurrentMultiObjectCache; #else @@ -180,19 +187,31 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted SAssetBundle getAssetInHierarchy_impl(const std::string& _filePath, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) { IAssetLoader::SAssetLoadContext ctx(_params, nullptr); + system::ISystem::future_t> future; + const auto tryLoadAssetFromPath = [&](const system::path& path)->SAssetBundle + { + m_system->createFile(future, path, static_cast(system::IFile::ECF_READ | system::IFile::ECF_MAPPABLE)); + if (auto file=future.acquire()) + return getAssetInHierarchy_impl(file->get(), path.string(), ctx.params, _hierarchyLevel, _override); + m_system->createFile(future, path, system::IFile::ECF_READ); + if (auto file=future.acquire()) + return getAssetInHierarchy_impl(file->get(), path.string(), ctx.params, _hierarchyLevel, _override); + return SAssetBundle(0); + }; system::path filePath = _filePath; _override->getLoadFilename(filePath, 
m_system.get(), ctx, _hierarchyLevel); - if (!m_system->exists(filePath,system::IFile::ECF_READ)) + if (auto bundle=tryLoadAssetFromPath(filePath); !bundle.getContents().empty()) + return bundle; + + auto fallbackPath = _params.workingDirectory / filePath; + if (fallbackPath != filePath) { - filePath = _params.workingDirectory/filePath; + filePath = std::move(fallbackPath); _override->getLoadFilename(filePath, m_system.get(), ctx, _hierarchyLevel); + if (auto bundle=tryLoadAssetFromPath(filePath); !bundle.getContents().empty()) + return bundle; } - - system::ISystem::future_t> future; - m_system->createFile(future, filePath, system::IFile::ECF_READ); - if (auto file=future.acquire()) - return getAssetInHierarchy_impl(file->get(), filePath.string(), ctx.params, _hierarchyLevel, _override); return SAssetBundle(0); } @@ -350,8 +369,12 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted if (!_override) _override = &defOverride; + system::path filename = _filename; + if (filename.is_relative() && !_params.workingDirectory.empty()) + filename = _params.workingDirectory / filename; + system::ISystem::future_t> future; - m_system->createFile(future, (_params.workingDirectory.generic_string()+_filename).c_str(), system::IFile::ECF_WRITE); + m_system->createFile(future, std::move(filename), system::IFile::ECF_WRITE); if (auto file=future.acquire()) return writeAsset(file->get(), _params, _override); return false; @@ -381,6 +404,18 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted return writeAsset(_file, _params, nullptr); } + inline std::optional getAssetWriterFlagInfo(const IAsset::E_TYPE assetType, const std::string_view extension) const + { + const auto capableWritersRng = m_writers.perTypeAndFileExt.findRange({assetType, std::string(extension)}); + if (capableWritersRng.empty()) + return std::nullopt; + auto* const writer = capableWritersRng.begin()->second; + return SWriterFlagInfo{ + .supported = writer->getSupportedFlags(), + .forced = 
writer->getForcedFlags() + }; + } + // Asset Loaders [FOLLOWING ARE NOT THREAD SAFE] uint32_t getAssetLoaderCount() { return static_cast(m_loaders.vector.size()); } diff --git a/include/nbl/asset/ICPUBuffer.h b/include/nbl/asset/ICPUBuffer.h index 26f45d4ced..5cd03363ef 100644 --- a/include/nbl/asset/ICPUBuffer.h +++ b/include/nbl/asset/ICPUBuffer.h @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_I_CPU_BUFFER_H_INCLUDED_ @@ -76,13 +76,12 @@ class ICPUBuffer final : public asset::IBuffer, public IPreHashed constexpr static inline auto AssetType = ET_BUFFER; inline IAsset::E_TYPE getAssetType() const override final { return AssetType; } - inline core::blake3_hash_t computeContentHash() const override - { - core::blake3_hasher hasher; - if (m_data) - hasher.update(m_data, m_creationParams.size); - return static_cast(hasher); - } + inline core::blake3_hash_t computeContentHash() const override + { + if (!m_data) + return static_cast(core::blake3_hasher{}); + return core::blake3_hash_buffer(m_data, m_creationParams.size); + } inline bool missingContent() const override { return !m_data; } @@ -149,4 +148,4 @@ class ICPUBuffer final : public asset::IBuffer, public IPreHashed } // end namespace nbl::asset -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/ICPUPolygonGeometry.h b/include/nbl/asset/ICPUPolygonGeometry.h index 2fb640e02b..e877499443 100644 --- a/include/nbl/asset/ICPUPolygonGeometry.h +++ b/include/nbl/asset/ICPUPolygonGeometry.h @@ -8,6 +8,7 @@ #include "nbl/asset/IAsset.h" #include "nbl/asset/ICPUBuffer.h" #include "nbl/asset/IPolygonGeometry.h" +#include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" namespace nbl::asset @@ -112,7 +113,20 @@ class NBL_API2 ICPUPolygonGeometry final : public 
IPolygonGeometry return false; } template - inline bool setAABB(const hlsl::shapes::AABB<3,Scalar>& aabb) {return visitAABB([&aabb](auto&& ref)->void{ref=aabb;});} + inline bool setAABB(const hlsl::shapes::AABB<3,Scalar>& aabb) + { + bool assigned = false; + const bool visited = visitAABB([&aabb, &assigned](auto&& ref)->void + { + assigned = hlsl::shapes::util::assignAABB(ref, aabb); + }); + return visited && assigned; + } + template + inline bool applyAABB(const hlsl::shapes::AABB<3, Scalar>& aabb) + { + return setAABB(aabb); + } // inline bool setJointCount(const uint32_t count) @@ -194,4 +208,4 @@ class NBL_API2 ICPUPolygonGeometry final : public IPolygonGeometry }; } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/SBufferAdoption.h b/include/nbl/asset/SBufferAdoption.h new file mode 100644 index 0000000000..d31e5ff95b --- /dev/null +++ b/include/nbl/asset/SBufferAdoption.h @@ -0,0 +1,49 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_BUFFER_ADOPTION_H_INCLUDED_ +#define _NBL_ASSET_S_BUFFER_ADOPTION_H_INCLUDED_ +#include +#include +#include +#include +#include "nbl/asset/ICPUBuffer.h" +namespace nbl::asset +{ +namespace impl +{ +// Owns contiguous storage that can be adopted by a CPU buffer. Views like std::span are rejected. +template +concept AdoptedBufferStorage = + std::ranges::contiguous_range> && + std::ranges::sized_range> && + (!std::ranges::view>) && + requires(std::remove_reference_t& storage) + { + typename std::ranges::range_value_t>; + { std::ranges::data(storage) } -> std::same_as>*>; + }; +} +// Generic CPU-buffer adoption helper for owning contiguous storage such as std::vector or core::vector. 
+class SBufferAdoption +{ + public: + template + static inline core::smart_refctd_ptr create(Storage&& data) + { + using storage_t = std::remove_cvref_t; + using value_t = std::ranges::range_value_t; + + if (std::ranges::empty(data)) + return nullptr; + + auto backer = core::make_smart_refctd_ptr>(std::forward(data)); + auto& storage = backer->getBacker(); + const size_t byteCount = std::ranges::size(storage) * sizeof(value_t); + return ICPUBuffer::create( + { { byteCount }, std::ranges::data(storage), core::smart_refctd_ptr(std::move(backer)), alignof(value_t) }, + core::adopt_memory); + } +}; +} +#endif diff --git a/include/nbl/asset/format/EFormat.h b/include/nbl/asset/format/EFormat.h index 62ce71555e..7daf5ae45c 100644 --- a/include/nbl/asset/format/EFormat.h +++ b/include/nbl/asset/format/EFormat.h @@ -5,6 +5,7 @@ #ifndef __NBL_ASSET_E_FORMAT_H_INCLUDED__ #define __NBL_ASSET_E_FORMAT_H_INCLUDED__ +#include #include #include #include "BuildConfigOptions.h" @@ -574,6 +575,64 @@ constexpr uint32_t getFormatChannelCount() { #include "nbl/asset/format/impl/EFormat_getFormatChannelCount.h" } +namespace impl +{ +struct SStructuredFormatVariants +{ + E_FORMAT base; + std::array variants; +}; +static inline constexpr uint32_t StructuredFormatChannelVariantCount = 4u; +static inline constexpr auto StructuredFormatVariants = std::to_array({ + {EF_R8_SINT, {EF_R8_SINT, EF_R8G8_SINT, EF_R8G8B8_SINT, EF_R8G8B8A8_SINT}}, + {EF_R8_UINT, {EF_R8_UINT, EF_R8G8_UINT, EF_R8G8B8_UINT, EF_R8G8B8A8_UINT}}, + {EF_R16_SINT, {EF_R16_SINT, EF_R16G16_SINT, EF_R16G16B16_SINT, EF_R16G16B16A16_SINT}}, + {EF_R16_UINT, {EF_R16_UINT, EF_R16G16_UINT, EF_R16G16B16_UINT, EF_R16G16B16A16_UINT}}, + {EF_R32_SINT, {EF_R32_SINT, EF_R32G32_SINT, EF_R32G32B32_SINT, EF_R32G32B32A32_SINT}}, + {EF_R32_UINT, {EF_R32_UINT, EF_R32G32_UINT, EF_R32G32B32_UINT, EF_R32G32B32A32_UINT}}, + {EF_R32_SFLOAT, {EF_R32_SFLOAT, EF_R32G32_SFLOAT, EF_R32G32B32_SFLOAT, EF_R32G32B32A32_SFLOAT}}, + {EF_R64_SFLOAT, 
{EF_R64_SFLOAT, EF_R64G64_SFLOAT, EF_R64G64B64_SFLOAT, EF_R64G64B64A64_SFLOAT}} + }); + inline constexpr uint32_t getStructuredFormatVariantIndex(const E_FORMAT _fmt) + { + for (uint32_t i = 0u; i < StructuredFormatVariants.size(); ++i) + if (StructuredFormatVariants[i].base == _fmt) + return i; + return StructuredFormatVariants.size(); + } + template + inline constexpr uint32_t getStructuredFormatVariantIndex() + { + return getStructuredFormatVariantIndex(_fmt); + } + inline constexpr E_FORMAT getStructuredFormatVariant(const uint32_t _variantIndex, const uint32_t _channelCount) + { + return _variantIndex < StructuredFormatVariants.size() && _channelCount > 0u && _channelCount <= StructuredFormatChannelVariantCount ? + StructuredFormatVariants[_variantIndex].variants[_channelCount - 1u] : EF_UNKNOWN; + } + template + inline constexpr E_FORMAT getStructuredFormatVariant(const uint32_t _variantIndex) + { + if constexpr (_channelCount > 0u && _channelCount <= StructuredFormatChannelVariantCount) + return _variantIndex < StructuredFormatVariants.size() ? 
StructuredFormatVariants[_variantIndex].variants[_channelCount - 1u] : EF_UNKNOWN; + else + return EF_UNKNOWN; + } +} +template +inline constexpr E_FORMAT getFormatWithChannelCount(const uint32_t _channelCount) +{ + return impl::getStructuredFormatVariant(impl::getStructuredFormatVariantIndex<_fmt>(), _channelCount); +} +template +inline constexpr E_FORMAT getFormatWithChannelCount() +{ + return impl::getStructuredFormatVariant<_channelCount>(impl::getStructuredFormatVariantIndex<_fmt>()); +} +inline constexpr E_FORMAT getFormatWithChannelCount(const E_FORMAT _fmt, const uint32_t _channelCount) +{ + return impl::getStructuredFormatVariant(impl::getStructuredFormatVariantIndex(_fmt), _channelCount); +} /* inline uint32_t getBitsPerChannel(asset::E_FORMAT _fmt, uint8_t _channel) @@ -1987,4 +2046,4 @@ namespace std }; } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/interchange/COBJMeshWriter.h b/include/nbl/asset/interchange/COBJMeshWriter.h new file mode 100644 index 0000000000..5446118246 --- /dev/null +++ b/include/nbl/asset/interchange/COBJMeshWriter.h @@ -0,0 +1,35 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_OBJ_MESH_WRITER_H_INCLUDED_ +#define _NBL_ASSET_OBJ_MESH_WRITER_H_INCLUDED_ +#include "nbl/asset/interchange/ISceneWriter.h" +namespace nbl::asset +{ +/** + Writes OBJ from a single polygon geometry, a geometry collection, or a scene. + OBJ itself is still treated here as final flattened geometry data, not as a scene format. + Scene input is accepted only as export input: the writer bakes transforms + and serializes all collected polygon geometries into one OBJ stream. + This preserves the final shape but does not try to keep scene-only structure + such as hierarchy or instancing. 
+ In other words `ET_SCENE -> OBJ` is supported as flattening, + not as round-tripping scene semantics through the OBJ format. +*/ +class COBJMeshWriter : public ISceneWriter +{ + public: + COBJMeshWriter(); + + uint64_t getSupportedAssetTypesBitfield() const override; + + const char** getAssociatedFileExtensions() const override; + + writer_flags_t getSupportedFlags() override; + + writer_flags_t getForcedFlags() override; + + bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; +}; +} // end namespace +#endif diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 5354228278..9ba1e5e14a 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -1,22 +1,17 @@ -// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_I_ASSET_LOADER_H_INCLUDED_ #define _NBL_ASSET_I_ASSET_LOADER_H_INCLUDED_ - - #include "nbl/system/declarations.h" - #include "nbl/system/ISystem.h" #include "nbl/system/ILogger.h" - +#include "nbl/core/util/bitflag.h" #include "nbl/asset/interchange/SAssetBundle.h" +#include "nbl/asset/interchange/SFileIOPolicy.h" #include "nbl/asset/utils/CGeometryCreator.h" - - namespace nbl::asset { - class CPolygonGeometryManipulator; //! A class automating process of loading Assets from resources, eg. files @@ -59,7 +54,6 @@ class CPolygonGeometryManipulator; @see IAssetManager @see IAssetWriter */ - class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted { public: @@ -75,6 +69,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted //! 
meaning identical as to ECF_DUPLICATE_TOP_LEVEL but for any asset in the chain ECF_DUPLICATE_REFERENCES = 0xffffffffffffffffull }; + using caching_flags_t = core::bitflag; //! Parameter flags for a loader /** @@ -91,17 +86,19 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted ELPF_NONE = 0, //!< default value, it doesn't do anything //[[deprecated]] ELPF_RIGHT_HANDED_MESHES = 0x1, //!< specifies that a mesh will be flipped in such a way that it'll look correctly in right-handed camera system //[[deprecated]] ELPF_DONT_COMPILE_GLSL = 0x2, //!< it states that GLSL won't be compiled to SPIR-V if it is loaded or generated - ELPF_LOAD_METADATA_ONLY = 0x4 //!< it forces the loader to not load the entire scene for performance in special cases to fetch metadata. + ELPF_LOAD_METADATA_ONLY = 0x4, //!< it forces the loader to not load the entire scene for performance in special cases to fetch metadata. + ELPF_DONT_COMPUTE_CONTENT_HASHES = 0x8 //!< opt-out from computing content hashes of produced buffers before returning. 
}; + using loader_flags_t = core::bitflag; struct SAssetLoadParams { inline SAssetLoadParams(const size_t _decryptionKeyLen = 0u, const uint8_t* const _decryptionKey = nullptr, - const E_CACHING_FLAGS _cacheFlags = ECF_CACHE_EVERYTHING,const E_LOADER_PARAMETER_FLAGS _loaderFlags = ELPF_NONE, - const system::logger_opt_ptr _logger = nullptr, const std::filesystem::path& cwd = "") : + const caching_flags_t _cacheFlags = ECF_CACHE_EVERYTHING, const loader_flags_t _loaderFlags = ELPF_NONE, + const system::logger_opt_ptr _logger = nullptr, const std::filesystem::path& cwd = "", const SFileIOPolicy& _ioPolicy = {}) : decryptionKeyLen(_decryptionKeyLen), decryptionKey(_decryptionKey), cacheFlags(_cacheFlags), loaderFlags(_loaderFlags), - logger(std::move(_logger)), workingDirectory(cwd) + logger(std::move(_logger)), workingDirectory(cwd), ioPolicy(_ioPolicy) { } @@ -111,16 +108,18 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted cacheFlags(rhs.cacheFlags), loaderFlags(rhs.loaderFlags), logger(rhs.logger), - workingDirectory(rhs.workingDirectory) + workingDirectory(rhs.workingDirectory), + ioPolicy(rhs.ioPolicy) { } size_t decryptionKeyLen; const uint8_t* decryptionKey; - E_CACHING_FLAGS cacheFlags; - E_LOADER_PARAMETER_FLAGS loaderFlags; //!< Flags having an impact on extraordinary tasks during loading process + caching_flags_t cacheFlags; + loader_flags_t loaderFlags; //!< Flags having an impact on extraordinary tasks during loading process std::filesystem::path workingDirectory = ""; system::logger_opt_ptr logger; + SFileIOPolicy ioPolicy = {}; }; //! 
Struct for keeping the state of the current loadoperation for safe threading @@ -133,37 +132,37 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted }; // following could be inlined - static E_CACHING_FLAGS ECF_DONT_CACHE_LEVEL(uint64_t N) + static caching_flags_t ECF_DONT_CACHE_LEVEL(uint64_t N) { N *= 2ull; - return (E_CACHING_FLAGS)(ECF_DONT_CACHE_TOP_LEVEL << N); + return caching_flags_t(static_cast(ECF_DONT_CACHE_TOP_LEVEL) << N); } - static E_CACHING_FLAGS ECF_DUPLICATE_LEVEL(uint64_t N) + static caching_flags_t ECF_DUPLICATE_LEVEL(uint64_t N) { N *= 2ull; - return (E_CACHING_FLAGS)(ECF_DUPLICATE_TOP_LEVEL << N); + return caching_flags_t(static_cast(ECF_DUPLICATE_TOP_LEVEL) << N); } - static E_CACHING_FLAGS ECF_DONT_CACHE_FROM_LEVEL(uint64_t N) + static caching_flags_t ECF_DONT_CACHE_FROM_LEVEL(uint64_t N) { // (Criss) Shouldn't be set all DONT_CACHE bits from hierarchy numbers N-1 to 32 (64==2*32) ? Same for ECF_DUPLICATE_FROM_LEVEL below N *= 2ull; - return (E_CACHING_FLAGS)(ECF_DONT_CACHE_REFERENCES << N); + return caching_flags_t(static_cast(ECF_DONT_CACHE_REFERENCES) << N); } - static E_CACHING_FLAGS ECF_DUPLICATE_FROM_LEVEL(uint64_t N) + static caching_flags_t ECF_DUPLICATE_FROM_LEVEL(uint64_t N) { N *= 2ull; - return (E_CACHING_FLAGS)(ECF_DUPLICATE_REFERENCES << N); + return caching_flags_t(static_cast(ECF_DUPLICATE_REFERENCES) << N); } - static E_CACHING_FLAGS ECF_DONT_CACHE_UNTIL_LEVEL(uint64_t N) + static caching_flags_t ECF_DONT_CACHE_UNTIL_LEVEL(uint64_t N) { // (Criss) is this ok? Shouldn't be set all DONT_CACHE bits from hierarchy numbers 0 to N-1? 
Same for ECF_DUPLICATE_UNTIL_LEVEL below N = 64ull - N * 2ull; - return (E_CACHING_FLAGS)(ECF_DONT_CACHE_REFERENCES >> N); + return caching_flags_t(static_cast(ECF_DONT_CACHE_REFERENCES) >> N); } - static E_CACHING_FLAGS ECF_DUPLICATE_UNTIL_LEVEL(uint64_t N) + static caching_flags_t ECF_DUPLICATE_UNTIL_LEVEL(uint64_t N) { N = 64ull - N * 2ull; - return (E_CACHING_FLAGS)(ECF_DUPLICATE_REFERENCES >> N); + return caching_flags_t(static_cast(ECF_DUPLICATE_REFERENCES) >> N); } //! Override class to facilitate changing how assets are loaded @@ -256,6 +255,8 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted //! Called before loading a file to determine the correct path (could be relative or absolute) inline virtual void getLoadFilename(system::path& inOutFilename, const system::ISystem* sys, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { + if (inOutFilename.is_absolute() || inOutFilename.has_root_path()) + return; // try compute absolute path auto absolute = ctx.params.workingDirectory/inOutFilename; if (sys->exists(absolute,system::IFile::ECF_READ)) diff --git a/include/nbl/asset/interchange/IAssetWriter.h b/include/nbl/asset/interchange/IAssetWriter.h index 694053df5e..fca8e24124 100644 --- a/include/nbl/asset/interchange/IAssetWriter.h +++ b/include/nbl/asset/interchange/IAssetWriter.h @@ -3,17 +3,12 @@ // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_I_ASSET_WRITER_H_INCLUDED_ #define _NBL_ASSET_I_ASSET_WRITER_H_INCLUDED_ - - #include "nbl/system/IFile.h" #include "nbl/system/ILogger.h" - #include "nbl/asset/IAsset.h" - - +#include "nbl/asset/interchange/SFileIOPolicy.h" namespace nbl::asset { - //! Writing flags /** They have an impact on writing (saving) an Asset. @@ -36,9 +31,10 @@ enum E_WRITER_FLAGS : uint32_t //! write in binary format rather than text if possible EWF_BINARY = 1u << 2u, - //!< specifies the incoming orientation of loaded mesh we want to write. 
Flipping will be performed if needed in dependency of format extension orientation + //!< specifies the incoming orientation of loaded mesh we want to write. Flipping will be performed if needed in dependency of format extension orientation EWF_MESH_IS_RIGHT_HANDED = 1u << 3u }; +using writer_flags_t = core::bitflag; //! A class that defines rules during Asset-writing (saving) process /** @@ -85,21 +81,22 @@ class IAssetWriter : public virtual core::IReferenceCounted */ struct SAssetWriteParams { - SAssetWriteParams(IAsset* _asset, const E_WRITER_FLAGS& _flags = EWF_NONE, const float& _compressionLevel = 0.f, const size_t& _encryptionKeyLen = 0, const uint8_t* _encryptionKey = nullptr, const void* _userData = nullptr, const system::logger_opt_ptr _logger = nullptr, system::path cwd = "") : + SAssetWriteParams(IAsset* _asset, const writer_flags_t _flags = EWF_NONE, const float& _compressionLevel = 0.f, const size_t& _encryptionKeyLen = 0, const uint8_t* _encryptionKey = nullptr, const void* _userData = nullptr, const system::logger_opt_ptr _logger = nullptr, system::path cwd = "", const SFileIOPolicy& _ioPolicy = {}) : rootAsset(_asset), flags(_flags), compressionLevel(_compressionLevel), encryptionKeyLen(_encryptionKeyLen), encryptionKey(_encryptionKey), - userData(_userData), logger(_logger), workingDirectory(cwd) + userData(_userData), logger(_logger), workingDirectory(cwd), ioPolicy(_ioPolicy) { } const IAsset* rootAsset; //!< An Asset on which entire writing process is based. - E_WRITER_FLAGS flags; //!< Flags set by user that defines rules during writing process. + writer_flags_t flags; //!< Flags set by user that defines rules during writing process. float compressionLevel; //!< The more compression level, the more expensive (slower) compression algorithm is launched. size_t encryptionKeyLen; //!< Stores a size of data in encryptionKey pointer for correct iteration. const uint8_t* encryptionKey; //!< Stores an encryption key used for encryption process. 
const void* userData; //!< Stores writer-dependets parameters. It is usually a struct provided by a writer author. system::logger_opt_ptr logger; system::path workingDirectory; + SFileIOPolicy ioPolicy = {}; }; //! Struct for keeping the state of the current write operation for safe threading @@ -116,9 +113,7 @@ class IAssetWriter : public virtual core::IReferenceCounted const SAssetWriteParams params; system::IFile* outputFile; }; - public: - //! Returns an array of string literals terminated by nullptr virtual const char** getAssociatedFileExtensions() const = 0; @@ -130,10 +125,10 @@ class IAssetWriter : public virtual core::IReferenceCounted virtual uint64_t getSupportedAssetTypesBitfield() const { return 0; } //! Returns which flags are supported for writing modes - virtual uint32_t getSupportedFlags() = 0; + virtual writer_flags_t getSupportedFlags() = 0; //! Returns which flags are forced for writing modes, i.e. a writer can only support binary - virtual uint32_t getForcedFlags() = 0; + virtual writer_flags_t getForcedFlags() = 0; //! Override class to facilitate changing how assets are written, especially the sub-assets /* @@ -146,7 +141,7 @@ class IAssetWriter : public virtual core::IReferenceCounted //! The only reason these functions are not declared static is to allow stateful overrides public: //! 
To allow the asset writer to write different sub-assets with different flags - inline virtual E_WRITER_FLAGS getAssetWritingFlags(const SAssetWriteContext& ctx, const IAsset* assetToWrite, const uint32_t& hierarchyLevel) + inline virtual writer_flags_t getAssetWritingFlags(const SAssetWriteContext& ctx, const IAsset* assetToWrite, const uint32_t& hierarchyLevel) { return ctx.params.flags; } @@ -192,4 +187,4 @@ class IAssetWriter : public virtual core::IReferenceCounted }; } //nbl::asset -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/interchange/IGeometryLoader.h b/include/nbl/asset/interchange/IGeometryLoader.h index 6e6c7c4e26..4f6321d7bc 100644 --- a/include/nbl/asset/interchange/IGeometryLoader.h +++ b/include/nbl/asset/interchange/IGeometryLoader.h @@ -1,21 +1,16 @@ -// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_I_GEOMETRY_LOADER_H_INCLUDED_ #define _NBL_ASSET_I_GEOMETRY_LOADER_H_INCLUDED_ - - #include "nbl/core/declarations.h" - #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/interchange/IAssetLoader.h" #include "nbl/asset/interchange/IImageAssetHandlerBase.h" #include "nbl/asset/utils/CGeometryManipulator.h" - - namespace nbl::asset { - +//! Geometry loader base shared by mesh-style interchange formats. class IGeometryLoader : public IAssetLoader { public: @@ -24,6 +19,7 @@ class IGeometryLoader : public IAssetLoader protected: inline IGeometryLoader() {} + //! Creates one geometry data view from caller-owned memory or copied storage. template static inline IGeometry::SDataView createView( const E_FORMAT format, const size_t elementCount, const void* data=nullptr, @@ -51,7 +47,8 @@ class IGeometryLoader : public IAssetLoader } return retval; } - // creates a View from a mapped file + + //! 
Memory resource that keeps a mapped file alive while adopted geometry views reference it. class CFileMemoryResource final : public core::refctd_memory_resource { public: @@ -71,6 +68,8 @@ class IGeometryLoader : public IAssetLoader protected: core::smart_refctd_ptr m_file; }; + + //! Creates one geometry data view backed directly by a mapped file or by copied file contents. static inline IGeometry::SDataView createView(const E_FORMAT format, const size_t elementCount, core::smart_refctd_ptr&& file, const size_t offsetInFile) { if (auto* const basePtr=reinterpret_cast(file->getMappedPointer()); basePtr) @@ -96,7 +95,5 @@ class IGeometryLoader : public IAssetLoader private: }; - } - #endif diff --git a/include/nbl/asset/interchange/ISceneWriter.h b/include/nbl/asset/interchange/ISceneWriter.h new file mode 100644 index 0000000000..94e4548270 --- /dev/null +++ b/include/nbl/asset/interchange/ISceneWriter.h @@ -0,0 +1,21 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_I_SCENE_WRITER_H_INCLUDED_ +#define _NBL_ASSET_I_SCENE_WRITER_H_INCLUDED_ +#include "nbl/core/declarations.h" +#include "nbl/asset/ICPUScene.h" +#include "nbl/asset/interchange/IAssetWriter.h" +namespace nbl::asset +{ +//! Writer base for exporters whose root asset type is `ET_SCENE`. +class ISceneWriter : public IAssetWriter +{ + public: + virtual inline uint64_t getSupportedAssetTypesBitfield() const override { return IAsset::ET_SCENE; } + protected: + ISceneWriter() = default; + virtual ~ISceneWriter() = default; +}; +} +#endif diff --git a/include/nbl/asset/interchange/SFileIOPolicy.h b/include/nbl/asset/interchange/SFileIOPolicy.h new file mode 100644 index 0000000000..108f35addc --- /dev/null +++ b/include/nbl/asset/interchange/SFileIOPolicy.h @@ -0,0 +1,189 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
+// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_FILE_IO_POLICY_H_INCLUDED_ +#define _NBL_ASSET_S_FILE_IO_POLICY_H_INCLUDED_ +#include "nbl/core/util/bitflag.h" +#include "nbl/system/to_string.h" +#include +#include +#include +#include +#include +#include +namespace nbl::asset +{ +//! Requested IO strategy selected before file size and mapping constraints are resolved. +enum class EFileIOStrategy : uint8_t +{ + Invalid = 0u, //!< Sentinel used when strategy resolution fails or the value is uninitialized. + Auto, //!< Pick whole-file or chunked dynamically based on file size and policy limits. + WholeFile, //!< Force whole-file strategy. May fallback when not feasible unless strict=true. + Chunked //!< Force chunked strategy. +}; + +//! Requested IO policy shared by loaders, writers, and hash stages before file constraints are resolved. +struct SFileIOPolicy +{ + //! Runtime tuning knobs shared by loader parallelism and IO anomaly diagnostics. + struct SRuntimeTuning + { + //! Runtime tuning strategy for worker/chunk selection. + enum class Mode : uint8_t + { + Sequential, //!< Disable runtime tuning and force sequential execution. + None = Sequential, //!< Backward-compatible alias for Sequential. + Heuristic, //!< Use deterministic heuristics derived from input size and hardware. + Hybrid //!< Use heuristics and optionally refine with lightweight sampling. + }; + + Mode mode = Mode::Heuristic; //!< Runtime tuning mode. + float maxOverheadRatio = 0.05f; //!< Maximum acceptable tuning overhead as a fraction of estimated full workload time. + float samplingBudgetRatio = 0.05f; //!< Maximum sampling budget as a fraction of estimated full workload time. + float minExpectedGainRatio = 0.03f; //!< Minimum expected gain required to keep extra workers enabled. + uint16_t maxWorkers = 0u; //!< Hard cap for worker count. 0 means auto. 
+ uint8_t workerHeadroom = 2u; //!< Reserved hardware threads not used by the loader. Prevents full CPU saturation. + uint8_t samplingMaxCandidates = 4u; //!< Maximum number of worker-count candidates tested in hybrid mode. + uint8_t samplingPasses = 1u; //!< Number of benchmark passes per candidate in hybrid mode. + uint64_t samplingMinWorkUnits = 0ull; //!< Minimum work units required before hybrid sampling is allowed. 0 means auto. + uint8_t targetChunksPerWorker = 4u; //!< Target chunk count assigned to each worker for loader stages. + uint8_t hashTaskTargetChunksPerWorker = 1u; //!< Target chunk count assigned to each worker for hash stages. + uint64_t hashInlineThresholdBytes = 1ull << 20; //!< Hash inlining threshold. Inputs up to this size prefer inline hash build. + uint64_t minSampleBytes = 4ull << 10; //!< Lower bound for sampled byte count in hybrid mode. + uint64_t maxSampleBytes = 128ull << 10; //!< Upper bound for sampled byte count in hybrid mode. + uint64_t tinyIoPayloadThresholdBytes = 1ull << 20; //!< Payload size threshold for tiny-IO anomaly detection. + uint64_t tinyIoAvgBytesThreshold = 1024ull; //!< Average operation size threshold for tiny-IO anomaly detection. + uint64_t tinyIoMinBytesThreshold = 64ull; //!< Minimum operation size threshold for tiny-IO anomaly detection. + uint64_t tinyIoMinCallCount = 1024ull; //!< Minimum operation count required to report tiny-IO anomaly. + uint8_t chunkedInFlightDepth = 0u; //!< Chunked IO requests allowed in flight. 0 means auto, 1 disables pipelining. + }; + + using Strategy = EFileIOStrategy; + + //! Extra resolution flags affecting fallback behavior. + enum E_FLAGS : uint8_t { EF_NONE = 0u, EF_STRICT_BIT = 1u << 0u }; + + static inline constexpr uint64_t MIN_CHUNK_SIZE_BYTES = 64ull << 10u; //!< 64 KiB. 
+ static inline constexpr uint8_t MIN_CHUNK_SIZE_LOG2 = static_cast(std::bit_width(MIN_CHUNK_SIZE_BYTES) - 1u); + static inline constexpr uint8_t MAX_BYTE_SIZE_LOG2 = std::numeric_limits::digits - 1u; + static inline constexpr uint64_t DEFAULT_WHOLE_FILE_THRESHOLD_BYTES = 64ull << 20u; //!< 64 MiB. + static inline constexpr uint64_t DEFAULT_CHUNK_SIZE_BYTES = 4ull << 20u; //!< 4 MiB. + static inline constexpr uint64_t DEFAULT_MAX_STAGING_BYTES = 256ull << 20u; //!< 256 MiB. + + //! These defaults are stored and clamped as log2(byte_count), so the source byte values must stay powers of two. + static_assert(std::has_single_bit(MIN_CHUNK_SIZE_BYTES)); + static_assert(std::has_single_bit(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES)); + static_assert(std::has_single_bit(DEFAULT_CHUNK_SIZE_BYTES)); + static_assert(std::has_single_bit(DEFAULT_MAX_STAGING_BYTES)); + + static inline constexpr uint8_t clampBytesLog2(const uint8_t value, const uint8_t minValue = 0u) { return std::clamp(value, minValue, MAX_BYTE_SIZE_LOG2); } + + static inline constexpr uint64_t bytesFromLog2(const uint8_t value, const uint8_t minValue = 0u) { return 1ull << clampBytesLog2(value, minValue); } + + Strategy strategy = Strategy::Auto; //!< Requested IO strategy. Defaults to Auto. + core::bitflag flags = EF_NONE; //!< Resolution flags. Defaults to none. + + //! Maximum payload size allowed for whole-file strategy in auto mode. Defaults to 64 MiB. + uint8_t wholeFileThresholdLog2 = static_cast(std::bit_width(DEFAULT_WHOLE_FILE_THRESHOLD_BYTES) - 1u); + + //! Chunk size used by chunked strategy encoded as log2(bytes). Defaults to 4 MiB. + uint8_t chunkSizeLog2 = static_cast(std::bit_width(DEFAULT_CHUNK_SIZE_BYTES) - 1u); + + //! Maximum staging allocation for whole-file strategy encoded as log2(bytes). Defaults to 256 MiB. + uint8_t maxStagingLog2 = static_cast(std::bit_width(DEFAULT_MAX_STAGING_BYTES) - 1u); + + SRuntimeTuning runtimeTuning = {}; //!< Runtime tuning controls used by loaders and hash stages. 
+ + inline constexpr bool strict() const { return flags.hasAnyFlag(EF_STRICT_BIT); } + inline constexpr uint64_t wholeFileThresholdBytes() const { return bytesFromLog2(wholeFileThresholdLog2, MIN_CHUNK_SIZE_LOG2); } + inline constexpr uint64_t chunkSizeBytes() const { return bytesFromLog2(chunkSizeLog2, MIN_CHUNK_SIZE_LOG2); } + inline constexpr uint64_t maxStagingBytes() const { return bytesFromLog2(maxStagingLog2, MIN_CHUNK_SIZE_LOG2); } +}; + +//! Resolved IO plan chosen from SFileIOPolicy after considering file size, mapping, and staging limits. +struct SResolvedFileIOPolicy +{ + using Strategy = EFileIOStrategy; + + constexpr SResolvedFileIOPolicy() = default; + inline constexpr SResolvedFileIOPolicy(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) : SResolvedFileIOPolicy(resolve(policy, byteCount, sizeKnown, fileMappable)) {} + Strategy strategy = Strategy::Invalid; //!< Effective strategy chosen by resolver. Invalid means strict policy resolution failed. + + //! Effective chunk size encoded as log2(bytes). Also set for whole-file for telemetry consistency. + uint8_t chunkSizeLog2 = SFileIOPolicy::MIN_CHUNK_SIZE_LOG2; + uint8_t chunkedInFlightDepth = 1u; //!< Resolved chunked in-flight depth. Non-chunked strategies always keep this at 1. + + const char* reason = "invalid"; //!< Resolver reason string used in logs and diagnostics. 
+ + inline constexpr bool isValid() const { return strategy != Strategy::Invalid; } + + inline constexpr uint64_t chunkSizeBytes() const { return SFileIOPolicy::bytesFromLog2(chunkSizeLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); } + + static inline constexpr SResolvedFileIOPolicy resolve(const SFileIOPolicy& policy, const uint64_t byteCount, const bool sizeKnown = true, const bool fileMappable = false) + { + const uint8_t maxStagingLog2 = SFileIOPolicy::clampBytesLog2(policy.maxStagingLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); + const uint8_t chunkSizeLog2 = std::min(SFileIOPolicy::clampBytesLog2(policy.chunkSizeLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2), maxStagingLog2); + const uint64_t maxStaging = SFileIOPolicy::bytesFromLog2(maxStagingLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); + const uint64_t wholeThreshold = policy.wholeFileThresholdBytes(); + const uint64_t chunkSizeBytes = SFileIOPolicy::bytesFromLog2(chunkSizeLog2, SFileIOPolicy::MIN_CHUNK_SIZE_LOG2); + const uint64_t chunkCount = chunkSizeBytes ? std::max(1ull, (byteCount + chunkSizeBytes - 1ull) / chunkSizeBytes) : 1ull; + auto resolveChunkedInFlightDepth = [&](const Strategy strategy) -> uint8_t + { + if (strategy != Strategy::Chunked || chunkCount <= 1ull) + return 1u; + if (policy.runtimeTuning.chunkedInFlightDepth > 0u) + return static_cast(std::min(policy.runtimeTuning.chunkedInFlightDepth, chunkCount)); + const uint32_t hardwareThreads = policy.runtimeTuning.maxWorkers ? policy.runtimeTuning.maxWorkers : std::thread::hardware_concurrency(); + const uint32_t usableThreads = hardwareThreads > policy.runtimeTuning.workerHeadroom ? 
(hardwareThreads - policy.runtimeTuning.workerHeadroom) : 1u; + return static_cast(std::clamp(usableThreads, 1ull, std::min(chunkCount, std::numeric_limits::max()))); + }; + auto makeResolved = [&](const Strategy strategy, const char* const reason) -> SResolvedFileIOPolicy { SResolvedFileIOPolicy resolved = {}; resolved.strategy = strategy; resolved.chunkSizeLog2 = chunkSizeLog2; resolved.chunkedInFlightDepth = resolveChunkedInFlightDepth(strategy); resolved.reason = reason; return resolved; }; + switch (policy.strategy) + { + case SFileIOPolicy::Strategy::Invalid: + return makeResolved(Strategy::Invalid, "invalid_requested_strategy"); + case SFileIOPolicy::Strategy::WholeFile: + { + if (fileMappable || (sizeKnown && byteCount <= maxStaging)) + return makeResolved(Strategy::WholeFile, fileMappable ? "requested_whole_file_mappable" : "requested_whole_file"); + if (policy.strict()) + return makeResolved(Strategy::Invalid, "whole_file_not_feasible_strict"); + return makeResolved(Strategy::Chunked, sizeKnown ? "whole_file_not_feasible_fallback_chunked" : "whole_file_unknown_size_fallback_chunked"); + } + case SFileIOPolicy::Strategy::Chunked: + return makeResolved(Strategy::Chunked, "requested_chunked"); + case SFileIOPolicy::Strategy::Auto: + default: + { + if (fileMappable) + return makeResolved(Strategy::WholeFile, sizeKnown ? 
"auto_mappable_prefers_whole_file" : "auto_unknown_size_mappable_whole_file"); + if (!sizeKnown) + return makeResolved(Strategy::Chunked, "auto_unknown_size"); + const uint64_t wholeLimit = std::min(wholeThreshold, maxStaging); + if (byteCount <= wholeLimit) + return makeResolved(Strategy::WholeFile, "auto_small_enough_for_whole_file"); + return makeResolved(Strategy::Chunked, "auto_too_large_for_whole_file"); + } + } + } +}; +} +namespace nbl::system::impl +{ +template<> +struct to_string_helper +{ + static inline std::string __call(const asset::EFileIOStrategy value) + { + switch (value) + { + case asset::EFileIOStrategy::Invalid: return "invalid"; + case asset::EFileIOStrategy::Auto: return "auto"; + case asset::EFileIOStrategy::WholeFile: return "whole"; + case asset::EFileIOStrategy::Chunked: return "chunked"; + default: return "unknown"; + } + } +}; +} +#endif diff --git a/include/nbl/asset/interchange/SGeometryContentHash.h b/include/nbl/asset/interchange/SGeometryContentHash.h new file mode 100644 index 0000000000..c7353dea9b --- /dev/null +++ b/include/nbl/asset/interchange/SGeometryContentHash.h @@ -0,0 +1,58 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_GEOMETRY_CONTENT_HASH_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_CONTENT_HASH_H_INCLUDED_ +#include "nbl/asset/IPreHashed.h" +#include "nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/core/hash/blake.h" +namespace nbl::asset +{ +//! Geometry-content-hash helper operating on all unique buffers referenced by one polygon geometry. +class SPolygonGeometryContentHash +{ + public: + using mode_t = CPolygonGeometryManipulator::EContentHashMode; + + //! Collects all unique buffers contributing to the geometry content hash. 
+ static inline void collectBuffers(const ICPUPolygonGeometry* geometry, core::vector>& buffers) { CPolygonGeometryManipulator::collectUniqueBuffers(geometry, buffers); } + + //! Resets all referenced buffer hashes to `INVALID_HASH`. + static inline void reset(ICPUPolygonGeometry* geometry) + { + core::vector> buffers; + collectBuffers(geometry, buffers); + for (auto& buffer : buffers) + if (buffer) + buffer->setContentHash(IPreHashed::INVALID_HASH); + } + + //! Composes the geometry hash from the current content hashes of all referenced buffers. + static inline core::blake3_hash_t composeHashFromBufferContentHashes(const ICPUPolygonGeometry* geometry) + { + if (!geometry) + return IPreHashed::INVALID_HASH; + + core::blake3_hasher hashBuilder = {}; + if (const auto* indexing = geometry->getIndexingCallback(); indexing) + { + hashBuilder << indexing->degree(); + hashBuilder << indexing->rate(); + hashBuilder << indexing->knownTopology(); + } + + core::vector> buffers; + collectBuffers(geometry, buffers); + for (const auto& buffer : buffers) + hashBuilder << (buffer ? buffer->getContentHash() : IPreHashed::INVALID_HASH); + return static_cast(hashBuilder); + } + + //! Computes missing buffer hashes and returns the composed geometry hash. + static inline core::blake3_hash_t computeMissing(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) { CPolygonGeometryManipulator::computeMissingContentHashesParallel(geometry, ioPolicy); return composeHashFromBufferContentHashes(geometry); } + + //! Recomputes all buffer hashes and returns the composed geometry hash. 
+ static inline core::blake3_hash_t recompute(ICPUPolygonGeometry* geometry, const SFileIOPolicy& ioPolicy) { CPolygonGeometryManipulator::recomputeContentHashesParallel(geometry, ioPolicy); return composeHashFromBufferContentHashes(geometry); } +}; +} +#endif diff --git a/include/nbl/asset/interchange/SGeometryLoaderCommon.h b/include/nbl/asset/interchange/SGeometryLoaderCommon.h new file mode 100644 index 0000000000..dd2e1cf72e --- /dev/null +++ b/include/nbl/asset/interchange/SGeometryLoaderCommon.h @@ -0,0 +1,128 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_GEOMETRY_LOADER_COMMON_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_LOADER_COMMON_H_INCLUDED_ +#include +#include +#include +#include +#include "nbl/asset/SBufferAdoption.h" +#include "nbl/asset/ICPUPolygonGeometry.h" +namespace nbl::asset +{ +//! Shared geometry-loader helpers for adopting buffers and assembling formatted data views. +class SGeometryLoaderCommon +{ + public: + //! Creates one formatted data view over an existing CPU buffer. + static inline IGeometry::SDataView createDataView(core::smart_refctd_ptr&& buffer, const size_t byteCount, const uint32_t stride, const E_FORMAT format) + { + if (!buffer || byteCount == 0ull) + return {}; + return {.composed = {.stride = stride, .format = format, .rangeFormat = IGeometryBase::getMatchingAABBFormat(format)}, .src = {.offset = 0ull, .size = byteCount, .buffer = std::move(buffer)}}; + } + //! Tracks the widest scalar component format and highest component index seen for one structured attribute. 
+ static inline void negotiateStructuredComponent(IGeometry::SDataViewBase& view, const E_FORMAT componentFormat, const uint8_t component) + { + assert(getFormatChannelCount(componentFormat) != 0u); + if (getTexelOrBlockBytesize(componentFormat) > getTexelOrBlockBytesize(view.format)) + view.format = componentFormat; + view.stride = std::max(view.stride, component); + } + //! Finalizes one structured base view and invokes `onComponent(offset,stride,componentFormat)` per component slot. + template + static inline void finalizeStructuredBaseView(IGeometry::SDataViewBase& view, Fn&& onComponent) + { + if (view.format == EF_UNKNOWN) + return; + const auto componentFormat = view.format; + const auto componentCount = view.stride + 1u; + view.format = getFormatWithChannelCount(componentFormat, componentCount); + view.stride = getTexelOrBlockBytesize(view.format); + for (uint32_t c = 0u; c < componentCount; ++c) + onComponent(getTexelOrBlockBytesize(componentFormat) * c, view.stride, componentFormat); + } + //! Creates one owned data view with storage sized for `elementCount` items in `format`. + static inline IGeometry::SDataView createOwnedView(const E_FORMAT format, const size_t elementCount) + { + if (format == EF_UNKNOWN || elementCount == 0ull) + return {}; + const auto stride = getTexelOrBlockBytesize(format); + auto buffer = ICPUBuffer::create({stride * elementCount}); + return buffer ? createDataView(std::move(buffer), stride * elementCount, stride, format) : IGeometry::SDataView{}; + } + //! Finalizes one structured base view, calls `onComponent`, and allocates the resulting owned data view. + template + static inline IGeometry::SDataView createStructuredView(IGeometry::SDataViewBase& view, const size_t elementCount, Fn&& onComponent) + { + if (view.format == EF_UNKNOWN) + return {}; + finalizeStructuredBaseView(view, std::forward(onComponent)); + return createOwnedView(view.format, elementCount); + } + //! 
Finalizes one structured view, appends per-component iterator bindings, rebases them against the allocated buffer, and passes the created view to `setter`. + template + static inline void attachStructuredView(IGeometry::SDataViewBase& baseView, const size_t elementCount, IteratorContainer& iterators, PushComponent&& pushComponent, RebaseComponent&& rebaseComponent, Setter&& setter) + { + auto beginIx = iterators.size(); + auto view = createStructuredView(baseView, elementCount, [&](const size_t offset, const uint32_t stride, const E_FORMAT componentFormat) -> void { pushComponent(iterators, offset, stride, componentFormat); }); + if (!view) + return; + const auto basePtr = ptrdiff_t(view.src.buffer->getPointer()) + view.src.offset; + for (const auto endIx = iterators.size(); beginIx != endIx; ++beginIx) + rebaseComponent(iterators[beginIx], basePtr); + setter(std::move(view)); + } + //! Visits position, normal, and auxiliary attribute views for one polygon geometry. + template + static inline void visitVertexAttributeViews(const ICPUPolygonGeometry* geometry, Visitor&& visitor) + { + if (!geometry) + return; + visitor(geometry->getPositionView()); + visitor(geometry->getNormalView()); + for (const auto& view : geometry->getAuxAttributeViews()) + visitor(view); + } + //! Visits all views owned by one polygon geometry, including index and skeletal data. + template + static inline void visitGeometryViews(const ICPUPolygonGeometry* geometry, Visitor&& visitor) + { + if (!geometry) + return; + visitVertexAttributeViews(geometry, visitor); + visitor(geometry->getIndexView()); + for (const auto& view : geometry->getJointWeightViews()) + { + visitor(view.indices); + visitor(view.weights); + } + if (const auto jointObb = geometry->getJointOBBView(); jointObb) + visitor(*jointObb); + } + //! Stores one auxiliary view at `slot`, resizing the aux array as needed. 
+ static inline void setAuxViewAt(ICPUPolygonGeometry* geometry, const uint32_t slot, IGeometry::SDataView&& view) + { + if (!geometry || !view) + return; + auto* const auxViews = geometry->getAuxAttributeViews(); + if (auxViews->size() <= slot) + auxViews->resize(slot + 1u); + (*auxViews)[slot] = std::move(view); + } + + //! Adopts contiguous caller-owned storage into a CPU buffer and exposes it as a formatted data view. + template + static inline IGeometry::SDataView createAdoptedView(Storage&& data) + { + using storage_t = std::remove_cvref_t; + using value_t = std::ranges::range_value_t; + auto buffer = SBufferAdoption::create(std::forward(data)); + if (!buffer) + return {}; + return createDataView(std::move(buffer), buffer->getSize(), static_cast(sizeof(value_t)), Format); + } +}; +} +#endif diff --git a/include/nbl/asset/interchange/SGeometryWriterCommon.h b/include/nbl/asset/interchange/SGeometryWriterCommon.h new file mode 100644 index 0000000000..5c2055a5e5 --- /dev/null +++ b/include/nbl/asset/interchange/SGeometryWriterCommon.h @@ -0,0 +1,227 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_GEOMETRY_WRITER_COMMON_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_WRITER_COMMON_H_INCLUDED_ +#include +#include "nbl/asset/ICPUScene.h" +#include "nbl/asset/ICPUGeometryCollection.h" +#include "nbl/asset/ICPUPolygonGeometry.h" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include +#include +#include +#include +#include +#include +#include +namespace nbl::asset +{ +//! Shared writer-side helpers used by geometry exporters. +class SGeometryWriterCommon +{ + public: + //! Common scene/collection context propagated to one emitted geometry item. + struct SWriteState + { + //! World transform accumulated up to the emitted geometry. 
+ hlsl::float32_t3x4 transform = hlsl::math::linalg::identity(); + uint32_t instanceIx = ~0u; //!< Scene instance index or `~0u` when not applicable. + uint32_t targetIx = ~0u; //!< Morph-target index or `~0u` when not applicable. + uint32_t geometryIx = 0u; //!< Geometry index inside the current collection. + }; + //! One polygon geometry together with the scene context needed by writers. + struct SPolygonGeometryWriteItem : SWriteState { const ICPUPolygonGeometry* geometry = nullptr; }; + + //! Collects polygon geometry items from a geometry, geometry collection, or scene root asset. + template> requires requires(Container& c, const SPolygonGeometryWriteItem& item) { c.emplace_back(item); } + static inline Container collectPolygonGeometryWriteItems(const IAsset* rootAsset) + { + Container out = {}; + if (!rootAsset) + return out; + const auto identity = hlsl::math::linalg::identity(); + auto appendFromCollection = [&](const ICPUGeometryCollection* collection, const hlsl::float32_t3x4& transform, const uint32_t instanceIx, const uint32_t targetIx) -> void { + if (!collection) + return; + const auto& geometries = collection->getGeometries(); + for (uint32_t geometryIx = 0u; geometryIx < geometries.size(); ++geometryIx) + { + const auto& ref = geometries[geometryIx]; + if (!ref.geometry || ref.geometry->getPrimitiveType() != IGeometryBase::EPrimitiveType::Polygon) + continue; + SPolygonGeometryWriteItem item = {}; + item.geometry = static_cast(ref.geometry.get()); + item.transform = hlsl::math::linalg::promoted_mul(transform, ref.hasTransform() ? 
ref.transform : identity); + item.instanceIx = instanceIx; item.targetIx = targetIx; item.geometryIx = geometryIx; + out.emplace_back(item); + } + }; + if (rootAsset->getAssetType() == IAsset::ET_GEOMETRY) + { + const auto* geometry = static_cast*>(rootAsset); + if (geometry->getPrimitiveType() == IGeometryBase::EPrimitiveType::Polygon) + out.emplace_back(SPolygonGeometryWriteItem{.geometry = static_cast(rootAsset)}); + return out; + } + if (rootAsset->getAssetType() == IAsset::ET_GEOMETRY_COLLECTION) + { + appendFromCollection(static_cast(rootAsset), identity, ~0u, ~0u); + return out; + } + if (rootAsset->getAssetType() != IAsset::ET_SCENE) + return out; + const auto* scene = static_cast(rootAsset); + const auto& instances = scene->getInstances(); + const auto& morphTargets = instances.getMorphTargets(); + const auto& initialTransforms = instances.getInitialTransforms(); + for (uint32_t instanceIx = 0u; instanceIx < morphTargets.size(); ++instanceIx) + { + const auto* targets = morphTargets[instanceIx].get(); + if (!targets) + continue; + const auto instanceTransform = initialTransforms.empty() ? identity : initialTransforms[instanceIx]; + const auto& targetList = targets->getTargets(); + for (uint32_t targetIx = 0u; targetIx < targetList.size(); ++targetIx) + appendFromCollection(targetList[targetIx].geoCollection.get(), instanceTransform, instanceIx, targetIx); + } + return out; + } + //! Returns true when the transform equals the writer identity matrix. + static inline bool isIdentityTransform(const hlsl::float32_t3x4& transform) { return transform == hlsl::math::linalg::identity(); } + //! Returns one auxiliary view when it exists and optionally matches `requiredElementCount`. 
+ static inline const ICPUPolygonGeometry::SDataView* getAuxViewAt(const ICPUPolygonGeometry* geom, const uint32_t auxViewIx, const size_t requiredElementCount = 0ull) + { + if (!geom) + return nullptr; + const auto& auxViews = geom->getAuxAttributeViews(); + if (auxViewIx >= auxViews.size()) + return nullptr; + const auto& view = auxViews[auxViewIx]; + if (!view) + return nullptr; + if (requiredElementCount && view.getElementCount() != requiredElementCount) + return nullptr; + return &view; + } + //! Resolves the triangle face count for indexed or non-indexed polygon geometry. + static inline bool getTriangleFaceCount(const ICPUPolygonGeometry* geom, size_t& outFaceCount) + { + outFaceCount = 0ull; + if (!geom) + return false; + const auto& positionView = geom->getPositionView(); + const size_t vertexCount = positionView.getElementCount(); + if (vertexCount == 0ull) + return false; + const auto& indexView = geom->getIndexView(); + if (indexView) + { + const size_t indexCount = indexView.getElementCount(); + if ((indexCount % 3ull) != 0ull) + return false; + return (outFaceCount = indexCount / 3ull), true; + } + if ((vertexCount % 3ull) != 0ull) + return false; + return (outFaceCount = vertexCount / 3ull), true; + } + //! Visits triangle indices as validated `uint32_t` triplets. 
+ template + static inline bool visitTriangleIndices(const ICPUPolygonGeometry* geom, Visitor&& visitor) + { + if (!geom) + return false; + const auto& positionView = geom->getPositionView(); + const size_t vertexCount = positionView.getElementCount(); + if (vertexCount == 0ull) + return false; + auto visit = [&](const IndexT i0, const IndexT i1, const IndexT i2)->bool + { + const uint32_t u0 = static_cast(i0); + const uint32_t u1 = static_cast(i1); + const uint32_t u2 = static_cast(i2); + if (u0 >= vertexCount || u1 >= vertexCount || u2 >= vertexCount) + return false; + if constexpr (std::is_same_v, bool>) + return visitor(u0, u1, u2); + else { visitor(u0, u1, u2); return true; } + }; + const auto& indexView = geom->getIndexView(); + if (!indexView) + { + if ((vertexCount % 3ull) != 0ull) + return false; + for (uint32_t i = 0u; i < vertexCount; i += 3u) + if (!visit(i + 0u, i + 1u, i + 2u)) + return false; + return true; + } + const size_t indexCount = indexView.getElementCount(); + if ((indexCount % 3ull) != 0ull) + return false; + const void* const src = indexView.getPointer(); + if (!src) + return false; + auto visitIndexed = [&]()->bool + { + const auto* indices = reinterpret_cast(src); + for (size_t i = 0ull; i < indexCount; i += 3ull) + if (!visit(indices[i + 0ull], indices[i + 1ull], indices[i + 2ull])) + return false; + return true; + }; + switch (geom->getIndexType()) + { + case EIT_32BIT: return visitIndexed.template operator()(); + case EIT_16BIT: return visitIndexed.template operator()(); + default: return false; + } + } + //! Returns a direct pointer for tightly packed views that already match `ExpectedFormat`. + template + static inline const T* getTightView(const ICPUPolygonGeometry::SDataView& view) { return view && view.composed.format == ExpectedFormat && view.composed.getStride() == sizeof(T) ? reinterpret_cast(view.getPointer()) : nullptr; } + //! Appends one floating-point value to a caller-provided character buffer. 
+ static inline char* appendFloatToBuffer(char* dst, char* end, float value) { return appendFloatingPointToBuffer(dst, end, value); } + //! Appends one double-precision value to a caller-provided character buffer. + static inline char* appendFloatToBuffer(char* dst, char* end, double value) { return appendFloatingPointToBuffer(dst, end, value); } + //! Appends one unsigned integer value to a caller-provided character buffer. + static inline char* appendUIntToBuffer(char* dst, char* const end, const uint32_t value) + { + if (!dst || dst >= end) + return end; + const auto result = std::to_chars(dst, end, value); + if (result.ec == std::errc()) + return result.ptr; + const int written = std::snprintf(dst, static_cast(end - dst), "%u", value); + if (written <= 0) + return dst; + const size_t writeLen = static_cast(written); + return (writeLen < static_cast(end - dst)) ? (dst + writeLen) : end; + } + private: + //! Shared floating-point backend for the `appendFloatToBuffer` overload set. + template + static inline char* appendFloatingPointToBuffer(char* dst, char* const end, const T value) + { + static_assert(std::is_same_v || std::is_same_v); + if (!dst || dst >= end) + return end; + const auto result = std::to_chars(dst, end, value); + if (result.ec == std::errc()) + return result.ptr; + constexpr size_t FloatingPointScratchSize = std::numeric_limits::max_digits10 + 9ull; + std::array scratch = {}; + constexpr int Precision = std::numeric_limits::max_digits10; + const int written = std::snprintf(scratch.data(), scratch.size(), "%.*g", Precision, static_cast(value)); + if (written <= 0) + return dst; + const size_t writeLen = static_cast(written); + if (writeLen > static_cast(end - dst)) + return end; + std::memcpy(dst, scratch.data(), writeLen); + return dst + writeLen; + } +}; +} +#endif diff --git a/include/nbl/asset/interchange/SInterchangeIO.h b/include/nbl/asset/interchange/SInterchangeIO.h new file mode 100644 index 0000000000..953e3142d5 --- /dev/null +++ 
b/include/nbl/asset/interchange/SInterchangeIO.h @@ -0,0 +1,235 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_INTERCHANGE_IO_H_INCLUDED_ +#define _NBL_ASSET_S_INTERCHANGE_IO_H_INCLUDED_ +#include "nbl/asset/interchange/SFileIOPolicy.h" +#include "nbl/system/IFile.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace nbl::asset +{ +//! Shared read/write helpers that execute a resolved IO plan and collect simple telemetry. +class SInterchangeIO +{ + public: + //! Tracks IO call count and byte distribution for tiny-io diagnostics. + struct STelemetry + { + uint64_t callCount = 0ull; //!< Number of IO calls recorded. + uint64_t totalBytes = 0ull; //!< Sum of processed bytes across all calls. + uint64_t minBytes = std::numeric_limits::max(); //!< Smallest processed byte count observed so far. + + inline void account(const uint64_t bytes) + { + ++callCount; + totalBytes += bytes; + if (bytes < minBytes) + minBytes = bytes; + } + + inline uint64_t getMinOrZero() const { return callCount ? minBytes : 0ull; } + inline uint64_t getAvgOrZero() const { return callCount ? (totalBytes / callCount) : 0ull; } + }; + using SReadTelemetry = STelemetry; + using SWriteTelemetry = STelemetry; + //! Flags large payloads that were served through suspiciously small IO calls. + //! Defaults are 1 MiB, 1 KiB, 64 B, and 1024 calls. 
+ static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const uint64_t bigPayloadThresholdBytes = (1ull << 20), const uint64_t lowAvgBytesThreshold = 1024ull, const uint64_t tinyChunkBytesThreshold = 64ull, const uint64_t tinyChunkCallsThreshold = 1024ull) + { + if (payloadBytes <= bigPayloadThresholdBytes) + return false; + const uint64_t minBytes = telemetry.getMinOrZero(); + const uint64_t avgBytes = telemetry.getAvgOrZero(); + return avgBytes < lowAvgBytesThreshold || (minBytes < tinyChunkBytesThreshold && telemetry.callCount > tinyChunkCallsThreshold); + } + //! Same tiny-io heuristic but pulls thresholds from the resolved IO policy. + static inline bool isTinyIOTelemetryLikely(const STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy) { return isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy.runtimeTuning.tinyIoPayloadThresholdBytes, ioPolicy.runtimeTuning.tinyIoAvgBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinBytesThreshold, ioPolicy.runtimeTuning.tinyIoMinCallCount); } + //! Issues one read request and verifies that the full byte count was returned. + static inline bool readFileExact(system::IFile* file, void* dst, const size_t offset, const size_t bytes, SReadTelemetry* ioTelemetry = nullptr) + { + if (!file || (!dst && bytes != 0ull)) return false; + if (bytes == 0ull) return true; + system::IFile::success_t success; + file->read(success, dst, offset, bytes); + if (success && ioTelemetry) ioTelemetry->account(success.getBytesProcessed()); + return success && success.getBytesProcessed() == bytes; + } + + /** + Reads a byte range using the resolved whole-file or chunked strategy. + When `ioTime` is non-null it also reports wall time in `TimeUnit`. + Default `TimeUnit` is milliseconds. 
+ */ + template> + requires std::same_as> + static inline bool readFileWithPolicy(system::IFile* file, void* dst, const size_t offset, const size_t bytes, const SResolvedFileIOPolicy& ioPlan, SReadTelemetry* ioTelemetry = nullptr, TimeUnit* ioTime = nullptr) + { + using clock_t = std::chrono::high_resolution_clock; + const auto ioStart = ioTime ? clock_t::now() : clock_t::time_point{}; + auto finalize = [&](const bool ok) -> bool { if (ioTime) *ioTime = std::chrono::duration_cast(clock_t::now() - ioStart); return ok; }; + if (!file || (!dst && bytes != 0ull)) + return finalize(false); + if (bytes == 0ull) + return finalize(true); + auto* out = reinterpret_cast(dst); + switch (ioPlan.strategy) + { + case SResolvedFileIOPolicy::Strategy::WholeFile: + return finalize(readFileExact(file, out, offset, bytes, ioTelemetry)); + case SResolvedFileIOPolicy::Strategy::Chunked: + default: + { + const size_t inFlightDepth = ioPlan.chunkedInFlightDepth; + auto inFlight = std::make_unique(inFlightDepth); + size_t submitOffset = 0ull; + size_t activeCount = 0ull; + size_t submitIndex = 0ull; + size_t drainIndex = 0ull; + const uint64_t chunkSizeBytes = ioPlan.chunkSizeBytes(); + auto submitChunk = [&]() -> bool { + if (submitOffset >= bytes || activeCount >= inFlightDepth) + return false; + auto& request = inFlight[submitIndex]; + const size_t toRead = static_cast(std::min(chunkSizeBytes, bytes - submitOffset)); + request.success.emplace(); + file->read(*request.success, out + submitOffset, offset + submitOffset, toRead); + request.bytes = toRead; + request.active = true; + submitOffset += toRead; + submitIndex = (submitIndex + 1ull) % inFlightDepth; + ++activeCount; + return true; + }; + auto drainChunk = [&]() -> bool { + auto& request = inFlight[drainIndex]; + if (!request.active) + return false; + const bool ok = drainChunkedRequest(request, ioTelemetry); + drainIndex = (drainIndex + 1ull) % inFlightDepth; + --activeCount; + return ok; + }; + while (submitOffset < bytes || 
activeCount) + { + while (submitChunk()) {} + if (activeCount && !drainChunk()) + return finalize(false); + } + return finalize(true); + } + } + } + //! Describes one contiguous output buffer written as part of a larger stream. + struct SBufferRange + { + const void* data = nullptr; //!< Start of the contiguous byte range. + size_t byteCount = 0ull; //!< Number of bytes to write from `data`. + }; + //! Writes one or more buffers sequentially at `fileOffset` and advances it on success. + static inline bool writeBuffersWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) + { + if (!file) return false; + const uint64_t chunkSizeBytes = ioPlan.chunkSizeBytes(); + for (const auto& buffer : buffers) + { + if (!buffer.data && buffer.byteCount != 0ull) return false; + if (buffer.byteCount == 0ull) + continue; + const auto* data = reinterpret_cast(buffer.data); + size_t writtenTotal = 0ull; + if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) + { + const size_t toWrite = buffer.byteCount; + system::IFile::success_t success; + file->write(success, data, fileOffset, toWrite); + if (!success) + return false; + const size_t written = success.getBytesProcessed(); + if (written == 0ull) + return false; + if (ioTelemetry) + ioTelemetry->account(written); + writtenTotal += written; + } + else + { + const size_t inFlightDepth = ioPlan.chunkedInFlightDepth; + auto inFlight = std::make_unique(inFlightDepth); + size_t submitOffset = 0ull; + size_t activeCount = 0ull; + size_t submitIndex = 0ull; + size_t drainIndex = 0ull; + auto submitChunk = [&]() -> bool { + if (submitOffset >= buffer.byteCount || activeCount >= inFlightDepth) + return false; + auto& request = inFlight[submitIndex]; + const size_t toWrite = static_cast(std::min(chunkSizeBytes, buffer.byteCount - submitOffset)); + request.success.emplace(); + file->write(*request.success, data + submitOffset, 
fileOffset + submitOffset, toWrite); + request.bytes = toWrite; + request.active = true; + submitOffset += toWrite; + submitIndex = (submitIndex + 1ull) % inFlightDepth; + ++activeCount; + return true; + }; + auto drainChunk = [&]() -> bool { + auto& request = inFlight[drainIndex]; + if (!request.active) + return false; + const bool ok = drainChunkedRequest(request, ioTelemetry); + if (ok) + writtenTotal += request.bytes; + drainIndex = (drainIndex + 1ull) % inFlightDepth; + --activeCount; + return ok; + }; + while (submitOffset < buffer.byteCount || activeCount) + { + while (submitChunk()) {} + if (activeCount && !drainChunk()) + return false; + } + } + fileOffset += writtenTotal; + } + return true; + } + //! Writes one or more buffers starting from file offset `0`. + static inline bool writeBuffersWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const std::span buffers, SWriteTelemetry* ioTelemetry = nullptr) { size_t fileOffset = 0ull; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } + //! Single-buffer convenience wrapper over `writeBuffersWithPolicyAtOffset`. + static inline bool writeFileWithPolicyAtOffset(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, size_t& fileOffset, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicyAtOffset(file, ioPlan, buffers, fileOffset, ioTelemetry); } + //! Single-buffer convenience wrapper over `writeBuffersWithPolicy`. 
+ static inline bool writeFileWithPolicy(system::IFile* file, const SResolvedFileIOPolicy& ioPlan, const void* data, size_t byteCount, SWriteTelemetry* ioTelemetry = nullptr) { const SBufferRange buffers[] = {{.data = data, .byteCount = byteCount}}; return writeBuffersWithPolicy(file, ioPlan, buffers, ioTelemetry); } + private: + struct SChunkedRequest + { + std::optional success = std::nullopt; + size_t bytes = 0ull; + bool active = false; + }; + static inline bool drainChunkedRequest(SChunkedRequest& request, STelemetry* ioTelemetry) + { + const size_t processed = request.success ? request.success->getBytesProcessed():0ull; + request.success.reset(); + request.active = false; + if (processed != request.bytes || processed == 0ull) + return false; + if (ioTelemetry) + ioTelemetry->account(processed); + return true; + } +}; +using SFileIOTelemetry = SInterchangeIO::STelemetry; +using SFileReadTelemetry = SInterchangeIO::SReadTelemetry; +using SFileWriteTelemetry = SInterchangeIO::SWriteTelemetry; +} +#endif diff --git a/include/nbl/asset/interchange/SLoaderRuntimeTuning.h b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h new file mode 100644 index 0000000000..e180325606 --- /dev/null +++ b/include/nbl/asset/interchange/SLoaderRuntimeTuning.h @@ -0,0 +1,303 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_LOADER_RUNTIME_TUNING_H_INCLUDED_ +#define _NBL_ASSET_S_LOADER_RUNTIME_TUNING_H_INCLUDED_ +#include "nbl/asset/interchange/SFileIOPolicy.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace nbl::asset +{ +//! Input describing one loader or hash stage that needs worker and chunk sizing. +struct SLoaderRuntimeTuningRequest +{ + uint64_t inputBytes = 0ull; //!< Total input bytes for the tuned stage. 
+ uint64_t totalWorkUnits = 0ull; //!< Total amount of stage work in logical units. + uint64_t minWorkUnitsPerWorker = 1ull; //!< Minimum work units assigned to one worker. + uint64_t minBytesPerWorker = 1ull; //!< Minimum input bytes assigned to one worker. + uint32_t hardwareThreads = 0u; //!< Hardware thread count override. 0 means auto-detect. + uint32_t hardMaxWorkers = 0u; //!< Hard cap for workers for this request. 0 means no extra cap. + uint32_t targetChunksPerWorker = 0u; //!< Preferred chunk count per worker for this stage. 0 means policy default. + uint64_t minChunkWorkUnits = 1ull; //!< Minimum work units in one chunk. + uint64_t maxChunkWorkUnits = std::numeric_limits::max(); //!< Maximum work units in one chunk. + const uint8_t* sampleData = nullptr; //!< Pointer to representative sample bytes for hybrid sampling. + uint64_t sampleBytes = 0ull; //!< Number of sample bytes available at sampleData. + uint64_t sampleMinWorkUnits = 0ull; //!< Minimum work units required to allow sampling. 0 means policy or auto value. + uint32_t samplePasses = 0u; //!< Sampling pass count override. 0 means policy default. + uint32_t sampleMaxCandidates = 0u; //!< Sampling candidate count override. 0 means policy default. +}; +//! Final worker and chunk layout selected for one stage. +struct SLoaderRuntimeTuningResult +{ + size_t workerCount = 1ull; //!< Selected worker count for the stage. + size_t chunkCount = 1ull; //!< Total chunk count for the stage. + uint64_t chunkWorkUnits = 1ull; //!< Work units per chunk assigned by tuner. +}; +//! Stateless runtime tuner used by loaders and hash stages to size worker pools and chunking. +struct SLoaderRuntimeTuner +{ + private: + //! Aggregated timings collected while probing one worker-count candidate. 
+ struct SBenchmarkSampleStats + { + uint64_t medianNs = 0ull; + uint64_t minNs = 0ull; + uint64_t maxNs = 0ull; + uint64_t totalNs = 0ull; + }; + public: + /** + Dispatches workers `1..N-1` on `std::jthread` + and runs worker `0` on the caller thread. + */ + template + requires std::invocable + static void dispatchWorkers(const size_t workerCount, Fn&& fn) + { + if (workerCount <= 1ull) + return fn(0ull); + std::vector workers; + workers.reserve(workerCount - 1ull); + for (size_t workerIx = 1ull; workerIx < workerCount; ++workerIx) + workers.emplace_back([&fn, workerIx]() { fn(workerIx); }); + fn(0ull); + } + + //! Integer ceil division. Callers must pass a non-zero denominator. + static constexpr uint64_t ceilDiv(const uint64_t numerator, const uint64_t denominator) { return (numerator + denominator - 1ull) / denominator; } + + /** + Measures one sampled memory-touch pass configuration + and returns aggregate wall time across all passes. + */ + template + requires std::same_as> + static inline TimeUnit benchmarkSample(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes) + { + if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) + return TimeUnit::zero(); + const uint32_t passCount = std::max(1u, passes); + std::vector partial(workerCount, 0ull); + uint64_t elapsedNs = 0ull; + using clock_t = std::chrono::steady_clock; + for (uint32_t passIx = 0u; passIx < passCount; ++passIx) + { + const auto passStart = clock_t::now(); + SLoaderRuntimeTuner::dispatchWorkers(workerCount, [&](const size_t workerIx) + { + const uint64_t begin = (sampleBytes * workerIx) / workerCount; + const uint64_t end = (sampleBytes * (workerIx + 1ull)) / workerCount; + const uint8_t* ptr = sampleData + begin; + uint64_t local = 0ull; + for (uint64_t i = 0ull, count = end - begin; i < count; ++i) + local += static_cast(ptr[i]); + partial[workerIx] ^= local; + }); + elapsedNs += 
static_cast(std::chrono::duration_cast(clock_t::now() - passStart).count()); + } + uint64_t reduced = 0ull; + for (const uint64_t v : partial) + reduced ^= v; + static std::atomic sink = 0ull; + sink.fetch_xor(reduced, std::memory_order_relaxed); + return std::chrono::duration_cast(std::chrono::nanoseconds(elapsedNs)); + } + + //! Warms up once and then collects timing observations for one worker-count candidate. + static inline SBenchmarkSampleStats benchmarkSampleStats(const uint8_t* const sampleData, const uint64_t sampleBytes, const size_t workerCount, const uint32_t passes, const uint32_t observations) + { + SBenchmarkSampleStats stats = {}; + if (!sampleData || sampleBytes == 0ull || workerCount == 0ull) + return stats; + const uint32_t observationCount = std::max(1u, observations); + std::vector samples; + samples.reserve(observationCount); + benchmarkSample(sampleData, sampleBytes, workerCount, 1u); + for (uint32_t obsIx = 0u; obsIx < observationCount; ++obsIx) + { + const uint64_t elapsedNs = static_cast(benchmarkSample(sampleData, sampleBytes, workerCount, passes).count()); + if (elapsedNs == 0ull) + continue; + stats.totalNs += elapsedNs; + samples.push_back(elapsedNs); + } + if (samples.empty()) + return {}; + std::sort(samples.begin(), samples.end()); + stats.minNs = samples.front(); + stats.maxNs = samples.back(); + if ((samples.size() & 1ull) != 0ull) + stats.medianNs = samples[samples.size() / 2ull]; + else + stats.medianNs = (samples[samples.size() / 2ull - 1ull] + samples[samples.size() / 2ull]) / 2ull; + return stats; + } + //! Keeps the candidate probe list unique while preserving insertion order. + static inline void appendCandidate(std::vector& dst, const size_t candidate) { if (candidate != 0ull && std::find(dst.begin(), dst.end(), candidate) == dst.end()) dst.push_back(candidate); } + //! Chooses the sample byte budget used by hybrid tuning from the known input size and policy clamps. 
+ static inline uint64_t resolveSampleBytes(const SFileIOPolicy& ioPolicy, const uint64_t knownInputBytes) + { + if (knownInputBytes == 0ull) + return 0ull; + const uint64_t minSampleBytes = std::max(1ull, ioPolicy.runtimeTuning.minSampleBytes); + const uint64_t maxSampleBytes = std::max(minSampleBytes, ioPolicy.runtimeTuning.maxSampleBytes); + const uint64_t cappedMin = std::min(minSampleBytes, knownInputBytes); + const uint64_t cappedMax = std::min(maxSampleBytes, knownInputBytes); + const uint64_t adaptive = std::max(knownInputBytes / 64ull, cappedMin); + return std::clamp(adaptive, cappedMin, cappedMax); + } + //! Returns true when the hash build is small enough to stay on the caller thread. + static inline bool shouldInlineHashBuild(const SFileIOPolicy& ioPolicy, const uint64_t inputBytes) { return inputBytes <= std::max(1ull, ioPolicy.runtimeTuning.hashInlineThresholdBytes); } + //! Resolves the effective hardware thread count and always returns at least one worker. + static inline size_t resolveHardwareThreads(const uint32_t requested = 0u) { const size_t hw = requested ? static_cast(requested) : static_cast(std::thread::hardware_concurrency()); return hw ? hw : 1ull; } + //! Applies worker headroom while keeping at least two workers when parallel hardware is available. + static inline size_t resolveHardMaxWorkers(const size_t hardwareThreads, const uint32_t workerHeadroom) + { + const size_t hw = std::max(1ull, hardwareThreads), minWorkers = hw >= 2ull ? 2ull : 1ull, headroom = static_cast(workerHeadroom); + if (headroom == 0ull) + return hw; + if (hw <= headroom) + return minWorkers; + return std::max(minWorkers, hw - headroom); + } + //! Resolves worker and chunk counts for one stage using policy limits plus optional hybrid sampling. 
+ static inline SLoaderRuntimeTuningResult tune(const SFileIOPolicy& ioPolicy, const SLoaderRuntimeTuningRequest& request) + { + using RTMode = SFileIOPolicy::SRuntimeTuning::Mode; + SLoaderRuntimeTuningResult result = {}; + if (request.totalWorkUnits == 0ull) + return (result.chunkWorkUnits = 0ull), (result.chunkCount = 0ull), result; + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(request.hardwareThreads); + size_t maxWorkers = hw; + if (request.hardMaxWorkers > 0u) + maxWorkers = std::min(maxWorkers, static_cast(request.hardMaxWorkers)); + if (ioPolicy.runtimeTuning.maxWorkers > 0u) + maxWorkers = std::min(maxWorkers, static_cast(ioPolicy.runtimeTuning.maxWorkers)); + maxWorkers = std::max(1ull, maxWorkers); + const uint64_t minWorkUnitsPerWorker = std::max(1ull, request.minWorkUnitsPerWorker); + const uint64_t minBytesPerWorker = std::max(1ull, request.minBytesPerWorker); + const size_t maxByWork = static_cast(SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, minWorkUnitsPerWorker)); + const size_t maxByBytes = request.inputBytes ? static_cast(SLoaderRuntimeTuner::ceilDiv(request.inputBytes, minBytesPerWorker)) : maxWorkers; + const bool heuristicEnabled = ioPolicy.runtimeTuning.mode != RTMode::Sequential; + const bool hybridEnabled = ioPolicy.runtimeTuning.mode == RTMode::Hybrid; + size_t workerCount = 1ull; + if (heuristicEnabled) + workerCount = std::max(1ull, std::min({ maxWorkers, maxByWork, maxByBytes })); + const size_t targetChunksPerWorker = std::max(1ull, static_cast(request.targetChunksPerWorker ? 
request.targetChunksPerWorker : ioPolicy.runtimeTuning.targetChunksPerWorker)); + if (workerCount > 1ull && heuristicEnabled) + { + const double maxOverheadRatio = std::max(0.0, static_cast(ioPolicy.runtimeTuning.maxOverheadRatio)); + const double minExpectedGainRatio = std::clamp(static_cast(ioPolicy.runtimeTuning.minExpectedGainRatio), 0.0, 0.99); + while (workerCount > 1ull) + { + const double idealGain = 1.0 - (1.0 / static_cast(workerCount)); + const double overheadRatio = static_cast(workerCount * targetChunksPerWorker) / static_cast(std::max(1ull, request.totalWorkUnits)); + if (idealGain < minExpectedGainRatio || overheadRatio > maxOverheadRatio) + { + --workerCount; + continue; + } + break; + } + } + const size_t heuristicWorkerCount = std::max(1ull, workerCount); + if (heuristicEnabled && hybridEnabled && request.sampleData != nullptr && request.sampleBytes > 0ull && heuristicWorkerCount > 1ull && maxWorkers > 1ull) + { + const uint64_t autoMinSamplingWorkUnits = std::max(static_cast(targetChunksPerWorker) * 8ull, static_cast(maxWorkers * targetChunksPerWorker)); + const uint64_t minSamplingWorkUnits = request.sampleMinWorkUnits ? request.sampleMinWorkUnits : (ioPolicy.runtimeTuning.samplingMinWorkUnits ? ioPolicy.runtimeTuning.samplingMinWorkUnits : autoMinSamplingWorkUnits); + if (request.totalWorkUnits >= minSamplingWorkUnits) + { + const double samplingBudgetRatio = std::clamp(static_cast(ioPolicy.runtimeTuning.samplingBudgetRatio), 0.0, 0.5); + uint64_t effectiveSampleBytes = request.sampleBytes; + if (request.inputBytes) + effectiveSampleBytes = std::min(effectiveSampleBytes, request.inputBytes); + if (effectiveSampleBytes > 0ull && samplingBudgetRatio > 0.0) + { + if (request.inputBytes > 0ull) + { + // Keep probing lightweight: sample fraction scales with input and parallelism. 
+ const uint64_t sampleDivisor = std::max(4ull, static_cast(heuristicWorkerCount) * static_cast(targetChunksPerWorker)); + const uint64_t adaptiveSampleBytes = std::max(1ull, request.inputBytes / sampleDivisor); + effectiveSampleBytes = std::min(effectiveSampleBytes, adaptiveSampleBytes); + } + const uint32_t samplePasses = request.samplePasses ? request.samplePasses : ioPolicy.runtimeTuning.samplingPasses; + uint32_t maxCandidates = request.sampleMaxCandidates ? request.sampleMaxCandidates : ioPolicy.runtimeTuning.samplingMaxCandidates; + maxCandidates = std::max(2u, maxCandidates); + std::vector candidates; + candidates.reserve(maxCandidates); + appendCandidate(candidates, heuristicWorkerCount); + appendCandidate(candidates, heuristicWorkerCount > 1ull ? (heuristicWorkerCount - 1ull) : 1ull); + appendCandidate(candidates, std::min(maxWorkers, heuristicWorkerCount + 1ull)); + if (heuristicWorkerCount > 2ull) + appendCandidate(candidates, heuristicWorkerCount - 2ull); + if (heuristicWorkerCount + 2ull <= maxWorkers) + appendCandidate(candidates, heuristicWorkerCount + 2ull); + if (candidates.size() > maxCandidates) + candidates.resize(maxCandidates); + // Probe heuristic first and only continue when budget can amortize additional probes. + const auto heuristicStatsProbe = benchmarkSampleStats(request.sampleData, effectiveSampleBytes, heuristicWorkerCount, samplePasses, 2u); + if (heuristicStatsProbe.medianNs > 0ull) + { + const double scale = request.inputBytes ? (static_cast(request.inputBytes) / static_cast(effectiveSampleBytes)) : 1.0; + const uint64_t estimatedFullNs = static_cast(static_cast(heuristicStatsProbe.medianNs) * std::max(1.0, scale)); + const uint64_t samplingBudgetNs = static_cast(static_cast(estimatedFullNs) * samplingBudgetRatio); + uint64_t spentNs = heuristicStatsProbe.totalNs; + const size_t alternativeCandidates = (candidates.size() > 0ull) ? 
(candidates.size() - 1ull) : 0ull; + if (alternativeCandidates > 0ull && spentNs < samplingBudgetNs) + { + const uint64_t spareBudgetNs = samplingBudgetNs - spentNs; + const uint64_t estimatedEvalNs = std::max(1ull, heuristicStatsProbe.medianNs); + const uint64_t estimatedEvaluations = std::max(1ull, spareBudgetNs / estimatedEvalNs); + const uint32_t observations = static_cast(std::clamp(estimatedEvaluations / static_cast(alternativeCandidates), 1ull, 3ull)); + SBenchmarkSampleStats bestStats = heuristicStatsProbe; + size_t bestWorker = heuristicWorkerCount; + for (const size_t candidate : candidates) + { + if (candidate == heuristicWorkerCount) + continue; + if (spentNs >= samplingBudgetNs) + break; + const auto candidateStats = benchmarkSampleStats( + request.sampleData, effectiveSampleBytes, candidate, samplePasses, observations); + if (candidateStats.medianNs == 0ull) + continue; + spentNs += candidateStats.totalNs; + if (candidateStats.medianNs < bestStats.medianNs) + bestStats = candidateStats, bestWorker = candidate; + } + if (bestWorker != heuristicWorkerCount) + { + const double gain = static_cast(heuristicStatsProbe.medianNs - bestStats.medianNs) / static_cast(heuristicStatsProbe.medianNs); + const uint64_t heuristicSpan = heuristicStatsProbe.maxNs - heuristicStatsProbe.minNs; + const uint64_t bestSpan = bestStats.maxNs - bestStats.minNs; + const double heuristicNoise = static_cast(heuristicSpan) / static_cast(std::max(1ull, heuristicStatsProbe.medianNs)); + const double bestNoise = static_cast(bestSpan) / static_cast(std::max(1ull, bestStats.medianNs)); + const double requiredGain = std::max(std::clamp(static_cast(ioPolicy.runtimeTuning.minExpectedGainRatio), 0.0, 0.99), std::clamp(std::max(heuristicNoise, bestNoise) * 1.25, 0.0, 0.99)); + if (gain >= requiredGain) + workerCount = bestWorker; + } + } + } + } + } + } + result.workerCount = std::max(1ull, workerCount); + const uint64_t minChunkWorkUnits = std::max(1ull, request.minChunkWorkUnits); + 
uint64_t maxChunkWorkUnits = std::max(minChunkWorkUnits, request.maxChunkWorkUnits); + const uint64_t desiredChunkCount = static_cast(std::max(1ull, result.workerCount * targetChunksPerWorker)); + uint64_t chunkWorkUnits = SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, desiredChunkCount); + chunkWorkUnits = std::clamp(chunkWorkUnits, minChunkWorkUnits, maxChunkWorkUnits); + result.chunkWorkUnits = chunkWorkUnits; + result.chunkCount = static_cast(SLoaderRuntimeTuner::ceilDiv(request.totalWorkUnits, chunkWorkUnits)); + return result; + } +}; +} +#endif diff --git a/include/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h b/include/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h new file mode 100644 index 0000000000..ed2743e493 --- /dev/null +++ b/include/nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_OBJ_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +#define _NBL_ASSET_S_OBJ_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +namespace nbl::asset +{ +//! Public OBJ aux-view slot ids shared by loader and writer code. +struct SOBJPolygonGeometryAuxLayout +{ + static inline constexpr uint32_t UV0 = 0u; +}; +} +#endif diff --git a/include/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h b/include/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h new file mode 100644 index 0000000000..66a1f3d692 --- /dev/null +++ b/include/nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_PLY_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +#define _NBL_ASSET_S_PLY_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +namespace nbl::asset +{ +//! 
Public PLY aux-view slot ids shared by loader and writer code. +struct SPLYPolygonGeometryAuxLayout +{ + static inline constexpr uint32_t UV0 = 0u; +}; +} +#endif diff --git a/include/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h b/include/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h new file mode 100644 index 0000000000..a49b16b6ee --- /dev/null +++ b/include/nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_STL_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +#define _NBL_ASSET_S_STL_POLYGON_GEOMETRY_AUX_LAYOUT_H_INCLUDED_ +namespace nbl::asset +{ +//! Public STL aux-view slot ids shared by loader and writer code. +struct SSTLPolygonGeometryAuxLayout +{ + static inline constexpr uint32_t COLOR0 = 0u; +}; +} +#endif diff --git a/include/nbl/asset/metadata/CPLYMetadata.h b/include/nbl/asset/metadata/CPLYMetadata.h index 39ad07561a..ec7112bd0a 100644 --- a/include/nbl/asset/metadata/CPLYMetadata.h +++ b/include/nbl/asset/metadata/CPLYMetadata.h @@ -6,6 +6,8 @@ #include "nbl/asset/metadata/IAssetMetadata.h" +#include +#include namespace nbl::asset @@ -13,12 +15,38 @@ namespace nbl::asset class CPLYMetadata final : public IAssetMetadata { - public: - CPLYMetadata() : IAssetMetadata() {} + public: + class CPolygonGeometry : public IPolygonGeometryMetadata + { + public: + using IPolygonGeometryMetadata::IPolygonGeometryMetadata; + inline CPolygonGeometry& operator=(CPolygonGeometry&& other) + { + IPolygonGeometryMetadata::operator=(std::move(other)); + std::swap(m_auxAttributeNames, other.m_auxAttributeNames); + return *this; + } + inline std::string_view getAuxAttributeName(const uint32_t auxViewIx) const + { + return auxViewIx < m_auxAttributeNames.size() ? 
std::string_view(m_auxAttributeNames[auxViewIx]) : std::string_view{}; + } + core::vector m_auxAttributeNames; + }; + CPLYMetadata(const uint32_t geometryCount = 0u) : IAssetMetadata(), m_geometryMetaStorage(createContainer(geometryCount)) {} _NBL_STATIC_INLINE_CONSTEXPR const char* LoaderName = "CPLYMeshFileLoader"; const char* getLoaderName() const override { return LoaderName; } + private: + meta_container_t m_geometryMetaStorage; + friend class CPLYMeshFileLoader; + inline void placeMeta(const uint32_t offset, const ICPUPolygonGeometry* geometry, core::vector&& auxAttributeNames) + { + auto& meta = (*m_geometryMetaStorage)[offset]; + meta = CPolygonGeometry{}; + meta.m_auxAttributeNames = std::move(auxAttributeNames); + IAssetMetadata::insertAssetSpecificMetadata(geometry, &meta); + } }; } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index c3bed0e49e..1d971776ec 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -8,9 +8,11 @@ #include "nbl/core/declarations.h" #include "nbl/asset/ICPUPolygonGeometry.h" +#include "nbl/asset/interchange/SFileIOPolicy.h" #include "nbl/asset/utils/CGeometryManipulator.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" #include "nbl/asset/utils/COBBGenerator.h" +#include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" #include "nbl/builtin/hlsl/shapes/obb.hlsl" namespace nbl::asset @@ -20,25 +22,44 @@ namespace nbl::asset class NBL_API2 CPolygonGeometryManipulator { public: + enum class EContentHashMode : uint8_t + { + MissingOnly, + RecomputeAll + }; + + static void collectUniqueBuffers(const ICPUPolygonGeometry* geo, core::vector>& outBuffers); + static void computeContentHashesParallel(ICPUPolygonGeometry* geo, const SFileIOPolicy& ioPolicy, const EContentHashMode mode = EContentHashMode::MissingOnly); + static inline void 
computeMissingContentHashesParallel(ICPUPolygonGeometry* geo, const SFileIOPolicy& ioPolicy) + { + computeContentHashesParallel(geo, ioPolicy, EContentHashMode::MissingOnly); + } + static inline void recomputeContentHashesParallel(ICPUPolygonGeometry* geo, const SFileIOPolicy& ioPolicy) + { + computeContentHashesParallel(geo, ioPolicy, EContentHashMode::RecomputeAll); + } static inline void recomputeContentHashes(ICPUPolygonGeometry* geo) { - if (!geo) - return; - CGeometryManipulator::recomputeContentHash(geo->getPositionView()); - CGeometryManipulator::recomputeContentHash(geo->getIndexView()); - CGeometryManipulator::recomputeContentHash(geo->getNormalView()); - for (const auto& view : *geo->getJointWeightViews()) - { - CGeometryManipulator::recomputeContentHash(view.indices); - CGeometryManipulator::recomputeContentHash(view.weights); - } - if (auto pView=geo->getJointOBBView(); pView) - CGeometryManipulator::recomputeContentHash(*pView); - for (const auto& view : *geo->getAuxAttributeViews()) - CGeometryManipulator::recomputeContentHash(view); + recomputeContentHashesParallel(geo, SFileIOPolicy{}); } + //! Public aliases for the generic smooth-normal accumulation core. + //! The default path keeps float32 positions to match current geometry storage. + using ESmoothNormalAccumulationMode = CSmoothNormalGenerator::EAccumulationMode; + using SSmoothNormalCorner = CSmoothNormalGenerator::SAccumulatedCorner<>; + using CSmoothNormalAccumulator = CSmoothNormalGenerator::CAccumulatedNormals<>; + + //! Convenience wrapper over the incremental smooth-normal accumulator for the common + //! "indexed positions + generate only missing normals" case. This keeps the existing + //! area-weighted behaviour while reusing the generic accumulator implementation. 
+ static bool generateMissingSmoothNormals( + core::vector& normals, + const core::vector& positions, + const core::vector& indices, + const core::vector& normalNeedsGeneration + ); + // static inline void recomputeRanges(ICPUPolygonGeometry* geo, const bool deduceRangeFormats=true) { @@ -89,6 +110,15 @@ class NBL_API2 CPolygonGeometryManipulator auto addToAABB = [&](auto& aabb)->void { using aabb_t = std::remove_reference_t; + using point_t = typename aabb_t::point_t; + using component_t = std::remove_cv_t>; + hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); + auto addVertexToAABB = [&](const uint32_t vertex_i)->void + { + point_t pt; + geo->getPositionView().decodeElement(vertex_i, pt); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, pt); + }; if (geo->getIndexView()) { for (auto index_i = 0u; index_i != geo->getIndexView().getElementCount(); index_i++) @@ -96,20 +126,17 @@ class NBL_API2 CPolygonGeometryManipulator hlsl::vector vertex_i; geo->getIndexView().decodeElement(index_i, vertex_i); if (isVertexSkinned(geo, vertex_i.x)) continue; - typename aabb_t::point_t pt; - geo->getPositionView().decodeElement(vertex_i.x, pt); - aabb.addPoint(pt); + addVertexToAABB(vertex_i.x); } } else { for (auto vertex_i = 0u; vertex_i != geo->getPositionView().getElementCount(); vertex_i++) { if (isVertexSkinned(geo, vertex_i)) continue; - typename aabb_t::point_t pt; - geo->getPositionView().decodeElement(vertex_i, pt); - aabb.addPoint(pt); + addVertexToAABB(vertex_i); } } + hlsl::shapes::util::assignAABBFromAccumulator(aabb, parsedAABB); }; IGeometryBase::SDataViewBase tmp = geo->getPositionView().composed; tmp.resetRange(); diff --git a/include/nbl/asset/utils/SGeometryNormalCommon.h b/include/nbl/asset/utils/SGeometryNormalCommon.h new file mode 100644 index 0000000000..8900559421 --- /dev/null +++ b/include/nbl/asset/utils/SGeometryNormalCommon.h @@ -0,0 +1,22 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming 
Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_S_GEOMETRY_NORMAL_COMMON_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_NORMAL_COMMON_H_INCLUDED_ +#include "nbl/builtin/hlsl/tgmath.hlsl" +namespace nbl::asset +{ +class SGeometryNormalCommon +{ + public: + static_assert(sizeof(hlsl::float32_t3) == sizeof(float[3])); + static_assert(alignof(hlsl::float32_t3) == alignof(float)); + + static inline hlsl::float32_t3 normalizeOrZero(const hlsl::float32_t3& v, const float epsilon = 0.f) { const float len2 = hlsl::dot(v, v), epsilon2 = epsilon * epsilon; return len2 <= epsilon2 ? hlsl::float32_t3(0.f, 0.f, 0.f) : hlsl::normalize(v); } + + static inline hlsl::float32_t3 computeFaceNormal(const hlsl::float32_t3& a, const hlsl::float32_t3& b, const hlsl::float32_t3& c, const float epsilon = 0.000001f) { return normalizeOrZero(hlsl::cross(b - a, c - a), epsilon); } + + static inline void computeFaceNormal(const float a[3], const float b[3], const float c[3], float normal[3], const float epsilon = 0.000001f) { *(hlsl::float32_t3*)normal = computeFaceNormal(*(const hlsl::float32_t3*)a, *(const hlsl::float32_t3*)b, *(const hlsl::float32_t3*)c, epsilon); } +}; +} +#endif diff --git a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl index e46dfe997b..82001770a1 100644 --- a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl @@ -8,6 +8,7 @@ #include #include #include +#include namespace nbl { @@ -52,7 +53,19 @@ inline matrix rhLookAt( r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); - return r; + return r; +} + +// Transforms an AABB by a full affine 3x4 matrix and returns the enclosing AABB. +// This exists because shapes::util::transform(matrix, AABB) applies only the linear part and leaves translation out. 
+template) +inline shapes::AABB<3, T> pseudo_mul(NBL_CONST_REF_ARG(matrix) lhs, NBL_CONST_REF_ARG(shapes::AABB<3, T>) rhs) +{ + const auto translation = hlsl::transpose(lhs)[3]; + auto transformed = shapes::util::transform(lhs, rhs); + transformed.minVx += translation; + transformed.maxVx += translation; + return transformed; } } diff --git a/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl b/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl new file mode 100644 index 0000000000..eda7ef536f --- /dev/null +++ b/include/nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl @@ -0,0 +1,122 @@ +// Copyright (C) 2018-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_SHAPES_AABB_ACCUMULATOR_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SHAPES_AABB_ACCUMULATOR_INCLUDED_ + + +#include "nbl/builtin/hlsl/shapes/aabb.hlsl" +#include "nbl/builtin/hlsl/array_accessors.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" + + +namespace nbl +{ +namespace hlsl +{ +namespace shapes +{ +namespace util +{ + +template +struct AABBAccumulator3 +{ + using scalar_t = Scalar; + using aabb_t = AABB<3, Scalar>; + using point_t = typename aabb_t::point_t; + + static AABBAccumulator3 create() + { + AABBAccumulator3 retval; + retval.value = aabb_t::create(); + return retval; + } + + bool empty() NBL_CONST_MEMBER_FUNC + { + return + value.minVx.x > value.maxVx.x || + value.minVx.y > value.maxVx.y || + value.minVx.z > value.maxVx.z; + } + + void addPoint(NBL_CONST_REF_ARG(point_t) pt) + { + value.addPoint(pt); + } + + void addXYZ(const Scalar x, const Scalar y, const Scalar z) + { + point_t pt = point_t(x, y, z); + value.addPoint(pt); + } + + aabb_t value; +}; + +template +inline AABBAccumulator3 createAABBAccumulator() +{ + return AABBAccumulator3::create(); +} + +template +inline void extendAABBAccumulator(NBL_REF_ARG(AABBAccumulator3) aabb, const Scalar x, 
const Scalar y, const Scalar z) +{ + aabb.addXYZ(x, y, z); +} + +template && (vector_traits::Dimension >= 3)) +inline void extendAABBAccumulator(NBL_REF_ARG(AABBAccumulator3) aabb, NBL_CONST_REF_ARG(Point) pt) +{ + array_get::scalar_type> getter; + typename AABBAccumulator3::point_t converted = typename AABBAccumulator3::point_t( + Scalar(getter(pt, 0)), + Scalar(getter(pt, 1)), + Scalar(getter(pt, 2)) + ); + aabb.addPoint(converted); +} + +template= 3 && SrcD >= 3) +inline bool assignAABB(NBL_REF_ARG(AABB) dst, NBL_CONST_REF_ARG(AABB) src) +{ + array_set::point_t, DstScalar> setter; + array_get::point_t, SrcScalar> getter; + + if ( + getter(src.minVx, 0) > getter(src.maxVx, 0) || + getter(src.minVx, 1) > getter(src.maxVx, 1) || + getter(src.minVx, 2) > getter(src.maxVx, 2)) + return false; + + dst = AABB::create(); + NBL_UNROLL for (int16_t i = 0; i < 3; ++i) + { + setter(dst.minVx, i, DstScalar(getter(src.minVx, i))); + setter(dst.maxVx, i, DstScalar(getter(src.maxVx, i))); + } + NBL_UNROLL for (int16_t i = 3; i < DstD; ++i) + { + setter(dst.minVx, i, DstScalar(0)); + setter(dst.maxVx, i, DstScalar(0)); + } + return true; +} + +template= 3) +inline bool assignAABBFromAccumulator(NBL_REF_ARG(AABB) dst, NBL_CONST_REF_ARG(AABBAccumulator3) aabb) +{ + if (aabb.empty()) + return false; + + return assignAABB(dst, aabb.value); +} + +} +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/shapes/aabb.hlsl b/include/nbl/builtin/hlsl/shapes/aabb.hlsl index 07219c6687..ec916f2734 100644 --- a/include/nbl/builtin/hlsl/shapes/aabb.hlsl +++ b/include/nbl/builtin/hlsl/shapes/aabb.hlsl @@ -66,28 +66,28 @@ namespace util namespace impl { template -struct intersect_helper> +struct intersect_helper > { using type = AABB; static inline type __call(NBL_CONST_REF_ARG(type) lhs, NBL_CONST_REF_ARG(type) rhs) { type retval; - retval.minVx = hlsl::max(lhs.minVx,rhs.minVx); - retval.maxVx = hlsl::min(lhs.maxVx,rhs.maxVx); + retval.minVx = hlsl::max(lhs.minVx,rhs.minVx); + retval.maxVx = 
hlsl::min(lhs.maxVx,rhs.maxVx); return retval; } }; template -struct union_helper> +struct union_helper > { using type = AABB; static inline type __call(NBL_CONST_REF_ARG(type) lhs, NBL_CONST_REF_ARG(type) rhs) { type retval; - retval.minVx = hlsl::min(lhs.minVx,rhs.minVx); - retval.maxVx = hlsl::max(lhs.maxVx,rhs.maxVx); + retval.minVx = hlsl::min(lhs.minVx,rhs.minVx); + retval.maxVx = hlsl::max(lhs.maxVx,rhs.maxVx); return retval; } }; diff --git a/include/nbl/config/BuildConfigOptions.h.in b/include/nbl/config/BuildConfigOptions.h.in index d130ff4ce2..7bd4e950f3 100644 --- a/include/nbl/config/BuildConfigOptions.h.in +++ b/include/nbl/config/BuildConfigOptions.h.in @@ -35,6 +35,7 @@ #cmakedefine _NBL_COMPILE_WITH_GLTF_LOADER_ // writers +#cmakedefine _NBL_COMPILE_WITH_OBJ_WRITER_ #cmakedefine _NBL_COMPILE_WITH_STL_WRITER_ #cmakedefine _NBL_COMPILE_WITH_PLY_WRITER_ #cmakedefine _NBL_COMPILE_WITH_BAW_WRITER_ @@ -95,4 +96,4 @@ #define NBL_API2 #endif -#endif // __NBL_BUILD_CONFIG_OPTIONS_H_INCLUDED__ \ No newline at end of file +#endif // __NBL_BUILD_CONFIG_OPTIONS_H_INCLUDED__ diff --git a/include/nbl/core/hash/blake.h b/include/nbl/core/hash/blake.h index fb91c9969f..61d1c02d9a 100644 --- a/include/nbl/core/hash/blake.h +++ b/include/nbl/core/hash/blake.h @@ -1,4 +1,4 @@ -// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_CORE_HASH_BLAKE3_H_INCLUDED_ @@ -6,20 +6,25 @@ #include "nbl/config/BuildConfigOptions.h" -#include "blake3.h" +#include +#include #include +#include +#include namespace nbl::core { struct blake3_hash_t final { + static inline constexpr size_t DigestSize = 32ull; + inline bool operator==(const blake3_hash_t&) const = default; // could initialize this to a hash of a zero-length array, // but that requires a .cpp file and a static - uint8_t data[BLAKE3_OUT_LEN]; + uint8_t data[DigestSize]; }; class NBL_API2 blake3_hasher final @@ -37,7 +42,12 @@ class NBL_API2 blake3_hasher final } }; - ::blake3_hasher m_state; + static inline constexpr size_t OpaqueStateSize = 1920ull; + static inline constexpr size_t OpaqueStateAlign = 16ull; + + static void validateOpaqueStateLayout(); + + alignas(OpaqueStateAlign) unsigned char m_state[OpaqueStateSize]; public: blake3_hasher(); @@ -55,7 +65,10 @@ class NBL_API2 blake3_hasher final explicit operator blake3_hash_t() const; }; -// Useful specializations +NBL_API2 blake3_hash_t blake3_hash_buffer(const void* data, size_t bytes); +NBL_API2 blake3_hash_t blake3_hash_buffer_sequential(const void* data, size_t bytes); + +// Convenience specializations for common wrapper inputs. template struct blake3_hasher::update_impl { @@ -113,11 +126,11 @@ struct hash { auto* as_p_uint64_t = reinterpret_cast(blake3.data); size_t retval = as_p_uint64_t[0]; - for (auto i=1; i> 2); return retval; } }; } -#endif // _NBL_CORE_HASH_BLAKE3_H_INCLUDED_ \ No newline at end of file +#endif // _NBL_CORE_HASH_BLAKE3_H_INCLUDED_ diff --git a/include/nbl/logging_macros.h b/include/nbl/logging_macros.h index cf4f63f9bc..97cbdcc0c2 100644 --- a/include/nbl/logging_macros.h +++ b/include/nbl/logging_macros.h @@ -1,8 +1,8 @@ #if defined(NBL_LOG) || defined(NBL_LOG_ERROR) #error redefinition of NBL_LOG/NBL_LOG_ERROR. did you forgot to undefine logging macros somewhere? 
#include "nbl/undefine_logging_macros.h" -#elif !defined(_GIT_INFO_H_INCLUDED_) - #error logging macros require git meta info, include "git_info.h" +#elif !defined(_NBL_GIT_INFO_H_INCLUDED_) + #error logging macros require git meta info, include "nbl/git/info.h" #else - #define NBL_LOG(SEVERITY, FORMAT, ...) NBL_LOG_FUNCTION(FORMAT" [%s][%s - %s:%d]", SEVERITY __VA_OPT__(,) __VA_ARGS__, nbl::gtml::nabla_git_info.commitShortHash, __FUNCTION__, __FILE__, __LINE__); + #define NBL_LOG(SEVERITY, FORMAT, ...) NBL_LOG_FUNCTION(FORMAT" [%s][%s - %s:%d]", SEVERITY __VA_OPT__(,) __VA_ARGS__, nbl::gtml::nabla_git_info.commitShortHash().data(), __FUNCTION__, __FILE__, __LINE__); #define NBL_LOG_ERROR(FORMAT, ...) NBL_LOG(nbl::system::ILogger::ELL_ERROR, FORMAT __VA_OPT__(,) __VA_ARGS__) -#endif \ No newline at end of file +#endif diff --git a/include/nbl/system/CGrowableMemoryFile.h b/include/nbl/system/CGrowableMemoryFile.h new file mode 100644 index 0000000000..aea3e60a15 --- /dev/null +++ b/include/nbl/system/CGrowableMemoryFile.h @@ -0,0 +1,325 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_SYSTEM_C_GROWABLE_MEMORY_FILE_H_INCLUDED_ +#define _NBL_SYSTEM_C_GROWABLE_MEMORY_FILE_H_INCLUDED_ + +#include "nbl/system/IFile.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nbl::system +{ + +namespace impl +{ + +struct CImmediateFileIoResultSetter final : ISystem::IFutureManipulator +{ + using ISystem::IFutureManipulator::set_result; +}; + +struct CNoopMutex +{ + inline void lock() {} + inline void unlock() {} +}; + +class CGrowableMemoryFileStorage +{ + public: + constexpr static inline size_t InitialGrowthBytes = 1ull << 20; // 1 MiB + + inline size_t size() const + { + return m_storage.size(); + } + + inline size_t capacity() const + { + return m_storage.capacity(); + } + + inline void reserve(const size_t reservedSize) + { + m_storage.reserve(reservedSize); + } + + inline void clear() + { + m_storage.clear(); + } + + inline const std::byte* data() const + { + return m_storage.empty() ? nullptr : m_storage.data(); + } + + inline std::byte* data() + { + return m_storage.empty() ? 
nullptr : m_storage.data(); + } + + inline std::vector copyData() const + { + return m_storage; + } + + inline size_t read(void* const buffer, const size_t offset, const size_t sizeToRead) const + { + if (offset >= m_storage.size()) + return 0ull; + + const size_t clampedRead = std::min(sizeToRead, m_storage.size() - offset); + std::memcpy(buffer, m_storage.data() + offset, clampedRead); + return clampedRead; + } + + inline size_t write(const void* const buffer, const size_t offset, const size_t sizeToWrite) + { + const size_t requiredSize = offset + sizeToWrite; + if (requiredSize > m_storage.capacity()) + reserve(growCapacity(requiredSize)); + if (requiredSize > m_storage.size()) + m_storage.resize(requiredSize); + std::memcpy(m_storage.data() + offset, buffer, sizeToWrite); + return sizeToWrite; + } + + private: + inline size_t growCapacity(const size_t requiredSize) const + { + size_t currentCapacity = m_storage.capacity(); + if (currentCapacity == 0ull) + currentCapacity = InitialGrowthBytes; + + size_t nextCapacity = currentCapacity; + while (nextCapacity < requiredSize) + { + const size_t growth = std::max(nextCapacity, InitialGrowthBytes); + if (nextCapacity > std::numeric_limits::max() - growth) + return requiredSize; + nextCapacity += growth; + } + return nextCapacity; + } + + std::vector m_storage; +}; + +template +class IGrowableMemoryFile : public IFile +{ + protected: + using mutex_t = MutexType; + + inline explicit IGrowableMemoryFile(path&& filename, const size_t reservedSize = 0ull, const time_point_t initialModified = std::chrono::utc_clock::now()) + : IFile(std::move(filename), core::bitflag(E_CREATE_FLAGS::ECF_READ_WRITE), initialModified) + { + reserve(reservedSize); + } + + template + inline decltype(auto) withLockedStorage(Fn&& fn) + { + std::lock_guard lock(m_mutex); + return std::forward(fn)(m_storage); + } + + template + inline decltype(auto) withLockedStorage(Fn&& fn) const + { + std::lock_guard lock(m_mutex); + return 
std::forward(fn)(m_storage); + } + + public: + inline size_t getSize() const override + { + return withLockedStorage([](const CGrowableMemoryFileStorage& storage) { + return storage.size(); + }); + } + + inline size_t capacity() const + { + return withLockedStorage([](const CGrowableMemoryFileStorage& storage) { + return storage.capacity(); + }); + } + + //! Optional capacity hint for callers that can estimate the final serialized size. + /** The internal storage already uses an adaptive growth policy, so this is only a performance hint. */ + inline void reserve(const size_t reservedSize) + { + withLockedStorage([reservedSize](CGrowableMemoryFileStorage& storage) { + storage.reserve(reservedSize); + }); + } + + inline void clear() + { + withLockedStorage([](CGrowableMemoryFileStorage& storage) { + storage.clear(); + }); + setLastWriteTime(); + } + + inline std::vector copyData() const + { + return withLockedStorage([](const CGrowableMemoryFileStorage& storage) { + return storage.copyData(); + }); + } + + protected: + inline void* getMappedPointer_impl() override + { + return nullptr; + } + + inline const void* getMappedPointer_impl() const override + { + return nullptr; + } + + inline void unmappedRead(ISystem::future_t& fut, void* buffer, size_t offset, size_t sizeToRead) override + { + static const CImmediateFileIoResultSetter resultSetter = {}; + const size_t processed = withLockedStorage([buffer, offset, sizeToRead](const CGrowableMemoryFileStorage& storage) { + return storage.read(buffer, offset, sizeToRead); + }); + resultSetter.set_result(fut, processed); + } + + inline void unmappedWrite(ISystem::future_t& fut, const void* buffer, size_t offset, size_t sizeToWrite) override + { + static const CImmediateFileIoResultSetter resultSetter = {}; + const size_t processed = withLockedStorage([buffer, offset, sizeToWrite](CGrowableMemoryFileStorage& storage) { + return storage.write(buffer, offset, sizeToWrite); + }); + resultSetter.set_result(fut, processed); + } + 
+ private: + mutable mutex_t m_mutex; + CGrowableMemoryFileStorage m_storage; +}; + +} + +//! A lightweight growable in-memory implementation of `system::IFile`. +/** + This class stores file contents in a dynamically growing byte buffer while preserving the regular + Nabla file-oriented API. It is useful in flows that want `IFile*` interoperability without + forcing an obligatory round-trip through the host filesystem. + + Representative use-cases include: + - serialization roundtrip validation + - benchmark or profiling harnesses that want to separate codec work from storage latency + - tool pipelines that need a temporary serialized representation but do not require a persistent file + + The object grows on demand during writes and can later be consumed by APIs that read from + `system::IFile*`, for example `IAssetManager::getAsset(system::IFile*, supposedFilename, ...)`. + + Allocation policy: + - storage growth is handled internally + - capacity expansion is geometric rather than exact-size-only + - the first growth step uses a minimum allocation quantum of `1 MiB` + - callers may still provide an explicit `reserve(...)` hint if they already know the likely output size + + This keeps the common case simple for callers while reducing the amount of repeated reallocation + and copying that would otherwise happen during long sequential write streams. 
+ + Important notes: + - reads and writes are positional and operate on the current logical size + - `getMappedPointer()` intentionally returns `nullptr` + The storage is growable, so exposing a stable mapped pointer would be misleading + - this class is not thread-safe + Concurrent read, write, reserve, clear, or direct `data()` access on the same object requires external synchronization +*/ +class CGrowableMemoryFile final : public impl::IGrowableMemoryFile +{ + using base_t = impl::IGrowableMemoryFile; + + public: + using base_t::capacity; + using base_t::clear; + using base_t::copyData; + using base_t::reserve; + + inline explicit CGrowableMemoryFile(path&& filename, const size_t reservedSize = 0ull, const time_point_t initialModified = std::chrono::utc_clock::now()) + : base_t(std::move(filename), reservedSize, initialModified) + { + } + + inline const std::byte* data() const + { + return withLockedStorage([](const impl::CGrowableMemoryFileStorage& storage) { + return storage.data(); + }); + } + + inline std::byte* data() + { + return withLockedStorage([](impl::CGrowableMemoryFileStorage& storage) { + return storage.data(); + }); + } +}; + +//! A synchronized growable in-memory implementation of `system::IFile`. +/** + This variant serializes internal operations with a mutex. It is intended for cases where the same + memory-backed file object may be touched from multiple threads and external synchronization is not + desirable or not available. + + The synchronized variant intentionally does not expose raw `data()` accessors. A raw pointer would + not carry any lifetime relationship to the internal lock and would therefore invite accidental use + after another thread mutates or reallocates the storage. 
Callers that need to inspect the contents + can either: + - take a snapshot with `copyData()` + - use `withLockedData(...)` and keep any pointer or span-like view strictly inside the callback +*/ +class CSynchronizedGrowableMemoryFile final : public impl::IGrowableMemoryFile +{ + using base_t = impl::IGrowableMemoryFile; + + public: + using base_t::capacity; + using base_t::clear; + using base_t::copyData; + using base_t::reserve; + + inline explicit CSynchronizedGrowableMemoryFile(path&& filename, const size_t reservedSize = 0ull, const time_point_t initialModified = std::chrono::utc_clock::now()) + : base_t(std::move(filename), reservedSize, initialModified) + { + } + + template + inline decltype(auto) withLockedData(Fn&& fn) + { + return withLockedStorage([&fn](impl::CGrowableMemoryFileStorage& storage) -> decltype(auto) { + return std::forward(fn)(storage.data(), storage.size()); + }); + } + + template + inline decltype(auto) withLockedData(Fn&& fn) const + { + return withLockedStorage([&fn](const impl::CGrowableMemoryFileStorage& storage) -> decltype(auto) { + return std::forward(fn)(storage.data(), storage.size()); + }); + } +}; + +} + +#endif diff --git a/include/nbl/system/CSystemWin32.h b/include/nbl/system/CSystemWin32.h index 7c73525c43..97ab6ce709 100644 --- a/include/nbl/system/CSystemWin32.h +++ b/include/nbl/system/CSystemWin32.h @@ -22,7 +22,7 @@ class NBL_API2 CSystemWin32 : public ISystem public: CCaller(ISystem* _system) : ICaller(_system) {} - core::smart_refctd_ptr createFile(const std::filesystem::path& filename, const core::bitflag flags) override final; + core::smart_refctd_ptr createFile(const std::filesystem::path& filename, core::bitflag flags) override final; }; public: diff --git a/include/nbl/system/IFile.h b/include/nbl/system/IFile.h index 0ab739ba4a..f2c615c311 100644 --- a/include/nbl/system/IFile.h +++ b/include/nbl/system/IFile.h @@ -81,11 +81,16 @@ class IFile : public IFileBase, private ISystem::IFutureManipulator }; void 
read(success_t& fut, void* buffer, size_t offset, size_t sizeToRead) { + // The higher-level IO helpers may queue multiple chunked operations before waiting on the futures. + // Backends therefore need to treat `offset` as the request-local byte position rather than relying on + // a mutable shared file pointer hidden inside the OS file handle. read(fut.m_internalFuture,buffer,offset,sizeToRead); fut.sizeToProcess = sizeToRead; } void write(success_t& fut, const void* buffer, size_t offset, size_t sizeToWrite) { + // Same requirement as `read(...)`: writes are logically positional requests and must honor the explicit + // byte offset even when multiple operations are submitted before the caller drains their futures. write(fut.m_internalFuture,buffer,offset,sizeToWrite); fut.sizeToProcess = sizeToWrite; } diff --git a/include/nbl/system/ISystem.h b/include/nbl/system/ISystem.h index 65f0351582..9ee5f0bb83 100644 --- a/include/nbl/system/ISystem.h +++ b/include/nbl/system/ISystem.h @@ -105,7 +105,9 @@ class NBL_API2 ISystem : public core::IReferenceCounted void createFile( future_t>& future, // creation may happen on a dedicated thread, so its async path filename, // absolute path within our virtual filesystem - const core::bitflag flags, // access flags (IMPORTANT: files from most archives wont open with ECF_WRITE bit) + const core::bitflag flags, // intended access flags (IMPORTANT: files from most archives wont open with ECF_WRITE bit) + // actual file flags may be downgraded when backend/archive cannot honor all requested flags + // for example a backend may open the file successfully but strip mapping/coherency when it cannot provide them const std::string_view& accessToken="" // usually password for archives, but should be SSH key for URL downloads ); @@ -148,6 +150,7 @@ class NBL_API2 ISystem : public core::IReferenceCounted struct SystemInfo { uint64_t cpuFrequencyHz = 0u; + uint32_t physicalCoreCount = 0u; // in bytes uint64_t totalMemory = 0u; @@ -156,6 
+159,7 @@ class NBL_API2 ISystem : public core::IReferenceCounted uint32_t desktopResX = 0u; uint32_t desktopResY = 0u; + std::string cpuName = "Unknown"; std::string OSFullName = "Unknown"; }; virtual SystemInfo getSystemInfo() const = 0; @@ -168,6 +172,7 @@ class NBL_API2 ISystem : public core::IReferenceCounted { public: // each per-platform backend must override this function + // returned files may expose fewer flags than requested if the backend had to fall back virtual core::smart_refctd_ptr createFile(const std::filesystem::path& filename, const core::bitflag flags) = 0; // these contain some hoisted common sense checks @@ -219,7 +224,7 @@ class NBL_API2 ISystem : public core::IReferenceCounted using retval_t = core::smart_refctd_ptr; void operator()(core::StorageTrivializer* retval, ICaller* _caller); - char filename[MAX_FILENAME_LENGTH] {}; + std::filesystem::path filename; IFileBase::E_CREATE_FLAGS flags; }; struct SRequestParams_READ diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 8f0f1fce30..b863de3030 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -10,7 +10,7 @@ #include "nbl/video/IGPUCommandPool.h" #include "nbl/video/IQueue.h" -#include "git_info.h" +#include "nbl/git/info.h" #define NBL_LOG_FUNCTION m_logger.log #include "nbl/logging_macros.h" diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index ae351fdecd..983f6c6b5a 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -15,7 +15,7 @@ #include "nbl/video/CThreadSafeQueueAdapter.h" #include "nbl/video/CJITIncludeLoader.h" -#include "git_info.h" +#include "nbl/git/info.h" #define NBL_LOG_FUNCTION m_logger.log #include "nbl/logging_macros.h" @@ -1606,4 +1606,4 @@ inline bool ILogicalDevice::validateMemoryBarrier(const uint32_t queueFamilyInde } // namespace nbl::video #include "nbl/undef_logging_macros.h" -#endif 
//_NBL_VIDEO_I_LOGICAL_DEVICE_H_INCLUDED_ \ No newline at end of file +#endif //_NBL_VIDEO_I_LOGICAL_DEVICE_H_INCLUDED_ diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 18a25c8619..f0aabc1c95 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -1,5 +1,5 @@ -# Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -# Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +# Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +# Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. # This file is part of the "Nabla Engine". # For conditions of distribution and use, see copyright notice in nabla.h @@ -50,12 +50,12 @@ include(common) #[[ Loaders and writers compile options available to edit by user All revelant _NBL_COMPILE_WITH will be there]] option(_NBL_COMPILE_WITH_MTL_LOADER_ "Compile with MTL Loader" OFF) #default off until Material Compiler 2 -option(_NBL_COMPILE_WITH_OBJ_LOADER_ "Compile with OBJ Loader" OFF) #default off until Material Compiler 2 -#option(_NBL_COMPILE_WITH_OBJ_WRITER_ "Compile with OBJ Writer" ON) uncomment when writer exists -option(_NBL_COMPILE_WITH_STL_LOADER_ "Compile with STL Loader" OFF) #default off until reimplemented -option(_NBL_COMPILE_WITH_STL_WRITER_ "Compile with STL Writer" OFF) #default off until reimplemented +option(_NBL_COMPILE_WITH_OBJ_LOADER_ "Compile with OBJ Loader" ON) +option(_NBL_COMPILE_WITH_OBJ_WRITER_ "Compile with OBJ Writer" ON) +option(_NBL_COMPILE_WITH_STL_LOADER_ "Compile with STL Loader" ON) +option(_NBL_COMPILE_WITH_STL_WRITER_ "Compile with STL Writer" ON) option(_NBL_COMPILE_WITH_PLY_LOADER_ "Compile with PLY Loader" ON) -option(_NBL_COMPILE_WITH_PLY_WRITER_ "Compile with PLY Writer" OFF) #default off until reimplemented +option(_NBL_COMPILE_WITH_PLY_WRITER_ "Compile with PLY Writer" ON) option(_NBL_COMPILE_WITH_JPG_LOADER_ "Compile with JPG Loader" ON) option(_NBL_COMPILE_WITH_JPG_WRITER_ "Compile with JPG Writer" ON) 
option(_NBL_COMPILE_WITH_PNG_LOADER_ "Compile with PNG Loader" ON) @@ -165,6 +165,7 @@ set(NBL_ASSET_SOURCES asset/ICPUImage.cpp asset/ICPUPolygonGeometry.cpp asset/interchange/IAssetWriter.cpp + asset/interchange/IGeometryWriter.cpp asset/interchange/IAssetLoader.cpp # Shaders @@ -199,6 +200,7 @@ set(NBL_ASSET_SOURCES asset/interchange/CGLTFLoader.cpp # Mesh writers + asset/interchange/COBJMeshWriter.cpp asset/interchange/CPLYMeshWriter.cpp asset/interchange/CSTLMeshWriter.cpp asset/interchange/CGLTFWriter.cpp @@ -504,7 +506,6 @@ endif() # blake3 add_dependencies(Nabla blake3) -list(APPEND PUBLIC_BUILD_INCLUDE_DIRS $) if(NBL_STATIC_BUILD) target_link_libraries(Nabla INTERFACE blake3) else() @@ -554,6 +555,8 @@ else() endif() list(APPEND PUBLIC_BUILD_INCLUDE_DIRS ${THIRD_PARTY_SOURCE_DIR}/simdjson) +list(APPEND PUBLIC_BUILD_INCLUDE_DIRS ${THIRD_PARTY_SOURCE_DIR}/fast_float/include) + # libjpeg add_dependencies(Nabla jpeg-static) if(NBL_STATIC_BUILD) diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index 29930bccd9..4d5e762aa3 100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h @@ -8,6 +8,7 @@ #include "nbl/asset/interchange/CHLSLLoader.h" #include "nbl/asset/interchange/CSPVLoader.h" +#include #include #include @@ -55,6 +56,10 @@ #include "nbl/asset/interchange/CSTLMeshWriter.h" #endif +#ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ +#include "nbl/asset/interchange/COBJMeshWriter.h" +#endif + #ifdef _NBL_COMPILE_WITH_PLY_WRITER_ #include "nbl/asset/interchange/CPLYMeshWriter.h" #endif @@ -160,6 +165,9 @@ void IAssetManager::addLoadersAndWriters() #ifdef _NBL_COMPILE_WITH_GLTF_WRITER_ addAssetWriter(core::make_smart_refctd_ptr()); #endif +#ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ + addAssetWriter(core::make_smart_refctd_ptr()); +#endif #ifdef _NBL_COMPILE_WITH_PLY_WRITER_ addAssetWriter(core::make_smart_refctd_ptr()); #endif @@ -195,22 +203,24 @@ SAssetBundle IAssetManager::getAssetInHierarchy_impl(system::IFile* _file, const IAssetLoader::SAssetLoadContext ctx{params,_file}; std::filesystem::path filename = _file ? _file->getFileName() : std::filesystem::path(_supposedFilename); - auto file = _override->getLoadFile(_file, filename.string(), ctx, _hierarchyLevel); + auto filenameString = filename.string(); + auto file = _override->getLoadFile(_file, filenameString, ctx, _hierarchyLevel); filename = file.get() ? file->getFileName() : std::filesystem::path(_supposedFilename); + filenameString = filename.string(); // TODO: should we remove? 
(is a root absolute path working dir ever needed) if (params.workingDirectory.empty()) params.workingDirectory = filename.parent_path(); - const uint64_t levelFlags = params.cacheFlags >> ((uint64_t)_hierarchyLevel * 2ull); + const auto levelFlags = IAssetLoader::caching_flags_t(static_cast(params.cacheFlags.value) >> ((uint64_t)_hierarchyLevel * 2ull)); SAssetBundle bundle; - if ((levelFlags & IAssetLoader::ECF_DUPLICATE_TOP_LEVEL) != IAssetLoader::ECF_DUPLICATE_TOP_LEVEL) + if (!levelFlags.hasFlags(IAssetLoader::ECF_DUPLICATE_TOP_LEVEL)) { - auto found = findAssets(filename.string()); + auto found = findAssets(filenameString); if (found->size()) return _override->chooseRelevantFromFound(found->begin(), found->end(), ctx, _hierarchyLevel); - else if (!(bundle = _override->handleSearchFail(filename.string(), ctx, _hierarchyLevel)).getContents().empty()) + else if (!(bundle = _override->handleSearchFail(filenameString, ctx, _hierarchyLevel)).getContents().empty()) return bundle; } @@ -220,30 +230,36 @@ SAssetBundle IAssetManager::getAssetInHierarchy_impl(system::IFile* _file, const auto ext = system::extension_wo_dot(filename); auto capableLoadersRng = m_loaders.perFileExt.findRange(ext); - // loaders associated with the file's extension tryout + core::vector extensionLoaders; + extensionLoaders.reserve(8u); for (auto& loader : capableLoadersRng) { - if (loader.second->isALoadableFileFormat(file.get()) && !(bundle = loader.second->loadAsset(file.get(), params, _override, _hierarchyLevel)).getContents().empty()) + auto* extensionLoader = loader.second; + extensionLoaders.push_back(extensionLoader); + if (extensionLoader->isALoadableFileFormat(file.get()) && !(bundle = extensionLoader->loadAsset(file.get(), params, _override, _hierarchyLevel)).getContents().empty()) break; } - for (auto loaderItr = std::begin(m_loaders.vector); bundle.getContents().empty() && loaderItr != std::end(m_loaders.vector); ++loaderItr) // all loaders tryout + for (auto loaderItr = 
std::begin(m_loaders.vector); bundle.getContents().empty() && loaderItr != std::end(m_loaders.vector); ++loaderItr) { - if ((*loaderItr)->isALoadableFileFormat(file.get()) && !(bundle = (*loaderItr)->loadAsset(file.get(), params, _override, _hierarchyLevel)).getContents().empty()) + auto* loader = loaderItr->get(); + if (std::find(extensionLoaders.begin(), extensionLoaders.end(), loader) != extensionLoaders.end()) + continue; + if (loader->isALoadableFileFormat(file.get()) && !(bundle = loader->loadAsset(file.get(), params, _override, _hierarchyLevel)).getContents().empty()) break; } if (!bundle.getContents().empty() && - ((levelFlags & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL) != IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL) && - ((levelFlags & IAssetLoader::ECF_DUPLICATE_TOP_LEVEL) != IAssetLoader::ECF_DUPLICATE_TOP_LEVEL)) + !levelFlags.hasFlags(IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL) && + !levelFlags.hasFlags(IAssetLoader::ECF_DUPLICATE_TOP_LEVEL)) { - _override->insertAssetIntoCache(bundle, filename.string(), ctx.params, _hierarchyLevel); + _override->insertAssetIntoCache(bundle, filenameString, ctx.params, _hierarchyLevel); } else if (bundle.getContents().empty()) { bool addToCache; - bundle = _override->handleLoadFail(addToCache, file.get(), filename.string(), filename.string(), ctx, _hierarchyLevel); + bundle = _override->handleLoadFail(addToCache, file.get(), filenameString, filenameString, ctx, _hierarchyLevel); if (!bundle.getContents().empty() && addToCache) - _override->insertAssetIntoCache(bundle, filename.string(), ctx.params, _hierarchyLevel); + _override->insertAssetIntoCache(bundle, filenameString, ctx.params, _hierarchyLevel); } return bundle; } diff --git a/src/nbl/asset/ICPUImage.cpp b/src/nbl/asset/ICPUImage.cpp index cd3f884890..1e06f4ccf7 100644 --- a/src/nbl/asset/ICPUImage.cpp +++ b/src/nbl/asset/ICPUImage.cpp @@ -1,4 +1,5 @@ #include +#include #include "nbl/asset/ICPUImage.h" #include "nbl/asset/filters/CMatchedSizeInOutImageFilterCommon.h" 
#include "nbl/asset/filters/CFlattenRegionsImageFilter.h" @@ -37,7 +38,7 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter const auto product = parameters.mipLevels * parameters.arrayLayers; size_t bufferSize = product * sizeof(CState::outHash); - bufferSize += product * sizeof(blake3_hasher); + bufferSize += product * sizeof(core::blake3_hasher); bufferSize += getFlattenBufferSize(input); return bufferSize; @@ -136,9 +137,11 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter const auto product = parameters.mipLevels * parameters.arrayLayers; scratch.hashes = { static_cast(state->scratch.memory), product }; - scratch.hashers = { reinterpret_cast(scratch.hashes.data() + scratch.hashes.size()), product }; + scratch.hashers = { reinterpret_cast(scratch.hashes.data() + scratch.hashes.size()), product }; scratch.flatten = { .offset = scratch.hashes.size_bytes() + scratch.hashers.size_bytes(), .size = state->scratch.size - scratch.hashers.size_bytes() - scratch.hashes.size_bytes(), .buffer = buffer}; } + for (auto& hasher : scratch.hashers) + std::construct_at(&hasher); const auto isFullyFlatten = scratch.flatten.size == 0ull; @@ -225,7 +228,7 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter auto* const hasher = hashers + pOffset; auto* const hash = hashes + pOffset; - blake3_hasher_init(hasher); + hasher->reset(); IImage::SSubresourceLayers subresource = { .aspectMask = static_cast(0u), .mipLevel = miplevel, .baseArrayLayer = layer, .layerCount = 1u }; // stick to given mip level and single layer CMatchedSizeInOutImageFilterCommon::state_type::TexelRange range = { .offset = {}, .extent = { parameters.extent.width, parameters.extent.height, parameters.extent.depth } }; // cover all texels within layer range, take 0th mip level size to not clip anything at all @@ -233,7 +236,7 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter auto 
executePerTexelOrBlock = [&](uint32_t readBlockArrayOffset, core::vectorSIMDu32 readBlockPos) -> void { - blake3_hasher_update(hasher, inData + readBlockArrayOffset, texelOrBlockByteSize); + hasher->update(inData + readBlockArrayOffset, texelOrBlockByteSize); }; const auto regions = image->getRegions(miplevel); @@ -242,7 +245,7 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter if (!performNullHash) CBasicImageFilterCommon::executePerRegion(std::execution::seq, image, executePerTexelOrBlock, regions, clipFunctor); // fire the hasher for a layer, note we forcing seq policy because texels/blocks cannot be handled with par policies when we hash them - blake3_hasher_finalize(hasher, reinterpret_cast(hash), sizeof(CState::hash_t)); // finalize hash for layer + put it to heap for given mip level + *hash = static_cast(*hasher); // finalize hash for layer + put it to heap for given mip level }; std::for_each(policy, layers.begin(), layers.end(), executePerLayer); // fire per layer for given given mip level with specified execution policy, yes you can use parallel policy here if you want at it will work @@ -255,8 +258,8 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter time to use them and compute final hash */ - blake3_hasher hasher; - blake3_hasher_init(&hasher); + core::blake3_hasher hasher; + hasher.reset(); { for (auto miplevel = 0u; miplevel < parameters.mipLevels; ++miplevel) { @@ -265,11 +268,11 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter for (auto layer = 0u; layer < parameters.arrayLayers; ++layer) { auto* hash = hashes + mipOffset + layer; - blake3_hasher_update(&hasher, hash->data, sizeof(CState::hash_t)); + hasher.update(hash->data, sizeof(CState::hash_t)); } } - blake3_hasher_finalize(&hasher, reinterpret_cast(&state->outHash), sizeof(CState::hash_t)); // finalize output hash for whole image given all hashes + state->outHash = static_cast(hasher); // 
finalize output hash for whole image given all hashes } return true; @@ -284,7 +287,7 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter struct ScratchMap { std::span hashes; // hashes, single hash is obtained from given miplevel & layer, full hash for an image is a hash of this hash buffer - std::span hashers; // hashers, used to produce a hash + std::span hashers; // hashers, used to produce a hash asset::SBufferRange flatten; // tightly packed texels from input, no memory gaps }; }; @@ -307,4 +310,4 @@ core::blake3_hash_t ICPUImage::computeContentHash() const assert(passed); // actually this should never fail, leaving in case return state.outHash; -} \ No newline at end of file +} diff --git a/src/nbl/asset/interchange/CGLIWriter.h b/src/nbl/asset/interchange/CGLIWriter.h index db88583054..fccde37735 100644 --- a/src/nbl/asset/interchange/CGLIWriter.h +++ b/src/nbl/asset/interchange/CGLIWriter.h @@ -35,9 +35,9 @@ class CGLIWriter final : public asset::IAssetWriter uint64_t getSupportedAssetTypesBitfield() const override { return asset::IAsset::ET_IMAGE_VIEW; } - uint32_t getSupportedFlags() override { return asset::EWF_NONE | asset::EWF_BINARY; } + writer_flags_t getSupportedFlags() override { return asset::EWF_BINARY; } - uint32_t getForcedFlags() override { return asset::EWF_NONE | asset::EWF_BINARY; } + writer_flags_t getForcedFlags() override { return asset::EWF_BINARY; } bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; diff --git a/src/nbl/asset/interchange/CGLTFLoader.cpp b/src/nbl/asset/interchange/CGLTFLoader.cpp index fde9552179..c7c4be034b 100644 --- a/src/nbl/asset/interchange/CGLTFLoader.cpp +++ b/src/nbl/asset/interchange/CGLTFLoader.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 AnastaZIuk +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in Nabla.h @@ -60,7 +60,7 @@ using namespace nbl::asset; core::smart_refctd_ptr glslFile = loadBuiltinData(decltype(constexprStringType)::value); auto glsl = asset::ICPUBuffer::create({ glslFile->getSize() }); - memcpy(glsl->getPointer(),glslFile->getMappedPointer(),glsl->getSize()); + memcpy(glsl->getPointer(),static_cast(glslFile.get())->getMappedPointer(),glsl->getSize()); auto unspecializedShader = core::make_smart_refctd_ptr(std::move(glsl), stage, asset::ICPUShader::E_CONTENT_TYPE::ECT_GLSL, stage != ICPUShader::ESS_VERTEX ? "?IrrlichtBAW glTFLoader FragmentShader?" : "?IrrlichtBAW glTFLoader VertexShader?"); if (extraDefine) @@ -1274,26 +1274,26 @@ using namespace nbl::asset; auto* packedJointsData = reinterpret_cast(reinterpret_cast(vOverrideRepackedJointsBuffer->getPointer()) + vAttributeIx * repackJointsTexelByteSize); auto* packedWeightsData = reinterpret_cast(reinterpret_cast(vOverrideRepackedWeightsBuffer->getPointer()) + vAttributeIx * repackWeightsTexelByteSize); - auto quantize = [&](const core::vectorSIMDf& input, void* data, const E_FORMAT requestQuantizeFormat) + auto quantize = [&](const hlsl::float32_t4& input, void* data, const E_FORMAT requestQuantizeFormat) { - return ICPUMeshBuffer::setAttribute(input, data, requestQuantizeFormat); + return ICPUMeshBuffer::setAttribute(&input[0], data, requestQuantizeFormat); }; auto decodeQuant = [&](void* data, const E_FORMAT requestQuantizeFormat) { - core::vectorSIMDf out; - ICPUMeshBuffer::getAttribute(out, data, requestQuantizeFormat); + hlsl::float32_t4 out = {}; + ICPUMeshBuffer::getAttribute(&out[0], data, requestQuantizeFormat); return out; }; - core::vectorSIMDf packedWeightsStream; //! always go with full vectorSIMDf stream, weights being not used are leaved with default vector's compoment value and are not considered + hlsl::float32_t4 packedWeightsStream = {}; //! 
always go with full float4 stream, weights being not used are leaved with default vector's compoment value and are not considered for (uint16_t i = 0, vxSkinComponentOffset = 0; i < 4u; ++i) //! packing { if (unpackedWeightsData[i]) { packedJointsData[vxSkinComponentOffset] = unpackedJointsData[i]; - packedWeightsStream.pointer[i] = packedWeightsData[vxSkinComponentOffset] = unpackedWeightsData[i]; + packedWeightsStream[i] = packedWeightsData[vxSkinComponentOffset] = unpackedWeightsData[i]; ++vxSkinComponentOffset; assert(vxSkinComponentOffset <= maxJointsPerVertex); @@ -1309,14 +1309,14 @@ using namespace nbl::asset; const E_FORMAT requestQuantFormat = std::get(encode); quantize(packedWeightsStream, quantBuffer, requestQuantFormat); - core::vectorSIMDf quantsDecoded = decodeQuant(quantBuffer, requestQuantFormat); + hlsl::float32_t4 quantsDecoded = decodeQuant(quantBuffer, requestQuantFormat); for (uint16_t i = 0; i < MAX_INFLUENCE_WEIGHTS_PER_VERTEX; ++i) { - const auto& weightInput = packedWeightsStream.pointer[i]; + const auto weightInput = packedWeightsStream[i]; if (weightInput) { - const typename QuantRequest::ERROR_TYPE& errorComponent = errorBuffer[i] = core::abs(quantsDecoded.pointer[i] - weightInput); + const typename QuantRequest::ERROR_TYPE& errorComponent = errorBuffer[i] = core::abs(quantsDecoded[i] - weightInput); if (errorComponent) { @@ -1420,13 +1420,13 @@ using namespace nbl::asset; const size_t quantizedVWeightsOffset = vAttributeIx * weightComponentsByteStride; void* quantizedWeightsData = reinterpret_cast(vOverrideQuantizedWeightsBuffer->getPointer()) + quantizedVWeightsOffset; - core::vectorSIMDf packedWeightsStream; //! always go with full vectorSIMDf stream, weights being not used are leaved with default vector's compoment value and are not considered + hlsl::float32_t4 packedWeightsStream = {}; //! 
always go with full float4 stream, weights being not used are leaved with default vector's compoment value and are not considered auto* packedWeightsData = reinterpret_cast(reinterpret_cast(vOverrideRepackedWeightsBuffer->getPointer()) + vAttributeIx * repackWeightsTexelByteSize); for (uint16_t i = 0; i < maxJointsPerVertex; ++i) - packedWeightsStream.pointer[i] = packedWeightsData[i]; + packedWeightsStream[i] = packedWeightsData[i]; - ICPUMeshBuffer::setAttribute(packedWeightsStream, quantizedWeightsData, weightsQuantizeFormat); //! quantize + ICPUMeshBuffer::setAttribute(&packedWeightsStream[0], quantizedWeightsData, weightsQuantizeFormat); //! quantize } } diff --git a/src/nbl/asset/interchange/CGLTFWriter.h b/src/nbl/asset/interchange/CGLTFWriter.h index 6184bc0be2..7fde5eb319 100644 --- a/src/nbl/asset/interchange/CGLTFWriter.h +++ b/src/nbl/asset/interchange/CGLTFWriter.h @@ -40,9 +40,9 @@ namespace nbl uint64_t getSupportedAssetTypesBitfield() const override { return asset::IAsset::ET_MESH; } - uint32_t getSupportedFlags() override { return asset::EWF_NONE; } + writer_flags_t getSupportedFlags() override { return asset::EWF_NONE; } - uint32_t getForcedFlags() override { return asset::EWF_NONE; } + writer_flags_t getForcedFlags() override { return asset::EWF_NONE; } bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; diff --git a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp index b538f75eb3..6521fa9775 100644 --- a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp +++ b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "nbl/system/CFileView.h" @@ -458,10 +460,15 @@ const char* CGraphicsPipelineLoaderMTL::readTexture(const char* _bufPtr, const c mapType = found->second; } } - else if (strncmp(_bufPtr,"-bm",3)==0) + else if 
(strncmp(_bufPtr,"-bm",3)==0) { _bufPtr = goAndCopyNextWord(tmpbuf, _bufPtr, WORD_BUFFER_LENGTH, _bufEnd); - sscanf(tmpbuf, "%f", &_currMaterial->params.bumpFactor); + const char* tokenEnd = tmpbuf; + while (*tokenEnd != '\0') + ++tokenEnd; + const auto parseResult = fast_float::from_chars(tmpbuf, tokenEnd, _currMaterial->params.bumpFactor); + if (!(parseResult.ec == std::errc() && parseResult.ptr == tokenEnd)) + _currMaterial->params.bumpFactor = 0.f; } else if (strncmp(_bufPtr,"-blendu",7)==0) @@ -763,12 +770,15 @@ auto CGraphicsPipelineLoaderMTL::readMaterials(system::IFile* _file, const syste char tmpbuf[WORD_BUFFER_LENGTH]{}; auto readFloat = [&tmpbuf, &bufPtr, bufEnd] { - float f = 0.f; - bufPtr = goAndCopyNextWord(tmpbuf, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(tmpbuf, "%f", &f); - return f; + const char* tokenEnd = tmpbuf; + while (*tokenEnd != '\0') + ++tokenEnd; + + float f = 0.f; + const auto parseResult = fast_float::from_chars(tmpbuf, tokenEnd, f); + return (parseResult.ec == std::errc() && parseResult.ptr == tokenEnd) ? 
f : 0.f; }; auto readRGB = [&readFloat] { core::vector3df_SIMD rgb(1.f); @@ -817,7 +827,13 @@ auto CGraphicsPipelineLoaderMTL::readMaterials(system::IFile* _file, const syste if (currMaterial) { bufPtr = goAndCopyNextWord(tmpbuf, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - currMaterial->params.extra |= (atol(tmpbuf)&0x0f);//illum values are in range [0;10] + const char* tokenEnd = tmpbuf; + while (*tokenEnd != '\0') + ++tokenEnd; + uint32_t illum = 0u; + const auto parseResult = std::from_chars(tmpbuf, tokenEnd, illum, 10); + if (parseResult.ec == std::errc() && parseResult.ptr == tokenEnd) + currMaterial->params.extra |= (illum & 0x0fu);//illum values are in range [0;10] } break; case 'N': diff --git a/src/nbl/asset/interchange/CImageWriterJPG.cpp b/src/nbl/asset/interchange/CImageWriterJPG.cpp index 3943e207ed..4557b0e3d4 100644 --- a/src/nbl/asset/interchange/CImageWriterJPG.cpp +++ b/src/nbl/asset/interchange/CImageWriterJPG.cpp @@ -198,16 +198,16 @@ bool CImageWriterJPG::writeAsset(system::IFile* _file, const SAssetWriteParams& #else SAssetWriteContext ctx{ _params, _file }; - auto imageView = IAsset::castDown(_params.rootAsset); + auto imageView = IAsset::castDown(_params.rootAsset); system::IFile* file = _override->getOutputFile(_file, ctx, { imageView, 0u}); - const asset::E_WRITER_FLAGS flags = _override->getAssetWritingFlags(ctx, imageView, 0u); + const auto flags = _override->getAssetWritingFlags(ctx, imageView, 0u); const float comprLvl = _override->getAssetCompressionLevel(ctx, imageView, 0u); - return writeJPEGFile(file, m_system.get(), imageView, (!!(flags & asset::EWF_COMPRESSED)) * static_cast((1.f-comprLvl)*100.f), _params.logger); // if quality==0, then it defaults to 75 + return writeJPEGFile(file, m_system.get(), imageView, flags.hasAnyFlag(asset::EWF_COMPRESSED) * static_cast((1.f-comprLvl)*100.f), _params.logger); // if quality==0, then it defaults to 75 #endif//!defined(_NBL_COMPILE_WITH_LIBJPEG_ ) } #undef OUTPUT_BUF_SIZE -#endif \ No newline at 
end of file +#endif diff --git a/src/nbl/asset/interchange/CImageWriterJPG.h b/src/nbl/asset/interchange/CImageWriterJPG.h index 40157f0bf6..1d2b5f2963 100644 --- a/src/nbl/asset/interchange/CImageWriterJPG.h +++ b/src/nbl/asset/interchange/CImageWriterJPG.h @@ -33,9 +33,9 @@ class CImageWriterJPG : public asset::IAssetWriter virtual uint64_t getSupportedAssetTypesBitfield() const override { return asset::IAsset::ET_IMAGE_VIEW; } - virtual uint32_t getSupportedFlags() override { return asset::EWF_COMPRESSED; } + virtual writer_flags_t getSupportedFlags() override { return asset::EWF_COMPRESSED; } - virtual uint32_t getForcedFlags() { return asset::EWF_BINARY; } + virtual writer_flags_t getForcedFlags() { return asset::EWF_BINARY; } virtual bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; diff --git a/src/nbl/asset/interchange/CImageWriterOpenEXR.h b/src/nbl/asset/interchange/CImageWriterOpenEXR.h index 37da219c64..5a2e0a1cda 100644 --- a/src/nbl/asset/interchange/CImageWriterOpenEXR.h +++ b/src/nbl/asset/interchange/CImageWriterOpenEXR.h @@ -33,9 +33,9 @@ class CImageWriterOpenEXR final : public IImageWriter uint64_t getSupportedAssetTypesBitfield() const override { return asset::IAsset::ET_IMAGE_VIEW; } - uint32_t getSupportedFlags() override { return asset::EWF_BINARY; } + writer_flags_t getSupportedFlags() override { return asset::EWF_BINARY; } - uint32_t getForcedFlags() { return asset::EWF_BINARY; } + writer_flags_t getForcedFlags() { return asset::EWF_BINARY; } bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; diff --git a/src/nbl/asset/interchange/CImageWriterPNG.h b/src/nbl/asset/interchange/CImageWriterPNG.h index ec2f3b39ef..5111df6ac5 100644 --- a/src/nbl/asset/interchange/CImageWriterPNG.h +++ b/src/nbl/asset/interchange/CImageWriterPNG.h @@ -39,9 +39,9 @@ class CImageWriterPNG : public 
asset::IAssetWriter virtual uint64_t getSupportedAssetTypesBitfield() const override { return asset::IAsset::ET_IMAGE_VIEW; } - virtual uint32_t getSupportedFlags() override { return 0u; } - - virtual uint32_t getForcedFlags() { return asset::EWF_BINARY; } + virtual writer_flags_t getSupportedFlags() override { return asset::EWF_NONE; } + + virtual writer_flags_t getForcedFlags() { return asset::EWF_BINARY; } virtual bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; diff --git a/src/nbl/asset/interchange/CImageWriterTGA.h b/src/nbl/asset/interchange/CImageWriterTGA.h index 2341d1a910..a741898fbb 100644 --- a/src/nbl/asset/interchange/CImageWriterTGA.h +++ b/src/nbl/asset/interchange/CImageWriterTGA.h @@ -33,9 +33,9 @@ class CImageWriterTGA : public asset::IAssetWriter return asset::IAsset::ET_IMAGE_VIEW; } - virtual uint32_t getSupportedFlags() override { return 0u; } + virtual writer_flags_t getSupportedFlags() override { return asset::EWF_NONE; } - virtual uint32_t getForcedFlags() { return asset::EWF_BINARY; } + virtual writer_flags_t getForcedFlags() { return asset::EWF_BINARY; } virtual bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp index 69651f8061..0662628799 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.cpp @@ -1,747 +1,958 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +#ifdef _NBL_COMPILE_WITH_OBJ_LOADER_ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors - #include "nbl/core/declarations.h" - #include "nbl/asset/IAssetManager.h" +#include "nbl/asset/ICPUGeometryCollection.h" +#include "nbl/asset/ICPUMorphTargets.h" +#include "nbl/asset/ICPUScene.h" +#include "nbl/asset/interchange/SGeometryContentHash.h" +#include "nbl/asset/interchange/SGeometryLoaderCommon.h" +#include "nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h" +#include "nbl/asset/interchange/SInterchangeIO.h" +#include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/utils/CPolygonGeometryManipulator.h" - -#ifdef _NBL_COMPILE_WITH_OBJ_LOADER_ - -#include "nbl/system/ISystem.h" +#include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" #include "nbl/system/IFile.h" - -#include "nbl/asset/utils/CQuantNormalCache.h" - #include "COBJMeshFileLoader.h" - -#include - -namespace nbl -{ -namespace asset +#include "impl/SFileAccess.h" +#include "impl/STextParse.h" +#include +#include +#include +#include +#include +#include +#include +namespace nbl::asset { - -//#ifdef _NBL_DEBUG -#define _NBL_DEBUG_OBJ_LOADER_ -//#endif - -static const uint32_t WORD_BUFFER_LENGTH = 512; - -constexpr uint32_t POSITION = 0u; -constexpr uint32_t UV = 2u; -constexpr uint32_t NORMAL = 3u; -constexpr uint32_t BND_NUM = 0u; - -//! Constructor -COBJMeshFileLoader::COBJMeshFileLoader(IAssetManager* _manager) : AssetManager(_manager), System(_manager->getSystem()) +namespace { -} - - -//! 
destructor -COBJMeshFileLoader::~COBJMeshFileLoader() -{ -} - -asset::SAssetBundle COBJMeshFileLoader::loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) +struct Parse { - SContext ctx( - asset::IAssetLoader::SAssetLoadContext{ - _params, - _file - }, - _hierarchyLevel, - _override - ); - - if (_params.meshManipulatorOverride == nullptr) + using Common = impl::TextParse; + struct VertexDedupNode { int32_t uv = -1; int32_t normal = -1; uint32_t smoothingGroup = 0u; uint32_t outIndex = 0u; int32_t next = -1; }; + static bool resolveIndex(const int32_t rawIndex, const size_t elementCount, int32_t& resolved) { - _NBL_DEBUG_BREAK_IF(true); - assert(false); + if (rawIndex > 0) + { + const uint64_t oneBased = static_cast(rawIndex); + if (oneBased == 0ull) + return false; + const uint64_t zeroBased = oneBased - 1ull; + if (zeroBased >= elementCount) + return false; + resolved = static_cast(zeroBased); + return true; + } + const int64_t zeroBased = static_cast(elementCount) + static_cast(rawIndex); + if (zeroBased < 0 || zeroBased >= static_cast(elementCount)) + return false; + resolved = static_cast(zeroBased); + return true; } - - CQuantNormalCache* const quantNormalCache = _params.meshManipulatorOverride->getQuantNormalCache(); - - const long filesize = _file->getSize(); - if (!filesize) - return {}; - - const uint32_t WORD_BUFFER_LENGTH = 512u; - char tmpbuf[WORD_BUFFER_LENGTH]{}; - - uint32_t smoothingGroup=0; - - const std::filesystem::path fullName = _file->getFileName(); - const std::string relPath = [&fullName]() -> std::string + static void parseSmoothingGroup(const char* linePtr, const char* const lineEnd, uint32_t& outGroup) { - auto dir = fullName.parent_path().string(); - return dir; - }(); - - //value_type: directory from which .mtl (pipeline) was loaded and the pipeline - using pipeline_meta_pair_t = std::pair,const 
CMTLMetadata::CRenderpassIndependentPipeline*>; - struct hash_t + Common::skipInlineWhitespace(linePtr, lineEnd); + if (linePtr >= lineEnd) + return void(outGroup = 0u); + const char* const tokenStart = linePtr; + while (linePtr < lineEnd && !Common::isInlineWhitespace(*linePtr)) + ++linePtr; + const std::string_view token(tokenStart, static_cast(linePtr - tokenStart)); + if (token.size() == 2u && std::tolower(token[0]) == 'o' && std::tolower(token[1]) == 'n') + return void(outGroup = 1u); + if (token.size() == 3u && std::tolower(token[0]) == 'o' && std::tolower(token[1]) == 'f' && std::tolower(token[2]) == 'f') + return void(outGroup = 0u); + uint32_t value = 0u; + outGroup = Common::parseExactNumber(token, value) ? value : 0u; + } + static std::string parseIdentifier(const char* linePtr, const char* const lineEnd, const std::string_view fallback) { - inline auto operator()(const pipeline_meta_pair_t& item) const + const char* endPtr = lineEnd; + Common::skipInlineWhitespace(linePtr, lineEnd); + while (endPtr > linePtr && Common::isInlineWhitespace(endPtr[-1])) + --endPtr; + if (linePtr >= endPtr) + return std::string(fallback); + return std::string(linePtr, static_cast(endPtr - linePtr)); + } + static bool parseTrianglePositiveTripletLine(const char* const lineStart, const char* const lineEnd, std::array& out, const size_t posCount, const size_t uvCount, const size_t normalCount) + { + const char* ptr = lineStart; + auto parsePositive = [&](const size_t count, int32_t& outIx) -> bool { + uint32_t value = 0u; + if (!Common::parseNonZeroNumber(ptr, lineEnd, value)) + return false; + if (value > count) + return false; + outIx = value - 1u; + return true; + }; + for (uint32_t corner = 0u; corner < 3u; ++corner) { - return std::hash()(item.second->m_name); + Common::skipInlineWhitespace(ptr, lineEnd); + if (ptr >= lineEnd || !Common::isDigit(*ptr)) + return false; + int32_t posIx = -1; + if (!parsePositive(posCount, posIx)) + return false; + if (ptr >= lineEnd || *ptr 
!= '/') + return false; + ++ptr; + int32_t uvIx = -1; + if (!parsePositive(uvCount, uvIx)) + return false; + if (ptr >= lineEnd || *ptr != '/') + return false; + ++ptr; + int32_t normalIx = -1; + if (!parsePositive(normalCount, normalIx)) + return false; + out[corner] = hlsl::int32_t3(posIx, uvIx, normalIx); } - }; - struct key_equal_t + Common::skipInlineWhitespace(ptr, lineEnd); + return ptr == lineEnd; + } + static bool parseTrianglePositivePositionNormalLine(const char* const lineStart, const char* const lineEnd, std::array& out, const size_t posCount, const size_t normalCount) { - inline bool operator()(const pipeline_meta_pair_t& lhs, const pipeline_meta_pair_t& rhs) const + const char* ptr = lineStart; + auto parsePositive = [&](const size_t count, int32_t& outIx) -> bool { + uint32_t value = 0u; + if (!Common::parseNonZeroNumber(ptr, lineEnd, value)) + return false; + if (value > count) + return false; + outIx = value - 1u; + return true; + }; + for (uint32_t corner = 0u; corner < 3u; ++corner) { - return lhs.second->m_name==rhs.second->m_name; + Common::skipInlineWhitespace(ptr, lineEnd); + if (ptr >= lineEnd || !Common::isDigit(*ptr)) + return false; + int32_t posIx = -1; + if (!parsePositive(posCount, posIx)) + return false; + if ((ptr + 1) >= lineEnd || ptr[0] != '/' || ptr[1] != '/') + return false; + ptr += 2; + int32_t normalIx = -1; + if (!parsePositive(normalCount, normalIx)) + return false; + out[corner] = hlsl::int32_t3(posIx, -1, normalIx); } - }; - core::unordered_multiset pipelines; - - // TODO: map the file whenever possible - std::string fileContents; - fileContents.resize(filesize); - char* const buf = fileContents.data(); - - system::IFile::success_t success; - _file->read(success, buf, 0, filesize); - if (!success) - return {}; - - const char* const bufEnd = buf+filesize; - // Process obj information - const char* bufPtr = buf; - std::string grpName, mtlName; - - auto performActionBasedOnOrientationSystem = [&](auto performOnRightHanded, 
auto performOnLeftHanded) - { - if (_params.loaderFlags & E_LOADER_PARAMETER_FLAGS::ELPF_RIGHT_HANDED_MESHES) - performOnRightHanded(); - else - performOnLeftHanded(); - }; - - - struct vec3 { - float data[3]; - }; - struct vec2 { - float data[2]; - }; - core::vector vertexBuffer; - core::vector normalsBuffer; - core::vector textureCoordBuffer; - - core::vector> submeshes; - core::vector> indices; - core::vector vertices; - core::map map_vtx2ix; - core::vector recalcNormals; - core::vector submeshWasLoadedFromCache; - core::vector submeshCacheKeys; - core::vector submeshMaterialNames; - core::vector vtxSmoothGrp; - - // TODO: handle failures much better! - constexpr const char* NO_MATERIAL_MTL_NAME = "#"; - bool noMaterial = true; - bool dummyMaterialCreated = false; - while(bufPtr != bufEnd) + Common::skipInlineWhitespace(ptr, lineEnd); + return ptr == lineEnd; + } + static bool parseFaceVertexToken(const char*& linePtr, const char* const lineEnd, hlsl::int32_t3& idx, const size_t posCount, const size_t uvCount, const size_t normalCount) { - switch(bufPtr[0]) + Common::skipInlineWhitespace(linePtr, lineEnd); + if (linePtr >= lineEnd) + return false; + idx = hlsl::int32_t3(-1, -1, -1); + const char* ptr = linePtr; + auto parsePositive = [&](const size_t count, int32_t& outIx) -> bool { + uint32_t raw = 0u; + if (!Common::parseNonZeroNumber(ptr, lineEnd, raw)) + return false; + if (raw > count) + return false; + outIx = raw - 1u; + return true; + }; + auto parseResolved = [&](const size_t count, int32_t& outIx) -> bool { + int32_t raw = 0; + return Common::parseNonZeroNumber(ptr, lineEnd, raw) && resolveIndex(raw, count, outIx); + }; + if (*ptr != '-' && *ptr != '+') { - case 'm': // mtllib (material) - { - if (ctx.useMaterials) + if (!parsePositive(posCount, idx.x)) + return false; + if (ptr < lineEnd && *ptr == '/') { - bufPtr = goAndCopyNextWord(tmpbuf, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - _params.logger.log("Reading material _file %s", 
system::ILogger::ELL_DEBUG, tmpbuf); - - std::string mtllib = tmpbuf; - std::replace(mtllib.begin(), mtllib.end(), '\\', '/'); - SAssetLoadParams loadParams(_params); - loadParams.workingDirectory = _file->getFileName().parent_path(); - auto bundle = interm_getAssetInHierarchy(AssetManager, mtllib, loadParams, _hierarchyLevel+ICPUMesh::PIPELINE_HIERARCHYLEVELS_BELOW, _override); - - if (bundle.getContents().empty()) - break; - - if (bundle.getMetadata()) + ++ptr; + if (ptr < lineEnd && *ptr != '/') + { + if (!parsePositive(uvCount, idx.y)) + return false; + } + if (ptr < lineEnd && *ptr == '/') { - auto meta = bundle.getMetadata()->selfCast(); - if (bundle.getAssetType()==IAsset::ET_RENDERPASS_INDEPENDENT_PIPELINE) - for (auto ass : bundle.getContents()) + ++ptr; + if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) { - auto ppln = core::smart_refctd_ptr_static_cast(ass); - const auto pplnMeta = meta->getAssetSpecificMetadata(ppln.get()); - if (!pplnMeta) - continue; - - pipelines.emplace(std::move(ppln),pplnMeta); + if (!parsePositive(normalCount, idx.z)) + return false; } } + else if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) + return false; } + else if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) + return false; } - break; - - case 'v': // v, vn, vt - //reset flags - noMaterial = true; - dummyMaterialCreated = false; - switch(bufPtr[1]) - { - case ' ': // vertex - { - vec3 vec; - bufPtr = readVec3(bufPtr, vec.data, bufEnd); - performActionBasedOnOrientationSystem([&]() {vec.data[0] = -vec.data[0];}, [&]() {}); - vertexBuffer.push_back(vec); - } - break; - - case 'n': // normal - { - vec3 vec; - bufPtr = readVec3(bufPtr, vec.data, bufEnd); - performActionBasedOnOrientationSystem([&]() {vec.data[0] = -vec.data[0]; }, [&]() {}); - normalsBuffer.push_back(vec); - } - break; - - case 't': // texcoord - { - vec2 vec; - bufPtr = readUV(bufPtr, vec.data, bufEnd); - textureCoordBuffer.push_back(vec); - } - break; - } - break; - - case 'g': // group 
name - bufPtr = goAndCopyNextWord(tmpbuf, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - grpName = tmpbuf; - break; - case 's': // smoothing can be a group or off (equiv. to 0) - { - bufPtr = goAndCopyNextWord(tmpbuf, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - _params.logger.log("Loaded smoothing group start %s",system::ILogger::ELL_DEBUG, tmpbuf); - if (strcmp("off", tmpbuf)==0) - smoothingGroup=0u; - else - sscanf(tmpbuf,"%u",&smoothingGroup); - } - break; - - case 'u': // usemtl - // get name of material - { - noMaterial = false; - bufPtr = goAndCopyNextWord(tmpbuf, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - _params.logger.log("Loaded material start %s", system::ILogger::ELL_DEBUG, tmpbuf); - mtlName=tmpbuf; - - if (ctx.useMaterials && !ctx.useGroups) - { - asset::IAsset::E_TYPE types[] {asset::IAsset::ET_SUB_MESH, (asset::IAsset::E_TYPE)0u }; - auto mb_bundle = _override->findCachedAsset(genKeyForMeshBuf(ctx, _file->getFileName().string(), mtlName, grpName), types, ctx.inner, _hierarchyLevel+ICPUMesh::MESHBUFFER_HIERARCHYLEVELS_BELOW); - auto mbs = mb_bundle.getContents(); - bool notempty = mbs.size()!=0ull; - { - auto mb = notempty ? core::smart_refctd_ptr_static_cast(*mbs.begin()) : core::make_smart_refctd_ptr(); - submeshes.push_back(std::move(mb)); - } - indices.emplace_back(); - recalcNormals.push_back(false); - submeshWasLoadedFromCache.push_back(notempty); - //if submesh was loaded from cache - insert empty "cache key" (submesh loaded from cache won't be added to cache again) - submeshCacheKeys.push_back(submeshWasLoadedFromCache.back() ? 
"" : genKeyForMeshBuf(ctx, _file->getFileName().string(), mtlName, grpName)); - submeshMaterialNames.push_back(mtlName); - } - } - break; - case 'f': // face + else { - if (noMaterial && !dummyMaterialCreated) + if (!parseResolved(posCount, idx.x)) + return false; + if (ptr < lineEnd && *ptr == '/') { - dummyMaterialCreated = true; - - submeshes.push_back(core::make_smart_refctd_ptr()); - indices.emplace_back(); - recalcNormals.push_back(false); - submeshWasLoadedFromCache.push_back(false); - submeshCacheKeys.push_back(genKeyForMeshBuf(ctx, _file->getFileName().string(), NO_MATERIAL_MTL_NAME, grpName)); - submeshMaterialNames.push_back(NO_MATERIAL_MTL_NAME); - } - - SObjVertex v; - - // get all vertices data in this face (current line of obj _file) - const std::string wordBuffer = copyLine(bufPtr, bufEnd); - const char* linePtr = wordBuffer.c_str(); - const char* const endPtr = linePtr + wordBuffer.size(); - - core::vector faceCorners; - faceCorners.reserve(32ull); - - // read in all vertices - linePtr = goNextWord(linePtr, endPtr); - while (0 != linePtr[0]) - { - // Array to communicate with retrieveVertexIndices() - // sends the buffer sizes and gets the actual indices - // if index not set returns -1 - int32_t Idx[3]; - Idx[1] = Idx[2] = -1; - - // read in next vertex's data - uint32_t wlength = copyWord(tmpbuf, linePtr, WORD_BUFFER_LENGTH, endPtr); - // this function will also convert obj's 1-based index to c++'s 0-based index - retrieveVertexIndices(tmpbuf, Idx, tmpbuf+wlength+1, vertexBuffer.size(), textureCoordBuffer.size(), normalsBuffer.size()); - v.pos[0] = vertexBuffer[Idx[0]].data[0]; - v.pos[1] = vertexBuffer[Idx[0]].data[1]; - v.pos[2] = vertexBuffer[Idx[0]].data[2]; - //set texcoord - if ( -1 != Idx[1] ) - { - v.uv[0] = textureCoordBuffer[Idx[1]].data[0]; - v.uv[1] = textureCoordBuffer[Idx[1]].data[1]; - } - else - { - v.uv[0] = core::nan(); - v.uv[1] = core::nan(); - } - //set normal - if ( -1 != Idx[2] ) - { - core::vectorSIMDf simdNormal; - 
simdNormal.set(normalsBuffer[Idx[2]].data); - simdNormal.makeSafe3D(); - v.normal32bit = quantNormalCache->quantize(simdNormal); - } - else + ++ptr; + if (ptr < lineEnd && *ptr != '/') { - v.normal32bit = core::vectorSIMDu32(0u); - recalcNormals.back() = true; + if (!parseResolved(uvCount, idx.y)) + return false; } - - uint32_t ix; - auto vtx_ix = map_vtx2ix.find(v); - if (vtx_ix != map_vtx2ix.end() && smoothingGroup==vtxSmoothGrp[vtx_ix->second]) - ix = vtx_ix->second; - else + if (ptr < lineEnd && *ptr == '/') { - ix = vertices.size(); - vertices.push_back(v); - vtxSmoothGrp.push_back(smoothingGroup); - map_vtx2ix.insert({v, ix}); - } - - faceCorners.push_back(ix); - - // go to next vertex - linePtr = goNextWord(linePtr, endPtr); - } - - // triangulate the face - for (uint32_t i = 1u; i < faceCorners.size()-1u; ++i) - { - // Add a triangle - performActionBasedOnOrientationSystem - ( - [&]() - { - indices.back().push_back(faceCorners[0]); - indices.back().push_back(faceCorners[i]); - indices.back().push_back(faceCorners[i + 1]); - }, - [&]() - { - indices.back().push_back(faceCorners[i + 1]); - indices.back().push_back(faceCorners[i]); - indices.back().push_back(faceCorners[0]); - } - ); - } - } - break; - - case '#': // comment - default: - break; - } // end switch(bufPtr[0]) - // eat up rest of line - bufPtr = goNextLine(bufPtr, bufEnd); - } // end while(bufPtr && (bufPtr-buf usedPipelines; - { - uint64_t ixBufOffset = 0ull; - for (size_t i = 0ull; i < submeshes.size(); ++i) - { - if (submeshWasLoadedFromCache[i]) - continue; - - submeshes[i]->setIndexCount(indices[i].size()); - submeshes[i]->setIndexType(EIT_32BIT); - submeshes[i]->setIndexBufferBinding({ixBufOffset,nullptr}); - ixBufOffset += indices[i].size()*4ull; - - const uint32_t hasUV = !core::isnan(vertices[indices[i][0]].uv[0]); - using namespace std::string_literals; - _params.logger.log("Has UV: "s + (hasUV ? 
"YES":"NO"), system::ILogger::ELL_DEBUG); - // search in loaded - pipeline_meta_pair_t pipeline; - { - CMTLMetadata::CRenderpassIndependentPipeline dummyKey; - dummyKey.m_name = submeshCacheKeys[i].substr(submeshCacheKeys[i].find_last_of('?')+1u); - pipeline_meta_pair_t dummy{nullptr,&dummyKey}; - - auto rng = pipelines.equal_range(dummy); - for (auto it=rng.first; it!=rng.second; it++) - if (it->second->m_hash==hasUV) - { - pipeline = *it; - break; - } - } - //if there's no pipeline for this meshbuffer, set dummy one - if (!pipeline.first) - { - const IAsset::E_TYPE searchTypes[] = {IAsset::ET_RENDERPASS_INDEPENDENT_PIPELINE,static_cast(0u)}; - auto bundle = _override->findCachedAsset("nbl/builtin/renderpass_independent_pipeline/loader/mtl/missing_material_pipeline",searchTypes,ctx.inner,_hierarchyLevel+ICPUMesh::PIPELINE_HIERARCHYLEVELS_BELOW); - const auto* meta = bundle.getMetadata()->selfCast(); - const auto contents = bundle.getContents(); - for (auto pplnIt=contents.begin(); pplnIt!=contents.end(); pplnIt++) - { - auto ppln = core::smart_refctd_ptr_static_cast(*pplnIt); - auto pplnMeta = meta->getAssetSpecificMetadata(ppln.get()); - if (pplnMeta && pplnMeta->m_hash==hasUV) + ++ptr; + if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) { - pipeline = { std::move(ppln),pplnMeta }; - break; + if (!parseResolved(normalCount, idx.z)) + return false; } } + else if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) + return false; } - // do some checks - assert(pipeline.first && pipeline.second); - const auto* cPpln = pipeline.first.get(); - if (hasUV) - { - const auto& vtxParams = cPpln->getCachedCreationParams().vertexInput; - assert(vtxParams.attributes[POSITION].relativeOffset==offsetof(SObjVertex,pos)); - assert(vtxParams.attributes[NORMAL].relativeOffset==offsetof(SObjVertex,normal32bit)); - assert(vtxParams.attributes[UV].relativeOffset==offsetof(SObjVertex,uv)); - 
assert(vtxParams.enabledAttribFlags&(1u<getLayout()->getPushConstantRanges().begin()[0].offset; - submeshes[i]->setAttachedDescriptorSet(core::smart_refctd_ptr(pipeline.second->m_descriptorSet3)); - memcpy( - submeshes[i]->getPushConstantsDataPtr()+pcoffset, - &pipeline.second->m_materialParams, - sizeof(CMTLMetadata::CRenderpassIndependentPipeline::SMaterialParameters) - ); - - usedPipelines.insert(pipeline); - submeshes[i]->setPipeline(std::move(pipeline.first)); - } - - core::smart_refctd_ptr vtxBuf = ICPUBuffer::create({ vertices.size() * sizeof(SObjVertex) }); - memcpy(vtxBuf->getPointer(), vertices.data(), vtxBuf->getSize()); - - auto ixBuf = ICPUBuffer::create({ ixBufOffset }); - for (size_t i = 0ull; i < submeshes.size(); ++i) - { - if (submeshWasLoadedFromCache[i]) - continue; - - submeshes[i]->setPositionAttributeIx(POSITION); - submeshes[i]->setNormalAttributeIx(NORMAL); - - submeshes[i]->setIndexBufferBinding({submeshes[i]->getIndexBufferBinding().offset,ixBuf}); - const uint64_t offset = submeshes[i]->getIndexBufferBinding().offset; - memcpy(reinterpret_cast(ixBuf->getPointer())+offset, indices[i].data(), indices[i].size()*4ull); - - SBufferBinding vtxBufBnd; - vtxBufBnd.offset = 0ull; - vtxBufBnd.buffer = vtxBuf; - submeshes[i]->setVertexBufferBinding(std::move(vtxBufBnd), BND_NUM); - - if (recalcNormals[i]) - { - auto vtxcmp = [&vtxSmoothGrp](const IMeshManipulator::SSNGVertexData& v0, const IMeshManipulator::SSNGVertexData& v1, ICPUMeshBuffer* buffer) - { - return vtxSmoothGrp[v0.indexOffset]==vtxSmoothGrp[v1.indexOffset]; - }; - - auto* meshManipulator = AssetManager->getMeshManipulator(); - meshManipulator->calculateSmoothNormals(submeshes[i].get(), false, 1.52e-5f, NORMAL, vtxcmp); - } - } - } - - auto mesh = core::make_smart_refctd_ptr(); - for (auto& submesh : submeshes) - { - IMeshManipulator::recalculateBoundingBox(submesh.get()); - mesh->getMeshBufferVector().emplace_back(std::move(submesh)); - } - - 
IMeshManipulator::recalculateBoundingBox(mesh.get()); - if (mesh->getMeshBuffers().empty()) - return {}; - - // - auto meta = core::make_smart_refctd_ptr(usedPipelines.size()); - uint32_t metaOffset = 0u; - for (auto pipeAndMeta : usedPipelines) - meta->placeMeta(metaOffset++,pipeAndMeta.first.get(),*pipeAndMeta.second); - - //at the very end, insert submeshes into cache - uint32_t i = 0u; - for (auto meshbuffer : mesh->getMeshBuffers()) - { - auto bundle = SAssetBundle(meta,{ core::smart_refctd_ptr(meshbuffer) }); - _override->insertAssetIntoCache(bundle, submeshCacheKeys[i++], ctx.inner, _hierarchyLevel+ICPUMesh::MESHBUFFER_HIERARCHYLEVELS_BELOW); - } - - return SAssetBundle(std::move(meta),{std::move(mesh)}); -} - - -//! Read 3d vector of floats -const char* COBJMeshFileLoader::readVec3(const char* bufPtr, float vec[3], const char* const bufEnd) -{ - const uint32_t WORD_BUFFER_LENGTH = 256; - char wordBuffer[WORD_BUFFER_LENGTH]; - - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer,"%f",vec); - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer,"%f",vec+1); - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer,"%f",vec+2); - - vec[0] = -vec[0]; // change handedness - return bufPtr; -} - - -//! Read 2d vector of floats -const char* COBJMeshFileLoader::readUV(const char* bufPtr, float vec[2], const char* const bufEnd) -{ - const uint32_t WORD_BUFFER_LENGTH = 256; - char wordBuffer[WORD_BUFFER_LENGTH]; - - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer,"%f",vec); - bufPtr = goAndCopyNextWord(wordBuffer, bufPtr, WORD_BUFFER_LENGTH, bufEnd); - sscanf(wordBuffer,"%f",vec+1); - - vec[1] = 1.f-vec[1]; // change handedness - return bufPtr; -} - - -//! 
Read boolean value represented as 'on' or 'off' -const char* COBJMeshFileLoader::readBool(const char* bufPtr, bool& tf, const char* const bufEnd) -{ - const uint32_t BUFFER_LENGTH = 8; - char tfStr[BUFFER_LENGTH]; - - bufPtr = goAndCopyNextWord(tfStr, bufPtr, BUFFER_LENGTH, bufEnd); - tf = strcmp(tfStr, "off") != 0; - return bufPtr; -} - -//! skip space characters and stop on first non-space -const char* COBJMeshFileLoader::goFirstWord(const char* buf, const char* const bufEnd, bool acrossNewlines) -{ - // skip space characters - if (acrossNewlines) - while((buf != bufEnd) && core::isspace(*buf)) - ++buf; - else - while((buf != bufEnd) && core::isspace(*buf) && (*buf != '\n')) - ++buf; - - return buf; -} - - -//! skip current word and stop at beginning of next one -const char* COBJMeshFileLoader::goNextWord(const char* buf, const char* const bufEnd, bool acrossNewlines) -{ - // skip current word - while(( buf != bufEnd ) && !core::isspace(*buf)) - ++buf; - - return goFirstWord(buf, bufEnd, acrossNewlines); -} - - -//! 
Read until line break is reached and stop at the next non-space character -const char* COBJMeshFileLoader::goNextLine(const char* buf, const char* const bufEnd) -{ - // look for newline characters - while(buf != bufEnd) - { - // found it, so leave - if (*buf=='\n' || *buf=='\r') - break; - ++buf; + else if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) + return false; + } + if (ptr < lineEnd && !Common::isInlineWhitespace(*ptr)) + return false; + linePtr = ptr; + return true; } - return goFirstWord(buf, bufEnd); +}; } - - -uint32_t COBJMeshFileLoader::copyWord(char* outBuf, const char* const inBuf, uint32_t outBufLength, const char* const bufEnd) +COBJMeshFileLoader::COBJMeshFileLoader(IAssetManager*) { - if (!outBufLength) - return 0; - if (!inBuf) - { - *outBuf = 0; - return 0; - } - - uint32_t i = 0; - while(inBuf[i]) - { - if (core::isspace(inBuf[i]) || &(inBuf[i]) == bufEnd) - break; - ++i; - } - - uint32_t length = core::min(i, outBufLength-1); - for (uint32_t j=0; jgetSize(); + if (fileSize <= 0) + return false; + constexpr size_t ProbeBytes = 4096ull; + const size_t bytesToRead = std::min(ProbeBytes, static_cast(fileSize)); + std::array probe = {}; + system::IFile::success_t succ; + _file->read(succ, probe.data(), 0ull, bytesToRead); + if (!succ || bytesToRead == 0ull) + return false; + const char* ptr = probe.data(); + const char* const end = probe.data() + bytesToRead; + if ((end - ptr) >= 3 && static_cast(ptr[0]) == 0xEFu && static_cast(ptr[1]) == 0xBBu && static_cast(ptr[2]) == 0xBFu) + ptr += 3; + while (ptr < end) { - if (*ptr=='\n' || *ptr=='\r') + while (ptr < end && (*ptr == ' ' || *ptr == '\t' || *ptr == '\r' || *ptr == '\n')) + ++ptr; + if (ptr >= end) break; - ++ptr; + if (*ptr == '#') + { + while (ptr < end && *ptr != '\n') + ++ptr; + continue; + } + switch (std::tolower(*ptr)) + { + case 'v': + case 'f': + case 'o': + case 'g': + case 's': + case 'u': + case 'm': + case 'l': + case 'p': + return true; + default: + return false; + } } - 
// we must avoid the +1 in case the array is used up - return std::string(inBuf, (uint32_t)(ptr-inBuf+((ptr < bufEnd) ? 1 : 0))); + return false; } - - -const char* COBJMeshFileLoader::goAndCopyNextWord(char* outBuf, const char* inBuf, uint32_t outBufLength, const char* bufEnd) +const char** COBJMeshFileLoader::getAssociatedFileExtensions() const { - inBuf = goNextWord(inBuf, bufEnd, false); - copyWord(outBuf, inBuf, outBufLength, bufEnd); - return inBuf; + static const char* ext[] = { "obj", nullptr }; + return ext; } - - -bool COBJMeshFileLoader::retrieveVertexIndices(char* vertexData, int32_t* idx, const char* bufEnd, uint32_t vbsize, uint32_t vtsize, uint32_t vnsize) -{ - char word[16] = ""; - const char* p = goFirstWord(vertexData, bufEnd); - uint32_t idxType = 0; // 0 = posIdx, 1 = texcoordIdx, 2 = normalIdx - - uint32_t i = 0; - while ( p != bufEnd ) - { - if ( ( core::isdigit(*p)) || (*p == '-') ) - { - // build up the number - word[i++] = *p; - } - else if ( *p == '/' || *p == ' ' || *p == '\0' ) - { - // number is completed. 
Convert and store it - word[i] = '\0'; - // if no number was found index will become 0 and later on -1 by decrement - sscanf(word,"%d",idx+idxType); - if (idx[idxType]<0) - { - switch (idxType) - { - case 0: - idx[idxType] += vbsize; - break; - case 1: - idx[idxType] += vtsize; - break; - case 2: - idx[idxType] += vnsize; - break; - } - } - else - idx[idxType]-=1; - - // reset the word - word[0] = '\0'; - i = 0; - - // go to the next kind of index type - if (*p == '/') - { - if ( ++idxType > 2 ) - { - // error checking, shouldn't reach here unless file is wrong - idxType = 0; - } - } - else - { - // set all missing values to disable (=-1) - while (++idxType < 3) - idx[idxType]=-1; - ++p; - break; // while +asset::SAssetBundle COBJMeshFileLoader::loadAsset( + system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, + asset::IAssetLoader::IAssetLoaderOverride* _override [[maybe_unused]], + uint32_t _hierarchyLevel [[maybe_unused]]) { + if (!_file) + return {}; + uint64_t faceCount = 0u; + uint64_t faceFastTokenCount = 0u; + uint64_t faceFallbackTokenCount = 0u; + SFileReadTelemetry ioTelemetry = {}; + const long filesize = _file->getSize(); + if (filesize <= 0) + return {}; + impl::SLoadSession loadSession = {}; + if (!impl::SLoadSession::begin(_params.logger, "OBJ loader", _file, _params.ioPolicy, static_cast(filesize), true, loadSession)) + return {}; + core::vector fileContents; + const auto* fileData = loadSession.mapOrReadWholeFile(fileContents, &ioTelemetry); + if (!fileData) + return {}; + const char* const buf = reinterpret_cast(fileData); + const char* const bufEnd = buf + static_cast(filesize); + const char* bufPtr = buf; + core::vector positions; + core::vector normals; + core::vector uvs; + const size_t estimatedAttributeCount = + std::max(16ull, static_cast(filesize) / 32ull); + positions.reserve(estimatedAttributeCount); + normals.reserve(estimatedAttributeCount); + uvs.reserve(estimatedAttributeCount); + core::vector outPositions; + 
core::vector outNormals; + core::vector outNormalNeedsGeneration; + core::vector outUVs; + std::optional smoothNormalAccumulator; + core::vector indices; + core::vector dedupHeadByPos; + core::vector dedupNodes; + const size_t estimatedOutVertexCount = std::max( + estimatedAttributeCount, static_cast(filesize) / 20ull); + const size_t estimatedOutIndexCount = + (estimatedOutVertexCount <= (std::numeric_limits::max() / 3ull)) + ? (estimatedOutVertexCount * 3ull) + : std::numeric_limits::max(); + const size_t initialOutVertexCapacity = + std::max(1ull, estimatedOutVertexCount); + const size_t initialOutIndexCapacity = + (estimatedOutIndexCount == std::numeric_limits::max()) + ? 3ull + : std::max(3ull, estimatedOutIndexCount); + size_t outVertexWriteCount = 0ull; + size_t outIndexWriteCount = 0ull; + size_t dedupNodeCount = 0ull; + struct SDedupHotEntry { + int32_t pos = -1; + int32_t uv = -1; + int32_t normal = -1; + uint32_t outIndex = 0u; + }; + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); + const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers( + hw, _params.ioPolicy.runtimeTuning.workerHeadroom); + SLoaderRuntimeTuningRequest dedupTuningRequest = {}; + dedupTuningRequest.inputBytes = static_cast(filesize); + dedupTuningRequest.totalWorkUnits = estimatedOutVertexCount; + dedupTuningRequest.hardwareThreads = static_cast(hw); + dedupTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); + dedupTuningRequest.targetChunksPerWorker = + _params.ioPolicy.runtimeTuning.targetChunksPerWorker; + dedupTuningRequest.sampleData = reinterpret_cast(buf); + dedupTuningRequest.sampleBytes = SLoaderRuntimeTuner::resolveSampleBytes( + _params.ioPolicy, static_cast(filesize)); + const auto dedupTuning = + SLoaderRuntimeTuner::tune(_params.ioPolicy, dedupTuningRequest); + const size_t dedupHotSeed = std::max( + 16ull, estimatedOutVertexCount / + std::max(1ull, dedupTuning.workerCount * 8ull)); + const size_t dedupHotEntryCount = 
std::bit_ceil(dedupHotSeed); + core::vector dedupHotCache(dedupHotEntryCount); + const size_t dedupHotMask = dedupHotEntryCount - 1ull; + struct SLoadedGeometry { + core::smart_refctd_ptr geometry = {}; + std::string objectName = {}; + std::string groupName = {}; + uint64_t faceCount = 0ull; + uint64_t faceFastTokenCount = 0ull; + uint64_t faceFallbackTokenCount = 0ull; + }; + core::vector loadedGeometries; + std::string currentObjectName = "default_object"; + std::string currentGroupName = "default_group"; + bool sawObjectDirective = false; + bool sawGroupDirective = false; + bool hasProvidedNormals = false; + bool needsNormalGeneration = false; + bool hasUVs = false; + hlsl::shapes::util::AABBAccumulator3 parsedAABB = + hlsl::shapes::util::createAABBAccumulator(); + uint64_t currentFaceCount = 0ull; + uint64_t currentFaceFastTokenCount = 0ull; + uint64_t currentFaceFallbackTokenCount = 0ull; + const auto resetBuilderState = [&]() -> void { + outPositions.clear(); + outNormals.clear(); + outNormalNeedsGeneration.clear(); + outUVs.clear(); + smoothNormalAccumulator.reset(); + indices.clear(); + dedupNodes.clear(); + outPositions.resize(initialOutVertexCapacity); + outNormals.resize(initialOutVertexCapacity); + outNormalNeedsGeneration.resize(initialOutVertexCapacity, 0u); + outUVs.resize(initialOutVertexCapacity); + indices.resize(initialOutIndexCapacity); + dedupHeadByPos.assign(positions.size(), -1); + dedupNodes.resize(initialOutVertexCapacity); + outVertexWriteCount = 0ull; + outIndexWriteCount = 0ull; + dedupNodeCount = 0ull; + hasProvidedNormals = false; + needsNormalGeneration = false; + hasUVs = false; + parsedAABB = hlsl::shapes::util::createAABBAccumulator(); + currentFaceCount = 0ull; + currentFaceFastTokenCount = 0ull; + currentFaceFallbackTokenCount = 0ull; + const SDedupHotEntry emptyHotEntry = {}; + std::fill(dedupHotCache.begin(), dedupHotCache.end(), emptyHotEntry); + }; + const auto finalizeCurrentGeometry = [&]() -> bool { + if 
(outVertexWriteCount == 0ull) + return true; + outPositions.resize(outVertexWriteCount); + outNormals.resize(outVertexWriteCount); + outNormalNeedsGeneration.resize(outVertexWriteCount); + outUVs.resize(outVertexWriteCount); + indices.resize(outIndexWriteCount); + if (needsNormalGeneration) { + // OBJ smoothing groups are already encoded in the parser-side vertex + // split corners that must stay sharp become different output vertices + // even if they share position. We therefore feed the parser-final + // indexed triangles into a smoothing accumulator and finalize only + // the normals that were missing in the source. + if (!smoothNormalAccumulator) + return false; + smoothNormalAccumulator->reserveVertices(outVertexWriteCount); + if (!smoothNormalAccumulator->finalize( + std::span(outNormals.data(), outNormals.size()), + std::span(outNormalNeedsGeneration.data(), outNormalNeedsGeneration.size()))) + return false; + } + const size_t outVertexCount = outPositions.size(); + auto geometry = core::make_smart_refctd_ptr(); + { + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(outPositions)); + if (!view) + return false; + geometry->setPositionView(std::move(view)); + } + const bool hasNormals = hasProvidedNormals || needsNormalGeneration; + if (hasNormals) { + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(outNormals)); + if (!view) + return false; + geometry->setNormalView(std::move(view)); + } + if (hasUVs) { + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(outUVs)); + if (!view) + return false; + auto* const auxViews = geometry->getAuxAttributeViews(); + auxViews->resize(SOBJPolygonGeometryAuxLayout::UV0 + 1u); + (*auxViews)[SOBJPolygonGeometryAuxLayout::UV0] = std::move(view); + } + if (!indices.empty()) { + geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + if (outVertexCount <= + static_cast(std::numeric_limits::max()) + 1ull) { + core::vector indices16(indices.size()); + for (size_t i = 0u; i < 
indices.size(); ++i) + indices16[i] = static_cast(indices[i]); + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(indices16)); + if (!view) + return false; + geometry->setIndexView(std::move(view)); + } else { + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(indices)); + if (!view) + return false; + geometry->setIndexView(std::move(view)); + } + } else { + geometry->setIndexing(IPolygonGeometryBase::PointList()); + } + if (!_params.loaderFlags.hasAnyFlag( + IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES)) + SPolygonGeometryContentHash::computeMissing(geometry.get(), + _params.ioPolicy); + if (!parsedAABB.empty()) + geometry->applyAABB(parsedAABB.value); + else + CPolygonGeometryManipulator::recomputeAABB(geometry.get()); + loadedGeometries.push_back(SLoadedGeometry{ + .geometry = std::move(geometry), + .objectName = currentObjectName, + .groupName = currentGroupName, + .faceCount = currentFaceCount, + .faceFastTokenCount = currentFaceFastTokenCount, + .faceFallbackTokenCount = currentFaceFallbackTokenCount}); + return true; + }; + resetBuilderState(); + auto allocateOutVertex = [&](uint32_t& outIx) -> bool { + if (outVertexWriteCount >= outPositions.size()) { + const size_t newCapacity = std::max(outVertexWriteCount + 1ull, + outPositions.size() * 2ull); + outPositions.resize(newCapacity); + outNormals.resize(newCapacity); + outNormalNeedsGeneration.resize(newCapacity, 0u); + outUVs.resize(newCapacity); + if (smoothNormalAccumulator) { + smoothNormalAccumulator->reserveVertices(newCapacity); + smoothNormalAccumulator->prepareIdentityGroups(newCapacity); + } + } + if (outVertexWriteCount > + static_cast(std::numeric_limits::max())) + return false; + outIx = static_cast(outVertexWriteCount++); + return true; + }; + auto appendIndex = [&](const uint32_t value) -> bool { + if (outIndexWriteCount >= indices.size()) { + const size_t newCapacity = + std::max(outIndexWriteCount + 1ull, indices.size() * 2ull); + 
indices.resize(newCapacity); + } + indices[outIndexWriteCount++] = value; + return true; + }; + auto allocateDedupNode = [&]() -> int32_t { + if (dedupNodeCount >= dedupNodes.size()) { + const size_t newCapacity = + std::max(dedupNodeCount + 1ull, dedupNodes.size() * 2ull); + dedupNodes.resize(newCapacity); + } + if (dedupNodeCount > + static_cast(std::numeric_limits::max())) + return -1; + const int32_t ix = static_cast(dedupNodeCount++); + return ix; + }; + auto findCornerIndex = + [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, + const uint32_t dedupSmoothingGroup, uint32_t& outIx) -> bool { + if (posIx < 0 || static_cast(posIx) >= positions.size()) + return false; + if (static_cast(posIx) >= dedupHeadByPos.size()) + dedupHeadByPos.resize(positions.size(), -1); + int32_t nodeIx = dedupHeadByPos[static_cast(posIx)]; + while (nodeIx >= 0) { + const auto& node = dedupNodes[static_cast(nodeIx)]; + if (node.uv == uvIx && node.normal == normalIx && + node.smoothingGroup == dedupSmoothingGroup) { + outIx = node.outIndex; + return true; + } + nodeIx = node.next; + } + return false; + }; + auto materializeCornerIndex = + [&](const int32_t posIx, const int32_t uvIx, const int32_t normalIx, + const uint32_t dedupSmoothingGroup, uint32_t& outIx) -> bool { + if (!allocateOutVertex(outIx)) + return false; + const int32_t newNodeIx = allocateDedupNode(); + if (newNodeIx < 0) + return false; + auto& node = dedupNodes[static_cast(newNodeIx)]; + node.uv = uvIx; + node.normal = normalIx; + node.smoothingGroup = dedupSmoothingGroup; + node.outIndex = outIx; + node.next = dedupHeadByPos[static_cast(posIx)]; + dedupHeadByPos[static_cast(posIx)] = newNodeIx; + const auto& srcPos = positions[static_cast(posIx)]; + outPositions[static_cast(outIx)] = srcPos; + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, srcPos); + hlsl::float32_t2 uv(0.f, 0.f); + if (uvIx >= 0 && static_cast(uvIx) < uvs.size()) { + uv = uvs[static_cast(uvIx)]; + hasUVs = true; + } + 
outUVs[static_cast(outIx)] = uv; + hlsl::float32_t3 normal(0.f, 0.f, 0.f); + if (normalIx >= 0 && static_cast(normalIx) < normals.size()) { + normal = normals[static_cast(normalIx)]; + hasProvidedNormals = true; + outNormalNeedsGeneration[static_cast(outIx)] = 0u; + } else { + needsNormalGeneration = true; + outNormalNeedsGeneration[static_cast(outIx)] = 1u; + } + outNormals[static_cast(outIx)] = normal; + return true; + }; + auto acquireCornerIndex = [&](const hlsl::int32_t3& idx, + const uint32_t smoothingGroup, + uint32_t& outIx) -> bool { + const int32_t posIx = idx.x; + if (posIx < 0 || static_cast(posIx) >= positions.size()) + return false; + const uint32_t dedupSmoothingGroup = idx.z >= 0 ? 0u : smoothingGroup; + if (findCornerIndex(posIx, idx.y, idx.z, dedupSmoothingGroup, outIx)) + return true; + return materializeCornerIndex(posIx, idx.y, idx.z, dedupSmoothingGroup, + outIx); + }; + auto acquireCornerIndexPositiveTriplet = [&](const hlsl::int32_t3& idx, + uint32_t& outIx) -> bool { + const uint32_t hotHash = static_cast(idx.x) * 73856093u ^ + static_cast(idx.y) * 19349663u ^ + static_cast(idx.z) * 83492791u; + auto& hotEntry = dedupHotCache[static_cast(hotHash) & dedupHotMask]; + if (hotEntry.pos == idx.x && hotEntry.uv == idx.y && + hotEntry.normal == idx.z) { + outIx = hotEntry.outIndex; + return true; + } + if (findCornerIndex(idx.x, idx.y, idx.z, 0u, outIx) || + materializeCornerIndex(idx.x, idx.y, idx.z, 0u, outIx)) { + hotEntry.pos = idx.x; + hotEntry.uv = idx.y; + hotEntry.normal = idx.z; + hotEntry.outIndex = outIx; + return true; + } + return false; + }; + auto acquireCornerIndexPositiveNormal = [&](const hlsl::int32_t3& idx, + uint32_t& outIx) -> bool { + const uint32_t hotHash = static_cast(idx.x) * 73856093u ^ + static_cast(idx.z) * 83492791u ^ + 0x9e3779b9u; + auto& hotEntry = dedupHotCache[static_cast(hotHash) & dedupHotMask]; + if (hotEntry.pos == idx.x && hotEntry.uv == -1 && + hotEntry.normal == idx.z) { + outIx = hotEntry.outIndex; + 
return true; + } + if (findCornerIndex(idx.x, -1, idx.z, 0u, outIx) || + materializeCornerIndex(idx.x, -1, idx.z, 0u, outIx)) { + hotEntry.pos = idx.x; + hotEntry.uv = -1; + hotEntry.normal = idx.z; + hotEntry.outIndex = outIx; + return true; + } + return false; + }; + auto acquireTriangleCorners = [&](auto&& acquire, const std::array& triIdx, hlsl::uint32_t3& cornerIx) -> bool { + return acquire(triIdx[0], cornerIx.x) && acquire(triIdx[1], cornerIx.y) && acquire(triIdx[2], cornerIx.z); + }; + auto appendTriangle = [&](const hlsl::uint32_t3& cornerIx) -> bool { + if (!(appendIndex(cornerIx.z) && appendIndex(cornerIx.y) && appendIndex(cornerIx.x))) + return false; + if (!needsNormalGeneration) + return true; + if (!smoothNormalAccumulator) { + smoothNormalAccumulator.emplace(CPolygonGeometryManipulator::ESmoothNormalAccumulationMode::AreaWeighted); + smoothNormalAccumulator->reserveVertices(outVertexWriteCount); + smoothNormalAccumulator->prepareIdentityGroups(outPositions.size()); + } + if (outNormalNeedsGeneration[static_cast(cornerIx.x)] == 0u && + outNormalNeedsGeneration[static_cast(cornerIx.y)] == 0u && + outNormalNeedsGeneration[static_cast(cornerIx.z)] == 0u) + return true; + return smoothNormalAccumulator->addPreparedIdentityTriangle( + cornerIx.z, outPositions[static_cast(cornerIx.z)], + cornerIx.y, outPositions[static_cast(cornerIx.y)], + cornerIx.x, outPositions[static_cast(cornerIx.x)]); + }; + uint32_t currentSmoothingGroup = 0u; + while (bufPtr < bufEnd) { + const char* const lineStart = bufPtr; + const size_t remaining = static_cast(bufEnd - lineStart); + const char* lineTerminator = + static_cast(std::memchr(lineStart, '\n', remaining)); + if (!lineTerminator) + lineTerminator = + static_cast(std::memchr(lineStart, '\r', remaining)); + if (!lineTerminator) + lineTerminator = bufEnd; + const char* lineEnd = lineTerminator; + if (lineEnd > lineStart && lineEnd[-1] == '\r') + --lineEnd; + if (lineStart < lineEnd) { + const char lineType = 
std::tolower(*lineStart); + if (lineType == 'v') { + auto parseVector = [&](const char* ptr, float* values, + const uint32_t count) -> bool { + for (uint32_t i = 0u; i < count; ++i) { + while (ptr < lineEnd && Parse::Common::isInlineWhitespace(*ptr)) + ++ptr; + if (ptr >= lineEnd || !Parse::Common::parseNumber(ptr, lineEnd, values[i])) + return false; + } + return true; + }; + const char subType = + ((lineStart + 1) < lineEnd) ? std::tolower(lineStart[1]) : '\0'; + if ((lineStart + 1) < lineEnd && subType == ' ') { + hlsl::float32_t3 vec{}; + if (!parseVector(lineStart + 2, &vec.x, 3u)) + return {}; + positions.push_back(vec); + dedupHeadByPos.push_back(-1); + } else if ((lineStart + 2) < lineEnd && subType == 'n' && + Parse::Common::isInlineWhitespace(lineStart[2])) { + hlsl::float32_t3 vec{}; + if (!parseVector(lineStart + 3, &vec.x, 3u)) + return {}; + normals.push_back(vec); + } else if ((lineStart + 2) < lineEnd && subType == 't' && + Parse::Common::isInlineWhitespace(lineStart[2])) { + hlsl::float32_t2 vec{}; + if (!parseVector(lineStart + 3, &vec.x, 2u)) + return {}; + vec.y = 1.f - vec.y; + uvs.push_back(vec); + } + } else if (lineType == 'o' && (lineStart + 1) < lineEnd && + Parse::Common::isInlineWhitespace(lineStart[1])) { + if (!finalizeCurrentGeometry()) + return {}; + resetBuilderState(); + currentObjectName = + Parse::parseIdentifier(lineStart + 2, lineEnd, "default_object"); + sawObjectDirective = true; + } else if (lineType == 'g' && (lineStart + 1) < lineEnd && + Parse::Common::isInlineWhitespace(lineStart[1])) { + if (!finalizeCurrentGeometry()) + return {}; + resetBuilderState(); + currentGroupName = + Parse::parseIdentifier(lineStart + 2, lineEnd, "default_group"); + sawGroupDirective = true; + } else if (lineType == 's' && (lineStart + 1) < lineEnd && + Parse::Common::isInlineWhitespace(lineStart[1])) { + Parse::parseSmoothingGroup(lineStart + 2, lineEnd, + currentSmoothingGroup); + } else if (lineType == 'f' && (lineStart + 1) < lineEnd && + 
Parse::Common::isInlineWhitespace(lineStart[1])) { + if (positions.empty()) + return {}; + ++faceCount; + ++currentFaceCount; + const size_t posCount = positions.size(); + const size_t uvCount = uvs.size(); + const size_t normalCount = normals.size(); + const char* triLinePtr = lineStart + 1; + std::array triIdx = {hlsl::int32_t3(-1, -1, -1), + hlsl::int32_t3(-1, -1, -1), + hlsl::int32_t3(-1, -1, -1)}; + bool triangleFastPath = Parse::parseTrianglePositiveTripletLine( + lineStart + 1, lineEnd, triIdx, posCount, uvCount, normalCount); + bool positiveNormalOnlyFastPath = false; + if (!triangleFastPath && uvCount == 0u && normalCount != 0u) { + triangleFastPath = Parse::parseTrianglePositivePositionNormalLine( + lineStart + 1, lineEnd, triIdx, posCount, normalCount); + positiveNormalOnlyFastPath = triangleFastPath; + } + bool parsedFirstThree = triangleFastPath; + if (!triangleFastPath) { + triLinePtr = lineStart + 1; + parsedFirstThree = + Parse::parseFaceVertexToken(triLinePtr, lineEnd, triIdx[0], + posCount, uvCount, normalCount) && + Parse::parseFaceVertexToken(triLinePtr, lineEnd, triIdx[1], + posCount, uvCount, normalCount) && + Parse::parseFaceVertexToken(triLinePtr, lineEnd, triIdx[2], + posCount, uvCount, normalCount); + triangleFastPath = parsedFirstThree; + if (parsedFirstThree) { + while (triLinePtr < lineEnd && + Parse::Common::isInlineWhitespace(*triLinePtr)) + ++triLinePtr; + triangleFastPath = (triLinePtr == lineEnd); + } + } + if (triangleFastPath && !positiveNormalOnlyFastPath) { + const bool fullTriplet = std::all_of( + triIdx.begin(), triIdx.end(), [](const hlsl::int32_t3& idx) { + return hlsl::all(glm::greaterThanEqual(idx, hlsl::int32_t3(0))); + }); + if (!fullTriplet) + triangleFastPath = false; + } + if (triangleFastPath) { + hlsl::uint32_t3 cornerIx = {}; + if (positiveNormalOnlyFastPath) { + if (!acquireTriangleCorners(acquireCornerIndexPositiveNormal, triIdx, cornerIx)) + return {}; + } else if 
(!acquireTriangleCorners(acquireCornerIndexPositiveTriplet, triIdx, cornerIx)) + return {}; + faceFastTokenCount += 3u; + currentFaceFastTokenCount += 3u; + if (!appendTriangle(cornerIx)) + return {}; + } else { + const char* linePtr = lineStart + 1; + uint32_t firstCorner = 0u; + uint32_t previousCorner = 0u; + uint32_t cornerCount = 0u; + if (parsedFirstThree) { + hlsl::uint32_t3 cornerIx = {}; + if (!acquireTriangleCorners([&](const hlsl::int32_t3& idx, uint32_t& outIx) { return acquireCornerIndex(idx, currentSmoothingGroup, outIx); }, triIdx, cornerIx)) + return {}; + faceFallbackTokenCount += 3u; + currentFaceFallbackTokenCount += 3u; + if (!appendTriangle(cornerIx)) + return {}; + firstCorner = cornerIx.x; + previousCorner = cornerIx.z; + cornerCount = 3u; + linePtr = triLinePtr; + } + while (linePtr < lineEnd) { + while (linePtr < lineEnd && + Parse::Common::isInlineWhitespace(*linePtr)) + ++linePtr; + if (linePtr >= lineEnd) + break; + hlsl::int32_t3 idx(-1, -1, -1); + if (!Parse::parseFaceVertexToken(linePtr, lineEnd, idx, posCount, + uvCount, normalCount)) + return {}; + ++faceFallbackTokenCount; + ++currentFaceFallbackTokenCount; + uint32_t cornerIx = 0u; + if (!acquireCornerIndex(idx, currentSmoothingGroup, cornerIx)) + return {}; + if (cornerCount == 0u) { + firstCorner = cornerIx; + ++cornerCount; + continue; + } + if (cornerCount == 1u) { + previousCorner = cornerIx; + ++cornerCount; + continue; + } + if (!appendTriangle(hlsl::uint32_t3(firstCorner, previousCorner, cornerIx))) + return {}; + previousCorner = cornerIx; + ++cornerCount; + } + } + } + } + if (lineTerminator >= bufEnd) + bufPtr = bufEnd; + else if (*lineTerminator == '\r' && (lineTerminator + 1) < bufEnd && + lineTerminator[1] == '\n') + bufPtr = lineTerminator + 2; + else + bufPtr = lineTerminator + 1; + } + if (!finalizeCurrentGeometry()) + return {}; + if (loadedGeometries.empty()) + return {}; + uint64_t outVertexCount = 0ull; + uint64_t outIndexCount = 0ull; + uint64_t 
faceFastTokenCountSum = 0ull; + uint64_t faceFallbackTokenCountSum = 0ull; + for (const auto& loaded : loadedGeometries) { + const auto& posView = loaded.geometry->getPositionView(); + outVertexCount += + static_cast(posView ? posView.getElementCount() : 0ull); + const auto& indexView = loaded.geometry->getIndexView(); + outIndexCount += + static_cast(indexView ? indexView.getElementCount() : 0ull); + faceFastTokenCountSum += loaded.faceFastTokenCount; + faceFallbackTokenCountSum += loaded.faceFallbackTokenCount; + } + loadSession.logTinyIO(_params.logger, ioTelemetry); + core::vector> objectCollections; + objectCollections.reserve(loadedGeometries.size()); + std::unordered_map objectIndices; + objectIndices.reserve(loadedGeometries.size()); + size_t currentObjectIx = ~size_t(0ull); + std::string_view currentCollectionObjectName; + for (auto& loaded : loadedGeometries) { + const std::string_view objectName(loaded.objectName); + size_t objectIx = currentObjectIx; + if (objectIx == ~size_t(0ull) || currentCollectionObjectName != objectName) { + auto [it, inserted] = objectIndices.try_emplace(objectName, objectCollections.size()); + if (inserted) { + auto collection = core::make_smart_refctd_ptr(); + if (!collection) + return {}; + objectCollections.push_back(std::move(collection)); } + objectIx = it->second; + currentObjectIx = objectIx; + currentCollectionObjectName = objectName; } - - // go to the next char - ++p; + auto* refs = objectCollections[objectIx]->getGeometries(); + if (!refs) + return {}; + IGeometryCollection::SGeometryReference ref = {}; + ref.geometry = core::smart_refctd_ptr_static_cast>(loaded.geometry); + refs->push_back(std::move(ref)); } - - return true; + auto scene = ICPUScene::create(nullptr); + if (!scene) + return {}; + auto& instances = scene->getInstances(); + instances.resize(objectCollections.size()); + auto morphTargets = instances.getMorphTargets(); + for (size_t i = 0ull; i < objectCollections.size(); ++i) { + auto targets = 
core::make_smart_refctd_ptr(); + if (!targets) + return {}; + auto* targetList = targets->getTargets(); + if (!targetList) + return {}; + targetList->push_back({.geoCollection = std::move(objectCollections[i])}); + morphTargets[i] = std::move(targets); + } + // Plain OBJ now loads as a flat scene so later material pairing can attach + // to scene instances. We keep identity transforms here and leave material + // tables invalid until `MTL` support lands. + core::vector> outputAssets; + outputAssets.push_back(core::smart_refctd_ptr_static_cast(std::move(scene))); + const uint64_t objectCount = objectCollections.size(); + _params.logger.log( + "OBJ loader stats: file=%s in(v=%llu n=%llu uv=%llu) out(v=%llu idx=%llu " + "faces=%llu face_fast_tokens=%llu face_fallback_tokens=%llu " + "geometries=%llu objects=%llu io_reads=%llu io_min_read=%llu " + "io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), + static_cast(positions.size()), + static_cast(normals.size()), + static_cast(uvs.size()), + static_cast(outVertexCount), + static_cast(outIndexCount), + static_cast(faceCount), + static_cast(faceFastTokenCountSum), + static_cast(faceFallbackTokenCountSum), + static_cast(loadedGeometries.size()), + static_cast(objectCount), + static_cast(ioTelemetry.callCount), + static_cast(ioTelemetry.getMinOrZero()), + static_cast(ioTelemetry.getAvgOrZero()), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(loadSession.ioPlan.strategy).c_str(), + static_cast(loadSession.ioPlan.chunkSizeBytes()), loadSession.ioPlan.reason); + return SAssetBundle(core::smart_refctd_ptr(), + std::move(outputAssets)); } - -std::string COBJMeshFileLoader::genKeyForMeshBuf(const SContext& _ctx, const std::string& _baseKey, const std::string& _mtlName, const std::string& _grpName) const -{ - return _baseKey + "?" + _grpName + "?" 
+ _mtlName; } - - - - -} // end namespace scene -} // end namespace nbl - #endif // _NBL_COMPILE_WITH_OBJ_LOADER_ diff --git a/src/nbl/asset/interchange/COBJMeshFileLoader.h b/src/nbl/asset/interchange/COBJMeshFileLoader.h index c11a09e671..2af6f62bd7 100644 --- a/src/nbl/asset/interchange/COBJMeshFileLoader.h +++ b/src/nbl/asset/interchange/COBJMeshFileLoader.h @@ -1,136 +1,38 @@ -// Copyright (C) 2019-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors #ifndef _NBL_ASSET_C_OBJ_MESH_FILE_LOADER_H_INCLUDED_ #define _NBL_ASSET_C_OBJ_MESH_FILE_LOADER_H_INCLUDED_ - #include "nbl/core/declarations.h" -#include "nbl/asset/ICPUPolygonGeometry.h" -#include "nbl/asset/interchange/IAssetLoader.h" -#include "nbl/asset/metadata/CMTLMetadata.h" - +#include "nbl/asset/interchange/ISceneLoader.h" namespace nbl::asset { - -#include "nbl/nblpack.h" -class SObjVertex -{ -public: - inline bool operator<(const SObjVertex& other) const - { - if (pos[0]==other.pos[0]) - { - if (pos[1]==other.pos[1]) - { - if (pos[2]==other.pos[2]) - { - if (uv[0]==other.uv[0]) - { - if (uv[1]==other.uv[1]) - return normal32bit normal32bit; -} PACK_STRUCT; -#include "nbl/nblunpack.h" - -//! Meshloader capable of loading obj meshes. -class COBJMeshFileLoader : public IGeometryLoader +/** + Loads plain OBJ into a flat `ICPUScene`. + Multiple `o` and `g` blocks become separate scene instances backed by + geometry collections. + All instance transforms stay identity here. + Material tables stay invalid until `MTL` support is implemented. 
+ + References: + - https://www.loc.gov/preservation/digital/formats/fdd/fdd000507 + - https://www.fileformat.info/format/wavefrontobj/egff.htm +*/ +class COBJMeshFileLoader : public ISceneLoader { - struct SContext - { - SContext(const IAssetLoader::SAssetLoadContext& _innerCtx, uint32_t _topHierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) - : inner(_innerCtx), topHierarchyLevel(_topHierarchyLevel), loaderOverride(_override) {} - - IAssetLoader::SAssetLoadContext inner; - uint32_t topHierarchyLevel; - IAssetLoader::IAssetLoaderOverride* loaderOverride; - - const bool useGroups = false; - const bool useMaterials = true; - }; - -protected: - //! destructor - virtual ~COBJMeshFileLoader(); + public: + ~COBJMeshFileLoader() override; -public: - //! Constructor - COBJMeshFileLoader(IAssetManager* _manager); + //! Constructor + explicit COBJMeshFileLoader(IAssetManager* _manager); - inline bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const override - { - // OBJ doesn't really have any header but usually starts with a comment - system::IFile::success_t succ; - char firstChar = 0; - _file->read(succ, &firstChar, 0, sizeof(firstChar)); - return succ && (firstChar =='#' || firstChar =='v'); - } + bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const override; - virtual const char** getAssociatedFileExtensions() const override - { - static const char* ext[]{ "obj", nullptr }; - return ext; - } + const char** getAssociatedFileExtensions() const override; - virtual asset::SAssetBundle loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; - -private: - // returns a pointer to the first printable character available in the buffer - const char* goFirstWord(const char* buf, const char* const bufEnd, bool acrossNewlines=true); - // returns a pointer to the 
first printable character after the first non-printable - const char* goNextWord(const char* buf, const char* const bufEnd, bool acrossNewlines=true); - // returns a pointer to the next printable character after the first line break - const char* goNextLine(const char* buf, const char* const bufEnd); - // copies the current word from the inBuf to the outBuf - uint32_t copyWord(char* outBuf, const char* inBuf, uint32_t outBufLength, const char* const pBufEnd); - // copies the current line from the inBuf to the outBuf - std::string copyLine(const char* inBuf, const char* const bufEnd); - - // combination of goNextWord followed by copyWord - const char* goAndCopyNextWord(char* outBuf, const char* inBuf, uint32_t outBufLength, const char* const pBufEnd); - - //! Read 3d vector of floats - const char* readVec3(const char* bufPtr, float vec[3], const char* const pBufEnd); - //! Read 2d vector of floats - const char* readUV(const char* bufPtr, float vec[2], const char* const pBufEnd); - //! Read boolean value represented as 'on' or 'off' - const char* readBool(const char* bufPtr, bool& tf, const char* const bufEnd); - - // reads and convert to integer the vertex indices in a line of obj file's face statement - // -1 for the index if it doesn't exist - // indices are changed to 0-based index instead of 1-based from the obj file - bool retrieveVertexIndices(char* vertexData, int32_t* idx, const char* bufEnd, uint32_t vbsize, uint32_t vtsize, uint32_t vnsize); - - std::string genKeyForMeshBuf(const SContext& _ctx, const std::string& _baseKey, const std::string& _mtlName, const std::string& _grpName) const; - - IAssetManager* AssetManager; - system::ISystem* System; - - template - static inline void performActionBasedOnOrientationSystem(aType& varToHandle, void (*performOnCertainOrientation)(aType& varToHandle)) - { - performOnCertainOrientation(varToHandle); - } + //! Loads one OBJ asset bundle from an already opened file. 
+ asset::SAssetBundle loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; }; - } // end namespace nbl::asset - #endif diff --git a/src/nbl/asset/interchange/COBJMeshWriter.cpp b/src/nbl/asset/interchange/COBJMeshWriter.cpp new file mode 100644 index 0000000000..ccd48e599d --- /dev/null +++ b/src/nbl/asset/interchange/COBJMeshWriter.cpp @@ -0,0 +1,296 @@ +#ifdef _NBL_COMPILE_WITH_OBJ_WRITER_ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/asset/interchange/COBJMeshWriter.h" +#include "nbl/asset/interchange/SGeometryViewDecode.h" +#include "nbl/asset/interchange/SGeometryWriterCommon.h" +#include "nbl/asset/interchange/SOBJPolygonGeometryAuxLayout.h" +#include "nbl/asset/interchange/SInterchangeIO.h" +#include "impl/SFileAccess.h" +#include "nbl/builtin/hlsl/array_accessors.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" +#include "nbl/system/IFile.h" +#include +#include +#include +#include +#include +#include +#include +namespace nbl::asset +{ +COBJMeshWriter::COBJMeshWriter() +{ + #ifdef _NBL_DEBUG + setDebugName("COBJMeshWriter"); + #endif +} +uint64_t COBJMeshWriter::getSupportedAssetTypesBitfield() const +{ + return IAsset::ET_GEOMETRY | IAsset::ET_GEOMETRY_COLLECTION | IAsset::ET_SCENE; +} +const char** COBJMeshWriter::getAssociatedFileExtensions() const +{ + static const char* ext[] = { "obj", nullptr }; + return ext; +} +writer_flags_t COBJMeshWriter::getSupportedFlags() +{ + return EWF_MESH_IS_RIGHT_HANDED; +} +writer_flags_t COBJMeshWriter::getForcedFlags() +{ + return EWF_NONE; +} +namespace +{ +struct Parse +{ + static constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 8ull; + static constexpr size_t MaxUInt32Chars = 
std::numeric_limits::digits10 + 1ull; + static constexpr size_t MaxIndexTokenBytes = MaxUInt32Chars * 3ull + 2ull; + struct IndexStringRef { uint32_t offset = 0u; uint16_t length = 0u; }; + struct GeometryTransformState { hlsl::float32_t3x4 transform; hlsl::float32_t3x3 linear; bool identity = true; bool reverseWinding = false; hlsl::math::linalg::cofactors_base normalTransform; }; + + template + static void appendVecLine(std::string& out, const char* prefix, const size_t prefixSize, const Vec& values) + { + constexpr size_t N = hlsl::vector_traits::Dimension; + const size_t oldSize = out.size(); + out.resize(oldSize + prefixSize + (N * MaxFloatTextChars) + N); + char* const lineBegin = out.data() + oldSize; + char* cursor = lineBegin; + char* const lineEnd = out.data() + out.size(); + hlsl::array_get getter; + std::memcpy(cursor, prefix, prefixSize); + cursor += prefixSize; + for (size_t i = 0ull; i < N; ++i) + { + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, lineEnd, getter(values, static_cast(i))); + if (cursor < lineEnd) + *(cursor++) = (i + 1ull < N) ? 
' ' : '\n'; + } + out.resize(oldSize + static_cast(cursor - lineBegin)); + } + + static void appendFaceLine(std::string& out, const std::string& storage, const core::vector& refs, const hlsl::uint32_t3& face) + { + const auto& ref0 = refs[face.x]; + const auto& ref1 = refs[face.y]; + const auto& ref2 = refs[face.z]; + const size_t oldSize = out.size(); + const size_t lineSize = 2ull + static_cast(ref0.length) + 1ull + static_cast(ref1.length) + 1ull + static_cast(ref2.length) + 1ull; + out.resize(oldSize + lineSize); + char* cursor = out.data() + oldSize; + *(cursor++) = 'f'; + *(cursor++) = ' '; + std::memcpy(cursor, storage.data() + ref0.offset, ref0.length); + cursor += ref0.length; + *(cursor++) = ' '; + std::memcpy(cursor, storage.data() + ref1.offset, ref1.length); + cursor += ref1.length; + *(cursor++) = ' '; + std::memcpy(cursor, storage.data() + ref2.offset, ref2.length); + cursor += ref2.length; + *(cursor++) = '\n'; + } + + static void appendIndexToken(std::string& storage, core::vector& refs, const uint32_t positionIx, const bool hasUVs, const uint32_t uvIx, const bool hasNormals, const uint32_t normalIx) + { + IndexStringRef ref = {}; + ref.offset = static_cast(storage.size()); + const size_t oldSize = storage.size(); + storage.resize(oldSize + MaxIndexTokenBytes); + char* const token = storage.data() + oldSize; + char* const tokenEnd = token + MaxIndexTokenBytes; + char* cursor = token; + cursor = SGeometryWriterCommon::appendUIntToBuffer(cursor, tokenEnd, positionIx); + if (hasUVs || hasNormals) + { + if (cursor < tokenEnd) + *(cursor++) = '/'; + if (hasUVs) + cursor = SGeometryWriterCommon::appendUIntToBuffer(cursor, tokenEnd, uvIx); + if (hasNormals) + { + if (cursor < tokenEnd) + *(cursor++) = '/'; + cursor = SGeometryWriterCommon::appendUIntToBuffer(cursor, tokenEnd, normalIx); + } + } + storage.resize(oldSize + static_cast(cursor - token)); + ref.length = static_cast(storage.size() - ref.offset); + refs.push_back(ref); + } + + static void 
appendHeader(std::string& out, const SGeometryWriterCommon::SPolygonGeometryWriteItem& item) + { + std::array name = {}; + if (item.instanceIx != ~0u) + std::snprintf(name.data(), name.size(), "o instance_%u_target_%u_geometry_%u\n", item.instanceIx, item.targetIx, item.geometryIx); + else + std::snprintf(name.data(), name.size(), "o geometry_%u\n", item.geometryIx); + out.append(name.data()); + } + + static GeometryTransformState createTransformState(const hlsl::float32_t3x4& transform) + { + const auto linear = hlsl::float32_t3x3(transform); + return {.transform = transform, .linear = linear, .identity = SGeometryWriterCommon::isIdentityTransform(transform), .reverseWinding = hlsl::determinant(linear) < 0.f, .normalTransform = hlsl::math::linalg::cofactors_base::create(linear)}; + } + static hlsl::float32_t3 applyPosition(const GeometryTransformState& state, const hlsl::float32_t3& value) { return state.identity ? value : hlsl::mul(state.transform, hlsl::float32_t4(value.x, value.y, value.z, 1.f)); } + static hlsl::float32_t3 applyNormal(const GeometryTransformState& state, const hlsl::float32_t3& value) { return state.identity ? 
value : state.normalTransform.normalTransform(value); } +}; +} +bool COBJMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) +{ + SFileWriteTelemetry ioTelemetry = {}; + if (!_override) + getDefaultOverride(_override); + if (!_file || !_params.rootAsset) + return false; + const auto items = SGeometryWriterCommon::collectPolygonGeometryWriteItems(_params.rootAsset); + if (items.empty()) + return false; + SAssetWriteContext ctx = {_params, _file}; + system::IFile* file = _override->getOutputFile(_file, ctx, {_params.rootAsset, 0u}); + if (!file) + return false; + std::string output; + output.append("# Nabla OBJ\n"); + uint64_t totalVertexCount = 0ull; + uint64_t totalFaceCount = 0ull; + uint32_t positionBase = 1u; + uint32_t uvBase = 1u; + uint32_t normalBase = 1u; + using SemanticDecode = SGeometryViewDecode::Prepared; + for (size_t itemIx = 0u; itemIx < items.size(); ++itemIx) + { + const auto& item = items[itemIx]; + const auto* geom = item.geometry; + if (!geom || !geom->valid()) + return false; + const auto& positionView = geom->getPositionView(); + if (!positionView) + return false; + const auto& normalView = geom->getNormalView(); + const bool hasNormals = static_cast(normalView); + const size_t vertexCount = positionView.getElementCount(); + const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, SOBJPolygonGeometryAuxLayout::UV0, vertexCount); + if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) + uvView = nullptr; + const bool hasUVs = uvView != nullptr; + if (vertexCount == 0ull) + return false; + if (hasNormals && normalView.getElementCount() != vertexCount) + return false; + if (hasUVs && uvView->getElementCount() != vertexCount) + return false; + const auto* indexing = geom->getIndexingCallback(); + if (!indexing) + return false; + if (indexing->knownTopology() != E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) + return false; + size_t faceCount = 
0ull; + if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) + return false; + const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); + const bool flipHandedness = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + // Scene input is flattened here by baking transforms and writing every collected polygon geometry as its own OBJ object block. + const auto transformState = Parse::createTransformState(item.transform); + const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(positionView); + const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; + const hlsl::float32_t2* const tightUV = hasUVs ? SGeometryWriterCommon::getTightView(*uvView) : nullptr; + const SemanticDecode positionDecode = tightPositions ? SemanticDecode{} : SGeometryViewDecode::prepare(positionView); + const SemanticDecode uvDecode = (!hasUVs || tightUV) ? SemanticDecode{} : SGeometryViewDecode::prepare(*uvView); + const SemanticDecode normalDecode = (!hasNormals || tightNormals) ? 
SemanticDecode{} : SGeometryViewDecode::prepare(normalView); + if (itemIx != 0u) + output.push_back('\n'); + Parse::appendHeader(output, item); + for (size_t i = 0u; i < vertexCount; ++i) + { + hlsl::float32_t3 vertex = {}; + if (tightPositions) + vertex = tightPositions[i]; + else if (!positionDecode.decode(i, vertex)) + return false; + vertex = Parse::applyPosition(transformState, vertex); + if (flipHandedness) + vertex.x = -vertex.x; + Parse::appendVecLine(output, "v ", sizeof("v ") - 1ull, vertex); + } + if (hasUVs) + { + for (size_t i = 0u; i < vertexCount; ++i) + { + hlsl::float32_t2 uv = {}; + if (tightUV) + uv = hlsl::float32_t2(tightUV[i].x, 1.f - tightUV[i].y); + else if (!uvDecode.decode(i, uv)) + return false; + if (!tightUV) + uv.y = 1.f - uv.y; + Parse::appendVecLine(output, "vt ", sizeof("vt ") - 1ull, uv); + } + } + if (hasNormals) + { + for (size_t i = 0u; i < vertexCount; ++i) + { + hlsl::float32_t3 normal = {}; + if (tightNormals) + normal = tightNormals[i]; + else if (!normalDecode.decode(i, normal)) + return false; + normal = Parse::applyNormal(transformState, normal); + if (flipHandedness) + normal.x = -normal.x; + Parse::appendVecLine(output, "vn ", sizeof("vn ") - 1ull, normal); + } + } + core::vector faceIndexRefs; + faceIndexRefs.reserve(vertexCount); + std::string faceIndexStorage; + faceIndexStorage.reserve(vertexCount * 24ull); + for (size_t i = 0u; i < vertexCount; ++i) + { + const uint32_t positionIx = positionBase + static_cast(i); + const uint32_t uvIx = hasUVs ? (uvBase + static_cast(i)) : 0u; + const uint32_t normalIx = hasNormals ? 
(normalBase + static_cast(i)) : 0u; + Parse::appendIndexToken(faceIndexStorage, faceIndexRefs, positionIx, hasUVs, uvIx, hasNormals, normalIx); + } + const hlsl::uint32_t3 faceLimit(static_cast(faceIndexRefs.size())); + if (!SGeometryWriterCommon::visitTriangleIndices(geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) -> bool { + const hlsl::uint32_t3 face(transformState.reverseWinding ? i0 : i2, i1, transformState.reverseWinding ? i2 : i0); + if (hlsl::any(glm::greaterThanEqual(face, faceLimit))) + return false; + Parse::appendFaceLine(output, faceIndexStorage, faceIndexRefs, face); + return true; + })) + return false; + + positionBase += static_cast(vertexCount); + if (hasUVs) + uvBase += static_cast(vertexCount); + if (hasNormals) + normalBase += static_cast(vertexCount); + totalVertexCount += vertexCount; + totalFaceCount += faceCount; + } + const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(output.size()), true, file); + if (impl::SFileAccess::logInvalidPlan(_params.logger, "OBJ writer", file->getFileName().string().c_str(), ioPlan)) + return false; + const bool writeOk = SInterchangeIO::writeFileWithPolicy(file, ioPlan, output.data(), output.size(), &ioTelemetry); + const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); + const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); + impl::SFileAccess::logTinyIO(_params.logger, "OBJ writer", file->getFileName().string().c_str(), ioTelemetry, static_cast(output.size()), _params.ioPolicy, "writes"); + _params.logger.log("OBJ writer stats: file=%s bytes=%llu vertices=%llu faces=%llu geometries=%llu io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(output.size()), + static_cast(totalVertexCount), static_cast(totalFaceCount), static_cast(items.size()), + static_cast(ioTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), 
+ system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(ioPlan.strategy).c_str(), static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); + return writeOk; +} +} +#endif // _NBL_COMPILE_WITH_OBJ_WRITER_ diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 932a04b82c..3e009207a6 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -1,883 +1,1840 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +#ifdef _NBL_COMPILE_WITH_PLY_LOADER_ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors -#ifdef _NBL_COMPILE_WITH_PLY_LOADER_ - - #include "CPLYMeshFileLoader.h" - -#include - +#include "impl/SBinaryData.h" +#include "impl/SFileAccess.h" +#include "impl/STextParse.h" #include "nbl/asset/IAssetManager.h" - -#include "nbl/system/ISystem.h" +#include "nbl/asset/interchange/SGeometryContentHash.h" +#include "nbl/asset/interchange/SGeometryLoaderCommon.h" +#include "nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h" +#include "nbl/asset/interchange/SInterchangeIO.h" +#include "nbl/asset/interchange/SLoaderRuntimeTuning.h" +#include "nbl/asset/metadata/CPLYMetadata.h" +#include "nbl/builtin/hlsl/array_accessors.hlsl" +#include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" +#include "nbl/core/hash/blake.h" #include "nbl/system/IFile.h" - -//#include "nbl/asset/utils/IMeshManipulator.h" - - +#include "nbl/system/ISystem.h" +#include +#include namespace nbl::asset { - -bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const +namespace { - char buf[40]; - - 
system::IFile::success_t success; - _file->read(success,buf,0,sizeof(buf)); - if (!success) - return false; - - char* header = buf; - if (strncmp(header,"ply",3u)!=0) - return false; - - header += 4; - char* lf = strstr(header,"\n"); - if (!lf) - return false; - - constexpr std::array headers = { - "format ascii 1.0", - "format binary_little_endian 1.0", - "format binary_big_endian 1.0" - }; - return std::find(headers.begin(),headers.end(),std::string_view(header,lf))!=headers.end(); -} - -template -inline T byteswap(const T& v) -{ - T retval; - auto it = reinterpret_cast(&v); - std::reverse_copy(it,it+sizeof(T),reinterpret_cast(&retval)); - return retval; -} - -struct SContext +struct Parse { - - // - struct SProperty + using Binary = impl::BinaryData; + using Common = impl::TextParse; + struct ContentHashBuild { - static E_FORMAT getType(const char* typeString) - { - if (strcmp(typeString, "char")==0 || strcmp(typeString, "int8")==0) - return EF_R8_SINT; - else if (strcmp(typeString, "uchar")==0 || strcmp(typeString, "uint8")==0) - return EF_R8_UINT; - else if (strcmp(typeString, "short")==0 || strcmp(typeString, "int16")==0) - return EF_R16_SINT; - else if (strcmp(typeString, "ushort")==0 || strcmp(typeString, "uint16")==0) - return EF_R16_UINT; - else if (strcmp(typeString, "long")==0 || strcmp(typeString, "int")==0 || strcmp(typeString, "int16")==0) - return EF_R32_SINT; - else if (strcmp(typeString, "ulong")==0 || strcmp(typeString, "uint16")==0) - return EF_R32_UINT; - else if (strcmp(typeString, "float")==0 || strcmp(typeString, "float32")==0) - return EF_R32_SFLOAT; - else if (strcmp(typeString, "double")==0 || strcmp(typeString, "float64")==0) - return EF_R64_SFLOAT; - else - return EF_UNKNOWN; - } - - inline bool isList() const {return type==EF_UNKNOWN && asset::isIntegerFormat(list.countType) && asset::isIntegerFormat(list.itemType);} - - void skip(SContext& _ctx) const + bool enabled = false; + bool inlineHash = false; + core::vector> hashedBuffers = 
{}; + std::jthread deferredThread = {}; + static inline ContentHashBuild create(const bool enabled, const bool inlineHash) { return {.enabled = enabled, .inlineHash = inlineHash}; } + inline bool hashesInline() const { return enabled && inlineHash; } + inline bool hashesDeferred() const { return enabled && !inlineHash; } + inline void hashNow(ICPUBuffer* const buffer) { - if (isList()) - { - int32_t count = _ctx.getInt(list.countType); - - for (decltype(count) i=0; igetContentHash() != IPreHashed::INVALID_HASH) + return; + for (const auto& hashed : hashedBuffers) + if (hashed.get() == buffer) + return; + buffer->setContentHash(buffer->computeContentHash()); + hashedBuffers.push_back(core::smart_refctd_ptr(buffer)); } - - std::string Name; - E_FORMAT type; - struct SListTypes + inline void tryDefer(ICPUBuffer* const buffer) { - E_FORMAT countType; - E_FORMAT itemType; - } list; - }; - struct SElement - { - void skipElement(SContext& _ctx) const - { - if (_ctx.IsBinaryFile) - { - if (KnownSize) - _ctx.moveForward(KnownSize); - else - for (auto i=0u; igetContentHash() != IPreHashed::INVALID_HASH) + return; + auto keepAlive = core::smart_refctd_ptr(buffer); + deferredThread = std::jthread([buffer = std::move(keepAlive)]() mutable {buffer->setContentHash(buffer->computeContentHash());}); } - - // name of the element. We only want "vertex" and "face" elements - // but we have to parse the others anyway. - std::string Name; - // Properties of this element - core::vector Properties; - // The number of elements in the file - size_t Count; - // known size in bytes, 0 if unknown - uint32_t KnownSize; + inline void wait() { if (deferredThread.joinable()) deferredThread.join(); } }; - - inline void init() + static std::string_view toStringView(const char* text) { - EndPointer = StartPointer = Buffer.data(); - LineEndPointer = EndPointer-1; - - fillBuffer(); + return text ? 
std::string_view{text} : std::string_view{}; } - - // gets more data from the file - void fillBuffer() + struct Context { - if (EndOfFile) - return; - else if (fileOffset>=inner.mainFile->getSize()) + static constexpr uint64_t ReadWindowPaddingBytes = 1ull; + struct SProperty { - EndOfFile = true; - return; - } - - const auto length = std::distance(StartPointer,EndPointer); - auto newStart = Buffer.data(); - // copy the remaining data to the start of the buffer - if (length && StartPointer!=newStart) - memmove(newStart,StartPointer,length); - // reset start position - StartPointer = newStart; - EndPointer = newStart+length; - - // read data from the file - const size_t requestSize = Buffer.size()-length; - system::IFile::success_t success; - inner.mainFile->read(success,EndPointer,fileOffset,requestSize); - const size_t bytesRead = success.getBytesProcessed(); - fileOffset += bytesRead; - EndPointer += bytesRead; - - // if we didn't completely fill the buffer - if (bytesRead!=requestSize) - { - // cauterize the string - *EndPointer = 0; - EndOfFile = true; - } - } - // Split the string data into a line in place by terminating it instead of copying. 
- const char* getNextLine() - { - // move the start pointer along - StartPointer = LineEndPointer+1; - - // crlf split across buffer move - if (*StartPointer=='\n') - *(StartPointer++) = '\0'; - - // begin at the start of the next line - const std::array Terminators = { '\0','\r','\n'}; - auto terminator = std::find_first_of(StartPointer,EndPointer,Terminators.begin(),Terminators.end()); - if (terminator!=EndPointer) - *(terminator++) = '\0'; - - // we have reached the end of the buffer - if (terminator==EndPointer) - { - // get data from the file - if (EndOfFile) + static E_FORMAT getType(const char* typeString) { - StartPointer = EndPointer-1; - *StartPointer = '\0'; - return StartPointer; + struct STypeAlias + { + std::string_view name; + E_FORMAT format; + }; + constexpr std::array typeAliases = {{ + {"char", EF_R8_SINT}, + {"int8", EF_R8_SINT}, + {"uchar", EF_R8_UINT}, + {"uint8", EF_R8_UINT}, + {"short", EF_R16_SINT}, + {"int16", EF_R16_SINT}, + {"ushort", EF_R16_UINT}, + {"uint16", EF_R16_UINT}, + {"long", EF_R32_SINT}, + {"int", EF_R32_SINT}, + {"int32", EF_R32_SINT}, + {"ulong", EF_R32_UINT}, + {"uint", EF_R32_UINT}, + {"uint32", EF_R32_UINT}, + {"float", EF_R32_SFLOAT}, + {"float32", EF_R32_SFLOAT} + }}; + const std::string_view typeName = Parse::toStringView(typeString); + for (const auto& alias : typeAliases) + { + if (alias.name == typeName) + return alias.format; + } + if (typeName == "double" || typeName == "float64") + return EF_R64_SFLOAT; + return EF_UNKNOWN; } - else + bool isList() const { - fillBuffer(); - // reset line end pointer - LineEndPointer = StartPointer-1; - if (StartPointer!=EndPointer) - return getNextLine(); - else - return StartPointer; + return type == EF_UNKNOWN && asset::isIntegerFormat(list.countType) && asset::isIntegerFormat(list.itemType); } - } - else - { - LineEndPointer = terminator-1; - WordLength = -1; - // return pointer to the start of the line - return StartPointer; - } - } - // null terminate the next word on the 
previous line and move the next word pointer along - // since we already have a full line in the buffer, we never need to retrieve more data - const char* getNextWord() - { - // move the start pointer along - StartPointer += WordLength + 1; - if (!*StartPointer) - getNextLine(); - - if (StartPointer==LineEndPointer) - { - WordLength = -1; // - return LineEndPointer; - } - // process the next word - { - assert(LineEndPointer<=EndPointer); - const std::array WhiteSpace = {'\0',' ','\t'}; - auto wordEnd = std::find_first_of(StartPointer,LineEndPointer,WhiteSpace.begin(),WhiteSpace.end()); - // null terminate the next word - if (wordEnd!=LineEndPointer) - *(wordEnd++) = '\0'; - // find next word - auto notWhiteSpace = [WhiteSpace](const char c)->bool + void skip(Context& _ctx) const { - return std::find(WhiteSpace.begin(),WhiteSpace.end(),c)==WhiteSpace.end(); - }; - auto nextWord = std::find_if(wordEnd,LineEndPointer,notWhiteSpace); - WordLength = std::distance(StartPointer,nextWord)-1; - } - // return pointer to the start of current word - return StartPointer; - } - // skips x bytes in the file, getting more data if required - void moveForward(const size_t bytes) - { - assert(IsBinaryFile); - if (StartPointer+bytes>=EndPointer) - fillBuffer(); - - if (StartPointer+bytesEndPointer) - fillBuffer(); - - switch (getTexelOrBlockBytesize(f)) - { - case 1: - if (StartPointer+sizeof(int8_t)>EndPointer) - break; - return *(StartPointer++); - case 2: - { - if (StartPointer+sizeof(int16_t)>EndPointer) - break; - auto retval = *(reinterpret_cast(StartPointer)++); - if (IsWrongEndian) - retval = byteswap(retval); - return retval; - } - case 4: + if (isList()) { - if (StartPointer+sizeof(int32_t)>EndPointer) - break; - auto retval = *(reinterpret_cast(StartPointer)++); - if (IsWrongEndian) - retval = byteswap(retval); - return retval; + int32_t count = _ctx.getInt(list.countType); + for (decltype(count) i = 0; i < count; ++i) + _ctx.getInt(list.itemType); } - default: - 
assert(false); - break; + else if (_ctx.IsBinaryFile) + _ctx.moveForward(getTexelOrBlockBytesize(type)); + else + _ctx.getNextWord(); } - return 0; - } - return std::atoi(getNextWord()); - } - // read the next float from the file and move the start pointer along - hlsl::float64_t getFloat(const E_FORMAT f) - { - assert(isFloatingPointFormat(f)); - if (IsBinaryFile) + std::string Name; + E_FORMAT type; + struct SListTypes + { + E_FORMAT countType; + E_FORMAT itemType; + } list; + }; + struct SElement { - if (StartPointer+sizeof(hlsl::float64_t)>EndPointer) - fillBuffer(); - - switch (getTexelOrBlockBytesize(f)) + void skipElement(Context& _ctx) const { - case 4: + if (_ctx.IsBinaryFile) { - if (StartPointer+sizeof(hlsl::float32_t)>EndPointer) - break; - auto retval = *(reinterpret_cast(StartPointer)++); - if (IsWrongEndian) - retval = byteswap(retval); - return retval; - } - case 8: - { - if (StartPointer+sizeof(hlsl::float64_t)>EndPointer) - break; - auto retval = *(reinterpret_cast(StartPointer)++); - if (IsWrongEndian) - retval = byteswap(retval); - return retval; + if (KnownSize) + _ctx.moveForward(KnownSize); + else + for (auto i = 0u; i < Properties.size(); ++i) + Properties[i].skip(_ctx); } - default: - assert(false); - break; + else + _ctx.getNextLine(); } - return 0; - } - return std::atoi(getNextWord()); - } - // read the next thing from the file and move the start pointer along - void getData(void* dst, const E_FORMAT f) - { - const auto size = getTexelOrBlockBytesize(f); - if (StartPointer+size>EndPointer) + std::string Name; // name of the element. We only want "vertex" and "face" elements + // but we have to parse the others anyway. 
+ core::vector Properties; // Properties of this element + size_t Count; // The number of elements in the file + uint32_t KnownSize; // known size in bytes, 0 if unknown + }; + static constexpr size_t DefaultIoReadWindowBytes = 50ull << 10; + void init(size_t _ioReadWindowSize = DefaultIoReadWindowBytes) { + ioReadWindowSize = std::max(_ioReadWindowSize, DefaultIoReadWindowBytes); + Buffer.resize(ioReadWindowSize + ReadWindowPaddingBytes, '\0'); + EndPointer = StartPointer = Buffer.data(); + LineEndPointer = EndPointer - 1; fillBuffer(); - if (StartPointer+size>EndPointer) - return; } - if (IsWrongEndian) - std::reverse_copy(StartPointer,StartPointer+size,reinterpret_cast(dst)); - else - memcpy(dst,StartPointer,size); - StartPointer += size; - } - struct SVertAttrIt - { - uint8_t* ptr; - uint32_t stride; - E_FORMAT dstFmt; - }; - inline void readVertex(const IAssetLoader::SAssetLoadParams& _params, const SElement& el) - { - assert(el.Name=="vertex"); - assert(el.Properties.size()==vertAttrIts.size()); - if (!IsBinaryFile) - getNextLine(); - - for (size_t j=0; j= inner.mainFile->getSize()) { - assert(isIntegerFormat(it.dstFmt)==isIntegerFormat(prop.type)); - if (isIntegerFormat(it.dstFmt)) - { - uint64_t tmp = getInt(prop.type); - encodePixels(it.dstFmt,it.ptr,&tmp); - } - else - { - hlsl::float64_t tmp = getFloat(prop.type); - encodePixels(it.dstFmt,it.ptr,&tmp); - } + EndOfFile = true; + return; } - else - getData(it.ptr,prop.type); - // - it.ptr += it.stride; - } - } - bool readFace(const SElement& Element, core::vector& _outIndices) - { - if (!IsBinaryFile) - getNextLine(); - - for (const auto& prop : Element.Properties) - { - if (prop.isList() && (prop.Name=="vertex_indices" || prop.Name == "vertex_index")) + const auto length = std::distance(StartPointer, EndPointer); + auto newStart = Buffer.data(); + // copy the remaining data to the start of the buffer + if (length && StartPointer != newStart) + memmove(newStart, StartPointer, length); + // reset start 
position + StartPointer = newStart; + EndPointer = newStart + length; + const size_t usableBufferSize = Buffer.size() > 0ull ? Buffer.size() - ReadWindowPaddingBytes : 0ull; + if (usableBufferSize <= length) { - const uint32_t count = getInt(prop.list.countType); - //_NBL_DEBUG_BREAK_IF(count != 3) - const auto srcIndexFmt = prop.list.itemType; - - _outIndices.push_back(getInt(srcIndexFmt)); - _outIndices.push_back(getInt(srcIndexFmt)); - _outIndices.push_back(getInt(srcIndexFmt)); - // TODO: handle varying vertex count faces via variable vertex count geometry collections (PLY loader should be a Geometry Collection loader) - for (auto j=3u; jread(success, EndPointer, fileOffset, requestSize); + const size_t bytesRead = success.getBytesProcessed(); + ++readCallCount; + readBytesTotal += bytesRead; + if (bytesRead < readMinBytes) + readMinBytes = bytesRead; + fileOffset += bytesRead; + EndPointer += bytesRead; + // if we didn't completely fill the buffer + if (bytesRead != requestSize) { - // todo: face intensity - prop.skip(*this); + // cauterize the string + *EndPointer = 0; + EndOfFile = true; } - else - prop.skip(*this); } - return true; - } - - IAssetLoader::SAssetLoadContext inner; - uint32_t topHierarchyLevel; - IAssetLoader::IAssetLoaderOverride* loaderOverride; - // input buffer must be at least twice as long as the longest line in the file - std::array Buffer; // 50kb seems sane to store a line - core::vector ElementList = {}; - char* StartPointer = nullptr, *EndPointer = nullptr, *LineEndPointer = nullptr; - int32_t LineLength = 0; - int32_t WordLength = -1; // this variable is a misnomer, its really the offset to next word minus one - bool IsBinaryFile = false, IsWrongEndian = false, EndOfFile = false; - size_t fileOffset = {}; - // - core::vector vertAttrIts; -}; - -//! creates/loads an animated mesh from the file. 
-SAssetBundle CPLYMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) -{ - using namespace nbl::core; - if (!_file) - return {}; - - SContext ctx = { - asset::IAssetLoader::SAssetLoadContext{ - _params, - _file - }, - _hierarchyLevel, - _override - }; - ctx.init(); - - // start with empty mesh - auto geometry = make_smart_refctd_ptr(); - uint32_t vertCount=0; - - // Currently only supports ASCII or binary meshes - if (strcmp(ctx.getNextLine(),"ply")) - { - _params.logger.log("Not a valid PLY file %s", system::ILogger::ELL_ERROR,ctx.inner.mainFile->getFileName().string().c_str()); - return {}; - } - - // cut the next line out - ctx.getNextLine(); - // grab the word from this line - const char* word = ctx.getNextWord(); - // ignore comments - for (; strcmp(word,"comment")==0; ctx.getNextLine()) - word = ctx.getNextWord(); - - bool readingHeader = true; - bool continueReading = true; - ctx.IsBinaryFile = false; - ctx.IsWrongEndian= false; - - do - { - if (strcmp(word,"property") == 0) + std::string_view getNextLine() // Split the string data into a line in place by terminating it instead of copying. 
{ - word = ctx.getNextWord(); - - if (ctx.ElementList.empty()) + // move the start pointer along + StartPointer = LineEndPointer + 1; + // crlf split across buffer move + if (StartPointer < EndPointer && *StartPointer == '\n') + *(StartPointer++) = '\0'; + const char* const lineStart = StartPointer; + // begin at the start of the next line + const std::array Terminators = {'\0', '\r', '\n'}; + auto terminator = std::find_first_of(StartPointer, EndPointer, Terminators.begin(), Terminators.end()); + if (terminator != EndPointer) { - _params.logger.log("PLY property token found before element %s", system::ILogger::ELL_WARNING, word); + const char* const lineEnd = terminator; + *(terminator++) = '\0'; + LineEndPointer = terminator - 1; + WordLength = -1; + return std::string_view(lineStart, static_cast(lineEnd - lineStart)); } - else + // we have reached the end of the buffer + if (terminator == EndPointer) { - // get element - auto& el = ctx.ElementList.back(); - - // fill property struct - auto& prop = el.Properties.emplace_back(); - prop.type = prop.getType(word); - if (prop.type==EF_UNKNOWN) + if (EndOfFile) { - el.KnownSize = false; - - word = ctx.getNextWord(); - - prop.list.countType = prop.getType(word); - if (ctx.IsBinaryFile && !isIntegerFormat(prop.list.countType)) - { - _params.logger.log("Cannot read binary PLY file containing data types of unknown or non integer length %s", system::ILogger::ELL_WARNING, word); - continueReading = false; - } - else - { - word = ctx.getNextWord(); - prop.list.itemType = prop.getType(word); - if (ctx.IsBinaryFile && !isIntegerFormat(prop.list.itemType)) - { - _params.logger.log("Cannot read binary PLY file containing data types of unknown or non integer length %s", system::ILogger::ELL_ERROR, word); - continueReading = false; - } - } + StartPointer = EndPointer - 1; + *StartPointer = '\0'; + return {}; } - else if (ctx.IsBinaryFile && prop.type==EF_UNKNOWN) + // get data from the file + fillBuffer(); + // reset line end 
pointer + LineEndPointer = StartPointer - 1; + return StartPointer != EndPointer ? getNextLine() : std::string_view{}; + } + return {}; + } + const char* getNextWord() // null terminate the next word on the previous line and move the next word pointer along since we already have a full line in the buffer, we never need to retrieve more data + { + // move the start pointer along + StartPointer += WordLength + 1; + if (StartPointer >= EndPointer) + { + if (EndOfFile) { - _params.logger.log("Cannot read binary PLY file containing data types of unknown length %s", system::ILogger::ELL_ERROR, word); - continueReading = false; + WordLength = -1; + return EndPointer; } - else - el.KnownSize += getTexelOrBlockBytesize(prop.type); - - prop.Name = ctx.getNextWord(); + getNextLine(); + } + if (StartPointer < EndPointer && !*StartPointer) + getNextLine(); + if (StartPointer >= LineEndPointer) + { + WordLength = -1; + return StartPointer; } + assert(LineEndPointer <= EndPointer); + // process the next word + const std::array WhiteSpace = {'\0', ' ', '\t'}; + auto wordEnd = std::find_first_of(StartPointer, LineEndPointer, WhiteSpace.begin(), WhiteSpace.end()); + // null terminate the next word + if (wordEnd != LineEndPointer) + *(wordEnd++) = '\0'; + // find next word + auto nextWord = std::find_if(wordEnd, LineEndPointer, [WhiteSpace](const char c) -> bool { return std::find(WhiteSpace.begin(), WhiteSpace.end(), c) == WhiteSpace.end(); }); + WordLength = std::distance(StartPointer, nextWord) - 1; + // return pointer to the start of current word + return StartPointer; } - else if (strcmp(word,"element")==0) + size_t getAbsoluteOffset(const char* ptr) const { - auto& el = ctx.ElementList.emplace_back(); - el.Name = ctx.getNextWord(); - el.Count = atoi(ctx.getNextWord()); - el.KnownSize = 0; - if (el.Name=="vertex") - vertCount = el.Count; + if (!ptr || ptr > EndPointer) + return fileOffset; + const size_t trailingBytes = static_cast(EndPointer - ptr); + return fileOffset >= 
trailingBytes ? (fileOffset - trailingBytes) : 0ull; } - else if (strcmp(word,"comment")==0) + void useMappedBinaryWindow(const char* data, const size_t sizeBytes) { - // ignore line + if (!data) + return; + StartPointer = const_cast(data); + EndPointer = StartPointer + sizeBytes; + LineEndPointer = StartPointer - 1; + WordLength = -1; + EndOfFile = true; + fileOffset = inner.mainFile ? inner.mainFile->getSize() : fileOffset; } - // must be `format {binary_little_endian|binary_big_endian|ascii} 1.0` - else if (strcmp(word,"format") == 0) + void moveForward(const size_t bytes) // skips x bytes in the file, getting more data if required { - word = ctx.getNextWord(); - - if (strcmp(word, "binary_little_endian") == 0) - { - ctx.IsBinaryFile = true; - } - else if (strcmp(word, "binary_big_endian") == 0) - { - ctx.IsBinaryFile = true; - ctx.IsWrongEndian = true; - } - else if (strcmp(word, "ascii")==0) - { - } - else + assert(IsBinaryFile); + size_t remaining = bytes; + if (remaining == 0ull) + return; + const size_t availableInitially = EndPointer > StartPointer ? static_cast(EndPointer - StartPointer) : 0ull; + if (remaining > availableInitially) { - // abort if this isn't an ascii or a binary mesh - _params.logger.log("Unsupported PLY mesh format %s", system::ILogger::ELL_ERROR, word); - continueReading = false; + remaining -= availableInitially; + StartPointer = EndPointer; + if (remaining > ioReadWindowSize) + { + const size_t fileSize = inner.mainFile->getSize(); + const size_t fileRemaining = fileSize > fileOffset ? 
(fileSize - fileOffset) : 0ull; + const size_t directSkip = std::min(remaining, fileRemaining); + fileOffset += directSkip; + remaining -= directSkip; + } } - - if (continueReading) + while (remaining) { - word = ctx.getNextWord(); - if (strcmp(word, "1.0")) + if (StartPointer >= EndPointer) { - _params.logger.log("Unsupported PLY mesh version %s",system::ILogger::ELL_WARNING,word); + fillBuffer(); + if (StartPointer >= EndPointer) + return; } + const size_t available = static_cast(EndPointer - StartPointer); + const size_t step = std::min(available, remaining); + StartPointer += step; + remaining -= step; } } - else if (strcmp(word,"end_header")==0) + using widest_int_t = uint32_t; + const char* getCurrentWordEnd(const char* word) const { - readingHeader = false; - if (ctx.IsBinaryFile) - ctx.StartPointer = ctx.LineEndPointer+1; + const size_t tokenLen = WordLength >= 0 ? static_cast(WordLength + 1) : std::char_traits::length(word); + return word + tokenLen; } - else + inline bool ensureBytes(const size_t bytes) { - _params.logger.log("Unknown item in PLY file %s", system::ILogger::ELL_WARNING, word); + if (StartPointer + bytes > EndPointer) + fillBuffer(); + return StartPointer + bytes <= EndPointer; } - - if (readingHeader && continueReading) + template + inline T loadBinaryScalar() { - ctx.getNextLine(); - word = ctx.getNextWord(); + if (!ensureBytes(sizeof(T))) + return T{}; + const auto retval = Binary::loadUnaligned(StartPointer, IsWrongEndian); + StartPointer += sizeof(T); + return retval; } - } - while (readingHeader && continueReading); - - // - if (!continueReading) - return {}; - - // now to read the actual data from the file - using index_t = uint32_t; - core::vector indices = {}; - - // loop through each of the elements - bool verticesProcessed = false; - for (uint32_t i=0; i https://paulbourke.net/dataformats/ply/ + template + inline T parseCurrentWordValue() { - if (verticesProcessed) - { - _params.logger.log("Multiple `vertex` elements not 
supported!", system::ILogger::ELL_ERROR); - return {}; - } - ICPUPolygonGeometry::SDataViewBase posView = {}, normalView = {}; - for (auto& vertexProperty : el.Properties) + const char* word = getNextWord(); + if (!word) + return T{}; + const char* const wordEnd = getCurrentWordEnd(word); + if (word == wordEnd) + return T{}; + T value = {}; + auto ptr = word; + if (Common::parseNumber(ptr, wordEnd, value) && ptr == wordEnd) + return value; + return ptr != word ? value : T{}; + } + widest_int_t getInt(const E_FORMAT f) // read the next int from the file and move the start pointer along + { + assert(!isFloatingPointFormat(f)); + if (IsBinaryFile) { - const auto& propertyName = vertexProperty.Name; - // only positions and normals need to be structured/canonicalized in any way - auto negotiateFormat = [&vertexProperty](ICPUPolygonGeometry::SDataViewBase& view, const uint8_t component)->void + switch (getTexelOrBlockBytesize(f)) { - assert(getFormatChannelCount(vertexProperty.type)!=0); - if (getTexelOrBlockBytesize(vertexProperty.type)>getTexelOrBlockBytesize(view.format)) - view.format = vertexProperty.type; - view.stride = hlsl::max(view.stride,component); - }; - if (propertyName=="x") - negotiateFormat(posView,0); - else if (propertyName=="y") - negotiateFormat(posView,1); - else if (propertyName=="z") - negotiateFormat(posView,2); - else if (propertyName=="nx") - negotiateFormat(normalView,0); - else if (propertyName=="ny") - negotiateFormat(normalView,1); - else if (propertyName=="nz") - negotiateFormat(normalView,2); - else - { -// TODO: record the `propertyName` - geometry->getAuxAttributeViews()->push_back(createView(vertexProperty.type,el.Count)); + case 1: + if (ensureBytes(sizeof(int8_t))) + return *(StartPointer++); + break; + case 2: return static_cast(loadBinaryScalar()); + case 4: return static_cast(loadBinaryScalar()); + default: + assert(false); + break; } + return 0u; } - auto setFinalFormat = [&ctx](ICPUPolygonGeometry::SDataViewBase& view)->void + 
return isSignedFormat(f) ? static_cast(parseCurrentWordValue()) : static_cast(parseCurrentWordValue()); + } + hlsl::float64_t getFloat(const E_FORMAT f) // read the next float from the file and move the start pointer along + { + assert(isFloatingPointFormat(f)); + if (IsBinaryFile) { - const auto componentFormat = view.format; - const auto componentCount = view.stride+1; - // turn single channel format to multiple - view.format = [=]()->E_FORMAT + switch (getTexelOrBlockBytesize(f)) { - switch (view.format) - { - case EF_R8_SINT: - switch (componentCount) - { - case 1: - return EF_R8_SINT; - case 2: - return EF_R8G8_SINT; - case 3: - return EF_R8G8B8_SINT; - case 4: - return EF_R8G8B8A8_SINT; - default: - break; - } - break; - case EF_R8_UINT: - switch (componentCount) - { - case 1: - return EF_R8_UINT; - case 2: - return EF_R8G8_UINT; - case 3: - return EF_R8G8B8_UINT; - case 4: - return EF_R8G8B8A8_UINT; - default: - break; - } - break; - case EF_R16_SINT: - switch (componentCount) - { - case 1: - return EF_R16_SINT; - case 2: - return EF_R16G16_SINT; - case 3: - return EF_R16G16B16_SINT; - case 4: - return EF_R16G16B16A16_SINT; - default: - break; - } - break; - case EF_R16_UINT: - switch (componentCount) - { - case 1: - return EF_R16_UINT; - case 2: - return EF_R16G16_UINT; - case 3: - return EF_R16G16B16_UINT; - case 4: - return EF_R16G16B16A16_UINT; - default: - break; - } - break; - case EF_R32_SINT: - switch (componentCount) - { - case 1: - return EF_R32_SINT; - case 2: - return EF_R32G32_SINT; - case 3: - return EF_R32G32B32_SINT; - case 4: - return EF_R32G32B32A32_SINT; - default: - break; - } - break; - case EF_R32_UINT: - switch (componentCount) - { - case 1: - return EF_R32_UINT; - case 2: - return EF_R32G32_UINT; - case 3: - return EF_R32G32B32_UINT; - case 4: - return EF_R32G32B32A32_UINT; - default: - break; - } - break; - case EF_R32_SFLOAT: - switch (componentCount) - { - case 1: - return EF_R32_SFLOAT; - case 2: - return EF_R32G32_SFLOAT; - case 
3: - return EF_R32G32B32_SFLOAT; - case 4: - return EF_R32G32B32A32_SFLOAT; - default: - break; - } - break; - case EF_R64_SFLOAT: - switch (componentCount) - { - case 1: - return EF_R64_SFLOAT; - case 2: - return EF_R64G64_SFLOAT; - case 3: - return EF_R64G64B64_SFLOAT; - case 4: - return EF_R64G64B64A64_SFLOAT; - default: - break; - } - break; - default: - break; - } - return EF_UNKNOWN; - }(); - view.stride = getTexelOrBlockBytesize(view.format); - // - for (auto c=0u; c(offset), - .stride = view.stride, - .dstFmt = componentFormat - }); + case 4: return loadBinaryScalar(); + case 8: return loadBinaryScalar(); + default: + assert(false); + break; } - }; - if (posView.format!=EF_UNKNOWN) - { - auto beginIx = ctx.vertAttrIts.size(); - setFinalFormat(posView); - auto view = createView(posView.format,el.Count); - for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) - ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; - geometry->setPositionView(std::move(view)); - } - if (normalView.format!=EF_UNKNOWN) - { - auto beginIx = ctx.vertAttrIts.size(); - setFinalFormat(normalView); - auto view = createView(normalView.format,el.Count); - for (const auto size=ctx.vertAttrIts.size(); beginIx!=size; beginIx++) - ctx.vertAttrIts[beginIx].ptr += ptrdiff_t(view.src.buffer->getPointer())+view.src.offset; - geometry->setNormalView(std::move(view)); + return 0.0; } - // - for (auto& view : *geometry->getAuxAttributeViews()) - ctx.vertAttrIts.push_back({ - .ptr = reinterpret_cast(view.src.buffer->getPointer())+view.src.offset, - .stride = getTexelOrBlockBytesize(view.composed.format), - .dstFmt = view.composed.format - }); - // loop through vertex properties - ctx.readVertex(_params,el); - verticesProcessed = true; + return parseCurrentWordValue(); } - else if (el.Name=="face") + void getData(void* dst, const E_FORMAT f) // read the next thing from the file and move the start pointer along { - for (size_t j=0; j(dst)); + else 
+ memcpy(dst, StartPointer, size); + StartPointer += size; } - } - - // do before indices so we don't compute their stuff again - CPolygonGeometryManipulator::recomputeContentHashes(geometry.get()); - CPolygonGeometryManipulator::recomputeRanges(geometry.get()); - - if (indices.empty()) - { - // no index buffer means point cloud - geometry->setIndexing(IPolygonGeometryBase::PointList()); - } - else - { - geometry->setIndexing(IPolygonGeometryBase::TriangleList()); - auto view = IGeometryLoader::createView(EF_R32_UINT,indices.size(),indices.data()); - geometry->setIndexView(std::move(view)); - } - - CPolygonGeometryManipulator::recomputeAABB(geometry.get()); - - auto meta = core::make_smart_refctd_ptr(); - return SAssetBundle(std::move(meta),{std::move(geometry)}); + struct SVertAttrIt { + uint8_t* ptr; + uint32_t stride; + E_FORMAT dstFmt; + }; + enum class EFastVertexReadResult : uint8_t { + NotApplicable, + Success, + Error + }; + EFastVertexReadResult readVertexElementFast( + const SElement& el, + hlsl::shapes::util::AABBAccumulator3* parsedAABB) { + if (!IsBinaryFile || el.Name != "vertex") + return EFastVertexReadResult::NotApplicable; + enum class ELayoutKind : uint8_t { XYZ, XYZ_N, XYZ_N_UV }; + auto allF32 = [&el]()->bool { + for (const auto& prop : el.Properties) + if (prop.type != EF_R32_SFLOAT) + return false; + return true; + }; + if (!allF32()) + return EFastVertexReadResult::NotApplicable; + auto matchNames = [&el](std::initializer_list names)->bool { + if (el.Properties.size() != names.size()) + return false; + size_t i = 0ull; + for (const auto* name : names) + { + if (el.Properties[i].Name != name) + return false; + ++i; + } + return true; + }; + ELayoutKind layout = ELayoutKind::XYZ; + if (matchNames({"x", "y", "z"})) + layout = ELayoutKind::XYZ; + else if (matchNames({"x", "y", "z", "nx", "ny", "nz"})) + layout = ELayoutKind::XYZ_N; + else if (matchNames({"x", "y", "z", "nx", "ny", "nz", "u", "v"}) || + matchNames({"x", "y", "z", "nx", "ny", 
"nz", "s", "t"})) + layout = ELayoutKind::XYZ_N_UV; + else + return EFastVertexReadResult::NotApplicable; + const size_t floatBytes = sizeof(hlsl::float32_t); + auto validateTuple = [&](const size_t beginIx, const size_t componentCount, uint32_t& outStride, uint8_t*& outBase)->bool { + if (beginIx + componentCount > vertAttrIts.size()) + return false; + auto& first = vertAttrIts[beginIx]; + if (!first.ptr || first.dstFmt != EF_R32_SFLOAT) + return false; + outStride = first.stride; + outBase = first.ptr; + for (size_t c = 1ull; c < componentCount; ++c) + { + auto& it = vertAttrIts[beginIx + c]; + if (!it.ptr || it.dstFmt != EF_R32_SFLOAT) + return false; + if (it.stride != outStride) + return false; + if (it.ptr != outBase + c * floatBytes) + return false; + } + return true; + }; + uint32_t posStride = 0u, normalStride = 0u, uvStride = 0u; + uint8_t* posBase = nullptr; + uint8_t* normalBase = nullptr; + uint8_t* uvBase = nullptr; + switch (layout) + { + case ELayoutKind::XYZ: + if (vertAttrIts.size() != 3u || !validateTuple(0u, 3u, posStride, posBase)) + return EFastVertexReadResult::NotApplicable; + break; + case ELayoutKind::XYZ_N: + if (vertAttrIts.size() != 6u) + return EFastVertexReadResult::NotApplicable; + if (!validateTuple(0u, 3u, posStride, posBase) || !validateTuple(3u, 3u, normalStride, normalBase)) + return EFastVertexReadResult::NotApplicable; + break; + case ELayoutKind::XYZ_N_UV: + if (vertAttrIts.size() != 8u) + return EFastVertexReadResult::NotApplicable; + if (!validateTuple(0u, 3u, posStride, posBase) || !validateTuple(3u, 3u, normalStride, normalBase) || !validateTuple(6u, 2u, uvStride, uvBase)) + return EFastVertexReadResult::NotApplicable; + break; + } + const size_t srcBytesPerVertex = [layout]()->size_t { + switch (layout) + { + case ELayoutKind::XYZ: return sizeof(hlsl::float32_t) * 3ull; + case ELayoutKind::XYZ_N: return sizeof(hlsl::float32_t) * 6ull; + case ELayoutKind::XYZ_N_UV: return sizeof(hlsl::float32_t) * 8ull; + default: return 
0ull; + } + }(); + if (srcBytesPerVertex == 0ull || el.Count > (std::numeric_limits::max() / srcBytesPerVertex)) + return EFastVertexReadResult::Error; + const bool trackAABB = parsedAABB != nullptr; + const bool needsByteSwap = IsWrongEndian; + auto decodeF32 = [needsByteSwap](const uint8_t* src)->float { + uint32_t bits = 0u; + std::memcpy(&bits, src, sizeof(bits)); + if (needsByteSwap) + bits = Binary::byteswap(bits); + float value = 0.f; + std::memcpy(&value, &bits, sizeof(value)); + return value; + }; + size_t remainingVertices = el.Count; + while (remainingVertices > 0ull) + { + if (StartPointer + srcBytesPerVertex > EndPointer) + fillBuffer(); + const size_t available = EndPointer > StartPointer ? static_cast(EndPointer - StartPointer) : 0ull; + if (available < srcBytesPerVertex) + return EFastVertexReadResult::Error; + const size_t batchVertices = std::min(remainingVertices, available / srcBytesPerVertex); + const uint8_t* src = reinterpret_cast(StartPointer); + switch (layout) + { + case ELayoutKind::XYZ: + { + if (posStride == 3ull * floatBytes) + { + const size_t batchBytes = batchVertices * 3ull * floatBytes; + if (trackAABB && batchVertices >= (1ull << 20)) + { + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); + const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers(hw, inner.params.ioPolicy.runtimeTuning.workerHeadroom); + SLoaderRuntimeTuningRequest vertexTuningRequest = {}; + vertexTuningRequest.inputBytes = batchBytes; + vertexTuningRequest.totalWorkUnits = batchVertices; + vertexTuningRequest.minBytesPerWorker = 3ull * floatBytes; + vertexTuningRequest.hardwareThreads = static_cast(hw); + vertexTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); + vertexTuningRequest.targetChunksPerWorker = inner.params.ioPolicy.runtimeTuning.targetChunksPerWorker; + vertexTuningRequest.sampleData = reinterpret_cast(src); + vertexTuningRequest.sampleBytes = SLoaderRuntimeTuner::resolveSampleBytes(inner.params.ioPolicy, 
batchBytes); + const auto vertexTuning = SLoaderRuntimeTuner::tune(inner.params.ioPolicy, vertexTuningRequest); + const size_t workerCount = std::min(vertexTuning.workerCount, batchVertices); + if (workerCount > 1ull) + { + struct SAABBRange { float minX = std::numeric_limits::max(); float minY = std::numeric_limits::max(); float minZ = std::numeric_limits::max(); float maxX = std::numeric_limits::lowest(); float maxY = std::numeric_limits::lowest(); float maxZ = std::numeric_limits::lowest(); }; + std::vector workerRanges(workerCount); + uint8_t* dstBase = posBase; + SLoaderRuntimeTuner::dispatchWorkers(workerCount, [&](const size_t workerIx) { + const size_t begin = (batchVertices * workerIx) / workerCount; + const size_t end = (batchVertices * (workerIx + 1ull)) / workerCount; + const size_t count = end - begin; + if (count == 0ull) + return; + auto& range = workerRanges[workerIx]; + const uint8_t* inBytes = src + begin * 3ull * floatBytes; + float* outFloats = reinterpret_cast(dstBase + begin * 3ull * floatBytes); + if (!needsByteSwap) + { + std::memcpy(outFloats, inBytes, count * 3ull * floatBytes); + const float* xyz = reinterpret_cast(inBytes); + for (size_t v = 0ull; v < count; ++v) + { + const float x = xyz[v * 3ull + 0ull]; + const float y = xyz[v * 3ull + 1ull]; + const float z = xyz[v * 3ull + 2ull]; + if (x < range.minX) range.minX = x; + if (y < range.minY) range.minY = y; + if (z < range.minZ) range.minZ = z; + if (x > range.maxX) range.maxX = x; + if (y > range.maxY) range.maxY = y; + if (z > range.maxZ) range.maxZ = z; + } + } + else + { + for (size_t v = 0ull; v < count; ++v) + { + uint32_t xb = 0u, yb = 0u, zb = 0u; + std::memcpy(&xb, inBytes + 0ull * floatBytes, sizeof(xb)); + std::memcpy(&yb, inBytes + 1ull * floatBytes, sizeof(yb)); + std::memcpy(&zb, inBytes + 2ull * floatBytes, sizeof(zb)); + xb = Binary::byteswap(xb); + yb = Binary::byteswap(yb); + zb = Binary::byteswap(zb); + float x = 0.f, y = 0.f, z = 0.f; + std::memcpy(&x, &xb, 
sizeof(x)); + std::memcpy(&y, &yb, sizeof(y)); + std::memcpy(&z, &zb, sizeof(z)); + outFloats[0] = x; + outFloats[1] = y; + outFloats[2] = z; + if (x < range.minX) range.minX = x; + if (y < range.minY) range.minY = y; + if (z < range.minZ) range.minZ = z; + if (x > range.maxX) range.maxX = x; + if (y > range.maxY) range.maxY = y; + if (z > range.maxZ) range.maxZ = z; + inBytes += 3ull * floatBytes; + outFloats += 3ull; + } + } + }); + auto& aabb = parsedAABB->value; + for (const auto& range : workerRanges) + { + if (range.minX < aabb.minVx.x) aabb.minVx.x = range.minX; + if (range.minY < aabb.minVx.y) aabb.minVx.y = range.minY; + if (range.minZ < aabb.minVx.z) aabb.minVx.z = range.minZ; + if (range.maxX > aabb.maxVx.x) aabb.maxVx.x = range.maxX; + if (range.maxY > aabb.maxVx.y) aabb.maxVx.y = range.maxY; + if (range.maxZ > aabb.maxVx.z) aabb.maxVx.z = range.maxZ; + } + src += batchBytes; + posBase += batchBytes; + break; + } + } + if (!needsByteSwap) + { + std::memcpy(posBase, src, batchBytes); + if (trackAABB) + { + const float* xyz = reinterpret_cast(src); + auto& aabb = parsedAABB->value; + for (size_t v = 0ull; v < batchVertices; ++v) + { + const float x = xyz[v * 3ull + 0ull]; + const float y = xyz[v * 3ull + 1ull]; + const float z = xyz[v * 3ull + 2ull]; + if (x < aabb.minVx.x) aabb.minVx.x = x; + if (y < aabb.minVx.y) aabb.minVx.y = y; + if (z < aabb.minVx.z) aabb.minVx.z = z; + if (x > aabb.maxVx.x) aabb.maxVx.x = x; + if (y > aabb.maxVx.y) aabb.maxVx.y = y; + if (z > aabb.maxVx.z) aabb.maxVx.z = z; + } + } + src += batchBytes; + posBase += batchBytes; + } + else + { + for (size_t v = 0ull; v < batchVertices; ++v) + { + const float x = decodeF32(src + 0ull * floatBytes); + const float y = decodeF32(src + 1ull * floatBytes); + const float z = decodeF32(src + 2ull * floatBytes); + reinterpret_cast(posBase)[0] = x; + reinterpret_cast(posBase)[1] = y; + reinterpret_cast(posBase)[2] = z; + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, 
x, y, z); + src += 3ull * floatBytes; + posBase += posStride; + } + } + } + else + { + for (size_t v = 0ull; v < batchVertices; ++v) + { + const float x = decodeF32(src + 0ull * floatBytes); + const float y = decodeF32(src + 1ull * floatBytes); + const float z = decodeF32(src + 2ull * floatBytes); + reinterpret_cast(posBase)[0] = x; + reinterpret_cast(posBase)[1] = y; + reinterpret_cast(posBase)[2] = z; + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, x, y, z); + src += 3ull * floatBytes; + posBase += posStride; + } + } + } + break; + case ELayoutKind::XYZ_N: + { + for (size_t v = 0ull; v < batchVertices; ++v) + { + const float x = decodeF32(src + 0ull * floatBytes); + const float y = decodeF32(src + 1ull * floatBytes); + const float z = decodeF32(src + 2ull * floatBytes); + reinterpret_cast(posBase)[0] = x; + reinterpret_cast(posBase)[1] = y; + reinterpret_cast(posBase)[2] = z; + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, hlsl::float32_t3(x, y, z)); + src += 3ull * floatBytes; + posBase += posStride; + reinterpret_cast(normalBase)[0] = decodeF32(src + 0ull * floatBytes); + reinterpret_cast(normalBase)[1] = decodeF32(src + 1ull * floatBytes); + reinterpret_cast(normalBase)[2] = decodeF32(src + 2ull * floatBytes); + src += 3ull * floatBytes; + normalBase += normalStride; + } + } + break; + case ELayoutKind::XYZ_N_UV: + { + for (size_t v = 0ull; v < batchVertices; ++v) + { + const float x = decodeF32(src + 0ull * floatBytes); + const float y = decodeF32(src + 1ull * floatBytes); + const float z = decodeF32(src + 2ull * floatBytes); + reinterpret_cast(posBase)[0] = x; + reinterpret_cast(posBase)[1] = y; + reinterpret_cast(posBase)[2] = z; + if (trackAABB) + hlsl::shapes::util::extendAABBAccumulator(*parsedAABB, hlsl::float32_t3(x, y, z)); + src += 3ull * floatBytes; + posBase += posStride; + reinterpret_cast(normalBase)[0] = decodeF32(src + 0ull * floatBytes); + reinterpret_cast(normalBase)[1] = decodeF32(src + 
1ull * floatBytes); + reinterpret_cast(normalBase)[2] = decodeF32(src + 2ull * floatBytes); + src += 3ull * floatBytes; + normalBase += normalStride; + reinterpret_cast(uvBase)[0] = decodeF32(src + 0ull * floatBytes); + reinterpret_cast(uvBase)[1] = decodeF32(src + 1ull * floatBytes); + src += 2ull * floatBytes; + uvBase += uvStride; + } + } + } + const size_t consumed = batchVertices * srcBytesPerVertex; + StartPointer += consumed; + remainingVertices -= batchVertices; + } + const size_t posAdvance = el.Count * posStride; + vertAttrIts[0].ptr += posAdvance; + vertAttrIts[1].ptr += posAdvance; + vertAttrIts[2].ptr += posAdvance; + if (layout == ELayoutKind::XYZ_N || layout == ELayoutKind::XYZ_N_UV) + { + const size_t normalAdvance = el.Count * normalStride; + vertAttrIts[3].ptr += normalAdvance; + vertAttrIts[4].ptr += normalAdvance; + vertAttrIts[5].ptr += normalAdvance; + } + if (layout == ELayoutKind::XYZ_N_UV) + { + const size_t uvAdvance = el.Count * uvStride; + vertAttrIts[6].ptr += uvAdvance; + vertAttrIts[7].ptr += uvAdvance; + } + return EFastVertexReadResult::Success; + } + void readVertex(const IAssetLoader::SAssetLoadParams& _params, + const SElement& el) { + assert(el.Name == "vertex"); + assert(el.Properties.size() == vertAttrIts.size()); + if (!IsBinaryFile) + getNextLine(); + for (size_t j = 0; j < el.Count; ++j) + for (auto i = 0u; i < vertAttrIts.size(); i++) { + const auto& prop = el.Properties[i]; + auto& it = vertAttrIts[i]; + if (!it.ptr) { + prop.skip(*this); + continue; + } + if (!IsBinaryFile) { + if (isIntegerFormat(prop.type)) { + uint64_t tmp = getInt(prop.type); + encodePixels(it.dstFmt, it.ptr, &tmp); + } else { + hlsl::float64_t tmp = getFloat(prop.type); + encodePixels(it.dstFmt, it.ptr, &tmp); + } + } else if (it.dstFmt != prop.type) { + assert(isIntegerFormat(it.dstFmt) == isIntegerFormat(prop.type)); + if (isIntegerFormat(it.dstFmt)) { + uint64_t tmp = getInt(prop.type); + encodePixels(it.dstFmt, it.ptr, &tmp); + } else { + 
hlsl::float64_t tmp = getFloat(prop.type); + encodePixels(it.dstFmt, it.ptr, &tmp); + } + } else + getData(it.ptr, prop.type); + // + it.ptr += it.stride; + } + } + bool readFace(const SElement& Element, core::vector& _outIndices, + uint32_t& _maxIndex, const uint32_t vertexCount) { + if (!IsBinaryFile) + getNextLine(); + const bool hasVertexCount = vertexCount != 0u; + for (const auto& prop : Element.Properties) { + if (prop.isList() && + (prop.Name == "vertex_indices" || prop.Name == "vertex_index")) { + const uint32_t count = getInt(prop.list.countType); + const auto srcIndexFmt = prop.list.itemType; + if (count < 3u) { + for (uint32_t j = 0u; j < count; ++j) + getInt(srcIndexFmt); + continue; + } + if (count > 3u) + _outIndices.reserve(_outIndices.size() + + static_cast(count - 2u) * 3ull); + auto emitFan = [&_outIndices, &_maxIndex, hasVertexCount, + vertexCount](auto&& readIndex, + const uint32_t faceVertexCount) -> bool { + uint32_t i0 = readIndex(); + uint32_t i1 = readIndex(); + uint32_t i2 = readIndex(); + if (hasVertexCount) { + if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) + return false; + } else { + _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); + } + _outIndices.push_back(i0); + _outIndices.push_back(i1); + _outIndices.push_back(i2); + uint32_t prev = i2; + for (uint32_t j = 3u; j < faceVertexCount; ++j) { + const uint32_t idx = readIndex(); + if (hasVertexCount) { + if (idx >= vertexCount) + return false; + } else { + _maxIndex = std::max(_maxIndex, idx); + } + _outIndices.push_back(i0); + _outIndices.push_back(prev); + _outIndices.push_back(idx); + prev = idx; + } + return true; + }; + auto tryReadContiguousFan = [&]() -> bool { + const size_t bytesNeeded = static_cast(count) * sizeof(T); + if (!ensureBytes(bytesNeeded)) + return false; + const uint8_t* ptr = reinterpret_cast(StartPointer); + auto readIndex = [&ptr]() -> uint32_t { + T v = {}; + std::memcpy(&v, ptr, sizeof(v)); + ptr += sizeof(v); + return 
static_cast(v); + }; + if (!emitFan(readIndex, count)) + return false; + StartPointer = reinterpret_cast(const_cast(ptr)); + return true; + }; + if (IsBinaryFile && !IsWrongEndian && srcIndexFmt == EF_R32_UINT && tryReadContiguousFan.template operator()()) + continue; + if (IsBinaryFile && !IsWrongEndian && srcIndexFmt == EF_R16_UINT && tryReadContiguousFan.template operator()()) + continue; + auto readIndex = [&]() -> uint32_t { + return static_cast(getInt(srcIndexFmt)); + }; + if (!emitFan(readIndex, count)) + return false; + } else if (prop.Name == "intensity") { + // todo: face intensity + prop.skip(*this); + } else + prop.skip(*this); + } + return true; + } + enum class EFastFaceReadResult : uint8_t { NotApplicable, + Success, + Error }; + EFastFaceReadResult readFaceElementFast( + const SElement& element, core::vector& _outIndices, + uint32_t& _maxIndex, uint64_t& _faceCount, const uint32_t vertexCount, + const bool computeIndexHash, core::blake3_hash_t& outIndexHash) { + if (!IsBinaryFile) + return EFastFaceReadResult::NotApplicable; + if (element.Properties.size() != 1u) + return EFastFaceReadResult::NotApplicable; + const auto& prop = element.Properties[0]; + if (!prop.isList() || + (prop.Name != "vertex_indices" && prop.Name != "vertex_index")) + return EFastFaceReadResult::NotApplicable; + if (prop.list.countType != EF_R8_UINT) + return EFastFaceReadResult::NotApplicable; + const E_FORMAT srcIndexFmt = prop.list.itemType; + const bool isSrcU32 = srcIndexFmt == EF_R32_UINT; + const bool isSrcS32 = srcIndexFmt == EF_R32_SINT; + const bool isSrcU16 = srcIndexFmt == EF_R16_UINT; + const bool isSrcS16 = srcIndexFmt == EF_R16_SINT; + if (!isSrcU32 && !isSrcS32 && !isSrcU16 && !isSrcS16) + return EFastFaceReadResult::NotApplicable; + const bool is32Bit = isSrcU32 || isSrcS32; + const bool needEndianSwap = IsWrongEndian; + const size_t indexSize = is32Bit ? 
sizeof(uint32_t) : sizeof(uint16_t); + const bool hasVertexCount = vertexCount != 0u; + const bool trackMaxIndex = !hasVertexCount; + outIndexHash = IPreHashed::INVALID_HASH; + const size_t minTriangleRecordSize = sizeof(uint8_t) + indexSize * 3u; + if (element.Count > + (std::numeric_limits::max() / minTriangleRecordSize)) + return EFastFaceReadResult::Error; + const size_t minBytesNeeded = element.Count * minTriangleRecordSize; + if (StartPointer + minBytesNeeded <= EndPointer) { + if (element.Count > (std::numeric_limits::max() / 3u)) + return EFastFaceReadResult::Error; + const size_t triIndices = element.Count * 3u; + if (_outIndices.size() > + (std::numeric_limits::max() - triIndices)) + return EFastFaceReadResult::Error; + const size_t oldSize = _outIndices.size(); + const uint32_t oldMaxIndex = _maxIndex; + _outIndices.resize(oldSize + triIndices); + uint32_t* out = _outIndices.data() + oldSize; + const uint8_t* ptr = reinterpret_cast(StartPointer); + auto readU32 = [needEndianSwap](const uint8_t* src) -> uint32_t { + uint32_t value = 0u; + std::memcpy(&value, src, sizeof(value)); + if (needEndianSwap) + value = Binary::byteswap(value); + return value; + }; + auto readU16 = [needEndianSwap](const uint8_t* src) -> uint16_t { + uint16_t value = 0u; + std::memcpy(&value, src, sizeof(value)); + if (needEndianSwap) + value = Binary::byteswap(value); + return value; + }; + if (is32Bit) { + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); + const size_t hardMaxWorkers = + SLoaderRuntimeTuner::resolveHardMaxWorkers( + hw, inner.params.ioPolicy.runtimeTuning.workerHeadroom); + const size_t recordBytes = sizeof(uint8_t) + 3ull * sizeof(uint32_t); + SLoaderRuntimeTuningRequest faceTuningRequest = {}; + faceTuningRequest.inputBytes = minBytesNeeded; + faceTuningRequest.totalWorkUnits = element.Count; + faceTuningRequest.minBytesPerWorker = recordBytes; + faceTuningRequest.hardwareThreads = static_cast(hw); + faceTuningRequest.hardMaxWorkers = + 
static_cast(hardMaxWorkers); + faceTuningRequest.targetChunksPerWorker = + inner.params.ioPolicy.runtimeTuning.targetChunksPerWorker; + faceTuningRequest.sampleData = ptr; + faceTuningRequest.sampleBytes = + SLoaderRuntimeTuner::resolveSampleBytes(inner.params.ioPolicy, + minBytesNeeded); + const auto faceTuning = SLoaderRuntimeTuner::tune( + inner.params.ioPolicy, faceTuningRequest); + size_t workerCount = std::min(faceTuning.workerCount, element.Count); + if (workerCount > 1ull) { + const bool needMax = trackMaxIndex; + const bool validateAgainstVertexCount = hasVertexCount; + std::vector workerNonTriangle(workerCount, 0u); + std::vector workerInvalid(workerCount, 0u); + std::vector workerMax(needMax ? workerCount : 0ull, 0u); + const bool hashInParsePipeline = computeIndexHash; + std::vector workerReady( + hashInParsePipeline ? workerCount : 0ull, 0u); + std::vector workerHashable( + hashInParsePipeline ? workerCount : 0ull, 1u); + std::atomic_bool hashPipelineOk = true; + core::blake3_hash_t parsedIndexHash = IPreHashed::INVALID_HASH; + std::jthread hashThread; + if (hashInParsePipeline) { + hashThread = std::jthread([&]() { + try { + core::blake3_hasher hasher; + for (size_t workerIx = 0ull; workerIx < workerCount; + ++workerIx) { + auto ready = + std::atomic_ref(workerReady[workerIx]); + while (ready.load(std::memory_order_acquire) == 0u) + ready.wait(0u, std::memory_order_acquire); + if (workerHashable[workerIx] == 0u) { + hashPipelineOk.store(false, std::memory_order_relaxed); + return; + } + const size_t begin = + (element.Count * workerIx) / workerCount; + const size_t end = + (element.Count * (workerIx + 1ull)) / workerCount; + const size_t faceCount = end - begin; + hasher.update(out + begin * 3ull, + faceCount * 3ull * sizeof(uint32_t)); + } + parsedIndexHash = static_cast(hasher); + } catch (...) 
{ + hashPipelineOk.store(false, std::memory_order_relaxed); + } + }); + } + auto parseChunk = [&](const size_t workerIx, const size_t beginFace, + const size_t endFace) -> void { + const uint8_t* in = ptr + beginFace * recordBytes; + uint32_t* outLocal = out + beginFace * 3ull; + uint32_t localMax = 0u; + for (size_t faceIx = beginFace; faceIx < endFace; ++faceIx) { + if (*in != 3u) { + workerNonTriangle[workerIx] = 1u; + if (hashInParsePipeline) + workerHashable[workerIx] = 0u; + break; + } + ++in; + const uint32_t i0 = readU32(in + 0ull * sizeof(uint32_t)); + const uint32_t i1 = readU32(in + 1ull * sizeof(uint32_t)); + const uint32_t i2 = readU32(in + 2ull * sizeof(uint32_t)); + outLocal[0] = i0; + outLocal[1] = i1; + outLocal[2] = i2; + const uint32_t triOr = i0 | i1 | i2; + if (isSrcS32 && (triOr & 0x80000000u)) { + workerInvalid[workerIx] = 1u; + if (hashInParsePipeline) + workerHashable[workerIx] = 0u; + break; + } + if (validateAgainstVertexCount) { + if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) { + workerInvalid[workerIx] = 1u; + if (hashInParsePipeline) + workerHashable[workerIx] = 0u; + break; + } + } else if (needMax) { + if (i0 > localMax) localMax = i0; + if (i1 > localMax) localMax = i1; + if (i2 > localMax) localMax = i2; + } + in += 3ull * sizeof(uint32_t); + outLocal += 3ull; + } + if (needMax) + workerMax[workerIx] = localMax; + if (hashInParsePipeline) { + auto ready = std::atomic_ref(workerReady[workerIx]); + ready.store(1u, std::memory_order_release); + ready.notify_one(); + } + }; + SLoaderRuntimeTuner::dispatchWorkers( + workerCount, [&](const size_t workerIx) { + const size_t begin = (element.Count * workerIx) / workerCount; + const size_t end = + (element.Count * (workerIx + 1ull)) / workerCount; + parseChunk(workerIx, begin, end); + }); + if (hashThread.joinable()) + hashThread.join(); + const bool anyNonTriangle = + std::any_of(workerNonTriangle.begin(), workerNonTriangle.end(), + [](const uint8_t v) { return v != 0u; 
}); + if (anyNonTriangle) { + _outIndices.resize(oldSize); + _maxIndex = oldMaxIndex; + return EFastFaceReadResult::NotApplicable; + } + const bool anyInvalid = + std::any_of(workerInvalid.begin(), workerInvalid.end(), + [](const uint8_t v) { return v != 0u; }); + if (anyInvalid) { + _outIndices.resize(oldSize); + _maxIndex = oldMaxIndex; + return EFastFaceReadResult::Error; + } + if (trackMaxIndex) { + for (const uint32_t local : workerMax) + if (local > _maxIndex) + _maxIndex = local; + } + if (hashInParsePipeline && + hashPipelineOk.load(std::memory_order_relaxed)) + outIndexHash = parsedIndexHash; + StartPointer = reinterpret_cast( + const_cast(ptr + element.Count * recordBytes)); + _faceCount += element.Count; + return EFastFaceReadResult::Success; + } + } + if (is32Bit) + { + if (isSrcU32) + { + if (trackMaxIndex) + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + return EFastFaceReadResult::NotApplicable; + out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); + out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); + out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); + ptr += 3ull * sizeof(uint32_t); + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; + out += 3u; + } + } + else + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + return EFastFaceReadResult::NotApplicable; + out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); + out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); + out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); + ptr += 3ull * sizeof(uint32_t); + if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + return EFastFaceReadResult::Error; + out += 3u; + } + } + } + else + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + return EFastFaceReadResult::NotApplicable; + out[0] = readU32(ptr + 0ull * sizeof(uint32_t)); + 
out[1] = readU32(ptr + 1ull * sizeof(uint32_t)); + out[2] = readU32(ptr + 2ull * sizeof(uint32_t)); + ptr += 3ull * sizeof(uint32_t); + if ((out[0] | out[1] | out[2]) & 0x80000000u) + return EFastFaceReadResult::Error; + if (trackMaxIndex) + { + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; + } + else if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + return EFastFaceReadResult::Error; + out += 3u; + } + } + } + else + { + if (isSrcU16) + { + if (trackMaxIndex) + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + return EFastFaceReadResult::NotApplicable; + out[0] = readU16(ptr + 0ull * sizeof(uint16_t)); + out[1] = readU16(ptr + 1ull * sizeof(uint16_t)); + out[2] = readU16(ptr + 2ull * sizeof(uint16_t)); + ptr += 3ull * sizeof(uint16_t); + if (out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; + out += 3u; + } + } + else + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + return EFastFaceReadResult::NotApplicable; + out[0] = readU16(ptr + 0ull * sizeof(uint16_t)); + out[1] = readU16(ptr + 1ull * sizeof(uint16_t)); + out[2] = readU16(ptr + 2ull * sizeof(uint16_t)); + ptr += 3ull * sizeof(uint16_t); + if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + return EFastFaceReadResult::Error; + out += 3u; + } + } + } + else + { + for (size_t j = 0u; j < element.Count; ++j) + { + const uint8_t c = *ptr++; + if (c != 3u) + return EFastFaceReadResult::NotApplicable; + out[0] = readU16(ptr + 0ull * sizeof(uint16_t)); + out[1] = readU16(ptr + 1ull * sizeof(uint16_t)); + out[2] = readU16(ptr + 2ull * sizeof(uint16_t)); + ptr += 3ull * sizeof(uint16_t); + if ((out[0] | out[1] | out[2]) & 0x8000u) + return EFastFaceReadResult::Error; + if (trackMaxIndex) + { + if 
(out[0] > _maxIndex) _maxIndex = out[0]; + if (out[1] > _maxIndex) _maxIndex = out[1]; + if (out[2] > _maxIndex) _maxIndex = out[2]; + } + else if (out[0] >= vertexCount || out[1] >= vertexCount || out[2] >= vertexCount) + return EFastFaceReadResult::Error; + out += 3u; + } + } + } + StartPointer = reinterpret_cast(const_cast(ptr)); + _faceCount += element.Count; + return EFastFaceReadResult::Success; + } + if (element.Count > (std::numeric_limits::max() / 3u)) + return EFastFaceReadResult::Error; + const size_t reserveCount = element.Count * 3u; + if (_outIndices.size() > + (std::numeric_limits::max() - reserveCount)) + return EFastFaceReadResult::Error; + const size_t oldSize = _outIndices.size(); + _outIndices.resize(oldSize + reserveCount); + uint32_t* out = _outIndices.data() + oldSize; + size_t written = 0ull; + auto ensureBytes = [this](const size_t bytes) -> bool { + if (StartPointer + bytes > EndPointer) + fillBuffer(); + return StartPointer + bytes <= EndPointer; + }; + auto readCount = [&ensureBytes, this](int32_t& outCount) -> bool { + if (!ensureBytes(sizeof(uint8_t))) + return false; + outCount = static_cast(*StartPointer++); + return true; + }; + auto readIndex = [&ensureBytes, this, is32Bit, isSrcU32, isSrcU16, + needEndianSwap](uint32_t& out) -> bool { + if (is32Bit) { + if (!ensureBytes(sizeof(uint32_t))) + return false; + if (isSrcU32) { + std::memcpy(&out, StartPointer, sizeof(uint32_t)); + if (needEndianSwap) + out = Binary::byteswap(out); + } else { + int32_t v = 0; + std::memcpy(&v, StartPointer, sizeof(v)); + if (needEndianSwap) + v = Binary::byteswap(v); + if (v < 0) + return false; + out = static_cast(v); + } + StartPointer += sizeof(uint32_t); + return true; + } + if (!ensureBytes(sizeof(uint16_t))) + return false; + if (isSrcU16) { + uint16_t v = 0u; + std::memcpy(&v, StartPointer, sizeof(uint16_t)); + if (needEndianSwap) + v = Binary::byteswap(v); + out = v; + } else { + int16_t v = 0; + std::memcpy(&v, StartPointer, sizeof(int16_t)); + 
if (needEndianSwap) + v = Binary::byteswap(v); + if (v < 0) + return false; + out = static_cast(v); + } + StartPointer += sizeof(uint16_t); + return true; + }; + auto readPackedU32 = [needEndianSwap](const uint8_t* src) -> uint32_t { + uint32_t value = 0u; + std::memcpy(&value, src, sizeof(value)); + if (needEndianSwap) + value = Binary::byteswap(value); + return value; + }; + auto readPackedU16 = [needEndianSwap](const uint8_t* src) -> uint32_t { + uint16_t value = 0u; + std::memcpy(&value, src, sizeof(value)); + if (needEndianSwap) + value = Binary::byteswap(value); + return value; + }; + for (size_t j = 0u; j < element.Count; ++j) { + if (is32Bit && ensureBytes(sizeof(uint8_t) + sizeof(uint32_t) * 3ull) && static_cast(*StartPointer) == 3u) + { + ++StartPointer; + const uint32_t i0 = readPackedU32(reinterpret_cast(StartPointer) + 0ull * sizeof(uint32_t)); + const uint32_t i1 = readPackedU32(reinterpret_cast(StartPointer) + 1ull * sizeof(uint32_t)); + const uint32_t i2 = readPackedU32(reinterpret_cast(StartPointer) + 2ull * sizeof(uint32_t)); + StartPointer += 3ull * sizeof(uint32_t); + if (isSrcS32 && ((i0 | i1 | i2) & 0x80000000u)) + return EFastFaceReadResult::Error; + if (trackMaxIndex) + { + if (i0 > _maxIndex) _maxIndex = i0; + if (i1 > _maxIndex) _maxIndex = i1; + if (i2 > _maxIndex) _maxIndex = i2; + } + else if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) + return EFastFaceReadResult::Error; + out[0] = i0; + out[1] = i1; + out[2] = i2; + out += 3u; + written += 3ull; + ++_faceCount; + continue; + } + if (!is32Bit && ensureBytes(sizeof(uint8_t) + sizeof(uint16_t) * 3ull) && static_cast(*StartPointer) == 3u) + { + ++StartPointer; + const uint32_t i0 = readPackedU16(reinterpret_cast(StartPointer) + 0ull * sizeof(uint16_t)); + const uint32_t i1 = readPackedU16(reinterpret_cast(StartPointer) + 1ull * sizeof(uint16_t)); + const uint32_t i2 = readPackedU16(reinterpret_cast(StartPointer) + 2ull * sizeof(uint16_t)); + StartPointer += 3ull * 
sizeof(uint16_t); + if (isSrcS16 && ((i0 | i1 | i2) & 0x8000u)) + return EFastFaceReadResult::Error; + if (trackMaxIndex) + { + if (i0 > _maxIndex) _maxIndex = i0; + if (i1 > _maxIndex) _maxIndex = i1; + if (i2 > _maxIndex) _maxIndex = i2; + } + else if (i0 >= vertexCount || i1 >= vertexCount || i2 >= vertexCount) + return EFastFaceReadResult::Error; + out[0] = i0; + out[1] = i1; + out[2] = i2; + out += 3u; + written += 3ull; + ++_faceCount; + continue; + } + int32_t countSigned = 0; + if (!readCount(countSigned)) + return EFastFaceReadResult::Error; + const uint32_t count = static_cast(countSigned); + if (count < 3u) { + uint32_t dummy = 0u; + for (uint32_t k = 0u; k < count; ++k) { + if (!readIndex(dummy)) + return EFastFaceReadResult::Error; + } + ++_faceCount; + continue; + } + uint32_t i0 = 0u; + uint32_t i1 = 0u; + uint32_t i2 = 0u; + if (!readIndex(i0) || !readIndex(i1) || !readIndex(i2)) + return EFastFaceReadResult::Error; + if (trackMaxIndex) { + _maxIndex = std::max(_maxIndex, std::max(i0, std::max(i1, i2))); + } else if (i0 >= vertexCount || i1 >= vertexCount || + i2 >= vertexCount) { + return EFastFaceReadResult::Error; + } + out[0] = i0; + out[1] = i1; + out[2] = i2; + out += 3u; + written += 3ull; + uint32_t prev = i2; + for (uint32_t k = 3u; k < count; ++k) { + uint32_t idx = 0u; + if (!readIndex(idx)) + return EFastFaceReadResult::Error; + if (trackMaxIndex) { + _maxIndex = std::max(_maxIndex, idx); + } else if (idx >= vertexCount) { + return EFastFaceReadResult::Error; + } + if (_outIndices.size() < oldSize + written + 3ull) + { + const size_t outOffset = static_cast(out - _outIndices.data()); + _outIndices.resize(oldSize + written + 3ull); + out = _outIndices.data() + outOffset; + } + out[0] = i0; + out[1] = prev; + out[2] = idx; + out += 3u; + written += 3ull; + prev = idx; + } + ++_faceCount; + } + _outIndices.resize(oldSize + written); + return EFastFaceReadResult::Success; + } + IAssetLoader::SAssetLoadContext inner; + uint32_t 
topHierarchyLevel; + IAssetLoader::IAssetLoaderOverride* loaderOverride; + core::vector Buffer; // input buffer must be at least twice as long as the longest line in the file + size_t ioReadWindowSize = DefaultIoReadWindowBytes; + core::vector ElementList = {}; + char *StartPointer = nullptr, *EndPointer = nullptr, + *LineEndPointer = nullptr; + int32_t LineLength = 0; + int32_t WordLength = -1; // this variable is a misnomer, its really the offset to next word minus one + bool IsBinaryFile = false, IsWrongEndian = false, EndOfFile = false; + size_t fileOffset = {}; + uint64_t readCallCount = 0ull; + uint64_t readBytesTotal = 0ull; + uint64_t readMinBytes = std::numeric_limits::max(); + core::vector vertAttrIts; + }; +}; +} +CPLYMeshFileLoader::CPLYMeshFileLoader() = default; +const char** CPLYMeshFileLoader::getAssociatedFileExtensions() const +{ + static const char* ext[] = { "ply", nullptr }; + return ext; +} +bool CPLYMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr) const { + std::array buf = {}; + system::IFile::success_t success; + _file->read(success, buf.data(), 0, buf.size()); + if (!success) + return false; + const std::string_view fileHeader(buf.data(), success.getBytesProcessed()); + Parse::Common::LineCursor lineCursor = {.cursor = fileHeader.data(), .end = fileHeader.data() + fileHeader.size()}; + const auto firstLineOpt = lineCursor.readLine(); + if (!firstLineOpt.has_value() || Parse::Common::trimWhitespace(*firstLineOpt) != "ply") + return false; + constexpr std::array headers = { + "format ascii 1.0", "format binary_little_endian 1.0", + "format binary_big_endian 1.0"}; + while (const auto lineOpt = lineCursor.readLine()) { + const std::string_view line = Parse::Common::trimWhitespace(*lineOpt); + if (line.starts_with("format ")) + return std::find(headers.begin(), headers.end(), line) != headers.end(); + } + return false; +} +//! creates/loads an animated mesh from the file. 
+SAssetBundle CPLYMeshFileLoader::loadAsset( + system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, + IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) { + using namespace nbl::core; + using clock_t = std::chrono::high_resolution_clock; + if (!_file) + return {}; + const bool computeContentHashes = !_params.loaderFlags.hasAnyFlag( + IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES); + uint64_t faceCount = 0u; + uint64_t fastFaceElementCount = 0u; + uint64_t fastVertexElementCount = 0u; + uint32_t maxIndexRead = 0u; + core::blake3_hash_t precomputedIndexHash = IPreHashed::INVALID_HASH; + const uint64_t fileSize = _file->getSize(); + const bool hashInBuild = + computeContentHashes && + SLoaderRuntimeTuner::shouldInlineHashBuild(_params.ioPolicy, fileSize); + impl::SLoadSession loadSession = {}; + if (!impl::SLoadSession::begin(_params.logger, "PLY loader", _file, _params.ioPolicy, fileSize, true, loadSession)) + return {}; + Parse::Context ctx = {asset::IAssetLoader::SAssetLoadContext{_params, _file}, + _hierarchyLevel, _override}; + uint64_t desiredReadWindow = + loadSession.isWholeFile() + ? 
(fileSize + Parse::Context::ReadWindowPaddingBytes) + : loadSession.ioPlan.chunkSizeBytes(); + if (loadSession.isWholeFile()) { + const bool mappedInput = loadSession.mappedPointer() != nullptr; + if (mappedInput && + fileSize > (Parse::Context::DefaultIoReadWindowBytes * 2ull)) + desiredReadWindow = Parse::Context::DefaultIoReadWindowBytes; + } + const uint64_t safeReadWindow = std::min(desiredReadWindow, static_cast(std::numeric_limits::max() - Parse::Context::ReadWindowPaddingBytes)); + ctx.init(static_cast(safeReadWindow)); + // start with empty mesh + auto geometry = make_smart_refctd_ptr(); + std::optional> geometryMetadata = std::nullopt; + hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); + uint32_t vertCount = 0; + Parse::ContentHashBuild contentHashBuild = Parse::ContentHashBuild::create(computeContentHashes, hashInBuild); + double headerMs = 0.0, vertexMs = 0.0, faceMs = 0.0, finalizeMs = 0.0; + auto hashViewBufferIfNeeded = [&](const IGeometry::SDataView& view) -> void { + if (!view || !view.src.buffer) + return; + contentHashBuild.hashNow(view.src.buffer.get()); + }; + auto hashRemainingGeometryBuffers = [&]() -> void { + if (contentHashBuild.hashesInline()) + SGeometryLoaderCommon::visitGeometryViews(geometry.get(), hashViewBufferIfNeeded); + }; + auto tryLaunchDeferredHash = [&](const IGeometry::SDataView& view) -> void { + if (!view || !view.src.buffer) + return; + contentHashBuild.tryDefer(view.src.buffer.get()); + }; + // Currently only supports ASCII or binary meshes + if (Parse::Common::trimWhitespace(ctx.getNextLine()) != "ply") { + _params.logger.log("Not a valid PLY file %s", system::ILogger::ELL_ERROR, + ctx.inner.mainFile->getFileName().string().c_str()); + return {}; + } + // cut the next line out + ctx.getNextLine(); + // grab the word from this line + const char* word = ctx.getNextWord(); + // ignore comments + for (; Parse::toStringView(word) == "comment"; ctx.getNextLine()) + word = 
ctx.getNextWord(); + bool readingHeader = true; + bool continueReading = true; + ctx.IsBinaryFile = false; + ctx.IsWrongEndian = false; + const auto headerStart = clock_t::now(); + do { + const std::string_view wordView = Parse::toStringView(word); + if (wordView == "property") { + word = ctx.getNextWord(); + if (ctx.ElementList.empty()) { + _params.logger.log("PLY property token found before element %s", + system::ILogger::ELL_WARNING, word); + } else { + // get element + auto& el = ctx.ElementList.back(); + // fill property struct + auto& prop = el.Properties.emplace_back(); + prop.type = prop.getType(word); + if (prop.type == EF_UNKNOWN) { + el.KnownSize = false; + word = ctx.getNextWord(); + prop.list.countType = prop.getType(word); + if (ctx.IsBinaryFile && !isIntegerFormat(prop.list.countType)) { + _params.logger.log("Cannot read binary PLY file containing data " + "types of unknown or non integer length %s", + system::ILogger::ELL_WARNING, word); + continueReading = false; + } else { + word = ctx.getNextWord(); + prop.list.itemType = prop.getType(word); + if (ctx.IsBinaryFile && !isIntegerFormat(prop.list.itemType)) { + _params.logger.log("Cannot read binary PLY file containing data " + "types of unknown or non integer length %s", + system::ILogger::ELL_ERROR, word); + continueReading = false; + } + } + } else if (ctx.IsBinaryFile && prop.type == EF_UNKNOWN) { + _params.logger.log("Cannot read binary PLY file containing data " + "types of unknown length %s", + system::ILogger::ELL_ERROR, word); + continueReading = false; + } else + el.KnownSize += getTexelOrBlockBytesize(prop.type); + prop.Name = ctx.getNextWord(); + } + } else if (wordView == "element") { + auto& el = ctx.ElementList.emplace_back(); + el.Name = ctx.getNextWord(); + const char* const countWord = ctx.getNextWord(); + uint64_t parsedCount = 0ull; + const std::string_view countWordView = Parse::toStringView(countWord); + if (!countWordView.empty()) { + if 
(!Parse::Common::parseExactNumber(countWordView, parsedCount)) + parsedCount = 0ull; + } + el.Count = static_cast(parsedCount); + el.KnownSize = 0; + if (el.Name == "vertex") + vertCount = el.Count; + } else if (wordView == "comment") { + // ignore line + } else if (wordView == "format") { + // must be `format {binary_little_endian|binary_big_endian|ascii} 1.0` + word = ctx.getNextWord(); + const std::string_view formatView = Parse::toStringView(word); + if (formatView == "binary_little_endian") { + ctx.IsBinaryFile = true; + } else if (formatView == "binary_big_endian") { + ctx.IsBinaryFile = true; + ctx.IsWrongEndian = true; + } else if (formatView == "ascii") { + } else { + // abort if this isn't an ascii or a binary mesh + _params.logger.log("Unsupported PLY mesh format %s", + system::ILogger::ELL_ERROR, word); + continueReading = false; + } + if (continueReading) { + word = ctx.getNextWord(); + if (Parse::toStringView(word) != "1.0") { + _params.logger.log("Unsupported PLY mesh version %s", + system::ILogger::ELL_WARNING, word); + } + } + } else if (wordView == "end_header") { + readingHeader = false; + if (ctx.IsBinaryFile) { + char* const binaryStartInBuffer = ctx.LineEndPointer + 1; + const auto* const mappedBase = reinterpret_cast(loadSession.mappedPointer()); + if (mappedBase) { + const size_t binaryOffset = + ctx.getAbsoluteOffset(binaryStartInBuffer); + const size_t remainingBytes = static_cast( + binaryOffset < fileSize ? 
(fileSize - binaryOffset) : 0ull); + ctx.useMappedBinaryWindow(mappedBase + binaryOffset, remainingBytes); + } else { + ctx.StartPointer = binaryStartInBuffer; + } + } + } else { + _params.logger.log("Unknown item in PLY file %s", + system::ILogger::ELL_WARNING, word); + } + if (readingHeader && continueReading) { + ctx.getNextLine(); + word = ctx.getNextWord(); + } + } while (readingHeader && continueReading); + headerMs = std::chrono::duration(clock_t::now() - headerStart).count(); + if (!continueReading) + return {}; + // now to read the actual data from the file + using index_t = uint32_t; + core::vector indices = {}; + bool verticesProcessed = false; + const std::string fileName = _file->getFileName().string(); + auto logMalformedElement = [&](const char* const elementName) -> void { + _params.logger.log("PLY %s fast path failed on malformed data for %s", system::ILogger::ELL_ERROR, elementName, fileName.c_str()); + }; + auto skipUnknownElement = [&](const Parse::Context::SElement& el) -> bool { + if (ctx.IsBinaryFile && el.KnownSize) { + const uint64_t bytesToSkip64 = static_cast(el.KnownSize) * static_cast(el.Count); + if (bytesToSkip64 > static_cast(std::numeric_limits::max())) + return false; + ctx.moveForward(static_cast(bytesToSkip64)); + } else { + for (size_t j = 0; j < el.Count; ++j) + el.skipElement(ctx); + } + return true; + }; + auto readFaceElement = [&](const Parse::Context::SElement& el) -> bool { + const uint32_t vertexCount32 = vertCount <= static_cast(std::numeric_limits::max()) ? 
static_cast(vertCount) : 0u; + const auto fastFaceResult = ctx.readFaceElementFast(el, indices, maxIndexRead, faceCount, vertexCount32, contentHashBuild.hashesDeferred(), precomputedIndexHash); + if (fastFaceResult == Parse::Context::EFastFaceReadResult::Success) { + ++fastFaceElementCount; + return true; + } + if (fastFaceResult == Parse::Context::EFastFaceReadResult::NotApplicable) { + indices.reserve(indices.size() + el.Count * 3u); + for (size_t j = 0; j < el.Count; ++j) { + if (!ctx.readFace(el, indices, maxIndexRead, vertexCount32)) + return false; + ++faceCount; + } + return true; + } + logMalformedElement("face"); + return false; + }; + // loop through each of the elements + for (uint32_t i = 0; i < ctx.ElementList.size(); ++i) { + auto& el = ctx.ElementList[i]; + if (el.Name == "vertex") { + const auto vertexStart = clock_t::now(); + if (verticesProcessed) { + // multiple vertex elements are currently treated as unsupported + _params.logger.log("Multiple `vertex` elements not supported!", + system::ILogger::ELL_ERROR); + return {}; + } + ICPUPolygonGeometry::SDataViewBase posView = {}, normalView = {}, + uvView = {}; + core::vector extraViews; + core::vector extraViewNames; + for (auto& vertexProperty : el.Properties) { + const auto& propertyName = vertexProperty.Name; + if (propertyName == "x") + SGeometryLoaderCommon::negotiateStructuredComponent(posView, vertexProperty.type, 0); + else if (propertyName == "y") + SGeometryLoaderCommon::negotiateStructuredComponent(posView, vertexProperty.type, 1); + else if (propertyName == "z") + SGeometryLoaderCommon::negotiateStructuredComponent(posView, vertexProperty.type, 2); + else if (propertyName == "nx") + SGeometryLoaderCommon::negotiateStructuredComponent(normalView, vertexProperty.type, 0); + else if (propertyName == "ny") + SGeometryLoaderCommon::negotiateStructuredComponent(normalView, vertexProperty.type, 1); + else if (propertyName == "nz") + 
SGeometryLoaderCommon::negotiateStructuredComponent(normalView, vertexProperty.type, 2); + else if (propertyName == "u" || propertyName == "s") + SGeometryLoaderCommon::negotiateStructuredComponent(uvView, vertexProperty.type, 0); + else if (propertyName == "v" || propertyName == "t") + SGeometryLoaderCommon::negotiateStructuredComponent(uvView, vertexProperty.type, 1); + else + { + extraViews.push_back(createView(vertexProperty.type, el.Count)); + extraViewNames.push_back(propertyName); + } + } + auto pushStructuredAttr = [](auto& iterators, const size_t offset, const uint32_t stride, const E_FORMAT componentFormat) -> void { + iterators.push_back({.ptr = reinterpret_cast(offset), .stride = stride, .dstFmt = componentFormat}); + }; + auto rebaseStructuredAttr = [](auto& iter, const ptrdiff_t basePtr) -> void { + iter.ptr += basePtr; + }; + SGeometryLoaderCommon::attachStructuredView(posView, el.Count, ctx.vertAttrIts, pushStructuredAttr, rebaseStructuredAttr, [&](auto view) { geometry->setPositionView(std::move(view)); }); + SGeometryLoaderCommon::attachStructuredView(normalView, el.Count, ctx.vertAttrIts, pushStructuredAttr, rebaseStructuredAttr, [&](auto view) { geometry->setNormalView(std::move(view)); }); + SGeometryLoaderCommon::attachStructuredView(uvView, el.Count, ctx.vertAttrIts, pushStructuredAttr, rebaseStructuredAttr, [&](auto view) { SGeometryLoaderCommon::setAuxViewAt(geometry.get(), SPLYPolygonGeometryAuxLayout::UV0, std::move(view)); }); + core::vector auxAttributeNames; + const size_t extraNameOffset = geometry->getAuxAttributeViews()->size(); + for (auto& view : extraViews) + ctx.vertAttrIts.push_back({.ptr = reinterpret_cast(view.src.buffer->getPointer()) + view.src.offset, + .stride = getTexelOrBlockBytesize(view.composed.format), + .dstFmt = view.composed.format}); + for (auto& view : extraViews) + geometry->getAuxAttributeViews()->push_back(std::move(view)); + if (!extraViewNames.empty()) + { + 
auxAttributeNames.resize(geometry->getAuxAttributeViews()->size()); + for (size_t extraIx = 0ull; extraIx < extraViewNames.size(); ++extraIx) + auxAttributeNames[extraNameOffset + extraIx] = std::move(extraViewNames[extraIx]); + } + // loop through vertex properties + const auto fastVertexResult = ctx.readVertexElementFast(el, &parsedAABB); + if (fastVertexResult == Parse::Context::EFastVertexReadResult::Success) { + ++fastVertexElementCount; + } else if (fastVertexResult == + Parse::Context::EFastVertexReadResult::NotApplicable) { + ctx.readVertex(_params, el); + } else { + logMalformedElement("vertex"); + return {}; + } + SGeometryLoaderCommon::visitVertexAttributeViews(geometry.get(), hashViewBufferIfNeeded); + tryLaunchDeferredHash(geometry->getPositionView()); + verticesProcessed = true; + if (!auxAttributeNames.empty()) + { + geometryMetadata = std::move(auxAttributeNames); + } + vertexMs += std::chrono::duration(clock_t::now() - vertexStart).count(); + } else if (el.Name == "face") { + const auto faceStart = clock_t::now(); + if (!readFaceElement(el)) + return {}; + faceMs += std::chrono::duration(clock_t::now() - faceStart).count(); + } else { + if (!skipUnknownElement(el)) + return {}; + } + } + if (!parsedAABB.empty()) + geometry->applyAABB(parsedAABB.value); + else + CPolygonGeometryManipulator::recomputeAABB(geometry.get()); + const uint64_t indexCount = static_cast(indices.size()); + if (indices.empty()) { + // no index buffer means point cloud + geometry->setIndexing(IPolygonGeometryBase::PointList()); + } else { + if (vertCount != 0u && maxIndexRead >= vertCount) { + _params.logger.log("PLY indices out of range for %s", + system::ILogger::ELL_ERROR, + _file->getFileName().string().c_str()); + return {}; + } + geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + const bool canUseU16 = + (vertCount != 0u) + ? 
(vertCount <= std::numeric_limits::max()) + : (maxIndexRead <= std::numeric_limits::max()); + if (canUseU16) { + core::vector indices16(indices.size()); + for (size_t i = 0u; i < indices.size(); ++i) + indices16[i] = static_cast(indices[i]); + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(indices16)); + if (!view) + return {}; + geometry->setIndexView(std::move(view)); + hashViewBufferIfNeeded(geometry->getIndexView()); + } else { + auto view = SGeometryLoaderCommon::createAdoptedView( + std::move(indices)); + if (!view) + return {}; + if (precomputedIndexHash != IPreHashed::INVALID_HASH) + view.src.buffer->setContentHash(precomputedIndexHash); + geometry->setIndexView(std::move(view)); + hashViewBufferIfNeeded(geometry->getIndexView()); + } + } + const auto finalizeStart = clock_t::now(); + if (contentHashBuild.hashesDeferred()) { + contentHashBuild.wait(); + SPolygonGeometryContentHash::computeMissing(geometry.get(), + _params.ioPolicy); + } else { + hashRemainingGeometryBuffers(); + } + finalizeMs = std::chrono::duration(clock_t::now() - finalizeStart).count(); + const uint64_t ioMinRead = ctx.readCallCount ? ctx.readMinBytes : 0ull; + const uint64_t ioAvgRead = + ctx.readCallCount ? (ctx.readBytesTotal / ctx.readCallCount) : 0ull; + const SFileReadTelemetry ioTelemetry = {.callCount = ctx.readCallCount, + .totalBytes = ctx.readBytesTotal, + .minBytes = ctx.readMinBytes}; + loadSession.logTinyIO(_params.logger, ioTelemetry); + _params.logger.log( + "PLY loader stats: file=%s binary=%d verts=%llu faces=%llu idx=%llu " + "vertex_fast=%llu face_fast=%llu io_reads=%llu io_min_read=%llu " + "io_avg_read=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), + ctx.IsBinaryFile ? 
1 : 0, static_cast(vertCount), + static_cast(faceCount), + static_cast(indexCount), + static_cast(fastVertexElementCount), + static_cast(fastFaceElementCount), + static_cast(ctx.readCallCount), + static_cast(ioMinRead), + static_cast(ioAvgRead), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(loadSession.ioPlan.strategy).c_str(), + static_cast(loadSession.ioPlan.chunkSizeBytes()), loadSession.ioPlan.reason); + _params.logger.log("PLY loader stages: file=%s header=%.3f ms vertex=%.3f ms face=%.3f ms finalize=%.3f ms", system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), headerMs, vertexMs, faceMs, finalizeMs); + auto meta = core::make_smart_refctd_ptr(1u); + if (geometryMetadata) + meta->placeMeta(0u, geometry.get(), std::move(*geometryMetadata)); + return SAssetBundle(std::move(meta), {std::move(geometry)}); +} } - - -} // end namespace nbl::asset #endif // _NBL_COMPILE_WITH_PLY_LOADER_ diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.h b/src/nbl/asset/interchange/CPLYMeshFileLoader.h index 6215364466..43d57e74d7 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.h +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.h @@ -1,39 +1,25 @@ -// Copyright (C) 2019-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors #ifndef _NBL_ASSET_C_PLY_MESH_FILE_LOADER_H_INCLUDED_ #define _NBL_ASSET_C_PLY_MESH_FILE_LOADER_H_INCLUDED_ -#ifdef _NBL_COMPILE_WITH_PLY_LOADER_ - #include "nbl/core/declarations.h" - #include "nbl/asset/interchange/IGeometryLoader.h" - -#include "nbl/asset/ICPUPolygonGeometry.h" -#include "nbl/asset/metadata/CPLYMetadata.h" - namespace nbl::asset { - -//! Meshloader capable of loading obj meshes. +//! 
Mesh loader capable of loading PLY meshes. class CPLYMeshFileLoader final : public IGeometryLoader { public: - inline CPLYMeshFileLoader() = default; + CPLYMeshFileLoader(); bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const override; - const char** getAssociatedFileExtensions() const override - { - static const char* ext[]{ "ply", nullptr }; - return ext; - } + const char** getAssociatedFileExtensions() const override; - //! creates/loads an animated mesh from the file. + //! Loads one PLY asset bundle from an already opened file. SAssetBundle loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; }; - } // end namespace nbl::asset #endif -#endif diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index fd6fa3ea9e..0d6f1e7b92 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -1,620 +1,721 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +#ifdef _NBL_COMPILE_WITH_PLY_WRITER_ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors - #include "CPLYMeshWriter.h" - -#ifdef _NBL_COMPILE_WITH_PLY_WRITER_ - -#include "nbl/system/ISystem.h" +#include "nbl/asset/interchange/SGeometryViewDecode.h" +#include "nbl/asset/interchange/SPLYPolygonGeometryAuxLayout.h" +#include "nbl/asset/interchange/SGeometryWriterCommon.h" +#include "nbl/asset/interchange/SInterchangeIO.h" +#include "impl/SFileAccess.h" #include "nbl/system/IFile.h" -#include "nbl/asset/utils/CMeshManipulator.h" - -namespace nbl -{ -namespace asset -{ - -namespace impl +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace nbl::asset { -static asset::E_FORMAT getCorrespondingIntegerFormat(asset::E_FORMAT _fmt) -{ - using namespace asset; - switch (_fmt) - { - case EF_R8_UNORM: return EF_R8_UINT; - case EF_R8_SNORM: return EF_R8_SINT; - case EF_R8G8_UNORM: return EF_R8G8_UINT; - case EF_R8G8_SNORM: return EF_R8G8_SINT; - case EF_R8G8B8_UNORM: return EF_R8G8B8_UINT; - case EF_R8G8B8_SNORM: return EF_R8G8B8_SINT; - case EF_R8G8B8A8_UNORM: return EF_R8G8B8A8_UINT; - case EF_R8G8B8A8_SNORM: return EF_R8G8B8A8_SINT; - case EF_R16_UNORM: return EF_R16_UINT; - case EF_R16_SNORM: return EF_R16_SINT; - case EF_R16G16_UNORM: return EF_R16G16_UINT; - case EF_R16G16_SNORM: return EF_R16G16_SINT; - case EF_R16G16B16_UNORM: return EF_R16G16B16_UINT; - case EF_R16G16B16_SNORM: return EF_R16G16B16_SINT; - case EF_R16G16B16A16_UNORM: return EF_R16G16B16A16_UINT; - case EF_R16G16B16A16_SNORM: return EF_R16G16B16A16_SINT; - case EF_A2B10G10R10_UNORM_PACK32: return EF_A2B10G10R10_UINT_PACK32; - case EF_A2B10G10R10_SNORM_PACK32: return EF_A2B10G10R10_SINT_PACK32; - case EF_B8G8R8A8_UNORM: return EF_R8G8B8A8_SINT; - case EF_A2R10G10B10_UNORM_PACK32: return 
EF_A2B10G10R10_UINT_PACK32; - case EF_A2R10G10B10_SNORM_PACK32: return EF_A2B10G10R10_SINT_PACK32; - default: return EF_UNKNOWN; - } -} -} - CPLYMeshWriter::CPLYMeshWriter() { #ifdef _NBL_DEBUG setDebugName("CPLYMeshWriter"); #endif } - -//! writes a mesh -bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) +const char** CPLYMeshWriter::getAssociatedFileExtensions() const { - if (!_override) - getDefaultOverride(_override); - - SAssetWriteContext inCtx{ _params, _file }; - - const asset::ICPUMesh* mesh = IAsset::castDown(_params.rootAsset); - if (!mesh) - return false; - - system::IFile* file = _override->getOutputFile(_file, inCtx, {mesh, 0u}); - - auto meshbuffers = mesh->getMeshBuffers(); - if (!file || !mesh) - return false; - - SContext context = { SAssetWriteContext{ inCtx.params, file} }; - - if (meshbuffers.size() > 1) - { - #ifdef _NBL_DEBUG - context.writeContext.params.logger.log("PLY WRITER WARNING (" + std::to_string(__LINE__) + " line): Only one meshbuffer input is allowed for writing! 
Saving first one", system::ILogger::ELL_WARNING, file->getFileName().string().c_str()); - #endif // _NBL_DEBUG - } - - context.writeContext.params.logger.log("Writing PLY mesh", system::ILogger::ELL_INFO, file->getFileName().string().c_str()); - - const asset::E_WRITER_FLAGS flags = _override->getAssetWritingFlags(context.writeContext, mesh, 0u); - - auto getConvertedCpuMeshBufferWithIndexBuffer = [&]() -> core::smart_refctd_ptr - { - auto inputMeshBuffer = *meshbuffers.begin(); - const bool doesItHaveIndexBuffer = inputMeshBuffer->getIndexBufferBinding().buffer.get(); - const bool isItNotTriangleListsPrimitive = inputMeshBuffer->getPipeline()->getCachedCreationParams().primitiveAssembly.primitiveType != asset::EPT_TRIANGLE_LIST; - - if (doesItHaveIndexBuffer && isItNotTriangleListsPrimitive) - { - auto cpuConvertedMeshBuffer = core::smart_refctd_ptr_static_cast(inputMeshBuffer->clone()); - IMeshManipulator::homogenizePrimitiveTypeAndIndices(&cpuConvertedMeshBuffer, &cpuConvertedMeshBuffer + 1, asset::EPT_TRIANGLE_LIST, asset::EIT_32BIT); - return cpuConvertedMeshBuffer; - } - else - return nullptr; - }; - - const auto cpuConvertedMeshBufferWithIndexBuffer = getConvertedCpuMeshBufferWithIndexBuffer(); - const asset::ICPUMeshBuffer* rawCopyMeshBuffer = cpuConvertedMeshBufferWithIndexBuffer.get() ? 
cpuConvertedMeshBufferWithIndexBuffer.get() : *meshbuffers.begin(); - const bool doesItUseIndexBufferBinding = (rawCopyMeshBuffer->getIndexBufferBinding().buffer.get() && rawCopyMeshBuffer->getIndexType() != asset::EIT_UNKNOWN); - - uint32_t faceCount = {}; - size_t vertexCount = {}; - - void* indices = nullptr; - { - auto indexCount = rawCopyMeshBuffer->getIndexCount(); - - indices = _NBL_ALIGNED_MALLOC(indexCount * sizeof(uint32_t), _NBL_SIMD_ALIGNMENT); - memcpy(indices, rawCopyMeshBuffer->getIndices(), indexCount * sizeof(uint32_t)); - - IMeshManipulator::getPolyCount(faceCount, rawCopyMeshBuffer); - vertexCount = IMeshManipulator::upperBoundVertexID(rawCopyMeshBuffer); - } - - // write PLY header - std::string header = "ply\n"; - header += (flags & asset::EWF_BINARY) ? "format binary_little_endian 1.0" : "format ascii 1.0"; - header += "\ncomment IrrlichtBAW "; - header += NABLA_SDK_VERSION; - - // vertex definition - header += "\nelement vertex "; - header += std::to_string(vertexCount) + '\n'; - - bool vaidToWrite[4]{ 0, 0, 0, 0 }; - - const uint32_t POSITION_ATTRIBUTE = rawCopyMeshBuffer->getPositionAttributeIx(); - constexpr uint32_t COLOR_ATTRIBUTE = 1; - constexpr uint32_t UV_ATTRIBUTE = 2; - const uint32_t NORMAL_ATTRIBUTE = rawCopyMeshBuffer->getNormalAttributeIx(); - - if (rawCopyMeshBuffer->getAttribBoundBuffer(POSITION_ATTRIBUTE).buffer) - { - const asset::E_FORMAT t = rawCopyMeshBuffer->getAttribFormat(POSITION_ATTRIBUTE); - std::string typeStr = getTypeString(t); - vaidToWrite[0] = true; - header += - "property " + typeStr + " x\n" + - "property " + typeStr + " y\n" + - "property " + typeStr + " z\n"; - } - if (rawCopyMeshBuffer->getAttribBoundBuffer(COLOR_ATTRIBUTE).buffer) - { - const asset::E_FORMAT t = rawCopyMeshBuffer->getAttribFormat(COLOR_ATTRIBUTE); - std::string typeStr = getTypeString(t); - vaidToWrite[1] = true; - header += - "property " + typeStr + " red\n" + - "property " + typeStr + " green\n" + - "property " + typeStr + " blue\n"; 
- if (asset::getFormatChannelCount(t) == 4u) - { - header += "property " + typeStr + " alpha\n"; - } - } - if (rawCopyMeshBuffer->getAttribBoundBuffer(UV_ATTRIBUTE).buffer) - { - const asset::E_FORMAT t = rawCopyMeshBuffer->getAttribFormat(UV_ATTRIBUTE); - std::string typeStr = getTypeString(t); - vaidToWrite[2] = true; - header += - "property " + typeStr + " u\n" + - "property " + typeStr + " v\n"; - } - if (rawCopyMeshBuffer->getAttribBoundBuffer(NORMAL_ATTRIBUTE).buffer) - { - const asset::E_FORMAT t = rawCopyMeshBuffer->getAttribFormat(NORMAL_ATTRIBUTE); - std::string typeStr = getTypeString(t); - vaidToWrite[3] = true; - header += - "property " + typeStr + " nx\n" + - "property " + typeStr + " ny\n" + - "property " + typeStr + " nz\n"; - } - - asset::E_INDEX_TYPE idxT = asset::EIT_UNKNOWN; - bool forceFaces = false; - - const auto primitiveType = rawCopyMeshBuffer->getPipeline()->getCachedCreationParams().primitiveAssembly.primitiveType; - const auto indexType = rawCopyMeshBuffer->getIndexType(); - - if (primitiveType == asset::EPT_POINT_LIST) - faceCount = 0u; - else if (doesItUseIndexBufferBinding) - { - header += "element face "; - header += std::to_string(faceCount) + '\n'; - idxT = indexType; - const std::string idxTypeStr = idxT == asset::EIT_32BIT ? "uint32" : "uint16"; - header += "property list uchar " + idxTypeStr + " vertex_indices\n"; - } - else if (primitiveType == asset::EPT_TRIANGLE_LIST) - { - forceFaces = true; - - header += "element face "; - header += std::to_string(faceCount) + '\n'; - idxT = vertexCount <= ((1u<<16) - 1) ? asset::EIT_16BIT : asset::EIT_32BIT; - const std::string idxTypeStr = idxT == asset::EIT_32BIT ? 
"uint32" : "uint16"; - header += "property list uchar " + idxTypeStr + " vertex_indices\n"; - } - else - faceCount = 0u; - header += "end_header\n"; - - { - system::IFile::success_t success; - file->write(success, header.c_str(), context.fileOffset, header.size()); - context.fileOffset += success.getBytesProcessed(); - } - - if (flags & asset::EWF_BINARY) - writeBinary(rawCopyMeshBuffer, vertexCount, faceCount, idxT, indices, forceFaces, vaidToWrite, context); - else - writeText(rawCopyMeshBuffer, vertexCount, faceCount, idxT, indices, forceFaces, vaidToWrite, context); - - _NBL_ALIGNED_FREE(const_cast(indices)); - - return true; + static const char* ext[] = { "ply", nullptr }; + return ext; } - -void CPLYMeshWriter::writeBinary(const asset::ICPUMeshBuffer* _mbuf, size_t _vtxCount, size_t _fcCount, asset::E_INDEX_TYPE _idxType, void* const _indices, bool _forceFaces, const bool _vaidToWrite[4], SContext& context) const +writer_flags_t CPLYMeshWriter::getSupportedFlags() { - const size_t colCpa = asset::getFormatChannelCount(_mbuf->getAttribFormat(1)); - - bool flipVectors = (!(context.writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED)) ? true : false; - - auto mbCopy = createCopyMBuffNormalizedReplacedWithTrueInt(_mbuf); - for (size_t i = 0u; i < _vtxCount; ++i) - { - core::vectorSIMDf f; - uint32_t ui[4]; - if (_vaidToWrite[0]) - { - writeAttribBinary(context, mbCopy.get(), 0, i, 3u, flipVectors); - } - if (_vaidToWrite[1]) - { - writeAttribBinary(context, mbCopy.get(), 1, i, colCpa); - } - if (_vaidToWrite[2]) - { - writeAttribBinary(context, mbCopy.get(), 2, i, 2u); - } - if (_vaidToWrite[3]) - { - writeAttribBinary(context, mbCopy.get(), 3, i, 3u, flipVectors); - } - } - - constexpr uint8_t listSize = 3u; - void* indices = _indices; - if (_forceFaces) - { - indices = _NBL_ALIGNED_MALLOC((_idxType == asset::EIT_32BIT ? 
4 : 2) * listSize * _fcCount,_NBL_SIMD_ALIGNMENT); - if (_idxType == asset::EIT_16BIT) - { - for (uint16_t i = 0u; i < _fcCount; ++i) - ((uint16_t*)indices)[i] = i; - } - else - { - for (uint32_t i = 0u; i < _fcCount; ++i) - ((uint32_t*)indices)[i] = i; - } - } - if (_idxType == asset::EIT_32BIT) - { - uint32_t* ind = (uint32_t*)indices; - for (size_t i = 0u; i < _fcCount; ++i) - { - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, &listSize, context.fileOffset, sizeof(listSize)); - context.fileOffset += success.getBytesProcessed(); - } - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, ind, context.fileOffset, listSize * 4); - context.fileOffset += success.getBytesProcessed(); - } - - ind += listSize; - } - } - else - { - uint16_t* ind = (uint16_t*)indices; - for (size_t i = 0u; i < _fcCount; ++i) - { - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, &listSize, context.fileOffset, sizeof(listSize)); - context.fileOffset += success.getBytesProcessed(); - } - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, ind, context.fileOffset, listSize * 2); - context.fileOffset += success.getBytesProcessed(); - } - - ind += listSize; - } - } - - if (_forceFaces) - _NBL_ALIGNED_FREE(indices); + return writer_flags_t(asset::EWF_BINARY | asset::EWF_MESH_IS_RIGHT_HANDED); } - -void CPLYMeshWriter::writeText(const asset::ICPUMeshBuffer* _mbuf, size_t _vtxCount, size_t _fcCount, asset::E_INDEX_TYPE _idxType, void* const _indices, bool _forceFaces, const bool _vaidToWrite[4], SContext& context) const +writer_flags_t CPLYMeshWriter::getForcedFlags() { - auto mbCopy = createCopyMBuffNormalizedReplacedWithTrueInt(_mbuf); - - auto writefunc = [&context, &mbCopy, this](uint32_t _vaid, size_t _ix, size_t _cpa) - { - bool flipVerteciesAndNormals = false; - if (!(context.writeContext.params.flags & 
E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED)) - if(_vaid == 0u || _vaid == 3u) - flipVerteciesAndNormals = true; - - uint32_t ui[4]; - core::vectorSIMDf f; - const asset::E_FORMAT t = mbCopy->getAttribFormat(_vaid); - if (asset::isScaledFormat(t) || asset::isIntegerFormat(t)) - { - mbCopy->getAttribute(ui, _vaid, _ix); - if (!asset::isSignedFormat(t)) - writeVectorAsText(context, ui, _cpa, flipVerteciesAndNormals); - else - { - int32_t ii[4]; - memcpy(ii, ui, 4*4); - writeVectorAsText(context, ii, _cpa, flipVerteciesAndNormals); - } - } - else - { - mbCopy->getAttribute(f, _vaid, _ix); - writeVectorAsText(context, f.pointer, _cpa, flipVerteciesAndNormals); - } - }; - - const size_t colCpa = asset::getFormatChannelCount(_mbuf->getAttribFormat(1)); - - for (size_t i = 0u; i < _vtxCount; ++i) - { - core::vectorSIMDf f; - uint32_t ui[4]; - if (_vaidToWrite[0]) - { - writefunc(0, i, 3u); - } - if (_vaidToWrite[1]) - { - writefunc(1, i, colCpa); - } - if (_vaidToWrite[2]) - { - writefunc(2, i, 2u); - } - if (_vaidToWrite[3]) - { - writefunc(3, i, 3u); - } - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, "\n", context.fileOffset, 1); - context.fileOffset += success.getBytesProcessed(); - } - } - - const char* listSize = "3 "; - void* indices = _indices; - if (_forceFaces) - { - indices = _NBL_ALIGNED_MALLOC((_idxType == asset::EIT_32BIT ? 
4 : 2) * 3 * _fcCount,_NBL_SIMD_ALIGNMENT); - if (_idxType == asset::EIT_16BIT) - { - for (uint16_t i = 0u; i < _fcCount; ++i) - ((uint16_t*)indices)[i] = i; - } - else - { - for (uint32_t i = 0u; i < _fcCount; ++i) - ((uint32_t*)indices)[i] = i; - } - } - if (_idxType == asset::EIT_32BIT) - { - uint32_t* ind = (uint32_t*)indices; - for (size_t i = 0u; i < _fcCount; ++i) - { - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, listSize, context.fileOffset, 2); - context.fileOffset += success.getBytesProcessed(); - } - - writeVectorAsText(context, ind, 3); - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, "\n", context.fileOffset, 1); - context.fileOffset += success.getBytesProcessed(); - } - - ind += 3; - } - } - else - { - uint16_t* ind = (uint16_t*)indices; - for (size_t i = 0u; i < _fcCount; ++i) - { - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, listSize, context.fileOffset, 2); - context.fileOffset += success.getBytesProcessed(); - } - - writeVectorAsText(context, ind, 3); - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, "\n", context.fileOffset, 1); - context.fileOffset += success.getBytesProcessed(); - } - - ind += 3; - } - } - - if (_forceFaces) - _NBL_ALIGNED_FREE(indices); + return EWF_NONE; } - -void CPLYMeshWriter::writeAttribBinary(SContext& context, asset::ICPUMeshBuffer* _mbuf, uint32_t _vaid, size_t _ix, size_t _cpa, bool flipAttribute) const +namespace { - uint32_t ui[4]; - core::vectorSIMDf f; - asset::E_FORMAT t = _mbuf->getAttribFormat(_vaid); - - if (asset::isScaledFormat(t) || asset::isIntegerFormat(t)) - { - _mbuf->getAttribute(ui, _vaid, _ix); - if (flipAttribute) - ui[0] = -ui[0]; - - const uint32_t bytesPerCh = asset::getTexelOrBlockBytesize(t)/asset::getFormatChannelCount(t); - if (bytesPerCh == 1u || t == asset::EF_A2B10G10R10_UINT_PACK32 || t == 
asset::EF_A2B10G10R10_SINT_PACK32 || t == asset::EF_A2B10G10R10_SSCALED_PACK32 || t == asset::EF_A2B10G10R10_USCALED_PACK32) - { - uint8_t a[4]; - for (uint32_t k = 0u; k < _cpa; ++k) - a[k] = ui[k]; - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, a, context.fileOffset, _cpa); - context.fileOffset += success.getBytesProcessed(); - } - } - else if (bytesPerCh == 2u) - { - uint16_t a[4]; - for (uint32_t k = 0u; k < _cpa; ++k) - a[k] = ui[k]; - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, a, context.fileOffset, 2 * _cpa); - context.fileOffset += success.getBytesProcessed(); - } - } - else if (bytesPerCh == 4u) - { - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, ui, context.fileOffset, 4 * _cpa); - context.fileOffset += success.getBytesProcessed(); - } - } - } - else - { - _mbuf->getAttribute(f, _vaid, _ix); - if (flipAttribute) - f[0] = -f[0]; - - { - system::IFile::success_t success; - context.writeContext.outputFile->write(success, f.pointer, context.fileOffset, 4 * _cpa); - context.fileOffset += success.getBytesProcessed(); - } - } -} - -core::smart_refctd_ptr CPLYMeshWriter::createCopyMBuffNormalizedReplacedWithTrueInt(const asset::ICPUMeshBuffer* _mbuf) +struct Parse { - auto mbCopy = core::smart_refctd_ptr_static_cast(_mbuf->clone(2)); - - for (size_t i = 0; i < ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; ++i) - { - auto vaid = i; - asset::E_FORMAT t = _mbuf->getAttribFormat(vaid); - - if (_mbuf->getAttribBoundBuffer(vaid).buffer) - mbCopy->getPipeline()->getCachedCreationParams().vertexInput.attributes[vaid].format = asset::isNormalizedFormat(t) ? 
impl::getCorrespondingIntegerFormat(t) : t; - } - - return mbCopy; + enum class ScalarType : uint8_t { Int8, UInt8, Int16, UInt16, Int32, UInt32, Float32, Float64 }; + using SemanticDecode = SGeometryViewDecode::Prepared; + using StoredDecode = SGeometryViewDecode::Prepared; + struct ScalarMeta { const char* name = "float32"; uint32_t byteSize = sizeof(float); bool integer = false; bool signedType = true; }; + struct ExtraAuxView { const ICPUPolygonGeometry::SDataView* view = nullptr; uint32_t components = 0u; uint32_t auxIndex = 0u; ScalarType scalarType = ScalarType::Float32; }; + struct WriteInput { const ICPUPolygonGeometry* geom = nullptr; ScalarType positionScalarType = ScalarType::Float32; const ICPUPolygonGeometry::SDataView* uvView = nullptr; ScalarType uvScalarType = ScalarType::Float32; const core::vector* extraAuxViews = nullptr; bool writeNormals = false; ScalarType normalScalarType = ScalarType::Float32; size_t vertexCount = 0ull; const uint32_t* indices = nullptr; size_t faceCount = 0ull; bool write16BitIndices = false; bool flipVectors = false; }; + static constexpr size_t ApproxTextBytesPerVertex = sizeof("0.000000 0.000000 0.000000 0.000000 0.000000 0.000000\n") - 1ull; + static constexpr size_t ApproxTextBytesPerFace = sizeof("3 4294967295 4294967295 4294967295\n") - 1ull; + static constexpr size_t MaxFloatTextChars = std::numeric_limits::max_digits10 + 16ull; + template + static void appendIntegral(std::string& out, const T value) { std::array buf = {}; const auto res = std::to_chars(buf.data(), buf.data() + buf.size(), value); if (res.ec == std::errc()) out.append(buf.data(), static_cast(res.ptr - buf.data())); } + static void appendFloat(std::string& out, double value) + { + const size_t oldSize = out.size(); + out.resize(oldSize + MaxFloatTextChars); + char* const begin = out.data() + oldSize; + char* const end = begin + MaxFloatTextChars; + char* const cursor = SGeometryWriterCommon::appendFloatToBuffer(begin, end, value); + 
out.resize(oldSize + static_cast(cursor - begin)); + } + static ScalarMeta getScalarMeta(const ScalarType type) + { + switch (type) + { + case ScalarType::Int8: return {"int8", sizeof(int8_t), true, true}; + case ScalarType::UInt8: return {"uint8", sizeof(uint8_t), true, false}; + case ScalarType::Int16: return {"int16", sizeof(int16_t), true, true}; + case ScalarType::UInt16: return {"uint16", sizeof(uint16_t), true, false}; + case ScalarType::Int32: return {"int32", sizeof(int32_t), true, true}; + case ScalarType::UInt32: return {"uint32", sizeof(uint32_t), true, false}; + case ScalarType::Float64: return {"float64", sizeof(double), false, true}; + default: return {"float32", sizeof(float), false, true}; + } + } + struct PreparedView + { + const ICPUPolygonGeometry::SDataView* view = nullptr; + uint32_t componentCount = 0u; + ScalarType scalarType = ScalarType::Float32; + bool flipVectors = false; + SemanticDecode semantic = {}; + StoredDecode stored = {}; + static inline PreparedView create(const ICPUPolygonGeometry::SDataView& view, const uint32_t componentCount, const ScalarType scalarType, const bool flipVectors) + { + PreparedView retval = {.view = &view, .componentCount = componentCount, .scalarType = scalarType, .flipVectors = flipVectors}; + const auto meta = getScalarMeta(scalarType); + if (meta.integer) + retval.stored = SGeometryViewDecode::prepare(view); + else + retval.semantic = SGeometryViewDecode::prepare(view); + return retval; + } + }; + static bool isSupportedScalarFormat(const E_FORMAT format) + { + if (format == EF_UNKNOWN) + return false; + const uint32_t channels = getFormatChannelCount(format); + if (channels == 0u) + return false; + if (!(isIntegerFormat(format) || isFloatingPointFormat(format) || isNormalizedFormat(format) || isScaledFormat(format))) + return false; + const auto bytesPerPixel = getBytesPerPixel(format); + if (bytesPerPixel.getDenominator() != 1u) + return false; + const uint32_t pixelBytes = bytesPerPixel.getNumerator(); 
+ if (pixelBytes == 0u || (pixelBytes % channels) != 0u) + return false; + const uint32_t bytesPerChannel = pixelBytes / channels; + return bytesPerChannel == 1u || bytesPerChannel == 2u || bytesPerChannel == 4u || bytesPerChannel == 8u; + } + static ScalarType selectScalarType(const E_FORMAT format) + { + if (!isSupportedScalarFormat(format)) + return ScalarType::Float32; + if (isNormalizedFormat(format) || isScaledFormat(format)) + return ScalarType::Float32; + const uint32_t channels = getFormatChannelCount(format); + if (channels == 0u) + { + assert(format == EF_UNKNOWN); + return ScalarType::Float32; + } + const auto bytesPerPixel = getBytesPerPixel(format); + if (bytesPerPixel.getDenominator() != 1u) + return ScalarType::Float32; + const uint32_t pixelBytes = bytesPerPixel.getNumerator(); + if (pixelBytes == 0u || (pixelBytes % channels) != 0u) + return ScalarType::Float32; + const uint32_t bytesPerChannel = pixelBytes / channels; + if (isIntegerFormat(format)) + { + const bool signedType = isSignedFormat(format); + switch (bytesPerChannel) + { + case 1u: return signedType ? ScalarType::Int8 : ScalarType::UInt8; + case 2u: return signedType ? ScalarType::Int16 : ScalarType::UInt16; + case 4u: return signedType ? ScalarType::Int32 : ScalarType::UInt32; + default: return ScalarType::Float64; + } + } + if (isFloatingPointFormat(format)) + return bytesPerChannel >= 8u ? 
ScalarType::Float64 : ScalarType::Float32; + return ScalarType::Float32; + } + static bool isDirectScalarFormat(const E_FORMAT format, const ScalarType scalarType, const uint32_t componentCount, uint32_t& outByteSize) + { + outByteSize = 0u; + if (format == EF_UNKNOWN || componentCount == 0u) + return false; + if (isNormalizedFormat(format) || isScaledFormat(format)) + return false; + const uint32_t channels = getFormatChannelCount(format); + if (channels < componentCount) + return false; + const auto bytesPerPixel = getBytesPerPixel(format); + if (bytesPerPixel.getDenominator() != 1u) + return false; + const uint32_t pixelBytes = bytesPerPixel.getNumerator(); + if (pixelBytes == 0u || (pixelBytes % channels) != 0u) + return false; + const uint32_t byteSize = pixelBytes / channels; + const auto meta = getScalarMeta(scalarType); + if (byteSize != meta.byteSize) + return false; + switch (scalarType) + { + case ScalarType::Float32: + case ScalarType::Float64: + if (!isFloatingPointFormat(format)) + return false; + break; + case ScalarType::Int8: + case ScalarType::Int16: + case ScalarType::Int32: + if (!isIntegerFormat(format) || !isSignedFormat(format)) + return false; + break; + case ScalarType::UInt8: + case ScalarType::UInt16: + case ScalarType::UInt32: + if (!isIntegerFormat(format) || isSignedFormat(format)) + return false; + break; + } + outByteSize = byteSize; + return true; + } + static bool writeDirectBinaryView(const ICPUPolygonGeometry::SDataView& view, const size_t ix, const uint32_t componentCount, const ScalarType scalarType, const bool flipVectors, uint8_t*& dst) + { + if (flipVectors || !dst || !view.composed.isFormatted()) + return false; + uint32_t byteSize = 0u; + if (!isDirectScalarFormat(view.composed.format, scalarType, componentCount, byteSize)) + return false; + const uint32_t pixelBytes = getBytesPerPixel(view.composed.format).getNumerator(); + if (view.composed.getStride() != pixelBytes) + return false; + const void* src = 
view.getPointer(ix); + if (!src) + return false; + const size_t copyBytes = static_cast(componentCount) * byteSize; + std::memcpy(dst, src, copyBytes); + dst += copyBytes; + return true; + } + static bool writeTypedViewBinary(const PreparedView& prepared, const size_t ix, uint8_t*& dst) + { + if (!prepared.view || !dst) + return false; + const auto& view = *prepared.view; + const auto componentCount = prepared.componentCount; + const auto scalarType = prepared.scalarType; + const auto flipVectors = prepared.flipVectors; + if (!dst) + return false; + if (writeDirectBinaryView(view, ix, componentCount, scalarType, flipVectors, dst)) + return true; + switch (scalarType) + { + case ScalarType::Float64: + case ScalarType::Float32: + { + std::array tmp = {}; + if (!prepared.semantic.decode(ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + double value = tmp[c]; + if (flipVectors && c == 0u) + value = -value; + if (scalarType == ScalarType::Float64) + { + std::memcpy(dst, &value, sizeof(value)); + dst += sizeof(value); + } + else + { + const float typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } + } + return true; + } + case ScalarType::Int8: + case ScalarType::Int16: + case ScalarType::Int32: + { + std::array tmp = {}; + if (!prepared.stored.decode(ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + int64_t value = tmp[c]; + if (flipVectors && c == 0u) + value = -value; + switch (scalarType) + { + case ScalarType::Int8: + { + const int8_t typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } break; + case ScalarType::Int16: + { + const int16_t typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } break; + default: + { + const int32_t typed = static_cast(value); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } break; + } + } + return true; + } + case 
ScalarType::UInt8: + case ScalarType::UInt16: + case ScalarType::UInt32: + { + std::array tmp = {}; + if (!prepared.stored.decode(ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + switch (scalarType) + { + case ScalarType::UInt8: + { + const uint8_t typed = static_cast(tmp[c]); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } break; + case ScalarType::UInt16: + { + const uint16_t typed = static_cast(tmp[c]); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } break; + default: + { + const uint32_t typed = static_cast(tmp[c]); + std::memcpy(dst, &typed, sizeof(typed)); + dst += sizeof(typed); + } break; + } + } + return true; + } + } + return false; + } + static bool writeTypedViewText(std::string& output, const PreparedView& prepared, const size_t ix) + { + if (!prepared.view) + return false; + const auto componentCount = prepared.componentCount; + const auto scalarType = prepared.scalarType; + const auto flipVectors = prepared.flipVectors; + switch (scalarType) + { + case ScalarType::Float64: + case ScalarType::Float32: + { + std::array tmp = {}; + if (!prepared.semantic.decode(ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + double value = tmp[c]; + if (flipVectors && c == 0u) + value = -value; + appendFloat(output, value); + output.push_back(' '); + } + return true; + } + case ScalarType::Int8: + case ScalarType::Int16: + case ScalarType::Int32: + { + std::array tmp = {}; + if (!prepared.stored.decode(ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + int64_t value = tmp[c]; + if (flipVectors && c == 0u) + value = -value; + appendIntegral(output, value); + output.push_back(' '); + } + return true; + } + case ScalarType::UInt8: + case ScalarType::UInt16: + case ScalarType::UInt32: + { + std::array tmp = {}; + if (!prepared.stored.decode(ix, tmp)) + return false; + for (uint32_t c = 0u; c < componentCount; ++c) + { + 
appendIntegral(output, tmp[c]); + output.push_back(' '); + } + return true; + } + } + return false; + } + static bool writeBinaryFast(const WriteInput& input, uint8_t*& dst) + { + if (!input.geom || !input.indices || !input.extraAuxViews || !dst || input.flipVectors || input.writeNormals || input.uvView || !input.extraAuxViews->empty() || input.positionScalarType != ScalarType::Float32) + return false; + const auto& positionView = input.geom->getPositionView(); + if (!positionView.composed.isFormatted() || positionView.composed.format != EF_R32G32B32_SFLOAT || positionView.composed.getStride() != sizeof(hlsl::float32_t3)) + return false; + const void* src = positionView.getPointer(); + if (!src) + return false; + const size_t vertexBytes = input.vertexCount * sizeof(hlsl::float32_t3); + std::memcpy(dst, src, vertexBytes); + dst += vertexBytes; + for (size_t i = 0u; i < input.faceCount; ++i) + { + *dst++ = 3u; + const uint32_t* tri = input.indices + i * 3u; + if (input.write16BitIndices) + { + const uint16_t tri16[3] = {static_cast(tri[0]), static_cast(tri[1]), static_cast(tri[2])}; + std::memcpy(dst, tri16, sizeof(tri16)); + dst += sizeof(tri16); + } + else + { + std::memcpy(dst, tri, sizeof(uint32_t) * 3u); + dst += sizeof(uint32_t) * 3u; + } + } + return true; + } + static bool writeBinary(const WriteInput& input, uint8_t* dst) + { + if (!input.geom || !input.extraAuxViews || !dst) + return false; + if (writeBinaryFast(input, dst)) + return true; + const auto& positionView = input.geom->getPositionView(); + const auto& normalView = input.geom->getNormalView(); + const auto& extraAuxViews = *input.extraAuxViews; + const PreparedView preparedPosition = PreparedView::create(positionView, 3u, input.positionScalarType, input.flipVectors); + const PreparedView preparedNormal = input.writeNormals ? PreparedView::create(normalView, 3u, input.normalScalarType, input.flipVectors) : PreparedView{}; + const PreparedView preparedUV = input.uvView ? 
PreparedView::create(*input.uvView, 2u, input.uvScalarType, false) : PreparedView{}; + core::vector preparedExtraAuxViews; + preparedExtraAuxViews.reserve(extraAuxViews.size()); + for (const auto& extra : extraAuxViews) + { + if (!extra.view) + return false; + preparedExtraAuxViews.push_back(PreparedView::create(*extra.view, extra.components, extra.scalarType, false)); + } + for (size_t i = 0u; i < input.vertexCount; ++i) + { + if (!writeTypedViewBinary(preparedPosition, i, dst)) + return false; + if (input.writeNormals && !writeTypedViewBinary(preparedNormal, i, dst)) + return false; + if (input.uvView && !writeTypedViewBinary(preparedUV, i, dst)) + return false; + for (const auto& extra : preparedExtraAuxViews) + if (!writeTypedViewBinary(extra, i, dst)) + return false; + } + if (!input.indices) + return false; + for (size_t i = 0u; i < input.faceCount; ++i) + { + const uint8_t listSize = 3u; + *dst++ = listSize; + const uint32_t* tri = input.indices + i * 3u; + if (input.write16BitIndices) + { + const uint16_t tri16[3] = {static_cast(tri[0]), static_cast(tri[1]), static_cast(tri[2])}; + std::memcpy(dst, tri16, sizeof(tri16)); + dst += sizeof(tri16); + } + else + { + std::memcpy(dst, tri, sizeof(uint32_t) * 3u); + dst += sizeof(uint32_t) * 3u; + } + } + return true; + } + static bool writeText(const WriteInput& input, std::string& output) + { + if (!input.geom || !input.extraAuxViews) + return false; + const auto& extraAuxViews = *input.extraAuxViews; + const PreparedView preparedPosition = PreparedView::create(input.geom->getPositionView(), 3u, input.positionScalarType, input.flipVectors); + const PreparedView preparedNormal = input.writeNormals ? PreparedView::create(input.geom->getNormalView(), 3u, input.normalScalarType, input.flipVectors) : PreparedView{}; + const PreparedView preparedUV = input.uvView ? 
PreparedView::create(*input.uvView, 2u, input.uvScalarType, false) : PreparedView{}; + core::vector preparedExtraAuxViews; + preparedExtraAuxViews.reserve(extraAuxViews.size()); + for (const auto& extra : extraAuxViews) + { + if (!extra.view) + return false; + preparedExtraAuxViews.push_back(PreparedView::create(*extra.view, extra.components, extra.scalarType, false)); + } + for (size_t i = 0u; i < input.vertexCount; ++i) + { + if (!writeTypedViewText(output, preparedPosition, i)) + return false; + if (input.writeNormals && !writeTypedViewText(output, preparedNormal, i)) + return false; + if (input.uvView && !writeTypedViewText(output, preparedUV, i)) + return false; + for (const auto& extra : preparedExtraAuxViews) + if (!writeTypedViewText(output, extra, i)) + return false; + output.push_back('\n'); + } + if (!input.indices) + return false; + for (size_t i = 0u; i < input.faceCount; ++i) + { + const uint32_t* tri = input.indices + i * 3u; + output.append("3 "); + appendIntegral(output, tri[0]); + output.push_back(' '); + appendIntegral(output, tri[1]); + output.push_back(' '); + appendIntegral(output, tri[2]); + output.push_back('\n'); + } + return true; + } +}; } - -std::string CPLYMeshWriter::getTypeString(asset::E_FORMAT _t) +bool CPLYMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) { - using namespace asset; - - if (isFloatingPointFormat(_t)) - return "float"; - - switch (_t) - { - case EF_R8_SNORM: - case EF_R8_SINT: - case EF_R8_SSCALED: - case EF_R8G8_SNORM: - case EF_R8G8_SINT: - case EF_R8G8_SSCALED: - case EF_R8G8B8_SNORM: - case EF_R8G8B8_SINT: - case EF_R8G8B8_SSCALED: - case EF_R8G8B8A8_SNORM: - case EF_R8G8B8A8_SINT: - case EF_R8G8B8A8_SSCALED: - case EF_B8G8R8A8_UNORM: - case EF_A2B10G10R10_SNORM_PACK32: - case EF_A2B10G10R10_SINT_PACK32: - case EF_A2B10G10R10_SSCALED_PACK32: - case EF_A2R10G10B10_SNORM_PACK32: - return "char"; - - case EF_R8_UNORM: - case EF_R8_UINT: - case 
EF_R8_USCALED: - case EF_R8G8_UNORM: - case EF_R8G8_UINT: - case EF_R8G8_USCALED: - case EF_R8G8B8_UNORM: - case EF_R8G8B8_UINT: - case EF_R8G8B8_USCALED: - case EF_R8G8B8A8_UNORM: - case EF_R8G8B8A8_UINT: - case EF_R8G8B8A8_USCALED: - case EF_A2R10G10B10_UNORM_PACK32: - case EF_A2B10G10R10_UNORM_PACK32: - case EF_A2B10G10R10_UINT_PACK32: - case EF_A2B10G10R10_USCALED_PACK32: - return "uchar"; - - case EF_R16_UNORM: - case EF_R16_UINT: - case EF_R16_USCALED: - case EF_R16G16_UNORM: - case EF_R16G16_UINT: - case EF_R16G16_USCALED: - case EF_R16G16B16_UNORM: - case EF_R16G16B16_UINT: - case EF_R16G16B16_USCALED: - case EF_R16G16B16A16_UNORM: - case EF_R16G16B16A16_UINT: - case EF_R16G16B16A16_USCALED: - return "ushort"; - - case EF_R16_SNORM: - case EF_R16_SINT: - case EF_R16_SSCALED: - case EF_R16G16_SNORM: - case EF_R16G16_SINT: - case EF_R16G16_SSCALED: - case EF_R16G16B16_SNORM: - case EF_R16G16B16_SINT: - case EF_R16G16B16_SSCALED: - case EF_R16G16B16A16_SNORM: - case EF_R16G16B16A16_SINT: - case EF_R16G16B16A16_SSCALED: - return "short"; - - case EF_R32_UINT: - case EF_R32G32_UINT: - case EF_R32G32B32_UINT: - case EF_R32G32B32A32_UINT: - return "uint"; - - case EF_R32_SINT: - case EF_R32G32_SINT: - case EF_R32G32B32_SINT: - case EF_R32G32B32A32_SINT: - return "int"; - - default: - return ""; - } + using ScalarType = Parse::ScalarType; + using clock_t = std::chrono::high_resolution_clock; + SFileWriteTelemetry ioTelemetry = {}; + if (!_override) + getDefaultOverride(_override); + if (!_file || !_params.rootAsset) + return _params.logger.log("PLY writer: missing output file or root asset.", system::ILogger::ELL_ERROR), false; + const auto items = SGeometryWriterCommon::collectPolygonGeometryWriteItems(_params.rootAsset); + if (items.size() != 1u) + return _params.logger.log("PLY writer: expected exactly one polygon geometry to write.", system::ILogger::ELL_ERROR), false; + const auto& item = items.front(); + const auto* geom = item.geometry; + if (!geom || 
!geom->valid()) + return _params.logger.log("PLY writer: root asset is not a valid polygon geometry.", system::ILogger::ELL_ERROR), false; + if (!SGeometryWriterCommon::isIdentityTransform(item.transform)) + return _params.logger.log("PLY writer: transformed scene or collection export is not supported.", system::ILogger::ELL_ERROR), false; + SAssetWriteContext ctx = {_params, _file}; + system::IFile* file = _override->getOutputFile(_file, ctx, {geom, 0u}); + if (!file) + return _params.logger.log("PLY writer: output override returned null file.", system::ILogger::ELL_ERROR), false; + const auto& positionView = geom->getPositionView(); + const auto& normalView = geom->getNormalView(); + const size_t vertexCount = positionView.getElementCount(); + if (vertexCount == 0ull) + return _params.logger.log("PLY writer: empty position view.", system::ILogger::ELL_ERROR), false; + const bool writeNormals = static_cast(normalView); + if (writeNormals && normalView.getElementCount() != vertexCount) + return _params.logger.log("PLY writer: normal vertex count mismatch.", system::ILogger::ELL_ERROR), false; + const ICPUPolygonGeometry::SDataView* uvView = SGeometryWriterCommon::getAuxViewAt(geom, SPLYPolygonGeometryAuxLayout::UV0, vertexCount); + if (uvView && getFormatChannelCount(uvView->composed.format) != 2u) + uvView = nullptr; + core::vector extraAuxViews; + const auto& auxViews = geom->getAuxAttributeViews(); + extraAuxViews.reserve(auxViews.size()); + for (uint32_t auxIx = 0u; auxIx < static_cast(auxViews.size()); ++auxIx) + { + const auto& view = auxViews[auxIx]; + if (!view || (uvView && auxIx == SPLYPolygonGeometryAuxLayout::UV0)) + continue; + if (view.getElementCount() != vertexCount) + continue; + const uint32_t channels = getFormatChannelCount(view.composed.format); + if (channels == 0u) + continue; + const uint32_t components = std::min(4u, channels); + extraAuxViews.push_back({&view, components, auxIx, Parse::selectScalarType(view.composed.format)}); + } + 
_params.logger.log("PLY writer input: file=%s pos_fmt=%u pos_stride=%u pos_count=%llu normal_fmt=%u normal_stride=%u normal_count=%llu uv_fmt=%u uv_stride=%u uv_count=%llu aux=%u", + system::ILogger::ELL_INFO, file->getFileName().string().c_str(), static_cast(positionView.composed.format), positionView.composed.getStride(), + static_cast(positionView.getElementCount()), static_cast(normalView.composed.format), normalView.composed.getStride(), + static_cast(normalView.getElementCount()), uvView ? static_cast(uvView->composed.format) : static_cast(EF_UNKNOWN), + uvView ? uvView->composed.getStride() : 0u, uvView ? static_cast(uvView->getElementCount()) : 0ull, static_cast(extraAuxViews.size())); + const auto* indexing = geom->getIndexingCallback(); + if (!indexing) + return _params.logger.log("PLY writer: missing indexing callback.", system::ILogger::ELL_ERROR), false; + if (indexing->knownTopology() != E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST) + return _params.logger.log("PLY writer: only triangle-list topology is supported.", system::ILogger::ELL_ERROR), false; + const auto& indexView = geom->getIndexView(); + core::vector indexData; + const uint32_t* indices = nullptr; + size_t faceCount = 0ull; + if (indexView) + { + const size_t indexCount = indexView.getElementCount(); + if ((indexCount % 3u) != 0u) + return _params.logger.log("PLY writer: failed to validate triangle indexing.", system::ILogger::ELL_ERROR), false; + const void* src = indexView.getPointer(); + if (!src) + return _params.logger.log("PLY writer: missing index buffer pointer.", system::ILogger::ELL_ERROR), false; + if (indexView.composed.format == EF_R32_UINT && indexView.composed.getStride() == sizeof(uint32_t)) + indices = reinterpret_cast(src); + else if (indexView.composed.format == EF_R16_UINT && indexView.composed.getStride() == sizeof(uint16_t)) + { + const auto* src16 = reinterpret_cast(src); + indexData.resize(indexCount); + for (size_t i = 0u; i < indexCount; ++i) + indexData[i] = 
src16[i]; + indices = indexData.data(); + } + else + { + indexData.resize(indexCount); + for (size_t i = 0u; i < indexCount; ++i) + { + hlsl::uint32_t4 decoded = {}; + if (!indexView.decodeElement(i, decoded)) + return _params.logger.log("PLY writer: failed to decode index view.", system::ILogger::ELL_ERROR), false; + indexData[i] = decoded.x; + } + indices = indexData.data(); + } + faceCount = indexCount / 3u; + } + else + { + if ((vertexCount % 3u) != 0u) + return _params.logger.log("PLY writer: failed to derive triangle indexing from positions.", system::ILogger::ELL_ERROR), false; + indexData.resize(vertexCount); + for (size_t i = 0u; i < vertexCount; ++i) + indexData[i] = static_cast(i); + indices = indexData.data(); + faceCount = vertexCount / 3u; + } + const auto flags = _override->getAssetWritingFlags(ctx, geom, 0u); + const bool binary = flags.hasAnyFlag(E_WRITER_FLAGS::EWF_BINARY); + const bool flipVectors = !flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const bool write16BitIndices = vertexCount <= static_cast(std::numeric_limits::max()) + 1ull; + ScalarType positionScalarType = Parse::selectScalarType(positionView.composed.format); + if (flipVectors && Parse::getScalarMeta(positionScalarType).integer && !Parse::getScalarMeta(positionScalarType).signedType) + positionScalarType = ScalarType::Float32; + ScalarType normalScalarType = Parse::selectScalarType(normalView.composed.format); + if (flipVectors && Parse::getScalarMeta(normalScalarType).integer && !Parse::getScalarMeta(normalScalarType).signedType) + normalScalarType = ScalarType::Float32; + const ScalarType uvScalarType = uvView ? 
Parse::selectScalarType(uvView->composed.format) : ScalarType::Float32; + const auto positionMeta = Parse::getScalarMeta(positionScalarType); + const auto normalMeta = Parse::getScalarMeta(normalScalarType); + const auto uvMeta = Parse::getScalarMeta(uvScalarType); + size_t extraAuxBytesPerVertex = 0ull; + for (const auto& extra : extraAuxViews) + extraAuxBytesPerVertex += static_cast(extra.components) * Parse::getScalarMeta(extra.scalarType).byteSize; + std::ostringstream headerBuilder; + headerBuilder << "ply\n"; + headerBuilder << (binary ? "format binary_little_endian 1.0" : "format ascii 1.0"); + headerBuilder << "\ncomment Nabla " << NABLA_SDK_VERSION; + headerBuilder << "\nelement vertex " << vertexCount << "\n"; + headerBuilder << "property " << positionMeta.name << " x\n"; + headerBuilder << "property " << positionMeta.name << " y\n"; + headerBuilder << "property " << positionMeta.name << " z\n"; + if (writeNormals) + { + headerBuilder << "property " << normalMeta.name << " nx\n"; + headerBuilder << "property " << normalMeta.name << " ny\n"; + headerBuilder << "property " << normalMeta.name << " nz\n"; + } + if (uvView) + { + headerBuilder << "property " << uvMeta.name << " u\n"; + headerBuilder << "property " << uvMeta.name << " v\n"; + } + for (const auto& extra : extraAuxViews) + { + const auto extraMeta = Parse::getScalarMeta(extra.scalarType); + for (uint32_t component = 0u; component < extra.components; ++component) + { + headerBuilder << "property " << extraMeta.name << " aux" << extra.auxIndex; + if (extra.components > 1u) + headerBuilder << "_" << component; + headerBuilder << "\n"; + } + } + headerBuilder << "element face " << faceCount; + headerBuilder << (write16BitIndices ? 
"\nproperty list uchar uint16 vertex_indices\n" : "\nproperty list uchar uint32 vertex_indices\n"); + headerBuilder << "end_header\n"; + const std::string header = headerBuilder.str(); + const Parse::WriteInput input = {.geom = geom, .positionScalarType = positionScalarType, .uvView = uvView, .uvScalarType = uvScalarType, .extraAuxViews = &extraAuxViews, .writeNormals = writeNormals, .normalScalarType = normalScalarType, .vertexCount = vertexCount, .indices = indices, .faceCount = faceCount, .write16BitIndices = write16BitIndices, .flipVectors = flipVectors}; + bool writeOk = false; + size_t outputBytes = 0ull; + double writeIoMs = 0.0; + auto writePayload = [&](const void* bodyData, const size_t bodySize) -> bool { + const size_t outputSize = header.size() + bodySize; + const auto ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, static_cast(outputSize), true, file); + if (impl::SFileAccess::logInvalidPlan(_params.logger, "PLY writer", file->getFileName().string().c_str(), ioPlan)) + return false; + outputBytes = outputSize; + const SInterchangeIO::SBufferRange writeBuffers[] = {{.data = header.data(), .byteCount = header.size()}, {.data = bodyData, .byteCount = bodySize}}; + const auto ioStart = clock_t::now(); + writeOk = SInterchangeIO::writeBuffersWithPolicy(file, ioPlan, writeBuffers, &ioTelemetry); + writeIoMs = std::chrono::duration(clock_t::now() - ioStart).count(); + const uint64_t ioMinWrite = ioTelemetry.getMinOrZero(); + const uint64_t ioAvgWrite = ioTelemetry.getAvgOrZero(); + impl::SFileAccess::logTinyIO(_params.logger, "PLY writer", file->getFileName().string().c_str(), ioTelemetry, static_cast(outputBytes), _params.ioPolicy, "writes"); + _params.logger.log("PLY writer stats: file=%s bytes=%llu vertices=%llu faces=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(outputBytes), + 
static_cast(vertexCount), static_cast(faceCount), binary ? 1 : 0, + static_cast(ioTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), + system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(ioPlan.strategy).c_str(), static_cast(ioPlan.chunkSizeBytes()), ioPlan.reason); + return writeOk; + }; + if (binary) + { + const size_t vertexStride = static_cast(positionMeta.byteSize) * 3ull + (writeNormals ? static_cast(normalMeta.byteSize) * 3ull : 0ull) + (uvView ? static_cast(uvMeta.byteSize) * 2ull : 0ull) + extraAuxBytesPerVertex; + const size_t faceStride = sizeof(uint8_t) + (write16BitIndices ? sizeof(uint16_t) : sizeof(uint32_t)) * 3u; + const size_t bodySize = vertexCount * vertexStride + faceCount * faceStride; + core::vector body; + const auto fillStart = clock_t::now(); + body.resize(bodySize); + if (!Parse::writeBinary(input, body.data())) + return _params.logger.log("PLY writer: binary payload generation failed.", system::ILogger::ELL_ERROR), false; + const auto fillMs = std::chrono::duration(clock_t::now() - fillStart).count(); + const bool ok = writePayload(body.data(), body.size()); + _params.logger.log("PLY writer stages: file=%s header=%llu body=%llu fill=%.3f ms io=%.3f ms", system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(header.size()), static_cast(body.size()), fillMs, writeIoMs); + return ok; + } + std::string body; + body.reserve(vertexCount * Parse::ApproxTextBytesPerVertex + faceCount * Parse::ApproxTextBytesPerFace); + const auto fillStart = clock_t::now(); + if (!Parse::writeText(input, body)) + return _params.logger.log("PLY writer: text payload generation failed.", system::ILogger::ELL_ERROR), false; + const auto fillMs = std::chrono::duration(clock_t::now() - fillStart).count(); + const bool ok = writePayload(body.data(), body.size()); + _params.logger.log("PLY writer stages: file=%s header=%llu body=%llu fill=%.3f ms io=%.3f ms", system::ILogger::ELL_PERFORMANCE, 
file->getFileName().string().c_str(), static_cast(header.size()), static_cast(body.size()), fillMs, writeIoMs); + return ok; +} } - -} // end namespace -} // end namespace - #endif // _NBL_COMPILE_WITH_PLY_WRITER_ - diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.h b/src/nbl/asset/interchange/CPLYMeshWriter.h index e709ffa0fe..4adacc4c68 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.h +++ b/src/nbl/asset/interchange/CPLYMeshWriter.h @@ -1,79 +1,24 @@ -// Copyright (C) 2019-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors #ifndef _NBL_ASSET_PLY_MESH_WRITER_H_INCLUDED_ #define _NBL_ASSET_PLY_MESH_WRITER_H_INCLUDED_ - - -#include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/interchange/IGeometryWriter.h" - -#include - - namespace nbl::asset { - -//! class to write PLY mesh files +//! Geometry writer capable of emitting PLY mesh files. 
class CPLYMeshWriter : public IGeometryWriter { public: CPLYMeshWriter(); - virtual const char** getAssociatedFileExtensions() const - { - static const char* ext[]{ "ply", nullptr }; - return ext; - } - - virtual uint32_t getSupportedFlags() override { return asset::EWF_BINARY; } - - virtual uint32_t getForcedFlags() { return 0u; } - - virtual bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; + const char** getAssociatedFileExtensions() const override; - private: + writer_flags_t getSupportedFlags() override; + writer_flags_t getForcedFlags() override; - struct SContext - { - SAssetWriteContext writeContext; - size_t fileOffset = 0; - }; - - void writeBinary(const ICPUPolygonGeometry* geom, size_t _vtxCount, size_t _fcCount, asset::E_INDEX_TYPE _idxType, void* const _indices, bool _forceFaces, const bool _vaidToWrite[4], SContext& context) const; - void writeText(const ICPUPolygonGeometry* geom, size_t _vtxCount, size_t _fcCount, asset::E_INDEX_TYPE _idxType, void* const _indices, bool _forceFaces, const bool _vaidToWrite[4], SContext& context) const; - - void writeAttribBinary(SContext& context, ICPUPolygonGeometry* geom, uint32_t _vaid, size_t _ix, size_t _cpa, bool flipAttribute = false) const; - - //! Creates new geometry with the same attribute buffers mapped but with normalized types changed to corresponding true integer types. 
- static core::smart_refctd_ptr createCopyNormalizedReplacedWithTrueInt(const ICPUPolygonGeometry* geom); - - static std::string getTypeString(asset::E_FORMAT _t); - - template - void writeVectorAsText(SContext& context, const T* _vec, size_t _elementsToWrite, bool flipVectors = false) const - { - constexpr size_t xID = 0u; - std::stringstream ss; - ss << std::fixed; - bool currentFlipOnVariable = false; - for (size_t i = 0u; i < _elementsToWrite; ++i) - { - if (flipVectors && i == xID) - currentFlipOnVariable = true; - else - currentFlipOnVariable = false; - - ss << std::setprecision(6) << _vec[i] * (currentFlipOnVariable ? -1 : 1) << " "; - } - auto str = ss.str(); - - system::IFile::success_t succ; - context.writeContext.outputFile->write(succ, str.c_str(), context.fileOffset, str.size()); - context.fileOffset += succ.getBytesProcessed(); - } + bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; - } // end namespace #endif diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index d00c37cf10..a92b86f839 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -1,437 +1,629 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +#ifdef _NBL_COMPILE_WITH_STL_LOADER_ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors - #include "CSTLMeshFileLoader.h" - -#ifdef _NBL_COMPILE_WITH_STL_LOADER_ - +#include "impl/SFileAccess.h" +#include "impl/STextParse.h" #include "nbl/asset/asset.h" - -#include "nbl/asset/IAssetManager.h" - -#include "nbl/system/ISystem.h" +#include "nbl/asset/format/convertColor.h" +#include "nbl/asset/interchange/SGeometryContentHash.h" +#include "nbl/asset/interchange/SGeometryLoaderCommon.h" +#include "nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h" +#include "nbl/asset/interchange/SInterchangeIO.h" +#include "nbl/asset/interchange/SLoaderRuntimeTuning.h" +#include "nbl/asset/metadata/CSTLMetadata.h" +#include "nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/asset/utils/SGeometryNormalCommon.h" +#include "nbl/builtin/hlsl/shapes/AABBAccumulator.hlsl" +#include "nbl/core/hash/blake.h" #include "nbl/system/IFile.h" - -using namespace nbl; -using namespace nbl::asset; - -constexpr auto POSITION_ATTRIBUTE = 0; -constexpr auto COLOR_ATTRIBUTE = 1; -constexpr auto UV_ATTRIBUTE = 2; -constexpr auto NORMAL_ATTRIBUTE = 3; - -CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager* _m_assetMgr) - : IRenderpassIndependentPipelineLoader(_m_assetMgr), m_assetMgr(_m_assetMgr) +#include +namespace nbl::asset { - -} - -void CSTLMeshFileLoader::initialize() +namespace +{ +struct Parse { - IRenderpassIndependentPipelineLoader::initialize(); + using Common = impl::TextParse; + struct LayoutProbe { bool hasPrefix = false; bool startsWithSolid = false; bool binaryBySize = false; uint32_t triangleCount = 0u; }; + static hlsl::float32_t3 resolveStoredNormal(const hlsl::float32_t3& fileNormal) { const float fileLen2 = hlsl::dot(fileNormal, fileNormal); return (fileLen2 > 0.f && std::abs(fileLen2 - 1.f) < 1e-4f) ? 
fileNormal : SGeometryNormalCommon::normalizeOrZero(fileNormal); } + static void pushTriangleReversed(const std::array& p, core::vector& positions) { positions.push_back(p[2u]); positions.push_back(p[1u]); positions.push_back(p[0u]); } + static uint32_t decodeViscamColorToB8G8R8A8(const uint16_t packedColor) { std::array src = {&packedColor}; uint32_t outColor = 0u; convertColor(src.data(), &outColor, 0u, 0u); return outColor; } - auto precomputeAndCachePipeline = [&](bool withColorAttribute) + struct Context { - auto getShaderDefaultPaths = [&]() -> std::pair - { - if (withColorAttribute) - return std::make_pair("nbl/builtin/material/debug/vertex_color/specialized_shader.vert", "nbl/builtin/material/debug/vertex_color/specialized_shader.frag"); - else - return std::make_pair("nbl/builtin/material/debug/vertex_normal/specialized_shader.vert", "nbl/builtin/material/debug/vertex_normal/specialized_shader.frag"); - }; - - auto defaultOverride = IAssetLoaderOverride(m_assetMgr); - const std::string pipelineCacheHash = getPipelineCacheKey(withColorAttribute).data(); - const uint32_t _hierarchyLevel = 0; - const IAssetLoader::SAssetLoadContext fakeContext(IAssetLoader::SAssetLoadParams{}, nullptr); - - const asset::IAsset::E_TYPE types[]{ asset::IAsset::ET_RENDERPASS_INDEPENDENT_PIPELINE, (asset::IAsset::E_TYPE)0u }; - auto pipelineBundle = defaultOverride.findCachedAsset(pipelineCacheHash, types, fakeContext, _hierarchyLevel + ICPURenderpassIndependentPipeline::DESC_SET_HIERARCHYLEVELS_BELOW); - if (pipelineBundle.getContents().empty()) - { - auto mbVertexShader = core::smart_refctd_ptr(); - auto mbFragmentShader = core::smart_refctd_ptr(); - { - const IAsset::E_TYPE types[]{ IAsset::E_TYPE::ET_SPECIALIZED_SHADER, static_cast(0u) }; - const auto shaderPaths = getShaderDefaultPaths(); - - auto vertexShaderBundle = m_assetMgr->findAssets(shaderPaths.first.data(), types); - auto fragmentShaderBundle = m_assetMgr->findAssets(shaderPaths.second.data(), types); - - 
mbVertexShader = core::smart_refctd_ptr_static_cast(vertexShaderBundle->begin()->getContents().begin()[0]); - mbFragmentShader = core::smart_refctd_ptr_static_cast(fragmentShaderBundle->begin()->getContents().begin()[0]); - } - - auto defaultOverride = IAssetLoaderOverride(m_assetMgr); - - const IAssetLoader::SAssetLoadContext fakeContext(IAssetLoader::SAssetLoadParams{}, nullptr); - auto mbBundlePipelineLayout = defaultOverride.findDefaultAsset("nbl/builtin/pipeline_layout/loader/STL", fakeContext, _hierarchyLevel + ICPURenderpassIndependentPipeline::PIPELINE_LAYOUT_HIERARCHYLEVELS_BELOW); - auto mbPipelineLayout = mbBundlePipelineLayout.first; - - auto const positionFormatByteSize = getTexelOrBlockBytesize(EF_R32G32B32_SFLOAT); - auto const colorFormatByteSize = withColorAttribute ? getTexelOrBlockBytesize(EF_B8G8R8A8_UNORM) : 0; - auto const normalFormatByteSize = getTexelOrBlockBytesize(EF_A2B10G10R10_SNORM_PACK32); - - SVertexInputParams mbInputParams; - const auto stride = positionFormatByteSize + colorFormatByteSize + normalFormatByteSize; - mbInputParams.enabledBindingFlags |= core::createBitmask({ 0 }); - mbInputParams.enabledAttribFlags |= core::createBitmask({ POSITION_ATTRIBUTE, NORMAL_ATTRIBUTE, withColorAttribute ? 
COLOR_ATTRIBUTE : 0 }); - mbInputParams.bindings[0] = { stride, EVIR_PER_VERTEX }; - - mbInputParams.attributes[POSITION_ATTRIBUTE].format = EF_R32G32B32_SFLOAT; - mbInputParams.attributes[POSITION_ATTRIBUTE].relativeOffset = 0; - mbInputParams.attributes[POSITION_ATTRIBUTE].binding = 0; - - if (withColorAttribute) - { - mbInputParams.attributes[COLOR_ATTRIBUTE].format = EF_R32G32B32_SFLOAT; - mbInputParams.attributes[COLOR_ATTRIBUTE].relativeOffset = positionFormatByteSize; - mbInputParams.attributes[COLOR_ATTRIBUTE].binding = 0; - } - - mbInputParams.attributes[NORMAL_ATTRIBUTE].format = EF_R32G32B32_SFLOAT; - mbInputParams.attributes[NORMAL_ATTRIBUTE].relativeOffset = positionFormatByteSize + colorFormatByteSize; - mbInputParams.attributes[NORMAL_ATTRIBUTE].binding = 0; - - SBlendParams blendParams; - SPrimitiveAssemblyParams primitiveAssemblyParams; - primitiveAssemblyParams.primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST; + IAssetLoader::SAssetLoadContext inner; + SFileReadTelemetry ioTelemetry = {}; + static constexpr size_t TextProbeBytes = 6ull; + static constexpr size_t BinaryHeaderBytes = 80ull; + static constexpr size_t TriangleCountBytes = sizeof(uint32_t); + static constexpr size_t BinaryPrefixBytes = BinaryHeaderBytes + TriangleCountBytes; + static constexpr size_t TriangleFloatCount = 12ull; + static constexpr size_t TriangleFloatBytes = sizeof(float) * TriangleFloatCount; + static constexpr size_t TriangleAttributeBytes = sizeof(uint16_t); + static constexpr size_t TriangleRecordBytes = TriangleFloatBytes + TriangleAttributeBytes; + static constexpr size_t VerticesPerTriangle = 3ull; + static constexpr size_t FloatChannelsPerVertex = 3ull; + }; - SRasterizationParams rastarizationParmas; + static bool probeLayout(system::IFile* file, const size_t fileSize, const uint8_t* const wholeFileData, SFileReadTelemetry* const ioTelemetry, LayoutProbe& out) + { + out = {}; + if (!file || fileSize < Context::TextProbeBytes) + return false; - auto 
mbPipeline = core::make_smart_refctd_ptr(std::move(mbPipelineLayout), nullptr, nullptr, mbInputParams, blendParams, primitiveAssemblyParams, rastarizationParmas); + if (fileSize >= Context::BinaryPrefixBytes) + { + std::array prefix = {}; + out.hasPrefix = wholeFileData ? true : SInterchangeIO::readFileExact(file, prefix.data(), 0ull, Context::BinaryPrefixBytes, ioTelemetry); + if (out.hasPrefix) { - mbPipeline->setShaderAtStage(asset::IShader::ESS_VERTEX, mbVertexShader.get()); - mbPipeline->setShaderAtStage(asset::IShader::ESS_FRAGMENT, mbFragmentShader.get()); + if (wholeFileData) + std::memcpy(prefix.data(), wholeFileData, Context::BinaryPrefixBytes); + out.startsWithSolid = (std::memcmp(prefix.data(), "solid ", Context::TextProbeBytes) == 0); + std::memcpy(&out.triangleCount, prefix.data() + Context::BinaryHeaderBytes, sizeof(out.triangleCount)); + const uint64_t expectedSize = Context::BinaryPrefixBytes + static_cast(out.triangleCount) * Context::TriangleRecordBytes; + out.binaryBySize = (expectedSize == fileSize); + return true; } - - asset::SAssetBundle newPipelineBundle(nullptr, {core::smart_refctd_ptr(mbPipeline)}); - defaultOverride.insertAssetIntoCache(newPipelineBundle, pipelineCacheHash, fakeContext, _hierarchyLevel + ICPURenderpassIndependentPipeline::DESC_SET_HIERARCHYLEVELS_BELOW); } - else - return; - }; - - /* - Pipeline permutations are cached - */ - - precomputeAndCachePipeline(true); - precomputeAndCachePipeline(false); -} - -SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) -{ - if (!_file) - return {}; - - SContext context = { - asset::IAssetLoader::SAssetLoadContext{ - _params, - _file - }, - _hierarchyLevel, - _override - }; - - - const size_t filesize = context.inner.mainFile->getSize(); - if (filesize < 6ull) // we need a header - return {}; - - bool hasColor = false; - - auto mesh = 
core::make_smart_refctd_ptr(); - auto meshbuffer = core::make_smart_refctd_ptr(); - meshbuffer->setPositionAttributeIx(POSITION_ATTRIBUTE); - meshbuffer->setNormalAttributeIx(NORMAL_ATTRIBUTE); - - bool binary = false; - std::string token; - if (getNextToken(&context, token) != "solid") - binary = hasColor = true; - - core::vector positions, normals; - core::vector colors; - if (binary) - { - if (_file->getSize() < 80) - return {}; - - constexpr size_t headerOffset = 80; - context.fileOffset = headerOffset; //! skip header - - uint32_t vertexCount = 0u; - system::IFile::success_t success; - context.inner.mainFile->read(success, &vertexCount, context.fileOffset, sizeof(vertexCount)); - if (!success) - return {}; - context.fileOffset += sizeof(vertexCount); - - positions.reserve(3 * vertexCount); - normals.reserve(vertexCount); - colors.reserve(vertexCount); + char header[Context::TextProbeBytes] = {}; + if (wholeFileData) + std::memcpy(header, wholeFileData, sizeof(header)); + else if (!SInterchangeIO::readFileExact(file, header, 0ull, sizeof(header), ioTelemetry)) + return false; + out.startsWithSolid = (std::strncmp(header, "solid ", Context::TextProbeBytes) == 0); + return true; } - else - goNextLine(&context); // skip header - uint16_t attrib = 0u; - token.reserve(32); - while (context.fileOffset < filesize) // TODO: check it + class AsciiParser { - if (!binary) - { - if (getNextToken(&context, token) != "facet") + public: + inline AsciiParser(const char* begin, const char* end) : m_cursor(begin), m_end(end) {} + inline std::optional readToken() { return Common::readToken(m_cursor, m_end); } + inline std::optional readFloat() { - if (token == "endsolid") - break; - return {}; + Common::skipWhitespace(m_cursor, m_end); + float value = 0.f; + return Common::parseNumber(m_cursor, m_end, value) ? 
std::optional(value) : std::nullopt; } - if (getNextToken(&context, token) != "normal") + inline std::optional readVec3() { - return {}; + const auto x = readFloat(), y = readFloat(), z = readFloat(); + return x.has_value() && y.has_value() && z.has_value() ? std::optional(hlsl::float32_t3(*x, *y, *z)) : std::nullopt; } - } - - { - core::vectorSIMDf n; - getNextVector(&context, n, binary); - if(_params.loaderFlags & E_LOADER_PARAMETER_FLAGS::ELPF_RIGHT_HANDED_MESHES) - performActionBasedOnOrientationSystem(n.x, [](float& varToFlip) {varToFlip = -varToFlip;}); - normals.push_back(core::normalize(n)); - } + private: + const char* m_cursor = nullptr; + const char* m_end = nullptr; + }; - if (!binary) - { - if (getNextToken(&context, token) != "outer" || getNextToken(&context, token) != "loop") - return {}; - } + class SplitBlockMemoryResource final : public core::refctd_memory_resource + { + public: + inline SplitBlockMemoryResource(core::smart_refctd_ptr&& upstream, void* block, const size_t blockBytes, const size_t alignment) : m_upstream(std::move(upstream)), m_block(block), m_blockBytes(blockBytes), m_alignment(alignment) {} + inline void* allocate(std::size_t, std::size_t) override { assert(false); return nullptr; } - { - core::vectorSIMDf p[3]; - for (uint32_t i = 0u; i < 3u; ++i) + inline void deallocate(void* p, std::size_t bytes, std::size_t) override { - if (!binary) - { - if (getNextToken(&context, token) != "vertex") - return {}; - } - getNextVector(&context, p[i], binary); - if (_params.loaderFlags & E_LOADER_PARAMETER_FLAGS::ELPF_RIGHT_HANDED_MESHES) - performActionBasedOnOrientationSystem(p[i].x, [](float& varToFlip){varToFlip = -varToFlip; }); + const auto* const begin = reinterpret_cast(m_block); + const auto* const end = begin + m_blockBytes; + const auto* const ptr = reinterpret_cast(p); + assert(ptr >= begin && ptr <= end); + assert(ptr + bytes <= end); } - for (uint32_t i = 0u; i < 3u; ++i) // seems like in STL format vertices are ordered in 
clockwise manner... - positions.push_back(p[2u - i]); - } - if (!binary) - { - if (getNextToken(&context, token) != "endloop" || getNextToken(&context, token) != "endfacet") - return {}; - } - else - { - system::IFile::success_t success; - context.inner.mainFile->read(success, &attrib, context.fileOffset, sizeof(attrib)); - if (!success) - return {}; - context.fileOffset += sizeof(attrib); - } - - if (hasColor && (attrib & 0x8000u)) // assuming VisCam/SolidView non-standard trick to store color in 2 bytes of extra attribute - { - const void* srcColor[1]{ &attrib }; - uint32_t color{}; - convertColor(srcColor, &color, 0u, 0u); - colors.push_back(color); - } - else - { - hasColor = false; - colors.clear(); - } - - if ((normals.back() == core::vectorSIMDf()).all()) - { - normals.back().set( - core::plane3dSIMDf( - *(positions.rbegin() + 2), - *(positions.rbegin() + 1), - *(positions.rbegin() + 0)).getNormal() - ); - } - } // end while (_file->getPos() < filesize) - - const size_t vtxSize = hasColor ? 
(3 * sizeof(float) + 4 + 4) : (3 * sizeof(float) + 4); - auto vertexBuf = asset::ICPUBuffer::create({ vtxSize * positions.size() }); - - quant_normal_t normal; - for (size_t i = 0u; i < positions.size(); ++i) - { - if (i % 3 == 0) - normal = quantNormalCache->quantize(normals[i / 3]); - uint8_t* ptr = (reinterpret_cast(vertexBuf->getPointer())) + i * vtxSize; - memcpy(ptr, positions[i].pointer, 3 * 4); - - *reinterpret_cast(ptr + 12) = normal; - - if (hasColor) - memcpy(ptr + 16, colors.data() + i / 3, 4); - } - - const IAssetLoader::SAssetLoadContext fakeContext(IAssetLoader::SAssetLoadParams{}, nullptr); - const asset::IAsset::E_TYPE types[]{ asset::IAsset::ET_RENDERPASS_INDEPENDENT_PIPELINE, (asset::IAsset::E_TYPE)0u }; - auto pipelineBundle = _override->findCachedAsset(getPipelineCacheKey(hasColor).data(), types, fakeContext, _hierarchyLevel + ICPURenderpassIndependentPipeline::DESC_SET_HIERARCHYLEVELS_BELOW); - { - bool status = !pipelineBundle.getContents().empty(); - assert(status); - } - - auto mbPipeline = core::smart_refctd_ptr_static_cast(pipelineBundle.getContents().begin()[0]); + protected: + inline ~SplitBlockMemoryResource() override { if (m_upstream && m_block) m_upstream->deallocate(m_block, m_blockBytes, m_alignment); } - auto meta = core::make_smart_refctd_ptr(1u, std::move(m_basicViewParamsSemantics)); - meta->placeMeta(0u, mbPipeline.get()); - - meshbuffer->setPipeline(std::move(mbPipeline)); - meshbuffer->setIndexCount(positions.size()); - meshbuffer->setIndexType(asset::EIT_UNKNOWN); - - meshbuffer->setVertexBufferBinding({ 0ul, vertexBuf }, 0); - mesh->getMeshBufferVector().emplace_back(std::move(meshbuffer)); - - return SAssetBundle(std::move(meta), { std::move(mesh) }); + private: + core::smart_refctd_ptr m_upstream; + void* m_block = nullptr; + size_t m_blockBytes = 0ull; + size_t m_alignment = 1ull; + }; +}; } - -bool CSTLMeshFileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const 
+CSTLMeshFileLoader::CSTLMeshFileLoader(asset::IAssetManager*) { - if (!_file || _file->getSize() <= 6u) - return false; +} - char header[6]; - { - system::IFile::success_t success; - _file->read(success, header, 0, sizeof(header)); - if (!success) - return false; - } +const char** CSTLMeshFileLoader::getAssociatedFileExtensions() const +{ + static const char* ext[] = { "stl", nullptr }; + return ext; +} - if (strncmp(header, "solid ", 6u) == 0) - return true; - else - { - if (_file->getSize() < 84u) - return false; +SAssetBundle CSTLMeshFileLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override [[maybe_unused]], uint32_t _hierarchyLevel [[maybe_unused]]) +{ + using Context = Parse::Context; + using AsciiParser = Parse::AsciiParser; + using SplitBlockMemoryResource = Parse::SplitBlockMemoryResource; - uint32_t triangleCount; + if (!_file) + return {}; - constexpr size_t readOffset = 80; - system::IFile::success_t success; - _file->read(success, &triangleCount, readOffset, sizeof(triangleCount)); - if (!success) - return false; + uint64_t triangleCount = 0u; + const char* parsePath = "unknown"; + const bool computeContentHashes = !_params.loaderFlags.hasAnyFlag(IAssetLoader::ELPF_DONT_COMPUTE_CONTENT_HASHES); + bool hasTriangleColors = false; - constexpr size_t STL_TRI_SZ = 50u; - return _file->getSize() == (STL_TRI_SZ * triangleCount + 84u); - } -} + Context context = {asset::IAssetLoader::SAssetLoadContext{_params, _file}, 0ull}; + const size_t filesize = context.inner.mainFile->getSize(); + if (filesize < Context::TextProbeBytes) + return {}; -//! 
Read 3d vector of floats -void CSTLMeshFileLoader::getNextVector(SContext* context, core::vectorSIMDf& vec, bool binary) const -{ - if (binary) - { - { - system::IFile::success_t success; - context->inner.mainFile->read(success, &vec.X, context->fileOffset, 4); - context->fileOffset += success.getBytesProcessed(); - } - - { - system::IFile::success_t success; - context->inner.mainFile->read(success, &vec.Y, context->fileOffset, 4); - context->fileOffset += success.getBytesProcessed(); - } + impl::SLoadSession loadSession = {}; + if (!impl::SLoadSession::begin(_params.logger, "STL loader", _file, _params.ioPolicy, static_cast(filesize), true, loadSession)) + return {}; - { - system::IFile::success_t success; - context->inner.mainFile->read(success, &vec.Z, context->fileOffset, 4); - context->fileOffset += success.getBytesProcessed(); - } - } - else + core::vector wholeFilePayload; + const uint8_t* wholeFileData = nullptr; + if (loadSession.isWholeFile()) { - goNextWord(context); - std::string tmp; - - getNextToken(context, tmp); - sscanf(tmp.c_str(), "%f", &vec.X); - getNextToken(context, tmp); - sscanf(tmp.c_str(), "%f", &vec.Y); - getNextToken(context, tmp); - sscanf(tmp.c_str(), "%f", &vec.Z); + wholeFileData = loadSession.mapOrReadWholeFile(wholeFilePayload, &context.ioTelemetry); + if (!wholeFileData) + return {}; } - vec.X = -vec.X; -} -//! 
Read next word -const std::string& CSTLMeshFileLoader::getNextToken(SContext* context, std::string& token) const -{ - goNextWord(context); - char c; - token = ""; + Parse::LayoutProbe layout = {}; + if (!Parse::probeLayout(context.inner.mainFile, filesize, wholeFileData, &context.ioTelemetry, layout)) + return {}; + const bool binary = layout.binaryBySize || !layout.startsWithSolid; + const bool hasBinaryTriCountFromDetect = layout.hasPrefix; + const uint32_t binaryTriCountFromDetect = layout.triangleCount; + + auto geometry = core::make_smart_refctd_ptr(); + geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + hlsl::shapes::util::AABBAccumulator3 parsedAABB = hlsl::shapes::util::createAABBAccumulator(); + uint64_t vertexCount = 0ull; + + if (binary) { + parsePath = "binary_fast"; + if (filesize < Context::BinaryPrefixBytes) + return {}; - while (context->fileOffset != context->inner.mainFile->getSize()) - { - system::IFile::success_t success; - context->inner.mainFile->read(success, &c, context->fileOffset, sizeof(c)); - context->fileOffset += success.getBytesProcessed(); - - // found it, so leave - if (core::isspace(c)) - break; - token += c; - } - return token; + uint32_t triangleCount32 = binaryTriCountFromDetect; + if (!hasBinaryTriCountFromDetect && !SInterchangeIO::readFileExact(context.inner.mainFile, &triangleCount32, Context::BinaryHeaderBytes, sizeof(triangleCount32), &context.ioTelemetry)) + return {}; + triangleCount = triangleCount32; + const size_t dataSize = static_cast(triangleCount) * Context::TriangleRecordBytes; + const size_t expectedSize = Context::BinaryPrefixBytes + dataSize; + if (filesize < expectedSize) + return {}; + const uint8_t* payloadData = wholeFileData ? 
(wholeFileData + Context::BinaryPrefixBytes) : loadSession.readRange(Context::BinaryPrefixBytes, dataSize, wholeFilePayload, &context.ioTelemetry); + if (!payloadData) + return {}; + vertexCount = triangleCount * Context::VerticesPerTriangle; + const size_t vertexCountSizeT = static_cast(vertexCount); + if (vertexCountSizeT > (std::numeric_limits::max() / sizeof(hlsl::float32_t3))) + return {}; + const size_t viewByteSize = vertexCountSizeT * sizeof(hlsl::float32_t3); + if (viewByteSize > (std::numeric_limits::max() - viewByteSize)) + return {}; + const size_t blockBytes = viewByteSize * 2ull; + auto upstream = core::getDefaultMemoryResource(); + if (!upstream) + return {}; + void* block = upstream->allocate(blockBytes, alignof(float)); + if (!block) + return {}; + auto blockResource = core::make_smart_refctd_ptr(core::smart_refctd_ptr(std::move(upstream)), block, blockBytes, alignof(float)); + auto posBuffer = ICPUBuffer::create({{viewByteSize}, block, core::smart_refctd_ptr(blockResource), alignof(float)}, core::adopt_memory); + auto normalBuffer = ICPUBuffer::create({{viewByteSize}, reinterpret_cast(block) + viewByteSize, core::smart_refctd_ptr(blockResource), alignof(float)}, core::adopt_memory); + if (!posBuffer || !normalBuffer) + return {}; + ICPUPolygonGeometry::SDataView posView = {}; + posView.composed = {.stride = sizeof(hlsl::float32_t3), .format = EF_R32G32B32_SFLOAT, .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32G32B32_SFLOAT)}; + posView.src = {.offset = 0ull, .size = viewByteSize, .buffer = std::move(posBuffer)}; + ICPUPolygonGeometry::SDataView normalView = {}; + normalView.composed = {.stride = sizeof(hlsl::float32_t3), .format = EF_R32G32B32_SFLOAT, .rangeFormat = IGeometryBase::getMatchingAABBFormat(EF_R32G32B32_SFLOAT)}; + normalView.src = {.offset = 0ull, .size = viewByteSize, .buffer = std::move(normalBuffer)}; + auto* posOutFloat = reinterpret_cast(posView.getPointer()); + auto* normalOutFloat = 
reinterpret_cast(normalView.getPointer()); + if (!posOutFloat || !normalOutFloat) + return {}; + + const uint8_t* cursor = payloadData; + const uint8_t* const end = cursor + dataSize; + if (end < cursor || + static_cast(end - cursor) < + static_cast(triangleCount) * Context::TriangleRecordBytes) + return {}; + core::vector faceColors(static_cast(triangleCount), 0u); + std::atomic_bool colorValidForAllFaces = true; + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); + const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers( + hw, _params.ioPolicy.runtimeTuning.workerHeadroom); + SLoaderRuntimeTuningRequest parseTuningRequest = {}; + parseTuningRequest.inputBytes = dataSize; + parseTuningRequest.totalWorkUnits = triangleCount; + parseTuningRequest.minBytesPerWorker = Context::TriangleRecordBytes; + parseTuningRequest.hardwareThreads = static_cast(hw); + parseTuningRequest.hardMaxWorkers = static_cast(hardMaxWorkers); + parseTuningRequest.targetChunksPerWorker = _params.ioPolicy.runtimeTuning.targetChunksPerWorker; + parseTuningRequest.minChunkWorkUnits = 1ull; + parseTuningRequest.maxChunkWorkUnits = std::max(1ull, triangleCount); + parseTuningRequest.sampleData = payloadData; + parseTuningRequest.sampleBytes = SLoaderRuntimeTuner::resolveSampleBytes(_params.ioPolicy, dataSize); + const auto parseTuning = SLoaderRuntimeTuner::tune(_params.ioPolicy, parseTuningRequest); + const size_t workerCount = std::max(1ull, std::min(parseTuning.workerCount, static_cast(std::max(1ull, triangleCount)))); + static constexpr bool ComputeAABBInParse = true; + struct SThreadAABB { bool has = false; float minX = 0.f; float minY = 0.f; float minZ = 0.f; float maxX = 0.f; float maxY = 0.f; float maxZ = 0.f; }; + std::vector threadAABBs(ComputeAABBInParse ? 
workerCount : 0ull); + const uint64_t parseChunkTriangles = std::max(1ull, parseTuning.chunkWorkUnits); + const size_t parseChunkCount = static_cast(SLoaderRuntimeTuner::ceilDiv(triangleCount, parseChunkTriangles)); + const bool hashInParsePipeline = computeContentHashes; + std::vector hashChunkReady(hashInParsePipeline ? parseChunkCount : 0ull, 0u); + std::atomic_bool hashPipelineOk = true; + core::blake3_hash_t parsedPositionHash = static_cast(core::blake3_hasher{}); + core::blake3_hash_t parsedNormalHash = static_cast(core::blake3_hasher{}); + auto parseRange = [&](const uint64_t beginTri, const uint64_t endTri, SThreadAABB& localAABB) -> void { + const uint8_t* localCursor = payloadData + beginTri * Context::TriangleRecordBytes; + float* posCursor = posOutFloat + beginTri * Context::VerticesPerTriangle * Context::FloatChannelsPerVertex; + float* normalCursor = normalOutFloat + beginTri * Context::VerticesPerTriangle * Context::FloatChannelsPerVertex; + for (uint64_t tri = beginTri; tri < endTri; ++tri) { + const uint8_t* const triRecord = localCursor; + localCursor += Context::TriangleRecordBytes; + std::array triValues = {}; + std::memcpy(triValues.data(), triRecord, sizeof(triValues)); + uint16_t packedColor = 0u; + std::memcpy(&packedColor, triRecord + Context::TriangleFloatBytes, sizeof(packedColor)); + if (packedColor & 0x8000u) + faceColors[static_cast(tri)] = Parse::decodeViscamColorToB8G8R8A8(packedColor); + else + colorValidForAllFaces.store(false, std::memory_order_relaxed); + + float normalX = triValues[0ull]; + float normalY = triValues[1ull]; + float normalZ = triValues[2ull]; + + const float vertex0x = triValues[9ull]; + const float vertex0y = triValues[10ull]; + const float vertex0z = triValues[11ull]; + const float vertex1x = triValues[6ull]; + const float vertex1y = triValues[7ull]; + const float vertex1z = triValues[8ull]; + const float vertex2x = triValues[3ull]; + const float vertex2y = triValues[4ull]; + const float vertex2z = 
triValues[5ull]; + + posCursor[0ull] = vertex0x; + posCursor[1ull] = vertex0y; + posCursor[2ull] = vertex0z; + posCursor[3ull] = vertex1x; + posCursor[4ull] = vertex1y; + posCursor[5ull] = vertex1z; + posCursor[6ull] = vertex2x; + posCursor[7ull] = vertex2y; + posCursor[8ull] = vertex2z; + if constexpr (ComputeAABBInParse) { + if (!localAABB.has) { + localAABB.has = true; + localAABB.minX = vertex0x; + localAABB.minY = vertex0y; + localAABB.minZ = vertex0z; + localAABB.maxX = vertex0x; + localAABB.maxY = vertex0y; + localAABB.maxZ = vertex0z; + } + if (vertex0x < localAABB.minX) + localAABB.minX = vertex0x; + if (vertex0y < localAABB.minY) + localAABB.minY = vertex0y; + if (vertex0z < localAABB.minZ) + localAABB.minZ = vertex0z; + if (vertex0x > localAABB.maxX) + localAABB.maxX = vertex0x; + if (vertex0y > localAABB.maxY) + localAABB.maxY = vertex0y; + if (vertex0z > localAABB.maxZ) + localAABB.maxZ = vertex0z; + if (vertex1x < localAABB.minX) + localAABB.minX = vertex1x; + if (vertex1y < localAABB.minY) + localAABB.minY = vertex1y; + if (vertex1z < localAABB.minZ) + localAABB.minZ = vertex1z; + if (vertex1x > localAABB.maxX) + localAABB.maxX = vertex1x; + if (vertex1y > localAABB.maxY) + localAABB.maxY = vertex1y; + if (vertex1z > localAABB.maxZ) + localAABB.maxZ = vertex1z; + if (vertex2x < localAABB.minX) + localAABB.minX = vertex2x; + if (vertex2y < localAABB.minY) + localAABB.minY = vertex2y; + if (vertex2z < localAABB.minZ) + localAABB.minZ = vertex2z; + if (vertex2x > localAABB.maxX) + localAABB.maxX = vertex2x; + if (vertex2y > localAABB.maxY) + localAABB.maxY = vertex2y; + if (vertex2z > localAABB.maxZ) + localAABB.maxZ = vertex2z; + } + if (normalX == 0.f && normalY == 0.f && normalZ == 0.f) { + const float edge10x = vertex1x - vertex0x; + const float edge10y = vertex1y - vertex0y; + const float edge10z = vertex1z - vertex0z; + const float edge20x = vertex2x - vertex0x; + const float edge20y = vertex2y - vertex0y; + const float edge20z = vertex2z - 
vertex0z; + + normalX = edge10y * edge20z - edge10z * edge20y; + normalY = edge10z * edge20x - edge10x * edge20z; + normalZ = edge10x * edge20y - edge10y * edge20x; + const float planeLen2 = + normalX * normalX + normalY * normalY + normalZ * normalZ; + if (planeLen2 > 0.f) { + const float invLen = 1.f / std::sqrt(planeLen2); + normalX *= invLen; + normalY *= invLen; + normalZ *= invLen; + } else { + normalX = 0.f; + normalY = 0.f; + normalZ = 0.f; + } + } + normalCursor[0ull] = normalX; + normalCursor[1ull] = normalY; + normalCursor[2ull] = normalZ; + normalCursor[3ull] = normalX; + normalCursor[4ull] = normalY; + normalCursor[5ull] = normalZ; + normalCursor[6ull] = normalX; + normalCursor[7ull] = normalY; + normalCursor[8ull] = normalZ; + posCursor += + Context::VerticesPerTriangle * Context::FloatChannelsPerVertex; + normalCursor += + Context::VerticesPerTriangle * Context::FloatChannelsPerVertex; + } + }; + std::jthread positionHashThread; + std::jthread normalHashThread; + if (hashInParsePipeline) { + auto launchHashThread = + [&](const float* srcFloat, + core::blake3_hash_t& outHash) -> std::jthread { + return std::jthread([&, srcFloat, outHashPtr = &outHash]() { + try { + core::blake3_hasher hasher; + size_t chunkIx = 0ull; + while (chunkIx < parseChunkCount) { + auto ready = std::atomic_ref(hashChunkReady[chunkIx]); + while (ready.load(std::memory_order_acquire) == 0u) + ready.wait(0u, std::memory_order_acquire); + + size_t runEnd = chunkIx + 1ull; + while (runEnd < parseChunkCount) { + const auto runReady = + std::atomic_ref(hashChunkReady[runEnd]) + .load(std::memory_order_acquire); + if (runReady == 0u) + break; + ++runEnd; + } + + const uint64_t begin = + static_cast(chunkIx) * parseChunkTriangles; + const uint64_t endTri = std::min( + static_cast(runEnd) * parseChunkTriangles, + triangleCount); + const size_t runTriangles = static_cast(endTri - begin); + const size_t runBytes = + runTriangles * Context::VerticesPerTriangle * + 
Context::FloatChannelsPerVertex * sizeof(float); + hasher.update(srcFloat + begin * Context::VerticesPerTriangle * + Context::FloatChannelsPerVertex, + runBytes); + chunkIx = runEnd; + } + *outHashPtr = static_cast(hasher); + } catch (...) { + hashPipelineOk.store(false, std::memory_order_relaxed); + } + }); + }; + positionHashThread = launchHashThread(posOutFloat, parsedPositionHash); + normalHashThread = launchHashThread(normalOutFloat, parsedNormalHash); + } + std::atomic_size_t nextChunkIx = 0ull; + auto parseWorker = [&](const size_t workerIx) -> void { + SThreadAABB localAABB = {}; + while (true) { + const size_t chunkIx = + nextChunkIx.fetch_add(1ull, std::memory_order_relaxed); + if (chunkIx >= parseChunkCount) + break; + const uint64_t begin = + static_cast(chunkIx) * parseChunkTriangles; + const uint64_t endTri = + std::min(begin + parseChunkTriangles, triangleCount); + parseRange(begin, endTri, localAABB); + if (hashInParsePipeline) { + auto ready = std::atomic_ref(hashChunkReady[chunkIx]); + ready.store(1u, std::memory_order_release); + ready.notify_all(); + } + } + if constexpr (ComputeAABBInParse) + threadAABBs[workerIx] = localAABB; + }; + SLoaderRuntimeTuner::dispatchWorkers(workerCount, parseWorker); + if (positionHashThread.joinable()) + positionHashThread.join(); + if (normalHashThread.joinable()) + normalHashThread.join(); + if (hashInParsePipeline) { + if (!hashPipelineOk.load(std::memory_order_relaxed)) + return {}; + posView.src.buffer->setContentHash(parsedPositionHash); + normalView.src.buffer->setContentHash(parsedNormalHash); + } + if constexpr (ComputeAABBInParse) { + for (const auto& localAABB : threadAABBs) { + if (!localAABB.has) + continue; + hlsl::shapes::util::extendAABBAccumulator( + parsedAABB, localAABB.minX, localAABB.minY, localAABB.minZ); + hlsl::shapes::util::extendAABBAccumulator( + parsedAABB, localAABB.maxX, localAABB.maxY, localAABB.maxZ); + } + } + geometry->setPositionView(std::move(posView)); + 
geometry->setNormalView(std::move(normalView)); + if (colorValidForAllFaces.load(std::memory_order_relaxed)) { + core::vector vertexColors(vertexCountSizeT); + for (size_t triIx = 0ull; triIx < static_cast(triangleCount); + ++triIx) { + const uint32_t triColor = faceColors[triIx]; + const size_t baseIx = triIx * Context::VerticesPerTriangle; + vertexColors[baseIx + 0ull] = triColor; + vertexColors[baseIx + 1ull] = triColor; + vertexColors[baseIx + 2ull] = triColor; + } + auto colorView = + SGeometryLoaderCommon::createAdoptedView( + std::move(vertexColors)); + if (!colorView) + return {}; + auto* const auxViews = geometry->getAuxAttributeViews(); + auxViews->resize(SSTLPolygonGeometryAuxLayout::COLOR0 + 1u); + (*auxViews)[SSTLPolygonGeometryAuxLayout::COLOR0] = std::move(colorView); + hasTriangleColors = true; + } + } else { + parsePath = "ascii_fallback"; + if (!wholeFileData) + { + wholeFileData = loadSession.mapOrReadWholeFile(wholeFilePayload, &context.ioTelemetry); + if (!wholeFileData) + return {}; + } + + const char* const begin = reinterpret_cast(wholeFileData); + const char* const end = begin + filesize; + AsciiParser parser(begin, end); + core::vector positions; + core::vector normals; + const auto firstToken = parser.readToken(); + if (!firstToken.has_value() || *firstToken != std::string_view("solid")) + return {}; + + for (;;) { + const auto maybeToken = parser.readToken(); + if (!maybeToken.has_value()) + break; + const std::string_view textToken = *maybeToken; + if (textToken == std::string_view("endsolid")) + break; + if (textToken != std::string_view("facet")) + continue; + + const auto normalKeyword = parser.readToken(); + if (!normalKeyword.has_value() || + *normalKeyword != std::string_view("normal")) + return {}; + + const auto fileNormal = parser.readVec3(); + if (!fileNormal.has_value()) + return {}; + + const auto outerKeyword = parser.readToken(); + if (!outerKeyword.has_value() || + *outerKeyword != std::string_view("outer")) + return {}; 
+ const auto loopKeyword = parser.readToken(); + if (!loopKeyword.has_value() || *loopKeyword != std::string_view("loop")) + return {}; + + std::array p = {}; + for (uint32_t i = 0u; i < 3u; ++i) { + const auto vertexKeyword = parser.readToken(); + if (!vertexKeyword.has_value() || + *vertexKeyword != std::string_view("vertex")) + return {}; + const auto vertex = parser.readVec3(); + if (!vertex.has_value()) + return {}; + p[i] = *vertex; + } + + Parse::pushTriangleReversed(p, positions); + hlsl::float32_t3 faceNormal = Parse::resolveStoredNormal(*fileNormal); + if (hlsl::dot(faceNormal, faceNormal) <= 0.f) + faceNormal = + SGeometryNormalCommon::computeFaceNormal(p[2u], p[1u], p[0u]); + normals.push_back(faceNormal); + normals.push_back(faceNormal); + normals.push_back(faceNormal); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[2u]); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[1u]); + hlsl::shapes::util::extendAABBAccumulator(parsedAABB, p[0u]); + + const auto endLoopKeyword = parser.readToken(); + if (!endLoopKeyword.has_value() || *endLoopKeyword != std::string_view("endloop")) + return {}; + const auto endFacetKeyword = parser.readToken(); + if (!endFacetKeyword.has_value() || *endFacetKeyword != std::string_view("endfacet")) + return {}; + } + if (positions.empty()) + return {}; + + triangleCount = positions.size() / Context::VerticesPerTriangle; + vertexCount = positions.size(); + auto posView = SGeometryLoaderCommon::createAdoptedView(std::move(positions)); + auto normalView = SGeometryLoaderCommon::createAdoptedView(std::move(normals)); + if (!posView || !normalView) + return {}; + geometry->setPositionView(std::move(posView)); + geometry->setNormalView(std::move(normalView)); + } + + if (vertexCount == 0ull) + return {}; + if (computeContentHashes) + SPolygonGeometryContentHash::computeMissing(geometry.get(), _params.ioPolicy); + if (!parsedAABB.empty()) + geometry->applyAABB(parsedAABB.value); + else + 
CPolygonGeometryManipulator::recomputeAABB(geometry.get()); + const uint64_t ioMinRead = context.ioTelemetry.getMinOrZero(); + const uint64_t ioAvgRead = context.ioTelemetry.getAvgOrZero(); + loadSession.logTinyIO(_params.logger, context.ioTelemetry); + _params.logger.log( + "STL loader stats: file=%s binary=%d parse_path=%s triangles=%llu " + "vertices=%llu colors=%d io_reads=%llu io_min_read=%llu io_avg_read=%llu " + "io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, _file->getFileName().string().c_str(), + binary ? 1 : 0, parsePath, static_cast(triangleCount), + static_cast(vertexCount), hasTriangleColors ? 1 : 0, + static_cast(context.ioTelemetry.callCount), + static_cast(ioMinRead), + static_cast(ioAvgRead), + system::to_string(_params.ioPolicy.strategy).c_str(), + system::to_string(loadSession.ioPlan.strategy).c_str(), + static_cast(loadSession.ioPlan.chunkSizeBytes()), loadSession.ioPlan.reason); + auto meta = core::make_smart_refctd_ptr(); + return SAssetBundle(std::move(meta), {std::move(geometry)}); } -//! skip to next word -void CSTLMeshFileLoader::goNextWord(SContext* context) const -{ - uint8_t c; - while (context->fileOffset != context->inner.mainFile->getSize()) // TODO: check it - { - system::IFile::success_t success; - context->inner.mainFile->read(success, &c, context->fileOffset, sizeof(c)); - context->fileOffset += success.getBytesProcessed(); - - // found it, so leave - if (!core::isspace(c)) - { - context->fileOffset -= success.getBytesProcessed(); - break; - } - } +bool CSTLMeshFileLoader::isALoadableFileFormat( + system::IFile* _file, const system::logger_opt_ptr) const { + using Context = Parse::Context; + if (!_file || _file->getSize() <= Context::TextProbeBytes) + return false; + Parse::LayoutProbe layout = {}; + if (!Parse::probeLayout(_file, _file->getSize(), nullptr, nullptr, layout)) + return false; + return layout.startsWithSolid || layout.binaryBySize; } - -//! 
Read until line break is reached and stop at the next non-space character -void CSTLMeshFileLoader::goNextLine(SContext* context) const -{ - uint8_t c; - // look for newline characters - while (context->fileOffset != context->inner.mainFile->getSize()) // TODO: check it - { - system::IFile::success_t success; - context->inner.mainFile->read(success, &c, context->fileOffset, sizeof(c)); - context->fileOffset += success.getBytesProcessed(); - - // found it, so leave - if (c == '\n' || c == '\r') - break; - } } - - #endif // _NBL_COMPILE_WITH_STL_LOADER_ diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.h b/src/nbl/asset/interchange/CSTLMeshFileLoader.h index f7020ab292..dadfb1ca7f 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.h +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.h @@ -1,64 +1,26 @@ -// Copyright (C) 2019-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors #ifndef _NBL_ASSET_C_STL_MESH_FILE_LOADER_H_INCLUDED_ #define _NBL_ASSET_C_STL_MESH_FILE_LOADER_H_INCLUDED_ - - #include "nbl/core/declarations.h" - #include "nbl/asset/interchange/IGeometryLoader.h" -#include "nbl/asset/metadata/CSTLMetadata.h" - - namespace nbl::asset { - -//! Meshloader capable of loading STL meshes. +//! Mesh loader capable of loading STL meshes. class CSTLMeshFileLoader final : public IGeometryLoader { public: + explicit CSTLMeshFileLoader(asset::IAssetManager* _assetManager); - CSTLMeshFileLoader(asset::IAssetManager* _m_assetMgr); - + //! Loads one STL asset bundle from an already opened file. 
asset::SAssetBundle loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const override; - const char** getAssociatedFileExtensions() const override - { - static const char* ext[]{ "stl", nullptr }; - return ext; - } - - private: - struct SContext - { - IAssetLoader::SAssetLoadContext inner; - uint32_t topHierarchyLevel; - IAssetLoader::IAssetLoaderOverride* loaderOverride; - - size_t fileOffset = {}; - }; - - virtual void initialize() override; - - const std::string_view getPipelineCacheKey(bool withColorAttribute) { return withColorAttribute ? "nbl/builtin/pipeline/loader/STL/color_attribute" : "nbl/builtin/pipeline/loader/STL/no_color_attribute"; } - - // skips to the first non-space character available - void goNextWord(SContext* context) const; - // returns the next word - - const std::string& getNextToken(SContext* context, std::string& token) const; - // skip to next printable character after the first line break - void goNextLine(SContext* context) const; - //! Read 3d vector of floats - void getNextVector(SContext* context, core::vectorSIMDf& vec, bool binary) const; - - asset::IAssetManager* m_assetMgr; + const char** getAssociatedFileExtensions() const override; }; - -} // end namespace nbl::scene +} // end namespace nbl::asset #endif diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 45c7c1f939..db2b70aef4 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -1,474 +1,553 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +#ifdef _NBL_COMPILE_WITH_STL_WRITER_ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors -#include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" - #include "CSTLMeshWriter.h" -#include "SColor.h" - -using namespace nbl; -using namespace nbl::asset; - -#ifdef _NBL_COMPILE_WITH_STL_WRITER_ -constexpr auto POSITION_ATTRIBUTE = 0; -constexpr auto COLOR_ATTRIBUTE = 1; -constexpr auto UV_ATTRIBUTE = 2; -constexpr auto NORMAL_ATTRIBUTE = 3; - -CSTLMeshWriter::CSTLMeshWriter() +#include "impl/SFileAccess.h" +#include "nbl/asset/format/convertColor.h" +#include "nbl/asset/interchange/SSTLPolygonGeometryAuxLayout.h" +#include "nbl/asset/interchange/SGeometryWriterCommon.h" +#include "nbl/asset/interchange/SInterchangeIO.h" +#include +#include +#include +#include +#include +#include +#include +#include +namespace nbl::asset { - #ifdef _NBL_DEBUG - setDebugName("CSTLMeshWriter"); - #endif -} - - -CSTLMeshWriter::~CSTLMeshWriter() -{ -} - -//! 
writes a mesh -bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) -{ - if (!_override) - getDefaultOverride(_override); - - SAssetWriteContext inCtx{_params, _file}; - - const asset::ICPUMesh* mesh = -# ifndef _NBL_DEBUG - static_cast(_params.rootAsset); -# else - dynamic_cast(_params.rootAsset); -# endif - assert(mesh); - - system::IFile* file = _override->getOutputFile(_file, inCtx, {mesh, 0u}); - - if (!file) - return false; - - SContext context = { SAssetWriteContext{ inCtx.params, file} }; - - _params.logger.log("WRITING STL: writing the file %s", system::ILogger::ELL_INFO, file->getFileName().string().c_str()); - - const asset::E_WRITER_FLAGS flags = _override->getAssetWritingFlags(context.writeContext, mesh, 0u); - if (flags & asset::EWF_BINARY) - return writeMeshBinary(mesh, &context); - else - return writeMeshASCII(mesh, &context); -} - namespace { -template -inline void writeFacesBinary(const asset::ICPUMeshBuffer* buffer, const bool& noIndices, system::IFile* file, uint32_t _colorVaid, IAssetWriter::SAssetWriteContext* context, size_t* fileOffset) +struct Parse { - auto& inputParams = buffer->getPipeline()->getCachedCreationParams().vertexInput; - bool hasColor = inputParams.enabledAttribFlags & core::createBitmask({ COLOR_ATTRIBUTE }); - const asset::E_FORMAT colorType = static_cast(hasColor ? 
inputParams.attributes[COLOR_ATTRIBUTE].format : asset::EF_UNKNOWN); - - const uint32_t indexCount = buffer->getIndexCount(); - for (uint32_t j = 0u; j < indexCount; j += 3u) - { - I idx[3]; - for (uint32_t i = 0u; i < 3u; ++i) - { - if (noIndices) - idx[i] = j + i; - else - idx[i] = ((I*)buffer->getIndices())[j + i]; - } - - core::vectorSIMDf v[3]; - for (uint32_t i = 0u; i < 3u; ++i) - v[i] = buffer->getPosition(idx[i]); - - uint16_t color = 0u; - if (hasColor) - { - if (asset::isIntegerFormat(colorType)) - { - uint32_t res[4]; - for (uint32_t i = 0u; i < 3u; ++i) - { - uint32_t d[4]; - buffer->getAttribute(d, _colorVaid, idx[i]); - res[0] += d[0]; res[1] += d[1]; res[2] += d[2]; - } - color = video::RGB16(res[0]/3, res[1]/3, res[2]/3); - } - else - { - core::vectorSIMDf res; - for (uint32_t i = 0u; i < 3u; ++i) - { - core::vectorSIMDf d; - buffer->getAttribute(d, _colorVaid, idx[i]); - res += d; - } - res /= 3.f; - color = video::RGB16(res.X, res.Y, res.Z); - } - } - - core::vectorSIMDf normal = core::plane3dSIMDf(v[0], v[1], v[2]).getNormal(); - core::vectorSIMDf vertex1 = v[2]; - core::vectorSIMDf vertex2 = v[1]; - core::vectorSIMDf vertex3 = v[0]; - - auto flipVectors = [&]() - { - vertex1.X = -vertex1.X; - vertex2.X = -vertex2.X; - vertex3.X = -vertex3.X; - normal = core::plane3dSIMDf(vertex1, vertex2, vertex3).getNormal(); - }; - - if (!(context->params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED)) - flipVectors(); - - { - system::IFile::success_t success;; - file->write(success, &normal, *fileOffset, 12); - - *fileOffset += success.getBytesProcessed(); - } - - { - system::IFile::success_t success;; - file->write(success, &vertex1, *fileOffset, 12); - - *fileOffset += success.getBytesProcessed(); - } - - { - system::IFile::success_t success;; - file->write(success, &vertex2, *fileOffset, 12); - - *fileOffset += success.getBytesProcessed(); - } - - { - system::IFile::success_t success;; - file->write(success, &vertex3, *fileOffset, 12); - - *fileOffset 
+= success.getBytesProcessed(); - } - - { - system::IFile::success_t success;; - file->write(success, &color, *fileOffset, 2); // saving color using non-standard VisCAM/SolidView trick - - *fileOffset += success.getBytesProcessed(); - } - } -} -} - -bool CSTLMeshWriter::writeMeshBinary(const asset::ICPUMesh* mesh, SContext* context) -{ - // write STL MESH header - const char headerTxt[] = "Irrlicht-baw Engine"; - constexpr size_t HEADER_SIZE = 80u; - - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, headerTxt, context->fileOffset, sizeof(headerTxt)); - - context->fileOffset += success.getBytesProcessed(); - } - - const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); // TODO: check it - const int32_t sizeleft = HEADER_SIZE - sizeof(headerTxt) - name.size(); - - if (sizeleft < 0) + struct Context { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, name.c_str(), context->fileOffset, HEADER_SIZE - sizeof(headerTxt)); - - context->fileOffset += success.getBytesProcessed(); - } - else - { - const char buf[80] = {0}; - + IAssetWriter::SAssetWriteContext writeContext; + SResolvedFileIOPolicy ioPlan = {}; + core::vector ioBuffer = {}; + size_t fileOffset = 0ull; + SFileWriteTelemetry writeTelemetry = {}; + bool flush() { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, name.c_str(), context->fileOffset, name.size()); - - context->fileOffset += success.getBytesProcessed(); + if (ioBuffer.empty()) + return true; + size_t bytesWritten = 0ull; + const size_t totalBytes = ioBuffer.size(); + while (bytesWritten < totalBytes) + { + system::IFile::success_t success; + writeContext.outputFile->write(success, ioBuffer.data() + bytesWritten, fileOffset + bytesWritten, totalBytes - bytesWritten); + if (!success) + return false; + const size_t processed = success.getBytesProcessed(); + if (processed == 0ull) + 
return false; + writeTelemetry.account(processed); + bytesWritten += processed; + } + fileOffset += totalBytes; + ioBuffer.clear(); + return true; } - + bool write(const void* data, size_t size) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, buf, context->fileOffset, sizeleft); - - context->fileOffset += success.getBytesProcessed(); + if (!data && size != 0ull) + return false; + if (size == 0ull) + return true; + const uint8_t* src = reinterpret_cast(data); + switch (ioPlan.strategy) + { + case SResolvedFileIOPolicy::Strategy::WholeFile: + { + const size_t oldSize = ioBuffer.size(); + ioBuffer.resize(oldSize + size); + std::memcpy(ioBuffer.data() + oldSize, src, size); + return true; + } + case SResolvedFileIOPolicy::Strategy::Chunked: + default: + { + const size_t chunkSize = static_cast(ioPlan.chunkSizeBytes()); + size_t remaining = size; + while (remaining > 0ull) + { + const size_t freeSpace = chunkSize - ioBuffer.size(); + const size_t toCopy = std::min(freeSpace, remaining); + const size_t oldSize = ioBuffer.size(); + ioBuffer.resize(oldSize + toCopy); + std::memcpy(ioBuffer.data() + oldSize, src, toCopy); + src += toCopy; + remaining -= toCopy; + if (ioBuffer.size() == chunkSize && !flush()) + return false; + } + return true; + } + } } - } - - uint32_t facenum = 0; - for (auto& mb : mesh->getMeshBuffers()) - facenum += mb->getIndexCount()/3; + }; + struct TriangleData { hlsl::float32_t3 normal = {}; hlsl::float32_t3 vertex1 = {}; hlsl::float32_t3 vertex2 = {}; hlsl::float32_t3 vertex3 = {}; }; + static constexpr size_t BinaryHeaderBytes = 80ull; + static constexpr size_t BinaryTriangleCountBytes = sizeof(uint32_t); + static constexpr size_t BinaryTriangleFloatCount = 12ull; + static constexpr size_t BinaryTriangleFloatBytes = sizeof(float) * BinaryTriangleFloatCount; + static constexpr size_t BinaryTriangleAttributeBytes = sizeof(uint16_t); + static constexpr size_t BinaryTriangleRecordBytes = 
BinaryTriangleFloatBytes + BinaryTriangleAttributeBytes; + static constexpr size_t BinaryPrefixBytes = BinaryHeaderBytes + BinaryTriangleCountBytes; + static constexpr size_t IoFallbackReserveBytes = 1ull << 20; + static constexpr size_t AsciiFaceTextMaxBytes = 1024ull; + static constexpr char AsciiSolidPrefix[] = "solid "; + static constexpr char AsciiEndSolidPrefix[] = "endsolid "; + static constexpr char AsciiDefaultName[] = "nabla_mesh"; + static_assert(BinaryTriangleRecordBytes == 50ull); + static bool appendLiteral(char*& cursor, char* const end, const char* text, const size_t textSize) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, &facenum, context->fileOffset, sizeof(facenum)); - - context->fileOffset += success.getBytesProcessed(); + if (!cursor || cursor + textSize > end) + return false; + std::memcpy(cursor, text, textSize); + cursor += textSize; + return true; } - // write mesh buffers - - for (auto& buffer : mesh->getMeshBuffers()) - if (buffer) + static bool appendVectorAsAsciiLine(char*& cursor, char* const end, const hlsl::float32_t3& v) { - asset::E_INDEX_TYPE type = buffer->getIndexType(); - if (!buffer->getIndexBufferBinding().buffer) - type = asset::EIT_UNKNOWN; - - if (type== asset::EIT_16BIT) - writeFacesBinary(buffer, false, context->writeContext.outputFile, COLOR_ATTRIBUTE, &context->writeContext, &context->fileOffset); - else if (type== asset::EIT_32BIT) - writeFacesBinary(buffer, false, context->writeContext.outputFile, COLOR_ATTRIBUTE, &context->writeContext, &context->fileOffset); - else - writeFacesBinary(buffer, true, context->writeContext.outputFile, COLOR_ATTRIBUTE, &context->writeContext, &context->fileOffset); //template param doesn't matter if there's no indices + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.x); + if (cursor >= end) + return false; + *(cursor++) = ' '; + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.y); + if (cursor >= end) + 
return false; + *(cursor++) = ' '; + cursor = SGeometryWriterCommon::appendFloatToBuffer(cursor, end, v.z); + if (cursor >= end) + return false; + *(cursor++) = '\n'; + return true; } - return true; -} - -bool CSTLMeshWriter::writeMeshASCII(const asset::ICPUMesh* mesh, SContext* context) -{ - // write STL MESH header - const char headerTxt[] = "Irrlicht-baw Engine "; - + static bool decodeTriangle(const ICPUPolygonGeometry* geom, const IPolygonGeometryBase::IIndexingCallback* indexing, const ICPUPolygonGeometry::SDataView& posView, uint32_t primIx, hlsl::float32_t3& out0, hlsl::float32_t3& out1, hlsl::float32_t3& out2, hlsl::uint32_t3* outIdx) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, "solid ", context->fileOffset, 6); - - context->fileOffset += success.getBytesProcessed(); + hlsl::uint32_t3 idx(0u); + const auto& indexView = geom->getIndexView(); + const void* indexBuffer = indexView ? indexView.getPointer() : nullptr; + const uint64_t indexSize = indexView ? 
indexView.composed.getStride() : 0u; + IPolygonGeometryBase::IIndexingCallback::SContext ctx = {.indexBuffer = indexBuffer, .indexSize = indexSize, .beginPrimitive = primIx, .endPrimitive = primIx + 1u, .out = &idx.x}; + indexing->operator()(ctx); + if (outIdx) + *outIdx = idx; + std::array positions = {}; + if (!decodeIndexedTriple(idx, [&posView](const uint32_t vertexIx, hlsl::float32_t3& out) -> bool { return posView.decodeElement(vertexIx, out); }, positions.data())) + return false; + out0 = positions[0]; + out1 = positions[1]; + out2 = positions[2]; + return true; } - - + template + static bool decodeIndexedTriple(const hlsl::uint32_t3& idx, DecodeFn&& decode, T* out) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, headerTxt, context->fileOffset, sizeof(headerTxt) - 1); - - context->fileOffset += success.getBytesProcessed(); + return out && decode(idx.x, out[0]) && decode(idx.y, out[1]) && decode(idx.z, out[2]); } - - const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); - + static bool decodeTriangleNormal(const ICPUPolygonGeometry::SDataView& normalView, const hlsl::uint32_t3& idx, hlsl::float32_t3& outNormal) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, name.c_str(), context->fileOffset, name.size()); - - context->fileOffset += success.getBytesProcessed(); + if (!normalView) + return false; + std::array normals = {}; + if (!decodeIndexedTriple(idx, [&normalView](const uint32_t vertexIx, hlsl::float32_t3& out) -> bool { return normalView.decodeElement(vertexIx, out); }, normals.data())) + return false; + return selectFirstValidNormal(normals.data(), static_cast(normals.size()), outNormal); } - - + static bool selectFirstValidNormal(const hlsl::float32_t3* const normals, const uint32_t count, hlsl::float32_t3& outNormal) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, "\n", 
context->fileOffset, 1); - - context->fileOffset += success.getBytesProcessed(); + if (!normals || count == 0u) + return false; + for (uint32_t i = 0u; i < count; ++i) + if (hlsl::dot(normals[i], normals[i]) > 0.f) + return outNormal = normals[i], true; + return false; } - - // write mesh buffers - for (auto& buffer : mesh->getMeshBuffers()) - if (buffer) + static void prepareVertices(const hlsl::float32_t3& p0, const hlsl::float32_t3& p1, const hlsl::float32_t3& p2, const bool flipHandedness, hlsl::float32_t3& vertex1, hlsl::float32_t3& vertex2, hlsl::float32_t3& vertex3) { - asset::E_INDEX_TYPE type = buffer->getIndexType(); - if (!buffer->getIndexBufferBinding().buffer) - type = asset::EIT_UNKNOWN; - const uint32_t indexCount = buffer->getIndexCount(); - if (type==asset::EIT_16BIT) - { - //os::Printer::log("Writing mesh with 16bit indices"); - for (uint32_t j=0; jgetPosition(((uint16_t*)buffer->getIndices())[j]), - buffer->getPosition(((uint16_t*)buffer->getIndices())[j+1]), - buffer->getPosition(((uint16_t*)buffer->getIndices())[j+2]), - context - ); - } - } - else if (type==asset::EIT_32BIT) - { - //os::Printer::log("Writing mesh with 32bit indices"); - for (uint32_t j=0; jgetPosition(((uint32_t*)buffer->getIndices())[j]), - buffer->getPosition(((uint32_t*)buffer->getIndices())[j+1]), - buffer->getPosition(((uint32_t*)buffer->getIndices())[j+2]), - context - ); - } - } - else - { - //os::Printer::log("Writing mesh with no indices"); - for (uint32_t j=0; jgetPosition(j), - buffer->getPosition(j+1ul), - buffer->getPosition(j+2ul), - context - ); - } - } - + vertex1 = p2; + vertex2 = p1; + vertex3 = p0; + if (flipHandedness) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, "\n", context->fileOffset, 1); - - context->fileOffset += success.getBytesProcessed(); + vertex1.x = -vertex1.x; + vertex2.x = -vertex2.x; + vertex3.x = -vertex3.x; } } - + static hlsl::float32_t3 computePlaneNormal(const hlsl::float32_t3& vertex1, const 
hlsl::float32_t3& vertex2, const hlsl::float32_t3& vertex3, float* const planeNormalLen2 = nullptr) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, "endsolid ", context->fileOffset, 9); - - context->fileOffset += success.getBytesProcessed(); + const hlsl::float32_t3 planeNormal = hlsl::cross(vertex2 - vertex1, vertex3 - vertex1); + const float len2 = hlsl::dot(planeNormal, planeNormal); + if (planeNormalLen2) + return *planeNormalLen2 = len2, planeNormal; + return len2 > 0.f ? hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); } - + static hlsl::float32_t3 resolveTriangleNormal(const hlsl::float32_t3& planeNormal, const float planeNormalLen2, const hlsl::float32_t3* const attrNormals, const uint32_t attrNormalCount, const bool flipHandedness, const bool alignToPlane) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, headerTxt, context->fileOffset, sizeof(headerTxt) - 1); - - context->fileOffset += success.getBytesProcessed(); + hlsl::float32_t3 attrNormal = {}; + if (selectFirstValidNormal(attrNormals, attrNormalCount, attrNormal)) + { + if (flipHandedness) + attrNormal.x = -attrNormal.x; + if (alignToPlane && planeNormalLen2 > 0.f && hlsl::dot(attrNormal, planeNormal) < 0.f) + attrNormal = -attrNormal; + return attrNormal; + } + return planeNormalLen2 > 0.f ? 
hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); } - + static void buildTriangle(const hlsl::float32_t3& p0, const hlsl::float32_t3& p1, const hlsl::float32_t3& p2, const hlsl::float32_t3* const attrNormals, const uint32_t attrNormalCount, const bool flipHandedness, const bool alignToPlane, TriangleData& triangle) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, name.c_str(), context->fileOffset, name.size()); - - context->fileOffset += success.getBytesProcessed(); + prepareVertices(p0, p1, p2, flipHandedness, triangle.vertex1, triangle.vertex2, triangle.vertex3); + float planeNormalLen2 = 0.f; + const hlsl::float32_t3 planeNormal = computePlaneNormal(triangle.vertex1, triangle.vertex2, triangle.vertex3, &planeNormalLen2); + triangle.normal = resolveTriangleNormal(planeNormal, planeNormalLen2, attrNormals, attrNormalCount, flipHandedness, alignToPlane); } - - return true; -} - -void CSTLMeshWriter::getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s) const -{ - std::ostringstream tmp; - tmp << v.X << " " << v.Y << " " << v.Z << "\n"; - s = std::string(tmp.str().c_str()); -} - -void CSTLMeshWriter::writeFaceText( - const core::vectorSIMDf& v1, - const core::vectorSIMDf& v2, - const core::vectorSIMDf& v3, - SContext* context) -{ - core::vectorSIMDf vertex1 = v3; - core::vectorSIMDf vertex2 = v2; - core::vectorSIMDf vertex3 = v1; - core::vectorSIMDf normal = core::plane3dSIMDf(vertex1, vertex2, vertex3).getNormal(); - std::string tmp; - - auto flipVectors = [&]() - { - vertex1.X = -vertex1.X; - vertex2.X = -vertex2.X; - vertex3.X = -vertex3.X; - normal = core::plane3dSIMDf(vertex1, vertex2, vertex3).getNormal(); - }; - - if (!(context->writeContext.params.flags & E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED)) - flipVectors(); - - { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, "facet normal ", context->fileOffset, 13); - - context->fileOffset += 
success.getBytesProcessed(); - } - - getVectorAsStringLine(normal, tmp); - + static double normalizeColorComponentToUnit(double value) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, tmp.c_str(), context->fileOffset, tmp.size()); - - context->fileOffset += success.getBytesProcessed(); + if (!std::isfinite(value)) + return 0.0; + if (value > 1.0) + value /= 255.0; + return std::clamp(value, 0.0, 1.0); } - + struct PackedColor { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, " outer loop\n", context->fileOffset, 13); - - context->fileOffset += success.getBytesProcessed(); - } - + uint32_t value = 0u; + E_FORMAT format = EF_B8G8R8A8_UNORM; + }; + static uint16_t packViscamColorFromB8G8R8A8(const uint32_t color) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, " vertex ", context->fileOffset, 11); - - context->fileOffset += success.getBytesProcessed(); + const void* src[4] = {&color, nullptr, nullptr, nullptr}; + uint16_t packed = 0u; + convertColor(src, &packed, 0u, 0u); + return packed | 0x8000u; } - - getVectorAsStringLine(vertex1, tmp); - + static const ICPUPolygonGeometry::SDataView* getColorView(const ICPUPolygonGeometry* geom, const size_t vertexCount) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, tmp.c_str(), context->fileOffset, tmp.size()); - - context->fileOffset += success.getBytesProcessed(); + const auto* view = SGeometryWriterCommon::getAuxViewAt(geom, SSTLPolygonGeometryAuxLayout::COLOR0, vertexCount); + return view && getFormatChannelCount(view->composed.format) >= 3u ? 
view : nullptr; } - + static bool decodeColorB8G8R8A8(const ICPUPolygonGeometry::SDataView& colorView, const uint32_t ix, PackedColor& outColor) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, " vertex ", context->fileOffset, 11); - - context->fileOffset += success.getBytesProcessed(); + if ((colorView.composed.format == EF_B8G8R8A8_UNORM || colorView.composed.format == EF_B8G8R8A8_SRGB) && colorView.composed.getStride() == sizeof(uint32_t)) + { + const auto* const ptr = reinterpret_cast(colorView.getPointer()); + if (!ptr) + return false; + std::memcpy(&outColor.value, ptr + static_cast(ix) * sizeof(uint32_t), sizeof(outColor.value)); + outColor.format = colorView.composed.format; + return true; + } + hlsl::float32_t4 decoded = {}; + if (!colorView.decodeElement(ix, decoded)) + return false; + const double rgbaUnit[4] = {normalizeColorComponentToUnit(decoded.x), normalizeColorComponentToUnit(decoded.y), normalizeColorComponentToUnit(decoded.z), getFormatChannelCount(colorView.composed.format) >= 4u ? 
normalizeColorComponentToUnit(decoded.w) : 1.0}; + encodePixels(&outColor.value, rgbaUnit); + outColor.format = EF_B8G8R8A8_UNORM; + return true; } - - getVectorAsStringLine(vertex2, tmp); - + static void decodeColorUnitRGBAFromB8G8R8A8(const PackedColor& color, double* const outRGBA) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, tmp.c_str(), context->fileOffset, tmp.size()); - - context->fileOffset += success.getBytesProcessed(); + const void* src[4] = {&color.value, nullptr, nullptr, nullptr}; + if (color.format == EF_B8G8R8A8_SRGB) + decodePixels(src, outRGBA, 0u, 0u); + else + decodePixels(src, outRGBA, 0u, 0u); } - + static bool writeMeshBinary(const asset::ICPUPolygonGeometry* geom, Context* context) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, " vertex ", context->fileOffset, 11); - - context->fileOffset += success.getBytesProcessed(); + if (!geom || !context || !context->writeContext.outputFile) + return false; + const auto& posView = geom->getPositionView(); + if (!posView) + return false; + const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const size_t vertexCount = posView.getElementCount(); + if (vertexCount == 0ull) + return false; + size_t faceCount = 0ull; + if (!SGeometryWriterCommon::getTriangleFaceCount(geom, faceCount)) + return false; + if (faceCount > static_cast(std::numeric_limits::max())) + return false; + const uint32_t facenum = static_cast(faceCount); + const size_t outputSize = BinaryPrefixBytes + static_cast(facenum) * BinaryTriangleRecordBytes; + std::unique_ptr output(new (std::nothrow) uint8_t[outputSize]); + if (!output) + return false; + uint8_t* dst = output.get(); + std::memset(dst, 0, BinaryHeaderBytes); + dst += BinaryHeaderBytes; + std::memcpy(dst, &facenum, sizeof(facenum)); + dst += sizeof(facenum); + const auto& normalView = geom->getNormalView(); + const bool 
hasNormals = static_cast(normalView); + const auto* const colorView = getColorView(geom, vertexCount); + const hlsl::float32_t3* const tightPositions = SGeometryWriterCommon::getTightView(posView); + const hlsl::float32_t3* const tightNormals = hasNormals ? SGeometryWriterCommon::getTightView(normalView) : nullptr; + const bool hasFastTightPath = !geom->getIndexView() && tightPositions && (!hasNormals || tightNormals); + const float handednessSign = flipHandedness ? -1.f : 1.f; + auto decodePosition = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { return tightPositions ? (out = tightPositions[ix], true) : posView.decodeElement(ix, out); }; + auto decodeNormal = [&](const uint32_t ix, hlsl::float32_t3& out) -> bool { return hasNormals && (tightNormals ? (out = tightNormals[ix], true) : normalView.decodeElement(ix, out)); }; + auto computeFaceColor = [&](const hlsl::uint32_t3& idx, uint16_t& outColor) -> bool { + outColor = 0u; + if (!colorView) + return true; + const std::array vertexIx = {idx.x, idx.y, idx.z}; + std::array rgbaAvg = {}; + for (uint32_t corner = 0u; corner < vertexIx.size(); ++corner) + { + PackedColor color = {}; + if (!decodeColorB8G8R8A8(*colorView, vertexIx[corner], color)) + return false; + std::array rgba = {}; + decodeColorUnitRGBAFromB8G8R8A8(color, rgba.data()); + rgbaAvg[0] += rgba[0]; + rgbaAvg[1] += rgba[1]; + rgbaAvg[2] += rgba[2]; + } + rgbaAvg[0] /= 3.0; + rgbaAvg[1] /= 3.0; + rgbaAvg[2] /= 3.0; + rgbaAvg[3] = 1.0; + uint32_t avgColor = 0u; + encodePixels(&avgColor, rgbaAvg.data()); + outColor = packViscamColorFromB8G8R8A8(avgColor); + return true; + }; + auto writeRecord = [&dst](const hlsl::float32_t3& normal, const hlsl::float32_t3& vertex1, const hlsl::float32_t3& vertex2, const hlsl::float32_t3& vertex3, const uint16_t attribute) -> void { + const float payload[BinaryTriangleFloatCount] = {normal.x, normal.y, normal.z, vertex1.x, vertex1.y, vertex1.z, vertex2.x, vertex2.y, vertex2.z, vertex3.x, vertex3.y, vertex3.z}; + 
std::memcpy(dst, payload, BinaryTriangleFloatBytes); + dst += BinaryTriangleFloatBytes; + std::memcpy(dst, &attribute, BinaryTriangleAttributeBytes); + dst += BinaryTriangleAttributeBytes; + }; + auto emitTriangle = [&](const hlsl::float32_t3& p0, const hlsl::float32_t3& p1, const hlsl::float32_t3& p2, const hlsl::uint32_t3& idx, const hlsl::float32_t3* const attrNormals, const uint32_t attrNormalCount, const bool alignToPlane) -> bool { + uint16_t faceColor = 0u; + if (!computeFaceColor(idx, faceColor)) + return false; + TriangleData triangle = {}; + buildTriangle(p0, p1, p2, attrNormals, attrNormalCount, flipHandedness, alignToPlane, triangle); + writeRecord(triangle.normal, triangle.vertex1, triangle.vertex2, triangle.vertex3, faceColor); + return true; + }; + if (hasFastTightPath && hasNormals) + { + const hlsl::float32_t3* posTri = tightPositions; + const hlsl::float32_t3* nrmTri = tightNormals; + bool allFastNormalsNonZero = true; + for (size_t i = 0ull, normalCount = static_cast(facenum) * 3ull; i < normalCount; ++i) + { + const auto& n = tightNormals[i]; + if (n.x == 0.f && n.y == 0.f && n.z == 0.f) + { + allFastNormalsNonZero = false; + break; + } + } + for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u, nrmTri += 3u) + { + const hlsl::uint32_t3 idx(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u); + uint16_t faceColor = 0u; + if (!computeFaceColor(idx, faceColor)) + return false; + hlsl::float32_t3 vertex1 = posTri[2u]; + hlsl::float32_t3 vertex2 = posTri[1u]; + hlsl::float32_t3 vertex3 = posTri[0u]; + vertex1.x *= handednessSign; + vertex2.x *= handednessSign; + vertex3.x *= handednessSign; + hlsl::float32_t3 normal = {}; + if (allFastNormalsNonZero) + { + normal = nrmTri[0u]; + if (flipHandedness) + normal.x = -normal.x; + } + else if (selectFirstValidNormal(nrmTri, 3u, normal)) + { + if (flipHandedness) + normal.x = -normal.x; + } + else + { + float planeNormalLen2 = 0.f; + const hlsl::float32_t3 planeNormal = 
computePlaneNormal(vertex1, vertex2, vertex3, &planeNormalLen2); + normal = planeNormalLen2 > 0.f ? hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); + } + writeRecord(normal, vertex1, vertex2, vertex3, faceColor); + } + } + else if (hasFastTightPath) + { + const hlsl::float32_t3* posTri = tightPositions; + for (uint32_t primIx = 0u; primIx < facenum; ++primIx, posTri += 3u) + { + const hlsl::uint32_t3 idx(primIx * 3u + 0u, primIx * 3u + 1u, primIx * 3u + 2u); + uint16_t faceColor = 0u; + if (!computeFaceColor(idx, faceColor)) + return false; + hlsl::float32_t3 vertex1 = posTri[2u]; + hlsl::float32_t3 vertex2 = posTri[1u]; + hlsl::float32_t3 vertex3 = posTri[0u]; + vertex1.x *= handednessSign; + vertex2.x *= handednessSign; + vertex3.x *= handednessSign; + float planeNormalLen2 = 0.f; + const hlsl::float32_t3 planeNormal = computePlaneNormal(vertex1, vertex2, vertex3, &planeNormalLen2); + const hlsl::float32_t3 normal = planeNormalLen2 > 0.f ? hlsl::normalize(planeNormal) : hlsl::float32_t3(0.f, 0.f, 0.f); + writeRecord(normal, vertex1, vertex2, vertex3, faceColor); + } + } + else if (!SGeometryWriterCommon::visitTriangleIndices(geom, [&](const uint32_t i0, const uint32_t i1, const uint32_t i2) -> bool { + const hlsl::uint32_t3 idx(i0, i1, i2); + std::array positions = {}; + if (!decodeIndexedTriple(idx, decodePosition, positions.data())) + return false; + std::array normals = {}; + if (hasNormals && !decodeIndexedTriple(idx, decodeNormal, normals.data())) + return false; + return emitTriangle(positions[0], positions[1], positions[2], idx, hasNormals ? normals.data() : nullptr, hasNormals ? 
3u : 0u, true); + })) + return false; + const bool writeOk = SInterchangeIO::writeFileWithPolicy(context->writeContext.outputFile, context->ioPlan, output.get(), outputSize, &context->writeTelemetry); + if (writeOk) + context->fileOffset += outputSize; + return writeOk; } - - getVectorAsStringLine(vertex3, tmp); - + static bool writeMeshASCII(const asset::ICPUPolygonGeometry* geom, Context* context) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, tmp.c_str(), context->fileOffset, tmp.size()); - - context->fileOffset += success.getBytesProcessed(); + if (!geom) + return false; + const auto* indexing = geom->getIndexingCallback(); + if (!indexing || indexing->degree() != 3u) + return false; + const auto& posView = geom->getPositionView(); + if (!posView) + return false; + const auto& normalView = geom->getNormalView(); + const bool flipHandedness = !context->writeContext.params.flags.hasAnyFlag(E_WRITER_FLAGS::EWF_MESH_IS_RIGHT_HANDED); + const std::string name = context->writeContext.outputFile->getFileName().filename().replace_extension().string(); + const std::string_view solidName = name.empty() ? 
std::string_view(AsciiDefaultName) : std::string_view(name); + if (!context->write(AsciiSolidPrefix, sizeof(AsciiSolidPrefix) - 1ull) || !context->write(solidName.data(), solidName.size()) || !context->write("\n", sizeof("\n") - 1ull)) + return false; + const uint32_t faceCount = static_cast(geom->getPrimitiveCount()); + for (uint32_t primIx = 0u; primIx < faceCount; ++primIx) + { + hlsl::float32_t3 v0 = {}; + hlsl::float32_t3 v1 = {}; + hlsl::float32_t3 v2 = {}; + hlsl::uint32_t3 idx(0u); + if (!decodeTriangle(geom, indexing, posView, primIx, v0, v1, v2, &idx)) + return false; + if (!writeFaceText(v0, v1, v2, idx, normalView, flipHandedness, context)) + return false; + if (!context->write("\n", sizeof("\n") - 1ull)) + return false; + } + return context->write(AsciiEndSolidPrefix, sizeof(AsciiEndSolidPrefix) - 1ull) && context->write(solidName.data(), solidName.size()); } - + static bool writeFaceText(const hlsl::float32_t3& v1, const hlsl::float32_t3& v2, const hlsl::float32_t3& v3, const hlsl::uint32_t3& idx, const asset::ICPUPolygonGeometry::SDataView& normalView, const bool flipHandedness, Context* context) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, " endloop\n", context->fileOffset, 10); - - context->fileOffset += success.getBytesProcessed(); + hlsl::float32_t3 attrNormal = {}; + TriangleData triangle = {}; + const hlsl::float32_t3* const attrNormalPtr = decodeTriangleNormal(normalView, idx, attrNormal) ? &attrNormal : nullptr; + buildTriangle(v1, v2, v3, attrNormalPtr, attrNormalPtr ? 
1u : 0u, flipHandedness, true, triangle); + std::array faceText = {}; + char* cursor = faceText.data(); + char* const end = faceText.data() + faceText.size(); + const std::array vertices = {triangle.vertex1, triangle.vertex2, triangle.vertex3}; + if (!appendLiteral(cursor, end, "facet normal ", sizeof("facet normal ") - 1ull)) + return false; + if (!appendVectorAsAsciiLine(cursor, end, triangle.normal)) + return false; + if (!appendLiteral(cursor, end, " outer loop\n", sizeof(" outer loop\n") - 1ull)) + return false; + for (const auto& vertex : vertices) + { + if (!appendLiteral(cursor, end, " vertex ", sizeof(" vertex ") - 1ull)) + return false; + if (!appendVectorAsAsciiLine(cursor, end, vertex)) + return false; + } + if (!appendLiteral(cursor, end, " endloop\n", sizeof(" endloop\n") - 1ull)) + return false; + if (!appendLiteral(cursor, end, "endfacet\n", sizeof("endfacet\n") - 1ull)) + return false; + return context->write(faceText.data(), static_cast(cursor - faceText.data())); } - +}; +} +CSTLMeshWriter::CSTLMeshWriter() +{ + #ifdef _NBL_DEBUG + setDebugName("CSTLMeshWriter"); + #endif +} +CSTLMeshWriter::~CSTLMeshWriter() +{ +} +const char** CSTLMeshWriter::getAssociatedFileExtensions() const +{ + static const char* ext[] = { "stl", nullptr }; + return ext; +} +writer_flags_t CSTLMeshWriter::getSupportedFlags() +{ + return writer_flags_t(asset::EWF_BINARY | asset::EWF_MESH_IS_RIGHT_HANDED); +} +writer_flags_t CSTLMeshWriter::getForcedFlags() +{ + return EWF_NONE; +} +bool CSTLMeshWriter::writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override) +{ + using Context = Parse::Context; + if (!_override) + getDefaultOverride(_override); + IAssetWriter::SAssetWriteContext inCtx{_params, _file}; + const asset::ICPUPolygonGeometry* geom = IAsset::castDown(_params.rootAsset); + if (!geom) + return false; + system::IFile* file = _override->getOutputFile(_file, inCtx, {geom, 0u}); + if (!file) + return false; + Context context = 
{IAssetWriter::SAssetWriteContext{inCtx.params, file}}; + _params.logger.log("WRITING STL: writing the file %s", system::ILogger::ELL_INFO, file->getFileName().string().c_str()); + const auto flags = _override->getAssetWritingFlags(context.writeContext, geom, 0u); + const bool binary = flags.hasAnyFlag(asset::EWF_BINARY); + uint64_t expectedSize = 0ull; + bool sizeKnown = false; + if (binary) { - system::IFile::success_t success;; - context->writeContext.outputFile->write(success, "endfacet\n", context->fileOffset, 9); - - context->fileOffset += success.getBytesProcessed(); + expectedSize = Parse::BinaryPrefixBytes + static_cast(geom->getPrimitiveCount()) * Parse::BinaryTriangleRecordBytes; + sizeKnown = true; } + context.ioPlan = impl::SFileAccess::resolvePlan(_params.ioPolicy, expectedSize, sizeKnown, file); + if (impl::SFileAccess::logInvalidPlan(_params.logger, "STL writer", file->getFileName().string().c_str(), context.ioPlan)) + return false; + context.ioBuffer.reserve(static_cast(context.ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile && sizeKnown ? expectedSize : std::min(context.ioPlan.chunkSizeBytes(), Parse::IoFallbackReserveBytes))); + const bool written = binary ? Parse::writeMeshBinary(geom, &context) : Parse::writeMeshASCII(geom, &context); + if (!written) + return false; + if (!context.flush()) + return false; + const uint64_t ioMinWrite = context.writeTelemetry.getMinOrZero(); + const uint64_t ioAvgWrite = context.writeTelemetry.getAvgOrZero(); + impl::SFileAccess::logTinyIO(_params.logger, "STL writer", file->getFileName().string().c_str(), context.writeTelemetry, context.fileOffset, _params.ioPolicy, "writes"); + _params.logger.log("STL writer stats: file=%s bytes=%llu binary=%d io_writes=%llu io_min_write=%llu io_avg_write=%llu io_req=%s io_eff=%s io_chunk=%llu io_reason=%s", + system::ILogger::ELL_PERFORMANCE, file->getFileName().string().c_str(), static_cast(context.fileOffset), binary ? 
1 : 0, + static_cast(context.writeTelemetry.callCount), static_cast(ioMinWrite), static_cast(ioAvgWrite), + system::to_string(_params.ioPolicy.strategy).c_str(), system::to_string(context.ioPlan.strategy).c_str(), static_cast(context.ioPlan.chunkSizeBytes()), context.ioPlan.reason); + return true; +} } - #endif diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.h b/src/nbl/asset/interchange/CSTLMeshWriter.h index a25a84534c..e06e5c5b65 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.h +++ b/src/nbl/asset/interchange/CSTLMeshWriter.h @@ -1,59 +1,25 @@ -// Copyright (C) 2019-2025 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors #ifndef _NBL_ASSET_STL_MESH_WRITER_H_INCLUDED_ #define _NBL_ASSET_STL_MESH_WRITER_H_INCLUDED_ - - -#include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/interchange/IGeometryWriter.h" - - namespace nbl::asset { - -//! class to write meshes, implementing a STL writer +//! Geometry writer capable of emitting STL mesh files. 
class CSTLMeshWriter : public IGeometryWriter { - protected: - virtual ~CSTLMeshWriter(); - public: CSTLMeshWriter(); + ~CSTLMeshWriter() override; - virtual const char** getAssociatedFileExtensions() const - { - static const char* ext[]{ "stl", nullptr }; - return ext; - } - - virtual uint32_t getSupportedFlags() override { return asset::EWF_BINARY; } + const char** getAssociatedFileExtensions() const override; - virtual uint32_t getForcedFlags() { return 0u; } + writer_flags_t getSupportedFlags() override; + writer_flags_t getForcedFlags() override; - virtual bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; - - private: - - struct SContext - { - SAssetWriteContext writeContext; - size_t fileOffset; - }; - - // write binary format - bool writeMeshBinary(const ICPUPolygonGeometry* geom, SContext* context); - - // write text format - bool writeMeshASCII(const ICPUPolygonGeometry* geom, SContext* context); - - // create vector output with line end into string - void getVectorAsStringLine(const core::vectorSIMDf& v, std::string& s) const; - - // write face information to file - void writeFaceText(const core::vectorSIMDf& v1, const core::vectorSIMDf& v2, const core::vectorSIMDf& v3, SContext* context); + bool writeAsset(system::IFile* _file, const SAssetWriteParams& _params, IAssetWriterOverride* _override = nullptr) override; }; - } // end namespace #endif diff --git a/src/nbl/asset/interchange/IAssetLoader.cpp b/src/nbl/asset/interchange/IAssetLoader.cpp index 4a9a8f0378..98f579257d 100644 --- a/src/nbl/asset/interchange/IAssetLoader.cpp +++ b/src/nbl/asset/interchange/IAssetLoader.cpp @@ -16,8 +16,8 @@ IAssetLoader::IAssetLoaderOverride::IAssetLoaderOverride(SCreationParams&& param SAssetBundle IAssetLoader::IAssetLoaderOverride::findCachedAsset(const std::string& inSearchKey, const IAsset::E_TYPE* inAssetTypes, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) { - auto levelFlag 
= ctx.params.cacheFlags >> (uint64_t(hierarchyLevel) * 2ull); - if ((levelFlag & ECF_DUPLICATE_TOP_LEVEL) == ECF_DUPLICATE_TOP_LEVEL) + const auto levelFlags = caching_flags_t(static_cast(ctx.params.cacheFlags.value) >> (uint64_t(hierarchyLevel) * 2ull)); + if (levelFlags.hasFlags(ECF_DUPLICATE_TOP_LEVEL)) return {}; auto found = getManager()->findAssets(inSearchKey, inAssetTypes); @@ -30,8 +30,8 @@ void IAssetLoader::IAssetLoaderOverride::insertAssetIntoCache(SAssetBundle& asse { getManager()->changeAssetKey(asset, supposedKey); - auto levelFlag = _params.cacheFlags >> (uint64_t(hierarchyLevel) * 2ull); - if (!(levelFlag&ECF_DONT_CACHE_TOP_LEVEL)) + const auto levelFlags = caching_flags_t(static_cast(_params.cacheFlags.value) >> (uint64_t(hierarchyLevel) * 2ull)); + if (!levelFlags.hasAnyFlag(ECF_DONT_CACHE_TOP_LEVEL)) getManager()->insertAssetIntoCache(asset,ASSET_MUTABILITY_ON_CACHE_INSERT); } @@ -126,4 +126,4 @@ smart_refctd_ptr IAssetLoader::createDefaultImageView(core::smart .viewType = viewType, .format = imageParams.format }); -} \ No newline at end of file +} diff --git a/src/nbl/asset/interchange/IGeometryWriter.cpp b/src/nbl/asset/interchange/IGeometryWriter.cpp new file mode 100644 index 0000000000..795241e539 --- /dev/null +++ b/src/nbl/asset/interchange/IGeometryWriter.cpp @@ -0,0 +1,8 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/asset/interchange/IGeometryWriter.h" +namespace nbl::asset +{ +IGeometryWriter::~IGeometryWriter() = default; +} diff --git a/src/nbl/asset/interchange/SGeometryViewDecode.h b/src/nbl/asset/interchange/SGeometryViewDecode.h new file mode 100644 index 0000000000..3e8d72adba --- /dev/null +++ b/src/nbl/asset/interchange/SGeometryViewDecode.h @@ -0,0 +1,113 @@ +// Internal src-only header. Do not include from public headers. 
+#ifndef _NBL_ASSET_S_GEOMETRY_VIEW_DECODE_H_INCLUDED_ +#define _NBL_ASSET_S_GEOMETRY_VIEW_DECODE_H_INCLUDED_ +#include "nbl/asset/ICPUPolygonGeometry.h" +#include "nbl/asset/format/decodePixels.h" +#include "nbl/builtin/hlsl/concepts.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" +#include +#include +#include +namespace nbl::asset +{ +//! Shared decode helper for geometry `SDataView` read paths used by writers. +class SGeometryViewDecode +{ + public: + //! Selects whether the output should be in logical attribute space or storage space. + enum class EMode : uint8_t + { + Semantic, //!< Decode values ready for writer-side math and text/binary emission. + Stored //!< Decode values in storage-domain form for raw integer emission. + }; + + //! Prepared decode state hoisted out of inner loops for one formatted view. + template + struct Prepared + { + const uint8_t* data = nullptr; //!< First byte of the view payload. + uint32_t stride = 0u; //!< Byte stride between consecutive elements. + E_FORMAT format = EF_UNKNOWN; //!< Source format used by `decodePixels`. + uint32_t channels = 0u; //!< Channel count cached from `format`. + bool normalized = false; //!< True when semantic decode must apply `range`. + + //! Decoded attribute range used for normalized semantic outputs. + hlsl::shapes::AABB<4, hlsl::float64_t> range = hlsl::shapes::AABB<4, hlsl::float64_t>::create(); + inline explicit operator bool() const { return data != nullptr && stride != 0u && format != EF_UNKNOWN && channels != 0u; } + + //! Decodes one element into a fixed-size `std::array`. + template + inline bool decode(const size_t ix, std::array& out) const { out.fill(T{}); return SGeometryViewDecode::template decodePrepared(*this, ix, out.data(), static_cast(N)); } + + //! Decodes one element into an HLSL vector type. 
+ template requires hlsl::concepts::Vector + inline bool decode(const size_t ix, V& out) const { out = V{}; return SGeometryViewDecode::template decodePrepared(*this, ix, out); } + }; + + //! Prepares one decode state that can be reused across many elements of the same view. + template + static inline Prepared prepare(const ICPUPolygonGeometry::SDataView& view) + { + Prepared retval = {}; + if (!view.composed.isFormatted()) + return {}; + if (!(retval.data = reinterpret_cast(view.getPointer()))) + return {}; + retval.stride = view.composed.getStride(); + retval.format = view.composed.format; + retval.channels = getFormatChannelCount(retval.format); + if constexpr (Mode == EMode::Semantic) + if (retval.normalized = isNormalizedFormat(retval.format); retval.normalized) + retval.range = view.composed.getRange>(); + return retval; + } + + //! One-shot convenience wrapper over `prepare(...).decode(...)`. + template + static inline bool decodeElement(const ICPUPolygonGeometry::SDataView& view, const size_t ix, Out& out) { return prepare(view).decode(ix, out); } + private: + //! Shared scalar/vector backend that decodes one prepared element into plain components. 
+ template + static inline bool decodePreparedComponents(const Prepared& prepared, const size_t ix, T* out, const uint32_t outDim) + { + if (!prepared || !out || outDim == 0u) + return false; + using storage_t = std::conditional_t, hlsl::float64_t, std::conditional_t, int64_t, uint64_t>>; + std::array tmp = {}; + const void* srcArr[4] = {prepared.data + ix * prepared.stride, nullptr}; + if (!decodePixels(prepared.format, srcArr, tmp.data(), 0u, 0u)) + return false; + const uint32_t componentCount = std::min({prepared.channels, outDim, 4u}); + if constexpr (Mode == EMode::Semantic && std::is_floating_point_v) + { + if (prepared.normalized) + { + for (uint32_t i = 0u; i < componentCount; ++i) + tmp[i] = static_cast(tmp[i] * (prepared.range.maxVx[i] - prepared.range.minVx[i]) + prepared.range.minVx[i]); + } + } + for (uint32_t i = 0u; i < componentCount; ++i) + out[i] = static_cast(tmp[i]); + return true; + } + + //! Vector overload built on top of `decodePreparedComponents`. + template requires hlsl::concepts::Vector + static inline bool decodePrepared(const Prepared& prepared, const size_t ix, V& out) + { + using scalar_t = typename hlsl::vector_traits::scalar_type; + constexpr uint32_t Dimension = hlsl::vector_traits::Dimension; + std::array tmp = {}; + if (!decodePreparedComponents(prepared, ix, tmp.data(), Dimension)) + return false; + for (uint32_t i = 0u; i < Dimension; ++i) + out[i] = tmp[i]; + return true; + } + + //! Pointer overload used by `std::array` and internal scratch storage. + template + static inline bool decodePrepared(const Prepared& prepared, const size_t ix, T* out, const uint32_t outDim) { return decodePreparedComponents(prepared, ix, out, outDim); } +}; +} +#endif diff --git a/src/nbl/asset/interchange/impl/SBinaryData.h b/src/nbl/asset/interchange/impl/SBinaryData.h new file mode 100644 index 0000000000..370f1a383d --- /dev/null +++ b/src/nbl/asset/interchange/impl/SBinaryData.h @@ -0,0 +1,36 @@ +// Internal src-only header. 
Do not include from public headers. +#ifndef _NBL_ASSET_IMPL_S_BINARY_DATA_H_INCLUDED_ +#define _NBL_ASSET_IMPL_S_BINARY_DATA_H_INCLUDED_ +#include +#include +#include +namespace nbl::asset::impl +{ +//! Binary helpers for endian conversion and unaligned loads/stores. +struct BinaryData +{ + //! Returns `value` with byte order reversed. + template + static inline T byteswap(const T value) { auto retval = value; const auto* it = reinterpret_cast(&value); std::reverse_copy(it, it + sizeof(retval), reinterpret_cast(&retval)); return retval; } + + //! Loads one trivially copyable value from unaligned memory and optionally byte-swaps it. + template + static inline T loadUnaligned(const void* src, const bool swapEndian = false) + { + T value = {}; + if (!src) + return value; + std::memcpy(&value, src, sizeof(value)); + return swapEndian ? byteswap(value) : value; + } + + //! Stores one trivially copyable value into unaligned memory. + template + static inline void storeUnaligned(void* dst, const T& value) { std::memcpy(dst, &value, sizeof(value)); } + + //! Stores one value and advances the destination pointer by `sizeof(T)`. + template + static inline void storeUnalignedAdvance(uint8_t*& dst, const T& value) { storeUnaligned(dst, value); dst += sizeof(value); } +}; +} +#endif diff --git a/src/nbl/asset/interchange/impl/SFileAccess.h b/src/nbl/asset/interchange/impl/SFileAccess.h new file mode 100644 index 0000000000..b1e15010f7 --- /dev/null +++ b/src/nbl/asset/interchange/impl/SFileAccess.h @@ -0,0 +1,105 @@ +// Internal src-only header. Do not include from public headers. +#ifndef _NBL_ASSET_IMPL_S_FILE_ACCESS_H_INCLUDED_ +#define _NBL_ASSET_IMPL_S_FILE_ACCESS_H_INCLUDED_ +#include "nbl/core/declarations.h" +#include "nbl/asset/interchange/SInterchangeIO.h" +#include "nbl/system/ILogger.h" +#include +namespace nbl::asset::impl +{ +//! Small file access helper shared by interchange loaders. +class SFileAccess +{ + public: + //! 
Returns true when the file exposes a mapped pointer. + static inline bool isMappable(const system::IFile* file) { return file && core::bitflag(file->getFlags()).hasAnyFlag(system::IFile::ECF_MAPPABLE); } + //! Resolves the final IO plan after considering payload size and mapping support. + static inline SResolvedFileIOPolicy resolvePlan(const SFileIOPolicy& ioPolicy, const uint64_t payloadBytes, const bool sizeKnown, const system::IFile* file) { return SResolvedFileIOPolicy(ioPolicy, payloadBytes, sizeKnown, isMappable(file)); } + + //! Logs an invalid plan and returns true when the caller should abort. + template + static inline bool logInvalidPlan(Logger& logger, const char* const owner, const char* const fileName, const SResolvedFileIOPolicy& ioPlan) + { + if (ioPlan.isValid()) + return false; + logger.log("%s: invalid io policy for %s reason=%s", system::ILogger::ELL_ERROR, owner, fileName, ioPlan.reason); + return true; + } + + //! Emits the shared tiny-IO warning when telemetry indicates suspiciously small reads. + template + static inline void logTinyIO(Logger& logger, const char* const owner, const char* const fileName, const SInterchangeIO::STelemetry& telemetry, const uint64_t payloadBytes, const SFileIOPolicy& ioPolicy, const char* const opName) + { + if (!SInterchangeIO::isTinyIOTelemetryLikely(telemetry, payloadBytes, ioPolicy)) + return; + logger.log("%s tiny-io guard: file=%s %s=%llu min=%llu avg=%llu", system::ILogger::ELL_WARNING, owner, fileName, opName, static_cast(telemetry.callCount), static_cast(telemetry.getMinOrZero()), static_cast(telemetry.getAvgOrZero())); + } + //! Reads one byte range using the already resolved IO plan. + static inline const uint8_t* readRange(system::IFile* file, const size_t offset, const size_t bytes, core::vector& storage, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr, const bool zeroTerminate = false) + { + storage.resize(bytes + (zeroTerminate ? 
1ull : 0ull), 0u); + if (!SInterchangeIO::readFileWithPolicy(file, storage.data(), offset, bytes, ioPlan, ioTelemetry)) + return nullptr; + if (zeroTerminate) + storage[bytes] = 0u; + return storage.data(); + } + //! Uses the mapped pointer for whole-file mode when available, otherwise falls back to `readRange`. + static inline const uint8_t* mapOrReadWholeFile(system::IFile* file, const size_t bytes, core::vector& storage, const SResolvedFileIOPolicy& ioPlan, SFileReadTelemetry* ioTelemetry = nullptr, bool* wasMapped = nullptr, const bool zeroTerminate = false) + { + if (wasMapped) + *wasMapped = false; + if (ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile) + { + const auto* mapped = reinterpret_cast(static_cast(file)->getMappedPointer()); + if (mapped) + { + if (ioTelemetry) ioTelemetry->account(bytes); + if (wasMapped) *wasMapped = true; + return mapped; + } + } + return readRange(file, 0ull, bytes, storage, ioPlan, ioTelemetry, zeroTerminate); + } +}; +//! Per-load session state shared across the loader entry points. +class SLoadSession +{ + public: + system::IFile* file = nullptr; //!< File being processed by the loader. + const SFileIOPolicy* requestedPolicy = nullptr; //!< Original policy requested by the caller. + SResolvedFileIOPolicy ioPlan = {}; //!< Final plan chosen for this payload. + uint64_t payloadBytes = 0ull; //!< Logical payload size covered by `ioPlan`. + const char* owner = nullptr; //!< Human-readable loader name used in logs. + std::string fileName = {}; //!< Cached file name used in diagnostics. + + //! Initializes the session and resolves the IO plan. 
+ template + static inline bool begin(Logger& logger, const char* const owner, system::IFile* file, const SFileIOPolicy& ioPolicy, const uint64_t payloadBytes, const bool sizeKnown, SLoadSession& out) + { + out = {}; + if (!file) + return false; + out.file = file; + out.requestedPolicy = &ioPolicy; + out.ioPlan = SFileAccess::resolvePlan(ioPolicy, payloadBytes, sizeKnown, file); + out.payloadBytes = payloadBytes; + out.owner = owner; + out.fileName = file->getFileName().string(); + return !SFileAccess::logInvalidPlan(logger, owner, out.fileName.c_str(), out.ioPlan); + } + //! Returns true when the resolved plan prefers whole-file access. + inline bool isWholeFile() const { return ioPlan.strategy == SResolvedFileIOPolicy::Strategy::WholeFile; } + //! Returns the mapped pointer for whole-file mode or `nullptr` when unavailable. + inline const uint8_t* mappedPointer() const { return file && isWholeFile() ? reinterpret_cast(static_cast(file)->getMappedPointer()) : nullptr; } + //! Convenience wrapper over `SFileAccess::readRange` bound to this session. + inline const uint8_t* readRange(const size_t offset, const size_t bytes, core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, const bool zeroTerminate = false) const { return SFileAccess::readRange(file, offset, bytes, storage, ioPlan, ioTelemetry, zeroTerminate); } + //! Convenience wrapper over `SFileAccess::mapOrReadWholeFile` bound to this session. + inline const uint8_t* mapOrReadWholeFile(core::vector& storage, SFileReadTelemetry* const ioTelemetry = nullptr, bool* const wasMapped = nullptr, const bool zeroTerminate = false) const { return SFileAccess::mapOrReadWholeFile(file, static_cast(payloadBytes), storage, ioPlan, ioTelemetry, wasMapped, zeroTerminate); } + + //! Emits the shared tiny-IO diagnostic for this session. 
+		//! Does nothing when `requestedPolicy` is null (for example on a session that was never begun);
+		//! otherwise forwards the cached owner, file name and payload size to `SFileAccess::logTinyIO`.
+		//! NOTE(review): the template parameter list (presumably `Logger` and `Telemetry`) is not visible in this view.
+		template
+		inline void logTinyIO(Logger& logger, const Telemetry& telemetry, const char* const opName = "reads") const { if (requestedPolicy) SFileAccess::logTinyIO(logger, owner, fileName.c_str(), telemetry, payloadBytes, *requestedPolicy, opName); }
+};
+}
+#endif
diff --git a/src/nbl/asset/interchange/impl/STextParse.h b/src/nbl/asset/interchange/impl/STextParse.h
new file mode 100644
index 0000000000..ac4ed2d9b2
--- /dev/null
+++ b/src/nbl/asset/interchange/impl/STextParse.h
@@ -0,0 +1,208 @@
+// Internal src-only header. Do not include from public headers.
+#ifndef _NBL_ASSET_IMPL_S_TEXT_PARSE_H_INCLUDED_
+#define _NBL_ASSET_IMPL_S_TEXT_PARSE_H_INCLUDED_
+#include "nbl/core/string/stringutil.h"
+// NOTE(review): the header names of the eight includes below are not visible in this view - confirm against the real file.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+namespace nbl::asset::impl
+{
+//! Text token and numeric parsing helpers shared by interchange text formats.
+struct TextParse
+{
+	//! Forward-only line iterator over the half-open character range [`cursor`, `end`).
+	struct LineCursor
+	{
+		const char* cursor = nullptr;
+		const char* end = nullptr;
+		//! Returns the next line without its terminator and moves `cursor` past that terminator.
+		//! Returns `std::nullopt` when `cursor` is null or has reached `end`.
+		//! A line stops at the first NUL, CR or LF, or at `end` when none is found.
+		inline std::optional readLine()
+		{
+			if (!cursor || cursor >= end)
+				return std::nullopt;
+			const char* lineEnd = cursor;
+			while (lineEnd < end && *lineEnd != '\0' && *lineEnd != '\r' && *lineEnd != '\n')
+				++lineEnd;
+			const std::string_view line(cursor, static_cast(lineEnd - cursor));
+			// Swallow the terminator: CR, LF, CR+LF or NUL (a NUL directly after a CR is swallowed as well).
+			if (lineEnd < end && *lineEnd == '\r')
+				++lineEnd;
+			if (lineEnd < end && *lineEnd == '\n')
+				++lineEnd;
+			else if (lineEnd < end && *lineEnd == '\0')
+				++lineEnd;
+			cursor = lineEnd;
+			return line;
+		}
+	};
+	//! Returns true for the ASCII decimal digits '0'..'9' only.
+	static inline bool isDigit(const char c) { return c >= '0' && c <= '9'; }
+	//! Parses one arithmetic token and advances `ptr` on success.
+	//! On failure `ptr` is left unchanged.
+	//! - Integral `T`: delegates to `std::from_chars`.
+	//! - Floating-point `T`: plain decimal spellings (optional sign, digits, optional fraction, no exponent) are
+	//!   converted by a local fast path; everything else is delegated to `fast_float::from_chars`.
+	//! \param ptr in: first character of the token, out: one past the last consumed character.
+	//! \param end one past the last readable character.
+	//! \param out receives the parsed value on success.
+	//! \return true when at least one character was consumed and a value was produced.
+	//! NOTE(review): the fast path accepts a leading '+', while the fast_float fallback is started at the sign and
+	//! fast_float rejects a leading '+' by default, so "+1.5" parses but "+1.5e3" does not - confirm this is intended.
+	//! NOTE(review): the fast path rounds the integer and fractional parts separately, so its result may differ in
+	//! the last ulp from the fast_float result - confirm this is acceptable for all callers.
+	template<typename T>
+	static inline bool parseNumber(const char*& ptr, const char* const end, T& out)
+	{
+		static_assert(std::is_arithmetic_v<T>);
+		if constexpr (std::is_floating_point_v<T>)
+		{
+			const char* const start = ptr;
+			if (start >= end)
+				return false;
+			// Single fallback used by every "too hard for the fast path" exit. It always restarts at the token
+			// start so fast_float sees the sign and every digit.
+			const auto parseWithFastFloat = [&]() -> bool
+			{
+				const auto parseResult = fast_float::from_chars(start, end, out);
+				if (parseResult.ec != std::errc() || parseResult.ptr == start)
+					return false;
+				ptr = parseResult.ptr;
+				return true;
+			};
+			const char* p = start;
+			bool negative = false;
+			if (*p == '-' || *p == '+')
+			{
+				negative = (*p == '-');
+				++p;
+				if (p >= end)
+					return false;
+			}
+			// Fast path for the common plain-decimal subset: optional sign, digits, and an optional fractional part, but no exponent.
+			// This follows the same broad idea as RapidJSON's StrtodFast: cheaply handle the dominant simple spellings before delegating
+			// harder cases to the full parser. This is not a standalone general-purpose parser. Tokens with exponents or otherwise
+			// non-trivial spellings still fall back to fast_float.
+			if (*p != '.' && isDigit(*p))
+			{
+				// 19 decimal digits always fit into uint64_t (max 9'999'999'999'999'999'999 < 2^64). Anything longer
+				// would wrap around silently, so such tokens are handed to fast_float instead.
+				constexpr uint32_t MaxIntegerDigits = 19u;
+				uint64_t integerPart = 0ull;
+				uint32_t integerDigits = 0u;
+				while (p < end && isDigit(*p))
+				{
+					if (integerDigits >= MaxIntegerDigits)
+						return parseWithFastFloat();
+					integerPart = integerPart * 10ull + static_cast<uint64_t>(*p - '0');
+					++integerDigits;
+					++p;
+				}
+				double value = static_cast<double>(integerPart);
+				if (p < end && *p == '.')
+				{
+					const char* const dot = p;
+					// Dedicated branch for exactly six fractional digits. Needs dot[1]..dot[6] to be readable.
+					if ((dot + 7) <= end)
+					{
+						const char d0 = dot[1];
+						const char d1 = dot[2];
+						const char d2 = dot[3];
+						const char d3 = dot[4];
+						const char d4 = dot[5];
+						const char d5 = dot[6];
+						if (isDigit(d0) && isDigit(d1) && isDigit(d2) && isDigit(d3) && isDigit(d4) && isDigit(d5))
+						{
+							// Only take this branch when the token really ends after the sixth digit:
+							// a seventh digit or an exponent marker means more work is needed.
+							const bool hasNext = (dot + 7) < end;
+							const char next = hasNext ? dot[7] : '\0';
+							if ((!hasNext || !isDigit(next)) && (!hasNext || (next != 'e' && next != 'E')))
+							{
+								const uint32_t frac =
+									static_cast<uint32_t>(d0 - '0') * 100000u +
+									static_cast<uint32_t>(d1 - '0') * 10000u +
+									static_cast<uint32_t>(d2 - '0') * 1000u +
+									static_cast<uint32_t>(d3 - '0') * 100u +
+									static_cast<uint32_t>(d4 - '0') * 10u +
+									static_cast<uint32_t>(d5 - '0');
+								value += static_cast<double>(frac) * 1e-6;
+								ptr = dot + 7;
+								out = static_cast<T>(negative ? -value : value);
+								return true;
+							}
+						}
+					}
+					static constexpr double InvPow10[] = {
+						1.0,
+						1e-1, 1e-2, 1e-3, 1e-4, 1e-5,
+						1e-6, 1e-7, 1e-8, 1e-9, 1e-10,
+						1e-11, 1e-12, 1e-13, 1e-14, 1e-15,
+						1e-16, 1e-17, 1e-18
+					};
+					++p;
+					uint64_t fractionPart = 0ull;
+					uint32_t fractionDigits = 0u;
+					while (p < end && isDigit(*p))
+					{
+						// The table covers at most 18 fractional digits, which also keeps `fractionPart` within uint64_t.
+						if (fractionDigits >= (std::size(InvPow10) - 1u))
+							break;
+						fractionPart = fractionPart * 10ull + static_cast<uint64_t>(*p - '0');
+						++fractionDigits;
+						++p;
+					}
+					if (fractionDigits)
+						value += static_cast<double>(fractionPart) * InvPow10[fractionDigits];
+					// More fractional digits than the table covers: let the full parser handle the token.
+					if (p < end && isDigit(*p))
+						return parseWithFastFloat();
+				}
+				// Exponent spellings are never handled locally.
+				if (p < end && (*p == 'e' || *p == 'E'))
+					return parseWithFastFloat();
+				ptr = p;
+				out = static_cast<T>(negative ? -value : value);
+				return true;
+			}
+			// Not a plain decimal (leading '.', "inf", "nan", ...): full parser from the token start.
+			return parseWithFastFloat();
+		}
+		else
+		{
+			const auto parseResult = std::from_chars(ptr, end, out);
+			if (parseResult.ec != std::errc() || parseResult.ptr == ptr)
+				return false;
+			ptr = parseResult.ptr;
+			return true;
+		}
+	}
+
+	//! Parses one arithmetic token and succeeds only if the whole range was consumed.
+	//! Trailing characters of any kind (including whitespace) make the call fail.
+	template<typename T>
+	static inline bool parseExactNumber(const char* const begin, const char* const end, T& out) { auto ptr = begin; return parseNumber(ptr, end, out) && ptr == end; }
+
+	//! `std::string_view` convenience wrapper over `parseExactNumber(begin,end,...)`.
+	//! NOTE(review): the template parameter list (presumably the value type `T`) is not visible in this view.
+	template
+	static inline bool parseExactNumber(const std::string_view token, T& out) { return parseExactNumber(token.data(), token.data() + token.size(), out); }
+
+	//! Parses one arithmetic token and rejects zero.
+	//! The zero check runs after a successful `parseNumber`, so for a zero token `ptr` has already been
+	//! advanced and `out` already written when false is returned.
+	template
+	static inline bool parseNonZeroNumber(const char*& ptr, const char* const end, T& out) { return parseNumber(ptr, end, out) && out != static_cast(0); }
+
+	//! Returns true for inline whitespace accepted inside tokenized text formats.
+	//! That is space, horizontal tab, vertical tab and form feed; CR and LF are deliberately not included.
+	static inline bool isInlineWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\v' || c == '\f'; }
+	//! Skips inline whitespace (see `isInlineWhitespace`), so it stops at CR/LF and stays within the current logical line.
+	static inline void skipInlineWhitespace(const char*& ptr, const char* const end) { while (ptr < end && isInlineWhitespace(*ptr)) ++ptr; }
+	//! Skips generic whitespace according to `core::isspace`.
+	static inline void skipWhitespace(const char*& ptr, const char* const end) { while (ptr < end && core::isspace(*ptr)) ++ptr; }
+	//! Trims leading and trailing whitespace from a token view.
+	//! Works on a copy of the view; the underlying characters are not modified.
+	static inline std::string_view trimWhitespace(std::string_view token)
+	{
+		while (!token.empty() && core::isspace(token.front())) token.remove_prefix(1ull);
+		while (!token.empty() && core::isspace(token.back())) token.remove_suffix(1ull);
+		return token;
+	}
+	//! Reads one whitespace-delimited token and advances `cursor` past it.
+	//! Leading whitespace is skipped first, so `cursor` moves even when `std::nullopt` is returned
+	//! because only whitespace was left before `end`.
+	static inline std::optional readToken(const char*& cursor, const char* const end)
+	{
+		skipWhitespace(cursor, end);
+		if (cursor >= end)
+			return std::nullopt;
+		const auto* tokenEnd = cursor;
+		while (tokenEnd < end && !core::isspace(*tokenEnd))
+			++tokenEnd;
+		const std::string_view token(cursor, static_cast(tokenEnd - cursor));
+		// Comma operator: first move the cursor to the end of the token, then return the view built above.
+		return cursor = tokenEnd, token;
+	}
+	//! Reads one line view from a contiguous text buffer and advances `cursor`.
+ static inline std::optional readLine(const char*& cursor, const char* const end) + { + LineCursor lineCursor = {.cursor = cursor, .end = end}; + auto line = lineCursor.readLine(); + cursor = lineCursor.cursor; + return line; + } +}; +} +#endif diff --git a/src/nbl/asset/pch_asset.h b/src/nbl/asset/pch_asset.h index 361df786f1..d24252be24 100644 --- a/src/nbl/asset/pch_asset.h +++ b/src/nbl/asset/pch_asset.h @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #ifndef _NBL_ASSET_PCH_ASSET_H_INCLUDED_ @@ -37,6 +37,7 @@ #include "nbl/asset/interchange/CPLYMeshFileLoader.h" #include "nbl/asset/interchange/CSTLMeshFileLoader.h" // writers +#include "nbl/asset/interchange/COBJMeshWriter.h" #include "nbl/asset/interchange/CPLYMeshWriter.h" #include "nbl/asset/interchange/CSTLMeshWriter.h" // manipulation diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 3750a37a70..9950f8b997 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -4,6 +4,7 @@ #include "nbl/asset/utils/CGeometryCreator.h" +#include "nbl/asset/utils/SGeometryNormalCommon.h" #include "nbl/builtin/hlsl/tgmath.hlsl" #include "nbl/builtin/hlsl/math/linalg/transform.hlsl" #include "nbl/builtin/hlsl/math/quaternions.hlsl" @@ -946,44 +947,6 @@ class Icosphere private: - /* - return face normal (4th param) of a triangle v1-v2-v3 - if a triangle has no surface (normal length = 0), then return a zero vector - */ - - static inline void computeFaceNormal(const float v1[3], const float v2[3], const float v3[3], float normal[3]) - { - constexpr float EPSILON = 0.000001f; - - // default return value (0, 0, 0) - normal[0] = normal[1] = normal[2] = 0; - - // find 2 edge vectors: v1-v2, v1-v3 - float ex1 = 
v2[0] - v1[0]; - float ey1 = v2[1] - v1[1]; - float ez1 = v2[2] - v1[2]; - float ex2 = v3[0] - v1[0]; - float ey2 = v3[1] - v1[1]; - float ez2 = v3[2] - v1[2]; - - // cross product: e1 x e2 - float nx, ny, nz; - nx = ey1 * ez2 - ez1 * ey2; - ny = ez1 * ex2 - ex1 * ez2; - nz = ex1 * ey2 - ey1 * ex2; - - // normalize only if the length is > 0 - float length = sqrtf(nx * nx + ny * ny + nz * nz); - if (length > EPSILON) - { - // normalize - float lengthInv = 1.0f / length; - normal[0] = nx * lengthInv; - normal[1] = ny * lengthInv; - normal[2] = nz * lengthInv; - } - } - /* return vertex normal (2nd param) by mormalizing the vertex vector */ @@ -1229,27 +1192,27 @@ class Icosphere t11[0] = 2 * i * S_STEP; t11[1] = T_STEP * 3; // add a triangle in 1st row - Icosphere::computeFaceNormal(v0, v1, v2, n); + SGeometryNormalCommon::computeFaceNormal(v0, v1, v2, n); addVertices(v0, v1, v2); addNormals(n, n, n); addTexCoords(t0, t1, t2); addIndices(index, index + 1, index + 2); // add 2 triangles in 2nd row - Icosphere::computeFaceNormal(v1, v3, v2, n); + SGeometryNormalCommon::computeFaceNormal(v1, v3, v2, n); addVertices(v1, v3, v2); addNormals(n, n, n); addTexCoords(t1, t3, t2); addIndices(index + 3, index + 4, index + 5); - Icosphere::computeFaceNormal(v2, v3, v4, n); + SGeometryNormalCommon::computeFaceNormal(v2, v3, v4, n); addVertices(v2, v3, v4); addNormals(n, n, n); addTexCoords(t2, t3, t4); addIndices(index + 6, index + 7, index + 8); // add a triangle in 3rd row - Icosphere::computeFaceNormal(v3, v11, v4, n); + SGeometryNormalCommon::computeFaceNormal(v3, v11, v4, n); addVertices(v3, v11, v4); addNormals(n, n, n); addTexCoords(t3, t11, t4); @@ -1562,25 +1525,25 @@ class Icosphere // add 4 new triangles addVertices(v1, newV1, newV3); addTexCoords(t1, newT1, newT3); - computeFaceNormal(v1, newV1, newV3, normal); + SGeometryNormalCommon::computeFaceNormal(v1, newV1, newV3, normal); addNormals(normal, normal, normal); addIndices(index, index + 1, index + 2); 
addVertices(newV1, v2, newV2); addTexCoords(newT1, t2, newT2); - computeFaceNormal(newV1, v2, newV2, normal); + SGeometryNormalCommon::computeFaceNormal(newV1, v2, newV2, normal); addNormals(normal, normal, normal); addIndices(index + 3, index + 4, index + 5); addVertices(newV1, newV2, newV3); addTexCoords(newT1, newT2, newT3); - computeFaceNormal(newV1, newV2, newV3, normal); + SGeometryNormalCommon::computeFaceNormal(newV1, newV2, newV3, normal); addNormals(normal, normal, normal); addIndices(index + 6, index + 7, index + 8); addVertices(newV3, newV2, v3); addTexCoords(newT3, newT2, t3); - computeFaceNormal(newV3, newV2, v3, normal); + SGeometryNormalCommon::computeFaceNormal(newV3, newV2, v3, normal); addNormals(normal, normal, normal); addIndices(index + 9, index + 10, index + 11); diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 818751052b..f83fb3c3e0 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -7,18 +7,171 @@ #include #include +#include #include "nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/asset/interchange/SLoaderRuntimeTuning.h" #include "nbl/asset/utils/CVertexWelder.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" #include "nbl/asset/utils/CForsythVertexCacheOptimizer.h" #include "nbl/asset/utils/COverdrawPolygonGeometryOptimizer.h" #include "nbl/asset/utils/COBBGenerator.h" +#include "nbl/asset/IPreHashed.h" namespace nbl::asset { +void CPolygonGeometryManipulator::collectUniqueBuffers(const ICPUPolygonGeometry* geo, core::vector>& outBuffers) +{ + if (!geo) + { + outBuffers.clear(); + return; + } + + outBuffers.clear(); + auto appendBuffer = [&outBuffers](const IGeometry::SDataView& view)->void + { + if (!view || !view.src.buffer) + return; + for (const auto& existing : outBuffers) + { + if (existing.get() == view.src.buffer.get()) + return; + } + 
outBuffers.push_back(core::smart_refctd_ptr(view.src.buffer)); + }; + + appendBuffer(geo->getPositionView()); + appendBuffer(geo->getIndexView()); + appendBuffer(geo->getNormalView()); + for (const auto& view : geo->getAuxAttributeViews()) + appendBuffer(view); + for (const auto& view : geo->getJointWeightViews()) + { + appendBuffer(view.indices); + appendBuffer(view.weights); + } + if (auto jointOBB = geo->getJointOBBView(); jointOBB) + appendBuffer(*jointOBB); +} + +void CPolygonGeometryManipulator::computeContentHashesParallel(ICPUPolygonGeometry* geo, const SFileIOPolicy& ioPolicy, const EContentHashMode mode) +{ + if (!geo) + return; + + core::vector> buffers; + collectUniqueBuffers(geo, buffers); + if (buffers.empty()) + return; + + core::vector pending; + pending.reserve(buffers.size()); + uint64_t totalBytes = 0ull; + for (size_t i = 0ull; i < buffers.size(); ++i) + { + auto& buffer = buffers[i]; + if (!buffer) + continue; + if (mode == EContentHashMode::MissingOnly && buffer->getContentHash() != IPreHashed::INVALID_HASH) + continue; + totalBytes += static_cast(buffer->getSize()); + pending.push_back(i); + } + if (pending.empty()) + return; + + const auto hashPendingRange = [&](const size_t beginIx, const size_t endIx) -> void + { + for (size_t i = beginIx; i < endIx; ++i) + { + auto& buffer = buffers[pending[i]]; + buffer->setContentHash(buffer->computeContentHash()); + } + }; + + if (ioPolicy.runtimeTuning.mode == SFileIOPolicy::SRuntimeTuning::Mode::Sequential) + { + hashPendingRange(0ull, pending.size()); + return; + } + + const size_t hw = SLoaderRuntimeTuner::resolveHardwareThreads(); + const uint8_t* hashSampleData = nullptr; + uint64_t hashSampleBytes = 0ull; + for (const auto pendingIx : pending) + { + auto& buffer = buffers[pendingIx]; + const auto* ptr = reinterpret_cast(buffer->getPointer()); + if (!ptr) + continue; + hashSampleData = ptr; + hashSampleBytes = SLoaderRuntimeTuner::resolveSampleBytes(ioPolicy, static_cast(buffer->getSize())); + if 
(hashSampleBytes > 0ull) + break; + } + + SLoaderRuntimeTuningRequest tuningRequest = {}; + tuningRequest.inputBytes = totalBytes; + tuningRequest.totalWorkUnits = pending.size(); + tuningRequest.minBytesPerWorker = std::max(1ull, SLoaderRuntimeTuner::ceilDiv(totalBytes, static_cast(pending.size()))); + tuningRequest.hardwareThreads = static_cast(hw); + const size_t hardMaxWorkers = SLoaderRuntimeTuner::resolveHardMaxWorkers(hw, ioPolicy.runtimeTuning.workerHeadroom); + tuningRequest.hardMaxWorkers = static_cast(std::min(pending.size(), hardMaxWorkers)); + tuningRequest.targetChunksPerWorker = ioPolicy.runtimeTuning.hashTaskTargetChunksPerWorker; + tuningRequest.sampleData = hashSampleData; + tuningRequest.sampleBytes = hashSampleBytes; + const auto tuning = SLoaderRuntimeTuner::tune(ioPolicy, tuningRequest); + const size_t workerCount = std::min(tuning.workerCount, pending.size()); + + if (workerCount > 1ull) + { + SLoaderRuntimeTuner::dispatchWorkers(workerCount, [&](const size_t workerIx) + { + const size_t beginIx = (pending.size() * workerIx) / workerCount; + const size_t endIx = (pending.size() * (workerIx + 1ull)) / workerCount; + hashPendingRange(beginIx, endIx); + }); + return; + } + + hashPendingRange(0ull, pending.size()); +} + +bool CPolygonGeometryManipulator::generateMissingSmoothNormals( + core::vector& normals, + const core::vector& positions, + const core::vector& indices, + const core::vector& normalNeedsGeneration +) +{ + if (normals.size() != positions.size() || normals.size() != normalNeedsGeneration.size()) + return false; + + CSmoothNormalAccumulator accumulator(ESmoothNormalAccumulationMode::AreaWeighted); + accumulator.reserveVertices(positions.size()); + accumulator.prepareIdentityGroups(positions.size()); + const size_t triangleCount = indices.size() / 3ull; + for (size_t triIx = 0ull; triIx < triangleCount; ++triIx) + { + const uint32_t i0 = indices[triIx * 3ull + 0ull]; + const uint32_t i1 = indices[triIx * 3ull + 1ull]; + const 
uint32_t i2 = indices[triIx * 3ull + 2ull]; + if (i0 >= positions.size() || i1 >= positions.size() || i2 >= positions.size()) + continue; + if (!accumulator.addPreparedIdentityTriangle( + i0, positions[static_cast(i0)], + i1, positions[static_cast(i1)], + i2, positions[static_cast(i2)])) + return false; + } + return accumulator.finalize( + std::span(normals.data(), normals.size()), + std::span(normalNeedsGeneration.data(), normalNeedsGeneration.size())); +} + core::smart_refctd_ptr CPolygonGeometryManipulator::createUnweldedList(const ICPUPolygonGeometry* inGeo, const bool reverse, const bool recomputeHash) { diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index 7c9bf5358f..8bce90f864 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -5,18 +5,283 @@ #define _NBL_ASSET_C_SMOOTH_NORMAL_GENERATOR_H_INCLUDED_ #include "nbl/asset/utils/CVertexHashGrid.h" +#include "nbl/builtin/hlsl/shapes/triangle.hlsl" + +#include +#include +#include +#include namespace nbl::asset { -// TODO: implement a class template that take position type(either float32_t3 or float64_t3 as template argument +template +concept SmoothNormalPosition = std::same_as || std::same_as; + +//! Generic smooth-normal accumulation utilities. The core accepts triangles incrementally, +//! supports indexed inputs, optional caller-defined grouping, and finalizes into a caller-owned +//! normal buffer. Parsing and authoring of any format-specific grouping rules stay outside. class CSmoothNormalGenerator final { public: CSmoothNormalGenerator() = delete; ~CSmoothNormalGenerator() = delete; + //! AreaWeighted matches the existing behaviour used by current loaders. AngleWeighted + //! is available for future callers that need angle-based smoothing without changing the API. + enum class EAccumulationMode : uint8_t + { + AreaWeighted, + AngleWeighted + }; + + //! One triangle corner to be accumulated. 
`vertexIx` points at the output vertex whose + //! normal will be written on finalize. `accumulationGroup` controls which corners smooth + //! together. This is the generic equivalent of format-specific smoothing-group semantics. + //! Callers can keep it equal to `vertexIx` for identity grouping or map it to any other + //! stable grouping key when corners that share a position must stay sharp. + template + struct SAccumulatedCorner + { + uint32_t vertexIx = 0u; + uint32_t accumulationGroup = 0u; + PositionT position = PositionT(0.f, 0.f, 0.f); + }; + + //! Incremental smooth-normal accumulator. Callers feed triangles through `addTriangle(...)` + //! and then materialize results with `finalize(...)`. Grouping is provided entirely by + //! the caller through `accumulationGroup`. + template + class CAccumulatedNormals final + { + public: + using vector_t = PositionT; + + explicit CAccumulatedNormals(const EAccumulationMode mode = EAccumulationMode::AreaWeighted) : m_mode(mode) {} + + //! Records how many output vertices may need normals. This affects finalize-time + //! validation and may reserve group storage if non-identity grouping is already active. + NBL_FORCE_INLINE void reserveVertices(const size_t count) + { + if (count > m_vertexCount) + m_vertexCount = count; + if (count > m_groupsByVertex.capacity() && !m_groupsByVertex.empty()) + m_groupsByVertex.reserve(growSize(count)); + } + + //! Reserves accumulation storage for explicit grouping. Callers that know they will + //! feed many non-identity groups can use this to avoid repeated reallocations. + NBL_FORCE_INLINE void reserveGroups(const size_t count) + { + if (count > m_accumulatedNormals.capacity()) + m_accumulatedNormals.reserve(growSize(count)); + } + + //! Prepares the common identity-group case (`accumulationGroup == vertexIx`) up front. + //! This enables a lighter hot path where `addPreparedIdentityTriangle(...)` can skip + //! 
per-corner registration and write straight into pre-sized accumulation slots. + NBL_FORCE_INLINE void prepareIdentityGroups(const size_t count) + { + if (!m_groupsByVertex.empty()) + return; + ensureGroupStorage(count); + } + + //! Generic triangle submission path. Use this when the caller needs custom grouping. + //! In particular, callers can encode smoothing-group-like semantics by assigning + //! the same `accumulationGroup` to corners that should share a smooth normal and a + //! different one to corners that must stay sharp. + NBL_FORCE_INLINE bool addTriangle(const std::array, 3>& corners) + { + if (canUseIdentityFastPath(corners)) + return addTriangle(corners[0].vertexIx, corners[0].position, corners[1].vertexIx, corners[1].position, corners[2].vertexIx, corners[2].position); + for (const auto& corner : corners) + { + if (!registerCorner(corner)) + return false; + } + return accumulateTriangle(corners, [](const SAccumulatedCorner& corner) { return corner.accumulationGroup; }); + } + + NBL_FORCE_INLINE bool addTriangle(const uint32_t i0, const PositionT& p0, const uint32_t i1, const PositionT& p1, const uint32_t i2, const PositionT& p2) + { + const size_t maxIx = std::max(static_cast(i0), std::max(static_cast(i1), static_cast(i2))); + const size_t requiredCount = maxIx + 1ull; + if (requiredCount > m_vertexCount) + m_vertexCount = requiredCount; + ensureGroupStorage(requiredCount); + if (m_groupsByVertex.empty()) + return accumulateTriangle(p0, p1, p2, i0, i1, i2); + return addTriangle({{ + {.vertexIx = i0, .accumulationGroup = i0, .position = p0}, + {.vertexIx = i1, .accumulationGroup = i1, .position = p1}, + {.vertexIx = i2, .accumulationGroup = i2, .position = p2} + }}); + } + + //! Hot path for already-prepared identity grouping. This is still triangle accumulation, + //! not a separate algorithm. It simply avoids the generic registration overhead once the + //! caller has committed to `vertexIx == accumulationGroup`. 
+ NBL_FORCE_INLINE bool addPreparedIdentityTriangle(const uint32_t i0, const PositionT& p0, const uint32_t i1, const PositionT& p1, const uint32_t i2, const PositionT& p2) + { + if (!m_groupsByVertex.empty()) + return false; + const size_t requiredCount = std::max(static_cast(i0), std::max(static_cast(i1), static_cast(i2))) + 1ull; + if (requiredCount > m_vertexCount) + m_vertexCount = requiredCount; + if (requiredCount > m_accumulatedNormals.size()) + return false; + return accumulateTriangle(p0, p1, p2, i0, i1, i2); + } + + //! Writes accumulated normals into the caller-owned output buffer. If `normalNeedsGeneration` + //! is supplied, only those entries marked non-zero are overwritten. This supports the + //! common "preserve existing normals and fill only the missing ones" workflow. + template + NBL_FORCE_INLINE bool finalize(const std::span normals, const std::span normalNeedsGeneration = {}, const NormalT& fallback = NormalT(0.f, 0.f, 1.f)) const + { + if (!normalNeedsGeneration.empty() && normalNeedsGeneration.size() != normals.size()) + return false; + if (normals.size() < m_vertexCount) + return false; + + if (m_groupsByVertex.empty()) + { + for (size_t vertexIx = 0ull; vertexIx < m_vertexCount; ++vertexIx) + { + if (!normalNeedsGeneration.empty() && normalNeedsGeneration[vertexIx] == 0u) + continue; + const auto normal = vertexIx < m_accumulatedNormals.size() ? m_accumulatedNormals[vertexIx] : vector_t(0.f, 0.f, 0.f); + const auto lenSq = hlsl::dot(normal, normal); + normals[vertexIx] = (lenSq > 1e-20f) ? (normal * hlsl::rsqrt(lenSq)) : fallback; + } + return true; + } + + for (size_t vertexIx = 0ull; vertexIx < m_vertexCount; ++vertexIx) + { + if (!normalNeedsGeneration.empty() && normalNeedsGeneration[vertexIx] == 0u) + continue; + const uint32_t group = resolveGroup(static_cast(vertexIx)); + if (group == InvalidGroup) + return false; + + const auto normal = group < m_accumulatedNormals.size() ? 
m_accumulatedNormals[group] : vector_t(0.f, 0.f, 0.f); + const auto lenSq = hlsl::dot(normal, normal); + normals[vertexIx] = (lenSq > 1e-20f) ? (normal * hlsl::rsqrt(lenSq)) : fallback; + } + return true; + } + + private: + static inline constexpr uint32_t InvalidGroup = std::numeric_limits::max(); + + static NBL_FORCE_INLINE size_t growSize(const size_t required) + { + return required > 1ull ? std::bit_ceil(required) : 1ull; + } + + template + NBL_FORCE_INLINE bool accumulateTriangle(const std::array, 3>& corners, GroupFn&& groupFn) + { + return accumulateTriangle( + corners[0].position, corners[1].position, corners[2].position, + groupFn(corners[0]), groupFn(corners[1]), groupFn(corners[2]) + ); + } + + NBL_FORCE_INLINE void ensureGroupStorage(const size_t requiredCount) + { + if (requiredCount <= m_accumulatedNormals.size()) + return; + const size_t grownCount = growSize(requiredCount); + if (requiredCount > m_accumulatedNormals.capacity()) + m_accumulatedNormals.reserve(grownCount); + m_accumulatedNormals.resize(grownCount, vector_t(0.f, 0.f, 0.f)); + } + + NBL_FORCE_INLINE bool accumulateTriangle(const PositionT& p0, const PositionT& p1, const PositionT& p2, const uint32_t g0, const uint32_t g1, const uint32_t g2) + { + const auto edge10 = p1 - p0; + const auto edge20 = p2 - p0; + const auto faceNormal = hlsl::cross(edge10, edge20); + const auto faceLenSq = hlsl::dot(faceNormal, faceNormal); + if (faceLenSq <= 1e-20f) + return true; + + if (m_mode == EAccumulationMode::AreaWeighted) + { + m_accumulatedNormals[g0] += faceNormal; + m_accumulatedNormals[g1] += faceNormal; + m_accumulatedNormals[g2] += faceNormal; + return true; + } + + const auto weights = hlsl::shapes::util::anglesFromTriangleEdges(p2 - p1, p0 - p2, p1 - p0); + const auto unitNormal = faceNormal * hlsl::rsqrt(faceLenSq); + m_accumulatedNormals[g0] += unitNormal * weights.x; + m_accumulatedNormals[g1] += unitNormal * weights.y; + m_accumulatedNormals[g2] += unitNormal * weights.z; + return true; 
+ } + + NBL_FORCE_INLINE bool canUseIdentityFastPath(const std::array, 3>& corners) const + { + if (!m_groupsByVertex.empty()) + return false; + for (const auto& corner : corners) + { + if (corner.vertexIx != corner.accumulationGroup) + return false; + } + return true; + } + + NBL_FORCE_INLINE uint32_t resolveGroup(const uint32_t vertexIx) const + { + if (vertexIx >= m_vertexCount) + return InvalidGroup; + if (m_groupsByVertex.empty()) + return vertexIx; + if (vertexIx >= m_groupsByVertex.size()) + return vertexIx; + const uint32_t mapped = m_groupsByVertex[vertexIx]; + return mapped == InvalidGroup ? vertexIx : mapped; + } + + NBL_FORCE_INLINE bool registerCorner(const SAccumulatedCorner& corner) + { + if ((static_cast(corner.vertexIx) + 1ull) > m_vertexCount) + m_vertexCount = static_cast(corner.vertexIx) + 1ull; + ensureGroupStorage(static_cast(corner.accumulationGroup) + 1ull); + if (m_groupsByVertex.empty()) + { + if (corner.vertexIx == corner.accumulationGroup) + return true; + m_groupsByVertex.reserve(growSize(m_vertexCount)); + } + else if (corner.vertexIx >= m_groupsByVertex.size()) + m_groupsByVertex.reserve(growSize(m_vertexCount)); + if (corner.vertexIx >= m_groupsByVertex.size()) + m_groupsByVertex.resize(growSize(static_cast(corner.vertexIx) + 1ull), InvalidGroup); + auto& group = m_groupsByVertex[corner.vertexIx]; + if (group == InvalidGroup) + { + if (corner.vertexIx == corner.accumulationGroup) + return true; + group = corner.accumulationGroup; + return true; + } + return group == corner.accumulationGroup; + } + + EAccumulationMode m_mode; + size_t m_vertexCount = 0ull; + core::vector m_groupsByVertex; + core::vector m_accumulatedNormals; + }; + struct VertexData { //offset of the vertex into index buffer @@ -31,9 +296,9 @@ class CSmoothNormalGenerator final return position; } - void setHash(uint32_t hash) + void setHash(uint32_t newHash) { - this->hash = hash; + hash = newHash; } uint32_t getHash() const @@ -60,4 +325,4 @@ class 
CSmoothNormalGenerator final }; } -#endif \ No newline at end of file +#endif diff --git a/src/nbl/core/hash/blake.cpp b/src/nbl/core/hash/blake.cpp index 043c28698d..5e6ee253e9 100644 --- a/src/nbl/core/hash/blake.cpp +++ b/src/nbl/core/hash/blake.cpp @@ -1,29 +1,570 @@ #include "nbl/core/hash/blake.h" +#include +#include +#include +#include +#include +#include + +extern "C" +{ +#include "blake3.h" +#include "blake3_impl.h" +} + +/* + BLAKE3 is tree-based and explicitly designed for parallel processing. The tree mode + (chunks and parent-node reduction) is part of the specification, so a parallel + implementation can be done without changing hash semantics. + + Why this local implementation exists: + - Nabla needs a multithreaded hash path integrated with its own runtime policy and + standard C++ threading. + - Upstream C API exposes a single-threaded update path and an optional oneTBB path + (`blake3_hasher_update_tbb`) which requires building with `BLAKE3_USE_TBB`. + - Here we keep the same algorithmic rules and final digest, while using only C++20 + standard facilities (`std::async`, `std::thread`) and no oneTBB dependency. + - The local helpers below are adapted from upstream tree-processing internals used + in `c/blake3.c` and the oneTBB integration path. 
+ + Primary references: + - BLAKE3 spec repository (paper): https://github.com/BLAKE3-team/BLAKE3-specs + - C2SP BLAKE3 specification: https://c2sp.org/BLAKE3 + - Upstream BLAKE3 C API notes (`update_tbb`): https://github.com/BLAKE3-team/BLAKE3/blob/master/c/README.md +*/ + namespace nbl::core { +namespace +{ + +struct output_t +{ + uint32_t input_cv[8]; + uint64_t counter; + uint8_t block[BLAKE3_BLOCK_LEN]; + uint8_t block_len; + uint8_t flags; +}; + +INLINE void chunk_state_init_local(blake3_chunk_state* self, const uint32_t key[8], uint8_t flags) +{ + std::memcpy(self->cv, key, BLAKE3_KEY_LEN); + self->chunk_counter = 0; + std::memset(self->buf, 0, BLAKE3_BLOCK_LEN); + self->buf_len = 0; + self->blocks_compressed = 0; + self->flags = flags; +} + +INLINE void chunk_state_reset_local(blake3_chunk_state* self, const uint32_t key[8], uint64_t chunk_counter) +{ + std::memcpy(self->cv, key, BLAKE3_KEY_LEN); + self->chunk_counter = chunk_counter; + self->blocks_compressed = 0; + std::memset(self->buf, 0, BLAKE3_BLOCK_LEN); + self->buf_len = 0; +} + +INLINE size_t chunk_state_len_local(const blake3_chunk_state* self) +{ + return (BLAKE3_BLOCK_LEN * static_cast(self->blocks_compressed)) + static_cast(self->buf_len); +} + +INLINE size_t chunk_state_fill_buf_local(blake3_chunk_state* self, const uint8_t* input, size_t input_len) +{ + size_t take = BLAKE3_BLOCK_LEN - static_cast(self->buf_len); + if (take > input_len) + take = input_len; + auto* const dest = self->buf + static_cast(self->buf_len); + std::memcpy(dest, input, take); + self->buf_len += static_cast(take); + return take; +} + +INLINE uint8_t chunk_state_maybe_start_flag_local(const blake3_chunk_state* self) +{ + return self->blocks_compressed == 0 ? 
CHUNK_START : 0; +} + +INLINE output_t make_output_local(const uint32_t input_cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags) +{ + output_t ret = {}; + std::memcpy(ret.input_cv, input_cv, 32); + std::memcpy(ret.block, block, BLAKE3_BLOCK_LEN); + ret.block_len = block_len; + ret.counter = counter; + ret.flags = flags; + return ret; +} + +INLINE void output_chaining_value_local(const output_t* self, uint8_t cv[32]) +{ + uint32_t cv_words[8]; + std::memcpy(cv_words, self->input_cv, 32); + blake3_compress_in_place(cv_words, self->block, self->block_len, self->counter, self->flags); + store_cv_words(cv, cv_words); +} + +INLINE void chunk_state_update_local(blake3_chunk_state* self, const uint8_t* input, size_t input_len) +{ + if (self->buf_len > 0) + { + size_t take = chunk_state_fill_buf_local(self, input, input_len); + input += take; + input_len -= take; + if (input_len > 0) + { + blake3_compress_in_place( + self->cv, + self->buf, + BLAKE3_BLOCK_LEN, + self->chunk_counter, + self->flags | chunk_state_maybe_start_flag_local(self)); + self->blocks_compressed += 1; + self->buf_len = 0; + std::memset(self->buf, 0, BLAKE3_BLOCK_LEN); + } + } + + while (input_len > BLAKE3_BLOCK_LEN) + { + blake3_compress_in_place( + self->cv, + input, + BLAKE3_BLOCK_LEN, + self->chunk_counter, + self->flags | chunk_state_maybe_start_flag_local(self)); + self->blocks_compressed += 1; + input += BLAKE3_BLOCK_LEN; + input_len -= BLAKE3_BLOCK_LEN; + } + + (void)chunk_state_fill_buf_local(self, input, input_len); +} + +INLINE output_t chunk_state_output_local(const blake3_chunk_state* self) +{ + const uint8_t block_flags = self->flags | chunk_state_maybe_start_flag_local(self) | CHUNK_END; + return make_output_local(self->cv, self->buf, self->buf_len, self->chunk_counter, block_flags); +} + +INLINE output_t parent_output_local(const uint8_t block[BLAKE3_BLOCK_LEN], const uint32_t key[8], uint8_t flags) +{ + return make_output_local(key, block, 
BLAKE3_BLOCK_LEN, 0, flags | PARENT); +} + +INLINE size_t left_len_local(size_t content_len) +{ + const size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN; + return round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN; +} + +INLINE size_t compress_chunks_parallel_local( + const uint8_t* input, + size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, + uint8_t flags, + uint8_t* out) +{ + const uint8_t* chunks_array[MAX_SIMD_DEGREE]; + size_t input_position = 0; + size_t chunks_array_len = 0; + while (input_len - input_position >= BLAKE3_CHUNK_LEN) + { + chunks_array[chunks_array_len] = &input[input_position]; + input_position += BLAKE3_CHUNK_LEN; + chunks_array_len += 1; + } + + blake3_hash_many( + chunks_array, + chunks_array_len, + BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, + key, + chunk_counter, + true, + flags, + CHUNK_START, + CHUNK_END, + out); + + if (input_len > input_position) + { + const uint64_t counter = chunk_counter + static_cast(chunks_array_len); + blake3_chunk_state chunk_state = {}; + chunk_state_init_local(&chunk_state, key, flags); + chunk_state.chunk_counter = counter; + chunk_state_update_local(&chunk_state, &input[input_position], input_len - input_position); + const auto output = chunk_state_output_local(&chunk_state); + output_chaining_value_local(&output, &out[chunks_array_len * BLAKE3_OUT_LEN]); + return chunks_array_len + 1; + } + + return chunks_array_len; +} + +INLINE size_t compress_parents_parallel_local( + const uint8_t* child_chaining_values, + size_t num_chaining_values, + const uint32_t key[8], + uint8_t flags, + uint8_t* out) +{ + const uint8_t* parents_array[MAX_SIMD_DEGREE_OR_2]; + size_t parents_array_len = 0; + while (num_chaining_values - (2 * parents_array_len) >= 2) + { + parents_array[parents_array_len] = + &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN]; + parents_array_len += 1; + } + + blake3_hash_many( + parents_array, + parents_array_len, + 1, + key, + 0, + false, + flags | PARENT, 
+ 0, + 0, + out); + + if (num_chaining_values > 2 * parents_array_len) + { + std::memcpy( + &out[parents_array_len * BLAKE3_OUT_LEN], + &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN], + BLAKE3_OUT_LEN); + return parents_array_len + 1; + } + + return parents_array_len; +} + +constexpr size_t ParallelMinInputBytes = 1ull << 20; +constexpr size_t ParallelThreadGranularityBytes = 768ull << 10; +constexpr size_t ParallelSpawnMinSubtreeBytes = 512ull << 10; +constexpr uint32_t ParallelMaxThreads = 8u; +std::atomic_uint32_t g_parallelHashCalls = 0u; + +class SParallelCallGuard final +{ + public: + SParallelCallGuard() : m_active(g_parallelHashCalls.fetch_add(1u, std::memory_order_relaxed) + 1u) + { + } + + ~SParallelCallGuard() + { + g_parallelHashCalls.fetch_sub(1u, std::memory_order_relaxed); + } + + inline uint32_t activeCalls() const + { + return m_active; + } + + private: + uint32_t m_active = 1u; +}; + +size_t compress_subtree_wide_mt( + const uint8_t* input, + size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, + uint8_t flags, + uint8_t* out, + uint32_t threadBudget); + +INLINE void compress_subtree_to_parent_node_mt( + const uint8_t* input, + size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, + uint8_t flags, + uint8_t out[2 * BLAKE3_OUT_LEN], + uint32_t threadBudget) +{ + uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; + size_t num_cvs = compress_subtree_wide_mt(input, input_len, key, chunk_counter, flags, cv_array, threadBudget); + assert(num_cvs <= MAX_SIMD_DEGREE_OR_2); + +#if MAX_SIMD_DEGREE_OR_2 > 2 + uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2]; + while (num_cvs > 2) + { + num_cvs = compress_parents_parallel_local(cv_array, num_cvs, key, flags, out_array); + std::memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN); + } +#endif + + std::memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); +} + +size_t compress_subtree_wide_mt( + const uint8_t* input, + size_t input_len, + const uint32_t 
key[8], + uint64_t chunk_counter, + uint8_t flags, + uint8_t* out, + uint32_t threadBudget) +{ + if (input_len <= blake3_simd_degree() * BLAKE3_CHUNK_LEN) + return compress_chunks_parallel_local(input, input_len, key, chunk_counter, flags, out); + + const size_t left_input_len = left_len_local(input_len); + const size_t right_input_len = input_len - left_input_len; + const uint8_t* const right_input = &input[left_input_len]; + const uint64_t right_chunk_counter = chunk_counter + static_cast(left_input_len / BLAKE3_CHUNK_LEN); + + uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; + size_t degree = blake3_simd_degree(); + if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) + degree = 2; + uint8_t* const right_cvs = &cv_array[degree * BLAKE3_OUT_LEN]; + + size_t left_n = 0; + size_t right_n = 0; + bool spawned = false; + if ( + threadBudget > 1u && + left_input_len >= ParallelSpawnMinSubtreeBytes && + right_input_len >= ParallelSpawnMinSubtreeBytes) + { + try + { + uint32_t leftBudget = threadBudget / 2u; + if (leftBudget == 0u) + leftBudget = 1u; + uint32_t rightBudget = threadBudget - leftBudget; + if (rightBudget == 0u) + rightBudget = 1u; + + auto rightFuture = std::async(std::launch::async, [right_input, right_input_len, key, right_chunk_counter, flags, right_cvs, rightBudget]() -> size_t + { + return compress_subtree_wide_mt(right_input, right_input_len, key, right_chunk_counter, flags, right_cvs, rightBudget); + }); + left_n = compress_subtree_wide_mt(input, left_input_len, key, chunk_counter, flags, cv_array, leftBudget); + right_n = rightFuture.get(); + spawned = true; + } + catch (...) 
+ { + spawned = false; + } + } + + if (!spawned) + { + left_n = compress_subtree_wide_mt(input, left_input_len, key, chunk_counter, flags, cv_array, 1u); + right_n = compress_subtree_wide_mt(right_input, right_input_len, key, right_chunk_counter, flags, right_cvs, 1u); + } + + if (left_n == 1) + { + std::memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); + return 2; + } + + const size_t num_chaining_values = left_n + right_n; + return compress_parents_parallel_local(cv_array, num_chaining_values, key, flags, out); +} + +INLINE void hasher_merge_cv_stack_local(::blake3_hasher* self, uint64_t total_len) +{ + const size_t post_merge_stack_len = static_cast(popcnt(total_len)); + while (self->cv_stack_len > post_merge_stack_len) + { + auto* const parent_node = &self->cv_stack[(self->cv_stack_len - 2) * BLAKE3_OUT_LEN]; + const auto output = parent_output_local(parent_node, self->key, self->chunk.flags); + output_chaining_value_local(&output, parent_node); + self->cv_stack_len -= 1; + } +} + +INLINE void hasher_push_cv_local(::blake3_hasher* self, uint8_t new_cv[BLAKE3_OUT_LEN], uint64_t chunk_counter) +{ + hasher_merge_cv_stack_local(self, chunk_counter); + std::memcpy(&self->cv_stack[self->cv_stack_len * BLAKE3_OUT_LEN], new_cv, BLAKE3_OUT_LEN); + self->cv_stack_len += 1; +} + +void hasher_update_parallel(::blake3_hasher* self, const uint8_t* input_bytes, size_t input_len, uint32_t threadBudget) +{ + if (input_len == 0) + return; + + if (chunk_state_len_local(&self->chunk) > 0) + { + size_t take = BLAKE3_CHUNK_LEN - chunk_state_len_local(&self->chunk); + if (take > input_len) + take = input_len; + chunk_state_update_local(&self->chunk, input_bytes, take); + input_bytes += take; + input_len -= take; + if (input_len > 0) + { + const auto output = chunk_state_output_local(&self->chunk); + uint8_t chunk_cv[BLAKE3_OUT_LEN]; + output_chaining_value_local(&output, chunk_cv); + hasher_push_cv_local(self, chunk_cv, self->chunk.chunk_counter); + chunk_state_reset_local(&self->chunk, 
self->key, self->chunk.chunk_counter + 1); + } + else + { + return; + } + } + + while (input_len > BLAKE3_CHUNK_LEN) + { + size_t subtree_len = round_down_to_power_of_2(input_len); + const uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN; + while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) + subtree_len /= 2; + + const uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN; + if (subtree_len <= BLAKE3_CHUNK_LEN) + { + blake3_chunk_state chunk_state = {}; + chunk_state_init_local(&chunk_state, self->key, self->chunk.flags); + chunk_state.chunk_counter = self->chunk.chunk_counter; + chunk_state_update_local(&chunk_state, input_bytes, subtree_len); + const auto output = chunk_state_output_local(&chunk_state); + uint8_t cv[BLAKE3_OUT_LEN]; + output_chaining_value_local(&output, cv); + hasher_push_cv_local(self, cv, chunk_state.chunk_counter); + } + else + { + uint8_t cv_pair[2 * BLAKE3_OUT_LEN]; + compress_subtree_to_parent_node_mt( + input_bytes, + subtree_len, + self->key, + self->chunk.chunk_counter, + self->chunk.flags, + cv_pair, + threadBudget); + hasher_push_cv_local(self, cv_pair, self->chunk.chunk_counter); + hasher_push_cv_local(self, &cv_pair[BLAKE3_OUT_LEN], self->chunk.chunk_counter + (subtree_chunks / 2)); + } + self->chunk.chunk_counter += subtree_chunks; + input_bytes += subtree_len; + input_len -= subtree_len; + } + + if (input_len > 0) + { + chunk_state_update_local(&self->chunk, input_bytes, input_len); + hasher_merge_cv_stack_local(self, self->chunk.chunk_counter); + } +} + +INLINE uint32_t pick_parallel_budget(const size_t bytes) +{ + const uint32_t hw = std::thread::hardware_concurrency(); + if (hw <= 1u || bytes < ParallelMinInputBytes) + return 1u; + + const uint32_t maxBySize = static_cast(std::max(1ull, bytes / ParallelThreadGranularityBytes)); + uint32_t budget = std::min(hw, ParallelMaxThreads); + budget = std::min(budget, maxBySize); + return std::max(1u, budget); +} + +} + +void 
blake3_hasher::validateOpaqueStateLayout() +{ + // The wrapper keeps a small inline storage margin so the real vendor hasher + // stays out of the public API. The margin gives us a safe footprint reserve + // for ABI or platform differences and only increases the wrapper size slightly. + static_assert(sizeof(::blake3_hasher) <= OpaqueStateSize); + static_assert(alignof(::blake3_hasher) <= OpaqueStateAlign); +} + blake3_hasher::blake3_hasher() { - ::blake3_hasher_init(&m_state); + validateOpaqueStateLayout(); + ::blake3_hasher_init(reinterpret_cast<::blake3_hasher*>(m_state)); } blake3_hasher& blake3_hasher::update(const void* data, const size_t bytes) { - ::blake3_hasher_update(&m_state, data, bytes); - return *this; + if (bytes == 0ull) + return *this; + + assert(data != nullptr); + if (!data) + return *this; + + ::blake3_hasher_update(reinterpret_cast<::blake3_hasher*>(m_state), data, bytes); + return *this; } -void blake3_hasher::reset() { - ::blake3_hasher_reset(&m_state); +void blake3_hasher::reset() +{ + ::blake3_hasher_init(reinterpret_cast<::blake3_hasher*>(m_state)); } blake3_hasher::operator blake3_hash_t() const { - blake3_hash_t retval; - // the blake3 docs say that the hasher can be finalized multiple times - ::blake3_hasher_finalize(&m_state, retval.data, sizeof(retval)); - return retval; + blake3_hash_t retval = {}; + const auto* const state = reinterpret_cast(m_state); + ::blake3_hasher stateCopy = *state; + ::blake3_hasher_finalize(&stateCopy, retval.data, blake3_hash_t::DigestSize); + return retval; +} + +blake3_hash_t blake3_hash_buffer(const void* data, size_t bytes) +{ + if (!data && bytes != 0ull) + return {}; + if (bytes == 0ull) + return static_cast(blake3_hasher{}); + + uint32_t threadBudget = pick_parallel_budget(bytes); + if (threadBudget <= 1u) + return blake3_hash_buffer_sequential(data, bytes); + + SParallelCallGuard guard; + const uint32_t activeCalls = std::max(1u, guard.activeCalls()); + const uint32_t hw = std::max(1u, 
std::thread::hardware_concurrency()); + const uint32_t hwShare = std::max(1u, hw / activeCalls); + threadBudget = std::min(threadBudget, hwShare); + if (threadBudget <= 1u) + return blake3_hash_buffer_sequential(data, bytes); + + ::blake3_hasher hasherState = {}; + ::blake3_hasher_init(&hasherState); + hasher_update_parallel(&hasherState, reinterpret_cast(data), bytes, threadBudget); + blake3_hash_t retval = {}; + ::blake3_hasher_finalize(&hasherState, retval.data, blake3_hash_t::DigestSize); + return retval; +} + +blake3_hash_t blake3_hash_buffer_sequential(const void* data, size_t bytes) +{ + if (!data && bytes != 0ull) + return {}; + + ::blake3_hasher hasher = {}; + ::blake3_hasher_init(&hasher); + if (bytes != 0ull) + ::blake3_hasher_update(&hasher, data, bytes); + + blake3_hash_t retval = {}; + ::blake3_hasher_finalize(&hasher, retval.data, blake3_hash_t::DigestSize); + return retval; } } diff --git a/src/nbl/gtml.cpp b/src/nbl/gtml.cpp index 2829c03c07..f1f9b1d0fe 100644 --- a/src/nbl/gtml.cpp +++ b/src/nbl/gtml.cpp @@ -1,7 +1,7 @@ -#include "git_info.h" +#include "nbl/git/info.h" namespace nbl { - const gtml::GitInfo& getGitInfo(gtml::E_GIT_REPO_META repo) { - return gtml::gitMeta[repo]; + const ::gtml::IGitInfo& getGitInfo(gtml::E_GIT_REPO_META repo) { + return *gtml::gitMeta[repo]; } -} \ No newline at end of file +} diff --git a/src/nbl/system/CFilePOSIX.cpp b/src/nbl/system/CFilePOSIX.cpp index 1f78d5befa..2eb9e62ed2 100644 --- a/src/nbl/system/CFilePOSIX.cpp +++ b/src/nbl/system/CFilePOSIX.cpp @@ -28,13 +28,13 @@ CFilePOSIX::~CFilePOSIX() size_t CFilePOSIX::asyncRead(void* buffer, size_t offset, size_t sizeToRead) { - lseek(m_native, offset, SEEK_SET); - return ::read(m_native, buffer, sizeToRead); + const auto processed = pread(m_native, buffer, sizeToRead, static_cast(offset)); + return processed > 0 ? 
static_cast(processed):0ull; } size_t CFilePOSIX::asyncWrite(const void* buffer, size_t offset, size_t sizeToWrite) { - lseek(m_native, offset, SEEK_SET); - return ::write(m_native, buffer, sizeToWrite); + const auto processed = pwrite(m_native, buffer, sizeToWrite, static_cast(offset)); + return processed > 0 ? static_cast(processed):0ull; } -#endif \ No newline at end of file +#endif diff --git a/src/nbl/system/CFileWin32.cpp b/src/nbl/system/CFileWin32.cpp index ae888e0d9b..ffe9d9c6b0 100644 --- a/src/nbl/system/CFileWin32.cpp +++ b/src/nbl/system/CFileWin32.cpp @@ -57,17 +57,19 @@ inline size_t CFileWin32::getSize() const size_t CFileWin32::asyncRead(void* buffer, size_t offset, size_t sizeToRead) { - seek(offset); + OVERLAPPED overlapped = {}; + overlapped.Offset = LODWORD(offset); + overlapped.OffsetHigh = HIDWORD(offset); DWORD numOfBytesRead; - ReadFile(m_native, buffer, sizeToRead, &numOfBytesRead, nullptr); - return numOfBytesRead; + return ReadFile(m_native, buffer, sizeToRead, &numOfBytesRead, &overlapped) ? numOfBytesRead:0ull; } size_t CFileWin32::asyncWrite(const void* buffer, size_t offset, size_t sizeToWrite) { - seek(offset); + OVERLAPPED overlapped = {}; + overlapped.Offset = LODWORD(offset); + overlapped.OffsetHigh = HIDWORD(offset); DWORD numOfBytesWritten; - WriteFile(m_native, buffer, sizeToWrite, &numOfBytesWritten, nullptr); - return numOfBytesWritten; + return WriteFile(m_native, buffer, sizeToWrite, &numOfBytesWritten, &overlapped) ? 
numOfBytesWritten:0ull; } @@ -76,4 +78,4 @@ void CFileWin32::seek(size_t position) LONG hiDword = HIDWORD(position); SetFilePointer(m_native,position,&hiDword,FILE_BEGIN); } -#endif \ No newline at end of file +#endif diff --git a/src/nbl/system/CSystemLinux.cpp b/src/nbl/system/CSystemLinux.cpp index 58aaeeb51b..8a47ac09b3 100644 --- a/src/nbl/system/CSystemLinux.cpp +++ b/src/nbl/system/CSystemLinux.cpp @@ -5,10 +5,29 @@ using namespace nbl::system; #ifdef _NBL_PLATFORM_LINUX_ +#include +#include +#include +#include +#include #include #include #include #include + +namespace +{ + +std::string trimCopy(std::string value) +{ + auto notSpace = [](unsigned char ch) { return !std::isspace(ch); }; + value.erase(value.begin(), std::find_if(value.begin(), value.end(), notSpace)); + value.erase(std::find_if(value.rbegin(), value.rend(), notSpace).base(), value.end()); + return value; +} + +} + ISystem::SystemInfo CSystemLinux::getSystemInfo() const { SystemInfo info; @@ -27,6 +46,53 @@ ISystem::SystemInfo CSystemLinux::getSystemInfo() const info.desktopResX = 0xdeadbeefu; info.desktopResY = 0xdeadbeefu; + std::ifstream cpuInfo("/proc/cpuinfo"); + std::unordered_set uniquePhysicalCores; + std::string currentPhysicalId; + std::string currentCoreId; + auto flushCurrentCore = [&]() + { + if (!currentPhysicalId.empty() || !currentCoreId.empty()) + uniquePhysicalCores.insert(currentPhysicalId + ":" + currentCoreId); + currentPhysicalId.clear(); + currentCoreId.clear(); + }; + + for (std::string line; std::getline(cpuInfo, line);) + { + if (line.empty()) + { + flushCurrentCore(); + continue; + } + + if (line.starts_with("model name")) + { + const auto separator = line.find(':'); + if (separator != std::string::npos && info.cpuName == "Unknown") + info.cpuName = trimCopy(line.substr(separator + 1u)); + continue; + } + + if (line.starts_with("physical id")) + { + const auto separator = line.find(':'); + if (separator != std::string::npos) + currentPhysicalId = 
trimCopy(line.substr(separator + 1u)); + continue; + } + + if (line.starts_with("core id")) + { + const auto separator = line.find(':'); + if (separator != std::string::npos) + currentCoreId = trimCopy(line.substr(separator + 1u)); + continue; + } + } + flushCurrentCore(); + info.physicalCoreCount = static_cast(uniquePhysicalCores.size()); + return info; } @@ -62,4 +128,4 @@ bool isDebuggerAttached() return false; } -#endif \ No newline at end of file +#endif diff --git a/src/nbl/system/CSystemWin32.cpp b/src/nbl/system/CSystemWin32.cpp index 2798b4fb27..49aaf2e3ac 100644 --- a/src/nbl/system/CSystemWin32.cpp +++ b/src/nbl/system/CSystemWin32.cpp @@ -1,11 +1,70 @@ #include "nbl/system/CSystemWin32.h" #include "nbl/system/CFileWin32.h" +#include "nbl/system/SWin32PathUtilities.h" using namespace nbl; using namespace nbl::system; #ifdef _NBL_PLATFORM_WINDOWS_ +#include +#include #include +#include +#include +#include + +namespace +{ +std::string queryCpuName() +{ + int cpuInfo[4] = {}; + __cpuid(cpuInfo, 0x80000000); + const auto maxExtendedLeaf = static_cast(cpuInfo[0]); + if (maxExtendedLeaf < 0x80000004u) + return "Unknown"; + + std::array brandString = {}; + auto* cursor = reinterpret_cast(brandString.data()); + for (auto leaf = 0x80000002; leaf <= 0x80000004; ++leaf) + { + __cpuid(cpuInfo, leaf); + std::memcpy(cursor, cpuInfo, sizeof(cpuInfo)); + cursor += sizeof(cpuInfo) / sizeof(int); + } + + std::string result = brandString.data(); + auto notSpace = [](unsigned char ch) { return !std::isspace(ch); }; + result.erase(result.begin(), std::find_if(result.begin(), result.end(), notSpace)); + result.erase(std::find_if(result.rbegin(), result.rend(), notSpace).base(), result.end()); + return result.empty() ? 
std::string("Unknown") : result; +} + +uint32_t queryPhysicalCoreCount() +{ + DWORD bufferSize = 0u; + GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &bufferSize); + if (bufferSize == 0u) + return 0u; + + std::vector buffer(bufferSize); + auto* info = reinterpret_cast(buffer.data()); + if (!GetLogicalProcessorInformationEx(RelationProcessorCore, info, &bufferSize)) + return 0u; + + uint32_t coreCount = 0u; + auto* current = reinterpret_cast(info); + const auto* end = current + bufferSize; + while (current < end) + { + auto* entry = reinterpret_cast(current); + if (entry->Relationship == RelationProcessorCore) + ++coreCount; + current += entry->Size; + } + return coreCount; +} + +} //LOL the struct definition wasn't added to winapi headers do they ask to declare them yourself typedef struct _PROCESSOR_POWER_INFORMATION { @@ -34,12 +93,14 @@ ISystem::SystemInfo CSystemWin32::getSystemInfo() const info.desktopResX = GetSystemMetrics(SM_CXSCREEN); info.desktopResY = GetSystemMetrics(SM_CYSCREEN); + info.cpuName = queryCpuName(); + info.physicalCoreCount = queryPhysicalCoreCount(); return info; } -core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std::filesystem::path& filename, const core::bitflag flags) +core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std::filesystem::path& filename, core::bitflag flags) { const bool writeAccess = flags.value&IFile::ECF_WRITE; const DWORD fileAccess = ((flags.value&IFile::ECF_READ) ? FILE_GENERIC_READ:0)|(writeAccess ? FILE_GENERIC_WRITE:0); @@ -52,12 +113,11 @@ core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std: SECURITY_ATTRIBUTES secAttribs{ sizeof(SECURITY_ATTRIBUTES), nullptr, FALSE }; system::path p = filename; - if (p.is_absolute()) - p.make_preferred(); // Replace "/" separators with "\" + const auto nativePath = impl::makeLongPathAwareWindowsPath(p); // only write access should create new files if they don't exist const auto creationDisposition = writeAccess ? 
OPEN_ALWAYS : OPEN_EXISTING; - HANDLE _native = CreateFileA(p.string().data(), fileAccess, shareMode, &secAttribs, creationDisposition, FILE_ATTRIBUTE_NORMAL, nullptr); + HANDLE _native = CreateFileW(nativePath.c_str(), fileAccess, shareMode, &secAttribs, creationDisposition, FILE_ATTRIBUTE_NORMAL, nullptr); if (_native==INVALID_HANDLE_VALUE) { auto e = GetLastError(); @@ -73,36 +133,37 @@ core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std: For now it equals the size of a file so it'll work fine for archive reading, but if we try to write outside those boungs, things will go bad. */ - _fileMappingObj = CreateFileMappingA(_native,nullptr,writeAccess ? PAGE_READWRITE:PAGE_READONLY, 0, 0, filename.string().c_str()); + _fileMappingObj = CreateFileMappingA(_native,nullptr,writeAccess ? PAGE_READWRITE:PAGE_READONLY, 0, 0, nullptr); if (!_fileMappingObj) { - CloseHandle(_native); - return nullptr; + // backend fallback: file opens successfully but mapping-related flags are removed + flags.value &= ~(IFile::ECF_COHERENT | IFile::ECF_MAPPABLE); } - DWORD hi = 0; - size_t size = GetFileSize(_native,&hi); - size |= size_t(hi) << 32ull; - switch (flags.value&IFile::ECF_READ_WRITE) + else { - case IFile::ECF_READ: - _mappedPtr = MapViewOfFile(_fileMappingObj,FILE_MAP_READ,0,0,size); - break; - case IFile::ECF_WRITE: - _mappedPtr = MapViewOfFile(_fileMappingObj,FILE_MAP_WRITE,0,0,size); - break; - case IFile::ECF_READ_WRITE: - _mappedPtr = MapViewOfFile(_fileMappingObj,FILE_MAP_ALL_ACCESS,0,0,size); - break; - default: - assert(false); // should never happen - break; - } - if (!_mappedPtr) - { - CloseHandle(_native); - CloseHandle(_fileMappingObj); - return nullptr; - } + switch (flags.value&IFile::ECF_READ_WRITE) + { + case IFile::ECF_READ: + _mappedPtr = MapViewOfFile(_fileMappingObj,FILE_MAP_READ,0,0,0); + break; + case IFile::ECF_WRITE: + _mappedPtr = MapViewOfFile(_fileMappingObj,FILE_MAP_WRITE,0,0,0); + break; + case IFile::ECF_READ_WRITE: + _mappedPtr = 
MapViewOfFile(_fileMappingObj,FILE_MAP_ALL_ACCESS,0,0,0); + break; + default: + assert(false); // should never happen + break; + } + if (!_mappedPtr) + { + CloseHandle(_fileMappingObj); + _fileMappingObj = nullptr; + // backend fallback: file opens successfully but mapping-related flags are removed + flags.value &= ~(IFile::ECF_COHERENT | IFile::ECF_MAPPABLE); + } + } } return core::make_smart_refctd_ptr(core::smart_refctd_ptr(m_system),path(filename),flags,_mappedPtr,_native,_fileMappingObj); } diff --git a/src/nbl/system/ISystem.cpp b/src/nbl/system/ISystem.cpp index 6b25471f8d..f1b3dec85e 100644 --- a/src/nbl/system/ISystem.cpp +++ b/src/nbl/system/ISystem.cpp @@ -11,6 +11,14 @@ #include "nbl/system/CArchiveLoaderZip.h" #include "nbl/system/CArchiveLoaderTar.h" #include "nbl/system/CMountDirectoryArchive.h" +#include "nbl/system/SWin32PathUtilities.h" + +#ifdef _NBL_PLATFORM_WINDOWS_ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include +#endif using namespace nbl; using namespace nbl::system; @@ -37,15 +45,12 @@ ISystem::ISystem(core::smart_refctd_ptr&& caller) : m_dispatch bool ISystem::exists(const system::path& filename, const core::bitflag flags) const { const bool writeUsage = flags.value&IFile::ECF_WRITE; - - // filename too long - if (filename.string().size() >= sizeof(SRequestParams_CREATE_FILE::filename)) - return false; - // archive file - if (!writeUsage && findFileInArchive(filename).archive) - return true; // regular file - return std::filesystem::exists(filename); + std::error_code fsEc; + if (std::filesystem::exists(filename, fsEc) && !fsEc) + return true; + // archive file + return !writeUsage && findFileInArchive(filename).archive; } bool ISystem::isPathReadOnly(const system::path& p) const @@ -122,10 +127,18 @@ bool ISystem::deleteDirectory(const system::path& p) bool nbl::system::ISystem::deleteFile(const system::path& p) { +#ifdef _NBL_PLATFORM_WINDOWS_ + const auto nativePath = 
impl::makeLongPathAwareWindowsPath(std::filesystem::path(p.string())); + const DWORD attributes = GetFileAttributesW(nativePath.c_str()); + if (attributes == INVALID_FILE_ATTRIBUTES || (attributes & FILE_ATTRIBUTE_DIRECTORY)) + return false; + return DeleteFileW(nativePath.c_str()); +#else if (std::filesystem::exists(p) && !std::filesystem::is_directory(p)) return std::filesystem::remove(p); else return false; +#endif } std::error_code ISystem::moveFileOrDirectory(const system::path& oldPath, const system::path& newPath) @@ -193,14 +206,34 @@ bool ISystem::copy(const system::path& from, const system::path& to) void ISystem::createFile(future_t>& future, std::filesystem::path filename, const core::bitflag flags, const std::string_view& accessToken) { - // canonicalize - if (std::filesystem::exists(filename)) - filename = std::filesystem::canonical(filename); + std::error_code fsEc; + const bool writeUsage = flags.value&IFile::ECF_WRITE; + const bool absoluteInput = filename.is_absolute(); + bool pathExists = false; + if (!writeUsage) + { + fsEc.clear(); + pathExists = std::filesystem::exists(filename, fsEc) && !fsEc; + if (pathExists && !absoluteInput) + { + fsEc.clear(); + const auto absolute = std::filesystem::absolute(filename, fsEc); + if (!fsEc) + filename = absolute; + } + } // try archives (readonly, for now) - if (!(flags.value&IFile::ECF_WRITE)) + if (!writeUsage && !pathExists) { - const auto found = findFileInArchive(filename); + auto found = findFileInArchive(filename); + if (!found.archive && !absoluteInput) + { + fsEc.clear(); + const auto absolute = std::filesystem::absolute(filename, fsEc); + if (!fsEc) + found = findFileInArchive(absolute); + } if (found.archive) { auto file = found.archive->getFile(found.pathRelativeToArchive,flags,accessToken); @@ -213,17 +246,8 @@ void ISystem::createFile(future_t>& future, std::f } // - if (std::filesystem::exists(filename)) - filename = std::filesystem::absolute(filename).generic_string(); - if 
(filename.string().size()>=MAX_FILENAME_LENGTH) - { - future.set_result(nullptr); - return; - } - - SRequestParams_CREATE_FILE params; - strcpy(params.filename,filename.string().c_str()); + params.filename = std::move(filename); params.flags = flags.value; m_dispatcher.request(&future,params); } @@ -255,26 +279,69 @@ core::smart_refctd_ptr ISystem::openFileArchive(core::smart_refctd ISystem::FoundArchiveFile ISystem::findFileInArchive(const system::path& absolutePath) const { - system::path path = std::filesystem::exists(absolutePath) ? std::filesystem::canonical(absolutePath.parent_path()):absolutePath.parent_path(); - // going up the directory tree - while (!path.empty() && path.parent_path()!=path) + std::error_code fsEc; + const system::path normalizedAbsolutePath = absolutePath.lexically_normal(); + system::path normalizedAbsoluteFallback = {}; + bool hasAbsoluteFallback = false; + if (!normalizedAbsolutePath.is_absolute()) { - path = std::filesystem::exists(path) ? std::filesystem::canonical(path):path; + const auto absoluteCandidate = std::filesystem::absolute(normalizedAbsolutePath, fsEc); + if (!fsEc) + { + normalizedAbsoluteFallback = absoluteCandidate.lexically_normal(); + hasAbsoluteFallback = true; + } + } - const auto archives = m_cachedArchiveFiles.findRange(path); - for (auto& archive : archives) + auto tryMatchAtPath = [&](const system::path& archivePath) -> FoundArchiveFile + { + auto tryMatchSingle = [&](const system::path& normalizedPath) -> FoundArchiveFile { - const auto relative = std::filesystem::relative(absolutePath,path); - const auto items = static_cast(archive.second->listAssets()); + std::error_code relativeEc; + const auto relative = std::filesystem::relative(normalizedPath, archivePath, relativeEc); + if (relativeEc) + return { nullptr, {} }; - const IFileArchive::SFileList::SEntry itemToFind = { relative }; - auto found = std::lower_bound(items.begin(), items.end(), itemToFind); - if (found!=items.end() && 
found->pathRelativeToArchive==relative) - return {archive.second.get(),relative}; + const auto archives = m_cachedArchiveFiles.findRange(archivePath); + for (auto& archive : archives) + { + const auto items = static_cast(archive.second->listAssets()); + const IFileArchive::SFileList::SEntry itemToFind = { relative }; + auto found = std::lower_bound(items.begin(), items.end(), itemToFind); + if (found != items.end() && found->pathRelativeToArchive == relative) + return { archive.second.get(), relative }; + } + return { nullptr, {} }; + }; + + if (auto found = tryMatchSingle(normalizedAbsolutePath); found.archive) + return found; + if (hasAbsoluteFallback) + return tryMatchSingle(normalizedAbsoluteFallback); + return { nullptr, {} }; + }; + + system::path path = normalizedAbsolutePath.parent_path().lexically_normal(); + while (!path.empty() && path.parent_path() != path) + { + if (auto found = tryMatchAtPath(path); found.archive) + return found; + + fsEc.clear(); + if (std::filesystem::exists(path, fsEc) && !fsEc) + { + fsEc.clear(); + const auto canonicalPath = std::filesystem::canonical(path, fsEc); + if (!fsEc && canonicalPath != path) + { + if (auto found = tryMatchAtPath(canonicalPath); found.archive) + return found; + } } + path = path.parent_path(); } - return { nullptr,{} }; + return { nullptr, {} }; } @@ -394,4 +461,4 @@ bool ISystem::isDebuggerAttached() return false; } -#endif \ No newline at end of file +#endif diff --git a/src/nbl/system/SWin32PathUtilities.h b/src/nbl/system/SWin32PathUtilities.h new file mode 100644 index 0000000000..0f2ae33f24 --- /dev/null +++ b/src/nbl/system/SWin32PathUtilities.h @@ -0,0 +1,43 @@ +// Internal src-only header. Do not include from public headers. 
+#ifndef _NBL_SYSTEM_S_WIN32_PATH_UTILITIES_H_INCLUDED_ +#define _NBL_SYSTEM_S_WIN32_PATH_UTILITIES_H_INCLUDED_ + +#ifdef _NBL_PLATFORM_WINDOWS_ + +#include +#include +#include +#include + +namespace nbl::system::impl +{ + +inline std::wstring makeLongPathAwareWindowsPath(std::filesystem::path path) +{ + path = path.lexically_normal(); + if (!path.is_absolute()) + { + std::error_code ec; + const auto absolutePath = std::filesystem::absolute(path, ec); + if (!ec) + path = absolutePath.lexically_normal(); + } + path.make_preferred(); + + std::wstring native = path.native(); + constexpr std::wstring_view ExtendedPrefix = LR"(\\?\)"; + constexpr std::wstring_view UncPrefix = LR"(\\)"; + constexpr std::wstring_view ExtendedUncPrefix = LR"(\\?\UNC\)"; + + if (native.rfind(ExtendedPrefix.data(), 0u) == 0u) + return native; + if (native.rfind(UncPrefix.data(), 0u) == 0u) + return std::wstring(ExtendedUncPrefix) + native.substr(2u); + return std::wstring(ExtendedPrefix) + native; +} + +} + +#endif + +#endif diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index a98deff5c7..c24f7c1950 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -1,6 +1,6 @@ #include "nbl/video/IPhysicalDevice.h" -#include "git_info.h" +#include "nbl/git/info.h" #define NBL_LOG_FUNCTION m_logger.log #include "nbl/logging_macros.h" @@ -1147,4 +1147,4 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline } return retval; } -#include "nbl/undef_logging_macros.h" \ No newline at end of file +#include "nbl/undef_logging_macros.h" diff --git a/src/nbl/video/IQueue.cpp b/src/nbl/video/IQueue.cpp index 70acecffca..4e24a0e7e9 100644 --- a/src/nbl/video/IQueue.cpp +++ b/src/nbl/video/IQueue.cpp @@ -3,7 +3,7 @@ #include "nbl/video/ILogicalDevice.h" #include "nbl/video/TimelineEventHandlers.h" -#include "git_info.h" +#include "nbl/git/info.h" #define NBL_LOG_FUNCTION logger->log #include "nbl/logging_macros.h" @@ 
-245,4 +245,4 @@ void IQueue::DeferredSubmitCallback::operator()() } // namespace nbl::video -#include "nbl/undef_logging_macros.h" \ No newline at end of file +#include "nbl/undef_logging_macros.h" diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index d7f2d7dbbc..4be0913ebe 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". #include "nbl/video/utilities/CAssetConverter.h" @@ -1183,7 +1183,10 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t loo auto patchedParams = lookup.asset->getCreationParams(); assert(lookup.patch->usage.hasFlags(patchedParams.usage)); patchedParams.usage = lookup.patch->usage; - hasher.update(&patchedParams,sizeof(patchedParams)) << lookup.asset->getContentHash(); + const auto contentHash = lookup.asset->getContentHash(); + if (contentHash==NoContentHash) + return false; + hasher.update(&patchedParams,sizeof(patchedParams)) << contentHash; return true; } bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 57f66ad44b..8d78ea75db 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,8 +1,9 @@ add_subdirectory(nsc) add_subdirectory(xxHash256) +add_subdirectory(hcp) if(NBL_BUILD_IMGUI) add_subdirectory(nite EXCLUDE_FROM_ALL) endif() -NBL_ADJUST_FOLDERS(tools) \ No newline at end of file +NBL_ADJUST_FOLDERS(tools) diff --git a/tools/hcp/CMakeLists.txt b/tools/hcp/CMakeLists.txt new file mode 100644 index 0000000000..456b0f3e1b --- /dev/null +++ b/tools/hcp/CMakeLists.txt @@ -0,0 +1,31 @@ +nbl_create_executable_project("" "" "" "") +add_dependencies(${EXECUTABLE_NAME} argparse) +target_include_directories(${EXECUTABLE_NAME} PRIVATE 
$) + +enable_testing() + +set(NBL_HCP_CI_ARGS + --buffer-bytes 67108864 + --seed 12345 +) + +function(nbl_hcp_add_ci_test mode) + string(TOUPPER "${mode}" mode_upper) + add_test(NAME "NBL_HCP_${mode_upper}" + COMMAND "$" --runtime-tuning "${mode}" ${NBL_HCP_CI_ARGS} + WORKING_DIRECTORY "$" + COMMAND_EXPAND_LISTS + ) +endfunction() + +nbl_hcp_add_ci_test(sequential) +nbl_hcp_add_ci_test(heuristic) +nbl_hcp_add_ci_test(hybrid) + +set_tests_properties( + NBL_HCP_SEQUENTIAL + NBL_HCP_HEURISTIC + NBL_HCP_HYBRID + PROPERTIES + LABELS "hash;ci" +) diff --git a/tools/hcp/README.md b/tools/hcp/README.md new file mode 100644 index 0000000000..e1a11fffb7 --- /dev/null +++ b/tools/hcp/README.md @@ -0,0 +1,22 @@ +# hcp + +Headless parity checker for polygon geometry content hashing. + +## What it checks +- input geometry buffers are generated as deterministic dummy blobs from `--seed` +- `recompute(..., sequential)` as baseline +- `recompute(..., )` equals baseline hash +- `computeMissing(..., )` preserves pre-set hashes and equals baseline hash +- confirms `BLAKE3` content hashing parity independent of runtime tuning mode +- timing logs for baseline, recompute and computeMissing + +## Args +- `--runtime-tuning ` (alias: `none` -> `sequential`, default: `heuristic`) +- `--buffer-bytes ` (minimum: `2097152`) +- `--seed ` (deterministic payload seed) + +## Example +`./hcp_d.exe --runtime-tuning heuristic --buffer-bytes 67108864 --seed 12345` + +## CTest +`ctest --output-on-failure -C Debug -R NBL_HCP` diff --git a/tools/hcp/main.cpp b/tools/hcp/main.cpp new file mode 100644 index 0000000000..afc8373280 --- /dev/null +++ b/tools/hcp/main.cpp @@ -0,0 +1,359 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nabla.h" +#include "nbl/system/IApplicationFramework.h" +#include "nbl/system/CStdoutLogger.h" + +#include "nbl/asset/interchange/SFileIOPolicy.h" +#include "nbl/asset/interchange/SGeometryContentHash.h" +#include "nbl/core/hash/blake.h" +#include "argparse/argparse.hpp" + +#include +#include +#include +#include +#include +#include + +using namespace nbl; +using namespace nbl::asset; +using namespace nbl::system; + +constexpr size_t kMinBufferBytes = 2ull * 1024ull * 1024ull; +constexpr uint64_t kDefaultSeed = 0x6a09e667f3bcc909ull; + +enum class RuntimeMode : uint8_t +{ + Sequential, + Heuristic, + Hybrid +}; + +struct Options +{ + RuntimeMode mode = RuntimeMode::Heuristic; + size_t bufferBytes = kMinBufferBytes; + uint64_t seed = kDefaultSeed; +}; + +static const char* modeName(RuntimeMode mode) +{ + if (mode == RuntimeMode::Sequential) + return "sequential"; + if (mode == RuntimeMode::Hybrid) + return "hybrid"; + return "heuristic"; +} + +static SFileIOPolicy makePolicy(RuntimeMode mode) +{ + SFileIOPolicy policy = {}; + if (mode == RuntimeMode::Sequential) + policy.runtimeTuning.mode = SFileIOPolicy::SRuntimeTuning::Mode::Sequential; + else if (mode == RuntimeMode::Hybrid) + policy.runtimeTuning.mode = SFileIOPolicy::SRuntimeTuning::Mode::Hybrid; + else + policy.runtimeTuning.mode = SFileIOPolicy::SRuntimeTuning::Mode::Heuristic; + return policy; +} + +static uint64_t nextRand(uint64_t& state) +{ + state ^= state >> 12u; + state ^= state << 25u; + state ^= state >> 27u; + return state * 2685821657736338717ull; +} + +static std::vector makeRandomBytes(const size_t byteCount, const uint64_t seed, const uint64_t stream) +{ + std::vector data(byteCount); + uint64_t state = seed ^ (stream * 0x9e3779b97f4a7c15ull); + if (state == 0ull) + state = kDefaultSeed ^ stream; + for (auto& byte : data) + byte = static_cast(nextRand(state) & 0xffull); + return data; +} + +static 
std::optional parseOptions(const core::vector& args) +{ + argparse::ArgumentParser parser("hcp"); + parser.add_argument("--runtime-tuning").default_value(std::string("heuristic")); + parser.add_argument("--buffer-bytes").default_value(std::to_string(kMinBufferBytes)); + parser.add_argument("--seed").default_value(std::to_string(kDefaultSeed)); + + try + { + parser.parse_args({ args.data(), args.data() + args.size() }); + } + catch (const std::exception&) + { + return std::nullopt; + } + + auto parseU64 = [](const std::string& v) -> std::optional + { + try { return std::stoull(v, nullptr, 10); } catch (...) { return std::nullopt; } + }; + auto parseSize = [](const std::string& v) -> std::optional + { + try + { + const auto x = std::stoull(v, nullptr, 10); + if (x > static_cast(std::numeric_limits::max())) + return std::nullopt; + return static_cast(x); + } + catch (...) + { + return std::nullopt; + } + }; + + Options options = {}; + const auto mode = parser.get("--runtime-tuning"); + if (mode == "sequential" || mode == "none") + options.mode = RuntimeMode::Sequential; + else if (mode == "heuristic") + options.mode = RuntimeMode::Heuristic; + else if (mode == "hybrid") + options.mode = RuntimeMode::Hybrid; + else + return std::nullopt; + + const auto bytes = parseSize(parser.get("--buffer-bytes")); + const auto seed = parseU64(parser.get("--seed")); + if (!bytes.has_value() || !seed.has_value() || *bytes < kMinBufferBytes) + return std::nullopt; + + options.bufferBytes = *bytes; + options.seed = *seed; + return options; +} + +static core::smart_refctd_ptr createGeometry(const Options& options) +{ + constexpr E_FORMAT positionFormat = EF_R32G32B32_SFLOAT; + constexpr E_FORMAT normalFormat = EF_R32G32B32_SFLOAT; + constexpr E_FORMAT indexFormat = EF_R32_UINT; + constexpr E_FORMAT colorFormat = EF_R8G8B8A8_UNORM; + + const uint32_t positionStride = getTexelOrBlockBytesize(positionFormat); + const uint32_t normalStride = getTexelOrBlockBytesize(normalFormat); + const 
uint32_t indexStride = getTexelOrBlockBytesize(indexFormat); + const uint32_t colorStride = getTexelOrBlockBytesize(colorFormat); + const auto alignDown = [&](uint32_t stride) -> size_t { return options.bufferBytes - (options.bufferBytes % stride); }; + + auto makeBuffer = [&](size_t bytes, core::bitflag usage, uint64_t stream) -> core::smart_refctd_ptr + { + auto data = makeRandomBytes(bytes, options.seed, stream); + + ICPUBuffer::SCreationParams params = {}; + params.size = data.size(); + params.usage = usage; + params.data = data.data(); + return ICPUBuffer::create(std::move(params)); + }; + + auto makeView = [](const core::smart_refctd_ptr& buffer, E_FORMAT format, uint32_t stride) -> ICPUPolygonGeometry::SDataView + { + ICPUPolygonGeometry::SDataView view = {}; + view.composed.format = format; + view.composed.stride = stride; + view.composed.rangeFormat = IGeometryBase::getMatchingAABBFormat(format); + view.composed.resetRange(); + view.src.offset = 0ull; + view.src.size = buffer ? 
buffer->getSize() : 0ull; + view.src.buffer = buffer; + return view; + }; + + auto positionBuffer = makeBuffer(alignDown(positionStride), IBuffer::EUF_VERTEX_BUFFER_BIT, 1ull); + auto normalBuffer = makeBuffer(alignDown(normalStride), IBuffer::EUF_VERTEX_BUFFER_BIT, 2ull); + auto indexBuffer = makeBuffer(alignDown(indexStride), IBuffer::EUF_INDEX_BUFFER_BIT, 3ull); + auto colorBuffer = makeBuffer(alignDown(colorStride), IBuffer::EUF_VERTEX_BUFFER_BIT, 4ull); + if (!positionBuffer || !normalBuffer || !indexBuffer || !colorBuffer) + return nullptr; + + auto geometry = core::make_smart_refctd_ptr(); + geometry->setIndexing(IPolygonGeometryBase::TriangleList()); + geometry->setPositionView(makeView(positionBuffer, positionFormat, positionStride)); + geometry->setNormalView(makeView(normalBuffer, normalFormat, normalStride)); + geometry->setIndexView(makeView(indexBuffer, indexFormat, indexStride)); + geometry->getAuxAttributeViews()->push_back(makeView(colorBuffer, colorFormat, colorStride)); + geometry->getAuxAttributeViews()->push_back(makeView(colorBuffer, colorFormat, colorStride)); + return geometry; +} + +static bool runStandaloneBufferParityCheck(const Options& options, ILogger* logger) +{ + using clock_t = std::chrono::high_resolution_clock; + auto toMs = [](clock_t::duration d) { return std::chrono::duration(d).count(); }; + auto toMiB = [](size_t bytes) { return static_cast(bytes) / (1024.0 * 1024.0); }; + auto throughput = [&](size_t bytes, double ms) { return ms > 0.0 ? 
toMiB(bytes) * 1000.0 / ms : 0.0; }; + + auto data = makeRandomBytes(options.bufferBytes, options.seed, 0x11ull); + ICPUBuffer::SCreationParams params = {}; + params.size = data.size(); + params.usage = IBuffer::EUF_TRANSFER_SRC_BIT; + params.data = data.data(); + auto buffer = ICPUBuffer::create(std::move(params)); + if (!buffer) + { + logger->log("Failed to create standalone buffer.", ILogger::ELL_ERROR); + return false; + } + + const auto legacyStart = clock_t::now(); + const auto legacyHash = core::blake3_hash_buffer_sequential(data.data(), data.size()); + const double legacyMs = toMs(clock_t::now() - legacyStart); + + const auto directStart = clock_t::now(); + const auto directHash = core::blake3_hash_buffer(data.data(), data.size()); + const double directMs = toMs(clock_t::now() - directStart); + if (directHash != legacyHash) + { + logger->log("Direct BLAKE3 hash mismatch.", ILogger::ELL_ERROR); + return false; + } + + const auto bufferStart = clock_t::now(); + const auto bufferHash = buffer->computeContentHash(); + const double bufferMs = toMs(clock_t::now() - bufferStart); + if (bufferHash != legacyHash) + { + logger->log("ICPUBuffer::computeContentHash mismatch.", ILogger::ELL_ERROR); + return false; + } + + logger->log("HCP single-buffer bytes=%llu mib=%.3f", ILogger::ELL_INFO, static_cast(data.size()), toMiB(data.size())); + logger->log("HCP single-buffer legacy ms=%.3f mib_s=%.3f", ILogger::ELL_INFO, legacyMs, throughput(data.size(), legacyMs)); + logger->log("HCP single-buffer direct ms=%.3f mib_s=%.3f", ILogger::ELL_INFO, directMs, throughput(data.size(), directMs)); + logger->log("HCP single-buffer api ms=%.3f mib_s=%.3f", ILogger::ELL_INFO, bufferMs, throughput(data.size(), bufferMs)); + return true; +} + +static bool runGeometryParityCheck(const Options& options, ILogger* logger) +{ + using clock_t = std::chrono::high_resolution_clock; + auto toMs = [](clock_t::duration d) { return std::chrono::duration(d).count(); }; + auto toMiB = [](size_t 
bytes) { return static_cast(bytes) / (1024.0 * 1024.0); }; + auto throughput = [&](size_t bytes, double ms) { return ms > 0.0 ? toMiB(bytes) * 1000.0 / ms : 0.0; }; + + auto geometry = createGeometry(options); + if (!geometry) + { + logger->log("Failed to create dummy geometry.", ILogger::ELL_ERROR); + return false; + } + + core::vector> buffers; + SPolygonGeometryContentHash::collectBuffers(geometry.get(), buffers); + if (buffers.empty()) + { + logger->log("No buffers collected from geometry.", ILogger::ELL_ERROR); + return false; + } + + size_t totalBytes = 0ull; + for (const auto& buffer : buffers) + totalBytes += buffer ? buffer->getSize() : 0ull; + if (totalBytes == 0ull) + { + logger->log("Collected zero-sized buffers.", ILogger::ELL_ERROR); + return false; + } + + const auto legacyPolicy = makePolicy(RuntimeMode::Sequential); + SPolygonGeometryContentHash::reset(geometry.get()); + const auto legacyStart = clock_t::now(); + const auto legacyHash = SPolygonGeometryContentHash::recompute(geometry.get(), legacyPolicy); + const double legacyMs = toMs(clock_t::now() - legacyStart); + + SPolygonGeometryContentHash::reset(geometry.get()); + const auto recomputeStart = clock_t::now(); + const auto recomputeHash = SPolygonGeometryContentHash::recompute(geometry.get(), makePolicy(options.mode)); + const double recomputeMs = toMs(clock_t::now() - recomputeStart); + if (recomputeHash != legacyHash) + { + logger->log("recompute hash mismatch.", ILogger::ELL_ERROR); + return false; + } + + if (!buffers[0]) + { + logger->log("First geometry buffer is null.", ILogger::ELL_ERROR); + return false; + } + const auto preservedHash = buffers[0]->getContentHash(); + const size_t missingBytes = totalBytes - buffers[0]->getSize(); + SPolygonGeometryContentHash::reset(geometry.get()); + buffers[0]->setContentHash(preservedHash); + const auto missingStart = clock_t::now(); + const auto missingHash = SPolygonGeometryContentHash::computeMissing(geometry.get(), makePolicy(options.mode)); 
+ const double missingMs = toMs(clock_t::now() - missingStart); + if (buffers[0]->getContentHash() != preservedHash) + { + logger->log("computeMissing overwrote pre-set hash.", ILogger::ELL_ERROR); + return false; + } + if (missingHash != legacyHash) + { + logger->log("computeMissing hash mismatch.", ILogger::ELL_ERROR); + return false; + } + + logger->log("HCP mode=%s buffers=%llu total_mib=%.3f", ILogger::ELL_INFO, modeName(options.mode), static_cast(buffers.size()), toMiB(totalBytes)); + logger->log("HCP legacy ms=%.3f mib_s=%.3f", ILogger::ELL_INFO, legacyMs, throughput(totalBytes, legacyMs)); + logger->log("HCP recompute ms=%.3f mib_s=%.3f", ILogger::ELL_INFO, recomputeMs, throughput(totalBytes, recomputeMs)); + logger->log("HCP computeMissing ms=%.3f mib_s=%.3f missing_mib=%.3f", ILogger::ELL_INFO, missingMs, throughput(missingBytes, missingMs), toMiB(missingBytes)); + return true; +} + +static bool runParityCheck(const Options& options, ILogger* logger) +{ + if (!runStandaloneBufferParityCheck(options, logger)) + return false; + return runGeometryParityCheck(options, logger); +} + +class HashContentParityApp final : public IApplicationFramework +{ +public: + using IApplicationFramework::IApplicationFramework; + + bool onAppInitialized(core::smart_refctd_ptr&&) override + { + m_logger = core::make_smart_refctd_ptr(ILogger::DefaultLogMask()); + if (!isAPILoaded()) + { + m_logger->log("Could not load Nabla API.", ILogger::ELL_ERROR); + return false; + } + + const auto options = parseOptions(argv); + if (!options.has_value()) + { + m_logger->log("Usage: hcp [--runtime-tuning sequential|heuristic|hybrid] [--buffer-bytes N] [--seed U64]", ILogger::ELL_ERROR); + m_logger->log("Constraint: --buffer-bytes must be >= %llu", ILogger::ELL_ERROR, static_cast(kMinBufferBytes)); + return false; + } + + if (!runParityCheck(*options, m_logger.get())) + return false; + m_logger->log("OK", ILogger::ELL_INFO); + return true; + } + + void workLoopBody() override {} + bool 
keepRunning() override { return false; } + +private: + core::smart_refctd_ptr m_logger; +}; + +NBL_MAIN_FUNC(HashContentParityApp) diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 203aa6ce8c..9745a17299 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -1,4 +1,5 @@ #include "nabla.h" +#include "nbl/gtml/SJsonFormatter.h" #include "nbl/system/IApplicationFramework.h" #include #include @@ -418,27 +419,8 @@ class ShaderCompiler final : public IApplicationFramework { ::json j; auto& modules = j["modules"]; - - auto serialize = [&](const gtml::GitInfo& info, std::string_view target) - { - auto& s = modules[target.data()]; - s["isPopulated"] = info.isPopulated; - s["hasUncommittedChanges"] = info.hasUncommittedChanges.has_value() ? ::json(info.hasUncommittedChanges.value()) : ::json("UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"); - s["commitAuthorName"] = info.commitAuthorName; - s["commitAuthorEmail"] = info.commitAuthorEmail; - s["commitHash"] = info.commitHash; - s["commitShortHash"] = info.commitShortHash; - s["commitDate"] = info.commitDate; - s["commitSubject"] = info.commitSubject; - s["commitBody"] = info.commitBody; - s["describe"] = info.describe; - s["branchName"] = info.branchName; - s["latestTag"] = info.latestTag; - s["latestTagName"] = info.latestTagName; - }; - - serialize(gtml::nabla_git_info, "nabla"); - serialize(gtml::dxc_git_info, "dxc"); + modules["nabla"] = ::json::parse(::gtml::SJsonFormatter::toString(nbl::gtml::nabla_git_info)); + modules["dxc"] = ::json::parse(::gtml::SJsonFormatter::toString(nbl::gtml::dxc_git_info)); const auto pretty = j.dump(4); std::cout << pretty << std::endl;