From 396dd1fe04310fd3e3bf4f2ef67dff76fc8bb8fd Mon Sep 17 00:00:00 2001 From: leejet Date: Mon, 6 Apr 2026 19:52:57 +0800 Subject: [PATCH 1/4] refactor: apply RAII ownership to examples and align naming conventions --- examples/cli/main.cpp | 151 ++++----- examples/common/common.hpp | 11 +- examples/common/log.cpp | 13 +- examples/common/media_io.cpp | 466 ++++++++++++++-------------- examples/common/resource_owners.hpp | 207 ++++++++++++ examples/server/main.cpp | 178 +++++------ 6 files changed, 588 insertions(+), 438 deletions(-) create mode 100644 examples/common/resource_owners.hpp diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index ae7c34f53..133785a4b 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -17,6 +17,7 @@ #include "common/common.hpp" #include "common/media_io.h" +#include "common/resource_owners.hpp" #include "image_metadata.h" const char* previews_str[] = { @@ -275,7 +276,7 @@ void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) { } bool load_images_from_dir(const std::string dir, - std::vector& images, + SDImageVec& images, int expected_width = 0, int expected_height = 0, int max_image_num = 0, @@ -317,7 +318,7 @@ bool load_images_from_dir(const std::string dir, 3, image_buffer}); - if (max_image_num > 0 && images.size() >= max_image_num) { + if (max_image_num > 0 && static_cast(images.size()) >= max_image_num) { break; } } @@ -554,39 +555,17 @@ int main(int argc, const char* argv[]) { } } - bool vae_decode_only = true; - sd_image_t init_image = {0, 0, 3, nullptr}; - sd_image_t end_image = {0, 0, 3, nullptr}; - sd_image_t control_image = {0, 0, 3, nullptr}; - sd_image_t mask_image = {0, 0, 1, nullptr}; - std::vector ref_images; - std::vector pmid_images; - std::vector control_frames; - - auto release_all_resources = [&]() { - free(init_image.data); - free(end_image.data); - free(control_image.data); - free(mask_image.data); - for (auto image : ref_images) { - free(image.data); - image.data = nullptr; - } - ref_images.clear(); - for (auto image : pmid_images) { - free(image.data); - image.data = nullptr; - } - pmid_images.clear(); - for (auto image : control_frames) { - free(image.data); - image.data = nullptr; - } - control_frames.clear(); - }; + bool vae_decode_only = true; + SDImageOwner init_image({0, 0, 3, nullptr}); + SDImageOwner end_image({0, 0, 3, nullptr}); + SDImageOwner control_image({0, 0, 3, nullptr}); + SDImageOwner mask_image({0, 0, 1, nullptr}); + SDImageVec ref_images; + SDImageVec pmid_images; + SDImageVec control_frames; auto load_image_and_update_size = [&](const std::string& path, - sd_image_t& image, + SDImageOwner& image, bool resize_image = true, int expected_channel = 3) -> bool { int expected_width = 0; @@ -596,13 +575,12 @@ int main(int argc, const char* argv[]) { expected_height = gen_params.height; } - if (!load_sd_image_from_file(&image, path.c_str(), expected_width, expected_height, expected_channel)) { + if (!load_sd_image_from_file(image.put(), path.c_str(), expected_width, expected_height, expected_channel)) { LOG_ERROR("load image from '%s' failed", path.c_str()); - release_all_resources(); return false; } - gen_params.set_width_and_height_if_unset(image.width, image.height); + gen_params.set_width_and_height_if_unset(image.get().width, image.get().height); return true; }; @@ -623,47 +601,46 @@ int main(int argc, const char* argv[]) { if (gen_params.ref_image_paths.size() > 0) { vae_decode_only = false; for (auto& path : gen_params.ref_image_paths) { - sd_image_t ref_image = {0, 0, 3, nullptr}; + SDImageOwner ref_image({0, 0, 3, nullptr}); if (!load_image_and_update_size(path, ref_image, false)) { return 1; } - ref_images.push_back(ref_image); + ref_images.push_back(std::move(ref_image)); } } if (gen_params.mask_image_path.size() > 0) { - if (!load_sd_image_from_file(&mask_image, + if (!load_sd_image_from_file(mask_image.put(), gen_params.mask_image_path.c_str(), gen_params.get_resolved_width(), gen_params.get_resolved_height(), 1)) { LOG_ERROR("load image from '%s' failed", gen_params.mask_image_path.c_str()); - release_all_resources(); return 1; } } else { - mask_image.data = (uint8_t*)malloc(gen_params.get_resolved_width() * gen_params.get_resolved_height()); - if (mask_image.data == nullptr) { + sd_image_t generated_mask = {0, 0, 1, nullptr}; + generated_mask.data = (uint8_t*)malloc(gen_params.get_resolved_width() * gen_params.get_resolved_height()); + if (generated_mask.data == nullptr) { LOG_ERROR("malloc mask image failed"); - release_all_resources(); return 1; } - mask_image.width = gen_params.get_resolved_width(); - mask_image.height = gen_params.get_resolved_height(); - memset(mask_image.data, 255, gen_params.get_resolved_width() * gen_params.get_resolved_height()); + generated_mask.width = gen_params.get_resolved_width(); + generated_mask.height = gen_params.get_resolved_height(); + memset(generated_mask.data, 255, gen_params.get_resolved_width() * gen_params.get_resolved_height()); + mask_image.reset(generated_mask); } if (gen_params.control_image_path.size() > 0) { - if (!load_sd_image_from_file(&control_image, + if (!load_sd_image_from_file(control_image.put(), gen_params.control_image_path.c_str(), gen_params.get_resolved_width(), gen_params.get_resolved_height())) { LOG_ERROR("load image from '%s' failed", gen_params.control_image_path.c_str()); - release_all_resources(); return 1; } if (cli_params.canny_preprocess) { // apply preprocessor - preprocess_canny(control_image, + preprocess_canny(control_image.get(), 0.08f, 0.08f, 0.8f, @@ -679,7 +656,6 @@ int main(int argc, const char* argv[]) { gen_params.get_resolved_height(), gen_params.video_frames, cli_params.verbose)) { - release_all_resources(); return 1; } } @@ -691,7 +667,6 @@ int main(int argc, const char* argv[]) { 0, 0, cli_params.verbose)) { - release_all_resources(); return 1; } } @@ -702,39 +677,30 @@ int main(int argc, const char* argv[]) { sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(vae_decode_only, true, cli_params.taesd_preview); - sd_image_t* results = nullptr; - int num_results = 0; + SDImageVec results; + int num_results = 0; if (cli_params.mode == UPSCALE) { num_results = 1; - results = (sd_image_t*)calloc(num_results, sizeof(sd_image_t)); - if (results == nullptr) { - LOG_INFO("failed to allocate results array"); - release_all_resources(); - return 1; - } - - results[0] = init_image; - init_image.data = nullptr; + results.push_back(init_image.release()); } else { - sd_ctx_t* sd_ctx = new_sd_ctx(&sd_ctx_params); + SDCtxPtr sd_ctx(new_sd_ctx(&sd_ctx_params)); if (sd_ctx == nullptr) { LOG_INFO("new_sd_ctx_t failed"); - release_all_resources(); return 1; } if (gen_params.sample_params.sample_method == SAMPLE_METHOD_COUNT) { - gen_params.sample_params.sample_method = sd_get_default_sample_method(sd_ctx); + gen_params.sample_params.sample_method = sd_get_default_sample_method(sd_ctx.get()); } if (gen_params.high_noise_sample_params.sample_method == SAMPLE_METHOD_COUNT) { - gen_params.high_noise_sample_params.sample_method = sd_get_default_sample_method(sd_ctx); + gen_params.high_noise_sample_params.sample_method = sd_get_default_sample_method(sd_ctx.get()); } if (gen_params.sample_params.scheduler == SCHEDULER_COUNT) { - gen_params.sample_params.scheduler = sd_get_default_scheduler(sd_ctx, gen_params.sample_params.sample_method); + gen_params.sample_params.scheduler = sd_get_default_scheduler(sd_ctx.get(), gen_params.sample_params.sample_method); } if (cli_params.mode == IMG_GEN) { @@ -744,19 +710,19 @@ int main(int argc, const char* argv[]) { gen_params.prompt.c_str(), gen_params.negative_prompt.c_str(), gen_params.clip_skip, - init_image, + init_image.get(), ref_images.data(), (int)ref_images.size(), gen_params.auto_resize_ref_image, gen_params.increase_ref_index, - mask_image, + mask_image.get(), gen_params.get_resolved_width(), gen_params.get_resolved_height(), gen_params.sample_params, gen_params.strength, gen_params.seed, gen_params.batch_count, - control_image, + control_image.get(), gen_params.control_strength, { pmid_images.data(), @@ -768,8 +734,8 @@ int main(int argc, const char* argv[]) { gen_params.cache_params, }; - results = generate_image(sd_ctx, &img_gen_params); num_results = gen_params.batch_count; + results.adopt(generate_image(sd_ctx.get(), &img_gen_params), num_results); } else if (cli_params.mode == VID_GEN) { sd_vid_gen_params_t vid_gen_params = { gen_params.lora_vec.data(), @@ -777,8 +743,8 @@ int main(int argc, const char* argv[]) { gen_params.prompt.c_str(), gen_params.negative_prompt.c_str(), gen_params.clip_skip, - init_image, - end_image, + init_image.get(), + end_image.get(), control_frames.data(), (int)control_frames.size(), gen_params.get_resolved_width(), @@ -794,25 +760,22 @@ int main(int argc, const char* argv[]) { gen_params.cache_params, }; - results = generate_video(sd_ctx, &vid_gen_params, &num_results); + results.adopt(generate_video(sd_ctx.get(), &vid_gen_params, &num_results), num_results); } - if (results == nullptr) { + if (!results) { LOG_ERROR("generate failed"); - free_sd_ctx(sd_ctx); return 1; } - - free_sd_ctx(sd_ctx); } int upscale_factor = 4; // unused for RealESRGAN_x4plus_anime_6B.pth if (ctx_params.esrgan_path.size() > 0 && gen_params.upscale_repeats > 0) { - upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(ctx_params.esrgan_path.c_str(), - ctx_params.offload_params_to_cpu, - ctx_params.diffusion_conv_direct, - ctx_params.n_threads, - gen_params.upscale_tile_size); + UpscalerCtxPtr upscaler_ctx(new_upscaler_ctx(ctx_params.esrgan_path.c_str(), + ctx_params.offload_params_to_cpu, + ctx_params.diffusion_conv_direct, + ctx_params.n_threads, + gen_params.upscale_tile_size)); if (upscaler_ctx == nullptr) { LOG_ERROR("new_upscaler_ctx failed"); @@ -821,32 +784,24 @@ int main(int argc, const char* argv[]) { if (results[i].data == nullptr) { continue; } - sd_image_t current_image = results[i]; + SDImageOwner current_image(results[i]); + results[i] = {0, 0, 0, nullptr}; for (int u = 0; u < gen_params.upscale_repeats; ++u) { - sd_image_t upscaled_image = upscale(upscaler_ctx, current_image, upscale_factor); - if (upscaled_image.data == nullptr) { + SDImageOwner upscaled_image(upscale(upscaler_ctx.get(), current_image.get(), upscale_factor)); + if (upscaled_image.get().data == nullptr) { LOG_ERROR("upscale failed"); break; } - free(current_image.data); - current_image = upscaled_image; + current_image = std::move(upscaled_image); } - results[i] = current_image; // Set the final upscaled image as the result + results[i] = current_image.release(); // Set the final upscaled image as the result } } } - if (!save_results(cli_params, ctx_params, gen_params, results, num_results)) { + if (!save_results(cli_params, ctx_params, gen_params, results.data(), num_results)) { return 1; } - for (int i = 0; i < num_results; i++) { - free(results[i].data); - results[i].data = nullptr; - } - free(results); - - release_all_resources(); - return 0; } diff --git a/examples/common/common.hpp b/examples/common/common.hpp index 7beef9d58..79bc4ce80 100644 --- a/examples/common/common.hpp +++ b/examples/common/common.hpp @@ -20,6 +20,7 @@ namespace fs = std::filesystem; #endif // _WIN32 #include "log.h" +#include "resource_owners.hpp" #include "stable-diffusion.h" #define SAFE_STR(s) ((s) ? (s) : "") @@ -1751,8 +1752,8 @@ struct SDGenerationParams { } std::string to_string() const { - char* sample_params_str = sd_sample_params_to_str(&sample_params); - char* high_noise_sample_params_str = sd_sample_params_to_str(&high_noise_sample_params); + FreeUniquePtr sample_params_str(sd_sample_params_to_str(&sample_params)); + FreeUniquePtr high_noise_sample_params_str(sd_sample_params_to_str(&high_noise_sample_params)); std::ostringstream lora_ss; lora_ss << "{\n"; @@ -1801,9 +1802,9 @@ struct SDGenerationParams { << " pm_id_embed_path: \"" << pm_id_embed_path << "\",\n" << " pm_style_strength: " << pm_style_strength << ",\n" << " skip_layers: " << vec_to_string(skip_layers) << ",\n" - << " sample_params: " << sample_params_str << ",\n" + << " sample_params: " << SAFE_STR(sample_params_str.get()) << ",\n" << " high_noise_skip_layers: " << vec_to_string(high_noise_skip_layers) << ",\n" - << " high_noise_sample_params: " << high_noise_sample_params_str << ",\n" + << " high_noise_sample_params: " << SAFE_STR(high_noise_sample_params_str.get()) << ",\n" << " custom_sigmas: " << vec_to_string(custom_sigmas) << ",\n" << " cache_mode: \"" << cache_mode << "\",\n" << " cache_option: \"" << cache_option << "\",\n" @@ -1829,8 +1830,6 @@ struct SDGenerationParams { << vae_tiling_params.rel_size_x << ", " << vae_tiling_params.rel_size_y << " },\n" << "}"; - free(sample_params_str); - free(high_noise_sample_params_str); return oss.str(); } }; diff --git a/examples/common/log.cpp b/examples/common/log.cpp index 44fcd1e43..2c4343912 100644 --- a/examples/common/log.cpp +++ b/examples/common/log.cpp @@ -1,5 +1,7 @@ #include "log.h" +#include + bool log_verbose = false; bool log_color = false; @@ -34,17 +36,12 @@ void print_utf8(FILE* stream, const char* utf8) { return; } - wchar_t* wbuf = (wchar_t*)malloc(wlen * sizeof(wchar_t)); - if (!wbuf) { - return; - } + std::vector wbuf(static_cast(wlen)); - MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wbuf, wlen); + MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wbuf.data(), wlen); DWORD written; - WriteConsoleW(h, wbuf, wlen - 1, &written, NULL); - - free(wbuf); + WriteConsoleW(h, wbuf.data(), wlen - 1, &written, NULL); } else { DWORD written; WriteFile(h, utf8, (DWORD)strlen(utf8), &written, NULL); diff --git a/examples/common/media_io.cpp b/examples/common/media_io.cpp index ba3965326..4706b2228 100644 --- a/examples/common/media_io.cpp +++ b/examples/common/media_io.cpp @@ -1,5 +1,6 @@ #include "log.h" #include "media_io.h" +#include "resource_owners.hpp" #include #include @@ -38,6 +39,63 @@ namespace fs = std::filesystem; namespace { +#ifdef SD_USE_WEBP +struct WebPFreeDeleter { + void operator()(void* ptr) const { + if (ptr != nullptr) { + WebPFree(ptr); + } + } +}; + +struct WebPMuxDeleter { + void operator()(WebPMux* mux) const { + if (mux != nullptr) { + WebPMuxDelete(mux); + } + } +}; + +struct WebPAnimEncoderDeleter { + void operator()(WebPAnimEncoder* enc) const { + if (enc != nullptr) { + WebPAnimEncoderDelete(enc); + } + } +}; + +struct WebPDataGuard { + WebPDataGuard() { + WebPDataInit(&data); + } + + ~WebPDataGuard() { + WebPDataClear(&data); + } + + WebPData data; +}; + +struct WebPPictureGuard { + WebPPictureGuard() + : initialized(WebPPictureInit(&picture) != 0) { + } + + ~WebPPictureGuard() { + if (initialized) { + WebPPictureFree(&picture); + } + } + + WebPPicture picture; + bool initialized; +}; + +using WebPBufferPtr = std::unique_ptr; +using WebPMuxPtr = std::unique_ptr; +using WebPAnimEncoderPtr = std::unique_ptr; +#endif + bool read_binary_file_bytes(const char* path, std::vector& data) { std::ifstream fin(fs::path(path), std::ios::binary); if (!fin) { @@ -158,27 +216,25 @@ uint8_t* decode_webp_image_to_buffer(const uint8_t* data, if (expected_channel == 1) { int decoded_width = width; int decoded_height = height; - uint8_t* decoded = features.has_alpha - ? WebPDecodeRGBA(data, size, &decoded_width, &decoded_height) - : WebPDecodeRGB(data, size, &decoded_width, &decoded_height); + WebPBufferPtr decoded(features.has_alpha + ? WebPDecodeRGBA(data, size, &decoded_width, &decoded_height) + : WebPDecodeRGB(data, size, &decoded_width, &decoded_height)); if (decoded == nullptr) { return nullptr; } - uint8_t* grayscale = (uint8_t*)malloc(pixel_count); + FreeUniquePtr grayscale((uint8_t*)malloc(pixel_count)); if (grayscale == nullptr) { - WebPFree(decoded); return nullptr; } const int decoded_channels = features.has_alpha ? 4 : 3; for (size_t i = 0; i < pixel_count; ++i) { - const uint8_t* src = decoded + i * decoded_channels; + const uint8_t* src = decoded.get() + i * decoded_channels; grayscale[i] = static_cast((77 * src[0] + 150 * src[1] + 29 * src[2] + 128) >> 8); } - WebPFree(decoded); - return grayscale; + return grayscale.release(); } if (expected_channel != 3 && expected_channel != 4) { @@ -187,23 +243,21 @@ uint8_t* decode_webp_image_to_buffer(const uint8_t* data, int decoded_width = width; int decoded_height = height; - uint8_t* decoded = (expected_channel == 4) - ? WebPDecodeRGBA(data, size, &decoded_width, &decoded_height) - : WebPDecodeRGB(data, size, &decoded_width, &decoded_height); + WebPBufferPtr decoded((expected_channel == 4) + ? WebPDecodeRGBA(data, size, &decoded_width, &decoded_height) + : WebPDecodeRGB(data, size, &decoded_width, &decoded_height)); if (decoded == nullptr) { return nullptr; } const size_t out_size = pixel_count * static_cast(expected_channel); - uint8_t* output = (uint8_t*)malloc(out_size); + FreeUniquePtr output((uint8_t*)malloc(out_size)); if (output == nullptr) { - WebPFree(decoded); return nullptr; } - memcpy(output, decoded, out_size); - WebPFree(decoded); - return output; + memcpy(output.get(), decoded.get(), out_size); + return output.release(); } std::string build_webp_xmp_packet(const std::string& parameters) { @@ -255,30 +309,29 @@ bool encode_webp_image_to_vector(const uint8_t* image, return false; } - uint8_t* encoded = nullptr; - size_t encoded_size = (input_channels == 4) - ? WebPEncodeRGBA(input_image, width, height, width * input_channels, static_cast(quality), &encoded) - : WebPEncodeRGB(input_image, width, height, width * input_channels, static_cast(quality), &encoded); + uint8_t* encoded_raw = nullptr; + size_t encoded_size = (input_channels == 4) + ? WebPEncodeRGBA(input_image, width, height, width * input_channels, static_cast(quality), &encoded_raw) + : WebPEncodeRGB(input_image, width, height, width * input_channels, static_cast(quality), &encoded_raw); + WebPBufferPtr encoded(encoded_raw); if (encoded == nullptr || encoded_size == 0) { return false; } - out.assign(encoded, encoded + encoded_size); - WebPFree(encoded); + out.assign(encoded.get(), encoded.get() + encoded_size); if (parameters.empty()) { return true; } WebPData image_data; - WebPData assembled_data; WebPDataInit(&image_data); - WebPDataInit(&assembled_data); + WebPDataGuard assembled_data; image_data.bytes = out.data(); image_data.size = out.size(); - WebPMux* mux = WebPMuxNew(); + WebPMuxPtr mux(WebPMuxNew()); if (mux == nullptr) { return false; } @@ -289,16 +342,14 @@ bool encode_webp_image_to_vector(const uint8_t* image, xmp_data.bytes = reinterpret_cast(xmp_packet.data()); xmp_data.size = xmp_packet.size(); - const bool ok = WebPMuxSetImage(mux, &image_data, 1) == WEBP_MUX_OK && - WebPMuxSetChunk(mux, "XMP ", &xmp_data, 1) == WEBP_MUX_OK && - WebPMuxAssemble(mux, &assembled_data) == WEBP_MUX_OK; + const bool ok = WebPMuxSetImage(mux.get(), &image_data, 1) == WEBP_MUX_OK && + WebPMuxSetChunk(mux.get(), "XMP ", &xmp_data, 1) == WEBP_MUX_OK && + WebPMuxAssemble(mux.get(), &assembled_data.data) == WEBP_MUX_OK; if (ok) { - out.assign(assembled_data.bytes, assembled_data.bytes + assembled_data.size); + out.assign(assembled_data.data.bytes, assembled_data.data.bytes + assembled_data.data.size); } - WebPDataClear(&assembled_data); - WebPMuxDelete(mux); return ok; } @@ -382,19 +433,19 @@ uint8_t* load_image_common(bool from_memory, int expected_height, int expected_channel) { const char* image_path; - uint8_t* image_buffer = nullptr; + FreeUniquePtr image_buffer; int source_channel_count = 0; #ifdef SD_USE_WEBP if (from_memory) { image_path = "memory"; if (len > 0 && is_webp_signature(reinterpret_cast(image_path_or_bytes), static_cast(len))) { - image_buffer = decode_webp_image_to_buffer(reinterpret_cast(image_path_or_bytes), - static_cast(len), - width, - height, - expected_channel, - source_channel_count); + image_buffer.reset(decode_webp_image_to_buffer(reinterpret_cast(image_path_or_bytes), + static_cast(len), + width, + height, + expected_channel, + source_channel_count)); } } else { image_path = image_path_or_bytes; @@ -408,12 +459,12 @@ uint8_t* load_image_common(bool from_memory, LOG_ERROR("load image from '%s' failed", image_path_or_bytes); return nullptr; } - image_buffer = decode_webp_image_to_buffer(file_bytes.data(), - file_bytes.size(), - width, - height, - expected_channel, - source_channel_count); + image_buffer.reset(decode_webp_image_to_buffer(file_bytes.data(), + file_bytes.size(), + width, + height, + expected_channel, + source_channel_count)); } } #endif @@ -422,14 +473,14 @@ uint8_t* load_image_common(bool from_memory, image_path = "memory"; if (image_buffer == nullptr) { int c = 0; - image_buffer = (uint8_t*)stbi_load_from_memory((const stbi_uc*)image_path_or_bytes, len, &width, &height, &c, expected_channel); + image_buffer.reset((uint8_t*)stbi_load_from_memory((const stbi_uc*)image_path_or_bytes, len, &width, &height, &c, expected_channel)); source_channel_count = c; } } else { image_path = image_path_or_bytes; if (image_buffer == nullptr) { int c = 0; - image_buffer = (uint8_t*)stbi_load(image_path_or_bytes, &width, &height, &c, expected_channel); + image_buffer.reset((uint8_t*)stbi_load(image_path_or_bytes, &width, &height, &c, expected_channel)); source_channel_count = c; } } @@ -444,17 +495,14 @@ uint8_t* load_image_common(bool from_memory, expected_channel, source_channel_count, image_path); - free(image_buffer); return nullptr; } if (width <= 0) { LOG_ERROR("error: the width of image must be greater than 0, image_path = %s", image_path); - free(image_buffer); return nullptr; } if (height <= 0) { LOG_ERROR("error: the height of image must be greater than 0, image_path = %s", image_path); - free(image_buffer); return nullptr; } @@ -475,43 +523,39 @@ uint8_t* load_image_common(bool from_memory, if (crop_x != 0 || crop_y != 0) { LOG_INFO("crop input image from %dx%d to %dx%d, image_path = %s", width, height, crop_w, crop_h, image_path); - uint8_t* cropped_image_buffer = (uint8_t*)malloc(crop_w * crop_h * expected_channel); + FreeUniquePtr cropped_image_buffer((uint8_t*)malloc(crop_w * crop_h * expected_channel)); if (cropped_image_buffer == nullptr) { LOG_ERROR("error: allocate memory for crop\n"); - free(image_buffer); return nullptr; } for (int row = 0; row < crop_h; row++) { - uint8_t* src = image_buffer + ((crop_y + row) * width + crop_x) * expected_channel; - uint8_t* dst = cropped_image_buffer + (row * crop_w) * expected_channel; + uint8_t* src = image_buffer.get() + ((crop_y + row) * width + crop_x) * expected_channel; + uint8_t* dst = cropped_image_buffer.get() + (row * crop_w) * expected_channel; memcpy(dst, src, crop_w * expected_channel); } width = crop_w; height = crop_h; - free(image_buffer); - image_buffer = cropped_image_buffer; + image_buffer = std::move(cropped_image_buffer); } LOG_INFO("resize input image from %dx%d to %dx%d", width, height, expected_width, expected_height); - uint8_t* resized_image_buffer = (uint8_t*)malloc(expected_height * expected_width * expected_channel); + FreeUniquePtr resized_image_buffer((uint8_t*)malloc(expected_height * expected_width * expected_channel)); if (resized_image_buffer == nullptr) { LOG_ERROR("error: allocate memory for resize input image\n"); - free(image_buffer); return nullptr; } - stbir_resize(image_buffer, width, height, 0, - resized_image_buffer, expected_width, expected_height, 0, STBIR_TYPE_UINT8, + stbir_resize(image_buffer.get(), width, height, 0, + resized_image_buffer.get(), expected_width, expected_height, 0, STBIR_TYPE_UINT8, expected_channel, STBIR_ALPHA_CHANNEL_NONE, 0, STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_FILTER_BOX, STBIR_FILTER_BOX, STBIR_COLORSPACE_SRGB, nullptr); width = expected_width; height = expected_height; - free(image_buffer); - image_buffer = resized_image_buffer; + image_buffer = std::move(resized_image_buffer); } - return image_buffer; + return image_buffer.release(); } typedef struct { @@ -662,7 +706,7 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int return -1; } - FILE* f = fopen(filename, "wb"); + FilePtr f(fopen(filename, "wb")); if (!f) { perror("Error opening file for writing"); return -1; @@ -673,139 +717,126 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int uint32_t channels = images[0].channel; if (channels != 3 && channels != 4) { fprintf(stderr, "Error: Unsupported channel count: %u\n", channels); - fclose(f); return -1; } - fwrite("RIFF", 4, 1, f); - long riff_size_pos = ftell(f); - write_u32_le(f, 0); - fwrite("AVI ", 4, 1, f); - - fwrite("LIST", 4, 1, f); - write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40); - fwrite("hdrl", 4, 1, f); - - fwrite("avih", 4, 1, f); - write_u32_le(f, 56); - write_u32_le(f, 1000000 / fps); - write_u32_le(f, 0); - write_u32_le(f, 0); - write_u32_le(f, 0x110); - write_u32_le(f, num_images); - write_u32_le(f, 0); - write_u32_le(f, 1); - write_u32_le(f, width * height * 3); - write_u32_le(f, width); - write_u32_le(f, height); - write_u32_le(f, 0); - write_u32_le(f, 0); - write_u32_le(f, 0); - write_u32_le(f, 0); - - fwrite("LIST", 4, 1, f); - write_u32_le(f, 4 + 8 + 56 + 8 + 40); - fwrite("strl", 4, 1, f); - - fwrite("strh", 4, 1, f); - write_u32_le(f, 56); - fwrite("vids", 4, 1, f); - fwrite("MJPG", 4, 1, f); - write_u32_le(f, 0); - write_u16_le(f, 0); - write_u16_le(f, 0); - write_u32_le(f, 0); - write_u32_le(f, 1); - write_u32_le(f, fps); - write_u32_le(f, 0); - write_u32_le(f, num_images); - write_u32_le(f, width * height * 3); - write_u32_le(f, (uint32_t)-1); - write_u32_le(f, 0); - write_u16_le(f, 0); - write_u16_le(f, 0); - write_u16_le(f, 0); - write_u16_le(f, 0); - - fwrite("strf", 4, 1, f); - write_u32_le(f, 40); - write_u32_le(f, 40); - write_u32_le(f, width); - write_u32_le(f, height); - write_u16_le(f, 1); - write_u16_le(f, 24); - fwrite("MJPG", 4, 1, f); - write_u32_le(f, width * height * 3); - write_u32_le(f, 0); - write_u32_le(f, 0); - write_u32_le(f, 0); - write_u32_le(f, 0); - - fwrite("LIST", 4, 1, f); - long movi_size_pos = ftell(f); - write_u32_le(f, 0); - fwrite("movi", 4, 1, f); - - avi_index_entry* index = (avi_index_entry*)malloc(sizeof(avi_index_entry) * num_images); - if (!index) { - fclose(f); - return -1; - } - - struct { - uint8_t* buf; - size_t size; - } jpeg_data; + fwrite("RIFF", 4, 1, f.get()); + long riff_size_pos = ftell(f.get()); + write_u32_le(f.get(), 0); + fwrite("AVI ", 4, 1, f.get()); + + fwrite("LIST", 4, 1, f.get()); + write_u32_le(f.get(), 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40); + fwrite("hdrl", 4, 1, f.get()); + + fwrite("avih", 4, 1, f.get()); + write_u32_le(f.get(), 56); + write_u32_le(f.get(), 1000000 / fps); + write_u32_le(f.get(), 0); + write_u32_le(f.get(), 0); + write_u32_le(f.get(), 0x110); + write_u32_le(f.get(), num_images); + write_u32_le(f.get(), 0); + write_u32_le(f.get(), 1); + write_u32_le(f.get(), width * height * 3); + write_u32_le(f.get(), width); + write_u32_le(f.get(), height); + write_u32_le(f.get(), 0); + write_u32_le(f.get(), 0); + write_u32_le(f.get(), 0); + write_u32_le(f.get(), 0); + + fwrite("LIST", 4, 1, f.get()); + write_u32_le(f.get(), 4 + 8 + 56 + 8 + 40); + fwrite("strl", 4, 1, f.get()); + + fwrite("strh", 4, 1, f.get()); + write_u32_le(f.get(), 56); + fwrite("vids", 4, 1, f.get()); + fwrite("MJPG", 4, 1, f.get()); + write_u32_le(f.get(), 0); + write_u16_le(f.get(), 0); + write_u16_le(f.get(), 0); + write_u32_le(f.get(), 0); + write_u32_le(f.get(), 1); + write_u32_le(f.get(), fps); + write_u32_le(f.get(), 0); + write_u32_le(f.get(), num_images); + write_u32_le(f.get(), width * height * 3); + write_u32_le(f.get(), (uint32_t)-1); + write_u32_le(f.get(), 0); + write_u16_le(f.get(), 0); + write_u16_le(f.get(), 0); + write_u16_le(f.get(), 0); + write_u16_le(f.get(), 0); + + fwrite("strf", 4, 1, f.get()); + write_u32_le(f.get(), 40); + write_u32_le(f.get(), 40); + write_u32_le(f.get(), width); + write_u32_le(f.get(), height); + write_u16_le(f.get(), 1); + write_u16_le(f.get(), 24); + fwrite("MJPG", 4, 1, f.get()); + write_u32_le(f.get(), width * height * 3); + write_u32_le(f.get(), 0); + write_u32_le(f.get(), 0); + write_u32_le(f.get(), 0); + write_u32_le(f.get(), 0); + + fwrite("LIST", 4, 1, f.get()); + long movi_size_pos = ftell(f.get()); + write_u32_le(f.get(), 0); + fwrite("movi", 4, 1, f.get()); + + std::vector index(static_cast(num_images)); + std::vector jpeg_data; for (int i = 0; i < num_images; i++) { - jpeg_data.buf = nullptr; - jpeg_data.size = 0; + jpeg_data.clear(); auto write_to_buf = [](void* context, void* data, int size) { - auto jd = (decltype(jpeg_data)*)context; - jd->buf = (uint8_t*)realloc(jd->buf, jd->size + size); - memcpy(jd->buf + jd->size, data, size); - jd->size += size; + auto* buffer = reinterpret_cast*>(context); + const uint8_t* src = reinterpret_cast(data); + buffer->insert(buffer->end(), src, src + size); }; - stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, quality); + if (!stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, quality)) { + fprintf(stderr, "Error: Failed to encode JPEG frame.\n"); + return -1; + } - fwrite("00dc", 4, 1, f); - write_u32_le(f, (uint32_t)jpeg_data.size); - index[i].offset = ftell(f) - 8; - index[i].size = (uint32_t)jpeg_data.size; - fwrite(jpeg_data.buf, 1, jpeg_data.size, f); + fwrite("00dc", 4, 1, f.get()); + write_u32_le(f.get(), (uint32_t)jpeg_data.size()); + index[i].offset = ftell(f.get()) - 8; + index[i].size = (uint32_t)jpeg_data.size(); + fwrite(jpeg_data.data(), 1, jpeg_data.size(), f.get()); - if (jpeg_data.size % 2) { - fputc(0, f); + if (jpeg_data.size() % 2) { + fputc(0, f.get()); } - - free(jpeg_data.buf); } - long cur_pos = ftell(f); + long cur_pos = ftell(f.get()); long movi_size = cur_pos - movi_size_pos - 4; - fseek(f, movi_size_pos, SEEK_SET); - write_u32_le(f, movi_size); - fseek(f, cur_pos, SEEK_SET); + fseek(f.get(), movi_size_pos, SEEK_SET); + write_u32_le(f.get(), movi_size); + fseek(f.get(), cur_pos, SEEK_SET); - fwrite("idx1", 4, 1, f); - write_u32_le(f, num_images * 16); + fwrite("idx1", 4, 1, f.get()); + write_u32_le(f.get(), num_images * 16); for (int i = 0; i < num_images; i++) { - fwrite("00dc", 4, 1, f); - write_u32_le(f, 0x10); - write_u32_le(f, index[i].offset); - write_u32_le(f, index[i].size); + fwrite("00dc", 4, 1, f.get()); + write_u32_le(f.get(), 0x10); + write_u32_le(f.get(), index[i].offset); + write_u32_le(f.get(), index[i].size); } - cur_pos = ftell(f); + cur_pos = ftell(f.get()); long file_size = cur_pos - riff_size_pos - 4; - fseek(f, riff_size_pos, SEEK_SET); - write_u32_le(f, file_size); - fseek(f, cur_pos, SEEK_SET); - - fclose(f); - free(index); + fseek(f.get(), riff_size_pos, SEEK_SET); + write_u32_le(f.get(), file_size); + fseek(f.get(), cur_pos, SEEK_SET); return 0; } @@ -847,31 +878,30 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images return -1; } - WebPAnimEncoder* enc = WebPAnimEncoderNew(width, height, &anim_options); + WebPAnimEncoderPtr enc(WebPAnimEncoderNew(width, height, &anim_options)); if (enc == nullptr) { fprintf(stderr, "Error: Could not create WebPAnimEncoder object.\n"); return -1; } const int frame_duration_ms = std::max(1, static_cast(std::lround(1000.0 / static_cast(fps)))); - int timestamp_ms = 0; - int ret = -1; + int timestamp_ms = 0; for (int i = 0; i < num_images; ++i) { const sd_image_t& image = images[i]; if (static_cast(image.width) != width || static_cast(image.height) != height) { fprintf(stderr, "Error: Frame dimensions do not match.\n"); - goto cleanup; + return -1; } - WebPPicture picture; - if (!WebPPictureInit(&picture)) { + WebPPictureGuard picture; + if (!picture.initialized) { fprintf(stderr, "Error: Failed to initialize WebPPicture.\n"); - goto cleanup; + return -1; } - picture.use_argb = 1; - picture.width = width; - picture.height = height; + picture.picture.use_argb = 1; + picture.picture.width = width; + picture.picture.height = height; bool picture_ok = false; std::vector rgb_buffer; @@ -882,64 +912,48 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images rgb_buffer[p * 3 + 1] = image.data[p]; rgb_buffer[p * 3 + 2] = image.data[p]; } - picture_ok = WebPPictureImportRGB(&picture, rgb_buffer.data(), width * 3) != 0; + picture_ok = WebPPictureImportRGB(&picture.picture, rgb_buffer.data(), width * 3) != 0; } else if (image.channel == 4) { - picture_ok = WebPPictureImportRGBA(&picture, image.data, width * 4) != 0; + picture_ok = WebPPictureImportRGBA(&picture.picture, image.data, width * 4) != 0; } else { - picture_ok = WebPPictureImportRGB(&picture, image.data, width * 3) != 0; + picture_ok = WebPPictureImportRGB(&picture.picture, image.data, width * 3) != 0; } if (!picture_ok) { fprintf(stderr, "Error: Failed to import frame into WebPPicture.\n"); - WebPPictureFree(&picture); - goto cleanup; + return -1; } - if (!WebPAnimEncoderAdd(enc, &picture, timestamp_ms, &config)) { - fprintf(stderr, "Error: Failed to add frame to animated WebP: %s\n", WebPAnimEncoderGetError(enc)); - WebPPictureFree(&picture); - goto cleanup; + if (!WebPAnimEncoderAdd(enc.get(), &picture.picture, timestamp_ms, &config)) { + fprintf(stderr, "Error: Failed to add frame to animated WebP: %s\n", WebPAnimEncoderGetError(enc.get())); + return -1; } - WebPPictureFree(&picture); timestamp_ms += frame_duration_ms; } - if (!WebPAnimEncoderAdd(enc, nullptr, timestamp_ms, nullptr)) { - fprintf(stderr, "Error: Failed to finalize animated WebP frames: %s\n", WebPAnimEncoderGetError(enc)); - goto cleanup; + if (!WebPAnimEncoderAdd(enc.get(), nullptr, timestamp_ms, nullptr)) { + fprintf(stderr, "Error: Failed to finalize animated WebP frames: %s\n", WebPAnimEncoderGetError(enc.get())); + return -1; } - { - WebPData webp_data; - WebPDataInit(&webp_data); - if (!WebPAnimEncoderAssemble(enc, &webp_data)) { - fprintf(stderr, "Error: Failed to assemble animated WebP: %s\n", WebPAnimEncoderGetError(enc)); - WebPDataClear(&webp_data); - goto cleanup; - } - - FILE* f = fopen(filename, "wb"); - if (!f) { - perror("Error opening file for writing"); - WebPDataClear(&webp_data); - goto cleanup; - } - if (webp_data.size > 0 && fwrite(webp_data.bytes, 1, webp_data.size, f) != webp_data.size) { - fprintf(stderr, "Error: Failed to write animated WebP file.\n"); - fclose(f); - WebPDataClear(&webp_data); - goto cleanup; - } - fclose(f); - WebPDataClear(&webp_data); + WebPDataGuard webp_data; + if (!WebPAnimEncoderAssemble(enc.get(), &webp_data.data)) { + fprintf(stderr, "Error: Failed to assemble animated WebP: %s\n", WebPAnimEncoderGetError(enc.get())); + return -1; } - ret = 0; + FilePtr f(fopen(filename, "wb")); + if (!f) { + perror("Error opening file for writing"); + return -1; + } + if (webp_data.data.size > 0 && fwrite(webp_data.data.bytes, 1, webp_data.data.size, f.get()) != webp_data.data.size) { + fprintf(stderr, "Error: Failed to write animated WebP file.\n"); + return -1; + } -cleanup: - WebPAnimEncoderDelete(enc); - return ret; + return 0; } #endif diff --git a/examples/common/resource_owners.hpp b/examples/common/resource_owners.hpp new file mode 100644 index 000000000..64cce1575 --- /dev/null +++ b/examples/common/resource_owners.hpp @@ -0,0 +1,207 @@ +#ifndef __EXAMPLE_RESOURCE_OWNERS_H__ +#define __EXAMPLE_RESOURCE_OWNERS_H__ + +#include +#include +#include +#include +#include + +#include "stable-diffusion.h" + +struct FreeDeleter { + void operator()(void* ptr) const { + free(ptr); + } +}; + +struct FileCloser { + void operator()(FILE* file) const { + if (file != nullptr) { + fclose(file); + } + } +}; + +struct SDCtxDeleter { + void operator()(sd_ctx_t* ctx) const { + if (ctx != nullptr) { + free_sd_ctx(ctx); + } + } +}; + +struct UpscalerCtxDeleter { + void operator()(upscaler_ctx_t* ctx) const { + if (ctx != nullptr) { + free_upscaler_ctx(ctx); + } + } +}; + +template +using FreeUniquePtr = std::unique_ptr; + +using FilePtr = std::unique_ptr; +using SDCtxPtr = std::unique_ptr; +using UpscalerCtxPtr = std::unique_ptr; + +class SDImageOwner { +public: + SDImageOwner() = default; + explicit SDImageOwner(sd_image_t image) + : image_(image) { + } + + SDImageOwner(const SDImageOwner&) = delete; + SDImageOwner& operator=(const SDImageOwner&) = delete; + + SDImageOwner(SDImageOwner&& other) noexcept + : image_(other.release()) { + } + + SDImageOwner& operator=(SDImageOwner&& other) noexcept { + if (this != &other) { + reset(); + image_ = other.release(); + } + return *this; + } + + ~SDImageOwner() { + reset(); + } + + sd_image_t* put() { + if (image_.data != nullptr) { + free(image_.data); + image_.data = nullptr; + } + image_.width = 0; + image_.height = 0; + return &image_; + } + + sd_image_t& get() { + return image_; + } + + const sd_image_t& get() const { + return image_; + } + + sd_image_t release() { + sd_image_t image = image_; + image_ = {0, 0, 0, nullptr}; + return image; + } + + void reset(sd_image_t image = {0, 0, 0, nullptr}) { + if (image_.data != nullptr) { + free(image_.data); + } + image_ = image; + } + +private: + sd_image_t image_ = {0, 0, 0, nullptr}; +}; + +class SDImageVec { +public: + SDImageVec() = default; + + SDImageVec(const SDImageVec&) = delete; + SDImageVec& operator=(const SDImageVec&) = delete; + + SDImageVec(SDImageVec&& other) noexcept + : images_(std::move(other.images_)) { + } + + SDImageVec& operator=(SDImageVec&& other) noexcept { + if (this != &other) { + clear(); + images_ = std::move(other.images_); + } + return *this; + } + + ~SDImageVec() { + clear(); + } + + void push_back(sd_image_t image) { + images_.push_back(image); + } + + void push_back(SDImageOwner&& image) { + images_.push_back(image.release()); + } + + void reserve(size_t count) { + images_.reserve(count); + } + + void adopt(sd_image_t* images, int count) { + clear(); + if (images == nullptr || count <= 0) { + free(images); + return; + } + + images_.reserve(static_cast(count)); + for (int i = 0; i < count; ++i) { + images_.push_back(images[i]); + } + free(images); + } + + size_t size() const { + return images_.size(); + } + + bool empty() const { + return images_.empty(); + } + + explicit operator bool() const { + return !images_.empty(); + } + + sd_image_t* data() { + return images_.data(); + } + + const sd_image_t* data() const { + return images_.data(); + } + + sd_image_t& operator[](size_t index) { + return images_[index]; + } + + const sd_image_t& operator[](size_t index) const { + return images_[index]; + } + + std::vector& raw() { + return images_; + } + + const std::vector& raw() const { + return images_; + } + + void clear() { + for (sd_image_t& image : images_) { + free(image.data); + image.data = nullptr; + } + images_.clear(); + } + +private: + std::vector images_; +}; + +#endif // __EXAMPLE_RESOURCE_OWNERS_H__ diff --git a/examples/server/main.cpp b/examples/server/main.cpp index 8d4e644b5..78f1779ae 100644 --- a/examples/server/main.cpp +++ b/examples/server/main.cpp @@ -13,6 +13,7 @@ #include "common/common.hpp" #include "common/media_io.h" +#include "common/resource_owners.hpp" #ifdef HAVE_INDEX_HTML #include "frontend/dist/gen_index_html.h" @@ -286,18 +287,6 @@ std::string get_lora_full_path(ServerRuntime& rt, const std::string& path) { return (it != rt.lora_cache->end()) ? it->fullpath : ""; } -void free_results(sd_image_t* result_images, int num_results) { - if (result_images) { - for (int i = 0; i < num_results; ++i) { - if (result_images[i].data) { - free(result_images[i].data); - result_images[i].data = nullptr; - } - } - } - free(result_images); -} - void register_index_endpoints(httplib::Server& svr, const SDSvrParams& svr_params, const std::string& index_html) { const std::string serve_html_path = svr_params.serve_html_path; svr.Get("/", [serve_html_path, index_html](const httplib::Request&, httplib::Response& res) { @@ -405,10 +394,10 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { LOG_DEBUG("%s\n", gen_params.to_string().c_str()); - sd_image_t init_image = {(uint32_t)gen_params.width, (uint32_t)gen_params.height, 3, nullptr}; - sd_image_t control_image = {(uint32_t)gen_params.width, (uint32_t)gen_params.height, 3, nullptr}; - sd_image_t mask_image = {(uint32_t)gen_params.width, (uint32_t)gen_params.height, 1, nullptr}; - std::vector pmid_images; + SDImageOwner init_image({(uint32_t)gen_params.width, (uint32_t)gen_params.height, 3, nullptr}); + SDImageOwner control_image({(uint32_t)gen_params.width, (uint32_t)gen_params.height, 3, nullptr}); + SDImageOwner mask_image({(uint32_t)gen_params.width, (uint32_t)gen_params.height, 1, nullptr}); + SDImageVec pmid_images; sd_img_gen_params_t img_gen_params = { gen_params.lora_vec.data(), @@ -416,19 +405,19 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { gen_params.prompt.c_str(), gen_params.negative_prompt.c_str(), gen_params.clip_skip, - init_image, + init_image.get(), nullptr, 0, gen_params.auto_resize_ref_image, gen_params.increase_ref_index, - mask_image, + mask_image.get(), gen_params.width, gen_params.height, gen_params.sample_params, gen_params.strength, gen_params.seed, gen_params.batch_count, - control_image, + control_image.get(), gen_params.control_strength, { pmid_images.data(), @@ -440,13 +429,19 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { gen_params.cache_params, }; - sd_image_t* results = nullptr; - int num_results = 0; + SDImageVec results; + int num_results = 0; { std::lock_guard lock(*runtime->sd_ctx_mutex); - results = generate_image(runtime->sd_ctx, &img_gen_params); num_results = gen_params.batch_count; + results.adopt(generate_image(runtime->sd_ctx, &img_gen_params), num_results); + } + + if (!results) { + res.status = 500; + res.set_content(R"({"error":"generate failed"})", "application/json"); + return; } for (int i = 0; i < num_results; i++) { @@ -477,8 +472,6 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { item["b64_json"] = b64; out["data"].push_back(item); } - free_results(results, num_results); - res.set_content(out.dump(), "application/json"); res.status = 200; @@ -599,9 +592,9 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { LOG_DEBUG("%s\n", gen_params.to_string().c_str()); - sd_image_t init_image = {0, 0, 3, nullptr}; - sd_image_t control_image = {0, 0, 3, nullptr}; - std::vector pmid_images; + SDImageOwner init_image({0, 0, 3, nullptr}); + SDImageOwner control_image({0, 0, 3, nullptr}); + SDImageVec pmid_images; auto get_resolved_width = [&gen_params, runtime]() -> int { if (gen_params.width > 0) @@ -618,7 +611,7 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { return 512; }; - std::vector ref_images; + SDImageVec ref_images; ref_images.reserve(images_bytes.size()); for (auto& bytes : images_bytes) { int img_w; @@ -634,12 +627,12 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { continue; } - sd_image_t img{(uint32_t)img_w, (uint32_t)img_h, 3, raw_pixels}; - gen_params.set_width_and_height_if_unset(img.width, img.height); - ref_images.push_back(img); + SDImageOwner img({(uint32_t)img_w, (uint32_t)img_h, 3, raw_pixels}); + gen_params.set_width_and_height_if_unset(img.get().width, img.get().height); + ref_images.push_back(std::move(img)); } - sd_image_t mask_image = {0}; + SDImageOwner mask_image({0, 0, 1, nullptr}); if (!mask_bytes.empty()) { int expected_width = 0; int expected_height = 0; @@ -655,13 +648,10 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { static_cast(mask_bytes.size()), mask_w, mask_h, expected_width, expected_height, 1); - mask_image = {(uint32_t)mask_w, (uint32_t)mask_h, 1, mask_raw}; - gen_params.set_width_and_height_if_unset(mask_image.width, mask_image.height); + mask_image.reset({(uint32_t)mask_w, (uint32_t)mask_h, 1, mask_raw}); + gen_params.set_width_and_height_if_unset(mask_image.get().width, mask_image.get().height); } else { - mask_image.width = get_resolved_width(); - mask_image.height = get_resolved_height(); - mask_image.channel = 1; - mask_image.data = nullptr; + mask_image.reset({(uint32_t)get_resolved_width(), (uint32_t)get_resolved_height(), 1, nullptr}); } sd_img_gen_params_t img_gen_params = { @@ -670,19 +660,19 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { gen_params.prompt.c_str(), gen_params.negative_prompt.c_str(), gen_params.clip_skip, - init_image, + init_image.get(), ref_images.data(), (int)ref_images.size(), gen_params.auto_resize_ref_image, gen_params.increase_ref_index, - mask_image, + mask_image.get(), get_resolved_width(), get_resolved_height(), gen_params.sample_params, gen_params.strength, gen_params.seed, gen_params.batch_count, - control_image, + control_image.get(), gen_params.control_strength, { pmid_images.data(), @@ -694,13 +684,19 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { gen_params.cache_params, }; - sd_image_t* results = nullptr; - int num_results = 0; + SDImageVec results; + int num_results = 0; { std::lock_guard lock(*runtime->sd_ctx_mutex); - results = generate_image(runtime->sd_ctx, &img_gen_params); num_results = gen_params.batch_count; + results.adopt(generate_image(runtime->sd_ctx, &img_gen_params), num_results); + } + + if (!results) { + res.status = 500; + res.set_content(R"({"error":"generate failed"})", "application/json"); + return; } json out; @@ -730,20 +726,8 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { item["b64_json"] = b64; out["data"].push_back(item); } - free_results(results, num_results); - res.set_content(out.dump(), "application/json"); res.status = 200; - - if (init_image.data) { - free(init_image.data); - } - if (mask_image.data) { - free(mask_image.data); - } - for (auto ref_image : ref_images) { - free(ref_image.data); - } } catch (const std::exception& e) { res.status = 500; json err; @@ -892,12 +876,11 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) { LOG_DEBUG("%s\n", gen_params.to_string().c_str()); - sd_image_t init_image = {0, 0, 3, nullptr}; - sd_image_t control_image = {0, 0, 3, nullptr}; - sd_image_t mask_image = {0, 0, 1, nullptr}; - std::vector mask_data; - std::vector pmid_images; - std::vector ref_images; + SDImageOwner init_image({0, 0, 3, nullptr}); + SDImageOwner control_image({0, 0, 3, nullptr}); + SDImageOwner mask_image({0, 0, 1, nullptr}); + SDImageVec pmid_images; + SDImageVec ref_images; auto get_resolved_width = [&gen_params, runtime]() -> int { if (gen_params.width > 0) @@ -914,7 +897,7 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) { return 512; }; - auto decode_image = [&gen_params](sd_image_t& image, std::string encoded) -> bool { + auto decode_image = [&gen_params](SDImageOwner& image, std::string encoded) -> bool { auto comma_pos = encoded.find(','); if (comma_pos != std::string::npos) { encoded = encoded.substr(comma_pos + 1); @@ -933,10 +916,10 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) { uint8_t* raw_data = load_image_from_memory( (const char*)img_data.data(), (int)img_data.size(), img_w, img_h, - expected_width, expected_height, image.channel); + expected_width, expected_height, image.get().channel); if (raw_data) { - image = {(uint32_t)img_w, (uint32_t)img_h, image.channel, raw_data}; - gen_params.set_width_and_height_if_unset(image.width, image.height); + image.reset({(uint32_t)img_w, (uint32_t)img_h, image.get().channel, raw_data}); + gen_params.set_width_and_height_if_unset(image.get().width, image.get().height); return true; } } @@ -953,19 +936,21 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) { std::string encoded = j["mask"].get(); decode_image(mask_image, encoded); bool inpainting_mask_invert = j.value("inpainting_mask_invert", 0) != 0; - if (inpainting_mask_invert && mask_image.data != nullptr) { - for (uint32_t i = 0; i < mask_image.width * mask_image.height; i++) { - mask_image.data[i] = 255 - mask_image.data[i]; + if (inpainting_mask_invert && mask_image.get().data != nullptr) { + for (uint32_t i = 0; i < mask_image.get().width * mask_image.get().height; i++) { + mask_image.get().data[i] = 255 - mask_image.get().data[i]; } } } else { - int m_width = get_resolved_width(); - int m_height = get_resolved_height(); - mask_data = std::vector(m_width * m_height, 255); - mask_image.width = m_width; - mask_image.height = m_height; - mask_image.channel = 1; - mask_image.data = mask_data.data(); + int m_width = get_resolved_width(); + int m_height = get_resolved_height(); + sd_image_t generated_mask = {(uint32_t)m_width, (uint32_t)m_height, 1, nullptr}; + generated_mask.data = (uint8_t*)malloc(static_cast(m_width) * static_cast(m_height)); + if (generated_mask.data == nullptr) { + return bad("failed to allocate default mask"); + } + memset(generated_mask.data, 255, static_cast(m_width) * static_cast(m_height)); + mask_image.reset(generated_mask); } float denoising_strength = j.value("denoising_strength", -1.f); @@ -977,10 +962,10 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) { if (j.contains("extra_images") && j["extra_images"].is_array()) { for (auto extra_image : j["extra_images"]) { - std::string encoded = extra_image.get(); - sd_image_t tmp_image = {(uint32_t)gen_params.width, (uint32_t)gen_params.height, 3, nullptr}; + std::string encoded = extra_image.get(); + SDImageOwner tmp_image({(uint32_t)gen_params.width, (uint32_t)gen_params.height, 3, nullptr}); if (decode_image(tmp_image, encoded)) { - ref_images.push_back(tmp_image); + ref_images.push_back(std::move(tmp_image)); } } } @@ -991,19 +976,19 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) { gen_params.prompt.c_str(), gen_params.negative_prompt.c_str(), gen_params.clip_skip, - init_image, + init_image.get(), ref_images.data(), (int)ref_images.size(), gen_params.auto_resize_ref_image, gen_params.increase_ref_index, - mask_image, + mask_image.get(), get_resolved_width(), get_resolved_height(), gen_params.sample_params, gen_params.strength, gen_params.seed, gen_params.batch_count, - control_image, + control_image.get(), gen_params.control_strength, { pmid_images.data(), @@ -1015,13 +1000,19 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) { gen_params.cache_params, }; - sd_image_t* results = nullptr; - int num_results = 0; + SDImageVec results; + int num_results = 0; { std::lock_guard lock(*runtime->sd_ctx_mutex); - results = generate_image(runtime->sd_ctx, &img_gen_params); num_results = gen_params.batch_count; + results.adopt(generate_image(runtime->sd_ctx, &img_gen_params), num_results); + } + + if (!results) { + res.status = 500; + res.set_content(R"({"error":"generate failed"})", "application/json"); + return; } json out; @@ -1052,21 +1043,9 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) { std::string b64 = base64_encode(image_bytes); out["images"].push_back(b64); } - free_results(results, num_results); - res.set_content(out.dump(), "application/json"); res.status = 200; - if (init_image.data) { - free(init_image.data); - } - if (mask_image.data && mask_data.empty()) { - free(mask_image.data); - } - for (auto ref_image : ref_images) { - free(ref_image.data); - } - } catch (const std::exception& e) { res.status = 500; json err; @@ -1178,7 +1157,7 @@ int main(int argc, const char** argv) { LOG_DEBUG("%s", default_gen_params.to_string().c_str()); sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(false, false, false); - sd_ctx_t* sd_ctx = new_sd_ctx(&sd_ctx_params); + SDCtxPtr sd_ctx(new_sd_ctx(&sd_ctx_params)); if (sd_ctx == nullptr) { LOG_ERROR("new_sd_ctx_t failed"); @@ -1190,7 +1169,7 @@ int main(int argc, const char** argv) { std::vector lora_cache; std::mutex lora_mutex; ServerRuntime runtime = { - sd_ctx, + sd_ctx.get(), &sd_ctx_mutex, &svr_params, &ctx_params, @@ -1231,6 +1210,5 @@ int main(int argc, const char** argv) { LOG_INFO("listening on: %s:%d\n", svr_params.listen_ip.c_str(), svr_params.listen_port); svr.listen(svr_params.listen_ip, svr_params.listen_port); - free_sd_ctx(sd_ctx); return 0; } From 3676195ec62a197c4aef07a0147f523f6ecb6301 Mon Sep 17 00:00:00 2001 From: leejet Date: Mon, 6 Apr 2026 19:57:50 +0800 Subject: [PATCH 2/4] fix: correct unique_ptr buffer access in media_io --- examples/common/media_io.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/common/media_io.cpp b/examples/common/media_io.cpp index 4706b2228..c570db9ae 100644 --- a/examples/common/media_io.cpp +++ b/examples/common/media_io.cpp @@ -231,7 +231,7 @@ uint8_t* decode_webp_image_to_buffer(const uint8_t* data, const int decoded_channels = features.has_alpha ? 4 : 3; for (size_t i = 0; i < pixel_count; ++i) { const uint8_t* src = decoded.get() + i * decoded_channels; - grayscale[i] = static_cast((77 * src[0] + 150 * src[1] + 29 * src[2] + 128) >> 8); + grayscale.get()[i] = static_cast((77 * src[0] + 150 * src[1] + 29 * src[2] + 128) >> 8); } return grayscale.release(); From 03956b3e99838d417f6d1ed4fb72f9903cc623fa Mon Sep 17 00:00:00 2001 From: leejet Date: Mon, 6 Apr 2026 20:08:42 +0800 Subject: [PATCH 3/4] simplify FILE access in create_mjpg_avi_from_sd_images --- examples/common/media_io.cpp | 179 ++++++++++++++++++----------------- 1 file changed, 90 insertions(+), 89 deletions(-) diff --git a/examples/common/media_io.cpp b/examples/common/media_io.cpp index c570db9ae..ef6e6ceff 100644 --- a/examples/common/media_io.cpp +++ b/examples/common/media_io.cpp @@ -706,11 +706,12 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int return -1; } - FilePtr f(fopen(filename, "wb")); - if (!f) { + FilePtr file(fopen(filename, "wb")); + if (!file) { perror("Error opening file for writing"); return -1; } + FILE* f = file.get(); uint32_t width = images[0].width; uint32_t height = images[0].height; @@ -720,74 +721,74 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int return -1; } - fwrite("RIFF", 4, 1, f.get()); - long riff_size_pos = ftell(f.get()); - write_u32_le(f.get(), 0); - fwrite("AVI ", 4, 1, f.get()); - - fwrite("LIST", 4, 1, f.get()); - write_u32_le(f.get(), 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40); - fwrite("hdrl", 4, 1, f.get()); - - fwrite("avih", 4, 1, f.get()); - write_u32_le(f.get(), 56); - write_u32_le(f.get(), 1000000 / fps); - write_u32_le(f.get(), 0); - write_u32_le(f.get(), 0); - write_u32_le(f.get(), 0x110); - write_u32_le(f.get(), num_images); - write_u32_le(f.get(), 0); - write_u32_le(f.get(), 1); - write_u32_le(f.get(), width * height * 3); - write_u32_le(f.get(), width); - write_u32_le(f.get(), height); - write_u32_le(f.get(), 0); - write_u32_le(f.get(), 0); - write_u32_le(f.get(), 0); - write_u32_le(f.get(), 0); - - fwrite("LIST", 4, 1, f.get()); - write_u32_le(f.get(), 4 + 8 + 56 + 8 + 40); - fwrite("strl", 4, 1, f.get()); - - fwrite("strh", 4, 1, f.get()); - write_u32_le(f.get(), 56); - fwrite("vids", 4, 1, f.get()); - fwrite("MJPG", 4, 1, f.get()); - write_u32_le(f.get(), 0); - write_u16_le(f.get(), 0); - write_u16_le(f.get(), 0); - write_u32_le(f.get(), 0); - write_u32_le(f.get(), 1); - write_u32_le(f.get(), fps); - write_u32_le(f.get(), 0); - write_u32_le(f.get(), num_images); - write_u32_le(f.get(), width * height * 3); - write_u32_le(f.get(), (uint32_t)-1); - write_u32_le(f.get(), 0); - write_u16_le(f.get(), 0); - write_u16_le(f.get(), 0); - write_u16_le(f.get(), 0); - write_u16_le(f.get(), 0); - - fwrite("strf", 4, 1, f.get()); - write_u32_le(f.get(), 40); - write_u32_le(f.get(), 40); - write_u32_le(f.get(), width); - write_u32_le(f.get(), height); - write_u16_le(f.get(), 1); - write_u16_le(f.get(), 24); - fwrite("MJPG", 4, 1, f.get()); - write_u32_le(f.get(), width * height * 3); - write_u32_le(f.get(), 0); - write_u32_le(f.get(), 0); - write_u32_le(f.get(), 0); - write_u32_le(f.get(), 0); - - fwrite("LIST", 4, 1, f.get()); - long movi_size_pos = ftell(f.get()); - write_u32_le(f.get(), 0); - fwrite("movi", 4, 1, f.get()); + fwrite("RIFF", 4, 1, f); + long riff_size_pos = ftell(f); + write_u32_le(f, 0); + fwrite("AVI ", 4, 1, f); + + fwrite("LIST", 4, 1, f); + write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40); + fwrite("hdrl", 4, 1, f); + + fwrite("avih", 4, 1, f); + write_u32_le(f, 56); + write_u32_le(f, 1000000 / fps); + write_u32_le(f, 0); + write_u32_le(f, 0); + write_u32_le(f, 0x110); + write_u32_le(f, num_images); + write_u32_le(f, 0); + write_u32_le(f, 1); + write_u32_le(f, width * height * 3); + write_u32_le(f, width); + write_u32_le(f, height); + write_u32_le(f, 0); + write_u32_le(f, 0); + write_u32_le(f, 0); + write_u32_le(f, 0); + + fwrite("LIST", 4, 1, f); + write_u32_le(f, 4 + 8 + 56 + 8 + 40); + fwrite("strl", 4, 1, f); + + fwrite("strh", 4, 1, f); + write_u32_le(f, 56); + fwrite("vids", 4, 1, f); + fwrite("MJPG", 4, 1, f); + write_u32_le(f, 0); + write_u16_le(f, 0); + write_u16_le(f, 0); + write_u32_le(f, 0); + write_u32_le(f, 1); + write_u32_le(f, fps); + write_u32_le(f, 0); + write_u32_le(f, num_images); + write_u32_le(f, width * height * 3); + write_u32_le(f, (uint32_t)-1); + write_u32_le(f, 0); + write_u16_le(f, 0); + write_u16_le(f, 0); + write_u16_le(f, 0); + write_u16_le(f, 0); + + fwrite("strf", 4, 1, f); + write_u32_le(f, 40); + write_u32_le(f, 40); + write_u32_le(f, width); + write_u32_le(f, height); + write_u16_le(f, 1); + write_u16_le(f, 24); + fwrite("MJPG", 4, 1, f); + write_u32_le(f, width * height * 3); + write_u32_le(f, 0); + write_u32_le(f, 0); + write_u32_le(f, 0); + write_u32_le(f, 0); + + fwrite("LIST", 4, 1, f); + long movi_size_pos = ftell(f); + write_u32_le(f, 0); + fwrite("movi", 4, 1, f); std::vector index(static_cast(num_images)); std::vector jpeg_data; @@ -806,37 +807,37 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int return -1; } - fwrite("00dc", 4, 1, f.get()); - write_u32_le(f.get(), (uint32_t)jpeg_data.size()); - index[i].offset = ftell(f.get()) - 8; + fwrite("00dc", 4, 1, f); + write_u32_le(f, (uint32_t)jpeg_data.size()); + index[i].offset = ftell(f) - 8; index[i].size = (uint32_t)jpeg_data.size(); - fwrite(jpeg_data.data(), 1, jpeg_data.size(), f.get()); + fwrite(jpeg_data.data(), 1, jpeg_data.size(), f); if (jpeg_data.size() % 2) { - fputc(0, f.get()); + fputc(0, f); } } - long cur_pos = ftell(f.get()); + long cur_pos = ftell(f); long movi_size = cur_pos - movi_size_pos - 4; - fseek(f.get(), movi_size_pos, SEEK_SET); - write_u32_le(f.get(), movi_size); - fseek(f.get(), cur_pos, SEEK_SET); + fseek(f, movi_size_pos, SEEK_SET); + write_u32_le(f, movi_size); + fseek(f, cur_pos, SEEK_SET); - fwrite("idx1", 4, 1, f.get()); - write_u32_le(f.get(), num_images * 16); + fwrite("idx1", 4, 1, f); + write_u32_le(f, num_images * 16); for (int i = 0; i < num_images; i++) { - fwrite("00dc", 4, 1, f.get()); - write_u32_le(f.get(), 0x10); - write_u32_le(f.get(), index[i].offset); - write_u32_le(f.get(), index[i].size); + fwrite("00dc", 4, 1, f); + write_u32_le(f, 0x10); + write_u32_le(f, index[i].offset); + write_u32_le(f, index[i].size); } - cur_pos = ftell(f.get()); + cur_pos = ftell(f); long file_size = cur_pos - riff_size_pos - 4; - fseek(f.get(), riff_size_pos, SEEK_SET); - write_u32_le(f.get(), file_size); - fseek(f.get(), cur_pos, SEEK_SET); + fseek(f, riff_size_pos, SEEK_SET); + write_u32_le(f, file_size); + fseek(f, cur_pos, SEEK_SET); return 0; } From 5295cededa5ab541d3277541be6d8194f9e1b265 Mon Sep 17 00:00:00 2001 From: leejet Date: Mon, 6 Apr 2026 20:14:22 +0800 Subject: [PATCH 4/4] fix vid_gen --- examples/cli/main.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 133785a4b..aef94e852 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -760,7 +760,8 @@ int main(int argc, const char* argv[]) { gen_params.cache_params, }; - results.adopt(generate_video(sd_ctx.get(), &vid_gen_params, &num_results), num_results); + sd_image_t* generated_video = generate_video(sd_ctx.get(), &vid_gen_params, &num_results); + results.adopt(generated_video, num_results); } if (!results) {