Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ jobs:
id: build-push
uses: docker/build-push-action@v6
with:
context: .
platforms: linux/amd64
push: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
file: Dockerfile.${{ matrix.variant }}
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@
[submodule "thirdparty/libwebp"]
path = thirdparty/libwebp
url = https://github.com/webmproject/libwebp.git
[submodule "thirdparty/libwebm"]
path = thirdparty/libwebm
url = https://github.com/webmproject/libwebm.git
38 changes: 37 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ else()
set(SD_WEBP_DEFAULT ${SD_USE_SYSTEM_WEBP})
endif()

# Detect the vendored libwebm checkout and derive the default for SD_WEBM from it:
# ON when the submodule is present, otherwise follow SD_USE_SYSTEM_WEBM.
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libwebm/CMakeLists.txt")
    set(SD_SUBMODULE_WEBM TRUE)
    set(SD_WEBM_DEFAULT ON)
else()
    set(SD_SUBMODULE_WEBM FALSE)
    set(SD_WEBM_DEFAULT ${SD_USE_SYSTEM_WEBM})
endif()

#
# Option list
#
Expand All @@ -41,6 +51,8 @@ endif()
option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
option(SD_WEBP "sd: enable WebP image I/O support" ${SD_WEBP_DEFAULT})
option(SD_USE_SYSTEM_WEBP "sd: link against system libwebp" OFF)
option(SD_WEBM "sd: enable WebM video output support" ${SD_WEBM_DEFAULT})
option(SD_USE_SYSTEM_WEBM "sd: link against system libwebm" OFF)
option(SD_CUDA "sd: cuda backend" OFF)
option(SD_HIPBLAS "sd: rocm backend" OFF)
option(SD_METAL "sd: metal backend" OFF)
Expand Down Expand Up @@ -111,7 +123,31 @@ if(SD_WEBP)
)
endif()
endif()
add_compile_definitions(SD_USE_WEBP)
endif()

# WebM video output support: validate the prerequisites and, for system builds,
# define an imported `webm` target from the located headers and library.
if(SD_WEBM)
# WebM frames are encoded through libwebp, so WebP support is a hard requirement.
if(NOT SD_WEBP)
message(FATAL_ERROR "SD_WEBM requires SD_WEBP because WebM output reuses libwebp VP8 encoding.")
endif()
# Fail early when neither the submodule checkout nor a system library was selected.
if(NOT SD_SUBMODULE_WEBM AND NOT SD_USE_SYSTEM_WEBM)
message(FATAL_ERROR "WebM support enabled but no source found.
Either initialize the submodule:\n git submodule update --init thirdparty/libwebm\n\n"
"Or link against system library:\n cmake (...) -DSD_USE_SYSTEM_WEBM=ON")
endif()
if(SD_USE_SYSTEM_WEBM)
# Locate the include root by probing for any of the libwebm headers; the `webm`
# suffix is also tried under each search prefix.
find_path(WEBM_INCLUDE_DIR
NAMES mkvmuxer/mkvmuxer.h mkvparser/mkvparser.h common/webmids.h
PATH_SUFFIXES webm
REQUIRED)
find_library(WEBM_LIBRARY
NAMES webm libwebm
REQUIRED)

# Wrap the located library in an imported target named `webm`, which is the
# target name examples/cli links against.
add_library(webm UNKNOWN IMPORTED)
set_target_properties(webm PROPERTIES
IMPORTED_LOCATION "${WEBM_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${WEBM_INCLUDE_DIR}")
endif()
endif()

set(SD_LIB stable-diffusion)
Expand Down
16 changes: 12 additions & 4 deletions docs/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,23 @@ git submodule init
git submodule update
```

## WebP Support in Examples
## WebP and WebM Support in Examples

The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O. This is enabled by default.
The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O, and `examples/cli` can also use `libwebm` for `.webm` video output. Both are enabled by default. WebM output currently reuses `libwebp` to encode each frame as VP8 before muxing with `libwebm`.

If you do not want WebP support, you can disable it at configure time:
If you do not want WebP/WebM support, you can disable them at configure time. WebM output depends on WebP, so `SD_WEBM` must also be turned off whenever `SD_WEBP` is disabled:

```shell
mkdir build && cd build
cmake .. -DSD_WEBP=OFF
cmake .. -DSD_WEBP=OFF -DSD_WEBM=OFF
cmake --build . --config Release
```

If the submodules are not available, you can also link against system packages instead:

```shell
mkdir build && cd build
cmake .. -DSD_USE_SYSTEM_WEBP=ON -DSD_USE_SYSTEM_WEBM=ON
cmake --build . --config Release
```

Expand Down
5 changes: 5 additions & 0 deletions examples/cli/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ add_executable(${TARGET}
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
if(SD_WEBP)
target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBP)
target_link_libraries(${TARGET} PRIVATE webp libwebpmux)
endif()
# When WebM support is enabled, define SD_USE_WEBM for this target's sources and
# link the `webm` library target.
if(SD_WEBM)
target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBM)
target_link_libraries(${TARGET} PRIVATE webm)
endif()
target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
4 changes: 2 additions & 2 deletions examples/cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ usage: ./bin/sd-cli [options]

CLI Options:
-o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image sequences (default:
./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi and animated .webp
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support .avi and animated .webp
./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi, .webm, and animated .webp
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support .avi, .webm, and animated .webp
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
every step)
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
Expand Down
10 changes: 6 additions & 4 deletions examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ struct SDCliParams {
options.string_options = {
{"-o",
"--output",
"path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png)",
"path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png). Single-file video outputs support .avi, .webm, and animated .webp",
&output_path},
{"",
"--image",
Expand All @@ -70,7 +70,7 @@ struct SDCliParams {
&metadata_format},
{"",
"--preview-path",
"path to write preview image to (default: ./preview.png)",
"path to write preview image to (default: ./preview.png). Multi-frame previews support .avi, .webm, and animated .webp",
&preview_path},
};

Expand Down Expand Up @@ -396,7 +396,9 @@ bool save_results(const SDCliParams& cli_params,
if (!ext.empty()) {
if (output_format == EncodedImageFormat::JPEG ||
output_format == EncodedImageFormat::PNG ||
output_format == EncodedImageFormat::WEBP) {
output_format == EncodedImageFormat::WEBP ||
ext_lower == ".avi" ||
ext_lower == ".webm") {
base_path.replace_extension();
}
}
Expand Down Expand Up @@ -438,7 +440,7 @@ bool save_results(const SDCliParams& cli_params,
}

if (cli_params.mode == VID_GEN && num_results > 1) {
if (ext_lower != ".avi" && ext_lower != ".webp")
if (ext_lower != ".avi" && ext_lower != ".webp" && ext_lower != ".webm")
ext = ".avi";
fs::path video_path = base_path;
video_path += ext;
Expand Down
181 changes: 181 additions & 0 deletions examples/common/media_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@
#include "webp/mux.h"
#endif

#ifdef SD_USE_WEBM
#include "mkvmuxer/mkvmuxer.h"
#include "mkvmuxer/mkvwriter.h"
#endif

namespace fs = std::filesystem;

namespace {
Expand Down Expand Up @@ -71,6 +76,13 @@ bool write_binary_file_bytes(const std::string& path, const std::vector<uint8_t>
return true;
}

// Decodes the four bytes starting at `data` as an unsigned 32-bit little-endian integer.
// `data` must point to at least four readable bytes.
uint32_t read_u32_le_bytes(const uint8_t* data) {
    uint32_t value = 0;
    // Fold from the most significant byte (index 3) down to the least significant (index 0).
    for (int idx = 3; idx >= 0; --idx) {
        value = (value << 8) | static_cast<uint32_t>(data[idx]);
    }
    return value;
}

int stbi_ext_write_png_to_func(stbi_write_func* func,
void* context,
int x,
Expand Down Expand Up @@ -289,6 +301,76 @@ bool encode_webp_image_to_vector(const uint8_t* image,
WebPMuxDelete(mux);
return ok;
}

#ifdef SD_USE_WEBM
// Finds the first "VP8 " chunk inside an in-memory WebP (RIFF) file and copies its
// payload into `vp8_frame`.
//
// Returns true only when such a chunk is found and its payload is non-empty.
// Returns false when the WebP signature check fails, when a chunk header declares
// more payload bytes than remain in the buffer, or when no "VP8 " chunk is present.
// `vp8_frame` is only written once a "VP8 " chunk has been located.
bool extract_vp8_frame_from_webp(const std::vector<uint8_t>& webp_data, std::vector<uint8_t>& vp8_frame) {
    if (!is_webp_signature(webp_data.data(), webp_data.size())) {
        return false;
    }

    constexpr size_t kRiffHeaderSize  = 12;  // "RIFF" + 32-bit size + "WEBP"
    constexpr size_t kChunkHeaderSize = 8;   // FourCC + 32-bit payload length
    const size_t total_size           = webp_data.size();

    size_t offset = kRiffHeaderSize;
    // Written with subtraction so the check cannot wrap for any buffer size.
    while (offset <= total_size && total_size - offset >= kChunkHeaderSize) {
        const uint8_t* chunk     = webp_data.data() + offset;
        const uint32_t chunk_len = read_u32_le_bytes(chunk + 4);
        const size_t chunk_start = offset + kChunkHeaderSize;

        // Compare against the remaining bytes instead of computing
        // chunk_start + chunk_len, which can wrap where size_t is 32 bits wide and
        // let an oversized length slip through.
        if (chunk_len > total_size - chunk_start) {
            return false;
        }

        if (memcmp(chunk, "VP8 ", 4) == 0) {
            vp8_frame.assign(webp_data.data() + chunk_start,
                             webp_data.data() + chunk_start + chunk_len);
            return !vp8_frame.empty();
        }

        // Odd-sized payloads are followed by a single padding byte.
        offset = chunk_start + static_cast<size_t>(chunk_len) + (chunk_len & 1u);
    }

    return false;
}

// Encodes one image as a standalone VP8 frame.
//
// The image is converted to packed 3-channel RGB when necessary (1-channel input is
// replicated into all three channels; for 4-channel input the fourth channel is
// dropped), encoded through encode_webp_image_to_vector, and the VP8 payload is then
// pulled out of the resulting WebP container.
//
// @param image     Source image; must have non-null data, non-zero dimensions and
//                  1, 3 or 4 channels.
// @param quality   Forwarded unchanged to encode_webp_image_to_vector.
// @param vp8_frame Receives the VP8 bitstream on success.
// @return true on success, false on invalid input or when encoding/extraction fails.
bool encode_sd_image_to_vp8_frame(const sd_image_t& image, int quality, std::vector<uint8_t>& vp8_frame) {
    if (image.data == nullptr || image.width == 0 || image.height == 0) {
        return false;
    }

    const int width         = static_cast<int>(image.width);
    const int height        = static_cast<int>(image.height);
    const int input_channel = static_cast<int>(image.channel);
    // Dimensions that do not fit in a positive int cannot be passed to the encoder.
    if (width <= 0 || height <= 0) {
        return false;
    }
    if (input_channel != 1 && input_channel != 3 && input_channel != 4) {
        return false;
    }

    // All pixel arithmetic is done in size_t: `width * height` and `i * 4` in int
    // would overflow (undefined behavior) for large frames.
    const size_t pixel_count = static_cast<size_t>(width) * static_cast<size_t>(height);

    std::vector<uint8_t> rgb_buffer;
    const uint8_t* rgb_data = image.data;
    if (input_channel == 1) {
        rgb_buffer.resize(pixel_count * 3);
        for (size_t i = 0; i < pixel_count; ++i) {
            const uint8_t gray    = image.data[i];
            rgb_buffer[i * 3 + 0] = gray;
            rgb_buffer[i * 3 + 1] = gray;
            rgb_buffer[i * 3 + 2] = gray;
        }
        rgb_data = rgb_buffer.data();
    } else if (input_channel == 4) {
        rgb_buffer.resize(pixel_count * 3);
        for (size_t i = 0; i < pixel_count; ++i) {
            rgb_buffer[i * 3 + 0] = image.data[i * 4 + 0];
            rgb_buffer[i * 3 + 1] = image.data[i * 4 + 1];
            rgb_buffer[i * 3 + 2] = image.data[i * 4 + 2];
        }
        rgb_data = rgb_buffer.data();
    }

    std::vector<uint8_t> encoded_webp;
    if (!encode_webp_image_to_vector(rgb_data, width, height, 3, "", quality, encoded_webp)) {
        return false;
    }

    return extract_vp8_frame_from_webp(encoded_webp, vp8_frame);
}
#endif
#endif

uint8_t* load_image_common(bool from_memory,
Expand Down Expand Up @@ -861,6 +943,99 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
}
#endif

#ifdef SD_USE_WEBM
// Encodes a sequence of images as VP8 frames and muxes them into a WebM file.
//
// Every frame is encoded independently via encode_sd_image_to_vp8_frame and added to
// the segment with the key-frame flag set. All frames must have the same dimensions
// as images[0].
//
// @param filename   Output path, passed to mkvmuxer::MkvWriter::Open.
// @param images     Array of `num_images` frames; must not be null.
// @param num_images Number of frames; must be positive.
// @param fps        Frames per second; must be positive.
// @param quality    Forwarded to the per-frame encoder.
// @return 0 on success, -1 on failure (a diagnostic is written to stderr).
int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
    // Also rejects a null array and negative counts: images[0] is read below, and a
    // negative count would otherwise produce an empty file reported as success.
    if (images == nullptr || num_images <= 0) {
        fprintf(stderr, "Error: Image array is empty.\n");
        return -1;
    }
    if (fps <= 0) {
        fprintf(stderr, "Error: FPS must be positive.\n");
        return -1;
    }

    const int width  = static_cast<int>(images[0].width);
    const int height = static_cast<int>(images[0].height);
    if (width <= 0 || height <= 0) {
        fprintf(stderr, "Error: Invalid frame dimensions.\n");
        return -1;
    }

    mkvmuxer::MkvWriter writer;
    if (!writer.Open(filename)) {
        fprintf(stderr, "Error: Could not open WebM file for writing.\n");
        return -1;
    }

    // The muxing work runs inside a lambda so that every exit path funnels through
    // the single writer.Close() call below.
    const int ret = [&]() -> int {
        mkvmuxer::Segment segment;
        if (!segment.Init(&writer)) {
            fprintf(stderr, "Error: Failed to initialize WebM muxer.\n");
            return -1;
        }

        segment.set_mode(mkvmuxer::Segment::kFile);
        segment.OutputCues(true);

        const uint64_t track_number = segment.AddVideoTrack(width, height, 0);
        if (track_number == 0) {
            fprintf(stderr, "Error: Failed to add VP8 video track.\n");
            return -1;
        }
        if (!segment.CuesTrack(track_number)) {
            fprintf(stderr, "Error: Failed to set WebM cues track.\n");
            return -1;
        }

        mkvmuxer::VideoTrack* video_track =
            static_cast<mkvmuxer::VideoTrack*>(segment.GetTrackByNumber(track_number));
        if (video_track != nullptr) {
            video_track->set_display_width(static_cast<uint64_t>(width));
            video_track->set_display_height(static_cast<uint64_t>(height));
            video_track->set_frame_rate(static_cast<double>(fps));
        }
        segment.GetSegmentInfo()->set_writing_app("stable-diffusion.cpp");
        segment.GetSegmentInfo()->set_muxing_app("stable-diffusion.cpp");

        constexpr uint64_t kNanosPerSecond = 1000000000ULL;
        const uint64_t frames_per_second   = static_cast<uint64_t>(fps);

        for (int i = 0; i < num_images; ++i) {
            const sd_image_t& image = images[i];
            if (static_cast<int>(image.width) != width || static_cast<int>(image.height) != height) {
                fprintf(stderr, "Error: Frame dimensions do not match.\n");
                return -1;
            }

            std::vector<uint8_t> vp8_frame;
            if (!encode_sd_image_to_vp8_frame(image, quality, vp8_frame)) {
                fprintf(stderr, "Error: Failed to encode frame %d as VP8.\n", i);
                return -1;
            }

            // Derive each timestamp from the frame index rather than accumulating a
            // rounded per-frame duration, so rounding error does not build up over
            // long sequences.
            const uint64_t timestamp_ns = static_cast<uint64_t>(i) * kNanosPerSecond / frames_per_second;

            if (!segment.AddFrame(vp8_frame.data(),
                                  static_cast<uint64_t>(vp8_frame.size()),
                                  track_number,
                                  timestamp_ns,
                                  true)) {
                fprintf(stderr, "Error: Failed to mux frame %d into WebM.\n", i);
                return -1;
            }
        }

        if (!segment.Finalize()) {
            fprintf(stderr, "Error: Failed to finalize WebM output.\n");
            return -1;
        }
        return 0;
    }();
    writer.Close();
    return ret;
}
#endif

int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
std::string path = filename ? filename : "";
auto pos = path.find_last_of('.');
Expand All @@ -869,6 +1044,12 @@ int create_video_from_sd_images(const char* filename, sd_image_t* images, int nu
ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
}

#ifdef SD_USE_WEBM
if (ext == ".webm") {
return create_webm_from_sd_images(filename, images, num_images, fps, quality);
}
#endif

#ifdef SD_USE_WEBP
if (ext == ".webp") {
return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality);
Expand Down
8 changes: 8 additions & 0 deletions examples/common/media_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,14 @@ int create_animated_webp_from_sd_images(const char* filename,
int quality = 90);
#endif

#ifdef SD_USE_WEBM
// Writes the `num_images` frames in `images` to `filename` as a WebM video at `fps`
// frames per second. Only declared when the build defines SD_USE_WEBM.
// `quality` defaults to 90.
// NOTE(review): the int result is a status code; confirm the exact convention
// (0 on success, negative on failure) against the definition in media_io.cpp.
int create_webm_from_sd_images(const char* filename,
sd_image_t* images,
int num_images,
int fps,
int quality = 90);
#endif

int create_video_from_sd_images(const char* filename,
sd_image_t* images,
int num_images,
Expand Down
Loading
Loading