From acd36fc0abb2e2143e9edcd799bd23a6a7a2eb16 Mon Sep 17 00:00:00 2001 From: Yi LIU Date: Wed, 25 Feb 2026 23:05:35 +0800 Subject: [PATCH 1/4] Add bounds checking for output tensor buffer in wasi-nn llama.cpp The get_output function copies LLM output into output_tensor->buf without checking against output_tensor->size, allowing writes past the buffer when the model generates output longer than the caller-provided buffer. Add size checks for both the metadata path and the token output loop. --- .../iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c b/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c index 2e1e649365..f7055d0bfe 100644 --- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c +++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c @@ -623,8 +623,11 @@ get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index, printf("%s\n", output_metadata); } - memcpy(output_tensor->buf, output_metadata, strlen(output_metadata)); - *output_tensor_size = strlen(output_metadata); + size_t metadata_len = strlen(output_metadata); + if (metadata_len > output_tensor->size) + metadata_len = output_tensor->size; + memcpy(output_tensor->buf, output_metadata, metadata_len); + *output_tensor_size = metadata_len; return success; } @@ -643,8 +646,11 @@ get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index, printf("%s", buf); } - memcpy(output_tensor->buf + end_pos, buf, strlen(buf)); - end_pos += strlen(buf); + size_t piece_len = strlen(buf); + if (end_pos + piece_len > output_tensor->size) + break; + memcpy(output_tensor->buf + end_pos, buf, piece_len); + end_pos += piece_len; } if (backend_ctx->config.stream_stdout) { From be996151dcdac6cf41636883338e9403618e2b6e Mon Sep 17 00:00:00 2001 From: Yi LIU Date: Fri, 27 Feb 2026 15:45:49 +0800 Subject: [PATCH 2/4] Return too_large error instead of silently truncating output Instead of silently truncating output when the buffer is too small, return the too_large error with a diagnostic message. This makes the behavior consistent with the OpenVINO backend's get_output and allows callers to distinguish between successful completion and insufficient buffer size. --- .../libraries/wasi-nn/src/wasi_nn_llamacpp.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c b/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c index f7055d0bfe..d895aba992 100644 --- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c +++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c @@ -624,8 +624,12 @@ get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index, } size_t metadata_len = strlen(output_metadata); - if (metadata_len > output_tensor->size) - metadata_len = output_tensor->size; + if (metadata_len > output_tensor->size) { + NN_ERR_PRINTF("Output buffer too small for metadata: " + "need %zu, got %zu", + metadata_len, output_tensor->size); + return too_large; + } memcpy(output_tensor->buf, output_metadata, metadata_len); *output_tensor_size = metadata_len; return success; @@ -647,8 +651,12 @@ get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index, } size_t piece_len = strlen(buf); - if (end_pos + piece_len > output_tensor->size) - break; + if (end_pos + piece_len > output_tensor->size) { + NN_ERR_PRINTF("Output buffer too small: need at least %zu," + " got %zu", + end_pos + piece_len, output_tensor->size); + return too_large; + } memcpy(output_tensor->buf + end_pos, buf, piece_len); end_pos += piece_len; } From 07929785b63f2e2366ba238ae2ea58ca05a64521 Mon Sep 17 00:00:00 2001 From: Yi LIU Date: Sun, 1 Mar 2026 13:35:15 +0800 Subject: [PATCH 3/4] Retrigger CI From ff3d044dfc35fd6e4da0d79b134f569b5e636d70 Mon Sep 17 00:00:00 2001 From: Yi LIU Date: Tue, 3 Mar 2026 00:45:23 +0800 Subject: [PATCH 4/4] Retrigger CI