use the request_async_id in cpp (#43)

xianshijing-lk · web-flow · commit af4e3e52c34a · 2026-01-29T12:02:10.000-08:00
* use the request_async_id in cpp

* changed the code back to always throw when ffi returns invalid result

* fix the timeout issue and a few bugs related to the SimpleRoom example

* update to the latest rust sdk commit with the audio thread fix
diff --git a/client-sdk-rust b/client-sdk-rust
@@ -1 +1 @@
-Subproject commit 3a3f42d7403a648c40920d60f3cf6f1e4b808aea
+Subproject commit d7c19cedc6938231cb8849e76e584f109452a45c
diff --git a/examples/simple_room/main.cpp b/examples/simple_room/main.cpp
@@ -319,7 +319,7 @@ int main(int argc, char *argv[]) {
             << "  Creation time (ms): " << info.creation_time << "\n";
 
   // Setup Audio Source / Track
-  auto audioSource = std::make_shared<AudioSource>(44100, 1, 10);
+  auto audioSource = std::make_shared<AudioSource>(44100, 1, 0);
   auto audioTrack =
       LocalAudioTrack::createLocalAudioTrack("micTrack", audioSource);
 
@@ -385,6 +385,8 @@ int main(int argc, char *argv[]) {
   // Shutdown the audio / video capture threads.
   media.stopMic();
   media.stopCamera();
+  media.stopSpeaker();
+  media.shutdownRenderer();
 
   // Drain any queued tasks that might still try to update the renderer /
   // speaker
diff --git a/examples/simple_room/sdl_media_manager.cpp b/examples/simple_room/sdl_media_manager.cpp
@@ -31,6 +31,7 @@ SDLMediaManager::~SDLMediaManager() {
   stopMic();
   stopCamera();
   stopSpeaker();
+  shutdownRenderer();
 }
 
 bool SDLMediaManager::ensureSDLInit(Uint32 flags) {
diff --git a/examples/simple_room/sdl_video_renderer.cpp b/examples/simple_room/sdl_video_renderer.cpp
@@ -22,6 +22,8 @@
 
 using namespace livekit;
 
+constexpr int kMaxFPS = 60;
+
 SDLVideoRenderer::SDLVideoRenderer() = default;
 
 SDLVideoRenderer::~SDLVideoRenderer() { shutdown(); }
@@ -95,6 +97,16 @@ void SDLVideoRenderer::render() {
     return;
   }
 
+  // Throttle rendering to kMaxFPS
+  const auto now = std::chrono::steady_clock::now();
+  if (last_render_time_.time_since_epoch().count() != 0) {
+    const auto min_interval = std::chrono::microseconds(1'000'000 / kMaxFPS);
+    if (now - last_render_time_ < min_interval) {
+      return;
+    }
+  }
+  last_render_time_ = now;
+
   // 3) Read a frame from VideoStream (blocking until one is available)
   livekit::VideoFrameEvent vfe;
   bool gotFrame = stream_->read(vfe);
diff --git a/examples/simple_room/sdl_video_renderer.h b/examples/simple_room/sdl_video_renderer.h
@@ -49,4 +49,5 @@ class SDLVideoRenderer {
   std::shared_ptr<livekit::VideoStream> stream_;
   int width_ = 0;
   int height_ = 0;
+  std::chrono::steady_clock::time_point last_render_time_{};
 };
diff --git a/include/livekit/audio_source.h b/include/livekit/audio_source.h
@@ -41,8 +41,34 @@ class AudioSource {
    * @param sample_rate   Sample rate in Hz.
    * @param num_channels  Number of channels.
    * @param queue_size_ms Max buffer duration for the internal queue in ms.
+   *
+   * Buffering behavior:
+   * -------------------
+   * - queue_size_ms == 0 (recommended for real-time capture):
+   *     Disables internal buffering entirely. Audio frames are forwarded
+   *     directly to WebRTC sinks and consumed synchronously.
+   *
+   *     This mode is optimized for real-time audio capture driven by hardware
+   *     media callbacks (e.g. microphone capture). The caller is expected to
+   *     provide fixed-size real-time frames (typically 10 ms per call).
+   *
+   *     Because the native side consumes frames immediately, this mode
+   *     minimizes latency and jitter and is the best choice for live capture
+   *     scenarios.
+   *
+   * - queue_size_ms > 0 (buffered / blocking mode):
+   *     Enables an internal queue that buffers audio up to the specified
+   *     duration. Frames are accumulated and flushed asynchronously once the
+   *     buffer reaches its threshold.
+   *
+   *     This mode is intended for non-real-time producers (e.g. TTS engines,
+   *     file-based audio, or agents generating audio faster or slower than
+   *     real time). The buffering layer smooths timing and allows the audio to
+   *     be streamed out in real time even if the producer is bursty.
+   *
+   *     queue_size_ms must be a multiple of 10.
    */
-  AudioSource(int sample_rate, int num_channels, int queue_size_ms = 1000);
+  AudioSource(int sample_rate, int num_channels, int queue_size_ms = 0);
   virtual ~AudioSource() = default;
 
   AudioSource(const AudioSource &) = delete;
@@ -86,19 +112,32 @@ class AudioSource {
    * callback arrives (recommended for production unless the caller needs
    * explicit timeout control).
    *
-   * Notes:
-   *   - This is a blocking call.
-   *   - timeout_ms == 0 (infinite wait) is the safest mode because it
-   * guarantees the callback completes before the function returns, which in
-   * turn guarantees that the audio buffer lifetime is fully protected. The
-   * caller does not need to manage or extend the frame lifetime manually.
+   * Blocking semantics:
+   * The blocking behavior of this call depends on the buffering mode selected
+   * at construction time:
+   *
+   * - queue_size_ms == 0 (real-time capture mode):
+   *     Frames are consumed synchronously by the native layer. The FFI callback
+   *     is invoked immediately as part of the capture call, so this function
+   *     returns quickly.
+   *
+   *     This mode relies on the caller being paced by a real-time media
+   *     callback (e.g. audio hardware interrupt / capture thread). It provides
+   *     the lowest possible latency and is ideal for live microphone capture.
+   *
+   * - queue_size_ms > 0 (buffered / non-real-time mode):
+   *     Frames are queued internally and flushed asynchronously. This function
+   *     will block until the buffered audio corresponding to this frame has
+   *     been consumed by the native side and the FFI callback fires.
    *
-   *   - May throw std::runtime_error if:
-   *       • the FFI reports an error
+   *     This mode is best suited for non-real-time audio producers (such as TTS
+   *     engines or agents) that generate audio independently of real-time
+   *     pacing, while still streaming audio out in real time.
    *
-   *   - The underlying FFI request *must* eventually produce a callback for
-   * each frame. If the FFI layer is misbehaving or the event loop is stalled,
-   *     a timeout may occur in bounded-wait mode.
+   * Safety notes:
+   * May throw std::runtime_error if:
+   *   - the FFI reports an error
+   *   - a timeout occurs in bounded-wait mode
    */
   void captureFrame(const AudioFrame &frame, int timeout_ms = 20);
 
diff --git a/src/ffi_client.cpp b/src/ffi_client.cpp
diff --git a/src/ffi_client.h b/src/ffi_client.h

Original file line number	Diff line number	Diff line change
`@@ -31,6 +31,7 @@ SDLMediaManager::~SDLMediaManager() {`
`31`	`31`	`stopMic();`
`32`	`32`	`stopCamera();`
`33`	`33`	`stopSpeaker();`
	`34`	`+ shutdownRenderer();`
`34`	`35`	`}`
`35`	`36`
`36`	`37`	`bool SDLMediaManager::ensureSDLInit(Uint32 flags) {`