Skip to content

Commit af4e3e5

Browse files
use the request_async_id in cpp (#43)
* use the request_async_id in cpp
* changed the code back to always throw when ffi returns invalid result
* fix the timeout issue and a few bugs related to the SimpleRoom example
* update to the latest rust sdk commit with the audio thread fix
1 parent 070759d commit af4e3e5

File tree

8 files changed

+559
-205
lines changed

8 files changed

+559
-205
lines changed

client-sdk-rust

examples/simple_room/main.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ int main(int argc, char *argv[]) {
319319
<< " Creation time (ms): " << info.creation_time << "\n";
320320

321321
// Setup Audio Source / Track
322-
auto audioSource = std::make_shared<AudioSource>(44100, 1, 10);
322+
auto audioSource = std::make_shared<AudioSource>(44100, 1, 0);
323323
auto audioTrack =
324324
LocalAudioTrack::createLocalAudioTrack("micTrack", audioSource);
325325

@@ -385,6 +385,8 @@ int main(int argc, char *argv[]) {
385385
// Shutdown the audio / video capture threads.
386386
media.stopMic();
387387
media.stopCamera();
388+
media.stopSpeaker();
389+
media.shutdownRenderer();
388390

389391
// Drain any queued tasks that might still try to update the renderer /
390392
// speaker

examples/simple_room/sdl_media_manager.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ SDLMediaManager::~SDLMediaManager() {
3131
stopMic();
3232
stopCamera();
3333
stopSpeaker();
34+
shutdownRenderer();
3435
}
3536

3637
bool SDLMediaManager::ensureSDLInit(Uint32 flags) {

examples/simple_room/sdl_video_renderer.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222

2323
using namespace livekit;
2424

25+
constexpr int kMaxFPS = 60;
26+
2527
SDLVideoRenderer::SDLVideoRenderer() = default;
2628

2729
SDLVideoRenderer::~SDLVideoRenderer() { shutdown(); }
@@ -95,6 +97,16 @@ void SDLVideoRenderer::render() {
9597
return;
9698
}
9799

100+
// Throttle rendering to kMaxFPS
101+
const auto now = std::chrono::steady_clock::now();
102+
if (last_render_time_.time_since_epoch().count() != 0) {
103+
const auto min_interval = std::chrono::microseconds(1'000'000 / kMaxFPS);
104+
if (now - last_render_time_ < min_interval) {
105+
return;
106+
}
107+
}
108+
last_render_time_ = now;
109+
98110
// 3) Read a frame from VideoStream (blocking until one is available)
99111
livekit::VideoFrameEvent vfe;
100112
bool gotFrame = stream_->read(vfe);

examples/simple_room/sdl_video_renderer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,5 @@ class SDLVideoRenderer {
4949
std::shared_ptr<livekit::VideoStream> stream_;
5050
int width_ = 0;
5151
int height_ = 0;
52+
std::chrono::steady_clock::time_point last_render_time_{};
5253
};

include/livekit/audio_source.h

Lines changed: 51 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,34 @@ class AudioSource {
4141
* @param sample_rate Sample rate in Hz.
4242
* @param num_channels Number of channels.
4343
* @param queue_size_ms Max buffer duration for the internal queue in ms.
44+
*
45+
* Buffering behavior:
46+
* -------------------
47+
* - queue_size_ms == 0 (recommended for real-time capture):
48+
* Disables internal buffering entirely. Audio frames are forwarded
49+
* directly to WebRTC sinks and consumed synchronously.
50+
*
51+
* This mode is optimized for real-time audio capture driven by hardware
52+
* media callbacks (e.g. microphone capture). The caller is expected to
53+
* provide fixed-size real-time frames (typically 10 ms per call).
54+
*
55+
* Because the native side consumes frames immediately, this mode
56+
* minimizes latency and jitter and is the best choice for live capture
57+
* scenarios.
58+
*
59+
* - queue_size_ms > 0 (buffered / blocking mode):
60+
* Enables an internal queue that buffers audio up to the specified
61+
* duration. Frames are accumulated and flushed asynchronously once the buffer
62+
* reaches its threshold.
63+
*
64+
* This mode is intended for non-real-time producers (e.g. TTS engines,
65+
* file-based audio, or agents generating audio faster or slower than
66+
* real-time). The buffering layer smooths timing and allows the audio to
67+
* be streamed out in real time even if the producer is bursty.
68+
*
69+
* queue_size_ms must be a multiple of 10.
4470
*/
45-
AudioSource(int sample_rate, int num_channels, int queue_size_ms = 1000);
71+
AudioSource(int sample_rate, int num_channels, int queue_size_ms = 0);
4672
virtual ~AudioSource() = default;
4773

4874
AudioSource(const AudioSource &) = delete;
@@ -86,19 +112,32 @@ class AudioSource {
86112
* callback arrives (recommended for production unless the caller needs
87113
* explicit timeout control).
88114
*
89-
* Notes:
90-
* - This is a blocking call.
91-
* - timeout_ms == 0 (infinite wait) is the safest mode because it
92-
* guarantees the callback completes before the function returns, which in
93-
* turn guarantees that the audio buffer lifetime is fully protected. The
94-
* caller does not need to manage or extend the frame lifetime manually.
115+
* Blocking semantics:
116+
* The blocking behavior of this call depends on the buffering mode selected
117+
* at construction time:
118+
*
119+
* - queue_size_ms == 0 (real-time capture mode):
120+
* Frames are consumed synchronously by the native layer. The FFI callback
121+
* is invoked immediately as part of the capture call, so this function
122+
* returns quickly.
123+
*
124+
* This mode relies on the caller being paced by a real-time media
125+
* callback (e.g. audio hardware interrupt / capture thread). It provides the
126+
* lowest possible latency and is ideal for live microphone capture.
127+
*
128+
* - queue_size_ms > 0 (buffered / non-real-time mode):
129+
* Frames are queued internally and flushed asynchronously. This function
130+
* will block until the buffered audio corresponding to this frame has
131+
* been consumed by the native side and the FFI callback fires.
95132
*
96-
* - May throw std::runtime_error if:
97-
* • the FFI reports an error
133+
* This mode is best suited for non-real-time audio producers (such as TTS
134+
* engines or agents) that generate audio independently of real-time
135+
* pacing, while still streaming audio out in real time.
98136
*
99-
* - The underlying FFI request *must* eventually produce a callback for
100-
* each frame. If the FFI layer is misbehaving or the event loop is stalled,
101-
* a timeout may occur in bounded-wait mode.
137+
* Safety notes:
138+
* May throw std::runtime_error if:
139+
* - the FFI reports an error
140+
* - a timeout occurs in bounded-wait mode
102141
*/
103142
void captureFrame(const AudioFrame &frame, int timeout_ms = 20);
104143

0 commit comments

Comments
 (0)