Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/agents/voice/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ async def _add_text(self, text: str):

combined_sentences, self._text_buffer = self.tts_settings.text_splitter(self._text_buffer)

if len(combined_sentences) >= 20:
if combined_sentences:
local_queue: asyncio.Queue[VoiceStreamEvent | None] = asyncio.Queue()
self._ordered_tasks.append(local_queue)
self._tasks.append(
Expand All @@ -220,6 +220,10 @@ async def _turn_done(self):
)
)
self._text_buffer = ""
elif self._started_processing_turn:
local_queue: asyncio.Queue[VoiceStreamEvent | None] = asyncio.Queue()
await local_queue.put(VoiceStreamEventLifecycle(event="turn_ended"))
self._ordered_tasks.append(local_queue)
self._done_processing = True
if self._dispatcher_task is None:
self._dispatcher_task = asyncio.create_task(self._dispatch_audio())
Expand Down
34 changes: 34 additions & 0 deletions tests/voice/test_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import asyncio
from collections.abc import AsyncIterator

import numpy as np
import numpy.typing as npt
Expand Down Expand Up @@ -82,6 +83,39 @@ async def run(self, text: str, settings: TTSModelSettings):
assert audio_chunks == [np.array([1], dtype=np.int16).tobytes()]


@pytest.mark.asyncio
async def test_streamed_audio_result_sends_short_custom_splitter_chunks() -> None:
class RecordingTTS(FakeTTS):
def __init__(self) -> None:
super().__init__()
self.texts: list[str] = []

async def run(self, text: str, settings: TTSModelSettings) -> AsyncIterator[bytes]:
del settings
self.texts.append(text)
yield np.zeros(2, dtype=np.int16).tobytes()

def split_immediately(text: str) -> tuple[str, str]:
return text, ""

fake_tts = RecordingTTS()
result = StreamedAudioResult(
fake_tts,
TTSModelSettings(buffer_size=1, text_splitter=split_immediately),
VoicePipelineConfig(),
)

await result._add_text("ok")
await result._turn_done()
await result._done()

events, audio_chunks = await extract_events(result)

assert fake_tts.texts == ["ok"]
assert events == ["turn_started", "audio", "turn_ended", "session_ended"]
assert len(audio_chunks) == 1


@pytest.mark.asyncio
async def test_voicepipeline_run_single_turn() -> None:
# Single turn. Should produce a single audio output, which is the TTS output for "out_1".
Expand Down
Loading