diff --git a/src/agents/voice/result.py b/src/agents/voice/result.py index 511c8e6e7d..ae3c92c056 100644 --- a/src/agents/voice/result.py +++ b/src/agents/voice/result.py @@ -201,7 +201,7 @@ async def _add_text(self, text: str): combined_sentences, self._text_buffer = self.tts_settings.text_splitter(self._text_buffer) - if len(combined_sentences) >= 20: + if combined_sentences: local_queue: asyncio.Queue[VoiceStreamEvent | None] = asyncio.Queue() self._ordered_tasks.append(local_queue) self._tasks.append( @@ -220,6 +220,10 @@ async def _turn_done(self): ) ) self._text_buffer = "" + elif self._started_processing_turn: + local_queue: asyncio.Queue[VoiceStreamEvent | None] = asyncio.Queue() + await local_queue.put(VoiceStreamEventLifecycle(event="turn_ended")) + self._ordered_tasks.append(local_queue) self._done_processing = True if self._dispatcher_task is None: self._dispatcher_task = asyncio.create_task(self._dispatch_audio()) diff --git a/tests/voice/test_pipeline.py b/tests/voice/test_pipeline.py index 7bc46279ad..7ea912af1c 100644 --- a/tests/voice/test_pipeline.py +++ b/tests/voice/test_pipeline.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +from collections.abc import AsyncIterator import numpy as np import numpy.typing as npt @@ -82,6 +83,39 @@ async def run(self, text: str, settings: TTSModelSettings): assert audio_chunks == [np.array([1], dtype=np.int16).tobytes()] +@pytest.mark.asyncio +async def test_streamed_audio_result_sends_short_custom_splitter_chunks() -> None: + class RecordingTTS(FakeTTS): + def __init__(self) -> None: + super().__init__() + self.texts: list[str] = [] + + async def run(self, text: str, settings: TTSModelSettings) -> AsyncIterator[bytes]: + del settings + self.texts.append(text) + yield np.zeros(2, dtype=np.int16).tobytes() + + def split_immediately(text: str) -> tuple[str, str]: + return text, "" + + fake_tts = RecordingTTS() + result = StreamedAudioResult( + fake_tts, + TTSModelSettings(buffer_size=1, text_splitter=split_immediately), + VoicePipelineConfig(), + ) + + await result._add_text("ok") + await result._turn_done() + await result._done() + + events, audio_chunks = await extract_events(result) + + assert fake_tts.texts == ["ok"] + assert events == ["turn_started", "audio", "turn_ended", "session_ended"] + assert len(audio_chunks) == 1 + + @pytest.mark.asyncio async def test_voicepipeline_run_single_turn() -> None: # Single turn. Should produce a single audio output, which is the TTS output for "out_1".