From 2d932032c89de4f69c91b23e3e6e1666823bfb7e Mon Sep 17 00:00:00 2001
From: Xiting Zhang <xitzhang@microsoft.com>
Date: Fri, 22 May 2026 13:22:11 -0700
Subject: [PATCH 1/3] Add VoiceLive beta API updates

---
 sdk/voicelive/azure-ai-voicelive/CHANGELOG.md |  15 +
 sdk/voicelive/azure-ai-voicelive/README.md    |  42 +-
 .../azure-ai-voicelive/_metadata.json         |   4 +-
 .../apiview-properties.json                   |  42 +-
 .../azure/ai/voicelive/_types.py              |   6 +-
 .../azure/ai/voicelive/_utils/model_base.py   | 374 +++++++++++++--
 .../ai/voicelive/_utils/serialization.py      |  23 +-
 .../azure/ai/voicelive/_version.py            |   2 +-
 .../azure/ai/voicelive/aio/_patch.py          |  12 +-
 .../azure/ai/voicelive/models/__init__.py     |  20 +
 .../azure/ai/voicelive/models/_enums.py       |  52 ++
 .../azure/ai/voicelive/models/_models.py      | 448 ++++++++++++++++--
 .../azure-ai-voicelive/pyproject.toml         |   2 +-
 .../samples/BASIC_VOICE_ASSISTANT.md          |  16 +-
 .../azure-ai-voicelive/samples/README.md      |  17 +-
 .../samples/async_mcp_sample.py               |   4 +-
 .../samples/basic_voice_assistant_async.py    |   6 +-
 .../tests/test_live_realtime_service.py       |  49 +-
 .../tests/test_unit_client_events.py          |  25 +
 .../tests/test_unit_connection.py             |   4 +-
 .../tests/test_unit_enums.py                  |  33 ++
 .../tests/test_unit_models.py                 |  93 ++++
 ...st_unit_models_interim_response_foundry.py |  21 +
 23 files changed, 1142 insertions(+), 168 deletions(-)

diff --git a/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md b/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md
index 11d1229e22de..ca0fca5574da 100644
--- a/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md
+++ b/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md
@@ -1,5 +1,20 @@
 # Release History
 
+## 1.3.0b1 (2026-05-22)
+
+### Features Added
+
+- **Azure Realtime Native Voice Support**: Added `AzureRealtimeNativeVoice` and
+  `AzureRealtimeNativeVoiceName`, and expanded `voice` fields to accept Azure realtime native voices.
+- **WebRTC Call Negotiation Support**: Added `ClientEventRtcCallSdpCreate`, `ServerEventRtcCallSdpCreated`,
+  `ServerEventRtcCallError`, and `RtcCallErrorDetails` for SDP-based WebRTC call setup.
+- **Hosted Agent Invocation Input**: Added `invoke_input` to `ResponseCreateParams` and
+  `ServerEventResponseInvocationDelta` for hosted agent invocation passthrough data.
+- **Audio Playback Lifecycle Events**: Added `ServerEventOutputAudioBufferStarted` and
+  `ServerEventOutputAudioBufferStopped` to track model audio playback start and stop.
+- **Echo Cancellation Configuration**: Added `EchoCancellationReferenceSource` and new
+  `reference_source` / `channels` options on `AudioEchoCancellation` for client-provided stereo echo reference input.
+
 ## 1.2.0 (2026-05-22)
 
 ### Features Added
diff --git a/sdk/voicelive/azure-ai-voicelive/README.md b/sdk/voicelive/azure-ai-voicelive/README.md
index cf1b1a2d8bb4..4eecdca3aa8a 100644
--- a/sdk/voicelive/azure-ai-voicelive/README.md
+++ b/sdk/voicelive/azure-ai-voicelive/README.md
@@ -5,7 +5,7 @@ This package provides a **real-time, speech-to-speech** client for Azure AI Voic
 It opens a WebSocket session to stream microphone audio to the service and receive
 typed server events (including audio) for responsive, interruptible conversations.
 
-> **Status:** General Availability (GA). This is a stable release suitable for production use.
+> **Status:** Preview (`1.3.0b1`). This beta release includes the latest SDK and sample updates; its APIs may change before the next stable release.
 
 > **Important:** As of version 1.0.0, this SDK is **async-only**. The synchronous API has been removed to focus exclusively on async patterns. All examples and samples use `async`/`await` syntax.
 
@@ -16,34 +16,35 @@ Getting started
 
 ### Prerequisites
 
-- **Python 3.9+**
+- **Python 3.10+**
 - An **Azure subscription**
 - A **VoiceLive** resource and endpoint
 - A working **microphone** and **speakers/headphones** if you run the voice samples
 
 ### Install
 
-Install the stable GA version:
+Install the latest preview version:
 
 ```bash
 # Base install (core client only)
-python -m pip install azure-ai-voicelive
+python -m pip install --pre azure-ai-voicelive
 
 # For asynchronous streaming (uses aiohttp)
-python -m pip install "azure-ai-voicelive[aiohttp]"
+python -m pip install --pre "azure-ai-voicelive[aiohttp]"
 
 # For voice samples (includes audio processing)
 # First install PyAudio dependencies for your platform:
 #   Linux: sudo apt-get install -y portaudio19-dev libasound2-dev
 #   macOS: brew install portaudio
-python -m pip install azure-ai-voicelive[aiohttp] pyaudio python-dotenv
+python -m pip install --pre "azure-ai-voicelive[aiohttp]" azure-identity pyaudio python-dotenv
 ```
 
 The SDK provides async-only WebSocket connections using `aiohttp` for optimal performance and reliability.
 
 ### Authenticate
 
-You can authenticate with an **API key** or an **Azure Active Directory (AAD) token**.
+You can authenticate with an **API key** or a **Microsoft Entra ID token** (Microsoft Entra ID was formerly Azure Active Directory, AAD).
+The samples default to `DefaultAzureCredential`; for local development, `az login` is usually the simplest path.
 
 #### API Key Authentication (Quick Start)
 
@@ -66,7 +67,7 @@ async def main():
     async with connect(
         endpoint="your-endpoint",
         credential=AzureKeyCredential("your-api-key"),
-        model="gpt-4o-realtime-preview"
+        model="gpt-realtime"
     ) as connection:
         # Your async code here
         pass
@@ -76,7 +77,7 @@ asyncio.run(main())
 
 #### AAD Token Authentication
 
-For production applications, AAD authentication is recommended:
+For production applications, Microsoft Entra ID (formerly AAD) authentication is recommended:
 
 ```python
 import asyncio
@@ -85,14 +86,17 @@ from azure.ai.voicelive import connect
 
 async def main():
     credential = DefaultAzureCredential()
-    
-    async with connect(
-        endpoint="your-endpoint",
-        credential=credential,
-        model="gpt-4o-realtime-preview"
-    ) as connection:
-        # Your async code here
-        pass
+
+    try:
+        async with connect(
+            endpoint="your-endpoint",
+            credential=credential,
+            model="gpt-realtime"
+        ) as connection:
+            # Your async code here
+            pass
+    finally:
+        await credential.close()
 
 asyncio.run(main())
 ```
@@ -142,7 +146,7 @@ The Basic Voice Assistant sample demonstrates full-featured voice interaction wi
 python samples/basic_voice_assistant_async.py
 
 # With custom parameters
-python samples/basic_voice_assistant_async.py --model gpt-4o-realtime-preview --voice alloy --instructions "You're a helpful assistant"
+python samples/basic_voice_assistant_async.py --model gpt-realtime --voice alloy --instructions "You're a helpful assistant"
 ```
 
 ### Minimal example
@@ -157,7 +161,7 @@ from azure.ai.voicelive.models import (
 
 API_KEY = "your-api-key"
 ENDPOINT = "wss://your-endpoint.com/openai/realtime"
-MODEL = "gpt-4o-realtime-preview"
+MODEL = "gpt-realtime"
 
 async def main():
     async with connect(
diff --git a/sdk/voicelive/azure-ai-voicelive/_metadata.json b/sdk/voicelive/azure-ai-voicelive/_metadata.json
index 5786f7fc266c..312af8013e92 100644
--- a/sdk/voicelive/azure-ai-voicelive/_metadata.json
+++ b/sdk/voicelive/azure-ai-voicelive/_metadata.json
@@ -1,6 +1,6 @@
 {
-  "apiVersion": "2026-04-10",
+  "apiVersion": "2026-06-01-preview",
   "apiVersions": {
-    "VoiceLive": "2026-04-10"
+    "VoiceLive": "2026-06-01-preview"
   }
 }
\ No newline at end of file
diff --git a/sdk/voicelive/azure-ai-voicelive/apiview-properties.json b/sdk/voicelive/azure-ai-voicelive/apiview-properties.json
index 8a3964ca1de4..e90b8a3a7b9e 100644
--- a/sdk/voicelive/azure-ai-voicelive/apiview-properties.json
+++ b/sdk/voicelive/azure-ai-voicelive/apiview-properties.json
@@ -18,6 +18,7 @@
         "azure.ai.voicelive.models.AzureAvatarVoiceSyncVoice": "VoiceLive.AzureAvatarVoiceSyncVoice",
         "azure.ai.voicelive.models.AzureCustomVoice": "VoiceLive.AzureCustomVoice",
         "azure.ai.voicelive.models.AzurePersonalVoice": "VoiceLive.AzurePersonalVoice",
+        "azure.ai.voicelive.models.AzureRealtimeNativeVoice": "VoiceLive.AzureRealtimeNativeVoice",
         "azure.ai.voicelive.models.EouDetection": "VoiceLive.EouDetection",
         "azure.ai.voicelive.models.AzureSemanticDetection": "VoiceLive.AzureSemanticDetection",
         "azure.ai.voicelive.models.AzureSemanticDetectionEn": "VoiceLive.AzureSemanticDetectionEn",
@@ -45,6 +46,7 @@
         "azure.ai.voicelive.models.ClientEventOutputAudioBufferClear": "VoiceLive.ClientEventOutputAudioBufferClear",
         "azure.ai.voicelive.models.ClientEventResponseCancel": "VoiceLive.ClientEventResponseCancel",
         "azure.ai.voicelive.models.ClientEventResponseCreate": "VoiceLive.ClientEventResponseCreate",
+        "azure.ai.voicelive.models.ClientEventRtcCallSdpCreate": "VoiceLive.ClientEventRtcCallSdpCreate",
         "azure.ai.voicelive.models.ClientEventSessionAvatarConnect": "VoiceLive.ClientEventSessionAvatarConnect",
         "azure.ai.voicelive.models.ClientEventSessionUpdate": "VoiceLive.ClientEventSessionUpdate",
         "azure.ai.voicelive.models.ContentPart": "VoiceLive.ContentPart",
@@ -92,6 +94,7 @@
         "azure.ai.voicelive.models.ResponseSession": "VoiceLive.ResponseSession",
         "azure.ai.voicelive.models.ResponseTextContentPart": "VoiceLive.ResponseTextContentPart",
         "azure.ai.voicelive.models.ResponseWebSearchCallItem": "VoiceLive.ResponseWebSearchCallItem",
+        "azure.ai.voicelive.models.RtcCallErrorDetails": "VoiceLive.RtcCallErrorDetails",
         "azure.ai.voicelive.models.Scene": "VoiceLive.Scene",
         "azure.ai.voicelive.models.ServerEvent": "VoiceLive.ServerEvent",
         "azure.ai.voicelive.models.ServerEventConversationItemCreated": "VoiceLive.ServerEventConversationItemCreated",
@@ -111,6 +114,8 @@
         "azure.ai.voicelive.models.ServerEventMcpListToolsFailed": "VoiceLive.ServerEventMcpListToolsFailed",
         "azure.ai.voicelive.models.ServerEventMcpListToolsInProgress": "VoiceLive.ServerEventMcpListToolsInProgress",
         "azure.ai.voicelive.models.ServerEventOutputAudioBufferCleared": "VoiceLive.ServerEventOutputAudioBufferCleared",
+        "azure.ai.voicelive.models.ServerEventOutputAudioBufferStarted": "VoiceLive.ServerEventOutputAudioBufferStarted",
+        "azure.ai.voicelive.models.ServerEventOutputAudioBufferStopped": "VoiceLive.ServerEventOutputAudioBufferStopped",
         "azure.ai.voicelive.models.ServerEventResponseAnimationBlendshapeDelta": "VoiceLive.ServerEventResponseAnimationBlendshapeDelta",
         "azure.ai.voicelive.models.ServerEventResponseAnimationBlendshapeDone": "VoiceLive.ServerEventResponseAnimationBlendshapeDone",
         "azure.ai.voicelive.models.ServerEventResponseAnimationVisemeDelta": "VoiceLive.ServerEventResponseAnimationVisemeDelta",
@@ -131,6 +136,7 @@
         "azure.ai.voicelive.models.ServerEventResponseFileSearchCallSearching": "VoiceLive.ServerEventResponseFileSearchCallSearching",
         "azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDelta": "VoiceLive.ServerEventResponseFunctionCallArgumentsDelta",
         "azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDone": "VoiceLive.ServerEventResponseFunctionCallArgumentsDone",
+        "azure.ai.voicelive.models.ServerEventResponseInvocationDelta": "VoiceLive.ServerEventResponseInvocationDelta",
         "azure.ai.voicelive.models.ServerEventResponseMcpCallArgumentsDelta": "VoiceLive.ServerEventResponseMcpCallArgumentsDelta",
         "azure.ai.voicelive.models.ServerEventResponseMcpCallArgumentsDone": "VoiceLive.ServerEventResponseMcpCallArgumentsDone",
         "azure.ai.voicelive.models.ServerEventResponseMcpCallCompleted": "VoiceLive.ServerEventResponseMcpCallCompleted",
@@ -144,6 +150,8 @@
         "azure.ai.voicelive.models.ServerEventResponseWebSearchCallCompleted": "VoiceLive.ServerEventResponseWebSearchCallCompleted",
         "azure.ai.voicelive.models.ServerEventResponseWebSearchCallInProgress": "VoiceLive.ServerEventResponseWebSearchCallInProgress",
         "azure.ai.voicelive.models.ServerEventResponseWebSearchCallSearching": "VoiceLive.ServerEventResponseWebSearchCallSearching",
+        "azure.ai.voicelive.models.ServerEventRtcCallError": "VoiceLive.ServerEventRtcCallError",
+        "azure.ai.voicelive.models.ServerEventRtcCallSdpCreated": "VoiceLive.ServerEventRtcCallSdpCreated",
         "azure.ai.voicelive.models.ServerEventSessionAvatarConnecting": "VoiceLive.ServerEventSessionAvatarConnecting",
         "azure.ai.voicelive.models.ServerEventSessionAvatarSwitchToIdle": "VoiceLive.ServerEventSessionAvatarSwitchToIdle",
         "azure.ai.voicelive.models.ServerEventSessionAvatarSwitchToSpeaking": "VoiceLive.ServerEventSessionAvatarSwitchToSpeaking",
@@ -165,35 +173,37 @@
         "azure.ai.voicelive.models.VideoParams": "VoiceLive.VideoParams",
         "azure.ai.voicelive.models.VideoResolution": "VoiceLive.VideoResolution",
         "azure.ai.voicelive.models.VoiceLiveErrorDetails": "VoiceLive.VoiceLiveErrorDetails",
-        "azure.ai.voicelive.models.ClientEventType": "VoiceLive.ClientEventType",
-        "azure.ai.voicelive.models.ItemType": "VoiceLive.ItemType",
-        "azure.ai.voicelive.models.ItemParamStatus": "VoiceLive.ItemParamStatus",
-        "azure.ai.voicelive.models.MessageRole": "VoiceLive.MessageRole",
-        "azure.ai.voicelive.models.ContentPartType": "VoiceLive.ContentPartType",
-        "azure.ai.voicelive.models.Modality": "VoiceLive.Modality",
+        "azure.ai.voicelive.models.AnimationOutputType": "VoiceLive.AnimationOutputType",
         "azure.ai.voicelive.models.OpenAIVoiceName": "VoiceLive.OAIVoice",
         "azure.ai.voicelive.models.AzureVoiceType": "VoiceLive.AzureVoiceType",
         "azure.ai.voicelive.models.PersonalVoiceModels": "VoiceLive.PersonalVoiceModels",
-        "azure.ai.voicelive.models.OutputAudioFormat": "VoiceLive.OutputAudioFormat",
+        "azure.ai.voicelive.models.AzureRealtimeNativeVoiceName": "VoiceLive.AzureRealtimeNativeVoiceName",
+        "azure.ai.voicelive.models.EouThresholdLevel": "VoiceLive.EouThresholdLevel",
+        "azure.ai.voicelive.models.TurnDetectionType": "VoiceLive.TurnDetectionType",
+        "azure.ai.voicelive.models.EchoCancellationReferenceSource": "VoiceLive.EchoCancellationReferenceSource",
+        "azure.ai.voicelive.models.AvatarConfigTypes": "VoiceLive.AvatarConfigTypes",
+        "azure.ai.voicelive.models.PhotoAvatarBaseModes": "VoiceLive.PhotoAvatarBaseModes",
+        "azure.ai.voicelive.models.AvatarOutputProtocol": "VoiceLive.AvatarOutputProtocol",
         "azure.ai.voicelive.models.ToolType": "VoiceLive.ToolType",
         "azure.ai.voicelive.models.MCPApprovalType": "VoiceLive.MCPApprovalType",
-        "azure.ai.voicelive.models.ReasoningEffort": "VoiceLive.ReasoningEffort",
         "azure.ai.voicelive.models.InterimResponseConfigType": "VoiceLive.InterimResponseConfigType",
         "azure.ai.voicelive.models.InterimResponseTrigger": "VoiceLive.InterimResponseTrigger",
-        "azure.ai.voicelive.models.AnimationOutputType": "VoiceLive.AnimationOutputType",
+        "azure.ai.voicelive.models.Modality": "VoiceLive.Modality",
         "azure.ai.voicelive.models.InputAudioFormat": "VoiceLive.InputAudioFormat",
-        "azure.ai.voicelive.models.TurnDetectionType": "VoiceLive.TurnDetectionType",
-        "azure.ai.voicelive.models.EouThresholdLevel": "VoiceLive.EouThresholdLevel",
-        "azure.ai.voicelive.models.AvatarConfigTypes": "VoiceLive.AvatarConfigTypes",
-        "azure.ai.voicelive.models.PhotoAvatarBaseModes": "VoiceLive.PhotoAvatarBaseModes",
-        "azure.ai.voicelive.models.AvatarOutputProtocol": "VoiceLive.AvatarOutputProtocol",
+        "azure.ai.voicelive.models.OutputAudioFormat": "VoiceLive.OutputAudioFormat",
         "azure.ai.voicelive.models.AudioTimestampType": "VoiceLive.AudioTimestampType",
         "azure.ai.voicelive.models.ToolChoiceLiteral": "VoiceLive.ToolChoiceLiteral",
+        "azure.ai.voicelive.models.ReasoningEffort": "VoiceLive.ReasoningEffort",
         "azure.ai.voicelive.models.SessionIncludeOption": "VoiceLive.SessionIncludeOption",
+        "azure.ai.voicelive.models.ClientEventType": "VoiceLive.ClientEventType",
+        "azure.ai.voicelive.models.ItemType": "VoiceLive.ItemType",
+        "azure.ai.voicelive.models.ItemParamStatus": "VoiceLive.ItemParamStatus",
+        "azure.ai.voicelive.models.MessageRole": "VoiceLive.MessageRole",
+        "azure.ai.voicelive.models.ContentPartType": "VoiceLive.ContentPartType",
         "azure.ai.voicelive.models.ResponseStatus": "VoiceLive.ResponseStatus",
-        "azure.ai.voicelive.models.ResponseItemStatus": "VoiceLive.ResponseItemStatus",
         "azure.ai.voicelive.models.RequestImageContentPartDetail": "VoiceLive.RequestImageContentPartDetail",
+        "azure.ai.voicelive.models.ResponseItemStatus": "VoiceLive.ResponseItemStatus",
         "azure.ai.voicelive.models.ServerEventType": "VoiceLive.ServerEventType"
     },
-    "CrossLanguageVersion": "4f7c08a38aa5"
+    "CrossLanguageVersion": "d4391398f022"
 }
\ No newline at end of file
diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_types.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_types.py
index c2ff170a25f0..ae3e380f7eaa 100644
--- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_types.py
+++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_types.py
@@ -10,6 +10,8 @@
 
 if TYPE_CHECKING:
     from . import models as _models
-Voice = Union[str, "_models.OpenAIVoiceName", "_models.OpenAIVoice", "_models.AzureVoice"]
-InterimResponseConfig = Union["_models.StaticInterimResponseConfig", "_models.LlmInterimResponseConfig"]
+Voice = Union[
+    str, "_models.OpenAIVoiceName", "_models.OpenAIVoice", "_models.AzureVoice", "_models.AzureRealtimeNativeVoice"
+]
 ToolChoice = Union[str, "_models.ToolChoiceLiteral", "_models.ToolChoiceSelection"]
+InterimResponseConfig = Union["_models.StaticInterimResponseConfig", "_models.LlmInterimResponseConfig"]
diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_utils/model_base.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_utils/model_base.py
index 4102784f9a85..d725c55906d3 100644
--- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_utils/model_base.py
+++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_utils/model_base.py
@@ -590,6 +590,239 @@ def _create_value(rf: typing.Optional["_RestField"], value: typing.Any) -> typin
     return _serialize(value, rf._format)
 
 
+# ============================================================================
+# Fast-path scalar deserializer functions for rest_field(deserializer=...)
+# These are referenced from rest_field declarations to bypass the generic
+# _deserialize -> _deserialize_with_callable chain.
+# Only simple/primitive types — no models or container types.
+# ============================================================================
+
+
+def _xml_deser_str(value):
+    if isinstance(value, ET.Element):
+        return value.text or ""
+    return str(value) if value is not None else None
+
+
+def _xml_deser_int(value):
+    if isinstance(value, ET.Element):
+        return int(value.text) if value.text else None
+    return int(value) if value is not None else None
+
+
+def _xml_deser_float(value):
+    if isinstance(value, ET.Element):
+        return float(value.text) if value.text else None
+    return float(value) if value is not None else None
+
+
+def _xml_deser_bool(value):
+    if isinstance(value, ET.Element):
+        text = value.text
+    else:
+        text = value
+    if text is None:
+        return None
+    if text in (True, False):
+        return text
+    return text.lower() == "true"
+
+
+# pylint: disable=docstring-missing-param
+def _xml_deser_bytes(value):
+    """Deserialize bytes from XML (base64)."""
+    if isinstance(value, ET.Element):
+        text = value.text
+    else:
+        text = value
+    if text is None:
+        return None
+    return _deserialize_bytes(text)
+
+
+def _xml_deser_bytes_base64url(value):
+    """Deserialize bytes from XML (base64url)."""
+    if isinstance(value, ET.Element):
+        text = value.text
+    else:
+        text = value
+    if text is None:
+        return None
+    return _deserialize_bytes_base64(text)
+
+
+def _xml_deser_datetime(value):
+    """Deserialize a datetime from XML (ISO 8601 / rfc3339)."""
+    if isinstance(value, ET.Element):
+        text = value.text
+    else:
+        text = value
+    if text is None:
+        return None
+    return _deserialize_datetime(text)
+
+
+def _xml_deser_datetime_rfc7231(value):
+    """Deserialize a datetime from XML (RFC7231 format)."""
+    if isinstance(value, ET.Element):
+        text = value.text
+    else:
+        text = value
+    if text is None:
+        return None
+    return _deserialize_datetime_rfc7231(text)
+
+
+def _xml_deser_datetime_unix_timestamp(value):
+    """Deserialize a datetime from XML (Unix timestamp)."""
+    if isinstance(value, ET.Element):
+        text = value.text
+    else:
+        text = value
+    if text is None:
+        return None
+    return _deserialize_datetime_unix_timestamp(float(text))
+
+
+def _xml_deser_date(value):
+    """Deserialize a date from XML (ISO 8601)."""
+    if isinstance(value, ET.Element):
+        text = value.text
+    else:
+        text = value
+    if text is None:
+        return None
+    return _deserialize_date(text)
+
+
+def _xml_deser_time(value):
+    """Deserialize a time from XML (ISO 8601)."""
+    if isinstance(value, ET.Element):
+        text = value.text
+    else:
+        text = value
+    if text is None:
+        return None
+    return _deserialize_time(text)
+
+
+def _xml_deser_duration(value):
+    """Deserialize a timedelta from XML (ISO 8601 duration)."""
+    if isinstance(value, ET.Element):
+        text = value.text
+    else:
+        text = value
+    if text is None:
+        return None
+    return _deserialize_duration(text)
+
+
+def _xml_deser_decimal(value):
+    """Deserialize a Decimal from XML."""
+    if isinstance(value, ET.Element):
+        text = value.text
+    else:
+        text = value
+    if text is None:
+        return None
+    return _deserialize_decimal(text)
+
+
+def _xml_deser_enum_or_str(enum_cls, value):
+    """Deserialize a Union[EnumType, str] from XML."""
+    text = value.text if isinstance(value, ET.Element) else value
+    if text is None:
+        return None
+    try:
+        return enum_cls(text)
+    except ValueError:
+        return text
+
+
+def _extract_xml_model_type(rf_type):
+    """Extract the concrete Model class from a resolved rf._type partial chain.
+
+    Unwraps ``Optional[Model]`` and ``_deserialize_model(Model, ...)``
+    wrappers.  Only handles Model and Optional[Model] — other composite
+    types (List, Dict, Union, etc.) return None and fall through to the
+    generic ``_deserialize`` path at runtime.
+    """
+    if rf_type is None:
+        return None
+    if isinstance(rf_type, type) and _is_model(rf_type):
+        return rf_type
+    if not isinstance(rf_type, functools.partial):
+        return None
+    func = rf_type.func
+    args = rf_type.args
+    if func is _deserialize_with_optional and args:
+        return _extract_xml_model_type(args[0])
+    if func is _deserialize_model and args:
+        cls = args[0]
+        return cls if isinstance(cls, type) and _is_model(cls) else None
+    return None
+
+
+def _build_xml_field_plan(  # pylint: disable=docstring-missing-return, docstring-missing-rtype, unused-variable
+    cls, attr_to_rest_field: dict
+) -> list:
+    """Build a precomputed XML field plan for fast _init_from_xml iteration.
+
+    Called once per model class in __new__. Returns a list of tuples:
+        (rest_name, xml_name, kind, deser, rf_type, is_optional, items_name)
+
+    kind: 0=wrapped, 1=attribute, 2=unwrapped, 3=text
+
+    For Model and Optional[Model] fields that lack a scalar
+    ``_deserializer``, this function precomputes the Model class as the
+    deserializer so ``_init_from_xml`` can call ``ModelClass(element)``
+    directly instead of going through the expensive
+    ``_get_deserialize_callable_from_annotation`` chain at runtime.
+    """
+    model_meta = getattr(cls, "_xml", {})
+    model_ns = model_meta.get("ns") or model_meta.get("namespace")
+    plan = []
+
+    for rf in attr_to_rest_field.values():
+        prop_meta = getattr(rf, "_xml", {})
+        deser = rf._deserializer
+
+        xml_name = prop_meta.get("name", rf._rest_name)
+        xml_ns = _resolve_xml_ns(prop_meta, model_meta)
+        if xml_ns:
+            xml_name = "{" + xml_ns + "}" + xml_name
+
+        is_optional = rf._is_optional
+
+        # For Model / Optional[Model] fields without a scalar deserializer,
+        # precompute the Model class as the deserializer.
+        if deser is None and rf._type is not None:
+            model_cls = _extract_xml_model_type(rf._type)
+            if model_cls is not None:
+                deser = model_cls
+
+        if prop_meta.get("attribute", False):
+            plan.append((rf._rest_name, xml_name, 1, deser, rf._type, is_optional, None))
+        elif prop_meta.get("unwrapped", False):
+            items_name = prop_meta.get("itemsName")
+            if items_name:
+                items_ns = prop_meta.get("itemsNs")
+                if items_ns is not None:
+                    xml_ns = items_ns
+                if xml_ns:
+                    items_name = "{" + xml_ns + "}" + items_name
+            else:
+                items_name = xml_name
+            plan.append((rf._rest_name, xml_name, 2, deser, rf._type, is_optional, items_name))
+        elif prop_meta.get("text", False):
+            plan.append((rf._rest_name, xml_name, 3, deser, rf._type, is_optional, None))
+        else:
+            plan.append((rf._rest_name, xml_name, 0, deser, rf._type, is_optional, None))
+
+    return plan
+
+
+# pylint: enable=docstring-missing-param
 class Model(_MyMutableMapping):
     _is_model = True
     # label whether current class's _attr_to_rest_field has been calculated
@@ -630,7 +863,9 @@ def __init__(self, *args: typing.Any, **kwargs: typing.Any) -> None:
             dict_to_pass[rf._rest_name] = _create_value(rf, rf._default)
         super().__init__(dict_to_pass)
 
-    def _init_from_xml(self, element: ET.Element) -> dict[str, typing.Any]:
+    def _init_from_xml(  # pylint: disable=too-many-branches, too-many-statements
+        self, element: ET.Element
+    ) -> dict[str, typing.Any]:
         """Deserialize an XML element into a dict mapping rest field names to values.
 
         :param ET.Element element: The XML element to deserialize from.
@@ -638,53 +873,89 @@ def _init_from_xml(self, element: ET.Element) -> dict[str, typing.Any]:
         :rtype: dict
         """
         result: dict[str, typing.Any] = {}
-        model_meta = getattr(self, "_xml", {})
         existed_attr_keys: list[str] = []
 
-        for rf in self._attr_to_rest_field.values():
-            prop_meta = getattr(rf, "_xml", {})
-            xml_name = prop_meta.get("name", rf._rest_name)
-            xml_ns = _resolve_xml_ns(prop_meta, model_meta)
-            if xml_ns:
-                xml_name = "{" + xml_ns + "}" + xml_name
-
-            # attribute
-            if prop_meta.get("attribute", False) and element.get(xml_name) is not None:
-                existed_attr_keys.append(xml_name)
-                result[rf._rest_name] = _deserialize(rf._type, element.get(xml_name))
-                continue
-
-            # unwrapped element is array
-            if prop_meta.get("unwrapped", False):
-                # unwrapped array could either use prop items meta/prop meta
-                _items_name = prop_meta.get("itemsName")
-                if _items_name:
-                    xml_name = _items_name
-                    _items_ns = prop_meta.get("itemsNs")
-                    if _items_ns is not None:
-                        xml_ns = _items_ns
-                    if xml_ns:
-                        xml_name = "{" + xml_ns + "}" + xml_name
-                items = element.findall(xml_name)  # pyright: ignore
-                if len(items) > 0:
+        field_plan = getattr(self, "_xml_field_plan", None)
+        if field_plan:
+            for rest_name, xml_name, kind, deser, rf_type, is_optional, items_name in field_plan:
+                if kind == 0:  # wrapped element (most common)
+                    item = element.find(xml_name)
+                    if item is not None:
+                        existed_attr_keys.append(xml_name)
+                        if deser:
+                            result[rest_name] = deser(item)
+                        else:
+                            result[rest_name] = _deserialize(rf_type, item)
+                elif kind == 1:  # attribute
+                    attr_val = element.get(xml_name)
+                    if attr_val is not None:
+                        existed_attr_keys.append(xml_name)
+                        if deser:
+                            result[rest_name] = deser(attr_val)
+                        else:
+                            result[rest_name] = attr_val
+                elif kind == 2:  # unwrapped array
+                    items = element.findall(items_name)  # pyright: ignore
+                    if len(items) > 0:
+                        existed_attr_keys.append(items_name)
+                        if deser:
+                            result[rest_name] = deser(items)
+                        else:
+                            result[rest_name] = _deserialize(rf_type, items)
+                    elif not is_optional:
+                        existed_attr_keys.append(items_name)
+                        result[rest_name] = []
+                elif kind == 3:  # text
+                    if element.text is not None:
+                        if deser:
+                            result[rest_name] = deser(element.text)
+                        else:
+                            result[rest_name] = element.text
+        else:
+            model_meta = getattr(self, "_xml", {})
+            for rf in self._attr_to_rest_field.values():
+                prop_meta = getattr(rf, "_xml", {})
+                xml_name = prop_meta.get("name", rf._rest_name)
+                xml_ns = _resolve_xml_ns(prop_meta, model_meta)
+                if xml_ns:
+                    xml_name = "{" + xml_ns + "}" + xml_name
+
+                # attribute
+                if prop_meta.get("attribute", False) and element.get(xml_name) is not None:
                     existed_attr_keys.append(xml_name)
-                    result[rf._rest_name] = _deserialize(rf._type, items)
-                elif not rf._is_optional:
+                    result[rf._rest_name] = _deserialize(rf._type, element.get(xml_name))
+                    continue
+
+                # unwrapped element is array
+                if prop_meta.get("unwrapped", False):
+                    _items_name = prop_meta.get("itemsName")
+                    if _items_name:
+                        xml_name = _items_name
+                        _items_ns = prop_meta.get("itemsNs")
+                        if _items_ns is not None:
+                            xml_ns = _items_ns
+                        if xml_ns:
+                            xml_name = "{" + xml_ns + "}" + xml_name
+                    items = element.findall(xml_name)  # pyright: ignore
+                    if len(items) > 0:
+                        existed_attr_keys.append(xml_name)
+                        result[rf._rest_name] = _deserialize(rf._type, items)
+                    elif not rf._is_optional:
+                        existed_attr_keys.append(xml_name)
+                        result[rf._rest_name] = []
+                    continue
+
+                # text element is primitive type
+                if prop_meta.get("text", False):
+                    if element.text is not None:
+                        result[rf._rest_name] = _deserialize(rf._type, element.text)
+                    continue
+
+                # wrapped element could be normal property or array
+                item = element.find(xml_name)
+                if item is not None:
                     existed_attr_keys.append(xml_name)
-                    result[rf._rest_name] = []
-                continue
-
-            # text element is primitive type
-            if prop_meta.get("text", False):
-                if element.text is not None:
-                    result[rf._rest_name] = _deserialize(rf._type, element.text)
-                continue
-
-            # wrapped element could be normal property or array, it should only have one element
-            item = element.find(xml_name)
-            if item is not None:
-                existed_attr_keys.append(xml_name)
-                result[rf._rest_name] = _deserialize(rf._type, item)
+                    result[rf._rest_name] = _deserialize(rf._type, item)
 
         # rest thing is additional properties
         for e in element:
@@ -717,6 +988,9 @@ def __new__(cls, *args: typing.Any, **kwargs: typing.Any) -> Self:
                 if not rf._rest_name_input:
                     rf._rest_name_input = attr
             cls._attr_to_rest_field: dict[str, _RestField] = dict(attr_to_rest_field.items())
+            # Build XML field plan for fast _init_from_xml (only for XML models)
+            if getattr(cls, "_xml", None):
+                cls._xml_field_plan = _build_xml_field_plan(cls, attr_to_rest_field)
             cls._calculated.add(f"{cls.__module__}.{cls.__qualname__}")
 
         return super().__new__(cls)
@@ -1091,6 +1365,7 @@ def __init__(
         format: typing.Optional[str] = None,
         is_multipart_file_input: bool = False,
         xml: typing.Optional[dict[str, typing.Any]] = None,
+        deserializer: typing.Optional[typing.Callable] = None,
     ):
         self._type = type
         self._rest_name_input = name
@@ -1103,6 +1378,7 @@ def __init__(
         self._format = format
         self._is_multipart_file_input = is_multipart_file_input
         self._xml = xml if xml is not None else {}
+        self._deserializer = deserializer
 
     @property
     def _class_type(self) -> typing.Any:
@@ -1138,7 +1414,11 @@ def __get__(self, obj: Model, type=None):  # pylint: disable=redefined-builtin
             # Return the value from _data directly (it's been deserialized in place)
             return obj._data.get(self._rest_name)
 
-        deserialized = _deserialize(self._type, _serialize(item, self._format), rf=self)
+        # Fast path: use _deserializer directly (avoids _serialize/_deserialize chain)
+        if self._deserializer:
+            deserialized = self._deserializer(item)
+        else:
+            deserialized = _deserialize(self._type, _serialize(item, self._format), rf=self)
 
         # For mutable types, store the deserialized value back in _data
         # so mutations directly affect _data
@@ -1184,6 +1464,7 @@ def rest_field(
     format: typing.Optional[str] = None,
     is_multipart_file_input: bool = False,
     xml: typing.Optional[dict[str, typing.Any]] = None,
+    deserializer: typing.Optional[typing.Callable] = None,
 ) -> typing.Any:
     return _RestField(
         name=name,
@@ -1193,6 +1474,7 @@ def rest_field(
         format=format,
         is_multipart_file_input=is_multipart_file_input,
         xml=xml,
+        deserializer=deserializer,
     )
 
 
@@ -1426,6 +1708,8 @@ def _deserialize_xml(
     value: str,
 ) -> typing.Any:
     element = ET.fromstring(value)  # nosec
+    if _is_model(deserializer):
+        return deserializer._deserialize(element, [])
     return _deserialize(deserializer, element)
 
 
diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_utils/serialization.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_utils/serialization.py
index 954bf7ebffa7..a088671e9c51 100644
--- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_utils/serialization.py
+++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_utils/serialization.py
@@ -1405,7 +1405,7 @@ def __init__(self, classes: Optional[Mapping[str, type]] = None) -> None:
         # Otherwise, result are unexpected
         self.additional_properties_detection = True
 
-    def __call__(self, target_obj, response_data, content_type=None):
+    def __call__(self, target_obj, response_data, content_type=None):  # pylint: disable=too-many-return-statements
         """Call the deserializer to process a REST response.
 
         :param str target_obj: Target data type to deserialize to.
@@ -1415,6 +1415,27 @@ def __call__(self, target_obj, response_data, content_type=None):
         :return: Deserialized object.
         :rtype: object
         """
+        # Fast path for header deserialization: response_data is a plain str or None
+        # and target_obj is a simple scalar type. This avoids the expensive
+        # _unpack_content → _deserialize → _classify_target → deserialize_data chain.
+        if response_data is None:
+            return None
+        if target_obj == "str" and isinstance(response_data, str):
+            return response_data
+        if isinstance(response_data, str):
+            if target_obj == "int":
+                return int(response_data)
+            if target_obj == "bool":
+                if response_data in ("true", "1", "True"):
+                    return True
+                if response_data in ("false", "0", "False"):
+                    return False
+                # Other spellings (e.g. "FALSE") fall through to the general path; bool() would yield True.
+            if target_obj == "rfc-1123":
+                return Deserializer.deserialize_rfc(response_data)
+            if target_obj == "bytearray":
+                return Deserializer.deserialize_bytearray(response_data)
+
         data = self._unpack_content(response_data, content_type)
         return self._deserialize(target_obj, data)
 
diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_version.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_version.py
index a73f358d285a..8b42750446d5 100644
--- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_version.py
+++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_version.py
@@ -6,4 +6,4 @@
 # Changes may cause incorrect behavior and will be lost if the code is regenerated.
 # --------------------------------------------------------------------------
 
-VERSION = "1.2.0"
+VERSION = "1.3.0b1"
diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py
index 5369ffe1eb1d..889c4a0d235c 100644
--- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py
+++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py
@@ -1,4 +1,4 @@
-# pylint: disable=line-too-long,useless-suppression
+# pylint: disable=line-too-long,useless-suppression,too-many-lines
 # coding=utf-8
 # --------------------------------------------------------------------------
 # Copyright (c) Microsoft Corporation.
@@ -115,7 +115,9 @@ def _build_foundry_agent_config(
         return None
 
     if agent_name is None or project_name is None:
-        raise ValueError("Both 'agent_name' and 'project_name' are required when connecting to an Azure AI Foundry agent.")
+        raise ValueError(
+            "Both 'agent_name' and 'project_name' are required when connecting to an Azure AI Foundry agent."
+        )
 
     return {key: value for key, value in agent_config.items() if value is not None}
 
@@ -877,8 +879,7 @@ def connect(
     headers: Optional[Mapping[str, Any]] = None,
     connection_options: Optional[WebsocketConnectionOptions] = None,
     credential_scopes: Optional[Union[str, Sequence[str]]] = None,
-) -> AbstractAsyncContextManager["VoiceLiveConnection"]:
-    ...
+) -> AbstractAsyncContextManager["VoiceLiveConnection"]: ...
 
 
 @overload
@@ -898,8 +899,7 @@ def connect(
     headers: Optional[Mapping[str, Any]] = None,
     connection_options: Optional[WebsocketConnectionOptions] = None,
     credential_scopes: Optional[Union[str, Sequence[str]]] = None,
-) -> AbstractAsyncContextManager["VoiceLiveConnection"]:
-    ...
+) -> AbstractAsyncContextManager["VoiceLiveConnection"]: ...
 
 
 def connect(
diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/__init__.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/__init__.py
index 9b2114496b5e..55c3a8acb5cf 100644
--- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/__init__.py
+++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/__init__.py
@@ -28,6 +28,7 @@
     AzureAvatarVoiceSyncVoice,
     AzureCustomVoice,
     AzurePersonalVoice,
+    AzureRealtimeNativeVoice,
     AzureSemanticDetection,
     AzureSemanticDetectionEn,
     AzureSemanticDetectionMultilingual,
@@ -54,6 +55,7 @@
     ClientEventOutputAudioBufferClear,
     ClientEventResponseCancel,
     ClientEventResponseCreate,
+    ClientEventRtcCallSdpCreate,
     ClientEventSessionAvatarConnect,
     ClientEventSessionUpdate,
     ContentPart,
@@ -103,6 +105,7 @@
     ResponseStatusDetails,
     ResponseTextContentPart,
     ResponseWebSearchCallItem,
+    RtcCallErrorDetails,
     Scene,
     ServerEvent,
     ServerEventConversationItemCreated,
@@ -122,6 +125,8 @@
     ServerEventMcpListToolsFailed,
     ServerEventMcpListToolsInProgress,
     ServerEventOutputAudioBufferCleared,
+    ServerEventOutputAudioBufferStarted,
+    ServerEventOutputAudioBufferStopped,
     ServerEventResponseAnimationBlendshapeDelta,
     ServerEventResponseAnimationBlendshapeDone,
     ServerEventResponseAnimationVisemeDelta,
@@ -142,6 +147,7 @@
     ServerEventResponseFileSearchCallSearching,
     ServerEventResponseFunctionCallArgumentsDelta,
     ServerEventResponseFunctionCallArgumentsDone,
+    ServerEventResponseInvocationDelta,
     ServerEventResponseMcpCallArgumentsDelta,
     ServerEventResponseMcpCallArgumentsDone,
     ServerEventResponseMcpCallCompleted,
@@ -155,6 +161,8 @@
     ServerEventResponseWebSearchCallCompleted,
     ServerEventResponseWebSearchCallInProgress,
     ServerEventResponseWebSearchCallSearching,
+    ServerEventRtcCallError,
+    ServerEventRtcCallSdpCreated,
     ServerEventSessionAvatarConnecting,
     ServerEventSessionAvatarSwitchToIdle,
     ServerEventSessionAvatarSwitchToSpeaking,
@@ -185,9 +193,11 @@
     AudioTimestampType,
     AvatarConfigTypes,
     AvatarOutputProtocol,
+    AzureRealtimeNativeVoiceName,
     AzureVoiceType,
     ClientEventType,
     ContentPartType,
+    EchoCancellationReferenceSource,
     EouThresholdLevel,
     InputAudioFormat,
     InterimResponseConfigType,
@@ -230,6 +240,7 @@
     "AzureAvatarVoiceSyncVoice",
     "AzureCustomVoice",
     "AzurePersonalVoice",
+    "AzureRealtimeNativeVoice",
     "AzureSemanticDetection",
     "AzureSemanticDetectionEn",
     "AzureSemanticDetectionMultilingual",
@@ -256,6 +267,7 @@
     "ClientEventOutputAudioBufferClear",
     "ClientEventResponseCancel",
     "ClientEventResponseCreate",
+    "ClientEventRtcCallSdpCreate",
     "ClientEventSessionAvatarConnect",
     "ClientEventSessionUpdate",
     "ContentPart",
@@ -305,6 +317,7 @@
     "ResponseStatusDetails",
     "ResponseTextContentPart",
     "ResponseWebSearchCallItem",
+    "RtcCallErrorDetails",
     "Scene",
     "ServerEvent",
     "ServerEventConversationItemCreated",
@@ -324,6 +337,8 @@
     "ServerEventMcpListToolsFailed",
     "ServerEventMcpListToolsInProgress",
     "ServerEventOutputAudioBufferCleared",
+    "ServerEventOutputAudioBufferStarted",
+    "ServerEventOutputAudioBufferStopped",
     "ServerEventResponseAnimationBlendshapeDelta",
     "ServerEventResponseAnimationBlendshapeDone",
     "ServerEventResponseAnimationVisemeDelta",
@@ -344,6 +359,7 @@
     "ServerEventResponseFileSearchCallSearching",
     "ServerEventResponseFunctionCallArgumentsDelta",
     "ServerEventResponseFunctionCallArgumentsDone",
+    "ServerEventResponseInvocationDelta",
     "ServerEventResponseMcpCallArgumentsDelta",
     "ServerEventResponseMcpCallArgumentsDone",
     "ServerEventResponseMcpCallCompleted",
@@ -357,6 +373,8 @@
     "ServerEventResponseWebSearchCallCompleted",
     "ServerEventResponseWebSearchCallInProgress",
     "ServerEventResponseWebSearchCallSearching",
+    "ServerEventRtcCallError",
+    "ServerEventRtcCallSdpCreated",
     "ServerEventSessionAvatarConnecting",
     "ServerEventSessionAvatarSwitchToIdle",
     "ServerEventSessionAvatarSwitchToSpeaking",
@@ -384,9 +402,11 @@
     "AudioTimestampType",
     "AvatarConfigTypes",
     "AvatarOutputProtocol",
+    "AzureRealtimeNativeVoiceName",
     "AzureVoiceType",
     "ClientEventType",
     "ContentPartType",
+    "EchoCancellationReferenceSource",
     "EouThresholdLevel",
     "InputAudioFormat",
     "InterimResponseConfigType",
diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_enums.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_enums.py
index b01b33321a0b..e21cd92d45fe 100644
--- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_enums.py
+++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_enums.py
@@ -44,6 +44,37 @@ class AvatarOutputProtocol(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """WebSocket protocol, output the video frames over WebSocket."""
 
 
+class AzureRealtimeNativeVoiceName(str, Enum, metaclass=CaseInsensitiveEnumMeta):
+    """Currently known voice names for the Azure realtime native voice type. This is an extensible
+    enum; additional voice names may be accepted by the service in the future.
+    """
+
+    AARTI = "aarti"
+    """Aarti voice."""
+    ANDREW = "andrew"
+    """Andrew voice."""
+    AVA = "ava"
+    """Ava voice."""
+    DENISE = "denise"
+    """Denise voice."""
+    DIYA = "diya"
+    """Diya voice."""
+    ELSA = "elsa"
+    """Elsa voice."""
+    FLORIAN = "florian"
+    """Florian voice."""
+    FRANCISCA = "francisca"
+    """Francisca voice."""
+    MEERA = "meera"
+    """Meera voice."""
+    XIAOXIAO = "xiaoxiao"
+    """Xiaoxiao voice."""
+    YUNXI = "yunxi"
+    """Yunxi voice."""
+    XIMENA = "ximena"
+    """Ximena voice."""
+
+
 class AzureVoiceType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """Union of all supported Azure voice types."""
 
@@ -96,6 +127,8 @@ class ClientEventType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """MCP_APPROVAL_RESPONSE."""
     OUTPUT_AUDIO_BUFFER_CLEAR = "output_audio_buffer.clear"
     """Client request to clear the avatar output buffer."""
+    RTC_CALL_SDP_CREATE = "rtc.call.sdp.create"
+    """Sent by the client to initiate a WebRTC session with an SDP offer."""
 
 
 class ContentPartType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
@@ -113,6 +146,15 @@ class ContentPartType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """AUDIO."""
 
 
+class EchoCancellationReferenceSource(str, Enum, metaclass=CaseInsensitiveEnumMeta):
+    """The source of the echo cancellation reference signal."""
+
+    SERVER = "server"
+    """EC uses the internal TTS loopback as the reference signal."""
+    CLIENT = "client"
+    """EC uses the client-supplied reference channel from the stereo input stream."""
+
+
 class EouThresholdLevel(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """Threshold level settings for Azure semantic end-of-utterance detection."""
 
@@ -466,6 +508,16 @@ class ServerEventType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """Output audio buffer has been cleared."""
     RESPONSE_AUDIO_TRANSCRIPT_ANNOTATION_ADDED = "response.audio_transcript.annotation.added"
     """Audio transcript annotation added."""
+    RESPONSE_INVOCATION_DELTA = "response.invocation.delta"
+    """Invocation passthrough delta from hosted agent."""
+    RTC_CALL_SDP_CREATED = "rtc.call.sdp.created"
+    """Returned when the WebRTC SDP negotiation completes successfully."""
+    RTC_CALL_ERROR = "rtc.call.error"
+    """Returned when a WebRTC call operation fails."""
+    OUTPUT_AUDIO_BUFFER_STARTED = "output_audio_buffer.started"
+    """Output audio buffer playback started."""
+    OUTPUT_AUDIO_BUFFER_STOPPED = "output_audio_buffer.stopped"
+    """Output audio buffer playback stopped."""
 
 
 class SessionIncludeOption(str, Enum, metaclass=CaseInsensitiveEnumMeta):
diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py
index 94d77d7bac95..3accdd4f4906 100644
--- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py
+++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py
@@ -397,11 +397,58 @@ class AudioEchoCancellation(_Model):
     :ivar type: The type of echo cancellation model to use. Required. Default value is
      "server_echo_cancellation".
     :vartype type: str
+    :ivar reference_source: The source of the echo cancellation reference signal.
+
+     * `server`: EC uses the internal TTS loopback as the reference signal (default, existing
+       behavior).
+     * `client`: EC uses the client-supplied reference channel (ch1 of stereo input). Internal
+       TTS loopback is skipped. Known values are: "server" and "client".
+    :vartype reference_source: str or ~azure.ai.voicelive.models.EchoCancellationReferenceSource
+    :ivar channels: Number of input audio channels.
+
+     * `1`: Mono input (default).
+     * `2`: Interleaved stereo input where channel 0 is the microphone signal and channel 1 is
+       the echo reference signal.
+     When set to 2, `reference_source` must be `client` and `input_audio_format` must be
+     `pcm16`.
+    :vartype channels: int
     """
 
     type: Literal["server_echo_cancellation"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The type of echo cancellation model to use. Required. Default value is
      \"server_echo_cancellation\"."""
+    reference_source: Optional[Union[str, "_models.EchoCancellationReferenceSource"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
+    """The source of the echo cancellation reference signal.
+ 
+      * `server`: EC uses the internal TTS loopback as the reference signal (default, existing
+        behavior).
+      * `client`: EC uses the client-supplied reference channel (ch1 of stereo input). Internal
+        TTS loopback is skipped. Known values are: \"server\" and \"client\"."""
+    channels: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Number of input audio channels.
+ 
+      * `1`: Mono input (default).
+      * `2`: Interleaved stereo input where channel 0 is the microphone signal and channel 1 is
+        the echo reference signal.
+      When set to 2, `reference_source` must be `client` and `input_audio_format` must be
+      `pcm16`."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        reference_source: Optional[Union[str, "_models.EchoCancellationReferenceSource"]] = None,
+        channels: Optional[int] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
@@ -1047,6 +1094,47 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self.type = AzureVoiceType.AZURE_PERSONAL  # type: ignore
 
 
+class AzureRealtimeNativeVoice(_Model):
+    """Azure realtime native voice configuration. These voices are natively supported by the
+    ``azure-realtime`` model and offer higher quality speech synthesis than standard Azure voices.
+    Only valid when using the ``azure-realtime`` model.
+
+    :ivar type: The type of the voice. Required. Default value is "azure-realtime-native".
+    :vartype type: str
+    :ivar name: The name of the Azure realtime native voice. Required. Known values are: "aarti",
+     "andrew", "ava", "denise", "diya", "elsa", "florian", "francisca", "meera", "xiaoxiao",
+     "yunxi", and "ximena".
+    :vartype name: str or ~azure.ai.voicelive.models.AzureRealtimeNativeVoiceName
+    """
+
+    type: Literal["azure-realtime-native"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The type of the voice. Required. Default value is \"azure-realtime-native\"."""
+    name: Union[str, "_models.AzureRealtimeNativeVoiceName"] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
+    """The name of the Azure realtime native voice. Required. Known values are: \"aarti\", \"andrew\",
+     \"ava\", \"denise\", \"diya\", \"elsa\", \"florian\", \"francisca\", \"meera\", \"xiaoxiao\",
+     \"yunxi\", and \"ximena\"."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        name: Union[str, "_models.AzureRealtimeNativeVoiceName"],
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        self.type: Literal["azure-realtime-native"] = "azure-realtime-native"
+
+
 class EouDetection(_Model):
     """Top-level union for end-of-utterance (EOU) semantic detection configuration.
 
@@ -1703,16 +1791,16 @@ class ClientEvent(_Model):
     ClientEventInputAudioTurnEnd, ClientEventInputAudioTurnStart,
     ClientEventInputAudioBufferAppend, ClientEventInputAudioBufferClear,
     ClientEventInputAudioBufferCommit, ClientEventOutputAudioBufferClear,
-    ClientEventResponseCancel, ClientEventResponseCreate, ClientEventSessionAvatarConnect,
-    ClientEventSessionUpdate
+    ClientEventResponseCancel, ClientEventResponseCreate, ClientEventRtcCallSdpCreate,
+    ClientEventSessionAvatarConnect, ClientEventSessionUpdate
 
     :ivar type: The type of event. Required. Known values are: "session.update",
      "input_audio_buffer.append", "input_audio_buffer.commit", "input_audio_buffer.clear",
      "input_audio.turn.start", "input_audio.turn.append", "input_audio.turn.end",
      "input_audio.turn.cancel", "input_audio.clear", "conversation.item.create",
      "conversation.item.retrieve", "conversation.item.truncate", "conversation.item.delete",
-     "response.create", "response.cancel", "session.avatar.connect", "mcp_approval_response", and
-     "output_audio_buffer.clear".
+     "response.create", "response.cancel", "session.avatar.connect", "mcp_approval_response",
+     "output_audio_buffer.clear", and "rtc.call.sdp.create".
     :vartype type: str or ~azure.ai.voicelive.models.ClientEventType
     :ivar event_id:
     :vartype event_id: str
@@ -1726,7 +1814,7 @@ class ClientEvent(_Model):
      \"input_audio.turn.cancel\", \"input_audio.clear\", \"conversation.item.create\",
      \"conversation.item.retrieve\", \"conversation.item.truncate\", \"conversation.item.delete\",
      \"response.create\", \"response.cancel\", \"session.avatar.connect\",
-     \"mcp_approval_response\", and \"output_audio_buffer.clear\"."""
+     \"mcp_approval_response\", \"output_audio_buffer.clear\", and \"rtc.call.sdp.create\"."""
     event_id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
 
     @overload
@@ -2364,6 +2452,50 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self.type = ClientEventType.RESPONSE_CREATE  # type: ignore
 
 
+class ClientEventRtcCallSdpCreate(ClientEvent, discriminator="rtc.call.sdp.create"):
+    """Sent by the client to initiate a WebRTC session with an SDP offer.
+
+    :ivar event_id:
+    :vartype event_id: str
+    :ivar type: The event type, must be ``rtc.call.sdp.create``. Required. Sent by the client to
+     initiate a WebRTC session with an SDP offer.
+    :vartype type: str or ~azure.ai.voicelive.models.RTC_CALL_SDP_CREATE
+    :ivar sdp_offer: The SDP offer from the client for WebRTC negotiation. Required.
+    :vartype sdp_offer: str
+    :ivar session: Optional initial session configuration. If provided, applied before the session
+     is established.
+    :vartype session: ~azure.ai.voicelive.models.RequestSession
+    """
+
+    type: Literal[ClientEventType.RTC_CALL_SDP_CREATE] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
+    """The event type, must be ``rtc.call.sdp.create``. Required. Sent by the client to initiate a
+     WebRTC session with an SDP offer."""
+    sdp_offer: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The SDP offer from the client for WebRTC negotiation. Required."""
+    session: Optional["_models.RequestSession"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Optional initial session configuration. If provided, applied before the session is established."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        sdp_offer: str,
+        event_id: Optional[str] = None,
+        session: Optional["_models.RequestSession"] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        self.type = ClientEventType.RTC_CALL_SDP_CREATE  # type: ignore
+
+
 class ClientEventSessionAvatarConnect(ClientEvent, discriminator="session.avatar.connect"):
     """Sent when the client connects and provides its SDP (Session Description Protocol)
 
@@ -3411,9 +3543,10 @@ class RequestSession(_Model):
     :ivar animation: The animation configuration for the session.
     :vartype animation: ~azure.ai.voicelive.models.Animation
     :ivar voice: The voice configuration for the session. Is one of the following types: Union[str,
-     "_models.OpenAIVoiceName"], OpenAIVoice, AzureVoice
+     "_models.OpenAIVoiceName"], OpenAIVoice, AzureVoice, AzureRealtimeNativeVoice
     :vartype voice: str or ~azure.ai.voicelive.models.OpenAIVoiceName or
-     ~azure.ai.voicelive.models.OpenAIVoice or ~azure.ai.voicelive.models.AzureVoice
+     ~azure.ai.voicelive.models.OpenAIVoice or ~azure.ai.voicelive.models.AzureVoice or
+     ~azure.ai.voicelive.models.AzureRealtimeNativeVoice
     :ivar instructions: Optional instructions to guide the model's behavior throughout the session.
     :vartype instructions: str
     :ivar input_audio_sampling_rate: Input audio sampling rate in Hz. Available values:
@@ -3484,7 +3617,7 @@ class RequestSession(_Model):
     """The animation configuration for the session."""
     voice: Optional["_types.Voice"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The voice configuration for the session. Is one of the following types: Union[str,
-     \"_models.OpenAIVoiceName\"], OpenAIVoice, AzureVoice"""
+     \"_models.OpenAIVoiceName\"], OpenAIVoice, AzureVoice, AzureRealtimeNativeVoice"""
     instructions: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Optional instructions to guide the model's behavior throughout the session."""
     input_audio_sampling_rate: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
@@ -3661,9 +3794,10 @@ class Response(_Model):
      like ``conv_1234``.
     :vartype conversation_id: str
     :ivar voice: supported voice identifiers and configurations. Is one of the following types:
-     Union[str, "_models.OpenAIVoiceName"], OpenAIVoice, AzureVoice
+     Union[str, "_models.OpenAIVoiceName"], OpenAIVoice, AzureVoice, AzureRealtimeNativeVoice
     :vartype voice: str or ~azure.ai.voicelive.models.OpenAIVoiceName or
-     ~azure.ai.voicelive.models.OpenAIVoice or ~azure.ai.voicelive.models.AzureVoice
+     ~azure.ai.voicelive.models.OpenAIVoice or ~azure.ai.voicelive.models.AzureVoice or
+     ~azure.ai.voicelive.models.AzureRealtimeNativeVoice
     :ivar modalities: The set of modalities the model used to respond. If there are multiple
      modalities, the model will pick one, for example if ``modalities`` is ``["text", "audio"]``,
      the model could be responding in either text or audio.
@@ -3718,7 +3852,7 @@ class Response(_Model):
      default conversation, thus the ``conversation_id`` will be an id like ``conv_1234``."""
     voice: Optional["_types.Voice"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """supported voice identifiers and configurations. Is one of the following types: Union[str,
-     \"_models.OpenAIVoiceName\"], OpenAIVoice, AzureVoice"""
+     \"_models.OpenAIVoiceName\"], OpenAIVoice, AzureVoice, AzureRealtimeNativeVoice"""
     modalities: Optional[list[Union[str, "_models.Modality"]]] = rest_field(
         visibility=["read", "create", "update", "delete", "query"]
     )
@@ -3906,9 +4040,10 @@ class ResponseCreateParams(_Model):
      start of the session.
     :vartype instructions: str
     :ivar voice: supported voice identifiers and configurations. Is one of the following types:
-     Union[str, "_models.OpenAIVoiceName"], OpenAIVoice, AzureVoice
+     Union[str, "_models.OpenAIVoiceName"], OpenAIVoice, AzureVoice, AzureRealtimeNativeVoice
     :vartype voice: str or ~azure.ai.voicelive.models.OpenAIVoiceName or
-     ~azure.ai.voicelive.models.OpenAIVoice or ~azure.ai.voicelive.models.AzureVoice
+     ~azure.ai.voicelive.models.OpenAIVoice or ~azure.ai.voicelive.models.AzureVoice or
+     ~azure.ai.voicelive.models.AzureRealtimeNativeVoice
     :ivar output_audio_format: The format of output audio. Options are ``pcm16``, ``g711_ulaw``, or
      ``g711_alaw``. Known values are: "pcm16", "pcm16_8000hz", "pcm16_16000hz", "g711_ulaw", and
      "g711_alaw".
@@ -3942,6 +4077,8 @@ class ResponseCreateParams(_Model):
      calls. Is either a StaticInterimResponseConfig type or a LlmInterimResponseConfig type.
     :vartype interim_response: ~azure.ai.voicelive.models.StaticInterimResponseConfig or
      ~azure.ai.voicelive.models.LlmInterimResponseConfig
+    :ivar invoke_input: Input data to invoke the hosted agent. This feature is in preview.
+    :vartype invoke_input: dict[str, any]
     """
 
     commit: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"])
@@ -3976,7 +4113,7 @@ class ResponseCreateParams(_Model):
      start of the session."""
     voice: Optional["_types.Voice"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """supported voice identifiers and configurations. Is one of the following types: Union[str,
-     \"_models.OpenAIVoiceName\"], OpenAIVoice, AzureVoice"""
+     \"_models.OpenAIVoiceName\"], OpenAIVoice, AzureVoice, AzureRealtimeNativeVoice"""
     output_audio_format: Optional[Union[str, "_models.OutputAudioFormat"]] = rest_field(
         visibility=["read", "create", "update", "delete", "query"]
     )
@@ -4017,6 +4154,8 @@ class ResponseCreateParams(_Model):
     )
     """Configuration for interim response generation during latency or tool calls. Is either a
      StaticInterimResponseConfig type or a LlmInterimResponseConfig type."""
+    invoke_input: Optional[dict[str, Any]] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Input data to invoke the hosted agent. This feature is in preview."""
 
     @overload
     def __init__(
@@ -4038,6 +4177,7 @@ def __init__(
         reasoning_effort: Optional[Union[str, "_models.ReasoningEffort"]] = None,
         metadata: Optional[dict[str, str]] = None,
         interim_response: Optional["_types.InterimResponseConfig"] = None,
+        invoke_input: Optional[dict[str, Any]] = None,
     ) -> None: ...
 
     @overload
@@ -4604,9 +4744,10 @@ class ResponseSession(_Model):
     :ivar animation: The animation configuration for the session.
     :vartype animation: ~azure.ai.voicelive.models.Animation
     :ivar voice: The voice configuration for the session. Is one of the following types: Union[str,
-     "_models.OpenAIVoiceName"], OpenAIVoice, AzureVoice
+     "_models.OpenAIVoiceName"], OpenAIVoice, AzureVoice, AzureRealtimeNativeVoice
     :vartype voice: str or ~azure.ai.voicelive.models.OpenAIVoiceName or
-     ~azure.ai.voicelive.models.OpenAIVoice or ~azure.ai.voicelive.models.AzureVoice
+     ~azure.ai.voicelive.models.OpenAIVoice or ~azure.ai.voicelive.models.AzureVoice or
+     ~azure.ai.voicelive.models.AzureRealtimeNativeVoice
     :ivar instructions: Optional instructions to guide the model's behavior throughout the session.
     :vartype instructions: str
     :ivar input_audio_sampling_rate: Input audio sampling rate in Hz. Available values:
@@ -4681,7 +4822,7 @@ class ResponseSession(_Model):
     """The animation configuration for the session."""
     voice: Optional["_types.Voice"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The voice configuration for the session. Is one of the following types: Union[str,
-     \"_models.OpenAIVoiceName\"], OpenAIVoice, AzureVoice"""
+     \"_models.OpenAIVoiceName\"], OpenAIVoice, AzureVoice, AzureRealtimeNativeVoice"""
     instructions: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Optional instructions to guide the model's behavior throughout the session."""
     input_audio_sampling_rate: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
@@ -4878,6 +5019,44 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self.type = ItemType.WEB_SEARCH_CALL  # type: ignore
 
 
+class RtcCallErrorDetails(_Model):
+    """Error details for RTC call errors.
+
+    :ivar type: The error category: ``invalid_request_error`` or ``server_error``. Required.
+    :vartype type: str
+    :ivar code: A machine-readable error code.
+    :vartype code: str
+    :ivar message: A human-readable error description. Required.
+    :vartype message: str
+    """
+
+    type: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The error category: ``invalid_request_error`` or ``server_error``. Required."""
+    code: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """A machine-readable error code."""
+    message: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """A human-readable error description. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        type: str,
+        message: str,
+        code: Optional[str] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
 class Scene(_Model):
     """Configuration for avatar's zoom level, position, rotation and movement amplitude in the video
     frame.
@@ -4963,7 +5142,8 @@ class ServerEvent(_Model):
     ServerEventInputAudioBufferCommitted, ServerEventInputAudioBufferSpeechStarted,
     ServerEventInputAudioBufferSpeechStopped, ServerEventMcpListToolsCompleted,
     ServerEventMcpListToolsFailed, ServerEventMcpListToolsInProgress,
-    ServerEventOutputAudioBufferCleared, ServerEventResponseAnimationBlendshapeDelta,
+    ServerEventOutputAudioBufferCleared, ServerEventOutputAudioBufferStarted,
+    ServerEventOutputAudioBufferStopped, ServerEventResponseAnimationBlendshapeDelta,
     ServerEventResponseAnimationBlendshapeDone, ServerEventResponseAnimationVisemeDelta,
     ServerEventResponseAnimationVisemeDone, ServerEventResponseAudioDelta,
     ServerEventResponseAudioDone, ServerEventResponseAudioTimestampDelta,
@@ -4973,13 +5153,14 @@ class ServerEvent(_Model):
     ServerEventResponseCreated, ServerEventResponseDone,
     ServerEventResponseFileSearchCallCompleted, ServerEventResponseFileSearchCallInProgress,
     ServerEventResponseFileSearchCallSearching, ServerEventResponseFunctionCallArgumentsDelta,
-    ServerEventResponseFunctionCallArgumentsDone, ServerEventResponseMcpCallCompleted,
-    ServerEventResponseMcpCallFailed, ServerEventResponseMcpCallInProgress,
-    ServerEventResponseMcpCallArgumentsDelta, ServerEventResponseMcpCallArgumentsDone,
-    ServerEventResponseOutputItemAdded, ServerEventResponseOutputItemDone,
-    ServerEventResponseTextDelta, ServerEventResponseTextDone, ServerEventResponseVideoDelta,
-    ServerEventResponseWebSearchCallCompleted, ServerEventResponseWebSearchCallInProgress,
-    ServerEventResponseWebSearchCallSearching, ServerEventSessionAvatarConnecting,
+    ServerEventResponseFunctionCallArgumentsDone, ServerEventResponseInvocationDelta,
+    ServerEventResponseMcpCallCompleted, ServerEventResponseMcpCallFailed,
+    ServerEventResponseMcpCallInProgress, ServerEventResponseMcpCallArgumentsDelta,
+    ServerEventResponseMcpCallArgumentsDone, ServerEventResponseOutputItemAdded,
+    ServerEventResponseOutputItemDone, ServerEventResponseTextDelta, ServerEventResponseTextDone,
+    ServerEventResponseVideoDelta, ServerEventResponseWebSearchCallCompleted,
+    ServerEventResponseWebSearchCallInProgress, ServerEventResponseWebSearchCallSearching,
+    ServerEventRtcCallError, ServerEventRtcCallSdpCreated, ServerEventSessionAvatarConnecting,
     ServerEventSessionAvatarSwitchToIdle, ServerEventSessionAvatarSwitchToSpeaking,
     ServerEventSessionCreated, ServerEventSessionUpdated, ServerEventWarning
 
@@ -5005,8 +5186,10 @@ class ServerEvent(_Model):
      "session.avatar.switch_to_idle", "response.video.delta", "response.web_search_call.searching",
      "response.web_search_call.in_progress", "response.web_search_call.completed",
      "response.file_search_call.searching", "response.file_search_call.in_progress",
-     "response.file_search_call.completed", "output_audio_buffer.cleared", and
-     "response.audio_transcript.annotation.added".
+     "response.file_search_call.completed", "output_audio_buffer.cleared",
+     "response.audio_transcript.annotation.added", "response.invocation.delta",
+     "rtc.call.sdp.created", "rtc.call.error", "output_audio_buffer.started", and
+     "output_audio_buffer.stopped".
     :vartype type: str or ~azure.ai.voicelive.models.ServerEventType
     :ivar event_id:
     :vartype event_id: str
@@ -5038,8 +5221,10 @@ class ServerEvent(_Model):
      \"response.video.delta\", \"response.web_search_call.searching\",
      \"response.web_search_call.in_progress\", \"response.web_search_call.completed\",
      \"response.file_search_call.searching\", \"response.file_search_call.in_progress\",
-     \"response.file_search_call.completed\", \"output_audio_buffer.cleared\", and
-     \"response.audio_transcript.annotation.added\"."""
+     \"response.file_search_call.completed\", \"output_audio_buffer.cleared\",
+     \"response.audio_transcript.annotation.added\", \"response.invocation.delta\",
+     \"rtc.call.sdp.created\", \"rtc.call.error\", \"output_audio_buffer.started\", and
+     \"output_audio_buffer.stopped\"."""
     event_id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
 
     @overload
@@ -5832,6 +6017,82 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self.type = ServerEventType.OUTPUT_AUDIO_BUFFER_CLEARED  # type: ignore
 
 
+class ServerEventOutputAudioBufferStarted(ServerEvent, discriminator="output_audio_buffer.started"):
+    """Returned when model audio output starts playing.
+
+    :ivar event_id:
+    :vartype event_id: str
+    :ivar type: The event type, must be ``output_audio_buffer.started``. Required. Output audio
+     buffer playback started.
+    :vartype type: str or ~azure.ai.voicelive.models.OUTPUT_AUDIO_BUFFER_STARTED
+    :ivar response_id: The ID of the response whose audio started playing.
+    :vartype response_id: str
+    """
+
+    type: Literal[ServerEventType.OUTPUT_AUDIO_BUFFER_STARTED] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
+    """The event type, must be ``output_audio_buffer.started``. Required. Output audio buffer playback
+     started."""
+    response_id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The ID of the response whose audio started playing."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        event_id: Optional[str] = None,
+        response_id: Optional[str] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        self.type = ServerEventType.OUTPUT_AUDIO_BUFFER_STARTED  # type: ignore
+
+
+class ServerEventOutputAudioBufferStopped(ServerEvent, discriminator="output_audio_buffer.stopped"):
+    """Returned when model audio output finishes playing.
+
+    :ivar event_id:
+    :vartype event_id: str
+    :ivar type: The event type, must be ``output_audio_buffer.stopped``. Required. Output audio
+     buffer playback stopped.
+    :vartype type: str or ~azure.ai.voicelive.models.OUTPUT_AUDIO_BUFFER_STOPPED
+    :ivar response_id: The ID of the response whose audio stopped playing.
+    :vartype response_id: str
+    """
+
+    type: Literal[ServerEventType.OUTPUT_AUDIO_BUFFER_STOPPED] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
+    """The event type, must be ``output_audio_buffer.stopped``. Required. Output audio buffer playback
+     stopped."""
+    response_id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The ID of the response whose audio stopped playing."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        event_id: Optional[str] = None,
+        response_id: Optional[str] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        self.type = ServerEventType.OUTPUT_AUDIO_BUFFER_STOPPED  # type: ignore
+
+
 class ServerEventResponseAnimationBlendshapeDelta(
     ServerEvent, discriminator="response.animation_blendshapes.delta"
 ):  # pylint: disable=name-too-long
@@ -6950,6 +7211,44 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self.type = ServerEventType.RESPONSE_FUNCTION_CALL_ARGUMENTS_DONE  # type: ignore
 
 
+class ServerEventResponseInvocationDelta(ServerEvent, discriminator="response.invocation.delta"):
+    """Returned when a hosted agent invocation produces a non-speech SSE event, passed through as-is.
+
+    :ivar event_id:
+    :vartype event_id: str
+    :ivar type: The event type, must be ``response.invocation.delta``. Required. Invocation
+     passthrough delta from hosted agent.
+    :vartype type: str or ~azure.ai.voicelive.models.RESPONSE_INVOCATION_DELTA
+    :ivar delta: The raw event data from the hosted agent invocation. Required.
+    :vartype delta: dict[str, any]
+    """
+
+    type: Literal[ServerEventType.RESPONSE_INVOCATION_DELTA] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
+    """The event type, must be ``response.invocation.delta``. Required. Invocation passthrough delta
+     from hosted agent."""
+    delta: dict[str, Any] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The raw event data from the hosted agent invocation. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        delta: dict[str, Any],
+        event_id: Optional[str] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        self.type = ServerEventType.RESPONSE_INVOCATION_DELTA  # type: ignore
+
+
 class ServerEventResponseMcpCallArgumentsDelta(ServerEvent, discriminator="response.mcp_call_arguments.delta"):
     """Represents a delta update of the arguments for an MCP tool call.
 
@@ -7599,6 +7898,97 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self.type = ServerEventType.RESPONSE_WEB_SEARCH_CALL_SEARCHING  # type: ignore
 
 
+class ServerEventRtcCallError(ServerEvent, discriminator="rtc.call.error"):
+    """Returned when a WebRTC call operation fails.
+
+    :ivar event_id:
+    :vartype event_id: str
+    :ivar type: The event type, must be ``rtc.call.error``. Required. Returned when a WebRTC call
+     operation fails.
+    :vartype type: str or ~azure.ai.voicelive.models.RTC_CALL_ERROR
+    :ivar operation: The operation that caused the error (e.g., ``rtc.call.sdp.create``).
+    :vartype operation: str
+    :ivar rtc_call_id: The RTC call identifier, if available.
+    :vartype rtc_call_id: str
+    :ivar error: The error details. Required.
+    :vartype error: ~azure.ai.voicelive.models.RtcCallErrorDetails
+    """
+
+    type: Literal[ServerEventType.RTC_CALL_ERROR] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
+    """The event type, must be ``rtc.call.error``. Required. Returned when a WebRTC call operation
+     fails."""
+    operation: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The operation that caused the error (e.g., ``rtc.call.sdp.create``)."""
+    rtc_call_id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The RTC call identifier, if available."""
+    error: "_models.RtcCallErrorDetails" = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The error details. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        error: "_models.RtcCallErrorDetails",
+        event_id: Optional[str] = None,
+        operation: Optional[str] = None,
+        rtc_call_id: Optional[str] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        self.type = ServerEventType.RTC_CALL_ERROR  # type: ignore
+
+
+class ServerEventRtcCallSdpCreated(ServerEvent, discriminator="rtc.call.sdp.created"):
+    """Returned when the WebRTC SDP negotiation completes successfully.
+
+    :ivar event_id:
+    :vartype event_id: str
+    :ivar type: The event type, must be ``rtc.call.sdp.created``. Required. Returned when the
+     WebRTC SDP negotiation completes successfully.
+    :vartype type: str or ~azure.ai.voicelive.models.RTC_CALL_SDP_CREATED
+    :ivar rtc_call_id: The unique identifier for this RTC call session. Required.
+    :vartype rtc_call_id: str
+    :ivar sdp_answer: The SDP answer from the server for WebRTC negotiation. Required.
+    :vartype sdp_answer: str
+    """
+
+    type: Literal[ServerEventType.RTC_CALL_SDP_CREATED] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
+    """The event type, must be ``rtc.call.sdp.created``. Required. Returned when the WebRTC SDP
+     negotiation completes successfully."""
+    rtc_call_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The unique identifier for this RTC call session. Required."""
+    sdp_answer: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The SDP answer from the server for WebRTC negotiation. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        rtc_call_id: str,
+        sdp_answer: str,
+        event_id: Optional[str] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        self.type = ServerEventType.RTC_CALL_SDP_CREATED  # type: ignore
+
+
 class ServerEventSessionAvatarConnecting(ServerEvent, discriminator="session.avatar.connecting"):
     """Sent when the server is in the process of establishing an avatar media connection and provides
     its SDP answer.
diff --git a/sdk/voicelive/azure-ai-voicelive/pyproject.toml b/sdk/voicelive/azure-ai-voicelive/pyproject.toml
index 63dfecc7ca70..f966545977e7 100644
--- a/sdk/voicelive/azure-ai-voicelive/pyproject.toml
+++ b/sdk/voicelive/azure-ai-voicelive/pyproject.toml
@@ -17,7 +17,7 @@ authors = [
 description = "Microsoft Corporation Azure Ai Voicelive Client Library for Python"
 license = "MIT"
 classifiers = [
-    "Development Status :: 5 - Production/Stable",
+    "Development Status :: 4 - Beta",
     "Programming Language :: Python",
     "Programming Language :: Python :: 3 :: Only",
     "Programming Language :: Python :: 3",
diff --git a/sdk/voicelive/azure-ai-voicelive/samples/BASIC_VOICE_ASSISTANT.md b/sdk/voicelive/azure-ai-voicelive/samples/BASIC_VOICE_ASSISTANT.md
index 04a30e660c45..de4c955173ce 100644
--- a/sdk/voicelive/azure-ai-voicelive/samples/BASIC_VOICE_ASSISTANT.md
+++ b/sdk/voicelive/azure-ai-voicelive/samples/BASIC_VOICE_ASSISTANT.md
@@ -14,7 +14,7 @@ This sample demonstrates a complete voice assistant implementation using the Azu
 
 ## Prerequisites
 
-- Python 3.9+
+- Python 3.10+
 - Microphone and speakers/headphones
 - Azure AI VoiceLive endpoint
 - An Entra ID identity with access to Azure AI VoiceLive, or a VoiceLive API key
@@ -22,9 +22,9 @@ This sample demonstrates a complete voice assistant implementation using the Azu
 ## Installation
 
 1. **Install the SDK**:
-   ```bash
-    pip install azure-ai-voicelive azure-identity python-dotenv
-   ```
+    ```bash
+    python -m pip install --pre "azure-ai-voicelive[aiohttp]" azure-identity python-dotenv
+    ```
 
 2. **Install PyAudio** (required for audio capture/playback):
 
@@ -47,12 +47,12 @@ This sample demonstrates a complete voice assistant implementation using the Azu
 
 ## Configuration
 
-Create a `.env` file. By default, the sample uses Entra ID via `DefaultAzureCredential`:
+Create a `.env` file. By default, the sample uses Entra ID via `DefaultAzureCredential`. For local development, `az login` is the easiest way to satisfy that credential chain:
 
 ```bash
 AZURE_VOICELIVE_ENDPOINT=your-endpoint
 AZURE_VOICELIVE_MODEL=gpt-realtime
-AZURE_VOICELIVE_VOICE=en-US-AvaNeural
+AZURE_VOICELIVE_VOICE=en-US-Ava:DragonHDLatestNeural
 AZURE_VOICELIVE_INSTRUCTIONS=You are a helpful AI assistant. Respond naturally and conversationally.
 ```
 
@@ -69,6 +69,8 @@ AZURE_VOICELIVE_API_KEY=your-api-key
 python basic_voice_assistant_async.py
 ```
 
+The sample writes logs to standard output and does not create log files.
+
 Optional command-line arguments:
 
 ```bash
@@ -177,7 +179,7 @@ Main application class that coordinates WebSocket connection, session management
 - **Network timeouts**: Check firewall settings and network connectivity
 
 ### Authentication Issues
-- **Default auth failures**: Confirm `DefaultAzureCredential` can get a token and your identity can access the VoiceLive resource
+- **Default auth failures**: Run `az login` or otherwise confirm `DefaultAzureCredential` can get a token and your identity can access the VoiceLive resource
 - **API key auth failures**: Set `AZURE_VOICELIVE_USE_API_KEY=true` and verify `AZURE_VOICELIVE_API_KEY`
 
 ### PyAudio Installation Issues
diff --git a/sdk/voicelive/azure-ai-voicelive/samples/README.md b/sdk/voicelive/azure-ai-voicelive/samples/README.md
index 84694bc4bd14..139553efd768 100644
--- a/sdk/voicelive/azure-ai-voicelive/samples/README.md
+++ b/sdk/voicelive/azure-ai-voicelive/samples/README.md
@@ -6,7 +6,7 @@ This directory contains sample applications demonstrating various capabilities o
 
 ## Prerequisites
 
-- Python 3.9 or later
+- Python 3.10 or later
 - An Azure subscription with access to Azure AI VoiceLive
 - An Entra ID identity with access to Azure AI VoiceLive, or a VoiceLive API key
 
@@ -14,9 +14,9 @@ This directory contains sample applications demonstrating various capabilities o
 
 1. **Install dependencies**:
 
-   ```bash
-  pip install azure-ai-voicelive[aiohttp] azure-identity python-dotenv
-   ```
+   ```bash
+   python -m pip install --pre "azure-ai-voicelive[aiohttp]" azure-identity python-dotenv
+   ```
 
 2. **Install PyAudio** (required for audio samples):
 
@@ -39,7 +39,7 @@ This directory contains sample applications demonstrating various capabilities o
 
 3. **Configure environment variables**:
 
-  Create a `.env` file at the root of the azure-ai-voicelive directory or in the samples directory. By default, the samples use Entra ID via `DefaultAzureCredential`:
+  Create a `.env` file at the root of the azure-ai-voicelive directory or in the samples directory. By default, the samples use Entra ID via `DefaultAzureCredential`. For local development, run `az login` first if you want to use your Azure CLI session:
 
    ```ini
    AZURE_VOICELIVE_ENDPOINT=wss://api.voicelive.com/v1
@@ -116,9 +116,10 @@ python basic_voice_assistant_async.py --help
 
 ## Sample descriptions
 
-- **basic_voice_assistant_async.py**: 🌟 **[Featured Sample]** Complete async voice assistant demonstrating real-time conversation, interruption handling, and server VAD. Supports optional OpenTelemetry tracing via `--enable-tracing`. Perfect starting point for voice applications. See "BASIC_VOICE_ASSISTANT.md" for detailed documentation.
-- **agent_v2_sample.py**: Demonstrates how to connect to an Azure AI Foundry agent using flattened `connect()` keyword arguments. Shows the new pattern where agents are configured at connection time rather than as tools in the session. Features callback-based audio streaming, sequence number based interrupt handling, and standard logger output for conversation events.
+- **basic_voice_assistant_async.py**: 🌟 **[Featured Sample]** Complete async voice assistant demonstrating real-time conversation, interruption handling, and server VAD. Supports optional OpenTelemetry tracing via `--enable-tracing`, defaults to Entra ID auth, and writes logs to standard output instead of creating log files. See "BASIC_VOICE_ASSISTANT.md" for detailed documentation.
+- **agent_v2_sample.py**: Demonstrates how to connect to an Azure AI Foundry agent using flattened `connect()` keyword arguments. Shows the new pattern where agents are configured at connection time rather than as tools in the session. Features callback-based audio streaming, sequence number based interrupt handling, and standard logger output for conversation events. The agent connection defaults to API version `2026-04-10`.
 - **async_function_calling_sample.py**: Demonstrates async function calling capabilities with the VoiceLive SDK, showing how to handle function calls from the AI model.
+- **async_mcp_sample.py**: Demonstrates async MCP capabilities with Entra ID-first authentication and uses API version `2026-04-10` for MCP support.
 
 ### Telemetry samples
 
@@ -162,7 +163,7 @@ Set `AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING=true` to enable tracing.
   - Confirm your network allows WSS to the service
 
 - **Auth errors**
-  - By default: ensure `DefaultAzureCredential` can acquire a token and your identity has access to the resource
+  - By default: run `az login` or otherwise ensure `DefaultAzureCredential` can acquire a token and your identity has access to the resource
   - For API key auth: set `AZURE_VOICELIVE_USE_API_KEY=true` and confirm `AZURE_VOICELIVE_API_KEY`
 
 ## Next steps
diff --git a/sdk/voicelive/azure-ai-voicelive/samples/async_mcp_sample.py b/sdk/voicelive/azure-ai-voicelive/samples/async_mcp_sample.py
index 0ce7bdd446e4..8561b9144f8c 100644
--- a/sdk/voicelive/azure-ai-voicelive/samples/async_mcp_sample.py
+++ b/sdk/voicelive/azure-ai-voicelive/samples/async_mcp_sample.py
@@ -12,7 +12,8 @@
 
 DESCRIPTION:
     This sample demonstrates how to use the Azure AI Voice Live SDK asynchronously
-    with MCP capabilities. It shows how to define mcp servers, handle mcp call events.
+    with MCP capabilities. It shows how to define MCP servers and handle MCP call events,
+    and it connects with API version 2026-04-10 because MCP support requires that service version.
 
 USAGE:
     python async_mcp_sample.py
@@ -753,4 +754,3 @@ async def main():
         asyncio.run(main())
     except KeyboardInterrupt:
         print("\n👋 Voice Live MCP shut down.")
-
diff --git a/sdk/voicelive/azure-ai-voicelive/samples/basic_voice_assistant_async.py b/sdk/voicelive/azure-ai-voicelive/samples/basic_voice_assistant_async.py
index 32efeb725ae4..2f61fd3999ed 100644
--- a/sdk/voicelive/azure-ai-voicelive/samples/basic_voice_assistant_async.py
+++ b/sdk/voicelive/azure-ai-voicelive/samples/basic_voice_assistant_async.py
@@ -14,7 +14,8 @@
     This sample demonstrates the fundamental capabilities of the VoiceLive SDK by creating
     a basic voice assistant that can engage in natural conversation with proper interruption
     handling. This serves as the foundational example that showcases the core value
-    proposition of unified speech-to-speech interaction.
+    proposition of unified speech-to-speech interaction. Logs are written to standard output
+    and the sample does not create log files.
 
 USAGE:
     python basic_voice_assistant_async.py
@@ -26,7 +27,7 @@
     - AZURE_VOICELIVE_USE_API_KEY - Set to "true" to use AZURE_VOICELIVE_API_KEY instead of Entra ID
     - AZURE_VOICELIVE_API_KEY - VoiceLive API key used when AZURE_VOICELIVE_USE_API_KEY is enabled
     - AZURE_VOICELIVE_MODEL - The VoiceLive model to use (default: gpt-realtime)
-    - AZURE_VOICELIVE_VOICE - The voice to use for synthesis
+    - AZURE_VOICELIVE_VOICE - The voice to use for synthesis (default: en-US-Ava:DragonHDLatestNeural)
     - AZURE_VOICELIVE_INSTRUCTIONS - System instructions for the assistant
 
     Or copy .env.template to .env and fill in your values.
@@ -566,6 +567,7 @@ def main():
 
     # Start the assistant
     try:
+
         async def _run_assistant() -> None:
             try:
                 await assistant.start()
diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_live_realtime_service.py b/sdk/voicelive/azure-ai-voicelive/tests/test_live_realtime_service.py
index e27895a87475..20bd8f0d0ce7 100644
--- a/sdk/voicelive/azure-ai-voicelive/tests/test_live_realtime_service.py
+++ b/sdk/voicelive/azure-ai-voicelive/tests/test_live_realtime_service.py
@@ -178,7 +178,7 @@ def smoke_test(self, **kwargs):
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1", "phi4-mm-realtime", "phi4-mini"])
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
         voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
         voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
@@ -232,7 +232,7 @@ async def test_realtime_service(self, test_data_dir: Path, model: str, api_versi
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1"])
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_with_audio_enhancements(
         self,
         test_data_dir: Path,
@@ -281,7 +281,7 @@ async def test_realtime_service_with_audio_enhancements(
             ),
         ],
     )
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_with_turn_detection_long_tts_vad_duration(
         self, test_data_dir: Path, model: str, server_sd_conf: dict, api_version: str, **kwargs
     ):
@@ -327,7 +327,7 @@ async def test_realtime_service_with_turn_detection_long_tts_vad_duration(
             pytest.param("gpt-4o", {"languages": ["en", "es"]}, id="cascaded-realtime"),
         ],
     )
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_with_turn_detection_multilingual(
         self, test_data_dir: Path, model: str, semantic_vad_params: dict, api_version: str, **kwargs
     ):
@@ -360,7 +360,7 @@ async def test_realtime_service_with_turn_detection_multilingual(
             "filler_word_24kHz.wav",
         ],
     )
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_with_filler_word_removal(
         self,
         test_data_dir: Path,
@@ -396,7 +396,7 @@ async def test_realtime_service_with_filler_word_removal(
             "filler_word_24kHz.wav",
         ],
     )
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_with_filler_word_removal_multilingual(
         self, test_data_dir: Path, test_audio_file: str, api_version: str, **kwargs
     ):
@@ -428,7 +428,7 @@ async def test_realtime_service_with_filler_word_removal_multilingual(
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4o"])
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_tool_call(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
         audio_file = test_data_dir / "4-1.wav"
         voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
@@ -483,7 +483,7 @@ async def test_realtime_service_tool_call(self, test_data_dir: Path, model: str,
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4o", "gpt-5-chat"])
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_tool_choice(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
         if "realtime" in model:
             pytest.skip("Tool choice is not supported in realtime models yet")
@@ -578,7 +578,7 @@ async def test_realtime_service_tool_choice(self, test_data_dir: Path, model: st
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1", "gpt-5", "gpt-5.1", "gpt-5.2", "phi4-mm-realtime"])
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_tool_call_parameter(
         self,
         test_data_dir: Path,
@@ -668,7 +668,7 @@ def get_weather(arguments: Union[str, Mapping[str, Any]]) -> str:
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-realtime"])
-    @pytest.mark.parametrize("api_version", ["2025-05-01-preview", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-05-01-preview", "2026-04-10"])
     async def test_realtime_service_live_session_update(
         self,
         test_data_dir: Path,
@@ -747,7 +747,7 @@ async def test_realtime_service_live_session_update(
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.skip()
     @pytest.mark.parametrize("model", ["gpt-4o", "gpt-realtime"])
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_tool_call_no_audio_overlap(
         self,
         test_data_dir: Path,
@@ -820,7 +820,7 @@ async def test_realtime_service_tool_call_no_audio_overlap(
             "mai-transcribe-1",
         ],
     )
-    @pytest.mark.parametrize("api_version", ["2025-05-01-preview", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-05-01-preview", "2026-04-10"])
     async def test_realtime_service_input_audio_transcription(
         self,
         test_data_dir: Path,
@@ -891,7 +891,7 @@ async def test_realtime_service_input_audio_transcription(
             ),
         ],
     )
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_with_eou(
         self,
         test_data_dir: Path,
@@ -927,7 +927,7 @@ async def test_realtime_service_with_eou(
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1"])
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_with_audio_timestamp_viseme(
         self,
         test_data_dir: Path,
@@ -985,7 +985,7 @@ async def test_realtime_service_with_audio_timestamp_viseme(
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4o", "phi4-mm-realtime", "phi4-mini"])
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_wo_turn_detection(
         self,
         test_data_dir: Path,
@@ -1023,7 +1023,7 @@ async def test_realtime_service_wo_turn_detection(
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1", "phi4-mm-realtime"])
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_with_voice_properties(
         self,
         test_data_dir: Path,
@@ -1059,7 +1059,7 @@ async def test_realtime_service_with_voice_properties(
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-realtime"])
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_retrieve_item(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
         file = test_data_dir / "largest_lake.wav"
         voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
@@ -1102,7 +1102,7 @@ async def test_realtime_service_retrieve_item(self, test_data_dir: Path, model:
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-realtime"])
-    @pytest.mark.parametrize("api_version", ["2025-05-01-preview", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-05-01-preview", "2026-04-10"])
     async def test_realtime_service_truncate_item(self, test_data_dir: Path, model: str, api_version: str, **kwargs):
         file = test_data_dir / "largest_lake.wav"
         voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
@@ -1195,7 +1195,7 @@ async def test_realtime_service_truncate_item(self, test_data_dir: Path, model:
             ),
         ],
     )
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_with_input_audio_format(
         self,
         test_data_dir: Path,
@@ -1270,7 +1270,7 @@ async def test_realtime_service_with_input_audio_format(
             pytest.param("phi4-mm-realtime", 44100, id="phi4_mm_realtime_44kHz_no_resample"),
         ],
     )
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_with_input_audio_sampling_rate(
         self, test_data_dir: Path, model: str, sampling_rate: int, api_version: str, **kwargs
     ):
@@ -1334,7 +1334,7 @@ async def test_realtime_service_with_input_audio_sampling_rate(
             "g711_alaw",
         ],
     )
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_output_formats_with_azure_voice(
         self, test_data_dir: Path, model: str, audio_output_format: str, api_version: str, **kwargs
     ):
@@ -1375,7 +1375,7 @@ async def test_output_formats_with_azure_voice(
             "g711_alaw",
         ],
     )
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_output_formats_with_openai_voice(
         self, test_data_dir: Path, model: str, audio_output_format: str, api_version: str, **kwargs
     ):
@@ -1408,7 +1408,7 @@ async def test_output_formats_with_openai_voice(
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-realtime", "gpt-4.1"])
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_realtime_service_with_echo_cancellation(
         self,
         test_data_dir: Path,
@@ -1458,7 +1458,7 @@ async def test_realtime_service_with_echo_cancellation(
             "g711_alaw",
         ],
     )
-    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"])
+    @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-04-10"])
     async def test_write_loopback_audio_echo_cancellation(
         self, test_data_dir: Path, model: str, audio_output_format: str, api_version: str, **kwargs
     ):
@@ -1485,4 +1485,3 @@ async def test_write_loopback_audio_echo_cancellation(
             contents, audio_bytes = await _collect_event(conn, event_type=ServerEventType.RESPONSE_CONTENT_PART_ADDED)
             assert contents >= 1, "Response should be generated with echo cancellation"
             assert audio_bytes > 0, "Audio bytes should be greater than 0"
-
diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_client_events.py b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_client_events.py
index 019ac1fde905..50911a2f9df4 100644
--- a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_client_events.py
+++ b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_client_events.py
@@ -18,6 +18,7 @@
     ClientEventConversationItemTruncate,
     ClientEventResponseCreate,
     ClientEventResponseCancel,
+    ClientEventRtcCallSdpCreate,
     # Event Types
     ClientEventType,
     # Supporting Models
@@ -247,6 +248,30 @@ def test_response_cancel_with_response_id(self):
         assert event.event_id == event_id
 
 
+class TestClientEventRtcCall:
+    """Test RTC call client events."""
+
+    def test_rtc_call_sdp_create_basic(self):
+        """Test creating an RTC SDP offer event."""
+        event = ClientEventRtcCallSdpCreate(sdp_offer="v=0\r\no=- 1 2 IN IP4 127.0.0.1")
+
+        assert event.type == ClientEventType.RTC_CALL_SDP_CREATE
+        assert event.sdp_offer.startswith("v=0")
+        assert event.session is None
+
+    def test_rtc_call_sdp_create_with_session(self):
+        """Test creating an RTC SDP offer event with an initial session."""
+        session = RequestSession(model="gpt-4o-realtime-preview", modalities=[Modality.AUDIO])
+        event = ClientEventRtcCallSdpCreate(
+            sdp_offer="v=0\r\no=- 1 2 IN IP4 127.0.0.1",
+            session=session,
+            event_id="rtc-evt-1",
+        )
+
+        assert event.event_id == "rtc-evt-1"
+        assert event.session == session
+
+
 class TestClientEventSerialization:
     """Test client event serialization capabilities."""
 
diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_connection.py b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_connection.py
index e35cc719a8be..fbcc022d4e97 100644
--- a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_connection.py
+++ b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_connection.py
@@ -671,7 +671,7 @@ def test_url_includes_api_version(self):
         manager = _VoiceLiveConnectionManager(
             credential=self.credential,
             endpoint="https://test.azure.com",
-            api_version="2026-01-01-preview",
+            api_version="2026-04-10",
             agent_config=agent_config,
             extra_query={},
             extra_headers={},
@@ -679,4 +679,4 @@ def test_url_includes_api_version(self):
 
         url = manager._prepare_url()
 
-        assert "api-version=2026-01-01-preview" in url
+        assert "api-version=2026-04-10" in url
diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_enums.py b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_enums.py
index 0bdaa03aee4e..aad8f3c5bb15 100644
--- a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_enums.py
+++ b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_enums.py
@@ -7,9 +7,11 @@
 from azure.ai.voicelive.models import (
     AnimationOutputType,
     AudioTimestampType,
+    AzureRealtimeNativeVoiceName,
     AzureVoiceType,
     ClientEventType,
     ContentPartType,
+    EchoCancellationReferenceSource,
     SessionIncludeOption,
     InputAudioFormat,
     ItemParamStatus,
@@ -73,6 +75,24 @@ def test_case_insensitive(self):
         assert AzureVoiceType.AZURE_STANDARD.value == "azure-standard"
 
 
+class TestAzureRealtimeNativeVoiceName:
+    """Test AzureRealtimeNativeVoiceName enum."""
+
+    def test_all_values(self):
+        """Test representative realtime native voice values are accessible."""
+        assert AzureRealtimeNativeVoiceName.AVA == "ava"
+        assert AzureRealtimeNativeVoiceName.XIAOXIAO == "xiaoxiao"
+
+
+class TestEchoCancellationReferenceSource:
+    """Test EchoCancellationReferenceSource enum."""
+
+    def test_all_values(self):
+        """Test all echo cancellation reference source values are accessible."""
+        assert EchoCancellationReferenceSource.SERVER == "server"
+        assert EchoCancellationReferenceSource.CLIENT == "client"
+
+
 class TestClientEventType:
     """Test ClientEventType enum."""
 
@@ -99,6 +119,10 @@ def test_response_events(self):
         assert ClientEventType.RESPONSE_CREATE == "response.create"
         assert ClientEventType.RESPONSE_CANCEL == "response.cancel"
 
+    def test_rtc_call_events(self):
+        """Test RTC call events."""
+        assert ClientEventType.RTC_CALL_SDP_CREATE == "rtc.call.sdp.create"
+
 
 class TestContentPartType:
     """Test ContentPartType enum."""
@@ -283,6 +307,15 @@ def test_file_search_events(self):
     def test_output_audio_buffer_cleared(self):
         assert ServerEventType.OUTPUT_AUDIO_BUFFER_CLEARED == "output_audio_buffer.cleared"
 
+    def test_output_audio_buffer_lifecycle(self):
+        assert ServerEventType.OUTPUT_AUDIO_BUFFER_STARTED == "output_audio_buffer.started"
+        assert ServerEventType.OUTPUT_AUDIO_BUFFER_STOPPED == "output_audio_buffer.stopped"
+
+    def test_invocation_and_rtc_events(self):
+        assert ServerEventType.RESPONSE_INVOCATION_DELTA == "response.invocation.delta"
+        assert ServerEventType.RTC_CALL_SDP_CREATED == "rtc.call.sdp.created"
+        assert ServerEventType.RTC_CALL_ERROR == "rtc.call.error"
+
     def test_audio_transcript_annotation(self):
         assert (
             ServerEventType.RESPONSE_AUDIO_TRANSCRIPT_ANNOTATION_ADDED == "response.audio_transcript.annotation.added"
diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_models.py b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_models.py
index 7413758582a5..386fef051f23 100644
--- a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_models.py
+++ b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_models.py
@@ -9,12 +9,16 @@
     ActionOpenPage,
     ActionSearch,
     ActionSearchSource,
+    AudioEchoCancellation,
     AssistantMessageItem,
     AzureAvatarVoiceSyncVoice,
     AzureCustomVoice,
     AzurePersonalVoice,
+    AzureRealtimeNativeVoice,
+    AzureRealtimeNativeVoiceName,
     AzureStandardVoice,
     AzureVoiceType,
+    EchoCancellationReferenceSource,
     FileSearchResult,
     InputAudioContentPart,
     InputTextContentPart,
@@ -39,11 +43,17 @@
     ResponseMCPListToolItem,
     ResponseSession,
     ResponseWebSearchCallItem,
+    RtcCallErrorDetails,
     ServerEventMcpListToolsCompleted,
     ServerEventMcpListToolsFailed,
     ServerEventMcpListToolsInProgress,
+    ServerEventOutputAudioBufferStarted,
+    ServerEventOutputAudioBufferStopped,
+    ServerEventResponseInvocationDelta,
     ServerEventResponseMcpCallArgumentsDelta,
     ServerEventResponseMcpCallArgumentsDone,
+    ServerEventRtcCallError,
+    ServerEventRtcCallSdpCreated,
     ServerEventType,
     SystemMessageItem,
     ToolType,
@@ -101,6 +111,13 @@ def test_azure_personal_voice_with_temperature(self):
         assert voice.temperature == 0.5
         assert voice.model == PersonalVoiceModels.DRAGON_LATEST_NEURAL
 
+    def test_azure_realtime_native_voice(self):
+        """Test AzureRealtimeNativeVoice model."""
+        voice = AzureRealtimeNativeVoice(name=AzureRealtimeNativeVoiceName.AVA)
+
+        assert voice.type == "azure-realtime-native"
+        assert voice.name == AzureRealtimeNativeVoiceName.AVA
+
 
 class TestOpenAIVoice:
     """Test OpenAIVoice model."""
@@ -261,6 +278,14 @@ def test_request_session_with_temperature(self):
         assert session.temperature == 0.7
         assert session.max_response_output_tokens == 1000
 
+    def test_request_session_with_azure_realtime_native_voice(self):
+        """Test request session with Azure realtime native voice configuration."""
+        voice = AzureRealtimeNativeVoice(name=AzureRealtimeNativeVoiceName.XIAOXIAO)
+        session = RequestSession(model="azure-realtime", voice=voice)
+
+        assert session.voice == voice
+        assert session.voice.type == "azure-realtime-native"
+
 
 class TestResponseSession:
     """Test ResponseSession model."""
@@ -319,6 +344,17 @@ def test_complex_model_structure(self):
         assert session.voice.model == PersonalVoiceModels.PHOENIX_LATEST_NEURAL
 
 
+class TestAudioEchoCancellationModel:
+    """Test enhanced audio echo cancellation configuration."""
+
+    def test_audio_echo_cancellation_with_client_reference(self):
+        """Test AudioEchoCancellation with client-provided stereo reference."""
+        config = AudioEchoCancellation(reference_source=EchoCancellationReferenceSource.CLIENT, channels=2)
+
+        assert config.reference_source == EchoCancellationReferenceSource.CLIENT
+        assert config.channels == 2
+
+
 class TestMCPModels:
     """Test MCP (Model Context Protocol) related models."""
 
@@ -758,6 +794,63 @@ def test_server_event_response_mcp_call_arguments_done_with_full_arguments(self)
         assert event.arguments == full_args
 
 
+class TestRealtimeAndRtcServerEvents:
+    """Test realtime playback and RTC server event models."""
+
+    def test_server_event_output_audio_buffer_started(self):
+        """Test output audio buffer started event."""
+        event = ServerEventOutputAudioBufferStarted(event_id="evt-1", response_id="resp-123")
+
+        assert event.type == ServerEventType.OUTPUT_AUDIO_BUFFER_STARTED
+        assert event.event_id == "evt-1"
+        assert event.response_id == "resp-123"
+
+    def test_server_event_output_audio_buffer_stopped(self):
+        """Test output audio buffer stopped event."""
+        event = ServerEventOutputAudioBufferStopped(event_id="evt-2", response_id="resp-456")
+
+        assert event.type == ServerEventType.OUTPUT_AUDIO_BUFFER_STOPPED
+        assert event.event_id == "evt-2"
+        assert event.response_id == "resp-456"
+
+    def test_server_event_response_invocation_delta(self):
+        """Test hosted agent invocation delta event."""
+        delta = {"type": "trace", "message": "partial hosted agent event"}
+        event = ServerEventResponseInvocationDelta(delta=delta, event_id="evt-3")
+
+        assert event.type == ServerEventType.RESPONSE_INVOCATION_DELTA
+        assert event.event_id == "evt-3"
+        assert event.delta == delta
+
+    def test_server_event_rtc_call_sdp_created(self):
+        """Test RTC SDP created event."""
+        event = ServerEventRtcCallSdpCreated(
+            event_id="evt-4",
+            rtc_call_id="rtc-123",
+            sdp_answer="v=0\r\no=- 1 2 IN IP4 127.0.0.1",
+        )
+
+        assert event.type == ServerEventType.RTC_CALL_SDP_CREATED
+        assert event.rtc_call_id == "rtc-123"
+        assert event.sdp_answer.startswith("v=0")
+
+    def test_server_event_rtc_call_error(self):
+        """Test RTC call error event."""
+        error = RtcCallErrorDetails(type="server_error", message="RTC negotiation failed", code="rtc_failed")
+        event = ServerEventRtcCallError(
+            error=error,
+            operation="rtc.call.sdp.create",
+            rtc_call_id="rtc-123",
+            event_id="evt-5",
+        )
+
+        assert event.type == ServerEventType.RTC_CALL_ERROR
+        assert event.error.code == "rtc_failed"
+        assert event.error.message == "RTC negotiation failed"
+        assert event.operation == "rtc.call.sdp.create"
+        assert event.rtc_call_id == "rtc-123"
+
+
 class TestMCPApprovalType:
     """Test MCPApprovalType enum."""
 
diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_models_interim_response_foundry.py b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_models_interim_response_foundry.py
index e90bf34bb26d..35fb031ab8d6 100644
--- a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_models_interim_response_foundry.py
+++ b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_models_interim_response_foundry.py
@@ -21,6 +21,7 @@
     Response,
     ResponseCreateParams,
     ResponseSession,
+    ServerEventResponseInvocationDelta,
     ServerEventType,
     ServerEventWarning,
     ServerEventWarningDetails,
@@ -198,6 +199,26 @@ def test_response_create_params_with_metadata(self):
         assert params.metadata == metadata
 
 
+class TestHostedAgentInvocation:
+    """Test hosted agent invocation models."""
+
+    def test_response_create_params_with_invoke_input(self):
+        """Test ResponseCreateParams with invoke_input."""
+        invoke_input = {"thread_id": "thread-123", "input": "hello"}
+        params = ResponseCreateParams(invoke_input=invoke_input)
+
+        assert params.invoke_input == invoke_input
+
+    def test_response_invocation_delta_event(self):
+        """Test ServerEventResponseInvocationDelta model."""
+        delta = {"type": "status", "message": "agent invoked"}
+        event = ServerEventResponseInvocationDelta(delta=delta, event_id="evt-invoke")
+
+        assert event.type == ServerEventType.RESPONSE_INVOCATION_DELTA
+        assert event.event_id == "evt-invoke"
+        assert event.delta == delta
+
+
 class TestSessionWithInterimResponse:
     """Test session models with interim_response field."""
 

From f43d569c4aa3b891c5304b93f481fb08b30e18f6 Mon Sep 17 00:00:00 2001
From: Xiting Zhang <xitzhang@microsoft.com>
Date: Fri, 22 May 2026 17:43:48 -0700
Subject: [PATCH 2/3] update cspell names

---
 sdk/voicelive/azure-ai-voicelive/cspell.json  | 25 ++++++++++++++++++-
 .../tests/test_live_realtime_service.py       |  2 +-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/sdk/voicelive/azure-ai-voicelive/cspell.json b/sdk/voicelive/azure-ai-voicelive/cspell.json
index 1dc68d63932d..f2b691d03b5b 100644
--- a/sdk/voicelive/azure-ai-voicelive/cspell.json
+++ b/sdk/voicelive/azure-ai-voicelive/cspell.json
@@ -1,21 +1,44 @@
 {
   "ignoreWords": [
+    "AARTI",
+    "aarti",
+    "Aarti",
+    "DIYA",
+    "diya",
+    "Diya",
+    "deser",
+    "FLORIAN",
+    "florian",
+    "Florian",
     "GENAI",
     "genai",
     "HDOMNI",
     "libasound",
     "logprobs",
+    "MEERA",
+    "meera",
+    "Meera",
     "pyaudio",
     "PyAudio",
     "SSML",
     "ULAW",
     "ulaw",
+    "precomputes",
     "VISEME",
     "viseme",
     "WEBRTC",
     "webrtc",
+    "XIAOXIAO",
+    "xiaoxiao",
+    "Xiaoxiao",
+    "XIMENA",
+    "ximena",
+    "Ximena",
     "XHIGH",
-    "xhigh"
+    "xhigh",
+    "YUNXI",
+    "yunxi",
+    "Yunxi"
   ],
     "ignorePaths": [
         "*.csv",
diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_live_realtime_service.py b/sdk/voicelive/azure-ai-voicelive/tests/test_live_realtime_service.py
index 20bd8f0d0ce7..3f1676eeaa8d 100644
--- a/sdk/voicelive/azure-ai-voicelive/tests/test_live_realtime_service.py
+++ b/sdk/voicelive/azure-ai-voicelive/tests/test_live_realtime_service.py
@@ -820,7 +820,7 @@ async def test_realtime_service_tool_call_no_audio_overlap(
             "mai-transcribe-1",
         ],
     )
-    @pytest.mark.parametrize("api_version", ["2025-05-01-preview", "2026-04-10"])
+    @pytest.mark.parametrize("api_version", ["2025-05-01-preview", "2026-01-01-preview"])
     async def test_realtime_service_input_audio_transcription(
         self,
         test_data_dir: Path,

From 2aff5ae090b3a973f59e2a55f0cd080338db9898 Mon Sep 17 00:00:00 2001
From: Xiting Zhang <xitzhang@microsoft.com>
Date: Fri, 22 May 2026 18:11:57 -0700
Subject: [PATCH 3/3] update docs

---
 .../azure/ai/voicelive/models/_models.py                   | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py
index 3accdd4f4906..b775d6b7aa6c 100644
--- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py
+++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py
@@ -403,14 +403,15 @@ class AudioEchoCancellation(_Model):
        behavior).
      * `client`: EC uses the client-supplied reference channel (ch1 of stereo input). Internal
        TTS loopback is skipped. Known values are: "server" and "client".
+
     :vartype reference_source: str or ~azure.ai.voicelive.models.EchoCancellationReferenceSource
     :ivar channels: Number of input audio channels.
 
      * `1`: Mono input (default).
      * `2`: Interleaved stereo input where channel 0 is the microphone signal and channel 1 is
-       the echo reference signal.
-     When set to 2, `reference_source` must be `client` and `input_audio_format` must be
-     `pcm16`.
+       the echo reference signal. When set to 2, `reference_source` must be `client` and
+       `input_audio_format` must be `pcm16`.
+
     :vartype channels: int
     """