AstrBotDevs · yuxwd · Apr 4, 2026 · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026
diff --git a/astrbot/core/astr_agent_run_util.py b/astrbot/core/astr_agent_run_util.py
@@ -19,6 +19,7 @@
 )
 from astrbot.core.provider.entities import LLMResponse
 from astrbot.core.provider.provider import TTSProvider
+from astrbot.core.utils.tts_text_filter import FilteredQueue
 
 AgentRunner = ToolLoopAgentRunner[AstrAgentContext]
 
@@ -354,6 +355,8 @@ async def run_live_agent(
     show_tool_use: bool = True,
     show_tool_call_result: bool = False,
     show_reasoning: bool = False,
+    tts_filter_enable: bool = False,
+    tts_filter_custom_rules: list[str] | None = None,
     buffer_intermediate_messages: bool = False,
 ) -> AsyncGenerator[MessageChain | None, None]:
     """Live Mode 的 Agent 运行器，支持流式 TTS
@@ -365,6 +368,8 @@ async def run_live_agent(
         show_tool_use: 是否显示工具使用
         show_tool_call_result: 是否显示工具返回结果
         show_reasoning: 是否显示推理过程
+        tts_filter_enable: 是否启用 TTS 文本过滤
+        tts_filter_custom_rules: 自定义 TTS 过滤正则规则
 
     Yields:
         MessageChain: 包含文本或音频数据的消息链
@@ -398,15 +403,22 @@ async def run_live_agent(
     first_chunk_received = False
 
     # 创建队列
-    text_queue: asyncio.Queue[str | None] = asyncio.Queue()
+    raw_text_queue: asyncio.Queue[str | None] = asyncio.Queue()
     # audio_queue stored bytes or (text, bytes)
     audio_queue: asyncio.Queue[bytes | tuple[str, bytes] | None] = asyncio.Queue()
 
+    # 为 TTS 创建过滤队列（Feeder 写入原始文本，TTS 读取过滤后文本）
+    tts_text_queue: asyncio.Queue[str | None] | FilteredQueue = (
+        FilteredQueue(raw_text_queue, tts_filter_custom_rules)
+        if tts_filter_enable
+        else raw_text_queue
+    )
+
     # 1. 启动 Agent Feeder 任务：负责运行 Agent 并将文本分句喂给 text_queue
     feeder_task = asyncio.create_task(
         _run_agent_feeder(
             agent_runner,
-            text_queue,
+            raw_text_queue,
             max_step,
             show_tool_use,
             show_tool_call_result,
@@ -415,14 +427,14 @@ async def run_live_agent(
         )
     )
 
-    # 2. 启动 TTS 任务：负责从 text_queue 读取文本并生成音频到 audio_queue
+    # 2. 启动 TTS 任务：负责从 tts_text_queue 读取文本并生成音频到 audio_queue
     if support_stream:
         tts_task = asyncio.create_task(
-            _safe_tts_stream_wrapper(tts_provider, text_queue, audio_queue)
+            _safe_tts_stream_wrapper(tts_provider, tts_text_queue, audio_queue)
         )
     else:
         tts_task = asyncio.create_task(
-            _simulated_stream_tts(tts_provider, text_queue, audio_queue)
+            _simulated_stream_tts(tts_provider, tts_text_queue, audio_queue)
         )
 
     # 3. 主循环：从 audio_queue 读取音频并 yield

diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
@@ -207,6 +207,10 @@
         "dual_output": False,
         "use_file_service": False,
         "trigger_probability": 1.0,
+        "tts_text_filter": {
+            "enable": False,
+            "custom_rules": [],
+        },
     },
     "provider_ltm_settings": {
         "group_icl_enable": False,
@@ -1782,6 +1786,40 @@ class ChatProviderTemplate(TypedDict):
                         "gemini_tts_voice_name": "Leda",
                         "proxy": "",
                     },
+                    "Qwen TTS Realtime(API)": {
+                        "id": "qwen_tts_realtime",
+                        "type": "qwen_tts_realtime",
+                        "provider": "qwen",
+                        "provider_type": "text_to_speech",
+                        "hint": "千问实时语音合成，支持流式输入输出、低延迟响应。模型可选 qwen3-tts-flash-realtime、qwen3-tts-instruct-flash-realtime、qwen-tts-realtime 等。API Key 从 https://bailian.console.aliyun.com/?tab=model#/api-key 获取",
+                        "enable": False,
+                        "api_key": "",
+                        "model": "qwen3-tts-flash-realtime",
+                        "qwen_tts_voice": "Cherry",
+                        "qwen_tts_instructions": "",
+                        "qwen_tts_optimize_instructions": False,
+                        "qwen_tts_speech_rate": 1.0,
+                        "qwen_tts_volume": 1.0,
+                        "qwen_tts_pitch_rate": 1.0,
+                        "qwen_tts_url": "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
+                        "timeout": "30",
+                    },
+                    "CosyVoice TTS(API)": {
+                        "id": "cosyvoice_tts",
+                        "type": "cosyvoice_tts",
+                        "provider": "cosyvoice",
+                        "provider_type": "text_to_speech",
+                        "hint": "CosyVoice 语音合成，支持多种系统音色和复刻音色。模型可选 cosyvoice-v3.5-plus、cosyvoice-v3.5-flash、cosyvoice-v3-plus、cosyvoice-v3-flash、cosyvoice-v2 等。API Key 从 https://bailian.console.aliyun.com/?tab=model#/api-key 获取",
+                        "enable": False,
+                        "api_key": "",
+                        "model": "cosyvoice-v3-flash",
+                        "cosyvoice_voice": "longanyang",
+                        "cosyvoice_speech_rate": 1.0,
+                        "cosyvoice_volume": 1.0,
+                        "cosyvoice_pitch_rate": 1.0,
+                        "cosyvoice_base_url": "wss://dashscope.aliyuncs.com/api-ws/v1/inference",
+                        "timeout": "20",
+                    },
                     "OpenAI Embedding": {
                         "id": "openai_embedding",
                         "type": "openai_embedding",
@@ -2249,6 +2287,66 @@ class ChatProviderTemplate(TypedDict):
                         "hint": "Azure_TTS 服务的订阅密钥（注意不是令牌）",
                     },
                     "dashscope_tts_voice": {"description": "音色", "type": "string"},
+                    "qwen_tts_voice": {
+                        "description": "音色",
+                        "type": "string",
+                        "hint": "Qwen TTS Realtime 音色名称，可选: Cherry(芊悦)、Serena(苏瑶)、Ethan(晨煦) 等。详见 https://help.aliyun.com/zh/model-studio/qwen-tts-realtime-api-reference",
+                    },
+                    "qwen_tts_instructions": {
+                        "description": "指令控制",
+                        "type": "string",
+                        "hint": "通过自然语言描述控制语音表达效果，如'语速较快，带有明显的上扬语调'。仅 qwen3-tts-instruct-flash-realtime 模型支持。长度不超过 1600 Token。",
+                    },
+                    "qwen_tts_optimize_instructions": {
+                        "description": "优化指令",
+                        "type": "bool",
+                        "hint": "启用后模型会自动优化指令描述以获得更好的效果。仅 qwen3-tts-instruct-flash-realtime 模型支持。",
+                    },
+                    "qwen_tts_speech_rate": {
+                        "description": "语速",
+                        "type": "number",
+                        "hint": "语速调节比例，1.0 为正常语速，大于 1.0 加快，小于 1.0 减慢。",
+                    },
+                    "qwen_tts_volume": {
+                        "description": "音量",
+                        "type": "number",
+                        "hint": "音量调节比例，1.0 为正常音量。",
+                    },
+                    "qwen_tts_pitch_rate": {
+                        "description": "音调",
+                        "type": "number",
+                        "hint": "音调调节比例，1.0 为正常音调。",
+                    },
+                    "qwen_tts_url": {
+                        "description": "WebSocket 地址",
+                        "type": "string",
+                        "hint": "Qwen TTS Realtime WebSocket 地址。北京: wss://dashscope.aliyuncs.com/api-ws/v1/realtime；新加坡: wss://dashscope-intl.aliyuncs.com/api-ws/v1/realtime",
+                    },
+                    "cosyvoice_voice": {
+                        "description": "音色",
+                        "type": "string",
+                        "hint": "CosyVoice 音色名称，可选: longanyang、longxiaochun_v2 等。不同模型版本需使用对应版本的音色。详见 https://help.aliyun.com/zh/model-studio/cosyvoice-voice-list",
+                    },
+                    "cosyvoice_speech_rate": {
+                        "description": "语速",
+                        "type": "number",
+                        "hint": "语速调节比例，1.0 为正常语速。仅部分模型支持。",
+                    },
+                    "cosyvoice_volume": {
+                        "description": "音量",
+                        "type": "number",
+                        "hint": "音量调节比例，1.0 为正常音量。仅部分模型支持。",
+                    },
+                    "cosyvoice_pitch_rate": {
+                        "description": "音调",
+                        "type": "number",
+                        "hint": "音调(音高)调节比例，1.0 为正常音调。仅部分模型支持。",
+                    },
+                    "cosyvoice_base_url": {
+                        "description": "WebSocket 地址",
+                        "type": "string",
+                        "hint": "CosyVoice WebSocket 地址。北京: wss://dashscope.aliyuncs.com/api-ws/v1/inference；新加坡: wss://dashscope-intl.aliyuncs.com/api-ws/v1/inference",
+                    },
                     "gm_resp_image_modal": {
                         "description": "启用图片模态",
                         "type": "bool",
@@ -3127,6 +3225,23 @@ class ChatProviderTemplate(TypedDict):
                             "provider_tts_settings.enable": True,
                         },
                     },
+                    "provider_tts_settings.tts_text_filter.enable": {
+                        "description": "过滤 TTS 文本中的括号内容",
+                        "type": "bool",
+                        "hint": "开启后将自动去除 *文字*、【文字】、(文字) 等括号/标记内容，避免 TTS 朗读情绪标记",
+                        "condition": {
+                            "provider_tts_settings.enable": True,
+                        },
+                    },
+                    "provider_tts_settings.tts_text_filter.custom_rules": {
+                        "description": "自定义 TTS 过滤正则",
+                        "type": "list",
+                        "items": {"type": "string"},
+                        "hint": "每行一条正则表达式，将匹配到的内容从 TTS 文本中移除",
+                        "condition": {
+                            "provider_tts_settings.enable": True,
+                        },
+                    },
                     "provider_settings.image_caption_prompt": {
                         "description": "图片转述提示词",
                         "type": "text",

diff --git a/astrbot/core/pipeline/process_stage/method/agent_sub_stages/internal.py b/astrbot/core/pipeline/process_stage/method/agent_sub_stages/internal.py
@@ -277,6 +277,13 @@ async def process(
                                 "[Live Mode] TTS Provider 未配置，将使用普通流式模式"
                             )
 
+                        # 获取 TTS 文本过滤配置
+                        tts_filter_cfg = self.ctx.astrbot_config.get(
+                            "provider_tts_settings", {}
+                        ).get("tts_text_filter", {})
+                        tts_filter_enable = tts_filter_cfg.get("enable", False)
+                        tts_filter_rules = tts_filter_cfg.get("custom_rules", [])
+
                         # 使用 run_live_agent，总是使用流式响应
                         event.set_result(
                             MessageEventResult()
@@ -289,6 +296,8 @@ async def process(
                                     self.show_tool_use,
                                     self.show_tool_call_result,
                                     show_reasoning=self.show_reasoning,
+                                    tts_filter_enable=tts_filter_enable,
+                                    tts_filter_custom_rules=tts_filter_rules,
                                     buffer_intermediate_messages=self.buffer_intermediate_messages,
                                 ),
                             ),

diff --git a/astrbot/core/pipeline/result_decorate/stage.py b/astrbot/core/pipeline/result_decorate/stage.py
@@ -13,6 +13,7 @@
 from astrbot.core.star.session_llm_manager import SessionServiceManager
 from astrbot.core.star.star import star_map
 from astrbot.core.star.star_handler import EventType, star_handlers_registry
+from astrbot.core.utils.tts_text_filter import TTSTextFilter
 
 from ..context import PipelineContext
 from ..stage import Stage, register_stage, registered_stages
@@ -296,8 +297,27 @@ async def process(
                 for comp in result.chain:
                     if isinstance(comp, Plain) and len(comp.text) > 1:
                         try:
-                            logger.info(f"TTS 请求: {comp.text}")
-                            audio_path = await tts_provider.get_audio(comp.text)
+                            # 应用 TTS 文本过滤
+                            tts_filter_config = self.ctx.astrbot_config[
+                                "provider_tts_settings"
+                            ].get("tts_text_filter", {})
+                            tts_filter_enable = tts_filter_config.get("enable", False)
+                            tts_custom_rules = tts_filter_config.get("custom_rules", [])
+
+                            if tts_filter_enable:
+                                tts_text = TTSTextFilter.apply(
+                                    comp.text, tts_custom_rules
+                                )
+                            else:
+                                tts_text = comp.text
+
+                            if not tts_text:
+                                # 过滤后为空，跳过 TTS
+                                new_chain.append(comp)
+                                continue
+
+                            logger.info(f"TTS 请求: {tts_text}")
+                            audio_path = await tts_provider.get_audio(tts_text)
                             logger.info(f"TTS 结果: {audio_path}")
                             if not audio_path:
                                 logger.error(

diff --git a/astrbot/core/provider/manager.py b/astrbot/core/provider/manager.py
@@ -461,6 +461,14 @@ def dynamic_import_provider(self, type: str) -> None:
                 from .sources.gemini_tts_source import (
                     ProviderGeminiTTSAPI as ProviderGeminiTTSAPI,
                 )
+            case "qwen_tts_realtime":
+                from .sources.qwen_tts_realtime_source import (
+                    ProviderQwenTTSRealtime as ProviderQwenTTSRealtime,
+                )
+            case "cosyvoice_tts":
+                from .sources.cosyvoice_tts_source import (
+                    ProviderCosyVoiceTTS as ProviderCosyVoiceTTS,
+                )
             case "openai_embedding":
                 from .sources.openai_embedding_source import (
                     OpenAIEmbeddingProvider as OpenAIEmbeddingProvider,

diff --git a/astrbot/core/provider/sources/cosyvoice_tts_source.py b/astrbot/core/provider/sources/cosyvoice_tts_source.py
@@ -0,0 +1,96 @@
+"""CosyVoice TTS provider using DashScope API.
+
+Supports models:
+- cosyvoice-v3.5-plus, cosyvoice-v3.5-flash
+- cosyvoice-v3-plus, cosyvoice-v3-flash
+- cosyvoice-v2, cosyvoice-v1
+- sambert-* models
+
+Uses dashscope.audio.tts_v2.SpeechSynthesizer for non-streaming TTS.
+"""
+
+import asyncio
+import os
+import uuid
+
+from dashscope.audio.tts_v2 import AudioFormat, SpeechSynthesizer
+
+from astrbot.core import logger
+from astrbot.core.utils.astrbot_path import get_astrbot_temp_path
+
+from ..entities import ProviderType
+from ..provider import TTSProvider
+from ..register import register_provider_adapter
+
+
+@register_provider_adapter(
+    "cosyvoice_tts",
+    "CosyVoice TTS (DashScope)",
+    provider_type=ProviderType.TEXT_TO_SPEECH,
+)
+class ProviderCosyVoiceTTS(TTSProvider):
+    """Non-streaming TTS provider backed by DashScope's CosyVoice synthesizer."""
+
+    def __init__(
+        self,
+        provider_config: dict,
+        provider_settings: dict,
+    ) -> None:
+        """Load credentials, voice, prosody and endpoint settings from the config."""
+        super().__init__(provider_config, provider_settings)
+        self.chosen_api_key: str = provider_config.get("api_key", "")
+        self.voice: str = provider_config.get("cosyvoice_voice", "longanyang")
+        self.speech_rate: float = provider_config.get("cosyvoice_speech_rate", 1.0)
+        self.volume: float = provider_config.get("cosyvoice_volume", 1.0)
+        self.pitch_rate: float = provider_config.get("cosyvoice_pitch_rate", 1.0)
+        # Configured in seconds; stored in milliseconds for the synthesizer call.
+        self.timeout_ms: float = float(provider_config.get("timeout", 20)) * 1000
+        self.base_url: str = provider_config.get(
+            "cosyvoice_base_url",
+            "wss://dashscope.aliyuncs.com/api-ws/v1/inference",
+        )
+        self.set_model(provider_config.get("model", "cosyvoice-v3-flash"))
+
+        if not self.base_url.startswith("wss://"):
+            logger.warning(
+                f"[CosyVoice TTS] WebSocket URL 未使用 wss:// 协议: {self.base_url}"
+            )
+
+    async def get_audio(self, text: str) -> str:
+        """Synthesize ``text`` and return the path of the WAV file written.
+
+        Raises:
+            RuntimeError: If the synthesizer returns no audio data.
+        """
+        temp_dir = get_astrbot_temp_path()
+        os.makedirs(temp_dir, exist_ok=True)
+
+        audio_bytes = await self._synthesize(text)
+        if not audio_bytes:
+            raise RuntimeError(
+                f"Audio synthesis failed for model '{self.get_model()}'. "
+                "The model may not be supported or the service is unavailable.",
+            )
+
+        path = os.path.join(temp_dir, f"cosyvoice_tts_{uuid.uuid4()}.wav")
+        with open(path, "wb") as f:
+            f.write(audio_bytes)
+        return path
+
+    async def _synthesize(self, text: str) -> bytes | None:
+        """Run a blocking CosyVoice synthesis in the default executor; return audio."""
+        # Config volume is a ratio (1.0 = normal); the SDK takes an int 0-100 with
+        # 50 as default, so scale and clamp. NOTE(review): confirm this mapping.
+        volume = max(0, min(100, round(float(self.volume) * 50)))
+        synthesizer = SpeechSynthesizer(
+            model=self.get_model(),
+            voice=self.voice,
+            format=AudioFormat.WAV_24000HZ_MONO_16BIT,
+            volume=volume,
+            speech_rate=self.speech_rate,
+            pitch_rate=self.pitch_rate,
+            api_key=self.chosen_api_key,
+            url=self.base_url,
+        )
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, synthesizer.call, text, self.timeout_ms)