Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
987878e
[Bug]当 LLM 的回复本身包含类似 JSON 的格式的时候消息的 content 字段可能被错误地多次序列化
Sjshi763 Jan 9, 2026
b510616
[Bug]当 LLM 的回复本身包含类似 JSON 的格式的时候消息的 content 字段可能被错误地多次序列化
Sjshi763 Jan 17, 2026
9132ebb
[Bug]当 LLM 的回复本身包含类似 JSON 的格式的时候消息的 content 字段可能被错误地多次序列化
Sjshi763 Jan 29, 2026
672dc1d
- 当原始数据是字符串时(`raw_data` 不为 None):返回 `{"type": "json", "data": {"conte…
Sjshi763 Jan 29, 2026
989c1f5
像其他组件一样使用 self.type
Sjshi763 Jan 29, 2026
3075337
在 Json.__init__ 中 传入 self.raw_data
Sjshi763 Jan 29, 2026
b24075f
在 Json.to_dict 中 使用 self.type.lower() 来推导
Sjshi763 Jan 29, 2026
27ed75d
[Bug]当 LLM 的回复本身包含类似 JSON 的格式的时候消息的 content 字段可能被错误地多次序列化
Sjshi763 Jan 9, 2026
5c893e0
[Bug]当 LLM 的回复本身包含类似 JSON 的格式的时候消息的 content 字段可能被错误地多次序列化
Sjshi763 Jan 17, 2026
a953dac
[Bug]当 LLM 的回复本身包含类似 JSON 的格式的时候消息的 content 字段可能被错误地多次序列化
Sjshi763 Jan 29, 2026
c11a7df
- 当原始数据是字符串时(`raw_data` 不为 None):返回 `{"type": "json", "data": {"conte…
Sjshi763 Jan 29, 2026
c528c5f
像其他组件一样使用 self.type
Sjshi763 Jan 29, 2026
22c1179
在 Json.__init__ 中 传入 self.raw_data
Sjshi763 Jan 29, 2026
4d782b6
在 Json.to_dict 中 使用 self.type.lower() 来推导
Sjshi763 Jan 29, 2026
9859b7c
Merge branch 'Sjshi763/issue4363' of https://github.com/Sjshi763/Astr…
Sjshi763 Feb 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions astrbot/core/message/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,13 @@ class Reply(BaseMessageComponent):
def __init__(self, **_) -> None:
super().__init__(**_)

async def to_dict(self) -> dict:
    """Serialize this Reply component into its wire-format dict.

    Returns:
        A dict of shape ``{"type": ..., "data": {"id": ..., "chain": [...]}}``
        where every component of the quoted chain is serialized recursively.
        A missing chain (``None``) is serialized as an empty list.
    """
    quoted = self.chain if self.chain is not None else []
    serialized_chain = [await component.to_dict() for component in quoted]
    return {
        "type": self.type.lower(),
        "data": {"id": self.id, "chain": serialized_chain},
    }


class Poke(BaseMessageComponent):
type: str = ComponentType.Poke
Expand Down Expand Up @@ -639,11 +646,30 @@ async def to_dict(self) -> dict:
class Json(BaseMessageComponent):
    """A JSON card message component.

    ``data`` always exposes a dict view of the payload. When the component
    was constructed from a raw *string*, the exact original string is kept
    in ``raw_data`` so that serialization can emit it verbatim instead of
    re-encoding it — this prevents content that merely *looks* like JSON
    from being serialized multiple times.
    """

    type: ComponentType = ComponentType.Json
    data: dict
    # Original string payload when one was supplied; None for dict input.
    raw_data: str | None = None

    def __init__(self, data: str | dict, **_) -> None:
        raw_data = None
        if isinstance(data, str):
            # Remember the exact string so to_dict() can round-trip it.
            raw_data = data
            try:
                data = json.loads(data)
            except json.JSONDecodeError:
                # Not valid JSON: still provide a dict view of the payload.
                data = {"raw": data}
        super().__init__(data=data, raw_data=raw_data, **_)

    async def to_dict(self) -> dict:
        # String-sourced payloads are wrapped as {"content": <original str>}
        # so the exact text is preserved without a second serialization pass.
        if self.raw_data is not None:
            return {
                "type": self.type.lower(),
                "data": {"content": self.raw_data},
            }
        # Dict-sourced payloads are emitted as-is.
        return {
            "type": self.type.lower(),
            "data": self.data,
        }


class Unknown(BaseMessageComponent):
Expand Down
103 changes: 76 additions & 27 deletions astrbot/core/provider/sources/openai_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import httpx
from openai import AsyncAzureOpenAI, AsyncOpenAI
from openai._exceptions import NotFoundError
from openai.lib.streaming.chat._completions import ChatCompletionStreamState
from openai.types.chat.chat_completion import ChatCompletion
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
Expand Down Expand Up @@ -210,17 +209,6 @@ def __init__(self, provider_config, provider_settings) -> None:

self.reasoning_key = "reasoning_content"

async def get_models(self):
try:
models_str = []
models = await self.client.models.list()
models = sorted(models.data, key=lambda x: x.id)
for model in models:
models_str.append(model.id)
return models_str
except NotFoundError as e:
raise Exception(f"获取模型列表失败:{e}")

async def _query(self, payloads: dict, tools: ToolSet | None) -> LLMResponse:
if tools:
model = payloads.get("model", "").lower()
Expand Down Expand Up @@ -372,8 +360,7 @@ def _extract_usage(self, usage: CompletionUsage) -> TokenUsage:
output=completion_tokens,
)

@staticmethod
def _normalize_content(raw_content: Any, strip: bool = True) -> str:
def _normalize_content(self, raw_content: Any, strip: bool = True) -> str:
"""Normalize content from various formats to plain string.

Some LLM providers return content as list[dict] format
Expand Down Expand Up @@ -462,6 +449,31 @@ def _normalize_content(raw_content: Any, strip: bool = True) -> str:
# Fallback for other types (int, float, etc.)
return str(raw_content) if raw_content is not None else ""

def _parse_image_url_part(self, image_field) -> str | None:
"""解析 OpenAI image_url 部分并提取 URL

Args:
image_field: 可以是字典或字符串格式的 image_url 字段

Returns:
提取的 URL 或 base64 数据,如果无效则返回 None
"""
if isinstance(image_field, dict):
url = image_field.get("url")
else:
url = image_field

if not url:
return None

# 统一处理 base64 格式,提取纯 base64 数据
if isinstance(url, str) and "base64," in url:
return url.split("base64,", 1)[1]
elif isinstance(url, str) and url.startswith("base64://"):
return url.replace("base64://", "")
else:
return url

async def _parse_openai_completion(
self, completion: ChatCompletion, tools: ToolSet | None
) -> LLMResponse:
Expand All @@ -474,19 +486,56 @@ async def _parse_openai_completion(

# parse the text completion
if choice.message.content is not None:
completion_text = self._normalize_content(choice.message.content)
# specially, some providers may set <think> tags around reasoning content in the completion text,
# we use regex to remove them, and store then in reasoning_content field
reasoning_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
matches = reasoning_pattern.findall(completion_text)
if matches:
llm_response.reasoning_content = "\n".join(
[match.strip() for match in matches],
)
completion_text = reasoning_pattern.sub("", completion_text).strip()
# Also clean up orphan </think> tags that may leak from some models
completion_text = re.sub(r"</think>\s*$", "", completion_text).strip()
llm_response.result_chain = MessageChain().message(completion_text)
# content can be either a plain string or a multimodal list
content = choice.message.content
# handle multimodal content returned as a list of parts
if isinstance(content, list):
reasoning_parts = []
mc = MessageChain()
for part in content:
if not isinstance(part, dict):
# fallback: append as plain text
mc.message(str(part))
continue
ptype = part.get("type")
if ptype == "text":
mc.message(part.get("text", ""))
elif ptype == "image_url":
image_field = part.get("image_url")
url = self._parse_image_url_part(image_field)
if url:
# 判断是 base64 数据还是 URL
if url.startswith("http"):
mc.url_image(url)
else:
mc.base64_image(url)
elif ptype == "think":
# collect reasoning parts for later extraction
think_val = part.get("think")
if think_val:
reasoning_parts.append(str(think_val))
else:
# unknown part type, append its textual representation
mc.message(json.dumps(part, ensure_ascii=False))

if reasoning_parts:
llm_response.reasoning_content = "\n".join(
[rp.strip() for rp in reasoning_parts]
)
llm_response.result_chain = mc
else:
# text completion (string)
completion_text = str(content).strip()
# specially, some providers may set <think> tags around reasoning content in the completion text,
# we use regex to remove them, and store then in reasoning_content field
reasoning_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
matches = reasoning_pattern.findall(completion_text)
if matches:
llm_response.reasoning_content = "\n".join(
[match.strip() for match in matches],
)
completion_text = reasoning_pattern.sub("", completion_text).strip()
llm_response.result_chain = MessageChain().message(completion_text)

# parse the reasoning content if any
# the priority is higher than the <think> tag extraction
Expand Down