diff --git a/astrbot/core/message/components.py b/astrbot/core/message/components.py
index 15265c38d1..12aa531221 100644
--- a/astrbot/core/message/components.py
+++ b/astrbot/core/message/components.py
@@ -537,6 +537,13 @@ class Reply(BaseMessageComponent):
def __init__(self, **_) -> None:
super().__init__(**_)
+ async def to_dict(self) -> dict:
+ chain = self.chain if self.chain is not None else []
+ return {
+ "type": self.type.lower(),
+ "data": {"id": self.id, "chain": [await comp.to_dict() for comp in chain]},
+ }
+
class Poke(BaseMessageComponent):
type: str = ComponentType.Poke
@@ -639,11 +646,30 @@ async def to_dict(self) -> dict:
class Json(BaseMessageComponent):
type: ComponentType = ComponentType.Json
data: dict
+ raw_data: str | None = None
def __init__(self, data: str | dict, **_) -> None:
+ raw_data = None
if isinstance(data, str):
- data = json.loads(data)
- super().__init__(data=data, **_)
+ raw_data = data
+ try:
+ data = json.loads(data)
+ except json.JSONDecodeError:
+ data = {"raw": data}
+ super().__init__(data=data, raw_data=raw_data, **_)
+
+ async def to_dict(self) -> dict:
+ # 如果原始数据是字符串,使用 content 包装形式
+ if self.raw_data is not None:
+ return {
+ "type": self.type.lower(),
+ "data": {"content": self.raw_data},
+ }
+ # 如果原始数据是字典,直接返回原始字典结构
+ return {
+ "type": self.type.lower(),
+ "data": self.data,
+ }
class Unknown(BaseMessageComponent):
diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py
index adee24073d..de43acdecf 100644
--- a/astrbot/core/provider/sources/openai_source.py
+++ b/astrbot/core/provider/sources/openai_source.py
@@ -9,7 +9,6 @@
import httpx
from openai import AsyncAzureOpenAI, AsyncOpenAI
-from openai._exceptions import NotFoundError
from openai.lib.streaming.chat._completions import ChatCompletionStreamState
from openai.types.chat.chat_completion import ChatCompletion
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
@@ -210,17 +209,6 @@ def __init__(self, provider_config, provider_settings) -> None:
self.reasoning_key = "reasoning_content"
- async def get_models(self):
- try:
- models_str = []
- models = await self.client.models.list()
- models = sorted(models.data, key=lambda x: x.id)
- for model in models:
- models_str.append(model.id)
- return models_str
- except NotFoundError as e:
- raise Exception(f"获取模型列表失败:{e}")
-
async def _query(self, payloads: dict, tools: ToolSet | None) -> LLMResponse:
if tools:
model = payloads.get("model", "").lower()
@@ -372,8 +360,7 @@ def _extract_usage(self, usage: CompletionUsage) -> TokenUsage:
output=completion_tokens,
)
- @staticmethod
- def _normalize_content(raw_content: Any, strip: bool = True) -> str:
+ def _normalize_content(self, raw_content: Any, strip: bool = True) -> str:
"""Normalize content from various formats to plain string.
Some LLM providers return content as list[dict] format
@@ -462,6 +449,31 @@ def _normalize_content(raw_content: Any, strip: bool = True) -> str:
# Fallback for other types (int, float, etc.)
return str(raw_content) if raw_content is not None else ""
+ def _parse_image_url_part(self, image_field) -> str | None:
+ """解析 OpenAI image_url 部分并提取 URL
+
+ Args:
+ image_field: 可以是字典或字符串格式的 image_url 字段
+
+ Returns:
+ 提取的 URL 或 base64 数据,如果无效则返回 None
+ """
+ if isinstance(image_field, dict):
+ url = image_field.get("url")
+ else:
+ url = image_field
+
+ if not url:
+ return None
+
+ # 统一处理 base64 格式,提取纯 base64 数据
+ if isinstance(url, str) and "base64," in url:
+ return url.split("base64,", 1)[1]
+ elif isinstance(url, str) and url.startswith("base64://"):
+ return url.replace("base64://", "")
+ else:
+ return url
+
async def _parse_openai_completion(
self, completion: ChatCompletion, tools: ToolSet | None
) -> LLMResponse:
@@ -474,19 +486,56 @@ async def _parse_openai_completion(
# parse the text completion
if choice.message.content is not None:
- completion_text = self._normalize_content(choice.message.content)
- # specially, some providers may set <think> tags around reasoning content in the completion text,
- # we use regex to remove them, and store then in reasoning_content field
- reasoning_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
- matches = reasoning_pattern.findall(completion_text)
- if matches:
- llm_response.reasoning_content = "\n".join(
- [match.strip() for match in matches],
- )
- completion_text = reasoning_pattern.sub("", completion_text).strip()
- # Also clean up orphan </think> tags that may leak from some models
- completion_text = re.sub(r"</think>\s*$", "", completion_text).strip()
- llm_response.result_chain = MessageChain().message(completion_text)
+ # content can be either a plain string or a multimodal list
+ content = choice.message.content
+ # handle multimodal content returned as a list of parts
+ if isinstance(content, list):
+ reasoning_parts = []
+ mc = MessageChain()
+ for part in content:
+ if not isinstance(part, dict):
+ # fallback: append as plain text
+ mc.message(str(part))
+ continue
+ ptype = part.get("type")
+ if ptype == "text":
+ mc.message(part.get("text", ""))
+ elif ptype == "image_url":
+ image_field = part.get("image_url")
+ url = self._parse_image_url_part(image_field)
+ if url:
+ # 判断是 base64 数据还是 URL
+ if url.startswith("http"):
+ mc.url_image(url)
+ else:
+ mc.base64_image(url)
+ elif ptype == "think":
+ # collect reasoning parts for later extraction
+ think_val = part.get("think")
+ if think_val:
+ reasoning_parts.append(str(think_val))
+ else:
+ # unknown part type, append its textual representation
+ mc.message(json.dumps(part, ensure_ascii=False))
+
+ if reasoning_parts:
+ llm_response.reasoning_content = "\n".join(
+ [rp.strip() for rp in reasoning_parts]
+ )
+ llm_response.result_chain = mc
+ else:
+ # text completion (string)
+ completion_text = str(content).strip()
+ # specially, some providers may set <think> tags around reasoning content in the completion text,
+ # we use regex to remove them, and store them in reasoning_content field
+ reasoning_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
+ matches = reasoning_pattern.findall(completion_text)
+ if matches:
+ llm_response.reasoning_content = "\n".join(
+ [match.strip() for match in matches],
+ )
+ completion_text = reasoning_pattern.sub("", completion_text).strip()
+ llm_response.result_chain = MessageChain().message(completion_text)
# parse the reasoning content if any
# the priority is higher than the tag extraction