diff --git a/pkg/attachment/advisor.go b/pkg/attachment/advisor.go
new file mode 100644
index 000000000..c790184d2
--- /dev/null
+++ b/pkg/attachment/advisor.go
@@ -0,0 +1,10 @@
+package attachment
+
+// Advisor is implemented by providers that support document attachments.
+// The UI layer uses SupportedMIMETypes to populate file-picker filters so that
+// only files the active provider can handle are offered to the user.
+type Advisor interface {
+ // SupportedMIMETypes returns the list of MIME types that this provider
+ // can accept as document attachments (e.g. "application/pdf", "image/png").
+ SupportedMIMETypes() []string
+}
diff --git a/pkg/attachment/attachment.go b/pkg/attachment/attachment.go
new file mode 100644
index 000000000..251a7db0e
--- /dev/null
+++ b/pkg/attachment/attachment.go
@@ -0,0 +1,98 @@
+// Package attachment provides helpers for deciding how document attachments
+// should be delivered to LLM providers and for fetching remote content.
+package attachment
+
+import (
+ "fmt"
+
+ "github.com/docker/docker-agent/pkg/chat"
+)
+
+// Strategy describes how a document attachment should be delivered to a provider.
+type Strategy int
+
+const (
+ // StrategyDrop means the attachment cannot be handled and should be omitted.
+ StrategyDrop Strategy = iota
+ // StrategyTXT means the document's inline text should be sent as a text envelope.
+ StrategyTXT
+ // StrategyB64 means the document's inline binary data should be base64-encoded and sent inline.
+ StrategyB64
+ // StrategyURL means the document's URL should be passed directly to the provider.
+ StrategyURL
+ // StrategyFetchAsB64 means the URL should be fetched and its bytes sent as base64.
+ StrategyFetchAsB64
+ // StrategyFetchAsTXT means the URL should be fetched and its content sent as plain text.
+ StrategyFetchAsTXT
+)
+
+// Capability describes which delivery modes a provider supports for a given MIME type.
+type Capability struct {
+ TXT bool // provider accepts plain text for this MIME type
+ B64 bool // provider accepts base64-encoded binary for this MIME type
+ URL bool // provider accepts a public URL for this MIME type
+}
+
+// CapabilityTable maps MIME types to provider capabilities.
+// Providers build this table to declare what they can handle.
+type CapabilityTable map[string]Capability
+
+// Decide selects the best delivery Strategy for doc given the provider's
+// capability table. It returns the chosen strategy and, when the strategy is
+// a fallback or a drop, a human-readable reason string.
+//
+// When multiple source fields are set, URL takes priority over InlineData,
+// which takes priority over InlineText.
+//
+// Decision order (exact — do not deviate):
+// 1. MIME type not in table → Drop
+// 2. Source is URL + provider supports URL → URL (reason: "")
+// 3. Source is URL + provider does NOT support URL:
+// a. provider supports B64 → FetchAsB64 (reason: "url not supported, will fetch as b64")
+// b. provider supports TXT → FetchAsTXT (reason: "url not supported, will fetch as text")
+// c. otherwise → Drop
+// 4. Source is InlineData (non-nil) + provider supports B64 → B64 (reason: "")
+// 5. Source is InlineText (non-empty) + provider supports TXT → TXT (reason: "")
+// 6. → Drop
+//
+// Note: FetchAsB64 and FetchAsTXT carry non-empty reason strings intentionally —
+// they signal an automatic fallback that callers may want to surface in logs or UI.
+func Decide(doc chat.Document, table CapabilityTable) (Strategy, string) {
+ capability, ok := table[doc.MimeType]
+ if !ok {
+ return StrategyDrop, "mime not in provider table"
+ }
+
+ if doc.Source.URL != "" {
+ switch {
+ case capability.URL:
+ return StrategyURL, ""
+ case capability.B64:
+ return StrategyFetchAsB64, "url not supported, will fetch as b64"
+ case capability.TXT:
+ return StrategyFetchAsTXT, "url not supported, will fetch as text"
+ default:
+ return StrategyDrop, "provider cannot handle url or inline for this mime"
+ }
+ }
+ if doc.Source.InlineData != nil && capability.B64 {
+ return StrategyB64, ""
+ }
+ if doc.Source.InlineText != "" && capability.TXT {
+ return StrategyTXT, ""
+ }
+ return StrategyDrop, "no supported variant for this provider"
+}
+
+// TXTEnvelope wraps plain-text document content in an XML-like tag for
+// inclusion in a chat message. The envelope makes the document's name and
+// MIME type visible to the model.
+//
+// Example output:
+//
+//
+// ...content...
+//
+func TXTEnvelope(name, mimeType, body string) string {
+ return fmt.Sprintf("\n%s\n", name, mimeType, body)
+}
diff --git a/pkg/attachment/decide_test.go b/pkg/attachment/decide_test.go
new file mode 100644
index 000000000..886a43c01
--- /dev/null
+++ b/pkg/attachment/decide_test.go
@@ -0,0 +1,192 @@
+package attachment_test
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/docker/docker-agent/pkg/attachment"
+ "github.com/docker/docker-agent/pkg/chat"
+)
+
+func TestDecide(t *testing.T) {
+ // A table that exercises every capability combination used in the tests.
+ table := attachment.CapabilityTable{
+ "text/plain": {TXT: true, B64: false, URL: false},
+ "image/png": {TXT: false, B64: true, URL: true},
+ "application/pdf": {TXT: false, B64: true, URL: false},
+ "text/markdown": {TXT: true, B64: false, URL: true},
+ "video/mp4": {TXT: false, B64: false, URL: false}, // nothing supported
+ }
+
+ tests := []struct {
+ name string
+ doc chat.Document
+ wantStrategy attachment.Strategy
+ wantReasonSubs string // non-empty substring that must appear in the reason
+ }{
+ // ── 1. MIME miss ────────────────────────────────────────────────────────
+ {
+ name: "mime not in table → Drop",
+ doc: chat.Document{
+ MimeType: "application/zip",
+ Source: chat.DocumentSource{InlineData: []byte("data")},
+ },
+ wantStrategy: attachment.StrategyDrop,
+ wantReasonSubs: "mime not in provider table",
+ },
+
+ // ── 2. URL + provider supports URL → URL ─────────────────────────────
+ {
+ name: "url source + URL cap → URL",
+ doc: chat.Document{
+ MimeType: "image/png",
+ Source: chat.DocumentSource{URL: "https://example.com/img.png"},
+ },
+ wantStrategy: attachment.StrategyURL,
+ },
+
+ // ── 3a. URL + no URL cap + B64 cap → FetchAsB64 ──────────────────────
+ // Non-empty reason is intentional: signals an automatic fallback for logging/UI.
+ {
+ name: "url source + no URL cap + B64 → FetchAsB64",
+ doc: chat.Document{
+ MimeType: "application/pdf",
+ Source: chat.DocumentSource{URL: "https://example.com/doc.pdf"},
+ },
+ wantStrategy: attachment.StrategyFetchAsB64,
+ wantReasonSubs: "url not supported",
+ },
+
+ // ── 3b. URL + no URL cap + no B64 + TXT cap → FetchAsTXT ─────────────
+ // Non-empty reason is intentional: signals an automatic fallback for logging/UI.
+ {
+ name: "url source + no URL cap + TXT → FetchAsTXT",
+ doc: chat.Document{
+ MimeType: "text/plain",
+ Source: chat.DocumentSource{URL: "https://example.com/readme.txt"},
+ },
+ wantStrategy: attachment.StrategyFetchAsTXT,
+ wantReasonSubs: "url not supported",
+ },
+
+ // ── 3c. URL + no URL cap + no B64 + no TXT → Drop ────────────────────
+ {
+ name: "url source + nothing supported → Drop",
+ doc: chat.Document{
+ MimeType: "video/mp4",
+ Source: chat.DocumentSource{URL: "https://example.com/clip.mp4"},
+ },
+ wantStrategy: attachment.StrategyDrop,
+ wantReasonSubs: "provider cannot handle",
+ },
+
+ // ── 4. InlineData (non-nil) + B64 cap → B64 ──────────────────────────
+ {
+ name: "inline binary + B64 cap → B64",
+ doc: chat.Document{
+ MimeType: "image/png",
+ Source: chat.DocumentSource{InlineData: []byte("\x89PNG\r\n\x1a\n")},
+ },
+ wantStrategy: attachment.StrategyB64,
+ },
+
+ // ── 4b. InlineData non-nil but zero-length → B64 (spec: nil check, not len) ──
+ {
+ name: "inline binary (empty slice, non-nil) + B64 cap → B64",
+ doc: chat.Document{
+ MimeType: "image/png",
+ Source: chat.DocumentSource{InlineData: []byte{}},
+ },
+ wantStrategy: attachment.StrategyB64,
+ },
+
+ // ── 5. InlineText + TXT cap → TXT ────────────────────────────────────
+ {
+ name: "inline text + TXT cap → TXT",
+ doc: chat.Document{
+ MimeType: "text/plain",
+ Source: chat.DocumentSource{InlineText: "hello world"},
+ },
+ wantStrategy: attachment.StrategyTXT,
+ },
+
+ // ── 6. InlineData present but only TXT supported → Drop ──────────────
+ {
+ name: "inline binary but only TXT cap → Drop",
+ doc: chat.Document{
+ MimeType: "text/plain",
+ // validates step-6 fall-through: B64 variant present but provider only supports TXT
+ Source: chat.DocumentSource{InlineData: []byte("binary-data")},
+ },
+ wantStrategy: attachment.StrategyDrop,
+ wantReasonSubs: "no supported variant",
+ },
+
+ // ── 7. InlineText present but only B64/URL supported → Drop ──────────
+ {
+ name: "inline text but only B64/URL cap → Drop",
+ doc: chat.Document{
+ MimeType: "image/png",
+ Source: chat.DocumentSource{InlineText: "not really an image"},
+ },
+ wantStrategy: attachment.StrategyDrop,
+ wantReasonSubs: "no supported variant",
+ },
+
+ // ── 8. No source at all → Drop ────────────────────────────────────────
+ {
+ name: "empty source → Drop",
+ doc: chat.Document{
+ MimeType: "text/plain",
+ Source: chat.DocumentSource{},
+ },
+ wantStrategy: attachment.StrategyDrop,
+ wantReasonSubs: "no supported variant",
+ },
+
+ // ── 9. URL mime that also supports URL ────────────────────────────────
+ {
+ name: "url source + URL cap (text/markdown) → URL",
+ doc: chat.Document{
+ MimeType: "text/markdown",
+ Source: chat.DocumentSource{URL: "https://example.com/readme.md"},
+ },
+ wantStrategy: attachment.StrategyURL,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ got, reason := attachment.Decide(tc.doc, table)
+ if got != tc.wantStrategy {
+ t.Errorf("Decide() strategy = %v, want %v (reason: %q)", got, tc.wantStrategy, reason)
+ }
+ if tc.wantReasonSubs != "" && !strings.Contains(reason, tc.wantReasonSubs) {
+ t.Errorf("Decide() reason = %q, want substring %q", reason, tc.wantReasonSubs)
+ }
+ if tc.wantReasonSubs == "" && reason != "" {
+ // For happy-path strategies (URL, B64, TXT) the reason should be empty.
+ t.Errorf("Decide() reason = %q, want empty", reason)
+ }
+ })
+ }
+}
+
+func TestTXTEnvelope(t *testing.T) {
+ got := attachment.TXTEnvelope("readme.md", "text/markdown", "# Hello\nworld")
+ if !strings.Contains(got, `name="readme.md"`) {
+ t.Errorf("TXTEnvelope missing name attribute: %s", got)
+ }
+ if !strings.Contains(got, `mime-type="text/markdown"`) {
+ t.Errorf("TXTEnvelope missing mime-type attribute: %s", got)
+ }
+ if !strings.Contains(got, "# Hello\nworld") {
+ t.Errorf("TXTEnvelope missing body: %s", got)
+ }
+ if !strings.HasPrefix(got, "") {
+ t.Errorf("TXTEnvelope should end with : %s", got)
+ }
+}
diff --git a/pkg/attachment/fetch.go b/pkg/attachment/fetch.go
new file mode 100644
index 000000000..3a936f16d
--- /dev/null
+++ b/pkg/attachment/fetch.go
@@ -0,0 +1,45 @@
+package attachment
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "net/http"
+ "time"
+)
+
+// fetchTimeout is the maximum time allowed for a URL fetch.
+// TODO: make fetchTimeout per-runtime-configurable in Phase 2.
+const fetchTimeout = 10 * time.Second
+
+// FetchURL fetches a public URL with a 10-second timeout.
+// Returns the raw response body bytes on success.
+// Returns an error if the request fails or the server responds with a non-2xx status.
+//
+// TODO: consider bounding redirects / restricting schemes for user-supplied URLs (SSRF) in Phase 2.
+func FetchURL(ctx context.Context, url string) ([]byte, error) {
+ ctx, cancel := context.WithTimeout(ctx, fetchTimeout)
+ defer cancel()
+
+ req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, http.NoBody)
+ if err != nil {
+ return nil, fmt.Errorf("attachment: creating request for %q: %w", url, err)
+ }
+
+ resp, err := http.DefaultClient.Do(req)
+ if err != nil {
+ return nil, fmt.Errorf("attachment: fetching %q: %w", url, err)
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+ return nil, fmt.Errorf("attachment: fetching %q: unexpected status %d", url, resp.StatusCode)
+ }
+
+ data, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return nil, fmt.Errorf("attachment: reading response from %q: %w", url, err)
+ }
+
+ return data, nil
+}
diff --git a/pkg/attachment/fetch_test.go b/pkg/attachment/fetch_test.go
new file mode 100644
index 000000000..9de7c1283
--- /dev/null
+++ b/pkg/attachment/fetch_test.go
@@ -0,0 +1,107 @@
+package attachment_test
+
+import (
+ "context"
+ "net/http"
+ "net/http/httptest"
+ "strings"
+ "testing"
+
+ "github.com/docker/docker-agent/pkg/attachment"
+)
+
+func TestFetchURL_Success(t *testing.T) {
+ t.Parallel()
+
+ const body = "hello from test server"
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.WriteHeader(http.StatusOK)
+ _, _ = w.Write([]byte(body))
+ }))
+ defer srv.Close()
+
+ got, err := attachment.FetchURL(t.Context(), srv.URL)
+ if err != nil {
+ t.Fatalf("FetchURL returned unexpected error: %v", err)
+ }
+ if string(got) != body {
+ t.Errorf("FetchURL body = %q, want %q", string(got), body)
+ }
+}
+
+func TestFetchURL_Non2xx(t *testing.T) {
+ t.Parallel()
+
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ http.Error(w, "not found", http.StatusNotFound)
+ }))
+ defer srv.Close()
+
+ _, err := attachment.FetchURL(t.Context(), srv.URL)
+ if err == nil {
+ t.Fatal("FetchURL expected an error for 404 response, got nil")
+ }
+ if !strings.Contains(err.Error(), "404") {
+ t.Errorf("error should mention status code 404, got: %v", err)
+ }
+}
+
+func TestFetchURL_ServerError(t *testing.T) {
+ t.Parallel()
+
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ http.Error(w, "internal error", http.StatusInternalServerError)
+ }))
+ defer srv.Close()
+
+ _, err := attachment.FetchURL(t.Context(), srv.URL)
+ if err == nil {
+ t.Fatal("FetchURL expected an error for 500 response, got nil")
+ }
+ if !strings.Contains(err.Error(), "500") {
+ t.Errorf("error should mention status code 500, got: %v", err)
+ }
+}
+
+func TestFetchURL_CancelledContext(t *testing.T) {
+ t.Parallel()
+
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ // Never respond — let the context cancel first.
+ <-r.Context().Done()
+ }))
+ defer srv.Close()
+
+ ctx, cancel := context.WithCancel(t.Context())
+ cancel() // cancel immediately
+
+ _, err := attachment.FetchURL(ctx, srv.URL)
+ if err == nil {
+ t.Fatal("FetchURL expected an error for cancelled context, got nil")
+ }
+}
+
+func TestFetchURL_BinaryContent(t *testing.T) {
+ t.Parallel()
+
+ content := []byte{0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a} // PNG magic bytes
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "image/png")
+ w.WriteHeader(http.StatusOK)
+ _, _ = w.Write(content)
+ }))
+ defer srv.Close()
+
+ got, err := attachment.FetchURL(t.Context(), srv.URL)
+ if err != nil {
+ t.Fatalf("FetchURL returned unexpected error: %v", err)
+ }
+ if len(got) != len(content) {
+ t.Errorf("FetchURL returned %d bytes, want %d", len(got), len(content))
+ }
+ for i, b := range content {
+ if got[i] != b {
+ t.Errorf("byte %d: got %02x, want %02x", i, got[i], b)
+ }
+ }
+}
diff --git a/pkg/chat/chat.go b/pkg/chat/chat.go
index 9e4a2ced0..06e69e6a1 100644
--- a/pkg/chat/chat.go
+++ b/pkg/chat/chat.go
@@ -32,6 +32,7 @@ const (
MessagePartTypeText MessagePartType = "text"
MessagePartTypeImageURL MessagePartType = "image_url"
MessagePartTypeFile MessagePartType = "file"
+ MessagePartTypeDocument MessagePartType = "document"
)
type ImageURLDetail string
@@ -110,6 +111,7 @@ type MessagePart struct {
Text string `json:"text,omitempty"`
ImageURL *MessageImageURL `json:"image_url,omitempty"`
File *MessageFile `json:"file,omitempty"`
+ Document *Document `json:"document,omitempty"`
}
// FinishReason represents the reason why the model finished generating a response
diff --git a/pkg/chat/document.go b/pkg/chat/document.go
new file mode 100644
index 000000000..2e3d74d68
--- /dev/null
+++ b/pkg/chat/document.go
@@ -0,0 +1,20 @@
+package chat
+
+// DocumentSource holds the actual content of a document attachment.
+// Exactly one of InlineText, InlineData, or URL should be set.
+type DocumentSource struct {
+ InlineText string `json:"inline_text,omitempty"` // plain text (TXT/MD/HTML/CSV)
+ InlineData []byte `json:"inline_data,omitempty"` // binary bytes (images, PDFs, etc.)
+ URL string `json:"url,omitempty"` // public HTTPS URL
+}
+
+// Document represents a file or document attachment that can be included in a
+// message part (set Type = MessagePartTypeDocument when populating this field).
+// Providers use the attachment package to decide how to handle the document
+// based on their capability tables.
+type Document struct {
+ Name string `json:"name"`
+ MimeType string `json:"mime_type"`
+ Size int64 `json:"size,omitempty"`
+ Source DocumentSource `json:"source"`
+}