From 9b309e68222db594f3d937639e7684fd2591f7bc Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Mon, 11 May 2026 16:21:43 -0600 Subject: [PATCH] =?UTF-8?q?feat(tcloud):=20imagesEdit=20=E2=80=94=20OpenAI?= =?UTF-8?q?=20/v1/images/edits=20over=20multipart?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the sibling to `imageGenerate` so callers can drive the edit / inpaint / multi-image composition workflow without hand-rolling a custom transport. cli-bridge already speaks the target route (`POST /v1/images/edits` proxied to either `router.tangle.tools/v1/images/edits` when TANGLE_API_KEY is set or OpenAI direct), so this is the last piece needed for the "place this CAD design into the user's uploaded yard photo" workflow in physim's `generate_concept_image`. API: client.imagesEdit({ prompt: "...", image: Blob | ArrayBuffer | {data: b64, mediaType, filename?}, mask?: Blob | ArrayBuffer | {data: b64, mediaType, filename?}, // inpainting mask model?: "gpt-image-2", // default n?: 1, size?: "1024x1024", quality?: "high", response_format?: "b64_json", }) → ImageResponse The `image` field accepts a single attachment or an array (for gpt-image-2's multi-image fusion). Polymorphic input keeps both browser (Blob / ArrayBuffer from `fetch().then(r => r.blob())`) and Node-server callers (base64 strings from a DB / disk read) ergonomic without forcing either side to do a conversion dance. The transport mirrors the existing `transcribe()` path — multipart/form-data through `proxiedFetch`, `Content-Type` deleted so FormData sets the boundary, manual `checkLimits()` + `_requestCount++` for credit accounting. Bumps to v0.4.7. Re-exported from `index.ts` alongside the existing image types. 
--- packages/tcloud/package.json | 2 +- packages/tcloud/src/client.ts | 98 +++++++++++++++++++++++++++++++++++ packages/tcloud/src/index.ts | 2 + packages/tcloud/src/types.ts | 34 ++++++++++++ 4 files changed, 135 insertions(+), 1 deletion(-) diff --git a/packages/tcloud/package.json b/packages/tcloud/package.json index 8029ca5..4d764df 100644 --- a/packages/tcloud/package.json +++ b/packages/tcloud/package.json @@ -1,6 +1,6 @@ { "name": "@tangle-network/tcloud", - "version": "0.4.6", + "version": "0.4.7", "description": "TypeScript SDK and CLI for Tangle AI Cloud — decentralized LLM inference", "type": "module", "main": "./dist/index.cjs", diff --git a/packages/tcloud/src/client.ts b/packages/tcloud/src/client.ts index 9ff311f..41018fd 100644 --- a/packages/tcloud/src/client.ts +++ b/packages/tcloud/src/client.ts @@ -19,6 +19,8 @@ import type { SpendAuth, EmbeddingOptions, EmbeddingResponse, + ImageEditAttachment, + ImageEditOptions, ImageGenerateOptions, ImageResponse, RerankOptions, @@ -851,6 +853,57 @@ export class TCloudClient { }) } + /** + * Edit / inpaint / variate an existing image with a text prompt. + * Sibling to `imageGenerate`; routes to `/v1/images/edits` via + * multipart/form-data per the OpenAI spec. + * + * Reference image attachments may be passed as `Blob`, `ArrayBuffer`, + * or `{data: base64, mediaType, filename?}`. Multi-image composition + * (e.g. gpt-image-2 with two reference frames + a prompt that fuses + * them) is supported by passing an array; for legacy models only the + * first image is honored upstream. 
+ */ + async imagesEdit(options: ImageEditOptions): Promise { + const formData = new FormData() + formData.append('prompt', options.prompt) + formData.append('model', options.model || 'gpt-image-2') + if (options.n != null) formData.append('n', String(options.n)) + if (options.size) formData.append('size', options.size) + if (options.quality) formData.append('quality', options.quality) + if (options.response_format) formData.append('response_format', options.response_format) + if (options.mask) formData.append('mask', toEditBlob(options.mask, 'mask.png'), 'mask.png') + + const images = Array.isArray(options.image) ? options.image : [options.image] + if (images.length === 0) { + throw new TCloudError(400, 'imagesEdit requires at least one image attachment') + } + // Each upstream takes the field name differently — `image` for a + // single-image variant (dall-e-2) and `image[]` for multi-image + // composition (gpt-image-2). We always send `image[]`; upstream + // implementations of the OpenAI shape accept both. 
+ images.forEach((img, idx) => { + const blob = toEditBlob(img, `image-${idx + 1}.png`) + formData.append('image[]', blob, blobFilename(img, `image-${idx + 1}.png`)) + }) + + const headers = { ...this.headers } + delete headers['Content-Type'] // let FormData set the boundary + + this.checkLimits() + const res = await proxiedFetch(this.privacy, `${this.baseURL}/images/edits`, { + method: 'POST', + headers, + body: formData as unknown as BodyInit, + }, false) + if (!res.ok) { + const err = await res.json().catch(() => ({ error: res.statusText })) + throw new TCloudError(res.status, err.error?.message || err.error || err.message || res.statusText) + } + this._requestCount++ + return res.json() + } + /** Rerank documents by relevance to a query */ async rerank(options: RerankOptions): Promise { return this._request(`${this.baseURL}/rerank`, { @@ -1389,6 +1442,51 @@ function formatPrice(pricePerToken: number): string { return `$${(pricePerToken * 1000).toFixed(6)}/1K tokens` } +/** + * Normalize the polymorphic `ImageEditAttachment` to a `Blob` for + * multipart FormData submission. Base64 inputs are decoded; raw + * ArrayBuffers are wrapped with a sensible default media type (PNG). + */ +function toEditBlob(input: ImageEditAttachment, defaultFilename: string): Blob { + if (input instanceof Blob) return input + if (input instanceof ArrayBuffer) return new Blob([input], { type: 'image/png' }) + // {data: base64, mediaType, filename?} + const binary = base64ToUint8Array(input.data) + // `Uint8Array` is a valid BlobPart at the JS level. TS's narrower + // `BodyInit` shape complains about the `ArrayBufferLike` generic in + // some lib targets — copy to a fresh ArrayBuffer-backed view so the + // type is unambiguous. 
+ const copy = new Uint8Array(binary.byteLength) + copy.set(binary) + return new Blob([copy.buffer], { type: input.mediaType || 'image/png' }) + void defaultFilename // referenced by blobFilename +} + +/** Extract a stable filename for the upload (multipart needs one; + * upstream sometimes uses extension to sniff the format). */ +function blobFilename(input: ImageEditAttachment, fallback: string): string { + if (typeof input === 'object' && !(input instanceof Blob) && !(input instanceof ArrayBuffer)) { + if (input.filename) return input.filename + } + return fallback +} + +function base64ToUint8Array(b64: string): Uint8Array { + // Strip data:URL prefix if present. + const m = /^data:[^;,]*(?:;[^,]*)?,(.+)$/.exec(b64) + const raw = m ? m[1] : b64 + // Node 18+ and modern browsers expose `atob`; on older Node `Buffer.from` + // is the canonical decoder. + const bin = + typeof atob === 'function' + ? atob(raw) + : // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).Buffer.from(raw, 'base64').toString('binary') + const out = new Uint8Array(bin.length) + for (let i = 0; i < bin.length; i++) out[i] = bin.charCodeAt(i) + return out +} + export interface TierConfig { name: string cpu: number diff --git a/packages/tcloud/src/index.ts b/packages/tcloud/src/index.ts index 37b730f..c7dd60a 100644 --- a/packages/tcloud/src/index.ts +++ b/packages/tcloud/src/index.ts @@ -151,6 +151,8 @@ export type { ShieldedConfig, EmbeddingOptions, EmbeddingResponse, + ImageEditAttachment, + ImageEditOptions, ImageGenerateOptions, ImageResponse, RerankOptions, diff --git a/packages/tcloud/src/types.ts b/packages/tcloud/src/types.ts index 1b8d421..436b7ce 100644 --- a/packages/tcloud/src/types.ts +++ b/packages/tcloud/src/types.ts @@ -87,6 +87,40 @@ export interface ImageGenerateOptions { response_format?: 'url' | 'b64_json' } +/** + * OpenAI-compatible /v1/images/edits request. 
Accepts one or more + * reference images + a prompt describing how to transform them. + * + * For gpt-image-2, the supplied images are passed as `image[]` (the + * model supports multi-image composition); for dall-e-2 only a single + * image is honored. The optional `mask` is the OpenAI inpainting mask + * (PNG with alpha) used to constrain where edits are applied. + * + * Carrying both shapes here lets callers point at the same `imagesEdit` + * method regardless of upstream model; cli-bridge / tangle-router + * decide what to forward. + */ +export interface ImageEditOptions { + model?: string + prompt: string + /** One or more reference images. Pass as Blob (browser/Node 22+), + * ArrayBuffer (will be wrapped in a Blob), or `{data, mediaType}` + * for base64 + explicit mime. The first form is preferred. */ + image: ImageEditAttachment | ImageEditAttachment[] + /** Optional inpainting mask — PNG with transparent pixels marking + * the editable region (OpenAI dall-e-2 / gpt-image-2 inpaint mode). */ + mask?: ImageEditAttachment + n?: number + size?: string + quality?: string + response_format?: 'url' | 'b64_json' +} + +export type ImageEditAttachment = + | Blob + | ArrayBuffer + | { data: string /** base64 */; mediaType: string; filename?: string } + export interface ImageResponse { created: number data: { url?: string; b64_json?: string; revised_prompt?: string }[]