Extend image API request templates

This commit is contained in:
Codex
2026-05-13 02:40:55 +00:00
parent 8430b771e1
commit bfc98c6a92
4 changed files with 297 additions and 156 deletions

View File

@@ -13,12 +13,8 @@ import { fetchPublicHttpUrl } from '@/lib/remote-fetch';
import { isTrustedInternalGenerationRequest, isUuid, resolveServerApiConfig } from '@/lib/server-api-config';
import { updateGenerationJobProgress } from '@/lib/generation-job-estimates';
import {
isDallE3Model,
isGptImageModel,
isOpenAICompatibleImageApi,
normalizeOpenAICompatibleImageCount,
normalizeOpenAICompatibleImageSize,
resolveImageApiTemplate,
type ImageApiTemplate,
} from '@/lib/image-api-templates';
import {
compressImageBufferForUpstream,
@@ -98,29 +94,6 @@ function mergeStylePrompt(prompt: string, stylePrompt: unknown): string {
return `${prompt.trim()}\n\nStyle instruction: ${normalized}`;
}
/**
 * Adds model-dependent optional parameters to an upstream image request body.
 * The body is mutated in place; nothing is returned.
 *
 * - Models accepted by `isGptImageModel`: sets `output_format` and `quality`,
 *   and `stream` when a value was supplied.
 * - Models accepted by `isDallE3Model`: maps the quality to `hd` (for `high`)
 *   or `standard`, and forwards `style` only when it is `natural` or `vivid`.
 * - Any model: sets `user` to the trimmed value when it is a non-blank string.
 *
 * @param requestBody - JSON body that will be sent upstream.
 * @param options - Model name and the caller-resolved generation options.
 */
function applyNewApiImageGenerationParams(
  requestBody: Record<string, unknown>,
  options: {
    modelName: string | undefined;
    outputFormat: 'png' | 'jpeg' | 'webp';
    imageQuality: 'auto' | 'high' | 'medium' | 'low';
    stream?: boolean;
    style?: unknown;
    user?: unknown;
  },
) {
  const { modelName, outputFormat, imageQuality, stream, style, user } = options;

  if (isGptImageModel(modelName)) {
    requestBody.output_format = outputFormat;
    requestBody.quality = imageQuality;
    // `stream` is only ever forwarded for this model family.
    if (stream !== undefined) {
      requestBody.stream = stream;
    }
  } else if (isDallE3Model(modelName)) {
    requestBody.quality = imageQuality === 'high' ? 'hd' : 'standard';
    const isKnownStyle = style === 'natural' || style === 'vivid';
    if (isKnownStyle) {
      requestBody.style = style;
    }
  }

  // Blank or non-string user identifiers are dropped rather than sent.
  const trimmedUser = typeof user === 'string' ? user.trim() : '';
  if (trimmedUser) {
    requestBody.user = trimmedUser;
  }
}
function normalizeImageCount(value: unknown): number | undefined {
if (value === 'auto' || value === undefined || value === null || value === '') return undefined;
const parsed = Number(value);
@@ -400,27 +373,6 @@ async function uploadDataUrlAndGetPublicUrl(dataUrl: string): Promise<string | n
}
}
/**
 * Maps an images endpoint URL to the sibling chat completions endpoint.
 *
 * A URL that already contains `/chat/completions` is returned untouched.
 * Otherwise everything from `/images/generations` or `/images/edits` onward
 * is replaced by `/chat/completions`, and trailing slashes are removed.
 */
function deriveChatCompletionsUrl(imagesUrl: string): string {
  const alreadyChatEndpoint = imagesUrl.includes('/chat/completions');
  if (alreadyChatEndpoint) {
    return imagesUrl;
  }
  const swapped = imagesUrl.replace(/\/images\/(generations|edits).*/i, '/chat/completions');
  return swapped.replace(/\/+$/, '');
}
/**
 * Maps an images/generations endpoint URL to the images/edits endpoint,
 * which the caller uses for multipart image-to-image requests.
 *
 * A URL that already contains `/images/edits` is returned untouched.
 * Otherwise everything from `/images/generations` onward is replaced by
 * `/images/edits`, and trailing slashes are removed.
 */
function deriveImagesEditsUrl(imagesUrl: string): string {
  const alreadyEditsEndpoint = imagesUrl.includes('/images/edits');
  if (alreadyEditsEndpoint) {
    return imagesUrl;
  }
  const swapped = imagesUrl.replace(/\/images\/generations.*/i, '/images/edits');
  return swapped.replace(/\/+$/, '');
}
/**
* Extract image URLs/data from a chat completions response.
*/
@@ -608,41 +560,20 @@ async function tryImageStrategy(
async function tryEditsWithFormData(
url: string,
apiKey: string,
model: string,
prompt: string,
fields: Record<string, string>,
imageBuffer: Buffer,
imageMimeType: string,
size: string | undefined,
strength: number | undefined,
count: number,
outputFormat: 'png' | 'jpeg' | 'webp',
imageQuality: 'auto' | 'high' | 'medium' | 'low',
useNewApi: boolean,
onProgress?: (progress: Record<string, unknown>) => void | Promise<void>,
): Promise<StrategyResult> {
const strategyName = '策略2: images/edits (FormData)';
console.log(`[Custom API img2img → ${strategyName}] URL:`, url, '| model:', model);
const strategyName = '策略1: images/edits (FormData)';
console.log(`[Custom API img2img → ${strategyName}] URL:`, url, '| model:', fields.model);
try {
// Build multipart/form-data manually (Node.js doesn't have native FormData that works with fetch)
const boundary = `----FormBoundary${Date.now()}${Math.random().toString(36).slice(2)}`;
const parts: Buffer[] = [];
// Add text fields
const textFields: Record<string, string> = {
model,
prompt,
stream: 'true',
};
if (size) textFields.size = size;
if (count > 1) textFields.n = String(count);
if (strength !== undefined) textFields.strength = String(strength);
if (useNewApi && isGptImageModel(model)) {
textFields.output_format = outputFormat;
textFields.quality = imageQuality;
}
for (const [key, value] of Object.entries(textFields)) {
for (const [key, value] of Object.entries(fields)) {
parts.push(Buffer.from(
`--${boundary}\r\nContent-Disposition: form-data; name="${key}"\r\n\r\n${value}\r\n`
));
@@ -706,17 +637,21 @@ async function tryEditsWithFormData(
*/
async function customApiImageToImage(
customApiConfig: CustomApiConfig,
imageApiTemplate: ImageApiTemplate,
prompt: string,
negativePrompt: string | undefined,
image: string,
strength: number | undefined,
size: string | undefined,
requestedSize: string | undefined,
count: number,
targetSize: TargetImageSize | null,
outputFormat: 'png' | 'jpeg' | 'webp',
imageQuality: 'auto' | 'high' | 'medium' | 'low',
useNewApi: boolean,
aspectRatio?: string,
resolution?: string,
quality?: string,
guidanceScale?: number,
style?: unknown,
user?: unknown,
onProgress?: (progress: Record<string, unknown>) => void | Promise<void>,
): Promise<NextResponse> {
const endpoint = customApiConfig.apiUrl;
@@ -797,28 +732,57 @@ async function customApiImageToImage(
if (hint) promptText += `\n\n[${hint}]`;
}
const headers = buildCustomApiHeaders(customApiConfig.apiKey);
const denoisingStrength = strength ?? 0.5;
const headers = buildCustomApiHeaders(customApiConfig.apiKey);
let rawBase64 = normalizedImage;
if (normalizedImage.startsWith('data:')) {
const commaIndex = normalizedImage.indexOf(',');
if (commaIndex !== -1) rawBase64 = normalizedImage.substring(commaIndex + 1);
}
const templatedRequest = imageApiTemplate.buildImageToImageRequest({
apiUrl: endpoint,
modelName: customApiConfig.modelName,
prompt: promptText,
negativePrompt,
aspectRatio,
size: requestedSize,
count,
outputFormat,
imageQuality,
guidanceScale,
style,
user,
imageUrl,
base64Image: rawBase64,
strength: denoisingStrength,
});
const targetSize = resolveTargetImageSize(
templatedRequest.requestSize,
aspectRatio,
resolution,
quality,
);
console.log('[Custom API img2img] Request template:', imageApiTemplate.id,
'| size:', templatedRequest.logFields.size,
'| n:', templatedRequest.logFields.n,
'| output_format:', templatedRequest.logFields.output_format,
'| quality:', templatedRequest.logFields.quality,
'| aspect_ratio:', templatedRequest.logFields.aspect_ratio,
'| stream:', templatedRequest.logFields.stream,
'| strength:', templatedRequest.logFields.strength);
// --- Strategy 1: /v1/images/edits with multipart/form-data ---
// This is THE format Cherry Studio uses! OpenAI's official endpoint.
// API proxies route multipart/form-data to the correct img2img account pool.
let result1: StrategyResult | null = null;
if (imageBuffer) {
const editsUrl = deriveImagesEditsUrl(endpoint);
result1 = await tryEditsWithFormData(
editsUrl,
templatedRequest.editsFormData.endpoint,
customApiConfig.apiKey,
customApiConfig.modelName,
promptText,
templatedRequest.editsFormData.fields,
imageBuffer,
imageMimeType,
size,
denoisingStrength,
count,
outputFormat,
imageQuality,
useNewApi,
onProgress,
);
if (result1.success && result1.images) {
@@ -829,30 +793,14 @@ async function customApiImageToImage(
}
// --- Strategy 2: chat/completions with image_url (multimodal style) ---
const chatUrl = deriveChatCompletionsUrl(endpoint);
const chatBody: Record<string, unknown> = {
model: customApiConfig.modelName,
stream: true,
messages: [
{
role: 'user',
content: [
{ type: 'image_url', image_url: { url: imageUrl } },
{ type: 'text', text: promptText },
],
},
],
size: size || '1024x1024',
n: count,
};
if (useNewApi) {
applyNewApiImageGenerationParams(chatBody, {
modelName: customApiConfig.modelName,
outputFormat,
imageQuality,
});
}
const result2 = await tryImageStrategy(chatUrl, headers, chatBody, '策略2: chat/completions', true, onProgress);
const result2 = await tryImageStrategy(
templatedRequest.chatJson.endpoint,
headers,
templatedRequest.chatJson.body,
'策略2: chat/completions',
templatedRequest.chatJson.isChatFormat,
onProgress,
);
if (result2.success && result2.images) {
const persisted = await persistQualifiedImageUrls(result2.images, 'generated/images', targetSize, 'Custom API img2img strategy2');
if (persisted.images.length > 0) return NextResponse.json({ images: persisted.images });
@@ -861,31 +809,14 @@ async function customApiImageToImage(
}
// --- Strategy 3: /v1/images/generations with init_image (Reference code / SD style) ---
let rawBase64 = normalizedImage;
if (normalizedImage.startsWith('data:')) {
const commaIndex = normalizedImage.indexOf(',');
if (commaIndex !== -1) rawBase64 = normalizedImage.substring(commaIndex + 1);
}
const imgBody: Record<string, unknown> = {
model: customApiConfig.modelName,
prompt: promptText,
n: count,
size: size || '1024x1024',
stream: true,
init_image: rawBase64,
denoising_strength: denoisingStrength,
};
if (useNewApi) {
applyNewApiImageGenerationParams(imgBody, {
modelName: customApiConfig.modelName,
outputFormat,
imageQuality,
});
} else {
imgBody.response_format = 'b64_json';
}
const result3 = await tryImageStrategy(endpoint, headers, imgBody, '策略3: images/generations+init_image', false, onProgress);
const result3 = await tryImageStrategy(
templatedRequest.generationJson.endpoint,
headers,
templatedRequest.generationJson.body,
'策略3: images/generations+init_image',
templatedRequest.generationJson.isChatFormat,
onProgress,
);
if (result3.success && result3.images) {
const persisted = await persistQualifiedImageUrls(result3.images, 'generated/images', targetSize, 'Custom API img2img strategy3');
if (persisted.images.length > 0) return NextResponse.json({ images: persisted.images });
@@ -1016,35 +947,27 @@ export async function POST(request: NextRequest) {
// ---- Custom API mode ----
if (resolvedCustomApiConfig && resolvedCustomApiConfig.apiKey) {
const resolvedApiKey = resolvedCustomApiConfig.apiKey;
const useNewApi = isOpenAICompatibleImageApi(resolvedCustomApiConfig as CustomApiConfig);
const imageApiTemplate = resolveImageApiTemplate(resolvedCustomApiConfig as CustomApiConfig);
try {
// Image-to-image: use multi-strategy approach
if (image) {
const customApiSize = useNewApi
? normalizeOpenAICompatibleImageSize(resolvedCustomApiConfig.modelName, requestedCustomSize)
: requestedCustomSize;
const customApiCount = useNewApi
? normalizeOpenAICompatibleImageCount(resolvedCustomApiConfig.modelName, resolvedAutoParams.count)
: resolvedAutoParams.count;
const customTargetSize = resolveTargetImageSize(
customApiSize,
resolvedAutoParams.aspectRatio,
resolvedAutoParams.resolution,
quality,
);
return await customApiImageToImage(
resolvedCustomApiConfig as CustomApiConfig,
imageApiTemplate,
promptForGeneration,
negativePrompt,
image,
strength,
customApiSize,
customApiCount,
customTargetSize,
requestedCustomSize,
resolvedAutoParams.count,
resolvedOutputFormat,
resolvedImageQuality,
useNewApi,
resolvedAutoParams.aspectRatio,
resolvedAutoParams.resolution,
quality,
guidanceScale,
style,
user,
handleUpstreamProgress,
);
}
@@ -1061,7 +984,6 @@ export async function POST(request: NextRequest) {
// Resolve the selected model's API template and let it build the upstream request.
const ratioHint = getAspectRatioPromptHint(resolvedAutoParams.aspectRatio);
const augmentedPrompt = ratioHint ? `${promptForGeneration}\n\n[${ratioHint}]` : promptForGeneration;
const imageApiTemplate = resolveImageApiTemplate(resolvedCustomApiConfig as CustomApiConfig);
const templatedRequest = imageApiTemplate.buildTextToImageRequest({
apiUrl: endpoint,
modelName: resolvedCustomApiConfig.modelName,

View File

@@ -1,5 +1,19 @@
import type { ImageApiTemplate } from './types';
/**
 * Returns the chat completions endpoint that corresponds to an images endpoint.
 *
 * URLs already containing `/chat/completions` pass through unchanged; otherwise
 * the `/images/generations…` or `/images/edits…` tail is swapped for
 * `/chat/completions` and trailing slashes are stripped.
 */
function deriveChatCompletionsUrl(imagesUrl: string): string {
  return imagesUrl.includes('/chat/completions')
    ? imagesUrl
    : imagesUrl
        .replace(/\/images\/(generations|edits).*/i, '/chat/completions')
        .replace(/\/+$/, '');
}
/**
 * Returns the images/edits endpoint that corresponds to an images/generations endpoint.
 *
 * URLs already containing `/images/edits` pass through unchanged; otherwise
 * the `/images/generations…` tail is swapped for `/images/edits` and trailing
 * slashes are stripped.
 */
function deriveImagesEditsUrl(imagesUrl: string): string {
  return imagesUrl.includes('/images/edits')
    ? imagesUrl
    : imagesUrl
        .replace(/\/images\/generations.*/i, '/images/edits')
        .replace(/\/+$/, '');
}
export const genericJsonImageTemplate: ImageApiTemplate = {
id: 'generic-json',
label: 'Generic JSON image generation',
@@ -36,4 +50,82 @@ export const genericJsonImageTemplate: ImageApiTemplate = {
},
};
},
/**
 * Builds the three request variants for an image-to-image call with the
 * generic JSON adapter: multipart fields for images/edits, a chat/completions
 * JSON body carrying the image as an `image_url` part, and an
 * images/generations JSON body carrying the image as `init_image`.
 *
 * The count is floored and clamped to 1..10, the size falls back to
 * `1024x1024`, and `denoising_strength` falls back to 0.5.
 * All returned descriptors share one `logFields` object.
 */
buildImageToImageRequest(input) {
  const { apiUrl, modelName, prompt } = input;
  const flooredCount = Math.floor(input.count);
  const n = Math.min(10, Math.max(1, flooredCount));
  const size = input.size || '1024x1024';
  const effectiveStrength = input.strength ?? 0.5;

  // Multipart text fields: `n` only when more than one image is requested,
  // `strength` only when the caller supplied one (no default applied here).
  const multipartFields: Record<string, string> = {
    model: modelName,
    prompt,
    stream: 'true',
    size,
    ...(n > 1 ? { n: String(n) } : {}),
    ...(input.strength !== undefined ? { strength: String(input.strength) } : {}),
  };

  // Chat variant: a single user turn with the image first, then the prompt text.
  const userTurn = {
    role: 'user',
    content: [
      { type: 'image_url', image_url: { url: input.imageUrl } },
      { type: 'text', text: prompt },
    ],
  };
  const chatPayload: Record<string, unknown> = {
    model: modelName,
    stream: true,
    messages: [userTurn],
    size,
    n,
  };

  // Generation variant: base64 source image plus denoising strength.
  const generationPayload: Record<string, unknown> = {
    model: modelName,
    prompt,
    n,
    size,
    stream: true,
    init_image: input.base64Image,
    denoising_strength: effectiveStrength,
    response_format: 'b64_json',
  };
  if (input.negativePrompt) {
    generationPayload.negative_prompt = input.negativePrompt;
  }
  // A guidance scale of exactly 7 (or a falsy one) is not forwarded.
  const hasCustomGuidance = input.guidanceScale && input.guidanceScale !== 7;
  if (hasCustomGuidance) {
    generationPayload.guidance_scale = input.guidanceScale;
  }
  // The literal 'original' aspect ratio is not forwarded.
  const hasExplicitRatio = input.aspectRatio && input.aspectRatio !== 'original';
  if (hasExplicitRatio) {
    generationPayload.aspect_ratio = input.aspectRatio;
  }

  // Log snapshot is read back from the generation payload so it reflects what is sent.
  const logFields = {
    adapter: 'generic-json',
    size,
    n,
    output_format: generationPayload.output_format,
    quality: generationPayload.quality,
    aspect_ratio: generationPayload.aspect_ratio,
    stream: generationPayload.stream,
    guidance_scale: generationPayload.guidance_scale,
    strength: effectiveStrength,
  };

  const editsFormData = {
    endpoint: deriveImagesEditsUrl(apiUrl),
    fields: multipartFields,
    logFields,
  };
  const chatJson = {
    endpoint: deriveChatCompletionsUrl(apiUrl),
    body: chatPayload,
    isChatFormat: true,
    logFields,
  };
  const generationJson = {
    endpoint: apiUrl,
    body: generationPayload,
    isChatFormat: false,
    logFields,
  };

  return {
    editsFormData,
    chatJson,
    generationJson,
    requestCount: n,
    requestSize: size,
    logFields,
  };
},
};

View File

@@ -1,4 +1,4 @@
import type { ImageApiConfigForTemplate, ImageApiTemplate, TextToImageTemplateInput } from './types';
import type { ImageApiConfigForTemplate, ImageApiTemplate, ImageToImageTemplateInput, TextToImageTemplateInput } from './types';
export function isGptImageModel(modelName: string | undefined): boolean {
return /^gpt-image-/i.test((modelName || '').trim());
@@ -82,6 +82,36 @@ function applyOpenAICompatibleExtras(body: Record<string, unknown>, input: TextT
if (typeof input.user === 'string' && input.user.trim()) body.user = input.user.trim();
}
/**
 * Converts an images endpoint URL into the matching chat completions URL.
 *
 * Input that already contains `/chat/completions` is returned as-is. Otherwise
 * the `/images/generations` or `/images/edits` segment and everything after it
 * become `/chat/completions`, then any trailing slashes are dropped.
 */
function deriveChatCompletionsUrl(imagesUrl: string): string {
  if (!imagesUrl.includes('/chat/completions')) {
    const rewritten = imagesUrl.replace(/\/images\/(generations|edits).*/i, '/chat/completions');
    return rewritten.replace(/\/+$/, '');
  }
  return imagesUrl;
}
/**
 * Converts an images/generations endpoint URL into the matching images/edits URL.
 *
 * Input that already contains `/images/edits` is returned as-is. Otherwise the
 * `/images/generations` segment and everything after it become `/images/edits`,
 * then any trailing slashes are dropped.
 */
function deriveImagesEditsUrl(imagesUrl: string): string {
  if (!imagesUrl.includes('/images/edits')) {
    const rewritten = imagesUrl.replace(/\/images\/generations.*/i, '/images/edits');
    return rewritten.replace(/\/+$/, '');
  }
  return imagesUrl;
}
/**
 * Assembles the text fields of a multipart images/edits request.
 *
 * `model`, `prompt` and `stream` are always present. `size` is added when
 * non-empty, `n` when more than one image is requested, and `strength` when the
 * caller supplied one. Models accepted by `isGptImageModel` also get
 * `output_format` and `quality`.
 *
 * @param input - Template input describing the image-to-image request.
 * @param requestCount - Already-normalized number of images to request.
 * @param requestSize - Already-normalized size, or `undefined` to omit it.
 * @returns String-valued form fields in the order they should be written.
 */
function buildOpenAICompatibleImageEditFields(input: ImageToImageTemplateInput, requestCount: number, requestSize: string | undefined) {
  const sizeField = requestSize ? { size: requestSize } : {};
  const countField = requestCount > 1 ? { n: String(requestCount) } : {};
  const strengthField = input.strength !== undefined ? { strength: String(input.strength) } : {};
  const gptImageFields = isGptImageModel(input.modelName)
    ? { output_format: input.outputFormat, quality: input.imageQuality }
    : {};

  // Spread order fixes the field order: base fields first, then the optional ones.
  const fields: Record<string, string> = {
    model: input.modelName,
    prompt: input.prompt,
    stream: 'true',
    ...sizeField,
    ...countField,
    ...strengthField,
    ...gptImageFields,
  };
  return fields;
}
export const openAICompatibleImageTemplate: ImageApiTemplate = {
id: 'openai-compatible',
label: 'OpenAI/NewAPI compatible image generation',
@@ -116,4 +146,72 @@ export const openAICompatibleImageTemplate: ImageApiTemplate = {
},
};
},
/**
 * Builds the three request variants for an image-to-image call with the
 * OpenAI-compatible adapter: multipart fields for images/edits, a
 * chat/completions JSON body carrying the image as an `image_url` part, and an
 * images/generations JSON body carrying the image as `init_image`.
 *
 * Count and size go through the OpenAI-compatible normalizers, and
 * `denoising_strength` falls back to 0.5. Both JSON bodies are passed through
 * `applyOpenAICompatibleExtras`. All returned descriptors share one
 * `logFields` object.
 */
buildImageToImageRequest(input) {
  const { apiUrl, modelName, prompt } = input;
  const n = normalizeOpenAICompatibleImageCount(modelName, input.count);
  const size = normalizeOpenAICompatibleImageSize(modelName, input.size);
  const effectiveStrength = input.strength ?? 0.5;

  const multipartFields = buildOpenAICompatibleImageEditFields(input, n, size);

  // Chat variant: a single user turn with the image first, then the prompt text.
  const userTurn = {
    role: 'user',
    content: [
      { type: 'image_url', image_url: { url: input.imageUrl } },
      { type: 'text', text: prompt },
    ],
  };
  const chatPayload: Record<string, unknown> = {
    model: modelName,
    stream: true,
    messages: [userTurn],
    size,
    n,
  };
  applyOpenAICompatibleExtras(chatPayload, input);

  // Generation variant: base64 source image plus denoising strength.
  const generationPayload: Record<string, unknown> = {
    model: modelName,
    prompt,
    n,
    size,
    stream: true,
    init_image: input.base64Image,
    denoising_strength: effectiveStrength,
  };
  applyOpenAICompatibleExtras(generationPayload, input);

  // Log snapshot is read back from the generation payload after extras were applied.
  const logFields = {
    adapter: 'openai-compatible',
    size,
    n,
    output_format: generationPayload.output_format,
    quality: generationPayload.quality,
    aspect_ratio: generationPayload.aspect_ratio,
    stream: generationPayload.stream,
    guidance_scale: generationPayload.guidance_scale,
    strength: effectiveStrength,
  };

  const editsFormData = {
    endpoint: deriveImagesEditsUrl(apiUrl),
    fields: multipartFields,
    logFields,
  };
  const chatJson = {
    endpoint: deriveChatCompletionsUrl(apiUrl),
    body: chatPayload,
    isChatFormat: true,
    logFields,
  };
  const generationJson = {
    endpoint: apiUrl,
    body: generationPayload,
    isChatFormat: false,
    logFields,
  };

  return {
    editsFormData,
    chatJson,
    generationJson,
    requestCount: n,
    requestSize: size,
    logFields,
  };
},
};

View File

@@ -30,9 +30,38 @@ export type TextToImageTemplateResult = {
logFields: Record<string, unknown>;
};
/**
 * Input for `ImageApiTemplate.buildImageToImageRequest`: everything the
 * text-to-image builder receives, plus the source image and an optional strength.
 */
export type ImageToImageTemplateInput = TextToImageTemplateInput & {
/** Source image reference; the templates place it in the `image_url` part of the chat message. */
imageUrl: string;
/** Source image as base64; the templates send it as `init_image` in the generation-style JSON body. */
base64Image: string;
/** Optional strength; the templates fall back to 0.5 for `denoising_strength` when it is omitted. */
strength?: number;
};
/** Description of a multipart/form-data image-edit request produced by a template. */
export type ImageToImageFormDataRequest = {
/** URL the multipart request is sent to. */
endpoint: string;
/** Text fields of the form, keyed by field name; the image file itself is not part of this map. */
fields: Record<string, string>;
/** Summary values intended for logging. */
logFields: Record<string, unknown>;
};
/** Description of a JSON image-to-image request produced by a template. */
export type ImageToImageJsonRequest = {
/** URL the JSON request is sent to. */
endpoint: string;
/** JSON request body. */
body: Record<string, unknown>;
/**
 * The templates set this to `true` for the chat/completions variant and `false`
 * for the images/generations variant.
 * NOTE(review): presumably selects how the response is parsed — confirm against the request runner.
 */
isChatFormat: boolean;
/** Summary values intended for logging. */
logFields: Record<string, unknown>;
};
/**
 * Everything a template produces for one image-to-image call: three alternative
 * request descriptions plus the count and size the template settled on.
 */
export type ImageToImageTemplateResult = {
/** Multipart variant targeting the images/edits endpoint. */
editsFormData: ImageToImageFormDataRequest;
/** JSON variant targeting the chat/completions endpoint. */
chatJson: ImageToImageJsonRequest;
/** JSON variant targeting the configured images endpoint, carrying the source image inline. */
generationJson: ImageToImageJsonRequest;
/** Number of images requested after the template's own normalization. */
requestCount: number;
/** Size requested after the template's own normalization; may be `undefined`. */
requestSize: string | undefined;
/** Summary values intended for logging. */
logFields: Record<string, unknown>;
};
/** Contract each image API adapter implements to translate generic inputs into upstream requests. */
export type ImageApiTemplate = {
/** Short identifier of the template (for example `generic-json` or `openai-compatible`). */
id: string;
/** Human-readable description of the template. */
label: string;
/**
 * Reports whether this template applies to the given API configuration.
 * NOTE(review): presumably consulted when resolving which template to use — confirm against the resolver.
 */
matches: (config: ImageApiConfigForTemplate) => boolean;
/** Builds the upstream request for a prompt-only generation. */
buildTextToImageRequest: (input: TextToImageTemplateInput) => TextToImageTemplateResult;
/** Builds the alternative upstream requests for a generation that starts from a source image. */
buildImageToImageRequest: (input: ImageToImageTemplateInput) => ImageToImageTemplateResult;
};