feat: link reference images in prompts

This commit is contained in:
FengLee
2026-06-06 12:55:00 +08:00
parent fb5760cb36
commit e0d606a6c1
11 changed files with 462 additions and 32 deletions

View File

@@ -85,8 +85,8 @@ All email sends route through `src/lib/email-service.ts`, which renders HTML and
| Method | Path | Auth | Source | Request | Response |
| --- | --- | --- | --- | --- | --- |
| GET | `/api/admin/invitations` | Admin | `src/app/api/admin/invitations/route.ts` | Optional `search`, `page`, `pageSize` | Long-term invitation records joining inviter and invitee profile details. |
| POST | `/api/generate/image` | Trusted internal or resolved user/system API context | `src/app/api/generate/image/route.ts` | Image generation payload; supports prompt, negative prompt, reference images, model/system/custom API config, aspect/size/resolution/count/quality. | Calls SDK or OpenAI/New API-compatible endpoint, persists original images to object storage and local WEBP thumbnails to `thumbnails/generated/images`, returns `images` original URLs plus `thumbnails`, `thumbnailUrls`, and `dimensions` `{ [originalUrl]: { width, height } }`, updates job progress when headers include job ID. |
| POST | `/api/generate/video` | Trusted internal or resolved user/system API context | `src/app/api/generate/video/route.ts` | Video generation payload; supports prompt, reference image, model/system/custom API config, ratio/duration/fps-like params. | Calls SDK or Manifest/custom endpoint, polls async Manifest providers such as 元界 media tasks, then persists generated video media as object-backed `/api/local-storage/generated/videos/...` URLs when object storage is configured. |
| POST | `/api/generate/image` | Trusted internal or resolved user/system API context | `src/app/api/generate/image/route.ts` | Image generation payload; supports prompt, negative prompt, reference images (`image` plus `extraImages`), optional `referenceImageAnnotations`, model/system/custom API config, aspect/size/resolution/count/quality. | Calls SDK or OpenAI/New API-compatible endpoint, persists original images to object storage and local WEBP thumbnails to `thumbnails/generated/images`, returns `images` original URLs plus `thumbnails`, `thumbnailUrls`, and `dimensions` `{ [originalUrl]: { width, height } }`, updates job progress when headers include job ID. When `referenceImageAnnotations` is present, `src/lib/reference-image-prompt.ts` adds a model-readable `@参考图N` mapping block to the upstream prompt before style prompts and Manifest execution. |
| POST | `/api/generate/video` | Trusted internal or resolved user/system API context | `src/app/api/generate/video/route.ts` | Video generation payload; supports prompt, reference images (`image`, `images`, `extraImages`), optional `referenceImageAnnotations`, model/system/custom API config, ratio/duration/fps-like params. | Calls SDK or Manifest/custom endpoint, polls async Manifest providers such as 元界 media tasks, then persists generated video media as object-backed `/api/local-storage/generated/videos/...` URLs when object storage is configured. When `referenceImageAnnotations` is present, `src/lib/reference-image-prompt.ts` adds a model-readable `@参考图N` mapping block to the upstream prompt before Manifest/custom/SDK execution. |
| POST | `/api/generate/reverse-prompt` | Uses supplied/resolved API config; Bearer token required when resolving user custom or gated system API IDs | `src/app/api/generate/reverse-prompt/route.ts` | `image`, `outputMode`, `language`, optional `customApiConfig`/system/custom IDs | Returns prompt fields and may persist reference image. The create-panel caller must forward the stored access token in `Authorization` because server-side API resolution cannot read browser localStorage. When the input image is a data URL, the route persists it under `reverse-prompt/reference-images/...` and sends the public `/api/local-storage/...` URL upstream when available so the multimodal model sees a normal fetchable image URL instead of a raw upload blob. This route sends a multimodal `chat/completions` payload with `image_url`, so 524 errors here reflect multimodal upstream latency/capability rather than image-generation sync behavior. |
| POST | `/api/generate/suggest-prompt` | Uses supplied/resolved API config | `src/app/api/generate/suggest-prompt/route.ts` | `prompt`, optional `customApiConfig`, `systemPrefix` | Returns optimized `prompt` and optional `negativePrompt`. This route also uses a multimodal `chat/completions` path, so 524 should be interpreted as a multimodal upstream timeout. |
@@ -181,7 +181,7 @@ Primary SQL tables touched directly in API routes include:
`src/lib/yuanjie-pricing-sync.ts` is the canonical source for manual 元界 AI pricing metadata sync. It derives billing modes from the built-in image/video templates and local docs: image models default to fixed per-use pricing, duration-sensitive video models sync to `duration`, Seedance token-billed video models sync to `token`, and special variable-cost video models sync to `ratio` with a warning note. The sync is manual from the admin system-default-model page and only updates existing 元界 rows, including legacy provider spellings such as `元界AI`; update SQL still includes a 元界 provider/model-group guard so mozheAPI rows cannot be touched by the sync.
Yuanjie Manifest references use `$inputImages.urls` for provider-facing JSON fields. For image-to-image, `/api/generate/image` reads the primary `image` plus `extraImages` and sends all references to `src/lib/user-api-manifest-executor.ts`; the executor uploads data URL references into storage before rendering Yuanjie `params.images`, top-level `images`, or `base64Array`. Yuanjie video templates keep documented model-specific fields inside `src/lib/yuanjie-video-model-templates.ts`, including first/last reference fields and mode fields such as `input_reference`, `reference_urls`, `img_url`, `image_tail`, `ratio`, `size`, and `generation_mode`.
Yuanjie Manifest references use `$inputImages.urls` for provider-facing JSON fields. For image-to-image, `/api/generate/image` reads the primary `image` plus `extraImages` and sends all references to `src/lib/user-api-manifest-executor.ts`; for image-to-video, `/api/generate/video` reads `image`, `images`, and `extraImages` before Manifest execution. The executor uploads data URL references into storage before rendering Yuanjie `params.images`, top-level `images`, `reference_urls`, or `base64Array`. `referenceImageAnnotations` is an API payload field rather than a Manifest variable; image/video routes use `src/lib/reference-image-prompt.ts` to merge `@参考图N` token mappings into the upstream prompt so existing Manifest templates receive the mapping through `$prompt`. Yuanjie video templates keep documented model-specific fields inside `src/lib/yuanjie-video-model-templates.ts`, including first/last reference fields and mode fields such as `input_reference`, `reference_urls`, `img_url`, `image_tail`, `ratio`, `size`, and `generation_mode`.
`src/lib/yuanjie-system-manifest.ts` provides the runtime bridge for existing admin system API rows that were created before Manifest-backed Yuanjie templates. It exposes built-in capabilities to `/api/model-config` even when `manifest_path` is empty, and when a known 元界 system API is resolved directly or as a default-model polling candidate it writes missing or stale `system-api-manifests/<systemApiId>.json`, normalizes `api_url` back to the 元界 base URL, and preserves the encrypted API key and administrator pricing.

View File

@@ -51,9 +51,9 @@ Use this document to jump directly to code before broad searching.
| --- | --- | --- |
| Tab container | `src/app/create/page.tsx` | Owns the five creation tabs. Active tab is persisted in localStorage and mirrored to `/create?type=...`, so refreshes and shared links stay on text-to-image, image-to-image, text-to-video, image-to-video, or reverse-prompt. On phones the mode switch is the single fixed icon row below the navbar; the page title and duplicate text mode strip are hidden. Mobile layout classes in this page and `src/app/globals.css` turn the create center into a chat-style flow: text-to-image sorts history from oldest to newest and auto-scrolls to the latest work above the fixed composer, hides the empty result placeholder until the user submits a prompt, renders generating tasks as the newest prompt-plus-progress message, and uses `src/components/create/mobile-creation-composer.tsx` as the fixed bottom composer with compact labeled ratio/resolution/count controls, optional style strip that expands the composer upward, prompt input, and right send button. |
| Text to image | `src/components/create/text-to-image.tsx` | `src/app/api/generation-jobs/route.ts`, `src/app/api/generate/image/route.ts`, `src/components/create/use-generation-job-recovery.ts`. The create button is disabled while the current entry has active tasks and shows `任务生成中`; users should use the count control for multiple images instead of repeatedly pressing submit. Active jobs render through `src/components/create/generation-task-list.tsx` inside the results column. Model select items use `src/components/create/grouped-model-select-items.tsx` so admin global system models appear under `默认模型` and user-added keys appear under `自定义模型`. Selected model capabilities from `src/lib/model-capabilities.ts` can hide unsupported aspect ratio/resolution/format/quality controls as well as filter their options, which is required for built-in 元界 image templates such as GPT Image 2 where the docs expose `size` pixel values instead of a separate aspect-ratio control. It consumes reuse drafts from `src/lib/creation-reuse.ts` and opens `src/components/create/inspiration-gallery-dialog.tsx` from the `获取灵感` action so gallery text-to-image works can fill prompt, negative prompt, model, ratio, resolution, format, quality, count, style, and guidance into the form. The mobile conversation history should only mount on mobile viewports; CSS-hidden mobile history still runs image effects if mounted on desktop. |
| Image to image | `src/components/create/image-to-image.tsx` | `src/app/api/generation-jobs/route.ts`, `src/app/api/generate/image/route.ts`, `src/components/create/use-generation-job-recovery.ts`. Reference thumbnails single-click into a bare image overlay, active jobs render through `src/components/create/generation-task-list.tsx`, and the create button is disabled while active tasks exist to avoid duplicate in-flight submissions. Model select items use `src/components/create/grouped-model-select-items.tsx` for `默认模型` versus `自定义模型` grouping. Selected model capabilities from `src/lib/model-capabilities.ts` can hide unsupported aspect ratio/resolution/format/quality controls as well as filter their options, which is required for built-in 元界 image templates such as GPT Image 2 where the docs expose `size` pixel values instead of a separate aspect-ratio control. 图生图 removes `自动` from ratio/resolution/count controls, defaults count to `1`, and derives ratio from Yuanjie size labels or dimensions when the selected model hides the separate ratio control. It consumes reuse drafts from `src/lib/creation-reuse.ts` and opens `src/components/create/inspiration-gallery-dialog.tsx` from the `获取灵感` action so gallery image-to-image works can place reference images and fill prompt, negative prompt, model, ratio, resolution, format, quality, count, style, and strength into the form. |
| Image to image | `src/components/create/image-to-image.tsx`, `src/components/create/reference-image-mention-controls.tsx` | `src/app/api/generation-jobs/route.ts`, `src/app/api/generate/image/route.ts`, `src/components/create/use-generation-job-recovery.ts`, `src/lib/reference-image-prompt.ts`. Reference thumbnails single-click into a bare image overlay, active jobs render through `src/components/create/generation-task-list.tsx`, and the create button is disabled while active tasks exist to avoid duplicate in-flight submissions. Model select items use `src/components/create/grouped-model-select-items.tsx` for `默认模型` versus `自定义模型` grouping. Selected model capabilities from `src/lib/model-capabilities.ts` can hide unsupported aspect ratio/resolution/format/quality controls as well as filter their options, which is required for built-in 元界 image templates such as GPT Image 2 where the docs expose `size` pixel values instead of a separate aspect-ratio control. 图生图 removes `自动` from ratio/resolution/count controls, defaults count to `1`, and derives ratio from Yuanjie size labels or dimensions when the selected model hides the separate ratio control. It consumes reuse drafts from `src/lib/creation-reuse.ts` and opens `src/components/create/inspiration-gallery-dialog.tsx` from the `获取灵感` action so gallery image-to-image works can place reference images and fill prompt, negative prompt, model, ratio, resolution, format, quality, count, style, and strength into the form. When multiple reference images are uploaded, each one shows a label such as `@参考图1`; typing `@` in the prompt input lets the user pick a reference image; on submit the panel sends `referenceImageAnnotations`, and the backend writes each token together with its upload order, file name, and dimensions into the upstream prompt. |
| Text to video | `src/components/create/text-to-video.tsx` | `src/app/api/generation-jobs/route.ts`, `src/app/api/generate/video/route.ts`, `src/components/create/use-generation-job-recovery.ts`. The create button is disabled while active tasks exist, active jobs render through `src/components/create/generation-task-list.tsx`, and model select items use `src/components/create/grouped-model-select-items.tsx` for `默认模型` versus `自定义模型` grouping. It consumes video reuse drafts from `src/lib/creation-reuse.ts` and opens `src/components/create/inspiration-gallery-dialog.tsx` from the `获取灵感` action so gallery text-to-video works can fill prompt, negative prompt, model, ratio, duration, camera movement, and style. |
| Image to video | `src/components/create/image-to-video.tsx` | `src/app/api/generation-jobs/route.ts`, `src/app/api/generate/video/route.ts`, `src/components/create/use-generation-job-recovery.ts`. Uploaded reference thumbnails single-click into the same bare image overlay used by image-to-image, active jobs render through `src/components/create/generation-task-list.tsx`, and the create button is disabled while active tasks exist. Model select items use `src/components/create/grouped-model-select-items.tsx` for `默认模型` versus `自定义模型` grouping. It consumes video reuse drafts from `src/lib/creation-reuse.ts` and opens `src/components/create/inspiration-gallery-dialog.tsx` from the `获取灵感` action so gallery image-to-video works can place reference images and fill prompt, negative prompt, model, ratio, duration, and camera movement. |
| Image to video | `src/components/create/image-to-video.tsx`, `src/components/create/reference-image-mention-controls.tsx` | `src/app/api/generation-jobs/route.ts`, `src/app/api/generate/video/route.ts`, `src/components/create/use-generation-job-recovery.ts`, `src/lib/reference-image-prompt.ts`. Uploaded reference thumbnails single-click into the same bare image overlay used by image-to-image, active jobs render through `src/components/create/generation-task-list.tsx`, and the create button is disabled while active tasks exist. Model select items use `src/components/create/grouped-model-select-items.tsx` for `默认模型` versus `自定义模型` grouping. It consumes video reuse drafts from `src/lib/creation-reuse.ts` and opens `src/components/create/inspiration-gallery-dialog.tsx` from the `获取灵感` action so gallery image-to-video works can place reference images and fill prompt, negative prompt, model, ratio, duration, and camera movement. When multiple reference images are uploaded, each one shows a label such as `@参考图1`; typing `@` in the prompt input lets the user pick a reference image; on submit the panel sends `referenceImageAnnotations`, and the backend writes each token together with its upload order, file name, and dimensions into the upstream prompt. |
| Reverse prompt | `src/components/create/reverse-prompt-panel.tsx` | `src/app/api/generate/reverse-prompt/route.ts`, `src/app/api/generate/suggest-prompt/route.ts`, `src/lib/generation-job-client.ts`, `src/components/create/use-generation-job-recovery.ts`. Reverse prompt now runs as a background job, survives refresh/auth change/tab switch, and writes the completed result back into the normal creation history flow instead of relying on an optimistic local-only row. |
| Prompt textarea | `src/components/create/expandable-prompt-textarea.tsx` | Shared prompt input. |
| Mobile creation composer | `src/components/create/mobile-creation-composer.tsx`, `src/app/globals.css` | Mobile-only fixed bottom composer used by text-to-image to match chat-style clients: top parameter strip with compact dropdown buttons for ratio/resolution/count, optional style strip, prompt input, and right send button. The mobile creation center uses one 16px UI font size across selected values, style chips, composer input, and conversation prompts. The mobile text-to-image parameter strip hides the `画面比例`/`分辨率`/`生成数量` labels and removes `自动` from ratio, resolution, and count choices, defaulting to explicit values instead. The mobile style strip shows only one horizontal row when collapsed and expands upward for search/more presets after tapping `展开`. Mode selection stays only in the sticky header tabs. Desktop creation forms remain the source for full advanced controls. |
@@ -73,14 +73,14 @@ Use this document to jump directly to code before broad searching.
| Worker loop | `src/lib/generation-job-worker.ts` | Picks and processes queued jobs. After successful system default image/video generation, it calls `src/lib/generation-credit-service.ts` to deduct credits from `profiles.credits_balance`, insert `credit_transactions`, and add `creditsCost`/`creditsBalance` to the job result for frontend display. Failed generation jobs do not enter the charge path. |
| Internal runner | `src/lib/generation-job-runner.ts` | Calls `/api/generate/image` or `/api/generate/video` with internal headers. |
| ETA/progress | `src/lib/generation-job-estimates.ts` | Runtime schema, ETA samples, progress payload. |
| Image route | `src/app/api/generate/image/route.ts` | SDK + custom/system API + New API image compatibility, persistence. New image originals persist through `src/lib/media-storage.ts` into object storage, while local WEBP thumbnails are returned as `thumbnails`/`thumbnailUrls` for preview rendering and `dimensions` maps each original URL to persisted width/height so history detail metadata can avoid loading originals. Generated image originals are normalized to the user-selected output format before upload, so providers that ignore `output_format` and return PNG still produce `.jpg`/`.webp` objects when JPEG/WebP was requested. For admin default system models, image generation resolves all same-type/same-display-name default API candidates, automatically retries stream-timeout failures once with `stream:false`, and returns actionable upstream timeout/gateway messages when all candidates fail. If a Manifest provider such as 元界 returns result URLs but MiaoJing cannot download or save them, the route reports a platform download/save failure instead of a resolution mismatch. User custom APIs remain single-config and do not use this polling fallback. |
| Video route | `src/app/api/generate/video/route.ts` | SDK + custom/system API video, persistence. Generated video data URLs and upstream video URLs are persisted through `localStorage.uploadFileObjectOnly(...)` under `generated/videos`, so production video originals live in object storage when configured. Video create panels must use backend returned `creditsCost`/`creditsBalance` after job success; they should not locally predict or deduct credits. |
| Image route | `src/app/api/generate/image/route.ts`, `src/lib/reference-image-prompt.ts` | SDK + custom/system API + New API image compatibility, persistence. New image originals persist through `src/lib/media-storage.ts` into object storage, while local WEBP thumbnails are returned as `thumbnails`/`thumbnailUrls` for preview rendering and `dimensions` maps each original URL to persisted width/height so history detail metadata can avoid loading originals. Generated image originals are normalized to the user-selected output format before upload, so providers that ignore `output_format` and return PNG still produce `.jpg`/`.webp` objects when JPEG/WebP was requested. For admin default system models, image generation resolves all same-type/same-display-name default API candidates, automatically retries stream-timeout failures once with `stream:false`, and returns actionable upstream timeout/gateway messages when all candidates fail. If a Manifest provider such as 元界 returns result URLs but MiaoJing cannot download or save them, the route reports a platform download/save failure instead of a resolution mismatch. User custom APIs remain single-config and do not use this polling fallback. For image-to-image, optional `referenceImageAnnotations` are merged into the model prompt so `@参考图N` maps to the corresponding uploaded reference image. |
| Video route | `src/app/api/generate/video/route.ts`, `src/lib/reference-image-prompt.ts` | SDK + custom/system API video, persistence. Generated video data URLs and upstream video URLs are persisted through `localStorage.uploadFileObjectOnly(...)` under `generated/videos`, so production video originals live in object storage when configured. Video create panels must use backend returned `creditsCost`/`creditsBalance` after job success; they should not locally predict or deduct credits. For image-to-video, optional `referenceImageAnnotations` are merged into the model prompt so `@参考图N` maps to the corresponding uploaded reference image. |
| Custom API transport | `src/lib/custom-api-fetch.ts`, `src/lib/custom-image-fallback.ts` | Headers, one retry for 502/503/504 gateway failures, progress JSON parsing, upstream error parsing, stream-to-sync fallback policy for system image APIs. |
| Server API resolution | `src/lib/server-api-config.ts`, `src/lib/yuanjie-system-manifest.ts` | Resolves user custom API and admin system API IDs into decrypted credentials, enforces system API default visibility plus membership-tier allowlists before generation, and builds default-model polling candidates by media type plus admin display name (`system_api_configs.name`). For known 元界 system rows with missing or stale `manifest_path`, both direct system API resolution and default-model polling candidates can rewrite the built-in Manifest and normalize `api_url` to the 元界 base URL before generation. The upstream `model_name` remains the per-provider request model only. |
| User API smart import | `src/components/profile/api-key-manager.tsx`, `src/app/api/user-api-keys/smart-import/route.ts`, `src/lib/user-api-manifest.ts`, `src/lib/user-api-manifest-executor.ts`, `src/lib/model-capabilities.ts`, `src/lib/model-display.ts` | The profile API settings page has an `智能配置 API` button next to `添加 API 密钥`. It opens a wide viewport-capped Manifest editor, can copy the LLM prompt, shows guidance under the prompt button explaining the copy-to-chat-AI and paste-and-import flow, can paste clipboard JSON without importing, and can paste-and-import in one action. The prompt instructs the LLM to stop and ask the user for the relay API Base URL when the docs do not contain it. Imports create each profile/model as an independent `user_api_keys` row plus a separate `user-api-manifests/<userId>/<keyId>.json` file and reject incomplete configs without a resolvable request URL. Imported rows should store a human-readable provider name in the editable provider/supplier fields and resolve the visible API request URL from `profile.baseUrl + submit.path` for synchronous endpoints. Generic placeholder notes such as `导入的 API Key` must not be used as model labels; creation/profile UI should prefer a real note plus model, or provider plus model. Optional `profile.capabilities` filters or hides create-page aspect ratio, resolution, image format, and quality controls for the selected model. Polling Manifest query values can include `{task_id}` so task IDs are sent as real query parameters rather than being embedded into pathname strings. Generation routes must use the selected model key's `manifest_path`; do not merge different request configs under one user-level file. |
| Admin system API smart import | `src/components/admin/api-management-tab.tsx`, `src/app/api/admin/system-apis/smart-import/route.ts`, `src/app/api/admin/system-apis/route.ts`, `src/lib/server-api-config.ts`, `src/lib/user-api-manifest.ts`, `src/lib/user-api-manifest-executor.ts`, `src/lib/model-capabilities.ts` | The console API management page has a separate `智能配置 API` section for admins, but this section is generic Manifest import only. It supports copy-to-chat-AI and paste-and-import Manifest flow, then creates one independent system API row and `system-api-manifests/<systemApiId>.json` file per imported profile/model. Imported rows resolve the visible API request URL from the Manifest profile/provider before save, and optional `profile.capabilities` can constrain or hide create-page image/video parameter choices for the selected system model. Provider-specific built-in template management, including 元界 AI and Agnes AI, belongs in the `系统默认模型` management flow and should not be exposed in the smart import UI. 元界价格/计费方式手动同步 uses `src/app/api/admin/system-apis/yuanjie-pricing/route.ts` and `src/lib/yuanjie-pricing-sync.ts`; it updates only existing 元界 image/video rows, tolerates provider spellings such as `元界AI`, and leaves mozheAPI/global smart-import configs untouched. |
| Admin console active page persistence | `src/modules/console/pages/console-dashboard-page.tsx` | The console active view is stored in `sessionStorage`, so browser refresh keeps the current admin page/tab. Logout clears the value, and closing/reopening the console starts from the dashboard because `sessionStorage` is tab-scoped. |
| Manifest input image URLs | `src/lib/user-api-manifest-executor.ts`, `src/app/api/generate/image/route.ts`, `src/app/api/generate/video/route.ts` | Manifest templates can use `$inputImages.dataUrls` for raw uploaded data and `$inputImages.urls` for provider-facing public references. The executor converts data URL input images into storage-backed URLs before rendering templates. Image-to-image generation normalizes the primary `image` plus `extraImages` into Manifest `inputImages`, so multi-reference providers such as Yuanjie GPT Image 2 receive all references. |
| Manifest input image URLs | `src/lib/user-api-manifest-executor.ts`, `src/app/api/generate/image/route.ts`, `src/app/api/generate/video/route.ts`, `src/lib/reference-image-prompt.ts` | Manifest templates can use `$inputImages.dataUrls` for raw uploaded data and `$inputImages.urls` for provider-facing public references. The executor converts data URL input images into storage-backed URLs before rendering templates. Image-to-image and image-to-video generation normalize the primary `image` plus `extraImages`/`images` into Manifest `inputImages`, so multi-reference providers such as Yuanjie GPT Image 2 receive all references. `referenceImageAnnotations` is not a Manifest variable; routes fold it into `$prompt` before execution so existing templates inherit the mapping. |
## Models And Providers

View File

@@ -23,6 +23,7 @@
"test:gallery-publish-fast-path": "tsx ./scripts/test-gallery-publish-fast-path.mjs",
"test:gallery-response": "node --no-warnings ./scripts/test-gallery-response.mjs",
"test:media-watermark-policy": "tsx ./scripts/test-media-watermark-policy.mjs",
"test:reference-image-prompt-links": "tsx ./scripts/test-reference-image-prompt-links.mjs",
"test:yuanjie-media-manifest-mapping": "tsx ./scripts/test-yuanjie-media-manifest-mapping.mjs",
"test:yuanjie-image2-persistence": "tsx ./scripts/test-yuanjie-image2-persistence.mjs",
"test:yuanjie-pricing-sync": "tsx ./scripts/test-yuanjie-pricing-sync.mjs",

View File

@@ -0,0 +1,68 @@
import assert from 'node:assert/strict';
import fs from 'node:fs';
import path from 'node:path';
// Repository root: the parent of the directory that contains this script.
// NOTE(review): `import.meta.dirname` is only available on recent Node.js
// releases — confirm the minimum runtime used to run this script.
const repoRoot = path.resolve(import.meta.dirname, '..');
// Load the helpers under test directly from their TypeScript source file.
// NOTE(review): importing a `.ts` module presumably relies on the script being
// launched through a TypeScript-aware runner (e.g. tsx) — confirm.
const {
buildReferenceImagePrompt,
normalizeReferenceImageAnnotations,
} = await import('../src/lib/reference-image-prompt.ts');
async function runTest(name, fn) {
try {
await fn();
console.log(`PASS ${name}`);
} catch (error) {
console.error(`FAIL ${name}`);
console.error(error);
process.exitCode = 1;
}
}
// Case: a prompt that mentions two reference tokens, with matching annotations
// for two uploaded images. The result must keep the original prompt as its
// prefix and contain an explanation section that maps each token to its upload
// position, file name, and dimensions.
await runTest('adds model-readable mappings for referenced uploaded images', () => {
const prompt = '让 @参考图2 的外套穿到 @参考图1 的人物身上,保持 @参考图1 的脸部特征';
const result = buildReferenceImagePrompt(prompt, 2, [
{ index: 1, token: '@参考图1', name: 'person.jpg', width: 1024, height: 1536 },
{ index: 2, token: '@参考图2', name: 'coat.png', width: 800, height: 800 },
]);
// The user's text must come first, untouched.
assert.ok(result.startsWith(prompt));
assert.match(result, /参考图标注说明/);
assert.match(result, /@参考图1 对应上传的第1张参考图/);
assert.match(result, /文件名person\.jpg/);
assert.match(result, /尺寸1024x1536/);
assert.match(result, /@参考图2 对应上传的第2张参考图/);
assert.match(result, /文件名coat\.png/);
assert.match(result, /尺寸800x800/);
assert.match(result, /当提示词提到 @参考图2 时/);
});
// Case: normalization against an image count of 2. The expected output shows
// that the entry with index 9 is dropped, input order is preserved, the token
// '人物' is replaced by '@参考图1', and the non-numeric width is omitted while
// the numeric height is kept.
// NOTE(review): the token is presumably replaced because it lacks the '@'
// prefix — confirm against the helper's implementation.
await runTest('normalizes annotations and ignores impossible image indexes', () => {
const annotations = normalizeReferenceImageAnnotations([
{ index: 2, token: '@衣服', name: 'coat.png' },
{ index: 9, token: '@不存在', name: 'missing.png' },
{ index: 1, token: '人物', name: 'person.jpg', width: 'bad', height: 1024 },
], 2);
assert.deepEqual(annotations, [
{ index: 2, token: '@衣服', name: 'coat.png' },
{ index: 1, token: '@参考图1', name: 'person.jpg', height: 1024 },
]);
});
// Case: pass-through behaviour. With zero reference images the prompt comes
// back unchanged, and an empty prompt stays empty even when the image count is
// 2 and no annotations are supplied.
await runTest('does not alter prompts without reference images', () => {
assert.equal(buildReferenceImagePrompt('一只杯子', 0, []), '一只杯子');
assert.equal(buildReferenceImagePrompt('', 2, []), '');
});
// Case: static wiring check. Reads the two create-panel source files as text
// and asserts that each one mentions the mention-controls component, the
// `referenceImageAnnotations` payload field, and the annotation builder. This
// only matches identifiers in the source; it does not render or execute the
// components.
await runTest('image-to-image and image-to-video send reference annotations from the @ picker', () => {
const imageToImageSource = fs.readFileSync(path.join(repoRoot, 'src/components/create/image-to-image.tsx'), 'utf8');
const imageToVideoSource = fs.readFileSync(path.join(repoRoot, 'src/components/create/image-to-video.tsx'), 'utf8');
for (const source of [imageToImageSource, imageToVideoSource]) {
assert.match(source, /ReferenceImageMentionControls/);
assert.match(source, /referenceImageAnnotations/);
assert.match(source, /buildReferenceImageAnnotations/);
}
});

View File

@@ -38,6 +38,7 @@ import {
dataUrlToImageBuffer,
} from '@/lib/server-image-compression';
import { executeUserApiManifest } from '@/lib/user-api-manifest-executor';
import { buildReferenceImagePrompt } from '@/lib/reference-image-prompt';
import {
getImageExtension as getMediaImageExtension,
normalizeImageBufferForOutputFormat,
@@ -1024,6 +1025,7 @@ export async function POST(request: NextRequest) {
stream,
image,
extraImages,
referenceImageAnnotations,
strength,
customApiConfig,
} = body as {
@@ -1044,6 +1046,7 @@ export async function POST(request: NextRequest) {
stream?: boolean;
image?: string;
extraImages?: string[];
referenceImageAnnotations?: unknown;
strength?: number;
customApiConfig?: CustomApiConfig;
};
@@ -1069,7 +1072,8 @@ export async function POST(request: NextRequest) {
}
const resolvedOutputFormat = normalizeImageOutputFormat(outputFormat);
const resolvedImageQuality = normalizeImageQuality(imageQuality);
const promptForGeneration = mergeStylePrompt(prompt, stylePrompt);
const promptWithReferenceImages = buildReferenceImagePrompt(prompt, referenceImages.length, referenceImageAnnotations);
const promptForGeneration = mergeStylePrompt(promptWithReferenceImages, stylePrompt);
const requestedCustomSize = size && size !== 'auto'
? size
: resolveCustomApiImageSize(resolvedAutoParams.aspectRatio, resolvedAutoParams.resolution);

View File

@@ -11,6 +11,7 @@ import {
imageBufferToDataUrl,
} from '@/lib/server-image-compression';
import { executeUserApiManifest } from '@/lib/user-api-manifest-executor';
import { buildReferenceImagePrompt } from '@/lib/reference-image-prompt';
import { fetchPublicHttpUrlWithRetry } from '@/lib/remote-fetch';
interface CustomApiConfig {
@@ -490,6 +491,7 @@ export async function POST(request: NextRequest) {
image,
images,
extraImages,
referenceImageAnnotations,
customApiConfig,
} = body as {
prompt?: string;
@@ -504,9 +506,11 @@ export async function POST(request: NextRequest) {
image?: string;
images?: string[];
extraImages?: string[];
referenceImageAnnotations?: unknown;
customApiConfig?: CustomApiConfig;
};
const referenceImages = normalizeReferenceImages(image, images, extraImages);
const promptForGeneration = buildReferenceImagePrompt(prompt || '', referenceImages.length, referenceImageAnnotations);
const numericDuration = Number(duration);
const sdkDuration = Number.isFinite(numericDuration) ? numericDuration : 5;
@@ -540,7 +544,7 @@ export async function POST(request: NextRequest) {
apiUrl: resolvedCustomApiConfig.apiUrl,
apiKey: resolvedApiKey,
modelName: resolvedCustomApiConfig.modelName,
prompt: prompt || '',
prompt: promptForGeneration,
params: {
n: 1,
aspect_ratio: aspectRatio,
@@ -569,7 +573,7 @@ export async function POST(request: NextRequest) {
if (referenceImages.length > 0) {
return await customApiImageToVideo(
resolvedCustomApiConfig as CustomApiConfig,
prompt,
promptForGeneration,
negativePrompt,
referenceImages[0],
referenceImages,
@@ -587,7 +591,7 @@ export async function POST(request: NextRequest) {
// Augment prompt with aspect ratio hint as fallback
const ratioHint = aspectRatio ? getAspectRatioPromptHint(aspectRatio) : '';
const augmentedPrompt = ratioHint ? `${prompt || ''}\n\n[${ratioHint}]` : (prompt || '');
const augmentedPrompt = ratioHint ? `${promptForGeneration}\n\n[${ratioHint}]` : promptForGeneration;
const requestBody: Record<string, unknown> = {
model: resolvedCustomApiConfig.modelName,
@@ -652,8 +656,8 @@ export async function POST(request: NextRequest) {
referenceImages.forEach((url, index) => {
contentItems.push({ type: 'image_url', image_url: { url }, role: index === 0 ? 'first_frame' : 'reference' });
});
if (prompt) {
contentItems.push({ type: 'text', text: prompt });
if (promptForGeneration) {
contentItems.push({ type: 'text', text: promptForGeneration });
}
const ratioMap: Record<string, '16:9' | '9:16' | '1:1' | '4:3' | '3:4'> = {

View File

@@ -1,6 +1,6 @@
'use client';
import { useState } from 'react';
import { useState, type FocusEventHandler, type KeyboardEventHandler, type MouseEventHandler, type ReactEventHandler, type Ref } from 'react';
import { Button } from '@/components/ui/button';
import { Textarea } from '@/components/ui/textarea';
import {
@@ -16,6 +16,13 @@ interface ExpandablePromptTextareaProps {
rows?: number;
value: string;
className?: string;
textareaRef?: Ref<HTMLTextAreaElement>;
onBlur?: FocusEventHandler<HTMLTextAreaElement>;
onClick?: MouseEventHandler<HTMLTextAreaElement>;
onFocus?: FocusEventHandler<HTMLTextAreaElement>;
onKeyDown?: KeyboardEventHandler<HTMLTextAreaElement>;
onKeyUp?: KeyboardEventHandler<HTMLTextAreaElement>;
onSelect?: ReactEventHandler<HTMLTextAreaElement>;
onValueChange: (value: string) => void;
}
@@ -25,6 +32,13 @@ export function ExpandablePromptTextarea({
rows,
value,
className,
textareaRef,
onBlur,
onClick,
onFocus,
onKeyDown,
onKeyUp,
onSelect,
onValueChange,
}: ExpandablePromptTextareaProps) {
const [open, setOpen] = useState(false);
@@ -32,11 +46,18 @@ export function ExpandablePromptTextarea({
return (
<>
<Textarea
ref={textareaRef}
placeholder={placeholder}
rows={rows}
value={value}
onChange={event => onValueChange(event.target.value)}
onBlur={onBlur}
onClick={onClick}
onDoubleClick={() => setOpen(true)}
onFocus={onFocus}
onKeyDown={onKeyDown}
onKeyUp={onKeyUp}
onSelect={onSelect}
title="双击放大编辑"
className={className}
/>

View File

@@ -40,6 +40,7 @@ import { BareImagePreview, ImageLightbox } from '@/components/lightbox';
import { CreationDetailDialog } from '@/components/creation-detail-dialog';
import { GenerationErrorPanel, createGenerationError, type GenerationErrorState } from '@/components/create/generation-error-panel';
import { ExpandablePromptTextarea } from '@/components/create/expandable-prompt-textarea';
import { ReferenceImageMentionControls, buildReferenceImageAnnotations } from '@/components/create/reference-image-mention-controls';
import { compressImageFileForUpload } from '@/lib/browser-image-compression';
import { ImageCountCombobox } from '@/components/create/image-count-combobox';
import { StylePresetSelector } from '@/components/create/style-preset-selector';
@@ -595,9 +596,10 @@ export function ImageToImagePanel() {
stylePrompt: selectedStylePreset?.prompt,
strength,
image: primaryImage,
// Additional reference images
extraImages: refImages.length > 1 ? refImages.slice(1).map(img => img.dataUrl) : undefined,
};
// Additional reference images
extraImages: refImages.length > 1 ? refImages.slice(1).map(img => img.dataUrl) : undefined,
referenceImageAnnotations: buildReferenceImageAnnotations(refImages),
};
if (isCustomModel(selectedModel)) {
const key = imageKeys.find(k => k.id === getCustomKeyId(selectedModel));
@@ -622,6 +624,7 @@ export function ImageToImagePanel() {
styleLabel: selectedStylePreset?.label || '',
strength,
references: refImages.map(img => img.dataUrl),
referenceImageAnnotations: buildReferenceImageAnnotations(refImages),
});
if (activeSubmissionSignaturesRef.current.has(submissionSignature)) {
toast.info('相同任务正在生成中,请勿重复提交');
@@ -709,6 +712,7 @@ export function ImageToImagePanel() {
styleLabel: selectedStylePreset?.label,
strength,
refImageCount: refImages.length,
referenceImageAnnotations: buildReferenceImageAnnotations(refImages),
},
creditsCost: creditsPerImage,
});
@@ -787,7 +791,7 @@ export function ImageToImagePanel() {
>
{refImages.length > 0 ? (
<div className="grid w-full grid-cols-3 gap-3">
{refImages.map(img => (
{refImages.map((img, index) => (
<div
key={img.id}
className="liquid-glass-soft relative group aspect-square cursor-zoom-in overflow-hidden rounded-2xl"
@@ -795,6 +799,17 @@ export function ImageToImagePanel() {
>
{/* eslint-disable-next-line @next/next/no-img-element */}
<img src={img.dataUrl} alt={img.name} className="w-full h-full object-cover" />
<button
type="button"
className="absolute bottom-1 left-1 rounded-full bg-black/70 px-2 py-0.5 text-[11px] font-medium text-white shadow-sm backdrop-blur"
onClick={(event) => {
event.stopPropagation();
setPrompt(prev => `${prev}${prev.endsWith(' ') || prev.length === 0 ? '' : ' '}@参考图${index + 1} `);
}}
title={`插入 @参考图${index + 1}`}
>
@参考图{index + 1}
</button>
<button
className="absolute top-0.5 right-0.5 w-5 h-5 rounded-full bg-black/60 text-white flex items-center justify-center opacity-0 group-hover:opacity-100 transition-opacity"
onClick={(event) => {
@@ -867,12 +882,13 @@ export function ImageToImagePanel() {
)}
</div>
</div>
<ExpandablePromptTextarea
<ReferenceImageMentionControls
title="创作描述"
placeholder="描述你想要的图片变化..."
rows={3}
className="h-32 resize-none overflow-y-auto"
value={prompt}
references={refImages}
onValueChange={setPrompt}
/>
<StylePresetSelector

View File

@@ -30,6 +30,7 @@ import Link from 'next/link';
import { CreationDetailDialog } from '@/components/creation-detail-dialog';
import { GenerationErrorPanel, createGenerationError, type GenerationErrorState } from '@/components/create/generation-error-panel';
import { ExpandablePromptTextarea } from '@/components/create/expandable-prompt-textarea';
import { ReferenceImageMentionControls, buildReferenceImageAnnotations } from '@/components/create/reference-image-mention-controls';
import { compressImageFileForUpload } from '@/lib/browser-image-compression';
import { BareImagePreview } from '@/components/lightbox';
import { GenerationTaskList, type ActiveGenerationTask } from '@/components/create/generation-task-list';
@@ -49,10 +50,12 @@ const VIDEO_RESOLUTION_OPTIONS = [
] as const;
interface RefImage {
id: string;
dataUrl: string;
name: string;
}
id: string;
dataUrl: string;
name: string;
width?: number;
height?: number;
}
export function ImageToVideoPanel() {
const { user, accessToken, updateProfile } = useAuth();
@@ -296,6 +299,8 @@ export function ImageToVideoPanel() {
id: `ref-${Date.now()}-${Math.random().toString(36).slice(2, 6)}`,
dataUrl: result.dataUrl,
name: result.name,
width: result.width,
height: result.height,
});
} catch (err) {
toast.error(err instanceof Error ? err.message : '图片读取失败');
@@ -393,7 +398,14 @@ export function ImageToVideoPanel() {
isCustomModel: isCustomModel(selectedModel) || isSystemModel(selectedModel),
referenceImage: primaryImage,
referenceImages: refImages.map(img => img.dataUrl),
params: { creationMode: 'img2video', aspectRatio, duration, cameraMovement, refImageCount: refImages.length },
params: {
creationMode: 'img2video',
aspectRatio,
duration,
cameraMovement,
refImageCount: refImages.length,
referenceImageAnnotations: buildReferenceImageAnnotations(refImages),
},
creditsCost: creditsPerVideo,
});
}
@@ -426,9 +438,10 @@ export function ImageToVideoPanel() {
fps: 30,
clientRequestId: taskId,
image: primaryImage,
extraImages: refImages.length > 1 ? refImages.slice(1).map(img => img.dataUrl) : undefined,
images: refImages.length > 0 ? refImages.map(img => img.dataUrl) : undefined,
};
extraImages: refImages.length > 1 ? refImages.slice(1).map(img => img.dataUrl) : undefined,
images: refImages.length > 0 ? refImages.map(img => img.dataUrl) : undefined,
referenceImageAnnotations: buildReferenceImageAnnotations(refImages),
};
if (isCustomModel(selectedModel)) {
const key = videoKeys.find(k => k.id === getCustomKeyId(selectedModel));
@@ -450,6 +463,7 @@ export function ImageToVideoPanel() {
resolution,
cameraMovement,
references: refImages.map(img => img.dataUrl),
referenceImageAnnotations: buildReferenceImageAnnotations(refImages),
});
if (activeSubmissionSignaturesRef.current.has(submissionSignature)) {
toast.info('相同任务正在生成中,请勿重复提交');
@@ -491,7 +505,14 @@ export function ImageToVideoPanel() {
isCustomModel: isCustomModel(selectedModel) || isSystemModel(selectedModel),
referenceImage: primaryImage,
referenceImages: refImages.map(img => img.dataUrl),
params: { creationMode: 'img2video', aspectRatio, duration, cameraMovement, refImageCount: refImages.length },
params: {
creationMode: 'img2video',
aspectRatio,
duration,
cameraMovement,
refImageCount: refImages.length,
referenceImageAnnotations: buildReferenceImageAnnotations(refImages),
},
creditsCost: creditsPerVideo,
});
}
@@ -555,14 +576,25 @@ export function ImageToVideoPanel() {
>
{refImages.length > 0 ? (
<div className="grid w-full grid-cols-3 gap-3">
{refImages.map(img => (
{refImages.map((img, index) => (
<div
key={img.id}
className="liquid-glass-soft relative aspect-square cursor-zoom-in overflow-hidden rounded-2xl"
onClick={() => setReferencePreviewSrc(img.dataUrl)}
>
>
{/* eslint-disable-next-line @next/next/no-img-element */}
<img src={img.dataUrl} alt={img.name} className="h-full w-full object-cover" />
<button
type="button"
className="absolute bottom-1 left-1 rounded-full bg-black/70 px-2 py-0.5 text-[11px] font-medium text-white shadow-sm backdrop-blur"
onClick={(event) => {
event.stopPropagation();
setPrompt(prev => `${prev}${prev.endsWith(' ') || prev.length === 0 ? '' : ' '}@参考图${index + 1} `);
}}
title={`插入 @参考图${index + 1}`}
>
@参考图{index + 1}
</button>
<button
className="absolute right-1 top-1 flex h-5 w-5 items-center justify-center rounded-full bg-black/60 text-white"
onClick={(event) => {
@@ -634,12 +666,13 @@ export function ImageToVideoPanel() {
)}
</div>
</div>
<ExpandablePromptTextarea
<ReferenceImageMentionControls
title="视频描述"
placeholder="描述你想要的视频效果..."
rows={3}
className="h-32 resize-none overflow-y-auto"
value={prompt}
references={refImages}
onValueChange={setPrompt}
/>
</div>

View File

@@ -0,0 +1,191 @@
'use client';
import { useCallback, useMemo, useRef, useState } from 'react';
import { Image as ImageIcon } from 'lucide-react';
import { Button } from '@/components/ui/button';
import { ExpandablePromptTextarea } from '@/components/create/expandable-prompt-textarea';
/**
 * A reference image as handed to the mention controls by the host panel.
 * Items are addressed by their position in the `references` array, not by `id`.
 */
export interface ReferenceImageMentionItem {
// Not read anywhere in this file — presumably a stable identifier owned by the caller (TODO confirm).
id: string;
// Copied into annotation payloads and matched against the text typed after `@`.
name: string;
// Used as the `src` of the thumbnail rendered in the mention menu.
dataUrl: string;
// Optional dimensions, forwarded unchanged into annotation payloads (presumably pixels — confirm with the uploader).
width?: number;
height?: number;
}
/**
 * Per-image metadata produced by `buildReferenceImageAnnotations`, describing which
 * mention token belongs to which reference image.
 */
export interface ReferenceImageAnnotationPayload {
// 1-based position of the image in the references array.
index: number;
// Mention token for the image, built as `@参考图` followed by `index`.
token: string;
// Copied from the reference item's `name`.
name?: string;
// Copied from the reference item's `width` / `height`; undefined when the item has none.
width?: number;
height?: number;
}
/** Props accepted by `ReferenceImageMentionControls`. */
interface ReferenceImageMentionControlsProps {
// Forwarded to `ExpandablePromptTextarea` as its `title`.
title: string;
// Base placeholder; a hint about typing `@` is appended when at least one reference exists.
placeholder?: string;
// Forwarded to `ExpandablePromptTextarea`.
rows?: number;
// Forwarded to `ExpandablePromptTextarea`.
className?: string;
// Current prompt text (the component is controlled).
value: string;
// Reference images that can be mentioned; array order defines the `@参考图N` numbering.
references: ReferenceImageMentionItem[];
// Called with the full new prompt text after typing or after a token is inserted.
onValueChange: (value: string) => void;
}
/**
 * Derives one annotation payload per reference image.
 *
 * Images are numbered from 1 in array order and each receives the mention token
 * `@参考图N`. `name`, `width` and `height` are copied through untouched.
 *
 * @param references - Reference images in display order.
 * @returns Annotation payloads in the same order as `references`.
 */
export function buildReferenceImageAnnotations(
  references: ReferenceImageMentionItem[],
): ReferenceImageAnnotationPayload[] {
  const payloads: ReferenceImageAnnotationPayload[] = [];
  let position = 0;
  for (const { name, width, height } of references) {
    position += 1;
    payloads.push({
      index: position,
      token: `@参考图${position}`,
      name,
      width,
      height,
    });
  }
  return payloads;
}
/**
 * Finds the `@…` fragment that ends exactly at the caret, if there is one.
 *
 * The fragment must start at the beginning of the text or directly after whitespace,
 * and may only contain CJK ideographs (U+4E00–U+9FA5), word characters and hyphens.
 *
 * @param value - Full textarea content.
 * @param cursor - Caret offset into `value`.
 * @returns The fragment including its leading `@`, or `null` when the caret is not inside a mention.
 */
function getMentionQuery(value: string, cursor: number): string | null {
  const mentionAtCaret = /(^|\s)(@[\u4e00-\u9fa5\w-]*)$/;
  const found = mentionAtCaret.exec(value.slice(0, cursor));
  if (found === null) {
    return null;
  }
  return found[2];
}
/**
 * Prompt editor that lets the user reference uploaded images with `@参考图N` tokens.
 *
 * Renders the shared `ExpandablePromptTextarea`, a row of quick-insert chips (one per
 * reference image) and, while the caret sits directly after an `@…` fragment, a
 * suggestion menu filtered by that fragment (matched against token and file name).
 *
 * @param title - Forwarded to `ExpandablePromptTextarea`.
 * @param placeholder - Base placeholder; a hint about typing `@` is appended when references exist.
 * @param rows - Forwarded to `ExpandablePromptTextarea`.
 * @param className - Forwarded to `ExpandablePromptTextarea`.
 * @param value - Current prompt text (controlled).
 * @param references - Images that can be mentioned; array order defines the numbering.
 * @param onValueChange - Receives the full new prompt text after typing or token insertion.
 */
export function ReferenceImageMentionControls({
  title,
  placeholder,
  rows,
  className,
  value,
  references,
  onValueChange,
}: ReferenceImageMentionControlsProps) {
  const textareaRef = useRef<HTMLTextAreaElement | null>(null);
  // Handle of the pending "close the menu shortly after blur" timer, if any.
  const blurCloseTimerRef = useRef<number | null>(null);
  const [menuOpen, setMenuOpen] = useState(false);
  const [mentionQuery, setMentionQuery] = useState<string | null>(null);
  const hasReferences = references.length > 0;

  const referenceItems = useMemo(() => buildReferenceImageAnnotations(references), [references]);

  // Items shown in the menu: everything for a bare "@", otherwise a case-insensitive
  // substring match on the token or the file name.
  const visibleItems = useMemo(() => {
    if (!mentionQuery || mentionQuery === '@') return referenceItems;
    const query = mentionQuery.slice(1).trim().toLowerCase();
    return referenceItems.filter(item => (
      item.token.toLowerCase().includes(query)
      || item.name?.toLowerCase().includes(query)
    ));
  }, [mentionQuery, referenceItems]);

  const cancelPendingBlurClose = useCallback(() => {
    if (blurCloseTimerRef.current !== null) {
      window.clearTimeout(blurCloseTimerRef.current);
      blurCloseTimerRef.current = null;
    }
  }, []);

  // Re-derives the mention query from the caret position and opens/closes the menu.
  const refreshMentionState = useCallback((nextValue = value) => {
    const textarea = textareaRef.current;
    if (!textarea || references.length === 0) {
      setMenuOpen(false);
      setMentionQuery(null);
      return;
    }
    const query = getMentionQuery(nextValue, textarea.selectionStart || 0);
    setMentionQuery(query);
    setMenuOpen(Boolean(query));
  }, [references.length, value]);

  // Inserts `token` at the caret, replacing the partially typed `@…` fragment if any,
  // then restores focus with the caret placed right after the inserted text.
  const insertReferenceToken = useCallback((token: string) => {
    const textarea = textareaRef.current;
    if (!textarea) {
      // No DOM node to read a caret from: append to the end instead.
      onValueChange(`${value}${value.endsWith(' ') || value.length === 0 ? '' : ' '}${token} `);
      setMenuOpen(false);
      return;
    }
    const cursorStart = textarea.selectionStart || 0;
    const cursorEnd = textarea.selectionEnd || cursorStart;
    const query = getMentionQuery(value, cursorStart);
    const replaceStart = query ? cursorStart - query.length : cursorStart;
    const prefix = value.slice(0, replaceStart);
    const suffix = value.slice(cursorEnd);
    const needsLeadingSpace = prefix.length > 0 && !/\s$/.test(prefix);
    const insertion = `${needsLeadingSpace ? ' ' : ''}${token} `;
    const nextValue = `${prefix}${insertion}${suffix}`;
    const nextCursor = prefix.length + insertion.length;
    onValueChange(nextValue);
    setMenuOpen(false);
    setMentionQuery(null);
    // Wait a frame so the controlled value has been rendered before moving the caret.
    window.requestAnimationFrame(() => {
      textarea.focus();
      textarea.setSelectionRange(nextCursor, nextCursor);
    });
  }, [onValueChange, value]);

  const handleValueChange = useCallback((nextValue: string) => {
    onValueChange(nextValue);
    window.requestAnimationFrame(() => refreshMentionState(nextValue));
  }, [onValueChange, refreshMentionState]);

  // Join the caller's placeholder and the mention hint with a separator so the two
  // sentences do not run together; with no caller placeholder only the hint is shown.
  const promptPlaceholder = hasReferences
    ? [placeholder, '输入 @ 可选择参考图'].filter(Boolean).join(',')
    : placeholder;

  return (
    <div className="relative space-y-2">
      <ExpandablePromptTextarea
        title={title}
        placeholder={promptPlaceholder}
        rows={rows}
        className={className}
        value={value}
        textareaRef={textareaRef}
        onBlur={() => {
          // Delay closing so a click that lands on the menu is not cut off by the blur.
          cancelPendingBlurClose();
          blurCloseTimerRef.current = window.setTimeout(() => {
            blurCloseTimerRef.current = null;
            setMenuOpen(false);
          }, 120);
        }}
        onClick={() => refreshMentionState()}
        onFocus={() => {
          // A quick blur → focus must not let the stale blur timer close the menu again.
          cancelPendingBlurClose();
          refreshMentionState();
        }}
        onKeyDown={(event) => {
          if (event.key === 'Escape') setMenuOpen(false);
          if (event.key === '@' && hasReferences) {
            setMenuOpen(true);
            setMentionQuery('@');
          }
        }}
        onKeyUp={() => refreshMentionState()}
        onSelect={() => refreshMentionState()}
        onValueChange={handleValueChange}
      />
      {hasReferences && (
        <div className="flex flex-wrap gap-1.5">
          {referenceItems.map(item => (
            <Button
              key={item.index}
              type="button"
              variant="outline"
              size="sm"
              className="h-7 gap-1.5 rounded-full px-2.5 text-xs"
              onClick={() => insertReferenceToken(item.token)}
              title={`插入 ${item.token}`}
            >
              <ImageIcon className="h-3 w-3" />
              {item.token}
            </Button>
          ))}
        </div>
      )}
      {menuOpen && visibleItems.length > 0 && (
        <div className="absolute left-0 right-0 top-full z-30 mt-1 max-h-56 overflow-y-auto rounded-xl border border-border/80 bg-background/95 p-1 shadow-xl backdrop-blur">
          {visibleItems.map(item => (
            <button
              key={item.index}
              type="button"
              className="flex w-full items-center gap-2 rounded-lg px-2 py-2 text-left text-sm hover:bg-muted"
              onMouseDown={(event) => {
                // preventDefault keeps focus (and the caret position) in the textarea.
                event.preventDefault();
                insertReferenceToken(item.token);
              }}
            >
              <span className="flex h-7 w-7 shrink-0 items-center justify-center overflow-hidden rounded-md bg-muted">
                {/* eslint-disable-next-line @next/next/no-img-element */}
                <img src={references[item.index - 1]?.dataUrl} alt="" className="h-full w-full object-cover" />
              </span>
              <span className="min-w-0 flex-1">
                <span className="block font-medium">{item.token}</span>
                {item.name && <span className="block truncate text-xs text-muted-foreground">{item.name}</span>}
              </span>
            </button>
          ))}
        </div>
      )}
    </div>
  );
}

View File

@@ -0,0 +1,92 @@
/**
 * Validated description of one reference image, as produced by
 * `normalizeReferenceImageAnnotations` and consumed when building the prompt.
 */
export interface ReferenceImagePromptAnnotation {
// 1-based position of the image among the uploaded references.
index: number;
// Mention token for the image; always starts with `@`.
token: string;
// Single-line file name capped at 80 characters; omitted when empty.
name?: string;
// Positive integers no larger than 100000; omitted when missing or invalid.
width?: number;
height?: number;
}
/**
 * Coerces untrusted input to a positive integer.
 *
 * Accepts a number, or a string consisting solely of decimal digits (surrounding
 * whitespace is ignored). Only safe integers are returned, so an overlong digit
 * string can no longer yield `Infinity` or a value that has lost precision.
 *
 * @param value - Raw value, typically taken from a parsed request body.
 * @returns The positive safe integer, or `undefined` for anything else.
 */
function normalizePositiveInteger(value: unknown): number | undefined {
  let candidate: unknown = value;
  if (typeof value === 'string') {
    const digits = value.trim();
    if (!/^\d+$/.test(digits)) return undefined;
    candidate = Number(digits);
  }
  if (typeof candidate !== 'number') return undefined;
  // isSafeInteger rejects NaN, ±Infinity, fractions and magnitudes above 2^53 - 1.
  return Number.isSafeInteger(candidate) && candidate > 0 ? candidate : undefined;
}
/**
 * Validates an image dimension taken from untrusted input.
 *
 * @param value - Raw width or height.
 * @returns The value as a positive integer when it does not exceed 100000, otherwise `undefined`.
 */
function normalizeDimension(value: unknown): number | undefined {
  const dimension = normalizePositiveInteger(value);
  if (dimension === undefined || dimension > 100_000) {
    return undefined;
  }
  return dimension;
}
/**
 * Validates a client-supplied mention token, falling back to the canonical `@参考图N`.
 *
 * The token is written verbatim into the prompt sent upstream, so it is only
 * accepted when it looks like a single inline mention: it starts with `@`, is at
 * most 80 characters long and contains no whitespace or control characters.
 * Anything else (including non-strings) is replaced by the canonical token.
 *
 * @param value - Raw token from the request payload.
 * @param index - 1-based image position used to build the fallback token.
 * @returns A safe mention token.
 */
function normalizeToken(value: unknown, index: number): string {
  const maxTokenLength = 80;
  const fallback = `@参考图${index}`;
  if (typeof value !== 'string') return fallback;

  const candidate = value.trim();
  const isInlineMention = candidate.startsWith('@')
    && candidate.length <= maxTokenLength
    // Whitespace or control characters would break the one-line-per-image prompt layout.
    && !/[\s\p{Cc}]/u.test(candidate);
  return isInlineMention ? candidate : fallback;
}
/**
 * Flattens text to a single line and caps its length.
 *
 * Each run of carriage returns, newlines or tabs becomes one space; the result is
 * trimmed and then cut to at most 80 characters.
 *
 * @param value - Text to sanitise.
 * @returns The single-line, length-limited text.
 */
function truncateInlineText(value: string): string {
  const singleLine = value.replace(/[\r\n\t]+/g, ' ');
  const trimmed = singleLine.trim();
  return trimmed.slice(0, 80);
}
/**
 * Turns an untrusted annotations payload into a clean annotation list.
 *
 * Entries are kept in input order. An entry is dropped when it is not an object,
 * when its `index` is not a positive integer within `1..referenceCount`, or when
 * an earlier entry already claimed the same index. `name`, `width` and `height`
 * are only included when they survive their own validation.
 *
 * @param value - Raw payload; anything that is not an array yields an empty list.
 * @param referenceCount - Number of uploaded reference images.
 * @returns The validated annotations.
 */
export function normalizeReferenceImageAnnotations(
  value: unknown,
  referenceCount: number,
): ReferenceImagePromptAnnotation[] {
  if (referenceCount <= 0 || !Array.isArray(value)) return [];

  const claimedIndexes = new Set<number>();
  const result: ReferenceImagePromptAnnotation[] = [];

  value.forEach((entry: unknown) => {
    if (typeof entry !== 'object' || entry === null) return;
    const fields = entry as Record<string, unknown>;

    const index = normalizePositiveInteger(fields.index);
    if (!index || index > referenceCount || claimedIndexes.has(index)) return;
    claimedIndexes.add(index);

    const name = typeof fields.name === 'string' ? truncateInlineText(fields.name) : undefined;
    const width = normalizeDimension(fields.width);
    const height = normalizeDimension(fields.height);

    const annotation: ReferenceImagePromptAnnotation = {
      index,
      token: normalizeToken(fields.token, index),
    };
    // Optional fields are only attached when present so the output carries no undefined keys.
    if (name) annotation.name = name;
    if (width) annotation.width = width;
    if (height) annotation.height = height;
    result.push(annotation);
  });

  return result;
}
/**
 * Appends a model-readable mapping of `@参考图N` tokens to the user's prompt.
 *
 * When the trimmed prompt is empty or there are no reference images, the trimmed
 * prompt is returned unchanged. Otherwise one line is emitted per reference image
 * (1..referenceCount); images without a valid annotation get the canonical token.
 * Known file name and size are appended in parentheses, separated by a comma, so
 * they stay distinguishable from the surrounding sentence.
 *
 * @param prompt - User prompt; non-string values are treated as empty.
 * @param referenceCount - Number of uploaded reference images.
 * @param annotationsInput - Untrusted annotations payload from the request body.
 * @returns The prompt followed by the `[参考图标注说明]` block, or just the trimmed prompt.
 */
export function buildReferenceImagePrompt(
  prompt: string,
  referenceCount: number,
  annotationsInput?: unknown,
): string {
  const trimmedPrompt = typeof prompt === 'string' ? prompt.trim() : '';
  if (!trimmedPrompt || referenceCount <= 0) return trimmedPrompt;

  const normalized = normalizeReferenceImageAnnotations(annotationsInput, referenceCount);
  const byIndex = new Map(normalized.map(annotation => [annotation.index, annotation]));

  // Every uploaded image gets a line, even when the client sent no annotation for it.
  const annotations = Array.from({ length: referenceCount }, (_, offset) => {
    const index = offset + 1;
    return byIndex.get(index) ?? { index, token: `@参考图${index}` };
  });

  const lines = annotations.map(annotation => {
    const details: string[] = [];
    if (annotation.name) details.push(`文件名:${annotation.name}`);
    if (annotation.width && annotation.height) {
      details.push(`尺寸:${annotation.width}x${annotation.height}`);
    }
    const suffix = details.length > 0 ? `(${details.join(',')})` : '';
    return `${annotation.token} 对应上传的第${annotation.index}张参考图${suffix}。当提示词提到 ${annotation.token} 时,请把它理解为这张参考图,并按用户描述提取其主体、风格、构图、动作或局部元素。`;
  });

  return [
    trimmedPrompt,
    '[参考图标注说明]',
    ...lines,
  ].join('\n');
}