From 20224a455c85c8e88b6c3635bfb03965fd3659d6 Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Thu, 7 May 2026 03:35:12 +0900 Subject: [PATCH 01/24] =?UTF-8?q?Add=20AI=20alt=20text=20generation=20for?= =?UTF-8?q?=20media=20(Phase=201=E2=80=932)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add generateAltText() to the ai/ package with locale-specific system prompts (en, ko, ja, zh-CN, zh-TW); use negotiateLocale() for correct Chinese script fallback (zh-Hant→zh-TW, zh-Hans→zh-CN) - Cap context argument at 1000 characters and set maxOutputTokens: 200 to bound token cost - Export ./alttext from ai/ package (deno.json + package.json) - Add altTextGenerator (gemini-3.1-flash-lite-preview) to graphql/ai.ts and web/ai.ts; upgrade translator to claude-sonnet-4-6 (graphql/ only) - Thread altTextGenerator through ServerContext so tests can inject mocks - Add Medium.generatedAltText(language, context) GraphQL field with complexity 1000 and authentication guard - Add graphql/medium.test.ts covering guest rejection and authenticated success and context-passing cases - Add default no-op altTextGenerator to makeUserContext/makeGuestContext in test/postgres.ts Assisted-by: Claude Code:claude-sonnet-4-6 --- ai/alttext.test.ts | 238 ++++++++++++++++++++++++++++++++++++ ai/alttext.ts | 65 ++++++++++ ai/deno.json | 1 + ai/mod.ts | 1 + ai/package.json | 1 + ai/prompts/alttext/en.md | 9 ++ ai/prompts/alttext/ja.md | 9 ++ ai/prompts/alttext/ko.md | 9 ++ ai/prompts/alttext/zh-CN.md | 9 ++ ai/prompts/alttext/zh-TW.md | 9 ++ deno.lock | 1 + graphql/ai.ts | 3 +- graphql/builder.ts | 2 + graphql/main.ts | 1 + graphql/medium.test.ts | 203 ++++++++++++++++++++++++++++++ graphql/post.ts | 26 ++++ graphql/schema.graphql | 5 + test/postgres.ts | 14 +++ web/ai.ts | 1 + web/main.ts | 1 + 20 files changed, 607 insertions(+), 1 deletion(-) create mode 100644 ai/alttext.test.ts create mode 100644 ai/alttext.ts create mode 100644 ai/prompts/alttext/en.md create mode 100644 ai/prompts/alttext/ja.md create mode 100644 ai/prompts/alttext/ko.md create mode 100644 ai/prompts/alttext/zh-CN.md create mode 100644 ai/prompts/alttext/zh-TW.md create mode 100644 graphql/medium.test.ts diff --git a/ai/alttext.test.ts b/ai/alttext.test.ts new file mode 100644 index 000000000..56b97155c --- /dev/null +++ b/ai/alttext.test.ts @@ -0,0 +1,238 @@ +import assert from "node:assert/strict"; +import test from "node:test"; +import { MockLanguageModelV3 } from "ai/test"; +import { generateAltText } from "./alttext.ts"; + +// A 1×1 transparent GIF as a data URL — avoids network downloads in tests. +const DATA_URL = + "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"; + +test("generateAltText() returns trimmed text from the model response", async () => { + const model = new MockLanguageModelV3({ + doGenerate: async () => ({ + content: [{ type: "text", text: " A cat sitting on a keyboard. \n" }], + finishReason: { unified: "stop", raw: undefined }, + usage: { + inputTokens: { + total: 10, + noCache: 10, + cacheRead: undefined, + cacheWrite: undefined, + }, + outputTokens: { total: 5, text: 5, reasoning: undefined }, + }, + warnings: [], + }), + }); + + const result = await generateAltText({ + model, + imageUrl: DATA_URL, + language: "en", + }); + + assert.equal(result, "A cat sitting on a keyboard."); +}); + +test("generateAltText() sends an image file part to the model", async () => { + let hasImageFilePart = false; + const model = new MockLanguageModelV3({ + doGenerate: async (options) => { + for (const message of options.prompt) { + if (message.role !== "user") continue; + for (const part of message.content) { + if ( + part.type === "file" && + typeof part.mediaType === "string" && + part.mediaType.startsWith("image/") + ) { + hasImageFilePart = true; + } + } + } + return { + content: [{ type: "text", text: "A description." }], + finishReason: { unified: "stop", raw: undefined }, + usage: { + inputTokens: { + total: 10, + noCache: 10, + cacheRead: undefined, + cacheWrite: undefined, + }, + outputTokens: { total: 5, text: 5, reasoning: undefined }, + }, + warnings: [], + }; + }, + }); + + await generateAltText({ model, imageUrl: DATA_URL, language: "en" }); + + assert.ok(hasImageFilePart, "model should receive an image file part"); +}); + +test("generateAltText() sends a system prompt to the model", async () => { + let capturedSystem: string | undefined; + const model = new MockLanguageModelV3({ + doGenerate: async (options) => { + const sysMsg = options.prompt.find((m) => m.role === "system"); + if (sysMsg?.role === "system") capturedSystem = sysMsg.content; + return { + content: [{ type: "text", text: "A description." }], + finishReason: { unified: "stop", raw: undefined }, + usage: { + inputTokens: { + total: 10, + noCache: 10, + cacheRead: undefined, + cacheWrite: undefined, + }, + outputTokens: { total: 5, text: 5, reasoning: undefined }, + }, + warnings: [], + }; + }, + }); + + await generateAltText({ model, imageUrl: DATA_URL, language: "en" }); + + assert.ok(capturedSystem != null, "model should receive a system prompt"); + assert.ok(capturedSystem.length > 0, "system prompt should not be empty"); +}); + +test("generateAltText() uses a Korean system prompt for Korean language", async () => { + let capturedSystem: string | undefined; + const model = new MockLanguageModelV3({ + doGenerate: async (options) => { + const sysMsg = options.prompt.find((m) => m.role === "system"); + if (sysMsg?.role === "system") capturedSystem = sysMsg.content; + return { + content: [{ type: "text", text: "설명입니다." }], + finishReason: { unified: "stop", raw: undefined }, + usage: { + inputTokens: { + total: 10, + noCache: 10, + cacheRead: undefined, + cacheWrite: undefined, + }, + outputTokens: { total: 5, text: 5, reasoning: undefined }, + }, + warnings: [], + }; + }, + }); + + await generateAltText({ model, imageUrl: DATA_URL, language: "ko" }); + + assert.ok(capturedSystem != null, "system prompt should be set"); + assert.ok( + capturedSystem.includes("한국어") || capturedSystem.includes("접근성"), + "Korean prompt should contain Korean-specific text", + ); +}); + +test("generateAltText() falls back to English prompt for unsupported locales", async () => { + let capturedSystem: string | undefined; + const model = new MockLanguageModelV3({ + doGenerate: async (options) => { + const sysMsg = options.prompt.find((m) => m.role === "system"); + if (sysMsg?.role === "system") capturedSystem = sysMsg.content; + return { + content: [{ type: "text", text: "A description." }], + finishReason: { unified: "stop", raw: undefined }, + usage: { + inputTokens: { + total: 10, + noCache: 10, + cacheRead: undefined, + cacheWrite: undefined, + }, + outputTokens: { total: 5, text: 5, reasoning: undefined }, + }, + warnings: [], + }; + }, + }); + + await generateAltText({ model, imageUrl: DATA_URL, language: "ar" }); + + assert.ok(capturedSystem != null, "system prompt should be set"); + assert.ok( + capturedSystem.includes("accessibility") || + capturedSystem.includes("English"), + "should fall back to English prompt for unsupported locales", + ); +}); + +test("generateAltText() includes note context in the user text when provided", async () => { + let capturedTextPart: string | undefined; + const model = new MockLanguageModelV3({ + doGenerate: async (options) => { + const userMsg = options.prompt.find((m) => m.role === "user"); + if (userMsg?.role === "user") { + const textPart = userMsg.content.find((p) => p.type === "text"); + if (textPart?.type === "text") capturedTextPart = textPart.text; + } + return { + content: [{ type: "text", text: "A cat." }], + finishReason: { unified: "stop", raw: undefined }, + usage: { + inputTokens: { + total: 10, + noCache: 10, + cacheRead: undefined, + cacheWrite: undefined, + }, + outputTokens: { total: 5, text: 5, reasoning: undefined }, + }, + warnings: [], + }; + }, + }); + + await generateAltText({ + model, + imageUrl: DATA_URL, + language: "en", + context: "My home office setup", + }); + + assert.ok(capturedTextPart?.includes("My home office setup")); +}); + +test("generateAltText() does not add context hint when context is absent", async () => { + let capturedTextPart: string | undefined; + const model = new MockLanguageModelV3({ + doGenerate: async (options) => { + const userMsg = options.prompt.find((m) => m.role === "user"); + if (userMsg?.role === "user") { + const textPart = userMsg.content.find((p) => p.type === "text"); + if (textPart?.type === "text") capturedTextPart = textPart.text; + } + return { + content: [{ type: "text", text: "A photo." }], + finishReason: { unified: "stop", raw: undefined }, + usage: { + inputTokens: { + total: 10, + noCache: 10, + cacheRead: undefined, + cacheWrite: undefined, + }, + outputTokens: { total: 5, text: 5, reasoning: undefined }, + }, + warnings: [], + }; + }, + }); + + await generateAltText({ model, imageUrl: DATA_URL, language: "en" }); + + assert.ok(capturedTextPart != null); + assert.ok( + !capturedTextPart.toLowerCase().includes("context:"), + "no context hint should appear when context is absent", + ); +}); diff --git a/ai/alttext.ts b/ai/alttext.ts new file mode 100644 index 000000000..9028c4967 --- /dev/null +++ b/ai/alttext.ts @@ -0,0 +1,65 @@ +import { readdir, readFile } from "node:fs/promises"; +import { join } from "node:path"; +import { + isLocale, + type Locale, + negotiateLocale, +} from "@hackerspub/models/i18n"; +import { generateText, type LanguageModel } from "ai"; + +const MAX_CONTEXT_LENGTH = 1000; +const MAX_ALT_TEXT_TOKENS = 200; + +const PROMPT_LANGUAGES: Locale[] = ( + await readdir( + join(import.meta.dirname!, "prompts", "alttext"), + { withFileTypes: true }, + ) +).map((f) => f.name.replace(/\.md$/, "")).filter(isLocale); + +async function getAltTextPrompt(language: string): Promise { + const locale = new Intl.Locale(language); + const promptLocale = negotiateLocale(locale, PROMPT_LANGUAGES) ?? + new Intl.Locale("en"); + const promptPath = join( + import.meta.dirname!, + "prompts", + "alttext", + `${promptLocale.baseName}.md`, + ); + return await readFile(promptPath, "utf8"); +} + +export interface AltTextOptions { + model: LanguageModel; + imageUrl: string; + language: string; + context?: string; +} + +export async function generateAltText( + options: AltTextOptions, +): Promise { + const { model, imageUrl, language } = options; + const context = options.context?.slice(0, MAX_CONTEXT_LENGTH); + const systemPrompt = await getAltTextPrompt(language); + + const textContent = context + ? `Generate alt text for this image. Context from the accompanying note: ${context}` + : "Generate alt text for this image."; + + const result = await generateText({ + model, + system: systemPrompt, + maxOutputTokens: MAX_ALT_TEXT_TOKENS, + messages: [{ + role: "user", + content: [ + { type: "image", image: new URL(imageUrl) }, + { type: "text", text: textContent }, + ], + }], + }); + + return result.text.trim(); +} diff --git a/ai/deno.json b/ai/deno.json index c835860e6..dcccb0393 100644 --- a/ai/deno.json +++ b/ai/deno.json @@ -3,6 +3,7 @@ "version": "0.2.0", "exports": { ".": "./mod.ts", + "./alttext": "./alttext.ts", "./summary": "./summary.ts", "./translate": "./translate.ts" } diff --git a/ai/mod.ts b/ai/mod.ts index 449216320..f800e830f 100644 --- a/ai/mod.ts +++ b/ai/mod.ts @@ -1,2 +1,3 @@ +export { generateAltText } from "./alttext.ts"; export { summarize } from "./summary.ts"; export { translate } from "./translate.ts"; diff --git a/ai/package.json b/ai/package.json index 9812eda2c..3e760df0c 100644 --- a/ai/package.json +++ b/ai/package.json @@ -3,6 +3,7 @@ "type": "module", "exports": { ".": "./mod.ts", + "./alttext": "./alttext.ts", "./*": "./*.ts" }, "dependencies": { diff --git a/ai/prompts/alttext/en.md b/ai/prompts/alttext/en.md new file mode 100644 index 000000000..3ce9d5a53 --- /dev/null +++ b/ai/prompts/alttext/en.md @@ -0,0 +1,9 @@ +You are an accessibility assistant. Generate concise, descriptive alt text for the provided image so that visually impaired users understand what the image shows. + +Rules: +- Write 1–3 short sentences describing the image objectively. +- Focus on the main subject, action, setting, and any relevant text visible in the image. +- Do not begin with "Image of", "Photo of", or similar redundant phrases. +- Do not include personal opinions or interpretations. +- Write in English. +- Keep it under 150 characters when possible. diff --git a/ai/prompts/alttext/ja.md b/ai/prompts/alttext/ja.md new file mode 100644 index 000000000..c65a40888 --- /dev/null +++ b/ai/prompts/alttext/ja.md @@ -0,0 +1,9 @@ +あなたはアクセシビリティアシスタントです。視覚障害のあるユーザーが画像の内容を理解できるよう、提供された画像に対して簡潔で説明的な代替テキストを生成してください。 + +ルール: +- 画像を客観的に説明する1〜3つの短い文を書きます。 +- 主な被写体、動作、背景、画像に見えるテキストに焦点を当てます。 +- 「画像:」「写真:」などの不要な接頭辞で始めないでください。 +- 個人的な意見や解釈は含めないでください。 +- 日本語で書きます。 +- できるだけ150文字以内に収めます。 diff --git a/ai/prompts/alttext/ko.md b/ai/prompts/alttext/ko.md new file mode 100644 index 000000000..fb4d7784e --- /dev/null +++ b/ai/prompts/alttext/ko.md @@ -0,0 +1,9 @@ +당신은 접근성 보조 도구입니다. 시각 장애인 사용자가 이미지의 내용을 이해할 수 있도록 제공된 이미지에 대한 간결하고 서술적인 대체 텍스트를 생성하세요. + +규칙: +- 이미지를 객관적으로 설명하는 1–3개의 짧은 문장을 작성합니다. +- 주요 피사체, 행동, 배경, 이미지에 보이는 관련 텍스트에 초점을 맞춥니다. +- "이미지:", "사진:", "그림:" 등 불필요한 접두사로 시작하지 않습니다. +- 개인적인 의견이나 해석은 포함하지 않습니다. +- 한국어로 작성합니다. +- 가능한 한 150자 이내로 유지합니다. diff --git a/ai/prompts/alttext/zh-CN.md b/ai/prompts/alttext/zh-CN.md new file mode 100644 index 000000000..2326a56af --- /dev/null +++ b/ai/prompts/alttext/zh-CN.md @@ -0,0 +1,9 @@ +您是一个无障碍辅助工具。请为提供的图像生成简洁、描述性的替代文本,以便视觉障碍用户了解图像内容。 + +规则: +- 用1–3个简短句子客观描述图像。 +- 重点关注图像中的主要主体、动作、背景及可见文本。 +- 不要以"图像:"、"照片:"等冗余短语开头。 +- 不包含个人意见或解读。 +- 用简体中文书写。 +- 尽量控制在150个字符以内。 diff --git a/ai/prompts/alttext/zh-TW.md b/ai/prompts/alttext/zh-TW.md new file mode 100644 index 000000000..c397c5378 --- /dev/null +++ b/ai/prompts/alttext/zh-TW.md @@ -0,0 +1,9 @@ +您是一個無障礙輔助工具。請為提供的圖像生成簡潔、描述性的替代文字,以便視覺障礙使用者了解圖像內容。 + +規則: +- 用1–3個簡短句子客觀描述圖像。 +- 重點關注圖像中的主要主體、動作、背景及可見文字。 +- 不要以「圖像:」、「照片:」等冗贅短語開頭。 +- 不包含個人意見或解讀。 +- 用繁體中文書寫。 +- 盡量控制在150個字元以內。 diff --git a/deno.lock b/deno.lock index 0d9e1bf39..03e9da3e9 100644 --- a/deno.lock +++ b/deno.lock @@ -177,6 +177,7 @@ "npm:@types/node@*": "24.2.0", "npm:@types/relay-runtime@^19.0.2": "19.0.2", "npm:@vertana/context-web@~0.1.1": "0.1.1_@standard-schema+spec@1.1.0_ai@6.0.108__zod@4.2.1", + "npm:ai@6": "6.0.108_zod@4.2.1", "npm:ai@^6.0.3": "6.0.108_zod@4.2.1", "npm:ai@^6.0.86": "6.0.108_zod@4.2.1", "npm:apns2@^12.2.0": "12.2.0", diff --git a/graphql/ai.ts b/graphql/ai.ts index 3da47f75f..489cad3f8 100644 --- a/graphql/ai.ts +++ b/graphql/ai.ts @@ -1,5 +1,6 @@ import { anthropic } from "@ai-sdk/anthropic"; import { google } from "@ai-sdk/google"; +export const altTextGenerator = google("gemini-3.1-flash-lite-preview"); export const summarizer = google("gemini-3-flash-preview"); -export const translator = anthropic("claude-sonnet-4-5-20250929"); +export const translator = anthropic("claude-sonnet-4-6"); diff --git a/graphql/builder.ts b/graphql/builder.ts index f4277652d..bd1c70bc7 100644 --- a/graphql/builder.ts +++ b/graphql/builder.ts @@ -13,6 +13,7 @@ import type { Transport } from "@upyo/core"; import { getTableConfig } from "drizzle-orm/pg-core"; import type DataLoader from "dataloader"; import type { Disk } from "flydrive"; +import type { LanguageModel } from "ai"; import { GraphQLScalarType, Kind } from "graphql"; import { DateResolver, @@ -42,6 +43,7 @@ export type ValuesOfEnumType = T extends PothosSchemaTypes.EnumRef ? V : never; export interface ServerContext { + altTextGenerator: LanguageModel; db: Database; kv: Keyv; disk: Disk; diff --git a/graphql/main.ts b/graphql/main.ts index a4775ce98..739c7a5b3 100644 --- a/graphql/main.ts +++ b/graphql/main.ts @@ -41,6 +41,7 @@ Deno.serve({ port: 8080 }, async (req, info) => { return federation.fetch(req, { contextData: { db, kv, disk, models } }); } return yogaServer.fetch(req, { + altTextGenerator: models.altTextGenerator, db, kv, disk, diff --git a/graphql/medium.test.ts b/graphql/medium.test.ts new file mode 100644 index 000000000..559d54c90 --- /dev/null +++ b/graphql/medium.test.ts @@ -0,0 +1,203 @@ +import { assertEquals } from "@std/assert/equals"; +import { encodeGlobalID } from "@pothos/plugin-relay"; +import { execute, parse } from "graphql"; +import { MockLanguageModelV3 } from "ai/test"; +import { mediumTable } from "@hackerspub/models/schema"; +import { generateUuidV7 } from "@hackerspub/models/uuid"; +import { schema } from "./mod.ts"; +import { + insertAccountWithActor, + makeGuestContext, + makeUserContext, + withRollback, +} from "../test/postgres.ts"; + +// MockLanguageModelV3 declares support for the test disk URL pattern so +// the AI SDK does not attempt to download the image during tests. +const TEST_MEDIUM_URL_PATTERN = /^http:\/\/localhost\/media\/.+/; + +function makeAltTextModel(responseText: string): MockLanguageModelV3 { + return new MockLanguageModelV3({ + supportedUrls: { "image/*": [TEST_MEDIUM_URL_PATTERN] }, + doGenerate: async () => ({ + content: [{ type: "text", text: responseText }], + finishReason: { unified: "stop", raw: undefined }, + usage: { + inputTokens: { + total: 10, + noCache: 10, + cacheRead: undefined, + cacheWrite: undefined, + }, + outputTokens: { total: 5, text: 5, reasoning: undefined }, + }, + warnings: [], + }), + }); +} + +const generatedAltTextQuery = parse(` + query GeneratedAltText($id: ID!, $language: Locale!) { + node(id: $id) { + ... on Medium { + generatedAltText(language: $language) + } + } + } +`); + +const generatedAltTextWithContextQuery = parse(` + query GeneratedAltTextWithContext($id: ID!, $language: Locale!, $context: String) { + node(id: $id) { + ... on Medium { + generatedAltText(language: $language, context: $context) + } + } + } +`); + +Deno.test({ + name: "Medium.generatedAltText returns errors for guests", + sanitizeOps: false, + sanitizeResources: false, + async fn() { + await withRollback(async (tx) => { + const mediumId = generateUuidV7(); + await tx.insert(mediumTable).values({ + id: mediumId, + key: `test/medium-${mediumId}.webp`, + type: "image/webp", + }); + + const relayId = encodeGlobalID("Medium", mediumId); + const ctx = makeGuestContext(tx, { + altTextGenerator: makeAltTextModel("A test image."), + }); + + const result = await execute({ + schema, + document: generatedAltTextQuery, + contextValue: ctx, + variableValues: { id: relayId, language: "en" }, + }); + + assertEquals( + result.errors != null, + true, + "should return errors for guest", + ); + }); + }, +}); + +Deno.test({ + name: + "Medium.generatedAltText returns generated text for authenticated users", + sanitizeOps: false, + sanitizeResources: false, + async fn() { + await withRollback(async (tx) => { + const { account } = await insertAccountWithActor(tx, { + username: "alttext_auth_test", + name: "Test User", + email: "alttext_auth@example.com", + }); + + const mediumId = generateUuidV7(); + await tx.insert(mediumTable).values({ + id: mediumId, + key: `test/medium-${mediumId}.webp`, + type: "image/webp", + }); + + const relayId = encodeGlobalID("Medium", mediumId); + const ctx = makeUserContext(tx, account, { + altTextGenerator: makeAltTextModel( + "A cheerful cat sitting on a keyboard.", + ), + }); + + const result = await execute({ + schema, + document: generatedAltTextQuery, + contextValue: ctx, + variableValues: { id: relayId, language: "en" }, + }); + + assertEquals(result.errors, undefined); + const node = (result.data as { node: { generatedAltText: string } }).node; + assertEquals( + node.generatedAltText, + "A cheerful cat sitting on a keyboard.", + ); + }); + }, +}); + +Deno.test({ + name: "Medium.generatedAltText passes context to the AI model", + sanitizeOps: false, + sanitizeResources: false, + async fn() { + await withRollback(async (tx) => { + const { account } = await insertAccountWithActor(tx, { + username: "alttext_ctx_test", + name: "Test User", + email: "alttext_ctx@example.com", + }); + + const mediumId = generateUuidV7(); + await tx.insert(mediumTable).values({ + id: mediumId, + key: `test/medium-${mediumId}.webp`, + type: "image/webp", + }); + + let capturedTextPart: string | undefined; + const model = new MockLanguageModelV3({ + supportedUrls: { "image/*": [TEST_MEDIUM_URL_PATTERN] }, + doGenerate: async (options) => { + const userMsg = options.prompt.find((m) => m.role === "user"); + if (userMsg?.role === "user") { + const textPart = userMsg.content.find((p) => p.type === "text"); + if (textPart?.type === "text") capturedTextPart = textPart.text; + } + return { + content: [{ type: "text", text: "A description." }], + finishReason: { unified: "stop", raw: undefined }, + usage: { + inputTokens: { + total: 10, + noCache: 10, + cacheRead: undefined, + cacheWrite: undefined, + }, + outputTokens: { total: 5, text: 5, reasoning: undefined }, + }, + warnings: [], + }; + }, + }); + + const relayId = encodeGlobalID("Medium", mediumId); + const ctx = makeUserContext(tx, account, { altTextGenerator: model }); + + await execute({ + schema, + document: generatedAltTextWithContextQuery, + contextValue: ctx, + variableValues: { + id: relayId, + language: "en", + context: "My trip to the mountains", + }, + }); + + assertEquals( + capturedTextPart?.includes("My trip to the mountains"), + true, + "context should be passed to the AI model", + ); + }); + }, +}); diff --git a/graphql/post.ts b/graphql/post.ts index d5e07575e..22a02cea2 100644 --- a/graphql/post.ts +++ b/graphql/post.ts @@ -1,3 +1,4 @@ +import { generateAltText } from "@hackerspub/ai/alttext"; import { getLogger } from "@logtape/logtape"; import { drizzleConnectionHelpers } from "@pothos/plugin-drizzle"; import { unreachable } from "@std/assert"; @@ -715,6 +716,31 @@ export const Medium = builder.drizzleNode("mediumTable", { }), }); +builder.drizzleObjectField(Medium, "generatedAltText", (t) => + t.string({ + nullable: true, + description: "AI-generated alternative text for this medium. " + + "Requires authentication. High-complexity operation (cost 1000). " + + "The context argument is truncated server-side to 1000 characters. " + + "Note: authorization is based on knowing the medium's ID (UUID); " + + "the mediumTable has no owner column.", + complexity: 1000, + args: { + language: t.arg({ type: "Locale", required: true }), + context: t.arg({ type: "String", required: false }), + }, + async resolve(medium, args, ctx) { + if ((await ctx.session) == null) throw new NotAuthenticatedError(); + const imageUrl = await ctx.disk.getUrl(medium.key); + return await generateAltText({ + model: ctx.altTextGenerator, + imageUrl, + language: (args.language as Intl.Locale).baseName, + context: args.context ?? undefined, + }); + }, + })); + const MediumUploadHeader = builder.simpleObject("MediumUploadHeader", { fields: (t) => ({ name: t.string(), diff --git a/graphql/schema.graphql b/graphql/schema.graphql index 348134641..ba40ef339 100644 --- a/graphql/schema.graphql +++ b/graphql/schema.graphql @@ -754,6 +754,11 @@ type Medium implements Node { """SHA-256 hash of the normalized stored content, if known.""" contentHash: Sha256 created: DateTime! + + """ + AI-generated alternative text for this medium. Requires authentication. High-complexity operation (cost 1000). The context argument is truncated server-side to 1000 characters. Note: authorization is based on knowing the medium's ID (UUID); the mediumTable has no owner column. + """ + generatedAltText(context: String, language: Locale!): String height: Int id: ID! diff --git a/test/postgres.ts b/test/postgres.ts index 608cac6ae..93f85c6c6 100644 --- a/test/postgres.ts +++ b/test/postgres.ts @@ -4,6 +4,7 @@ import { sql } from "drizzle-orm"; import type { ContextData } from "@hackerspub/models/context"; import type { Transaction } from "@hackerspub/models/db"; import type { Transport } from "@upyo/core"; +import { MockLanguageModelV3 } from "ai/test"; import { accountEmailTable, accountTable, @@ -460,6 +461,17 @@ export function createFedCtx( } as unknown as RequestContext; } +function createNoopAltTextModel(): MockLanguageModelV3 { + return new MockLanguageModelV3({ + doGenerate: async () => { + throw new Error( + "altTextGenerator was called in a test that did not expect it. " + + "Pass altTextGenerator in overrides to handle this.", + ); + }, + }); +} + export function makeUserContext( tx: Transaction, account: AuthenticatedAccount, @@ -470,6 +482,7 @@ export function makeUserContext( const fedCtx = overrides.fedCtx ?? createFedCtx(tx, { kv }); return { + altTextGenerator: createNoopAltTextModel(), db: tx, kv, disk: createTestDisk() as UserContext["disk"], @@ -495,6 +508,7 @@ export function makeGuestContext( const fedCtx = overrides.fedCtx ?? createFedCtx(tx, { kv }); return { + altTextGenerator: createNoopAltTextModel(), db: tx, kv, disk: createTestDisk() as UserContext["disk"], diff --git a/web/ai.ts b/web/ai.ts index 3da47f75f..bf6dc694f 100644 --- a/web/ai.ts +++ b/web/ai.ts @@ -1,5 +1,6 @@ import { anthropic } from "@ai-sdk/anthropic"; import { google } from "@ai-sdk/google"; +export const altTextGenerator = google("gemini-3.1-flash-lite-preview"); export const summarizer = google("gemini-3-flash-preview"); export const translator = anthropic("claude-sonnet-4-5-20250929"); diff --git a/web/main.ts b/web/main.ts index 74d6e7d1f..8489ebef0 100644 --- a/web/main.ts +++ b/web/main.ts @@ -188,6 +188,7 @@ app.use(async (ctx) => { const disk = drive.use(); const graphqlContext: Context = { + altTextGenerator: models.altTextGenerator, db, kv, disk, From 4f06eecffa934df13d7665916e7740f3e11bf798 Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Thu, 7 May 2026 03:56:08 +0900 Subject: [PATCH 02/24] =?UTF-8?q?web-next:=20Add=20note=20image=20attachme?= =?UTF-8?q?nt=20UI=20(Phase=203=E2=80=934)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add startMediumUploadOnServer and finishMediumUploadOnServer server actions to uploadImage.ts, using the startMediumUpload / finishMediumUpload GraphQL API; finishMediumUpload now returns the Relay Medium ID needed for alt text generation - Add uploadMediumWithProgress.ts: client-side XHR upload with progress callback, timeout (5 min), onabort/ontimeout error handling, and session.method used from server response - Redesign NoteComposer to support image attachment: - Clipboard paste, drag-and-drop, and file-dialog attachment - Per-image upload progress bar with overlay - Alt text textarea (required before submission) - "Auto-fill" button that calls Medium.generatedAltText via Relay - Max 20 images enforced in UI - Object URL cleanup on unmount and on failed upload - Drop handler gated on dataTransfer.types.includes("Files") - Visibility selector wrapped in role=group for screen-reader label - Add translations for all 11 new i18n strings across ja-JP, ko-KR, zh-CN, zh-TW Assisted-by: Claude Code:claude-sonnet-4-6 --- web-next/src/components/NoteComposer.tsx | 408 ++++++++++++++++++- web-next/src/lib/uploadImage.ts | 137 +++++++ web-next/src/lib/uploadMediumWithProgress.ts | 59 +++ web-next/src/locales/en-US/messages.po | 160 +++++--- web-next/src/locales/ja-JP/messages.po | 160 +++++--- web-next/src/locales/ko-KR/messages.po | 160 +++++--- web-next/src/locales/zh-CN/messages.po | 160 +++++--- web-next/src/locales/zh-TW/messages.po | 160 +++++--- 8 files changed, 1122 insertions(+), 282 deletions(-) create mode 100644 web-next/src/lib/uploadMediumWithProgress.ts diff --git a/web-next/src/components/NoteComposer.tsx b/web-next/src/components/NoteComposer.tsx index 62c81d402..39bf19ac3 100644 --- a/web-next/src/components/NoteComposer.tsx +++ b/web-next/src/components/NoteComposer.tsx @@ -1,7 +1,8 @@ import { fetchQuery, graphql } from "relay-runtime"; -import { createEffect, createSignal, onCleanup, Show } from "solid-js"; +import { createEffect, createSignal, For, onCleanup, Show } from "solid-js"; import { createMutation, useRelayEnvironment } from "solid-relay"; import { detectLanguage } from "~/lib/langdet.ts"; +import { uploadMediumFile } from "~/lib/uploadMediumWithProgress.ts"; import { LanguageSelect } from "~/components/LanguageSelect.tsx"; import { MentionAutocomplete } from "~/components/MentionAutocomplete.tsx"; import { @@ -25,6 +26,7 @@ import IconX from "~icons/lucide/x"; import type { NoteComposerMutation } from "./__generated__/NoteComposerMutation.graphql.ts"; import type { NoteComposerPostByUrlQuery } from "./__generated__/NoteComposerPostByUrlQuery.graphql.ts"; import type { NoteComposerQuotedPostQuery } from "./__generated__/NoteComposerQuotedPostQuery.graphql.ts"; +import type { NoteComposerGeneratedAltTextQuery } from "./__generated__/NoteComposerGeneratedAltTextQuery.graphql.ts"; const NoteComposerMutation = graphql` mutation NoteComposerMutation($input: CreateNoteInput!) { @@ -82,6 +84,41 @@ const NoteComposerPostByUrlQuery = graphql` } `; +const NoteComposerGeneratedAltTextQuery = graphql` + query NoteComposerGeneratedAltTextQuery( + $mediumId: ID! + $language: Locale! + $context: String + ) { + node(id: $mediumId) { + ... on Medium { + generatedAltText(language: $language, context: $context) + } + } + } +`; + +const SUPPORTED_IMAGE_TYPES = [ + "image/jpeg", + "image/png", + "image/gif", + "image/webp", +]; + +const MAX_MEDIA = 20; + +interface MediaItem { + localId: string; + file: File; + previewUrl: string; + alt: string; + mediumRelayId?: string; + uuid?: string; + uploading: boolean; + uploadProgress: number; + generatingAlt: boolean; +} + interface QuotedPostPreview { typename: "Note" | "Article"; excerpt: string; @@ -121,7 +158,17 @@ export function NoteComposer(props: NoteComposerProps) { const [createNote, isCreating] = createMutation( NoteComposerMutation, ); + const [mediaItems, setMediaItems] = createSignal([]); + const [isDraggingOver, setIsDraggingOver] = createSignal(false); let textareaRef: HTMLTextAreaElement | undefined; + let fileInputRef: HTMLInputElement | undefined; + + // Revoke all object URLs on cleanup to avoid memory leaks. + onCleanup(() => { + for (const item of mediaItems()) { + URL.revokeObjectURL(item.previewUrl); + } + }); // Fetch quoted post preview when quotedPostId changes createEffect(() => { @@ -184,7 +231,87 @@ export function NoteComposer(props: NoteComposerProps) { } }); + const addFiles = (files: FileList | File[]) => { + const fileArray = Array.from(files).filter((f) => + SUPPORTED_IMAGE_TYPES.includes(f.type) + ); + if (fileArray.length === 0) return; + + const current = mediaItems(); + const remaining = MAX_MEDIA - current.length; + if (remaining <= 0) { + showToast({ + title: t`Error`, + description: t`You can attach up to ${MAX_MEDIA} images`, + variant: "error", + }); + return; + } + + const toAdd = fileArray.slice(0, remaining); + const newItems: MediaItem[] = toAdd.map((file) => ({ + localId: crypto.randomUUID(), + file, + previewUrl: URL.createObjectURL(file), + alt: "", + uploading: true, + uploadProgress: 0, + generatingAlt: false, + })); + + setMediaItems((prev) => [...prev, ...newItems]); + + for (const item of newItems) { + uploadMediumFile(item.file, (progress) => { + setMediaItems((prev) => + prev.map((m) => + m.localId === item.localId ? { ...m, uploadProgress: progress } : m + ) + ); + }).then((result) => { + setMediaItems((prev) => + prev.map((m) => + m.localId === item.localId + ? { + ...m, + uploading: false, + uploadProgress: 100, + uuid: result.uuid, + mediumRelayId: result.mediumRelayId, + } + : m + ) + ); + }).catch(() => { + setMediaItems((prev) => { + const failed = prev.find((m) => m.localId === item.localId); + if (failed) URL.revokeObjectURL(failed.previewUrl); + return prev.filter((m) => m.localId !== item.localId); + }); + showToast({ + title: t`Error`, + description: t`Failed to upload image`, + variant: "error", + }); + }); + } + }; + const handlePaste = (e: ClipboardEvent) => { + // Check for pasted images first + const files = e.clipboardData?.files; + if (files && files.length > 0) { + const imageFiles = Array.from(files).filter((f) => + SUPPORTED_IMAGE_TYPES.includes(f.type) + ); + if (imageFiles.length > 0) { + e.preventDefault(); + addFiles(imageFiles); + return; + } + } + + // Fall through to URL-paste-to-quote logic if (effectiveQuotedPostId()) return; const text = e.clipboardData?.getData("text/plain")?.trim(); if (!text || !URL.canParse(text) || !text.match(/^https?:/)) return; @@ -234,6 +361,9 @@ export function NoteComposer(props: NoteComposerProps) { }; const resetForm = () => { + for (const item of mediaItems()) { + URL.revokeObjectURL(item.previewUrl); + } setContent(""); setVisibility("PUBLIC"); setLanguage(new Intl.Locale(i18n.locale)); @@ -241,6 +371,7 @@ export function NoteComposer(props: NoteComposerProps) { setQuotedPost(null); setPastedQuoteId(null); setQuoteFetchError(false); + setMediaItems([]); }; const handleSubmit = (e: Event) => { @@ -256,6 +387,24 @@ export function NoteComposer(props: NoteComposerProps) { return; } + const items = mediaItems(); + if (items.some((m) => m.uploading)) { + showToast({ + title: t`Error`, + description: t`All images must finish uploading before posting`, + variant: "error", + }); + return; + } + if (items.some((m) => !m.alt.trim())) { + showToast({ + title: t`Error`, + description: t`All images require alt text`, + variant: "error", + }); + return; + } + createNote({ variables: { input: { @@ -264,6 +413,11 @@ export function NoteComposer(props: NoteComposerProps) { visibility: visibility(), quotedPostId: effectiveQuotedPostId() ?? null, replyTargetId: props.replyTargetId ?? null, + media: items.map((m) => ({ + mediumId: m + .uuid! as `${string}-${string}-${string}-${string}-${string}`, + alt: m.alt.trim(), + })), }, }, onCompleted(response) { @@ -301,9 +455,89 @@ export function NoteComposer(props: NoteComposerProps) { }); }; + const handleGenerateAlt = (localId: string) => { + const item = mediaItems().find((m) => m.localId === localId); + if (!item?.mediumRelayId) return; + + setMediaItems((prev) => + prev.map((m) => m.localId === localId ? { ...m, generatingAlt: true } : m) + ); + + fetchQuery( + environment(), + NoteComposerGeneratedAltTextQuery, + { + mediumId: item.mediumRelayId, + language: language()?.baseName ?? i18n.locale, + context: content().trim() || undefined, + }, + ).subscribe({ + next(data) { + const medium = data.node; + if (medium && "generatedAltText" in medium) { + setMediaItems((prev) => + prev.map((m) => + m.localId === localId + ? { + ...m, + generatingAlt: false, + alt: medium.generatedAltText ?? m.alt, + } + : m + ) + ); + } else { + setMediaItems((prev) => + prev.map((m) => + m.localId === localId ? { ...m, generatingAlt: false } : m + ) + ); + } + }, + error() { + setMediaItems((prev) => + prev.map((m) => + m.localId === localId ? { ...m, generatingAlt: false } : m + ) + ); + showToast({ + title: t`Error`, + description: t`Failed to generate alt text`, + variant: "error", + }); + }, + }); + }; + return ( -
-
+ { + if ( + e.dataTransfer?.types.includes("Files") && + mediaItems().length < MAX_MEDIA + ) { + e.preventDefault(); + setIsDraggingOver(true); + } + }} + onDragLeave={() => setIsDraggingOver(false)} + onDrop={(e) => { + setIsDraggingOver(false); + if (!e.dataTransfer?.types.includes("Files")) return; + e.preventDefault(); + const files = e.dataTransfer.files; + if (files) addFiles(files); + }} + > +
{/* Quoted post preview */}
@@ -413,25 +647,174 @@ export function NoteComposer(props: NoteComposerProps) { textareaRef} onComplete={() => { - // Update content signal after autocomplete inserts text if (textareaRef) setContent(textareaRef.value); }} /> -
- + + {/* Toolbar: language, visibility, attach button */} +
-
-
- - + +
+ + (fileInputRef = el)} + type="file" + accept={SUPPORTED_IMAGE_TYPES.join(",")} + multiple + class="hidden" + onChange={(e) => { + const files = e.currentTarget.files; + if (files) addFiles(files); + e.currentTarget.value = ""; + }} />
+ + {/* Media previews */} + 0}> +
+ + {(item) => ( +
+ {/* Thumbnail with progress overlay */} +
+ + +
+ + + {item.uploadProgress}% + +
+
+
+ + {/* Alt text input + controls */} +
+