AI Text Refinements (#329)

* Bump deps

* Add openai object generation with zod

* Streamline AI query instructions

* Use single AI request when editing uploads

* Clean up AI text from object requests

* Fix AI text formatting logic

* Reorganize upload network requests
This commit is contained in:
Sam Becker 2025-09-27 19:33:14 -05:00 committed by GitHub
parent ffe7fe7b3d
commit 9819a2b81e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 693 additions and 690 deletions

View File

@ -8,7 +8,6 @@ import {
} from '@/photo/cache';
import UploadPageClient from '@/photo/UploadPageClient';
import {
AI_TEXT_AUTO_GENERATED_FIELDS,
AI_CONTENT_GENERATION_ENABLED,
BLUR_ENABLED,
} from '@/app/config';
@ -16,6 +15,7 @@ import ErrorNote from '@/components/ErrorNote';
import { getRecipeTitleForData } from '@/photo/query';
import { getAlbumsWithMeta } from '@/album/query';
import { addAiTextToFormData } from '@/photo/ai/server';
import AppGrid from '@/components/AppGrid';
export const maxDuration = 60;
@ -28,17 +28,27 @@ export default async function UploadPage({ params, searchParams }: Params) {
const uploadPath = (await params).uploadPath;
const title = (await searchParams)[PARAM_UPLOAD_TITLE];
const {
blobId,
formDataFromExif: _formDataFromExif,
imageResizedBase64: imageThumbnailBase64,
shouldStripGpsData,
error,
} = await extractImageDataFromBlobPath(uploadPath, {
includeInitialPhotoFields: true,
generateBlurData: BLUR_ENABLED,
generateResizedImage: AI_CONTENT_GENERATION_ENABLED,
});
const [
albums,
uniqueRecipes,
uniqueFilms,
uniqueTags, {
blobId,
formDataFromExif: _formDataFromExif,
imageResizedBase64: imageThumbnailBase64,
shouldStripGpsData,
error,
}] = await Promise.all([
getAlbumsWithMeta(),
getUniqueRecipesCached(),
getUniqueFilmsCached(),
getUniqueTagsCached(),
extractImageDataFromBlobPath(uploadPath, {
includeInitialPhotoFields: true,
generateBlurData: BLUR_ENABLED,
generateResizedImage: AI_CONTENT_GENERATION_ENABLED,
}),
]);
const isDataMissing =
!_formDataFromExif ||
@ -50,31 +60,23 @@ export default async function UploadPage({ params, searchParams }: Params) {
}
const [
albums,
uniqueTags,
uniqueRecipes,
uniqueFilms,
recipeTitle,
formDataFromExif,
] = await Promise.all([
getAlbumsWithMeta(),
getUniqueTagsCached(),
getUniqueRecipesCached(),
getUniqueFilmsCached(),
_formDataFromExif?.recipeData && _formDataFromExif.film
? getRecipeTitleForData(
_formDataFromExif.recipeData,
_formDataFromExif.recipeData,
_formDataFromExif.film,
)
: undefined,
addAiTextToFormData(
_formDataFromExif,
imageThumbnailBase64,
),
addAiTextToFormData({
formData: _formDataFromExif,
imageBase64: imageThumbnailBase64,
uniqueTags,
}),
]);
const hasAiTextGeneration = AI_CONTENT_GENERATION_ENABLED;
let textFieldsToAutoGenerate = AI_TEXT_AUTO_GENERATED_FIELDS;
if (formDataFromExif) {
if (recipeTitle) {
@ -82,8 +84,6 @@ export default async function UploadPage({ params, searchParams }: Params) {
}
if (typeof title === 'string') {
formDataFromExif.title = title;
textFieldsToAutoGenerate = textFieldsToAutoGenerate
.filter(field => field !== 'title');
}
}
@ -97,12 +97,13 @@ export default async function UploadPage({ params, searchParams }: Params) {
uniqueRecipes,
uniqueFilms,
hasAiTextGeneration,
textFieldsToAutoGenerate,
imageThumbnailBase64,
shouldStripGpsData,
}} />
: <ErrorNote>
{error ?? 'Unknown error'}
</ErrorNote>
: <AppGrid contentMain={
<ErrorNote>
{error ?? 'Unknown error'}
</ErrorNote>
}/>
);
};

View File

@ -8,12 +8,12 @@
"test": "jest --watch --transformIgnorePatterns 'node_modules/(?!my-library-dir)/'",
"analyze": "ANALYZE=true next build"
},
"packageManager": "pnpm@10.17.0",
"packageManager": "pnpm@10.17.1",
"dependencies": {
"@ai-sdk/openai": "^2.0.32",
"@ai-sdk/rsc": "^1.0.48",
"@aws-sdk/client-s3": "3.893.0",
"@aws-sdk/s3-request-presigner": "3.893.0",
"@ai-sdk/openai": "^2.0.38",
"@ai-sdk/rsc": "^1.0.56",
"@aws-sdk/client-s3": "3.896.0",
"@aws-sdk/s3-request-presigner": "3.896.0",
"@radix-ui/react-dialog": "^1.1.15",
"@radix-ui/react-dropdown-menu": "^2.1.16",
"@radix-ui/react-tooltip": "^1.2.8",
@ -24,7 +24,7 @@
"@vercel/analytics": "^1.5.0",
"@vercel/blob": "^2.0.0",
"@vercel/speed-insights": "^1.2.0",
"ai": "^5.0.48",
"ai": "^5.0.56",
"camelcase-keys": "^10.0.0",
"clsx": "^2.1.1",
"cmdk": "^1.1.1",
@ -35,9 +35,9 @@
"extract-colors": "^4.2.1",
"fast-average-color": "^9.5.0",
"fast-deep-equal": "^3.1.3",
"framer-motion": "^12.23.16",
"nanoid": "^5.1.5",
"next": "15.5.3",
"framer-motion": "^12.23.22",
"nanoid": "^5.1.6",
"next": "15.5.4",
"next-auth": "5.0.0-beta.29",
"next-themes": "^0.4.6",
"pg": "^8.16.3",
@ -51,12 +51,13 @@
"swr": "^2.3.6",
"ts-exif-parser": "^0.2.2",
"use-debounce": "^10.0.6",
"viewerjs": "^1.11.7"
"viewerjs": "^1.11.7",
"zod": "^4.1.11"
},
"devDependencies": {
"@eslint/eslintrc": "^3.3.1",
"@next/bundle-analyzer": "15.5.3",
"@next/eslint-plugin-next": "^15.5.3",
"@next/bundle-analyzer": "15.5.4",
"@next/eslint-plugin-next": "^15.5.4",
"@stylistic/eslint-plugin": "^5.4.0",
"@tailwindcss/postcss": "^4.1.13",
"@testing-library/dom": "^10.4.1",
@ -66,12 +67,12 @@
"@types/jest": "^30.0.0",
"@types/node": "^24.5.2",
"@types/pg": "^8.15.5",
"@types/react": "19.1.13",
"@types/react": "19.1.14",
"@types/react-dom": "19.1.9",
"@types/sanitize-html": "^2.16.0",
"cross-fetch": "^4.1.0",
"eslint": "9.36.0",
"eslint-config-next": "15.5.3",
"eslint-config-next": "15.5.4",
"eslint-plugin-react-hooks": "^5.2.0",
"jest": "^30.1.3",
"jest-environment-jsdom": "^30.1.2",

982
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@ -10,7 +10,6 @@ import PhotoForm from './form/PhotoForm';
import { Tags } from '@/tag';
import usePhotoFormParent from './form/usePhotoFormParent';
import AiButton from './ai/AiButton';
import { AiAutoGeneratedField } from './ai';
import { useMemo } from 'react';
import { Recipes } from '@/recipe';
import { Films } from '@/film';
@ -24,7 +23,6 @@ export default function UploadPageClient({
uniqueRecipes,
uniqueFilms,
hasAiTextGeneration,
textFieldsToAutoGenerate,
imageThumbnailBase64,
shouldStripGpsData,
}: {
@ -35,7 +33,6 @@ export default function UploadPageClient({
uniqueRecipes: Recipes
uniqueFilms: Films
hasAiTextGeneration?: boolean
textFieldsToAutoGenerate?: AiAutoGeneratedField[],
imageThumbnailBase64?: string
shouldStripGpsData?: boolean
}) {
@ -49,7 +46,6 @@ export default function UploadPageClient({
aiContent,
} = usePhotoFormParent({
photoForm: formDataFromExif,
textFieldsToAutoGenerate,
imageThumbnailBase64,
});

View File

@ -47,7 +47,7 @@ import {
extractImageDataFromBlobPath,
propagateRecipeTitleIfNecessary,
} from './server';
import { TAG_FAVS, isPhotoFav, isTagFavs } from '@/tag';
import { TAG_FAVS, Tags, isPhotoFav, isTagFavs } from '@/tag';
import { convertPhotoToPhotoDbInsert, Photo } from '.';
import { runAuthenticatedAdminServerAction } from '@/auth/server';
import { AiImageQuery, getAiImageQuery, getAiTextFieldsToGenerate } from './ai';
@ -115,6 +115,7 @@ const addUpload = async ({
excludeFromFeeds,
takenAtLocal,
takenAtNaiveLocal,
uniqueTags: _uniqueTags,
onStreamUpdate,
onFinish,
shouldRevalidateAllKeysAndPaths,
@ -128,6 +129,7 @@ const addUpload = async ({
excludeFromFeeds?: string
takenAtLocal: string
takenAtNaiveLocal: string
uniqueTags?: Tags
onStreamUpdate?: (
statusMessage: string,
status?: UrlAddStatus['status'],
@ -155,21 +157,24 @@ const addUpload = async ({
const caption = formDataFromExif.caption;
const tags = _tags || formDataFromExif.tags;
const uniqueTags = _uniqueTags || await getUniqueTags();
const {
title: aiTitle,
caption: aiCaption,
tags: aiTags,
semanticDescription,
} = await generateAiImageQueries(
imageResizedBase64,
getAiTextFieldsToGenerate(
semantic,
} = await generateAiImageQueries({
imageBase64: imageResizedBase64,
textFieldsToGenerate: getAiTextFieldsToGenerate(
AI_TEXT_AUTO_GENERATED_FIELDS,
Boolean(title),
Boolean(caption),
Boolean(tags),
),
title,
);
existingTitle: title,
uniqueTags,
});
const form: Partial<PhotoFormData> = {
...formDataFromExif,
@ -179,7 +184,7 @@ const addUpload = async ({
excludeFromFeeds,
hidden,
favorite,
semanticDescription,
semanticDescription: semantic,
takenAt: formDataFromExif.takenAt || takenAtLocal,
takenAtNaive: formDataFromExif.takenAtNaive || takenAtNaiveLocal,
};
@ -254,6 +259,8 @@ export const addUploadsAction = async ({
progress: ++progress / PROGRESS_TASK_COUNT,
});
const uniqueTags = await getUniqueTags();
const albumIds = albumTitles
? await createAlbumsAndGetIds(albumTitles)
: [];
@ -276,6 +283,7 @@ export const addUploadsAction = async ({
excludeFromFeeds,
takenAtLocal,
takenAtNaiveLocal,
uniqueTags,
onStreamUpdate: streamUpdate,
onFinish: () => {
addedUploadUrls.push(url);
@ -561,6 +569,8 @@ export const syncPhotoAction = async (
),
});
const uniqueTags = await getUniqueTags();
let urlToDelete: string | undefined;
if (formDataFromExif) {
if (await shouldBackfillPhotoStorage(photo) || shouldStripGpsData) {
@ -582,13 +592,13 @@ export const syncPhotoAction = async (
title: atTitle,
caption: aiCaption,
tags: aiTags,
semanticDescription: aiSemanticDescription,
} = await generateAiImageQueries(
imageResizedBase64,
photo.updateStatus?.isMissingAiTextFields,
undefined,
semantic: aiSemanticDescription,
} = await generateAiImageQueries({
imageBase64: imageResizedBase64,
textFieldsToGenerate: photo.updateStatus?.isMissingAiTextFields ?? [],
isBatch,
);
uniqueTags,
});
const formDataFromPhoto = convertPhotoToFormData(photo);
@ -646,7 +656,7 @@ export const streamAiImageQueryAction = async (
const existingTags = await getUniqueTags();
return streamOpenAiImageQuery(
imageBase64,
getAiImageQuery(query, existingTags, existingTitle),
getAiImageQuery(query, existingTitle, existingTags),
);
});

View File

@ -2,6 +2,7 @@
import { Tags } from '@/tag';
import { parseCommaSeparatedKeyString } from '@/utility/key';
import { z } from 'zod';
export type AiAutoGeneratedField =
'title' |
@ -51,64 +52,72 @@ export type AiImageQuery =
'caption' |
'title-and-caption' |
'tags' |
'description-small' |
'description' |
'description-large' |
'description-semantic';
'semantic';
export const getAiImageQuery = (
query: AiImageQuery,
existingTags: Tags = [],
existingTitle?: string,
existingTags: Tags = [],
): string => {
switch (query) {
case 'title': return 'Write a compelling title for this image in 3 words or less';
case 'title': return 'Write a compelling title for this image in 3 words or less.';
case 'caption': return existingTitle
? `Write a pithy caption for this image in 6 words or less and no punctuation that complements the existing title: "${existingTitle}"`
: 'Write a pithy caption for this image in 6 words or less and no punctuation';
case 'title-and-caption': return 'Write a compelling title and pithy caption of 8 words or less for this image, using the format Title: "title" Caption: "caption"';
? `Write a pithy caption for this image in 6 words or less and no punctuation that complements the existing title: "${existingTitle}."`
: 'Write a pithy caption for this image in 6 words or less and no punctuation.';
case 'title-and-caption': return 'Write a compelling title and pithy caption of 8 words or less for this image, using the format Title: "title" Caption: "caption."';
case 'tags':
const tagQuery = 'Describe this image in 1-2 comma-separated unique keywords, with no adjective or adverbs. Avoid using general terms like "nature," "travel," "architecture," or "sky." Use terms that are highly specific to the image and not redundant.';
const tags = existingTags.map(({ tag }) => tag).join(', ');
return tags
? `${tagQuery}. Consider using some of these existing tags, but only if they are relevant: ${tags}.`
: tagQuery;
case 'description-small': return 'Describe this image succinctly without the initial text "This image shows" or "This is a picture of"';
case 'description': return 'Describe this image';
case 'description-large': return 'Describe this image in detail';
case 'description-semantic': return 'List up to 5 things in this image without description as a comma-separated list';
case 'semantic': return 'Describe this image succinctly without initial text like "This image shows" or "This is a picture of."';
}
};
const getAiImageQueryForField = (
field: AiAutoGeneratedField,
existingTags: Tags = [],
existingTitle?: string,
existingTags?: Tags,
) => {
switch(field) {
case 'title': return `TITLE: ${getAiImageQuery('title', existingTags, existingTitle)}`;
case 'caption': return `CAPTION: ${getAiImageQuery('caption', existingTags, existingTitle)}`;
case 'tags': return `TAGS: ${getAiImageQuery('tags', existingTags, existingTitle)}`;
case 'semantic': return `SEMANTIC: ${getAiImageQuery('description-small', existingTags, existingTitle)}`;
case 'title': return `TITLE: ${getAiImageQuery('title', existingTitle, existingTags)}`;
case 'caption': return `CAPTION: ${getAiImageQuery('caption', existingTitle, existingTags)}`;
case 'tags': return `TAGS: ${getAiImageQuery('tags', existingTitle, existingTags)}`;
case 'semantic': return `SEMANTIC: ${getAiImageQuery('semantic', existingTitle, existingTags)}`;
}
};
export const getAiImageryQuerySet = (
export const getAiImageQuerySchema = (
fields: AiAutoGeneratedField[],
existingTags: Tags = [],
existingTitle?: string,
existingTags?: Tags,
) => {
const query = ['Generate a suite of structured meta content for the attached image:' + '\n'];
const queryLines = [
'Generate a set of meta content for the attached image:\n',
];
fields.forEach(field => {
query.push(getAiImageQueryForField(field, existingTags, existingTitle));
queryLines.push(getAiImageQueryForField(field, existingTitle, existingTags));
});
const query = queryLines.join('\n');
let schema = z.object();
query.push(`\nRespond with a valid JSON object with the following format: { ${fields.map(field => `${field}: 'string'`).join(', ')} }`);
if (fields.includes('title')) {
schema = schema.extend({ title: z.string() }); }
if (fields.includes('caption')) {
schema = schema.extend({ caption: z.string() }); }
if (fields.includes('tags')) {
schema = schema.extend({ tags: z.string() }); }
if (fields.includes('semantic')) {
schema = schema.extend({ semantic: z.string() }); }
console.log(query.join('\n'));
return query.join('\n');
return {
query,
schema,
};
};
export const parseTitleAndCaption = (text: string) => {

View File

@ -1,133 +1,92 @@
import { generateOpenAiImageQuery } from '@/platforms/openai';
import { generateOpenAiImageObjectQuery } from '@/platforms/openai';
import {
AiAutoGeneratedField,
getAiImageQuery,
getAiImageQuerySchema,
getAiTextFieldsToGenerate,
parseTitleAndCaption,
} from '.';
import { getUniqueTags } from '@/photo/query';
import { AI_TEXT_AUTO_GENERATED_FIELDS } from '@/app/config';
import { PhotoFormData } from '../form';
import { Tags } from '@/tag';
export const generateAiImageQueries = async (
imageBase64?: string,
textFieldsToGenerate: AiAutoGeneratedField[] = [],
existingTitle?: string,
isBatch?: boolean,
): Promise<{
export const generateAiImageQueries = async ({
imageBase64,
textFieldsToGenerate = [],
existingTitle,
uniqueTags,
isBatch,
}: {
imageBase64?: string
textFieldsToGenerate: AiAutoGeneratedField[]
existingTitle?: string
uniqueTags: Tags
isBatch?: boolean
}): Promise<{
title?: string
caption?: string
tags?: string
semanticDescription?: string
semantic?: string
error?: string
}> => {
let title: string | undefined;
let caption: string | undefined;
let tags: string | undefined;
let semanticDescription: string | undefined;
let error: string | undefined;
try {
if (imageBase64) {
const shouldGenerateTitleAndCaption =
textFieldsToGenerate.includes('title') &&
textFieldsToGenerate.includes('caption');
const shouldGenerateTitle =
!shouldGenerateTitleAndCaption &&
textFieldsToGenerate.includes('title');
const shouldGenerateCaption =
!shouldGenerateTitleAndCaption &&
textFieldsToGenerate.includes('caption');
const shouldGenerateTags = textFieldsToGenerate.includes('tags');
const shouldGenerateSemantic = textFieldsToGenerate.includes('semantic');
const [
titleAndCaption,
_title,
_caption,
_tags,
_semanticDescription,
] = await Promise.all([
shouldGenerateTitleAndCaption ? generateOpenAiImageQuery(
imageBase64,
getAiImageQuery('title-and-caption'),
isBatch,
): undefined,
shouldGenerateTitle ? generateOpenAiImageQuery(
imageBase64,
getAiImageQuery('title', undefined, existingTitle),
isBatch,
): undefined,
shouldGenerateCaption ? generateOpenAiImageQuery(
imageBase64,
getAiImageQuery('caption'),
isBatch,
): undefined,
shouldGenerateTags ? getUniqueTags()
.then(existingTags => generateOpenAiImageQuery(
imageBase64,
getAiImageQuery('tags', existingTags),
isBatch,
)): undefined,
shouldGenerateSemantic ? generateOpenAiImageQuery(
imageBase64,
getAiImageQuery('description-small'),
isBatch,
): undefined,
]);
if (titleAndCaption) {
const titleAndCaptionParsed = parseTitleAndCaption(titleAndCaption);
title = titleAndCaptionParsed.title;
caption = titleAndCaptionParsed.caption;
} else {
title = _title;
caption = _caption;
}
tags = _tags;
semanticDescription = _semanticDescription;
if (imageBase64) {
try {
const { query, schema } = getAiImageQuerySchema(
textFieldsToGenerate,
existingTitle,
uniqueTags,
);
return generateOpenAiImageObjectQuery(
imageBase64,
query,
schema,
isBatch,
);
} catch (e: any) {
return {
error: e.message,
};
}
} catch (e: any) {
error = e.message;
console.log('Error generating AI image text', e.message);
} else {
return {
error: 'No image provided',
};
}
return {
title,
caption,
tags,
semanticDescription,
error,
};
};
export const addAiTextToFormData = async (
formData: Partial<PhotoFormData> = {},
imageBase64?: string,
title?: string,
tags?: string,
): Promise<Partial<PhotoFormData>> => {
export const addAiTextToFormData = async ({
formData = {},
imageBase64,
existingTitle,
existingTags,
uniqueTags,
}: {
formData?: Partial<PhotoFormData>
imageBase64?: string
existingTitle?: string
existingTags?: string
uniqueTags: Tags
}): Promise<Partial<PhotoFormData>> => {
const {
title: aiTitle,
caption: aiCaption,
tags: aiTags,
semanticDescription,
} = await generateAiImageQueries(
semantic,
} = await generateAiImageQueries({
imageBase64,
getAiTextFieldsToGenerate(
textFieldsToGenerate: getAiTextFieldsToGenerate(
AI_TEXT_AUTO_GENERATED_FIELDS,
Boolean(title || formData?.title),
Boolean(existingTitle || formData?.title),
Boolean(formData?.caption),
Boolean(tags || formData?.tags),
Boolean(existingTags || formData?.tags),
),
title || formData?.title,
);
existingTitle: existingTitle || formData?.title,
uniqueTags,
});
return {
...formData,
title: formData?.title || aiTitle,
caption: formData?.caption || aiCaption,
tags: formData?.tags || aiTags,
semanticDescription,
semanticDescription: semantic,
};
};

View File

@ -43,7 +43,7 @@ export default function useAiImageQueries(
requestSemantic,
semanticDescription,
isLoadingSemantic,
] = useAiImageQuery(imageBase64, 'description-small');
] = useAiImageQuery(imageBase64, 'semantic');
const title = _title || titleSolo;
const caption = _caption || captionSolo;

View File

@ -1,15 +1,12 @@
import { useCallback, useState } from 'react';
import { PhotoFormData, formHasExistingAiTextContent } from '.';
import useAiImageQueries from '../ai/useAiImageQueries';
import { AiAutoGeneratedField } from '../ai';
export default function usePhotoFormParent({
photoForm,
textFieldsToAutoGenerate: _textFieldsToAutoGenerate = [],
imageThumbnailBase64,
}: {
photoForm?: Partial<PhotoFormData>
textFieldsToAutoGenerate?: AiAutoGeneratedField[]
imageThumbnailBase64?: string,
}) {
const [pending, setIsPending] = useState(false);

View File

@ -1,4 +1,4 @@
import { generateText, streamText } from 'ai';
import { generateText, streamText, generateObject } from 'ai';
import { createStreamableValue } from '@ai-sdk/rsc';
import { createOpenAI } from '@ai-sdk/openai';
import { Ratelimit } from '@upstash/ratelimit';
@ -6,6 +6,7 @@ import { AI_CONTENT_GENERATION_ENABLED, OPENAI_BASE_URL } from '@/app/config';
import { removeBase64Prefix } from '@/utility/image';
import { cleanUpAiTextResponse } from '@/photo/ai';
import { redis } from '@/platforms/redis';
import { z } from 'zod';
const RATE_LIMIT_IDENTIFIER = 'openai-image-query';
const MODEL = 'gpt-4o';
@ -107,6 +108,39 @@ export const generateOpenAiImageQuery = async (
}
};
/**
 * Sends a single text + image prompt to the configured OpenAI model and
 * returns a structured object validated against `schema`.
 *
 * `checkRateLimitAndThrow(isBatch)` is awaited before any request is made.
 * Every top-level string value of the generated object is passed through
 * `cleanUpAiTextResponse`; values of any other type are returned unchanged,
 * so schemas that are not string-only are not corrupted by the clean-up step.
 *
 * @param imageBase64 Base64 image data; `removeBase64Prefix` is applied
 *   before the image is attached to the message.
 * @param query Prompt text sent alongside the image.
 * @param schema Zod schema passed to `generateObject` as the output schema.
 * @param isBatch Forwarded to `checkRateLimitAndThrow`.
 * @returns The generated object, typed as `z.infer<T>`.
 * @throws Error('No OpenAI client') when the `openai` client is not
 *   available. Errors raised by `checkRateLimitAndThrow` or `generateObject`
 *   propagate to the caller.
 */
export const generateOpenAiImageObjectQuery = async <T extends z.ZodSchema>(
  imageBase64: string,
  query: string,
  schema: T,
  isBatch?: boolean,
): Promise<z.infer<T>> => {
  await checkRateLimitAndThrow(isBatch);

  if (!openai) {
    throw new Error('No OpenAI client');
  }

  const result = await generateObject({
    model: openai(MODEL),
    messages: [{
      'role': 'user',
      'content': [
        {
          'type': 'text',
          'text': query,
        }, {
          'type': 'image',
          'image': removeBase64Prefix(imageBase64),
        },
      ],
    }],
    schema,
  });

  // Only string values are text responses that need clean-up; anything else
  // (numbers, arrays, nested objects) is kept exactly as the model returned it.
  return Object.fromEntries(
    Object
      .entries(result.object || {})
      .map(([key, value]) => [
        key,
        typeof value === 'string' ? cleanUpAiTextResponse(value) : value,
      ]),
  ) as z.infer<T>;
};
export const testOpenAiConnection = async () => {
await checkRateLimitAndThrow();