Create proof-of-concept AI-driven image description

This commit is contained in:
Sam Becker 2024-03-19 19:06:31 -05:00
parent 6abb2e611d
commit 137b718fb7
3 changed files with 53 additions and 16 deletions

View File

@ -10,6 +10,7 @@
"cloudflarestorage",
"cmdk",
"CredentialsSignin",
"datetime",
"Eterna",
"exif",
"exifr",
@ -33,6 +34,7 @@
"Reala",
"skippable",
"sonner",
"Streamable",
"thephotoblog",
"trpc",
"unnest",
@ -41,8 +43,7 @@
"WRHGZC",
"wxyz",
"zadd",
"zrange",
"datetime"
"zrange"
],
"files.associations": {
"*.css": "tailwindcss"

View File

@ -25,6 +25,9 @@ import ImageBlurFallback from '@/components/ImageBlurFallback';
import { BLUR_ENABLED } from '@/site/config';
import { Tags, sortTagsObjectWithoutFavs } from '@/tag';
import { formatCount, formatCountDescriptive } from '@/utility/string';
import { streamImageQuery } from '@/services/openai';
import { readStreamableValue } from 'ai/rsc';
import Spinner from '@/components/Spinner';
const THUMBNAIL_SIZE = 300;
@ -116,9 +119,22 @@ export default function PhotoForm({
}
}, []);
const [aiTags, setAiTags] = useState('');
const [isLoadingAi, setIsLoadingAi] = useState(false);
return (
<div className="space-y-8 max-w-[38rem]">
<button onClick={() => console.log(imageData)}>
<button onClick={async () => {
setIsLoadingAi(true);
const textStream = await streamImageQuery(
imageData ?? '',
'description',
);
for await (const text of readStreamableValue(textStream)) {
setAiTags(text ?? '');
}
setIsLoadingAi(false);
}}>
Generate Text
</button>
<div className="flex gap-2">
@ -152,6 +168,13 @@ export default function PhotoForm({
height={height}
/>}
</div>
<p>
AI RESPONSE: {aiTags} {isLoadingAi && <>
<span className="inline-flex translate-y-[1.5px]">
<Spinner />
</span>
</>}
</p>
<form
action={type === 'create' ? createPhotoAction : updatePhotoAction}
onSubmit={() => blur()}

View File

@ -1,14 +1,16 @@
'use server';
import OpenAI from 'openai';
import { OpenAIStream, StreamingTextResponse } from 'ai';
import { createStreamableValue, render } from 'ai/rsc';
const openai = new OpenAI({ apiKey: process.env.OPENAI_SECRET_KEY });
const provider = new OpenAI({ apiKey: process.env.OPENAI_SECRET_KEY });
const queryImage = async (imageBase64: string, query: string) => {
const response = await openai.chat.completions.create({
const streamImageQueryRaw = async (imageBase64: string, query: string) => {
const stream = createStreamableValue('');
render({
provider,
model: 'gpt-4-vision-preview',
stream: true,
messages: [{
'role': 'user',
'content': [
@ -23,15 +25,26 @@ const queryImage = async (imageBase64: string, query: string) => {
},
],
}],
text: ({ content, done }): any => {
if (done) {
stream.done(content);
} else {
stream.update(content);
}
},
});
const stream = OpenAIStream(response);
return new StreamingTextResponse(stream);
return stream.value;
};
/**
 * Requests a short keyword summary of an image.
 *
 * @param imageBase64 - Image payload forwarded unchanged to `queryImage`
 *   (presumably base64-encoded, per the parameter name — confirm with caller).
 * @returns Whatever `queryImage` resolves to for the keyword prompt.
 */
export const tagImage = async (imageBase64: string) =>
  queryImage(
    imageBase64,
    // The prompt needs the preposition "in" to read as a proper instruction.
    'Describe this image in three or fewer comma-separated keywords',
  );
/** Kinds of text that can be requested about an image. */
export type ImageQuery = 'title' | 'caption' | 'tags' | 'description';

/**
 * Prompt text used for each {@link ImageQuery} kind.
 *
 * NOTE(review): this module is marked 'use server'; confirm the framework
 * allows a non-function value to be exported from such a module, or move
 * this table to a shared module in a follow-up.
 */
export const IMAGE_QUERIES: Record<ImageQuery, string> = {
  title: 'What is the title of this image?',
  caption: 'What is the caption of this image?',
  // Reworded so the instruction is grammatical ("in three or fewer ...").
  tags: 'Describe this image in three or fewer comma-separated keywords',
  description: 'Describe this image in detail',
};
/**
 * Streams the model's answer to one of the predefined image queries.
 *
 * Declared `async` because a module marked 'use server' may only export
 * async functions; the resolved value is unchanged for existing callers,
 * which already `await` the result.
 *
 * @param imageBase64 - Image payload forwarded unchanged to
 *   `streamImageQueryRaw`.
 * @param query - Which predefined prompt from `IMAGE_QUERIES` to send.
 * @returns A promise resolving to whatever `streamImageQueryRaw` returns.
 */
export const streamImageQuery = async (
  imageBase64: string,
  query: ImageQuery,
) => streamImageQueryRaw(imageBase64, IMAGE_QUERIES[query]);