Create proof-of-concept AI-driven image description

2024-03-19 19:06:31 -05:00 · 2024-03-19 19:06:31 -05:00 · 137b718fb7
commit 137b718fb7
parent 6abb2e611d
3 changed files with 53 additions and 16 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -10,6 +10,7 @@
    "cloudflarestorage",
    "cmdk",
    "CredentialsSignin",
+    "datetime",
    "Eterna",
    "exif",
    "exifr",
@ -33,6 +34,7 @@
    "Reala",
    "skippable",
    "sonner",
+    "Streamable",
    "thephotoblog",
    "trpc",
    "unnest",
@ -41,8 +43,7 @@
    "WRHGZC",
    "wxyz",
    "zadd",
-    "zrange",
-    "datetime"
+    "zrange"
  ],
  "files.associations": {
    "*.css": "tailwindcss"
--- a/src/photo/form/PhotoForm.tsx
+++ b/src/photo/form/PhotoForm.tsx
@ -25,6 +25,9 @@ import ImageBlurFallback from '@/components/ImageBlurFallback';
 import { BLUR_ENABLED } from '@/site/config';
 import { Tags, sortTagsObjectWithoutFavs } from '@/tag';
 import { formatCount, formatCountDescriptive } from '@/utility/string';
+import { streamImageQuery } from '@/services/openai';
+import { readStreamableValue } from 'ai/rsc';
+import Spinner from '@/components/Spinner';

 const THUMBNAIL_SIZE = 300;

@ -116,9 +119,22 @@ export default function PhotoForm({
    }
  }, []);

+  const [aiTags, setAiTags] = useState('');
+  const [isLoadingAi, setIsLoadingAi] = useState(false);
+
  return (
    <div className="space-y-8 max-w-[38rem]">
-      <button onClick={() => console.log(imageData)}>
+      <button onClick={async () => {
+        // Request an AI description of the current image and mirror each
+        // streamed partial value into state so the text renders as it arrives.
+        setIsLoadingAi(true);
+        try {
+          const textStream = await streamImageQuery(
+            imageData ?? '',
+            'description',
+          );
+          for await (const text of readStreamableValue(textStream)) {
+            setAiTags(text ?? '');
+          }
+        } finally {
+          // Always clear the spinner, even when the request or stream fails;
+          // otherwise the loading indicator would stay visible indefinitely.
+          setIsLoadingAi(false);
+        }
+      }}>
        Generate Text ✨
      </button>
      <div className="flex gap-2">
@ -152,6 +168,13 @@ export default function PhotoForm({
            height={height}
          />}
      </div>
+      <p>
+        AI RESPONSE: {aiTags} {isLoadingAi && <>
+          <span className="inline-flex translate-y-[1.5px]">
+            <Spinner />
+          </span>
+        </>}
+      </p>
      <form
        action={type === 'create' ? createPhotoAction : updatePhotoAction}
        onSubmit={() => blur()}
--- a/src/services/openai.ts
+++ b/src/services/openai.ts
@ -1,14 +1,16 @@
 'use server';

 import OpenAI from 'openai';
-import { OpenAIStream, StreamingTextResponse } from 'ai';
+import { createStreamableValue, render } from 'ai/rsc';

-const openai = new OpenAI({ apiKey: process.env.OPENAI_SECRET_KEY });
+const provider = new OpenAI({ apiKey: process.env.OPENAI_SECRET_KEY });

-const queryImage = async (imageBase64: string, query: string) => {
-  const response = await openai.chat.completions.create({
+const streamImageQueryRaw = async (imageBase64: string, query: string) => {
+  const stream = createStreamableValue('');
+
+  render({
+    provider,
    model: 'gpt-4-vision-preview',
-    stream: true,
    messages: [{
      'role': 'user',
      'content': [
@ -23,15 +25,26 @@ const queryImage = async (imageBase64: string, query: string) => {
        },
      ],
    }],
+    text: ({ content, done }): any => {
+      if (done) {
+        stream.done(content);
+      } else {
+        stream.update(content);
+      }
+    },
  });

-  const stream = OpenAIStream(response);
-
-  return new StreamingTextResponse(stream);
+  return stream.value;
 };

-export const tagImage = async (imageBase64: string) =>
-  queryImage(
-    imageBase64,
-    'Describe this image three or less comma-separated keywords',
-  );
+/** Kinds of text that can be requested for an image. */
+export type ImageQuery = 'title' | 'caption' | 'tags' | 'description';
+
+/**
+ * Prompt text sent to the model for each `ImageQuery` kind.
+ *
+ * NOTE(review): this module is marked 'use server'; Next.js expects such
+ * files to export only async functions — confirm whether this constant
+ * should live in a separate module.
+ */
+export const IMAGE_QUERIES: Record<ImageQuery, string> = {
+  title: 'What is the title of this image?',
+  caption: 'What is the caption of this image?',
+  tags: 'Describe this image in three or fewer comma-separated keywords',
+  description: 'Describe this image in detail',
+};
+
+/**
+ * Streams the model's answer to one of the predefined image prompts.
+ *
+ * Declared `async` because this module is marked 'use server', where
+ * exported functions are expected to be async; callers already `await`
+ * the result, so the change is backward-compatible.
+ *
+ * @param imageBase64 Image data, forwarded unchanged to the raw query helper.
+ * @param query Key into `IMAGE_QUERIES` selecting the prompt text to send.
+ * @returns The value resolved by `streamImageQueryRaw` for that prompt.
+ */
+export const streamImageQuery = async (
+  imageBase64: string,
+  query: ImageQuery,
+) => streamImageQueryRaw(imageBase64, IMAGE_QUERIES[query]);