Skip to content

Commit

Permalink
Allow downloading and re-announcing messages of infinite length (#602)
Browse files Browse the repository at this point in the history
* Break big messages into smaller chunks for Speak Option to avoid surpassing max TTS limit

* Allow downloading large messages by combining smaller generated blobs

* Optimize audio download speed using concurrent promises

* Guarantee that all broken chunks are under max character length for all cases

* Use dynamic character length when force breaking chunks
  • Loading branch information
Amnish04 authored Apr 19, 2024
1 parent e264c8e commit 7d152f8
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 34 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"nanoid": "^5.0.5",
"nomnoml": "^1.6.2",
"openai": "^4.26.1",
"p-limit": "^5.0.0",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-icons": "^5.0.1",
Expand Down
5 changes: 3 additions & 2 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 51 additions & 11 deletions src/components/Message/MessageBase.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ import {
type ReactNode,
} from "react";

import { Menu, MenuItem, SubMenu, MenuDivider } from "../Menu";
import ResizeTextarea from "react-textarea-autosize";
import { TbTrash, TbShare2 } from "react-icons/tb";
import { AiOutlineEdit } from "react-icons/ai";
import { MdContentCopy } from "react-icons/md";
import { TbShare2, TbTrash } from "react-icons/tb";
import { Link as ReactRouterLink } from "react-router-dom";
import ResizeTextarea from "react-textarea-autosize";
import { Menu, MenuDivider, MenuItem, SubMenu } from "../Menu";

import { useCopyToClipboard } from "react-use";
import { useAlert } from "../../hooks/use-alert";
Expand All @@ -53,7 +53,14 @@ import {
ChatCraftSystemMessage,
} from "../../lib/ChatCraftMessage";
import { ChatCraftModel } from "../../lib/ChatCraftModel";
import { download, formatDate, formatNumber, getMetaKey, screenshotElement } from "../../lib/utils";
import {
download,
formatDate,
formatNumber,
getMetaKey,
screenshotElement,
utilizeAlert,
} from "../../lib/utils";
import ImageModal from "../ImageModal";
import Markdown from "../Markdown";

Expand All @@ -65,6 +72,7 @@ import { useUser } from "../../hooks/use-user";
import { ChatCraftChat } from "../../lib/ChatCraftChat";
import { textToSpeech } from "../../lib/ai";
import { usingOfficialOpenAI } from "../../lib/providers";
import { getSentenceChunksFrom } from "../../lib/summarize";
import "./Message.css";

export interface MessageBaseProps {
Expand Down Expand Up @@ -246,27 +254,55 @@ function MessageBase({
if (messageContent.current) {
const text = messageContent.current.textContent;
if (text) {
const { loading, closeLoading } = await utilizeAlert();

const alertId = loading({
title: "Downloading...",
message: "Please wait while we prepare your audio download.",
});

try {
info({
title: "Downloading...",
message: "Please wait while we prepare your audio download.",
const textChunks = getSentenceChunksFrom(text, 500);
const audioClips: Blob[] = new Array<Blob>(textChunks.length);

// Limit the number of concurrent tasks
const pLimit = (await import("p-limit")).default;

const limit = pLimit(8); // Adjust the concurrency limit as needed

const tasks = textChunks.map((textChunk, index) => {
return limit(async () => {
const audioClipUrl = await textToSpeech(
textChunk,
settings.textToSpeech.voice,
"tts-1-hd"
);

const audioClip = await fetch(audioClipUrl).then((r) => r.blob());
audioClips[index] = audioClip;
});
});

const audioClipUrl = await textToSpeech(text, settings.textToSpeech.voice, "tts-1-hd");
const audioClip = await fetch(audioClipUrl).then((r) => r.blob());
// Wait for all the tasks to complete
await Promise.all(tasks);

const audioClip = new Blob(audioClips, { type: audioClips[0].type });

download(
audioClip,
`${settings.currentProvider.name}_message.${audioClip.type.split("/")[1]}`,
audioClip.type
);

closeLoading(alertId);
info({
title: "Downloaded",
message: "Message was downloaded as Audio",
});
} catch (err: any) {
console.error(err);

closeLoading(alertId);
error({ title: "Error while downloading audio", message: err.message });
}
}
Expand Down Expand Up @@ -363,8 +399,12 @@ function MessageBase({

const { voice } = settings.textToSpeech;

// Use lighter tts-1 model to minimize latency
addToAudioQueue(textToSpeech(messageContent, voice, "tts-1"));
const messageChunks = getSentenceChunksFrom(messageContent, 500);

messageChunks.forEach((messageChunk) => {
// Use lighter tts-1 model to minimize latency
addToAudioQueue(textToSpeech(messageChunk, voice, "tts-1"));
});
} catch (err: any) {
console.error(err);
error({ title: "Error while generating Audio", message: err.message });
Expand Down
56 changes: 35 additions & 21 deletions src/hooks/use-audio-player.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { useState, useEffect, createContext, useContext, ReactNode, FC } from "react";
import { useState, useEffect, createContext, useContext, ReactNode, FC, useCallback } from "react";
import { useAlert } from "./use-alert";

type AudioPlayerContextType = {
addToAudioQueue: (audioClipUri: Promise<string>) => void;
Expand All @@ -23,31 +24,44 @@ export const AudioPlayerProvider: FC<{ children: ReactNode }> = ({ children }) =
const [queue, setQueue] = useState<Promise<string>[]>([]);
const [isPlaying, setIsPlaying] = useState<boolean>(false);
const [currentAudioClip, setCurrentAudioClip] = useState<AudioClip | null>();
const { error } = useAlert();

const playAudio = useCallback(
async (audioClipUri: Promise<string>) => {
try {
setIsPlaying(true);
const audioUrl: string = await audioClipUri;
const audio = new Audio(audioUrl);
audio.preload = "auto";
audio.onended = () => {
URL.revokeObjectURL(audioUrl);
setQueue((oldQueue) => oldQueue.slice(1));
setIsPlaying(false);

setCurrentAudioClip(null);
};
audio.play();
setCurrentAudioClip({
audioElement: audio,
audioUrl: audioUrl,
});
} catch (err: any) {
console.error(err);

error({
title: "Error playing audio",
message: err.message,
});
}
},
[error]
);

useEffect(() => {
if (!isPlaying && queue.length > 0) {
playAudio(queue[0]);
}
}, [queue, isPlaying]);

const playAudio = async (audioClipUri: Promise<string>) => {
setIsPlaying(true);
const audioUrl: string = await audioClipUri;
const audio = new Audio(audioUrl);
audio.preload = "auto";
audio.onended = () => {
URL.revokeObjectURL(audioUrl);
setQueue((oldQueue) => oldQueue.slice(1));
setIsPlaying(false);

setCurrentAudioClip(null);
};
audio.play();
setCurrentAudioClip({
audioElement: audio,
audioUrl: audioUrl,
});
};
}, [queue, isPlaying, playAudio]);

const addToAudioQueue = (audioClipUri: Promise<string>) => {
setQueue((oldQueue) => [...oldQueue, audioClipUri]);
Expand Down
63 changes: 63 additions & 0 deletions src/lib/summarize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,69 @@ export function tokenize(text: string) {

return { sentences, terms };
}
/**
 * Splits the provided text into an array of text chunks, where each chunk
 * is composed of one or more complete sentences whenever possible.
 *
 * Each chunk is kept at or under `maxCharsPerSentence` characters.
 * If a single sentence exceeds that length, it is force-broken into
 * pieces of at most `maxCharsPerSentence` characters (preferring word
 * boundaries), with no guarantee that individual pieces read naturally.
 *
 * @param text The text content that needs to be split into chunks
 * @param maxCharsPerSentence Maximum number of characters preferred per chunk
 * @returns Array of text chunks; every chunk's length is <= maxCharsPerSentence
 */
export function getSentenceChunksFrom(text: string, maxCharsPerSentence: number = 4096): string[] {
  const { sentences } = tokenize(text);
  const chunks: string[] = [];

  // Buffer of sentences being accumulated into the current chunk.
  let currentText = "";

  // Flush the buffer into `chunks` (when non-empty) and reset it.
  const flushBuffer = () => {
    if (currentText.length) {
      chunks.push(currentText);
      currentText = "";
    }
  };

  for (const sentence of sentences) {
    const trimmed = sentence.trim();

    if (trimmed.length >= maxCharsPerSentence) {
      // The sentence alone exceeds the limit: flush whatever is buffered,
      // then hard-break the sentence without caring about natural language.
      flushBuffer();
      chunks.push(...forceBreakIntoChunks(trimmed, maxCharsPerSentence));
    } else if (currentText.length === 0) {
      // First sentence of a fresh chunk — no joining space needed.
      currentText = trimmed;
    } else if (currentText.length + 1 + trimmed.length <= maxCharsPerSentence) {
      // Fits in the current chunk, counting the joining space.
      currentText += ` ${trimmed}`;
    } else {
      // Would overflow: flush the buffer and start a new chunk.
      flushBuffer();
      currentText = trimmed;
    }
  }

  flushBuffer();

  return chunks;
}

/**
 * Hard-breaks a single overlong sentence into pieces no longer than `maxLen`.
 *
 * Prefers breaking at the last whitespace within each `maxLen`-sized window,
 * and falls back to cutting mid-word when a window contains no whitespace.
 * Unlike a regex of the form `.{1,max}\b`, this never drops text when the
 * input contains runs of word characters longer than `maxLen`.
 *
 * @param sentence The oversized sentence to break up
 * @param maxLen Maximum number of characters allowed per piece
 * @returns Array of pieces, each of length <= maxLen
 */
function forceBreakIntoChunks(sentence: string, maxLen: number): string[] {
  const pieces: string[] = [];
  let rest = sentence.trim();

  while (rest.length > maxLen) {
    const window = rest.slice(0, maxLen);
    const lastSpace = window.lastIndexOf(" ");
    // Break at the last space when one exists inside the window;
    // otherwise cut mid-word at exactly maxLen characters.
    const breakAt = lastSpace > 0 ? lastSpace : maxLen;

    pieces.push(rest.slice(0, breakAt).trimEnd());
    rest = rest.slice(breakAt).trimStart();
  }

  if (rest.length) {
    pieces.push(rest);
  }

  return pieces;
}

function calculateTermFrequencies(sentences: string[], terms: string[]): Record<string, number> {
const termFrequencies: Record<string, number> = {};
Expand Down

0 comments on commit 7d152f8

Please sign in to comment.