From 7d152f8fd69f8cdd93766a78203a42d89ee50167 Mon Sep 17 00:00:00 2001 From: Amnish Singh Arora <78865303+Amnish04@users.noreply.github.com> Date: Fri, 19 Apr 2024 16:39:39 -0400 Subject: [PATCH] Allow downloading and re-announcing messages of infinite length (#602) * Break big messages into smaller chunks for Speak Option to avoid surpassing max TTS limit * Allow downloading large messages by combining smaller generated blobs * Optimize audio download speed using concurrent promises * Guarantee that all broken chunks are under max character length for all cases * Use dynamic character length when force breaking chunks --- package.json | 1 + pnpm-lock.yaml | 5 +- src/components/Message/MessageBase.tsx | 62 ++++++++++++++++++++----- src/hooks/use-audio-player.tsx | 56 ++++++++++++++--------- src/lib/summarize.ts | 63 ++++++++++++++++++++++++++ 5 files changed, 153 insertions(+), 34 deletions(-) diff --git a/package.json b/package.json index a050aaad..65d3a0dc 100644 --- a/package.json +++ b/package.json @@ -43,6 +43,7 @@ "nanoid": "^5.0.5", "nomnoml": "^1.6.2", "openai": "^4.26.1", + "p-limit": "^5.0.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-icons": "^5.0.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4cdc6e88..62eb17ba 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -92,6 +92,9 @@ dependencies: openai: specifier: ^4.26.1 version: 4.26.1 + p-limit: + specifier: ^5.0.0 + version: 5.0.0 react: specifier: ^18.2.0 version: 18.2.0 @@ -8746,7 +8749,6 @@ packages: engines: {node: '>=18'} dependencies: yocto-queue: 1.0.0 - dev: true /p-locate@5.0.0: resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==} @@ -11139,7 +11141,6 @@ packages: /yocto-queue@1.0.0: resolution: {integrity: sha512-9bnSc/HEW2uRy67wc+T8UwauLuPJVn28jb+GtJY16iiKWyvmYJRXVT4UamsAEGQfPohgr2q4Tq0sQbQlxTfi1g==} engines: {node: '>=12.20'} - dev: true /youch@3.3.2: resolution: {integrity: 
sha512-9cwz/z7abtcHOIuH45nzmUFCZbyJA1nLqlirKvyNRx4wDMhqsBaifAJzBej7L4fsVPjFxYq3NK3GAcfvZsydFw==} diff --git a/src/components/Message/MessageBase.tsx b/src/components/Message/MessageBase.tsx index a927bae8..f7f097e9 100644 --- a/src/components/Message/MessageBase.tsx +++ b/src/components/Message/MessageBase.tsx @@ -33,12 +33,12 @@ import { type ReactNode, } from "react"; -import { Menu, MenuItem, SubMenu, MenuDivider } from "../Menu"; -import ResizeTextarea from "react-textarea-autosize"; -import { TbTrash, TbShare2 } from "react-icons/tb"; import { AiOutlineEdit } from "react-icons/ai"; import { MdContentCopy } from "react-icons/md"; +import { TbShare2, TbTrash } from "react-icons/tb"; import { Link as ReactRouterLink } from "react-router-dom"; +import ResizeTextarea from "react-textarea-autosize"; +import { Menu, MenuDivider, MenuItem, SubMenu } from "../Menu"; import { useCopyToClipboard } from "react-use"; import { useAlert } from "../../hooks/use-alert"; @@ -53,7 +53,14 @@ import { ChatCraftSystemMessage, } from "../../lib/ChatCraftMessage"; import { ChatCraftModel } from "../../lib/ChatCraftModel"; -import { download, formatDate, formatNumber, getMetaKey, screenshotElement } from "../../lib/utils"; +import { + download, + formatDate, + formatNumber, + getMetaKey, + screenshotElement, + utilizeAlert, +} from "../../lib/utils"; import ImageModal from "../ImageModal"; import Markdown from "../Markdown"; @@ -65,6 +72,7 @@ import { useUser } from "../../hooks/use-user"; import { ChatCraftChat } from "../../lib/ChatCraftChat"; import { textToSpeech } from "../../lib/ai"; import { usingOfficialOpenAI } from "../../lib/providers"; +import { getSentenceChunksFrom } from "../../lib/summarize"; import "./Message.css"; export interface MessageBaseProps { @@ -246,14 +254,39 @@ function MessageBase({ if (messageContent.current) { const text = messageContent.current.textContent; if (text) { + const { loading, closeLoading } = await utilizeAlert(); + + const alertId = 
loading({ + title: "Downloading...", + message: "Please wait while we prepare your audio download.", + }); + try { - info({ - title: "Downloading...", - message: "Please wait while we prepare your audio download.", + const textChunks = getSentenceChunksFrom(text, 500); + const audioClips: Blob[] = new Array(textChunks.length); + + // Limit the number of concurrent tasks + const pLimit = (await import("p-limit")).default; + + const limit = pLimit(8); // Adjust the concurrency limit as needed + + const tasks = textChunks.map((textChunk, index) => { + return limit(async () => { + const audioClipUrl = await textToSpeech( + textChunk, + settings.textToSpeech.voice, + "tts-1-hd" + ); + + const audioClip = await fetch(audioClipUrl).then((r) => r.blob()); + audioClips[index] = audioClip; + }); }); - const audioClipUrl = await textToSpeech(text, settings.textToSpeech.voice, "tts-1-hd"); - const audioClip = await fetch(audioClipUrl).then((r) => r.blob()); + // Wait for all the tasks to complete + await Promise.all(tasks); + + const audioClip = new Blob(audioClips, { type: audioClips[0].type }); download( audioClip, @@ -261,12 +294,15 @@ function MessageBase({ audioClip.type ); + closeLoading(alertId); info({ title: "Downloaded", message: "Message was downloaded as Audio", }); } catch (err: any) { console.error(err); + + closeLoading(alertId); error({ title: "Error while downloading audio", message: err.message }); } } @@ -363,8 +399,12 @@ function MessageBase({ const { voice } = settings.textToSpeech; - // Use lighter tts-1 model to minimize latency - addToAudioQueue(textToSpeech(messageContent, voice, "tts-1")); + const messageChunks = getSentenceChunksFrom(messageContent, 500); + + messageChunks.forEach((messageChunk) => { + // Use lighter tts-1 model to minimize latency + addToAudioQueue(textToSpeech(messageChunk, voice, "tts-1")); + }); } catch (err: any) { console.error(err); error({ title: "Error while generating Audio", message: err.message }); diff --git 
a/src/hooks/use-audio-player.tsx b/src/hooks/use-audio-player.tsx index e7033125..50c4a587 100644 --- a/src/hooks/use-audio-player.tsx +++ b/src/hooks/use-audio-player.tsx @@ -1,4 +1,5 @@ -import { useState, useEffect, createContext, useContext, ReactNode, FC } from "react"; +import { useState, useEffect, createContext, useContext, ReactNode, FC, useCallback } from "react"; +import { useAlert } from "./use-alert"; type AudioPlayerContextType = { addToAudioQueue: (audioClipUri: Promise) => void; @@ -23,31 +24,44 @@ export const AudioPlayerProvider: FC<{ children: ReactNode }> = ({ children }) = const [queue, setQueue] = useState[]>([]); const [isPlaying, setIsPlaying] = useState(false); const [currentAudioClip, setCurrentAudioClip] = useState(); + const { error } = useAlert(); + + const playAudio = useCallback( + async (audioClipUri: Promise) => { + try { + setIsPlaying(true); + const audioUrl: string = await audioClipUri; + const audio = new Audio(audioUrl); + audio.preload = "auto"; + audio.onended = () => { + URL.revokeObjectURL(audioUrl); + setQueue((oldQueue) => oldQueue.slice(1)); + setIsPlaying(false); + + setCurrentAudioClip(null); + }; + audio.play(); + setCurrentAudioClip({ + audioElement: audio, + audioUrl: audioUrl, + }); + } catch (err: any) { + console.error(err); + + error({ + title: "Error playing audio", + message: err.message, + }); + } + }, + [error] + ); useEffect(() => { if (!isPlaying && queue.length > 0) { playAudio(queue[0]); } - }, [queue, isPlaying]); - - const playAudio = async (audioClipUri: Promise) => { - setIsPlaying(true); - const audioUrl: string = await audioClipUri; - const audio = new Audio(audioUrl); - audio.preload = "auto"; - audio.onended = () => { - URL.revokeObjectURL(audioUrl); - setQueue((oldQueue) => oldQueue.slice(1)); - setIsPlaying(false); - - setCurrentAudioClip(null); - }; - audio.play(); - setCurrentAudioClip({ - audioElement: audio, - audioUrl: audioUrl, - }); - }; + }, [queue, isPlaying, playAudio]); const 
addToAudioQueue = (audioClipUri: Promise) => {
     setQueue((oldQueue) => [...oldQueue, audioClipUri]);
diff --git a/src/lib/summarize.ts b/src/lib/summarize.ts
index 8f6da952..a2c29dc3 100644
--- a/src/lib/summarize.ts
+++ b/src/lib/summarize.ts
@@ -56,6 +56,69 @@ export function tokenize(text: string) {
   return { sentences, terms };
 }
 
+/**
+ * Splits the provided text into an array of text chunks, where each
+ * chunk is composed of one or more whole sentences and is never longer
+ * than `maxCharsPerSentence` characters.
+ *
+ * Consecutive sentences are joined with a single space while they fit.
+ * A single sentence that exceeds the limit is force broken, preferably
+ * at whitespace and otherwise mid-word, so that no text is ever dropped.
+ *
+ * @param text The text content that needs to be split into chunks
+ * @param maxCharsPerSentence Maximum number of characters per chunk
+ * (fractions are rounded down, values below 1 are treated as 1)
+ * @returns Array of trimmed, non-empty text chunks in their original order
+ */
+export function getSentenceChunksFrom(text: string, maxCharsPerSentence: number = 4096): string[] {
+  // A limit below 1 would stall the force-break loop further down
+  const limit = Math.max(1, Math.floor(maxCharsPerSentence));
+  const { sentences } = tokenize(text);
+  const chunks: string[] = [];
+
+  let currentText = "";
+
+  // Moves the buffered sentences, if any, into the result
+  const flush = () => {
+    if (currentText.length) {
+      chunks.push(currentText);
+      currentText = "";
+    }
+  };
+
+  for (const rawSentence of sentences) {
+    let sentence = rawSentence.trim();
+    if (!sentence.length) {
+      continue;
+    }
+
+    if (sentence.length > limit) {
+      // Keep the original order: emit buffered text before the long sentence
+      flush();
+
+      while (sentence.length > limit) {
+        // Look one character past the limit so that whitespace sitting right
+        // after a full-length piece still counts as a clean break point
+        const lastSpace = sentence.slice(0, limit + 1).search(/\s\S*$/);
+        const breakAt = lastSpace > 0 ? lastSpace : limit;
+        chunks.push(sentence.slice(0, breakAt).trimEnd());
+        sentence = sentence.slice(breakAt).trimStart();
+      }
+    }
+
+    const joined = currentText.length ? `${currentText} ${sentence}` : sentence;
+    if (joined.length > limit) {
+      flush();
+      currentText = sentence;
+    } else {
+      currentText = joined;
+    }
+  }
+
+  flush();
+
+  return chunks;
+}
 function calculateTermFrequencies(sentences: string[], terms: string[]): Record {
   const termFrequencies: Record = {};