From 384fb0e1c8141b9743198a25bbdd5cc3252a3903 Mon Sep 17 00:00:00 2001 From: Phil Miesle Date: Wed, 11 Dec 2024 14:42:37 +0000 Subject: [PATCH] enabling pass-through of InsertOptions to Astra --- .../src/vectorstores/astradb.ts | 18 +++++++--- .../vectorstores/tests/astradb.int.test.ts | 33 +++++++++++++++++-- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/libs/langchain-community/src/vectorstores/astradb.ts b/libs/langchain-community/src/vectorstores/astradb.ts index c3fa6141dcb5..f57d4c6bd860 100644 --- a/libs/langchain-community/src/vectorstores/astradb.ts +++ b/libs/langchain-community/src/vectorstores/astradb.ts @@ -7,6 +7,7 @@ import { CreateCollectionOptions, Db, InsertManyError, + InsertManyOptions, } from "@datastax/astra-db-ts"; import { @@ -135,19 +136,22 @@ export class AstraDBVectorStore extends VectorStore { * * @param vectors Vectors to save. * @param documents The documents associated with the vectors. + * @param options Optional configuration for saving vectors: + * - `ids`: An array of unique identifiers for the documents. If not provided, IDs will be auto-generated. + * - `insertOptions`: Additional options passed through to the `insertMany` operation (e.g., `ordered`, `maxTimeMS`); these override the default `ordered: false`. * @returns Promise that resolves when the vectors have been added. */ async addVectors( vectors: number[][], documents: Document[], - options?: string[] + options?: { ids?: string[]; insertOptions?: InsertManyOptions } ) { if (!this.collection) { throw new Error("Must connect to a collection before adding vectors"); } const docs = vectors.map((embedding, idx) => ({ - [this.idKey]: options?.[idx] ?? uuid.v4(), + [this.idKey]: options?.ids?.[idx] ?? 
uuid.v4(), [this.contentKey]: documents[idx].pageContent, $vector: embedding, ...documents[idx].metadata, @@ -161,6 +165,7 @@ export class AstraDBVectorStore extends VectorStore { try { insertResults = await this.collection.insertMany(docs, { ordered: false, + ...options?.insertOptions, }); } catch (error) { if (isInsertManyError(error)) { @@ -192,10 +197,15 @@ export class AstraDBVectorStore extends VectorStore { * Method that adds documents to AstraDB. * * @param documents Array of documents to add to AstraDB. - * @param options Optional ids for the documents. + * @param options Optional configuration for saving documents: + * - `ids`: An array of unique identifiers for the documents. If not provided, IDs will be auto-generated. + * - `insertOptions`: Additional options to customize the `insertMany` operation (e.g., `ordered`, `maxTimeMS`). * @returns Promise that resolves the documents have been added. */ - async addDocuments(documents: Document[], options?: string[]) { + async addDocuments( + documents: Document[], + options?: { ids?: string[]; insertOptions?: InsertManyOptions } + ) { if (!this.collection) { throw new Error("Must connect to a collection before adding vectors"); } diff --git a/libs/langchain-community/src/vectorstores/tests/astradb.int.test.ts b/libs/langchain-community/src/vectorstores/tests/astradb.int.test.ts index 98febdfffa48..798ab875e412 100644 --- a/libs/langchain-community/src/vectorstores/tests/astradb.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/astradb.int.test.ts @@ -158,8 +158,8 @@ describe.skip("AstraDBVectorStore", () => { fail("Should have thrown error"); // eslint-disable-next-line @typescript-eslint/no-explicit-any } catch (e: any) { - expect(e.message).toContain( - "already exists with different collection options" + expect(e.message).toMatch( + /Collection already exists.*different settings/ ); } }, 60000); @@ -213,6 +213,7 @@ describe.skip("AstraDBVectorStore", () => { await 
store.addDocuments([ { pageContent: "upserted", metadata: { a: 1, _id: "123456789" } }, + { pageContent: "upserted", metadata: { a: 2 } }, ]); const collection = await db.collection(astraConfig.collection); @@ -220,4 +221,32 @@ describe.skip("AstraDBVectorStore", () => { expect(doc?.text).toEqual("upserted"); }); + + test("addDocuments with insertOptions (timeout)", async () => { + const store = new AstraDBVectorStore(new FakeEmbeddings(), { + ...astraConfig, + collectionOptions: { + vector: { + dimension: 4, + metric: "cosine", + }, + }, + }); + await store.initialize(); + + const documents = [ + new Document({ pageContent: "Test document 1", metadata: { key: "value1" } }), + new Document({ pageContent: "Test document 2", metadata: { key: "value2" } }), + ]; + + try { + // Setting maxTimeMS to 1 to trigger a timeout + await store.addDocuments(documents, { + insertOptions: { maxTimeMS: 1 }, + }); + fail("Should have thrown timeout error"); + } catch (e: any) { + expect(e.message).toContain("Command timed out"); + } + }); });