From 69400bc11cc34f959e0558dd77e69c99ad691a5e Mon Sep 17 00:00:00 2001 From: PedroMiolaSilva Date: Wed, 11 Dec 2024 12:59:17 -0300 Subject: [PATCH 1/3] feat: adding azion edgesql integration to langchain-community --- .../integrations/retrievers/azion-edgesql.mdx | 53 ++ .../vectorstores/azion-edgesql.mdx | 39 + .../src/retrievers/azion_edgesql.ts | 499 ++++++++++ .../tests/azion_edgesql.int.test.ts | 44 + .../src/vectorstores/azion_edgesql.ts | 900 ++++++++++++++++++ .../tests/azion_edgesql.int.test.ts | 157 +++ 6 files changed, 1692 insertions(+) create mode 100644 docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx create mode 100644 docs/core_docs/docs/integrations/vectorstores/azion-edgesql.mdx create mode 100644 libs/langchain-community/src/retrievers/azion_edgesql.ts create mode 100644 libs/langchain-community/src/retrievers/tests/azion_edgesql.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/azion_edgesql.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/azion_edgesql.int.test.ts diff --git a/docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx b/docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx new file mode 100644 index 000000000000..d1c993aacdd2 --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx @@ -0,0 +1,53 @@ +### Azion Edge SQL Retriever + +The `AzionRetriever` is used to perform advanced search operations, including hybrid and similarity searches directly on Azion's Edge Platform using Edge SQL. Make sure to install the `@langchain/community` package to use this retriever. Besides that, you will need an Azion account and a Token to use the Azion API. 
+ +```typescript +import { AzionRetriever } from "@langchain/community/retrievers/azion"; +import { OpenAIEmbeddings, ChatOpenAI } from "@langchain/openai"; + +// Initialize the retriever +const embeddingModel = new OpenAIEmbeddings({ model: "text-embedding-3-small" }); +// Initialize the entity extractor model to extract the entities to perform Full Text Search operations +const entityExtractor = new ChatOpenAI({ model: "gpt-4o-mini" }); + +// Initialize the retriever +const retriever = new AzionRetriever(embeddingModel, entityExtractor, { + dbName: "mydb", + similarityK: 2, + ftsK: 2, + searchType: "hybrid", + // Filter documents by language = "en" AND topic IN ("nature", "biology") + filters: [ + // Only return English language documents + { column: "language", operator: "=", value: "en" }, + // Only return documents with topics of nature or biology + { column: "topic", operator: "IN", value: "'nature', 'biology'" } + ], + // Return only the topic and language metadata + metadataItems: ["topic", "language"] +}); + +// Perform a search +const documents = await retriever._getRelevantDocuments("Australia"); +console.log(documents); +``` + +Using AzionRetriever as a tool in an agent requires the `createRetrieverTool` function to wrap the retriever: + +```typescript +import {createRetrieverTool} from "@langchain/core/tools"; +import {AzionRetriever} from "./src/function/AzionRetriever"; + +const retriever = new AzionRetriever(embeddingModel, entityExtractor, { + dbName: "mydb", + similarityK: 2, + ftsK: 2, +}); + +const retrieverTool = createRetrieverTool(retriever, { + name: "AzionRetriever", + description: "A tool that retrieves documents from a vector database" +}); + +``` \ No newline at end of file diff --git a/docs/core_docs/docs/integrations/vectorstores/azion-edgesql.mdx b/docs/core_docs/docs/integrations/vectorstores/azion-edgesql.mdx new file mode 100644 index 000000000000..b0f0d11ee1de --- /dev/null +++ 
b/docs/core_docs/docs/integrations/vectorstores/azion-edgesql.mdx @@ -0,0 +1,39 @@ +### Azion Edge SQL Vector Store +The `AzionVectorStore` is used to manage and search through a collection of documents using vector embeddings, directly on Azion's Edge Platform using Edge SQL. Make sure to install the `@langchain/community` package to use this vector store. Besides that, you will need an Azion account and a Token to use the Azion API. + +```typescript +import { AzionVectorStore } from "@langchain/community/vectorstores/azionedgesql"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; + +// Initialize the vector store +const embeddingModel = new OpenAIEmbeddings({ model: "text-embedding-3-small" }); +const vectorStore = new AzionVectorStore(embeddingModel, { dbName: "mydb", tableName: "documents" }); + +// Setup database with hybrid search and metadata columns +await vectorStore.setupDatabase({ + columns: ["topic", "language"], + mode: "hybrid" +}); + +//OR you can setup the database with the static method createVectorStore +//const vectorStore = await AzionVectorStore.createVectorStore(embeddingModel,{dbName:"mydb", tableName:"documents"}, {columns: ["topic", "language"], mode: "hybrid"}); + +// Add documents to the vector store +await vectorStore.addDocuments([ + new Document({ pageContent: "Australia is known for its unique wildlife", metadata: { topic: "nature", language: "en" } }), + // Add more documents as needed +]); + +// Perform a similarity search +const results = await vectorStore.AzionSimilaritySearch("Australia", { kvector: 1, metadataItems: ["topic"] }); + +// OR +// Perform a full text search +const results = await vectorStore.AzionFullTextSearch("Australia", { kfts: 1, metadataItems: ["topic"] }); + +// OR +// Perform a hybrid search +const results = await vectorStore.AzionHybridSearch("Australia", { kfts: 1, kvector: 1, metadataItems: ["topic"] }); +console.log(results); +``` \ No newline at end 
of file diff --git a/libs/langchain-community/src/retrievers/azion_edgesql.ts b/libs/langchain-community/src/retrievers/azion_edgesql.ts new file mode 100644 index 000000000000..a7f82eef93fe --- /dev/null +++ b/libs/langchain-community/src/retrievers/azion_edgesql.ts @@ -0,0 +1,499 @@ +import { QueryResult, useQuery } from 'azion/sql'; +import type { EmbeddingsInterface } from '@langchain/core/embeddings'; +import { Document } from '@langchain/core/documents'; +import { BaseRetriever, BaseRetrieverInput } from '@langchain/core/retrievers'; +import { ChatOpenAI } from '@langchain/openai'; +import { SystemMessage, HumanMessage } from '@langchain/core/messages'; + +export type AzionMetadata = Record; + +/** + * Represents a filter condition for querying the Azion database + * @property operator - The comparison operator to use (e.g. =, !=, >, <, etc) + * @property column - The database column to filter on + * @property value - The value to compare against + */ +export type AzionFilter = {operator: Operator, column: Column, value: string}; + +/** + * Represents a database column name + */ +export type Column = string; + +/** + * Valid SQL operators that can be used in filter conditions + */ +export type Operator = + | '=' | '!=' | '>' | '<>' | '<' // Basic comparison operators + | '>=' | '<=' // Range operators + | 'LIKE' | 'NOT LIKE' // Pattern matching + | 'IN' | 'NOT IN' // Set membership + | 'IS NULL' | 'IS NOT NULL'; // NULL checks + +/** + * Interface for the response returned when searching embeddings. + */ +interface SearchEmbeddingsResponse { + id: number; + content: string; + metadata: { + searchtype: string; + [key: string]: any; + }; +} + +/** + * Interface for the arguments required to initialize an Azion library. + */ +export interface AzionRetrieverArgs extends BaseRetrieverInput { + /** + * Search type to perform. Cosine similarity and hybrid (vector + FTS) are currently supported. 
+ */ + searchType?: 'hybrid' | 'similarity'; + + /** + * The number of documents retrieved with cosine similarity (vector) search. Minimum is 1. + */ + similarityK?: number; + + /** + * The number of documents retrieved with full text search. Minimum is 1. + */ + ftsK?: number; + + /** + * The name of the database to search for documents. + */ + dbName?: string; + + /** + * The prompt to the chatmodel to extract entities to perform Full text search on the database + */ + promptEntityExtractor?: string; + + /** + * Max items to maintain per searchtype. Default is 3. + */ + maxItemsSearch?: number; + + /** + * The columns from the tables that metadata must contain + */ + metadataItems?: string[]; + + /** + * Name of the table to perform vector similarity search. Default is 'documents' + */ + vectorTable?: string + + /** + * Name of the table to perform full text search. Default is 'document_fts' + */ + ftsTable?: string + + /** + * Filters to apply to the search. Default is an empty array. + */ + filters?: AzionFilter[]; + + /** Whether the metadata is contained in a single column or multiple columns */ + expandedMetadata?: boolean; +} + +/** + * class for performing hybrid search operations on Azion's Edge SQL database. + * It extends the 'BaseRetriever' class and implements methods for + * similarity search and full-text search (FTS). 
+ */ +/** + * Example usage: + * ```ts + * // Initialize embeddings and chat model + * const embeddings = new OpenAIEmbeddings(); + * const chatModel = new ChatOpenAI(); + * + * // Create retriever with hybrid search + * const retriever = new AzionRetriever(embeddings, chatModel, { + * searchType: 'hybrid', + * similarityK: 3, + * ftsK: 2, + * dbName: 'my_docs', + * metadataItems: ['category', 'author'], + * vectorTable: 'documents', + * ftsTable: 'documents_fts', + * filters: [ + * { operator: '=', column: 'status', value: 'published' } + * ] + * }); + * + * // Retrieve relevant documents + * const docs = await retriever._getRelevantDocuments( + * "What are coral reefs in Australia?" + * ); + * + * // Create retriever with similarity search only + * const simRetriever = new AzionRetriever(embeddings, chatModel, { + * searchType: 'similarity', + * similarityK: 5, + * dbName: 'my_docs', + * vectorTable: 'documents' + * }); + * + * // Customize entity extraction prompt + * const customRetriever = new AzionRetriever(embeddings, chatModel, { + * searchType: 'hybrid', + * similarityK: 3, + * ftsK: 2, + * dbName: 'my_docs', + * promptEntityExtractor: "Extract key entities from: {{query}}" + * }); + * ``` + */ + +export class AzionRetriever extends BaseRetriever { + static lc_name() { + return 'azionRetriever'; + } + + /** Namespace for the retriever in LangChain */ + lc_namespace = ['langchain', 'retrievers', 'azion']; + + /** Type of search to perform - either hybrid (combining vector + FTS) or similarity only */ + searchType?: 'hybrid' | 'similarity'; + + /** Number of results to return from similarity search. Minimum is 1. */ + similarityK: number; + + /** Number of results to return from full text search. Minimum is 1. 
*/ + ftsK: number; + + /** Interface for generating embeddings from text */ + embeddings: EmbeddingsInterface; + + /** Name of the database to search */ + dbName: string; + + /** ChatOpenAI model used to extract entities from queries */ + entityExtractor: ChatOpenAI; + + /** Prompt template for entity extraction */ + promptEntityExtractor: string; + + /** Optional metadata columns to include in results */ + metadataItems?: string[]; + + /** Name of table containing vector embeddings for similarity search */ + vectorTable: string; + + /** Name of table containing documents for full text search */ + ftsTable: string; + + /** Array of filters to apply to search results */ + filters: AzionFilter[]; + + /** Whether the metadata is contained in a single column or multiple columns */ + expandedMetadata: boolean + + constructor( + embeddings: EmbeddingsInterface, + entityExtractor: ChatOpenAI, + args: AzionRetrieverArgs + ) { + super(args) + + this.ftsTable = args.ftsTable || "document_fts" + this.vectorTable = args.vectorTable || "documents" + this.similarityK = Math.max(1, args.similarityK || 1); + this.ftsK = Math.max(1, args.ftsK || 1); + this.dbName = args.dbName || "azioncopilotprod" + + this.embeddings = embeddings; + this.searchType = args.searchType || "similarity" + + this.entityExtractor = entityExtractor + this.metadataItems = args.metadataItems || undefined + this.promptEntityExtractor = args.promptEntityExtractor || "Provide them as a space-separated string in lowercase, translated to English." + this.filters = args.filters || [] + this.expandedMetadata = args.expandedMetadata || false + } + + /** + * Generates a string of filters for the SQL query. + * @param {AzionFilter[]} filters - The filters to apply to the search. + * @returns {string} A string of filters for the SQL query. 
+ */ + protected generateFilters( + filters: AzionFilter[] + ): string { + if (filters.length === 0) return ''; + + return 'AND ' + filters.map(({operator, column, value}) => { + const normalizedOperator = operator.toUpperCase(); + /* IS NULL / IS NOT NULL are unary: appending a quoted value would produce invalid SQL, so the value is ignored */ + if (['IS NULL', 'IS NOT NULL'].includes(normalizedOperator)) { + return `${column} ${operator}`; + } + /* IN / NOT IN expect the caller to supply an already formatted list, e.g. "'a', 'b'" */ + if (['IN', 'NOT IN'].includes(normalizedOperator)) { + return `${column} ${operator} (${value})`; + } + /* Every other operator compares against a single quoted literal; the value is interpolated verbatim */ + return `${column} ${operator} '${value}'`; + }).join(' AND '); + } + + /** + * Generates SQL queries for full-text search and similarity search. + * @param {number[]} embeddedQuery - The embedded query vector. + * @param {string} queryEntities - The entities extracted from the query for full-text search. + * @param {string} metadata - Additional metadata columns to be included in the results. + * @returns An object containing the FTS query and similarity query strings. + */ + protected generateSqlQueries( + embeddedQuery: number[], + queryEntities: string, + metadata: string + ): { ftsQuery: string, similarityQuery: string } { + const filters = this.generateFilters(this.filters) + + let rowsNumber = this.similarityK + if (this.searchType === "hybrid") { + rowsNumber+=this.ftsK + } + + const ftsQuery = ` + SELECT id, content, ${metadata.replace('hybrid', 'fts')} + FROM ${this.ftsTable} + WHERE ${this.ftsTable} MATCH '${queryEntities}' ${filters} + ORDER BY rank + LIMIT ${rowsNumber} + `; + + const similarityQuery = ` + SELECT id, content, ${metadata.replace('hybrid', 'similarity')} + FROM ${this.vectorTable} + WHERE rowid IN vector_top_k('${this.vectorTable}_idx', vector('[${embeddedQuery}]'), ${rowsNumber}) ${filters} + `; + + return { ftsQuery, similarityQuery }; + } + + /** + * Generates the SQL statements for the similarity search and full-text search. + * @param query The user query. + * @returns An array of SQL statements. 
+ */ + protected async generateStatements( + query: string + ): Promise { + const embeddedQuery = await this.embeddings.embedQuery(query) + + const metadata = this.generateMetadata() + + let queryEntities = '' + if (this.searchType === 'hybrid') { + queryEntities = await this.extractEntities(query) + } + + const { ftsQuery, similarityQuery } = this.generateSqlQueries(embeddedQuery, queryEntities, metadata); + + if (this.searchType === "similarity") { + return [similarityQuery] + } + + return [similarityQuery, ftsQuery] + } + + /** + * Generates the metadata string for the SQL query. + * @returns {string} The metadata string. + */ + protected generateMetadata(): string { + if (!this.metadataItems) { + return `json_object('searchtype', '${this.searchType}') as metadata` + } + + if (this.expandedMetadata) { + return `json_object('searchtype','${this.searchType}',${this.metadataItems.map(item => `'${item}', ${item}`).join(', ')}) as metadata` + } + + return `json_patch(json_object(${this.metadataItems?.map(item => `'${item}', metadata->>'$.${item}'`).join(', ')}), '{"searchtype":"${this.searchType}"}') as metadata` + } + + /** + * Performs a similarity search on the vector store and returns the top 'similarityK' similar documents. + * @param query The query string. + * @returns A promise that resolves with the similarity search results when the search is complete. + */ + protected async similaritySearchWithScore( + query: string + ): Promise<[Document][]> { + + const statements = await this.generateStatements(query) + + const { data: response, error: errorQuery } = await useQuery(this.dbName,statements); + + if (!response) { + console.error('RESPONSE ERROR: ', errorQuery); + return this.searchError(errorQuery) + } + const searches = this.mapRows(response.results) + const result = this.mapSearches(searches) + return result + } + + /** + * Extracts entities from a user query using the entityExtractor model. 
+ * @param query The user query + * @returns A promise that resolves with the extracted entities, joined with ' OR ', when the extraction is complete. + */ + protected async extractEntities(query: string): Promise<string> { + const entityExtractionPrompt = new SystemMessage( + this.promptEntityExtractor + ); + const entityQuery = await this.entityExtractor.invoke([ + entityExtractionPrompt, + new HumanMessage(query), + ]); + /* Punctuation is replaced by spaces, so splitting on a single space would emit empty terms ("a OR  OR b") that break the FTS MATCH expression; split on whitespace runs and drop empty tokens instead */ + return entityQuery.content.toString().replace(/[^a-zA-Z0-9\s]/g, ' ').split(/\s+/).filter(Boolean).join(' OR ') + } + + /** + * Performs a hybrid search on the vector store, using cosine similarity and FTS search, and + * returns the top 'similarityK' + 'ftsK' similar documents. + * @param query The user query + * @returns A promise that resolves with the hybrid search results when the search is complete. + */ + protected async hybridSearchAzion( + query: string + ): Promise<[Document][]> { + + const statements = await this.generateStatements(query) + + const { data: response, error: errorQuery } = await useQuery(this.dbName,statements) + + if (!response) { + console.error('RESPONSE ERROR: ', errorQuery); + return this.searchError(errorQuery) + } + + const results = this.mapRows(response.results) + + const finalResults = this.removeDuplicates(results) + + return this.mapSearches(finalResults) + } + + /** + * Generates an error document based on the provided error information + * @param error The error object containing details about the issue + * @returns A promise that resolves to an array containing a single Document representing the error + */ + protected searchError( + error: { + message: string; + operation: string;} | undefined + ): Promise<[Document][]> { + return Promise.resolve([ + [ + new Document({ + pageContent: JSON.stringify(error), + metadata: { searchtype: 'error' }, + }), + ], + ]); + } + + /** + * Performs the selected search and returns the documents retrieved. + * @param query The user query + * @returns A promise that resolves with the completion of the search results. 
+ */ + async _getRelevantDocuments( + query: string + ): Promise { + let result: [Document][]; + + if (this.searchType === 'similarity') { + result = await this.similaritySearchWithScore(query); + } else { + result = await this.hybridSearchAzion(query); + } + + return result.map(([doc]) => doc); + } + + /** + * Removes duplicate results from the search results, prioritizing a mix of similarity and FTS results. + * @param {SearchEmbeddingsResponse[]} results - The array of search results to process. + * @returns {SearchEmbeddingsResponse[]} An array of unique search results, with at most 'similarityK' similarity results and 'ftsK' FTS results. + */ + private removeDuplicates( + results: SearchEmbeddingsResponse[] + ): SearchEmbeddingsResponse[] { + const uniqueResults: SearchEmbeddingsResponse[] = []; + const seenIds = new Set(); + + let similarityCount = 0 + let ftsCount = 0 + const maxItems = this.ftsK + this.similarityK + + for (const result of results) { + if (!seenIds.has(result.id)) { + if (result.metadata.searchtype === 'similarity' && similarityCount < this.similarityK) { + seenIds.add(result.id) + uniqueResults.push(result) + similarityCount++ + } else if (result.metadata.searchtype === 'fts' && ftsCount < this.ftsK) { + seenIds.add(result.id) + uniqueResults.push(result) + ftsCount++ + } + } + if (similarityCount + ftsCount === maxItems) break + } + return uniqueResults; + } + +/** + * Converts query results to SearchEmbeddingsResponse objects. + * @param {QueryResult[]} results - The raw query results from the database. + * @returns {SearchEmbeddingsResponse[]} An array of SearchEmbeddingsResponse objects. 
+ */ +private mapRows( + results: QueryResult[] | undefined +): SearchEmbeddingsResponse[] { + + if (!results) { + return [] + } + + return results.flatMap(( + queryResult: QueryResult + ): SearchEmbeddingsResponse[] => { + + if (!queryResult.rows || !queryResult.columns) { + return [] + } + + return queryResult.rows.map( + (row): SearchEmbeddingsResponse => ({ + id: Number(row[0]), + content: String(row[1]), + metadata: JSON.parse(String(row[2])) + }) + ); + } + ); +} + + /** + * Maps search results to Document objects. + * @param {SearchEmbeddingsResponse[]} searches An array of SearchEmbeddingsResponse objects. + * @returns An array of tuples, each containing a single Document object. + */ + protected mapSearches( + searches: SearchEmbeddingsResponse[] + ): [Document][] { + return searches.map((resp: SearchEmbeddingsResponse) => [ + new Document({ + metadata: resp.metadata, + pageContent: resp.content, + id: resp.id.toString(), + }) + ]); + } +} \ No newline at end of file diff --git a/libs/langchain-community/src/retrievers/tests/azion_edgesql.int.test.ts b/libs/langchain-community/src/retrievers/tests/azion_edgesql.int.test.ts new file mode 100644 index 000000000000..e4361626a205 --- /dev/null +++ b/libs/langchain-community/src/retrievers/tests/azion_edgesql.int.test.ts @@ -0,0 +1,44 @@ +/* eslint-disable no-process-env */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ +import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai"; +import { AzionRetriever } from "@langchain/community/retrievers/azion"; +import { jest, test, expect } from "@jest/globals"; + +// Increase timeout to 30 seconds +jest.setTimeout(30000); + +test("Azion search", async () => { + + const embeddings = new OpenAIEmbeddings(); + const entityExtractor = new ChatOpenAI({ + modelName: "gpt-4o-mini", + temperature: 0, + }); + const retrieverHybrid = new AzionRetriever(embeddings, entityExtractor, { + searchType: "hybrid", + similarityK: 2, + ftsK: 2, + dbName: 
'vectorstore', + vectorTable:'vectors', + ftsTable:'vectors' + }); + + expect(retrieverHybrid).toBeDefined(); + + const results1 = await retrieverHybrid._getRelevantDocuments("hello"); + + expect(results1.length).toBeGreaterThan(0); + + const retrieverSimilarity = new AzionRetriever(embeddings, entityExtractor, { + searchType: "similarity", + similarityK: 2, + ftsK: 2, + dbName: 'vectorstore', + vectorTable:'vectors', + ftsTable:'vectors' + }); + + const results2 = await retrieverSimilarity._getRelevantDocuments("hello"); + + expect(results2.length).toBeGreaterThan(0); +}); \ No newline at end of file diff --git a/libs/langchain-community/src/vectorstores/azion_edgesql.ts b/libs/langchain-community/src/vectorstores/azion_edgesql.ts new file mode 100644 index 000000000000..6a51e4b43bac --- /dev/null +++ b/libs/langchain-community/src/vectorstores/azion_edgesql.ts @@ -0,0 +1,900 @@ +import { VectorStore } from '@langchain/core/vectorstores'; +import { useQuery, useExecute, getDatabases, createDatabase, getTables, type AzionDatabaseResponse, QueryResult, AzionDatabaseQueryResponse } from 'azion/sql'; +import type { EmbeddingsInterface } from '@langchain/core/embeddings'; +import { Document } from '@langchain/core/documents'; + +/** + * Represents a filter condition for querying the Azion database + * @property operator - The comparison operator to use (e.g. 
=, !=, >, <, etc) + * @property column - The database column to filter on + * @property value - The value to compare against + */ +export type AzionFilter = {operator: Operator, column: Column, value: string}; + +/** + * Represents a database column name + */ +export type Column = string; + +/** + * Valid SQL operators that can be used in filter conditions + */ +export type Operator = + | '=' | '!=' | '>' | '<>' | '<' // Basic comparison operators + | '>=' | '<=' // Range operators + | 'LIKE' | 'NOT LIKE' // Pattern matching + | 'IN' | 'NOT IN' // Set membership + | 'IS NULL' | 'IS NOT NULL'; // NULL checks + + +/** + * Interface for configuring the Azion vector store setup + * @property {string[]} columns - Additional columns to create in the database table beyond the required ones + * @property {"vector" | "hybrid"} mode - The search mode to enable: + * "vector" - Only vector similarity search + * "hybrid" - Both vector and full-text search capabilities + */ +interface AzionSetupOptions { + columns: string[], + mode: "vector" | "hybrid" +} + +/** + * Interface representing the structure of a row in the vector store + * @property content - The text content of the document + * @property embedding - The vector embedding of the content as an array of numbers + * @property metadata - Additional metadata associated with the document as key-value pairs + */ +interface rowsInterface { + content: string; + embedding: number[]; + metadata: Record; +} + +export type AzionMetadata = Record; + +/** + * Interface for the response returned when searching embeddings. 
+ */ +interface SearchEmbeddingsResponse { + id: number; + content: string; + similarity: number; + metadata: { + searchtype: string; + [key: string]: any; + }; +} + +/** + * Interface for configuring hybrid search options that combines vector and full-text search + * @property {number} kfts - Number of results to return from full-text search + * @property {number} kvector - Number of results to return from vector similarity search + * @property {AzionFilter[]} [filter] - Optional array of filters to apply to search results + * @property {string[]} [metadataItems] - Optional array of metadata fields to include in results + */ +interface hybridSearchOptions { + kfts: number, + kvector: number, + filter?: AzionFilter[], + metadataItems?: string[] +} + +/** + * Interface for configuring full-text search options + * @property {number} kfts - Number of results to return from full-text search + * @property {AzionFilter[]} [filter] - Optional array of filters to apply to search results + * @property {string[]} [metadataItems] - Optional array of metadata fields to include in results + */ +interface fullTextSearchOptions { + kfts: number, + filter?: AzionFilter[], + metadataItems?: string[] +} + +/** + * Interface for configuring vector similarity search options + * @property {number} kvector - Number of results to return from vector similarity search + * @property {AzionFilter[]} [filter] - Optional array of filters to apply to search results + * @property {string[]} [metadataItems] - Optional array of metadata fields to include in results + */ +interface similaritySearchOptions { + kvector: number, + filter?: AzionFilter[], + metadataItems?: string[] +} + +/** + * Interface for the arguments required to initialize an Azion library. 
+ */ +export interface AzionVectorStoreArgs { + tableName: string; + filter?: AzionMetadata; + dbName: string; + expandedMetadata?: boolean; +} + +/** + * Example usage: + * ```ts + * // Initialize the vector store + * const vectorStore = new AzionVectorStore(embeddings, { + * dbName: "mydb", + * tableName: "documents" + * }); + * + * // Setup database with hybrid search and metadata columns + * await vectorStore.setupDatabase({ + * columns: ["topic", "language"], + * mode: "hybrid" + * }); + * + * // OR: Initialize using the static create method + * const vectorStore = await AzionVectorStore.createVectorStore(embeddings, { + * dbName: "mydb", + * tableName: "documents" + * }, { + * columns: ["topic", "language"], + * mode: "hybrid" + * }); + * + * // Add documents to the vector store + * await vectorStore.addDocuments([ + * new Document({ + * pageContent: "Australia is known for its unique wildlife", + * metadata: { topic: "nature", language: "en" } + * }) + * ]); + * + * // Perform similarity search + * const results = await vectorStore.similaritySearch( + * "coral reefs in Australia", + * 2, // Return top 2 results + * { filter: [{ operator: "=", column: "topic", value: "biology" }] } // Optional AzionFilter + * ); + * + * // Perform full text search + * const ftResults = await vectorStore.fullTextSearch( + * "Sydney Opera House", + * 1, // Return top result + * { filter: [{ operator: "=", column: "language", value: "en" }] } // Optional AzionFilter + * ); + * ``` + */ + +export class AzionVectorStore extends VectorStore { + /** Type declaration for filter type */ + declare FilterType: AzionMetadata + + /** Name of the main table to store vectors and documents */ + tableName: string + + /** Name of the database to use */ + dbName: string + + /** Whether the metadata is contained in a single column or multiple columns */ + expandedMetadata: boolean + + _vectorstoreType(): string { + return 'azionEdgeSQL' + } + + constructor( + embeddings: EmbeddingsInterface, 
args: AzionVectorStoreArgs + ) { + super(embeddings, args) + this.tableName = args.tableName + this.dbName = args.dbName + this.expandedMetadata = args.expandedMetadata ?? false + } + + /** + * Creates a new vector store instance and sets up the database. + * @param {EmbeddingsInterface} embeddings - The embeddings interface to use for vectorizing documents + * @param {AzionVectorStoreArgs} args - Configuration options: + * @param {string} args.dbName - Name of the database to create/use + * @param {string} args.tableName - Name of the table to create/use + * @param {AzionSetupOptions} setupOptions - Database setup options: + * @param {string[]} setupOptions.columns - Additional columns to create in the table beyond the required ones + * @param {"vector"|"hybrid"} setupOptions.mode - The search mode to enable: + * - "vector": Only vector similarity search capabilities + * - "hybrid": Both vector and full-text search capabilities + * @returns {Promise} A promise that resolves with the configured vector store instance + */ + static async createVectorStore( + embeddings: EmbeddingsInterface, + args: AzionVectorStoreArgs, + setupOptions: AzionSetupOptions + ): Promise { + const instance = new AzionVectorStore(embeddings, args) + await instance.setupDatabase(setupOptions) + return instance + } + + /** + * Adds documents to the vector store. + * @param {Document[]} documents The documents to add. + * @param {Object} options Optional parameters for adding the documents. + * @returns A promise that resolves when the documents have been added. + */ + async addDocuments( + documents: Document[], + options?: { ids?: string[] | number[] } + ) { + const texts = documents.map((doc) => doc.pageContent) + const embeddings = await this.embeddings.embedDocuments(texts) + return this.addVectors(embeddings, documents, options) + } + + /** + * Adds vectors to the vector store. + * @param {number[][]} vectors The vectors to add. 
+ * @param {Document[]} documents The documents associated with the vectors. + * @param {Object} options Optional parameters for adding the vectors. + * @returns A promise that resolves with the IDs of the added vectors when the vectors have been added. + */ + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] | number[] } + ) { + + const rows = await this.mapRowsFromDocuments(vectors, documents) + const insertStatements = this.createStatements(rows) + const chunks = this.createInsertChunks(insertStatements) + + await this.insertChunks(chunks) + } + + /** + * Gets the dimensions of the embeddings. + * @returns {Promise} The dimensions of the embeddings. + */ + private async getEmbeddingsDimensions( + ): Promise { + return (await this.embeddings.embedQuery("test")).length + } + + /** + * Maps the rows and metadata to the correct format. + * @param vectors The vectors to map. + * @param {Document[]} documents The documents to map. + * @returns {Promise} The mapped rows and metadata. + */ + private async mapRowsFromDocuments( + vectors: number[][], + documents: Document[] + ): Promise< rowsInterface[] > { + + return vectors.map((embedding, idx) => ({ + content: documents[idx].pageContent, + embedding, + metadata: documents[idx].metadata, + })) + } + + /** + * Sets up the database and tables. + * @param {AzionSetupOptions} setupOptions The setup options: + * - columns: string[] - The metadata columns to add to the table + * - mode: "vector" | "hybrid" - The mode to use for the table. "vector" for vector search only, "hybrid" for vector and full-text search + * @returns {Promise} A promise that resolves when the database and tables have been set up. 
+   */
+  async setupDatabase(
+    setupOptions: AzionSetupOptions
+  ): Promise<void> {
+    const { columns, mode } = setupOptions
+
+    await this.handleDatabase()
+    // Fixed 15 s pause between the database step and the table step.
+    // NOTE(review): presumably gives a freshly created database time to become available — confirm.
+    await new Promise(resolve => setTimeout(resolve, 15000))
+    console.log("Database created")
+    await this.handleTables(mode, columns)
+  }
+
+  /**
+   * Lists the tables of the database and creates the store's tables when
+   * `areTablesSetup` reports that they are missing for the given mode.
+   * @param {string} mode The mode ("vector" or "hybrid").
+   * @param {string[]} columns The metadata columns to create.
+   * @returns {Promise<void>} A promise that resolves when the tables exist.
+   * @throws {Error} If listing or creating the tables reports an error.
+   */
+  private async handleTables(
+    mode: "vector" | "hybrid",
+    columns: string[]
+  ): Promise<void> {
+    const { data: dataTables, error: errorTables } = await getTables(this.dbName)
+
+    this.errorHandler(errorTables, "Error getting tables")
+
+    // Index 1 of each returned row is read as the table name.
+    const tables = dataTables?.results?.[0]?.rows?.map(row => row[1])
+
+    if (!this.areTablesSetup(tables, mode)) {
+      const { error: errorSetupDb } = await this.setupTables(mode, columns)
+      this.errorHandler(errorSetupDb, "Error setting up tables")
+    }
+  }
+
+  /**
+   * Logs and rethrows an error reported by the database client; does nothing
+   * when `error` is undefined.
+   * @param {Object} error The error object, if any.
+   * @param {string} message Context logged with the error and used as the
+   *   thrown message when the error carries none.
+   * @returns {void}
+   * @throws {Error} Whenever `error` is defined.
+   */
+  private errorHandler(
+    error: {
+      message: string
+      operation: string
+    } | undefined,
+    message: string
+  ): void {
+    if (!error) {
+      return
+    }
+    console.log(message, error)
+    throw new Error(error.message ?? message)
+  }
+
+  /**
+   * Checks whether the tables required by `mode` are present.
+   * @param {(string | number)[] | undefined} tables The table names found in the database.
+   * @param {string} mode The mode; "hybrid" additionally requires the `<tableName>_fts` table.
+   * @returns {boolean} Whether the tables are setup.
+   */
+  private areTablesSetup(
+    tables: (string | number)[] | undefined,
+    mode: "vector" | "hybrid"
+  ): boolean {
+    if (!tables) {
+      return false
+    }
+
+    const hasMainTable = tables.includes(this.tableName)
+    if (mode === "hybrid") {
+      return hasMainTable && tables.includes(`${this.tableName}_fts`)
+    }
+    return hasMainTable
+  }
+
+  /**
+   * Handles the database creation and setup.
+   * @returns {Promise<void>} A promise that resolves when the database has been created and setup.
+   * @throws {Error} If listing or creating the database reports an error.
+   */
+  private async handleDatabase(
+  ): Promise<void> {
+    const { data: dataGet, error: errorGet } = await getDatabases()
+
+    this.errorHandler(errorGet, "Error getting databases")
+
+    const alreadyExists = dataGet?.databases?.some((db) => db.name === this.dbName)
+    if (!alreadyExists) {
+      console.log("Creating database: ", this.dbName)
+      const { error: errorCreate } = await createDatabase(this.dbName, { debug: true })
+
+      this.errorHandler(errorCreate, "Error creating database")
+    }
+  }
+
+  /**
+   * Sets up the tables based on the specified mode and columns.
+   *
+   * Always creates the main table and its vector index. In "hybrid" mode it
+   * also creates the `<tableName>_fts` FTS5 table plus insert/update/delete
+   * triggers that mirror the main table into it. Every statement uses
+   * `IF NOT EXISTS`, so a partially provisioned database (e.g. main table
+   * present, FTS table missing) can be completed without failing.
+   *
+   * With `expandedMetadata` each entry of `columns` becomes its own TEXT
+   * column; otherwise a single `metadata JSON` column is used.
+   * @param {string} mode The mode to use - either "vector" for vector search only or "hybrid" for vector + full text search
+   * @param {string[]} columns Additional metadata columns to add to the tables
+   * @returns {Promise<{ data: string; error: undefined }>} A promise that resolves when the tables have been created and setup
+   * @throws {Error} If executing the statements reports an error.
+   */
+  private async setupTables(
+    mode: "vector" | "hybrid",
+    columns: string[]
+  ): Promise<{ data: string; error: undefined }> {
+    const dimensions = await this.getEmbeddingsDimensions()
+
+    // Metadata is stored either as one column per key or as a single JSON column.
+    const metadataColumns = this.expandedMetadata ? columns : ["metadata"]
+    const metadataType = this.expandedMetadata ? "TEXT" : "JSON"
+    // Renders ",a,b" (or "" for no parts) so it can be appended to a column list.
+    const asSqlSuffix = (parts: string[]): string => parts.map((part) => `,${part}`).join("")
+
+    const createTableStatement = `
+      CREATE TABLE IF NOT EXISTS ${this.tableName} (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        content TEXT NOT NULL,
+        embedding F32_BLOB(${dimensions})
+        ${asSqlSuffix(metadataColumns.map((column) => `${column} ${metadataType}`))}
+      );`
+
+    const createIndexStatement = `
+      CREATE INDEX IF NOT EXISTS ${this.tableName}_idx ON ${this.tableName} (
+        libsql_vector_idx(embedding, 'metric=cosine', 'compress_neighbors=float8', 'max_neighbors=20')
+      )`
+
+    const createFtsStatement = `
+      CREATE VIRTUAL TABLE IF NOT EXISTS ${this.tableName}_fts USING fts5(
+        content,
+        id UNINDEXED
+        ${asSqlSuffix(metadataColumns)},
+        tokenize = 'porter'
+      )`
+
+    const createTriggersStatements = [
+      `CREATE TRIGGER IF NOT EXISTS insert_into_${this.tableName}_fts
+        AFTER INSERT ON ${this.tableName}
+        BEGIN
+          INSERT INTO ${this.tableName}_fts(id, content ${asSqlSuffix(metadataColumns)})
+          VALUES(new.id, new.content ${asSqlSuffix(metadataColumns.map((column) => `new.${column}`))});
+        END`,
+
+      `CREATE TRIGGER IF NOT EXISTS update_${this.tableName}_fts
+        AFTER UPDATE ON ${this.tableName}
+        BEGIN
+          UPDATE ${this.tableName}_fts
+          SET content = new.content
+          ${asSqlSuffix(metadataColumns.map((column) => `${column} = new.${column}`))}
+          WHERE id = old.id;
+        END`,
+
+      `CREATE TRIGGER IF NOT EXISTS delete_${this.tableName}_fts
+        AFTER DELETE ON ${this.tableName}
+        BEGIN
+          DELETE FROM ${this.tableName}_fts WHERE id = old.id;
+        END`
+    ]
+
+    // "vector" mode only needs the main table and its vector index.
+    const vectorStatements = [createTableStatement, createIndexStatement]
+    const allStatements = mode === "vector"
+      ? vectorStatements
+      : [...vectorStatements, createFtsStatement, ...createTriggersStatements]
+
+    const { error } = await useExecute(this.dbName, allStatements)
+    this.errorHandler(error, "Error setting up tables")
+    return { data: "Database setup successfully", error: undefined }
+  }
+
+  /**
+   * Inserts the chunks into the database.
+   * @param {string[][]} chunks The chunks to insert.
+   * @returns {Promise} A promise that resolves when the chunks have been inserted.
+   * @throws {Error} If executing any chunk reports an error; later chunks are then not sent.
+   */
+  private async insertChunks(
+    chunks: string[][]
+  ): Promise<void> {
+    console.log("Inserting chunks")
+    // Chunks are sent one after another; the index comes from the iteration
+    // itself rather than a linear `indexOf` lookup per chunk.
+    for (const [index, chunk] of chunks.entries()) {
+      // An empty chunk carries no statements, so there is nothing to execute.
+      if (chunk.length === 0) {
+        continue
+      }
+      console.log("Inserting chunk", index)
+      const { error } = await useExecute(this.dbName, chunk)
+      this.errorHandler(error, "Error inserting chunk")
+    }
+  }
+
+  /**
+   * Collects the distinct metadata keys used across all rows, in the order
+   * in which each key is first encountered.
+   * @param {rowsInterface[]} rows The rows to extract the metadata columns from.
+   * @returns {string[]} The metadata columns.
+   */
+  private extractMetadataColumns(
+    rows: rowsInterface[]
+  ): string[] {
+    // A Set keeps first-seen order and makes the membership check O(1).
+    const metadataColumns = new Set<string>()
+
+    for (const row of rows) {
+      if (row.metadata) {
+        for (const key of Object.keys(row.metadata)) {
+          metadataColumns.add(key)
+        }
+      }
+    }
+    return Array.from(metadataColumns)
+  }
+
+  /**
+   * Creates the insert statement for a row.
+   *
+   * With `expandedMetadata` every metadata column gets its own value (null
+   * when the row lacks that key); otherwise the whole metadata object is
+   * serialized into the single `metadata` column.
+   * @param {rowsInterface} row The row to create the insert statement for.
+   * @param {string[]} metadataColumns The metadata columns.
+   * @returns {string} The insert statement.
+   */
+  private createInsertStatement(
+    row: rowsInterface,
+    metadataColumns: string[]
+  ): string {
+    if (this.expandedMetadata) {
+      const metadataValues = metadataColumns.map((col) => row.metadata?.[col] ?? null)
+      return this.createInsertString(
+        ['content', 'embedding', ...metadataColumns],
+        [row.content, row.embedding, ...metadataValues]
+      )
+    }
+
+    return this.createInsertString(
+      ['content', 'embedding', 'metadata'],
+      [row.content, row.embedding, JSON.stringify(row.metadata)]
+    )
+  }
+
+  /**
+   * Creates the insert statements for the rows.
+   * @param {rowsInterface[]} rows The rows to create the insert statements for.
+   * @returns {string[]} The insert statements.
+   */
+  private createStatements(
+    rows: rowsInterface[]
+  ): string[] {
+    // All rows share one column list so every statement has the same shape.
+    const metadataColumns = this.extractMetadataColumns(rows)
+    return rows.map((row) => this.createInsertStatement(row, metadataColumns))
+  }
+
+  /**
+   * Groups insert statements into chunks that each stay within 1000
+   * statements and roughly 0.8 MiB (statements joined by single spaces).
+   *
+   * Every input statement appears in exactly one chunk, in input order. A
+   * statement that is larger than the byte limit on its own is placed alone
+   * in a chunk instead of being dropped. No empty chunks are produced, so an
+   * empty input yields an empty result.
+   * @param {string[]} statements The statements to create the insert chunks for.
+   * @returns {string[][]} The insert chunks.
+   */
+  private createInsertChunks(
+    statements: string[]
+  ): string[][] {
+    const maxChunkLength = 1000
+    const maxMbSize = 0.8 * 1024 * 1024
+    const insertChunk: string[][] = []
+
+    let current: string[] = []
+    // Running byte size of `current`, tracked incrementally instead of
+    // re-encoding the whole chunk for every statement.
+    let currentBytes = 0
+
+    for (const statement of statements) {
+      const statementBytes = this.getStringBytes(statement)
+      // One extra byte accounts for the space that joins statements.
+      const separatorBytes = current.length > 0 ? 1 : 0
+      const wouldOverflow =
+        currentBytes + separatorBytes + statementBytes > maxMbSize ||
+        current.length + 1 > maxChunkLength
+
+      // Only flush a non-empty chunk, so an oversized first statement does
+      // not produce an empty chunk.
+      if (current.length > 0 && wouldOverflow) {
+        insertChunk.push(current)
+        current = []
+        currentBytes = 0
+      }
+
+      currentBytes += (current.length > 0 ? 1 : 0) + statementBytes
+      current.push(statement)
+    }
+
+    // The trailing chunk is always emitted, including when its first
+    // statement was the one that triggered the last flush.
+    if (current.length > 0) {
+      insertChunk.push(current)
+    }
+
+    if (insertChunk.length > 1) {
+      console.log("Total size exceeded max size. Initiating chunking...")
+    }
+
+    return insertChunk
+  }
+
+  /**
+   * Gets the number of bytes in a string when encoded with `TextEncoder`.
+   * @param {string} str The string to get the number of bytes for.
+   * @returns {number} The number of bytes in the string.
+   */
+  private getStringBytes(
+    str: string
+  ): number {
+    return new TextEncoder().encode(str).length
+  }
+
+  /**
+   * Performs a similarity search on the vector store and returns the top 'similarityK' similar documents.
+   * @param {number[]} vector The query vector.
+   * @param {number} k How many nearest neighbours to request from the vector index.
+   * @param {AzionFilter[]} filter Optional filters appended to the WHERE clause.
+   * @param {string[]} metadataItems Optional metadata items to return with each document.
+   * @returns {Promise<[Document, number][]>} Document/score pairs, where the score is `1 - cosine distance`.
+   */
+  async similaritySearchVectorWithScore(
+    vector: number[],
+    k: number,
+    filter?: AzionFilter[],
+    metadataItems?: string[]
+  ): Promise<[Document, number][]> {
+    const metadataSelect = this.generateMetadata(metadataItems, 'similarity')
+    const whereSuffix = this.generateFilters(filter)
+    // The same vector literal is needed for both the distance and the top-k lookup.
+    const queryVector = `vector('[${vector}]')`
+
+    const similarityQuery = `
+      SELECT id, content, ${metadataSelect}, 1 - vector_distance_cos(embedding, ${queryVector}) as similarity
+      FROM ${this.tableName}
+      WHERE rowid IN vector_top_k('${this.tableName}_idx', ${queryVector}, ${k}) ${whereSuffix}`
+
+    const { data, error } = await useQuery(this.dbName, [similarityQuery])
+
+    if (data) {
+      return this.mapSearches(this.mapRows(data.results))
+    }
+
+    // No data: throw when an error was reported, otherwise return an error document.
+    this.errorHandler(error, "Error performing similarity search")
+    return this.searchError(error)
+  }
+
+  /**
+   * Performs a full-text search on the vector store and returns the top 'k' similar documents.
+   * @param query The query string to search for
+   * @param options The options for the full-text search, including:
+   *   - kfts: The number of full-text search results to return
+   *   - filter: Optional filters to apply to narrow down the search results
+   *   - metadataItems: Optional metadata fields to include in the results
+   * @returns A promise that resolves with the full-text search results when the search is complete.
+   *
+   * The query is reduced to ASCII letters, digits and whitespace, split into
+   * terms, and the terms are OR-ed together. Each term is double-quoted so
+   * that words such as AND, OR, NOT or NEAR are matched as text instead of
+   * being parsed as FTS5 operators. A query with no usable terms returns an
+   * empty result without querying the database.
+   * @throws {Error} If the query reports an error.
+   */
+  async AzionFullTextSearch(
+    query: string,
+    options: fullTextSearchOptions
+  ): Promise<[Document, number][]> {
+    const { kfts, filter, metadataItems } = options
+
+    // Split on any whitespace run and drop empty terms; splitting on a single
+    // space would turn "a  b" into "a OR  OR b", which is not valid FTS5.
+    const terms = query
+      .toString()
+      .replace(/[^a-zA-Z0-9\s]/g, '')
+      .split(/\s+/)
+      .filter((term) => term.length > 0)
+
+    if (terms.length === 0) {
+      return []
+    }
+
+    const metadata = this.generateMetadata(metadataItems, 'fulltextsearch')
+    const filters = this.generateFilters(filter)
+    // Terms contain only [a-zA-Z0-9], so wrapping them in double quotes is safe.
+    const matchExpression = terms.map((term) => `"${term}"`).join(' OR ')
+
+    const fullTextQuery = `
+      SELECT id, content, ${metadata}, rank as bm25_similarity
+      FROM ${this.tableName}_fts
+      WHERE ${this.tableName}_fts MATCH '${matchExpression}' ${filters}
+      LIMIT ${kfts}`
+
+    const { data, error } = await useQuery(this.dbName, [fullTextQuery])
+
+    if (!data) {
+      this.errorHandler(error, "Error performing full-text search")
+      return this.searchError(error)
+    }
+
+    const searches = this.mapRows(data?.results)
+    const results = this.mapSearches(searches)
+    return results
+  }
+
+  /**
+   * Performs a hybrid search on the vector store and returns the top 'k' similar documents.
+   * @param query The query string to search for
+   * @param options The options for the hybrid search, including:
+   *   - kfts: The number of full-text search results to return
+   *   - kvector: The number of vector search results to return
+   *   - filter: Optional filters to apply to narrow down the search results
+   *   - metadataItems: Optional metadata fields to include in the results
+   * @returns A promise that resolves with the hybrid search results when the search is complete.
+   *
+   * The full-text search and the embed-then-vector-search pipeline do not
+   * depend on each other, so they run concurrently. Results are merged with
+   * full-text hits first, then de-duplicated by `removeDuplicates`.
+   */
+  async AzionHybridSearch(
+    query: string,
+    hybridSearchOptions: hybridSearchOptions
+  ): Promise<[Document, number][]> {
+    const { kfts, kvector, filter, metadataItems } = hybridSearchOptions
+
+    // Embedding the query is only needed by the vector branch.
+    const runVectorSearch = async (): Promise<[Document, number][]> => {
+      const vector = await this.embeddings.embedQuery(query)
+      return this.similaritySearchVectorWithScore(vector, kvector, filter, metadataItems)
+    }
+
+    const [ftsResults, vectorResults] = await Promise.all([
+      this.AzionFullTextSearch(query, { kfts, filter, metadataItems }),
+      runVectorSearch(),
+    ])
+
+    return this.removeDuplicates([...ftsResults, ...vectorResults], kfts, kvector)
+  }
+
+  /**
+   * Performs a similarity search on the vector store and returns the top 'k' similar documents.
+   * @param query The query string.
+   * @param options The options for the similarity search, including:
+   *   - kvector: The number of vector search results to return
+   *   - filter: Optional filters to apply to the search
+   *   - metadataItems: Optional metadata fields to include in results
+   * @returns A promise that resolves with the similarity search results when the search is complete.
+   */
+  async AzionSimilaritySearch(
+    query: string,
+    options: similaritySearchOptions
+  ): Promise<[Document, number][]> {
+    // Embed the text query, then delegate to the vector-based search.
+    const queryEmbedding = await this.embeddings.embedQuery(query)
+    return this.similaritySearchVectorWithScore(
+      queryEmbedding,
+      options.kvector,
+      options.filter,
+      options.metadataItems
+    )
+  }
+
+  /**
+   * Wraps error information in a single placeholder search result.
+   *
+   * The document's content is the JSON-serialized error, its metadata is
+   * tagged with `searchtype: 'error'`, and its score is 0.
+   * @param {Object} error The error object containing details about the issue
+   * @returns {Promise<[Document, number][]>} A promise that resolves to an array containing a single Document representing the error
+   */
+  private searchError(
+    error: {
+      message: string;
+      operation: string;
+    } | undefined
+  ): Promise<[Document, number][]> {
+    const errorDocument = new Document({
+      pageContent: JSON.stringify(error),
+      metadata: { searchtype: 'error' },
+    });
+    const entry: [Document, number] = [errorDocument, 0];
+    return Promise.resolve([entry]);
+  }
+
+  /**
+   * Removes duplicate results from the search results, prioritizing a mix of similarity and FTS results.
+   * @param {[Document, number][]} results - Document/score pairs to process, in priority order
+   * @param {number} kfts - Maximum number of full-text search results to keep
+   * @param {number} kvector - Maximum number of vector similarity search results to keep
+   * @returns {[Document, number][]} The first occurrence of each document id, capped per search type by kfts and kvector
+   */
+  private removeDuplicates(
+    results: [Document, number][],
+    kfts: number,
+    kvector: number
+  ): [Document, number][] {
+    const kept: [Document, number][] = [];
+    const seenIds = new Set();
+    const maxItems = kfts + kvector
+
+    let similarityCount = 0
+    let ftsCount = 0
+
+    for (const result of results) {
+      const [doc] = result
+
+      if (!seenIds.has(doc.id)) {
+        // A result is kept only while the quota of its own search type has room.
+        const takeSimilarity =
+          doc.metadata?.searchtype === 'similarity' && similarityCount < kvector
+        const takeFts =
+          !takeSimilarity && doc.metadata.searchtype === 'fulltextsearch' && ftsCount < kfts
+
+        if (takeSimilarity || takeFts) {
+          seenIds.add(doc.id)
+          kept.push(result)
+          if (takeSimilarity) {
+            similarityCount++
+          } else {
+            ftsCount++
+          }
+        }
+      }
+
+      // Both quotas are full: nothing further can be accepted.
+      if (similarityCount + ftsCount === maxItems) break
+    }
+    return kept;
+  }
+
+  /**
+   * Converts raw query results into SearchEmbeddingsResponse objects.
+   *
+   * Each row is read positionally: id, content, JSON-encoded metadata, score.
+   * Result sets without rows or columns contribute nothing.
+   * @param {QueryResult[]} results - The raw query results from the database.
+   * @returns {SearchEmbeddingsResponse[]} An array of SearchEmbeddingsResponse objects.
+   */
+  private mapRows(
+    results: QueryResult[] | undefined
+  ): SearchEmbeddingsResponse[] {
+    if (!results) {
+      return []
+    }
+
+    const toResponse = (row: any): SearchEmbeddingsResponse => ({
+      id: Number(row[0]),
+      content: String(row[1]),
+      metadata: JSON.parse(String(row[2])),
+      similarity: Number(row[3])
+    })
+
+    return results.flatMap((queryResult: QueryResult): SearchEmbeddingsResponse[] => {
+      const hasPayload = Boolean(queryResult.rows) && Boolean(queryResult.columns)
+      return hasPayload ? queryResult.rows!.map(toResponse) : []
+    });
+  }
+
+  /**
+   * Maps search results to Document objects.
+   * @param {SearchEmbeddingsResponse[]} searches An array of SearchEmbeddingsResponse objects.
+   * @returns An array of `[Document, similarity]` tuples, one per search response.
+   */
+  private mapSearches(
+    searches: SearchEmbeddingsResponse[]
+  ): [Document, number][] {
+    return searches.map((resp: SearchEmbeddingsResponse) => [
+      new Document({
+        metadata: resp.metadata,
+        pageContent: resp.content,
+        id: resp.id.toString(),
+      }),
+      resp.similarity
+    ]);
+  }
+
+  /**
+   * Generates the SQL expression that builds the `metadata` JSON column of a search query.
+   *
+   * The result always contains a `searchtype` entry. The requested items are
+   * read from dedicated columns when `expandedMetadata` is set, otherwise
+   * from the JSON `metadata` column.
+   * @param {string[]} metadataItems - The metadata items to include in the query; undefined or empty means none.
+   * @param {string} searchType - The type of search.
+   * @returns {string} The metadata string.
+   */
+  private generateMetadata(
+    metadataItems: string[] | undefined,
+    searchType: string
+  ): string {
+    // An empty list must take this branch too: with expanded metadata it
+    // would otherwise render `json_object('searchtype','x',)`, which is invalid SQL.
+    if (!metadataItems || metadataItems.length === 0) {
+      return `json_object('searchtype', '${searchType}') as metadata`
+    }
+
+    if (this.expandedMetadata) {
+      const pairs = metadataItems.map(item => `'${item}', ${item}`).join(', ')
+      return `json_object('searchtype','${searchType}',${pairs}) as metadata`
+    }
+
+    const pairs = metadataItems.map(item => `'${item}', metadata->>'$.${item}'`).join(', ')
+    return `json_patch(json_object(${pairs}), '{"searchtype":"${searchType}"}') as metadata`
+  }
+
+  /**
+   * Generates the filters string for the SQL query.
+   *
+   * Filters are joined with AND and prefixed with `AND ` so the result can be
+   * appended to an existing WHERE clause. For `IN` / `NOT IN` the value is
+   * inserted as-is between parentheses, so the caller supplies an already
+   * quoted list. For every other operator the value is emitted as a
+   * single-quoted SQL string with embedded single quotes doubled.
+   * @param {AzionFilter[]} filters The filters to apply to the query.
+   * @returns {string} The filters string, or '' when there are no filters.
+   */
+  private generateFilters(
+    filters: AzionFilter[] | undefined
+  ): string {
+    if (!filters || filters.length === 0) {
+      return '';
+    }
+
+    // NOTE(review): `column` and `operator` are interpolated verbatim and must come from trusted code.
+    const clauses = filters.map(({ operator, column, value }) => {
+      if (['IN', 'NOT IN'].includes(operator.toUpperCase())) {
+        return `${column} ${operator} (${value})`;
+      }
+      // Double embedded single quotes so the value cannot terminate the literal.
+      const escapedValue = String(value).replace(/'/g, "''");
+      return `${column} ${operator} '${escapedValue}'`;
+    });
+
+    return 'AND ' + clauses.join(' AND ');
+  }
+
+  /**
+   * Creates the insert sql query for a row.
+   * @param {string[]} columnNames The column names.
+   * @param {string[]} values The values.
+   * @returns {string} The insert sql query.
+   *
+   * Values are rendered by column:
+   *   - `embedding` becomes a `vector('[...]')` literal;
+   *   - null / undefined become SQL NULL;
+   *   - the JSON `metadata` column (non-expanded mode) keeps its content and
+   *     only has single quotes doubled, so the stored JSON stays valid;
+   *   - everything else is converted to a string and passed through `escapeQuotes`.
+   */
+  private createInsertString(
+    columnNames: string[],
+    values: any[]
+  ): string {
+    const renderedValues = values.map((value, index) => {
+      const column = columnNames[index]
+
+      if (column === 'embedding') {
+        return `vector('[${value}]')`
+      }
+
+      // Missing metadata values are stored as NULL instead of crashing in escapeQuotes.
+      if (value === null || value === undefined) {
+        return 'NULL'
+      }
+
+      if (!this.expandedMetadata && column === 'metadata') {
+        // JSON needs its double quotes, so only single quotes are escaped (doubled).
+        return `'${String(value).replace(/'/g, "''")}'`
+      }
+
+      // String() lets numeric or boolean metadata values through escapeQuotes.
+      return `'${this.escapeQuotes(String(value))}'`
+    })
+
+    return `INSERT INTO ${this.tableName} (${columnNames.join(', ')})
+      VALUES (${renderedValues.join(', ')})`
+  }
+
+  /**
+   * Replaces every single and double quote in the value with a space.
+   * The replacement is lossy: the quote characters are not stored.
+   * @param {string} value The value to strip the quotes from.
+   * @returns {string} The value with each quote replaced by a space.
+   */
+  private escapeQuotes(
+    value: string
+  ): string {
+    return value.replace(/'/g, " ").replace(/"/g, ' ')
+  }
+}
diff --git a/libs/langchain-community/src/vectorstores/tests/azion_edgesql.int.test.ts b/libs/langchain-community/src/vectorstores/tests/azion_edgesql.int.test.ts new file mode 100644 index 000000000000..78097a7e09dd --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/azion_edgesql.int.test.ts @@ -0,0 +1,157 @@ +/* eslint-disable no-process-env */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ +import { OpenAIEmbeddings } from "@langchain/openai"; +import { AzionVectorStore } from "@langchain/community/vectorstores/azionedgesql"; +import { Document } from "@langchain/core/documents"; +import { jest, test, expect, describe, beforeAll } from "@jest/globals"; + +// Increase timeout for database operations +jest.setTimeout(60000); + +describe("AzionVectorStore", () => { + let vectorStore: AzionVectorStore; + const dbName = "testvectorstore"; + const tableName = "testvectors"; + + const testDocs = [ + new Document({ + pageContent: "Aspirin 
is good for headaches", + metadata: { category: "medicine", type: "pain relief" } + }), + new Document({ + pageContent: "Ibuprofen reduces inflammation and pain", + metadata: { category: "medicine", type: "pain relief" } + }), + new Document({ + pageContent: "Regular exercise helps prevent headaches", + metadata: { category: "lifestyle", type: "prevention" } + }) + ]; + + beforeAll(async () => { + const embeddings = new OpenAIEmbeddings(); + + // Test static factory method + vectorStore = await AzionVectorStore.createVectorStore( + embeddings, + { + dbName, + tableName, + expandedMetadata: true + }, + { + columns: ["category", "type"], + mode: "hybrid" + } + ); + + // Add test documents + await vectorStore.addDocuments(testDocs); + }); + + test("should create vector store instance", () => { + expect(vectorStore).toBeDefined(); + expect(vectorStore._vectorstoreType()).toBe("azionEdgeSQL"); + }); + + test("should perform similarity search", async () => { + const results = await vectorStore.AzionSimilaritySearch( + "what helps with headaches?", + { + kvector: 2, + filter: [{ operator: "=", column: "category", value: "medicine" }], + metadataItems: ["category", "type"] + } + ); + + expect(results).toBeDefined(); + expect(results.length).toBeLessThanOrEqual(2); + expect(results[0][0].metadata.category).toBe("medicine"); + }); + + test("should perform full text search", async () => { + const results = await vectorStore.AzionFullTextSearch( + "exercise headaches", + { + kfts: 1, + filter: [{ operator: "=", column: "category", value: "lifestyle" }], + metadataItems: ["category", "type"] + } + ); + + expect(results).toBeDefined(); + expect(results.length).toBeLessThanOrEqual(1); + expect(results[0][0].metadata.category).toBe("lifestyle"); + }); + + test("should perform hybrid search", async () => { + const results = await vectorStore.AzionHybridSearch( + "pain relief medicine", + { + kfts: 2, + kvector: 2, + filter: [{ operator: "=", column: "type", value: "pain relief" }], 
+ metadataItems: ["category", "type"] + } + ); + + expect(results).toBeDefined(); + expect(results.length).toBeLessThanOrEqual(4); + expect(results[0][0].metadata.type).toBe("pain relief"); + }); + + test("should handle filters correctly", async () => { + const results = await vectorStore.AzionSimilaritySearch( + "medicine", + { + kvector: 2, + filter: [ + { operator: "=", column: "category", value: "medicine" }, + { operator: "=", column: "type", value: "pain relief" } + ], + metadataItems: ["category", "type"] + } + ); + + expect(results).toBeDefined(); + expect(results.length).toBeGreaterThan(0); + results.forEach(([doc]) => { + expect(doc.metadata.category).toBe("medicine"); + expect(doc.metadata.type).toBe("pain relief"); + }); + }); + + test("should handle empty search results", async () => { + const results = await vectorStore.AzionSimilaritySearch( + "nonexistent content", + { + kvector: 2, + filter: [{ operator: "=", column: "category", value: "nonexistent" }] + } + ); + + expect(results).toBeDefined(); + expect(results.length).toBe(0); + }); + + test("should add new documents", async () => { + const newDoc = new Document({ + pageContent: "Meditation can help with stress headaches", + metadata: { category: "lifestyle", type: "stress relief" } + }); + + await vectorStore.addDocuments([newDoc]); + + const results = await vectorStore.AzionFullTextSearch( + "meditation stress", + { + kfts: 1, + filter: [{ operator: "=", column: "type", value: "stress relief" }] + } + ); + + expect(results).toBeDefined(); + expect(results.length).toBe(1); + expect(results[0][0].pageContent).toContain("Meditation"); + }); +}); \ No newline at end of file From 1cc26c21df4fc42e1837a59fdaa900698c406ec2 Mon Sep 17 00:00:00 2001 From: PedroMiolaSilva Date: Wed, 11 Dec 2024 13:05:18 -0300 Subject: [PATCH 2/3] adding optionalDependencies and entrypoints --- libs/langchain-community/langchain.config.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/libs/langchain-community/langchain.config.js b/libs/langchain-community/langchain.config.js index b0207b8612ab..378d15962a90 100644 --- a/libs/langchain-community/langchain.config.js +++ b/libs/langchain-community/langchain.config.js @@ -117,6 +117,7 @@ export const config = { // vectorstores "vectorstores/analyticdb": "vectorstores/analyticdb", "vectorstores/astradb": "vectorstores/astradb", + "vectorstores/azion_edgesql": "vectorstores/azion_edgesql", "vectorstores/azure_aisearch": "vectorstores/azure_aisearch", "vectorstores/azure_cosmosdb": "vectorstores/azure_cosmosdb", "vectorstores/cassandra": "vectorstores/cassandra", @@ -196,6 +197,7 @@ export const config = { // retrievers "retrievers/amazon_kendra": "retrievers/amazon_kendra", "retrievers/amazon_knowledge_base": "retrievers/amazon_knowledge_base", + "retrievers/azion_edgesql": "retrievers/azion_edgesql", "retrievers/bm25": "retrievers/bm25", "retrievers/chaindesk": "retrievers/chaindesk", "retrievers/databerry": "retrievers/databerry", @@ -377,6 +379,7 @@ export const config = { "llms/layerup_security", "vectorstores/analyticdb", "vectorstores/astradb", + "vectorstores/azion_edgesql", "vectorstores/azure_aisearch", "vectorstores/azure_cosmosdb", "vectorstores/cassandra", @@ -434,6 +437,7 @@ export const config = { "chat_models/zhipuai", "retrievers/amazon_kendra", "retrievers/amazon_knowledge_base", + "retrievers/azion_edgesql", "retrievers/dria", "retrievers/metal", "retrievers/supabase", From cbb9ef2c25e04df9d3c6dd74c6b1236e9d1b2468 Mon Sep 17 00:00:00 2001 From: PedroMiolaSilva Date: Wed, 11 Dec 2024 13:25:17 -0300 Subject: [PATCH 3/3] adding instructions on how to use azion token --- docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx | 2 +- docs/core_docs/docs/integrations/vectorstores/azion-edgesql.mdx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx 
b/docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx index d1c993aacdd2..33bd4aa24078 100644 --- a/docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx +++ b/docs/core_docs/docs/integrations/retrievers/azion-edgesql.mdx @@ -1,6 +1,6 @@ ### Azion Edge SQL Retriever -The `AzionRetriever` is used to perform advanced search operations, including hybrid and similarity searches directly on Azion's Edge Plataform using Edge SQL. Make sure to install the `@langchain/community` package to use this retriever. Besides that, you will need an Azion account and a Token to use the Azion API. +The `AzionRetriever` is used to perform advanced search operations, including hybrid and similarity searches directly on Azion's Edge Plataform using Edge SQL. Make sure to install the `@langchain/community` package to use this retriever. Besides that, you will need an Azion account and a Token to use the Azion API, configuring it as environment variable `AZION_TOKEN`. ```typescript import { AzionRetriever } from "@langchain/community/retrievers/azion"; diff --git a/docs/core_docs/docs/integrations/vectorstores/azion-edgesql.mdx b/docs/core_docs/docs/integrations/vectorstores/azion-edgesql.mdx index b0f0d11ee1de..7d6a535ff088 100644 --- a/docs/core_docs/docs/integrations/vectorstores/azion-edgesql.mdx +++ b/docs/core_docs/docs/integrations/vectorstores/azion-edgesql.mdx @@ -1,5 +1,5 @@ ### Azion Edge SQL Vector Store -The `AzionVectorStore` is used to manage and search through a collection of documents using vector embeddings, directly on Azion's Edge Plataform using Edge SQL. Make sure to install the `@langchain/community` package to use this vector store. Besides that, you will need an Azion account and a Token to use the Azion API. +The `AzionVectorStore` is used to manage and search through a collection of documents using vector embeddings, directly on Azion's Edge Plataform using Edge SQL. 
Make sure to install the `@langchain/community` package to use this vector store. Besides that, you will need an Azion account and a Token to use the Azion API, configuring it as environment variable `AZION_TOKEN`. ```typescript import { AzionVectorStore } from "@langchain/community/vectorstores/azionedgesql";