feat: Add Ollama keep_alive param to control how long models stay loa…
davidmigloz authored Feb 2, 2024
1 parent 4100023 commit 3b86e22
Showing 14 changed files with 211 additions and 42 deletions.
@@ -231,6 +231,7 @@ class ChatOllama extends BaseChatModel<ChatOllamaOptions> {
       model: options?.model ?? defaultOptions.model ?? throwNullModelError(),
       messages: messages.toMessages(),
       format: options?.format?.toResponseFormat(),
+      keepAlive: options?.keepAlive,
       stream: stream,
       options: RequestOptions(
         numKeep: options?.numKeep ?? defaultOptions.numKeep,
@@ -10,6 +10,7 @@ class ChatOllamaOptions extends ChatModelOptions {
   const ChatOllamaOptions({
     this.model = 'llama2',
     this.format,
+    this.keepAlive,
     this.numKeep,
     this.seed,
     this.numPredict,
@@ -58,6 +59,14 @@ class ChatOllamaOptions extends ChatModelOptions {
   /// Otherwise, the model may generate large amounts of whitespace.
   final OllamaResponseFormat? format;
 
+  /// How long (in minutes) to keep the model loaded in memory.
+  ///
+  /// - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
+  /// - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
+  /// - If set to 0, the model will be unloaded immediately once finished.
+  /// - If not set, the model will stay loaded for 5 minutes by default.
+  final int? keepAlive;
+
   /// Number of tokens to keep from the prompt.
   /// (Default: 0)
   final int? numKeep;
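For context, a minimal usage sketch of the new option at the ChatOllama level. This is a sketch under assumptions: the model name, prompt, and keepAlive value are illustrative, and the imports follow the usual langchain / langchain_ollama pairing; none of it is part of the commit itself.

import 'package:langchain/langchain.dart';
import 'package:langchain_ollama/langchain_ollama.dart';

Future<void> main() async {
  // Illustrative: keep the model loaded for 30 minutes after each request.
  final chatModel = ChatOllama(
    defaultOptions: const ChatOllamaOptions(
      model: 'llama2',
      keepAlive: 30,
    ),
  );
  final res = await chatModel.invoke(
    PromptValue.string('Why is the sky blue?'),
  );
  print(res);
}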
9 changes: 9 additions & 0 deletions packages/langchain_ollama/lib/src/llms/models/models.dart
@@ -12,6 +12,7 @@ class OllamaOptions extends LLMOptions {
     this.context,
     this.format,
     this.raw,
+    this.keepAlive,
     this.numKeep,
     this.seed,
     this.numPredict,
@@ -80,6 +81,14 @@ class OllamaOptions extends LLMOptions {
   /// yourself.
   final bool? raw;
 
+  /// How long (in minutes) to keep the model loaded in memory.
+  ///
+  /// - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
+  /// - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
+  /// - If set to 0, the model will be unloaded immediately once finished.
+  /// - If not set, the model will stay loaded for 5 minutes by default.
+  final int? keepAlive;
+
   /// Number of tokens to keep from the prompt.
   /// (Default: 0)
   final int? numKeep;
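The completions-style Ollama LLM gets the same option. A sketch assuming keepAlive: 0 to release memory right after the call (model name, prompt, and value are illustrative):

import 'package:langchain/langchain.dart';
import 'package:langchain_ollama/langchain_ollama.dart';

Future<void> main() async {
  // Illustrative: ask Ollama to unload the model as soon as the call finishes.
  final llm = Ollama(
    defaultOptions: const OllamaOptions(
      model: 'llama2',
      keepAlive: 0,
    ),
  );
  final res = await llm.invoke(
    PromptValue.string('Why is the sky blue?'),
  );
  print(res);
}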
1 change: 1 addition & 0 deletions packages/langchain_ollama/lib/src/llms/ollama.dart
@@ -223,6 +223,7 @@ class Ollama extends BaseLLM<OllamaOptions> {
       context: options?.context,
       format: options?.format?.toResponseFormat(),
       raw: options?.raw,
+      keepAlive: options?.keepAlive,
       stream: stream,
       options: RequestOptions(
         numKeep: options?.numKeep ?? defaultOptions.numKeep,
@@ -39,6 +39,14 @@ class GenerateChatCompletionRequest with _$GenerateChatCompletionRequest {
 
     /// If `false` the response will be returned as a single response object, otherwise the response will be streamed as a series of objects.
     @Default(false) bool stream,
+
+    /// How long (in minutes) to keep the model loaded in memory.
+    ///
+    /// - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
+    /// - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
+    /// - If set to 0, the model will be unloaded immediately once finished.
+    /// - If not set, the model will stay loaded for 5 minutes by default.
+    @JsonKey(name: 'keep_alive', includeIfNull: false) int? keepAlive,
   }) = _GenerateChatCompletionRequest;
 
   /// Object construction from a JSON representation
@@ -51,7 +59,8 @@ class GenerateChatCompletionRequest with _$GenerateChatCompletionRequest {
     'messages',
     'format',
     'options',
-    'stream'
+    'stream',
+    'keep_alive'
   ];
 
   /// Perform validations on the schema property values
@@ -67,6 +76,7 @@
       'format': format,
       'options': options,
       'stream': stream,
+      'keep_alive': keepAlive,
     };
   }
 }
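At the ollama_dart client level the field is serialized as keep_alive (and omitted when null, per includeIfNull: false). A hedged sketch of a direct chat call; the client setup and message content are assumptions, not part of the commit:

import 'package:ollama_dart/ollama_dart.dart';

Future<void> main() async {
  final client = OllamaClient();
  // keepAlive maps to the API's keep_alive field;
  // a negative value keeps the model loaded indefinitely.
  final res = await client.generateChatCompletion(
    request: const GenerateChatCompletionRequest(
      model: 'llama2',
      messages: [
        Message(role: MessageRole.user, content: 'Hello!'),
      ],
      keepAlive: -1,
    ),
  );
  print(res.message?.content);
  client.endSession();
}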
@@ -56,6 +56,14 @@ class GenerateCompletionRequest with _$GenerateCompletionRequest {
 
     /// If `false` the response will be returned as a single response object, otherwise the response will be streamed as a series of objects.
     @Default(false) bool stream,
+
+    /// How long (in minutes) to keep the model loaded in memory.
+    ///
+    /// - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
+    /// - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
+    /// - If set to 0, the model will be unloaded immediately once finished.
+    /// - If not set, the model will stay loaded for 5 minutes by default.
+    @JsonKey(name: 'keep_alive', includeIfNull: false) int? keepAlive,
   }) = _GenerateCompletionRequest;
 
   /// Object construction from a JSON representation
@@ -73,7 +81,8 @@ class GenerateCompletionRequest with _$GenerateCompletionRequest {
     'options',
     'format',
     'raw',
-    'stream'
+    'stream',
+    'keep_alive'
   ];
 
   /// Perform validations on the schema property values
@@ -94,6 +103,7 @@
       'format': format,
       'raw': raw,
       'stream': stream,
+      'keep_alive': keepAlive,
     };
   }
 }
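And the equivalent for plain completions, assuming a positive keepAlive of 20 minutes to keep the model warm between calls (prompt and value are illustrative):

import 'package:ollama_dart/ollama_dart.dart';

Future<void> main() async {
  final client = OllamaClient();
  // Illustrative: keep the model loaded for 20 minutes after this request.
  final res = await client.generateCompletion(
    request: const GenerateCompletionRequest(
      model: 'llama2',
      prompt: 'Why is the sky blue?',
      keepAlive: 20,
    ),
  );
  print(res.response);
  client.endSession();
}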