feat: Add Ollama keep_alive param to control how long models stay loa…
davidmigloz authored Feb 2, 2024
1 parent 4100023 commit 3b86e22
Showing 14 changed files with 211 additions and 42 deletions.
@@ -231,6 +231,7 @@ class ChatOllama extends BaseChatModel<ChatOllamaOptions> {
       model: options?.model ?? defaultOptions.model ?? throwNullModelError(),
       messages: messages.toMessages(),
       format: options?.format?.toResponseFormat(),
+      keepAlive: options?.keepAlive,
       stream: stream,
       options: RequestOptions(
         numKeep: options?.numKeep ?? defaultOptions.numKeep,
@@ -10,6 +10,7 @@ class ChatOllamaOptions extends ChatModelOptions {
   const ChatOllamaOptions({
     this.model = 'llama2',
     this.format,
+    this.keepAlive,
     this.numKeep,
     this.seed,
     this.numPredict,
@@ -58,6 +59,14 @@ class ChatOllamaOptions extends ChatModelOptions {
   /// Otherwise, the model may generate large amounts of whitespace.
   final OllamaResponseFormat? format;
 
+  /// How long (in minutes) to keep the model loaded in memory.
+  ///
+  /// - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
+  /// - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
+  /// - If set to 0, the model will be unloaded immediately once finished.
+  /// - If not set, the model will stay loaded for 5 minutes by default.
+  final int? keepAlive;
+
   /// Number of tokens to keep from the prompt.
   /// (Default: 0)
   final int? numKeep;
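For context, a minimal usage sketch of the new option at the ChatOllama level. This is a sketch under assumptions: the model name, prompt, and keepAlive value are illustrative, and the imports follow the usual langchain / langchain_ollama pairing; none of it is part of the commit itself.

import 'package:langchain/langchain.dart';
import 'package:langchain_ollama/langchain_ollama.dart';

Future<void> main() async {
  // Illustrative: keep the model loaded for 30 minutes after each request.
  final chatModel = ChatOllama(
    defaultOptions: const ChatOllamaOptions(
      model: 'llama2',
      keepAlive: 30,
    ),
  );
  final res = await chatModel.invoke(
    PromptValue.string('Why is the sky blue?'),
  );
  print(res);
}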
9 changes: 9 additions & 0 deletions packages/langchain_ollama/lib/src/llms/models/models.dart
@@ -12,6 +12,7 @@ class OllamaOptions extends LLMOptions {
     this.context,
     this.format,
     this.raw,
+    this.keepAlive,
     this.numKeep,
     this.seed,
     this.numPredict,
@@ -80,6 +81,14 @@ class OllamaOptions extends LLMOptions {
   /// yourself.
   final bool? raw;
 
+  /// How long (in minutes) to keep the model loaded in memory.
+  ///
+  /// - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
+  /// - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
+  /// - If set to 0, the model will be unloaded immediately once finished.
+  /// - If not set, the model will stay loaded for 5 minutes by default.
+  final int? keepAlive;
+
   /// Number of tokens to keep from the prompt.
   /// (Default: 0)
   final int? numKeep;
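The completions-style Ollama LLM gets the same option. A sketch assuming keepAlive: 0 to release memory right after the call (model name, prompt, and value are illustrative):

import 'package:langchain/langchain.dart';
import 'package:langchain_ollama/langchain_ollama.dart';

Future<void> main() async {
  // Illustrative: ask Ollama to unload the model as soon as the call finishes.
  final llm = Ollama(
    defaultOptions: const OllamaOptions(
      model: 'llama2',
      keepAlive: 0,
    ),
  );
  final res = await llm.invoke(
    PromptValue.string('Why is the sky blue?'),
  );
  print(res);
}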
1 change: 1 addition & 0 deletions packages/langchain_ollama/lib/src/llms/ollama.dart
@@ -223,6 +223,7 @@ class Ollama extends BaseLLM<OllamaOptions> {
       context: options?.context,
       format: options?.format?.toResponseFormat(),
       raw: options?.raw,
+      keepAlive: options?.keepAlive,
       stream: stream,
       options: RequestOptions(
         numKeep: options?.numKeep ?? defaultOptions.numKeep,
@@ -39,6 +39,14 @@ class GenerateChatCompletionRequest with _$GenerateChatCompletionRequest {
 
     /// If `false` the response will be returned as a single response object, otherwise the response will be streamed as a series of objects.
     @Default(false) bool stream,
+
+    /// How long (in minutes) to keep the model loaded in memory.
+    ///
+    /// - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
+    /// - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
+    /// - If set to 0, the model will be unloaded immediately once finished.
+    /// - If not set, the model will stay loaded for 5 minutes by default.
+    @JsonKey(name: 'keep_alive', includeIfNull: false) int? keepAlive,
   }) = _GenerateChatCompletionRequest;
 
   /// Object construction from a JSON representation
@@ -51,7 +59,8 @@ class GenerateChatCompletionRequest with _$GenerateChatCompletionRequest {
     'messages',
     'format',
     'options',
-    'stream'
+    'stream',
+    'keep_alive'
   ];
 
   /// Perform validations on the schema property values
@@ -67,6 +76,7 @@
       'format': format,
       'options': options,
       'stream': stream,
+      'keep_alive': keepAlive,
     };
   }
 }
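At the ollama_dart client level the field is serialized as keep_alive (and omitted when null, per includeIfNull: false). A hedged sketch of a direct chat call; the client setup and message content are assumptions, not part of the commit:

import 'package:ollama_dart/ollama_dart.dart';

Future<void> main() async {
  final client = OllamaClient();
  // keepAlive maps to the API's keep_alive field;
  // a negative value keeps the model loaded indefinitely.
  final res = await client.generateChatCompletion(
    request: const GenerateChatCompletionRequest(
      model: 'llama2',
      messages: [
        Message(role: MessageRole.user, content: 'Hello!'),
      ],
      keepAlive: -1,
    ),
  );
  print(res.message?.content);
  client.endSession();
}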
@@ -56,6 +56,14 @@ class GenerateCompletionRequest with _$GenerateCompletionRequest {
 
     /// If `false` the response will be returned as a single response object, otherwise the response will be streamed as a series of objects.
     @Default(false) bool stream,
+
+    /// How long (in minutes) to keep the model loaded in memory.
+    ///
+    /// - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
+    /// - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
+    /// - If set to 0, the model will be unloaded immediately once finished.
+    /// - If not set, the model will stay loaded for 5 minutes by default.
+    @JsonKey(name: 'keep_alive', includeIfNull: false) int? keepAlive,
   }) = _GenerateCompletionRequest;
 
   /// Object construction from a JSON representation
@@ -73,7 +81,8 @@ class GenerateCompletionRequest with _$GenerateCompletionRequest {
     'options',
     'format',
     'raw',
-    'stream'
+    'stream',
+    'keep_alive'
   ];
 
   /// Perform validations on the schema property values
@@ -94,6 +103,7 @@
       'format': format,
       'raw': raw,
       'stream': stream,
+      'keep_alive': keepAlive,
     };
   }
 }
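And the equivalent for plain completions, assuming a positive keepAlive of 20 minutes to keep the model warm between calls (prompt and value are illustrative):

import 'package:ollama_dart/ollama_dart.dart';

Future<void> main() async {
  final client = OllamaClient();
  // Illustrative: keep the model loaded for 20 minutes after this request.
  final res = await client.generateCompletion(
    request: const GenerateCompletionRequest(
      model: 'llama2',
      prompt: 'Why is the sky blue?',
      keepAlive: 20,
    ),
  );
  print(res.response);
  client.endSession();
}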