Skip to content

Commit

Permalink
llama : handle KV shift for recurrent models (ggerganov#10402)
Browse files Browse the repository at this point in the history
ggml-ci
  • Loading branch information
ggerganov authored Nov 21, 2024
1 parent 87a533b commit 1bb30bf
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions src/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18211,13 +18211,13 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
static void llama_kv_cache_update_internal(struct llama_context & lctx) {
bool need_reserve = false;

// apply K-shift if needed
if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
if (lctx.kv_self.has_shift) {
if (!llama_kv_cache_can_shift(&lctx)) {
GGML_ABORT("Deepseek2 does not support K-shift");
GGML_ABORT("The current context does not support K-shift");
}

{
// apply K-shift if needed
if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE) {
ggml_backend_sched_reset(lctx.sched.get());

ggml_cgraph * gf = llama_build_graph_k_shift(lctx);
Expand Down Expand Up @@ -20463,7 +20463,7 @@ void llama_kv_cache_update(struct llama_context * ctx) {
}

// Report whether the KV cache of the given context supports a K-shift.
// Recurrent-state caches cannot be shifted, and Deepseek2 is excluded
// because its MLA attention layout does not support shifting.
bool llama_kv_cache_can_shift(struct llama_context * ctx) {
    const bool is_recurrent = ctx->kv_self.recurrent;
    const bool is_deepseek2 = ctx->model.arch == LLM_ARCH_DEEPSEEK2; // MLA: shift unsupported
    return !is_recurrent && !is_deepseek2;
}

// deprecated
Expand Down

0 comments on commit 1bb30bf

Please sign in to comment.