bug fix for issue 9688
weilong.yu committed Dec 13, 2024
1 parent 00c1bde commit 348855f
Showing 2 changed files with 4 additions and 3 deletions.
4 changes: 4 additions & 0 deletions vllm/worker/model_runner.py
@@ -1327,6 +1327,10 @@ def profile_run(self) -> None:
         self.execute_model(model_input, kv_caches, intermediate_tensors)
         torch.cuda.synchronize()
+        # Cleanup
+        if self.lora_config:
+            assert self.lora_manager is not None
+            self.remove_all_loras()
         return

     def remove_all_loras(self):
3 changes: 0 additions & 3 deletions vllm/worker/worker.py
@@ -252,9 +252,6 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
             available_kv_cache_memory / (1024**3),
             self.cache_config.gpu_memory_utilization)

-        # Final cleanup
-        if self.model_runner.lora_manager:
-            self.model_runner.remove_all_loras()
         gc.collect()

         return num_gpu_blocks, num_cpu_blocks
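
Taken together, the two hunks relocate the post-profiling LoRA cleanup: Worker.determine_num_available_blocks no longer calls remove_all_loras() itself, and ModelRunner.profile_run now does so right before it returns. Below is a minimal, self-contained sketch of that restructuring; ModelRunnerSketch, WorkerSketch, and the placeholder return values are hypothetical stand-ins, not vLLM's actual implementation.

# Minimal sketch of the restructuring in this commit. Only the placement of
# the LoRA cleanup mirrors the diff above; everything else is illustrative.
from typing import List, Optional, Tuple


class ModelRunnerSketch:
    def __init__(self, lora_config: Optional[dict]) -> None:
        self.lora_config = lora_config
        # Stands in for the dummy LoRAs loaded while profiling.
        self.lora_manager: Optional[List[str]] = (
            ["dummy_lora"] if lora_config else None)

    def profile_run(self) -> None:
        # ... a worst-case dummy forward pass would run here ...
        # Cleanup (added by this commit): the runner drops its own
        # profiling-only LoRA state before returning to the worker.
        if self.lora_config:
            assert self.lora_manager is not None
            self.remove_all_loras()
        return

    def remove_all_loras(self) -> None:
        if self.lora_manager is not None:
            self.lora_manager.clear()


class WorkerSketch:
    def __init__(self, model_runner: ModelRunnerSketch) -> None:
        self.model_runner = model_runner

    def determine_num_available_blocks(self) -> Tuple[int, int]:
        self.model_runner.profile_run()
        # Removed by this commit: the worker no longer calls
        # self.model_runner.remove_all_loras() after profiling.
        return 1024, 512  # placeholder block counts


if __name__ == "__main__":
    worker = WorkerSketch(ModelRunnerSketch(lora_config={"max_loras": 1}))
    print(worker.determine_num_available_blocks())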
