[ROCm][AMD] Disable auto enabling chunked prefill on ROCm (vllm-project#11146)

Signed-off-by: Gregory Shtrasberg <[email protected]>
sleepwalker2017 · Dec 13, 2024 · 00c1bde · 00c1bde
1 parent 3989a79
commit 00c1bde
Showing 1 changed file with 2 additions and 1 deletion.
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
@@ -1074,7 +1074,8 @@ def create_engine_config(self,
                 if (is_gpu and not use_sliding_window and not use_spec_decode
                         and not self.enable_lora
                         and not self.enable_prompt_adapter
-                        and model_config.runner_type != "pooling"):
+                        and model_config.runner_type != "pooling"
+                        and not current_platform.is_rocm()):
                     self.enable_chunked_prefill = True
                     logger.warning(
                         "Chunked prefill is enabled by default for models with "