vllm-project · qli88 · Jun 18, 2024 · Jun 19, 2024 · Jun 19, 2024 · Jun 19, 2024
diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh
@@ -5,7 +5,7 @@ set -o pipefail
 echo "--- Confirming Clean Initial State"
 while true; do
         sleep 3
-        if grep -q clean /opt/amdgpu/etc/gpu_state; then
+        if grep -q clean "${BUILDKITE_AGENT_META_DATA_RESET_TARGET}"; then
                 echo "GPUs state is \"clean\""
                 break
         fi
@@ -44,11 +44,11 @@ cleanup_docker
 
 echo "--- Resetting GPUs"
 
-echo "reset" > /opt/amdgpu/etc/gpu_state
+echo "reset" > "${BUILDKITE_AGENT_META_DATA_RESET_TARGET}"
 
 while true; do
         sleep 3
-        if grep -q clean /opt/amdgpu/etc/gpu_state; then
+        if grep -q clean "${BUILDKITE_AGENT_META_DATA_RESET_TARGET}"; then
                 echo "GPUs state is \"clean\""
                 break
         fi
@@ -139,8 +139,9 @@ if [[ $commands == *"--shard-id="* ]]; then
     fi
   done
 else
+  echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES"
   docker run \
-          --device /dev/kfd --device /dev/dri \
+          --device /dev/kfd $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES \
           --network host \
           --shm-size=16gb \
           --rm \

diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2
@@ -0,0 +1,38 @@
+{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %}
+{% set docker_image_amd = "rocm/vllm-ci:$BUILDKITE_COMMIT" %}
+{% set default_working_dir = "vllm/tests" %}
+{% set hf_home = "/root/.cache/huggingface" %}
+
+steps:
+  - label: ":docker: build image"
+    depends_on: ~
+    commands:
+      - "docker build --build-arg max_jobs=16 --tag {{ docker_image_amd }} -f Dockerfile.rocm --progress plain ."
+      - "docker push {{ docker_image_amd }}"
+    key: "amd-build"
+    env:
+      DOCKER_BUILDKIT: "1"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 5
+        - exit_status: -10  # Agent was lost
+          limit: 5
+    agents:
+      queue: amd
+
+{% for step in steps %}
+{% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
+  - label: "AMD: {{ step.label }}"
+    depends_on:
+      - "amd-build"
+    agents:
+      queue: amd
+    commands:
+      - bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ (step.command or (step.commands | join(" && "))) | safe }}"
+    env:
+      DOCKER_BUILDKIT: "1"
+    priority: 100
+    soft_fail: true
+{% endif %}
+{% endfor %}
diff --git a/.github/workflows/reminder_comment.yml b/.github/workflows/reminder_comment.yml
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -37,7 +37,7 @@ set(PYTHON_SUPPORTED_VERSIONS "3.8" "3.9" "3.10" "3.11" "3.12")
 set(CUDA_SUPPORTED_ARCHS "7.0;7.5;8.0;8.6;8.9;9.0")
 
 # Supported AMD GPU architectures.
-set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100")
+set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1200")
 
 #
 # Supported/expected torch versions for CUDA/ROCm.
@@ -152,13 +152,33 @@ else()
     "${${VLLM_GPU_LANG}_SUPPORTED_ARCHS}")
 endif()
 
+#
+# Append `-O0 -ggdb3` to the Debug flags of the GPU language and of C++ for a pleasant debugging experience.
+#
+string(APPEND CMAKE_${VLLM_GPU_LANG}_FLAGS_DEBUG " -O0 -ggdb3")
+string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0 -ggdb3")
+
 #
 # Query torch for additional GPU compilation flags for the given
 # `VLLM_GPU_LANG`.
 # The final set of arches is stored in `VLLM_GPU_FLAGS`.
 #
 get_torch_gpu_compiler_flags(VLLM_GPU_FLAGS ${VLLM_GPU_LANG})
 
+#
+# Query the FP8 format for the target GPU arches and append the matching define to `VLLM_GPU_FLAGS`.
+#
+get_supported_fp8_format(FP8_FORMAT ${VLLM_GPU_LANG} "${VLLM_GPU_ARCHES}")
+if("${FP8_FORMAT}" STREQUAL "E4M3FN")
+  message(STATUS "FP8 format: E4M3FN")
+  list(APPEND VLLM_GPU_FLAGS "-DUSE_CUDA_FP8_FORMAT")
+elseif("${FP8_FORMAT}" STREQUAL "E4M3FNUZ")
+  message(STATUS "FP8 format: E4M3FNUZ")
+  list(APPEND VLLM_GPU_FLAGS "-DUSE_HIP_FP8_FORMAT")
+elseif("${FP8_FORMAT}" STREQUAL "CONFLICT")
+  message(FATAL_ERROR "Target architectures support different types of FP8 formats!")
+endif()
+
 #
 # Set nvcc parallelism.
 #
@@ -178,7 +198,14 @@ set(FETCHCONTENT_BASE_DIR "${PROJECT_ROOT_DIR}/.deps")
 message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")
 
 #
-# Define other extension targets
+# For HIP builds, pass `ROCM_VERSION_DEV_INT` to the GPU compiler as the `ROCM_VERSION` define.
+#
+if(VLLM_GPU_LANG STREQUAL "HIP")
+  list(APPEND VLLM_GPU_FLAGS "-DROCM_VERSION=${ROCM_VERSION_DEV_INT}")
+endif()
+
+#
+# Define extension targets
 #
 
 #
@@ -381,6 +408,11 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
 # if CUDA endif
 endif()
 
+if(VLLM_GPU_LANG STREQUAL "HIP")
+  list(APPEND VLLM_EXT_SRC
+    "csrc/custom_all_reduce.cu")
+endif()
+
 message(STATUS "Enabling C extension.")
 define_gpu_extension_target(
   _C
@@ -453,7 +485,10 @@ if(VLLM_GPU_LANG STREQUAL "HIP")
   #
   set(VLLM_ROCM_EXT_SRC
     "csrc/rocm/torch_bindings.cpp"
-    "csrc/rocm/attention.cu")
+    "csrc/rocm/attention.cu"
+    "csrc/rocm/custom_kernels.cu"
+    "csrc/rocm/fused_kernels.cu"
+    "csrc/rocm/custom.cu")
 
   define_gpu_extension_target(
     _rocm_C