Skip to content

Commit

Permalink
fixed issues
Browse files (browse the repository at this point in the history)
parthraut committed May 2, 2024
1 parent 9ab0c3c commit 99fcd38
Showing 1 changed file with 20 additions and 18 deletions.
38 changes: 20 additions & 18 deletions zeus/device/gpu.py
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,10 @@
import contextlib

import pynvml # necessary for testing to mock!

# import amdsmi
try:
import amdsmi
except ImportError:
amdsmi = None

from zeus.device.exception import ZeusBaseGPUError
from zeus.utils.logging import get_logger
@@ -488,8 +490,8 @@ def _get_handle(self):
@_handle_amdsmi_errors
def getPowerManagementLimitConstraints(self) -> tuple[int, int]:
"""Returns the minimum and maximum power management limits for the specified GPU. Units: mW."""
info = amdsmi.amdsmi_get_power_cap_info(self.handle)
return (info.min_power_cap * 1000, info.max_power_cap * 1000)
info = amdsmi.amdsmi_get_power_cap_info(self.handle) # Returns in W
return (info["min_power_cap"] * 1000, info["max_power_cap"] * 1000)

@_handle_amdsmi_errors
def setPersistenceMode(self, enable: bool) -> None:
@@ -507,8 +509,8 @@ def setPowerManagementLimit(self, value: int) -> None:
@_handle_amdsmi_errors
def resetPowerManagementLimit(self) -> None:
"""Resets the power management limit for the specified GPU to the default value."""
info = amdsmi.amdsmi_get_power_cap_info(self.handle)
amdsmi.amdsmi_set_power_cap(self.handle, 0, cap=info.default_power_cap)
info = amdsmi.amdsmi_get_power_cap_info(self.handle) # Returns in W
amdsmi.amdsmi_set_power_cap(self.handle, 0, cap=int(info["default_power_cap"]*1e6)) # expects value in microwatts

@_handle_amdsmi_errors
def setMemoryLockedClocks(self, minMemClockMHz: int, maxMemClockMHz: int) -> None:
@@ -554,37 +556,37 @@ def setGpuLockedClocks(self, minGpuClockMHz: int, maxGpuClockMHz: int) -> None:
def resetMemoryLockedClocks(self) -> None:
"""Resets the memory locked clocks of the specified GPU to their default values."""
# Get default MEM clock values
_, max_clk, min_clk = amdsmi.amdsmi_get_clock_info(
info = amdsmi.amdsmi_get_clock_info(
self.handle, amdsmi.AmdSmiClkType.MEM
)
) # returns MHz

amdsmi.amdsmi_set_gpu_clk_range(
self.handle,
min_clk,
max_clk,
clk_type=amdsmi.AmdSmiClkType.MEM,
)
info["min_clk"],
info["max_clk"],
clk_type=amdsmi.AmdSmiClkType.MEM
) # expects MHz

@_handle_amdsmi_errors
def resetGpuLockedClocks(self) -> None:
"""Resets the GPU locked clocks of the specified GPU to their default values."""
# Get default GPU clock values
_, max_clk, min_clk = amdsmi.amdsmi_get_clock_info(
info = amdsmi.amdsmi_get_clock_info(
self.handle, amdsmi.AmdSmiClkType.GFX
)
) # returns MHz

amdsmi.amdsmi_set_gpu_clk_range(
self.handle,
min_clk,
max_clk,
info["min_clk"],
info["max_clk"],
clk_type=amdsmi.AmdSmiClkType.GFX,
)
) # expects MHz

@_handle_amdsmi_errors
def getPowerUsage(self) -> int:
"""Returns the power usage of the specified GPU. Units: mW."""
return (
amdsmi.amdsmi_get_power_info(self.handle)["average_socket_power"] * 1000
int(amdsmi.amdsmi_get_power_info(self.handle)["average_socket_power"] * 1000)
) # returns in W, convert to mW

@_handle_amdsmi_errors

0 comments on commit 99fcd38

Please sign in to comment.