fix(gpu): handle power for dedicated amd gpus (#414)

This commit is contained in:
Henry Dollman
2025-01-30 20:28:31 -05:00
parent 120aff0d18
commit 83668e5727

View File

@@ -32,7 +32,8 @@ type RocmSmiJson struct {
MemoryUsed string `json:"VRAM Total Used Memory (B)"` MemoryUsed string `json:"VRAM Total Used Memory (B)"`
MemoryTotal string `json:"VRAM Total Memory (B)"` MemoryTotal string `json:"VRAM Total Memory (B)"`
Usage string `json:"GPU use (%)"` Usage string `json:"GPU use (%)"`
Power string `json:"Current Socket Graphics Package Power (W)"` PowerPackage string `json:"Average Graphics Package Power (W)"`
PowerSocket string `json:"Current Socket Graphics Package Power (W)"`
} }
// gpuCollector defines a collector for a specific GPU management utility (nvidia-smi or rocm-smi) // gpuCollector defines a collector for a specific GPU management utility (nvidia-smi or rocm-smi)
@@ -186,21 +187,23 @@ func (gm *GPUManager) parseAmdData(output []byte) bool {
gm.mutex.Lock() gm.mutex.Lock()
defer gm.mutex.Unlock() defer gm.mutex.Unlock()
for _, v := range rocmSmiInfo { for _, v := range rocmSmiInfo {
temp, _ := strconv.ParseFloat(v.Temperature, 64) var power float64
if v.PowerPackage != "" {
power, _ = strconv.ParseFloat(v.PowerPackage, 64)
} else {
power, _ = strconv.ParseFloat(v.PowerSocket, 64)
}
memoryUsage, _ := strconv.ParseFloat(v.MemoryUsed, 64) memoryUsage, _ := strconv.ParseFloat(v.MemoryUsed, 64)
totalMemory, _ := strconv.ParseFloat(v.MemoryTotal, 64) totalMemory, _ := strconv.ParseFloat(v.MemoryTotal, 64)
usage, _ := strconv.ParseFloat(v.Usage, 64) usage, _ := strconv.ParseFloat(v.Usage, 64)
power, _ := strconv.ParseFloat(v.Power, 64)
memoryUsage = bytesToMegabytes(memoryUsage)
totalMemory = bytesToMegabytes(totalMemory)
if _, ok := gm.GpuDataMap[v.ID]; !ok { if _, ok := gm.GpuDataMap[v.ID]; !ok {
gm.GpuDataMap[v.ID] = &system.GPUData{Name: v.Name} gm.GpuDataMap[v.ID] = &system.GPUData{Name: v.Name}
} }
gpu := gm.GpuDataMap[v.ID] gpu := gm.GpuDataMap[v.ID]
gpu.Temperature = temp gpu.Temperature, _ = strconv.ParseFloat(v.Temperature, 64)
gpu.MemoryUsed = memoryUsage gpu.MemoryUsed = bytesToMegabytes(memoryUsage)
gpu.MemoryTotal = totalMemory gpu.MemoryTotal = bytesToMegabytes(totalMemory)
gpu.Usage += usage gpu.Usage += usage
gpu.Power += power gpu.Power += power
gpu.Count++ gpu.Count++