mirror of
https://github.com/fankes/beszel.git
synced 2025-10-19 17:59:28 +08:00
Probable fix for Jetson gpu issue (#895)
This commit is contained in:
@@ -243,21 +243,26 @@ func (gm *GPUManager) GetCurrentData() map[string]system.GPUData {
|
|||||||
// copy / reset the data
|
// copy / reset the data
|
||||||
gpuData := make(map[string]system.GPUData, len(gm.GpuDataMap))
|
gpuData := make(map[string]system.GPUData, len(gm.GpuDataMap))
|
||||||
for id, gpu := range gm.GpuDataMap {
|
for id, gpu := range gm.GpuDataMap {
|
||||||
// sum the data
|
var gpuAvg system.GPUData
|
||||||
gpu.Temperature = twoDecimals(gpu.Temperature)
|
|
||||||
gpu.MemoryUsed = twoDecimals(gpu.MemoryUsed)
|
gpuAvg.Temperature = twoDecimals(gpu.Temperature)
|
||||||
gpu.MemoryTotal = twoDecimals(gpu.MemoryTotal)
|
gpuAvg.MemoryUsed = twoDecimals(gpu.MemoryUsed)
|
||||||
gpu.Usage = twoDecimals(gpu.Usage / gpu.Count)
|
gpuAvg.MemoryTotal = twoDecimals(gpu.MemoryTotal)
|
||||||
gpu.Power = twoDecimals(gpu.Power / gpu.Count)
|
|
||||||
// reset the count
|
// avoid division by zero
|
||||||
gpu.Count = 1
|
if gpu.Count > 0 {
|
||||||
// dereference to avoid overwriting anything else
|
gpuAvg.Usage = twoDecimals(gpu.Usage / gpu.Count)
|
||||||
gpuCopy := *gpu
|
gpuAvg.Power = twoDecimals(gpu.Power / gpu.Count)
|
||||||
|
}
|
||||||
|
|
||||||
|
// reset accumulators in the original
|
||||||
|
gpu.Usage, gpu.Power, gpu.Count = 0, 0, 0
|
||||||
|
|
||||||
// append id to the name if there are multiple GPUs with the same name
|
// append id to the name if there are multiple GPUs with the same name
|
||||||
if nameCounts[gpu.Name] > 1 {
|
if nameCounts[gpu.Name] > 1 {
|
||||||
gpuCopy.Name = fmt.Sprintf("%s %s", gpu.Name, id)
|
gpuAvg.Name = fmt.Sprintf("%s %s", gpu.Name, id)
|
||||||
}
|
}
|
||||||
gpuData[id] = gpuCopy
|
gpuData[id] = gpuAvg
|
||||||
}
|
}
|
||||||
slog.Debug("GPU", "data", gpuData)
|
slog.Debug("GPU", "data", gpuData)
|
||||||
return gpuData
|
return gpuData
|
||||||
|
@@ -279,6 +279,19 @@ func TestParseJetsonData(t *testing.T) {
|
|||||||
Count: 1,
|
Count: 1,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "orin nano",
|
||||||
|
input: "06-18-2025 11:25:24 RAM 3452/7620MB (lfb 25x4MB) SWAP 1518/16384MB (cached 174MB) CPU [1%@1420,2%@1420,0%@1420,2%@1420,2%@729,1%@729] GR3D_FREQ 0% cpu@50.031C soc2@49.031C soc0@50C gpu@49.031C tj@50.25C soc1@50.25C VDD_IN 4824mW/4824mW VDD_CPU_GPU_CV 518mW/518mW VDD_SOC 1475mW/1475mW",
|
||||||
|
wantMetrics: &system.GPUData{
|
||||||
|
Name: "GPU",
|
||||||
|
MemoryUsed: 3452.0,
|
||||||
|
MemoryTotal: 7620.0,
|
||||||
|
Usage: 0.0,
|
||||||
|
Temperature: 50.25,
|
||||||
|
Power: 0.518,
|
||||||
|
Count: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "missing temperature",
|
name: "missing temperature",
|
||||||
input: "11-14-2024 22:54:33 RAM 4300/30698MB GR3D_FREQ 45% VDD_GPU_SOC 2171mW",
|
input: "11-14-2024 22:54:33 RAM 4300/30698MB GR3D_FREQ 45% VDD_GPU_SOC 2171mW",
|
||||||
@@ -318,6 +331,7 @@ func TestParseJetsonData(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestGetCurrentData(t *testing.T) {
|
func TestGetCurrentData(t *testing.T) {
|
||||||
|
t.Run("calculates averages and resets accumulators", func(t *testing.T) {
|
||||||
gm := &GPUManager{
|
gm := &GPUManager{
|
||||||
GpuDataMap: map[string]*system.GPUData{
|
GpuDataMap: map[string]*system.GPUData{
|
||||||
"0": {
|
"0": {
|
||||||
@@ -347,15 +361,45 @@ func TestGetCurrentData(t *testing.T) {
|
|||||||
assert.Equal(t, "GPU1 0", result["0"].Name)
|
assert.Equal(t, "GPU1 0", result["0"].Name)
|
||||||
assert.Equal(t, "GPU1 1", result["1"].Name)
|
assert.Equal(t, "GPU1 1", result["1"].Name)
|
||||||
|
|
||||||
// Check averaged values
|
// Check averaged values in the result
|
||||||
assert.InDelta(t, 50.0, result["0"].Usage, 0.01)
|
assert.InDelta(t, 50.0, result["0"].Usage, 0.01)
|
||||||
assert.InDelta(t, 100.0, result["0"].Power, 0.01)
|
assert.InDelta(t, 100.0, result["0"].Power, 0.01)
|
||||||
assert.InDelta(t, 30.0, result["1"].Usage, 0.01)
|
assert.InDelta(t, 30.0, result["1"].Usage, 0.01)
|
||||||
assert.InDelta(t, 60.0, result["1"].Power, 0.01)
|
assert.InDelta(t, 60.0, result["1"].Power, 0.01)
|
||||||
|
|
||||||
// Verify reset counts
|
// Verify that accumulators in the original map are reset
|
||||||
assert.Equal(t, float64(1), gm.GpuDataMap["0"].Count)
|
assert.Equal(t, float64(0), gm.GpuDataMap["0"].Count, "GPU 0 Count should be reset")
|
||||||
assert.Equal(t, float64(1), gm.GpuDataMap["1"].Count)
|
assert.Equal(t, float64(0), gm.GpuDataMap["0"].Usage, "GPU 0 Usage should be reset")
|
||||||
|
assert.Equal(t, float64(0), gm.GpuDataMap["0"].Power, "GPU 0 Power should be reset")
|
||||||
|
assert.Equal(t, float64(0), gm.GpuDataMap["1"].Count, "GPU 1 Count should be reset")
|
||||||
|
assert.Equal(t, float64(0), gm.GpuDataMap["1"].Usage, "GPU 1 Usage should be reset")
|
||||||
|
assert.Equal(t, float64(0), gm.GpuDataMap["1"].Power, "GPU 1 Power should be reset")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("handles zero count without panicking", func(t *testing.T) {
|
||||||
|
gm := &GPUManager{
|
||||||
|
GpuDataMap: map[string]*system.GPUData{
|
||||||
|
"0": {
|
||||||
|
Name: "TestGPU",
|
||||||
|
Count: 0,
|
||||||
|
Usage: 0,
|
||||||
|
Power: 0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
var result map[string]system.GPUData
|
||||||
|
assert.NotPanics(t, func() {
|
||||||
|
result = gm.GetCurrentData()
|
||||||
|
})
|
||||||
|
|
||||||
|
// Check that usage and power are 0
|
||||||
|
assert.Equal(t, 0.0, result["0"].Usage)
|
||||||
|
assert.Equal(t, 0.0, result["0"].Power)
|
||||||
|
|
||||||
|
// Verify reset count
|
||||||
|
assert.Equal(t, float64(0), gm.GpuDataMap["0"].Count)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDetectGPUs(t *testing.T) {
|
func TestDetectGPUs(t *testing.T) {
|
||||||
@@ -722,6 +766,18 @@ func TestAccumulation(t *testing.T) {
|
|||||||
assert.InDelta(t, expected.avgUsage, gpu.Usage, 0.01, "Average usage in GetCurrentData should match")
|
assert.InDelta(t, expected.avgUsage, gpu.Usage, 0.01, "Average usage in GetCurrentData should match")
|
||||||
assert.InDelta(t, expected.avgPower, gpu.Power, 0.01, "Average power in GetCurrentData should match")
|
assert.InDelta(t, expected.avgPower, gpu.Power, 0.01, "Average power in GetCurrentData should match")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify that accumulators in the original map are reset
|
||||||
|
for id := range tt.expectedValues {
|
||||||
|
gpu, exists := gm.GpuDataMap[id]
|
||||||
|
assert.True(t, exists, "GPU with ID %s should still exist after GetCurrentData", id)
|
||||||
|
if !exists {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
assert.Equal(t, float64(0), gpu.Count, "Count should be reset for GPU ID %s", id)
|
||||||
|
assert.Equal(t, float64(0), gpu.Usage, "Usage should be reset for GPU ID %s", id)
|
||||||
|
assert.Equal(t, float64(0), gpu.Power, "Power should be reset for GPU ID %s", id)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user