mirror of
https://github.com/fankes/beszel.git
synced 2025-10-19 09:49:28 +08:00
remove nvidia-smi dependency for jetson / tegrastats (#286)
This commit is contained in:
@@ -125,14 +125,13 @@ func (gm *GPUManager) getJetsonParser() func(output []byte) bool {
|
|||||||
// TODO: Maybe use VDD_IN for Nano / NX and add a total system power chart
|
// TODO: Maybe use VDD_IN for Nano / NX and add a total system power chart
|
||||||
powerPattern := regexp.MustCompile(`(GPU_SOC|CPU_GPU_CV) (\d+)mW`)
|
powerPattern := regexp.MustCompile(`(GPU_SOC|CPU_GPU_CV) (\d+)mW`)
|
||||||
|
|
||||||
|
// jetson devices have only one gpu so we'll just initialize here
|
||||||
|
gpuData := &system.GPUData{Name: "GPU"}
|
||||||
|
gm.GpuDataMap["0"] = gpuData
|
||||||
|
|
||||||
return func(output []byte) bool {
|
return func(output []byte) bool {
|
||||||
gm.Lock()
|
gm.Lock()
|
||||||
defer gm.Unlock()
|
defer gm.Unlock()
|
||||||
// we get gpu name from the initial run of nvidia-smi, so return if it hasn't been initialized
|
|
||||||
gpuData, ok := gm.GpuDataMap["0"]
|
|
||||||
if !ok {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// Parse RAM usage
|
// Parse RAM usage
|
||||||
ramMatches := ramPattern.FindSubmatch(output)
|
ramMatches := ramPattern.FindSubmatch(output)
|
||||||
if ramMatches != nil {
|
if ramMatches != nil {
|
||||||
@@ -184,12 +183,6 @@ func (gm *GPUManager) parseNvidiaData(output []byte) bool {
|
|||||||
if _, ok := gm.GpuDataMap[id]; !ok {
|
if _, ok := gm.GpuDataMap[id]; !ok {
|
||||||
name := strings.TrimPrefix(fields[1], "NVIDIA ")
|
name := strings.TrimPrefix(fields[1], "NVIDIA ")
|
||||||
gm.GpuDataMap[id] = &system.GPUData{Name: strings.TrimSuffix(name, " Laptop GPU")}
|
gm.GpuDataMap[id] = &system.GPUData{Name: strings.TrimSuffix(name, " Laptop GPU")}
|
||||||
// check if tegrastats is active - if so we will only use nvidia-smi to get gpu name
|
|
||||||
// - nvidia-smi does not provide metrics for tegra / jetson devices
|
|
||||||
// this will end the nvidia-smi collector
|
|
||||||
if gm.tegrastats {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// update gpu data
|
// update gpu data
|
||||||
gpu := gm.GpuDataMap[id]
|
gpu := gm.GpuDataMap[id]
|
||||||
@@ -283,6 +276,7 @@ func (gm *GPUManager) detectGPUs() error {
|
|||||||
}
|
}
|
||||||
if _, err := exec.LookPath(tegraStatsCmd); err == nil {
|
if _, err := exec.LookPath(tegraStatsCmd); err == nil {
|
||||||
gm.tegrastats = true
|
gm.tegrastats = true
|
||||||
|
gm.nvidiaSmi = false
|
||||||
}
|
}
|
||||||
if gm.nvidiaSmi || gm.rocmSmi || gm.tegrastats {
|
if gm.nvidiaSmi || gm.rocmSmi || gm.tegrastats {
|
||||||
return nil
|
return nil
|
||||||
@@ -297,9 +291,11 @@ func (gm *GPUManager) startCollector(command string) {
|
|||||||
}
|
}
|
||||||
switch command {
|
switch command {
|
||||||
case nvidiaSmiCmd:
|
case nvidiaSmiCmd:
|
||||||
collector.cmdArgs = []string{"-l", nvidiaSmiInterval,
|
collector.cmdArgs = []string{
|
||||||
|
"-l", nvidiaSmiInterval,
|
||||||
"--query-gpu=index,name,temperature.gpu,memory.used,memory.total,utilization.gpu,power.draw",
|
"--query-gpu=index,name,temperature.gpu,memory.used,memory.total,utilization.gpu,power.draw",
|
||||||
"--format=csv,noheader,nounits"}
|
"--format=csv,noheader,nounits",
|
||||||
|
}
|
||||||
collector.parse = gm.parseNvidiaData
|
collector.parse = gm.parseNvidiaData
|
||||||
go collector.start()
|
go collector.start()
|
||||||
case tegraStatsCmd:
|
case tegraStatsCmd:
|
||||||
|
@@ -251,14 +251,13 @@ func TestParseJetsonData(t *testing.T) {
|
|||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
input string
|
input string
|
||||||
gm *GPUManager
|
|
||||||
wantMetrics *system.GPUData
|
wantMetrics *system.GPUData
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "valid data",
|
name: "valid data",
|
||||||
input: "11-14-2024 22:54:33 RAM 4300/30698MB GR3D_FREQ 45% tj@52.468C VDD_GPU_SOC 2171mW",
|
input: "11-14-2024 22:54:33 RAM 4300/30698MB GR3D_FREQ 45% tj@52.468C VDD_GPU_SOC 2171mW",
|
||||||
wantMetrics: &system.GPUData{
|
wantMetrics: &system.GPUData{
|
||||||
Name: "Jetson",
|
Name: "GPU",
|
||||||
MemoryUsed: 4300.0,
|
MemoryUsed: 4300.0,
|
||||||
MemoryTotal: 30698.0,
|
MemoryTotal: 30698.0,
|
||||||
Usage: 45.0,
|
Usage: 45.0,
|
||||||
@@ -271,7 +270,7 @@ func TestParseJetsonData(t *testing.T) {
|
|||||||
name: "more valid data",
|
name: "more valid data",
|
||||||
input: "11-15-2024 08:38:09 RAM 6185/7620MB (lfb 8x2MB) SWAP 851/3810MB (cached 1MB) CPU [15%@729,11%@729,14%@729,13%@729,11%@729,8%@729] EMC_FREQ 43%@2133 GR3D_FREQ 63%@[621] NVDEC off NVJPG off NVJPG1 off VIC off OFA off APE 200 cpu@53.968C soc2@52.437C soc0@50.75C gpu@53.343C tj@53.968C soc1@51.656C VDD_IN 12479mW/12479mW VDD_CPU_GPU_CV 4667mW/4667mW VDD_SOC 2817mW/2817mW",
|
input: "11-15-2024 08:38:09 RAM 6185/7620MB (lfb 8x2MB) SWAP 851/3810MB (cached 1MB) CPU [15%@729,11%@729,14%@729,13%@729,11%@729,8%@729] EMC_FREQ 43%@2133 GR3D_FREQ 63%@[621] NVDEC off NVJPG off NVJPG1 off VIC off OFA off APE 200 cpu@53.968C soc2@52.437C soc0@50.75C gpu@53.343C tj@53.968C soc1@51.656C VDD_IN 12479mW/12479mW VDD_CPU_GPU_CV 4667mW/4667mW VDD_SOC 2817mW/2817mW",
|
||||||
wantMetrics: &system.GPUData{
|
wantMetrics: &system.GPUData{
|
||||||
Name: "Jetson",
|
Name: "GPU",
|
||||||
MemoryUsed: 6185.0,
|
MemoryUsed: 6185.0,
|
||||||
MemoryTotal: 7620.0,
|
MemoryTotal: 7620.0,
|
||||||
Usage: 63.0,
|
Usage: 63.0,
|
||||||
@@ -284,7 +283,7 @@ func TestParseJetsonData(t *testing.T) {
|
|||||||
name: "missing temperature",
|
name: "missing temperature",
|
||||||
input: "11-14-2024 22:54:33 RAM 4300/30698MB GR3D_FREQ 45% VDD_GPU_SOC 2171mW",
|
input: "11-14-2024 22:54:33 RAM 4300/30698MB GR3D_FREQ 45% VDD_GPU_SOC 2171mW",
|
||||||
wantMetrics: &system.GPUData{
|
wantMetrics: &system.GPUData{
|
||||||
Name: "Jetson",
|
Name: "GPU",
|
||||||
MemoryUsed: 4300.0,
|
MemoryUsed: 4300.0,
|
||||||
MemoryTotal: 30698.0,
|
MemoryTotal: 30698.0,
|
||||||
Usage: 45.0,
|
Usage: 45.0,
|
||||||
@@ -292,32 +291,18 @@ func TestParseJetsonData(t *testing.T) {
|
|||||||
Count: 1,
|
Count: 1,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "no gpu defined by nvidia-smi",
|
|
||||||
input: "11-14-2024 22:54:33 RAM 4300/30698MB GR3D_FREQ 45% VDD_GPU_SOC 2171mW",
|
|
||||||
gm: &GPUManager{
|
|
||||||
GpuDataMap: map[string]*system.GPUData{},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
if tt.gm != nil {
|
gm := &GPUManager{
|
||||||
// should return if no gpu set by nvidia-smi
|
GpuDataMap: make(map[string]*system.GPUData),
|
||||||
assert.Empty(t, tt.gm.GpuDataMap)
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
tt.gm = &GPUManager{
|
parser := gm.getJetsonParser()
|
||||||
GpuDataMap: map[string]*system.GPUData{
|
|
||||||
"0": {Name: "Jetson"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
parser := tt.gm.getJetsonParser()
|
|
||||||
valid := parser([]byte(tt.input))
|
valid := parser([]byte(tt.input))
|
||||||
assert.Equal(t, true, valid)
|
assert.Equal(t, true, valid)
|
||||||
|
|
||||||
got := tt.gm.GpuDataMap["0"]
|
got := gm.GpuDataMap["0"]
|
||||||
require.NotNil(t, got)
|
require.NotNil(t, got)
|
||||||
assert.Equal(t, tt.wantMetrics.Name, got.Name)
|
assert.Equal(t, tt.wantMetrics.Name, got.Name)
|
||||||
assert.InDelta(t, tt.wantMetrics.MemoryUsed, got.MemoryUsed, 0.01)
|
assert.InDelta(t, tt.wantMetrics.MemoryUsed, got.MemoryUsed, 0.01)
|
||||||
@@ -443,7 +428,7 @@ echo "test"`
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
wantNvidiaSmi: true,
|
wantNvidiaSmi: false,
|
||||||
wantRocmSmi: true,
|
wantRocmSmi: true,
|
||||||
wantTegrastats: true,
|
wantTegrastats: true,
|
||||||
wantErr: false,
|
wantErr: false,
|
||||||
|
Reference in New Issue
Block a user