From b1db450e00b48328f3369f6f480dd9e3230276e4 Mon Sep 17 00:00:00 2001 From: Henry Dollman Date: Tue, 12 Nov 2024 18:13:57 -0500 Subject: [PATCH] enable gpu monitoring by default --- beszel/internal/agent/agent.go | 10 ++++------ beszel/internal/agent/gpu.go | 6 +++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/beszel/internal/agent/agent.go b/beszel/internal/agent/agent.go index 6190b10..7ff03b2 100644 --- a/beszel/internal/agent/agent.go +++ b/beszel/internal/agent/agent.go @@ -76,12 +76,10 @@ func (a *Agent) Run(pubKey []byte, addr string) { a.dockerManager = newDockerManager(a) // initialize GPU manager - if os.Getenv("GPU") == "true" { - if gm, err := NewGPUManager(); err != nil { - slog.Warn("GPU", "err", err) - } else { - a.gpuManager = gm - } + if gm, err := NewGPUManager(); err != nil { + slog.Debug("GPU", "err", err) + } else { + a.gpuManager = gm } // if debugging, print stats diff --git a/beszel/internal/agent/gpu.go b/beszel/internal/agent/gpu.go index c36070f..3859f07 100644 --- a/beszel/internal/agent/gpu.go +++ b/beszel/internal/agent/gpu.go @@ -57,7 +57,7 @@ func (gm *GPUManager) collectNvidiaStats() error { } // Use a scanner to read each line of output scanner := bufio.NewScanner(stdout) - buf := make([]byte, 0, 64*1024) // 64KB buffer + buf := make([]byte, 0, 8*1024) // 8KB buffer scanner.Buffer(buf, bufio.MaxScanTokenSize) for scanner.Scan() { line := scanner.Bytes() @@ -120,7 +120,7 @@ func (gm *GPUManager) startAmdCollector() { // collectAmdStats runs rocm-smi in a loop and passes the output to parseAmdData func (gm *GPUManager) collectAmdStats() error { - cmd := exec.Command("/bin/sh", "-c", "while true; do rocm-smi --showid --showtemp --showuse --showpower --showproductname --showmeminfo vram --json; sleep 3.7; done") + cmd := exec.Command("/bin/sh", "-c", "while true; do rocm-smi --showid --showtemp --showuse --showpower --showproductname --showmeminfo vram --json; sleep 4.3; done") // Set up a pipe to capture stdout stdout, err := cmd.StdoutPipe() if err != nil { @@ -132,7 +132,7 @@ func (gm *GPUManager) collectAmdStats() error { } // Use a scanner to read each line of output scanner := bufio.NewScanner(stdout) - buf := make([]byte, 0, 64*1024) // 64KB buffer + buf := make([]byte, 0, 8*1024) // 8KB buffer scanner.Buffer(buf, bufio.MaxScanTokenSize) for scanner.Scan() { var rocmSmiInfo map[string]RocmSmiJson