mirror of
https://github.com/fankes/beszel.git
synced 2025-10-19 01:39:34 +08:00
update project structure
- move agent to /agent - change /beszel to /src - update workflows and docker builds
This commit is contained in:
182
agent/agent.go
Normal file
182
agent/agent.go
Normal file
@@ -0,0 +1,182 @@
|
||||
// Package agent handles the agent's SSH server and system stats collection.
|
||||
package agent
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/gliderlabs/ssh"
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/src/entities/system"
|
||||
"github.com/shirou/gopsutil/v4/host"
|
||||
gossh "golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
type Agent struct {
|
||||
sync.Mutex // Used to lock agent while collecting data
|
||||
debug bool // true if LOG_LEVEL is set to debug
|
||||
zfs bool // true if system has arcstats
|
||||
memCalc string // Memory calculation formula
|
||||
fsNames []string // List of filesystem device names being monitored
|
||||
fsStats map[string]*system.FsStats // Keeps track of disk stats for each filesystem
|
||||
netInterfaces map[string]struct{} // Stores all valid network interfaces
|
||||
netIoStats system.NetIoStats // Keeps track of bandwidth usage
|
||||
dockerManager *dockerManager // Manages Docker API requests
|
||||
sensorConfig *SensorConfig // Sensors config
|
||||
systemInfo system.Info // Host system info
|
||||
gpuManager *GPUManager // Manages GPU data
|
||||
cache *SessionCache // Cache for system stats based on primary session ID
|
||||
connectionManager *ConnectionManager // Channel to signal connection events
|
||||
server *ssh.Server // SSH server
|
||||
dataDir string // Directory for persisting data
|
||||
keys []gossh.PublicKey // SSH public keys
|
||||
}
|
||||
|
||||
// NewAgent creates a new agent with the given data directory for persisting data.
|
||||
// If the data directory is not set, it will attempt to find the optimal directory.
|
||||
func NewAgent(dataDir ...string) (agent *Agent, err error) {
|
||||
agent = &Agent{
|
||||
fsStats: make(map[string]*system.FsStats),
|
||||
cache: NewSessionCache(69 * time.Second),
|
||||
}
|
||||
|
||||
agent.dataDir, err = getDataDir(dataDir...)
|
||||
if err != nil {
|
||||
slog.Warn("Data directory not found")
|
||||
} else {
|
||||
slog.Info("Data directory", "path", agent.dataDir)
|
||||
}
|
||||
|
||||
agent.memCalc, _ = GetEnv("MEM_CALC")
|
||||
agent.sensorConfig = agent.newSensorConfig()
|
||||
// Set up slog with a log level determined by the LOG_LEVEL env var
|
||||
if logLevelStr, exists := GetEnv("LOG_LEVEL"); exists {
|
||||
switch strings.ToLower(logLevelStr) {
|
||||
case "debug":
|
||||
agent.debug = true
|
||||
slog.SetLogLoggerLevel(slog.LevelDebug)
|
||||
case "warn":
|
||||
slog.SetLogLoggerLevel(slog.LevelWarn)
|
||||
case "error":
|
||||
slog.SetLogLoggerLevel(slog.LevelError)
|
||||
}
|
||||
}
|
||||
|
||||
slog.Debug(beszel.Version)
|
||||
|
||||
// initialize system info
|
||||
agent.initializeSystemInfo()
|
||||
|
||||
// initialize connection manager
|
||||
agent.connectionManager = newConnectionManager(agent)
|
||||
|
||||
// initialize disk info
|
||||
agent.initializeDiskInfo()
|
||||
|
||||
// initialize net io stats
|
||||
agent.initializeNetIoStats()
|
||||
|
||||
// initialize docker manager
|
||||
agent.dockerManager = newDockerManager(agent)
|
||||
|
||||
// initialize GPU manager
|
||||
if gm, err := NewGPUManager(); err != nil {
|
||||
slog.Debug("GPU", "err", err)
|
||||
} else {
|
||||
agent.gpuManager = gm
|
||||
}
|
||||
|
||||
// if debugging, print stats
|
||||
if agent.debug {
|
||||
slog.Debug("Stats", "data", agent.gatherStats(""))
|
||||
}
|
||||
|
||||
return agent, nil
|
||||
}
|
||||
|
||||
// GetEnv retrieves an environment variable with a "BESZEL_AGENT_" prefix, or falls back to the unprefixed key.
|
||||
func GetEnv(key string) (value string, exists bool) {
|
||||
if value, exists = os.LookupEnv("BESZEL_AGENT_" + key); exists {
|
||||
return value, exists
|
||||
}
|
||||
// Fallback to the old unprefixed key
|
||||
return os.LookupEnv(key)
|
||||
}
|
||||
|
||||
func (a *Agent) gatherStats(sessionID string) *system.CombinedData {
|
||||
a.Lock()
|
||||
defer a.Unlock()
|
||||
|
||||
data, isCached := a.cache.Get(sessionID)
|
||||
if isCached {
|
||||
slog.Debug("Cached data", "session", sessionID)
|
||||
return data
|
||||
}
|
||||
|
||||
*data = system.CombinedData{
|
||||
Stats: a.getSystemStats(),
|
||||
Info: a.systemInfo,
|
||||
}
|
||||
slog.Debug("System data", "data", data)
|
||||
|
||||
if a.dockerManager != nil {
|
||||
if containerStats, err := a.dockerManager.getDockerStats(); err == nil {
|
||||
data.Containers = containerStats
|
||||
slog.Debug("Containers", "data", data.Containers)
|
||||
} else {
|
||||
slog.Debug("Containers", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
data.Stats.ExtraFs = make(map[string]*system.FsStats)
|
||||
for name, stats := range a.fsStats {
|
||||
if !stats.Root && stats.DiskTotal > 0 {
|
||||
data.Stats.ExtraFs[name] = stats
|
||||
}
|
||||
}
|
||||
slog.Debug("Extra FS", "data", data.Stats.ExtraFs)
|
||||
|
||||
a.cache.Set(sessionID, data)
|
||||
return data
|
||||
}
|
||||
|
||||
// StartAgent initializes and starts the agent with optional WebSocket connection
|
||||
func (a *Agent) Start(serverOptions ServerOptions) error {
|
||||
a.keys = serverOptions.Keys
|
||||
return a.connectionManager.Start(serverOptions)
|
||||
}
|
||||
|
||||
func (a *Agent) getFingerprint() string {
|
||||
// first look for a fingerprint in the data directory
|
||||
if a.dataDir != "" {
|
||||
if fp, err := os.ReadFile(filepath.Join(a.dataDir, "fingerprint")); err == nil {
|
||||
return string(fp)
|
||||
}
|
||||
}
|
||||
|
||||
// if no fingerprint is found, generate one
|
||||
fingerprint, err := host.HostID()
|
||||
if err != nil || fingerprint == "" {
|
||||
fingerprint = a.systemInfo.Hostname + a.systemInfo.CpuModel
|
||||
}
|
||||
|
||||
// hash fingerprint
|
||||
sum := sha256.Sum256([]byte(fingerprint))
|
||||
fingerprint = hex.EncodeToString(sum[:24])
|
||||
|
||||
// save fingerprint to data directory
|
||||
if a.dataDir != "" {
|
||||
err = os.WriteFile(filepath.Join(a.dataDir, "fingerprint"), []byte(fingerprint), 0644)
|
||||
if err != nil {
|
||||
slog.Warn("Failed to save fingerprint", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
return fingerprint
|
||||
}
|
37
agent/agent_cache.go
Normal file
37
agent/agent_cache.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/src/entities/system"
|
||||
)
|
||||
|
||||
// Not thread safe since we only access from gatherStats which is already locked
|
||||
type SessionCache struct {
|
||||
data *system.CombinedData
|
||||
lastUpdate time.Time
|
||||
primarySession string
|
||||
leaseTime time.Duration
|
||||
}
|
||||
|
||||
func NewSessionCache(leaseTime time.Duration) *SessionCache {
|
||||
return &SessionCache{
|
||||
leaseTime: leaseTime,
|
||||
data: &system.CombinedData{},
|
||||
}
|
||||
}
|
||||
|
||||
func (c *SessionCache) Get(sessionID string) (stats *system.CombinedData, isCached bool) {
|
||||
if sessionID != c.primarySession && time.Since(c.lastUpdate) < c.leaseTime {
|
||||
return c.data, true
|
||||
}
|
||||
return c.data, false
|
||||
}
|
||||
|
||||
func (c *SessionCache) Set(sessionID string, data *system.CombinedData) {
|
||||
if data != nil {
|
||||
*c.data = *data
|
||||
}
|
||||
c.primarySession = sessionID
|
||||
c.lastUpdate = time.Now()
|
||||
}
|
89
agent/agent_cache_test.go
Normal file
89
agent/agent_cache_test.go
Normal file
@@ -0,0 +1,89 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/src/entities/system"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestSessionCache_GetSet(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
cache := NewSessionCache(69 * time.Second)
|
||||
|
||||
testData := &system.CombinedData{
|
||||
Info: system.Info{
|
||||
Hostname: "test-host",
|
||||
Cores: 4,
|
||||
},
|
||||
Stats: system.Stats{
|
||||
Cpu: 50.0,
|
||||
MemPct: 30.0,
|
||||
DiskPct: 40.0,
|
||||
},
|
||||
}
|
||||
|
||||
// Test initial state - should not be cached
|
||||
data, isCached := cache.Get("session1")
|
||||
assert.False(t, isCached, "Expected no cached data initially")
|
||||
assert.NotNil(t, data, "Expected data to be initialized")
|
||||
// Set data for session1
|
||||
cache.Set("session1", testData)
|
||||
|
||||
time.Sleep(15 * time.Second)
|
||||
|
||||
// Get data for a different session - should be cached
|
||||
data, isCached = cache.Get("session2")
|
||||
assert.True(t, isCached, "Expected data to be cached for non-primary session")
|
||||
require.NotNil(t, data, "Expected cached data to be returned")
|
||||
assert.Equal(t, "test-host", data.Info.Hostname, "Hostname should match test data")
|
||||
assert.Equal(t, 4, data.Info.Cores, "Cores should match test data")
|
||||
assert.Equal(t, 50.0, data.Stats.Cpu, "CPU should match test data")
|
||||
assert.Equal(t, 30.0, data.Stats.MemPct, "Memory percentage should match test data")
|
||||
assert.Equal(t, 40.0, data.Stats.DiskPct, "Disk percentage should match test data")
|
||||
|
||||
time.Sleep(10 * time.Second)
|
||||
|
||||
// Get data for the primary session - should not be cached
|
||||
data, isCached = cache.Get("session1")
|
||||
assert.False(t, isCached, "Expected data not to be cached for primary session")
|
||||
require.NotNil(t, data, "Expected data to be returned even if not cached")
|
||||
assert.Equal(t, "test-host", data.Info.Hostname, "Hostname should match test data")
|
||||
// if not cached, agent will update the data
|
||||
cache.Set("session1", testData)
|
||||
|
||||
time.Sleep(45 * time.Second)
|
||||
|
||||
// Get data for a different session - should still be cached
|
||||
_, isCached = cache.Get("session2")
|
||||
assert.True(t, isCached, "Expected data to be cached for non-primary session")
|
||||
|
||||
// Wait for the lease to expire
|
||||
time.Sleep(30 * time.Second)
|
||||
|
||||
// Get data for session2 - should not be cached
|
||||
_, isCached = cache.Get("session2")
|
||||
assert.False(t, isCached, "Expected data not to be cached after lease expiration")
|
||||
})
|
||||
}
|
||||
|
||||
func TestSessionCache_NilData(t *testing.T) {
|
||||
// Create a new SessionCache
|
||||
cache := NewSessionCache(30 * time.Second)
|
||||
|
||||
// Test setting nil data (should not panic)
|
||||
assert.NotPanics(t, func() {
|
||||
cache.Set("session1", nil)
|
||||
}, "Setting nil data should not panic")
|
||||
|
||||
// Get data - should not be nil even though we set nil
|
||||
data, _ := cache.Get("session2")
|
||||
assert.NotNil(t, data, "Expected data to not be nil after setting nil data")
|
||||
}
|
9
agent/agent_test_helpers.go
Normal file
9
agent/agent_test_helpers.go
Normal file
@@ -0,0 +1,9 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
// TESTING ONLY: GetConnectionManager is a helper function to get the connection manager for testing.
|
||||
func (a *Agent) GetConnectionManager() *ConnectionManager {
|
||||
return a.connectionManager
|
||||
}
|
53
agent/battery/battery.go
Normal file
53
agent/battery/battery.go
Normal file
@@ -0,0 +1,53 @@
|
||||
//go:build !freebsd
|
||||
|
||||
// Package battery provides functions to check if the system has a battery and to get the battery stats.
|
||||
package battery
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log/slog"
|
||||
|
||||
"github.com/distatus/battery"
|
||||
)
|
||||
|
||||
var systemHasBattery = false
|
||||
var haveCheckedBattery = false
|
||||
|
||||
// HasReadableBattery checks if the system has a battery and returns true if it does.
|
||||
func HasReadableBattery() bool {
|
||||
if haveCheckedBattery {
|
||||
return systemHasBattery
|
||||
}
|
||||
haveCheckedBattery = true
|
||||
bat, err := battery.Get(0)
|
||||
if err == nil && bat != nil {
|
||||
systemHasBattery = true
|
||||
} else {
|
||||
slog.Debug("No battery found", "err", err)
|
||||
}
|
||||
return systemHasBattery
|
||||
}
|
||||
|
||||
// GetBatteryStats returns the current battery percent and charge state
|
||||
func GetBatteryStats() (batteryPercent uint8, batteryState uint8, err error) {
|
||||
if !systemHasBattery {
|
||||
return batteryPercent, batteryState, errors.ErrUnsupported
|
||||
}
|
||||
batteries, err := battery.GetAll()
|
||||
if err != nil || len(batteries) == 0 {
|
||||
return batteryPercent, batteryState, err
|
||||
}
|
||||
totalCapacity := float64(0)
|
||||
totalCharge := float64(0)
|
||||
for _, bat := range batteries {
|
||||
if bat.Design != 0 {
|
||||
totalCapacity += bat.Design
|
||||
} else {
|
||||
totalCapacity += bat.Full
|
||||
}
|
||||
totalCharge += bat.Current
|
||||
}
|
||||
batteryPercent = uint8(totalCharge / totalCapacity * 100)
|
||||
batteryState = uint8(batteries[0].State.Raw)
|
||||
return batteryPercent, batteryState, nil
|
||||
}
|
13
agent/battery/battery_freebsd.go
Normal file
13
agent/battery/battery_freebsd.go
Normal file
@@ -0,0 +1,13 @@
|
||||
//go:build freebsd
|
||||
|
||||
package battery
|
||||
|
||||
import "errors"
|
||||
|
||||
func HasReadableBattery() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func GetBatteryStats() (uint8, uint8, error) {
|
||||
return 0, 0, errors.ErrUnsupported
|
||||
}
|
266
agent/client.go
Normal file
266
agent/client.go
Normal file
@@ -0,0 +1,266 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/src/common"
|
||||
|
||||
"github.com/fxamacker/cbor/v2"
|
||||
"github.com/lxzan/gws"
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
const (
|
||||
wsDeadline = 70 * time.Second
|
||||
)
|
||||
|
||||
// WebSocketClient manages the WebSocket connection between the agent and hub.
|
||||
// It handles authentication, message routing, and connection lifecycle management.
|
||||
type WebSocketClient struct {
|
||||
gws.BuiltinEventHandler
|
||||
options *gws.ClientOption // WebSocket client configuration options
|
||||
agent *Agent // Reference to the parent agent
|
||||
Conn *gws.Conn // Active WebSocket connection
|
||||
hubURL *url.URL // Parsed hub URL for connection
|
||||
token string // Authentication token for hub registration
|
||||
fingerprint string // System fingerprint for identification
|
||||
hubRequest *common.HubRequest[cbor.RawMessage] // Reusable request structure for message parsing
|
||||
lastConnectAttempt time.Time // Timestamp of last connection attempt
|
||||
hubVerified bool // Whether the hub has been cryptographically verified
|
||||
}
|
||||
|
||||
// newWebSocketClient creates a new WebSocket client for the given agent.
|
||||
// It reads configuration from environment variables and validates the hub URL.
|
||||
func newWebSocketClient(agent *Agent) (client *WebSocketClient, err error) {
|
||||
hubURLStr, exists := GetEnv("HUB_URL")
|
||||
if !exists {
|
||||
return nil, errors.New("HUB_URL environment variable not set")
|
||||
}
|
||||
|
||||
client = &WebSocketClient{}
|
||||
|
||||
client.hubURL, err = url.Parse(hubURLStr)
|
||||
if err != nil {
|
||||
return nil, errors.New("invalid hub URL")
|
||||
}
|
||||
// get registration token
|
||||
client.token, err = getToken()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client.agent = agent
|
||||
client.hubRequest = &common.HubRequest[cbor.RawMessage]{}
|
||||
client.fingerprint = agent.getFingerprint()
|
||||
|
||||
return client, nil
|
||||
}
|
||||
|
||||
// getToken returns the token for the WebSocket client.
|
||||
// It first checks the TOKEN environment variable, then the TOKEN_FILE environment variable.
|
||||
// If neither is set, it returns an error.
|
||||
func getToken() (string, error) {
|
||||
// get token from env var
|
||||
token, _ := GetEnv("TOKEN")
|
||||
if token != "" {
|
||||
return token, nil
|
||||
}
|
||||
// get token from file
|
||||
tokenFile, _ := GetEnv("TOKEN_FILE")
|
||||
if tokenFile == "" {
|
||||
return "", errors.New("must set TOKEN or TOKEN_FILE")
|
||||
}
|
||||
tokenBytes, err := os.ReadFile(tokenFile)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(tokenBytes), nil
|
||||
}
|
||||
|
||||
// getOptions returns the WebSocket client options, creating them if necessary.
|
||||
// It configures the connection URL, TLS settings, and authentication headers.
|
||||
func (client *WebSocketClient) getOptions() *gws.ClientOption {
|
||||
if client.options != nil {
|
||||
return client.options
|
||||
}
|
||||
|
||||
// update the hub url to use websocket scheme and api path
|
||||
if client.hubURL.Scheme == "https" {
|
||||
client.hubURL.Scheme = "wss"
|
||||
} else {
|
||||
client.hubURL.Scheme = "ws"
|
||||
}
|
||||
client.hubURL.Path = path.Join(client.hubURL.Path, "api/beszel/agent-connect")
|
||||
|
||||
client.options = &gws.ClientOption{
|
||||
Addr: client.hubURL.String(),
|
||||
TlsConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
RequestHeader: http.Header{
|
||||
"User-Agent": []string{getUserAgent()},
|
||||
"X-Token": []string{client.token},
|
||||
"X-Beszel": []string{beszel.Version},
|
||||
},
|
||||
}
|
||||
return client.options
|
||||
}
|
||||
|
||||
// Connect establishes a WebSocket connection to the hub.
|
||||
// It closes any existing connection before attempting to reconnect.
|
||||
func (client *WebSocketClient) Connect() (err error) {
|
||||
client.lastConnectAttempt = time.Now()
|
||||
|
||||
// make sure previous connection is closed
|
||||
client.Close()
|
||||
|
||||
client.Conn, _, err = gws.NewClient(client, client.getOptions())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
go client.Conn.ReadLoop()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// OnOpen handles WebSocket connection establishment.
|
||||
// It sets a deadline for the connection to prevent hanging.
|
||||
func (client *WebSocketClient) OnOpen(conn *gws.Conn) {
|
||||
conn.SetDeadline(time.Now().Add(wsDeadline))
|
||||
}
|
||||
|
||||
// OnClose handles WebSocket connection closure.
|
||||
// It logs the closure reason and notifies the connection manager.
|
||||
func (client *WebSocketClient) OnClose(conn *gws.Conn, err error) {
|
||||
slog.Warn("Connection closed", "err", strings.TrimPrefix(err.Error(), "gws: "))
|
||||
client.agent.connectionManager.eventChan <- WebSocketDisconnect
|
||||
}
|
||||
|
||||
// OnMessage handles incoming WebSocket messages from the hub.
|
||||
// It decodes CBOR messages and routes them to appropriate handlers.
|
||||
func (client *WebSocketClient) OnMessage(conn *gws.Conn, message *gws.Message) {
|
||||
defer message.Close()
|
||||
conn.SetDeadline(time.Now().Add(wsDeadline))
|
||||
|
||||
if message.Opcode != gws.OpcodeBinary {
|
||||
return
|
||||
}
|
||||
|
||||
if err := cbor.NewDecoder(message.Data).Decode(client.hubRequest); err != nil {
|
||||
slog.Error("Error parsing message", "err", err)
|
||||
return
|
||||
}
|
||||
if err := client.handleHubRequest(client.hubRequest); err != nil {
|
||||
slog.Error("Error handling message", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// OnPing handles WebSocket ping frames.
|
||||
// It responds with a pong and updates the connection deadline.
|
||||
func (client *WebSocketClient) OnPing(conn *gws.Conn, message []byte) {
|
||||
conn.SetDeadline(time.Now().Add(wsDeadline))
|
||||
conn.WritePong(message)
|
||||
}
|
||||
|
||||
// handleAuthChallenge verifies the authenticity of the hub and returns the system's fingerprint.
|
||||
func (client *WebSocketClient) handleAuthChallenge(msg *common.HubRequest[cbor.RawMessage]) (err error) {
|
||||
var authRequest common.FingerprintRequest
|
||||
if err := cbor.Unmarshal(msg.Data, &authRequest); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := client.verifySignature(authRequest.Signature); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
client.hubVerified = true
|
||||
client.agent.connectionManager.eventChan <- WebSocketConnect
|
||||
|
||||
response := &common.FingerprintResponse{
|
||||
Fingerprint: client.fingerprint,
|
||||
}
|
||||
|
||||
if authRequest.NeedSysInfo {
|
||||
response.Hostname = client.agent.systemInfo.Hostname
|
||||
serverAddr := client.agent.connectionManager.serverOptions.Addr
|
||||
_, response.Port, _ = net.SplitHostPort(serverAddr)
|
||||
}
|
||||
|
||||
return client.sendMessage(response)
|
||||
}
|
||||
|
||||
// verifySignature verifies the signature of the token using the public keys.
|
||||
func (client *WebSocketClient) verifySignature(signature []byte) (err error) {
|
||||
for _, pubKey := range client.agent.keys {
|
||||
sig := ssh.Signature{
|
||||
Format: pubKey.Type(),
|
||||
Blob: signature,
|
||||
}
|
||||
if err = pubKey.Verify([]byte(client.token), &sig); err == nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return errors.New("invalid signature - check KEY value")
|
||||
}
|
||||
|
||||
// Close closes the WebSocket connection gracefully.
|
||||
// This method is safe to call multiple times.
|
||||
func (client *WebSocketClient) Close() {
|
||||
if client.Conn != nil {
|
||||
_ = client.Conn.WriteClose(1000, nil)
|
||||
}
|
||||
}
|
||||
|
||||
// handleHubRequest routes the request to the appropriate handler.
|
||||
// It ensures the hub is verified before processing most requests.
|
||||
func (client *WebSocketClient) handleHubRequest(msg *common.HubRequest[cbor.RawMessage]) error {
|
||||
if !client.hubVerified && msg.Action != common.CheckFingerprint {
|
||||
return errors.New("hub not verified")
|
||||
}
|
||||
switch msg.Action {
|
||||
case common.GetData:
|
||||
return client.sendSystemData()
|
||||
case common.CheckFingerprint:
|
||||
return client.handleAuthChallenge(msg)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// sendSystemData gathers and sends current system statistics to the hub.
|
||||
func (client *WebSocketClient) sendSystemData() error {
|
||||
sysStats := client.agent.gatherStats(client.token)
|
||||
return client.sendMessage(sysStats)
|
||||
}
|
||||
|
||||
// sendMessage encodes the given data to CBOR and sends it as a binary message over the WebSocket connection to the hub.
|
||||
func (client *WebSocketClient) sendMessage(data any) error {
|
||||
bytes, err := cbor.Marshal(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return client.Conn.WriteMessage(gws.OpcodeBinary, bytes)
|
||||
}
|
||||
|
||||
// getUserAgent returns one of two User-Agent strings based on current time.
|
||||
// This is used to avoid being blocked by Cloudflare or other anti-bot measures.
|
||||
func getUserAgent() string {
|
||||
const (
|
||||
uaBase = "Mozilla/5.0 (%s) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
|
||||
uaWindows = "Windows NT 11.0; Win64; x64"
|
||||
uaMac = "Macintosh; Intel Mac OS X 14_0_0"
|
||||
)
|
||||
if time.Now().UnixNano()%2 == 0 {
|
||||
return fmt.Sprintf(uaBase, uaWindows)
|
||||
}
|
||||
return fmt.Sprintf(uaBase, uaMac)
|
||||
}
|
540
agent/client_test.go
Normal file
540
agent/client_test.go
Normal file
@@ -0,0 +1,540 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"crypto/ed25519"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel"
|
||||
|
||||
"github.com/henrygd/beszel/src/common"
|
||||
|
||||
"github.com/fxamacker/cbor/v2"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
// TestNewWebSocketClient tests WebSocket client creation
|
||||
func TestNewWebSocketClient(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
hubURL string
|
||||
token string
|
||||
expectError bool
|
||||
errorMsg string
|
||||
}{
|
||||
{
|
||||
name: "valid configuration",
|
||||
hubURL: "http://localhost:8080",
|
||||
token: "test-token-123",
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "valid https URL",
|
||||
hubURL: "https://hub.example.com",
|
||||
token: "secure-token",
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "missing hub URL",
|
||||
hubURL: "",
|
||||
token: "test-token",
|
||||
expectError: true,
|
||||
errorMsg: "HUB_URL environment variable not set",
|
||||
},
|
||||
{
|
||||
name: "invalid URL",
|
||||
hubURL: "ht\ttp://invalid",
|
||||
token: "test-token",
|
||||
expectError: true,
|
||||
errorMsg: "invalid hub URL",
|
||||
},
|
||||
{
|
||||
name: "missing token",
|
||||
hubURL: "http://localhost:8080",
|
||||
token: "",
|
||||
expectError: true,
|
||||
errorMsg: "must set TOKEN or TOKEN_FILE",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Set up environment
|
||||
if tc.hubURL != "" {
|
||||
os.Setenv("BESZEL_AGENT_HUB_URL", tc.hubURL)
|
||||
} else {
|
||||
os.Unsetenv("BESZEL_AGENT_HUB_URL")
|
||||
}
|
||||
if tc.token != "" {
|
||||
os.Setenv("BESZEL_AGENT_TOKEN", tc.token)
|
||||
} else {
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
}
|
||||
defer func() {
|
||||
os.Unsetenv("BESZEL_AGENT_HUB_URL")
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
}()
|
||||
|
||||
client, err := newWebSocketClient(agent)
|
||||
|
||||
if tc.expectError {
|
||||
assert.Error(t, err)
|
||||
if err != nil && tc.errorMsg != "" {
|
||||
assert.Contains(t, err.Error(), tc.errorMsg)
|
||||
}
|
||||
assert.Nil(t, client)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, client)
|
||||
assert.Equal(t, agent, client.agent)
|
||||
assert.Equal(t, tc.token, client.token)
|
||||
assert.Equal(t, tc.hubURL, client.hubURL.String())
|
||||
assert.NotEmpty(t, client.fingerprint)
|
||||
assert.NotNil(t, client.hubRequest)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestWebSocketClient_GetOptions tests WebSocket client options configuration
|
||||
func TestWebSocketClient_GetOptions(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
inputURL string
|
||||
expectedScheme string
|
||||
expectedPath string
|
||||
}{
|
||||
{
|
||||
name: "http to ws conversion",
|
||||
inputURL: "http://localhost:8080",
|
||||
expectedScheme: "ws",
|
||||
expectedPath: "/api/beszel/agent-connect",
|
||||
},
|
||||
{
|
||||
name: "https to wss conversion",
|
||||
inputURL: "https://hub.example.com",
|
||||
expectedScheme: "wss",
|
||||
expectedPath: "/api/beszel/agent-connect",
|
||||
},
|
||||
{
|
||||
name: "existing path preservation",
|
||||
inputURL: "http://localhost:8080/custom/path",
|
||||
expectedScheme: "ws",
|
||||
expectedPath: "/custom/path/api/beszel/agent-connect",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Set up environment
|
||||
os.Setenv("BESZEL_AGENT_HUB_URL", tc.inputURL)
|
||||
os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
|
||||
defer func() {
|
||||
os.Unsetenv("BESZEL_AGENT_HUB_URL")
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
}()
|
||||
|
||||
client, err := newWebSocketClient(agent)
|
||||
require.NoError(t, err)
|
||||
|
||||
options := client.getOptions()
|
||||
|
||||
// Parse the WebSocket URL
|
||||
wsURL, err := url.Parse(options.Addr)
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.Equal(t, tc.expectedScheme, wsURL.Scheme)
|
||||
assert.Equal(t, tc.expectedPath, wsURL.Path)
|
||||
|
||||
// Check headers
|
||||
assert.Equal(t, "test-token", options.RequestHeader.Get("X-Token"))
|
||||
assert.Equal(t, beszel.Version, options.RequestHeader.Get("X-Beszel"))
|
||||
assert.Contains(t, options.RequestHeader.Get("User-Agent"), "Mozilla/5.0")
|
||||
|
||||
// Test options caching
|
||||
options2 := client.getOptions()
|
||||
assert.Same(t, options, options2, "Options should be cached")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestWebSocketClient_VerifySignature tests signature verification
|
||||
func TestWebSocketClient_VerifySignature(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
|
||||
// Generate test key pairs
|
||||
_, goodPrivKey, err := ed25519.GenerateKey(nil)
|
||||
require.NoError(t, err)
|
||||
goodPubKey, err := ssh.NewPublicKey(goodPrivKey.Public().(ed25519.PublicKey))
|
||||
require.NoError(t, err)
|
||||
|
||||
_, badPrivKey, err := ed25519.GenerateKey(nil)
|
||||
require.NoError(t, err)
|
||||
badPubKey, err := ssh.NewPublicKey(badPrivKey.Public().(ed25519.PublicKey))
|
||||
require.NoError(t, err)
|
||||
|
||||
// Set up environment
|
||||
os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
|
||||
os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
|
||||
defer func() {
|
||||
os.Unsetenv("BESZEL_AGENT_HUB_URL")
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
}()
|
||||
|
||||
client, err := newWebSocketClient(agent)
|
||||
require.NoError(t, err)
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
keys []ssh.PublicKey
|
||||
token string
|
||||
signWith ed25519.PrivateKey
|
||||
expectError bool
|
||||
}{
|
||||
{
|
||||
name: "valid signature with correct key",
|
||||
keys: []ssh.PublicKey{goodPubKey},
|
||||
token: "test-token",
|
||||
signWith: goodPrivKey,
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "invalid signature with wrong key",
|
||||
keys: []ssh.PublicKey{goodPubKey},
|
||||
token: "test-token",
|
||||
signWith: badPrivKey,
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "valid signature with multiple keys",
|
||||
keys: []ssh.PublicKey{badPubKey, goodPubKey},
|
||||
token: "test-token",
|
||||
signWith: goodPrivKey,
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "no valid keys",
|
||||
keys: []ssh.PublicKey{badPubKey},
|
||||
token: "test-token",
|
||||
signWith: goodPrivKey,
|
||||
expectError: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Set up agent with test keys
|
||||
agent.keys = tc.keys
|
||||
client.token = tc.token
|
||||
|
||||
// Create signature
|
||||
signature := ed25519.Sign(tc.signWith, []byte(tc.token))
|
||||
|
||||
err := client.verifySignature(signature)
|
||||
|
||||
if tc.expectError {
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "invalid signature")
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestWebSocketClient_HandleHubRequest tests hub request routing (basic verification logic)
|
||||
func TestWebSocketClient_HandleHubRequest(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
|
||||
// Set up environment
|
||||
os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
|
||||
os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
|
||||
defer func() {
|
||||
os.Unsetenv("BESZEL_AGENT_HUB_URL")
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
}()
|
||||
|
||||
client, err := newWebSocketClient(agent)
|
||||
require.NoError(t, err)
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
action common.WebSocketAction
|
||||
hubVerified bool
|
||||
expectError bool
|
||||
errorMsg string
|
||||
}{
|
||||
{
|
||||
name: "CheckFingerprint without verification",
|
||||
action: common.CheckFingerprint,
|
||||
hubVerified: false,
|
||||
expectError: false, // CheckFingerprint is allowed without verification
|
||||
},
|
||||
{
|
||||
name: "GetData without verification",
|
||||
action: common.GetData,
|
||||
hubVerified: false,
|
||||
expectError: true,
|
||||
errorMsg: "hub not verified",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
client.hubVerified = tc.hubVerified
|
||||
|
||||
// Create minimal request
|
||||
hubRequest := &common.HubRequest[cbor.RawMessage]{
|
||||
Action: tc.action,
|
||||
Data: cbor.RawMessage{},
|
||||
}
|
||||
|
||||
err := client.handleHubRequest(hubRequest)
|
||||
|
||||
if tc.expectError {
|
||||
assert.Error(t, err)
|
||||
if tc.errorMsg != "" {
|
||||
assert.Contains(t, err.Error(), tc.errorMsg)
|
||||
}
|
||||
} else {
|
||||
// For CheckFingerprint, we expect a decode error since we're not providing valid data,
|
||||
// but it shouldn't be the "hub not verified" error
|
||||
if err != nil && tc.errorMsg != "" {
|
||||
assert.NotContains(t, err.Error(), tc.errorMsg)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestWebSocketClient_GetUserAgent tests user agent generation
|
||||
func TestGetUserAgent(t *testing.T) {
|
||||
// Run multiple times to check both variants
|
||||
userAgents := make(map[string]bool)
|
||||
|
||||
for range 20 {
|
||||
ua := getUserAgent()
|
||||
userAgents[ua] = true
|
||||
|
||||
// Check that it's a valid Mozilla user agent
|
||||
assert.Contains(t, ua, "Mozilla/5.0")
|
||||
assert.Contains(t, ua, "AppleWebKit/537.36")
|
||||
assert.Contains(t, ua, "Chrome/124.0.0.0")
|
||||
assert.Contains(t, ua, "Safari/537.36")
|
||||
|
||||
// Should contain either Windows or Mac
|
||||
isWindows := strings.Contains(ua, "Windows NT 11.0")
|
||||
isMac := strings.Contains(ua, "Macintosh; Intel Mac OS X 14_0_0")
|
||||
assert.True(t, isWindows || isMac, "User agent should contain either Windows or Mac identifier")
|
||||
}
|
||||
|
||||
// With enough iterations, we should see both variants
|
||||
// though this might occasionally fail
|
||||
if len(userAgents) == 1 {
|
||||
t.Log("Note: Only one user agent variant was generated in this test run")
|
||||
}
|
||||
}
|
||||
|
||||
// TestWebSocketClient_Close tests connection closing
|
||||
func TestWebSocketClient_Close(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
|
||||
// Set up environment
|
||||
os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
|
||||
os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
|
||||
defer func() {
|
||||
os.Unsetenv("BESZEL_AGENT_HUB_URL")
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
}()
|
||||
|
||||
client, err := newWebSocketClient(agent)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Test closing with nil connection (should not panic)
|
||||
assert.NotPanics(t, func() {
|
||||
client.Close()
|
||||
})
|
||||
}
|
||||
|
||||
// TestWebSocketClient_ConnectRateLimit tests connection rate limiting
|
||||
func TestWebSocketClient_ConnectRateLimit(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
|
||||
// Set up environment
|
||||
os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
|
||||
os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
|
||||
defer func() {
|
||||
os.Unsetenv("BESZEL_AGENT_HUB_URL")
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
}()
|
||||
|
||||
client, err := newWebSocketClient(agent)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Set recent connection attempt
|
||||
client.lastConnectAttempt = time.Now()
|
||||
|
||||
// Test that connection fails quickly due to rate limiting
|
||||
// This won't actually connect but should fail fast
|
||||
err = client.Connect()
|
||||
assert.Error(t, err, "Connection should fail but not hang")
|
||||
}
|
||||
|
||||
// TestGetToken tests the getToken function with various scenarios
|
||||
func TestGetToken(t *testing.T) {
|
||||
unsetEnvVars := func() {
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
os.Unsetenv("TOKEN")
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN_FILE")
|
||||
os.Unsetenv("TOKEN_FILE")
|
||||
}
|
||||
|
||||
t.Run("token from TOKEN environment variable", func(t *testing.T) {
|
||||
unsetEnvVars()
|
||||
|
||||
// Set TOKEN env var
|
||||
expectedToken := "test-token-from-env"
|
||||
os.Setenv("TOKEN", expectedToken)
|
||||
defer os.Unsetenv("TOKEN")
|
||||
|
||||
token, err := getToken()
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, expectedToken, token)
|
||||
})
|
||||
|
||||
t.Run("token from BESZEL_AGENT_TOKEN environment variable", func(t *testing.T) {
|
||||
unsetEnvVars()
|
||||
|
||||
// Set BESZEL_AGENT_TOKEN env var (should take precedence)
|
||||
expectedToken := "test-token-from-beszel-env"
|
||||
os.Setenv("BESZEL_AGENT_TOKEN", expectedToken)
|
||||
defer os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
|
||||
token, err := getToken()
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, expectedToken, token)
|
||||
})
|
||||
|
||||
t.Run("token from TOKEN_FILE", func(t *testing.T) {
|
||||
unsetEnvVars()
|
||||
|
||||
// Create a temporary token file
|
||||
expectedToken := "test-token-from-file"
|
||||
tokenFile, err := os.CreateTemp("", "token-test-*.txt")
|
||||
require.NoError(t, err)
|
||||
defer os.Remove(tokenFile.Name())
|
||||
|
||||
_, err = tokenFile.WriteString(expectedToken)
|
||||
require.NoError(t, err)
|
||||
tokenFile.Close()
|
||||
|
||||
// Set TOKEN_FILE env var
|
||||
os.Setenv("TOKEN_FILE", tokenFile.Name())
|
||||
defer os.Unsetenv("TOKEN_FILE")
|
||||
|
||||
token, err := getToken()
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, expectedToken, token)
|
||||
})
|
||||
|
||||
t.Run("token from BESZEL_AGENT_TOKEN_FILE", func(t *testing.T) {
|
||||
unsetEnvVars()
|
||||
|
||||
// Create a temporary token file
|
||||
expectedToken := "test-token-from-beszel-file"
|
||||
tokenFile, err := os.CreateTemp("", "token-test-*.txt")
|
||||
require.NoError(t, err)
|
||||
defer os.Remove(tokenFile.Name())
|
||||
|
||||
_, err = tokenFile.WriteString(expectedToken)
|
||||
require.NoError(t, err)
|
||||
tokenFile.Close()
|
||||
|
||||
// Set BESZEL_AGENT_TOKEN_FILE env var (should take precedence)
|
||||
os.Setenv("BESZEL_AGENT_TOKEN_FILE", tokenFile.Name())
|
||||
defer os.Unsetenv("BESZEL_AGENT_TOKEN_FILE")
|
||||
|
||||
token, err := getToken()
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, expectedToken, token)
|
||||
})
|
||||
|
||||
t.Run("TOKEN takes precedence over TOKEN_FILE", func(t *testing.T) {
|
||||
unsetEnvVars()
|
||||
|
||||
// Create a temporary token file
|
||||
fileToken := "token-from-file"
|
||||
tokenFile, err := os.CreateTemp("", "token-test-*.txt")
|
||||
require.NoError(t, err)
|
||||
defer os.Remove(tokenFile.Name())
|
||||
|
||||
_, err = tokenFile.WriteString(fileToken)
|
||||
require.NoError(t, err)
|
||||
tokenFile.Close()
|
||||
|
||||
// Set both TOKEN and TOKEN_FILE
|
||||
envToken := "token-from-env"
|
||||
os.Setenv("TOKEN", envToken)
|
||||
os.Setenv("TOKEN_FILE", tokenFile.Name())
|
||||
defer func() {
|
||||
os.Unsetenv("TOKEN")
|
||||
os.Unsetenv("TOKEN_FILE")
|
||||
}()
|
||||
|
||||
token, err := getToken()
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, envToken, token, "TOKEN should take precedence over TOKEN_FILE")
|
||||
})
|
||||
|
||||
t.Run("error when neither TOKEN nor TOKEN_FILE is set", func(t *testing.T) {
|
||||
unsetEnvVars()
|
||||
|
||||
token, err := getToken()
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, "", token)
|
||||
assert.Contains(t, err.Error(), "must set TOKEN or TOKEN_FILE")
|
||||
})
|
||||
|
||||
t.Run("error when TOKEN_FILE points to non-existent file", func(t *testing.T) {
|
||||
unsetEnvVars()
|
||||
|
||||
// Set TOKEN_FILE to a non-existent file
|
||||
os.Setenv("TOKEN_FILE", "/non/existent/file.txt")
|
||||
defer os.Unsetenv("TOKEN_FILE")
|
||||
|
||||
token, err := getToken()
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, "", token)
|
||||
assert.Contains(t, err.Error(), "no such file or directory")
|
||||
})
|
||||
|
||||
t.Run("handles empty token file", func(t *testing.T) {
|
||||
unsetEnvVars()
|
||||
|
||||
// Create an empty token file
|
||||
tokenFile, err := os.CreateTemp("", "token-test-*.txt")
|
||||
require.NoError(t, err)
|
||||
defer os.Remove(tokenFile.Name())
|
||||
tokenFile.Close()
|
||||
|
||||
// Set TOKEN_FILE env var
|
||||
os.Setenv("TOKEN_FILE", tokenFile.Name())
|
||||
defer os.Unsetenv("TOKEN_FILE")
|
||||
|
||||
token, err := getToken()
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "", token, "Empty file should return empty string")
|
||||
})
|
||||
}
|
221
agent/connection_manager.go
Normal file
221
agent/connection_manager.go
Normal file
@@ -0,0 +1,221 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/health"
|
||||
)
|
||||
|
||||
// ConnectionManager manages the connection state and events for the agent.
|
||||
// It handles both WebSocket and SSH connections, automatically switching between
|
||||
// them based on availability and managing reconnection attempts.
|
||||
type ConnectionManager struct {
|
||||
agent *Agent // Reference to the parent agent
|
||||
State ConnectionState // Current connection state
|
||||
eventChan chan ConnectionEvent // Channel for connection events
|
||||
wsClient *WebSocketClient // WebSocket client for hub communication
|
||||
serverOptions ServerOptions // Configuration for SSH server
|
||||
wsTicker *time.Ticker // Ticker for WebSocket connection attempts
|
||||
isConnecting bool // Prevents multiple simultaneous reconnection attempts
|
||||
}
|
||||
|
||||
// ConnectionState represents the current connection state of the agent.
|
||||
type ConnectionState uint8
|
||||
|
||||
// ConnectionEvent represents connection-related events that can occur.
|
||||
type ConnectionEvent uint8
|
||||
|
||||
// Connection states
|
||||
const (
|
||||
Disconnected ConnectionState = iota // No active connection
|
||||
WebSocketConnected // Connected via WebSocket
|
||||
SSHConnected // Connected via SSH
|
||||
)
|
||||
|
||||
// Connection events
|
||||
const (
|
||||
WebSocketConnect ConnectionEvent = iota // WebSocket connection established
|
||||
WebSocketDisconnect // WebSocket connection lost
|
||||
SSHConnect // SSH connection established
|
||||
SSHDisconnect // SSH connection lost
|
||||
)
|
||||
|
||||
const wsTickerInterval = 10 * time.Second
|
||||
|
||||
// newConnectionManager creates a new connection manager for the given agent.
|
||||
func newConnectionManager(agent *Agent) *ConnectionManager {
|
||||
cm := &ConnectionManager{
|
||||
agent: agent,
|
||||
State: Disconnected,
|
||||
}
|
||||
return cm
|
||||
}
|
||||
|
||||
// startWsTicker starts or resets the WebSocket connection attempt ticker.
|
||||
func (c *ConnectionManager) startWsTicker() {
|
||||
if c.wsTicker == nil {
|
||||
c.wsTicker = time.NewTicker(wsTickerInterval)
|
||||
} else {
|
||||
c.wsTicker.Reset(wsTickerInterval)
|
||||
}
|
||||
}
|
||||
|
||||
// stopWsTicker stops the WebSocket connection attempt ticker.
|
||||
func (c *ConnectionManager) stopWsTicker() {
|
||||
if c.wsTicker != nil {
|
||||
c.wsTicker.Stop()
|
||||
}
|
||||
}
|
||||
|
||||
// Start begins connection attempts and enters the main event loop.
|
||||
// It handles connection events, periodic health updates, and graceful shutdown.
|
||||
func (c *ConnectionManager) Start(serverOptions ServerOptions) error {
|
||||
if c.eventChan != nil {
|
||||
return errors.New("already started")
|
||||
}
|
||||
|
||||
wsClient, err := newWebSocketClient(c.agent)
|
||||
if err != nil {
|
||||
slog.Warn("Error creating WebSocket client", "err", err)
|
||||
}
|
||||
c.wsClient = wsClient
|
||||
|
||||
c.serverOptions = serverOptions
|
||||
c.eventChan = make(chan ConnectionEvent, 1)
|
||||
|
||||
// signal handling for shutdown
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
c.startWsTicker()
|
||||
c.connect()
|
||||
|
||||
// update health status immediately and every 90 seconds
|
||||
_ = health.Update()
|
||||
healthTicker := time.Tick(90 * time.Second)
|
||||
|
||||
for {
|
||||
select {
|
||||
case connectionEvent := <-c.eventChan:
|
||||
c.handleEvent(connectionEvent)
|
||||
case <-c.wsTicker.C:
|
||||
_ = c.startWebSocketConnection()
|
||||
case <-healthTicker:
|
||||
_ = health.Update()
|
||||
case <-sigChan:
|
||||
slog.Info("Shutting down")
|
||||
_ = c.agent.StopServer()
|
||||
c.closeWebSocket()
|
||||
return health.CleanUp()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleEvent processes connection events and updates the connection state accordingly.
|
||||
func (c *ConnectionManager) handleEvent(event ConnectionEvent) {
|
||||
switch event {
|
||||
case WebSocketConnect:
|
||||
c.handleStateChange(WebSocketConnected)
|
||||
case SSHConnect:
|
||||
c.handleStateChange(SSHConnected)
|
||||
case WebSocketDisconnect:
|
||||
if c.State == WebSocketConnected {
|
||||
c.handleStateChange(Disconnected)
|
||||
}
|
||||
case SSHDisconnect:
|
||||
if c.State == SSHConnected {
|
||||
c.handleStateChange(Disconnected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleStateChange updates the connection state and performs necessary actions
|
||||
// based on the new state, including stopping services and initiating reconnections.
|
||||
func (c *ConnectionManager) handleStateChange(newState ConnectionState) {
|
||||
if c.State == newState {
|
||||
return
|
||||
}
|
||||
c.State = newState
|
||||
switch newState {
|
||||
case WebSocketConnected:
|
||||
slog.Info("WebSocket connected", "host", c.wsClient.hubURL.Host)
|
||||
c.stopWsTicker()
|
||||
_ = c.agent.StopServer()
|
||||
c.isConnecting = false
|
||||
case SSHConnected:
|
||||
// stop new ws connection attempts
|
||||
slog.Info("SSH connection established")
|
||||
c.stopWsTicker()
|
||||
c.isConnecting = false
|
||||
case Disconnected:
|
||||
if c.isConnecting {
|
||||
// Already handling reconnection, avoid duplicate attempts
|
||||
return
|
||||
}
|
||||
c.isConnecting = true
|
||||
slog.Warn("Disconnected from hub")
|
||||
// make sure old ws connection is closed
|
||||
c.closeWebSocket()
|
||||
// reconnect
|
||||
go c.connect()
|
||||
}
|
||||
}
|
||||
|
||||
// connect handles the connection logic with proper delays and priority.
|
||||
// It attempts WebSocket connection first, falling back to SSH server if needed.
|
||||
func (c *ConnectionManager) connect() {
|
||||
c.isConnecting = true
|
||||
defer func() {
|
||||
c.isConnecting = false
|
||||
}()
|
||||
|
||||
if c.wsClient != nil && time.Since(c.wsClient.lastConnectAttempt) < 5*time.Second {
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
|
||||
// Try WebSocket first, if it fails, start SSH server
|
||||
err := c.startWebSocketConnection()
|
||||
if err != nil && c.State == Disconnected {
|
||||
c.startSSHServer()
|
||||
c.startWsTicker()
|
||||
}
|
||||
}
|
||||
|
||||
// startWebSocketConnection attempts to establish a WebSocket connection to the hub.
|
||||
func (c *ConnectionManager) startWebSocketConnection() error {
|
||||
if c.State != Disconnected {
|
||||
return errors.New("already connected")
|
||||
}
|
||||
if c.wsClient == nil {
|
||||
return errors.New("WebSocket client not initialized")
|
||||
}
|
||||
if time.Since(c.wsClient.lastConnectAttempt) < 5*time.Second {
|
||||
return errors.New("already connecting")
|
||||
}
|
||||
|
||||
err := c.wsClient.Connect()
|
||||
if err != nil {
|
||||
slog.Warn("WebSocket connection failed", "err", err)
|
||||
c.closeWebSocket()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// startSSHServer starts the SSH server if the agent is currently disconnected.
|
||||
func (c *ConnectionManager) startSSHServer() {
|
||||
if c.State == Disconnected {
|
||||
go c.agent.StartServer(c.serverOptions)
|
||||
}
|
||||
}
|
||||
|
||||
// closeWebSocket closes the WebSocket connection if it exists.
|
||||
func (c *ConnectionManager) closeWebSocket() {
|
||||
if c.wsClient != nil {
|
||||
c.wsClient.Close()
|
||||
}
|
||||
}
|
315
agent/connection_manager_test.go
Normal file
315
agent/connection_manager_test.go
Normal file
@@ -0,0 +1,315 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"crypto/ed25519"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/url"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
func createTestAgent(t *testing.T) *Agent {
|
||||
dataDir := t.TempDir()
|
||||
agent, err := NewAgent(dataDir)
|
||||
require.NoError(t, err)
|
||||
return agent
|
||||
}
|
||||
|
||||
func createTestServerOptions(t *testing.T) ServerOptions {
|
||||
// Generate test key pair
|
||||
_, privKey, err := ed25519.GenerateKey(nil)
|
||||
require.NoError(t, err)
|
||||
sshPubKey, err := ssh.NewPublicKey(privKey.Public().(ed25519.PublicKey))
|
||||
require.NoError(t, err)
|
||||
|
||||
// Find available port
|
||||
listener, err := net.Listen("tcp", "127.0.0.1:0")
|
||||
require.NoError(t, err)
|
||||
port := listener.Addr().(*net.TCPAddr).Port
|
||||
listener.Close()
|
||||
|
||||
return ServerOptions{
|
||||
Network: "tcp",
|
||||
Addr: fmt.Sprintf("127.0.0.1:%d", port),
|
||||
Keys: []ssh.PublicKey{sshPubKey},
|
||||
}
|
||||
}
|
||||
|
||||
// TestConnectionManager_NewConnectionManager tests connection manager creation
|
||||
func TestConnectionManager_NewConnectionManager(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
cm := newConnectionManager(agent)
|
||||
|
||||
assert.NotNil(t, cm, "Connection manager should not be nil")
|
||||
assert.Equal(t, agent, cm.agent, "Agent reference should be set")
|
||||
assert.Equal(t, Disconnected, cm.State, "Initial state should be Disconnected")
|
||||
assert.Nil(t, cm.eventChan, "Event channel should be nil initially")
|
||||
assert.Nil(t, cm.wsClient, "WebSocket client should be nil initially")
|
||||
assert.Nil(t, cm.wsTicker, "WebSocket ticker should be nil initially")
|
||||
assert.False(t, cm.isConnecting, "isConnecting should be false initially")
|
||||
}
|
||||
|
||||
// TestConnectionManager_StateTransitions tests basic state transitions
|
||||
func TestConnectionManager_StateTransitions(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
cm := agent.connectionManager
|
||||
initialState := cm.State
|
||||
cm.wsClient = &WebSocketClient{
|
||||
hubURL: &url.URL{
|
||||
Host: "localhost:8080",
|
||||
},
|
||||
}
|
||||
assert.NotNil(t, cm, "Connection manager should not be nil")
|
||||
assert.Equal(t, Disconnected, initialState, "Initial state should be Disconnected")
|
||||
|
||||
// Test state transitions
|
||||
cm.handleStateChange(WebSocketConnected)
|
||||
assert.Equal(t, WebSocketConnected, cm.State, "State should change to WebSocketConnected")
|
||||
|
||||
cm.handleStateChange(SSHConnected)
|
||||
assert.Equal(t, SSHConnected, cm.State, "State should change to SSHConnected")
|
||||
|
||||
cm.handleStateChange(Disconnected)
|
||||
assert.Equal(t, Disconnected, cm.State, "State should change to Disconnected")
|
||||
|
||||
// Test that same state doesn't trigger changes
|
||||
cm.State = WebSocketConnected
|
||||
cm.handleStateChange(WebSocketConnected)
|
||||
assert.Equal(t, WebSocketConnected, cm.State, "Same state should not trigger change")
|
||||
}
|
||||
|
||||
// TestConnectionManager_EventHandling tests event handling logic
|
||||
func TestConnectionManager_EventHandling(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
cm := agent.connectionManager
|
||||
cm.wsClient = &WebSocketClient{
|
||||
hubURL: &url.URL{
|
||||
Host: "localhost:8080",
|
||||
},
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
initialState ConnectionState
|
||||
event ConnectionEvent
|
||||
expectedState ConnectionState
|
||||
}{
|
||||
{
|
||||
name: "WebSocket connect from disconnected",
|
||||
initialState: Disconnected,
|
||||
event: WebSocketConnect,
|
||||
expectedState: WebSocketConnected,
|
||||
},
|
||||
{
|
||||
name: "SSH connect from disconnected",
|
||||
initialState: Disconnected,
|
||||
event: SSHConnect,
|
||||
expectedState: SSHConnected,
|
||||
},
|
||||
{
|
||||
name: "WebSocket disconnect from connected",
|
||||
initialState: WebSocketConnected,
|
||||
event: WebSocketDisconnect,
|
||||
expectedState: Disconnected,
|
||||
},
|
||||
{
|
||||
name: "SSH disconnect from connected",
|
||||
initialState: SSHConnected,
|
||||
event: SSHDisconnect,
|
||||
expectedState: Disconnected,
|
||||
},
|
||||
{
|
||||
name: "WebSocket disconnect from SSH connected (no change)",
|
||||
initialState: SSHConnected,
|
||||
event: WebSocketDisconnect,
|
||||
expectedState: SSHConnected,
|
||||
},
|
||||
{
|
||||
name: "SSH disconnect from WebSocket connected (no change)",
|
||||
initialState: WebSocketConnected,
|
||||
event: SSHDisconnect,
|
||||
expectedState: WebSocketConnected,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
cm.State = tc.initialState
|
||||
cm.handleEvent(tc.event)
|
||||
assert.Equal(t, tc.expectedState, cm.State, "State should match expected after event")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestConnectionManager_TickerManagement tests WebSocket ticker management
|
||||
func TestConnectionManager_TickerManagement(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
cm := agent.connectionManager
|
||||
|
||||
// Test starting ticker
|
||||
cm.startWsTicker()
|
||||
assert.NotNil(t, cm.wsTicker, "Ticker should be created")
|
||||
|
||||
// Test stopping ticker (should not panic)
|
||||
assert.NotPanics(t, func() {
|
||||
cm.stopWsTicker()
|
||||
}, "Stopping ticker should not panic")
|
||||
|
||||
// Test stopping nil ticker (should not panic)
|
||||
cm.wsTicker = nil
|
||||
assert.NotPanics(t, func() {
|
||||
cm.stopWsTicker()
|
||||
}, "Stopping nil ticker should not panic")
|
||||
|
||||
// Test restarting ticker
|
||||
cm.startWsTicker()
|
||||
assert.NotNil(t, cm.wsTicker, "Ticker should be recreated")
|
||||
|
||||
// Test resetting existing ticker
|
||||
firstTicker := cm.wsTicker
|
||||
cm.startWsTicker()
|
||||
assert.Equal(t, firstTicker, cm.wsTicker, "Same ticker instance should be reused")
|
||||
|
||||
cm.stopWsTicker()
|
||||
}
|
||||
|
||||
// TestConnectionManager_WebSocketConnectionFlow tests WebSocket connection logic
|
||||
func TestConnectionManager_WebSocketConnectionFlow(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("Skipping WebSocket connection test in short mode")
|
||||
}
|
||||
|
||||
agent := createTestAgent(t)
|
||||
cm := agent.connectionManager
|
||||
|
||||
// Test WebSocket connection without proper environment
|
||||
err := cm.startWebSocketConnection()
|
||||
assert.Error(t, err, "WebSocket connection should fail without proper environment")
|
||||
assert.Equal(t, Disconnected, cm.State, "State should remain Disconnected after failed connection")
|
||||
|
||||
// Test with invalid URL
|
||||
os.Setenv("BESZEL_AGENT_HUB_URL", "invalid-url")
|
||||
os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
|
||||
defer func() {
|
||||
os.Unsetenv("BESZEL_AGENT_HUB_URL")
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
}()
|
||||
|
||||
// Test with missing token
|
||||
os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
|
||||
_, err2 := newWebSocketClient(agent)
|
||||
assert.Error(t, err2, "WebSocket client creation should fail without token")
|
||||
}
|
||||
|
||||
// TestConnectionManager_ReconnectionLogic tests reconnection prevention logic
|
||||
func TestConnectionManager_ReconnectionLogic(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
cm := agent.connectionManager
|
||||
cm.eventChan = make(chan ConnectionEvent, 1)
|
||||
|
||||
// Test that isConnecting flag prevents duplicate reconnection attempts
|
||||
// Start from connected state, then simulate disconnect
|
||||
cm.State = WebSocketConnected
|
||||
cm.isConnecting = false
|
||||
|
||||
// First disconnect should trigger reconnection logic
|
||||
cm.handleStateChange(Disconnected)
|
||||
assert.Equal(t, Disconnected, cm.State, "Should change to disconnected")
|
||||
assert.True(t, cm.isConnecting, "Should set isConnecting flag")
|
||||
}
|
||||
|
||||
// TestConnectionManager_ConnectWithRateLimit tests connection rate limiting
|
||||
func TestConnectionManager_ConnectWithRateLimit(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
cm := agent.connectionManager
|
||||
|
||||
// Set up environment for WebSocket client creation
|
||||
os.Setenv("BESZEL_AGENT_HUB_URL", "ws://localhost:8080")
|
||||
os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
|
||||
defer func() {
|
||||
os.Unsetenv("BESZEL_AGENT_HUB_URL")
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
}()
|
||||
|
||||
// Create WebSocket client
|
||||
wsClient, err := newWebSocketClient(agent)
|
||||
require.NoError(t, err)
|
||||
cm.wsClient = wsClient
|
||||
|
||||
// Set recent connection attempt
|
||||
cm.wsClient.lastConnectAttempt = time.Now()
|
||||
|
||||
// Test that connection is rate limited
|
||||
err = cm.startWebSocketConnection()
|
||||
assert.Error(t, err, "Should error due to rate limiting")
|
||||
assert.Contains(t, err.Error(), "already connecting", "Error should indicate rate limiting")
|
||||
|
||||
// Test connection after rate limit expires
|
||||
cm.wsClient.lastConnectAttempt = time.Now().Add(-10 * time.Second)
|
||||
err = cm.startWebSocketConnection()
|
||||
// This will fail due to no actual server, but should not be rate limited
|
||||
assert.Error(t, err, "Connection should fail but not due to rate limiting")
|
||||
assert.NotContains(t, err.Error(), "already connecting", "Error should not indicate rate limiting")
|
||||
}
|
||||
|
||||
// TestConnectionManager_StartWithInvalidConfig tests starting with invalid configuration
|
||||
func TestConnectionManager_StartWithInvalidConfig(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
cm := agent.connectionManager
|
||||
serverOptions := createTestServerOptions(t)
|
||||
|
||||
// Test starting when already started
|
||||
cm.eventChan = make(chan ConnectionEvent, 5)
|
||||
err := cm.Start(serverOptions)
|
||||
assert.Error(t, err, "Should error when starting already started connection manager")
|
||||
}
|
||||
|
||||
// TestConnectionManager_CloseWebSocket tests WebSocket closing
|
||||
func TestConnectionManager_CloseWebSocket(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
cm := agent.connectionManager
|
||||
|
||||
// Test closing when no WebSocket client exists
|
||||
assert.NotPanics(t, func() {
|
||||
cm.closeWebSocket()
|
||||
}, "Should not panic when closing nil WebSocket client")
|
||||
|
||||
// Set up environment and create WebSocket client
|
||||
os.Setenv("BESZEL_AGENT_HUB_URL", "ws://localhost:8080")
|
||||
os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
|
||||
defer func() {
|
||||
os.Unsetenv("BESZEL_AGENT_HUB_URL")
|
||||
os.Unsetenv("BESZEL_AGENT_TOKEN")
|
||||
}()
|
||||
|
||||
wsClient, err := newWebSocketClient(agent)
|
||||
require.NoError(t, err)
|
||||
cm.wsClient = wsClient
|
||||
|
||||
// Test closing when WebSocket client exists
|
||||
assert.NotPanics(t, func() {
|
||||
cm.closeWebSocket()
|
||||
}, "Should not panic when closing WebSocket client")
|
||||
}
|
||||
|
||||
// TestConnectionManager_ConnectFlow tests the connect method
|
||||
func TestConnectionManager_ConnectFlow(t *testing.T) {
|
||||
agent := createTestAgent(t)
|
||||
cm := agent.connectionManager
|
||||
|
||||
// Test connect without WebSocket client
|
||||
assert.NotPanics(t, func() {
|
||||
cm.connect()
|
||||
}, "Connect should not panic without WebSocket client")
|
||||
}
|
117
agent/data_dir.go
Normal file
117
agent/data_dir.go
Normal file
@@ -0,0 +1,117 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
)
|
||||
|
||||
// getDataDir returns the path to the data directory for the agent and an error
|
||||
// if the directory is not valid. Attempts to find the optimal data directory if
|
||||
// no data directories are provided.
|
||||
func getDataDir(dataDirs ...string) (string, error) {
|
||||
if len(dataDirs) > 0 {
|
||||
return testDataDirs(dataDirs)
|
||||
}
|
||||
|
||||
dataDir, _ := GetEnv("DATA_DIR")
|
||||
if dataDir != "" {
|
||||
dataDirs = append(dataDirs, dataDir)
|
||||
}
|
||||
|
||||
if runtime.GOOS == "windows" {
|
||||
dataDirs = append(dataDirs,
|
||||
filepath.Join(os.Getenv("APPDATA"), "beszel-agent"),
|
||||
filepath.Join(os.Getenv("LOCALAPPDATA"), "beszel-agent"),
|
||||
)
|
||||
} else {
|
||||
dataDirs = append(dataDirs, "/var/lib/beszel-agent")
|
||||
if homeDir, err := os.UserHomeDir(); err == nil {
|
||||
dataDirs = append(dataDirs, filepath.Join(homeDir, ".config", "beszel"))
|
||||
}
|
||||
}
|
||||
return testDataDirs(dataDirs)
|
||||
}
|
||||
|
||||
func testDataDirs(paths []string) (string, error) {
|
||||
// first check if the directory exists and is writable
|
||||
for _, path := range paths {
|
||||
if valid, _ := isValidDataDir(path, false); valid {
|
||||
return path, nil
|
||||
}
|
||||
}
|
||||
// if the directory doesn't exist, try to create it
|
||||
for _, path := range paths {
|
||||
exists, _ := directoryExists(path)
|
||||
if exists {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Verify the created directory is actually writable
|
||||
writable, _ := directoryIsWritable(path)
|
||||
if !writable {
|
||||
continue
|
||||
}
|
||||
|
||||
return path, nil
|
||||
}
|
||||
|
||||
return "", errors.New("data directory not found")
|
||||
}
|
||||
|
||||
func isValidDataDir(path string, createIfNotExists bool) (bool, error) {
|
||||
exists, err := directoryExists(path)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if !exists {
|
||||
if !createIfNotExists {
|
||||
return false, nil
|
||||
}
|
||||
if err = os.MkdirAll(path, 0755); err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
|
||||
// Always check if the directory is writable
|
||||
writable, err := directoryIsWritable(path)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return writable, nil
|
||||
}
|
||||
|
||||
// directoryExists checks if a directory exists
|
||||
func directoryExists(path string) (bool, error) {
|
||||
// Check if directory exists
|
||||
stat, err := os.Stat(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return false, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
if !stat.IsDir() {
|
||||
return false, fmt.Errorf("%s is not a directory", path)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// directoryIsWritable tests if a directory is writable by creating and removing a temporary file
|
||||
func directoryIsWritable(path string) (bool, error) {
|
||||
testFile := filepath.Join(path, ".write-test")
|
||||
file, err := os.Create(testFile)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
defer file.Close()
|
||||
defer os.Remove(testFile)
|
||||
return true, nil
|
||||
}
|
263
agent/data_dir_test.go
Normal file
263
agent/data_dir_test.go
Normal file
@@ -0,0 +1,263 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetDataDir(t *testing.T) {
|
||||
// Test with explicit dataDir parameter
|
||||
t.Run("explicit data dir", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
result, err := getDataDir(tempDir)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tempDir, result)
|
||||
})
|
||||
|
||||
// Test with explicit non-existent dataDir that can be created
|
||||
t.Run("explicit data dir - create new", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
newDir := filepath.Join(tempDir, "new-data-dir")
|
||||
result, err := getDataDir(newDir)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, newDir, result)
|
||||
|
||||
// Verify directory was created
|
||||
stat, err := os.Stat(newDir)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, stat.IsDir())
|
||||
})
|
||||
|
||||
// Test with DATA_DIR environment variable
|
||||
t.Run("DATA_DIR environment variable", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
// Set environment variable
|
||||
oldValue := os.Getenv("DATA_DIR")
|
||||
defer func() {
|
||||
if oldValue == "" {
|
||||
os.Unsetenv("BESZEL_AGENT_DATA_DIR")
|
||||
} else {
|
||||
os.Setenv("BESZEL_AGENT_DATA_DIR", oldValue)
|
||||
}
|
||||
}()
|
||||
|
||||
os.Setenv("BESZEL_AGENT_DATA_DIR", tempDir)
|
||||
|
||||
result, err := getDataDir()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tempDir, result)
|
||||
})
|
||||
|
||||
// Test with invalid explicit dataDir
|
||||
t.Run("invalid explicit data dir", func(t *testing.T) {
|
||||
invalidPath := "/invalid/path/that/cannot/be/created"
|
||||
_, err := getDataDir(invalidPath)
|
||||
assert.Error(t, err)
|
||||
})
|
||||
|
||||
// Test fallback behavior (empty dataDir, no env var)
|
||||
t.Run("fallback to default directories", func(t *testing.T) {
|
||||
// Clear DATA_DIR environment variable
|
||||
oldValue := os.Getenv("DATA_DIR")
|
||||
defer func() {
|
||||
if oldValue == "" {
|
||||
os.Unsetenv("DATA_DIR")
|
||||
} else {
|
||||
os.Setenv("DATA_DIR", oldValue)
|
||||
}
|
||||
}()
|
||||
os.Unsetenv("DATA_DIR")
|
||||
|
||||
// This will try platform-specific defaults, which may or may not work
|
||||
// We're mainly testing that it doesn't panic and returns some result
|
||||
result, err := getDataDir()
|
||||
// We don't assert success/failure here since it depends on system permissions
|
||||
// Just verify we get a string result if no error
|
||||
if err == nil {
|
||||
assert.NotEmpty(t, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestTestDataDirs(t *testing.T) {
|
||||
// Test with existing valid directory
|
||||
t.Run("existing valid directory", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
result, err := testDataDirs([]string{tempDir})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tempDir, result)
|
||||
})
|
||||
|
||||
// Test with multiple directories, first one valid
|
||||
t.Run("multiple dirs - first valid", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
invalidDir := "/invalid/path"
|
||||
result, err := testDataDirs([]string{tempDir, invalidDir})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tempDir, result)
|
||||
})
|
||||
|
||||
// Test with multiple directories, second one valid
|
||||
t.Run("multiple dirs - second valid", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
invalidDir := "/invalid/path"
|
||||
result, err := testDataDirs([]string{invalidDir, tempDir})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tempDir, result)
|
||||
})
|
||||
|
||||
// Test with non-existing directory that can be created
|
||||
t.Run("create new directory", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
newDir := filepath.Join(tempDir, "new-dir")
|
||||
result, err := testDataDirs([]string{newDir})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, newDir, result)
|
||||
|
||||
// Verify directory was created
|
||||
stat, err := os.Stat(newDir)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, stat.IsDir())
|
||||
})
|
||||
|
||||
// Test with no valid directories
|
||||
t.Run("no valid directories", func(t *testing.T) {
|
||||
invalidPaths := []string{"/invalid/path1", "/invalid/path2"}
|
||||
_, err := testDataDirs(invalidPaths)
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "data directory not found")
|
||||
})
|
||||
}
|
||||
|
||||
func TestIsValidDataDir(t *testing.T) {
|
||||
// Test with existing directory
|
||||
t.Run("existing directory", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
valid, err := isValidDataDir(tempDir, false)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, valid)
|
||||
})
|
||||
|
||||
// Test with non-existing directory, createIfNotExists=false
|
||||
t.Run("non-existing dir - no create", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
nonExistentDir := filepath.Join(tempDir, "does-not-exist")
|
||||
valid, err := isValidDataDir(nonExistentDir, false)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, valid)
|
||||
})
|
||||
|
||||
// Test with non-existing directory, createIfNotExists=true
|
||||
t.Run("non-existing dir - create", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
newDir := filepath.Join(tempDir, "new-dir")
|
||||
valid, err := isValidDataDir(newDir, true)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, valid)
|
||||
|
||||
// Verify directory was created
|
||||
stat, err := os.Stat(newDir)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, stat.IsDir())
|
||||
})
|
||||
|
||||
// Test with file instead of directory
|
||||
t.Run("file instead of directory", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
tempFile := filepath.Join(tempDir, "testfile")
|
||||
err := os.WriteFile(tempFile, []byte("test"), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
valid, err := isValidDataDir(tempFile, false)
|
||||
assert.Error(t, err)
|
||||
assert.False(t, valid)
|
||||
assert.Contains(t, err.Error(), "is not a directory")
|
||||
})
|
||||
}
|
||||
|
||||
func TestDirectoryExists(t *testing.T) {
|
||||
// Test with existing directory
|
||||
t.Run("existing directory", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
exists, err := directoryExists(tempDir)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, exists)
|
||||
})
|
||||
|
||||
// Test with non-existing directory
|
||||
t.Run("non-existing directory", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
nonExistentDir := filepath.Join(tempDir, "does-not-exist")
|
||||
exists, err := directoryExists(nonExistentDir)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, exists)
|
||||
})
|
||||
|
||||
// Test with file instead of directory
|
||||
t.Run("file instead of directory", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
tempFile := filepath.Join(tempDir, "testfile")
|
||||
err := os.WriteFile(tempFile, []byte("test"), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
exists, err := directoryExists(tempFile)
|
||||
assert.Error(t, err)
|
||||
assert.False(t, exists)
|
||||
assert.Contains(t, err.Error(), "is not a directory")
|
||||
})
|
||||
}
|
||||
|
||||
func TestDirectoryIsWritable(t *testing.T) {
|
||||
// Test with writable directory
|
||||
t.Run("writable directory", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
writable, err := directoryIsWritable(tempDir)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, writable)
|
||||
})
|
||||
|
||||
// Test with non-existing directory
|
||||
t.Run("non-existing directory", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
nonExistentDir := filepath.Join(tempDir, "does-not-exist")
|
||||
writable, err := directoryIsWritable(nonExistentDir)
|
||||
assert.Error(t, err)
|
||||
assert.False(t, writable)
|
||||
})
|
||||
|
||||
// Test with non-writable directory (Unix-like systems only)
|
||||
t.Run("non-writable directory", func(t *testing.T) {
|
||||
if runtime.GOOS != "linux" && runtime.GOOS != "darwin" {
|
||||
t.Skip("Skipping non-writable directory test on", runtime.GOOS)
|
||||
}
|
||||
|
||||
tempDir := t.TempDir()
|
||||
readOnlyDir := filepath.Join(tempDir, "readonly")
|
||||
|
||||
// Create the directory
|
||||
err := os.Mkdir(readOnlyDir, 0755)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Make it read-only
|
||||
err = os.Chmod(readOnlyDir, 0444)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Restore permissions after test for cleanup
|
||||
defer func() {
|
||||
os.Chmod(readOnlyDir, 0755)
|
||||
}()
|
||||
|
||||
writable, err := directoryIsWritable(readOnlyDir)
|
||||
assert.Error(t, err)
|
||||
assert.False(t, writable)
|
||||
})
|
||||
}
|
191
agent/disk.go
Normal file
191
agent/disk.go
Normal file
@@ -0,0 +1,191 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/src/entities/system"
|
||||
|
||||
"github.com/shirou/gopsutil/v4/disk"
|
||||
)
|
||||
|
||||
// Sets up the filesystems to monitor for disk usage and I/O.
|
||||
func (a *Agent) initializeDiskInfo() {
|
||||
filesystem, _ := GetEnv("FILESYSTEM")
|
||||
efPath := "/extra-filesystems"
|
||||
hasRoot := false
|
||||
|
||||
partitions, err := disk.Partitions(false)
|
||||
if err != nil {
|
||||
slog.Error("Error getting disk partitions", "err", err)
|
||||
}
|
||||
slog.Debug("Disk", "partitions", partitions)
|
||||
|
||||
// ioContext := context.WithValue(a.sensorsContext,
|
||||
// common.EnvKey, common.EnvMap{common.HostProcEnvKey: "/tmp/testproc"},
|
||||
// )
|
||||
// diskIoCounters, err := disk.IOCountersWithContext(ioContext)
|
||||
|
||||
diskIoCounters, err := disk.IOCounters()
|
||||
if err != nil {
|
||||
slog.Error("Error getting diskstats", "err", err)
|
||||
}
|
||||
slog.Debug("Disk I/O", "diskstats", diskIoCounters)
|
||||
|
||||
// Helper function to add a filesystem to fsStats if it doesn't exist
|
||||
addFsStat := func(device, mountpoint string, root bool) {
|
||||
var key string
|
||||
if runtime.GOOS == "windows" {
|
||||
key = device
|
||||
} else {
|
||||
key = filepath.Base(device)
|
||||
}
|
||||
var ioMatch bool
|
||||
if _, exists := a.fsStats[key]; !exists {
|
||||
if root {
|
||||
slog.Info("Detected root device", "name", key)
|
||||
// Check if root device is in /proc/diskstats, use fallback if not
|
||||
if _, ioMatch = diskIoCounters[key]; !ioMatch {
|
||||
key, ioMatch = findIoDevice(filesystem, diskIoCounters, a.fsStats)
|
||||
if !ioMatch {
|
||||
slog.Info("Using I/O fallback", "device", device, "mountpoint", mountpoint, "fallback", key)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Check if non-root has diskstats and fall back to folder name if not
|
||||
// Scenario: device is encrypted and named luks-2bcb02be-999d-4417-8d18-5c61e660fb6e - not in /proc/diskstats.
|
||||
// However, the device can be specified by mounting folder from luks device at /extra-filesystems/sda1
|
||||
if _, ioMatch = diskIoCounters[key]; !ioMatch {
|
||||
efBase := filepath.Base(mountpoint)
|
||||
if _, ioMatch = diskIoCounters[efBase]; ioMatch {
|
||||
key = efBase
|
||||
}
|
||||
}
|
||||
}
|
||||
a.fsStats[key] = &system.FsStats{Root: root, Mountpoint: mountpoint}
|
||||
}
|
||||
}
|
||||
|
||||
// Use FILESYSTEM env var to find root filesystem
|
||||
if filesystem != "" {
|
||||
for _, p := range partitions {
|
||||
if strings.HasSuffix(p.Device, filesystem) || p.Mountpoint == filesystem {
|
||||
addFsStat(p.Device, p.Mountpoint, true)
|
||||
hasRoot = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasRoot {
|
||||
slog.Warn("Partition details not found", "filesystem", filesystem)
|
||||
}
|
||||
}
|
||||
|
||||
// Add EXTRA_FILESYSTEMS env var values to fsStats
|
||||
if extraFilesystems, exists := GetEnv("EXTRA_FILESYSTEMS"); exists {
|
||||
for _, fs := range strings.Split(extraFilesystems, ",") {
|
||||
found := false
|
||||
for _, p := range partitions {
|
||||
if strings.HasSuffix(p.Device, fs) || p.Mountpoint == fs {
|
||||
addFsStat(p.Device, p.Mountpoint, false)
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
// if not in partitions, test if we can get disk usage
|
||||
if !found {
|
||||
if _, err := disk.Usage(fs); err == nil {
|
||||
addFsStat(filepath.Base(fs), fs, false)
|
||||
} else {
|
||||
slog.Error("Invalid filesystem", "name", fs, "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process partitions for various mount points
|
||||
for _, p := range partitions {
|
||||
// fmt.Println(p.Device, p.Mountpoint)
|
||||
// Binary root fallback or docker root fallback
|
||||
if !hasRoot && (p.Mountpoint == "/" || (p.Mountpoint == "/etc/hosts" && strings.HasPrefix(p.Device, "/dev"))) {
|
||||
fs, match := findIoDevice(filepath.Base(p.Device), diskIoCounters, a.fsStats)
|
||||
if match {
|
||||
addFsStat(fs, p.Mountpoint, true)
|
||||
hasRoot = true
|
||||
}
|
||||
}
|
||||
|
||||
// Check if device is in /extra-filesystems
|
||||
if strings.HasPrefix(p.Mountpoint, efPath) {
|
||||
addFsStat(p.Device, p.Mountpoint, false)
|
||||
}
|
||||
}
|
||||
|
||||
// Check all folders in /extra-filesystems and add them if not already present
|
||||
if folders, err := os.ReadDir(efPath); err == nil {
|
||||
existingMountpoints := make(map[string]bool)
|
||||
for _, stats := range a.fsStats {
|
||||
existingMountpoints[stats.Mountpoint] = true
|
||||
}
|
||||
for _, folder := range folders {
|
||||
if folder.IsDir() {
|
||||
mountpoint := filepath.Join(efPath, folder.Name())
|
||||
slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
|
||||
if !existingMountpoints[mountpoint] {
|
||||
addFsStat(folder.Name(), mountpoint, false)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no root filesystem set, use fallback
|
||||
if !hasRoot {
|
||||
rootDevice, _ := findIoDevice(filepath.Base(filesystem), diskIoCounters, a.fsStats)
|
||||
slog.Info("Root disk", "mountpoint", "/", "io", rootDevice)
|
||||
a.fsStats[rootDevice] = &system.FsStats{Root: true, Mountpoint: "/"}
|
||||
}
|
||||
|
||||
a.initializeDiskIoStats(diskIoCounters)
|
||||
}
|
||||
|
||||
// Returns matching device from /proc/diskstats,
|
||||
// or the device with the most reads if no match is found.
|
||||
// bool is true if a match was found.
|
||||
func findIoDevice(filesystem string, diskIoCounters map[string]disk.IOCountersStat, fsStats map[string]*system.FsStats) (string, bool) {
|
||||
var maxReadBytes uint64
|
||||
maxReadDevice := "/"
|
||||
for _, d := range diskIoCounters {
|
||||
if d.Name == filesystem || (d.Label != "" && d.Label == filesystem) {
|
||||
return d.Name, true
|
||||
}
|
||||
if d.ReadBytes > maxReadBytes {
|
||||
// don't use if device already exists in fsStats
|
||||
if _, exists := fsStats[d.Name]; !exists {
|
||||
maxReadBytes = d.ReadBytes
|
||||
maxReadDevice = d.Name
|
||||
}
|
||||
}
|
||||
}
|
||||
return maxReadDevice, false
|
||||
}
|
||||
|
||||
// Sets start values for disk I/O stats.
|
||||
func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersStat) {
|
||||
for device, stats := range a.fsStats {
|
||||
// skip if not in diskIoCounters
|
||||
d, exists := diskIoCounters[device]
|
||||
if !exists {
|
||||
slog.Warn("Device not found in diskstats", "name", device)
|
||||
continue
|
||||
}
|
||||
// populate initial values
|
||||
stats.Time = time.Now()
|
||||
stats.TotalRead = d.ReadBytes
|
||||
stats.TotalWrite = d.WriteBytes
|
||||
// add to list of valid io device names
|
||||
a.fsNames = append(a.fsNames, device)
|
||||
}
|
||||
}
|
370
agent/docker.go
Normal file
370
agent/docker.go
Normal file
@@ -0,0 +1,370 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/src/entities/container"
|
||||
|
||||
"github.com/blang/semver"
|
||||
)
|
||||
|
||||
type dockerManager struct {
|
||||
client *http.Client // Client to query Docker API
|
||||
wg sync.WaitGroup // WaitGroup to wait for all goroutines to finish
|
||||
sem chan struct{} // Semaphore to limit concurrent container requests
|
||||
containerStatsMutex sync.RWMutex // Mutex to prevent concurrent access to containerStatsMap
|
||||
apiContainerList []*container.ApiInfo // List of containers from Docker API (no pointer)
|
||||
containerStatsMap map[string]*container.Stats // Keeps track of container stats
|
||||
validIds map[string]struct{} // Map of valid container ids, used to prune invalid containers from containerStatsMap
|
||||
goodDockerVersion bool // Whether docker version is at least 25.0.0 (one-shot works correctly)
|
||||
isWindows bool // Whether the Docker Engine API is running on Windows
|
||||
buf *bytes.Buffer // Buffer to store and read response bodies
|
||||
decoder *json.Decoder // Reusable JSON decoder that reads from buf
|
||||
apiStats *container.ApiStats // Reusable API stats object
|
||||
}
|
||||
|
||||
// userAgentRoundTripper is a custom http.RoundTripper that adds a User-Agent header to all requests
|
||||
type userAgentRoundTripper struct {
|
||||
rt http.RoundTripper
|
||||
userAgent string
|
||||
}
|
||||
|
||||
// RoundTrip implements the http.RoundTripper interface
|
||||
func (u *userAgentRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
req.Header.Set("User-Agent", u.userAgent)
|
||||
return u.rt.RoundTrip(req)
|
||||
}
|
||||
|
||||
// Add goroutine to the queue
|
||||
func (d *dockerManager) queue() {
|
||||
d.wg.Add(1)
|
||||
if d.goodDockerVersion {
|
||||
d.sem <- struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove goroutine from the queue
|
||||
func (d *dockerManager) dequeue() {
|
||||
d.wg.Done()
|
||||
if d.goodDockerVersion {
|
||||
<-d.sem
|
||||
}
|
||||
}
|
||||
|
||||
// Returns stats for all running containers
|
||||
func (dm *dockerManager) getDockerStats() ([]*container.Stats, error) {
|
||||
resp, err := dm.client.Get("http://localhost/containers/json")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dm.apiContainerList = dm.apiContainerList[:0]
|
||||
if err := dm.decode(resp, &dm.apiContainerList); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dm.isWindows = strings.Contains(resp.Header.Get("Server"), "windows")
|
||||
|
||||
containersLength := len(dm.apiContainerList)
|
||||
|
||||
// store valid ids to clean up old container ids from map
|
||||
if dm.validIds == nil {
|
||||
dm.validIds = make(map[string]struct{}, containersLength)
|
||||
} else {
|
||||
clear(dm.validIds)
|
||||
}
|
||||
|
||||
var failedContainers []*container.ApiInfo
|
||||
|
||||
for i := range dm.apiContainerList {
|
||||
ctr := dm.apiContainerList[i]
|
||||
ctr.IdShort = ctr.Id[:12]
|
||||
dm.validIds[ctr.IdShort] = struct{}{}
|
||||
// check if container is less than 1 minute old (possible restart)
|
||||
// note: can't use Created field because it's not updated on restart
|
||||
if strings.Contains(ctr.Status, "second") {
|
||||
// if so, remove old container data
|
||||
dm.deleteContainerStatsSync(ctr.IdShort)
|
||||
}
|
||||
dm.queue()
|
||||
go func() {
|
||||
defer dm.dequeue()
|
||||
err := dm.updateContainerStats(ctr)
|
||||
// if error, delete from map and add to failed list to retry
|
||||
if err != nil {
|
||||
dm.containerStatsMutex.Lock()
|
||||
delete(dm.containerStatsMap, ctr.IdShort)
|
||||
failedContainers = append(failedContainers, ctr)
|
||||
dm.containerStatsMutex.Unlock()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
dm.wg.Wait()
|
||||
|
||||
// retry failed containers separately so we can run them in parallel (docker 24 bug)
|
||||
if len(failedContainers) > 0 {
|
||||
slog.Debug("Retrying failed containers", "count", len(failedContainers))
|
||||
for i := range failedContainers {
|
||||
ctr := failedContainers[i]
|
||||
dm.queue()
|
||||
go func() {
|
||||
defer dm.dequeue()
|
||||
err = dm.updateContainerStats(ctr)
|
||||
if err != nil {
|
||||
slog.Error("Error getting container stats", "err", err)
|
||||
}
|
||||
}()
|
||||
}
|
||||
dm.wg.Wait()
|
||||
}
|
||||
|
||||
// populate final stats and remove old / invalid container stats
|
||||
stats := make([]*container.Stats, 0, containersLength)
|
||||
for id, v := range dm.containerStatsMap {
|
||||
if _, exists := dm.validIds[id]; !exists {
|
||||
delete(dm.containerStatsMap, id)
|
||||
} else {
|
||||
stats = append(stats, v)
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// Updates stats for individual container
|
||||
func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo) error {
|
||||
name := ctr.Names[0][1:]
|
||||
|
||||
resp, err := dm.client.Get("http://localhost/containers/" + ctr.IdShort + "/stats?stream=0&one-shot=1")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
dm.containerStatsMutex.Lock()
|
||||
defer dm.containerStatsMutex.Unlock()
|
||||
|
||||
// add empty values if they doesn't exist in map
|
||||
stats, initialized := dm.containerStatsMap[ctr.IdShort]
|
||||
if !initialized {
|
||||
stats = &container.Stats{Name: name}
|
||||
dm.containerStatsMap[ctr.IdShort] = stats
|
||||
}
|
||||
|
||||
// reset current stats
|
||||
stats.Cpu = 0
|
||||
stats.Mem = 0
|
||||
stats.NetworkSent = 0
|
||||
stats.NetworkRecv = 0
|
||||
|
||||
// docker host container stats response
|
||||
// res := dm.getApiStats()
|
||||
// defer dm.putApiStats(res)
|
||||
//
|
||||
|
||||
res := dm.apiStats
|
||||
res.Networks = nil
|
||||
if err := dm.decode(resp, res); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// calculate cpu and memory stats
|
||||
var usedMemory uint64
|
||||
var cpuPct float64
|
||||
|
||||
// store current cpu stats
|
||||
prevCpuContainer, prevCpuSystem := stats.CpuContainer, stats.CpuSystem
|
||||
stats.CpuContainer = res.CPUStats.CPUUsage.TotalUsage
|
||||
stats.CpuSystem = res.CPUStats.SystemUsage
|
||||
|
||||
if dm.isWindows {
|
||||
usedMemory = res.MemoryStats.PrivateWorkingSet
|
||||
cpuPct = res.CalculateCpuPercentWindows(prevCpuContainer, stats.PrevReadTime)
|
||||
} else {
|
||||
// check if container has valid data, otherwise may be in restart loop (#103)
|
||||
if res.MemoryStats.Usage == 0 {
|
||||
return fmt.Errorf("%s - no memory stats - see https://github.com/henrygd/beszel/issues/144", name)
|
||||
}
|
||||
memCache := res.MemoryStats.Stats.InactiveFile
|
||||
if memCache == 0 {
|
||||
memCache = res.MemoryStats.Stats.Cache
|
||||
}
|
||||
usedMemory = res.MemoryStats.Usage - memCache
|
||||
|
||||
cpuPct = res.CalculateCpuPercentLinux(prevCpuContainer, prevCpuSystem)
|
||||
}
|
||||
|
||||
if cpuPct > 100 {
|
||||
return fmt.Errorf("%s cpu pct greater than 100: %+v", name, cpuPct)
|
||||
}
|
||||
|
||||
// network
|
||||
var total_sent, total_recv uint64
|
||||
for _, v := range res.Networks {
|
||||
total_sent += v.TxBytes
|
||||
total_recv += v.RxBytes
|
||||
}
|
||||
var sent_delta, recv_delta uint64
|
||||
millisecondsElapsed := uint64(time.Since(stats.PrevReadTime).Milliseconds())
|
||||
if initialized && millisecondsElapsed > 0 {
|
||||
// get bytes per second
|
||||
sent_delta = (total_sent - stats.PrevNet.Sent) * 1000 / millisecondsElapsed
|
||||
recv_delta = (total_recv - stats.PrevNet.Recv) * 1000 / millisecondsElapsed
|
||||
// check for unrealistic network values (> 5GB/s)
|
||||
if sent_delta > 5e9 || recv_delta > 5e9 {
|
||||
slog.Warn("Bad network delta", "container", name)
|
||||
sent_delta, recv_delta = 0, 0
|
||||
}
|
||||
}
|
||||
stats.PrevNet.Sent, stats.PrevNet.Recv = total_sent, total_recv
|
||||
|
||||
stats.Cpu = twoDecimals(cpuPct)
|
||||
stats.Mem = bytesToMegabytes(float64(usedMemory))
|
||||
stats.NetworkSent = bytesToMegabytes(float64(sent_delta))
|
||||
stats.NetworkRecv = bytesToMegabytes(float64(recv_delta))
|
||||
stats.PrevReadTime = res.Read
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete container stats from map using mutex
|
||||
func (dm *dockerManager) deleteContainerStatsSync(id string) {
|
||||
dm.containerStatsMutex.Lock()
|
||||
defer dm.containerStatsMutex.Unlock()
|
||||
delete(dm.containerStatsMap, id)
|
||||
}
|
||||
|
||||
// Creates a new http client for Docker or Podman API
|
||||
func newDockerManager(a *Agent) *dockerManager {
|
||||
dockerHost, exists := GetEnv("DOCKER_HOST")
|
||||
if exists {
|
||||
// return nil if set to empty string
|
||||
if dockerHost == "" {
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
dockerHost = getDockerHost()
|
||||
}
|
||||
|
||||
parsedURL, err := url.Parse(dockerHost)
|
||||
if err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
transport := &http.Transport{
|
||||
DisableCompression: true,
|
||||
MaxConnsPerHost: 0,
|
||||
}
|
||||
|
||||
switch parsedURL.Scheme {
|
||||
case "unix":
|
||||
transport.DialContext = func(ctx context.Context, proto, addr string) (net.Conn, error) {
|
||||
return (&net.Dialer{}).DialContext(ctx, "unix", parsedURL.Path)
|
||||
}
|
||||
case "tcp", "http", "https":
|
||||
transport.DialContext = func(ctx context.Context, proto, addr string) (net.Conn, error) {
|
||||
return (&net.Dialer{}).DialContext(ctx, "tcp", parsedURL.Host)
|
||||
}
|
||||
default:
|
||||
slog.Error("Invalid DOCKER_HOST", "scheme", parsedURL.Scheme)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// configurable timeout
|
||||
timeout := time.Millisecond * 2100
|
||||
if t, set := GetEnv("DOCKER_TIMEOUT"); set {
|
||||
timeout, err = time.ParseDuration(t)
|
||||
if err != nil {
|
||||
slog.Error(err.Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
slog.Info("DOCKER_TIMEOUT", "timeout", timeout)
|
||||
}
|
||||
|
||||
// Custom user-agent to avoid docker bug: https://github.com/docker/for-mac/issues/7575
|
||||
userAgentTransport := &userAgentRoundTripper{
|
||||
rt: transport,
|
||||
userAgent: "Docker-Client/",
|
||||
}
|
||||
|
||||
manager := &dockerManager{
|
||||
client: &http.Client{
|
||||
Timeout: timeout,
|
||||
Transport: userAgentTransport,
|
||||
},
|
||||
containerStatsMap: make(map[string]*container.Stats),
|
||||
sem: make(chan struct{}, 5),
|
||||
apiContainerList: []*container.ApiInfo{},
|
||||
apiStats: &container.ApiStats{},
|
||||
}
|
||||
|
||||
// If using podman, return client
|
||||
if strings.Contains(dockerHost, "podman") {
|
||||
a.systemInfo.Podman = true
|
||||
manager.goodDockerVersion = true
|
||||
return manager
|
||||
}
|
||||
|
||||
// Check docker version
|
||||
// (versions before 25.0.0 have a bug with one-shot which requires all requests to be made in one batch)
|
||||
var versionInfo struct {
|
||||
Version string `json:"Version"`
|
||||
}
|
||||
resp, err := manager.client.Get("http://localhost/version")
|
||||
if err != nil {
|
||||
return manager
|
||||
}
|
||||
|
||||
if err := manager.decode(resp, &versionInfo); err != nil {
|
||||
return manager
|
||||
}
|
||||
|
||||
// if version > 24, one-shot works correctly and we can limit concurrent operations
|
||||
if dockerVersion, err := semver.Parse(versionInfo.Version); err == nil && dockerVersion.Major > 24 {
|
||||
manager.goodDockerVersion = true
|
||||
} else {
|
||||
slog.Info(fmt.Sprintf("Docker %s is outdated. Upgrade if possible. See https://github.com/henrygd/beszel/issues/58", versionInfo.Version))
|
||||
}
|
||||
|
||||
return manager
|
||||
}
|
||||
|
||||
// Decodes Docker API JSON response using a reusable buffer and decoder. Not thread safe.
|
||||
func (dm *dockerManager) decode(resp *http.Response, d any) error {
|
||||
if dm.buf == nil {
|
||||
// initialize buffer with 256kb starting size
|
||||
dm.buf = bytes.NewBuffer(make([]byte, 0, 1024*256))
|
||||
dm.decoder = json.NewDecoder(dm.buf)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
defer dm.buf.Reset()
|
||||
_, err := dm.buf.ReadFrom(resp.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return dm.decoder.Decode(d)
|
||||
}
|
||||
|
||||
// Test docker / podman sockets and return if one exists
|
||||
func getDockerHost() string {
|
||||
scheme := "unix://"
|
||||
socks := []string{"/var/run/docker.sock", fmt.Sprintf("/run/user/%v/podman/podman.sock", os.Getuid())}
|
||||
for _, sock := range socks {
|
||||
if _, err := os.Stat(sock); err == nil {
|
||||
return scheme + sock
|
||||
}
|
||||
}
|
||||
return scheme + socks[0]
|
||||
}
|
349
agent/gpu.go
Normal file
349
agent/gpu.go
Normal file
@@ -0,0 +1,349 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/src/entities/system"
|
||||
|
||||
"golang.org/x/exp/slog"
|
||||
)
|
||||
|
||||
const (
|
||||
// Commands
|
||||
nvidiaSmiCmd string = "nvidia-smi"
|
||||
rocmSmiCmd string = "rocm-smi"
|
||||
tegraStatsCmd string = "tegrastats"
|
||||
|
||||
// Polling intervals
|
||||
nvidiaSmiInterval string = "4" // in seconds
|
||||
tegraStatsInterval string = "3700" // in milliseconds
|
||||
rocmSmiInterval time.Duration = 4300 * time.Millisecond
|
||||
|
||||
// Command retry and timeout constants
|
||||
retryWaitTime time.Duration = 5 * time.Second
|
||||
maxFailureRetries int = 5
|
||||
|
||||
cmdBufferSize uint16 = 10 * 1024
|
||||
|
||||
// Unit Conversions
|
||||
mebibytesInAMegabyte float64 = 1.024 // nvidia-smi reports memory in MiB
|
||||
milliwattsInAWatt float64 = 1000.0 // tegrastats reports power in mW
|
||||
)
|
||||
|
||||
// GPUManager manages data collection for GPUs (either Nvidia or AMD)
|
||||
type GPUManager struct {
|
||||
sync.Mutex
|
||||
nvidiaSmi bool
|
||||
rocmSmi bool
|
||||
tegrastats bool
|
||||
GpuDataMap map[string]*system.GPUData
|
||||
}
|
||||
|
||||
// RocmSmiJson represents the JSON structure of rocm-smi output
|
||||
type RocmSmiJson struct {
|
||||
ID string `json:"GUID"`
|
||||
Name string `json:"Card series"`
|
||||
Temperature string `json:"Temperature (Sensor edge) (C)"`
|
||||
MemoryUsed string `json:"VRAM Total Used Memory (B)"`
|
||||
MemoryTotal string `json:"VRAM Total Memory (B)"`
|
||||
Usage string `json:"GPU use (%)"`
|
||||
PowerPackage string `json:"Average Graphics Package Power (W)"`
|
||||
PowerSocket string `json:"Current Socket Graphics Package Power (W)"`
|
||||
}
|
||||
|
||||
// gpuCollector defines a collector for a specific GPU management utility (nvidia-smi or rocm-smi)
|
||||
type gpuCollector struct {
|
||||
name string
|
||||
cmdArgs []string
|
||||
parse func([]byte) bool // returns true if valid data was found
|
||||
buf []byte
|
||||
}
|
||||
|
||||
var errNoValidData = fmt.Errorf("no valid GPU data found") // Error for missing data
|
||||
|
||||
// starts and manages the ongoing collection of GPU data for the specified GPU management utility
|
||||
func (c *gpuCollector) start() {
|
||||
for {
|
||||
err := c.collect()
|
||||
if err != nil {
|
||||
if err == errNoValidData {
|
||||
slog.Warn(c.name + " found no valid GPU data, stopping")
|
||||
break
|
||||
}
|
||||
slog.Warn(c.name+" failed, restarting", "err", err)
|
||||
time.Sleep(retryWaitTime)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// collect executes the command, parses output with the assigned parser function
|
||||
func (c *gpuCollector) collect() error {
|
||||
cmd := exec.Command(c.name, c.cmdArgs...)
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(stdout)
|
||||
if c.buf == nil {
|
||||
c.buf = make([]byte, 0, cmdBufferSize)
|
||||
}
|
||||
scanner.Buffer(c.buf, bufio.MaxScanTokenSize)
|
||||
|
||||
for scanner.Scan() {
|
||||
hasValidData := c.parse(scanner.Bytes())
|
||||
if !hasValidData {
|
||||
return errNoValidData
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return fmt.Errorf("scanner error: %w", err)
|
||||
}
|
||||
return cmd.Wait()
|
||||
}
|
||||
|
||||
// getJetsonParser returns a function to parse the output of tegrastats and update the GPUData map
|
||||
func (gm *GPUManager) getJetsonParser() func(output []byte) bool {
|
||||
// use closure to avoid recompiling the regex
|
||||
ramPattern := regexp.MustCompile(`RAM (\d+)/(\d+)MB`)
|
||||
gr3dPattern := regexp.MustCompile(`GR3D_FREQ (\d+)%`)
|
||||
tempPattern := regexp.MustCompile(`tj@(\d+\.?\d*)C`)
|
||||
// Orin Nano / NX do not have GPU specific power monitor
|
||||
// TODO: Maybe use VDD_IN for Nano / NX and add a total system power chart
|
||||
powerPattern := regexp.MustCompile(`(GPU_SOC|CPU_GPU_CV) (\d+)mW`)
|
||||
|
||||
// jetson devices have only one gpu so we'll just initialize here
|
||||
gpuData := &system.GPUData{Name: "GPU"}
|
||||
gm.GpuDataMap["0"] = gpuData
|
||||
|
||||
return func(output []byte) bool {
|
||||
gm.Lock()
|
||||
defer gm.Unlock()
|
||||
// Parse RAM usage
|
||||
ramMatches := ramPattern.FindSubmatch(output)
|
||||
if ramMatches != nil {
|
||||
gpuData.MemoryUsed, _ = strconv.ParseFloat(string(ramMatches[1]), 64)
|
||||
gpuData.MemoryTotal, _ = strconv.ParseFloat(string(ramMatches[2]), 64)
|
||||
}
|
||||
// Parse GR3D (GPU) usage
|
||||
gr3dMatches := gr3dPattern.FindSubmatch(output)
|
||||
if gr3dMatches != nil {
|
||||
gr3dUsage, _ := strconv.ParseFloat(string(gr3dMatches[1]), 64)
|
||||
gpuData.Usage += gr3dUsage
|
||||
}
|
||||
// Parse temperature
|
||||
tempMatches := tempPattern.FindSubmatch(output)
|
||||
if tempMatches != nil {
|
||||
gpuData.Temperature, _ = strconv.ParseFloat(string(tempMatches[1]), 64)
|
||||
}
|
||||
// Parse power usage
|
||||
powerMatches := powerPattern.FindSubmatch(output)
|
||||
if powerMatches != nil {
|
||||
power, _ := strconv.ParseFloat(string(powerMatches[2]), 64)
|
||||
gpuData.Power += power / milliwattsInAWatt
|
||||
}
|
||||
gpuData.Count++
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// parseNvidiaData parses the output of nvidia-smi and updates the GPUData map
|
||||
func (gm *GPUManager) parseNvidiaData(output []byte) bool {
|
||||
gm.Lock()
|
||||
defer gm.Unlock()
|
||||
scanner := bufio.NewScanner(bytes.NewReader(output))
|
||||
var valid bool
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text() // Or use scanner.Bytes() for []byte
|
||||
fields := strings.Split(strings.TrimSpace(line), ", ")
|
||||
if len(fields) < 7 {
|
||||
continue
|
||||
}
|
||||
valid = true
|
||||
id := fields[0]
|
||||
temp, _ := strconv.ParseFloat(fields[2], 64)
|
||||
memoryUsage, _ := strconv.ParseFloat(fields[3], 64)
|
||||
totalMemory, _ := strconv.ParseFloat(fields[4], 64)
|
||||
usage, _ := strconv.ParseFloat(fields[5], 64)
|
||||
power, _ := strconv.ParseFloat(fields[6], 64)
|
||||
// add gpu if not exists
|
||||
if _, ok := gm.GpuDataMap[id]; !ok {
|
||||
name := strings.TrimPrefix(fields[1], "NVIDIA ")
|
||||
gm.GpuDataMap[id] = &system.GPUData{Name: strings.TrimSuffix(name, " Laptop GPU")}
|
||||
}
|
||||
// update gpu data
|
||||
gpu := gm.GpuDataMap[id]
|
||||
gpu.Temperature = temp
|
||||
gpu.MemoryUsed = memoryUsage / mebibytesInAMegabyte
|
||||
gpu.MemoryTotal = totalMemory / mebibytesInAMegabyte
|
||||
gpu.Usage += usage
|
||||
gpu.Power += power
|
||||
gpu.Count++
|
||||
}
|
||||
return valid
|
||||
}
|
||||
|
||||
// parseAmdData parses the output of rocm-smi and updates the GPUData map
|
||||
func (gm *GPUManager) parseAmdData(output []byte) bool {
|
||||
var rocmSmiInfo map[string]RocmSmiJson
|
||||
if err := json.Unmarshal(output, &rocmSmiInfo); err != nil || len(rocmSmiInfo) == 0 {
|
||||
return false
|
||||
}
|
||||
gm.Lock()
|
||||
defer gm.Unlock()
|
||||
for _, v := range rocmSmiInfo {
|
||||
var power float64
|
||||
if v.PowerPackage != "" {
|
||||
power, _ = strconv.ParseFloat(v.PowerPackage, 64)
|
||||
} else {
|
||||
power, _ = strconv.ParseFloat(v.PowerSocket, 64)
|
||||
}
|
||||
memoryUsage, _ := strconv.ParseFloat(v.MemoryUsed, 64)
|
||||
totalMemory, _ := strconv.ParseFloat(v.MemoryTotal, 64)
|
||||
usage, _ := strconv.ParseFloat(v.Usage, 64)
|
||||
|
||||
if _, ok := gm.GpuDataMap[v.ID]; !ok {
|
||||
gm.GpuDataMap[v.ID] = &system.GPUData{Name: v.Name}
|
||||
}
|
||||
gpu := gm.GpuDataMap[v.ID]
|
||||
gpu.Temperature, _ = strconv.ParseFloat(v.Temperature, 64)
|
||||
gpu.MemoryUsed = bytesToMegabytes(memoryUsage)
|
||||
gpu.MemoryTotal = bytesToMegabytes(totalMemory)
|
||||
gpu.Usage += usage
|
||||
gpu.Power += power
|
||||
gpu.Count++
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// sums and resets the current GPU utilization data since the last update
|
||||
func (gm *GPUManager) GetCurrentData() map[string]system.GPUData {
|
||||
gm.Lock()
|
||||
defer gm.Unlock()
|
||||
|
||||
// check for GPUs with the same name
|
||||
nameCounts := make(map[string]int)
|
||||
for _, gpu := range gm.GpuDataMap {
|
||||
nameCounts[gpu.Name]++
|
||||
}
|
||||
|
||||
// copy / reset the data
|
||||
gpuData := make(map[string]system.GPUData, len(gm.GpuDataMap))
|
||||
for id, gpu := range gm.GpuDataMap {
|
||||
gpuAvg := *gpu
|
||||
|
||||
gpuAvg.Temperature = twoDecimals(gpu.Temperature)
|
||||
gpuAvg.MemoryUsed = twoDecimals(gpu.MemoryUsed)
|
||||
gpuAvg.MemoryTotal = twoDecimals(gpu.MemoryTotal)
|
||||
|
||||
// avoid division by zero
|
||||
if gpu.Count > 0 {
|
||||
gpuAvg.Usage = twoDecimals(gpu.Usage / gpu.Count)
|
||||
gpuAvg.Power = twoDecimals(gpu.Power / gpu.Count)
|
||||
}
|
||||
|
||||
// reset accumulators in the original
|
||||
gpu.Usage, gpu.Power, gpu.Count = 0, 0, 0
|
||||
|
||||
// append id to the name if there are multiple GPUs with the same name
|
||||
if nameCounts[gpu.Name] > 1 {
|
||||
gpuAvg.Name = fmt.Sprintf("%s %s", gpu.Name, id)
|
||||
}
|
||||
gpuData[id] = gpuAvg
|
||||
}
|
||||
slog.Debug("GPU", "data", gpuData)
|
||||
return gpuData
|
||||
}
|
||||
|
||||
// detectGPUs checks for the presence of GPU management tools (nvidia-smi, rocm-smi, tegrastats)
|
||||
// in the system path. It sets the corresponding flags in the GPUManager struct if any of these
|
||||
// tools are found. If none of the tools are found, it returns an error indicating that no GPU
|
||||
// management tools are available.
|
||||
func (gm *GPUManager) detectGPUs() error {
|
||||
if _, err := exec.LookPath(nvidiaSmiCmd); err == nil {
|
||||
gm.nvidiaSmi = true
|
||||
}
|
||||
if _, err := exec.LookPath(rocmSmiCmd); err == nil {
|
||||
gm.rocmSmi = true
|
||||
}
|
||||
if _, err := exec.LookPath(tegraStatsCmd); err == nil {
|
||||
gm.tegrastats = true
|
||||
gm.nvidiaSmi = false
|
||||
}
|
||||
if gm.nvidiaSmi || gm.rocmSmi || gm.tegrastats {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("no GPU found - install nvidia-smi, rocm-smi, or tegrastats")
|
||||
}
|
||||
|
||||
// startCollector starts the appropriate GPU data collector based on the command
|
||||
func (gm *GPUManager) startCollector(command string) {
|
||||
collector := gpuCollector{
|
||||
name: command,
|
||||
}
|
||||
switch command {
|
||||
case nvidiaSmiCmd:
|
||||
collector.cmdArgs = []string{
|
||||
"-l", nvidiaSmiInterval,
|
||||
"--query-gpu=index,name,temperature.gpu,memory.used,memory.total,utilization.gpu,power.draw",
|
||||
"--format=csv,noheader,nounits",
|
||||
}
|
||||
collector.parse = gm.parseNvidiaData
|
||||
go collector.start()
|
||||
case tegraStatsCmd:
|
||||
collector.cmdArgs = []string{"--interval", tegraStatsInterval}
|
||||
collector.parse = gm.getJetsonParser()
|
||||
go collector.start()
|
||||
case rocmSmiCmd:
|
||||
collector.cmdArgs = []string{"--showid", "--showtemp", "--showuse", "--showpower", "--showproductname", "--showmeminfo", "vram", "--json"}
|
||||
collector.parse = gm.parseAmdData
|
||||
go func() {
|
||||
failures := 0
|
||||
for {
|
||||
if err := collector.collect(); err != nil {
|
||||
failures++
|
||||
if failures > maxFailureRetries {
|
||||
break
|
||||
}
|
||||
slog.Warn("Error collecting AMD GPU data", "err", err)
|
||||
}
|
||||
time.Sleep(rocmSmiInterval)
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
// NewGPUManager creates and initializes a new GPUManager
|
||||
func NewGPUManager() (*GPUManager, error) {
|
||||
var gm GPUManager
|
||||
if err := gm.detectGPUs(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
gm.GpuDataMap = make(map[string]*system.GPUData)
|
||||
|
||||
if gm.nvidiaSmi {
|
||||
gm.startCollector(nvidiaSmiCmd)
|
||||
}
|
||||
if gm.rocmSmi {
|
||||
gm.startCollector(rocmSmiCmd)
|
||||
}
|
||||
if gm.tegrastats {
|
||||
gm.startCollector(tegraStatsCmd)
|
||||
}
|
||||
|
||||
return &gm, nil
|
||||
}
|
794
agent/gpu_test.go
Normal file
794
agent/gpu_test.go
Normal file
@@ -0,0 +1,794 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/src/entities/system"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParseNvidiaData(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
wantData map[string]system.GPUData
|
||||
wantValid bool
|
||||
}{
|
||||
{
|
||||
name: "valid multi-gpu data",
|
||||
input: "0, NVIDIA GeForce RTX 3050 Ti Laptop GPU, 48, 12, 4096, 26.3, 12.73\n1, NVIDIA A100-PCIE-40GB, 38, 74, 40960, [N/A], 36.79",
|
||||
wantData: map[string]system.GPUData{
|
||||
"0": {
|
||||
Name: "GeForce RTX 3050 Ti",
|
||||
Temperature: 48.0,
|
||||
MemoryUsed: 12.0 / 1.024,
|
||||
MemoryTotal: 4096.0 / 1.024,
|
||||
Usage: 26.3,
|
||||
Power: 12.73,
|
||||
Count: 1,
|
||||
},
|
||||
"1": {
|
||||
Name: "A100-PCIE-40GB",
|
||||
Temperature: 38.0,
|
||||
MemoryUsed: 74.0 / 1.024,
|
||||
MemoryTotal: 40960.0 / 1.024,
|
||||
Usage: 0.0,
|
||||
Power: 36.79,
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
wantValid: true,
|
||||
},
|
||||
{
|
||||
name: "more valid multi-gpu data",
|
||||
input: `0, NVIDIA A10, 45, 19676, 23028, 0, 58.98
|
||||
1, NVIDIA A10, 45, 19638, 23028, 0, 62.35
|
||||
2, NVIDIA A10, 44, 21700, 23028, 0, 59.57
|
||||
3, NVIDIA A10, 45, 18222, 23028, 0, 61.76`,
|
||||
wantData: map[string]system.GPUData{
|
||||
"0": {
|
||||
Name: "A10",
|
||||
Temperature: 45.0,
|
||||
MemoryUsed: 19676.0 / 1.024,
|
||||
MemoryTotal: 23028.0 / 1.024,
|
||||
Usage: 0.0,
|
||||
Power: 58.98,
|
||||
Count: 1,
|
||||
},
|
||||
"1": {
|
||||
Name: "A10",
|
||||
Temperature: 45.0,
|
||||
MemoryUsed: 19638.0 / 1.024,
|
||||
MemoryTotal: 23028.0 / 1.024,
|
||||
Usage: 0.0,
|
||||
Power: 62.35,
|
||||
Count: 1,
|
||||
},
|
||||
"2": {
|
||||
Name: "A10",
|
||||
Temperature: 44.0,
|
||||
MemoryUsed: 21700.0 / 1.024,
|
||||
MemoryTotal: 23028.0 / 1.024,
|
||||
Usage: 0.0,
|
||||
Power: 59.57,
|
||||
Count: 1,
|
||||
},
|
||||
"3": {
|
||||
Name: "A10",
|
||||
Temperature: 45.0,
|
||||
MemoryUsed: 18222.0 / 1.024,
|
||||
MemoryTotal: 23028.0 / 1.024,
|
||||
Usage: 0.0,
|
||||
Power: 61.76,
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
wantValid: true,
|
||||
},
|
||||
{
|
||||
name: "empty input",
|
||||
input: "",
|
||||
wantData: map[string]system.GPUData{},
|
||||
wantValid: false,
|
||||
},
|
||||
{
|
||||
name: "malformed data",
|
||||
input: "bad, data, here",
|
||||
wantData: map[string]system.GPUData{},
|
||||
wantValid: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
valid := gm.parseNvidiaData([]byte(tt.input))
|
||||
assert.Equal(t, tt.wantValid, valid)
|
||||
|
||||
if tt.wantValid {
|
||||
for id, want := range tt.wantData {
|
||||
got := gm.GpuDataMap[id]
|
||||
require.NotNil(t, got)
|
||||
assert.Equal(t, want.Name, got.Name)
|
||||
assert.InDelta(t, want.Temperature, got.Temperature, 0.01)
|
||||
assert.InDelta(t, want.MemoryUsed, got.MemoryUsed, 0.01)
|
||||
assert.InDelta(t, want.MemoryTotal, got.MemoryTotal, 0.01)
|
||||
assert.InDelta(t, want.Usage, got.Usage, 0.01)
|
||||
assert.InDelta(t, want.Power, got.Power, 0.01)
|
||||
assert.Equal(t, want.Count, got.Count)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAmdData(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
wantData map[string]system.GPUData
|
||||
wantValid bool
|
||||
}{
|
||||
{
|
||||
name: "valid single gpu data",
|
||||
input: `{
|
||||
"card0": {
|
||||
"GUID": "34756",
|
||||
"Temperature (Sensor edge) (C)": "47.0",
|
||||
"Current Socket Graphics Package Power (W)": "9.215",
|
||||
"GPU use (%)": "0",
|
||||
"VRAM Total Memory (B)": "536870912",
|
||||
"VRAM Total Used Memory (B)": "482263040",
|
||||
"Card Series": "Rembrandt [Radeon 680M]"
|
||||
}
|
||||
}`,
|
||||
wantData: map[string]system.GPUData{
|
||||
"34756": {
|
||||
Name: "Rembrandt [Radeon 680M]",
|
||||
Temperature: 47.0,
|
||||
MemoryUsed: 482263040.0 / (1024 * 1024),
|
||||
MemoryTotal: 536870912.0 / (1024 * 1024),
|
||||
Usage: 0.0,
|
||||
Power: 9.215,
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
wantValid: true,
|
||||
},
|
||||
{
|
||||
name: "valid multi gpu data",
|
||||
input: `{
|
||||
"card0": {
|
||||
"GUID": "34756",
|
||||
"Temperature (Sensor edge) (C)": "47.0",
|
||||
"Current Socket Graphics Package Power (W)": "9.215",
|
||||
"GPU use (%)": "0",
|
||||
"VRAM Total Memory (B)": "536870912",
|
||||
"VRAM Total Used Memory (B)": "482263040",
|
||||
"Card Series": "Rembrandt [Radeon 680M]"
|
||||
},
|
||||
"card1": {
|
||||
"GUID": "38294",
|
||||
"Temperature (Sensor edge) (C)": "49.0",
|
||||
"Temperature (Sensor junction) (C)": "49.0",
|
||||
"Temperature (Sensor memory) (C)": "62.0",
|
||||
"Average Graphics Package Power (W)": "19.0",
|
||||
"GPU use (%)": "20.3",
|
||||
"VRAM Total Memory (B)": "25753026560",
|
||||
"VRAM Total Used Memory (B)": "794341376",
|
||||
"Card Series": "Navi 31 [Radeon RX 7900 XT]"
|
||||
}
|
||||
}`,
|
||||
wantData: map[string]system.GPUData{
|
||||
"34756": {
|
||||
Name: "Rembrandt [Radeon 680M]",
|
||||
Temperature: 47.0,
|
||||
MemoryUsed: 482263040.0 / (1024 * 1024),
|
||||
MemoryTotal: 536870912.0 / (1024 * 1024),
|
||||
Usage: 0.0,
|
||||
Power: 9.215,
|
||||
Count: 1,
|
||||
},
|
||||
"38294": {
|
||||
Name: "Navi 31 [Radeon RX 7900 XT]",
|
||||
Temperature: 49.0,
|
||||
MemoryUsed: 794341376.0 / (1024 * 1024),
|
||||
MemoryTotal: 25753026560.0 / (1024 * 1024),
|
||||
Usage: 20.3,
|
||||
Power: 19.0,
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
wantValid: true,
|
||||
},
|
||||
{
|
||||
name: "invalid json",
|
||||
input: "{bad json",
|
||||
},
|
||||
{
|
||||
name: "invalid json",
|
||||
input: "{bad json",
|
||||
wantData: map[string]system.GPUData{},
|
||||
wantValid: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
valid := gm.parseAmdData([]byte(tt.input))
|
||||
assert.Equal(t, tt.wantValid, valid)
|
||||
|
||||
if tt.wantValid {
|
||||
for id, want := range tt.wantData {
|
||||
got := gm.GpuDataMap[id]
|
||||
require.NotNil(t, got)
|
||||
assert.Equal(t, want.Name, got.Name)
|
||||
assert.InDelta(t, want.Temperature, got.Temperature, 0.01)
|
||||
assert.InDelta(t, want.MemoryUsed, got.MemoryUsed, 0.01)
|
||||
assert.InDelta(t, want.MemoryTotal, got.MemoryTotal, 0.01)
|
||||
assert.InDelta(t, want.Usage, got.Usage, 0.01)
|
||||
assert.InDelta(t, want.Power, got.Power, 0.01)
|
||||
assert.Equal(t, want.Count, got.Count)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseJetsonData(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
wantMetrics *system.GPUData
|
||||
}{
|
||||
{
|
||||
name: "valid data",
|
||||
input: "11-14-2024 22:54:33 RAM 4300/30698MB GR3D_FREQ 45% tj@52.468C VDD_GPU_SOC 2171mW",
|
||||
wantMetrics: &system.GPUData{
|
||||
Name: "GPU",
|
||||
MemoryUsed: 4300.0,
|
||||
MemoryTotal: 30698.0,
|
||||
Usage: 45.0,
|
||||
Temperature: 52.468,
|
||||
Power: 2.171,
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "more valid data",
|
||||
input: "11-15-2024 08:38:09 RAM 6185/7620MB (lfb 8x2MB) SWAP 851/3810MB (cached 1MB) CPU [15%@729,11%@729,14%@729,13%@729,11%@729,8%@729] EMC_FREQ 43%@2133 GR3D_FREQ 63%@[621] NVDEC off NVJPG off NVJPG1 off VIC off OFA off APE 200 cpu@53.968C soc2@52.437C soc0@50.75C gpu@53.343C tj@53.968C soc1@51.656C VDD_IN 12479mW/12479mW VDD_CPU_GPU_CV 4667mW/4667mW VDD_SOC 2817mW/2817mW",
|
||||
wantMetrics: &system.GPUData{
|
||||
Name: "GPU",
|
||||
MemoryUsed: 6185.0,
|
||||
MemoryTotal: 7620.0,
|
||||
Usage: 63.0,
|
||||
Temperature: 53.968,
|
||||
Power: 4.667,
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "orin nano",
|
||||
input: "06-18-2025 11:25:24 RAM 3452/7620MB (lfb 25x4MB) SWAP 1518/16384MB (cached 174MB) CPU [1%@1420,2%@1420,0%@1420,2%@1420,2%@729,1%@729] GR3D_FREQ 0% cpu@50.031C soc2@49.031C soc0@50C gpu@49.031C tj@50.25C soc1@50.25C VDD_IN 4824mW/4824mW VDD_CPU_GPU_CV 518mW/518mW VDD_SOC 1475mW/1475mW",
|
||||
wantMetrics: &system.GPUData{
|
||||
Name: "GPU",
|
||||
MemoryUsed: 3452.0,
|
||||
MemoryTotal: 7620.0,
|
||||
Usage: 0.0,
|
||||
Temperature: 50.25,
|
||||
Power: 0.518,
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "missing temperature",
|
||||
input: "11-14-2024 22:54:33 RAM 4300/30698MB GR3D_FREQ 45% VDD_GPU_SOC 2171mW",
|
||||
wantMetrics: &system.GPUData{
|
||||
Name: "GPU",
|
||||
MemoryUsed: 4300.0,
|
||||
MemoryTotal: 30698.0,
|
||||
Usage: 45.0,
|
||||
Power: 2.171,
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
parser := gm.getJetsonParser()
|
||||
valid := parser([]byte(tt.input))
|
||||
assert.Equal(t, true, valid)
|
||||
|
||||
got := gm.GpuDataMap["0"]
|
||||
require.NotNil(t, got)
|
||||
assert.Equal(t, tt.wantMetrics.Name, got.Name)
|
||||
assert.InDelta(t, tt.wantMetrics.MemoryUsed, got.MemoryUsed, 0.01)
|
||||
assert.InDelta(t, tt.wantMetrics.MemoryTotal, got.MemoryTotal, 0.01)
|
||||
assert.InDelta(t, tt.wantMetrics.Usage, got.Usage, 0.01)
|
||||
if tt.wantMetrics.Temperature > 0 {
|
||||
assert.InDelta(t, tt.wantMetrics.Temperature, got.Temperature, 0.01)
|
||||
}
|
||||
assert.InDelta(t, tt.wantMetrics.Power, got.Power, 0.01)
|
||||
assert.Equal(t, tt.wantMetrics.Count, got.Count)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetCurrentData(t *testing.T) {
|
||||
t.Run("calculates averages and resets accumulators", func(t *testing.T) {
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: map[string]*system.GPUData{
|
||||
"0": {
|
||||
Name: "GPU1",
|
||||
Temperature: 50,
|
||||
MemoryUsed: 2048,
|
||||
MemoryTotal: 4096,
|
||||
Usage: 100, // 100 over 2 counts = 50 avg
|
||||
Power: 200, // 200 over 2 counts = 100 avg
|
||||
Count: 2,
|
||||
},
|
||||
"1": {
|
||||
Name: "GPU1",
|
||||
Temperature: 60,
|
||||
MemoryUsed: 3072,
|
||||
MemoryTotal: 8192,
|
||||
Usage: 30,
|
||||
Power: 60,
|
||||
Count: 1,
|
||||
},
|
||||
"2": {
|
||||
Name: "GPU 2",
|
||||
Temperature: 70,
|
||||
MemoryUsed: 4096,
|
||||
MemoryTotal: 8192,
|
||||
Usage: 200,
|
||||
Power: 400,
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
result := gm.GetCurrentData()
|
||||
|
||||
// Verify name disambiguation
|
||||
assert.Equal(t, "GPU1 0", result["0"].Name)
|
||||
assert.Equal(t, "GPU1 1", result["1"].Name)
|
||||
assert.Equal(t, "GPU 2", result["2"].Name)
|
||||
|
||||
// Check averaged values in the result
|
||||
assert.InDelta(t, 50.0, result["0"].Usage, 0.01)
|
||||
assert.InDelta(t, 100.0, result["0"].Power, 0.01)
|
||||
assert.InDelta(t, 30.0, result["1"].Usage, 0.01)
|
||||
assert.InDelta(t, 60.0, result["1"].Power, 0.01)
|
||||
|
||||
// Verify that accumulators in the original map are reset
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["0"].Count, "GPU 0 Count should be reset")
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["0"].Usage, "GPU 0 Usage should be reset")
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["0"].Power, "GPU 0 Power should be reset")
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["1"].Count, "GPU 1 Count should be reset")
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["1"].Usage, "GPU 1 Usage should be reset")
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["1"].Power, "GPU 1 Power should be reset")
|
||||
})
|
||||
|
||||
t.Run("handles zero count without panicking", func(t *testing.T) {
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: map[string]*system.GPUData{
|
||||
"0": {
|
||||
Name: "TestGPU",
|
||||
Count: 0,
|
||||
Usage: 0,
|
||||
Power: 0,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var result map[string]system.GPUData
|
||||
assert.NotPanics(t, func() {
|
||||
result = gm.GetCurrentData()
|
||||
})
|
||||
|
||||
// Check that usage and power are 0
|
||||
assert.Equal(t, 0.0, result["0"].Usage)
|
||||
assert.Equal(t, 0.0, result["0"].Power)
|
||||
|
||||
// Verify reset count
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["0"].Count)
|
||||
})
|
||||
}
|
||||
|
||||
func TestDetectGPUs(t *testing.T) {
|
||||
// Save original PATH
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
// Set up temp dir with the commands
|
||||
tempDir := t.TempDir()
|
||||
os.Setenv("PATH", tempDir)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
setupCommands func() error
|
||||
wantNvidiaSmi bool
|
||||
wantRocmSmi bool
|
||||
wantTegrastats bool
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "nvidia-smi not available",
|
||||
setupCommands: func() error {
|
||||
return nil
|
||||
},
|
||||
wantNvidiaSmi: false,
|
||||
wantRocmSmi: false,
|
||||
wantTegrastats: false,
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "nvidia-smi available",
|
||||
setupCommands: func() error {
|
||||
path := filepath.Join(tempDir, "nvidia-smi")
|
||||
script := `#!/bin/sh
|
||||
echo "test"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
wantNvidiaSmi: true,
|
||||
wantTegrastats: false,
|
||||
wantRocmSmi: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "rocm-smi available",
|
||||
setupCommands: func() error {
|
||||
path := filepath.Join(tempDir, "rocm-smi")
|
||||
script := `#!/bin/sh
|
||||
echo "test"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
wantNvidiaSmi: true,
|
||||
wantRocmSmi: true,
|
||||
wantTegrastats: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "tegrastats available",
|
||||
setupCommands: func() error {
|
||||
path := filepath.Join(tempDir, "tegrastats")
|
||||
script := `#!/bin/sh
|
||||
echo "test"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
wantNvidiaSmi: false,
|
||||
wantRocmSmi: true,
|
||||
wantTegrastats: true,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "no gpu tools available",
|
||||
setupCommands: func() error {
|
||||
os.Setenv("PATH", "")
|
||||
return nil
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if err := tt.setupCommands(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
gm := &GPUManager{}
|
||||
err := gm.detectGPUs()
|
||||
|
||||
t.Logf("nvidiaSmi: %v, rocmSmi: %v, tegrastats: %v", gm.nvidiaSmi, gm.rocmSmi, gm.tegrastats)
|
||||
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tt.wantNvidiaSmi, gm.nvidiaSmi)
|
||||
assert.Equal(t, tt.wantRocmSmi, gm.rocmSmi)
|
||||
assert.Equal(t, tt.wantTegrastats, gm.tegrastats)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartCollector(t *testing.T) {
|
||||
// Save original PATH
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
// Set up temp dir with the commands
|
||||
dir := t.TempDir()
|
||||
os.Setenv("PATH", dir)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
command string
|
||||
setup func(t *testing.T) error
|
||||
validate func(t *testing.T, gm *GPUManager)
|
||||
gm *GPUManager
|
||||
}{
|
||||
{
|
||||
name: "nvidia-smi collector",
|
||||
command: "nvidia-smi",
|
||||
setup: func(t *testing.T) error {
|
||||
path := filepath.Join(dir, "nvidia-smi")
|
||||
script := `#!/bin/sh
|
||||
echo "0, NVIDIA Test GPU, 50, 1024, 4096, 25, 100"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
validate: func(t *testing.T, gm *GPUManager) {
|
||||
gpu, exists := gm.GpuDataMap["0"]
|
||||
assert.True(t, exists)
|
||||
if exists {
|
||||
assert.Equal(t, "Test GPU", gpu.Name)
|
||||
assert.Equal(t, 50.0, gpu.Temperature)
|
||||
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "rocm-smi collector",
|
||||
command: "rocm-smi",
|
||||
setup: func(t *testing.T) error {
|
||||
path := filepath.Join(dir, "rocm-smi")
|
||||
script := `#!/bin/sh
|
||||
echo '{"card0": {"Temperature (Sensor edge) (C)": "49.0", "Current Socket Graphics Package Power (W)": "28.159", "GPU use (%)": "0", "VRAM Total Memory (B)": "536870912", "VRAM Total Used Memory (B)": "445550592", "Card Series": "Rembrandt [Radeon 680M]", "Card Model": "0x1681", "Card Vendor": "Advanced Micro Devices, Inc. [AMD/ATI]", "Card SKU": "REMBRANDT", "Subsystem ID": "0x8a22", "Device Rev": "0xc8", "Node ID": "1", "GUID": "34756", "GFX Version": "gfx1035"}}'`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
validate: func(t *testing.T, gm *GPUManager) {
|
||||
gpu, exists := gm.GpuDataMap["34756"]
|
||||
assert.True(t, exists)
|
||||
if exists {
|
||||
assert.Equal(t, "Rembrandt [Radeon 680M]", gpu.Name)
|
||||
assert.InDelta(t, 49.0, gpu.Temperature, 0.01)
|
||||
assert.InDelta(t, 28.159, gpu.Power, 0.01)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tegrastats collector",
|
||||
command: "tegrastats",
|
||||
setup: func(t *testing.T) error {
|
||||
path := filepath.Join(dir, "tegrastats")
|
||||
script := `#!/bin/sh
|
||||
echo "11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 80% tj@70C VDD_GPU_SOC 1000mW"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
validate: func(t *testing.T, gm *GPUManager) {
|
||||
gpu, exists := gm.GpuDataMap["0"]
|
||||
assert.True(t, exists)
|
||||
if exists {
|
||||
assert.InDelta(t, 70.0, gpu.Temperature, 0.1)
|
||||
}
|
||||
},
|
||||
gm: &GPUManager{
|
||||
GpuDataMap: map[string]*system.GPUData{
|
||||
"0": {},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if err := tt.setup(t); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if tt.gm == nil {
|
||||
tt.gm = &GPUManager{
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
}
|
||||
tt.gm.startCollector(tt.command)
|
||||
time.Sleep(50 * time.Millisecond) // Give collector time to run
|
||||
tt.validate(t, tt.gm)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestAccumulationTableDriven tests the accumulation behavior for all three GPU types
|
||||
func TestAccumulation(t *testing.T) {
|
||||
type expectedGPUValues struct {
|
||||
temperature float64
|
||||
memoryUsed float64
|
||||
memoryTotal float64
|
||||
usage float64
|
||||
power float64
|
||||
count float64
|
||||
avgUsage float64
|
||||
avgPower float64
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
initialGPUData map[string]*system.GPUData
|
||||
dataSamples [][]byte
|
||||
parser func(*GPUManager) func([]byte) bool
|
||||
expectedValues map[string]expectedGPUValues
|
||||
}{
|
||||
{
|
||||
name: "Jetson GPU accumulation",
|
||||
initialGPUData: map[string]*system.GPUData{
|
||||
"0": {
|
||||
Name: "Jetson",
|
||||
Temperature: 0,
|
||||
Usage: 0,
|
||||
Power: 0,
|
||||
Count: 0,
|
||||
},
|
||||
},
|
||||
dataSamples: [][]byte{
|
||||
[]byte("11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 30% tj@50.5C VDD_GPU_SOC 1000mW"),
|
||||
[]byte("11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 40% tj@60.5C VDD_GPU_SOC 1200mW"),
|
||||
[]byte("11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 50% tj@70.5C VDD_GPU_SOC 1400mW"),
|
||||
},
|
||||
parser: func(gm *GPUManager) func([]byte) bool {
|
||||
return gm.getJetsonParser()
|
||||
},
|
||||
expectedValues: map[string]expectedGPUValues{
|
||||
"0": {
|
||||
temperature: 70.5, // Last value
|
||||
memoryUsed: 1024, // Last value
|
||||
memoryTotal: 4096, // Last value
|
||||
usage: 120.0, // Accumulated: 30 + 40 + 50
|
||||
power: 3.6, // Accumulated: 1.0 + 1.2 + 1.4
|
||||
count: 3,
|
||||
avgUsage: 40.0, // 120 / 3
|
||||
avgPower: 1.2, // 3.6 / 3
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "NVIDIA GPU accumulation",
|
||||
initialGPUData: map[string]*system.GPUData{
|
||||
// NVIDIA parser will create the GPU data entries
|
||||
},
|
||||
dataSamples: [][]byte{
|
||||
[]byte("0, NVIDIA GeForce RTX 3080, 50, 5000, 10000, 30, 200"),
|
||||
[]byte("0, NVIDIA GeForce RTX 3080, 60, 6000, 10000, 40, 250"),
|
||||
[]byte("0, NVIDIA GeForce RTX 3080, 70, 7000, 10000, 50, 300"),
|
||||
},
|
||||
parser: func(gm *GPUManager) func([]byte) bool {
|
||||
return gm.parseNvidiaData
|
||||
},
|
||||
expectedValues: map[string]expectedGPUValues{
|
||||
"0": {
|
||||
temperature: 70.0, // Last value
|
||||
memoryUsed: 7000.0 / 1.024, // Last value
|
||||
memoryTotal: 10000.0 / 1.024, // Last value
|
||||
usage: 120.0, // Accumulated: 30 + 40 + 50
|
||||
power: 750.0, // Accumulated: 200 + 250 + 300
|
||||
count: 3,
|
||||
avgUsage: 40.0, // 120 / 3
|
||||
avgPower: 250.0, // 750 / 3
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "AMD GPU accumulation",
|
||||
initialGPUData: map[string]*system.GPUData{
|
||||
// AMD parser will create the GPU data entries
|
||||
},
|
||||
dataSamples: [][]byte{
|
||||
[]byte(`{"card0": {"GUID": "34756", "Temperature (Sensor edge) (C)": "50.0", "Current Socket Graphics Package Power (W)": "100.0", "GPU use (%)": "30", "VRAM Total Memory (B)": "10737418240", "VRAM Total Used Memory (B)": "1073741824", "Card Series": "Radeon RX 6800"}}`),
|
||||
[]byte(`{"card0": {"GUID": "34756", "Temperature (Sensor edge) (C)": "60.0", "Current Socket Graphics Package Power (W)": "150.0", "GPU use (%)": "40", "VRAM Total Memory (B)": "10737418240", "VRAM Total Used Memory (B)": "2147483648", "Card Series": "Radeon RX 6800"}}`),
|
||||
[]byte(`{"card0": {"GUID": "34756", "Temperature (Sensor edge) (C)": "70.0", "Current Socket Graphics Package Power (W)": "200.0", "GPU use (%)": "50", "VRAM Total Memory (B)": "10737418240", "VRAM Total Used Memory (B)": "3221225472", "Card Series": "Radeon RX 6800"}}`),
|
||||
},
|
||||
parser: func(gm *GPUManager) func([]byte) bool {
|
||||
return gm.parseAmdData
|
||||
},
|
||||
expectedValues: map[string]expectedGPUValues{
|
||||
"34756": {
|
||||
temperature: 70.0, // Last value
|
||||
memoryUsed: 3221225472.0 / (1024 * 1024), // Last value
|
||||
memoryTotal: 10737418240.0 / (1024 * 1024), // Last value
|
||||
usage: 120.0, // Accumulated: 30 + 40 + 50
|
||||
power: 450.0, // Accumulated: 100 + 150 + 200
|
||||
count: 3,
|
||||
avgUsage: 40.0, // 120 / 3
|
||||
avgPower: 150.0, // 450 / 3
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Create a new GPUManager for each test
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: tt.initialGPUData,
|
||||
}
|
||||
|
||||
// Get the parser function
|
||||
parser := tt.parser(gm)
|
||||
|
||||
// Process each data sample
|
||||
for i, sample := range tt.dataSamples {
|
||||
valid := parser(sample)
|
||||
assert.True(t, valid, "Sample %d should be valid", i)
|
||||
}
|
||||
|
||||
// Check accumulated values
|
||||
for id, expected := range tt.expectedValues {
|
||||
gpu, exists := gm.GpuDataMap[id]
|
||||
assert.True(t, exists, "GPU with ID %s should exist", id)
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
assert.InDelta(t, expected.temperature, gpu.Temperature, 0.01, "Temperature should match")
|
||||
assert.InDelta(t, expected.memoryUsed, gpu.MemoryUsed, 0.01, "Memory used should match")
|
||||
assert.InDelta(t, expected.memoryTotal, gpu.MemoryTotal, 0.01, "Memory total should match")
|
||||
assert.InDelta(t, expected.usage, gpu.Usage, 0.01, "Usage should match")
|
||||
assert.InDelta(t, expected.power, gpu.Power, 0.01, "Power should match")
|
||||
assert.Equal(t, expected.count, gpu.Count, "Count should match")
|
||||
}
|
||||
|
||||
// Verify average calculation in GetCurrentData
|
||||
result := gm.GetCurrentData()
|
||||
for id, expected := range tt.expectedValues {
|
||||
gpu, exists := result[id]
|
||||
assert.True(t, exists, "GPU with ID %s should exist in GetCurrentData result", id)
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
assert.InDelta(t, expected.temperature, gpu.Temperature, 0.01, "Temperature in GetCurrentData should match")
|
||||
assert.InDelta(t, expected.avgUsage, gpu.Usage, 0.01, "Average usage in GetCurrentData should match")
|
||||
assert.InDelta(t, expected.avgPower, gpu.Power, 0.01, "Average power in GetCurrentData should match")
|
||||
}
|
||||
|
||||
// Verify that accumulators in the original map are reset
|
||||
for id := range tt.expectedValues {
|
||||
gpu, exists := gm.GpuDataMap[id]
|
||||
assert.True(t, exists, "GPU with ID %s should still exist after GetCurrentData", id)
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
assert.Equal(t, float64(0), gpu.Count, "Count should be reset for GPU ID %s", id)
|
||||
assert.Equal(t, float64(0), gpu.Usage, "Usage should be reset for GPU ID %s", id)
|
||||
assert.Equal(t, float64(0), gpu.Power, "Power should be reset for GPU ID %s", id)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
43
agent/health/health.go
Normal file
43
agent/health/health.go
Normal file
@@ -0,0 +1,43 @@
|
||||
// Package health provides functions to check and update the health of the agent.
|
||||
// It uses a file in the temp directory to store the timestamp of the last connection attempt.
|
||||
// If the timestamp is older than 90 seconds, the agent is considered unhealthy.
|
||||
// NB: The agent must be started with the Start() method to be considered healthy.
|
||||
package health
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
)
|
||||
|
||||
// healthFile is the path to the health file
|
||||
var healthFile = filepath.Join(os.TempDir(), "beszel_health")
|
||||
|
||||
// Check checks if the agent is connected by checking the modification time of the health file
|
||||
func Check() error {
|
||||
fileInfo, err := os.Stat(healthFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if time.Since(fileInfo.ModTime()) > 91*time.Second {
|
||||
log.Println("over 90 seconds since last connection")
|
||||
return errors.New("unhealthy")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update updates the modification time of the health file
|
||||
func Update() error {
|
||||
file, err := os.Create(healthFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return file.Close()
|
||||
}
|
||||
|
||||
// CleanUp removes the health file
|
||||
func CleanUp() error {
|
||||
return os.Remove(healthFile)
|
||||
}
|
67
agent/health/health_test.go
Normal file
67
agent/health/health_test.go
Normal file
@@ -0,0 +1,67 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package health
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"testing/synctest"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestHealth(t *testing.T) {
|
||||
// Override healthFile to use a temporary directory for this test.
|
||||
originalHealthFile := healthFile
|
||||
tmpDir := t.TempDir()
|
||||
healthFile = filepath.Join(tmpDir, "beszel_health_test")
|
||||
defer func() { healthFile = originalHealthFile }()
|
||||
|
||||
t.Run("check with no health file", func(t *testing.T) {
|
||||
err := Check()
|
||||
require.Error(t, err)
|
||||
assert.True(t, os.IsNotExist(err), "expected a file-not-exist error, but got: %v", err)
|
||||
})
|
||||
|
||||
t.Run("update and check", func(t *testing.T) {
|
||||
err := Update()
|
||||
require.NoError(t, err, "Update() failed")
|
||||
|
||||
err = Check()
|
||||
assert.NoError(t, err, "Check() failed immediately after Update()")
|
||||
})
|
||||
|
||||
// This test uses synctest to simulate time passing.
|
||||
// NOTE: This test requires GOEXPERIMENT=synctest to run.
|
||||
t.Run("check with simulated time", func(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
// Update the file to set the initial timestamp.
|
||||
require.NoError(t, Update(), "Update() failed inside synctest")
|
||||
|
||||
// Set the mtime to the current fake time to align the file's timestamp with the simulated clock.
|
||||
now := time.Now()
|
||||
require.NoError(t, os.Chtimes(healthFile, now, now), "Chtimes failed")
|
||||
|
||||
// Wait a duration less than the threshold.
|
||||
time.Sleep(89 * time.Second)
|
||||
synctest.Wait()
|
||||
|
||||
// The check should still pass.
|
||||
assert.NoError(t, Check(), "Check() failed after 89s")
|
||||
|
||||
// Wait for the total duration to exceed the threshold.
|
||||
time.Sleep(5 * time.Second)
|
||||
synctest.Wait()
|
||||
|
||||
// The check should now fail as unhealthy.
|
||||
err := Check()
|
||||
require.Error(t, err, "Check() should have failed after 91s")
|
||||
assert.Equal(t, "unhealthy", err.Error(), "Check() returned wrong error")
|
||||
})
|
||||
})
|
||||
}
|
80
agent/lhm/beszel_lhm.cs
Normal file
80
agent/lhm/beszel_lhm.cs
Normal file
@@ -0,0 +1,80 @@
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using LibreHardwareMonitor.Hardware;
|
||||
|
||||
class Program
|
||||
{
|
||||
static void Main()
|
||||
{
|
||||
var computer = new Computer
|
||||
{
|
||||
IsCpuEnabled = true,
|
||||
IsGpuEnabled = true,
|
||||
IsMemoryEnabled = true,
|
||||
IsMotherboardEnabled = true,
|
||||
IsStorageEnabled = true,
|
||||
// IsPsuEnabled = true,
|
||||
// IsNetworkEnabled = true,
|
||||
};
|
||||
computer.Open();
|
||||
|
||||
var reader = Console.In;
|
||||
var writer = Console.Out;
|
||||
|
||||
string line;
|
||||
while ((line = reader.ReadLine()) != null)
|
||||
{
|
||||
if (line.Trim().Equals("getTemps", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
foreach (var hw in computer.Hardware)
|
||||
{
|
||||
// process main hardware sensors
|
||||
ProcessSensors(hw, writer);
|
||||
|
||||
// process subhardware sensors
|
||||
foreach (var subhardware in hw.SubHardware)
|
||||
{
|
||||
ProcessSensors(subhardware, writer);
|
||||
}
|
||||
}
|
||||
// send empty line to signal end of sensor data
|
||||
writer.WriteLine();
|
||||
writer.Flush();
|
||||
}
|
||||
}
|
||||
|
||||
computer.Close();
|
||||
}
|
||||
|
||||
static void ProcessSensors(IHardware hardware, System.IO.TextWriter writer)
|
||||
{
|
||||
var updated = false;
|
||||
foreach (var sensor in hardware.Sensors)
|
||||
{
|
||||
var validTemp = sensor.SensorType == SensorType.Temperature && sensor.Value.HasValue;
|
||||
if (!validTemp || sensor.Name.Contains("Distance"))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!updated)
|
||||
{
|
||||
hardware.Update();
|
||||
updated = true;
|
||||
}
|
||||
|
||||
var name = sensor.Name;
|
||||
// if sensor.Name starts with "Temperature" replace with hardware.Identifier but retain the rest of the name.
|
||||
// usually this is a number like Temperature 3
|
||||
if (sensor.Name.StartsWith("Temperature"))
|
||||
{
|
||||
name = hardware.Identifier.ToString().Replace("/", "_").TrimStart('_') + sensor.Name.Substring(11);
|
||||
}
|
||||
|
||||
// invariant culture assures the value is parsable as a float
|
||||
var value = sensor.Value.Value.ToString("0.##", CultureInfo.InvariantCulture);
|
||||
// write the name and value to the writer
|
||||
writer.WriteLine($"{name}|{value}");
|
||||
}
|
||||
}
|
||||
}
|
11
agent/lhm/beszel_lhm.csproj
Normal file
11
agent/lhm/beszel_lhm.csproj
Normal file
@@ -0,0 +1,11 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net48</TargetFramework>
|
||||
<Platforms>x64</Platforms>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="LibreHardwareMonitorLib" Version="0.9.4" />
|
||||
</ItemGroup>
|
||||
</Project>
|
67
agent/network.go
Normal file
67
agent/network.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
psutilNet "github.com/shirou/gopsutil/v4/net"
|
||||
)
|
||||
|
||||
func (a *Agent) initializeNetIoStats() {
|
||||
// reset valid network interfaces
|
||||
a.netInterfaces = make(map[string]struct{}, 0)
|
||||
|
||||
// map of network interface names passed in via NICS env var
|
||||
var nicsMap map[string]struct{}
|
||||
nics, nicsEnvExists := GetEnv("NICS")
|
||||
if nicsEnvExists {
|
||||
nicsMap = make(map[string]struct{}, 0)
|
||||
for nic := range strings.SplitSeq(nics, ",") {
|
||||
nicsMap[nic] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// reset network I/O stats
|
||||
a.netIoStats.BytesSent = 0
|
||||
a.netIoStats.BytesRecv = 0
|
||||
|
||||
// get intial network I/O stats
|
||||
if netIO, err := psutilNet.IOCounters(true); err == nil {
|
||||
a.netIoStats.Time = time.Now()
|
||||
for _, v := range netIO {
|
||||
switch {
|
||||
// skip if nics exists and the interface is not in the list
|
||||
case nicsEnvExists:
|
||||
if _, nameInNics := nicsMap[v.Name]; !nameInNics {
|
||||
continue
|
||||
}
|
||||
// otherwise run the interface name through the skipNetworkInterface function
|
||||
default:
|
||||
if a.skipNetworkInterface(v) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
slog.Info("Detected network interface", "name", v.Name, "sent", v.BytesSent, "recv", v.BytesRecv)
|
||||
a.netIoStats.BytesSent += v.BytesSent
|
||||
a.netIoStats.BytesRecv += v.BytesRecv
|
||||
// store as a valid network interface
|
||||
a.netInterfaces[v.Name] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Agent) skipNetworkInterface(v psutilNet.IOCountersStat) bool {
|
||||
switch {
|
||||
case strings.HasPrefix(v.Name, "lo"),
|
||||
strings.HasPrefix(v.Name, "docker"),
|
||||
strings.HasPrefix(v.Name, "br-"),
|
||||
strings.HasPrefix(v.Name, "veth"),
|
||||
strings.HasPrefix(v.Name, "bond"),
|
||||
v.BytesRecv == 0,
|
||||
v.BytesSent == 0:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
198
agent/sensors.go
Normal file
198
agent/sensors.go
Normal file
@@ -0,0 +1,198 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"path"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/henrygd/beszel/src/entities/system"
|
||||
|
||||
"github.com/shirou/gopsutil/v4/common"
|
||||
"github.com/shirou/gopsutil/v4/sensors"
|
||||
)
|
||||
|
||||
type SensorConfig struct {
|
||||
context context.Context
|
||||
sensors map[string]struct{}
|
||||
primarySensor string
|
||||
isBlacklist bool
|
||||
hasWildcards bool
|
||||
skipCollection bool
|
||||
}
|
||||
|
||||
func (a *Agent) newSensorConfig() *SensorConfig {
|
||||
primarySensor, _ := GetEnv("PRIMARY_SENSOR")
|
||||
sysSensors, _ := GetEnv("SYS_SENSORS")
|
||||
sensorsEnvVal, sensorsSet := GetEnv("SENSORS")
|
||||
skipCollection := sensorsSet && sensorsEnvVal == ""
|
||||
|
||||
return a.newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal, skipCollection)
|
||||
}
|
||||
|
||||
// Matches sensors.TemperaturesWithContext to allow for panic recovery (gopsutil/issues/1832)
|
||||
type getTempsFn func(ctx context.Context) ([]sensors.TemperatureStat, error)
|
||||
|
||||
// newSensorConfigWithEnv creates a SensorConfig with the provided environment variables
|
||||
// sensorsSet indicates if the SENSORS environment variable was explicitly set (even to empty string)
|
||||
func (a *Agent) newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal string, skipCollection bool) *SensorConfig {
|
||||
config := &SensorConfig{
|
||||
context: context.Background(),
|
||||
primarySensor: primarySensor,
|
||||
skipCollection: skipCollection,
|
||||
sensors: make(map[string]struct{}),
|
||||
}
|
||||
|
||||
// Set sensors context (allows overriding sys location for sensors)
|
||||
if sysSensors != "" {
|
||||
slog.Info("SYS_SENSORS", "path", sysSensors)
|
||||
config.context = context.WithValue(config.context,
|
||||
common.EnvKey, common.EnvMap{common.HostSysEnvKey: sysSensors},
|
||||
)
|
||||
}
|
||||
|
||||
// handle blacklist
|
||||
if strings.HasPrefix(sensorsEnvVal, "-") {
|
||||
config.isBlacklist = true
|
||||
sensorsEnvVal = sensorsEnvVal[1:]
|
||||
}
|
||||
|
||||
for sensor := range strings.SplitSeq(sensorsEnvVal, ",") {
|
||||
sensor = strings.TrimSpace(sensor)
|
||||
if sensor != "" {
|
||||
config.sensors[sensor] = struct{}{}
|
||||
if strings.Contains(sensor, "*") {
|
||||
config.hasWildcards = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return config
|
||||
}
|
||||
|
||||
// updateTemperatures updates the agent with the latest sensor temperatures
|
||||
func (a *Agent) updateTemperatures(systemStats *system.Stats) {
|
||||
// skip if sensors whitelist is set to empty string
|
||||
if a.sensorConfig.skipCollection {
|
||||
slog.Debug("Skipping temperature collection")
|
||||
return
|
||||
}
|
||||
|
||||
// reset high temp
|
||||
a.systemInfo.DashboardTemp = 0
|
||||
|
||||
temps, err := a.getTempsWithPanicRecovery(getSensorTemps)
|
||||
if err != nil {
|
||||
// retry once on panic (gopsutil/issues/1832)
|
||||
temps, err = a.getTempsWithPanicRecovery(getSensorTemps)
|
||||
if err != nil {
|
||||
slog.Warn("Error updating temperatures", "err", err)
|
||||
if len(systemStats.Temperatures) > 0 {
|
||||
systemStats.Temperatures = make(map[string]float64)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
slog.Debug("Temperature", "sensors", temps)
|
||||
|
||||
// return if no sensors
|
||||
if len(temps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
systemStats.Temperatures = make(map[string]float64, len(temps))
|
||||
for i, sensor := range temps {
|
||||
// check for malformed strings on darwin (gopsutil/issues/1832)
|
||||
if runtime.GOOS == "darwin" && !utf8.ValidString(sensor.SensorKey) {
|
||||
continue
|
||||
}
|
||||
|
||||
// scale temperature
|
||||
if sensor.Temperature != 0 && sensor.Temperature < 1 {
|
||||
sensor.Temperature = scaleTemperature(sensor.Temperature)
|
||||
}
|
||||
// skip if temperature is unreasonable
|
||||
if sensor.Temperature <= 0 || sensor.Temperature >= 200 {
|
||||
continue
|
||||
}
|
||||
sensorName := sensor.SensorKey
|
||||
if _, ok := systemStats.Temperatures[sensorName]; ok {
|
||||
// if key already exists, append int to key
|
||||
sensorName = sensorName + "_" + strconv.Itoa(i)
|
||||
}
|
||||
// skip if not in whitelist or blacklist
|
||||
if !isValidSensor(sensorName, a.sensorConfig) {
|
||||
continue
|
||||
}
|
||||
// set dashboard temperature
|
||||
switch a.sensorConfig.primarySensor {
|
||||
case "":
|
||||
a.systemInfo.DashboardTemp = max(a.systemInfo.DashboardTemp, sensor.Temperature)
|
||||
case sensorName:
|
||||
a.systemInfo.DashboardTemp = sensor.Temperature
|
||||
}
|
||||
systemStats.Temperatures[sensorName] = twoDecimals(sensor.Temperature)
|
||||
}
|
||||
}
|
||||
|
||||
// getTempsWithPanicRecovery wraps sensors.TemperaturesWithContext to recover from panics (gopsutil/issues/1832)
|
||||
func (a *Agent) getTempsWithPanicRecovery(getTemps getTempsFn) (temps []sensors.TemperatureStat, err error) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
err = fmt.Errorf("panic: %v", r)
|
||||
}
|
||||
}()
|
||||
// get sensor data (error ignored intentionally as it may be only with one sensor)
|
||||
temps, _ = getTemps(a.sensorConfig.context)
|
||||
return
|
||||
}
|
||||
|
||||
// isValidSensor checks if a sensor is valid based on the sensor name and the sensor config
|
||||
func isValidSensor(sensorName string, config *SensorConfig) bool {
|
||||
// if no sensors configured, everything is valid
|
||||
if len(config.sensors) == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
// Exact match - return true if whitelist, false if blacklist
|
||||
if _, exactMatch := config.sensors[sensorName]; exactMatch {
|
||||
return !config.isBlacklist
|
||||
}
|
||||
|
||||
// If no wildcards, return true if blacklist, false if whitelist
|
||||
if !config.hasWildcards {
|
||||
return config.isBlacklist
|
||||
}
|
||||
|
||||
// Check for wildcard patterns
|
||||
for pattern := range config.sensors {
|
||||
if !strings.Contains(pattern, "*") {
|
||||
continue
|
||||
}
|
||||
if match, _ := path.Match(pattern, sensorName); match {
|
||||
return !config.isBlacklist
|
||||
}
|
||||
}
|
||||
|
||||
return config.isBlacklist
|
||||
}
|
||||
|
||||
// scaleTemperature scales temperatures in fractional values to reasonable Celsius values
|
||||
func scaleTemperature(temp float64) float64 {
|
||||
if temp > 1 {
|
||||
return temp
|
||||
}
|
||||
scaled100 := temp * 100
|
||||
scaled1000 := temp * 1000
|
||||
|
||||
if scaled100 >= 15 && scaled100 <= 95 {
|
||||
return scaled100
|
||||
} else if scaled1000 >= 15 && scaled1000 <= 95 {
|
||||
return scaled1000
|
||||
}
|
||||
return scaled100
|
||||
}
|
9
agent/sensors_default.go
Normal file
9
agent/sensors_default.go
Normal file
@@ -0,0 +1,9 @@
|
||||
//go:build !windows
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"github.com/shirou/gopsutil/v4/sensors"
|
||||
)
|
||||
|
||||
var getSensorTemps = sensors.TemperaturesWithContext
|
554
agent/sensors_test.go
Normal file
554
agent/sensors_test.go
Normal file
@@ -0,0 +1,554 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/src/entities/system"
|
||||
|
||||
"github.com/shirou/gopsutil/v4/common"
|
||||
"github.com/shirou/gopsutil/v4/sensors"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestIsValidSensor(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
sensorName string
|
||||
config *SensorConfig
|
||||
expectedValid bool
|
||||
}{
|
||||
{
|
||||
name: "Whitelist - sensor in list",
|
||||
sensorName: "cpu_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{"cpu_temp": {}},
|
||||
isBlacklist: false,
|
||||
},
|
||||
expectedValid: true,
|
||||
},
|
||||
{
|
||||
name: "Whitelist - sensor not in list",
|
||||
sensorName: "gpu_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{"cpu_temp": {}},
|
||||
isBlacklist: false,
|
||||
},
|
||||
expectedValid: false,
|
||||
},
|
||||
{
|
||||
name: "Blacklist - sensor in list",
|
||||
sensorName: "cpu_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{"cpu_temp": {}},
|
||||
isBlacklist: true,
|
||||
},
|
||||
expectedValid: false,
|
||||
},
|
||||
{
|
||||
name: "Blacklist - sensor not in list",
|
||||
sensorName: "gpu_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{"cpu_temp": {}},
|
||||
isBlacklist: true,
|
||||
},
|
||||
expectedValid: true,
|
||||
},
|
||||
{
|
||||
name: "Whitelist with wildcard - matching pattern",
|
||||
sensorName: "core_0_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{"core_*_temp": {}},
|
||||
isBlacklist: false,
|
||||
hasWildcards: true,
|
||||
},
|
||||
expectedValid: true,
|
||||
},
|
||||
{
|
||||
name: "Whitelist with wildcard - non-matching pattern",
|
||||
sensorName: "gpu_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{"core_*_temp": {}},
|
||||
isBlacklist: false,
|
||||
hasWildcards: true,
|
||||
},
|
||||
expectedValid: false,
|
||||
},
|
||||
{
|
||||
name: "Blacklist with wildcard - matching pattern",
|
||||
sensorName: "core_0_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{"core_*_temp": {}},
|
||||
isBlacklist: true,
|
||||
hasWildcards: true,
|
||||
},
|
||||
expectedValid: false,
|
||||
},
|
||||
{
|
||||
name: "Blacklist with wildcard - non-matching pattern",
|
||||
sensorName: "gpu_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{"core_*_temp": {}},
|
||||
isBlacklist: true,
|
||||
hasWildcards: true,
|
||||
},
|
||||
expectedValid: true,
|
||||
},
|
||||
{
|
||||
name: "No sensors configured",
|
||||
sensorName: "any_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{},
|
||||
isBlacklist: false,
|
||||
hasWildcards: false,
|
||||
skipCollection: false,
|
||||
},
|
||||
expectedValid: true,
|
||||
},
|
||||
{
|
||||
name: "Mixed patterns in whitelist - exact match",
|
||||
sensorName: "cpu_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{"cpu_temp": {}, "core_*_temp": {}},
|
||||
isBlacklist: false,
|
||||
hasWildcards: true,
|
||||
},
|
||||
expectedValid: true,
|
||||
},
|
||||
{
|
||||
name: "Mixed patterns in whitelist - wildcard match",
|
||||
sensorName: "core_1_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{"cpu_temp": {}, "core_*_temp": {}},
|
||||
isBlacklist: false,
|
||||
hasWildcards: true,
|
||||
},
|
||||
expectedValid: true,
|
||||
},
|
||||
{
|
||||
name: "Mixed patterns in blacklist - exact match",
|
||||
sensorName: "cpu_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{"cpu_temp": {}, "core_*_temp": {}},
|
||||
isBlacklist: true,
|
||||
hasWildcards: true,
|
||||
},
|
||||
expectedValid: false,
|
||||
},
|
||||
{
|
||||
name: "Mixed patterns in blacklist - wildcard match",
|
||||
sensorName: "core_1_temp",
|
||||
config: &SensorConfig{
|
||||
sensors: map[string]struct{}{"cpu_temp": {}, "core_*_temp": {}},
|
||||
isBlacklist: true,
|
||||
hasWildcards: true,
|
||||
},
|
||||
expectedValid: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := isValidSensor(tt.sensorName, tt.config)
|
||||
assert.Equal(t, tt.expectedValid, result, "isValidSensor(%q, config) returned unexpected result", tt.sensorName)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewSensorConfigWithEnv(t *testing.T) {
|
||||
agent := &Agent{}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
primarySensor string
|
||||
sysSensors string
|
||||
sensors string
|
||||
skipCollection bool
|
||||
expectedConfig *SensorConfig
|
||||
}{
|
||||
{
|
||||
name: "Empty configuration",
|
||||
primarySensor: "",
|
||||
sysSensors: "",
|
||||
sensors: "",
|
||||
expectedConfig: &SensorConfig{
|
||||
context: context.Background(),
|
||||
primarySensor: "",
|
||||
sensors: map[string]struct{}{},
|
||||
isBlacklist: false,
|
||||
hasWildcards: false,
|
||||
skipCollection: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Explicitly set to empty string",
|
||||
primarySensor: "",
|
||||
sysSensors: "",
|
||||
sensors: "",
|
||||
skipCollection: true,
|
||||
expectedConfig: &SensorConfig{
|
||||
context: context.Background(),
|
||||
primarySensor: "",
|
||||
sensors: map[string]struct{}{},
|
||||
isBlacklist: false,
|
||||
hasWildcards: false,
|
||||
skipCollection: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Primary sensor only - should create sensor map",
|
||||
primarySensor: "cpu_temp",
|
||||
sysSensors: "",
|
||||
sensors: "",
|
||||
expectedConfig: &SensorConfig{
|
||||
context: context.Background(),
|
||||
primarySensor: "cpu_temp",
|
||||
sensors: map[string]struct{}{},
|
||||
isBlacklist: false,
|
||||
hasWildcards: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Whitelist sensors",
|
||||
primarySensor: "cpu_temp",
|
||||
sysSensors: "",
|
||||
sensors: "cpu_temp,gpu_temp",
|
||||
expectedConfig: &SensorConfig{
|
||||
context: context.Background(),
|
||||
primarySensor: "cpu_temp",
|
||||
sensors: map[string]struct{}{
|
||||
"cpu_temp": {},
|
||||
"gpu_temp": {},
|
||||
},
|
||||
isBlacklist: false,
|
||||
hasWildcards: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Blacklist sensors",
|
||||
primarySensor: "cpu_temp",
|
||||
sysSensors: "",
|
||||
sensors: "-cpu_temp,gpu_temp",
|
||||
expectedConfig: &SensorConfig{
|
||||
context: context.Background(),
|
||||
primarySensor: "cpu_temp",
|
||||
sensors: map[string]struct{}{
|
||||
"cpu_temp": {},
|
||||
"gpu_temp": {},
|
||||
},
|
||||
isBlacklist: true,
|
||||
hasWildcards: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Sensors with wildcard",
|
||||
primarySensor: "cpu_temp",
|
||||
sysSensors: "",
|
||||
sensors: "cpu_*,gpu_temp",
|
||||
expectedConfig: &SensorConfig{
|
||||
context: context.Background(),
|
||||
primarySensor: "cpu_temp",
|
||||
sensors: map[string]struct{}{
|
||||
"cpu_*": {},
|
||||
"gpu_temp": {},
|
||||
},
|
||||
isBlacklist: false,
|
||||
hasWildcards: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Sensors with whitespace",
|
||||
primarySensor: "cpu_temp",
|
||||
sysSensors: "",
|
||||
sensors: "cpu_*, gpu_temp",
|
||||
expectedConfig: &SensorConfig{
|
||||
context: context.Background(),
|
||||
primarySensor: "cpu_temp",
|
||||
sensors: map[string]struct{}{
|
||||
"cpu_*": {},
|
||||
"gpu_temp": {},
|
||||
},
|
||||
isBlacklist: false,
|
||||
hasWildcards: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "With SYS_SENSORS path",
|
||||
primarySensor: "cpu_temp",
|
||||
sysSensors: "/custom/path",
|
||||
sensors: "cpu_temp",
|
||||
expectedConfig: &SensorConfig{
|
||||
primarySensor: "cpu_temp",
|
||||
sensors: map[string]struct{}{
|
||||
"cpu_temp": {},
|
||||
},
|
||||
isBlacklist: false,
|
||||
hasWildcards: false,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := agent.newSensorConfigWithEnv(tt.primarySensor, tt.sysSensors, tt.sensors, tt.skipCollection)
|
||||
|
||||
// Check primary sensor
|
||||
assert.Equal(t, tt.expectedConfig.primarySensor, result.primarySensor)
|
||||
|
||||
// Check sensor map
|
||||
if tt.expectedConfig.sensors == nil {
|
||||
assert.Nil(t, result.sensors)
|
||||
} else {
|
||||
assert.Equal(t, len(tt.expectedConfig.sensors), len(result.sensors))
|
||||
for sensor := range tt.expectedConfig.sensors {
|
||||
_, exists := result.sensors[sensor]
|
||||
assert.True(t, exists, "Sensor %s should exist in the result", sensor)
|
||||
}
|
||||
}
|
||||
|
||||
// Check flags
|
||||
assert.Equal(t, tt.expectedConfig.isBlacklist, result.isBlacklist)
|
||||
assert.Equal(t, tt.expectedConfig.hasWildcards, result.hasWildcards)
|
||||
|
||||
// Check context
|
||||
if tt.sysSensors != "" {
|
||||
// Verify context contains correct values
|
||||
envMap, ok := result.context.Value(common.EnvKey).(common.EnvMap)
|
||||
require.True(t, ok, "Context should contain EnvMap")
|
||||
sysPath, ok := envMap[common.HostSysEnvKey]
|
||||
require.True(t, ok, "EnvMap should contain HostSysEnvKey")
|
||||
assert.Equal(t, tt.sysSensors, sysPath)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewSensorConfig(t *testing.T) {
|
||||
// Save original environment variables
|
||||
originalPrimary, hasPrimary := os.LookupEnv("BESZEL_AGENT_PRIMARY_SENSOR")
|
||||
originalSys, hasSys := os.LookupEnv("BESZEL_AGENT_SYS_SENSORS")
|
||||
originalSensors, hasSensors := os.LookupEnv("BESZEL_AGENT_SENSORS")
|
||||
|
||||
// Restore environment variables after the test
|
||||
defer func() {
|
||||
// Clean up test environment variables
|
||||
os.Unsetenv("BESZEL_AGENT_PRIMARY_SENSOR")
|
||||
os.Unsetenv("BESZEL_AGENT_SYS_SENSORS")
|
||||
os.Unsetenv("BESZEL_AGENT_SENSORS")
|
||||
|
||||
// Restore original values if they existed
|
||||
if hasPrimary {
|
||||
os.Setenv("BESZEL_AGENT_PRIMARY_SENSOR", originalPrimary)
|
||||
}
|
||||
if hasSys {
|
||||
os.Setenv("BESZEL_AGENT_SYS_SENSORS", originalSys)
|
||||
}
|
||||
if hasSensors {
|
||||
os.Setenv("BESZEL_AGENT_SENSORS", originalSensors)
|
||||
}
|
||||
}()
|
||||
|
||||
// Set test environment variables
|
||||
os.Setenv("BESZEL_AGENT_PRIMARY_SENSOR", "test_primary")
|
||||
os.Setenv("BESZEL_AGENT_SYS_SENSORS", "/test/path")
|
||||
os.Setenv("BESZEL_AGENT_SENSORS", "test_sensor1,test_*,test_sensor3")
|
||||
|
||||
agent := &Agent{}
|
||||
result := agent.newSensorConfig()
|
||||
|
||||
// Verify results
|
||||
assert.Equal(t, "test_primary", result.primarySensor)
|
||||
assert.NotNil(t, result.sensors)
|
||||
assert.Equal(t, 3, len(result.sensors))
|
||||
assert.True(t, result.hasWildcards)
|
||||
assert.False(t, result.isBlacklist)
|
||||
|
||||
// Check that sys sensors path is in context
|
||||
envMap, ok := result.context.Value(common.EnvKey).(common.EnvMap)
|
||||
require.True(t, ok, "Context should contain EnvMap")
|
||||
sysPath, ok := envMap[common.HostSysEnvKey]
|
||||
require.True(t, ok, "EnvMap should contain HostSysEnvKey")
|
||||
assert.Equal(t, "/test/path", sysPath)
|
||||
}
|
||||
|
||||
func TestScaleTemperature(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input float64
|
||||
expected float64
|
||||
desc string
|
||||
}{
|
||||
// Normal temperatures (no scaling needed)
|
||||
{"normal_cpu_temp", 45.0, 45.0, "Normal CPU temperature"},
|
||||
{"normal_room_temp", 25.0, 25.0, "Normal room temperature"},
|
||||
{"high_cpu_temp", 85.0, 85.0, "High CPU temperature"},
|
||||
// Zero temperature
|
||||
{"zero_temp", 0.0, 0.0, "Zero temperature"},
|
||||
// Fractional values that should use 100x scaling
|
||||
{"fractional_45c", 0.45, 45.0, "0.45 should become 45°C (100x)"},
|
||||
{"fractional_25c", 0.25, 25.0, "0.25 should become 25°C (100x)"},
|
||||
{"fractional_60c", 0.60, 60.0, "0.60 should become 60°C (100x)"},
|
||||
{"fractional_75c", 0.75, 75.0, "0.75 should become 75°C (100x)"},
|
||||
{"fractional_30c", 0.30, 30.0, "0.30 should become 30°C (100x)"},
|
||||
// Fractional values that should use 1000x scaling
|
||||
{"millifractional_45c", 0.045, 45.0, "0.045 should become 45°C (1000x)"},
|
||||
{"millifractional_25c", 0.025, 25.0, "0.025 should become 25°C (1000x)"},
|
||||
{"millifractional_60c", 0.060, 60.0, "0.060 should become 60°C (1000x)"},
|
||||
{"millifractional_75c", 0.075, 75.0, "0.075 should become 75°C (1000x)"},
|
||||
{"millifractional_35c", 0.035, 35.0, "0.035 should become 35°C (1000x)"},
|
||||
// Edge cases - values outside reasonable range
|
||||
{"very_low_fractional", 0.01, 1.0, "0.01 should default to 100x scaling (1°C)"},
|
||||
{"very_high_fractional", 0.99, 99.0, "0.99 should default to 100x scaling (99°C)"},
|
||||
{"extremely_low", 0.001, 0.1, "0.001 should default to 100x scaling (0.1°C)"},
|
||||
// Boundary cases around the reasonable range (15-95°C)
|
||||
{"boundary_low_100x", 0.15, 15.0, "0.15 should use 100x scaling (15°C)"},
|
||||
{"boundary_high_100x", 0.95, 95.0, "0.95 should use 100x scaling (95°C)"},
|
||||
{"boundary_low_1000x", 0.015, 15.0, "0.015 should use 1000x scaling (15°C)"},
|
||||
{"boundary_high_1000x", 0.095, 95.0, "0.095 should use 1000x scaling (95°C)"},
|
||||
// Values just outside reasonable range
|
||||
{"just_below_range_100x", 0.14, 14.0, "0.14 should default to 100x (14°C)"},
|
||||
{"just_above_range_100x", 0.96, 96.0, "0.96 should default to 100x (96°C)"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := scaleTemperature(tt.input)
|
||||
assert.InDelta(t, tt.expected, result, 0.001,
|
||||
"scaleTemperature(%v) = %v, expected %v (%s)",
|
||||
tt.input, result, tt.expected, tt.desc)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestScaleTemperatureLogic(t *testing.T) {
|
||||
// Test the logic flow for ambiguous cases
|
||||
t.Run("prefers_100x_when_both_valid", func(t *testing.T) {
|
||||
// 0.5 could be 50°C (100x) or 500°C (1000x)
|
||||
// Should prefer 100x since it's tried first and is in range
|
||||
result := scaleTemperature(0.5)
|
||||
expected := 50.0
|
||||
assert.InDelta(t, expected, result, 0.001,
|
||||
"scaleTemperature(0.5) = %v, expected %v (should prefer 100x scaling)",
|
||||
result, expected)
|
||||
})
|
||||
|
||||
t.Run("uses_1000x_when_100x_too_low", func(t *testing.T) {
|
||||
// 0.05 -> 5°C (100x, too low) or 50°C (1000x, in range)
|
||||
// Should use 1000x since 100x is below reasonable range
|
||||
result := scaleTemperature(0.05)
|
||||
expected := 50.0
|
||||
assert.InDelta(t, expected, result, 0.001,
|
||||
"scaleTemperature(0.05) = %v, expected %v (should use 1000x scaling)",
|
||||
result, expected)
|
||||
})
|
||||
|
||||
t.Run("defaults_to_100x_when_both_invalid", func(t *testing.T) {
|
||||
// 0.005 -> 0.5°C (100x, too low) or 5°C (1000x, too low)
|
||||
// Should default to 100x scaling
|
||||
result := scaleTemperature(0.005)
|
||||
expected := 0.5
|
||||
assert.InDelta(t, expected, result, 0.001,
|
||||
"scaleTemperature(0.005) = %v, expected %v (should default to 100x)",
|
||||
result, expected)
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetTempsWithPanicRecovery(t *testing.T) {
|
||||
agent := &Agent{
|
||||
systemInfo: system.Info{},
|
||||
sensorConfig: &SensorConfig{
|
||||
context: context.Background(),
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
getTempsFn getTempsFn
|
||||
expectError bool
|
||||
errorMsg string
|
||||
}{
|
||||
{
|
||||
name: "successful_function_call",
|
||||
getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
return []sensors.TemperatureStat{
|
||||
{SensorKey: "test_sensor", Temperature: 45.0},
|
||||
}, nil
|
||||
},
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "function_returns_error",
|
||||
getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
return []sensors.TemperatureStat{
|
||||
{SensorKey: "test_sensor", Temperature: 45.0},
|
||||
}, fmt.Errorf("sensor error")
|
||||
},
|
||||
expectError: false, // getTempsWithPanicRecovery ignores errors from the function
|
||||
},
|
||||
{
|
||||
name: "function_panics_with_string",
|
||||
getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
panic("test panic")
|
||||
},
|
||||
expectError: true,
|
||||
errorMsg: "panic: test panic",
|
||||
},
|
||||
{
|
||||
name: "function_panics_with_error",
|
||||
getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
panic(fmt.Errorf("panic error"))
|
||||
},
|
||||
expectError: true,
|
||||
errorMsg: "panic:",
|
||||
},
|
||||
{
|
||||
name: "function_panics_with_index_out_of_bounds",
|
||||
getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
slice := []int{1, 2, 3}
|
||||
_ = slice[10] // out of bounds panic
|
||||
return nil, nil
|
||||
},
|
||||
expectError: true,
|
||||
errorMsg: "panic:",
|
||||
},
|
||||
{
|
||||
name: "function_panics_with_any_conversion",
|
||||
getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
var i any = "string"
|
||||
_ = i.(int) // type assertion panic
|
||||
return nil, nil
|
||||
},
|
||||
expectError: true,
|
||||
errorMsg: "panic:",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
var temps []sensors.TemperatureStat
|
||||
var err error
|
||||
|
||||
// The function should not panic, regardless of what the injected function does
|
||||
assert.NotPanics(t, func() {
|
||||
temps, err = agent.getTempsWithPanicRecovery(tt.getTempsFn)
|
||||
}, "getTempsWithPanicRecovery should not panic")
|
||||
|
||||
if tt.expectError {
|
||||
assert.Error(t, err, "Expected an error to be returned")
|
||||
if tt.errorMsg != "" {
|
||||
assert.Contains(t, err.Error(), tt.errorMsg,
|
||||
"Error message should contain expected text")
|
||||
}
|
||||
assert.Nil(t, temps, "Temps should be nil when panic occurs")
|
||||
} else {
|
||||
assert.NoError(t, err, "Should not return error for successful calls")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
286
agent/sensors_windows.go
Normal file
286
agent/sensors_windows.go
Normal file
@@ -0,0 +1,286 @@
|
||||
//go:build windows
|
||||
|
||||
//go:generate dotnet build -c Release lhm/beszel_lhm.csproj
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"embed"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/shirou/gopsutil/v4/sensors"
|
||||
)
|
||||
|
||||
// Note: This is always called from Agent.gatherStats() which holds Agent.Lock(),
|
||||
// so no internal concurrency protection is needed.
|
||||
|
||||
// lhmProcess is a wrapper around the LHM .NET process.
|
||||
type lhmProcess struct {
|
||||
cmd *exec.Cmd
|
||||
stdin io.WriteCloser
|
||||
stdout io.ReadCloser
|
||||
scanner *bufio.Scanner
|
||||
isRunning bool
|
||||
stoppedNoSensors bool
|
||||
consecutiveNoSensors uint8
|
||||
execPath string
|
||||
tempDir string
|
||||
}
|
||||
|
||||
//go:embed all:lhm/bin/Release/net48
|
||||
var lhmFs embed.FS
|
||||
|
||||
var (
|
||||
beszelLhm *lhmProcess
|
||||
beszelLhmOnce sync.Once
|
||||
useLHM = os.Getenv("LHM") == "true"
|
||||
)
|
||||
|
||||
var errNoSensors = errors.New("no sensors found (try running as admin with LHM=true)")
|
||||
|
||||
// newlhmProcess copies the embedded LHM executable to a temporary directory and starts it.
|
||||
func newlhmProcess() (*lhmProcess, error) {
|
||||
destDir := filepath.Join(os.TempDir(), "beszel")
|
||||
execPath := filepath.Join(destDir, "beszel_lhm.exe")
|
||||
|
||||
if err := os.MkdirAll(destDir, 0755); err != nil {
|
||||
return nil, fmt.Errorf("failed to create temp directory: %w", err)
|
||||
}
|
||||
|
||||
// Only copy if executable doesn't exist
|
||||
if _, err := os.Stat(execPath); os.IsNotExist(err) {
|
||||
if err := copyEmbeddedDir(lhmFs, "lhm/bin/Release/net48", destDir); err != nil {
|
||||
return nil, fmt.Errorf("failed to copy embedded directory: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
lhm := &lhmProcess{
|
||||
execPath: execPath,
|
||||
tempDir: destDir,
|
||||
}
|
||||
|
||||
if err := lhm.startProcess(); err != nil {
|
||||
return nil, fmt.Errorf("failed to start process: %w", err)
|
||||
}
|
||||
|
||||
return lhm, nil
|
||||
}
|
||||
|
||||
// startProcess starts the external LHM process
|
||||
func (lhm *lhmProcess) startProcess() error {
|
||||
// Clean up any existing process
|
||||
lhm.cleanupProcess()
|
||||
|
||||
cmd := exec.Command(lhm.execPath)
|
||||
stdin, err := cmd.StdinPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
stdin.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
stdin.Close()
|
||||
stdout.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
// Update process state
|
||||
lhm.cmd = cmd
|
||||
lhm.stdin = stdin
|
||||
lhm.stdout = stdout
|
||||
lhm.scanner = bufio.NewScanner(stdout)
|
||||
lhm.isRunning = true
|
||||
|
||||
// Give process a moment to initialize
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupProcess terminates the process and closes resources but preserves files
|
||||
func (lhm *lhmProcess) cleanupProcess() {
|
||||
lhm.isRunning = false
|
||||
|
||||
if lhm.cmd != nil && lhm.cmd.Process != nil {
|
||||
lhm.cmd.Process.Kill()
|
||||
lhm.cmd.Wait()
|
||||
}
|
||||
|
||||
if lhm.stdin != nil {
|
||||
lhm.stdin.Close()
|
||||
lhm.stdin = nil
|
||||
}
|
||||
if lhm.stdout != nil {
|
||||
lhm.stdout.Close()
|
||||
lhm.stdout = nil
|
||||
}
|
||||
|
||||
lhm.cmd = nil
|
||||
lhm.scanner = nil
|
||||
lhm.stoppedNoSensors = false
|
||||
lhm.consecutiveNoSensors = 0
|
||||
}
|
||||
|
||||
func (lhm *lhmProcess) getTemps(ctx context.Context) (temps []sensors.TemperatureStat, err error) {
|
||||
if !useLHM || lhm.stoppedNoSensors {
|
||||
// Fall back to gopsutil if we can't get sensors from LHM
|
||||
return sensors.TemperaturesWithContext(ctx)
|
||||
}
|
||||
|
||||
// Start process if it's not running
|
||||
if !lhm.isRunning || lhm.stdin == nil || lhm.scanner == nil {
|
||||
err := lhm.startProcess()
|
||||
if err != nil {
|
||||
return temps, err
|
||||
}
|
||||
}
|
||||
|
||||
// Send command to process
|
||||
_, err = fmt.Fprintln(lhm.stdin, "getTemps")
|
||||
if err != nil {
|
||||
lhm.isRunning = false
|
||||
return temps, fmt.Errorf("failed to send command: %w", err)
|
||||
}
|
||||
|
||||
// Read all sensor lines until we hit an empty line or EOF
|
||||
for lhm.scanner.Scan() {
|
||||
line := strings.TrimSpace(lhm.scanner.Text())
|
||||
if line == "" {
|
||||
break
|
||||
}
|
||||
|
||||
parts := strings.Split(line, "|")
|
||||
if len(parts) != 2 {
|
||||
slog.Debug("Invalid sensor format", "line", line)
|
||||
continue
|
||||
}
|
||||
|
||||
name := strings.TrimSpace(parts[0])
|
||||
valueStr := strings.TrimSpace(parts[1])
|
||||
|
||||
value, err := strconv.ParseFloat(valueStr, 64)
|
||||
if err != nil {
|
||||
slog.Debug("Failed to parse sensor", "err", err, "line", line)
|
||||
continue
|
||||
}
|
||||
|
||||
if name == "" || value <= 0 || value > 150 {
|
||||
slog.Debug("Invalid sensor", "name", name, "val", value, "line", line)
|
||||
continue
|
||||
}
|
||||
|
||||
temps = append(temps, sensors.TemperatureStat{
|
||||
SensorKey: name,
|
||||
Temperature: value,
|
||||
})
|
||||
}
|
||||
|
||||
if err := lhm.scanner.Err(); err != nil {
|
||||
lhm.isRunning = false
|
||||
return temps, err
|
||||
}
|
||||
|
||||
// Handle no sensors case
|
||||
if len(temps) == 0 {
|
||||
lhm.consecutiveNoSensors++
|
||||
if lhm.consecutiveNoSensors >= 3 {
|
||||
lhm.stoppedNoSensors = true
|
||||
slog.Warn(errNoSensors.Error())
|
||||
lhm.cleanup()
|
||||
}
|
||||
return sensors.TemperaturesWithContext(ctx)
|
||||
}
|
||||
|
||||
lhm.consecutiveNoSensors = 0
|
||||
|
||||
return temps, nil
|
||||
}
|
||||
|
||||
// getSensorTemps attempts to pull sensor temperatures from the embedded LHM process.
|
||||
// NB: LibreHardwareMonitorLib requires admin privileges to access all available sensors.
|
||||
func getSensorTemps(ctx context.Context) (temps []sensors.TemperatureStat, err error) {
|
||||
defer func() {
|
||||
if err != nil {
|
||||
slog.Debug("Error reading sensors", "err", err)
|
||||
}
|
||||
}()
|
||||
|
||||
if !useLHM {
|
||||
return sensors.TemperaturesWithContext(ctx)
|
||||
}
|
||||
|
||||
// Initialize process once
|
||||
beszelLhmOnce.Do(func() {
|
||||
beszelLhm, err = newlhmProcess()
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return temps, fmt.Errorf("failed to initialize lhm: %w", err)
|
||||
}
|
||||
|
||||
if beszelLhm == nil {
|
||||
return temps, fmt.Errorf("lhm not available")
|
||||
}
|
||||
|
||||
return beszelLhm.getTemps(ctx)
|
||||
}
|
||||
|
||||
// cleanup terminates the process and closes resources
|
||||
func (lhm *lhmProcess) cleanup() {
|
||||
lhm.cleanupProcess()
|
||||
if lhm.tempDir != "" {
|
||||
os.RemoveAll(lhm.tempDir)
|
||||
}
|
||||
}
|
||||
|
||||
// copyEmbeddedDir copies the embedded directory to the destination path
|
||||
func copyEmbeddedDir(fs embed.FS, srcPath, destPath string) error {
|
||||
entries, err := fs.ReadDir(srcPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(destPath, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
srcEntryPath := path.Join(srcPath, entry.Name())
|
||||
destEntryPath := filepath.Join(destPath, entry.Name())
|
||||
|
||||
if entry.IsDir() {
|
||||
if err := copyEmbeddedDir(fs, srcEntryPath, destEntryPath); err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
data, err := fs.ReadFile(srcEntryPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.WriteFile(destEntryPath, data, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
224
agent/server.go
Normal file
224
agent/server.go
Normal file
@@ -0,0 +1,224 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/src/common"
|
||||
"github.com/henrygd/beszel/src/entities/system"
|
||||
|
||||
"github.com/blang/semver"
|
||||
"github.com/fxamacker/cbor/v2"
|
||||
"github.com/gliderlabs/ssh"
|
||||
gossh "golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
// ServerOptions contains configuration options for starting the SSH server.
|
||||
type ServerOptions struct {
|
||||
Addr string // Network address to listen on (e.g., ":45876" or "/path/to/socket")
|
||||
Network string // Network type ("tcp" or "unix")
|
||||
Keys []gossh.PublicKey // SSH public keys for authentication
|
||||
}
|
||||
|
||||
// hubVersions caches hub versions by session ID to avoid repeated parsing.
|
||||
var hubVersions map[string]semver.Version
|
||||
|
||||
// StartServer starts the SSH server with the provided options.
|
||||
// It configures the server with secure defaults, sets up authentication,
|
||||
// and begins listening for connections. Returns an error if the server
|
||||
// is already running or if there's an issue starting the server.
|
||||
func (a *Agent) StartServer(opts ServerOptions) error {
|
||||
if a.server != nil {
|
||||
return errors.New("server already started")
|
||||
}
|
||||
|
||||
slog.Info("Starting SSH server", "addr", opts.Addr, "network", opts.Network)
|
||||
|
||||
if opts.Network == "unix" {
|
||||
// remove existing socket file if it exists
|
||||
if err := os.Remove(opts.Addr); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// start listening on the address
|
||||
ln, err := net.Listen(opts.Network, opts.Addr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer ln.Close()
|
||||
|
||||
// base config (limit to allowed algorithms)
|
||||
config := &gossh.ServerConfig{
|
||||
ServerVersion: fmt.Sprintf("SSH-2.0-%s_%s", beszel.AppName, beszel.Version),
|
||||
}
|
||||
config.KeyExchanges = common.DefaultKeyExchanges
|
||||
config.MACs = common.DefaultMACs
|
||||
config.Ciphers = common.DefaultCiphers
|
||||
|
||||
// set default handler
|
||||
ssh.Handle(a.handleSession)
|
||||
|
||||
a.server = &ssh.Server{
|
||||
ServerConfigCallback: func(ctx ssh.Context) *gossh.ServerConfig {
|
||||
return config
|
||||
},
|
||||
// check public key(s)
|
||||
PublicKeyHandler: func(ctx ssh.Context, key ssh.PublicKey) bool {
|
||||
remoteAddr := ctx.RemoteAddr()
|
||||
for _, pubKey := range opts.Keys {
|
||||
if ssh.KeysEqual(key, pubKey) {
|
||||
slog.Info("SSH connected", "addr", remoteAddr)
|
||||
return true
|
||||
}
|
||||
}
|
||||
slog.Warn("Invalid SSH key", "addr", remoteAddr)
|
||||
return false
|
||||
},
|
||||
// disable pty
|
||||
PtyCallback: func(ctx ssh.Context, pty ssh.Pty) bool {
|
||||
return false
|
||||
},
|
||||
// close idle connections after 70 seconds
|
||||
IdleTimeout: 70 * time.Second,
|
||||
}
|
||||
|
||||
// Start SSH server on the listener
|
||||
return a.server.Serve(ln)
|
||||
}
|
||||
|
||||
// getHubVersion retrieves and caches the hub version for a given session.
|
||||
// It extracts the version from the SSH client version string and caches
|
||||
// it to avoid repeated parsing. Returns a zero version if parsing fails.
|
||||
func (a *Agent) getHubVersion(sessionId string, sessionCtx ssh.Context) semver.Version {
|
||||
if hubVersions == nil {
|
||||
hubVersions = make(map[string]semver.Version, 1)
|
||||
}
|
||||
hubVersion, ok := hubVersions[sessionId]
|
||||
if ok {
|
||||
return hubVersion
|
||||
}
|
||||
// Extract hub version from SSH client version
|
||||
clientVersion := sessionCtx.Value(ssh.ContextKeyClientVersion)
|
||||
if versionStr, ok := clientVersion.(string); ok {
|
||||
hubVersion, _ = extractHubVersion(versionStr)
|
||||
}
|
||||
hubVersions[sessionId] = hubVersion
|
||||
return hubVersion
|
||||
}
|
||||
|
||||
// handleSession handles an incoming SSH session by gathering system statistics
|
||||
// and sending them to the hub. It signals connection events, determines the
|
||||
// appropriate encoding format based on hub version, and exits with appropriate
|
||||
// status codes.
|
||||
func (a *Agent) handleSession(s ssh.Session) {
|
||||
a.connectionManager.eventChan <- SSHConnect
|
||||
|
||||
sessionCtx := s.Context()
|
||||
sessionID := sessionCtx.SessionID()
|
||||
|
||||
hubVersion := a.getHubVersion(sessionID, sessionCtx)
|
||||
|
||||
stats := a.gatherStats(sessionID)
|
||||
|
||||
err := a.writeToSession(s, stats, hubVersion)
|
||||
if err != nil {
|
||||
slog.Error("Error encoding stats", "err", err, "stats", stats)
|
||||
s.Exit(1)
|
||||
} else {
|
||||
s.Exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
// writeToSession encodes and writes system statistics to the session.
|
||||
// It chooses between CBOR and JSON encoding based on the hub version,
|
||||
// using CBOR for newer versions and JSON for legacy compatibility.
|
||||
func (a *Agent) writeToSession(w io.Writer, stats *system.CombinedData, hubVersion semver.Version) error {
|
||||
if hubVersion.GTE(beszel.MinVersionCbor) {
|
||||
return cbor.NewEncoder(w).Encode(stats)
|
||||
}
|
||||
return json.NewEncoder(w).Encode(stats)
|
||||
}
|
||||
|
||||
// extractHubVersion extracts the beszel version from SSH client version string.
|
||||
// Expected format: "SSH-2.0-beszel_X.Y.Z" or "beszel_X.Y.Z"
|
||||
func extractHubVersion(versionString string) (semver.Version, error) {
|
||||
_, after, _ := strings.Cut(versionString, "_")
|
||||
return semver.Parse(after)
|
||||
}
|
||||
|
||||
// ParseKeys parses a string containing SSH public keys in authorized_keys format.
|
||||
// It returns a slice of ssh.PublicKey and an error if any key fails to parse.
|
||||
func ParseKeys(input string) ([]gossh.PublicKey, error) {
|
||||
var parsedKeys []gossh.PublicKey
|
||||
for line := range strings.Lines(input) {
|
||||
line = strings.TrimSpace(line)
|
||||
// Skip empty lines or comments
|
||||
if len(line) == 0 || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
// Parse the key
|
||||
parsedKey, _, _, _, err := gossh.ParseAuthorizedKey([]byte(line))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse key: %s, error: %w", line, err)
|
||||
}
|
||||
parsedKeys = append(parsedKeys, parsedKey)
|
||||
}
|
||||
return parsedKeys, nil
|
||||
}
|
||||
|
||||
// GetAddress determines the network address to listen on from various sources.
|
||||
// It checks the provided address, then environment variables (LISTEN, PORT),
|
||||
// and finally defaults to ":45876".
|
||||
func GetAddress(addr string) string {
|
||||
if addr == "" {
|
||||
addr, _ = GetEnv("LISTEN")
|
||||
}
|
||||
if addr == "" {
|
||||
// Legacy PORT environment variable support
|
||||
addr, _ = GetEnv("PORT")
|
||||
}
|
||||
if addr == "" {
|
||||
return ":45876"
|
||||
}
|
||||
// prefix with : if only port was provided
|
||||
if GetNetwork(addr) != "unix" && !strings.Contains(addr, ":") {
|
||||
addr = ":" + addr
|
||||
}
|
||||
return addr
|
||||
}
|
||||
|
||||
// GetNetwork determines the network type based on the address format.
|
||||
// It checks the NETWORK environment variable first, then infers from
|
||||
// the address format: addresses starting with "/" are "unix", others are "tcp".
|
||||
func GetNetwork(addr string) string {
|
||||
if network, ok := GetEnv("NETWORK"); ok && network != "" {
|
||||
return network
|
||||
}
|
||||
if strings.HasPrefix(addr, "/") {
|
||||
return "unix"
|
||||
}
|
||||
return "tcp"
|
||||
}
|
||||
|
||||
// StopServer stops the SSH server if it's running.
|
||||
// It returns an error if the server is not running or if there's an error stopping it.
|
||||
func (a *Agent) StopServer() error {
|
||||
if a.server == nil {
|
||||
return errors.New("SSH server not running")
|
||||
}
|
||||
|
||||
slog.Info("Stopping SSH server")
|
||||
_ = a.server.Close()
|
||||
a.server = nil
|
||||
a.connectionManager.eventChan <- SSHDisconnect
|
||||
return nil
|
||||
}
|
606
agent/server_test.go
Normal file
606
agent/server_test.go
Normal file
@@ -0,0 +1,606 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/src/entities/container"
|
||||
"github.com/henrygd/beszel/src/entities/system"
|
||||
|
||||
"github.com/blang/semver"
|
||||
"github.com/fxamacker/cbor/v2"
|
||||
"github.com/gliderlabs/ssh"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
gossh "golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
func TestStartServer(t *testing.T) {
|
||||
// Generate a test key pair
|
||||
pubKey, privKey, err := ed25519.GenerateKey(nil)
|
||||
require.NoError(t, err)
|
||||
signer, err := gossh.NewSignerFromKey(privKey)
|
||||
require.NoError(t, err)
|
||||
sshPubKey, err := gossh.NewPublicKey(pubKey)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Generate a different key pair for bad key test
|
||||
badPubKey, badPrivKey, err := ed25519.GenerateKey(nil)
|
||||
require.NoError(t, err)
|
||||
badSigner, err := gossh.NewSignerFromKey(badPrivKey)
|
||||
require.NoError(t, err)
|
||||
sshBadPubKey, err := gossh.NewPublicKey(badPubKey)
|
||||
require.NoError(t, err)
|
||||
|
||||
socketFile := filepath.Join(t.TempDir(), "beszel-test.sock")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
config ServerOptions
|
||||
wantErr bool
|
||||
errContains string
|
||||
setup func() error
|
||||
cleanup func() error
|
||||
}{
|
||||
{
|
||||
name: "tcp port only",
|
||||
config: ServerOptions{
|
||||
Network: "tcp",
|
||||
Addr: ":45987",
|
||||
Keys: []gossh.PublicKey{sshPubKey},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tcp with ipv4",
|
||||
config: ServerOptions{
|
||||
Network: "tcp4",
|
||||
Addr: "127.0.0.1:45988",
|
||||
Keys: []gossh.PublicKey{sshPubKey},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tcp with ipv6",
|
||||
config: ServerOptions{
|
||||
Network: "tcp6",
|
||||
Addr: "[::1]:45989",
|
||||
Keys: []gossh.PublicKey{sshPubKey},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "unix socket",
|
||||
config: ServerOptions{
|
||||
Network: "unix",
|
||||
Addr: socketFile,
|
||||
Keys: []gossh.PublicKey{sshPubKey},
|
||||
},
|
||||
setup: func() error {
|
||||
// Create a socket file that should be removed
|
||||
f, err := os.Create(socketFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return f.Close()
|
||||
},
|
||||
cleanup: func() error {
|
||||
return os.Remove(socketFile)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "bad key should fail",
|
||||
config: ServerOptions{
|
||||
Network: "tcp",
|
||||
Addr: ":45987",
|
||||
Keys: []gossh.PublicKey{sshBadPubKey},
|
||||
},
|
||||
wantErr: true,
|
||||
errContains: "ssh: handshake failed",
|
||||
},
|
||||
{
|
||||
name: "good key still good",
|
||||
config: ServerOptions{
|
||||
Network: "tcp",
|
||||
Addr: ":45987",
|
||||
Keys: []gossh.PublicKey{sshPubKey},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if tt.setup != nil {
|
||||
err := tt.setup()
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
if tt.cleanup != nil {
|
||||
defer tt.cleanup()
|
||||
}
|
||||
|
||||
agent, err := NewAgent("")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Start server in a goroutine since it blocks
|
||||
errChan := make(chan error, 1)
|
||||
go func() {
|
||||
errChan <- agent.StartServer(tt.config)
|
||||
}()
|
||||
|
||||
// Add a short delay to allow the server to start
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
// Try to connect to verify server is running
|
||||
var client *gossh.Client
|
||||
|
||||
// Choose the appropriate signer based on the test case
|
||||
testSigner := signer
|
||||
if tt.name == "bad key should fail" {
|
||||
testSigner = badSigner
|
||||
}
|
||||
|
||||
sshClientConfig := &gossh.ClientConfig{
|
||||
User: "a",
|
||||
Auth: []gossh.AuthMethod{
|
||||
gossh.PublicKeys(testSigner),
|
||||
},
|
||||
HostKeyCallback: gossh.InsecureIgnoreHostKey(),
|
||||
Timeout: 4 * time.Second,
|
||||
}
|
||||
|
||||
switch tt.config.Network {
|
||||
case "unix":
|
||||
client, err = gossh.Dial("unix", tt.config.Addr, sshClientConfig)
|
||||
default:
|
||||
if !strings.Contains(tt.config.Addr, ":") {
|
||||
tt.config.Addr = ":" + tt.config.Addr
|
||||
}
|
||||
client, err = gossh.Dial("tcp", tt.config.Addr, sshClientConfig)
|
||||
}
|
||||
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err)
|
||||
if tt.errContains != "" {
|
||||
assert.Contains(t, err.Error(), tt.errContains)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, client)
|
||||
client.Close()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
//////////////////// ParseKeys Tests ////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
// Helper function to generate a temporary file with content
|
||||
func createTempFile(content string) (string, error) {
|
||||
tmpFile, err := os.CreateTemp("", "ssh_keys_*.txt")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create temp file: %w", err)
|
||||
}
|
||||
defer tmpFile.Close()
|
||||
|
||||
if _, err := tmpFile.WriteString(content); err != nil {
|
||||
return "", fmt.Errorf("failed to write to temp file: %w", err)
|
||||
}
|
||||
|
||||
return tmpFile.Name(), nil
|
||||
}
|
||||
|
||||
// Test case 1: String with a single SSH key
|
||||
func TestParseSingleKeyFromString(t *testing.T) {
|
||||
input := "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKCBM91kukN7hbvFKtbpEeo2JXjCcNxXcdBH7V7ADMBo"
|
||||
keys, err := ParseKeys(input)
|
||||
if err != nil {
|
||||
t.Fatalf("Expected no error, got: %v", err)
|
||||
}
|
||||
if len(keys) != 1 {
|
||||
t.Fatalf("Expected 1 key, got %d keys", len(keys))
|
||||
}
|
||||
if keys[0].Type() != "ssh-ed25519" {
|
||||
t.Fatalf("Expected key type 'ssh-ed25519', got '%s'", keys[0].Type())
|
||||
}
|
||||
}
|
||||
|
||||
// Test case 2: String with multiple SSH keys
|
||||
func TestParseMultipleKeysFromString(t *testing.T) {
|
||||
input := "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKCBM91kukN7hbvFKtbpEeo2JXjCcNxXcdBH7V7ADMBo\nssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJDMtAOQfxDlCxe+A5lVbUY/DHxK1LAF2Z3AV0FYv36D \n #comment\n ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJDMtAOQfxDlCxe+A5lVbUY/DHxK1LAF2Z3AV0FYv36D"
|
||||
keys, err := ParseKeys(input)
|
||||
if err != nil {
|
||||
t.Fatalf("Expected no error, got: %v", err)
|
||||
}
|
||||
if len(keys) != 3 {
|
||||
t.Fatalf("Expected 3 keys, got %d keys", len(keys))
|
||||
}
|
||||
if keys[0].Type() != "ssh-ed25519" || keys[1].Type() != "ssh-ed25519" || keys[2].Type() != "ssh-ed25519" {
|
||||
t.Fatalf("Unexpected key types: %s, %s, %s", keys[0].Type(), keys[1].Type(), keys[2].Type())
|
||||
}
|
||||
}
|
||||
|
||||
// Test case 3: File with a single SSH key
|
||||
func TestParseSingleKeyFromFile(t *testing.T) {
|
||||
content := "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKCBM91kukN7hbvFKtbpEeo2JXjCcNxXcdBH7V7ADMBo"
|
||||
filePath, err := createTempFile(content)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp file: %v", err)
|
||||
}
|
||||
defer os.Remove(filePath) // Clean up the file after the test
|
||||
|
||||
// Read the file content
|
||||
fileContent, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read temp file: %v", err)
|
||||
}
|
||||
|
||||
// Parse the keys
|
||||
keys, err := ParseKeys(string(fileContent))
|
||||
if err != nil {
|
||||
t.Fatalf("Expected no error, got: %v", err)
|
||||
}
|
||||
if len(keys) != 1 {
|
||||
t.Fatalf("Expected 1 key, got %d keys", len(keys))
|
||||
}
|
||||
if keys[0].Type() != "ssh-ed25519" {
|
||||
t.Fatalf("Expected key type 'ssh-ed25519', got '%s'", keys[0].Type())
|
||||
}
|
||||
}
|
||||
|
||||
// Test case 4: File with multiple SSH keys
|
||||
func TestParseMultipleKeysFromFile(t *testing.T) {
|
||||
content := "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKCBM91kukN7hbvFKtbpEeo2JXjCcNxXcdBH7V7ADMBo\nssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJDMtAOQfxDlCxe+A5lVbUY/DHxK1LAF2Z3AV0FYv36D \n #comment\n ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJDMtAOQfxDlCxe+A5lVbUY/DHxK1LAF2Z3AV0FYv36D"
|
||||
filePath, err := createTempFile(content)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp file: %v", err)
|
||||
}
|
||||
// defer os.Remove(filePath) // Clean up the file after the test
|
||||
|
||||
// Read the file content
|
||||
fileContent, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read temp file: %v", err)
|
||||
}
|
||||
|
||||
// Parse the keys
|
||||
keys, err := ParseKeys(string(fileContent))
|
||||
if err != nil {
|
||||
t.Fatalf("Expected no error, got: %v", err)
|
||||
}
|
||||
if len(keys) != 3 {
|
||||
t.Fatalf("Expected 3 keys, got %d keys", len(keys))
|
||||
}
|
||||
if keys[0].Type() != "ssh-ed25519" || keys[1].Type() != "ssh-ed25519" || keys[2].Type() != "ssh-ed25519" {
|
||||
t.Fatalf("Unexpected key types: %s, %s, %s", keys[0].Type(), keys[1].Type(), keys[2].Type())
|
||||
}
|
||||
}
|
||||
|
||||
// Test case 5: Invalid SSH key input
|
||||
func TestParseInvalidKey(t *testing.T) {
|
||||
input := "invalid-key-data"
|
||||
_, err := ParseKeys(input)
|
||||
if err == nil {
|
||||
t.Fatalf("Expected an error for invalid key, got nil")
|
||||
}
|
||||
expectedErrMsg := "failed to parse key"
|
||||
if !strings.Contains(err.Error(), expectedErrMsg) {
|
||||
t.Fatalf("Expected error message to contain '%s', got: %v", expectedErrMsg, err)
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
//////////////////// Hub Version Tests //////////////////////////
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
func TestExtractHubVersion(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
clientVersion string
|
||||
expectedVersion string
|
||||
expectError bool
|
||||
}{
|
||||
{
|
||||
name: "valid beszel client version with underscore",
|
||||
clientVersion: "SSH-2.0-beszel_0.11.1",
|
||||
expectedVersion: "0.11.1",
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "valid beszel client version with beta",
|
||||
clientVersion: "SSH-2.0-beszel_1.0.0-beta",
|
||||
expectedVersion: "1.0.0-beta",
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "valid beszel client version with rc",
|
||||
clientVersion: "SSH-2.0-beszel_0.12.0-rc1",
|
||||
expectedVersion: "0.12.0-rc1",
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "different SSH client",
|
||||
clientVersion: "SSH-2.0-OpenSSH_8.0",
|
||||
expectedVersion: "8.0",
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "malformed version string without underscore",
|
||||
clientVersion: "SSH-2.0-beszel",
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "empty version string",
|
||||
clientVersion: "",
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "version string with underscore but no version",
|
||||
clientVersion: "beszel_",
|
||||
expectedVersion: "",
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "version with patch and build metadata",
|
||||
clientVersion: "SSH-2.0-beszel_1.2.3+build.123",
|
||||
expectedVersion: "1.2.3+build.123",
|
||||
expectError: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := extractHubVersion(tt.clientVersion)
|
||||
|
||||
if tt.expectError {
|
||||
assert.Error(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tt.expectedVersion, result.String())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
/////////////// Hub Version Detection Tests ////////////////////
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
func TestGetHubVersion(t *testing.T) {
|
||||
agent, err := NewAgent("")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Mock SSH context that implements the ssh.Context interface
|
||||
mockCtx := &mockSSHContext{
|
||||
sessionID: "test-session-123",
|
||||
clientVersion: "SSH-2.0-beszel_0.12.0",
|
||||
}
|
||||
|
||||
// Test first call - should extract and cache version
|
||||
version := agent.getHubVersion("test-session-123", mockCtx)
|
||||
assert.Equal(t, "0.12.0", version.String())
|
||||
|
||||
// Test second call - should return cached version
|
||||
mockCtx.clientVersion = "SSH-2.0-beszel_0.11.0" // Change version but should still return cached
|
||||
version = agent.getHubVersion("test-session-123", mockCtx)
|
||||
assert.Equal(t, "0.12.0", version.String()) // Should still be cached version
|
||||
|
||||
// Test different session - should extract new version
|
||||
version = agent.getHubVersion("different-session", mockCtx)
|
||||
assert.Equal(t, "0.11.0", version.String())
|
||||
|
||||
// Test with invalid version string (non-beszel client)
|
||||
mockCtx.clientVersion = "SSH-2.0-OpenSSH_8.0"
|
||||
version = agent.getHubVersion("invalid-session", mockCtx)
|
||||
assert.Equal(t, "0.0.0", version.String()) // Should be empty version for non-beszel clients
|
||||
|
||||
// Test with no client version
|
||||
mockCtx.clientVersion = ""
|
||||
version = agent.getHubVersion("no-version-session", mockCtx)
|
||||
assert.True(t, version.EQ(semver.Version{})) // Should be empty version
|
||||
}
|
||||
|
||||
// mockSSHContext implements ssh.Context for testing
|
||||
type mockSSHContext struct {
|
||||
context.Context
|
||||
sync.Mutex
|
||||
sessionID string
|
||||
clientVersion string
|
||||
}
|
||||
|
||||
func (m *mockSSHContext) SessionID() string {
|
||||
return m.sessionID
|
||||
}
|
||||
|
||||
func (m *mockSSHContext) ClientVersion() string {
|
||||
return m.clientVersion
|
||||
}
|
||||
|
||||
func (m *mockSSHContext) ServerVersion() string {
|
||||
return "SSH-2.0-beszel_test"
|
||||
}
|
||||
|
||||
func (m *mockSSHContext) Value(key interface{}) interface{} {
|
||||
if key == ssh.ContextKeyClientVersion {
|
||||
return m.clientVersion
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockSSHContext) User() string { return "test-user" }
|
||||
func (m *mockSSHContext) RemoteAddr() net.Addr { return nil }
|
||||
func (m *mockSSHContext) LocalAddr() net.Addr { return nil }
|
||||
func (m *mockSSHContext) Permissions() *ssh.Permissions { return nil }
|
||||
func (m *mockSSHContext) SetValue(key, value interface{}) {}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
/////////////// CBOR vs JSON Encoding Tests ////////////////////
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
// TestWriteToSessionEncoding tests that writeToSession actually encodes data in the correct format
|
||||
func TestWriteToSessionEncoding(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
hubVersion string
|
||||
expectedUsesCbor bool
|
||||
}{
|
||||
{
|
||||
name: "old hub version should use JSON",
|
||||
hubVersion: "0.11.1",
|
||||
expectedUsesCbor: false,
|
||||
},
|
||||
{
|
||||
name: "non-beta release should use CBOR",
|
||||
hubVersion: "0.12.0",
|
||||
expectedUsesCbor: true,
|
||||
},
|
||||
{
|
||||
name: "even newer hub version should use CBOR",
|
||||
hubVersion: "0.16.4",
|
||||
expectedUsesCbor: true,
|
||||
},
|
||||
{
|
||||
name: "beta version below release threshold should use JSON",
|
||||
hubVersion: "0.12.0-beta0",
|
||||
expectedUsesCbor: false,
|
||||
},
|
||||
// {
|
||||
// name: "matching beta version should use CBOR",
|
||||
// hubVersion: "0.12.0-beta2",
|
||||
// expectedUsesCbor: true,
|
||||
// },
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Reset the global hubVersions map to ensure clean state for each test
|
||||
hubVersions = nil
|
||||
|
||||
agent, err := NewAgent("")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Parse the test version
|
||||
version, err := semver.Parse(tt.hubVersion)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create test data to encode
|
||||
testData := createTestCombinedData()
|
||||
|
||||
var buf strings.Builder
|
||||
err = agent.writeToSession(&buf, testData, version)
|
||||
require.NoError(t, err)
|
||||
|
||||
encodedData := buf.String()
|
||||
require.NotEmpty(t, encodedData)
|
||||
|
||||
// Verify the encoding format by attempting to decode
|
||||
if tt.expectedUsesCbor {
|
||||
var decodedCbor system.CombinedData
|
||||
err = cbor.Unmarshal([]byte(encodedData), &decodedCbor)
|
||||
assert.NoError(t, err, "Should be valid CBOR data")
|
||||
|
||||
var decodedJson system.CombinedData
|
||||
err = json.Unmarshal([]byte(encodedData), &decodedJson)
|
||||
assert.Error(t, err, "Should not be valid JSON data")
|
||||
|
||||
assert.Equal(t, testData.Info.Hostname, decodedCbor.Info.Hostname)
|
||||
assert.Equal(t, testData.Stats.Cpu, decodedCbor.Stats.Cpu)
|
||||
} else {
|
||||
// Should be JSON - try to decode as JSON
|
||||
var decodedJson system.CombinedData
|
||||
err = json.Unmarshal([]byte(encodedData), &decodedJson)
|
||||
assert.NoError(t, err, "Should be valid JSON data")
|
||||
|
||||
var decodedCbor system.CombinedData
|
||||
err = cbor.Unmarshal([]byte(encodedData), &decodedCbor)
|
||||
assert.Error(t, err, "Should not be valid CBOR data")
|
||||
|
||||
// Verify the decoded JSON data matches our test data
|
||||
assert.Equal(t, testData.Info.Hostname, decodedJson.Info.Hostname)
|
||||
assert.Equal(t, testData.Stats.Cpu, decodedJson.Stats.Cpu)
|
||||
|
||||
// Verify it looks like JSON (starts with '{' and contains readable field names)
|
||||
assert.True(t, strings.HasPrefix(encodedData, "{"), "JSON should start with '{'")
|
||||
assert.Contains(t, encodedData, `"info"`, "JSON should contain readable field names")
|
||||
assert.Contains(t, encodedData, `"stats"`, "JSON should contain readable field names")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to create test data for encoding tests
|
||||
func createTestCombinedData() *system.CombinedData {
|
||||
return &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
Cpu: 25.5,
|
||||
Mem: 8589934592, // 8GB
|
||||
MemUsed: 4294967296, // 4GB
|
||||
MemPct: 50.0,
|
||||
DiskTotal: 1099511627776, // 1TB
|
||||
DiskUsed: 549755813888, // 512GB
|
||||
DiskPct: 50.0,
|
||||
},
|
||||
Info: system.Info{
|
||||
Hostname: "test-host",
|
||||
Cores: 8,
|
||||
CpuModel: "Test CPU Model",
|
||||
Uptime: 3600,
|
||||
AgentVersion: "0.12.0",
|
||||
Os: system.Linux,
|
||||
},
|
||||
Containers: []*container.Stats{
|
||||
{
|
||||
Name: "test-container",
|
||||
Cpu: 10.5,
|
||||
Mem: 1073741824, // 1GB
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func TestHubVersionCaching(t *testing.T) {
|
||||
// Reset the global hubVersions map to ensure clean state
|
||||
hubVersions = nil
|
||||
|
||||
agent, err := NewAgent("")
|
||||
require.NoError(t, err)
|
||||
|
||||
ctx1 := &mockSSHContext{
|
||||
sessionID: "session1",
|
||||
clientVersion: "SSH-2.0-beszel_0.12.0",
|
||||
}
|
||||
ctx2 := &mockSSHContext{
|
||||
sessionID: "session2",
|
||||
clientVersion: "SSH-2.0-beszel_0.11.0",
|
||||
}
|
||||
|
||||
// First calls should cache the versions
|
||||
v1 := agent.getHubVersion("session1", ctx1)
|
||||
v2 := agent.getHubVersion("session2", ctx2)
|
||||
|
||||
assert.Equal(t, "0.12.0", v1.String())
|
||||
assert.Equal(t, "0.11.0", v2.String())
|
||||
|
||||
// Verify caching by changing context but keeping same session ID
|
||||
ctx1.clientVersion = "SSH-2.0-beszel_0.10.0"
|
||||
v1Cached := agent.getHubVersion("session1", ctx1)
|
||||
assert.Equal(t, "0.12.0", v1Cached.String()) // Should still be cached version
|
||||
|
||||
// New session should get new version
|
||||
ctx3 := &mockSSHContext{
|
||||
sessionID: "session3",
|
||||
clientVersion: "SSH-2.0-beszel_0.13.0",
|
||||
}
|
||||
v3 := agent.getHubVersion("session3", ctx3)
|
||||
assert.Equal(t, "0.13.0", v3.String())
|
||||
}
|
305
agent/system.go
Normal file
305
agent/system.go
Normal file
@@ -0,0 +1,305 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/agent/battery"
|
||||
"github.com/henrygd/beszel/src/entities/system"
|
||||
|
||||
"github.com/shirou/gopsutil/v4/cpu"
|
||||
"github.com/shirou/gopsutil/v4/disk"
|
||||
"github.com/shirou/gopsutil/v4/host"
|
||||
"github.com/shirou/gopsutil/v4/load"
|
||||
"github.com/shirou/gopsutil/v4/mem"
|
||||
psutilNet "github.com/shirou/gopsutil/v4/net"
|
||||
)
|
||||
|
||||
// Sets initial / non-changing values about the host system
|
||||
func (a *Agent) initializeSystemInfo() {
|
||||
a.systemInfo.AgentVersion = beszel.Version
|
||||
a.systemInfo.Hostname, _ = os.Hostname()
|
||||
|
||||
platform, _, version, _ := host.PlatformInformation()
|
||||
|
||||
if platform == "darwin" {
|
||||
a.systemInfo.KernelVersion = version
|
||||
a.systemInfo.Os = system.Darwin
|
||||
} else if strings.Contains(platform, "indows") {
|
||||
a.systemInfo.KernelVersion = strings.Replace(platform, "Microsoft ", "", 1) + " " + version
|
||||
a.systemInfo.Os = system.Windows
|
||||
} else if platform == "freebsd" {
|
||||
a.systemInfo.Os = system.Freebsd
|
||||
a.systemInfo.KernelVersion = version
|
||||
} else {
|
||||
a.systemInfo.Os = system.Linux
|
||||
}
|
||||
|
||||
if a.systemInfo.KernelVersion == "" {
|
||||
a.systemInfo.KernelVersion, _ = host.KernelVersion()
|
||||
}
|
||||
|
||||
// cpu model
|
||||
if info, err := cpu.Info(); err == nil && len(info) > 0 {
|
||||
a.systemInfo.CpuModel = info[0].ModelName
|
||||
}
|
||||
// cores / threads
|
||||
a.systemInfo.Cores, _ = cpu.Counts(false)
|
||||
if threads, err := cpu.Counts(true); err == nil {
|
||||
if threads > 0 && threads < a.systemInfo.Cores {
|
||||
// in lxc logical cores reflects container limits, so use that as cores if lower
|
||||
a.systemInfo.Cores = threads
|
||||
} else {
|
||||
a.systemInfo.Threads = threads
|
||||
}
|
||||
}
|
||||
|
||||
// zfs
|
||||
if _, err := getARCSize(); err != nil {
|
||||
slog.Debug("Not monitoring ZFS ARC", "err", err)
|
||||
} else {
|
||||
a.zfs = true
|
||||
}
|
||||
}
|
||||
|
||||
// Returns current info, stats about the host system
|
||||
func (a *Agent) getSystemStats() system.Stats {
|
||||
systemStats := system.Stats{}
|
||||
|
||||
// battery
|
||||
if battery.HasReadableBattery() {
|
||||
systemStats.Battery[0], systemStats.Battery[1], _ = battery.GetBatteryStats()
|
||||
}
|
||||
|
||||
// cpu percent
|
||||
cpuPct, err := cpu.Percent(0, false)
|
||||
if err != nil {
|
||||
slog.Error("Error getting cpu percent", "err", err)
|
||||
} else if len(cpuPct) > 0 {
|
||||
systemStats.Cpu = twoDecimals(cpuPct[0])
|
||||
}
|
||||
|
||||
// load average
|
||||
if avgstat, err := load.Avg(); err == nil {
|
||||
systemStats.LoadAvg[0] = avgstat.Load1
|
||||
systemStats.LoadAvg[1] = avgstat.Load5
|
||||
systemStats.LoadAvg[2] = avgstat.Load15
|
||||
slog.Debug("Load average", "5m", avgstat.Load5, "15m", avgstat.Load15)
|
||||
} else {
|
||||
slog.Error("Error getting load average", "err", err)
|
||||
}
|
||||
|
||||
// memory
|
||||
if v, err := mem.VirtualMemory(); err == nil {
|
||||
// swap
|
||||
systemStats.Swap = bytesToGigabytes(v.SwapTotal)
|
||||
systemStats.SwapUsed = bytesToGigabytes(v.SwapTotal - v.SwapFree - v.SwapCached)
|
||||
// cache + buffers value for default mem calculation
|
||||
cacheBuff := v.Total - v.Free - v.Used
|
||||
// htop memory calculation overrides
|
||||
if a.memCalc == "htop" {
|
||||
// note: gopsutil automatically adds SReclaimable to v.Cached
|
||||
cacheBuff = v.Cached + v.Buffers - v.Shared
|
||||
v.Used = v.Total - (v.Free + cacheBuff)
|
||||
v.UsedPercent = float64(v.Used) / float64(v.Total) * 100.0
|
||||
}
|
||||
// subtract ZFS ARC size from used memory and add as its own category
|
||||
if a.zfs {
|
||||
if arcSize, _ := getARCSize(); arcSize > 0 && arcSize < v.Used {
|
||||
v.Used = v.Used - arcSize
|
||||
v.UsedPercent = float64(v.Used) / float64(v.Total) * 100.0
|
||||
systemStats.MemZfsArc = bytesToGigabytes(arcSize)
|
||||
}
|
||||
}
|
||||
systemStats.Mem = bytesToGigabytes(v.Total)
|
||||
systemStats.MemBuffCache = bytesToGigabytes(cacheBuff)
|
||||
systemStats.MemUsed = bytesToGigabytes(v.Used)
|
||||
systemStats.MemPct = twoDecimals(v.UsedPercent)
|
||||
}
|
||||
|
||||
// disk usage
|
||||
for _, stats := range a.fsStats {
|
||||
if d, err := disk.Usage(stats.Mountpoint); err == nil {
|
||||
stats.DiskTotal = bytesToGigabytes(d.Total)
|
||||
stats.DiskUsed = bytesToGigabytes(d.Used)
|
||||
if stats.Root {
|
||||
systemStats.DiskTotal = bytesToGigabytes(d.Total)
|
||||
systemStats.DiskUsed = bytesToGigabytes(d.Used)
|
||||
systemStats.DiskPct = twoDecimals(d.UsedPercent)
|
||||
}
|
||||
} else {
|
||||
// reset stats if error (likely unmounted)
|
||||
slog.Error("Error getting disk stats", "name", stats.Mountpoint, "err", err)
|
||||
stats.DiskTotal = 0
|
||||
stats.DiskUsed = 0
|
||||
stats.TotalRead = 0
|
||||
stats.TotalWrite = 0
|
||||
}
|
||||
}
|
||||
|
||||
// disk i/o
|
||||
if ioCounters, err := disk.IOCounters(a.fsNames...); err == nil {
|
||||
for _, d := range ioCounters {
|
||||
stats := a.fsStats[d.Name]
|
||||
if stats == nil {
|
||||
continue
|
||||
}
|
||||
secondsElapsed := time.Since(stats.Time).Seconds()
|
||||
readPerSecond := bytesToMegabytes(float64(d.ReadBytes-stats.TotalRead) / secondsElapsed)
|
||||
writePerSecond := bytesToMegabytes(float64(d.WriteBytes-stats.TotalWrite) / secondsElapsed)
|
||||
// check for invalid values and reset stats if so
|
||||
if readPerSecond < 0 || writePerSecond < 0 || readPerSecond > 50_000 || writePerSecond > 50_000 {
|
||||
slog.Warn("Invalid disk I/O. Resetting.", "name", d.Name, "read", readPerSecond, "write", writePerSecond)
|
||||
a.initializeDiskIoStats(ioCounters)
|
||||
break
|
||||
}
|
||||
stats.Time = time.Now()
|
||||
stats.DiskReadPs = readPerSecond
|
||||
stats.DiskWritePs = writePerSecond
|
||||
stats.TotalRead = d.ReadBytes
|
||||
stats.TotalWrite = d.WriteBytes
|
||||
// if root filesystem, update system stats
|
||||
if stats.Root {
|
||||
systemStats.DiskReadPs = stats.DiskReadPs
|
||||
systemStats.DiskWritePs = stats.DiskWritePs
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// network stats
|
||||
if len(a.netInterfaces) == 0 {
|
||||
// if no network interfaces, initialize again
|
||||
// this is a fix if agent started before network is online (#466)
|
||||
// maybe refactor this in the future to not cache interface names at all so we
|
||||
// don't miss an interface that's been added after agent started in any circumstance
|
||||
a.initializeNetIoStats()
|
||||
}
|
||||
if netIO, err := psutilNet.IOCounters(true); err == nil {
|
||||
msElapsed := uint64(time.Since(a.netIoStats.Time).Milliseconds())
|
||||
a.netIoStats.Time = time.Now()
|
||||
totalBytesSent := uint64(0)
|
||||
totalBytesRecv := uint64(0)
|
||||
// sum all bytes sent and received
|
||||
for _, v := range netIO {
|
||||
// skip if not in valid network interfaces list
|
||||
if _, exists := a.netInterfaces[v.Name]; !exists {
|
||||
continue
|
||||
}
|
||||
totalBytesSent += v.BytesSent
|
||||
totalBytesRecv += v.BytesRecv
|
||||
}
|
||||
// add to systemStats
|
||||
var bytesSentPerSecond, bytesRecvPerSecond uint64
|
||||
if msElapsed > 0 {
|
||||
bytesSentPerSecond = (totalBytesSent - a.netIoStats.BytesSent) * 1000 / msElapsed
|
||||
bytesRecvPerSecond = (totalBytesRecv - a.netIoStats.BytesRecv) * 1000 / msElapsed
|
||||
}
|
||||
networkSentPs := bytesToMegabytes(float64(bytesSentPerSecond))
|
||||
networkRecvPs := bytesToMegabytes(float64(bytesRecvPerSecond))
|
||||
// add check for issue (#150) where sent is a massive number
|
||||
if networkSentPs > 10_000 || networkRecvPs > 10_000 {
|
||||
slog.Warn("Invalid net stats. Resetting.", "sent", networkSentPs, "recv", networkRecvPs)
|
||||
for _, v := range netIO {
|
||||
if _, exists := a.netInterfaces[v.Name]; !exists {
|
||||
continue
|
||||
}
|
||||
slog.Info(v.Name, "recv", v.BytesRecv, "sent", v.BytesSent)
|
||||
}
|
||||
// reset network I/O stats
|
||||
a.initializeNetIoStats()
|
||||
} else {
|
||||
systemStats.NetworkSent = networkSentPs
|
||||
systemStats.NetworkRecv = networkRecvPs
|
||||
systemStats.Bandwidth[0], systemStats.Bandwidth[1] = bytesSentPerSecond, bytesRecvPerSecond
|
||||
// update netIoStats
|
||||
a.netIoStats.BytesSent = totalBytesSent
|
||||
a.netIoStats.BytesRecv = totalBytesRecv
|
||||
}
|
||||
}
|
||||
|
||||
// temperatures
|
||||
// TODO: maybe refactor to methods on systemStats
|
||||
a.updateTemperatures(&systemStats)
|
||||
|
||||
// GPU data
|
||||
if a.gpuManager != nil {
|
||||
// reset high gpu percent
|
||||
a.systemInfo.GpuPct = 0
|
||||
// get current GPU data
|
||||
if gpuData := a.gpuManager.GetCurrentData(); len(gpuData) > 0 {
|
||||
systemStats.GPUData = gpuData
|
||||
|
||||
// add temperatures
|
||||
if systemStats.Temperatures == nil {
|
||||
systemStats.Temperatures = make(map[string]float64, len(gpuData))
|
||||
}
|
||||
highestTemp := 0.0
|
||||
for _, gpu := range gpuData {
|
||||
if gpu.Temperature > 0 {
|
||||
systemStats.Temperatures[gpu.Name] = gpu.Temperature
|
||||
if a.sensorConfig.primarySensor == gpu.Name {
|
||||
a.systemInfo.DashboardTemp = gpu.Temperature
|
||||
}
|
||||
if gpu.Temperature > highestTemp {
|
||||
highestTemp = gpu.Temperature
|
||||
}
|
||||
}
|
||||
// update high gpu percent for dashboard
|
||||
a.systemInfo.GpuPct = max(a.systemInfo.GpuPct, gpu.Usage)
|
||||
}
|
||||
// use highest temp for dashboard temp if dashboard temp is unset
|
||||
if a.systemInfo.DashboardTemp == 0 {
|
||||
a.systemInfo.DashboardTemp = highestTemp
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// update base system info
|
||||
a.systemInfo.Cpu = systemStats.Cpu
|
||||
a.systemInfo.LoadAvg = systemStats.LoadAvg
|
||||
// TODO: remove these in future release in favor of load avg array
|
||||
a.systemInfo.LoadAvg1 = systemStats.LoadAvg[0]
|
||||
a.systemInfo.LoadAvg5 = systemStats.LoadAvg[1]
|
||||
a.systemInfo.LoadAvg15 = systemStats.LoadAvg[2]
|
||||
a.systemInfo.MemPct = systemStats.MemPct
|
||||
a.systemInfo.DiskPct = systemStats.DiskPct
|
||||
a.systemInfo.Uptime, _ = host.Uptime()
|
||||
// TODO: in future release, remove MB bandwidth values in favor of bytes
|
||||
a.systemInfo.Bandwidth = twoDecimals(systemStats.NetworkSent + systemStats.NetworkRecv)
|
||||
a.systemInfo.BandwidthBytes = systemStats.Bandwidth[0] + systemStats.Bandwidth[1]
|
||||
slog.Debug("sysinfo", "data", a.systemInfo)
|
||||
|
||||
return systemStats
|
||||
}
|
||||
|
||||
// Returns the size of the ZFS ARC memory cache in bytes
|
||||
func getARCSize() (uint64, error) {
|
||||
file, err := os.Open("/proc/spl/kstat/zfs/arcstats")
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Scan the lines
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if strings.HasPrefix(line, "size") {
|
||||
// Example line: size 4 15032385536
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 3 {
|
||||
return 0, err
|
||||
}
|
||||
// Return the size as uint64
|
||||
return strconv.ParseUint(fields[2], 10, 64)
|
||||
}
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("failed to parse size field")
|
||||
}
|
165
agent/update.go
Normal file
165
agent/update.go
Normal file
@@ -0,0 +1,165 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/henrygd/beszel/src/ghupdate"
|
||||
)
|
||||
|
||||
// restarter knows how to restart the beszel-agent service.
|
||||
type restarter interface {
|
||||
Restart() error
|
||||
}
|
||||
|
||||
type systemdRestarter struct{ cmd string }
|
||||
|
||||
func (s *systemdRestarter) Restart() error {
|
||||
// Only restart if the service is active
|
||||
if err := exec.Command(s.cmd, "is-active", "beszel-agent.service").Run(); err != nil {
|
||||
return nil
|
||||
}
|
||||
ghupdate.ColorPrint(ghupdate.ColorYellow, "Restarting beszel-agent.service via systemd…")
|
||||
return exec.Command(s.cmd, "restart", "beszel-agent.service").Run()
|
||||
}
|
||||
|
||||
type openRCRestarter struct{ cmd string }
|
||||
|
||||
func (o *openRCRestarter) Restart() error {
|
||||
if err := exec.Command(o.cmd, "status", "beszel-agent").Run(); err != nil {
|
||||
return nil
|
||||
}
|
||||
ghupdate.ColorPrint(ghupdate.ColorYellow, "Restarting beszel-agent via OpenRC…")
|
||||
return exec.Command(o.cmd, "restart", "beszel-agent").Run()
|
||||
}
|
||||
|
||||
type openWRTRestarter struct{ cmd string }
|
||||
|
||||
func (w *openWRTRestarter) Restart() error {
|
||||
if err := exec.Command(w.cmd, "running", "beszel-agent").Run(); err != nil {
|
||||
return nil
|
||||
}
|
||||
ghupdate.ColorPrint(ghupdate.ColorYellow, "Restarting beszel-agent via procd…")
|
||||
return exec.Command(w.cmd, "restart", "beszel-agent").Run()
|
||||
}
|
||||
|
||||
type freeBSDRestarter struct{ cmd string }
|
||||
|
||||
func (f *freeBSDRestarter) Restart() error {
|
||||
if err := exec.Command(f.cmd, "beszel-agent", "status").Run(); err != nil {
|
||||
return nil
|
||||
}
|
||||
ghupdate.ColorPrint(ghupdate.ColorYellow, "Restarting beszel-agent via FreeBSD rc…")
|
||||
return exec.Command(f.cmd, "beszel-agent", "restart").Run()
|
||||
}
|
||||
|
||||
func detectRestarter() restarter {
|
||||
if path, err := exec.LookPath("systemctl"); err == nil {
|
||||
return &systemdRestarter{cmd: path}
|
||||
}
|
||||
if path, err := exec.LookPath("rc-service"); err == nil {
|
||||
return &openRCRestarter{cmd: path}
|
||||
}
|
||||
if path, err := exec.LookPath("service"); err == nil {
|
||||
if runtime.GOOS == "freebsd" {
|
||||
return &freeBSDRestarter{cmd: path}
|
||||
}
|
||||
return &openWRTRestarter{cmd: path}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update checks GitHub for a newer release of beszel-agent, applies it,
|
||||
// fixes SELinux context if needed, and restarts the service.
|
||||
func Update(useMirror bool) error {
|
||||
exePath, _ := os.Executable()
|
||||
|
||||
dataDir, err := getDataDir()
|
||||
if err != nil {
|
||||
dataDir = os.TempDir()
|
||||
}
|
||||
updated, err := ghupdate.Update(ghupdate.Config{
|
||||
ArchiveExecutable: "beszel-agent",
|
||||
DataDir: dataDir,
|
||||
UseMirror: useMirror,
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
if !updated {
|
||||
return nil
|
||||
}
|
||||
|
||||
// make sure the file is executable
|
||||
if err := os.Chmod(exePath, 0755); err != nil {
|
||||
ghupdate.ColorPrintf(ghupdate.ColorYellow, "Warning: failed to set executable permissions: %v", err)
|
||||
}
|
||||
// set ownership to beszel:beszel if possible
|
||||
if chownPath, err := exec.LookPath("chown"); err == nil {
|
||||
if err := exec.Command(chownPath, "beszel:beszel", exePath).Run(); err != nil {
|
||||
ghupdate.ColorPrintf(ghupdate.ColorYellow, "Warning: failed to set file ownership: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// 6) Fix SELinux context if necessary
|
||||
if err := handleSELinuxContext(exePath); err != nil {
|
||||
ghupdate.ColorPrintf(ghupdate.ColorYellow, "Warning: SELinux context handling: %v", err)
|
||||
}
|
||||
|
||||
// 7) Restart service if running under a recognised init system
|
||||
if r := detectRestarter(); r != nil {
|
||||
if err := r.Restart(); err != nil {
|
||||
ghupdate.ColorPrintf(ghupdate.ColorYellow, "Warning: failed to restart service: %v", err)
|
||||
ghupdate.ColorPrint(ghupdate.ColorYellow, "Please restart the service manually.")
|
||||
} else {
|
||||
ghupdate.ColorPrint(ghupdate.ColorGreen, "Service restarted successfully")
|
||||
}
|
||||
} else {
|
||||
ghupdate.ColorPrint(ghupdate.ColorYellow, "No supported init system detected; please restart manually if needed.")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleSELinuxContext restores or applies the correct SELinux label to the binary.
|
||||
func handleSELinuxContext(path string) error {
|
||||
out, err := exec.Command("getenforce").Output()
|
||||
if err != nil {
|
||||
// SELinux not enabled or getenforce not available
|
||||
return nil
|
||||
}
|
||||
state := strings.TrimSpace(string(out))
|
||||
if state == "Disabled" {
|
||||
return nil
|
||||
}
|
||||
|
||||
ghupdate.ColorPrint(ghupdate.ColorYellow, "SELinux is enabled; applying context…")
|
||||
var errs []string
|
||||
|
||||
// Try persistent context via semanage+restorecon
|
||||
if semanagePath, err := exec.LookPath("semanage"); err == nil {
|
||||
if err := exec.Command(semanagePath, "fcontext", "-a", "-t", "bin_t", path).Run(); err != nil {
|
||||
errs = append(errs, "semanage fcontext failed: "+err.Error())
|
||||
} else if restoreconPath, err := exec.LookPath("restorecon"); err == nil {
|
||||
if err := exec.Command(restoreconPath, "-v", path).Run(); err != nil {
|
||||
errs = append(errs, "restorecon failed: "+err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to temporary context via chcon
|
||||
if chconPath, err := exec.LookPath("chcon"); err == nil {
|
||||
if err := exec.Command(chconPath, "-t", "bin_t", path).Run(); err != nil {
|
||||
errs = append(errs, "chcon failed: "+err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) > 0 {
|
||||
return fmt.Errorf("SELinux context errors: %s", strings.Join(errs, "; "))
|
||||
}
|
||||
return nil
|
||||
}
|
15
agent/utils.go
Normal file
15
agent/utils.go
Normal file
@@ -0,0 +1,15 @@
|
||||
package agent
|
||||
|
||||
import "math"
|
||||
|
||||
func bytesToMegabytes(b float64) float64 {
|
||||
return twoDecimals(b / 1048576)
|
||||
}
|
||||
|
||||
func bytesToGigabytes(b uint64) float64 {
|
||||
return twoDecimals(float64(b) / 1073741824)
|
||||
}
|
||||
|
||||
func twoDecimals(value float64) float64 {
|
||||
return math.Round(value*100) / 100
|
||||
}
|
Reference in New Issue
Block a user