mirror of
https://github.com/fankes/beszel.git
synced 2025-10-19 01:39:34 +08:00
new agent healthcheck to support non-ssh connections
This commit is contained in:
@@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"beszel"
|
||||
"beszel/internal/agent"
|
||||
"beszel/internal/agent/health"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
@@ -50,12 +51,7 @@ func (opts *cmdOptions) parse() bool {
|
||||
agent.Update()
|
||||
return true
|
||||
case "health":
|
||||
// for health, we need to parse flags first to get the listen address
|
||||
args := append(os.Args[2:], subcommand)
|
||||
flag.CommandLine.Parse(args)
|
||||
addr := opts.getAddress()
|
||||
network := agent.GetNetwork(addr)
|
||||
err := agent.Health(addr, network)
|
||||
err := health.Check()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
@@ -115,8 +111,12 @@ func main() {
|
||||
serverConfig.Addr = addr
|
||||
serverConfig.Network = agent.GetNetwork(addr)
|
||||
|
||||
agent := agent.NewAgent()
|
||||
if err := agent.StartServer(serverConfig); err != nil {
|
||||
log.Fatal("Failed to start server:", err)
|
||||
agent, err := agent.NewAgent("")
|
||||
if err != nil {
|
||||
log.Fatal("Failed to create agent: ", err)
|
||||
}
|
||||
|
||||
if err := agent.Start(serverConfig); err != nil {
|
||||
log.Fatal("Failed to start server: ", err)
|
||||
}
|
||||
}
|
||||
|
@@ -1,18 +0,0 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"net"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Health probes addr on the given network by opening a connection to it.
//
// The dial is limited to 4 seconds. On success the connection is closed
// immediately and nil is returned; otherwise the dial error is returned
// unchanged.
func Health(addr string, network string) error {
	probe, dialErr := net.DialTimeout(network, addr, 4*time.Second)
	if dialErr == nil {
		// Only reachability matters; the result of Close is not inspected.
		probe.Close()
	}
	return dialErr
}
|
43
beszel/internal/agent/health/health.go
Normal file
43
beszel/internal/agent/health/health.go
Normal file
@@ -0,0 +1,43 @@
|
||||
// Package health provides functions to check and update the health of the agent.
|
||||
// It uses a file in the temp directory to store the timestamp of the last connection attempt.
|
||||
// If the timestamp is older than roughly 90 seconds (Check uses a 91-second cutoff), the agent is considered unhealthy.
|
||||
// NB: The agent must be started with the Start() method to be considered healthy.
|
||||
package health
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
	// healthFile is the location of the health marker file: a file named
	// "beszel_health" inside the directory reported by os.TempDir.
	healthFile = filepath.Join(os.TempDir(), "beszel_health")
)
|
||||
|
||||
// Check checks if the agent is connected by checking the modification time of the health file
|
||||
func Check() error {
|
||||
fileInfo, err := os.Stat(healthFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if time.Since(fileInfo.ModTime()) > 91*time.Second {
|
||||
log.Println("over 90 seconds since last connection")
|
||||
return errors.New("unhealthy")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update updates the modification time of the health file
|
||||
func Update() error {
|
||||
file, err := os.Create(healthFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return file.Close()
|
||||
}
|
||||
|
||||
// CleanUp removes the health file
|
||||
func CleanUp() error {
|
||||
return os.Remove(healthFile)
|
||||
}
|
67
beszel/internal/agent/health/health_test.go
Normal file
67
beszel/internal/agent/health/health_test.go
Normal file
@@ -0,0 +1,67 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package health
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"testing/synctest"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestHealth(t *testing.T) {
|
||||
// Override healthFile to use a temporary directory for this test.
|
||||
originalHealthFile := healthFile
|
||||
tmpDir := t.TempDir()
|
||||
healthFile = filepath.Join(tmpDir, "beszel_health_test")
|
||||
defer func() { healthFile = originalHealthFile }()
|
||||
|
||||
t.Run("check with no health file", func(t *testing.T) {
|
||||
err := Check()
|
||||
require.Error(t, err)
|
||||
assert.True(t, os.IsNotExist(err), "expected a file-not-exist error, but got: %v", err)
|
||||
})
|
||||
|
||||
t.Run("update and check", func(t *testing.T) {
|
||||
err := Update()
|
||||
require.NoError(t, err, "Update() failed")
|
||||
|
||||
err = Check()
|
||||
assert.NoError(t, err, "Check() failed immediately after Update()")
|
||||
})
|
||||
|
||||
// This test uses synctest to simulate time passing.
|
||||
// NOTE: This test requires GOEXPERIMENT=synctest to run.
|
||||
t.Run("check with simulated time", func(t *testing.T) {
|
||||
synctest.Run(func() {
|
||||
// Update the file to set the initial timestamp.
|
||||
require.NoError(t, Update(), "Update() failed inside synctest")
|
||||
|
||||
// Set the mtime to the current fake time to align the file's timestamp with the simulated clock.
|
||||
now := time.Now()
|
||||
require.NoError(t, os.Chtimes(healthFile, now, now), "Chtimes failed")
|
||||
|
||||
// Wait a duration less than the threshold.
|
||||
time.Sleep(89 * time.Second)
|
||||
synctest.Wait()
|
||||
|
||||
// The check should still pass.
|
||||
assert.NoError(t, Check(), "Check() failed after 89s")
|
||||
|
||||
// Wait for the total duration to exceed the threshold.
|
||||
time.Sleep(5 * time.Second)
|
||||
synctest.Wait()
|
||||
|
||||
// The check should now fail as unhealthy.
|
||||
err := Check()
|
||||
require.Error(t, err, "Check() should have failed after 91s")
|
||||
assert.Equal(t, "unhealthy", err.Error(), "Check() returned wrong error")
|
||||
})
|
||||
})
|
||||
}
|
@@ -1,118 +0,0 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"beszel/internal/agent"
|
||||
)
|
||||
|
||||
// setupTestServer creates a temporary server for testing
|
||||
func setupTestServer(t *testing.T) (string, func()) {
|
||||
// Create a temporary socket file for Unix socket testing
|
||||
tempSockFile := os.TempDir() + "/beszel_health_test.sock"
|
||||
|
||||
// Clean up any existing socket file
|
||||
os.Remove(tempSockFile)
|
||||
|
||||
// Create a listener
|
||||
listener, err := net.Listen("unix", tempSockFile)
|
||||
require.NoError(t, err, "Failed to create test listener")
|
||||
|
||||
// Start a simple server in a goroutine
|
||||
go func() {
|
||||
conn, err := listener.Accept()
|
||||
if err != nil {
|
||||
return // Listener closed
|
||||
}
|
||||
defer conn.Close()
|
||||
// Just accept the connection and do nothing
|
||||
}()
|
||||
|
||||
// Return the socket file path and a cleanup function
|
||||
return tempSockFile, func() {
|
||||
listener.Close()
|
||||
os.Remove(tempSockFile)
|
||||
}
|
||||
}
|
||||
|
||||
// setupTCPTestServer creates a temporary TCP server for testing
|
||||
func setupTCPTestServer(t *testing.T) (string, func()) {
|
||||
// Listen on a random available port
|
||||
listener, err := net.Listen("tcp", "127.0.0.1:0")
|
||||
require.NoError(t, err, "Failed to create test listener")
|
||||
|
||||
// Get the port that was assigned
|
||||
addr := listener.Addr().(*net.TCPAddr)
|
||||
port := addr.Port
|
||||
|
||||
// Start a simple server in a goroutine
|
||||
go func() {
|
||||
conn, err := listener.Accept()
|
||||
if err != nil {
|
||||
return // Listener closed
|
||||
}
|
||||
defer conn.Close()
|
||||
// Just accept the connection and do nothing
|
||||
}()
|
||||
|
||||
// Return the address and a cleanup function
|
||||
return fmt.Sprintf("127.0.0.1:%d", port), func() {
|
||||
listener.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func TestHealth(t *testing.T) {
|
||||
t.Run("server is running (unix socket)", func(t *testing.T) {
|
||||
// Setup a test server
|
||||
sockFile, cleanup := setupTestServer(t)
|
||||
defer cleanup()
|
||||
|
||||
// Run the health check with explicit parameters
|
||||
err := agent.Health(sockFile, "unix")
|
||||
require.NoError(t, err, "Failed to check health")
|
||||
})
|
||||
|
||||
t.Run("server is running (tcp address)", func(t *testing.T) {
|
||||
// Setup a test server
|
||||
addr, cleanup := setupTCPTestServer(t)
|
||||
defer cleanup()
|
||||
|
||||
// Run the health check with explicit parameters
|
||||
err := agent.Health(addr, "tcp")
|
||||
require.NoError(t, err, "Failed to check health")
|
||||
})
|
||||
|
||||
t.Run("server is not running", func(t *testing.T) {
|
||||
// Use an address that's likely not in use
|
||||
addr := "127.0.0.1:65535"
|
||||
|
||||
// Run the health check with explicit parameters
|
||||
err := agent.Health(addr, "tcp")
|
||||
require.Error(t, err, "Health check should return an error when server is not running")
|
||||
})
|
||||
|
||||
t.Run("invalid network", func(t *testing.T) {
|
||||
// Use an invalid network type
|
||||
err := agent.Health("127.0.0.1:8080", "invalid_network")
|
||||
require.Error(t, err, "Health check should return an error with invalid network")
|
||||
})
|
||||
|
||||
t.Run("unix socket not found", func(t *testing.T) {
|
||||
// Use a non-existent unix socket
|
||||
nonExistentSocket := os.TempDir() + "/non_existent_socket.sock"
|
||||
|
||||
// Make sure it really doesn't exist
|
||||
os.Remove(nonExistentSocket)
|
||||
|
||||
err := agent.Health(nonExistentSocket, "unix")
|
||||
require.Error(t, err, "Health check should return an error when socket doesn't exist")
|
||||
})
|
||||
}
|
Reference in New Issue
Block a user