add health check for agent

- Updated command-line flag parsing.
- Moved GetAddress and GetNetwork to server.go
This commit is contained in:
henrygd
2025-03-14 03:33:25 -04:00
parent 400ea89587
commit edefc6f53e
7 changed files with 228 additions and 74 deletions

View File

@@ -6,8 +6,8 @@ import (
"flag"
"fmt"
"log"
"log/slog"
"os"
"strings"
"golang.org/x/crypto/ssh"
)
@@ -18,39 +18,50 @@ type cmdOptions struct {
listen string // listen is the address or port to listen on.
}
// parseFlags parses the command line flags and populates the config struct.
func (opts *cmdOptions) parseFlags() {
// parse parses the command line flags and populates the config struct.
// It returns true if a subcommand was handled and the program should exit.
func (opts *cmdOptions) parse() bool {
flag.StringVar(&opts.key, "key", "", "Public key(s) for SSH authentication")
flag.StringVar(&opts.listen, "listen", "", "Address or port to listen on")
flag.Usage = func() {
fmt.Printf("Usage: %s [options] [subcommand]\n", os.Args[0])
fmt.Println("\nOptions:")
flag.PrintDefaults()
fmt.Println("\nSubcommands:")
fmt.Println(" version Display the version")
fmt.Printf("Usage: %s [command] [flags]\n", os.Args[0])
fmt.Println("\nCommands:")
fmt.Println(" health Check if the agent is running")
fmt.Println(" help Display this help message")
fmt.Println(" update Update the agent to the latest version")
fmt.Println(" update Update to the latest version")
fmt.Println(" version Display the version")
fmt.Println("\nFlags:")
flag.PrintDefaults()
}
}
// handleSubcommand handles subcommands such as version, help, and update.
// It returns true if a subcommand was handled, false otherwise.
func handleSubcommand() bool {
if len(os.Args) <= 1 {
return false
subcommand := ""
if len(os.Args) > 1 {
subcommand = os.Args[1]
}
switch os.Args[1] {
case "version", "-v":
switch subcommand {
case "-v", "version":
fmt.Println(beszel.AppName+"-agent", beszel.Version)
os.Exit(0)
return true
case "help":
flag.Usage()
os.Exit(0)
return true
case "update":
agent.Update()
os.Exit(0)
return true
case "health":
// for health, we need to parse flags first to get the listen address
args := append(os.Args[2:], subcommand)
flag.CommandLine.Parse(args)
addr := opts.getAddress()
network := agent.GetNetwork(addr)
exitCode, err := agent.Health(addr, network)
slog.Info("Health", "code", exitCode, "err", err)
os.Exit(exitCode)
}
flag.Parse()
return false
}
@@ -79,46 +90,18 @@ func (opts *cmdOptions) loadPublicKeys() ([]ssh.PublicKey, error) {
return agent.ParseKeys(string(pubKey))
}
// getAddress gets the address to listen on from the command line flag, environment variable, or default value.
func (opts *cmdOptions) getAddress() string {
// Try command line flag first
if opts.listen != "" {
return opts.listen
}
// Try environment variables
if addr, ok := agent.GetEnv("LISTEN"); ok && addr != "" {
return addr
}
// Legacy PORT environment variable support
if port, ok := agent.GetEnv("PORT"); ok && port != "" {
return port
}
return ":45876"
}
// getNetwork returns the network type to use for the server.
func (opts *cmdOptions) getNetwork() string {
if network, _ := agent.GetEnv("NETWORK"); network != "" {
return network
}
if strings.HasPrefix(opts.listen, "/") {
return "unix"
}
return "tcp"
return agent.GetAddress(opts.listen)
}
func main() {
var opts cmdOptions
opts.parseFlags()
subcommandHandled := opts.parse()
if handleSubcommand() {
if subcommandHandled {
return
}
flag.Parse()
opts.listen = opts.getAddress()
var serverConfig agent.ServerOptions
var err error
serverConfig.Keys, err = opts.loadPublicKeys()
@@ -126,8 +109,9 @@ func main() {
log.Fatal("Failed to load public keys:", err)
}
serverConfig.Addr = opts.listen
serverConfig.Network = opts.getNetwork()
addr := opts.getAddress()
serverConfig.Addr = addr
serverConfig.Network = agent.GetNetwork(addr)
agent := agent.NewAgent()
if err := agent.StartServer(serverConfig); err != nil {

View File

@@ -1,6 +1,7 @@
package main
import (
"beszel/internal/agent"
"crypto/ed25519"
"flag"
"os"
@@ -29,7 +30,7 @@ func TestGetAddress(t *testing.T) {
opts: cmdOptions{
listen: "8080",
},
expected: "8080",
expected: ":8080",
},
{
name: "use unix socket from flag",
@@ -52,7 +53,7 @@ func TestGetAddress(t *testing.T) {
envVars: map[string]string{
"PORT": "7070",
},
expected: "7070",
expected: ":7070",
},
{
name: "use unix socket from env var",
@@ -233,7 +234,7 @@ func TestGetNetwork(t *testing.T) {
for k, v := range tt.envVars {
t.Setenv(k, v)
}
network := tt.opts.getNetwork()
network := agent.GetNetwork(tt.opts.listen)
assert.Equal(t, tt.expected, network)
})
}
@@ -293,7 +294,7 @@ func TestParseFlags(t *testing.T) {
os.Args = tt.args
var opts cmdOptions
opts.parseFlags()
opts.parse()
flag.Parse()
assert.Equal(t, tt.expected, opts)

View File

@@ -0,0 +1,19 @@
package agent
import (
"net"
"time"
)
// Health checks if the agent's server is running by attempting to connect to it.
// It returns 0 if the server is running, 1 otherwise (as in exit codes).
//
// If an error occurs when attempting to connect to the server, it returns the error.
func Health(addr string, network string) (int, error) {
conn, err := net.DialTimeout(network, addr, 4*time.Second)
if err != nil {
return 1, err
}
conn.Close()
return 0, nil
}

View File

@@ -0,0 +1,130 @@
//go:build testing
// +build testing
package agent_test
import (
"fmt"
"net"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"beszel/internal/agent"
)
// setupTestServer creates a temporary server for testing
func setupTestServer(t *testing.T) (string, func()) {
// Create a temporary socket file for Unix socket testing
tempSockFile := os.TempDir() + "/beszel_health_test.sock"
// Clean up any existing socket file
os.Remove(tempSockFile)
// Create a listener
listener, err := net.Listen("unix", tempSockFile)
require.NoError(t, err, "Failed to create test listener")
// Start a simple server in a goroutine
go func() {
conn, err := listener.Accept()
if err != nil {
return // Listener closed
}
defer conn.Close()
// Just accept the connection and do nothing
}()
// Return the socket file path and a cleanup function
return tempSockFile, func() {
listener.Close()
os.Remove(tempSockFile)
}
}
// setupTCPTestServer creates a temporary TCP server for testing
func setupTCPTestServer(t *testing.T) (string, func()) {
// Listen on a random available port
listener, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err, "Failed to create test listener")
// Get the port that was assigned
addr := listener.Addr().(*net.TCPAddr)
port := addr.Port
// Start a simple server in a goroutine
go func() {
conn, err := listener.Accept()
if err != nil {
return // Listener closed
}
defer conn.Close()
// Just accept the connection and do nothing
}()
// Return the address and a cleanup function
return fmt.Sprintf("127.0.0.1:%d", port), func() {
listener.Close()
}
}
func TestHealth(t *testing.T) {
t.Run("server is running (unix socket)", func(t *testing.T) {
// Setup a test server
sockFile, cleanup := setupTestServer(t)
defer cleanup()
// Run the health check with explicit parameters
result, err := agent.Health(sockFile, "unix")
require.NoError(t, err, "Failed to check health")
// Verify the result
assert.Equal(t, 0, result, "Health check should return 0 when server is running")
})
t.Run("server is running (tcp address)", func(t *testing.T) {
// Setup a test server
addr, cleanup := setupTCPTestServer(t)
defer cleanup()
// Run the health check with explicit parameters
result, err := agent.Health(addr, "tcp")
require.NoError(t, err, "Failed to check health")
// Verify the result
assert.Equal(t, 0, result, "Health check should return 0 when server is running")
})
t.Run("server is not running", func(t *testing.T) {
// Use an address that's likely not in use
addr := "127.0.0.1:65535"
// Run the health check with explicit parameters
result, err := agent.Health(addr, "tcp")
require.Error(t, err, "Health check should return an error when server is not running")
// Verify the result
assert.Equal(t, 1, result, "Health check should return 1 when server is not running")
})
t.Run("invalid network", func(t *testing.T) {
// Use an invalid network type
result, err := agent.Health("127.0.0.1:8080", "invalid_network")
require.Error(t, err, "Health check should return an error with invalid network")
assert.Equal(t, 1, result, "Health check should return 1 when network is invalid")
})
t.Run("unix socket not found", func(t *testing.T) {
// Use a non-existent unix socket
nonExistentSocket := os.TempDir() + "/non_existent_socket.sock"
// Make sure it really doesn't exist
os.Remove(nonExistentSocket)
result, err := agent.Health(nonExistentSocket, "unix")
require.Error(t, err, "Health check should return an error when socket doesn't exist")
assert.Equal(t, 1, result, "Health check should return 1 when socket doesn't exist")
})
}

View File

@@ -17,7 +17,7 @@ func (a *Agent) initializeNetIoStats() {
nics, nicsEnvExists := GetEnv("NICS")
if nicsEnvExists {
nicsMap = make(map[string]struct{}, 0)
for _, nic := range strings.Split(nics, ",") {
for nic := range strings.SplitSeq(nics, ",") {
nicsMap[nic] = struct{}{}
}
}

View File

@@ -23,20 +23,14 @@ func (a *Agent) StartServer(opts ServerOptions) error {
slog.Info("Starting SSH server", "addr", opts.Addr, "network", opts.Network)
switch opts.Network {
case "unix":
if opts.Network == "unix" {
// remove existing socket file if it exists
if err := os.Remove(opts.Addr); err != nil && !os.IsNotExist(err) {
return err
}
default:
// prefix with : if only port was provided
if !strings.Contains(opts.Addr, ":") {
opts.Addr = ":" + opts.Addr
}
}
// Listen on the address
// start listening on the address
ln, err := net.Listen(opts.Network, opts.Addr)
if err != nil {
return err
@@ -44,7 +38,7 @@ func (a *Agent) StartServer(opts ServerOptions) error {
defer ln.Close()
// Start SSH server on the listener
err = sshServer.Serve(ln, nil, sshServer.NoPty(),
return sshServer.Serve(ln, nil, sshServer.NoPty(),
sshServer.PublicKeyAuth(func(ctx sshServer.Context, key sshServer.PublicKey) bool {
for _, pubKey := range opts.Keys {
if sshServer.KeysEqual(key, pubKey) {
@@ -54,10 +48,6 @@ func (a *Agent) StartServer(opts ServerOptions) error {
return false
}),
)
if err != nil {
return err
}
return nil
}
func (a *Agent) handleSession(s sshServer.Session) {
@@ -89,3 +79,33 @@ func ParseKeys(input string) ([]ssh.PublicKey, error) {
}
return parsedKeys, nil
}
// GetAddress gets the address to listen on or connect to from environment variables or default value.
func GetAddress(addr string) string {
if addr == "" {
addr, _ = GetEnv("LISTEN")
}
if addr == "" {
// Legacy PORT environment variable support
addr, _ = GetEnv("PORT")
}
if addr == "" {
return ":45876"
}
// prefix with : if only port was provided
if GetNetwork(addr) != "unix" && !strings.Contains(addr, ":") {
addr = ":" + addr
}
return addr
}
// GetNetwork returns the network type to use based on the address
func GetNetwork(addr string) string {
if network, ok := GetEnv("NETWORK"); ok && network != "" {
return network
}
if strings.HasPrefix(addr, "/") {
return "unix"
}
return "tcp"
}

View File

@@ -45,7 +45,7 @@ func TestStartServer(t *testing.T) {
name: "tcp port only",
config: ServerOptions{
Network: "tcp",
Addr: "45987",
Addr: ":45987",
Keys: []ssh.PublicKey{sshPubKey},
},
},
@@ -88,7 +88,7 @@ func TestStartServer(t *testing.T) {
name: "bad key should fail",
config: ServerOptions{
Network: "tcp",
Addr: "45987",
Addr: ":45987",
Keys: []ssh.PublicKey{sshBadPubKey},
},
wantErr: true,
@@ -98,7 +98,7 @@ func TestStartServer(t *testing.T) {
name: "good key still good",
config: ServerOptions{
Network: "tcp",
Addr: "45987",
Addr: ":45987",
Keys: []ssh.PublicKey{sshPubKey},
},
},