diff --git a/beszel/internal/alerts/alerts.go b/beszel/internal/alerts/alerts.go index f5975bd..28f0b20 100644 --- a/beszel/internal/alerts/alerts.go +++ b/beszel/internal/alerts/alerts.go @@ -2,25 +2,24 @@ package alerts import ( - "beszel/internal/entities/system" "fmt" "net/mail" "net/url" - "strings" + "sync" "time" "github.com/containrrr/shoutrrr" - "github.com/goccy/go-json" "github.com/pocketbase/dbx" "github.com/pocketbase/pocketbase/apis" "github.com/pocketbase/pocketbase/core" "github.com/pocketbase/pocketbase/tools/mailer" - "github.com/pocketbase/pocketbase/tools/types" - "github.com/spf13/cast" ) type AlertManager struct { - app core.App + app core.App + alertQueue chan alertTask + stopChan chan struct{} + pendingAlerts sync.Map } type AlertMessageData struct { @@ -60,350 +59,43 @@ type SystemAlertData struct { descriptor string // override descriptor in notification body (for temp sensor, disk partition, etc) } +// notification services that support title param +var supportsTitle = map[string]struct{}{ + "bark": {}, + "discord": {}, + "gotify": {}, + "ifttt": {}, + "join": {}, + "matrix": {}, + "ntfy": {}, + "opsgenie": {}, + "pushbullet": {}, + "pushover": {}, + "slack": {}, + "teams": {}, + "telegram": {}, + "zulip": {}, +} + +// NewAlertManager creates a new AlertManager instance. func NewAlertManager(app core.App) *AlertManager { - return &AlertManager{ - app: app, + am := &AlertManager{ + app: app, + alertQueue: make(chan alertTask), + stopChan: make(chan struct{}), } + go am.startWorker() + return am } -func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, systemInfo system.Info, temperatures map[string]float64, extraFs map[string]*system.FsStats) error { - // start := time.Now() - // defer func() { - // log.Println("alert stats took", time.Since(start)) - // }() - alertRecords, err := am.app.FindAllRecords("alerts", - dbx.NewExp("system={:system}", dbx.Params{"system": systemRecord.Id}), - ) - if err != nil || len(alertRecords) == 0 { - // log.Println("no alerts found for system") - return nil - } - - var validAlerts []SystemAlertData - now := systemRecord.GetDateTime("updated").Time().UTC() - oldestTime := now - - for _, alertRecord := range alertRecords { - name := alertRecord.GetString("name") - var val float64 - unit := "%" - - switch name { - case "CPU": - val = systemInfo.Cpu - case "Memory": - val = systemInfo.MemPct - case "Bandwidth": - val = systemInfo.Bandwidth - unit = " MB/s" - case "Disk": - maxUsedPct := systemInfo.DiskPct - for _, fs := range extraFs { - usedPct := fs.DiskUsed / fs.DiskTotal * 100 - if usedPct > maxUsedPct { - maxUsedPct = usedPct - } - } - val = maxUsedPct - case "Temperature": - if temperatures == nil { - continue - } - for _, temp := range temperatures { - if temp > val { - val = temp - } - } - unit = "°C" - } - - triggered := alertRecord.GetBool("triggered") - threshold := alertRecord.GetFloat("value") - - // CONTINUE - // IF alert is not triggered and curValue is less than threshold - // OR alert is triggered and curValue is greater than threshold - if (!triggered && val <= threshold) || (triggered && val > threshold) { - // log.Printf("Skipping alert %s: val %f | threshold %f | triggered %v\n", name, val, threshold, triggered) - continue - } - - min := max(1, cast.ToUint8(alertRecord.Get("min"))) - // add time to alert time to make sure it's slighty after record creation - time := now.Add(-time.Duration(min) * time.Minute) - if time.Before(oldestTime) { - oldestTime = time - } - - validAlerts = append(validAlerts, SystemAlertData{ - systemRecord: systemRecord, - alertRecord: alertRecord, - name: name, - unit: unit, - val: val, - threshold: threshold, - triggered: triggered, - time: time, - min: min, - }) - } - - systemStats := []struct { - Stats []byte `db:"stats"` - Created types.DateTime `db:"created"` - }{} - - err = am.app.DB(). - Select("stats", "created"). - From("system_stats"). - Where(dbx.NewExp( - "system={:system} AND type='1m' AND created > {:created}", - dbx.Params{ - "system": systemRecord.Id, - // subtract some time to give us a bit of buffer - "created": oldestTime.Add(-time.Second * 90), - }, - )). - OrderBy("created"). - All(&systemStats) - if err != nil { - return err - } - - // get oldest record creation time from first record in the slice - oldestRecordTime := systemStats[0].Created.Time() - // log.Println("oldestRecordTime", oldestRecordTime.String()) - - // delete from validAlerts if time is older than oldestRecord - for i := 0; i < len(validAlerts); i++ { - if validAlerts[i].time.Before(oldestRecordTime) { - // log.Println("deleting alert - time is older than oldestRecord", validAlerts[i].name, oldestRecordTime, validAlerts[i].time) - validAlerts = append(validAlerts[:i], validAlerts[i+1:]...) - } - } - - if len(validAlerts) == 0 { - // log.Println("no valid alerts found") - return nil - } - - var stats SystemAlertStats - - // we can skip the latest systemStats record since it's the current value - for i := 0; i < len(systemStats); i++ { - stat := systemStats[i] - // subtract 10 seconds to give a small time buffer - systemStatsCreation := stat.Created.Time().Add(-time.Second * 10) - if err := json.Unmarshal(stat.Stats, &stats); err != nil { - return err - } - // log.Println("stats", stats) - for j := range validAlerts { - alert := &validAlerts[j] - // reset alert val on first iteration - if i == 0 { - alert.val = 0 - } - // continue if system_stats is older than alert time range - if systemStatsCreation.Before(alert.time) { - continue - } - // add to alert value - switch alert.name { - case "CPU": - alert.val += stats.Cpu - case "Memory": - alert.val += stats.Mem - case "Bandwidth": - alert.val += stats.NetSent + stats.NetRecv - case "Disk": - if alert.mapSums == nil { - alert.mapSums = make(map[string]float32, len(extraFs)+1) - } - // add root disk - if _, ok := alert.mapSums["root"]; !ok { - alert.mapSums["root"] = 0.0 - } - alert.mapSums["root"] += float32(stats.Disk) - // add extra disks - for key, fs := range extraFs { - if _, ok := alert.mapSums[key]; !ok { - alert.mapSums[key] = 0.0 - } - alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100) - } - case "Temperature": - if alert.mapSums == nil { - alert.mapSums = make(map[string]float32, len(stats.Temperatures)) - } - for key, temp := range stats.Temperatures { - if _, ok := alert.mapSums[key]; !ok { - alert.mapSums[key] = float32(0) - } - alert.mapSums[key] += temp - } - default: - continue - } - alert.count++ - } - } - // sum up vals for each alert - for _, alert := range validAlerts { - switch alert.name { - case "Disk": - maxPct := float32(0) - for key, value := range alert.mapSums { - sumPct := float32(value) - if sumPct > maxPct { - maxPct = sumPct - alert.descriptor = fmt.Sprintf("Usage of %s", key) - } - } - alert.val = float64(maxPct / float32(alert.count)) - case "Temperature": - maxTemp := float32(0) - for key, value := range alert.mapSums { - sumTemp := float32(value) / float32(alert.count) - if sumTemp > maxTemp { - maxTemp = sumTemp - alert.descriptor = fmt.Sprintf("Highest sensor %s", key) - } - } - alert.val = float64(maxTemp) - default: - alert.val = alert.val / float64(alert.count) - } - minCount := float32(alert.min) / 1.2 - // log.Println("alert", alert.name, "val", alert.val, "threshold", alert.threshold, "triggered", alert.triggered) - // log.Printf("%s: val %f | count %d | min-count %f | threshold %f\n", alert.name, alert.val, alert.count, minCount, alert.threshold) - // pass through alert if count is greater than or equal to minCount - if float32(alert.count) >= minCount { - if !alert.triggered && alert.val > alert.threshold { - alert.triggered = true - go am.sendSystemAlert(alert) - } else if alert.triggered && alert.val <= alert.threshold { - alert.triggered = false - go am.sendSystemAlert(alert) - } - } - } - return nil -} - -func (am *AlertManager) sendSystemAlert(alert SystemAlertData) { - // log.Printf("Sending alert %s: val %f | count %d | threshold %f\n", alert.name, alert.val, alert.count, alert.threshold) - systemName := alert.systemRecord.GetString("name") - - // change Disk to Disk usage - if alert.name == "Disk" { - alert.name += " usage" - } - - // make title alert name lowercase if not CPU - titleAlertName := alert.name - if titleAlertName != "CPU" { - titleAlertName = strings.ToLower(titleAlertName) - } - - var subject string - if alert.triggered { - subject = fmt.Sprintf("%s %s above threshold", systemName, titleAlertName) - } else { - subject = fmt.Sprintf("%s %s below threshold", systemName, titleAlertName) - } - minutesLabel := "minute" - if alert.min > 1 { - minutesLabel += "s" - } - if alert.descriptor == "" { - alert.descriptor = alert.name - } - body := fmt.Sprintf("%s averaged %.2f%s for the previous %v %s.", alert.descriptor, alert.val, alert.unit, alert.min, minutesLabel) - - alert.alertRecord.Set("triggered", alert.triggered) - if err := am.app.Save(alert.alertRecord); err != nil { - // app.Logger().Error("failed to save alert record", "err", err.Error()) - return - } - // expand the user relation and send the alert - if errs := am.app.ExpandRecord(alert.alertRecord, []string{"user"}, nil); len(errs) > 0 { - // app.Logger().Error("failed to expand user relation", "errs", errs) - return - } - if user := alert.alertRecord.ExpandedOne("user"); user != nil { - am.sendAlert(AlertMessageData{ - UserID: user.Id, - Title: subject, - Message: body, - Link: am.app.Settings().Meta.AppURL + "/system/" + url.PathEscape(systemName), - LinkText: "View " + systemName, - }) - } -} - -// todo: allow x minutes downtime before sending alert -func (am *AlertManager) HandleStatusAlerts(newStatus string, oldSystemRecord *core.Record) error { - var alertStatus string - switch newStatus { - case "up": - if oldSystemRecord.GetString("status") == "down" { - alertStatus = "up" - } - case "down": - if oldSystemRecord.GetString("status") == "up" { - alertStatus = "down" - } - } - if alertStatus == "" { - return nil - } - // check if use - alertRecords, err := am.app.FindAllRecords("alerts", - dbx.HashExp{ - "system": oldSystemRecord.Id, - "name": "Status", - }, - ) - if err != nil || len(alertRecords) == 0 { - // log.Println("no alerts found for system") - return nil - } - for _, alertRecord := range alertRecords { - // expand the user relation - if errs := am.app.ExpandRecord(alertRecord, []string{"user"}, nil); len(errs) > 0 { - return fmt.Errorf("failed to expand: %v", errs) - } - user := alertRecord.ExpandedOne("user") - if user == nil { - return nil - } - emoji := "\U0001F534" - if alertStatus == "up" { - emoji = "\u2705" - } - // send alert - systemName := oldSystemRecord.GetString("name") - am.sendAlert(AlertMessageData{ - UserID: user.Id, - Title: fmt.Sprintf("Connection to %s is %s %v", systemName, alertStatus, emoji), - Message: fmt.Sprintf("Connection to %s is %s", systemName, alertStatus), - Link: am.app.Settings().Meta.AppURL + "/system/" + url.PathEscape(systemName), - LinkText: "View " + systemName, - }) - } - return nil -} - -func (am *AlertManager) sendAlert(data AlertMessageData) { +func (am *AlertManager) SendAlert(data AlertMessageData) error { // get user settings record, err := am.app.FindFirstRecordByFilter( "user_settings", "user={:user}", dbx.Params{"user": data.UserID}, ) if err != nil { - am.app.Logger().Error("Failed to get user settings", "err", err.Error()) - return + return err } // unmarshal user settings userAlertSettings := UserNotificationSettings{ @@ -421,8 +113,7 @@ func (am *AlertManager) sendAlert(data AlertMessageData) { } // send alerts via email if len(userAlertSettings.Emails) == 0 { - // log.Println("No email addresses found") - return + return nil } addresses := []mail.Address{} for _, email := range userAlertSettings.Emails { @@ -437,18 +128,16 @@ func (am *AlertManager) sendAlert(data AlertMessageData) { Name: am.app.Settings().Meta.SenderName, }, } - if err := am.app.NewMailClient().Send(&message); err != nil { - am.app.Logger().Error("Failed to send alert: ", "err", err.Error()) - } else { - am.app.Logger().Info("Sent email alert", "to", message.To, "subj", message.Subject) + err = am.app.NewMailClient().Send(&message) + if err != nil { + return err } + am.app.Logger().Info("Sent email alert", "to", message.To, "subj", message.Subject) + return nil } // SendShoutrrrAlert sends an alert via a Shoutrrr URL func (am *AlertManager) SendShoutrrrAlert(notificationUrl, title, message, link, linkText string) error { - // services that support title param - supportsTitle := []string{"bark", "discord", "gotify", "ifttt", "join", "matrix", "ntfy", "opsgenie", "pushbullet", "pushover", "slack", "teams", "telegram", "zulip"} - // Parse the URL parsedURL, err := url.Parse(notificationUrl) if err != nil { @@ -458,7 +147,7 @@ func (am *AlertManager) SendShoutrrrAlert(notificationUrl, title, message, link, queryParams := parsedURL.Query() // Add title - if sliceContains(supportsTitle, scheme) { + if _, ok := supportsTitle[scheme]; ok { queryParams.Add("title", title) } else if scheme == "mattermost" { // use markdown title for mattermost @@ -499,16 +188,6 @@ func (am *AlertManager) SendShoutrrrAlert(notificationUrl, title, message, link, return nil } -// Contains checks if a string is present in a slice of strings -func sliceContains(slice []string, item string) bool { - for _, v := range slice { - if v == item { - return true - } - } - return false -} - func (am *AlertManager) SendTestNotification(e *core.RequestEvent) error { info, _ := e.RequestInfo() if info.Auth == nil { diff --git a/beszel/internal/alerts/alerts_status.go b/beszel/internal/alerts/alerts_status.go new file mode 100644 index 0000000..3592dec --- /dev/null +++ b/beszel/internal/alerts/alerts_status.go @@ -0,0 +1,175 @@ +package alerts + +import ( + "fmt" + "net/url" + "strings" + "time" + + "github.com/pocketbase/dbx" + "github.com/pocketbase/pocketbase/core" +) + +type alertTask struct { + action string // "schedule" or "cancel" + systemName string + alertRecord *core.Record + delay time.Duration +} + +type alertInfo struct { + systemName string + alertRecord *core.Record + expireTime time.Time +} + +// startWorker is a long-running goroutine that processes alert tasks +// every x seconds. It must be running to process status alerts. +func (am *AlertManager) startWorker() { + // no special reason for 13 seconds + tick := time.Tick(13 * time.Second) + for { + select { + case <-am.stopChan: + return + case task := <-am.alertQueue: + switch task.action { + case "schedule": + am.pendingAlerts.Store(task.alertRecord.Id, &alertInfo{ + systemName: task.systemName, + alertRecord: task.alertRecord, + expireTime: time.Now().Add(task.delay), + }) + case "cancel": + am.pendingAlerts.Delete(task.alertRecord.Id) + } + case <-tick: + // Check for expired alerts every tick + now := time.Now() + for key, value := range am.pendingAlerts.Range { + info := value.(*alertInfo) + if now.After(info.expireTime) { + // Downtime delay has passed, process alert + am.sendStatusAlert("down", info.systemName, info.alertRecord) + am.pendingAlerts.Delete(key) + } + } + } + } +} + +// StopWorker shuts down the AlertManager.worker goroutine +func (am *AlertManager) StopWorker() { + close(am.stopChan) +} + +// HandleStatusAlerts manages the logic when system status changes. +func (am *AlertManager) HandleStatusAlerts(newStatus string, oldSystemRecord *core.Record) error { + switch newStatus { + case "up": + if oldSystemRecord.GetString("status") != "down" { + return nil + } + case "down": + if oldSystemRecord.GetString("status") != "up" { + return nil + } + default: + return nil + } + + alertRecords, err := am.getSystemStatusAlerts(oldSystemRecord.Id) + if err != nil { + return err + } + if len(alertRecords) == 0 { + return nil + } + + systemName := oldSystemRecord.GetString("name") + if newStatus == "down" { + am.handleSystemDown(systemName, alertRecords) + } else { + am.handleSystemUp(systemName, alertRecords) + } + return nil +} + +// getSystemStatusAlerts retrieves all "Status" alert records for a given system ID. +func (am *AlertManager) getSystemStatusAlerts(systemID string) ([]*core.Record, error) { + alertRecords, err := am.app.FindAllRecords("alerts", dbx.HashExp{ + "system": systemID, + "name": "Status", + }) + if err != nil { + return nil, err + } + return alertRecords, nil +} + +// Schedules delayed "down" alerts for each alert record. +func (am *AlertManager) handleSystemDown(systemName string, alertRecords []*core.Record) { + for _, alertRecord := range alertRecords { + // Continue if alert is already scheduled + if _, exists := am.pendingAlerts.Load(alertRecord.Id); exists { + continue + } + // Schedule by adding to queue + min := max(1, alertRecord.GetInt("min")) + am.alertQueue <- alertTask{ + action: "schedule", + systemName: systemName, + alertRecord: alertRecord, + delay: time.Duration(min) * time.Minute, + } + } +} + +// handleSystemUp manages the logic when a system status changes to "up". +// It cancels any pending alerts and sends "up" alerts. +func (am *AlertManager) handleSystemUp(systemName string, alertRecords []*core.Record) { + for _, alertRecord := range alertRecords { + alertRecordID := alertRecord.Id + // If alert exists for record, delete and continue (down alert not sent) + if _, exists := am.pendingAlerts.Load(alertRecordID); exists { + am.alertQueue <- alertTask{ + action: "cancel", + alertRecord: alertRecord, + } + continue + } + // No alert scheduled for this record, send "up" alert + if err := am.sendStatusAlert("up", systemName, alertRecord); err != nil { + am.app.Logger().Error("Failed to send alert", "err", err.Error()) + } + } +} + +// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records. +func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertRecord *core.Record) error { + var emoji string + if alertStatus == "up" { + emoji = "\u2705" // Green checkmark emoji + } else { + emoji = "\U0001F534" // Red alert emoji + } + + title := fmt.Sprintf("Connection to %s is %s %v", systemName, alertStatus, emoji) + message := strings.TrimSuffix(title, emoji) + + if errs := am.app.ExpandRecord(alertRecord, []string{"user"}, nil); len(errs) > 0 { + return errs["user"] + } + user := alertRecord.ExpandedOne("user") + if user == nil { + return nil + } + + return am.SendAlert(AlertMessageData{ + UserID: user.Id, + Title: title, + Message: message, + Link: am.app.Settings().Meta.AppURL + "/system/" + url.PathEscape(systemName), + LinkText: "View " + systemName, + }) +} diff --git a/beszel/internal/alerts/alerts_system.go b/beszel/internal/alerts/alerts_system.go new file mode 100644 index 0000000..49495a8 --- /dev/null +++ b/beszel/internal/alerts/alerts_system.go @@ -0,0 +1,288 @@ +package alerts + +import ( + "beszel/internal/entities/system" + "fmt" + "net/url" + "slices" + "strings" + "time" + + "github.com/goccy/go-json" + "github.com/pocketbase/dbx" + "github.com/pocketbase/pocketbase/core" + "github.com/pocketbase/pocketbase/tools/types" + "github.com/spf13/cast" +) + +func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, systemInfo system.Info, temperatures map[string]float64, extraFs map[string]*system.FsStats) error { + alertRecords, err := am.app.FindAllRecords("alerts", + dbx.NewExp("system={:system}", dbx.Params{"system": systemRecord.Id}), + ) + if err != nil || len(alertRecords) == 0 { + // log.Println("no alerts found for system") + return nil + } + + var validAlerts []SystemAlertData + now := systemRecord.GetDateTime("updated").Time().UTC() + oldestTime := now + + for _, alertRecord := range alertRecords { + name := alertRecord.GetString("name") + var val float64 + unit := "%" + + switch name { + case "CPU": + val = systemInfo.Cpu + case "Memory": + val = systemInfo.MemPct + case "Bandwidth": + val = systemInfo.Bandwidth + unit = " MB/s" + case "Disk": + maxUsedPct := systemInfo.DiskPct + for _, fs := range extraFs { + usedPct := fs.DiskUsed / fs.DiskTotal * 100 + if usedPct > maxUsedPct { + maxUsedPct = usedPct + } + } + val = maxUsedPct + case "Temperature": + if temperatures == nil { + continue + } + for _, temp := range temperatures { + if temp > val { + val = temp + } + } + unit = "°C" + } + + triggered := alertRecord.GetBool("triggered") + threshold := alertRecord.GetFloat("value") + + // CONTINUE + // IF alert is not triggered and curValue is less than threshold + // OR alert is triggered and curValue is greater than threshold + if (!triggered && val <= threshold) || (triggered && val > threshold) { + // log.Printf("Skipping alert %s: val %f | threshold %f | triggered %v\n", name, val, threshold, triggered) + continue + } + + min := max(1, cast.ToUint8(alertRecord.Get("min"))) + // add time to alert time to make sure it's slighty after record creation + time := now.Add(-time.Duration(min) * time.Minute) + if time.Before(oldestTime) { + oldestTime = time + } + + validAlerts = append(validAlerts, SystemAlertData{ + systemRecord: systemRecord, + alertRecord: alertRecord, + name: name, + unit: unit, + val: val, + threshold: threshold, + triggered: triggered, + time: time, + min: min, + }) + } + + systemStats := []struct { + Stats []byte `db:"stats"` + Created types.DateTime `db:"created"` + }{} + + err = am.app.DB(). + Select("stats", "created"). + From("system_stats"). + Where(dbx.NewExp( + "system={:system} AND type='1m' AND created > {:created}", + dbx.Params{ + "system": systemRecord.Id, + // subtract some time to give us a bit of buffer + "created": oldestTime.Add(-time.Second * 90), + }, + )). + OrderBy("created"). + All(&systemStats) + if err != nil { + return err + } + + // get oldest record creation time from first record in the slice + oldestRecordTime := systemStats[0].Created.Time() + // log.Println("oldestRecordTime", oldestRecordTime.String()) + + // delete from validAlerts if time is older than oldestRecord + for i := range validAlerts { + if validAlerts[i].time.Before(oldestRecordTime) { + // log.Println("deleting alert - time is older than oldestRecord", validAlerts[i].name, oldestRecordTime, validAlerts[i].time) + validAlerts = slices.Delete(validAlerts, i, i+1) + } + } + + if len(validAlerts) == 0 { + // log.Println("no valid alerts found") + return nil + } + + var stats SystemAlertStats + + // we can skip the latest systemStats record since it's the current value + for i := range systemStats { + stat := systemStats[i] + // subtract 10 seconds to give a small time buffer + systemStatsCreation := stat.Created.Time().Add(-time.Second * 10) + if err := json.Unmarshal(stat.Stats, &stats); err != nil { + return err + } + // log.Println("stats", stats) + for j := range validAlerts { + alert := &validAlerts[j] + // reset alert val on first iteration + if i == 0 { + alert.val = 0 + } + // continue if system_stats is older than alert time range + if systemStatsCreation.Before(alert.time) { + continue + } + // add to alert value + switch alert.name { + case "CPU": + alert.val += stats.Cpu + case "Memory": + alert.val += stats.Mem + case "Bandwidth": + alert.val += stats.NetSent + stats.NetRecv + case "Disk": + if alert.mapSums == nil { + alert.mapSums = make(map[string]float32, len(extraFs)+1) + } + // add root disk + if _, ok := alert.mapSums["root"]; !ok { + alert.mapSums["root"] = 0.0 + } + alert.mapSums["root"] += float32(stats.Disk) + // add extra disks + for key, fs := range extraFs { + if _, ok := alert.mapSums[key]; !ok { + alert.mapSums[key] = 0.0 + } + alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100) + } + case "Temperature": + if alert.mapSums == nil { + alert.mapSums = make(map[string]float32, len(stats.Temperatures)) + } + for key, temp := range stats.Temperatures { + if _, ok := alert.mapSums[key]; !ok { + alert.mapSums[key] = float32(0) + } + alert.mapSums[key] += temp + } + default: + continue + } + alert.count++ + } + } + // sum up vals for each alert + for _, alert := range validAlerts { + switch alert.name { + case "Disk": + maxPct := float32(0) + for key, value := range alert.mapSums { + sumPct := float32(value) + if sumPct > maxPct { + maxPct = sumPct + alert.descriptor = fmt.Sprintf("Usage of %s", key) + } + } + alert.val = float64(maxPct / float32(alert.count)) + case "Temperature": + maxTemp := float32(0) + for key, value := range alert.mapSums { + sumTemp := float32(value) / float32(alert.count) + if sumTemp > maxTemp { + maxTemp = sumTemp + alert.descriptor = fmt.Sprintf("Highest sensor %s", key) + } + } + alert.val = float64(maxTemp) + default: + alert.val = alert.val / float64(alert.count) + } + minCount := float32(alert.min) / 1.2 + // log.Println("alert", alert.name, "val", alert.val, "threshold", alert.threshold, "triggered", alert.triggered) + // log.Printf("%s: val %f | count %d | min-count %f | threshold %f\n", alert.name, alert.val, alert.count, minCount, alert.threshold) + // pass through alert if count is greater than or equal to minCount + if float32(alert.count) >= minCount { + if !alert.triggered && alert.val > alert.threshold { + alert.triggered = true + go am.sendSystemAlert(alert) + } else if alert.triggered && alert.val <= alert.threshold { + alert.triggered = false + go am.sendSystemAlert(alert) + } + } + } + return nil +} + +func (am *AlertManager) sendSystemAlert(alert SystemAlertData) { + // log.Printf("Sending alert %s: val %f | count %d | threshold %f\n", alert.name, alert.val, alert.count, alert.threshold) + systemName := alert.systemRecord.GetString("name") + + // change Disk to Disk usage + if alert.name == "Disk" { + alert.name += " usage" + } + + // make title alert name lowercase if not CPU + titleAlertName := alert.name + if titleAlertName != "CPU" { + titleAlertName = strings.ToLower(titleAlertName) + } + + var subject string + if alert.triggered { + subject = fmt.Sprintf("%s %s above threshold", systemName, titleAlertName) + } else { + subject = fmt.Sprintf("%s %s below threshold", systemName, titleAlertName) + } + minutesLabel := "minute" + if alert.min > 1 { + minutesLabel += "s" + } + if alert.descriptor == "" { + alert.descriptor = alert.name + } + body := fmt.Sprintf("%s averaged %.2f%s for the previous %v %s.", alert.descriptor, alert.val, alert.unit, alert.min, minutesLabel) + + alert.alertRecord.Set("triggered", alert.triggered) + if err := am.app.Save(alert.alertRecord); err != nil { + // app.Logger().Error("failed to save alert record", "err", err.Error()) + return + } + // expand the user relation and send the alert + if errs := am.app.ExpandRecord(alert.alertRecord, []string{"user"}, nil); len(errs) > 0 { + // app.Logger().Error("failed to expand user relation", "errs", errs) + return + } + if user := alert.alertRecord.ExpandedOne("user"); user != nil { + am.SendAlert(AlertMessageData{ + UserID: user.Id, + Title: subject, + Message: body, + Link: am.app.Settings().Meta.AppURL + "/system/" + url.PathEscape(systemName), + LinkText: "View " + systemName, + }) + } +} diff --git a/beszel/site/src/components/alerts/alerts-system.tsx b/beszel/site/src/components/alerts/alerts-system.tsx index 826cf4f..16a6736 100644 --- a/beszel/site/src/components/alerts/alerts-system.tsx +++ b/beszel/site/src/components/alerts/alerts-system.tsx @@ -160,13 +160,11 @@ export function SystemAlertGlobal({ function AlertContent({ data }: { data: AlertData }) { const { key } = data - const hasSliders = !("single" in data.alert) + const singleDescription = data.alert.singleDesc?.() const [checked, setChecked] = useState(data.checked || false) - const [min, setMin] = useState(data.min || (hasSliders ? 10 : 0)) - const [value, setValue] = useState(data.val || (hasSliders ? 80 : 0)) - - const showSliders = checked && hasSliders + const [min, setMin] = useState(data.min || 10) + const [value, setValue] = useState(data.val || (singleDescription ? 0 : 80)) const newMin = useRef(min) const newValue = useRef(value) @@ -180,14 +178,14 @@ function AlertContent({ data }: { data: AlertData }) { - {showSliders && ( + {checked && (
}> + {!singleDescription && (

@@ -222,8 +221,12 @@ function AlertContent({ data }: { data: AlertData }) { />

-
-

+ )} +

+

+ {singleDescription && ( + <>{singleDescription}{` `} + )} For {min}{" "} diff --git a/beszel/site/src/components/routes/system.tsx b/beszel/site/src/components/routes/system.tsx index 2c6545f..2735a78 100644 --- a/beszel/site/src/components/routes/system.tsx +++ b/beszel/site/src/components/routes/system.tsx @@ -302,6 +302,13 @@ export default function SystemDetail({ name }: { name: string }) { const hasGpuData = lastGpuVals.length > 0 const hasGpuPowerData = lastGpuVals.some((gpu) => gpu.p !== undefined) + let translatedStatus: string = system.status + if (system.status === "up") { + translatedStatus = t({ message: "Up", comment: "Context: System is up" }) + } else if (system.status === "down") { + translatedStatus = t({ message: "Down", comment: "Context: System is down" }) + } + return ( <>

@@ -328,7 +335,7 @@ export default function SystemDetail({ name }: { name: string }) { })} > - {system.status} + {translatedStatus}
{systemInfo.map(({ value, label, Icon, hide }, i) => { if (hide || !value) { diff --git a/beszel/site/src/lib/utils.ts b/beszel/site/src/lib/utils.ts index f060227..1d716a4 100644 --- a/beszel/site/src/lib/utils.ts +++ b/beszel/site/src/lib/utils.ts @@ -302,7 +302,8 @@ export const alertInfo: Record = { unit: "", icon: ServerIcon, desc: () => t`Triggers when status switches between up and down`, - single: true, + /** "for x minutes" is appended to desc when only one value */ + singleDesc: () => t`System` + " " + t`Down`, }, CPU: { name: () => t`CPU Usage`, diff --git a/beszel/site/src/types.d.ts b/beszel/site/src/types.d.ts index 26f7795..48bb67b 100644 --- a/beszel/site/src/types.d.ts +++ b/beszel/site/src/types.d.ts @@ -200,6 +200,7 @@ interface AlertInfo { unit: string icon: any desc: () => string - single?: boolean max?: number + /** Single value description (when there's only one value, like status) */ + singleDesc?: () => string }