mirror of
https://github.com/fankes/beszel.git
synced 2025-10-20 02:09:28 +08:00
change disk alert to monitor usage of any disk, not only root
This commit is contained in:
@@ -4,7 +4,6 @@ package alerts
|
|||||||
import (
|
import (
|
||||||
"beszel/internal/entities/system"
|
"beszel/internal/entities/system"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
|
||||||
"net/mail"
|
"net/mail"
|
||||||
"net/url"
|
"net/url"
|
||||||
"time"
|
"time"
|
||||||
@@ -58,7 +57,7 @@ type SystemAlertData struct {
|
|||||||
time time.Time
|
time time.Time
|
||||||
count uint8
|
count uint8
|
||||||
min uint8
|
min uint8
|
||||||
tempSums map[string]float32
|
mapSums map[string]float32
|
||||||
descriptor string // override descriptor in notification body (for temp sensor, disk partition, etc)
|
descriptor string // override descriptor in notification body (for temp sensor, disk partition, etc)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -68,18 +67,17 @@ func NewAlertManager(app *pocketbase.PocketBase) *AlertManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (am *AlertManager) HandleSystemAlerts(systemRecord *models.Record, systemInfo system.Info, temperatures map[string]float64) {
|
func (am *AlertManager) HandleSystemAlerts(systemRecord *models.Record, systemInfo system.Info, temperatures map[string]float64, extraFs map[string]*system.FsStats) error {
|
||||||
// start := time.Now()
|
// start := time.Now()
|
||||||
// defer func() {
|
// defer func() {
|
||||||
// log.Println("alert stats took", time.Since(start))
|
// log.Println("alert stats took", time.Since(start))
|
||||||
// }()
|
// }()
|
||||||
|
|
||||||
alertRecords, err := am.app.Dao().FindRecordsByExpr("alerts",
|
alertRecords, err := am.app.Dao().FindRecordsByExpr("alerts",
|
||||||
dbx.NewExp("system={:system}", dbx.Params{"system": systemRecord.GetId()}),
|
dbx.NewExp("system={:system}", dbx.Params{"system": systemRecord.Id}),
|
||||||
)
|
)
|
||||||
if err != nil || len(alertRecords) == 0 {
|
if err != nil || len(alertRecords) == 0 {
|
||||||
// log.Println("no alerts found for system")
|
// log.Println("no alerts found for system")
|
||||||
return
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var validAlerts []SystemAlertData
|
var validAlerts []SystemAlertData
|
||||||
@@ -96,11 +94,18 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *models.Record, systemIn
|
|||||||
val = systemInfo.Cpu
|
val = systemInfo.Cpu
|
||||||
case "Memory":
|
case "Memory":
|
||||||
val = systemInfo.MemPct
|
val = systemInfo.MemPct
|
||||||
case "Disk":
|
|
||||||
val = systemInfo.DiskPct
|
|
||||||
case "Bandwidth":
|
case "Bandwidth":
|
||||||
val = systemInfo.Bandwidth
|
val = systemInfo.Bandwidth
|
||||||
unit = "MB/s"
|
unit = " MB/s"
|
||||||
|
case "Disk":
|
||||||
|
maxUsedPct := systemInfo.DiskPct
|
||||||
|
for _, fs := range extraFs {
|
||||||
|
usedPct := fs.DiskUsed / fs.DiskTotal * 100
|
||||||
|
if usedPct > maxUsedPct {
|
||||||
|
maxUsedPct = usedPct
|
||||||
|
}
|
||||||
|
}
|
||||||
|
val = maxUsedPct
|
||||||
case "Temperature":
|
case "Temperature":
|
||||||
if temperatures == nil {
|
if temperatures == nil {
|
||||||
continue
|
continue
|
||||||
@@ -126,7 +131,7 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *models.Record, systemIn
|
|||||||
|
|
||||||
min := max(1, cast.ToUint8(alertRecord.Get("min")))
|
min := max(1, cast.ToUint8(alertRecord.Get("min")))
|
||||||
// add time to alert time to make sure it's slightly after record creation
|
// add time to alert time to make sure it's slightly after record creation
|
||||||
time := now.Add(-time.Duration(min)*time.Minute + time.Second*5)
|
time := now.Add(-time.Duration(min) * time.Minute)
|
||||||
if time.Before(oldestTime) {
|
if time.Before(oldestTime) {
|
||||||
oldestTime = time
|
oldestTime = time
|
||||||
}
|
}
|
||||||
@@ -164,7 +169,7 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *models.Record, systemIn
|
|||||||
All(&systemStats)
|
All(&systemStats)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// get oldest record creation time from first record in the slice
|
// get oldest record creation time from first record in the slice
|
||||||
@@ -181,7 +186,7 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *models.Record, systemIn
|
|||||||
|
|
||||||
if len(validAlerts) == 0 {
|
if len(validAlerts) == 0 {
|
||||||
// log.Println("no valid alerts found")
|
// log.Println("no valid alerts found")
|
||||||
return
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var stats SystemAlertStats
|
var stats SystemAlertStats
|
||||||
@@ -189,9 +194,11 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *models.Record, systemIn
|
|||||||
// we can skip the latest systemStats record since it's the current value
|
// we can skip the latest systemStats record since it's the current value
|
||||||
for i := 0; i < len(systemStats); i++ {
|
for i := 0; i < len(systemStats); i++ {
|
||||||
stat := systemStats[i]
|
stat := systemStats[i]
|
||||||
// log.Println("created", stat.Created.Time(), "now", time.Now().UTC())
|
// subtract 10 seconds to give a small time buffer
|
||||||
statTime := stat.Created.Time().Add(time.Second)
|
systemStatsCreation := stat.Created.Time().Add(-time.Second * 10)
|
||||||
json.Unmarshal(stat.Stats, &stats)
|
if err := json.Unmarshal(stat.Stats, &stats); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
// log.Println("stats", stats)
|
// log.Println("stats", stats)
|
||||||
for j := range validAlerts {
|
for j := range validAlerts {
|
||||||
alert := &validAlerts[j]
|
alert := &validAlerts[j]
|
||||||
@@ -199,8 +206,8 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *models.Record, systemIn
|
|||||||
if i == 0 {
|
if i == 0 {
|
||||||
alert.val = 0
|
alert.val = 0
|
||||||
}
|
}
|
||||||
// continue if stat is older than alert time range
|
// continue if system_stats is older than alert time range
|
||||||
if statTime.Before(alert.time) {
|
if systemStatsCreation.Before(alert.time) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// add to alert value
|
// add to alert value
|
||||||
@@ -212,17 +219,30 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *models.Record, systemIn
|
|||||||
case "Bandwidth":
|
case "Bandwidth":
|
||||||
alert.val += stats.NetSent + stats.NetRecv
|
alert.val += stats.NetSent + stats.NetRecv
|
||||||
case "Disk":
|
case "Disk":
|
||||||
// todo: check all disks instead of just root
|
if alert.mapSums == nil {
|
||||||
alert.val += stats.Disk
|
alert.mapSums = make(map[string]float32, len(extraFs)+1)
|
||||||
case "Temperature":
|
|
||||||
if alert.tempSums == nil {
|
|
||||||
alert.tempSums = make(map[string]float32, len(stats.Temperatures))
|
|
||||||
}
|
}
|
||||||
for key, value := range stats.Temperatures {
|
// add root disk
|
||||||
if _, ok := alert.tempSums[key]; !ok {
|
if _, ok := alert.mapSums["root"]; !ok {
|
||||||
alert.tempSums[key] = float32(0)
|
alert.mapSums["root"] = 0.0
|
||||||
|
}
|
||||||
|
alert.mapSums["root"] += float32(stats.Disk)
|
||||||
|
// add extra disks
|
||||||
|
for key, fs := range extraFs {
|
||||||
|
if _, ok := alert.mapSums[key]; !ok {
|
||||||
|
alert.mapSums[key] = 0.0
|
||||||
}
|
}
|
||||||
alert.tempSums[key] += value
|
alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
|
||||||
|
}
|
||||||
|
case "Temperature":
|
||||||
|
if alert.mapSums == nil {
|
||||||
|
alert.mapSums = make(map[string]float32, len(stats.Temperatures))
|
||||||
|
}
|
||||||
|
for key, temp := range stats.Temperatures {
|
||||||
|
if _, ok := alert.mapSums[key]; !ok {
|
||||||
|
alert.mapSums[key] = float32(0)
|
||||||
|
}
|
||||||
|
alert.mapSums[key] += temp
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
continue
|
continue
|
||||||
@@ -233,13 +253,23 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *models.Record, systemIn
|
|||||||
// sum up vals for each alert
|
// sum up vals for each alert
|
||||||
for _, alert := range validAlerts {
|
for _, alert := range validAlerts {
|
||||||
switch alert.name {
|
switch alert.name {
|
||||||
|
case "Disk":
|
||||||
|
maxPct := float32(0)
|
||||||
|
for key, value := range alert.mapSums {
|
||||||
|
sumPct := float32(value)
|
||||||
|
if sumPct > maxPct {
|
||||||
|
maxPct = sumPct
|
||||||
|
alert.descriptor = fmt.Sprintf("Usage of %s", key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
alert.val = float64(maxPct / float32(alert.count))
|
||||||
case "Temperature":
|
case "Temperature":
|
||||||
maxTemp := float32(0)
|
maxTemp := float32(0)
|
||||||
for key, value := range alert.tempSums {
|
for key, value := range alert.mapSums {
|
||||||
sumTemp := float32(value) / float32(alert.count)
|
sumTemp := float32(value) / float32(alert.count)
|
||||||
if sumTemp > maxTemp {
|
if sumTemp > maxTemp {
|
||||||
maxTemp = sumTemp
|
maxTemp = sumTemp
|
||||||
alert.descriptor = fmt.Sprintf("Hottest sensor %s", key)
|
alert.descriptor = fmt.Sprintf("Highest sensor %s", key)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
alert.val = float64(maxTemp)
|
alert.val = float64(maxTemp)
|
||||||
@@ -260,10 +290,11 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *models.Record, systemIn
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (am *AlertManager) sendSystemAlert(alert SystemAlertData) {
|
func (am *AlertManager) sendSystemAlert(alert SystemAlertData) {
|
||||||
log.Printf("Sending alert %s: val %f | count %d | threshold %f\n", alert.name, alert.val, alert.count, alert.threshold)
|
// log.Printf("Sending alert %s: val %f | count %d | threshold %f\n", alert.name, alert.val, alert.count, alert.threshold)
|
||||||
|
|
||||||
systemName := alert.systemRecord.GetString("name")
|
systemName := alert.systemRecord.GetString("name")
|
||||||
|
|
||||||
|
@@ -315,7 +315,9 @@ func (h *Hub) updateSystem(record *models.Record) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// system info alerts (todo: extra fs alerts)
|
// system info alerts (extra filesystems are passed along for disk alerts)
|
||||||
h.am.HandleSystemAlerts(record, systemData.Info, systemData.Stats.Temperatures)
|
if err := h.am.HandleSystemAlerts(record, systemData.Info, systemData.Stats.Temperatures, systemData.Stats.ExtraFs); err != nil {
|
||||||
|
h.app.Logger().Error("System alerts error", "err", err.Error())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// set system to specified status and save record
|
// set system to specified status and save record
|
||||||
|
@@ -54,7 +54,7 @@ export const updateSystemList = async () => {
|
|||||||
|
|
||||||
export const updateAlerts = () => {
|
export const updateAlerts = () => {
|
||||||
pb.collection('alerts')
|
pb.collection('alerts')
|
||||||
.getFullList<AlertRecord>({ fields: 'id,name,system,value,min,triggered' })
|
.getFullList<AlertRecord>({ fields: 'id,name,system,value,min,triggered', sort: 'updated' })
|
||||||
.then((records) => {
|
.then((records) => {
|
||||||
$alerts.set(records)
|
$alerts.set(records)
|
||||||
})
|
})
|
||||||
@@ -315,7 +315,7 @@ export const alertInfo = {
|
|||||||
name: 'Disk usage',
|
name: 'Disk usage',
|
||||||
unit: '%',
|
unit: '%',
|
||||||
icon: HardDriveIcon,
|
icon: HardDriveIcon,
|
||||||
desc: 'Triggers when root usage exceeds a threshold.',
|
desc: 'Triggers when usage of any disk exceeds a threshold.',
|
||||||
},
|
},
|
||||||
Bandwidth: {
|
Bandwidth: {
|
||||||
name: 'Bandwidth',
|
name: 'Bandwidth',
|
||||||
|
Reference in New Issue
Block a user