add gpu power consumption chart

This commit is contained in:
Henry Dollman
2024-11-08 20:31:22 -05:00
parent bcb7de1b9a
commit 03de73560c
4 changed files with 144 additions and 21 deletions

View File

@@ -78,7 +78,7 @@ func (a *Agent) Run(pubKey []byte, addr string) {
// initialize GPU manager
if os.Getenv("GPU") == "true" {
if gm, err := NewGPUManager(); err != nil {
slog.Error("GPU manager", "err", err)
slog.Warn("GPU", "err", err)
} else {
a.gpuManager = gm
}

View File

@@ -93,9 +93,9 @@ func (gm *GPUManager) parseNvidiaData(output []byte) {
}
// update gpu data
gpu := gm.GpuDataMap[id]
gpu.Temperature += temp
gpu.MemoryUsed += memoryUsage / 1.024
gpu.MemoryTotal += totalMemory / 1.024
gpu.Temperature = temp
gpu.MemoryUsed = memoryUsage / 1.024
gpu.MemoryTotal = totalMemory / 1.024
gpu.Usage += usage
gpu.Power += power
gpu.Count++
@@ -120,7 +120,7 @@ func (gm *GPUManager) startAmdCollector() {
// collectAmdStats runs rocm-smi in a loop and passes the output to parseAmdData
func (gm *GPUManager) collectAmdStats() error {
cmd := exec.Command("/bin/sh", "-c", "while true; do rocm-smi --showid --showtemp --showuse --showpower --showproductname --showmeminfo vram --json; sleep 4.7; done")
cmd := exec.Command("/bin/sh", "-c", "while true; do rocm-smi --showid --showtemp --showuse --showpower --showproductname --showmeminfo vram --json; sleep 3.7; done")
// Set up a pipe to capture stdout
stdout, err := cmd.StdoutPipe()
if err != nil {
@@ -168,9 +168,9 @@ func (gm *GPUManager) parseAmdData(rocmSmiInfo *map[string]RocmSmiJson) {
gm.GpuDataMap[v.ID] = &system.GPUData{Name: v.Name}
}
gpu := gm.GpuDataMap[v.ID]
gpu.Temperature += temp
gpu.MemoryUsed += memoryUsage
gpu.MemoryTotal += totalMemory
gpu.Temperature = temp
gpu.MemoryUsed = memoryUsage
gpu.MemoryTotal = totalMemory
gpu.Usage += usage
gpu.Power += power
gpu.Count++
@@ -185,19 +185,14 @@ func (gm *GPUManager) GetCurrentData() map[string]system.GPUData {
gpuData := make(map[string]system.GPUData, len(gm.GpuDataMap))
for id, gpu := range gm.GpuDataMap {
// sum the data
gpu.Temperature = twoDecimals(gpu.Temperature / gpu.Count)
gpu.MemoryUsed = twoDecimals(gpu.MemoryUsed / gpu.Count)
gpu.MemoryTotal = twoDecimals(gpu.MemoryTotal / gpu.Count)
gpu.Temperature = twoDecimals(gpu.Temperature)
gpu.MemoryUsed = twoDecimals(gpu.MemoryUsed)
gpu.MemoryTotal = twoDecimals(gpu.MemoryTotal)
gpu.Usage = twoDecimals(gpu.Usage / gpu.Count)
gpu.Power = twoDecimals(gpu.Power / gpu.Count)
gpuData[id] = *gpu
// reset the data
gpu.Temperature = 0
gpu.MemoryUsed = 0
gpu.MemoryTotal = 0
gpu.Usage = 0
gpu.Power = 0
gpu.Count = 0
// reset the count
gpu.Count = 1
}
return gpuData
}

View File

@@ -0,0 +1,112 @@
import { CartesianGrid, Line, LineChart, YAxis } from "recharts"
import {
ChartContainer,
ChartLegend,
ChartLegendContent,
ChartTooltip,
ChartTooltipContent,
xAxis,
} from "@/components/ui/chart"
import {
useYAxisWidth,
cn,
formatShortDate,
toFixedWithoutTrailingZeros,
decimalString,
chartMargin,
} from "@/lib/utils"
import { ChartData } from "@/types"
import { memo, useMemo } from "react"
export default memo(function GpuPowerChart({ chartData }: { chartData: ChartData }) {
const { yAxisWidth, updateYAxisWidth } = useYAxisWidth()
if (chartData.systemStats.length === 0) {
return null
}
/** Format temperature data for chart and assign colors */
const newChartData = useMemo(() => {
const newChartData = { data: [], colors: {} } as {
data: Record<string, number | string>[]
colors: Record<string, string>
}
const powerSums = {} as Record<string, number>
for (let data of chartData.systemStats) {
let newData = { created: data.created } as Record<string, number | string>
for (let gpu of Object.values(data.stats?.g ?? {})) {
if (gpu.p) {
const name = gpu.n
newData[name] = gpu.p
powerSums[name] = (powerSums[name] ?? 0) + newData[name]
}
}
newChartData.data.push(newData)
}
const keys = Object.keys(powerSums).sort((a, b) => powerSums[b] - powerSums[a])
for (let key of keys) {
newChartData.colors[key] = `hsl(${((keys.indexOf(key) * 360) / keys.length) % 360}, 60%, 55%)`
}
return newChartData
}, [chartData])
const colors = Object.keys(newChartData.colors)
// console.log('rendered at', new Date())
return (
<div>
<ChartContainer
className={cn("h-full w-full absolute aspect-auto bg-card opacity-0 transition-opacity", {
"opacity-100": yAxisWidth,
})}
>
<LineChart accessibilityLayer data={newChartData.data} margin={chartMargin}>
<CartesianGrid vertical={false} />
<YAxis
direction="ltr"
orientation={chartData.orientation}
className="tracking-tighter"
domain={[0, "auto"]}
width={yAxisWidth}
tickFormatter={(value) => {
const val = toFixedWithoutTrailingZeros(value, 2)
return updateYAxisWidth(val + "W")
}}
tickLine={false}
axisLine={false}
/>
{xAxis(chartData)}
<ChartTooltip
animationEasing="ease-out"
animationDuration={150}
// @ts-ignore
itemSorter={(a, b) => b.value - a.value}
content={
<ChartTooltipContent
labelFormatter={(_, data) => formatShortDate(data[0].payload.created)}
contentFormatter={(item) => decimalString(item.value) + "W"}
// indicator="line"
/>
}
/>
{colors.map((key) => (
<Line
key={key}
dataKey={key}
name={key}
type="monotoneX"
dot={false}
strokeWidth={1.5}
stroke={newChartData.colors[key]}
isAnimationActive={false}
/>
))}
{colors.length < 12 && <ChartLegend content={<ChartLegendContent />} />}
</LineChart>
</ChartContainer>
</div>
)
})

View File

@@ -24,6 +24,7 @@ const MemChart = lazy(() => import("../charts/mem-chart"))
const DiskChart = lazy(() => import("../charts/disk-chart"))
const SwapChart = lazy(() => import("../charts/swap-chart"))
const TemperatureChart = lazy(() => import("../charts/temperature-chart"))
const GpuPowerChart = lazy(() => import("../charts/gpu-power-chart"))
const cache = new Map<string, any>()
@@ -285,6 +286,7 @@ export default function SystemDetail({ name }: { name: string }) {
// if no data, show empty message
const dataEmpty = !chartLoading && chartData.systemStats.length === 0
const hasGpuData = Object.keys(systemStats.at(-1)?.stats.g ?? {}).length > 0
return (
<>
@@ -455,6 +457,7 @@ export default function SystemDetail({ name }: { name: string }) {
</div>
)}
{/* Swap chart */}
{(systemStats.at(-1)?.stats.su ?? 0) > 0 && (
<ChartCard
empty={dataEmpty}
@@ -466,6 +469,7 @@ export default function SystemDetail({ name }: { name: string }) {
</ChartCard>
)}
{/* Temperature chart */}
{systemStats.at(-1)?.stats.t && (
<ChartCard
empty={dataEmpty}
@@ -476,10 +480,22 @@ export default function SystemDetail({ name }: { name: string }) {
<TemperatureChart chartData={chartData} />
</ChartCard>
)}
{/* GPU power draw chart */}
{hasGpuData && (
<ChartCard
empty={dataEmpty}
grid={grid}
title="GPU Power Draw"
description="Average power consumption of GPUs"
>
<GpuPowerChart chartData={chartData} />
</ChartCard>
)}
</div>
{/* GPU charts */}
{Object.keys(systemStats.at(-1)?.stats.g ?? {}).length > 0 && (
{hasGpuData && (
<div className="grid xl:grid-cols-2 gap-4">
{Object.keys(systemStats.at(-1)?.stats.g ?? {}).map((id) => {
const gpu = systemStats.at(-1)?.stats.g?.[id] as GPUData
@@ -489,7 +505,7 @@ export default function SystemDetail({ name }: { name: string }) {
empty={dataEmpty}
grid={grid}
title={`${gpu.n} ${t`Usage`}`}
description={t`Total utilization of ${gpu.n}`}
description={`Average utilization of ${gpu.n}`}
>
<AreaChartDefault chartData={chartData} chartName={`g.${id}.u`} unit="%" />
</ChartCard>
@@ -497,7 +513,7 @@ export default function SystemDetail({ name }: { name: string }) {
empty={dataEmpty}
grid={grid}
title={`${gpu.n} VRAM`}
description={t`VRAM usage of ${gpu.n}`}
description={t`Precise utilization at the recorded time`}
>
<AreaChartDefault
chartData={chartData}