add gpu power consumption chart

This commit is contained in:
Henry Dollman
2024-11-08 20:31:22 -05:00
parent bcb7de1b9a
commit 03de73560c
4 changed files with 144 additions and 21 deletions

View File

@@ -78,7 +78,7 @@ func (a *Agent) Run(pubKey []byte, addr string) {
// initialize GPU manager // initialize GPU manager
if os.Getenv("GPU") == "true" { if os.Getenv("GPU") == "true" {
if gm, err := NewGPUManager(); err != nil { if gm, err := NewGPUManager(); err != nil {
slog.Error("GPU manager", "err", err) slog.Warn("GPU", "err", err)
} else { } else {
a.gpuManager = gm a.gpuManager = gm
} }

View File

@@ -93,9 +93,9 @@ func (gm *GPUManager) parseNvidiaData(output []byte) {
} }
// update gpu data // update gpu data
gpu := gm.GpuDataMap[id] gpu := gm.GpuDataMap[id]
gpu.Temperature += temp gpu.Temperature = temp
gpu.MemoryUsed += memoryUsage / 1.024 gpu.MemoryUsed = memoryUsage / 1.024
gpu.MemoryTotal += totalMemory / 1.024 gpu.MemoryTotal = totalMemory / 1.024
gpu.Usage += usage gpu.Usage += usage
gpu.Power += power gpu.Power += power
gpu.Count++ gpu.Count++
@@ -120,7 +120,7 @@ func (gm *GPUManager) startAmdCollector() {
// collectAmdStats runs rocm-smi in a loop and passes the output to parseAmdData // collectAmdStats runs rocm-smi in a loop and passes the output to parseAmdData
func (gm *GPUManager) collectAmdStats() error { func (gm *GPUManager) collectAmdStats() error {
cmd := exec.Command("/bin/sh", "-c", "while true; do rocm-smi --showid --showtemp --showuse --showpower --showproductname --showmeminfo vram --json; sleep 4.7; done") cmd := exec.Command("/bin/sh", "-c", "while true; do rocm-smi --showid --showtemp --showuse --showpower --showproductname --showmeminfo vram --json; sleep 3.7; done")
// Set up a pipe to capture stdout // Set up a pipe to capture stdout
stdout, err := cmd.StdoutPipe() stdout, err := cmd.StdoutPipe()
if err != nil { if err != nil {
@@ -168,9 +168,9 @@ func (gm *GPUManager) parseAmdData(rocmSmiInfo *map[string]RocmSmiJson) {
gm.GpuDataMap[v.ID] = &system.GPUData{Name: v.Name} gm.GpuDataMap[v.ID] = &system.GPUData{Name: v.Name}
} }
gpu := gm.GpuDataMap[v.ID] gpu := gm.GpuDataMap[v.ID]
gpu.Temperature += temp gpu.Temperature = temp
gpu.MemoryUsed += memoryUsage gpu.MemoryUsed = memoryUsage
gpu.MemoryTotal += totalMemory gpu.MemoryTotal = totalMemory
gpu.Usage += usage gpu.Usage += usage
gpu.Power += power gpu.Power += power
gpu.Count++ gpu.Count++
@@ -185,19 +185,14 @@ func (gm *GPUManager) GetCurrentData() map[string]system.GPUData {
gpuData := make(map[string]system.GPUData, len(gm.GpuDataMap)) gpuData := make(map[string]system.GPUData, len(gm.GpuDataMap))
for id, gpu := range gm.GpuDataMap { for id, gpu := range gm.GpuDataMap {
// sum the data // sum the data
gpu.Temperature = twoDecimals(gpu.Temperature / gpu.Count) gpu.Temperature = twoDecimals(gpu.Temperature)
gpu.MemoryUsed = twoDecimals(gpu.MemoryUsed / gpu.Count) gpu.MemoryUsed = twoDecimals(gpu.MemoryUsed)
gpu.MemoryTotal = twoDecimals(gpu.MemoryTotal / gpu.Count) gpu.MemoryTotal = twoDecimals(gpu.MemoryTotal)
gpu.Usage = twoDecimals(gpu.Usage / gpu.Count) gpu.Usage = twoDecimals(gpu.Usage / gpu.Count)
gpu.Power = twoDecimals(gpu.Power / gpu.Count) gpu.Power = twoDecimals(gpu.Power / gpu.Count)
gpuData[id] = *gpu gpuData[id] = *gpu
// reset the data // reset the count
gpu.Temperature = 0 gpu.Count = 1
gpu.MemoryUsed = 0
gpu.MemoryTotal = 0
gpu.Usage = 0
gpu.Power = 0
gpu.Count = 0
} }
return gpuData return gpuData
} }

View File

@@ -0,0 +1,112 @@
import { CartesianGrid, Line, LineChart, YAxis } from "recharts"
import {
ChartContainer,
ChartLegend,
ChartLegendContent,
ChartTooltip,
ChartTooltipContent,
xAxis,
} from "@/components/ui/chart"
import {
useYAxisWidth,
cn,
formatShortDate,
toFixedWithoutTrailingZeros,
decimalString,
chartMargin,
} from "@/lib/utils"
import { ChartData } from "@/types"
import { memo, useMemo } from "react"
/**
 * Line chart of GPU power draw over time, drawing one line per GPU name.
 *
 * For every entry in `chartData.systemStats` the power value (`p`) of each GPU
 * in `stats.g` is plotted under the GPU's name (`n`). GPUs are ranked by their
 * summed power across all entries (highest first) and each rank is given an
 * evenly spaced hue. The legend is only rendered when there are fewer than 12
 * lines.
 *
 * @param chartData - chart data whose `systemStats` entries are plotted
 * @returns the chart element, or `null` when `systemStats` is empty
 */
export default memo(function GpuPowerChart({ chartData }: { chartData: ChartData }) {
  const { yAxisWidth, updateYAxisWidth } = useYAxisWidth()

  /** Format power data for chart and assign colors */
  const newChartData = useMemo(() => {
    const newChartData = { data: [], colors: {} } as {
      data: Record<string, number | string>[]
      colors: Record<string, string>
    }
    // running total of power per GPU name, used only to rank the GPUs
    const powerSums = {} as Record<string, number>
    for (const data of chartData.systemStats) {
      const newData = { created: data.created } as Record<string, number | string>
      for (const gpu of Object.values(data.stats?.g ?? {})) {
        // falsy power values (0 or missing) are skipped, leaving a gap in that GPU's line
        if (gpu.p) {
          const name = gpu.n
          newData[name] = gpu.p
          powerSums[name] = (powerSums[name] ?? 0) + gpu.p
        }
      }
      newChartData.data.push(newData)
    }
    // highest total power first; hue is spread evenly over the color wheel by rank
    const keys = Object.keys(powerSums).sort((a, b) => powerSums[b] - powerSums[a])
    keys.forEach((key, index) => {
      newChartData.colors[key] = `hsl(${((index * 360) / keys.length) % 360}, 60%, 55%)`
    })
    return newChartData
  }, [chartData])

  // Bail out only AFTER every hook has run: returning before `useMemo` would change
  // the number of hooks between renders once data arrives, which React forbids.
  if (chartData.systemStats.length === 0) {
    return null
  }

  const colors = Object.keys(newChartData.colors)

  return (
    <div>
      <ChartContainer
        // stays transparent until the y-axis width is known (yAxisWidth is truthy)
        className={cn("h-full w-full absolute aspect-auto bg-card opacity-0 transition-opacity", {
          "opacity-100": yAxisWidth,
        })}
      >
        <LineChart accessibilityLayer data={newChartData.data} margin={chartMargin}>
          <CartesianGrid vertical={false} />
          <YAxis
            direction="ltr"
            orientation={chartData.orientation}
            className="tracking-tighter"
            domain={[0, "auto"]}
            width={yAxisWidth}
            tickFormatter={(value) => {
              const val = toFixedWithoutTrailingZeros(value, 2)
              // the formatted label is passed through updateYAxisWidth, whose return value is the tick text
              return updateYAxisWidth(val + "W")
            }}
            tickLine={false}
            axisLine={false}
          />
          {xAxis(chartData)}
          <ChartTooltip
            animationEasing="ease-out"
            animationDuration={150}
            // list the highest power draw first in the tooltip
            // @ts-ignore
            itemSorter={(a, b) => b.value - a.value}
            content={
              <ChartTooltipContent
                labelFormatter={(_, data) => formatShortDate(data[0].payload.created)}
                contentFormatter={(item) => decimalString(item.value) + "W"}
                // indicator="line"
              />
            }
          />
          {colors.map((key) => (
            <Line
              key={key}
              dataKey={key}
              name={key}
              type="monotoneX"
              dot={false}
              strokeWidth={1.5}
              stroke={newChartData.colors[key]}
              isAnimationActive={false}
            />
          ))}
          {colors.length < 12 && <ChartLegend content={<ChartLegendContent />} />}
        </LineChart>
      </ChartContainer>
    </div>
  )
})

View File

@@ -24,6 +24,7 @@ const MemChart = lazy(() => import("../charts/mem-chart"))
const DiskChart = lazy(() => import("../charts/disk-chart")) const DiskChart = lazy(() => import("../charts/disk-chart"))
const SwapChart = lazy(() => import("../charts/swap-chart")) const SwapChart = lazy(() => import("../charts/swap-chart"))
const TemperatureChart = lazy(() => import("../charts/temperature-chart")) const TemperatureChart = lazy(() => import("../charts/temperature-chart"))
const GpuPowerChart = lazy(() => import("../charts/gpu-power-chart"))
const cache = new Map<string, any>() const cache = new Map<string, any>()
@@ -285,6 +286,7 @@ export default function SystemDetail({ name }: { name: string }) {
// if no data, show empty message // if no data, show empty message
const dataEmpty = !chartLoading && chartData.systemStats.length === 0 const dataEmpty = !chartLoading && chartData.systemStats.length === 0
const hasGpuData = Object.keys(systemStats.at(-1)?.stats.g ?? {}).length > 0
return ( return (
<> <>
@@ -455,6 +457,7 @@ export default function SystemDetail({ name }: { name: string }) {
</div> </div>
)} )}
{/* Swap chart */}
{(systemStats.at(-1)?.stats.su ?? 0) > 0 && ( {(systemStats.at(-1)?.stats.su ?? 0) > 0 && (
<ChartCard <ChartCard
empty={dataEmpty} empty={dataEmpty}
@@ -466,6 +469,7 @@ export default function SystemDetail({ name }: { name: string }) {
</ChartCard> </ChartCard>
)} )}
{/* Temperature chart */}
{systemStats.at(-1)?.stats.t && ( {systemStats.at(-1)?.stats.t && (
<ChartCard <ChartCard
empty={dataEmpty} empty={dataEmpty}
@@ -476,10 +480,22 @@ export default function SystemDetail({ name }: { name: string }) {
<TemperatureChart chartData={chartData} /> <TemperatureChart chartData={chartData} />
</ChartCard> </ChartCard>
)} )}
{/* GPU power draw chart */}
{hasGpuData && (
<ChartCard
empty={dataEmpty}
grid={grid}
title="GPU Power Draw"
description="Average power consumption of GPUs"
>
<GpuPowerChart chartData={chartData} />
</ChartCard>
)}
</div> </div>
{/* GPU charts */} {/* GPU charts */}
{Object.keys(systemStats.at(-1)?.stats.g ?? {}).length > 0 && ( {hasGpuData && (
<div className="grid xl:grid-cols-2 gap-4"> <div className="grid xl:grid-cols-2 gap-4">
{Object.keys(systemStats.at(-1)?.stats.g ?? {}).map((id) => { {Object.keys(systemStats.at(-1)?.stats.g ?? {}).map((id) => {
const gpu = systemStats.at(-1)?.stats.g?.[id] as GPUData const gpu = systemStats.at(-1)?.stats.g?.[id] as GPUData
@@ -489,7 +505,7 @@ export default function SystemDetail({ name }: { name: string }) {
empty={dataEmpty} empty={dataEmpty}
grid={grid} grid={grid}
title={`${gpu.n} ${t`Usage`}`} title={`${gpu.n} ${t`Usage`}`}
description={t`Total utilization of ${gpu.n}`} description={`Average utilization of ${gpu.n}`}
> >
<AreaChartDefault chartData={chartData} chartName={`g.${id}.u`} unit="%" /> <AreaChartDefault chartData={chartData} chartName={`g.${id}.u`} unit="%" />
</ChartCard> </ChartCard>
@@ -497,7 +513,7 @@ export default function SystemDetail({ name }: { name: string }) {
empty={dataEmpty} empty={dataEmpty}
grid={grid} grid={grid}
title={`${gpu.n} VRAM`} title={`${gpu.n} VRAM`}
description={t`VRAM usage of ${gpu.n}`} description={t`Precise utilization at the recorded time`}
> >
<AreaChartDefault <AreaChartDefault
chartData={chartData} chartData={chartData}