mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-25 07:00:48 +09:00
feat(container): add current container metrics view
Expose a dedicated container metrics endpoint and surface current CPU, memory, and root filesystem usage in the bot container view. This gives operators a quick health snapshot while degrading cleanly on unsupported backends.
This commit is contained in:
@@ -869,6 +869,21 @@
|
||||
"snapshotLoadFailed": "Failed to load snapshots",
|
||||
"snapshotNamePlaceholder": "Snapshot display name (optional)",
|
||||
"snapshotNameHint": "This field is only for the user-visible display name. The internal snapshot name is generated automatically.",
|
||||
"metricsTitle": "Resource Status",
|
||||
"metricsSubtitle": "View CPU, memory, and storage usage for the container's entire filesystem.",
|
||||
"metricsLoadFailed": "Failed to load container resource status",
|
||||
"metricsUnsupported": "The current container backend does not support resource monitoring.",
|
||||
"metricsUnavailable": "No resource metrics available.",
|
||||
"metricsStopped": "The container task is not running; CPU and memory metrics are unavailable. Storage information will still be shown if available.",
|
||||
"metricsPath": "Scope",
|
||||
"metricsUnlimited": "No memory limit configured",
|
||||
"currentSample": "Current sample",
|
||||
"sampledAt": "Sampled at",
|
||||
"metricsLabels": {
|
||||
"cpu": "CPU",
|
||||
"memory": "Memory",
|
||||
"storage": "Storage"
|
||||
},
|
||||
"dataTitle": "Data Operations",
|
||||
"dataSubtitle": "Independently manage import, export, and restore for the container `/data` directory.",
|
||||
"deleteTitle": "Delete Container",
|
||||
|
||||
@@ -865,6 +865,21 @@
|
||||
"snapshotLoadFailed": "加载快照失败",
|
||||
"snapshotNamePlaceholder": "快照显示名称(可选)",
|
||||
"snapshotNameHint": "这里只填写用户可见的显示名称,系统会自动生成内部快照名。",
|
||||
"metricsTitle": "资源状态",
|
||||
"metricsSubtitle": "查看当前容器的 CPU、内存与整个容器文件系统的存储使用情况。",
|
||||
"metricsLoadFailed": "加载容器资源状态失败",
|
||||
"metricsUnsupported": "当前容器后端暂不支持资源监控。",
|
||||
"metricsUnavailable": "当前暂无可用的资源指标。",
|
||||
"metricsStopped": "容器任务未运行,CPU 和内存指标暂不可用;如有存储信息仍会继续显示。",
|
||||
"metricsPath": "统计范围",
|
||||
"metricsUnlimited": "未配置内存限制",
|
||||
"currentSample": "当前采样",
|
||||
"sampledAt": "采样时间",
|
||||
"metricsLabels": {
|
||||
"cpu": "CPU",
|
||||
"memory": "内存",
|
||||
"storage": "存储"
|
||||
},
|
||||
"dataTitle": "数据操作",
|
||||
"dataSubtitle": "独立管理容器 `/data` 目录的导入、导出与恢复。",
|
||||
"deleteTitle": "删除容器",
|
||||
|
||||
@@ -8,6 +8,7 @@ import { ChevronRight } from 'lucide-vue-next'
|
||||
import {
|
||||
deleteBotsByBotIdContainer,
|
||||
getBotsByBotIdContainer,
|
||||
getBotsByBotIdContainerMetrics,
|
||||
getBotsByBotIdContainerSnapshots,
|
||||
getBotsById,
|
||||
postBotsByBotIdContainerDataExport,
|
||||
@@ -18,6 +19,7 @@ import {
|
||||
postBotsByBotIdContainerStart,
|
||||
postBotsByBotIdContainerStop,
|
||||
type HandlersCreateContainerRequest,
|
||||
type HandlersGetContainerMetricsResponse,
|
||||
type HandlersGetContainerResponse,
|
||||
type HandlersListSnapshotsResponse,
|
||||
} from '@memohai/sdk'
|
||||
@@ -29,6 +31,7 @@ import {
|
||||
import { Button, Collapsible, CollapsibleContent, CollapsibleTrigger, Input, Label, Separator, Spinner, Switch, Textarea } from '@memohai/ui'
|
||||
import ConfirmPopover from '@/components/confirm-popover/index.vue'
|
||||
import ContainerCreateProgress from './container-create-progress.vue'
|
||||
import ContainerMetricsPanel from './container-metrics-panel.vue'
|
||||
import { useSyncedQueryParam } from '@/composables/useSyncedQueryParam'
|
||||
import { useBotStatusMeta } from '@/composables/useBotStatusMeta'
|
||||
import { useCapabilitiesStore } from '@/store/capabilities'
|
||||
@@ -92,11 +95,14 @@ const botId = computed(() => route.params.botId as string)
|
||||
const containerBusy = computed(() => containerLoading.value || containerAction.value !== '')
|
||||
|
||||
type BotContainerInfo = HandlersGetContainerResponse
|
||||
type BotContainerMetrics = HandlersGetContainerMetricsResponse
|
||||
type BotContainerSnapshot = HandlersListSnapshotsResponse extends { snapshots?: (infer T)[] } ? T : never
|
||||
|
||||
const containerInfo = ref<BotContainerInfo | null>(null)
|
||||
const containerMetrics = ref<BotContainerMetrics | null>(null)
|
||||
const containerMissing = ref(false)
|
||||
const snapshots = ref<BotContainerSnapshot[]>([])
|
||||
const metricsLoading = ref(false)
|
||||
const snapshotsLoading = ref(false)
|
||||
|
||||
function resolveErrorMessage(error: unknown, fallback: string): string {
|
||||
@@ -134,6 +140,7 @@ async function loadContainerData(showLoadingToast: boolean) {
|
||||
if (result.error !== undefined) {
|
||||
if (result.response.status === 404) {
|
||||
containerInfo.value = null
|
||||
containerMetrics.value = null
|
||||
containerMissing.value = true
|
||||
snapshots.value = []
|
||||
return
|
||||
@@ -144,10 +151,13 @@ async function loadContainerData(showLoadingToast: boolean) {
|
||||
containerInfo.value = result.data
|
||||
containerMissing.value = false
|
||||
|
||||
const metricsPromise = loadContainerMetrics(showLoadingToast)
|
||||
|
||||
if (capabilitiesStore.snapshotSupported) {
|
||||
await loadSnapshots()
|
||||
await Promise.all([metricsPromise, loadSnapshots()])
|
||||
} else {
|
||||
snapshots.value = []
|
||||
await metricsPromise
|
||||
}
|
||||
} catch (error) {
|
||||
if (showLoadingToast) {
|
||||
@@ -158,6 +168,24 @@ async function loadContainerData(showLoadingToast: boolean) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fetches the current container metrics for the active bot and stores the
 * response in `containerMetrics`.
 *
 * `metricsLoading` is raised for the duration of the request. When the request
 * fails the stored metrics are cleared, and the failure is surfaced as a toast
 * only if `showLoadingToast` is true.
 */
async function loadContainerMetrics(showLoadingToast: boolean) {
  metricsLoading.value = true
  try {
    const response = await getBotsByBotIdContainerMetrics({
      path: { bot_id: botId.value },
      throwOnError: true,
    })
    containerMetrics.value = response.data
  } catch (error) {
    containerMetrics.value = null
    if (!showLoadingToast) return
    toast.error(resolveErrorMessage(error, t('bots.container.metricsLoadFailed')))
  } finally {
    // Runs on success, on failure, and after the early return in the catch block.
    metricsLoading.value = false
  }
}
|
||||
|
||||
async function loadSnapshots() {
|
||||
if (!containerInfo.value || !capabilitiesStore.snapshotSupported) {
|
||||
snapshots.value = []
|
||||
@@ -411,6 +439,7 @@ async function handleDeleteContainer(preserveData: boolean) {
|
||||
throwOnError: true,
|
||||
})
|
||||
containerInfo.value = null
|
||||
containerMetrics.value = null
|
||||
containerMissing.value = true
|
||||
snapshots.value = []
|
||||
createRestoreData.value = preserveData
|
||||
@@ -958,6 +987,12 @@ watch([activeTab, botId], ([tab]) => {
|
||||
</dl>
|
||||
</div>
|
||||
|
||||
<ContainerMetricsPanel
|
||||
:backend="capabilitiesStore.containerBackend"
|
||||
:loading="metricsLoading"
|
||||
:metrics="containerMetrics"
|
||||
/>
|
||||
|
||||
<div class="rounded-md border px-3 py-2 text-xs text-muted-foreground">
|
||||
{{ $t('bots.container.gpuRecreateHint') }}
|
||||
</div>
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
<template>
|
||||
<div class="space-y-4 rounded-md border p-4">
|
||||
<div class="space-y-1">
|
||||
<h4 class="text-xs font-medium">
|
||||
{{ t('bots.container.metricsTitle') }}
|
||||
</h4>
|
||||
<p class="text-xs text-muted-foreground">
|
||||
{{ t('bots.container.metricsSubtitle') }}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div
|
||||
v-if="loading && !metrics"
|
||||
class="flex items-center gap-2 text-xs text-muted-foreground"
|
||||
>
|
||||
<Spinner />
|
||||
<span>{{ t('common.loading') }}</span>
|
||||
</div>
|
||||
|
||||
<div
|
||||
v-else-if="backendUnsupported"
|
||||
class="rounded-md border border-dashed px-3 py-2 text-xs text-muted-foreground"
|
||||
>
|
||||
{{ t('bots.container.metricsUnsupported') }}
|
||||
</div>
|
||||
|
||||
<div
|
||||
v-else-if="!hasAnyMetric"
|
||||
class="rounded-md border border-dashed px-3 py-2 text-xs text-muted-foreground"
|
||||
>
|
||||
{{ taskRunning === false ? t('bots.container.metricsStopped') : t('bots.container.metricsUnavailable') }}
|
||||
</div>
|
||||
|
||||
<template v-else>
|
||||
<div
|
||||
v-if="taskRunning === false"
|
||||
class="rounded-md border border-primary/20 bg-primary/5 px-3 py-2 text-xs"
|
||||
>
|
||||
{{ t('bots.container.metricsStopped') }}
|
||||
</div>
|
||||
|
||||
<div class="grid gap-3 md:grid-cols-3">
|
||||
<div class="rounded-md border bg-background/70 p-3">
|
||||
<p class="text-xs text-muted-foreground">
|
||||
{{ t('bots.container.metricsLabels.cpu') }}
|
||||
</p>
|
||||
<p class="mt-2 text-2xl font-semibold">
|
||||
{{ cpuValueText }}
|
||||
</p>
|
||||
<p class="mt-2 text-[11px] text-muted-foreground">
|
||||
{{ t('bots.container.currentSample') }}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="rounded-md border bg-background/70 p-3">
|
||||
<p class="text-xs text-muted-foreground">
|
||||
{{ t('bots.container.metricsLabels.memory') }}
|
||||
</p>
|
||||
<p class="mt-2 text-2xl font-semibold">
|
||||
{{ memoryValueText }}
|
||||
</p>
|
||||
<p class="mt-2 text-[11px] text-muted-foreground">
|
||||
{{ memoryHintText }}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="rounded-md border bg-background/70 p-3">
|
||||
<p class="text-xs text-muted-foreground">
|
||||
{{ t('bots.container.metricsLabels.storage') }}
|
||||
</p>
|
||||
<p class="mt-2 text-2xl font-semibold">
|
||||
{{ storageValueText }}
|
||||
</p>
|
||||
<p class="mt-2 text-[11px] text-muted-foreground break-all">
|
||||
{{ t('bots.container.metricsPath') }}: {{ storagePathText }}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p
|
||||
v-if="sampledAtText !== '-'"
|
||||
class="text-[11px] text-muted-foreground"
|
||||
>
|
||||
{{ t('bots.container.sampledAt') }}: {{ sampledAtText }}
|
||||
</p>
|
||||
</template>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup lang="ts">
|
||||
import { computed } from 'vue'
|
||||
import { useI18n } from 'vue-i18n'
|
||||
import { Spinner } from '@memohai/ui'
|
||||
import type { HandlersGetContainerMetricsResponse } from '@memohai/sdk'
|
||||
import { formatDateTime } from '@/utils/date-time'
|
||||
|
||||
const props = defineProps<{
|
||||
backend: string
|
||||
loading: boolean
|
||||
metrics: HandlersGetContainerMetricsResponse | null
|
||||
}>()
|
||||
|
||||
const { t } = useI18n()
|
||||
|
||||
// Sections of the metrics response; each is undefined while `props.metrics`
// is null or the section is absent from the payload.
const status = computed(() => props.metrics?.status)
const cpuMetrics = computed(() => props.metrics?.metrics?.cpu)
const memoryMetrics = computed(() => props.metrics?.metrics?.memory)
const storageMetrics = computed(() => props.metrics?.metrics?.storage)

// Unsupported when the backend is anything other than containerd, or when the
// response explicitly reports `supported: false`.
const backendUnsupported = computed(() =>
  props.backend !== 'containerd' || props.metrics?.supported === false,
)

// Left undefined (not false) while no status has been received.
const taskRunning = computed(() => status.value?.task_running)

// True as soon as at least one of the three metric sections is present.
const hasAnyMetric = computed(() =>
  Boolean(cpuMetrics.value || memoryMetrics.value || storageMetrics.value),
)

// Display strings for the three metric cards.
const cpuValueText = computed(() => formatPercent(cpuMetrics.value?.usage_percent))
const memoryValueText = computed(() => formatBytes(memoryMetrics.value?.usage_bytes))
const storageValueText = computed(() => formatBytes(storageMetrics.value?.used_bytes))
const storagePathText = computed(() => storageMetrics.value?.path || '-')
const sampledAtText = computed(() =>
  formatDateTime(props.metrics?.sampled_at, { fallback: '-' }),
)

// Secondary line of the memory card: "used / limit (percent)" when a positive
// limit is reported, otherwise a note that no limit is configured or that no
// memory metrics are available.
const memoryHintText = computed(() => {
  const memory = memoryMetrics.value
  if (!memory) {
    return t('bots.container.metricsUnavailable')
  }

  const limit = memory.limit_bytes
  if (!limit || limit <= 0) {
    return t('bots.container.metricsUnlimited')
  }

  const percentText = formatPercent(memory.usage_percent)
  // '--' is the formatter's "no value" marker; omit the parenthesised part then.
  const percentSuffix = percentText === '--' ? '' : ` (${percentText})`
  return `${formatBytes(memory.usage_bytes)} / ${formatBytes(limit)}${percentSuffix}`
})
|
||||
|
||||
/**
 * Formats a byte count using binary (1024-based) units from B up to TiB.
 *
 * @param value - Number of bytes. Anything that is not a finite, non-negative
 *   number (undefined, NaN, Infinity, negatives) is treated as "no value".
 * @returns `'--'` for "no value", `'0 B'` for zero, otherwise the scaled size
 *   followed by its unit. Whole bytes and displayed values of 100 or more are
 *   shown without decimals; smaller scaled values get one decimal.
 */
function formatBytes(value?: number): string {
  if (typeof value !== 'number' || !Number.isFinite(value) || value < 0) return '--'
  if (value === 0) return '0 B'

  const units = ['B', 'KiB', 'MiB', 'GiB', 'TiB']
  const lastUnit = units.length - 1
  let size = value
  let unitIndex = 0

  while (size >= 1024 && unitIndex < lastUnit) {
    size /= 1024
    unitIndex += 1
  }

  // A size just below a unit boundary (e.g. 1023.99 KiB) would be rounded up
  // to "1024 KiB" by toFixed; promote it to the next unit instead.
  if (unitIndex < lastUnit && Math.round(size) >= 1024) {
    size = 1
    unitIndex += 1
  }

  // Choose the precision from the value as it will be displayed, so 99.96
  // renders as "100" (like 100 itself) rather than "100.0".
  const showFraction = unitIndex !== 0 && Math.round(size * 10) < 1000
  return `${size.toFixed(showFraction ? 1 : 0)} ${units[unitIndex]}`
}
|
||||
|
||||
/**
 * Formats a percentage for display.
 *
 * @param value - Percentage value (e.g. `12.5` for 12.5%). Anything that is
 *   not a finite, non-negative number is treated as "no value".
 * @returns `'--'` for "no value"; otherwise the value with one decimal, or
 *   with no decimals once the displayed value reaches 100.
 */
function formatPercent(value?: number): string {
  if (typeof value !== 'number' || !Number.isFinite(value) || value < 0) return '--'

  // Decide on the rounded value so 99.96 prints "100%" (like 100 itself)
  // instead of the inconsistent "100.0%".
  const showFraction = Math.round(value * 10) < 1000
  return `${value.toFixed(showFraction ? 1 : 0)}%`
}
|
||||
</script>
|
||||
@@ -0,0 +1,168 @@
|
||||
package containerd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
cgroup1stats "github.com/containerd/cgroups/v3/cgroup1/stats"
|
||||
cgroup2stats "github.com/containerd/cgroups/v3/cgroup2/stats"
|
||||
containerd "github.com/containerd/containerd/v2/client"
|
||||
"google.golang.org/protobuf/proto"
|
||||
"google.golang.org/protobuf/types/known/anypb"
|
||||
)
|
||||
|
||||
const (
	// metricsSampleInterval is the pause between the two task samples that
	// GetContainerMetrics compares to derive a CPU usage percentage.
	metricsSampleInterval = 200 * time.Millisecond

	// maxPracticalMemoryLimitBytes (2^60 bytes) is the largest memory limit
	// treated as a real limit; normalizeMemoryLimit maps anything above it
	// to 0, meaning "no limit".
	maxPracticalMemoryLimitBytes = uint64(1) << 60
)

// taskMetricsSample is a single reading of a task's CPU and memory counters,
// flattened from either cgroup v1 or cgroup v2 statistics.
type taskMetricsSample struct {
	timestamp   time.Time // when the sample was taken
	cpuUsageNS  uint64    // total CPU time, in nanoseconds
	cpuUserNS   uint64    // user-mode CPU time, in nanoseconds
	cpuKernelNS uint64    // kernel-mode CPU time, in nanoseconds
	memoryUsage uint64    // memory usage, in bytes
	memoryLimit uint64    // memory limit in bytes after normalizeMemoryLimit; 0 means no limit
}
|
||||
|
||||
func (s *DefaultService) GetContainerMetrics(ctx context.Context, containerID string) (ContainerMetrics, error) {
|
||||
task, ctx, err := s.getTask(ctx, containerID)
|
||||
if err != nil {
|
||||
return ContainerMetrics{}, err
|
||||
}
|
||||
|
||||
first, err := sampleTaskMetrics(ctx, task)
|
||||
if err != nil {
|
||||
return ContainerMetrics{}, err
|
||||
}
|
||||
|
||||
timer := time.NewTimer(metricsSampleInterval)
|
||||
defer timer.Stop()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ContainerMetrics{}, ctx.Err()
|
||||
case <-timer.C:
|
||||
}
|
||||
|
||||
second, err := sampleTaskMetrics(ctx, task)
|
||||
if err != nil {
|
||||
return ContainerMetrics{}, err
|
||||
}
|
||||
|
||||
return ContainerMetrics{
|
||||
SampledAt: second.timestamp,
|
||||
CPU: buildCPUMetrics(first, second),
|
||||
Memory: buildMemoryMetrics(second),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func sampleTaskMetrics(ctx context.Context, task containerd.Task) (taskMetricsSample, error) {
|
||||
metric, err := task.Metrics(ctx)
|
||||
if err != nil {
|
||||
return taskMetricsSample{}, err
|
||||
}
|
||||
if metric == nil || metric.Data == nil {
|
||||
return taskMetricsSample{}, ErrNotSupported
|
||||
}
|
||||
|
||||
timestamp := time.Now()
|
||||
if ts := metric.GetTimestamp(); ts != nil {
|
||||
timestamp = ts.AsTime()
|
||||
}
|
||||
|
||||
switch {
|
||||
case metric.Data.MessageIs(&cgroup1stats.Metrics{}):
|
||||
var stats cgroup1stats.Metrics
|
||||
if err := anypb.UnmarshalTo(metric.Data, &stats, proto.UnmarshalOptions{}); err != nil {
|
||||
return taskMetricsSample{}, fmt.Errorf("decode cgroup v1 metrics: %w", err)
|
||||
}
|
||||
return sampleFromCgroup1(timestamp, &stats), nil
|
||||
case metric.Data.MessageIs(&cgroup2stats.Metrics{}):
|
||||
var stats cgroup2stats.Metrics
|
||||
if err := anypb.UnmarshalTo(metric.Data, &stats, proto.UnmarshalOptions{}); err != nil {
|
||||
return taskMetricsSample{}, fmt.Errorf("decode cgroup v2 metrics: %w", err)
|
||||
}
|
||||
return sampleFromCgroup2(timestamp, &stats), nil
|
||||
default:
|
||||
msg, decodeErr := anypb.UnmarshalNew(metric.Data, proto.UnmarshalOptions{})
|
||||
if decodeErr != nil {
|
||||
return taskMetricsSample{}, fmt.Errorf("decode task metrics: %w", decodeErr)
|
||||
}
|
||||
return taskMetricsSample{}, fmt.Errorf("%w: unsupported task metrics type %T", ErrNotSupported, msg)
|
||||
}
|
||||
}
|
||||
|
||||
func sampleFromCgroup1(timestamp time.Time, stats *cgroup1stats.Metrics) taskMetricsSample {
|
||||
sample := taskMetricsSample{timestamp: timestamp}
|
||||
if stats == nil {
|
||||
return sample
|
||||
}
|
||||
if cpu := stats.GetCPU(); cpu != nil {
|
||||
usage := cpu.GetUsage()
|
||||
sample.cpuUsageNS = usage.GetTotal()
|
||||
sample.cpuUserNS = usage.GetUser()
|
||||
sample.cpuKernelNS = usage.GetKernel()
|
||||
}
|
||||
if memory := stats.GetMemory(); memory != nil {
|
||||
entry := memory.GetUsage()
|
||||
sample.memoryUsage = entry.GetUsage()
|
||||
sample.memoryLimit = normalizeMemoryLimit(entry.GetLimit())
|
||||
}
|
||||
return sample
|
||||
}
|
||||
|
||||
func sampleFromCgroup2(timestamp time.Time, stats *cgroup2stats.Metrics) taskMetricsSample {
|
||||
sample := taskMetricsSample{timestamp: timestamp}
|
||||
if stats == nil {
|
||||
return sample
|
||||
}
|
||||
if cpu := stats.GetCPU(); cpu != nil {
|
||||
sample.cpuUsageNS = cpu.GetUsageUsec() * 1_000
|
||||
sample.cpuUserNS = cpu.GetUserUsec() * 1_000
|
||||
sample.cpuKernelNS = cpu.GetSystemUsec() * 1_000
|
||||
}
|
||||
if memory := stats.GetMemory(); memory != nil {
|
||||
sample.memoryUsage = memory.GetUsage()
|
||||
sample.memoryLimit = normalizeMemoryLimit(memory.GetUsageLimit())
|
||||
}
|
||||
return sample
|
||||
}
|
||||
|
||||
func buildCPUMetrics(first, second taskMetricsSample) *CPUMetrics {
|
||||
metrics := &CPUMetrics{
|
||||
UsageNanoseconds: second.cpuUsageNS,
|
||||
UserNanoseconds: second.cpuUserNS,
|
||||
KernelNanoseconds: second.cpuKernelNS,
|
||||
}
|
||||
|
||||
elapsedNS := second.timestamp.Sub(first.timestamp).Nanoseconds()
|
||||
if elapsedNS <= 0 || second.cpuUsageNS < first.cpuUsageNS {
|
||||
return metrics
|
||||
}
|
||||
|
||||
metrics.UsagePercent = (float64(second.cpuUsageNS-first.cpuUsageNS) / float64(elapsedNS)) * 100
|
||||
if metrics.UsagePercent < 0 {
|
||||
metrics.UsagePercent = 0
|
||||
}
|
||||
|
||||
return metrics
|
||||
}
|
||||
|
||||
func buildMemoryMetrics(sample taskMetricsSample) *MemoryMetrics {
|
||||
metrics := &MemoryMetrics{
|
||||
UsageBytes: sample.memoryUsage,
|
||||
LimitBytes: sample.memoryLimit,
|
||||
}
|
||||
if sample.memoryLimit > 0 {
|
||||
metrics.UsagePercent = (float64(sample.memoryUsage) / float64(sample.memoryLimit)) * 100
|
||||
}
|
||||
return metrics
|
||||
}
|
||||
|
||||
func normalizeMemoryLimit(limit uint64) uint64 {
|
||||
if limit == 0 || limit > maxPracticalMemoryLimitBytes {
|
||||
return 0
|
||||
}
|
||||
return limit
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
package containerd
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
cgroup1stats "github.com/containerd/cgroups/v3/cgroup1/stats"
|
||||
cgroup2stats "github.com/containerd/cgroups/v3/cgroup2/stats"
|
||||
)
|
||||
|
||||
func TestBuildCPUMetricsUsesCumulativeDelta(t *testing.T) {
|
||||
start := time.Unix(0, 0)
|
||||
first := taskMetricsSample{
|
||||
timestamp: start,
|
||||
cpuUsageNS: 100_000_000,
|
||||
cpuUserNS: 60_000_000,
|
||||
cpuKernelNS: 40_000_000,
|
||||
}
|
||||
second := taskMetricsSample{
|
||||
timestamp: start.Add(200 * time.Millisecond),
|
||||
cpuUsageNS: 200_000_000,
|
||||
cpuUserNS: 120_000_000,
|
||||
cpuKernelNS: 80_000_000,
|
||||
}
|
||||
|
||||
metrics := buildCPUMetrics(first, second)
|
||||
if metrics == nil {
|
||||
t.Fatal("expected cpu metrics")
|
||||
}
|
||||
if metrics.UsagePercent != 50 {
|
||||
t.Fatalf("expected cpu usage percent 50, got %v", metrics.UsagePercent)
|
||||
}
|
||||
if metrics.UsageNanoseconds != second.cpuUsageNS {
|
||||
t.Fatalf("expected latest cpu usage %d, got %d", second.cpuUsageNS, metrics.UsageNanoseconds)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSampleFromCgroup1(t *testing.T) {
|
||||
sample := sampleFromCgroup1(time.Unix(1, 0), &cgroup1stats.Metrics{
|
||||
CPU: &cgroup1stats.CPUStat{
|
||||
Usage: &cgroup1stats.CPUUsage{
|
||||
Total: 12,
|
||||
User: 7,
|
||||
Kernel: 5,
|
||||
},
|
||||
},
|
||||
Memory: &cgroup1stats.MemoryStat{
|
||||
Usage: &cgroup1stats.MemoryEntry{
|
||||
Usage: 4096,
|
||||
Limit: 8192,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if sample.cpuUsageNS != 12 || sample.cpuUserNS != 7 || sample.cpuKernelNS != 5 {
|
||||
t.Fatalf("unexpected cpu sample: %+v", sample)
|
||||
}
|
||||
if sample.memoryUsage != 4096 || sample.memoryLimit != 8192 {
|
||||
t.Fatalf("unexpected memory sample: %+v", sample)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSampleFromCgroup2(t *testing.T) {
|
||||
sample := sampleFromCgroup2(time.Unix(2, 0), &cgroup2stats.Metrics{
|
||||
CPU: &cgroup2stats.CPUStat{
|
||||
UsageUsec: 12,
|
||||
UserUsec: 7,
|
||||
SystemUsec: 5,
|
||||
},
|
||||
Memory: &cgroup2stats.MemoryStat{
|
||||
Usage: 16_384,
|
||||
UsageLimit: 32_768,
|
||||
},
|
||||
})
|
||||
|
||||
if sample.cpuUsageNS != 12_000 || sample.cpuUserNS != 7_000 || sample.cpuKernelNS != 5_000 {
|
||||
t.Fatalf("unexpected cpu sample: %+v", sample)
|
||||
}
|
||||
if sample.memoryUsage != 16_384 || sample.memoryLimit != 32_768 {
|
||||
t.Fatalf("unexpected memory sample: %+v", sample)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeMemoryLimitTreatsHugeValueAsUnlimited(t *testing.T) {
|
||||
if got := normalizeMemoryLimit(maxPracticalMemoryLimitBytes + 1); got != 0 {
|
||||
t.Fatalf("expected unlimited memory limit to normalize to 0, got %d", got)
|
||||
}
|
||||
}
|
||||
@@ -100,6 +100,7 @@ type Service interface {
|
||||
StopContainer(ctx context.Context, containerID string, opts *StopTaskOptions) error
|
||||
DeleteTask(ctx context.Context, containerID string, opts *DeleteTaskOptions) error
|
||||
GetTaskInfo(ctx context.Context, containerID string) (TaskInfo, error)
|
||||
GetContainerMetrics(ctx context.Context, containerID string) (ContainerMetrics, error)
|
||||
ListTasks(ctx context.Context, opts *ListTasksOptions) ([]TaskInfo, error)
|
||||
SetupNetwork(ctx context.Context, req NetworkSetupRequest) (NetworkResult, error)
|
||||
RemoveNetwork(ctx context.Context, req NetworkSetupRequest) error
|
||||
|
||||
@@ -327,6 +327,10 @@ func (s *AppleService) GetTaskInfo(ctx context.Context, containerID string) (Tas
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (*AppleService) GetContainerMetrics(context.Context, string) (ContainerMetrics, error) {
|
||||
return ContainerMetrics{}, ErrNotSupported
|
||||
}
|
||||
|
||||
func (s *AppleService) ListTasks(ctx context.Context, opts *ListTasksOptions) ([]TaskInfo, error) {
|
||||
if err := s.ensureHealthy(ctx); err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -61,6 +61,25 @@ type TaskInfo struct {
|
||||
ExitCode uint32
|
||||
}
|
||||
|
||||
// ContainerMetrics is the runtime metrics snapshot returned by
// Service.GetContainerMetrics.
type ContainerMetrics struct {
	// SampledAt is the time the reported values were sampled.
	SampledAt time.Time
	// CPU holds the CPU figures of the snapshot.
	CPU *CPUMetrics
	// Memory holds the memory figures of the snapshot.
	Memory *MemoryMetrics
}

// CPUMetrics describes CPU consumption of a container task.
type CPUMetrics struct {
	// UsagePercent is CPU time used over a sampling window divided by the
	// length of that window, times 100.
	UsagePercent float64
	// UsageNanoseconds is the total CPU time counter, in nanoseconds.
	UsageNanoseconds uint64
	// UserNanoseconds is the user-mode CPU time counter, in nanoseconds.
	UserNanoseconds uint64
	// KernelNanoseconds is the kernel-mode CPU time counter, in nanoseconds.
	KernelNanoseconds uint64
}

// MemoryMetrics describes memory consumption of a container task.
type MemoryMetrics struct {
	// UsageBytes is the current memory usage in bytes.
	UsageBytes uint64
	// LimitBytes is the memory limit in bytes; 0 means no limit.
	LimitBytes uint64
	// UsagePercent is usage relative to the limit, times 100; it is 0 when
	// there is no limit.
	UsagePercent float64
}
|
||||
|
||||
type SnapshotInfo struct {
|
||||
Name string
|
||||
Parent string
|
||||
|
||||
@@ -106,6 +106,44 @@ type GetContainerResponse struct {
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
|
||||
// ContainerMetricsStatusResponse reports whether the bot's container exists
// and whether its task is running.
type ContainerMetricsStatusResponse struct {
	Exists      bool `json:"exists"`
	TaskRunning bool `json:"task_running"`
}

// ContainerCPUMetricsResponse is the JSON form of the container CPU metrics.
type ContainerCPUMetricsResponse struct {
	UsagePercent      float64 `json:"usage_percent"`
	UsageNanoseconds  uint64  `json:"usage_nanoseconds"`
	UserNanoseconds   uint64  `json:"user_nanoseconds"`
	KernelNanoseconds uint64  `json:"kernel_nanoseconds"`
}

// ContainerMemoryMetricsResponse is the JSON form of the container memory
// metrics.
type ContainerMemoryMetricsResponse struct {
	UsageBytes   uint64  `json:"usage_bytes"`
	LimitBytes   uint64  `json:"limit_bytes"`
	UsagePercent float64 `json:"usage_percent"`
}

// ContainerStorageMetricsResponse is the JSON form of the container storage
// metrics: the measured path and the bytes used under it.
type ContainerStorageMetricsResponse struct {
	Path      string `json:"path"`
	UsedBytes uint64 `json:"used_bytes"`
}

// ContainerMetricsPayloadResponse groups the individual metric sections. A
// section that is nil is omitted from the JSON output.
type ContainerMetricsPayloadResponse struct {
	CPU     *ContainerCPUMetricsResponse     `json:"cpu,omitempty"`
	Memory  *ContainerMemoryMetricsResponse  `json:"memory,omitempty"`
	Storage *ContainerStorageMetricsResponse `json:"storage,omitempty"`
}

// GetContainerMetricsResponse is the body returned by the container metrics
// endpoint. UnsupportedReason and SampledAt are omitted from the JSON output
// when empty.
type GetContainerMetricsResponse struct {
	Supported         bool                            `json:"supported"`
	Backend           string                          `json:"backend"`
	UnsupportedReason string                          `json:"unsupported_reason,omitempty"`
	Status            ContainerMetricsStatusResponse  `json:"status"`
	Metrics           ContainerMetricsPayloadResponse `json:"metrics"`
	SampledAt         *time.Time                      `json:"sampled_at,omitempty"`
}
|
||||
|
||||
type RollbackRequest struct {
|
||||
Version int `json:"version"`
|
||||
}
|
||||
@@ -163,6 +201,7 @@ func (h *ContainerdHandler) Register(e *echo.Echo) {
|
||||
group := e.Group("/bots/:bot_id/container")
|
||||
group.POST("", h.CreateContainer)
|
||||
group.GET("", h.GetContainer)
|
||||
group.GET("/metrics", h.GetContainerMetrics)
|
||||
group.DELETE("", h.DeleteContainer)
|
||||
group.POST("/start", h.StartContainer)
|
||||
group.POST("/stop", h.StopContainer)
|
||||
@@ -400,6 +439,46 @@ func (h *ContainerdHandler) GetContainer(c echo.Context) error {
|
||||
})
|
||||
}
|
||||
|
||||
// GetContainerMetrics godoc
|
||||
// @Summary Get current container metrics for bot
|
||||
// @Tags containerd
|
||||
// @Param bot_id path string true "Bot ID"
|
||||
// @Success 200 {object} GetContainerMetricsResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /bots/{bot_id}/container/metrics [get].
|
||||
func (h *ContainerdHandler) GetContainerMetrics(c echo.Context) error {
|
||||
botID, err := h.requireBotAccess(c)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
metrics, err := h.manager.GetContainerMetrics(c.Request().Context(), botID)
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
||||
}
|
||||
|
||||
response := GetContainerMetricsResponse{
|
||||
Supported: metrics.Supported,
|
||||
Backend: h.containerBackend,
|
||||
UnsupportedReason: metrics.UnsupportedReason,
|
||||
Status: ContainerMetricsStatusResponse{
|
||||
Exists: metrics.Status.Exists,
|
||||
TaskRunning: metrics.Status.TaskRunning,
|
||||
},
|
||||
Metrics: ContainerMetricsPayloadResponse{
|
||||
CPU: toContainerCPUMetricsResponse(metrics.CPU),
|
||||
Memory: toContainerMemoryMetricsResponse(metrics.Memory),
|
||||
Storage: toContainerStorageMetricsResponse(metrics.Storage),
|
||||
},
|
||||
}
|
||||
if !metrics.SampledAt.IsZero() {
|
||||
sampledAt := metrics.SampledAt
|
||||
response.SampledAt = &sampledAt
|
||||
}
|
||||
|
||||
return c.JSON(http.StatusOK, response)
|
||||
}
|
||||
|
||||
// DeleteContainer godoc
|
||||
// @Summary Delete MCP container for bot
|
||||
// @Tags containerd
|
||||
@@ -763,6 +842,39 @@ func (h *ContainerdHandler) RestorePreservedData(c echo.Context) error {
|
||||
return c.JSON(http.StatusOK, map[string]bool{"restored": true})
|
||||
}
|
||||
|
||||
func toContainerCPUMetricsResponse(metrics *ctr.CPUMetrics) *ContainerCPUMetricsResponse {
|
||||
if metrics == nil {
|
||||
return nil
|
||||
}
|
||||
return &ContainerCPUMetricsResponse{
|
||||
UsagePercent: metrics.UsagePercent,
|
||||
UsageNanoseconds: metrics.UsageNanoseconds,
|
||||
UserNanoseconds: metrics.UserNanoseconds,
|
||||
KernelNanoseconds: metrics.KernelNanoseconds,
|
||||
}
|
||||
}
|
||||
|
||||
func toContainerMemoryMetricsResponse(metrics *ctr.MemoryMetrics) *ContainerMemoryMetricsResponse {
|
||||
if metrics == nil {
|
||||
return nil
|
||||
}
|
||||
return &ContainerMemoryMetricsResponse{
|
||||
UsageBytes: metrics.UsageBytes,
|
||||
LimitBytes: metrics.LimitBytes,
|
||||
UsagePercent: metrics.UsagePercent,
|
||||
}
|
||||
}
|
||||
|
||||
func toContainerStorageMetricsResponse(metrics *workspace.ContainerStorageMetrics) *ContainerStorageMetricsResponse {
|
||||
if metrics == nil {
|
||||
return nil
|
||||
}
|
||||
return &ContainerStorageMetricsResponse{
|
||||
Path: metrics.Path,
|
||||
UsedBytes: metrics.UsedBytes,
|
||||
}
|
||||
}
|
||||
|
||||
func snapshotLineage(root string, all []ctr.SnapshotInfo) ([]ctr.SnapshotInfo, bool) {
|
||||
root = strings.TrimSpace(root)
|
||||
if root == "" {
|
||||
|
||||
@@ -51,6 +51,26 @@ type ContainerStatus struct {
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
|
||||
type ContainerMetricsStatus struct {
|
||||
Exists bool `json:"exists"`
|
||||
TaskRunning bool `json:"task_running"`
|
||||
}
|
||||
|
||||
type ContainerStorageMetrics struct {
|
||||
Path string `json:"path"`
|
||||
UsedBytes uint64 `json:"used_bytes"`
|
||||
}
|
||||
|
||||
type ContainerMetricsResult struct {
|
||||
Supported bool
|
||||
UnsupportedReason string
|
||||
Status ContainerMetricsStatus
|
||||
SampledAt time.Time
|
||||
CPU *ctr.CPUMetrics
|
||||
Memory *ctr.MemoryMetrics
|
||||
Storage *ContainerStorageMetrics
|
||||
}
|
||||
|
||||
type Manager struct {
|
||||
service ctr.Service
|
||||
cfg config.WorkspaceConfig
|
||||
|
||||
@@ -111,6 +111,10 @@ func (*legacyRouteTestService) GetTaskInfo(context.Context, string) (ctr.TaskInf
|
||||
return ctr.TaskInfo{}, errdefs.ErrNotFound
|
||||
}
|
||||
|
||||
func (*legacyRouteTestService) GetContainerMetrics(context.Context, string) (ctr.ContainerMetrics, error) {
|
||||
return ctr.ContainerMetrics{}, ctr.ErrNotSupported
|
||||
}
|
||||
|
||||
func (*legacyRouteTestService) ListTasks(context.Context, *ctr.ListTasksOptions) ([]ctr.TaskInfo, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
package workspace
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd/v2/core/mount"
|
||||
"github.com/containerd/errdefs"
|
||||
|
||||
ctr "github.com/memohai/memoh/internal/containerd"
|
||||
)
|
||||
|
||||
const unsupportedReasonBackend = "backend_not_supported"
|
||||
|
||||
func (m *Manager) GetContainerMetrics(ctx context.Context, botID string) (*ContainerMetricsResult, error) {
|
||||
result := &ContainerMetricsResult{
|
||||
Supported: true,
|
||||
Status: ContainerMetricsStatus{
|
||||
Exists: false,
|
||||
},
|
||||
}
|
||||
|
||||
containerID, err := m.ContainerID(ctx, botID)
|
||||
if err != nil {
|
||||
if errors.Is(err, ErrContainerNotFound) {
|
||||
return result, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
info, err := m.service.GetContainer(ctx, containerID)
|
||||
if err != nil {
|
||||
if errdefs.IsNotFound(err) {
|
||||
return result, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
result.Status.Exists = true
|
||||
|
||||
taskInfo, err := m.service.GetTaskInfo(ctx, containerID)
|
||||
if err == nil {
|
||||
result.Status.TaskRunning = taskInfo.Status == ctr.TaskStatusRunning
|
||||
} else if !errdefs.IsNotFound(err) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
runtimeMetrics, err := m.service.GetContainerMetrics(ctx, containerID)
|
||||
switch {
|
||||
case err == nil:
|
||||
result.CPU = runtimeMetrics.CPU
|
||||
result.Memory = runtimeMetrics.Memory
|
||||
result.SampledAt = runtimeMetrics.SampledAt
|
||||
case errors.Is(err, ctr.ErrNotSupported):
|
||||
result.Supported = false
|
||||
result.UnsupportedReason = unsupportedReasonBackend
|
||||
case errdefs.IsNotFound(err):
|
||||
// Task is not running, so CPU and memory metrics are unavailable.
|
||||
default:
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if result.Supported {
|
||||
storage, err := m.collectStorageMetrics(ctx, info)
|
||||
if err != nil {
|
||||
if errors.Is(err, ctr.ErrNotSupported) {
|
||||
return result, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
result.Storage = storage
|
||||
if result.SampledAt.IsZero() {
|
||||
result.SampledAt = time.Now()
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (m *Manager) collectStorageMetrics(ctx context.Context, info ctr.ContainerInfo) (*ContainerStorageMetrics, error) {
|
||||
mounts, err := m.snapshotMounts(ctx, info)
|
||||
if err != nil {
|
||||
if errors.Is(err, errMountNotSupported) {
|
||||
return nil, ctr.ErrNotSupported
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var usedBytes uint64
|
||||
if err := mount.WithReadonlyTempMount(ctx, mounts, func(root string) error {
|
||||
if _, statErr := os.Stat(root); statErr != nil {
|
||||
if os.IsNotExist(statErr) {
|
||||
return nil
|
||||
}
|
||||
return statErr
|
||||
}
|
||||
|
||||
size, sizeErr := dirSize(root)
|
||||
if sizeErr != nil {
|
||||
return sizeErr
|
||||
}
|
||||
usedBytes = size
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &ContainerStorageMetrics{
|
||||
Path: "/",
|
||||
UsedBytes: usedBytes,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func dirSize(root string) (uint64, error) {
|
||||
var size uint64
|
||||
err := filepath.WalkDir(root, func(_ string, entry fs.DirEntry, walkErr error) error {
|
||||
if walkErr != nil {
|
||||
return walkErr
|
||||
}
|
||||
if entry.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
info, err := entry.Info()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fileSize := info.Size()
|
||||
if fileSize > 0 {
|
||||
size += uint64(fileSize) //nolint:gosec // file sizes are checked to be positive before conversion
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return size, nil
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -882,10 +882,39 @@ export type HandlersChannelMeta = {
|
||||
user_config_schema?: ChannelConfigSchema;
|
||||
};
|
||||
|
||||
/**
 * CPU section of a container metrics payload.
 *
 * Every field is optional, so consumers must cope with a partially populated
 * or empty object. Units follow the field-name suffixes.
 */
export type HandlersContainerCpuMetricsResponse = {
    /** CPU usage as a percentage. NOTE(review): scale (0–100 vs 0–1) is not visible here — confirm against the server. */
    usage_percent?: number;
    /** CPU time consumed, in nanoseconds (presumably the total of user and kernel time — confirm). */
    usage_nanoseconds?: number;
    /** CPU time in nanoseconds; presumably the user-mode share. */
    user_nanoseconds?: number;
    /** CPU time in nanoseconds; presumably the kernel-mode share. */
    kernel_nanoseconds?: number;
};
|
||||
|
||||
/**
 * Request body carrying an optional list of device names.
 *
 * NOTE(review): presumably GPU device identifiers to attach to the
 * container; the accepted format is not visible here.
 */
export type HandlersContainerGpuRequest = {
    /** Device names; may be omitted. */
    devices?: string[];
};
|
||||
|
||||
/**
 * Memory section of a container metrics payload.
 *
 * Every field is optional. Byte counts and the percentage are plain numbers.
 */
export type HandlersContainerMemoryMetricsResponse = {
    /** Memory currently in use, in bytes. */
    usage_bytes?: number;
    /** Memory limit, in bytes. NOTE(review): presumably absent or zero when no limit is configured — confirm. */
    limit_bytes?: number;
    /** Memory usage as a percentage (presumably of `limit_bytes` — confirm). */
    usage_percent?: number;
};
|
||||
|
||||
/**
 * Groups the individual metric sections of a container metrics response.
 *
 * Each section is optional and may be missing independently of the others.
 */
export type HandlersContainerMetricsPayloadResponse = {
    /** CPU usage figures. */
    cpu?: HandlersContainerCpuMetricsResponse;
    /** Memory usage figures. */
    memory?: HandlersContainerMemoryMetricsResponse;
    /** Filesystem usage figures. */
    storage?: HandlersContainerStorageMetricsResponse;
};
|
||||
|
||||
/**
 * Container state flags that accompany a metrics response.
 *
 * Both flags are optional; an omitted flag is `undefined`, not `false`.
 */
export type HandlersContainerMetricsStatusResponse = {
    /** Whether the container exists. */
    exists?: boolean;
    /** Whether the container's task is running. */
    task_running?: boolean;
};
|
||||
|
||||
/**
 * Storage section of a container metrics payload.
 *
 * Both fields are optional.
 */
export type HandlersContainerStorageMetricsResponse = {
    /** Path the measurement refers to. */
    path?: string;
    /** Bytes in use under `path`. */
    used_bytes?: number;
};
|
||||
|
||||
export type HandlersContextUsage = {
|
||||
context_window?: number;
|
||||
used_tokens?: number;
|
||||
@@ -979,6 +1008,15 @@ export type HandlersFsWriteRequest = {
|
||||
path?: string;
|
||||
};
|
||||
|
||||
/**
 * Body of a successful container metrics response.
 *
 * Every field is optional; check `supported` and `status` before relying on
 * `metrics`.
 */
export type HandlersGetContainerMetricsResponse = {
    /** Name of the container backend that produced the response. */
    backend?: string;
    /** Whether the backend supports resource metrics. */
    supported?: boolean;
    /** Reason code explaining why metrics are unsupported; presumably only set when `supported` is false. */
    unsupported_reason?: string;
    /** Container state flags. */
    status?: HandlersContainerMetricsStatusResponse;
    /** CPU, memory and storage sections. */
    metrics?: HandlersContainerMetricsPayloadResponse;
    /** When the sample was taken. NOTE(review): presumably a timestamp string; the exact format is not visible here. */
    sampled_at?: string;
};
|
||||
|
||||
export type HandlersGetContainerResponse = {
|
||||
cdi_devices?: Array<string>;
|
||||
container_id?: string;
|
||||
@@ -3038,6 +3076,36 @@ export type PostBotsByBotIdContainerFsWriteResponses = {
|
||||
|
||||
/** Union of every response body type declared in `PostBotsByBotIdContainerFsWriteResponses`. */
export type PostBotsByBotIdContainerFsWriteResponse = PostBotsByBotIdContainerFsWriteResponses[keyof PostBotsByBotIdContainerFsWriteResponses];
|
||||
|
||||
/**
 * Request shape for `GET /bots/{bot_id}/container/metrics`.
 *
 * Only the `bot_id` path parameter is accepted; the operation has no request
 * body and no query parameters.
 */
export type GetBotsByBotIdContainerMetricsData = {
    /** Route template for this operation. */
    url: '/bots/{bot_id}/container/metrics';
    path: {
        /** Bot ID */
        bot_id: string;
    };
    /** No request body is accepted. */
    body?: never;
    /** No query parameters are accepted. */
    query?: never;
};
|
||||
|
||||
/**
 * Error bodies of the container metrics operation, keyed by HTTP status code.
 */
export type GetBotsByBotIdContainerMetricsErrors = {
    /** Internal Server Error */
    500: HandlersErrorResponse;
};
|
||||
|
||||
/** Union of every error body type declared in `GetBotsByBotIdContainerMetricsErrors`. */
export type GetBotsByBotIdContainerMetricsError = GetBotsByBotIdContainerMetricsErrors[keyof GetBotsByBotIdContainerMetricsErrors];
|
||||
|
||||
/**
 * Success bodies of the container metrics operation, keyed by HTTP status code.
 */
export type GetBotsByBotIdContainerMetricsResponses = {
    /** OK */
    200: HandlersGetContainerMetricsResponse;
};
|
||||
|
||||
/** Union of every success body type declared in `GetBotsByBotIdContainerMetricsResponses`. */
export type GetBotsByBotIdContainerMetricsResponse = GetBotsByBotIdContainerMetricsResponses[keyof GetBotsByBotIdContainerMetricsResponses];
|
||||
|
||||
export type DeleteBotsByBotIdContainerSkillsData = {
|
||||
/**
|
||||
* Delete skills payload
|
||||
|
||||
+121
@@ -1503,6 +1503,37 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"/bots/{bot_id}/container/metrics": {
|
||||
"get": {
|
||||
"tags": [
|
||||
"containerd"
|
||||
],
|
||||
"summary": "Get current container metrics for bot",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Bot ID",
|
||||
"name": "bot_id",
|
||||
"in": "path",
|
||||
"required": true
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/handlers.GetContainerMetricsResponse"
|
||||
}
|
||||
},
|
||||
"500": {
|
||||
"description": "Internal Server Error",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/handlers.ErrorResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/bots/{bot_id}/container/skills": {
|
||||
"get": {
|
||||
"tags": [
|
||||
@@ -11653,6 +11684,23 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContainerCPUMetricsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"kernel_nanoseconds": {
|
||||
"type": "integer"
|
||||
},
|
||||
"usage_nanoseconds": {
|
||||
"type": "integer"
|
||||
},
|
||||
"usage_percent": {
|
||||
"type": "number"
|
||||
},
|
||||
"user_nanoseconds": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContainerGPURequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -11664,6 +11712,56 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContainerMemoryMetricsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"limit_bytes": {
|
||||
"type": "integer"
|
||||
},
|
||||
"usage_bytes": {
|
||||
"type": "integer"
|
||||
},
|
||||
"usage_percent": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContainerMetricsPayloadResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cpu": {
|
||||
"$ref": "#/definitions/handlers.ContainerCPUMetricsResponse"
|
||||
},
|
||||
"memory": {
|
||||
"$ref": "#/definitions/handlers.ContainerMemoryMetricsResponse"
|
||||
},
|
||||
"storage": {
|
||||
"$ref": "#/definitions/handlers.ContainerStorageMetricsResponse"
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContainerMetricsStatusResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"exists": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"task_running": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContainerStorageMetricsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string"
|
||||
},
|
||||
"used_bytes": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContextUsage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -11889,6 +11987,29 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.GetContainerMetricsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"backend": {
|
||||
"type": "string"
|
||||
},
|
||||
"metrics": {
|
||||
"$ref": "#/definitions/handlers.ContainerMetricsPayloadResponse"
|
||||
},
|
||||
"sampled_at": {
|
||||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"$ref": "#/definitions/handlers.ContainerMetricsStatusResponse"
|
||||
},
|
||||
"supported": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"unsupported_reason": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.GetContainerResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
||||
@@ -1494,6 +1494,37 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"/bots/{bot_id}/container/metrics": {
|
||||
"get": {
|
||||
"tags": [
|
||||
"containerd"
|
||||
],
|
||||
"summary": "Get current container metrics for bot",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Bot ID",
|
||||
"name": "bot_id",
|
||||
"in": "path",
|
||||
"required": true
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/handlers.GetContainerMetricsResponse"
|
||||
}
|
||||
},
|
||||
"500": {
|
||||
"description": "Internal Server Error",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/handlers.ErrorResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/bots/{bot_id}/container/skills": {
|
||||
"get": {
|
||||
"tags": [
|
||||
@@ -11644,6 +11675,23 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContainerCPUMetricsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"kernel_nanoseconds": {
|
||||
"type": "integer"
|
||||
},
|
||||
"usage_nanoseconds": {
|
||||
"type": "integer"
|
||||
},
|
||||
"usage_percent": {
|
||||
"type": "number"
|
||||
},
|
||||
"user_nanoseconds": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContainerGPURequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -11655,6 +11703,56 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContainerMemoryMetricsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"limit_bytes": {
|
||||
"type": "integer"
|
||||
},
|
||||
"usage_bytes": {
|
||||
"type": "integer"
|
||||
},
|
||||
"usage_percent": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContainerMetricsPayloadResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cpu": {
|
||||
"$ref": "#/definitions/handlers.ContainerCPUMetricsResponse"
|
||||
},
|
||||
"memory": {
|
||||
"$ref": "#/definitions/handlers.ContainerMemoryMetricsResponse"
|
||||
},
|
||||
"storage": {
|
||||
"$ref": "#/definitions/handlers.ContainerStorageMetricsResponse"
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContainerMetricsStatusResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"exists": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"task_running": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContainerStorageMetricsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string"
|
||||
},
|
||||
"used_bytes": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.ContextUsage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -11880,6 +11978,29 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.GetContainerMetricsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"backend": {
|
||||
"type": "string"
|
||||
},
|
||||
"metrics": {
|
||||
"$ref": "#/definitions/handlers.ContainerMetricsPayloadResponse"
|
||||
},
|
||||
"sampled_at": {
|
||||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"$ref": "#/definitions/handlers.ContainerMetricsStatusResponse"
|
||||
},
|
||||
"supported": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"unsupported_reason": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"handlers.GetContainerResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
||||
@@ -1486,6 +1486,17 @@ definitions:
|
||||
user_config_schema:
|
||||
$ref: '#/definitions/channel.ConfigSchema'
|
||||
type: object
|
||||
handlers.ContainerCPUMetricsResponse:
|
||||
properties:
|
||||
kernel_nanoseconds:
|
||||
type: integer
|
||||
usage_nanoseconds:
|
||||
type: integer
|
||||
usage_percent:
|
||||
type: number
|
||||
user_nanoseconds:
|
||||
type: integer
|
||||
type: object
|
||||
handlers.ContainerGPURequest:
|
||||
properties:
|
||||
devices:
|
||||
@@ -1493,6 +1504,38 @@ definitions:
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
handlers.ContainerMemoryMetricsResponse:
|
||||
properties:
|
||||
limit_bytes:
|
||||
type: integer
|
||||
usage_bytes:
|
||||
type: integer
|
||||
usage_percent:
|
||||
type: number
|
||||
type: object
|
||||
handlers.ContainerMetricsPayloadResponse:
|
||||
properties:
|
||||
cpu:
|
||||
$ref: '#/definitions/handlers.ContainerCPUMetricsResponse'
|
||||
memory:
|
||||
$ref: '#/definitions/handlers.ContainerMemoryMetricsResponse'
|
||||
storage:
|
||||
$ref: '#/definitions/handlers.ContainerStorageMetricsResponse'
|
||||
type: object
|
||||
handlers.ContainerMetricsStatusResponse:
|
||||
properties:
|
||||
exists:
|
||||
type: boolean
|
||||
task_running:
|
||||
type: boolean
|
||||
type: object
|
||||
handlers.ContainerStorageMetricsResponse:
|
||||
properties:
|
||||
path:
|
||||
type: string
|
||||
used_bytes:
|
||||
type: integer
|
||||
type: object
|
||||
handlers.ContextUsage:
|
||||
properties:
|
||||
context_window:
|
||||
@@ -1638,6 +1681,21 @@ definitions:
|
||||
path:
|
||||
type: string
|
||||
type: object
|
||||
handlers.GetContainerMetricsResponse:
|
||||
properties:
|
||||
backend:
|
||||
type: string
|
||||
metrics:
|
||||
$ref: '#/definitions/handlers.ContainerMetricsPayloadResponse'
|
||||
sampled_at:
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/definitions/handlers.ContainerMetricsStatusResponse'
|
||||
supported:
|
||||
type: boolean
|
||||
unsupported_reason:
|
||||
type: string
|
||||
type: object
|
||||
handlers.GetContainerResponse:
|
||||
properties:
|
||||
cdi_devices:
|
||||
@@ -4037,6 +4095,26 @@ paths:
|
||||
summary: Write text content to a file
|
||||
tags:
|
||||
- containerd
|
||||
/bots/{bot_id}/container/metrics:
|
||||
get:
|
||||
parameters:
|
||||
- description: Bot ID
|
||||
in: path
|
||||
name: bot_id
|
||||
required: true
|
||||
type: string
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
schema:
|
||||
$ref: '#/definitions/handlers.GetContainerMetricsResponse'
|
||||
"500":
|
||||
description: Internal Server Error
|
||||
schema:
|
||||
$ref: '#/definitions/handlers.ErrorResponse'
|
||||
summary: Get current container metrics for bot
|
||||
tags:
|
||||
- containerd
|
||||
/bots/{bot_id}/container/skills:
|
||||
delete:
|
||||
parameters:
|
||||
|
||||
Reference in New Issue
Block a user