feat(container): add current container metrics view

Expose a dedicated container metrics endpoint and surface current CPU, memory, and root filesystem usage in the bot container view. This gives operators a quick health snapshot while degrading cleanly on unsupported backends.
2026-04-25 07:00:48 +09:00 · 2026-04-24 15:10:47 +08:00
parent 8136ef6ed6
commit e4aca0db13
20 changed files with 1198 additions and 6 deletions
@@ -869,6 +869,21 @@
      "snapshotLoadFailed": "Failed to load snapshots",
      "snapshotNamePlaceholder": "Snapshot display name (optional)",
      "snapshotNameHint": "This field is only for the user-visible display name. The internal snapshot name is generated automatically.",
      "metricsTitle": "Resource Status",
      "metricsSubtitle": "View the container's CPU and memory usage, plus storage usage across its entire filesystem.",
      "metricsLoadFailed": "Failed to load container resource status",
      "metricsUnsupported": "The current container backend does not support resource monitoring.",
      "metricsUnavailable": "No resource metrics available.",
      "metricsStopped": "The container task is not running; CPU and memory metrics are unavailable. Storage information will still be shown if available.",
      "metricsPath": "Scope",
      "metricsUnlimited": "No memory limit configured",
      "currentSample": "Current sample",
      "sampledAt": "Sampled at",
      "metricsLabels": {
        "cpu": "CPU",
        "memory": "Memory",
        "storage": "Storage"
      },
      "dataTitle": "Data Operations",
      "dataSubtitle": "Independently manage import, export, and restore for the container `/data` directory.",
      "deleteTitle": "Delete Container",
@@ -865,6 +865,21 @@
      "snapshotLoadFailed": "加载快照失败",
      "snapshotNamePlaceholder": "快照显示名称（可选）",
      "snapshotNameHint": "这里只填写用户可见的显示名称，系统会自动生成内部快照名。",
      "metricsTitle": "资源状态",
      "metricsSubtitle": "查看当前容器的 CPU、内存与整个容器文件系统的存储使用情况。",
      "metricsLoadFailed": "加载容器资源状态失败",
      "metricsUnsupported": "当前容器后端暂不支持资源监控。",
      "metricsUnavailable": "当前暂无可用的资源指标。",
      "metricsStopped": "容器任务未运行，CPU 和内存指标暂不可用；如有存储信息仍会继续显示。",
      "metricsPath": "统计范围",
      "metricsUnlimited": "未配置内存限制",
      "currentSample": "当前采样",
      "sampledAt": "采样时间",
      "metricsLabels": {
        "cpu": "CPU",
        "memory": "内存",
        "storage": "存储"
      },
      "dataTitle": "数据操作",
      "dataSubtitle": "独立管理容器 `/data` 目录的导入、导出与恢复。",
      "deleteTitle": "删除容器",
@@ -8,6 +8,7 @@ import { ChevronRight } from 'lucide-vue-next'
 import {
  deleteBotsByBotIdContainer,
  getBotsByBotIdContainer,
  getBotsByBotIdContainerMetrics,
  getBotsByBotIdContainerSnapshots,
  getBotsById,
  postBotsByBotIdContainerDataExport,
@@ -18,6 +19,7 @@ import {
  postBotsByBotIdContainerStart,
  postBotsByBotIdContainerStop,
  type HandlersCreateContainerRequest,
  type HandlersGetContainerMetricsResponse,
  type HandlersGetContainerResponse,
  type HandlersListSnapshotsResponse,
 } from '@memohai/sdk'
@@ -29,6 +31,7 @@ import {
 import { Button, Collapsible, CollapsibleContent, CollapsibleTrigger, Input, Label, Separator, Spinner, Switch, Textarea } from '@memohai/ui'
 import ConfirmPopover from '@/components/confirm-popover/index.vue'
 import ContainerCreateProgress from './container-create-progress.vue'
 import ContainerMetricsPanel from './container-metrics-panel.vue'
 import { useSyncedQueryParam } from '@/composables/useSyncedQueryParam'
 import { useBotStatusMeta } from '@/composables/useBotStatusMeta'
 import { useCapabilitiesStore } from '@/store/capabilities'
@@ -92,11 +95,14 @@ const botId = computed(() => route.params.botId as string)
 const containerBusy = computed(() => containerLoading.value || containerAction.value !== '')
 type BotContainerInfo = HandlersGetContainerResponse
 type BotContainerMetrics = HandlersGetContainerMetricsResponse
 type BotContainerSnapshot = HandlersListSnapshotsResponse extends { snapshots?: (infer T)[] } ? T : never
 const containerInfo = ref<BotContainerInfo | null>(null)
 const containerMetrics = ref<BotContainerMetrics | null>(null)
 const containerMissing = ref(false)
 const snapshots = ref<BotContainerSnapshot[]>([])
 const metricsLoading = ref(false)
 const snapshotsLoading = ref(false)
 function resolveErrorMessage(error: unknown, fallback: string): string {
@@ -134,6 +140,7 @@ async function loadContainerData(showLoadingToast: boolean) {
    if (result.error !== undefined) {
      if (result.response.status === 404) {
        containerInfo.value = null
        containerMetrics.value = null
        containerMissing.value = true
        snapshots.value = []
        return
@@ -144,10 +151,13 @@ async function loadContainerData(showLoadingToast: boolean) {
    containerInfo.value = result.data
    containerMissing.value = false
    const metricsPromise = loadContainerMetrics(showLoadingToast)
    if (capabilitiesStore.snapshotSupported) {
-      await loadSnapshots()
+      await Promise.all([metricsPromise, loadSnapshots()])
    } else {
      snapshots.value = []
      await metricsPromise
    }
  } catch (error) {
    if (showLoadingToast) {
@@ -158,6 +168,24 @@ async function loadContainerData(showLoadingToast: boolean) {
  }
 }
 /**
  * Loads the current metrics for the bot's container into `containerMetrics`.
  *
  * `metricsLoading` is raised for the duration of the request. On failure the
  * stored metrics are cleared, and the error is surfaced as a toast only when
  * `showLoadingToast` is true.
  */
 async function loadContainerMetrics(showLoadingToast: boolean) {
  metricsLoading.value = true
  try {
    const response = await getBotsByBotIdContainerMetrics({
      throwOnError: true,
      path: { bot_id: botId.value },
    })
    containerMetrics.value = response.data
  } catch (err) {
    containerMetrics.value = null
    // Silent refreshes drop the error; the `finally` below still runs.
    if (!showLoadingToast) return
    const fallbackMessage = t('bots.container.metricsLoadFailed')
    toast.error(resolveErrorMessage(err, fallbackMessage))
  } finally {
    metricsLoading.value = false
  }
 }
 async function loadSnapshots() {
  if (!containerInfo.value || !capabilitiesStore.snapshotSupported) {
    snapshots.value = []
@@ -411,6 +439,7 @@ async function handleDeleteContainer(preserveData: boolean) {
        throwOnError: true,
      })
      containerInfo.value = null
      containerMetrics.value = null
      containerMissing.value = true
      snapshots.value = []
      createRestoreData.value = preserveData
@@ -958,6 +987,12 @@ watch([activeTab, botId], ([tab]) => {
        </dl>
      </div>
      <ContainerMetricsPanel
        :backend="capabilitiesStore.containerBackend"
        :loading="metricsLoading"
        :metrics="containerMetrics"
      />
      <div class="rounded-md border px-3 py-2 text-xs text-muted-foreground">
        {{ $t('bots.container.gpuRecreateHint') }}
      </div>
@@ -0,0 +1,159 @@
 <template>
  <div class="space-y-4 rounded-md border p-4">
    <div class="space-y-1">
      <h4 class="text-xs font-medium">
        {{ t('bots.container.metricsTitle') }}
      </h4>
      <p class="text-xs text-muted-foreground">
        {{ t('bots.container.metricsSubtitle') }}
      </p>
    </div>
    <div
      v-if="loading && !metrics"
      class="flex items-center gap-2 text-xs text-muted-foreground"
    >
      <Spinner />
      <span>{{ t('common.loading') }}</span>
    </div>
    <div
      v-else-if="backendUnsupported"
      class="rounded-md border border-dashed px-3 py-2 text-xs text-muted-foreground"
    >
      {{ t('bots.container.metricsUnsupported') }}
    </div>
    <div
      v-else-if="!hasAnyMetric"
      class="rounded-md border border-dashed px-3 py-2 text-xs text-muted-foreground"
    >
      {{ taskRunning === false ? t('bots.container.metricsStopped') : t('bots.container.metricsUnavailable') }}
    </div>
    <template v-else>
      <div
        v-if="taskRunning === false"
        class="rounded-md border border-primary/20 bg-primary/5 px-3 py-2 text-xs"
      >
        {{ t('bots.container.metricsStopped') }}
      </div>
      <div class="grid gap-3 md:grid-cols-3">
        <div class="rounded-md border bg-background/70 p-3">
          <p class="text-xs text-muted-foreground">
            {{ t('bots.container.metricsLabels.cpu') }}
          </p>
          <p class="mt-2 text-2xl font-semibold">
            {{ cpuValueText }}
          </p>
          <p class="mt-2 text-[11px] text-muted-foreground">
            {{ t('bots.container.currentSample') }}
          </p>
        </div>
        <div class="rounded-md border bg-background/70 p-3">
          <p class="text-xs text-muted-foreground">
            {{ t('bots.container.metricsLabels.memory') }}
          </p>
          <p class="mt-2 text-2xl font-semibold">
            {{ memoryValueText }}
          </p>
          <p class="mt-2 text-[11px] text-muted-foreground">
            {{ memoryHintText }}
          </p>
        </div>
        <div class="rounded-md border bg-background/70 p-3">
          <p class="text-xs text-muted-foreground">
            {{ t('bots.container.metricsLabels.storage') }}
          </p>
          <p class="mt-2 text-2xl font-semibold">
            {{ storageValueText }}
          </p>
          <p class="mt-2 text-[11px] text-muted-foreground break-all">
            {{ t('bots.container.metricsPath') }}: {{ storagePathText }}
          </p>
        </div>
      </div>
      <p
        v-if="sampledAtText !== '-'"
        class="text-[11px] text-muted-foreground"
      >
        {{ t('bots.container.sampledAt') }}: {{ sampledAtText }}
      </p>
    </template>
  </div>
 </template>
 <script setup lang="ts">
 import { computed } from 'vue'
 import { useI18n } from 'vue-i18n'
 import { Spinner } from '@memohai/ui'
 import type { HandlersGetContainerMetricsResponse } from '@memohai/sdk'
 import { formatDateTime } from '@/utils/date-time'
 // Component inputs: the backend identifier supplied by the parent, whether a
 // metrics request is in flight, and the latest metrics response (or null).
 const props = defineProps<{
  backend: string
  loading: boolean
  metrics: HandlersGetContainerMetricsResponse | null
 }>()
 const { t } = useI18n()
 // Shorthand accessors into the optional response; each is undefined when the
 // corresponding section is missing or no response is loaded.
 const status = computed(() => props.metrics?.status)
 const cpuMetrics = computed(() => props.metrics?.metrics?.cpu)
 const memoryMetrics = computed(() => props.metrics?.metrics?.memory)
 const storageMetrics = computed(() => props.metrics?.metrics?.storage)
 // True when the backend is anything other than 'containerd', or when the
 // response explicitly reports `supported: false`.
 const backendUnsupported = computed(() =>
  props.backend !== 'containerd' || props.metrics?.supported === false,
 )
 // Stays undefined until a response with a status is loaded; the template
 // compares against `false` explicitly to tell "stopped" from "unknown".
 const taskRunning = computed(() => status.value?.task_running)
 // True when at least one of the cpu / memory / storage sections is present.
 const hasAnyMetric = computed(() =>
  !!cpuMetrics.value || !!memoryMetrics.value || !!storageMetrics.value,
 )
 // Display strings for the three metric cards; missing values render as '--'
 // (or '-' for the storage path and the sample time).
 const cpuValueText = computed(() => formatPercent(cpuMetrics.value?.usage_percent))
 const memoryValueText = computed(() => formatBytes(memoryMetrics.value?.usage_bytes))
 const storageValueText = computed(() => formatBytes(storageMetrics.value?.used_bytes))
 const storagePathText = computed(() => storageMetrics.value?.path || '-')
 const sampledAtText = computed(() =>
  formatDateTime(props.metrics?.sampled_at, { fallback: '-' }),
 )
 // Secondary line of the memory card:
 //  - positive limit: "<usage> / <limit>" plus " (<percent>)" when the percent is printable
 //  - memory section present without a positive limit: the "unlimited" label
 //  - no memory section: the "unavailable" label
 const memoryHintText = computed(() => {
  const limit = memoryMetrics.value?.limit_bytes
  if (limit && limit > 0) {
    const usagePercent = formatPercent(memoryMetrics.value?.usage_percent)
    return `${formatBytes(memoryMetrics.value?.usage_bytes)} / ${formatBytes(limit)}${usagePercent === '--' ? '' : ` (${usagePercent})`}`
  }
  if (memoryMetrics.value) {
    return t('bots.container.metricsUnlimited')
  }
  return t('bots.container.metricsUnavailable')
 })
 /**
  * Formats a byte count using binary units (B, KiB, MiB, GiB, TiB).
  *
  * Bytes are shown without decimals; larger units use one decimal below 100
  * and none from 100 upwards. Values beyond the TiB range stay in TiB.
  *
  * @param value - Byte count; may be undefined when the metric is missing.
  * @returns The formatted size, or `'--'` for missing, non-finite or negative input.
  */
 function formatBytes(value?: number) {
  // isFinite also rejects NaN; Infinity would otherwise render as "Infinity TiB".
  if (typeof value !== 'number' || !Number.isFinite(value) || value < 0) return '--'
  if (value === 0) return '0 B'
  const units = ['B', 'KiB', 'MiB', 'GiB', 'TiB']
  const lastUnitIndex = units.length - 1
  let size = value
  let unitIndex = 0
  while (size >= 1024 && unitIndex < lastUnitIndex) {
    size /= 1024
    unitIndex += 1
  }
  // Choose the precision from the rounded value so 99.96 prints "100", not "100.0".
  let fractionDigits = unitIndex === 0 || Number(size.toFixed(1)) >= 100 ? 0 : 1
  // Rounding can land on the next unit boundary (1023.999 KiB -> "1024 KiB");
  // promote to the next unit so the output reads "1.0 MiB" instead.
  if (unitIndex < lastUnitIndex && Number(size.toFixed(fractionDigits)) >= 1024) {
    size /= 1024
    unitIndex += 1
    fractionDigits = 1
  }
  return `${size.toFixed(fractionDigits)} ${units[unitIndex]}`
 }
 /**
  * Formats a percentage with one decimal below 100 and none from 100 upwards.
  *
  * @param value - Percentage value; may be undefined when the metric is missing.
  * @returns The formatted percentage, or `'--'` for missing, non-finite or negative input.
  */
 function formatPercent(value?: number) {
  // isFinite also rejects NaN; Infinity would otherwise render as "Infinity%".
  if (typeof value !== 'number' || !Number.isFinite(value) || value < 0) return '--'
  // Decide on the rounded value so 99.96 prints "100%" like an exact 100 does.
  const fractionDigits = Number(value.toFixed(1)) >= 100 ? 0 : 1
  return `${value.toFixed(fractionDigits)}%`
 }
 </script>
@@ -0,0 +1,168 @@
 package containerd
 import (
 	"context"
 	"fmt"
 	"time"
 	cgroup1stats "github.com/containerd/cgroups/v3/cgroup1/stats"
 	cgroup2stats "github.com/containerd/cgroups/v3/cgroup2/stats"
 	containerd "github.com/containerd/containerd/v2/client"
 	"google.golang.org/protobuf/proto"
 	"google.golang.org/protobuf/types/known/anypb"
 )
 // metricsSampleInterval is how long GetContainerMetrics waits between its two
 // task metric samples.
 const metricsSampleInterval = 200 * time.Millisecond
 // maxPracticalMemoryLimitBytes is the cutoff used by normalizeMemoryLimit:
 // limits above 1<<60 bytes are reported as 0.
 const maxPracticalMemoryLimitBytes = uint64(1) << 60
 // taskMetricsSample is one point-in-time reading of a task's counters,
 // normalized from either cgroup v1 or cgroup v2 stats.
 type taskMetricsSample struct {
 	// timestamp is the metric's own timestamp when present, otherwise the
 	// local time at which the sample was taken.
 	timestamp   time.Time
 	// CPU counters in nanoseconds (cgroup v2 microseconds are multiplied by 1000).
 	cpuUsageNS  uint64
 	cpuUserNS   uint64
 	cpuKernelNS uint64
 	// memoryUsage is the usage value reported by the cgroup memory stats.
 	memoryUsage uint64
 	// memoryLimit is the limit after normalizeMemoryLimit.
 	memoryLimit uint64
 }
 // GetContainerMetrics samples the task's metrics twice, metricsSampleInterval
 // apart, and returns CPU metrics built from both samples plus memory metrics
 // and the timestamp from the second one. It returns ctx.Err() if the context
 // ends during the wait, and passes through any lookup or sampling error.
 func (s *DefaultService) GetContainerMetrics(ctx context.Context, containerID string) (ContainerMetrics, error) {
 	var none ContainerMetrics
 	task, ctx, err := s.getTask(ctx, containerID)
 	if err != nil {
 		return none, err
 	}
 	before, err := sampleTaskMetrics(ctx, task)
 	if err != nil {
 		return none, err
 	}
 	// Wait out the sampling interval, aborting early on cancellation.
 	wait := time.NewTimer(metricsSampleInterval)
 	defer wait.Stop()
 	select {
 	case <-wait.C:
 	case <-ctx.Done():
 		return none, ctx.Err()
 	}
 	after, err := sampleTaskMetrics(ctx, task)
 	if err != nil {
 		return none, err
 	}
 	result := ContainerMetrics{SampledAt: after.timestamp}
 	result.CPU = buildCPUMetrics(before, after)
 	result.Memory = buildMemoryMetrics(after)
 	return result, nil
 }
 // sampleTaskMetrics reads the task's metrics and decodes them into a
 // taskMetricsSample. Cgroup v1 and v2 payloads are supported; a missing
 // payload or any other message type yields ErrNotSupported (wrapped with the
 // decoded type in the latter case).
 func sampleTaskMetrics(ctx context.Context, task containerd.Task) (taskMetricsSample, error) {
 	var empty taskMetricsSample
 	metric, err := task.Metrics(ctx)
 	if err != nil {
 		return empty, err
 	}
 	if metric == nil || metric.Data == nil {
 		return empty, ErrNotSupported
 	}
 	// Prefer the timestamp carried by the metric; fall back to local time.
 	sampledAt := time.Now()
 	if reported := metric.GetTimestamp(); reported != nil {
 		sampledAt = reported.AsTime()
 	}
 	payload := metric.Data
 	if payload.MessageIs(&cgroup1stats.Metrics{}) {
 		v1 := &cgroup1stats.Metrics{}
 		if err := anypb.UnmarshalTo(payload, v1, proto.UnmarshalOptions{}); err != nil {
 			return empty, fmt.Errorf("decode cgroup v1 metrics: %w", err)
 		}
 		return sampleFromCgroup1(sampledAt, v1), nil
 	}
 	if payload.MessageIs(&cgroup2stats.Metrics{}) {
 		v2 := &cgroup2stats.Metrics{}
 		if err := anypb.UnmarshalTo(payload, v2, proto.UnmarshalOptions{}); err != nil {
 			return empty, fmt.Errorf("decode cgroup v2 metrics: %w", err)
 		}
 		return sampleFromCgroup2(sampledAt, v2), nil
 	}
 	// Unknown payload: decode it only to name the concrete type in the error.
 	unknown, decodeErr := anypb.UnmarshalNew(payload, proto.UnmarshalOptions{})
 	if decodeErr != nil {
 		return empty, fmt.Errorf("decode task metrics: %w", decodeErr)
 	}
 	return empty, fmt.Errorf("%w: unsupported task metrics type %T", ErrNotSupported, unknown)
 }
 // sampleFromCgroup1 converts cgroup v1 stats into a taskMetricsSample stamped
 // with timestamp. Missing CPU or memory sections leave those fields at zero.
 func sampleFromCgroup1(timestamp time.Time, stats *cgroup1stats.Metrics) taskMetricsSample {
 	result := taskMetricsSample{timestamp: timestamp}
 	if stats == nil {
 		return result
 	}
 	cpuStat, memStat := stats.GetCPU(), stats.GetMemory()
 	if cpuStat != nil {
 		cpuUsage := cpuStat.GetUsage()
 		result.cpuUsageNS = cpuUsage.GetTotal()
 		result.cpuUserNS = cpuUsage.GetUser()
 		result.cpuKernelNS = cpuUsage.GetKernel()
 	}
 	if memStat != nil {
 		usageEntry := memStat.GetUsage()
 		result.memoryUsage = usageEntry.GetUsage()
 		result.memoryLimit = normalizeMemoryLimit(usageEntry.GetLimit())
 	}
 	return result
 }
 // sampleFromCgroup2 converts cgroup v2 stats into a taskMetricsSample stamped
 // with timestamp. CPU counters arrive in microseconds and are scaled to
 // nanoseconds; missing sections leave their fields at zero.
 func sampleFromCgroup2(timestamp time.Time, stats *cgroup2stats.Metrics) taskMetricsSample {
 	const nanosPerMicro = 1_000
 	result := taskMetricsSample{timestamp: timestamp}
 	if stats == nil {
 		return result
 	}
 	cpuStat, memStat := stats.GetCPU(), stats.GetMemory()
 	if cpuStat != nil {
 		result.cpuUsageNS = cpuStat.GetUsageUsec() * nanosPerMicro
 		result.cpuUserNS = cpuStat.GetUserUsec() * nanosPerMicro
 		result.cpuKernelNS = cpuStat.GetSystemUsec() * nanosPerMicro
 	}
 	if memStat != nil {
 		result.memoryUsage = memStat.GetUsage()
 		result.memoryLimit = normalizeMemoryLimit(memStat.GetUsageLimit())
 	}
 	return result
 }
 // buildCPUMetrics derives CPU metrics from two samples. The cumulative
 // counters are taken from the second sample. UsagePercent is the CPU time
 // consumed between the samples divided by the elapsed time between their
 // timestamps, times 100.
 //
 // UsagePercent is left at zero when the elapsed time is not positive or the
 // usage counter went backwards between the samples.
 func buildCPUMetrics(first, second taskMetricsSample) *CPUMetrics {
 	metrics := &CPUMetrics{
 		UsageNanoseconds:  second.cpuUsageNS,
 		UserNanoseconds:   second.cpuUserNS,
 		KernelNanoseconds: second.cpuKernelNS,
 	}
 	elapsedNS := second.timestamp.Sub(first.timestamp).Nanoseconds()
 	if elapsedNS <= 0 || second.cpuUsageNS < first.cpuUsageNS {
 		return metrics
 	}
 	// The guard above makes the numerator non-negative and the denominator
 	// positive, so the result cannot be negative and needs no clamping.
 	metrics.UsagePercent = (float64(second.cpuUsageNS-first.cpuUsageNS) / float64(elapsedNS)) * 100
 	return metrics
 }
 // buildMemoryMetrics reports the sample's memory usage and limit.
 // UsagePercent is usage/limit*100 and stays zero when the limit is zero.
 func buildMemoryMetrics(sample taskMetricsSample) *MemoryMetrics {
 	usage, limit := sample.memoryUsage, sample.memoryLimit
 	var percent float64
 	if limit > 0 {
 		percent = (float64(usage) / float64(limit)) * 100
 	}
 	return &MemoryMetrics{
 		UsageBytes:   usage,
 		LimitBytes:   limit,
 		UsagePercent: percent,
 	}
 }
 // normalizeMemoryLimit returns limit unchanged when it is at most
 // maxPracticalMemoryLimitBytes, and 0 for anything larger. A zero limit
 // therefore stays zero.
 func normalizeMemoryLimit(limit uint64) uint64 {
 	if limit <= maxPracticalMemoryLimitBytes {
 		return limit
 	}
 	return 0
 }
@@ -0,0 +1,88 @@
 package containerd
 import (
 	"testing"
 	"time"
 	cgroup1stats "github.com/containerd/cgroups/v3/cgroup1/stats"
 	cgroup2stats "github.com/containerd/cgroups/v3/cgroup2/stats"
 )
 // TestBuildCPUMetricsUsesCumulativeDelta checks that 100ms of CPU time over a
 // 200ms window is reported as 50%, and that the cumulative usage comes from
 // the later sample.
 func TestBuildCPUMetricsUsesCumulativeDelta(t *testing.T) {
 	epoch := time.Unix(0, 0)
 	earlier := taskMetricsSample{
 		timestamp:   epoch,
 		cpuUsageNS:  100_000_000,
 		cpuUserNS:   60_000_000,
 		cpuKernelNS: 40_000_000,
 	}
 	later := taskMetricsSample{
 		timestamp:   epoch.Add(200 * time.Millisecond),
 		cpuUsageNS:  200_000_000,
 		cpuUserNS:   120_000_000,
 		cpuKernelNS: 80_000_000,
 	}
 	got := buildCPUMetrics(earlier, later)
 	if got == nil {
 		t.Fatal("expected cpu metrics")
 	}
 	if percent := got.UsagePercent; percent != 50 {
 		t.Fatalf("expected cpu usage percent 50, got %v", percent)
 	}
 	if want := later.cpuUsageNS; got.UsageNanoseconds != want {
 		t.Fatalf("expected latest cpu usage %d, got %d", want, got.UsageNanoseconds)
 	}
 }
 func TestSampleFromCgroup1(t *testing.T) {
 	sample := sampleFromCgroup1(time.Unix(1, 0), &cgroup1stats.Metrics{
 		CPU: &cgroup1stats.CPUStat{
 			Usage: &cgroup1stats.CPUUsage{
 				Total:  12,
 				User:   7,
 				Kernel: 5,
 			},
 		},
 		Memory: &cgroup1stats.MemoryStat{
 			Usage: &cgroup1stats.MemoryEntry{
 				Usage: 4096,
 				Limit: 8192,
 			},
 		},
 	})
 	if sample.cpuUsageNS != 12 || sample.cpuUserNS != 7 || sample.cpuKernelNS != 5 {
 		t.Fatalf("unexpected cpu sample: %+v", sample)
 	}
 	if sample.memoryUsage != 4096 || sample.memoryLimit != 8192 {
 		t.Fatalf("unexpected memory sample: %+v", sample)
 	}
 }
 func TestSampleFromCgroup2(t *testing.T) {
 	sample := sampleFromCgroup2(time.Unix(2, 0), &cgroup2stats.Metrics{
 		CPU: &cgroup2stats.CPUStat{
 			UsageUsec:  12,
 			UserUsec:   7,
 			SystemUsec: 5,
 		},
 		Memory: &cgroup2stats.MemoryStat{
 			Usage:      16_384,
 			UsageLimit: 32_768,
 		},
 	})
 	if sample.cpuUsageNS != 12_000 || sample.cpuUserNS != 7_000 || sample.cpuKernelNS != 5_000 {
 		t.Fatalf("unexpected cpu sample: %+v", sample)
 	}
 	if sample.memoryUsage != 16_384 || sample.memoryLimit != 32_768 {
 		t.Fatalf("unexpected memory sample: %+v", sample)
 	}
 }
 func TestNormalizeMemoryLimitTreatsHugeValueAsUnlimited(t *testing.T) {
 	if got := normalizeMemoryLimit(maxPracticalMemoryLimitBytes + 1); got != 0 {
 		t.Fatalf("expected unlimited memory limit to normalize to 0, got %d", got)
 	}
 }
@@ -100,6 +100,7 @@ type Service interface {
 	StopContainer(ctx context.Context, containerID string, opts *StopTaskOptions) error
 	DeleteTask(ctx context.Context, containerID string, opts *DeleteTaskOptions) error
 	GetTaskInfo(ctx context.Context, containerID string) (TaskInfo, error)
 	GetContainerMetrics(ctx context.Context, containerID string) (ContainerMetrics, error)
 	ListTasks(ctx context.Context, opts *ListTasksOptions) ([]TaskInfo, error)
 	SetupNetwork(ctx context.Context, req NetworkSetupRequest) (NetworkResult, error)
 	RemoveNetwork(ctx context.Context, req NetworkSetupRequest) error
@@ -327,6 +327,10 @@ func (s *AppleService) GetTaskInfo(ctx context.Context, containerID string) (Tas
 	}, nil
 }
 // GetContainerMetrics always returns ErrNotSupported: this backend does not
 // provide container metrics.
 func (*AppleService) GetContainerMetrics(context.Context, string) (ContainerMetrics, error) {
 	return ContainerMetrics{}, ErrNotSupported
 }
 func (s *AppleService) ListTasks(ctx context.Context, opts *ListTasksOptions) ([]TaskInfo, error) {
 	if err := s.ensureHealthy(ctx); err != nil {
 		return nil, err
@@ -61,6 +61,25 @@ type TaskInfo struct {
 	ExitCode    uint32
 }
 // ContainerMetrics is the result of Service.GetContainerMetrics: a sample
 // time plus optional CPU and memory sections.
 type ContainerMetrics struct {
 	SampledAt time.Time
 	CPU       *CPUMetrics
 	Memory    *MemoryMetrics
 }
 // CPUMetrics holds a usage percentage alongside cumulative CPU time counters
 // in nanoseconds (total, user and kernel).
 type CPUMetrics struct {
 	UsagePercent      float64
 	UsageNanoseconds  uint64
 	UserNanoseconds   uint64
 	KernelNanoseconds uint64
 }
 // MemoryMetrics holds memory usage and limit in bytes plus the usage as a
 // percentage of the limit.
 type MemoryMetrics struct {
 	UsageBytes   uint64
 	LimitBytes   uint64
 	UsagePercent float64
 }
 type SnapshotInfo struct {
 	Name    string
 	Parent  string
@@ -106,6 +106,44 @@ type GetContainerResponse struct {
 	UpdatedAt        time.Time `json:"updated_at"`
 }
 // ContainerMetricsStatusResponse is the "status" object of the metrics
 // response: whether the container exists and whether its task is running.
 type ContainerMetricsStatusResponse struct {
 	Exists      bool `json:"exists"`
 	TaskRunning bool `json:"task_running"`
 }
 // ContainerCPUMetricsResponse is the JSON form of the CPU metrics: a usage
 // percentage and cumulative total/user/kernel CPU time in nanoseconds.
 type ContainerCPUMetricsResponse struct {
 	UsagePercent      float64 `json:"usage_percent"`
 	UsageNanoseconds  uint64  `json:"usage_nanoseconds"`
 	UserNanoseconds   uint64  `json:"user_nanoseconds"`
 	KernelNanoseconds uint64  `json:"kernel_nanoseconds"`
 }
 // ContainerMemoryMetricsResponse is the JSON form of the memory metrics:
 // usage and limit in bytes plus a usage percentage.
 type ContainerMemoryMetricsResponse struct {
 	UsageBytes   uint64  `json:"usage_bytes"`
 	LimitBytes   uint64  `json:"limit_bytes"`
 	UsagePercent float64 `json:"usage_percent"`
 }
 // ContainerStorageMetricsResponse is the JSON form of the storage metrics:
 // the measured path and the number of bytes used under it.
 type ContainerStorageMetricsResponse struct {
 	Path      string `json:"path"`
 	UsedBytes uint64 `json:"used_bytes"`
 }
 // ContainerMetricsPayloadResponse groups the metric sections. Each section is
 // a pointer with omitempty, so a nil section is left out of the JSON.
 type ContainerMetricsPayloadResponse struct {
 	CPU     *ContainerCPUMetricsResponse     `json:"cpu,omitempty"`
 	Memory  *ContainerMemoryMetricsResponse  `json:"memory,omitempty"`
 	Storage *ContainerStorageMetricsResponse `json:"storage,omitempty"`
 }
 // GetContainerMetricsResponse is the body returned by the container metrics
 // endpoint. UnsupportedReason and SampledAt are omitted from the JSON when
 // empty or nil.
 type GetContainerMetricsResponse struct {
 	Supported         bool                            `json:"supported"`
 	Backend           string                          `json:"backend"`
 	UnsupportedReason string                          `json:"unsupported_reason,omitempty"`
 	Status            ContainerMetricsStatusResponse  `json:"status"`
 	Metrics           ContainerMetricsPayloadResponse `json:"metrics"`
 	SampledAt         *time.Time                      `json:"sampled_at,omitempty"`
 }
 type RollbackRequest struct {
 	Version int `json:"version"`
 }
@@ -163,6 +201,7 @@ func (h *ContainerdHandler) Register(e *echo.Echo) {
 	group := e.Group("/bots/:bot_id/container")
 	group.POST("", h.CreateContainer)
 	group.GET("", h.GetContainer)
 	group.GET("/metrics", h.GetContainerMetrics)
 	group.DELETE("", h.DeleteContainer)
 	group.POST("/start", h.StartContainer)
 	group.POST("/stop", h.StopContainer)
@@ -400,6 +439,46 @@ func (h *ContainerdHandler) GetContainer(c echo.Context) error {
 	})
 }
 // GetContainerMetrics godoc
 // @Summary Get current container metrics for bot
 // @Tags containerd
 // @Param bot_id path string true "Bot ID"
 // @Success 200 {object} GetContainerMetricsResponse
 // @Failure 500 {object} ErrorResponse
 // @Router /bots/{bot_id}/container/metrics [get].
 func (h *ContainerdHandler) GetContainerMetrics(c echo.Context) error {
 	botID, err := h.requireBotAccess(c)
 	if err != nil {
 		return err
 	}
 	result, err := h.manager.GetContainerMetrics(c.Request().Context(), botID)
 	if err != nil {
 		return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
 	}
 	payload := GetContainerMetricsResponse{
 		Supported:         result.Supported,
 		Backend:           h.containerBackend,
 		UnsupportedReason: result.UnsupportedReason,
 	}
 	payload.Status = ContainerMetricsStatusResponse{
 		Exists:      result.Status.Exists,
 		TaskRunning: result.Status.TaskRunning,
 	}
 	payload.Metrics = ContainerMetricsPayloadResponse{
 		CPU:     toContainerCPUMetricsResponse(result.CPU),
 		Memory:  toContainerMemoryMetricsResponse(result.Memory),
 		Storage: toContainerStorageMetricsResponse(result.Storage),
 	}
 	// A zero sample time means nothing was sampled; leave sampled_at out.
 	if sampled := result.SampledAt; !sampled.IsZero() {
 		payload.SampledAt = &sampled
 	}
 	return c.JSON(http.StatusOK, payload)
 }
 // DeleteContainer godoc
 // @Summary Delete MCP container for bot
 // @Tags containerd
@@ -763,6 +842,39 @@ func (h *ContainerdHandler) RestorePreservedData(c echo.Context) error {
 	return c.JSON(http.StatusOK, map[string]bool{"restored": true})
 }
 // toContainerCPUMetricsResponse copies CPU metrics into their response type,
 // returning nil for nil input.
 func toContainerCPUMetricsResponse(metrics *ctr.CPUMetrics) *ContainerCPUMetricsResponse {
 	if metrics != nil {
 		out := ContainerCPUMetricsResponse{}
 		out.UsagePercent = metrics.UsagePercent
 		out.UsageNanoseconds = metrics.UsageNanoseconds
 		out.UserNanoseconds = metrics.UserNanoseconds
 		out.KernelNanoseconds = metrics.KernelNanoseconds
 		return &out
 	}
 	return nil
 }
 // toContainerMemoryMetricsResponse copies memory metrics into their response
 // type, returning nil for nil input.
 func toContainerMemoryMetricsResponse(metrics *ctr.MemoryMetrics) *ContainerMemoryMetricsResponse {
 	if metrics != nil {
 		out := ContainerMemoryMetricsResponse{}
 		out.UsageBytes = metrics.UsageBytes
 		out.LimitBytes = metrics.LimitBytes
 		out.UsagePercent = metrics.UsagePercent
 		return &out
 	}
 	return nil
 }
 // toContainerStorageMetricsResponse copies storage metrics into their
 // response type, returning nil for nil input.
 func toContainerStorageMetricsResponse(metrics *workspace.ContainerStorageMetrics) *ContainerStorageMetricsResponse {
 	if metrics != nil {
 		out := ContainerStorageMetricsResponse{}
 		out.Path = metrics.Path
 		out.UsedBytes = metrics.UsedBytes
 		return &out
 	}
 	return nil
 }
 func snapshotLineage(root string, all []ctr.SnapshotInfo) ([]ctr.SnapshotInfo, bool) {
 	root = strings.TrimSpace(root)
 	if root == "" {
@@ -51,6 +51,26 @@ type ContainerStatus struct {
 	UpdatedAt        time.Time `json:"updated_at"`
 }
 // ContainerMetricsStatus records whether the container exists and whether
 // its task is currently running.
 type ContainerMetricsStatus struct {
 	Exists      bool `json:"exists"`
 	TaskRunning bool `json:"task_running"`
 }
 // ContainerStorageMetrics reports the bytes used under Path.
 type ContainerStorageMetrics struct {
 	Path      string `json:"path"`
 	UsedBytes uint64 `json:"used_bytes"`
 }
 // ContainerMetricsResult is what Manager.GetContainerMetrics returns. CPU,
 // Memory and Storage are nil when the corresponding data was not collected.
 type ContainerMetricsResult struct {
 	// Supported is false when the backend reported ErrNotSupported for
 	// runtime metrics; UnsupportedReason then carries the reason code.
 	Supported         bool
 	UnsupportedReason string
 	Status            ContainerMetricsStatus
 	SampledAt         time.Time
 	CPU               *ctr.CPUMetrics
 	Memory            *ctr.MemoryMetrics
 	Storage           *ContainerStorageMetrics
 }
 type Manager struct {
 	service         ctr.Service
 	cfg             config.WorkspaceConfig
@@ -111,6 +111,10 @@ func (*legacyRouteTestService) GetTaskInfo(context.Context, string) (ctr.TaskInf
 	return ctr.TaskInfo{}, errdefs.ErrNotFound
 }
 // GetContainerMetrics is a test stub that always reports ctr.ErrNotSupported.
 func (*legacyRouteTestService) GetContainerMetrics(context.Context, string) (ctr.ContainerMetrics, error) {
 	return ctr.ContainerMetrics{}, ctr.ErrNotSupported
 }
 func (*legacyRouteTestService) ListTasks(context.Context, *ctr.ListTasksOptions) ([]ctr.TaskInfo, error) {
 	return nil, nil
 }
@@ -0,0 +1,142 @@
 package workspace
 import (
 	"context"
 	"errors"
 	"io/fs"
 	"os"
 	"path/filepath"
 	"time"
 	"github.com/containerd/containerd/v2/core/mount"
 	"github.com/containerd/errdefs"
 	ctr "github.com/memohai/memoh/internal/containerd"
 )
 // unsupportedReasonBackend is the UnsupportedReason set when the container
 // service returns ErrNotSupported for runtime metrics.
 const unsupportedReasonBackend = "backend_not_supported"
 // GetContainerMetrics gathers status, runtime (CPU/memory) and storage
 // metrics for the bot's container.
 //
 // A missing container is not an error: the result is returned with
 // Status.Exists false. A missing task leaves the CPU and memory sections nil.
 // When the backend reports ErrNotSupported for runtime metrics, Supported is
 // set to false and storage collection is skipped. Any other error is
 // returned to the caller.
 func (m *Manager) GetContainerMetrics(ctx context.Context, botID string) (*ContainerMetricsResult, error) {
 	out := &ContainerMetricsResult{Supported: true}
 	containerID, err := m.ContainerID(ctx, botID)
 	switch {
 	case err == nil:
 	case errors.Is(err, ErrContainerNotFound):
 		return out, nil
 	default:
 		return nil, err
 	}
 	info, err := m.service.GetContainer(ctx, containerID)
 	switch {
 	case err == nil:
 	case errdefs.IsNotFound(err):
 		return out, nil
 	default:
 		return nil, err
 	}
 	out.Status.Exists = true
 	if taskInfo, taskErr := m.service.GetTaskInfo(ctx, containerID); taskErr != nil {
 		// A missing task simply means "not running".
 		if !errdefs.IsNotFound(taskErr) {
 			return nil, taskErr
 		}
 	} else {
 		out.Status.TaskRunning = taskInfo.Status == ctr.TaskStatusRunning
 	}
 	runtimeMetrics, runtimeErr := m.service.GetContainerMetrics(ctx, containerID)
 	if runtimeErr == nil {
 		out.CPU = runtimeMetrics.CPU
 		out.Memory = runtimeMetrics.Memory
 		out.SampledAt = runtimeMetrics.SampledAt
 	} else if errors.Is(runtimeErr, ctr.ErrNotSupported) {
 		out.Supported = false
 		out.UnsupportedReason = unsupportedReasonBackend
 	} else if !errdefs.IsNotFound(runtimeErr) {
 		// Not-found means the task is not running, so CPU and memory metrics
 		// are unavailable; everything else is a real failure.
 		return nil, runtimeErr
 	}
 	if !out.Supported {
 		return out, nil
 	}
 	storage, storageErr := m.collectStorageMetrics(ctx, info)
 	if storageErr != nil {
 		if errors.Is(storageErr, ctr.ErrNotSupported) {
 			return out, nil
 		}
 		return nil, storageErr
 	}
 	out.Storage = storage
 	// Without a runtime sample, stamp the result with the collection time.
 	if out.SampledAt.IsZero() {
 		out.SampledAt = time.Now()
 	}
 	return out, nil
 }
 // collectStorageMetrics mounts the container's snapshot read-only at a
 // temporary location and sums the file sizes under it, reporting the total
 // for path "/". It returns ctr.ErrNotSupported when snapshot mounts are not
 // supported, and reports zero bytes when the mount root does not exist.
 func (m *Manager) collectStorageMetrics(ctx context.Context, info ctr.ContainerInfo) (*ContainerStorageMetrics, error) {
 	mounts, err := m.snapshotMounts(ctx, info)
 	switch {
 	case err == nil:
 	case errors.Is(err, errMountNotSupported):
 		return nil, ctr.ErrNotSupported
 	default:
 		return nil, err
 	}
 	metrics := &ContainerStorageMetrics{Path: "/"}
 	measure := func(root string) error {
 		if _, statErr := os.Stat(root); statErr != nil {
 			if os.IsNotExist(statErr) {
 				return nil
 			}
 			return statErr
 		}
 		total, sizeErr := dirSize(root)
 		if sizeErr == nil {
 			metrics.UsedBytes = total
 		}
 		return sizeErr
 	}
 	if err := mount.WithReadonlyTempMount(ctx, mounts, measure); err != nil {
 		return nil, err
 	}
 	return metrics, nil
 }
 func dirSize(root string) (uint64, error) {
 	var size uint64
 	err := filepath.WalkDir(root, func(_ string, entry fs.DirEntry, walkErr error) error {
 		if walkErr != nil {
 			return walkErr
 		}
 		if entry.IsDir() {
 			return nil
 		}
 		info, err := entry.Info()
 		if err != nil {
 			return err
 		}
 		fileSize := info.Size()
 		if fileSize > 0 {
 			size += uint64(fileSize) //nolint:gosec // file sizes are checked to be positive before conversion
 		}
 		return nil
 	})
 	if err != nil {
 		return 0, err
 	}
 	return size, nil
 }
@@ -882,10 +882,39 @@ export type HandlersChannelMeta = {
    user_config_schema?: ChannelConfigSchema;
 };
 /**
  * CPU section of the container metrics payload: a usage percentage and
  * cumulative total/user/kernel CPU time in nanoseconds.
  */
 export type HandlersContainerCpuMetricsResponse = {
    kernel_nanoseconds?: number;
    usage_nanoseconds?: number;
    usage_percent?: number;
    user_nanoseconds?: number;
 };
 export type HandlersContainerGpuRequest = {
    devices?: Array<string>;
 };
 /**
  * Memory section of the container metrics payload: usage and limit in bytes
  * plus a usage percentage.
  */
 export type HandlersContainerMemoryMetricsResponse = {
    limit_bytes?: number;
    usage_bytes?: number;
    usage_percent?: number;
 };
 /**
  * Container metric sections; each section is optional.
  */
 export type HandlersContainerMetricsPayloadResponse = {
    cpu?: HandlersContainerCpuMetricsResponse;
    memory?: HandlersContainerMemoryMetricsResponse;
    storage?: HandlersContainerStorageMetricsResponse;
 };
 /**
  * Status flags returned with container metrics: whether the container exists
  * and whether its task is running.
  */
 export type HandlersContainerMetricsStatusResponse = {
    exists?: boolean;
    task_running?: boolean;
 };
 /**
  * Storage section of the container metrics payload: the measured path and
  * the bytes used under it.
  */
 export type HandlersContainerStorageMetricsResponse = {
    path?: string;
    used_bytes?: number;
 };
 export type HandlersContextUsage = {
    context_window?: number;
    used_tokens?: number;
@@ -979,6 +1008,15 @@ export type HandlersFsWriteRequest = {
    path?: string;
 };
 /**
  * Response body of `GET /bots/{bot_id}/container/metrics`.
  */
 export type HandlersGetContainerMetricsResponse = {
    backend?: string;
    metrics?: HandlersContainerMetricsPayloadResponse;
    sampled_at?: string;
    status?: HandlersContainerMetricsStatusResponse;
    supported?: boolean;
    unsupported_reason?: string;
 };
 export type HandlersGetContainerResponse = {
    cdi_devices?: Array<string>;
    container_id?: string;
@@ -3038,6 +3076,36 @@ export type PostBotsByBotIdContainerFsWriteResponses = {
 export type PostBotsByBotIdContainerFsWriteResponse = PostBotsByBotIdContainerFsWriteResponses[keyof PostBotsByBotIdContainerFsWriteResponses];
 /**
  * Request shape for `GET /bots/{bot_id}/container/metrics`: only the `bot_id`
  * path parameter is accepted; there is no body and no query string.
  */
 export type GetBotsByBotIdContainerMetricsData = {
    body?: never;
    path: {
        /**
         * Bot ID
         */
        bot_id: string;
    };
    query?: never;
    url: '/bots/{bot_id}/container/metrics';
 };
 /**
  * Error bodies of the container metrics endpoint, keyed by HTTP status code.
  */
 export type GetBotsByBotIdContainerMetricsErrors = {
    /**
     * Internal Server Error
     */
    500: HandlersErrorResponse;
 };
 /** Union of every error body in `GetBotsByBotIdContainerMetricsErrors`. */
 export type GetBotsByBotIdContainerMetricsError = GetBotsByBotIdContainerMetricsErrors[keyof GetBotsByBotIdContainerMetricsErrors];
 /**
  * Success bodies of the container metrics endpoint, keyed by HTTP status code.
  */
 export type GetBotsByBotIdContainerMetricsResponses = {
    /**
     * OK
     */
    200: HandlersGetContainerMetricsResponse;
 };
 /** Union of every success body in `GetBotsByBotIdContainerMetricsResponses`. */
 export type GetBotsByBotIdContainerMetricsResponse = GetBotsByBotIdContainerMetricsResponses[keyof GetBotsByBotIdContainerMetricsResponses];
 export type DeleteBotsByBotIdContainerSkillsData = {
    /**
     * Delete skills payload
@@ -1503,6 +1503,37 @@ const docTemplate = `{
                }
            }
        },
        "/bots/{bot_id}/container/metrics": {
            "get": {
                "tags": [
                    "containerd"
                ],
                "summary": "Get current container metrics for bot",
                "parameters": [
                    {
                        "type": "string",
                        "description": "Bot ID",
                        "name": "bot_id",
                        "in": "path",
                        "required": true
                    }
                ],
                "responses": {
                    "200": {
                        "description": "OK",
                        "schema": {
                            "$ref": "#/definitions/handlers.GetContainerMetricsResponse"
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "schema": {
                            "$ref": "#/definitions/handlers.ErrorResponse"
                        }
                    }
                }
            }
        },
        "/bots/{bot_id}/container/skills": {
            "get": {
                "tags": [
@@ -11653,6 +11684,23 @@ const docTemplate = `{
                }
            }
        },
        "handlers.ContainerCPUMetricsResponse": {
            "type": "object",
            "properties": {
                "kernel_nanoseconds": {
                    "type": "integer"
                },
                "usage_nanoseconds": {
                    "type": "integer"
                },
                "usage_percent": {
                    "type": "number"
                },
                "user_nanoseconds": {
                    "type": "integer"
                }
            }
        },
        "handlers.ContainerGPURequest": {
            "type": "object",
            "properties": {
@@ -11664,6 +11712,56 @@ const docTemplate = `{
                }
            }
        },
        "handlers.ContainerMemoryMetricsResponse": {
            "type": "object",
            "properties": {
                "limit_bytes": {
                    "type": "integer"
                },
                "usage_bytes": {
                    "type": "integer"
                },
                "usage_percent": {
                    "type": "number"
                }
            }
        },
        "handlers.ContainerMetricsPayloadResponse": {
            "type": "object",
            "properties": {
                "cpu": {
                    "$ref": "#/definitions/handlers.ContainerCPUMetricsResponse"
                },
                "memory": {
                    "$ref": "#/definitions/handlers.ContainerMemoryMetricsResponse"
                },
                "storage": {
                    "$ref": "#/definitions/handlers.ContainerStorageMetricsResponse"
                }
            }
        },
        "handlers.ContainerMetricsStatusResponse": {
            "type": "object",
            "properties": {
                "exists": {
                    "type": "boolean"
                },
                "task_running": {
                    "type": "boolean"
                }
            }
        },
        "handlers.ContainerStorageMetricsResponse": {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string"
                },
                "used_bytes": {
                    "type": "integer"
                }
            }
        },
        "handlers.ContextUsage": {
            "type": "object",
            "properties": {
@@ -11889,6 +11987,29 @@ const docTemplate = `{
                }
            }
        },
        "handlers.GetContainerMetricsResponse": {
            "type": "object",
            "properties": {
                "backend": {
                    "type": "string"
                },
                "metrics": {
                    "$ref": "#/definitions/handlers.ContainerMetricsPayloadResponse"
                },
                "sampled_at": {
                    "type": "string"
                },
                "status": {
                    "$ref": "#/definitions/handlers.ContainerMetricsStatusResponse"
                },
                "supported": {
                    "type": "boolean"
                },
                "unsupported_reason": {
                    "type": "string"
                }
            }
        },
        "handlers.GetContainerResponse": {
            "type": "object",
            "properties": {
@@ -1494,6 +1494,37 @@
                }
            }
        },
        "/bots/{bot_id}/container/metrics": {
            "get": {
                "tags": [
                    "containerd"
                ],
                "summary": "Get current container metrics for bot",
                "parameters": [
                    {
                        "type": "string",
                        "description": "Bot ID",
                        "name": "bot_id",
                        "in": "path",
                        "required": true
                    }
                ],
                "responses": {
                    "200": {
                        "description": "OK",
                        "schema": {
                            "$ref": "#/definitions/handlers.GetContainerMetricsResponse"
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "schema": {
                            "$ref": "#/definitions/handlers.ErrorResponse"
                        }
                    }
                }
            }
        },
        "/bots/{bot_id}/container/skills": {
            "get": {
                "tags": [
@@ -11644,6 +11675,23 @@
                }
            }
        },
        "handlers.ContainerCPUMetricsResponse": {
            "type": "object",
            "properties": {
                "kernel_nanoseconds": {
                    "type": "integer"
                },
                "usage_nanoseconds": {
                    "type": "integer"
                },
                "usage_percent": {
                    "type": "number"
                },
                "user_nanoseconds": {
                    "type": "integer"
                }
            }
        },
        "handlers.ContainerGPURequest": {
            "type": "object",
            "properties": {
@@ -11655,6 +11703,56 @@
                }
            }
        },
        "handlers.ContainerMemoryMetricsResponse": {
            "type": "object",
            "properties": {
                "limit_bytes": {
                    "type": "integer"
                },
                "usage_bytes": {
                    "type": "integer"
                },
                "usage_percent": {
                    "type": "number"
                }
            }
        },
        "handlers.ContainerMetricsPayloadResponse": {
            "type": "object",
            "properties": {
                "cpu": {
                    "$ref": "#/definitions/handlers.ContainerCPUMetricsResponse"
                },
                "memory": {
                    "$ref": "#/definitions/handlers.ContainerMemoryMetricsResponse"
                },
                "storage": {
                    "$ref": "#/definitions/handlers.ContainerStorageMetricsResponse"
                }
            }
        },
        "handlers.ContainerMetricsStatusResponse": {
            "type": "object",
            "properties": {
                "exists": {
                    "type": "boolean"
                },
                "task_running": {
                    "type": "boolean"
                }
            }
        },
        "handlers.ContainerStorageMetricsResponse": {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string"
                },
                "used_bytes": {
                    "type": "integer"
                }
            }
        },
        "handlers.ContextUsage": {
            "type": "object",
            "properties": {
@@ -11880,6 +11978,29 @@
                }
            }
        },
        "handlers.GetContainerMetricsResponse": {
            "type": "object",
            "properties": {
                "backend": {
                    "type": "string"
                },
                "metrics": {
                    "$ref": "#/definitions/handlers.ContainerMetricsPayloadResponse"
                },
                "sampled_at": {
                    "type": "string"
                },
                "status": {
                    "$ref": "#/definitions/handlers.ContainerMetricsStatusResponse"
                },
                "supported": {
                    "type": "boolean"
                },
                "unsupported_reason": {
                    "type": "string"
                }
            }
        },
        "handlers.GetContainerResponse": {
            "type": "object",
            "properties": {
@@ -1486,6 +1486,17 @@ definitions:
      user_config_schema:
        $ref: '#/definitions/channel.ConfigSchema'
    type: object
  handlers.ContainerCPUMetricsResponse:
    properties:
      kernel_nanoseconds:
        type: integer
      usage_nanoseconds:
        type: integer
      usage_percent:
        type: number
      user_nanoseconds:
        type: integer
    type: object
  handlers.ContainerGPURequest:
    properties:
      devices:
@@ -1493,6 +1504,38 @@ definitions:
          type: string
        type: array
    type: object
  handlers.ContainerMemoryMetricsResponse:
    properties:
      limit_bytes:
        type: integer
      usage_bytes:
        type: integer
      usage_percent:
        type: number
    type: object
  handlers.ContainerMetricsPayloadResponse:
    properties:
      cpu:
        $ref: '#/definitions/handlers.ContainerCPUMetricsResponse'
      memory:
        $ref: '#/definitions/handlers.ContainerMemoryMetricsResponse'
      storage:
        $ref: '#/definitions/handlers.ContainerStorageMetricsResponse'
    type: object
  handlers.ContainerMetricsStatusResponse:
    properties:
      exists:
        type: boolean
      task_running:
        type: boolean
    type: object
  handlers.ContainerStorageMetricsResponse:
    properties:
      path:
        type: string
      used_bytes:
        type: integer
    type: object
  handlers.ContextUsage:
    properties:
      context_window:
@@ -1638,6 +1681,21 @@ definitions:
      path:
        type: string
    type: object
  handlers.GetContainerMetricsResponse:
    properties:
      backend:
        type: string
      metrics:
        $ref: '#/definitions/handlers.ContainerMetricsPayloadResponse'
      sampled_at:
        type: string
      status:
        $ref: '#/definitions/handlers.ContainerMetricsStatusResponse'
      supported:
        type: boolean
      unsupported_reason:
        type: string
    type: object
  handlers.GetContainerResponse:
    properties:
      cdi_devices:
@@ -4037,6 +4095,26 @@ paths:
      summary: Write text content to a file
      tags:
      - containerd
  /bots/{bot_id}/container/metrics:
    get:
      parameters:
      - description: Bot ID
        in: path
        name: bot_id
        required: true
        type: string
      responses:
        "200":
          description: OK
          schema:
            $ref: '#/definitions/handlers.GetContainerMetricsResponse'
        "500":
          description: Internal Server Error
          schema:
            $ref: '#/definitions/handlers.ErrorResponse'
      summary: Get current container metrics for bot
      tags:
      - containerd
  /bots/{bot_id}/container/skills:
    delete:
      parameters: