Files
Memoh/internal/handlers/containerd.go
T
Acbox e4aca0db13 feat(container): add current container metrics view
Expose a dedicated container metrics endpoint and surface current CPU, memory, and root filesystem usage in the bot container view. This gives operators a quick health snapshot while degrading cleanly on unsupported backends.
2026-04-24 15:10:47 +08:00

954 lines
31 KiB
Go

package handlers
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/containerd/errdefs"
"github.com/labstack/echo/v4"
"github.com/memohai/memoh/internal/accounts"
"github.com/memohai/memoh/internal/bots"
"github.com/memohai/memoh/internal/config"
ctr "github.com/memohai/memoh/internal/containerd"
"github.com/memohai/memoh/internal/mcp"
"github.com/memohai/memoh/internal/policy"
"github.com/memohai/memoh/internal/workspace"
)
// ContainerdHandler serves the per-bot container HTTP endpoints wired up in
// Register. It carries the workspace manager, the workspace configuration, and
// the services used to authorize a caller against a bot.
type ContainerdHandler struct {
manager *workspace.Manager
cfg config.WorkspaceConfig
// containerBackend names the active container backend. Snapshot handlers
// compare it against "apple" to reject unsupported operations, and the
// metrics endpoint echoes it back to the client.
containerBackend string
logger *slog.Logger
toolGateway *mcp.ToolGatewayService
mcpSess map[string]*mcpSession
// NOTE(review): mcpStdioMu presumably guards mcpStdioSess; confirm against
// the MCP stdio handlers, which are not part of this section.
mcpStdioMu sync.Mutex
mcpStdioSess map[string]*mcpStdioSession
botService *bots.Service
accountService *accounts.Service
// policyService is stored by NewContainerdHandler; no use of it is visible
// in this section of the file.
policyService *policy.Service
}
// ContainerGPURequest lists the GPU devices requested for a container.
// CreateContainer copies Devices into workspace.WorkspaceGPUConfig.
type ContainerGPURequest struct {
Devices []string `json:"devices,omitempty"`
}
// CreateContainerRequest is the JSON body accepted by CreateContainer.
// Every field is optional.
type CreateContainerRequest struct {
// Snapshotter falls back to the configured workspace snapshotter when blank.
Snapshotter string `json:"snapshotter,omitempty"`
// RestoreData asks CreateContainer to restore preserved data after the
// container has started, when such data exists.
RestoreData bool `json:"restore_data,omitempty"`
// Image, when non-blank, overrides the image resolved for the bot.
Image string `json:"image,omitempty"`
// GPU, when non-nil, overrides the GPU configuration resolved for the bot.
GPU *ContainerGPURequest `json:"gpu,omitempty"`
}
// CreateContainerResponse describes the created container. CreateContainer
// sends it inside the final "complete" stream event.
type CreateContainerResponse struct {
ContainerID string `json:"container_id"`
Image string `json:"image"`
Snapshotter string `json:"snapshotter"`
CDIDevices []string `json:"cdi_devices,omitempty"`
Started bool `json:"started"`
DataRestored bool `json:"data_restored"`
HasPreservedData bool `json:"has_preserved_data"`
}
// codesync(container-create-stream): keep these SSE payloads in sync with
// packages/sdk/src/container-stream.ts.

// createContainerPullingEvent is sent with Type "pulling" before the image
// pull starts.
type createContainerPullingEvent struct {
Type string `json:"type"`
Image string `json:"image"`
}
// createContainerPullProgressEvent is sent with Type "pull_progress" from the
// pull progress callback and carries the per-layer status.
type createContainerPullProgressEvent struct {
Type string `json:"type"`
Layers []ctr.LayerStatus `json:"layers"`
}
// createContainerCreatingEvent is sent with Type "creating" once the image
// pull has succeeded.
type createContainerCreatingEvent struct {
Type string `json:"type"`
}
// createContainerCompleteEvent is sent with Type "complete" as the last event
// of a successful creation.
type createContainerCompleteEvent struct {
Type string `json:"type"`
Container CreateContainerResponse `json:"container"`
}
// createContainerRestoringEvent is sent with Type "restoring" before the
// container is started when preserved data exists for the bot.
type createContainerRestoringEvent struct {
Type string `json:"type"`
}
// createContainerErrorEvent is sent with Type "error" when a step fails after
// the event stream has started.
type createContainerErrorEvent struct {
Type string `json:"type"`
Message string `json:"message"`
}
// GetContainerResponse is the JSON body returned by GetContainer. Each field
// is copied from the container info reported by the workspace manager.
type GetContainerResponse struct {
ContainerID string `json:"container_id"`
Image string `json:"image"`
Status string `json:"status"`
Namespace string `json:"namespace"`
ContainerPath string `json:"container_path"`
CDIDevices []string `json:"cdi_devices,omitempty"`
TaskRunning bool `json:"task_running"`
HasPreservedData bool `json:"has_preserved_data"`
Legacy bool `json:"legacy"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
// ContainerMetricsStatusResponse reports whether the container exists and
// whether its task is running, as returned alongside the metrics.
type ContainerMetricsStatusResponse struct {
Exists bool `json:"exists"`
TaskRunning bool `json:"task_running"`
}
// ContainerCPUMetricsResponse is the JSON form of the CPU metrics sample.
type ContainerCPUMetricsResponse struct {
UsagePercent float64 `json:"usage_percent"`
UsageNanoseconds uint64 `json:"usage_nanoseconds"`
UserNanoseconds uint64 `json:"user_nanoseconds"`
KernelNanoseconds uint64 `json:"kernel_nanoseconds"`
}
// ContainerMemoryMetricsResponse is the JSON form of the memory metrics sample.
type ContainerMemoryMetricsResponse struct {
UsageBytes uint64 `json:"usage_bytes"`
LimitBytes uint64 `json:"limit_bytes"`
UsagePercent float64 `json:"usage_percent"`
}
// ContainerStorageMetricsResponse is the JSON form of the storage metrics
// sample: the measured path and the bytes used under it.
type ContainerStorageMetricsResponse struct {
Path string `json:"path"`
UsedBytes uint64 `json:"used_bytes"`
}
// ContainerMetricsPayloadResponse groups the individual metric samples. A
// section is nil, and therefore omitted from the JSON, when the manager
// reported no sample for it.
type ContainerMetricsPayloadResponse struct {
CPU *ContainerCPUMetricsResponse `json:"cpu,omitempty"`
Memory *ContainerMemoryMetricsResponse `json:"memory,omitempty"`
Storage *ContainerStorageMetricsResponse `json:"storage,omitempty"`
}
// GetContainerMetricsResponse is the JSON body returned by GetContainerMetrics.
type GetContainerMetricsResponse struct {
Supported bool `json:"supported"`
// Backend is the handler's configured container backend name.
Backend string `json:"backend"`
UnsupportedReason string `json:"unsupported_reason,omitempty"`
Status ContainerMetricsStatusResponse `json:"status"`
Metrics ContainerMetricsPayloadResponse `json:"metrics"`
// SampledAt is left nil when the manager reports a zero sample time.
SampledAt *time.Time `json:"sampled_at,omitempty"`
}
// RollbackRequest is the JSON body accepted by RollbackSnapshot. The handler
// rejects versions below 1.
type RollbackRequest struct {
Version int `json:"version"`
}
// CreateSnapshotRequest is the JSON body accepted by CreateSnapshot.
type CreateSnapshotRequest struct {
SnapshotName string `json:"snapshot_name"`
}
// CreateSnapshotResponse is the JSON body returned by CreateSnapshot.
type CreateSnapshotResponse struct {
ContainerID string `json:"container_id"`
SnapshotName string `json:"snapshot_name"`
RuntimeSnapshotName string `json:"runtime_snapshot_name"`
DisplayName string `json:"display_name"`
Snapshotter string `json:"snapshotter"`
Version int `json:"version"`
// Source is always the manual snapshot source for this endpoint.
Source string `json:"source"`
}
// SnapshotInfo is one entry in the list returned by ListSnapshots.
type SnapshotInfo struct {
Snapshotter string `json:"snapshotter"`
// Name is the managed display name when set, otherwise "Version N" for a
// versioned managed snapshot, otherwise the runtime snapshot name.
Name string `json:"name"`
DisplayName string `json:"display_name,omitempty"`
RuntimeName string `json:"runtime_snapshot_name"`
Parent string `json:"parent,omitempty"`
Kind string `json:"kind"`
// NOTE: encoding/json's omitempty never omits a struct value, so a zero
// time.Time is still serialized for the two timestamps below.
CreatedAt time.Time `json:"created_at,omitempty"`
UpdatedAt time.Time `json:"updated_at,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
// Source is the managed metadata's source when present; otherwise
// ListSnapshots falls back to "image_layer" or "managed".
Source string `json:"source"`
// Managed is true when managed metadata exists for the snapshot.
Managed bool `json:"managed"`
Version *int `json:"version,omitempty"`
}
// ListSnapshotsResponse is the JSON body returned by ListSnapshots.
type ListSnapshotsResponse struct {
Snapshotter string `json:"snapshotter"`
Snapshots []SnapshotInfo `json:"snapshots"`
}
// NewContainerdHandler returns a ContainerdHandler wired to the given manager,
// configuration, backend name, and services. The logger is tagged with
// handler=containerd and both MCP session maps start out empty.
func NewContainerdHandler(log *slog.Logger, manager *workspace.Manager, cfg config.WorkspaceConfig, containerBackend string, botService *bots.Service, accountService *accounts.Service, policyService *policy.Service) *ContainerdHandler {
	scoped := log.With(slog.String("handler", "containerd"))
	return &ContainerdHandler{
		manager:          manager,
		cfg:              cfg,
		containerBackend: containerBackend,
		logger:           scoped,
		mcpSess:          map[string]*mcpSession{},
		mcpStdioSess:     map[string]*mcpStdioSession{},
		botService:       botService,
		accountService:   accountService,
		policyService:    policyService,
	}
}
// Register attaches every route served by this handler to e: the container
// lifecycle, snapshot, data, skills, terminal and file-manager endpoints under
// /bots/:bot_id/container, plus the MCP endpoints directly under /bots/:bot_id.
func (h *ContainerdHandler) Register(e *echo.Echo) {
	containerRoutes := e.Group("/bots/:bot_id/container")

	// Lifecycle and metrics.
	containerRoutes.POST("", h.CreateContainer)
	containerRoutes.GET("", h.GetContainer)
	containerRoutes.GET("/metrics", h.GetContainerMetrics)
	containerRoutes.DELETE("", h.DeleteContainer)
	containerRoutes.POST("/start", h.StartContainer)
	containerRoutes.POST("/stop", h.StopContainer)

	// Snapshots.
	containerRoutes.POST("/snapshots", h.CreateSnapshot)
	containerRoutes.GET("/snapshots", h.ListSnapshots)
	containerRoutes.POST("/snapshots/rollback", h.RollbackSnapshot)

	// Data export, import and restore.
	containerRoutes.POST("/data/export", h.ExportContainerData)
	containerRoutes.POST("/data/import", h.ImportContainerData)
	containerRoutes.POST("/data/restore", h.RestorePreservedData)

	// Skills.
	containerRoutes.GET("/skills", h.ListSkills)
	containerRoutes.POST("/skills", h.UpsertSkills)
	containerRoutes.DELETE("/skills", h.DeleteSkills)
	containerRoutes.POST("/skills/actions", h.ApplySkillAction)

	// Terminal routes
	containerRoutes.GET("/terminal", h.GetTerminalInfo)
	containerRoutes.GET("/terminal/ws", h.HandleTerminalWS)

	// File manager routes
	containerRoutes.GET("/fs", h.FSStat)
	containerRoutes.GET("/fs/list", h.FSList)
	containerRoutes.GET("/fs/read", h.FSRead)
	containerRoutes.GET("/fs/download", h.FSDownload)
	containerRoutes.POST("/fs/write", h.FSWrite)
	containerRoutes.POST("/fs/upload", h.FSUpload)
	containerRoutes.POST("/fs/mkdir", h.FSMkdir)
	containerRoutes.POST("/fs/delete", h.FSDelete)
	containerRoutes.POST("/fs/rename", h.FSRename)

	// MCP routes live directly under the bot, not under /container.
	botRoutes := e.Group("/bots/:bot_id")
	botRoutes.POST("/mcp-stdio", h.CreateMCPStdio)
	botRoutes.POST("/mcp-stdio/:connection_id", h.HandleMCPStdio)
	botRoutes.POST("/tools", h.HandleMCPTools)
}
// CreateContainer godoc
// @Summary Create and start MCP container for bot
// @Tags containerd
// @Param bot_id path string true "Bot ID"
// @Param payload body CreateContainerRequest true "Create container payload"
// @Success 200 {object} CreateContainerResponse "SSE stream of container creation events"
// @Failure 400 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/container [post].
//
// CreateContainer authorizes the caller, binds the request, and resolves the
// image, GPU configuration and snapshotter to use. It then switches the
// response to a server-sent event stream and reports the image pull, container
// creation, optional data restore and the final container description.
//
// Failures that happen before the stream starts are returned as HTTP errors.
// Once the 200 status line has been written, failures are reported as an
// "error" event and the handler returns nil, because the status can no longer
// be changed.
func (h *ContainerdHandler) CreateContainer(c echo.Context) error {
	botID, err := h.requireBotAccess(c)
	if err != nil {
		return err
	}
	var req CreateContainerRequest
	if err := c.Bind(&req); err != nil {
		return echo.NewHTTPError(http.StatusBadRequest, err.Error())
	}
	// Image override lets administrators specify a custom base image.
	// NOTE(saas): if this becomes a multi-tenant SaaS, image override must be
	// validated against an allowlist to prevent SSRF and resource abuse.
	ctx := c.Request().Context()
	imageOverride := strings.TrimSpace(req.Image)
	image, err := h.manager.ResolveWorkspaceImage(ctx, botID)
	if err != nil {
		h.logger.Error("resolve workspace image failed",
			slog.String("bot_id", botID), slog.Any("error", err))
		// Nothing has been written yet, so surface a real 500 instead of an
		// empty 200 response.
		return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
	}
	gpu, err := h.manager.ResolveWorkspaceGPU(ctx, botID)
	if err != nil {
		h.logger.Error("resolve workspace gpu failed",
			slog.String("bot_id", botID), slog.Any("error", err))
		return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
	}
	if imageOverride != "" {
		image = config.NormalizeImageRef(imageOverride)
	}
	if req.GPU != nil {
		gpu = workspace.WorkspaceGPUConfig{Devices: req.GPU.Devices}
	}
	snapshotter := strings.TrimSpace(req.Snapshotter)
	if snapshotter == "" {
		snapshotter = h.cfg.Snapshotter
	}
	flusher, ok := c.Response().Writer.(http.Flusher)
	if !ok {
		return echo.NewHTTPError(http.StatusInternalServerError, "streaming not supported")
	}

	// From here on the response is an event stream: the status is committed
	// and every later failure must be delivered as an "error" event.
	c.Response().Header().Set(echo.HeaderContentType, "text/event-stream")
	c.Response().Header().Set(echo.HeaderCacheControl, "no-cache")
	c.Response().Header().Set(echo.HeaderConnection, "keep-alive")
	c.Response().WriteHeader(http.StatusOK)
	writer := bufio.NewWriter(c.Response().Writer)

	// send serializes writes to the stream; the pull progress callback may run
	// concurrently with this goroutine, so the writer is guarded by mu.
	var mu sync.Mutex
	send := func(payload any) {
		mu.Lock()
		defer mu.Unlock()
		data, err := json.Marshal(payload)
		if err != nil {
			h.logger.Warn("encode container stream event failed",
				slog.String("bot_id", botID), slog.Any("error", err))
			return
		}
		// Best effort: a write error means the client went away, which the
		// request context will report to the ongoing operation.
		_ = writeSSEData(writer, flusher, string(data))
	}
	sendError := func(msg string) {
		send(createContainerErrorEvent{Type: "error", Message: msg})
	}

	// Phase 1: Pull image with progress
	send(createContainerPullingEvent{Type: "pulling", Image: image})
	// pullDone suppresses progress callbacks that arrive after PullImage has
	// returned, so no "pull_progress" event follows the later phases.
	var pullDone atomic.Bool
	_, pullErr := h.manager.PullImage(ctx, image, &ctr.PullImageOptions{
		Unpack:      true,
		Snapshotter: snapshotter,
		OnProgress: func(p ctr.PullProgress) {
			if pullDone.Load() {
				return
			}
			send(createContainerPullProgressEvent{Type: "pull_progress", Layers: p.Layers})
		},
	})
	pullDone.Store(true)
	if pullErr != nil {
		h.logger.Error("image pull failed",
			slog.String("image", image), slog.Any("error", pullErr))
		sendError("image pull failed: " + pullErr.Error())
		return nil
	}

	// Phase 2: Create container (image is local, should be fast)
	send(createContainerCreatingEvent{Type: "creating"})
	// Notify the client before starting if data migration will happen,
	// since restoring a large /data volume can take a while.
	if h.manager.HasPreservedData(botID) {
		send(createContainerRestoringEvent{Type: "restoring"})
	}
	if err := h.manager.StartWithResolvedConfig(ctx, botID, image, gpu); err != nil {
		h.logger.Error("container start failed",
			slog.String("bot_id", botID), slog.Any("error", err))
		sendError("container start failed: " + err.Error())
		return nil
	}
	// Remembering the chosen image/GPU is best effort: the container is already
	// running, so a failure here is only logged.
	if err := h.manager.RememberWorkspaceImage(ctx, botID, image); err != nil {
		h.logger.Warn("remember workspace image failed",
			slog.String("bot_id", botID), slog.String("image", image), slog.Any("error", err))
	}
	if req.GPU != nil {
		if err := h.manager.RememberWorkspaceGPU(ctx, botID, gpu); err != nil {
			h.logger.Warn("remember workspace gpu failed",
				slog.String("bot_id", botID), slog.Any("error", err))
		}
	}
	containerID, err := h.manager.ContainerID(ctx, botID)
	if err != nil {
		h.logger.Error("container ID resolution failed after start",
			slog.String("bot_id", botID), slog.Any("error", err))
		sendError("container ID resolution failed: " + err.Error())
		return nil
	}
	dataRestored := false
	if req.RestoreData && h.manager.HasPreservedData(botID) {
		if err := h.manager.RestorePreservedData(ctx, botID); err != nil {
			h.logger.Error("restore preserved data failed",
				slog.String("bot_id", botID), slog.Any("error", err))
			sendError("restore preserved data failed: " + err.Error())
			return nil
		}
		dataRestored = true
	}
	h.manager.RecordContainerRunning(ctx, botID, containerID, image)
	status, statusErr := h.manager.GetContainerInfo(ctx, botID)
	if statusErr != nil {
		h.logger.Warn("load container status after start failed",
			slog.String("bot_id", botID), slog.Any("error", statusErr))
	}
	// Prefer the devices reported by the running container; fall back to the
	// requested configuration when the status could not be loaded.
	cdiDevices := gpu.Devices
	if status != nil {
		cdiDevices = status.CDIDevices
	}

	// Phase 3: Complete
	send(createContainerCompleteEvent{
		Type: "complete",
		Container: CreateContainerResponse{
			ContainerID:      containerID,
			Image:            image,
			Snapshotter:      snapshotter,
			CDIDevices:       cdiDevices,
			Started:          true,
			DataRestored:     dataRestored,
			HasPreservedData: h.manager.HasPreservedData(botID),
		},
	})
	return nil
}
// GetContainer godoc
// @Summary Get container info for bot
// @Tags containerd
// @Param bot_id path string true "Bot ID"
// @Success 200 {object} GetContainerResponse
// @Failure 404 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/container [get].
//
// GetContainer authorizes the caller and returns the manager's container info
// for the bot. A missing container yields 404; any other lookup error yields
// 500 with the error text.
func (h *ContainerdHandler) GetContainer(c echo.Context) error {
	botID, err := h.requireBotAccess(c)
	if err != nil {
		return err
	}

	info, err := h.manager.GetContainerInfo(c.Request().Context(), botID)
	switch {
	case err == nil:
		// Fall through to the response below.
	case errors.Is(err, workspace.ErrContainerNotFound):
		return echo.NewHTTPError(http.StatusNotFound, "container not found for bot")
	default:
		return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
	}

	body := GetContainerResponse{
		ContainerID:      info.ContainerID,
		Image:            info.Image,
		Status:           info.Status,
		Namespace:        info.Namespace,
		ContainerPath:    info.ContainerPath,
		CDIDevices:       info.CDIDevices,
		TaskRunning:      info.TaskRunning,
		HasPreservedData: info.HasPreservedData,
		Legacy:           info.Legacy,
		CreatedAt:        info.CreatedAt,
		UpdatedAt:        info.UpdatedAt,
	}
	return c.JSON(http.StatusOK, body)
}
// GetContainerMetrics godoc
// @Summary Get current container metrics for bot
// @Tags containerd
// @Param bot_id path string true "Bot ID"
// @Success 200 {object} GetContainerMetricsResponse
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/container/metrics [get].
//
// GetContainerMetrics authorizes the caller and returns the manager's current
// metrics for the bot's container together with the configured backend name.
// SampledAt is only set when the manager reports a non-zero sample time.
func (h *ContainerdHandler) GetContainerMetrics(c echo.Context) error {
	botID, err := h.requireBotAccess(c)
	if err != nil {
		return err
	}

	sample, err := h.manager.GetContainerMetrics(c.Request().Context(), botID)
	if err != nil {
		return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
	}

	var sampledAt *time.Time
	if ts := sample.SampledAt; !ts.IsZero() {
		sampledAt = &ts
	}

	return c.JSON(http.StatusOK, GetContainerMetricsResponse{
		Supported:         sample.Supported,
		Backend:           h.containerBackend,
		UnsupportedReason: sample.UnsupportedReason,
		Status: ContainerMetricsStatusResponse{
			Exists:      sample.Status.Exists,
			TaskRunning: sample.Status.TaskRunning,
		},
		Metrics: ContainerMetricsPayloadResponse{
			CPU:     toContainerCPUMetricsResponse(sample.CPU),
			Memory:  toContainerMemoryMetricsResponse(sample.Memory),
			Storage: toContainerStorageMetricsResponse(sample.Storage),
		},
		SampledAt: sampledAt,
	})
}
// DeleteContainer godoc
// @Summary Delete MCP container for bot
// @Tags containerd
// @Param bot_id path string true "Bot ID"
// @Param preserve_data query bool false "Export /data before deletion"
// @Success 204
// @Failure 404 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/container [delete].
//
// DeleteContainer authorizes the caller and asks the manager to clean up the
// bot's container. Data is preserved only when the preserve_data query
// parameter is exactly "true". A workspace.ErrContainerNotFound from the
// manager is reported as 404, matching the start/stop/get handlers and the
// documented failure codes; any other error is reported as 500.
func (h *ContainerdHandler) DeleteContainer(c echo.Context) error {
	botID, err := h.requireBotAccess(c)
	if err != nil {
		return err
	}
	preserveData := c.QueryParam("preserve_data") == "true"
	if err := h.manager.CleanupBotContainer(c.Request().Context(), botID, preserveData); err != nil {
		if errors.Is(err, workspace.ErrContainerNotFound) {
			return echo.NewHTTPError(http.StatusNotFound, "container not found for bot")
		}
		return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
	}
	return c.NoContent(http.StatusNoContent)
}
// StartContainer godoc
// @Summary Start container task for bot
// @Tags containerd
// @Param bot_id path string true "Bot ID"
// @Success 200 {object} object
// @Failure 404 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/container/start [post].
//
// StartContainer authorizes the caller and asks the manager to ensure the
// bot's container is running. A missing container yields 404; other errors
// yield 500.
func (h *ContainerdHandler) StartContainer(c echo.Context) error {
	botID, err := h.requireBotAccess(c)
	if err != nil {
		return err
	}

	err = h.manager.EnsureRunning(c.Request().Context(), botID)
	switch {
	case err == nil:
		return c.JSON(http.StatusOK, map[string]bool{"started": true})
	case errors.Is(err, workspace.ErrContainerNotFound):
		return echo.NewHTTPError(http.StatusNotFound, "container not found for bot")
	default:
		return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
	}
}
// StopContainer godoc
// @Summary Stop container task for bot
// @Tags containerd
// @Param bot_id path string true "Bot ID"
// @Success 200 {object} object
// @Failure 404 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/container/stop [post].
//
// StopContainer authorizes the caller and asks the manager to stop the bot's
// container. A missing container yields 404; other errors yield 500.
func (h *ContainerdHandler) StopContainer(c echo.Context) error {
	botID, err := h.requireBotAccess(c)
	if err != nil {
		return err
	}

	err = h.manager.StopBot(c.Request().Context(), botID)
	switch {
	case err == nil:
		return c.JSON(http.StatusOK, map[string]bool{"stopped": true})
	case errors.Is(err, workspace.ErrContainerNotFound):
		return echo.NewHTTPError(http.StatusNotFound, "container not found for bot")
	default:
		return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
	}
}
// CreateSnapshot godoc
// @Summary Create container snapshot for bot
// @Tags containerd
// @Param bot_id path string true "Bot ID"
// @Param payload body CreateSnapshotRequest true "Create snapshot payload"
// @Success 200 {object} CreateSnapshotResponse
// @Failure 404 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Failure 501 {object} ErrorResponse "Snapshots currently not supported on this backend"
// @Router /bots/{bot_id}/container/snapshots [post].
//
// CreateSnapshot rejects the request with 501 on the "apple" backend before
// any authorization happens. Otherwise it authorizes the caller, binds the
// payload and asks the manager for a manual snapshot of the bot's container.
func (h *ContainerdHandler) CreateSnapshot(c echo.Context) error {
	const appleBackend = "apple"
	if h.containerBackend == appleBackend {
		return echo.NewHTTPError(http.StatusNotImplemented, "snapshots currently not supported on Apple Container backend")
	}

	botID, err := h.requireBotAccess(c)
	if err != nil {
		return err
	}
	if h.manager == nil {
		return echo.NewHTTPError(http.StatusInternalServerError, "snapshot manager not configured")
	}

	var payload CreateSnapshotRequest
	if bindErr := c.Bind(&payload); bindErr != nil {
		return echo.NewHTTPError(http.StatusBadRequest, bindErr.Error())
	}

	snap, err := h.manager.CreateSnapshot(c.Request().Context(), botID, payload.SnapshotName, workspace.SnapshotSourceManual)
	switch {
	case err == nil:
		// Fall through to the response below.
	case errdefs.IsNotFound(err):
		return echo.NewHTTPError(http.StatusNotFound, "container not found")
	default:
		return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
	}

	return c.JSON(http.StatusOK, CreateSnapshotResponse{
		ContainerID:         snap.ContainerID,
		SnapshotName:        snap.SnapshotName,
		RuntimeSnapshotName: snap.RuntimeSnapshotName,
		DisplayName:         snap.DisplayName,
		Snapshotter:         snap.Snapshotter,
		Version:             snap.Version,
		Source:              workspace.SnapshotSourceManual,
	})
}
// ListSnapshots godoc
// @Summary List snapshots
// @Tags containerd
// @Param bot_id path string true "Bot ID"
// @Param snapshotter query string false "Snapshotter name"
// @Success 200 {object} ListSnapshotsResponse
// @Failure 400 {object} ErrorResponse
// @Failure 404 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Failure 501 {object} ErrorResponse "Snapshots currently not supported on this backend"
// @Router /bots/{bot_id}/container/snapshots [get].
//
// ListSnapshots returns the snapshots that belong to the bot's container: the
// parent chain of the container's active snapshot, plus any managed snapshots
// that exist in the runtime but are not part of that chain. Entries are sorted
// by creation time, then by name.
func (h *ContainerdHandler) ListSnapshots(c echo.Context) error {
// The backend check runs before authorization, so unsupported backends
// answer 501 regardless of the caller.
if h.containerBackend == "apple" {
return echo.NewHTTPError(http.StatusNotImplemented, "snapshots currently not supported on Apple Container backend")
}
botID, err := h.requireBotAccess(c)
if err != nil {
return err
}
if h.manager == nil {
return echo.NewHTTPError(http.StatusInternalServerError, "snapshot manager not configured")
}
data, err := h.manager.ListBotSnapshotData(c.Request().Context(), botID)
if err != nil {
if errdefs.IsNotFound(err) {
return echo.NewHTTPError(http.StatusNotFound, "container not found")
}
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
}
// The optional snapshotter query parameter is only a consistency check; it
// must match the container's snapshotter when supplied.
if req := strings.TrimSpace(c.QueryParam("snapshotter")); req != "" && req != data.Snapshotter {
return echo.NewHTTPError(http.StatusBadRequest, "snapshotter does not match container snapshotter")
}
snapshotKey := strings.TrimSpace(data.Info.SnapshotKey)
if snapshotKey == "" {
return echo.NewHTTPError(http.StatusInternalServerError, "container snapshot key is empty")
}
// Index runtime snapshots by trimmed name for the managed-snapshot pass below.
runtimeByName := make(map[string]ctr.SnapshotInfo, len(data.RuntimeSnapshots))
for _, info := range data.RuntimeSnapshots {
name := strings.TrimSpace(info.Name)
if name == "" {
continue
}
runtimeByName[name] = info
}
// snapshotLineage reports false only when the container's own snapshot key
// is absent from the runtime snapshot list.
lineage, ok := snapshotLineage(snapshotKey, data.RuntimeSnapshots)
if !ok {
h.logger.Warn("container snapshot chain root not found",
slog.String("container_id", data.ContainerID),
slog.String("snapshotter", data.Snapshotter),
slog.String("snapshot_key", snapshotKey),
)
return echo.NewHTTPError(http.StatusInternalServerError, "container snapshot chain not found")
}
items := make([]SnapshotInfo, 0, len(lineage)+len(data.ManagedMeta))
// seen records the runtime names already emitted so the managed pass does
// not add a snapshot twice.
seen := make(map[string]struct{}, len(lineage)+len(data.ManagedMeta))
// appendRuntime converts one runtime snapshot into a response entry. Managed
// metadata, when present, supplies the source, version and display name;
// otherwise fallbackSource is used and the entry is marked unmanaged.
appendRuntime := func(runtimeInfo ctr.SnapshotInfo, fallbackSource string, meta *workspace.ManagedSnapshotMeta) {
source := fallbackSource
managed := false
var version *int
displayName := ""
if meta != nil {
if meta.Source != "" {
source = meta.Source
}
managed = true
version = meta.Version
displayName = strings.TrimSpace(meta.DisplayName)
}
// Name preference: display name, then "Version N", then the runtime name.
name := displayName
if name == "" {
if version != nil {
name = fmt.Sprintf("Version %d", *version)
} else {
name = runtimeInfo.Name
}
}
items = append(items, SnapshotInfo{
Snapshotter: data.Snapshotter,
Name: name,
DisplayName: displayName,
RuntimeName: runtimeInfo.Name,
Parent: runtimeInfo.Parent,
Kind: runtimeInfo.Kind,
CreatedAt: runtimeInfo.Created,
UpdatedAt: runtimeInfo.Updated,
Labels: runtimeInfo.Labels,
Source: source,
Managed: managed,
Version: version,
})
seen[strings.TrimSpace(runtimeInfo.Name)] = struct{}{}
}
// First pass: every snapshot on the container's parent chain. Entries
// without managed metadata are labelled "image_layer".
for _, runtimeInfo := range lineage {
name := strings.TrimSpace(runtimeInfo.Name)
if meta, hasMeta := data.ManagedMeta[name]; hasMeta {
appendRuntime(runtimeInfo, "image_layer", &meta)
continue
}
appendRuntime(runtimeInfo, "image_layer", nil)
}
// Second pass: managed snapshots outside the chain. Map iteration order is
// random, which is fine because the result is sorted afterwards.
for name, meta := range data.ManagedMeta {
if _, exists := seen[name]; exists {
continue
}
runtimeInfo, exists := runtimeByName[name]
if !exists {
// Metadata without a runtime snapshot is skipped, not reported as an error.
h.logger.Warn("managed snapshot not found in runtime",
slog.String("container_id", data.ContainerID),
slog.String("snapshot_name", name),
slog.String("snapshotter", data.Snapshotter),
)
continue
}
appendRuntime(runtimeInfo, "managed", &meta)
}
// Oldest first; ties on creation time are broken by name.
sort.Slice(items, func(i, j int) bool {
if items[i].CreatedAt.Equal(items[j].CreatedAt) {
return items[i].Name < items[j].Name
}
return items[i].CreatedAt.Before(items[j].CreatedAt)
})
return c.JSON(http.StatusOK, ListSnapshotsResponse{
Snapshotter: data.Snapshotter,
Snapshots: items,
})
}
// RollbackSnapshot godoc
// @Summary Rollback container to a previous snapshot version
// @Tags containerd
// @Param bot_id path string true "Bot ID"
// @Param payload body RollbackRequest true "Rollback payload"
// @Success 200 {object} object
// @Failure 400 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/container/snapshots/rollback [post].
//
// RollbackSnapshot authorizes the caller, validates that the requested version
// is at least 1, and asks the manager to roll the bot's container back to it.
func (h *ContainerdHandler) RollbackSnapshot(c echo.Context) error {
	botID, err := h.requireBotAccess(c)
	if err != nil {
		return err
	}
	if h.manager == nil {
		return echo.NewHTTPError(http.StatusInternalServerError, "manager not configured")
	}

	var payload RollbackRequest
	if bindErr := c.Bind(&payload); bindErr != nil {
		return echo.NewHTTPError(http.StatusBadRequest, "invalid request body")
	}
	target := payload.Version
	if target < 1 {
		return echo.NewHTTPError(http.StatusBadRequest, "version must be >= 1")
	}

	rollbackErr := h.manager.RollbackVersion(c.Request().Context(), botID, target)
	if rollbackErr != nil {
		return echo.NewHTTPError(http.StatusInternalServerError, rollbackErr.Error())
	}
	return c.JSON(http.StatusOK, map[string]any{"rolled_back_to": target})
}
// ExportContainerData godoc
// @Summary Export container /data as a tar.gz archive
// @Tags containerd
// @Param bot_id path string true "Bot ID"
// @Produce application/gzip
// @Success 200 {file} file
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/container/data/export [post].
//
// ExportContainerData authorizes the caller and streams the manager's export
// of the bot's data to the response as a gzip attachment named
// "<bot_id>-data.tar.gz". A copy error is returned after the 200 status has
// already been written.
func (h *ContainerdHandler) ExportContainerData(c echo.Context) error {
	botID, err := h.requireBotAccess(c)
	if err != nil {
		return err
	}
	if h.manager == nil {
		return echo.NewHTTPError(http.StatusInternalServerError, "manager not configured")
	}

	archive, err := h.manager.ExportData(c.Request().Context(), botID)
	if err != nil {
		return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
	}
	defer func() { _ = archive.Close() }()

	resp := c.Response()
	headers := resp.Header()
	headers.Set("Content-Type", "application/gzip")
	headers.Set("Content-Disposition", `attachment; filename="`+botID+`-data.tar.gz"`)
	resp.WriteHeader(http.StatusOK)

	if _, copyErr := io.Copy(resp, archive); copyErr != nil {
		return copyErr
	}
	return nil
}
// ImportContainerData godoc
// @Summary Import a tar.gz archive into container /data
// @Tags containerd
// @Param bot_id path string true "Bot ID"
// @Accept multipart/form-data
// @Param file formData file true "tar.gz archive"
// @Success 200 {object} object
// @Failure 400 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/container/data/import [post].
//
// ImportContainerData authorizes the caller, reads the multipart "file" field
// and hands the opened upload to the manager for import into the bot's data.
func (h *ContainerdHandler) ImportContainerData(c echo.Context) error {
	botID, err := h.requireBotAccess(c)
	if err != nil {
		return err
	}
	if h.manager == nil {
		return echo.NewHTTPError(http.StatusInternalServerError, "manager not configured")
	}

	upload, err := c.FormFile("file")
	if err != nil {
		return echo.NewHTTPError(http.StatusBadRequest, "file is required")
	}
	archive, err := upload.Open()
	if err != nil {
		return echo.NewHTTPError(http.StatusBadRequest, "failed to open uploaded file")
	}
	defer func() { _ = archive.Close() }()

	importErr := h.manager.ImportData(c.Request().Context(), botID, archive)
	if importErr != nil {
		return echo.NewHTTPError(http.StatusInternalServerError, importErr.Error())
	}
	return c.JSON(http.StatusOK, map[string]bool{"imported": true})
}
// RestorePreservedData godoc
// @Summary Restore previously preserved data into container
// @Tags containerd
// @Param bot_id path string true "Bot ID"
// @Success 200 {object} object
// @Failure 404 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/container/data/restore [post].
//
// RestorePreservedData authorizes the caller and restores the bot's preserved
// data through the manager. It answers 404 when no preserved data exists.
func (h *ContainerdHandler) RestorePreservedData(c echo.Context) error {
	botID, err := h.requireBotAccess(c)
	if err != nil {
		return err
	}

	switch {
	case h.manager == nil:
		return echo.NewHTTPError(http.StatusInternalServerError, "manager not configured")
	case !h.manager.HasPreservedData(botID):
		return echo.NewHTTPError(http.StatusNotFound, "no preserved data found")
	}

	restoreErr := h.manager.RestorePreservedData(c.Request().Context(), botID)
	if restoreErr != nil {
		return echo.NewHTTPError(http.StatusInternalServerError, restoreErr.Error())
	}
	return c.JSON(http.StatusOK, map[string]bool{"restored": true})
}
// toContainerCPUMetricsResponse copies CPU metrics into their JSON response
// form. A nil input yields a nil result so the section is omitted.
func toContainerCPUMetricsResponse(metrics *ctr.CPUMetrics) *ContainerCPUMetricsResponse {
	if metrics == nil {
		return nil
	}
	out := new(ContainerCPUMetricsResponse)
	out.UsagePercent = metrics.UsagePercent
	out.UsageNanoseconds = metrics.UsageNanoseconds
	out.UserNanoseconds = metrics.UserNanoseconds
	out.KernelNanoseconds = metrics.KernelNanoseconds
	return out
}
// toContainerMemoryMetricsResponse copies memory metrics into their JSON
// response form. A nil input yields a nil result so the section is omitted.
func toContainerMemoryMetricsResponse(metrics *ctr.MemoryMetrics) *ContainerMemoryMetricsResponse {
	if metrics == nil {
		return nil
	}
	out := new(ContainerMemoryMetricsResponse)
	out.UsageBytes = metrics.UsageBytes
	out.LimitBytes = metrics.LimitBytes
	out.UsagePercent = metrics.UsagePercent
	return out
}
// toContainerStorageMetricsResponse copies storage metrics into their JSON
// response form. A nil input yields a nil result so the section is omitted.
func toContainerStorageMetricsResponse(metrics *workspace.ContainerStorageMetrics) *ContainerStorageMetricsResponse {
	if metrics == nil {
		return nil
	}
	out := new(ContainerStorageMetricsResponse)
	out.Path = metrics.Path
	out.UsedBytes = metrics.UsedBytes
	return out
}
// snapshotLineage walks the parent chain starting at root and returns the
// snapshots visited, root first. Names and parents are compared after
// trimming whitespace, and snapshots with a blank name are ignored.
//
// The boolean is false only when root is blank or not present in all. The walk
// stops quietly at the first parent that is missing from all or has already
// been visited, so a broken or cyclic chain still yields a partial lineage.
func snapshotLineage(root string, all []ctr.SnapshotInfo) ([]ctr.SnapshotInfo, bool) {
	start := strings.TrimSpace(root)
	if len(start) == 0 {
		return nil, false
	}

	byName := make(map[string]ctr.SnapshotInfo, len(all))
	for _, snap := range all {
		if key := strings.TrimSpace(snap.Name); key != "" {
			byName[key] = snap
		}
	}
	if _, found := byName[start]; !found {
		return nil, false
	}

	chain := make([]ctr.SnapshotInfo, 0, len(byName))
	walked := make(map[string]struct{}, len(byName))
	for cursor := start; cursor != ""; {
		if _, repeated := walked[cursor]; repeated {
			break
		}
		snap, found := byName[cursor]
		if !found {
			break
		}
		walked[cursor] = struct{}{}
		chain = append(chain, snap)
		cursor = strings.TrimSpace(snap.Parent)
	}
	return chain, true
}
// ---------- auth helpers ----------

// requireBotAccess resolves the caller's channel identity, reads and trims the
// bot_id path parameter, and authorizes the identity against that bot. It
// returns the bot ID on success, a 400 error when bot_id is blank, and
// otherwise whatever error the identity or authorization step produced.
func (h *ContainerdHandler) requireBotAccess(c echo.Context) (string, error) {
	identityID, err := h.requireChannelIdentityID(c)
	if err != nil {
		return "", err
	}

	botID := strings.TrimSpace(c.Param("bot_id"))
	if len(botID) == 0 {
		return "", echo.NewHTTPError(http.StatusBadRequest, "bot id is required")
	}

	_, err = h.authorizeBotAccess(c.Request().Context(), identityID, botID)
	if err != nil {
		return "", err
	}
	return botID, nil
}
// requireChannelIdentityID returns the caller's channel identity ID by
// delegating to the package-level RequireChannelIdentityID helper. It uses no
// handler state.
func (*ContainerdHandler) requireChannelIdentityID(c echo.Context) (string, error) {
return RequireChannelIdentityID(c)
}
// authorizeBotAccess checks that channelIdentityID may access botID by
// delegating to the package-level AuthorizeBotAccess helper with the handler's
// bot and account services, and returns the bot it reports.
func (h *ContainerdHandler) authorizeBotAccess(ctx context.Context, channelIdentityID, botID string) (bots.Bot, error) {
return AuthorizeBotAccess(ctx, h.botService, h.accountService, channelIdentityID, botID)
}
// requireBotAccessWithGuest is intended as the guest-compatible variant of
// requireBotAccess for callers that explicitly opt into guest access via ACL.
//
// NOTE(review): as written, this body performs exactly the same steps as
// requireBotAccess (identity lookup, bot_id validation, AuthorizeBotAccess
// with the bot and account services) and does not consult h.policyService.
// Confirm whether guest/ACL handling lives inside AuthorizeBotAccess or is
// missing here.
func (h *ContainerdHandler) requireBotAccessWithGuest(c echo.Context) (string, error) {
channelIdentityID, err := h.requireChannelIdentityID(c)
if err != nil {
return "", err
}
botID := strings.TrimSpace(c.Param("bot_id"))
if botID == "" {
return "", echo.NewHTTPError(http.StatusBadRequest, "bot id is required")
}
if _, err := AuthorizeBotAccess(c.Request().Context(), h.botService, h.accountService, channelIdentityID, botID); err != nil {
return "", err
}
return botID, nil
}