mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-25 07:00:48 +09:00
c53d35740e
- Add Dockerfile.containerd: multi-stage build that compiles MCP binary, assembles rootfs, creates Docker image tar, and bundles it with containerd - Add containerd-entrypoint.sh: auto-imports MCP image on first start - Fix MCP image reference: rename busybox_image to image in config, use fully-qualified docker.io/library/memoh-mcp:latest everywhere - Make image ref configurable via config.toml instead of hardcoded - Simplify deploy.sh: remove manual nerdctl/containerd-install steps
970 lines
32 KiB
Go
970 lines
32 KiB
Go
package handlers
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
tasktypes "github.com/containerd/containerd/api/types/task"
|
|
"github.com/containerd/containerd/v2/pkg/namespaces"
|
|
"github.com/containerd/containerd/v2/pkg/oci"
|
|
"github.com/containerd/errdefs"
|
|
"github.com/google/uuid"
|
|
"github.com/jackc/pgx/v5/pgtype"
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
|
|
|
"github.com/memohai/memoh/internal/accounts"
|
|
"github.com/memohai/memoh/internal/auth"
|
|
"github.com/memohai/memoh/internal/bots"
|
|
"github.com/memohai/memoh/internal/config"
|
|
ctr "github.com/memohai/memoh/internal/containerd"
|
|
"github.com/memohai/memoh/internal/db"
|
|
dbsqlc "github.com/memohai/memoh/internal/db/sqlc"
|
|
"github.com/memohai/memoh/internal/identity"
|
|
"github.com/memohai/memoh/internal/mcp"
|
|
"github.com/memohai/memoh/internal/policy"
|
|
)
|
|
|
|
type ContainerdHandler struct {
|
|
service ctr.Service
|
|
cfg config.MCPConfig
|
|
namespace string
|
|
logger *slog.Logger
|
|
toolGateway *mcp.ToolGatewayService
|
|
mcpMu sync.Mutex
|
|
mcpSess map[string]*mcpSession
|
|
mcpStdioMu sync.Mutex
|
|
mcpStdioSess map[string]*mcpStdioSession
|
|
botService *bots.Service
|
|
accountService *accounts.Service
|
|
policyService *policy.Service
|
|
queries *dbsqlc.Queries
|
|
}
|
|
|
|
// CreateContainerRequest is the JSON body accepted by CreateContainer.
type CreateContainerRequest struct {
	// Snapshotter optionally names the snapshotter to use; when blank the
	// handler falls back to its configured snapshotter.
	Snapshotter string `json:"snapshotter,omitempty"`
}
|
|
|
|
// CreateContainerResponse is the JSON body returned by CreateContainer.
type CreateContainerResponse struct {
	// ContainerID is the containerd container ID derived from the bot ID.
	ContainerID string `json:"container_id"`
	// Image is the image reference the container was created from.
	Image string `json:"image"`
	// Snapshotter is the snapshotter that was requested for the container.
	Snapshotter string `json:"snapshotter"`
	// Started reports whether the task was started and its network set up.
	Started bool `json:"started"`
}
|
|
|
|
// GetContainerResponse is the JSON body returned by GetContainer. It is
// filled from the database record when one exists, otherwise from containerd.
type GetContainerResponse struct {
	ContainerID string `json:"container_id"`
	Image       string `json:"image"`
	// Status is the stored status, or "unknown" when no database record was used.
	Status    string `json:"status"`
	Namespace string `json:"namespace"`
	// HostPath is the host directory bound into the container; omitted when empty.
	HostPath string `json:"host_path,omitempty"`
	// ContainerPath is the mount point of the data directory inside the container.
	ContainerPath string `json:"container_path"`
	// TaskRunning reports whether containerd lists a running task for the container.
	TaskRunning bool      `json:"task_running"`
	CreatedAt   time.Time `json:"created_at"`
	UpdatedAt   time.Time `json:"updated_at"`
}
|
|
|
|
// CreateSnapshotRequest is the JSON body accepted by CreateSnapshot.
type CreateSnapshotRequest struct {
	// SnapshotName is the name to commit the snapshot under; when blank the
	// handler generates one from the container ID and the current time.
	SnapshotName string `json:"snapshot_name"`
}
|
|
|
|
// CreateSnapshotResponse is the JSON body returned by CreateSnapshot.
type CreateSnapshotResponse struct {
	// ContainerID is the container whose snapshot key was committed.
	ContainerID string `json:"container_id"`
	// SnapshotName is the name the snapshot was committed under.
	SnapshotName string `json:"snapshot_name"`
	// Snapshotter is the snapshotter recorded in the container's info.
	Snapshotter string `json:"snapshotter"`
}
|
|
|
|
// SnapshotInfo describes one snapshot entry in a ListSnapshots response.
type SnapshotInfo struct {
	// Snapshotter is the snapshotter the entry was listed from.
	Snapshotter string `json:"snapshotter"`
	Name        string `json:"name"`
	// Parent is the parent snapshot name; omitted when empty.
	Parent string `json:"parent,omitempty"`
	// Kind is the string form of the snapshot kind reported by containerd.
	Kind string `json:"kind"`
	// NOTE: encoding/json never treats a struct value as empty, so the
	// omitempty option on the two time.Time fields below has no effect and
	// zero times are still emitted.
	CreatedAt time.Time `json:"created_at,omitempty"`
	UpdatedAt time.Time `json:"updated_at,omitempty"`
	// Labels carries the snapshot labels; omitted when there are none.
	Labels map[string]string `json:"labels,omitempty"`
}
|
|
|
|
type ListSnapshotsResponse struct {
|
|
Snapshotter string `json:"snapshotter"`
|
|
Snapshots []SnapshotInfo `json:"snapshots"`
|
|
}
|
|
|
|
func NewContainerdHandler(log *slog.Logger, service ctr.Service, cfg config.MCPConfig, namespace string, botService *bots.Service, accountService *accounts.Service, policyService *policy.Service, queries *dbsqlc.Queries) *ContainerdHandler {
|
|
return &ContainerdHandler{
|
|
service: service,
|
|
cfg: cfg,
|
|
namespace: namespace,
|
|
logger: log.With(slog.String("handler", "containerd")),
|
|
mcpSess: make(map[string]*mcpSession),
|
|
mcpStdioSess: make(map[string]*mcpStdioSession),
|
|
botService: botService,
|
|
accountService: accountService,
|
|
policyService: policyService,
|
|
queries: queries,
|
|
}
|
|
}
|
|
|
|
func (h *ContainerdHandler) Register(e *echo.Echo) {
|
|
group := e.Group("/bots/:bot_id/container")
|
|
group.POST("", h.CreateContainer)
|
|
group.GET("", h.GetContainer)
|
|
group.DELETE("", h.DeleteContainer)
|
|
group.POST("/start", h.StartContainer)
|
|
group.POST("/stop", h.StopContainer)
|
|
group.POST("/snapshots", h.CreateSnapshot)
|
|
group.GET("/snapshots", h.ListSnapshots)
|
|
group.GET("/skills", h.ListSkills)
|
|
group.POST("/skills", h.UpsertSkills)
|
|
group.DELETE("/skills", h.DeleteSkills)
|
|
root := e.Group("/bots/:bot_id")
|
|
root.POST("/mcp-stdio", h.CreateMCPStdio)
|
|
root.POST("/mcp-stdio/:connection_id", h.HandleMCPStdio)
|
|
root.POST("/tools", h.HandleMCPTools)
|
|
}
|
|
|
|
// CreateContainer godoc
|
|
// @Summary Create and start MCP container for bot
|
|
// @Tags containerd
|
|
// @Param bot_id path string true "Bot ID"
|
|
// @Param payload body CreateContainerRequest true "Create container payload"
|
|
// @Success 200 {object} CreateContainerResponse
|
|
// @Failure 400 {object} ErrorResponse
|
|
// @Failure 500 {object} ErrorResponse
|
|
// @Router /bots/{bot_id}/container [post]
|
|
func (h *ContainerdHandler) CreateContainer(c echo.Context) error {
|
|
botID, err := h.requireBotAccess(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var req CreateContainerRequest
|
|
if err := c.Bind(&req); err != nil {
|
|
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
|
|
}
|
|
containerID := mcp.ContainerPrefix + botID
|
|
|
|
image := h.mcpImageRef()
|
|
snapshotter := strings.TrimSpace(req.Snapshotter)
|
|
if snapshotter == "" {
|
|
snapshotter = h.cfg.Snapshotter
|
|
}
|
|
|
|
ctx := c.Request().Context()
|
|
if strings.TrimSpace(h.namespace) != "" {
|
|
ctx = namespaces.WithNamespace(ctx, h.namespace)
|
|
}
|
|
dataRoot := strings.TrimSpace(h.cfg.DataRoot)
|
|
if dataRoot == "" {
|
|
dataRoot = config.DefaultDataRoot
|
|
}
|
|
dataRoot, _ = filepath.Abs(dataRoot)
|
|
dataMount := strings.TrimSpace(h.cfg.DataMount)
|
|
if dataMount == "" {
|
|
dataMount = config.DefaultDataMount
|
|
}
|
|
dataDir := filepath.Join(dataRoot, "bots", botID)
|
|
if err := os.MkdirAll(dataDir, 0o755); err != nil {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
if err := os.MkdirAll(filepath.Join(dataDir, ".skills"), 0o755); err != nil {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
resolvPath, err := ctr.ResolveConfSource(dataDir)
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
|
|
specOpts := []oci.SpecOpts{
|
|
oci.WithMounts([]specs.Mount{
|
|
{
|
|
Destination: dataMount,
|
|
Type: "bind",
|
|
Source: dataDir,
|
|
Options: []string{"rbind", "rw"},
|
|
},
|
|
{
|
|
Destination: "/etc/resolv.conf",
|
|
Type: "bind",
|
|
Source: resolvPath,
|
|
Options: []string{"rbind", "ro"},
|
|
},
|
|
}),
|
|
oci.WithProcessArgs("/bin/sh", "-lc", fmt.Sprintf("bootstrap(){ [ -e /app/mcp ] || { mkdir -p /app; [ -f /opt/mcp ] && cp -a /opt/mcp /app/mcp 2>/dev/null || true; }; if [ -d /opt/mcp-template ]; then mkdir -p %q; for f in /opt/mcp-template/*; do name=$(basename \"$f\"); [ -e %q/\"$name\" ] || cp -a \"$f\" %q/\"$name\" 2>/dev/null || true; done; fi; }; bootstrap; exec /app/mcp", dataMount, dataMount, dataMount)),
|
|
}
|
|
|
|
_, err = h.service.CreateContainer(ctx, ctr.CreateContainerRequest{
|
|
ID: containerID,
|
|
ImageRef: image,
|
|
Snapshotter: snapshotter,
|
|
Labels: map[string]string{
|
|
mcp.BotLabelKey: botID,
|
|
},
|
|
SpecOpts: specOpts,
|
|
})
|
|
if err != nil && !errdefs.IsAlreadyExists(err) {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, "snapshotter="+snapshotter+" image="+image+" err="+err.Error())
|
|
}
|
|
|
|
if h.queries != nil {
|
|
pgBotID, parseErr := db.ParseUUID(botID)
|
|
if parseErr == nil {
|
|
ns := strings.TrimSpace(h.namespace)
|
|
if ns == "" {
|
|
ns = "default"
|
|
}
|
|
if dbErr := h.queries.UpsertContainer(c.Request().Context(), dbsqlc.UpsertContainerParams{
|
|
BotID: pgBotID,
|
|
ContainerID: containerID,
|
|
ContainerName: containerID,
|
|
Image: image,
|
|
Status: "created",
|
|
Namespace: ns,
|
|
AutoStart: true,
|
|
HostPath: pgtype.Text{String: dataDir, Valid: true},
|
|
ContainerPath: dataMount,
|
|
}); dbErr != nil {
|
|
h.logger.Error("failed to upsert container record",
|
|
slog.String("bot_id", botID), slog.Any("error", dbErr))
|
|
}
|
|
}
|
|
}
|
|
|
|
started := false
|
|
if task, err := h.service.StartTask(ctx, containerID, &ctr.StartTaskOptions{
|
|
UseStdio: false,
|
|
}); err == nil {
|
|
if netErr := ctr.SetupNetwork(ctx, task, containerID); netErr == nil {
|
|
started = true
|
|
if h.queries != nil {
|
|
if pgBotID, parseErr := db.ParseUUID(botID); parseErr == nil {
|
|
if dbErr := h.queries.UpdateContainerStarted(c.Request().Context(), pgBotID); dbErr != nil {
|
|
h.logger.Error("failed to update container started status",
|
|
slog.String("bot_id", botID), slog.Any("error", dbErr))
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
_ = h.service.StopTask(ctx, containerID, &ctr.StopTaskOptions{Force: true})
|
|
h.logger.Error("mcp container network setup failed",
|
|
slog.String("container_id", containerID),
|
|
slog.Any("error", netErr),
|
|
)
|
|
}
|
|
} else {
|
|
h.logger.Error("mcp container start failed",
|
|
slog.String("container_id", containerID),
|
|
slog.Any("error", err),
|
|
)
|
|
}
|
|
|
|
return c.JSON(http.StatusOK, CreateContainerResponse{
|
|
ContainerID: containerID,
|
|
Image: image,
|
|
Snapshotter: snapshotter,
|
|
Started: started,
|
|
})
|
|
}
|
|
|
|
// ensureContainerAndTask verifies the container exists in containerd and its task is
|
|
// running. If the container is missing (e.g. after a VM restart) it is recreated via
|
|
// SetupBotContainer. This prevents permanent desync between DB and containerd state.
|
|
func (h *ContainerdHandler) ensureContainerAndTask(ctx context.Context, containerID, botID string) error {
|
|
// Check whether the container exists in containerd.
|
|
_, err := h.service.GetContainer(ctx, containerID)
|
|
if err != nil {
|
|
if !errdefs.IsNotFound(err) {
|
|
return err
|
|
}
|
|
// Container gone — rebuild from scratch.
|
|
h.logger.Warn("container missing in containerd, rebuilding",
|
|
slog.String("bot_id", botID),
|
|
slog.String("container_id", containerID),
|
|
)
|
|
return h.SetupBotContainer(ctx, botID)
|
|
}
|
|
|
|
// Container exists — make sure the task is running.
|
|
tasks, err := h.service.ListTasks(ctx, &ctr.ListTasksOptions{
|
|
Filter: "container.id==" + containerID,
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if len(tasks) > 0 {
|
|
if tasks[0].Status == tasktypes.Status_RUNNING {
|
|
return nil
|
|
}
|
|
_ = h.service.DeleteTask(ctx, containerID, &ctr.DeleteTaskOptions{Force: true})
|
|
}
|
|
|
|
task, err := h.service.StartTask(ctx, containerID, &ctr.StartTaskOptions{
|
|
UseStdio: false,
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := ctr.SetupNetwork(ctx, task, containerID); err != nil {
|
|
_ = h.service.StopTask(ctx, containerID, &ctr.StopTaskOptions{Force: true})
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// botContainerID resolves container_id for a bot from the database.
|
|
func (h *ContainerdHandler) botContainerID(ctx context.Context, botID string) (string, error) {
|
|
if h.queries != nil {
|
|
pgBotID, err := db.ParseUUID(botID)
|
|
if err == nil {
|
|
row, err := h.queries.GetContainerByBotID(ctx, pgBotID)
|
|
if err == nil && strings.TrimSpace(row.ContainerID) != "" {
|
|
return row.ContainerID, nil
|
|
}
|
|
}
|
|
}
|
|
// Fallback: search by containerd label
|
|
containers, err := h.service.ListContainersByLabel(ctx, mcp.BotLabelKey, botID)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if len(containers) == 0 {
|
|
return "", echo.NewHTTPError(http.StatusNotFound, "container not found")
|
|
}
|
|
infoCtx := ctx
|
|
if strings.TrimSpace(h.namespace) != "" {
|
|
infoCtx = namespaces.WithNamespace(ctx, h.namespace)
|
|
}
|
|
bestID := ""
|
|
var bestUpdated time.Time
|
|
for _, container := range containers {
|
|
info, err := container.Info(infoCtx)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if bestID == "" || info.UpdatedAt.After(bestUpdated) {
|
|
bestID = info.ID
|
|
bestUpdated = info.UpdatedAt
|
|
}
|
|
}
|
|
return bestID, nil
|
|
}
|
|
|
|
// GetContainer godoc
|
|
// @Summary Get container info for bot
|
|
// @Tags containerd
|
|
// @Param bot_id path string true "Bot ID"
|
|
// @Success 200 {object} GetContainerResponse
|
|
// @Failure 404 {object} ErrorResponse
|
|
// @Failure 500 {object} ErrorResponse
|
|
// @Router /bots/{bot_id}/container [get]
|
|
func (h *ContainerdHandler) GetContainer(c echo.Context) error {
|
|
botID, err := h.requireBotAccess(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ctx := c.Request().Context()
|
|
|
|
if h.queries != nil {
|
|
pgBotID, parseErr := db.ParseUUID(botID)
|
|
if parseErr == nil {
|
|
row, dbErr := h.queries.GetContainerByBotID(ctx, pgBotID)
|
|
if dbErr == nil {
|
|
taskRunning := h.isTaskRunning(ctx, row.ContainerID)
|
|
hostPath := ""
|
|
if row.HostPath.Valid {
|
|
hostPath = row.HostPath.String
|
|
}
|
|
createdAt := time.Time{}
|
|
if row.CreatedAt.Valid {
|
|
createdAt = row.CreatedAt.Time
|
|
}
|
|
updatedAt := time.Time{}
|
|
if row.UpdatedAt.Valid {
|
|
updatedAt = row.UpdatedAt.Time
|
|
}
|
|
return c.JSON(http.StatusOK, GetContainerResponse{
|
|
ContainerID: row.ContainerID,
|
|
Image: row.Image,
|
|
Status: row.Status,
|
|
Namespace: row.Namespace,
|
|
HostPath: hostPath,
|
|
ContainerPath: row.ContainerPath,
|
|
TaskRunning: taskRunning,
|
|
CreatedAt: createdAt,
|
|
UpdatedAt: updatedAt,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
containerID, err := h.botContainerID(ctx, botID)
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusNotFound, "container not found for bot")
|
|
}
|
|
infoCtx := ctx
|
|
if strings.TrimSpace(h.namespace) != "" {
|
|
infoCtx = namespaces.WithNamespace(ctx, h.namespace)
|
|
}
|
|
container, err := h.service.GetContainer(infoCtx, containerID)
|
|
if err != nil {
|
|
if errdefs.IsNotFound(err) {
|
|
return echo.NewHTTPError(http.StatusNotFound, "container not found")
|
|
}
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
info, err := container.Info(infoCtx)
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
return c.JSON(http.StatusOK, GetContainerResponse{
|
|
ContainerID: info.ID,
|
|
Image: info.Image,
|
|
Status: "unknown",
|
|
Namespace: h.namespace,
|
|
TaskRunning: h.isTaskRunning(ctx, containerID),
|
|
CreatedAt: info.CreatedAt,
|
|
UpdatedAt: info.UpdatedAt,
|
|
})
|
|
}
|
|
|
|
// DeleteContainer godoc
|
|
// @Summary Delete MCP container for bot
|
|
// @Tags containerd
|
|
// @Param bot_id path string true "Bot ID"
|
|
// @Success 204
|
|
// @Failure 404 {object} ErrorResponse
|
|
// @Failure 500 {object} ErrorResponse
|
|
// @Router /bots/{bot_id}/container [delete]
|
|
func (h *ContainerdHandler) DeleteContainer(c echo.Context) error {
|
|
botID, err := h.requireBotAccess(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := h.CleanupBotContainer(c.Request().Context(), botID); err != nil {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
return c.NoContent(http.StatusNoContent)
|
|
}
|
|
|
|
// StartContainer godoc
|
|
// @Summary Start container task for bot
|
|
// @Tags containerd
|
|
// @Param bot_id path string true "Bot ID"
|
|
// @Success 200 {object} object
|
|
// @Failure 404 {object} ErrorResponse
|
|
// @Failure 500 {object} ErrorResponse
|
|
// @Router /bots/{bot_id}/container/start [post]
|
|
func (h *ContainerdHandler) StartContainer(c echo.Context) error {
|
|
botID, err := h.requireBotAccess(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ctx := c.Request().Context()
|
|
containerID, err := h.botContainerID(ctx, botID)
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusNotFound, "container not found for bot")
|
|
}
|
|
if err := h.ensureContainerAndTask(ctx, containerID, botID); err != nil {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
if h.queries != nil {
|
|
if pgBotID, parseErr := db.ParseUUID(botID); parseErr == nil {
|
|
if dbErr := h.queries.UpdateContainerStarted(ctx, pgBotID); dbErr != nil {
|
|
h.logger.Error("failed to update container started status",
|
|
slog.String("bot_id", botID), slog.Any("error", dbErr))
|
|
}
|
|
}
|
|
}
|
|
return c.JSON(http.StatusOK, map[string]bool{"started": true})
|
|
}
|
|
|
|
// StopContainer godoc
|
|
// @Summary Stop container task for bot
|
|
// @Tags containerd
|
|
// @Param bot_id path string true "Bot ID"
|
|
// @Success 200 {object} object
|
|
// @Failure 404 {object} ErrorResponse
|
|
// @Failure 500 {object} ErrorResponse
|
|
// @Router /bots/{bot_id}/container/stop [post]
|
|
func (h *ContainerdHandler) StopContainer(c echo.Context) error {
|
|
botID, err := h.requireBotAccess(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ctx := c.Request().Context()
|
|
containerID, err := h.botContainerID(ctx, botID)
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusNotFound, "container not found for bot")
|
|
}
|
|
if err := h.service.StopTask(ctx, containerID, &ctr.StopTaskOptions{
|
|
Timeout: 10 * time.Second,
|
|
Force: true,
|
|
}); err != nil && !errdefs.IsNotFound(err) {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
_ = h.service.DeleteTask(ctx, containerID, &ctr.DeleteTaskOptions{Force: true})
|
|
if h.queries != nil {
|
|
if pgBotID, parseErr := db.ParseUUID(botID); parseErr == nil {
|
|
if dbErr := h.queries.UpdateContainerStopped(ctx, pgBotID); dbErr != nil {
|
|
h.logger.Error("failed to update container stopped status",
|
|
slog.String("bot_id", botID), slog.Any("error", dbErr))
|
|
}
|
|
}
|
|
}
|
|
return c.JSON(http.StatusOK, map[string]bool{"stopped": true})
|
|
}
|
|
|
|
// CreateSnapshot godoc
|
|
// @Summary Create container snapshot for bot
|
|
// @Tags containerd
|
|
// @Param bot_id path string true "Bot ID"
|
|
// @Param payload body CreateSnapshotRequest true "Create snapshot payload"
|
|
// @Success 200 {object} CreateSnapshotResponse
|
|
// @Failure 404 {object} ErrorResponse
|
|
// @Failure 500 {object} ErrorResponse
|
|
// @Router /bots/{bot_id}/container/snapshots [post]
|
|
func (h *ContainerdHandler) CreateSnapshot(c echo.Context) error {
|
|
botID, err := h.requireBotAccess(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var req CreateSnapshotRequest
|
|
if err := c.Bind(&req); err != nil {
|
|
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
|
|
}
|
|
ctx := c.Request().Context()
|
|
containerID, err := h.botContainerID(ctx, botID)
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusNotFound, "container not found for bot")
|
|
}
|
|
container, err := h.service.GetContainer(ctx, containerID)
|
|
if err != nil {
|
|
if errdefs.IsNotFound(err) {
|
|
return echo.NewHTTPError(http.StatusNotFound, "container not found")
|
|
}
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
infoCtx := ctx
|
|
if strings.TrimSpace(h.namespace) != "" {
|
|
infoCtx = namespaces.WithNamespace(ctx, h.namespace)
|
|
}
|
|
info, err := container.Info(infoCtx)
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
snapshotName := strings.TrimSpace(req.SnapshotName)
|
|
if snapshotName == "" {
|
|
snapshotName = containerID + "-" + time.Now().Format("20060102150405")
|
|
}
|
|
if err := h.service.CommitSnapshot(ctx, info.Snapshotter, snapshotName, info.SnapshotKey); err != nil {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
return c.JSON(http.StatusOK, CreateSnapshotResponse{
|
|
ContainerID: containerID,
|
|
SnapshotName: snapshotName,
|
|
Snapshotter: info.Snapshotter,
|
|
})
|
|
}
|
|
|
|
// ListSnapshots godoc
|
|
// @Summary List snapshots
|
|
// @Tags containerd
|
|
// @Param bot_id path string true "Bot ID"
|
|
// @Param snapshotter query string false "Snapshotter name"
|
|
// @Success 200 {object} ListSnapshotsResponse
|
|
// @Router /bots/{bot_id}/container/snapshots [get]
|
|
func (h *ContainerdHandler) ListSnapshots(c echo.Context) error {
|
|
if _, err := h.requireBotAccess(c); err != nil {
|
|
return err
|
|
}
|
|
snapshotter := strings.TrimSpace(c.QueryParam("snapshotter"))
|
|
if snapshotter == "" {
|
|
snapshotter = strings.TrimSpace(h.cfg.Snapshotter)
|
|
}
|
|
if snapshotter == "" {
|
|
snapshotter = "overlayfs"
|
|
}
|
|
snapshots, err := h.service.ListSnapshots(c.Request().Context(), snapshotter)
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
items := make([]SnapshotInfo, 0, len(snapshots))
|
|
for _, info := range snapshots {
|
|
items = append(items, SnapshotInfo{
|
|
Snapshotter: snapshotter,
|
|
Name: info.Name,
|
|
Parent: info.Parent,
|
|
Kind: info.Kind.String(),
|
|
CreatedAt: info.Created,
|
|
UpdatedAt: info.Updated,
|
|
Labels: info.Labels,
|
|
})
|
|
}
|
|
sort.Slice(items, func(i, j int) bool {
|
|
if items[i].CreatedAt.Equal(items[j].CreatedAt) {
|
|
return items[i].Name < items[j].Name
|
|
}
|
|
return items[i].CreatedAt.Before(items[j].CreatedAt)
|
|
})
|
|
return c.JSON(http.StatusOK, ListSnapshotsResponse{
|
|
Snapshotter: snapshotter,
|
|
Snapshots: items,
|
|
})
|
|
}
|
|
|
|
// ---------- auth helpers ----------
|
|
|
|
func (h *ContainerdHandler) mcpImageRef() string {
|
|
if h.cfg.Image != "" {
|
|
return h.cfg.Image
|
|
}
|
|
return config.DefaultMCPImage
|
|
}
|
|
|
|
// requireBotAccess extracts bot_id from path, validates user auth, and authorizes bot access.
|
|
func (h *ContainerdHandler) requireBotAccess(c echo.Context) (string, error) {
|
|
channelIdentityID, err := h.requireChannelIdentityID(c)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
botID := strings.TrimSpace(c.Param("bot_id"))
|
|
if botID == "" {
|
|
return "", echo.NewHTTPError(http.StatusBadRequest, "bot id is required")
|
|
}
|
|
if _, err := h.authorizeBotAccess(c.Request().Context(), channelIdentityID, botID); err != nil {
|
|
return "", err
|
|
}
|
|
return botID, nil
|
|
}
|
|
|
|
func (h *ContainerdHandler) requireChannelIdentityID(c echo.Context) (string, error) {
|
|
channelIdentityID, err := auth.UserIDFromContext(c)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if err := identity.ValidateChannelIdentityID(channelIdentityID); err != nil {
|
|
return "", echo.NewHTTPError(http.StatusBadRequest, err.Error())
|
|
}
|
|
return channelIdentityID, nil
|
|
}
|
|
|
|
func (h *ContainerdHandler) authorizeBotAccess(ctx context.Context, channelIdentityID, botID string) (bots.Bot, error) {
|
|
if h.botService == nil || h.accountService == nil {
|
|
return bots.Bot{}, echo.NewHTTPError(http.StatusInternalServerError, "bot services not configured")
|
|
}
|
|
isAdmin, err := h.accountService.IsAdmin(ctx, channelIdentityID)
|
|
if err != nil {
|
|
return bots.Bot{}, echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
bot, err := h.botService.AuthorizeAccess(ctx, channelIdentityID, botID, isAdmin, bots.AccessPolicy{AllowPublicMember: false})
|
|
if err != nil {
|
|
if errors.Is(err, bots.ErrBotNotFound) {
|
|
return bots.Bot{}, echo.NewHTTPError(http.StatusNotFound, "bot not found")
|
|
}
|
|
if errors.Is(err, bots.ErrBotAccessDenied) && h.policyService != nil {
|
|
allowGuest, policyErr := h.policyService.AllowGuest(ctx, botID)
|
|
if policyErr == nil && allowGuest {
|
|
bot, getErr := h.botService.Get(ctx, botID)
|
|
if getErr == nil {
|
|
return bot, nil
|
|
}
|
|
}
|
|
}
|
|
if errors.Is(err, bots.ErrBotAccessDenied) {
|
|
return bots.Bot{}, echo.NewHTTPError(http.StatusForbidden, "bot access denied")
|
|
}
|
|
return bots.Bot{}, echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
return bot, nil
|
|
}
|
|
|
|
// SetupBotContainer creates and starts the MCP container for a bot.
|
|
func (h *ContainerdHandler) SetupBotContainer(ctx context.Context, botID string) error {
|
|
containerID := mcp.ContainerPrefix + botID
|
|
|
|
image := h.mcpImageRef()
|
|
snapshotter := strings.TrimSpace(h.cfg.Snapshotter)
|
|
|
|
if strings.TrimSpace(h.namespace) != "" {
|
|
ctx = namespaces.WithNamespace(ctx, h.namespace)
|
|
}
|
|
|
|
dataRoot := strings.TrimSpace(h.cfg.DataRoot)
|
|
if dataRoot == "" {
|
|
dataRoot = config.DefaultDataRoot
|
|
}
|
|
dataRoot, _ = filepath.Abs(dataRoot)
|
|
dataMount := strings.TrimSpace(h.cfg.DataMount)
|
|
if dataMount == "" {
|
|
dataMount = config.DefaultDataMount
|
|
}
|
|
dataDir := filepath.Join(dataRoot, "bots", botID)
|
|
if err := os.MkdirAll(dataDir, 0o755); err != nil {
|
|
return err
|
|
}
|
|
if err := os.MkdirAll(filepath.Join(dataDir, ".skills"), 0o755); err != nil {
|
|
return err
|
|
}
|
|
resolvPath, err := ctr.ResolveConfSource(dataDir)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
specOpts := []oci.SpecOpts{
|
|
oci.WithMounts([]specs.Mount{
|
|
{
|
|
Destination: dataMount,
|
|
Type: "bind",
|
|
Source: dataDir,
|
|
Options: []string{"rbind", "rw"},
|
|
},
|
|
{
|
|
Destination: "/etc/resolv.conf",
|
|
Type: "bind",
|
|
Source: resolvPath,
|
|
Options: []string{"rbind", "ro"},
|
|
},
|
|
}),
|
|
oci.WithProcessArgs("/bin/sh", "-lc", fmt.Sprintf("bootstrap(){ [ -e /app/mcp ] || { mkdir -p /app; [ -f /opt/mcp ] && cp -a /opt/mcp /app/mcp 2>/dev/null || true; }; if [ -d /opt/mcp-template ]; then mkdir -p %q; for f in /opt/mcp-template/*; do name=$(basename \"$f\"); [ -e %q/\"$name\" ] || cp -a \"$f\" %q/\"$name\" 2>/dev/null || true; done; fi; }; bootstrap; exec /app/mcp", dataMount, dataMount, dataMount)),
|
|
}
|
|
|
|
_, err = h.service.CreateContainer(ctx, ctr.CreateContainerRequest{
|
|
ID: containerID,
|
|
ImageRef: image,
|
|
Snapshotter: snapshotter,
|
|
Labels: map[string]string{
|
|
mcp.BotLabelKey: botID,
|
|
},
|
|
SpecOpts: specOpts,
|
|
})
|
|
if err != nil && !errdefs.IsAlreadyExists(err) {
|
|
return err
|
|
}
|
|
|
|
if h.queries != nil {
|
|
pgBotID, parseErr := db.ParseUUID(botID)
|
|
if parseErr == nil {
|
|
ns := strings.TrimSpace(h.namespace)
|
|
if ns == "" {
|
|
ns = "default"
|
|
}
|
|
if dbErr := h.queries.UpsertContainer(ctx, dbsqlc.UpsertContainerParams{
|
|
BotID: pgBotID,
|
|
ContainerID: containerID,
|
|
ContainerName: containerID,
|
|
Image: image,
|
|
Status: "created",
|
|
Namespace: ns,
|
|
AutoStart: true,
|
|
HostPath: pgtype.Text{String: dataDir, Valid: true},
|
|
ContainerPath: dataMount,
|
|
}); dbErr != nil {
|
|
h.logger.Error("setup bot container: failed to upsert container record",
|
|
slog.String("bot_id", botID), slog.Any("error", dbErr))
|
|
}
|
|
}
|
|
}
|
|
|
|
if task, err := h.service.StartTask(ctx, containerID, &ctr.StartTaskOptions{
|
|
UseStdio: false,
|
|
}); err == nil {
|
|
if netErr := ctr.SetupNetwork(ctx, task, containerID); netErr == nil {
|
|
if h.queries != nil {
|
|
if pgBotID, parseErr := db.ParseUUID(botID); parseErr == nil {
|
|
if dbErr := h.queries.UpdateContainerStarted(ctx, pgBotID); dbErr != nil {
|
|
h.logger.Error("setup bot container: failed to update container started status",
|
|
slog.String("bot_id", botID), slog.Any("error", dbErr))
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
_ = h.service.StopTask(ctx, containerID, &ctr.StopTaskOptions{Force: true})
|
|
h.logger.Error("setup bot container: network setup failed",
|
|
slog.String("bot_id", botID),
|
|
slog.String("container_id", containerID),
|
|
slog.Any("error", netErr),
|
|
)
|
|
}
|
|
} else {
|
|
h.logger.Error("setup bot container: task start failed",
|
|
slog.String("bot_id", botID),
|
|
slog.String("container_id", containerID),
|
|
slog.Any("error", err),
|
|
)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// CleanupBotContainer removes the containerd container and DB record for a bot.
|
|
func (h *ContainerdHandler) CleanupBotContainer(ctx context.Context, botID string) error {
|
|
h.logger.Info("CleanupBotContainer starting", slog.String("bot_id", botID))
|
|
containerID, err := h.botContainerID(ctx, botID)
|
|
if err != nil {
|
|
h.logger.Warn("CleanupBotContainer: container not found for bot, cleaning up DB only",
|
|
slog.String("bot_id", botID),
|
|
slog.Any("error", err),
|
|
)
|
|
if h.queries != nil {
|
|
if pgBotID, parseErr := db.ParseUUID(botID); parseErr == nil {
|
|
if dbErr := h.queries.DeleteContainerByBotID(ctx, pgBotID); dbErr != nil {
|
|
h.logger.Error("CleanupBotContainer: failed to delete DB record",
|
|
slog.String("bot_id", botID), slog.Any("error", dbErr))
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
h.logger.Info("CleanupBotContainer: found container",
|
|
slog.String("bot_id", botID),
|
|
slog.String("container_id", containerID),
|
|
)
|
|
|
|
if task, taskErr := h.service.GetTask(ctx, containerID); taskErr == nil {
|
|
h.logger.Info("CleanupBotContainer: removing network", slog.String("container_id", containerID))
|
|
_ = ctr.RemoveNetwork(ctx, task, containerID)
|
|
}
|
|
h.logger.Info("CleanupBotContainer: stopping task", slog.String("container_id", containerID))
|
|
_ = h.service.StopTask(ctx, containerID, &ctr.StopTaskOptions{
|
|
Timeout: 5 * time.Second,
|
|
Force: true,
|
|
})
|
|
h.logger.Info("CleanupBotContainer: deleting task", slog.String("container_id", containerID))
|
|
_ = h.service.DeleteTask(ctx, containerID, &ctr.DeleteTaskOptions{Force: true})
|
|
|
|
h.logger.Info("CleanupBotContainer: deleting container", slog.String("container_id", containerID))
|
|
if err := h.service.DeleteContainer(ctx, containerID, &ctr.DeleteContainerOptions{
|
|
CleanupSnapshot: true,
|
|
}); err != nil && !errdefs.IsNotFound(err) {
|
|
h.logger.Error("CleanupBotContainer: failed to delete container",
|
|
slog.String("container_id", containerID),
|
|
slog.Any("error", err),
|
|
)
|
|
return err
|
|
}
|
|
|
|
if h.queries != nil {
|
|
h.logger.Info("CleanupBotContainer: deleting container record from DB", slog.String("bot_id", botID))
|
|
if pgBotID, parseErr := db.ParseUUID(botID); parseErr == nil {
|
|
if dbErr := h.queries.DeleteContainerByBotID(ctx, pgBotID); dbErr != nil {
|
|
h.logger.Error("CleanupBotContainer: failed to delete DB record",
|
|
slog.String("bot_id", botID), slog.Any("error", dbErr))
|
|
}
|
|
}
|
|
}
|
|
h.logger.Info("CleanupBotContainer finished", slog.String("bot_id", botID))
|
|
return nil
|
|
}
|
|
|
|
func (h *ContainerdHandler) isTaskRunning(ctx context.Context, containerID string) bool {
|
|
tasks, err := h.service.ListTasks(ctx, &ctr.ListTasksOptions{
|
|
Filter: "container.id==" + containerID,
|
|
})
|
|
return err == nil && len(tasks) > 0 && tasks[0].Status == tasktypes.Status_RUNNING
|
|
}
|
|
|
|
// ReconcileContainers compares the DB containers table against actual containerd
|
|
// state on startup. For each auto_start container in DB it verifies the container
|
|
// and task exist; if missing they are rebuilt via SetupBotContainer. Containers that
|
|
// the DB claims are running but are not present in containerd get corrected.
|
|
func (h *ContainerdHandler) ReconcileContainers(ctx context.Context) {
|
|
if h.queries == nil {
|
|
return
|
|
}
|
|
rows, err := h.queries.ListAutoStartContainers(ctx)
|
|
if err != nil {
|
|
h.logger.Error("reconcile: failed to list containers from DB", slog.Any("error", err))
|
|
return
|
|
}
|
|
if len(rows) == 0 {
|
|
h.logger.Info("reconcile: no auto-start containers in DB")
|
|
return
|
|
}
|
|
|
|
h.logger.Info("reconcile: checking containers", slog.Int("count", len(rows)))
|
|
for _, row := range rows {
|
|
containerID := row.ContainerID
|
|
botID := uuid.UUID(row.BotID.Bytes).String()
|
|
|
|
_, err := h.service.GetContainer(ctx, containerID)
|
|
if err != nil {
|
|
if !errdefs.IsNotFound(err) {
|
|
h.logger.Error("reconcile: failed to get container",
|
|
slog.String("container_id", containerID), slog.Any("error", err))
|
|
continue
|
|
}
|
|
// Container missing in containerd — rebuild.
|
|
h.logger.Warn("reconcile: container missing, rebuilding",
|
|
slog.String("bot_id", botID), slog.String("container_id", containerID))
|
|
if setupErr := h.SetupBotContainer(ctx, botID); setupErr != nil {
|
|
h.logger.Error("reconcile: rebuild failed",
|
|
slog.String("bot_id", botID), slog.Any("error", setupErr))
|
|
if dbErr := h.queries.UpdateContainerStatus(ctx, dbsqlc.UpdateContainerStatusParams{
|
|
Status: "error",
|
|
BotID: row.BotID,
|
|
}); dbErr != nil {
|
|
h.logger.Error("reconcile: failed to mark container as error",
|
|
slog.String("bot_id", botID), slog.Any("error", dbErr))
|
|
}
|
|
}
|
|
continue
|
|
}
|
|
|
|
// Container exists — ensure the task is running.
|
|
running := h.isTaskRunning(ctx, containerID)
|
|
if running {
|
|
if row.Status != "running" {
|
|
if dbErr := h.queries.UpdateContainerStarted(ctx, row.BotID); dbErr != nil {
|
|
h.logger.Error("reconcile: failed to update DB status to running",
|
|
slog.String("bot_id", botID), slog.Any("error", dbErr))
|
|
}
|
|
}
|
|
h.logger.Info("reconcile: container healthy",
|
|
slog.String("bot_id", botID), slog.String("container_id", containerID))
|
|
continue
|
|
}
|
|
|
|
// Task not running — try to start it.
|
|
h.logger.Warn("reconcile: task not running, starting",
|
|
slog.String("bot_id", botID), slog.String("container_id", containerID))
|
|
if err := h.ensureContainerAndTask(ctx, containerID, botID); err != nil {
|
|
h.logger.Error("reconcile: failed to start task",
|
|
slog.String("bot_id", botID), slog.Any("error", err))
|
|
if dbErr := h.queries.UpdateContainerStopped(ctx, row.BotID); dbErr != nil {
|
|
h.logger.Error("reconcile: failed to mark container as stopped",
|
|
slog.String("bot_id", botID), slog.Any("error", dbErr))
|
|
}
|
|
} else {
|
|
if dbErr := h.queries.UpdateContainerStarted(ctx, row.BotID); dbErr != nil {
|
|
h.logger.Error("reconcile: failed to update DB status to running",
|
|
slog.String("bot_id", botID), slog.Any("error", dbErr))
|
|
}
|
|
}
|
|
}
|
|
h.logger.Info("reconcile: completed")
|
|
}
|
|
|
|
func (h *ContainerdHandler) ensureBotDataRoot(botID string) (string, error) {
|
|
dataRoot := strings.TrimSpace(h.cfg.DataRoot)
|
|
if dataRoot == "" {
|
|
dataRoot = config.DefaultDataRoot
|
|
}
|
|
dataRoot, err := filepath.Abs(dataRoot)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
root := filepath.Join(dataRoot, "bots", botID)
|
|
if err := os.MkdirAll(root, 0o755); err != nil {
|
|
return "", err
|
|
}
|
|
return root, nil
|
|
}
|