diff --git a/config.toml.example b/config.toml.example index c103ceb0..4952ac28 100644 --- a/config.toml.example +++ b/config.toml.example @@ -28,6 +28,8 @@ image = "docker.io/library/memoh-mcp:dev" snapshotter = "overlayfs" data_root = "data" data_mount = "/data" +cni_bin_dir = "/opt/cni/bin" +cni_conf_dir = "/etc/cni/net.d" ## Postgres configuration [postgres] diff --git a/internal/config/config.go b/internal/config/config.go index c4a5ea49..54c67415 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -15,6 +15,8 @@ const ( DefaultMCPImage = "docker.io/library/memoh-mcp:latest" DefaultDataRoot = "data" DefaultDataMount = "/data" + DefaultCNIBinaryDir = "/opt/cni/bin" + DefaultCNIConfigDir = "/etc/cni/net.d" DefaultJWTExpiresIn = "24h" DefaultPGHost = "127.0.0.1" DefaultPGPort = 5432 @@ -63,10 +65,12 @@ type ContainerdConfig struct { } type MCPConfig struct { - Image string `toml:"image"` - Snapshotter string `toml:"snapshotter"` - DataRoot string `toml:"data_root"` - DataMount string `toml:"data_mount"` + Image string `toml:"image"` + Snapshotter string `toml:"snapshotter"` + DataRoot string `toml:"data_root"` + DataMount string `toml:"data_mount"` + CNIBinaryDir string `toml:"cni_bin_dir"` + CNIConfigDir string `toml:"cni_conf_dir"` } type PostgresConfig struct { @@ -124,9 +128,11 @@ func Load(path string) (Config, error) { Namespace: DefaultNamespace, }, MCP: MCPConfig{ - Image: DefaultMCPImage, - DataRoot: DefaultDataRoot, - DataMount: DefaultDataMount, + Image: DefaultMCPImage, + DataRoot: DefaultDataRoot, + DataMount: DefaultDataMount, + CNIBinaryDir: DefaultCNIBinaryDir, + CNIConfigDir: DefaultCNIConfigDir, }, Postgres: PostgresConfig{ Host: DefaultPGHost, diff --git a/internal/containerd/network.go b/internal/containerd/network.go index c895dbe4..a65a1937 100644 --- a/internal/containerd/network.go +++ b/internal/containerd/network.go @@ -14,13 +14,8 @@ import ( gocni "github.com/containerd/go-cni" ) -const ( - defaultCNIConfDir = "/etc/cni/net.d" - defaultCNIBinDir = "/opt/cni/bin" -) - // SetupNetwork attaches CNI networking to a running task. -func SetupNetwork(ctx context.Context, task client.Task, containerID string) error { +func SetupNetwork(ctx context.Context, task client.Task, containerID string, CNIBinDir string, CNIConfDir string) error { if task == nil { return ErrInvalidArgument } @@ -36,14 +31,14 @@ func SetupNetwork(ctx context.Context, task client.Task, containerID string) err return fmt.Errorf("task pid not available for %s", containerID) } if runtime.GOOS == "darwin" { - return setupNetworkWithCLI(ctx, containerID, pid) + return setupNetworkWithCLI(ctx, containerID, pid, CNIBinDir, CNIConfDir) } - if _, err := os.Stat(defaultCNIConfDir); err != nil { - return fmt.Errorf("cni config dir missing: %s: %w", defaultCNIConfDir, err) + if _, err := os.Stat(CNIConfDir); err != nil { + return fmt.Errorf("cni config dir missing: %s: %w", CNIConfDir, err) } - if _, err := os.Stat(defaultCNIBinDir); err != nil { - return fmt.Errorf("cni bin dir missing: %s: %w", defaultCNIBinDir, err) + if _, err := os.Stat(CNIBinDir); err != nil { + return fmt.Errorf("cni bin dir missing: %s: %w", CNIBinDir, err) } netnsPath := filepath.Join("/proc", fmt.Sprint(pid), "ns", "net") if _, err := os.Stat(netnsPath); err != nil { @@ -51,8 +46,8 @@ func SetupNetwork(ctx context.Context, task client.Task, containerID string) err } cni, err := gocni.New( - gocni.WithPluginDir([]string{defaultCNIBinDir}), - gocni.WithPluginConfDir(defaultCNIConfDir), + gocni.WithPluginDir([]string{CNIBinDir}), + gocni.WithPluginConfDir(CNIConfDir), ) if err != nil { return err @@ -74,7 +69,7 @@ func SetupNetwork(ctx context.Context, task client.Task, containerID string) err return err } -func setupNetworkWithCLI(ctx context.Context, containerID string, pid uint32) error { +func setupNetworkWithCLI(ctx context.Context, containerID string, pid uint32, CNIBinDir string, CNIConfDir string) error { args := []string{ "shell", "--tty=false", @@ -86,8 +81,8 @@ func setupNetworkWithCLI(ctx context.Context, containerID string, pid uint32) er "cni-setup", "--id", containerID, "--pid", fmt.Sprint(pid), - "--conf-dir", defaultCNIConfDir, - "--bin-dir", defaultCNIBinDir, + "--conf-dir", CNIConfDir, + "--bin-dir", CNIBinDir, } cmd := exec.CommandContext(ctx, "limactl", args...) var stderr bytes.Buffer @@ -101,7 +96,7 @@ func setupNetworkWithCLI(ctx context.Context, containerID string, pid uint32) er } else if !isDuplicateAllocationError(err) { return err } - if rmErr := removeNetworkWithCLI(ctx, containerID, pid); rmErr != nil { + if rmErr := removeNetworkWithCLI(ctx, containerID, pid, CNIBinDir, CNIConfDir); rmErr != nil { return rmErr } cmd = exec.CommandContext(ctx, "limactl", args...) @@ -118,7 +113,7 @@ func setupNetworkWithCLI(ctx context.Context, containerID string, pid uint32) er } // RemoveNetwork detaches CNI networking for a running task. -func RemoveNetwork(ctx context.Context, task client.Task, containerID string) error { +func RemoveNetwork(ctx context.Context, task client.Task, containerID string, CNIBinDir string, CNIConfDir string) error { if task == nil { return ErrInvalidArgument } @@ -134,14 +129,14 @@ func RemoveNetwork(ctx context.Context, task client.Task, containerID string) er return fmt.Errorf("task pid not available for %s", containerID) } if runtime.GOOS == "darwin" { - return removeNetworkWithCLI(ctx, containerID, pid) + return removeNetworkWithCLI(ctx, containerID, pid, CNIBinDir, CNIConfDir) } - if _, err := os.Stat(defaultCNIConfDir); err != nil { - return fmt.Errorf("cni config dir missing: %s: %w", defaultCNIConfDir, err) + if _, err := os.Stat(CNIConfDir); err != nil { + return fmt.Errorf("cni config dir missing: %s: %w", CNIConfDir, err) } - if _, err := os.Stat(defaultCNIBinDir); err != nil { - return fmt.Errorf("cni bin dir missing: %s: %w", defaultCNIBinDir, err) + if _, err := os.Stat(CNIBinDir); err != nil { + return fmt.Errorf("cni bin dir missing: %s: %w", CNIBinDir, err) } netnsPath := filepath.Join("/proc", fmt.Sprint(pid), "ns", "net") @@ -150,8 +145,8 @@ func RemoveNetwork(ctx context.Context, task client.Task, containerID string) er } cni, err := gocni.New( - gocni.WithPluginDir([]string{defaultCNIBinDir}), - gocni.WithPluginConfDir(defaultCNIConfDir), + gocni.WithPluginDir([]string{CNIBinDir}), + gocni.WithPluginConfDir(CNIConfDir), ) if err != nil { return err @@ -162,7 +157,7 @@ func RemoveNetwork(ctx context.Context, task client.Task, containerID string) er return cni.Remove(ctx, containerID, netnsPath) } -func removeNetworkWithCLI(ctx context.Context, containerID string, pid uint32) error { +func removeNetworkWithCLI(ctx context.Context, containerID string, pid uint32, CNIBinDir string, CNIConfDir string) error { args := []string{ "shell", "--tty=false", @@ -174,8 +169,8 @@ func removeNetworkWithCLI(ctx context.Context, containerID string, pid uint32) e "cni-remove", "--id", containerID, "--pid", fmt.Sprint(pid), - "--conf-dir", defaultCNIConfDir, - "--bin-dir", defaultCNIBinDir, + "--conf-dir", CNIConfDir, + "--bin-dir", CNIBinDir, } cmd := exec.CommandContext(ctx, "limactl", args...) var stderr bytes.Buffer diff --git a/internal/handlers/containerd.go b/internal/handlers/containerd.go index a3433221..d78b3ed7 100644 --- a/internal/handlers/containerd.go +++ b/internal/handlers/containerd.go @@ -245,7 +245,7 @@ func (h *ContainerdHandler) CreateContainer(c echo.Context) error { UseStdio: false, }); err == nil { started = true - if netErr := ctr.SetupNetwork(ctx, task, containerID); netErr != nil { + if netErr := ctr.SetupNetwork(ctx, task, containerID, h.cfg.CNIBinaryDir, h.cfg.CNIConfigDir); netErr != nil { h.logger.Warn("mcp container network setup failed, task kept running", slog.String("container_id", containerID), slog.Any("error", netErr), @@ -304,7 +304,7 @@ func (h *ContainerdHandler) ensureContainerAndTask(ctx context.Context, containe // Task is running but CNI state may be stale (e.g. server container restarted). // Re-apply network to ensure connectivity. if task, taskErr := h.service.GetTask(ctx, containerID); taskErr == nil { - if netErr := ctr.SetupNetwork(ctx, task, containerID); netErr != nil { + if netErr := ctr.SetupNetwork(ctx, task, containerID, h.cfg.CNIBinaryDir, h.cfg.CNIConfigDir); netErr != nil { h.logger.Warn("network re-setup failed for running task", slog.String("container_id", containerID), slog.Any("error", netErr)) } @@ -322,7 +322,7 @@ func (h *ContainerdHandler) ensureContainerAndTask(ctx context.Context, containe if err != nil { return err } - if netErr := ctr.SetupNetwork(ctx, task, containerID); netErr != nil { + if netErr := ctr.SetupNetwork(ctx, task, containerID, h.cfg.CNIBinaryDir, h.cfg.CNIConfigDir); netErr != nil { h.logger.Warn("network setup failed, task kept running", slog.String("container_id", containerID), slog.Any("error", netErr)) } @@ -781,7 +781,7 @@ func (h *ContainerdHandler) SetupBotContainer(ctx context.Context, botID string) if task, err := h.service.StartTask(ctx, containerID, &ctr.StartTaskOptions{ UseStdio: false, }); err == nil { - if netErr := ctr.SetupNetwork(ctx, task, containerID); netErr != nil { + if netErr := ctr.SetupNetwork(ctx, task, containerID, h.cfg.CNIBinaryDir, h.cfg.CNIConfigDir); netErr != nil { h.logger.Warn("setup bot container: network setup failed, task kept running", slog.String("bot_id", botID), slog.String("container_id", containerID), @@ -833,7 +833,7 @@ func (h *ContainerdHandler) CleanupBotContainer(ctx context.Context, botID strin if task, taskErr := h.service.GetTask(ctx, containerID); taskErr == nil { h.logger.Info("CleanupBotContainer: removing network", slog.String("container_id", containerID)) - if err := ctr.RemoveNetwork(ctx, task, containerID); err != nil { + if err := ctr.RemoveNetwork(ctx, task, containerID, h.cfg.CNIBinaryDir, h.cfg.CNIConfigDir); err != nil { h.logger.Warn("cleanup: remove network failed", slog.String("container_id", containerID), slog.Any("error", err)) } } @@ -940,7 +940,7 @@ func (h *ContainerdHandler) ReconcileContainers(ctx context.Context) { // veth endpoints and iptables masquerade rules while the MCP task keeps // running inside containerd. if task, taskErr := h.service.GetTask(ctx, containerID); taskErr == nil { - if netErr := ctr.SetupNetwork(ctx, task, containerID); netErr != nil { + if netErr := ctr.SetupNetwork(ctx, task, containerID, h.cfg.CNIBinaryDir, h.cfg.CNIConfigDir); netErr != nil { h.logger.Warn("reconcile: network re-setup failed for running task", slog.String("bot_id", botID), slog.String("container_id", containerID), diff --git a/internal/mcp/manager.go b/internal/mcp/manager.go index a2c1806a..efb7bd00 100644 --- a/internal/mcp/manager.go +++ b/internal/mcp/manager.go @@ -174,7 +174,7 @@ func (m *Manager) Start(ctx context.Context, botID string) error { if err != nil { return err } - if err := ctr.SetupNetwork(ctx, task, m.containerID(botID)); err != nil { + if err := ctr.SetupNetwork(ctx, task, m.containerID(botID), m.cfg.CNIBinaryDir, m.cfg.CNIConfigDir); err != nil { if stopErr := m.service.StopTask(ctx, m.containerID(botID), &ctr.StopTaskOptions{Force: true}); stopErr != nil { m.logger.Warn("cleanup: stop task failed", slog.String("container_id", m.containerID(botID)), slog.Any("error", stopErr)) } @@ -199,7 +199,7 @@ func (m *Manager) Delete(ctx context.Context, botID string) error { } if task, taskErr := m.service.GetTask(ctx, m.containerID(botID)); taskErr == nil { - if err := ctr.RemoveNetwork(ctx, task, m.containerID(botID)); err != nil { + if err := ctr.RemoveNetwork(ctx, task, m.containerID(botID), m.cfg.CNIBinaryDir, m.cfg.CNIConfigDir); err != nil { m.logger.Warn("cleanup: remove network failed", slog.String("container_id", m.containerID(botID)), slog.Any("error", err)) } }