Files
Memoh/internal/containerd/network.go
T
BBQ 9ceabf68c4 feat(mcp): replace bind-mount+exec with in-container gRPC service (#179)
Replace the host bind-mount + containerd exec approach with a per-bot
in-container gRPC server (ContainerService, port 9090). All file I/O,
exec, and MCP stdio sessions now go through gRPC instead of running
shell commands or reading host-mounted directories.

Architecture changes:
- cmd/mcp: rewritten as a gRPC server (ContainerService) with full
  file and exec API (ReadFile, WriteFile, ListDir, ReadRaw, WriteRaw,
  Exec, Stat, Mkdir, Rename, DeleteFile)
- internal/mcp/mcpcontainer: protobuf definitions and generated stubs
- internal/mcp/mcpclient: gRPC client wrapper with connection pool
  (Pool) and Provider interface for dependency injection
- mcp.Manager: add per-bot IP cache, gRPC connection pool, and
  SetContainerIP/MCPClient methods; remove DataDir/Exec helpers
- containerd.Service: remove ExecTask/ExecTaskStreaming; network setup
  now returns NetworkResult{IP} for pool routing
- internal/fs/service.go: deleted (replaced by mcpclient)
- handlers/fs.go: deleted; MCP stdio session logic moved to mcp_stdio.go
- container provider Executor: all tools (read/write/list/edit/exec)
  now call gRPC client instead of running shell via exec
- storefs, containerfs, media, skills, memory: all I/O ported to
  mcpclient.Provider

Database:
- migration 0022: drop host_path column from containers table

One-time data migration:
- migrateBindMountData: on first Start() after upgrade, copies old
  bind-mount data into the container via gRPC, then renames src dir
  to prevent re-migration; runs in background goroutine

Bug fixes:
- mcp_stdio: callRaw now returns full JSON-RPC envelope
  {"jsonrpc","id","result"|"error"} matching protocol spec;
  explicit "initialize" call now advances session init state to
  prevent duplicate handshake on next non-initialize call
- mcpclient Pool: properly evict stale gRPC connection after snapshot
  replace (container process recreated); use SetContainerIP instead
  of direct map write so IP changes always evict pool entry
- migrateBindMountData: walkErr on directories now counted as failure
  so partially-walked trees don't get incorrectly marked as migrated
- cmd/mcp/Dockerfile: removed dead file (docker/Dockerfile.mcp is the
  canonical production build)

Tests:
- provider_test.go: restored with bufconn in-process gRPC mock
  (fakeContainerService + staticProvider), 14 cases covering all 5
  tools plus edge cases
- mcp_session_test.go: new, covers JSON-RPC envelope, init state
  machine, pending cleanup on cancel/close, readLoop cancel
- storefs/service_test.go: restored (pure function roundtrip tests)
2026-03-04 21:50:08 +08:00

141 lines
3.6 KiB
Go

package containerd
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/containerd/containerd/v2/client"
gocni "github.com/containerd/go-cni"
)
// setupCNINetwork attaches the network namespace of task to the default CNI
// network (plus loopback) and returns the IP address extracted from the CNI
// result by extractIP.
//
// containerID falls back to task.ID() when empty. CNIBinDir and CNIConfDir
// must both exist on the host, and the task must have a live pid so that its
// /proc/<pid>/ns/net path can be handed to the plugins.
//
// ErrInvalidArgument is returned when task is nil or no container ID can be
// determined. Every other failure is wrapped with the step that produced it.
// The returned string is empty whenever the error is non-nil, and may also be
// empty on success if the CNI result carries no usable address.
//
// If the first Setup fails with a duplicate-allocation or "already exists"
// error, the previous attachment is removed (best effort) and Setup is
// retried exactly once.
func setupCNINetwork(ctx context.Context, task client.Task, containerID string, CNIBinDir string, CNIConfDir string) (string, error) {
	if task == nil {
		return "", ErrInvalidArgument
	}
	if containerID == "" {
		containerID = task.ID()
	}
	if containerID == "" {
		return "", ErrInvalidArgument
	}

	pid := task.Pid()
	if pid == 0 {
		return "", fmt.Errorf("task pid not available for %s", containerID)
	}

	// Fail early with a precise message instead of letting the CNI library
	// report a less specific error later.
	if _, err := os.Stat(CNIConfDir); err != nil {
		return "", fmt.Errorf("cni config dir missing: %s: %w", CNIConfDir, err)
	}
	if _, err := os.Stat(CNIBinDir); err != nil {
		return "", fmt.Errorf("cni bin dir missing: %s: %w", CNIBinDir, err)
	}

	netnsPath := filepath.Join("/proc", fmt.Sprint(pid), "ns", "net")
	if _, err := os.Stat(netnsPath); err != nil {
		return "", fmt.Errorf("netns not found: %s: %w", netnsPath, err)
	}

	cni, err := gocni.New(
		gocni.WithPluginDir([]string{CNIBinDir}),
		gocni.WithPluginConfDir(CNIConfDir),
	)
	if err != nil {
		return "", fmt.Errorf("cni init for %s: %w", containerID, err)
	}
	if err := cni.Load(gocni.WithLoNetwork, gocni.WithDefaultConf); err != nil {
		return "", fmt.Errorf("cni load config from %s: %w", CNIConfDir, err)
	}

	result, err := cni.Setup(ctx, containerID, netnsPath)
	if err != nil {
		// The recoverable-error checks run on the raw plugin error, before any
		// wrapping, so their substring matching is unaffected.
		if !isDuplicateAllocationError(err) && !isVethExistsError(err) {
			return "", fmt.Errorf("cni setup for %s: %w", containerID, err)
		}
		// Stale IPAM allocation or veth exists (e.g. after container restart
		// with persisted /var/lib/cni). Remove may fail if the previous
		// iptables/veth state is already gone; ignore the error so the retry
		// Setup still runs.
		_ = cni.Remove(ctx, containerID, netnsPath)
		result, err = cni.Setup(ctx, containerID, netnsPath)
		if err != nil {
			return "", fmt.Errorf("cni setup retry for %s: %w", containerID, err)
		}
	}
	return extractIP(result), nil
}
// extractIP returns the string form of the first non-loopback IP address found
// among the interfaces of a CNI setup result. It returns "" when result is nil
// or when no interface carries such an address.
//
// Empty addresses are skipped, and loopback detection uses the address's own
// IsLoopback method, so every 127.0.0.0/8 address and ::1 are excluded rather
// than only the two canonical literals.
//
// NOTE(review): if result.Interfaces is a map, iteration order is unspecified
// and the choice between several addressed interfaces is not deterministic —
// confirm that only one non-loopback interface is ever configured.
func extractIP(result *gocni.Result) string {
	if result == nil {
		return ""
	}
	for _, cfg := range result.Interfaces {
		for _, ipCfg := range cfg.IPConfigs {
			addr := ipCfg.IP
			// A zero-length address would stringify to "<nil>"; never hand
			// that back to callers as if it were a routable IP.
			if len(addr) == 0 || addr.IsLoopback() {
				continue
			}
			return addr.String()
		}
	}
	return ""
}
// removeCNINetwork detaches the network namespace of task from the default CNI
// network (plus loopback), undoing what setupCNINetwork configured.
//
// containerID falls back to task.ID() when empty. CNIBinDir and CNIConfDir
// must both exist on the host, and the task must still have a live pid: the
// namespace path is derived from /proc/<pid>/ns/net and is checked before the
// plugins are invoked.
//
// ErrInvalidArgument is returned when task is nil or no container ID can be
// determined. Every other failure is wrapped with the step that produced it.
func removeCNINetwork(ctx context.Context, task client.Task, containerID string, CNIBinDir string, CNIConfDir string) error {
	if task == nil {
		return ErrInvalidArgument
	}
	if containerID == "" {
		containerID = task.ID()
	}
	if containerID == "" {
		return ErrInvalidArgument
	}

	pid := task.Pid()
	if pid == 0 {
		return fmt.Errorf("task pid not available for %s", containerID)
	}

	if _, err := os.Stat(CNIConfDir); err != nil {
		return fmt.Errorf("cni config dir missing: %s: %w", CNIConfDir, err)
	}
	if _, err := os.Stat(CNIBinDir); err != nil {
		return fmt.Errorf("cni bin dir missing: %s: %w", CNIBinDir, err)
	}

	netnsPath := filepath.Join("/proc", fmt.Sprint(pid), "ns", "net")
	if _, err := os.Stat(netnsPath); err != nil {
		return fmt.Errorf("netns not found: %s: %w", netnsPath, err)
	}

	cni, err := gocni.New(
		gocni.WithPluginDir([]string{CNIBinDir}),
		gocni.WithPluginConfDir(CNIConfDir),
	)
	if err != nil {
		return fmt.Errorf("cni init for %s: %w", containerID, err)
	}
	if err := cni.Load(gocni.WithLoNetwork, gocni.WithDefaultConf); err != nil {
		return fmt.Errorf("cni load config from %s: %w", CNIConfDir, err)
	}

	if err := cni.Remove(ctx, containerID, netnsPath); err != nil {
		return fmt.Errorf("cni remove for %s: %w", containerID, err)
	}
	return nil
}
// isDuplicateAllocationError reports whether err's message contains the text
// "duplicate allocation". The check is a case-sensitive substring match on
// err.Error(); a nil error never matches.
func isDuplicateAllocationError(err error) bool {
	const marker = "duplicate allocation"
	return err != nil && strings.Contains(err.Error(), marker)
}
// isVethExistsError reports whether a CNI setup error indicates that veth
// devices are already present (e.g. left behind by a container restart with
// stale network state). Detection is a case-sensitive substring match for
// "already exists" on err.Error(); a nil error never matches.
func isVethExistsError(err error) bool {
	const marker = "already exists"
	return err != nil && strings.Contains(err.Error(), marker)
}