mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-27 07:16:19 +09:00
9ceabf68c4
Replace the host bind-mount + containerd exec approach with a per-bot
in-container gRPC server (ContainerService, port 9090). All file I/O,
exec, and MCP stdio sessions now go through gRPC instead of running
shell commands or reading host-mounted directories.
Architecture changes:
- cmd/mcp: rewritten as a gRPC server (ContainerService) with full
file and exec API (ReadFile, WriteFile, ListDir, ReadRaw, WriteRaw,
Exec, Stat, Mkdir, Rename, DeleteFile)
- internal/mcp/mcpcontainer: protobuf definitions and generated stubs
- internal/mcp/mcpclient: gRPC client wrapper with connection pool
(Pool) and Provider interface for dependency injection
- mcp.Manager: add per-bot IP cache, gRPC connection pool, and
SetContainerIP/MCPClient methods; remove DataDir/Exec helpers
- containerd.Service: remove ExecTask/ExecTaskStreaming; network setup
now returns NetworkResult{IP} for pool routing
- internal/fs/service.go: deleted (replaced by mcpclient)
- handlers/fs.go: deleted; MCP stdio session logic moved to mcp_stdio.go
- container provider Executor: all tools (read/write/list/edit/exec)
now call gRPC client instead of running shell via exec
- storefs, containerfs, media, skills, memory: all I/O ported to
mcpclient.Provider
Database:
- migration 0022: drop host_path column from containers table
One-time data migration:
- migrateBindMountData: on first Start() after upgrade, copies old
bind-mount data into the container via gRPC, then renames src dir
to prevent re-migration; runs in background goroutine
Bug fixes:
- mcp_stdio: callRaw now returns full JSON-RPC envelope
{"jsonrpc","id","result"|"error"} matching protocol spec;
explicit "initialize" call now advances session init state to
prevent duplicate handshake on next non-initialize call
- mcpclient Pool: properly evict stale gRPC connection after snapshot
replace (container process recreated); use SetContainerIP instead
of direct map write so IP changes always evict pool entry
- migrateBindMountData: walkErr on directories now counted as failure
so partially-walked trees don't get incorrectly marked as migrated
- cmd/mcp/Dockerfile: removed dead file (docker/Dockerfile.mcp is the
canonical production build)
Tests:
- provider_test.go: restored with bufconn in-process gRPC mock
(fakeContainerService + staticProvider), 14 cases covering all 5
tools plus edge cases
- mcp_session_test.go: new, covers JSON-RPC envelope, init state
machine, pending cleanup on cancel/close, readLoop cancel
- storefs/service_test.go: restored (pure function roundtrip tests)
141 lines
3.6 KiB
Go
141 lines
3.6 KiB
Go
package containerd
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/containerd/containerd/v2/client"
|
|
gocni "github.com/containerd/go-cni"
|
|
)
|
|
|
|
func setupCNINetwork(ctx context.Context, task client.Task, containerID string, CNIBinDir string, CNIConfDir string) (string, error) {
|
|
if task == nil {
|
|
return "", ErrInvalidArgument
|
|
}
|
|
if containerID == "" {
|
|
containerID = task.ID()
|
|
}
|
|
if containerID == "" {
|
|
return "", ErrInvalidArgument
|
|
}
|
|
|
|
pid := task.Pid()
|
|
if pid == 0 {
|
|
return "", fmt.Errorf("task pid not available for %s", containerID)
|
|
}
|
|
|
|
if _, err := os.Stat(CNIConfDir); err != nil {
|
|
return "", fmt.Errorf("cni config dir missing: %s: %w", CNIConfDir, err)
|
|
}
|
|
if _, err := os.Stat(CNIBinDir); err != nil {
|
|
return "", fmt.Errorf("cni bin dir missing: %s: %w", CNIBinDir, err)
|
|
}
|
|
netnsPath := filepath.Join("/proc", fmt.Sprint(pid), "ns", "net")
|
|
if _, err := os.Stat(netnsPath); err != nil {
|
|
return "", fmt.Errorf("netns not found: %s: %w", netnsPath, err)
|
|
}
|
|
|
|
cni, err := gocni.New(
|
|
gocni.WithPluginDir([]string{CNIBinDir}),
|
|
gocni.WithPluginConfDir(CNIConfDir),
|
|
)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if err := cni.Load(gocni.WithLoNetwork, gocni.WithDefaultConf); err != nil {
|
|
return "", err
|
|
}
|
|
result, err := cni.Setup(ctx, containerID, netnsPath)
|
|
if err != nil {
|
|
if !isDuplicateAllocationError(err) && !isVethExistsError(err) {
|
|
return "", err
|
|
}
|
|
// Stale IPAM allocation or veth exists (e.g. after container restart with persisted
|
|
// /var/lib/cni). Remove may fail if the previous iptables/veth state
|
|
// is already gone; ignore the error so the retry Setup still runs.
|
|
_ = cni.Remove(ctx, containerID, netnsPath)
|
|
result, err = cni.Setup(ctx, containerID, netnsPath)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
}
|
|
return extractIP(result), nil
|
|
}
|
|
|
|
func extractIP(result *gocni.Result) string {
|
|
if result == nil {
|
|
return ""
|
|
}
|
|
for _, cfg := range result.Interfaces {
|
|
for _, ipCfg := range cfg.IPConfigs {
|
|
if ipCfg.IP != nil {
|
|
ip := ipCfg.IP.String()
|
|
if ip != "" && ip != "127.0.0.1" && ip != "::1" {
|
|
return ip
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func removeCNINetwork(ctx context.Context, task client.Task, containerID string, CNIBinDir string, CNIConfDir string) error {
|
|
if task == nil {
|
|
return ErrInvalidArgument
|
|
}
|
|
if containerID == "" {
|
|
containerID = task.ID()
|
|
}
|
|
if containerID == "" {
|
|
return ErrInvalidArgument
|
|
}
|
|
|
|
pid := task.Pid()
|
|
if pid == 0 {
|
|
return fmt.Errorf("task pid not available for %s", containerID)
|
|
}
|
|
|
|
if _, err := os.Stat(CNIConfDir); err != nil {
|
|
return fmt.Errorf("cni config dir missing: %s: %w", CNIConfDir, err)
|
|
}
|
|
if _, err := os.Stat(CNIBinDir); err != nil {
|
|
return fmt.Errorf("cni bin dir missing: %s: %w", CNIBinDir, err)
|
|
}
|
|
|
|
netnsPath := filepath.Join("/proc", fmt.Sprint(pid), "ns", "net")
|
|
if _, err := os.Stat(netnsPath); err != nil {
|
|
return fmt.Errorf("netns not found: %s: %w", netnsPath, err)
|
|
}
|
|
|
|
cni, err := gocni.New(
|
|
gocni.WithPluginDir([]string{CNIBinDir}),
|
|
gocni.WithPluginConfDir(CNIConfDir),
|
|
)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := cni.Load(gocni.WithLoNetwork, gocni.WithDefaultConf); err != nil {
|
|
return err
|
|
}
|
|
return cni.Remove(ctx, containerID, netnsPath)
|
|
}
|
|
|
|
// isDuplicateAllocationError reports whether err is non-nil and its message
// contains the substring "duplicate allocation". The match is a plain,
// case-sensitive substring test on err.Error().
func isDuplicateAllocationError(err error) bool {
	return err != nil && strings.Contains(err.Error(), "duplicate allocation")
}
|
|
|
|
// isVethExistsError reports whether err is non-nil and its message contains
// the substring "already exists". It is used to recognise a CNI setup failure
// caused by veth devices that are still present (e.g. after a container
// restart with stale network state). The match is a plain, case-sensitive
// substring test on err.Error().
func isVethExistsError(err error) bool {
	return err != nil && strings.Contains(err.Error(), "already exists")
}
|