From 01cb6c85dbef6402df13f598441584b8825ace62 Mon Sep 17 00:00:00 2001 From: Ran <16112591+chen-ran@users.noreply.github.com> Date: Thu, 12 Feb 2026 08:23:25 +0800 Subject: [PATCH] fix(deploy): many docker compose bug --- DEPLOYMENT.md | 2 +- agent/src/config.ts | 1 + agent/src/index.ts | 38 +++++++++++++++------- config.toml.example | 1 + deploy.sh | 37 +++++++++++++++++++--- docker-compose.yml | 34 ++++++++++---------- docker/Dockerfile.server | 36 +++++++++++++++++++-- docker/Dockerfile.web | 2 +- docker/config/config.docker.toml | 9 +++--- docker/config/nginx.conf | 54 ++++++++++++++++---------------- internal/containerd/service.go | 33 +++++++++++++------ internal/handlers/fs.go | 42 ++++++++++++++++++++++--- internal/handlers/mcp_stdio.go | 17 +++++++--- internal/handlers/ping.go | 5 +++ internal/server/server.go | 2 +- scripts/containerd-install.sh | 5 ++- 16 files changed, 228 insertions(+), 90 deletions(-) diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index 5be9cc79..6b57052b 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -127,7 +127,7 @@ host = "postgres" password = "your_secure_password" # Must change in config.toml [containerd] -socket_path = "unix:///var/run/docker.sock" # Use host Docker +socket_path = "/run/containerd/containerd.sock" [qdrant] base_url = "http://qdrant:6334" diff --git a/agent/src/config.ts b/agent/src/config.ts index 9f3ca291..3a23b5f6 100644 --- a/agent/src/config.ts +++ b/agent/src/config.ts @@ -4,6 +4,7 @@ import { parse } from 'toml' type AgentGatewayConfig = { 'agent_gateway': { host?: string + server_addr?: string port?: number }, 'server': { diff --git a/agent/src/index.ts b/agent/src/index.ts index fcb0320a..1ebf9d07 100644 --- a/agent/src/index.ts +++ b/agent/src/index.ts @@ -3,7 +3,6 @@ import { chatModule } from './modules/chat' import { corsMiddleware } from './middlewares/cors' import { errorMiddleware } from './middlewares/error' import { loadConfig } from './config' -import { join } from 'path' const config = loadConfig('../config.toml') @@ -15,17 +14,26 @@ export const getBraveConfig = () => { } export const getBaseUrl = () => { - let baseUrl = '' - if (!baseUrl) { - baseUrl = 'http://127.0.0.1' + const rawAddr = + typeof config.agent_gateway.server_addr === 'string' + ? config.agent_gateway.server_addr.trim() + : typeof config.server.addr === 'string' + ? config.server.addr.trim() + : '' + + if (!rawAddr) { + return 'http://127.0.0.1' } - if ( - typeof config.server.addr === 'string' && - config.server.addr.startsWith(':') - ) { - baseUrl = `http://127.0.0.1${config.server.addr}` + + if (rawAddr.startsWith('http://') || rawAddr.startsWith('https://')) { + return rawAddr.replace(/\/+$/, '') } - return baseUrl + + if (rawAddr.startsWith(':')) { + return `http://127.0.0.1${rawAddr}` + } + + return `http://${rawAddr}` } export type AuthFetcher = ( @@ -40,7 +48,12 @@ export const createAuthFetcher = (bearer: string | undefined): AuthFetcher => { headers.set('Authorization', `Bearer ${bearer}`) } - return await fetch(join(getBaseUrl(), url), { + const requestUrl = new URL( + url, + `${getBaseUrl().replace(/\/+$/, '')}/`, + ).toString() + + return await fetch(requestUrl, { ...requestOptions, headers, }) @@ -50,6 +63,9 @@ export const createAuthFetcher = (bearer: string | undefined): AuthFetcher => { const app = new Elysia() .use(corsMiddleware) .use(errorMiddleware) + .get('/health', () => ({ + status: 'ok', + })) .use(chatModule) .listen({ port: config.agent_gateway.port ?? 8081, diff --git a/config.toml.example b/config.toml.example index 7f8458b1..640da6ae 100644 --- a/config.toml.example +++ b/config.toml.example @@ -50,6 +50,7 @@ timeout_seconds = 10 [agent_gateway] host = "127.0.0.1" port = 8081 +server_addr = ":8080" [brave] api_key = "" diff --git a/deploy.sh b/deploy.sh index 58c867ee..f61ca163 100755 --- a/deploy.sh +++ b/deploy.sh @@ -44,12 +44,39 @@ if [ ! -f config.toml ]; then echo "" fi -# Build MCP image -echo -e "${GREEN}Building MCP image...${NC}" -if docker build -f docker/Dockerfile.mcp -t memoh-mcp:latest . > /dev/null 2>&1; then - echo -e "${GREEN}✓ MCP image built successfully${NC}" +# Prepare data root path for host/containerd compatibility +MEMOH_DATA_ROOT="$(pwd)/.data/memoh" +mkdir -p "${MEMOH_DATA_ROOT}" +export MEMOH_DATA_ROOT +if grep -q '^data_root[[:space:]]*=' config.toml; then + awk -v path="${MEMOH_DATA_ROOT}" ' + $0 ~ /^data_root[[:space:]]*=/ { print "data_root = \"" path "\""; next } + { print } + ' config.toml > config.toml.tmp && mv config.toml.tmp config.toml +fi +echo -e "${GREEN}✓ Data root: ${MEMOH_DATA_ROOT}${NC}" +echo "" + +# Prepare container runtime environment +echo -e "${GREEN}Preparing container runtime environment...${NC}" +if sh scripts/containerd-install.sh > /dev/null 2>&1; then + echo -e "${GREEN}✓ Container runtime environment is ready${NC}" else - echo -e "${YELLOW}⚠ MCP image build failed, will try to pull at runtime${NC}" + echo -e "${YELLOW}⚠ Failed to prepare container runtime environment, MCP build may be skipped${NC}" +fi +echo "" + +# Build MCP image on host with nerdctl +MCP_IMAGE="docker.io/library/memoh-mcp:latest" +echo -e "${GREEN}Building MCP image on host with nerdctl...${NC}" +if command -v nerdctl &> /dev/null && command -v buildctl &> /dev/null && command -v buildkitd &> /dev/null; then + if nerdctl build -f docker/Dockerfile.mcp -t "${MCP_IMAGE}" . > /dev/null 2>&1; then + echo -e "${GREEN}✓ MCP image built successfully (on host)${NC}" + else + echo -e "${YELLOW}⚠ MCP image build failed on host, will try to pull at runtime${NC}" + fi +else + echo -e "${YELLOW}⚠ nerdctl/buildkit environment not found on host, skipping MCP build${NC}" fi echo "" diff --git a/docker-compose.yml b/docker-compose.yml index 1345bc93..28f554ca 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,5 @@ +name: "memoh" services: - postgres: image: postgres:18.1-alpine container_name: memoh-postgres @@ -8,7 +8,7 @@ services: POSTGRES_USER: memoh POSTGRES_PASSWORD: memoh123 volumes: - - postgres_data:/var/lib/postgresql/data + - postgres_data:/var/lib/postgresql - ./db/migrations:/docker-entrypoint-initdb.d:ro expose: - "5432" @@ -38,26 +38,24 @@ services: networks: - memoh-network - docker-cli: - image: docker:27-cli - container_name: memoh-docker-cli - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - memoh_bot_data:/var/lib/memoh/data - command: ["tail", "-f", "/dev/null"] - restart: unless-stopped - networks: - - memoh-network - server: build: - context: ./docker - dockerfile: Dockerfile.server + context: . + dockerfile: docker/Dockerfile.server container_name: memoh-server + pid: host volumes: - ./config.toml:/app/config.toml:ro - - /var/run/docker.sock:/var/run/docker.sock - - memoh_bot_data:/var/lib/memoh/data + - /run/containerd/containerd.sock:/run/containerd/containerd.sock + - /var/lib/containerd:/var/lib/containerd + - server_cni_state:/var/lib/cni + - ${MEMOH_DATA_ROOT:-/opt/memoh/data}:${MEMOH_DATA_ROOT:-/opt/memoh/data} + cap_add: + - SYS_ADMIN + - NET_ADMIN + security_opt: + - seccomp:unconfined + - apparmor:unconfined ports: - "8080:8080" depends_on: @@ -106,7 +104,7 @@ volumes: driver: local qdrant_data: driver: local - memoh_bot_data: + server_cni_state: driver: local networks: diff --git a/docker/Dockerfile.server b/docker/Dockerfile.server index e0d68061..283b7f2c 100644 --- a/docker/Dockerfile.server +++ b/docker/Dockerfile.server @@ -17,11 +17,43 @@ FROM alpine:latest WORKDIR /app -RUN apk add --no-cache ca-certificates tzdata wget +RUN apk add --no-cache ca-certificates tzdata wget nerdctl cni-plugins iptables \ + && mkdir -p /opt/cni/bin \ + && (cp -a /usr/lib/cni/. /opt/cni/bin/ 2>/dev/null || true) \ + && (cp -a /usr/libexec/cni/. /opt/cni/bin/ 2>/dev/null || true) \ + && mkdir -p /etc/cni/net.d /var/lib/cni \ + && printf '%s\n' \ + '{' \ + ' "cniVersion": "1.0.0",' \ + ' "name": "memoh-cni",' \ + ' "plugins": [' \ + ' {' \ + ' "type": "bridge",' \ + ' "bridge": "cni0",' \ + ' "isGateway": true,' \ + ' "ipMasq": true,' \ + ' "promiscMode": true,' \ + ' "ipam": {' \ + ' "type": "host-local",' \ + ' "ranges": [[' \ + ' { "subnet": "10.88.0.0/16" }' \ + ' ]],' \ + ' "routes": [' \ + ' { "dst": "0.0.0.0/0" }' \ + ' ]' \ + ' }' \ + ' },' \ + ' {' \ + ' "type": "portmap",' \ + ' "capabilities": { "portMappings": true }' \ + ' }' \ + ' ]' \ + '}' > /etc/cni/net.d/10-memoh.conflist COPY --from=builder /build/memoh-server /app/memoh-server +COPY --from=builder /build/spec /app/spec -RUN mkdir -p /var/lib/memoh/data +RUN mkdir -p /opt/memoh/data EXPOSE 8080 diff --git a/docker/Dockerfile.web b/docker/Dockerfile.web index c1532a7a..4f73c4cb 100644 --- a/docker/Dockerfile.web +++ b/docker/Dockerfile.web @@ -28,6 +28,6 @@ COPY docker/config/nginx.conf /etc/nginx/conf.d/default.conf EXPOSE 80 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ - CMD wget --no-verbose --tries=1 --spider http://localhost/ || exit 1 + CMD wget --no-verbose --tries=1 --spider http://localhost/health || exit 1 CMD ["nginx", "-g", "daemon off;"] diff --git a/docker/config/config.docker.toml b/docker/config/config.docker.toml index 3ea7e9b7..376d84d8 100644 --- a/docker/config/config.docker.toml +++ b/docker/config/config.docker.toml @@ -4,7 +4,7 @@ level = "info" format = "text" [server] -addr = ":8080" +addr = "server:8080" ## Admin [admin] @@ -19,13 +19,13 @@ jwt_expires_in = "168h" ## Docker configuration [containerd] -socket_path = "unix:///var/run/docker.sock" +socket_path = "/run/containerd/containerd.sock" namespace = "default" [mcp] -busybox_image = "memoh-mcp:latest" +busybox_image = "docker.io/library/memoh-mcp:latest" snapshotter = "overlayfs" -data_root = "/var/lib/memoh/data" +data_root = "/opt/memoh/data" data_mount = "/data" ## Postgres configuration @@ -48,6 +48,7 @@ timeout_seconds = 10 [agent_gateway] host = "agent" port = 8081 +server_addr = "server:8080" [brave] api_key = "" diff --git a/docker/config/nginx.conf b/docker/config/nginx.conf index b89ba890..255c8b65 100644 --- a/docker/config/nginx.conf +++ b/docker/config/nginx.conf @@ -1,5 +1,6 @@ server { listen 80; + listen [::]:80; server_name _; root /usr/share/nginx/html; index index.html; @@ -15,40 +16,39 @@ server { try_files $uri $uri/ /index.html; } - # API 代理 + # Nginx 健康检查 + location = /health { + access_log off; + add_header Content-Type text/plain; + return 200 "ok\n"; + } + + # 统一代理参数 + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_connect_timeout 60s; + proxy_send_timeout 60s; + proxy_read_timeout 60s; + + # Swagger 文档(保留 /api 前缀) + location ~ ^/api/(docs(?:/.*)?|swagger\.json)$ { + proxy_pass http://memoh-server:8080; + } + + # API 代理(其余 /api/* 去掉 /api 前缀转发) location /api/ { proxy_pass http://memoh-server:8080/; - proxy_http_version 1.1; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection 'upgrade'; - proxy_set_header Host $host; - proxy_cache_bypass $http_upgrade; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - - # 超时设置 - proxy_connect_timeout 60s; - proxy_send_timeout 60s; - proxy_read_timeout 60s; } # Agent Gateway 代理 location /agent/ { proxy_pass http://memoh-agent:8081/; - proxy_http_version 1.1; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection 'upgrade'; - proxy_set_header Host $host; - proxy_cache_bypass $http_upgrade; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - - # 超时设置 - proxy_connect_timeout 60s; - proxy_send_timeout 60s; - proxy_read_timeout 60s; } # 静态资源缓存 diff --git a/internal/containerd/service.go b/internal/containerd/service.go index bcf9449a..44ee8633 100644 --- a/internal/containerd/service.go +++ b/internal/containerd/service.go @@ -9,7 +9,6 @@ import ( "io" "log/slog" "os" - "path/filepath" "runtime" "strings" "syscall" @@ -673,15 +672,8 @@ func (s *DefaultService) ExecTaskStreaming(ctx context.Context, containerID stri if req.Terminal { ioOpts = append(ioOpts, cio.WithTerminal) } - fifoDir := strings.TrimSpace(req.FIFODir) - if fifoDir == "" { - if homeDir, err := os.UserHomeDir(); err == nil && homeDir != "" { - fifoDir = filepath.Join(homeDir, ".memoh", "containerd-fifo") - } else { - fifoDir = "/tmp/memoh-containerd-fifo" - } - } - if err := os.MkdirAll(fifoDir, 0o755); err != nil { + fifoDir, err := resolveExecFIFODir(req.FIFODir) + if err != nil { _ = stdinR.Close() _ = stdinW.Close() _ = stdoutR.Close() @@ -752,6 +744,27 @@ func (s *DefaultService) ExecTaskStreaming(ctx context.Context, containerID stri }, nil } +func resolveExecFIFODir(preferred string) (string, error) { + candidates := make([]string, 0, 3) + if p := strings.TrimSpace(preferred); p != "" { + candidates = append(candidates, p) + } + candidates = append(candidates, "/var/lib/containerd/memoh-fifo", "/tmp/memoh-containerd-fifo") + + var lastErr error + for _, dir := range candidates { + if err := os.MkdirAll(dir, 0o755); err == nil { + return dir, nil + } else { + lastErr = err + } + } + if lastErr == nil { + lastErr = fmt.Errorf("no fifo directory candidate available") + } + return "", lastErr +} + func (s *DefaultService) ListContainersByLabel(ctx context.Context, key, value string) ([]containerd.Container, error) { if key == "" { return nil, ErrInvalidArgument diff --git a/internal/handlers/fs.go b/internal/handlers/fs.go index d10d5b35..05b3b48f 100644 --- a/internal/handlers/fs.go +++ b/internal/handlers/fs.go @@ -4,11 +4,13 @@ import ( "bufio" "context" "encoding/json" + "errors" "fmt" "io" "log/slog" "net/http" "os/exec" + "path/filepath" "runtime" "strings" "sync" @@ -188,7 +190,8 @@ func (h *ContainerdHandler) getMCPSession(ctx context.Context, containerID strin func (h *ContainerdHandler) startContainerdMCPSession(ctx context.Context, containerID string) (*mcpSession, error) { execSession, err := h.service.ExecTaskStreaming(ctx, containerID, ctr.ExecTaskRequest{ - Args: []string{"/app/mcp"}, + Args: []string{"/app/mcp"}, + FIFODir: h.mcpFIFODir(), }) if err != nil { return nil, err @@ -207,11 +210,15 @@ func (h *ContainerdHandler) startContainerdMCPSession(ctx context.Context, conta go func() { _, err := execSession.Wait() if err != nil { + if isBenignMCPSessionExit(err) { + sess.closeWithError(io.EOF) + return + } h.logger.Error("mcp session exited", slog.Any("error", err), slog.String("container_id", containerID)) sess.closeWithError(err) - } else { - sess.closeWithError(io.EOF) + return } + sess.closeWithError(io.EOF) }() return sess, nil @@ -273,11 +280,15 @@ func (h *ContainerdHandler) startLimaMCPSession(containerID string) (*mcpSession go sess.readLoop() go func() { if err := cmd.Wait(); err != nil { + if isBenignMCPSessionExit(err) { + sess.closeWithError(io.EOF) + return + } h.logger.Error("mcp session exited", slog.Any("error", err), slog.String("container_id", containerID)) sess.closeWithError(err) - } else { - sess.closeWithError(io.EOF) + return } + sess.closeWithError(io.EOF) }() return sess, nil @@ -320,11 +331,32 @@ func (h *ContainerdHandler) startMCPStderrLogger(stderr io.ReadCloser, container h.logger.Warn("mcp stderr", slog.String("container_id", containerID), slog.String("message", line)) } if err := scanner.Err(); err != nil { + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrClosedPipe) || strings.Contains(err.Error(), "closed pipe") { + return + } h.logger.Error("mcp stderr read failed", slog.Any("error", err), slog.String("container_id", containerID)) } }() } +func isBenignMCPSessionExit(err error) bool { + if err == nil { + return false + } + if errors.Is(err, context.Canceled) || errors.Is(err, io.EOF) || errors.Is(err, io.ErrClosedPipe) { + return true + } + msg := strings.ToLower(err.Error()) + return strings.Contains(msg, "code = canceled") || strings.Contains(msg, "context canceled") || strings.Contains(msg, "closed pipe") +} + +func (h *ContainerdHandler) mcpFIFODir() string { + if root := strings.TrimSpace(h.cfg.DataRoot); root != "" { + return filepath.Join(root, ".containerd-fifo") + } + return "/tmp/memoh-containerd-fifo" +} + func (s *mcpSession) readLoop() { scanner := bufio.NewScanner(s.stdout) scanner.Buffer(make([]byte, 0, 64*1024), 8*1024*1024) diff --git a/internal/handlers/mcp_stdio.go b/internal/handlers/mcp_stdio.go index a262adaf..d213a2d7 100644 --- a/internal/handlers/mcp_stdio.go +++ b/internal/handlers/mcp_stdio.go @@ -178,6 +178,7 @@ func (h *ContainerdHandler) startContainerdMCPCommandSession(ctx context.Context Args: args, Env: env, WorkDir: strings.TrimSpace(req.Cwd), + FIFODir: h.mcpFIFODir(), }) if err != nil { return nil, err @@ -195,11 +196,15 @@ func (h *ContainerdHandler) startContainerdMCPCommandSession(ctx context.Context go func() { _, err := execSession.Wait() if err != nil { + if isBenignMCPSessionExit(err) { + sess.closeWithError(io.EOF) + return + } h.logger.Error("mcp stdio session exited", slog.Any("error", err), slog.String("container_id", containerID)) sess.closeWithError(err) - } else { - sess.closeWithError(io.EOF) + return } + sess.closeWithError(io.EOF) }() return sess, nil } @@ -342,11 +347,15 @@ func (h *ContainerdHandler) startLimaMCPCommandSession(containerID string, req M go sess.readLoop() go func() { if err := cmd.Wait(); err != nil { + if isBenignMCPSessionExit(err) { + sess.closeWithError(io.EOF) + return + } h.logger.Error("mcp stdio session exited", slog.Any("error", err), slog.String("container_id", containerID)) sess.closeWithError(err) - } else { - sess.closeWithError(io.EOF) + return } + sess.closeWithError(io.EOF) }() return sess, nil diff --git a/internal/handlers/ping.go b/internal/handlers/ping.go index 49fc7f9a..e21ff554 100644 --- a/internal/handlers/ping.go +++ b/internal/handlers/ping.go @@ -17,6 +17,7 @@ func NewPingHandler(log *slog.Logger) *PingHandler { func (h *PingHandler) Register(e *echo.Echo) { e.GET("/ping", h.Ping) + e.HEAD("/health", h.PingHead) } func (h *PingHandler) Ping(c echo.Context) error { @@ -24,3 +25,7 @@ func (h *PingHandler) Ping(c echo.Context) error { "status": "ok", }) } + +func (h *PingHandler) PingHead(c echo.Context) error { + return c.NoContent(http.StatusOK) +} diff --git a/internal/server/server.go b/internal/server/server.go index b451fa90..cb8113d2 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -42,7 +42,7 @@ func NewServer(log *slog.Logger, addr string, jwtSecret string, pingHandler *han })) e.Use(auth.JWTMiddleware(jwtSecret, func(c echo.Context) bool { path := c.Request().URL.Path - if path == "/ping" || path == "/api/swagger.json" || path == "/auth/login" { + if path == "/ping" || path == "/health" || path == "/api/swagger.json" || path == "/auth/login" { return true } if strings.HasPrefix(path, "/api/docs") { diff --git a/scripts/containerd-install.sh b/scripts/containerd-install.sh index c459f636..367b4f3b 100755 --- a/scripts/containerd-install.sh +++ b/scripts/containerd-install.sh @@ -7,7 +7,10 @@ if [ "$(uname -s)" = "Darwin" ]; then exit $? fi -if command -v containerd >/dev/null 2>&1 && command -v nerdctl >/dev/null 2>&1 && command -v buildctl >/dev/null 2>&1 && command -v buildkitd >/dev/null 2>&1; then +if command -v containerd >/dev/null 2>&1 \ + && command -v nerdctl >/dev/null 2>&1 \ + && command -v buildctl >/dev/null 2>&1 \ + && command -v buildkitd >/dev/null 2>&1; then containerd --version nerdctl --version buildctl --version