Files
Memoh/internal/agent/tools/read_media.go
T
2026-04-04 20:56:00 +08:00

138 lines
3.8 KiB
Go

package tools
import (
"context"
"encoding/base64"
"errors"
"fmt"
"io"
"net/http"
"strings"
"github.com/memohai/memoh/internal/workspace/bridge"
)
const (
	// ReadMediaToolName is the name of the tool whose results the agent
	// decoration layer inspects in order to intercept image payloads.
	// After the merge this is "read".
	ReadMediaToolName = "read"

	// defaultReadMediaMaxBytes is the size cap (20 MiB) that
	// ReadImageFromContainer applies when the caller passes a non-positive
	// limit.
	defaultReadMediaMaxBytes = 20 << 20
)
// readMediaSupportedMimeTypes is the set of lowercase MIME types that are
// accepted as image payloads. Membership is tested via
// isSupportedReadMediaMime.
var readMediaSupportedMimeTypes = map[string]struct{}{
	"image/png":  {},
	"image/jpeg": {},
	"image/gif":  {},
	"image/webp": {},
}
// ReadMediaToolResult is the public result returned to the model.
//
// Every field except OK is omitted from the JSON encoding when it holds its
// zero value.
type ReadMediaToolResult struct {
// OK reports whether the image was read and validated successfully.
OK bool `json:"ok"`
// Path is the path of the file that was read; populated on success.
Path string `json:"path,omitempty"`
// Mime is the sniffed MIME type of the image; populated on success.
Mime string `json:"mime,omitempty"`
// Size is the number of raw (pre-base64) bytes read; populated on success.
Size int `json:"size,omitempty"`
// Error is a human-readable failure message; populated when OK is false.
Error string `json:"error,omitempty"`
}
// ReadMediaToolOutput is the internal execution result used by the agent to
// inject the image into the next Twilight AI step while keeping the visible
// tool result lightweight.
type ReadMediaToolOutput struct {
// Public is the lightweight result that is shown to the model.
Public ReadMediaToolResult
// ImageBase64 is the standard-base64 encoding of the raw image bytes.
// It is left empty when the read failed.
ImageBase64 string
// ImageMediaType is the sniffed MIME type of the image (same value as
// Public.Mime). It is left empty when the read failed.
ImageMediaType string
}
// mimeSniffSize is the size of the header buffer handed to
// http.DetectContentType, which considers at most the first 512 bytes of its
// input.
const mimeSniffSize = 512
// ReadImageFromContainer reads a binary file through the bridge client,
// validates that it is a supported image format, and returns a
// ReadMediaToolOutput ready for the agent decoration pipeline.
//
// Only the first mimeSniffSize bytes are read before the MIME type is
// checked, so unsupported files are rejected without buffering their full
// contents. A non-positive maxBytes falls back to defaultReadMediaMaxBytes.
//
// Failures are never returned as a Go error: they are reported through the
// result, with Public.OK set to false and Public.Error carrying the message.
// On success Public.Size is the raw byte count and ImageBase64 holds the
// standard-base64 encoding of those bytes.
func ReadImageFromContainer(ctx context.Context, client *bridge.Client, path string, maxBytes int64) ReadMediaToolOutput {
	if maxBytes <= 0 {
		maxBytes = defaultReadMediaMaxBytes
	}

	reader, err := client.ReadRaw(ctx, path)
	if err != nil {
		return readMediaErrorResult(err.Error())
	}
	// Best-effort close: the stream is only read from, so a close error
	// carries nothing actionable.
	defer func() { _ = reader.Close() }()

	// Fill the whole sniff window before detecting the MIME type. A single
	// Read may legitimately return fewer bytes than are available (for
	// example with a chunked stream), and a truncated header can make a
	// valid image look unsupported. io.ReadFull keeps reading until the
	// buffer is full or the stream ends.
	header := make([]byte, mimeSniffSize)
	n, err := io.ReadFull(reader, header)
	// io.ErrUnexpectedEOF only means the file is shorter than the sniff
	// window, which is fine. io.EOF (empty file) and real I/O errors are
	// reported.
	if err != nil && !errors.Is(err, io.ErrUnexpectedEOF) {
		return readMediaErrorResult("failed to load image: " + err.Error())
	}
	header = header[:n]

	mimeType, err := detectReadMediaMime(header)
	if err != nil {
		return readMediaErrorResult(err.Error())
	}

	// MIME looks good: read the remainder. The limit allows one byte past
	// maxBytes so that an oversized file is detected instead of being
	// silently truncated.
	rest, err := io.ReadAll(io.LimitReader(reader, maxBytes-int64(n)+1))
	if err != nil {
		return readMediaErrorResult("failed to load image: " + err.Error())
	}

	data := make([]byte, 0, len(header)+len(rest))
	data = append(data, header...)
	data = append(data, rest...)
	if int64(len(data)) > maxBytes {
		return readMediaErrorResult(fmt.Sprintf("failed to load image: file exceeds %d bytes", maxBytes))
	}

	return ReadMediaToolOutput{
		Public: ReadMediaToolResult{
			OK:   true,
			Path: path,
			Mime: mimeType,
			Size: len(data),
		},
		ImageBase64:    base64.StdEncoding.EncodeToString(data),
		ImageMediaType: mimeType,
	}
}
// readMediaErrorResult builds a failed ReadMediaToolOutput whose public
// error is the whitespace-trimmed message. A blank message is replaced by
// the generic text "read failed". The image fields are left empty.
func readMediaErrorResult(message string) ReadMediaToolOutput {
	var out ReadMediaToolOutput // zero value already has Public.OK == false

	out.Public.Error = strings.TrimSpace(message)
	if out.Public.Error == "" {
		out.Public.Error = "read failed"
	}
	return out
}
// detectReadMediaMime sniffs the MIME type of data with
// http.DetectContentType and returns it, lowercased and trimmed, when it is
// one of the supported image types. Empty input and any other detected type
// yield an error naming the accepted formats.
func detectReadMediaMime(data []byte) (string, error) {
	if len(data) > 0 {
		detected := http.DetectContentType(data)
		detected = strings.ToLower(strings.TrimSpace(detected))
		if isSupportedReadMediaMime(detected) {
			return detected, nil
		}
	}
	return "", errors.New("only supports PNG, JPEG, GIF, or WebP image bytes")
}
// isSupportedReadMediaMime reports whether mimeType, compared
// case-insensitively and ignoring surrounding whitespace, is in
// readMediaSupportedMimeTypes.
func isSupportedReadMediaMime(mimeType string) bool {
	key := strings.ToLower(strings.TrimSpace(mimeType))
	_, supported := readMediaSupportedMimeTypes[key]
	return supported
}