Files
Memoh/internal/attachment/normalize.go
T
BBQ bc374fe8cd refactor: content-addressed assets, cross-channel multimodal, infra simplification (#63)
* refactor(attachment): multimodal attachment refactor with snapshot schema and storage layer

- Add snapshot schema migration (0008) and update init/versions/snapshots
- Add internal/attachment and internal/channel normalize for unified attachment handling
- Move containerfs provider from internal/media to internal/storage
- Update agent types, channel adapters (Telegram/Feishu), inbound and handlers
- Add containerd snapshot lineage and local_channel tests
- Regenerate sqlc, swagger and SDK

* refactor(media): content-addressed asset system with unified naming

- Replace asset_id foreign key with content_hash as sole identifier
  for bot_history_message_assets (pure soft-link model)
- Remove mime, size_bytes, storage_key from DB; derive at read time
  via media.Resolve from actual storage
- Merge migrations 0008/0009 into single 0008; keep 0001 as canonical schema
- Add Docker initdb script for deterministic migration execution order
- Fix cross-channel real-time image display (Telegram → WebUI SSE)
- Fix message disappearing on refresh (null assets fallback)
- Fix file icon instead of image preview (mime derivation from storage)
- Unify AssetID → ContentHash naming across Go, Agent, and Frontend
- Change storage key prefix from 4-char to 2-char for directory sharding
- Add server-entrypoint.sh for Docker deployment migration handling

* refactor(infra): embedded migrations, Docker simplification, and config consolidation

- Embed SQL migrations into Go binary, removing shell-based migration scripts
- Consolidate config files into conf/ directory (app.example.toml, app.docker.toml, app.dev.toml)
- Simplify Docker setup: remove initdb.d scripts, streamline nginx config and entrypoint
- Remove legacy CLI, feishu-echo commands, and obsolete incremental migration files
- Update install script and docs to require sudo for one-click install
- Add mise tasks for dev environment orchestration

* chore: recover migrations

---------

Co-authored-by: Acbox <acbox0328@gmail.com>
2026-02-19 00:20:27 +08:00

141 lines
3.6 KiB
Go

package attachment
import (
"bytes"
"encoding/base64"
"fmt"
"io"
"net/http"
"strings"
"github.com/memohai/memoh/internal/media"
)
// MapMediaType maps attachment type strings to media types.
func MapMediaType(rawType string) media.MediaType {
switch strings.ToLower(strings.TrimSpace(rawType)) {
case "image", "gif":
return media.MediaTypeImage
case "audio", "voice":
return media.MediaTypeAudio
case "video":
return media.MediaTypeVideo
default:
return media.MediaTypeFile
}
}
// NormalizeMime normalizes MIME to lowercase token form.
func NormalizeMime(raw string) string {
mime := strings.ToLower(strings.TrimSpace(raw))
if mime == "" {
return ""
}
if idx := strings.Index(mime, ";"); idx >= 0 {
mime = strings.TrimSpace(mime[:idx])
}
return mime
}
// MimeFromDataURL extracts MIME from a data URL.
func MimeFromDataURL(raw string) string {
value := strings.TrimSpace(raw)
lower := strings.ToLower(value)
if !strings.HasPrefix(lower, "data:") {
return ""
}
rest := value[len("data:"):]
if idx := strings.Index(rest, ";"); idx >= 0 {
return NormalizeMime(rest[:idx])
}
if idx := strings.Index(rest, ","); idx >= 0 {
return NormalizeMime(rest[:idx])
}
return ""
}
// ResolveMime resolves source MIME and sniffed MIME into final MIME.
func ResolveMime(mediaType media.MediaType, sourceMime, sniffedMime string) string {
source := NormalizeMime(sourceMime)
sniffed := NormalizeMime(sniffedMime)
sourceGeneric := source == "" || source == "application/octet-stream"
if mediaType == media.MediaTypeImage {
if strings.HasPrefix(source, "image/") {
return source
}
if strings.HasPrefix(sniffed, "image/") {
return sniffed
}
if !sourceGeneric {
return source
}
if sniffed != "" {
return sniffed
}
return "application/octet-stream"
}
if !sourceGeneric {
return source
}
if sniffed != "" {
return sniffed
}
if source != "" {
return source
}
return "application/octet-stream"
}
// PrepareReaderAndMime reads a small prefix for MIME sniffing and replays it.
func PrepareReaderAndMime(reader io.Reader, mediaType media.MediaType, sourceMime string) (io.Reader, string, error) {
if reader == nil {
return nil, "", fmt.Errorf("reader is required")
}
header := make([]byte, 512)
n, err := reader.Read(header)
if err != nil && err != io.EOF {
return nil, "", fmt.Errorf("read mime sniff bytes: %w", err)
}
header = header[:n]
sniffed := ""
if len(header) > 0 {
sniffed = NormalizeMime(http.DetectContentType(header))
}
finalMime := ResolveMime(mediaType, sourceMime, sniffed)
return io.MultiReader(bytes.NewReader(header), reader), finalMime, nil
}
// NormalizeBase64DataURL normalizes raw base64 into a data URL.
func NormalizeBase64DataURL(input, mime string) string {
value := strings.TrimSpace(input)
if value == "" {
return ""
}
if strings.HasPrefix(strings.ToLower(value), "data:") {
return value
}
mime = NormalizeMime(mime)
if mime == "" {
mime = "application/octet-stream"
}
return "data:" + mime + ";base64," + value
}
// DecodeBase64 decodes both raw base64 and data URL base64 content.
// The returned reader is bounded to maxBytes+1 for caller-side size validation.
func DecodeBase64(input string, maxBytes int64) (io.Reader, error) {
value := strings.TrimSpace(input)
if value == "" {
return nil, fmt.Errorf("base64 payload is empty")
}
if strings.HasPrefix(strings.ToLower(value), "data:") {
if idx := strings.Index(value, ","); idx >= 0 {
value = value[idx+1:]
}
}
decoder := base64.NewDecoder(base64.StdEncoding, strings.NewReader(value))
return io.LimitReader(decoder, maxBytes+1), nil
}