Files
Memoh/internal/handlers/bot_tts.go
T
2026-03-13 02:49:52 +08:00

112 lines
3.6 KiB
Go

package handlers
import (
"log/slog"
"net/http"
"strings"
"github.com/labstack/echo/v4"
"github.com/memohai/memoh/internal/settings"
"github.com/memohai/memoh/internal/tts"
)
// BotTtsHandler serves the per-bot text-to-speech synthesis endpoint that the
// agent tool calls. It bundles the collaborators the handler methods use.
type BotTtsHandler struct {
	// ttsService streams synthesized audio into a writer.
	ttsService *tts.Service
	// settingsService is queried for the bot's configured TTS model.
	settingsService *settings.Service
	// tempStore creates, measures, and deletes the temporary audio files.
	tempStore *tts.TempStore
	// logger is pre-tagged with handler=bot_tts by NewBotTtsHandler.
	logger *slog.Logger
}
// NewBotTtsHandler wires a BotTtsHandler from its dependencies. The supplied
// logger is decorated with a handler=bot_tts attribute so every record emitted
// by this handler can be attributed to it.
func NewBotTtsHandler(log *slog.Logger, ttsService *tts.Service, settingsService *settings.Service, tempStore *tts.TempStore) *BotTtsHandler {
	scopedLogger := log.With(slog.String("handler", "bot_tts"))

	handler := new(BotTtsHandler)
	handler.ttsService = ttsService
	handler.settingsService = settingsService
	handler.tempStore = tempStore
	handler.logger = scopedLogger
	return handler
}
// Register attaches the handler's routes to the given Echo instance.
// Currently this is the single POST endpoint served by Synthesize.
func (h *BotTtsHandler) Register(e *echo.Echo) {
	const synthesizeRoute = "/bots/:bot_id/tts/synthesize"

	e.POST(synthesizeRoute, h.Synthesize)
}
// synthesizeRequest is the JSON body accepted by the synthesize endpoint.
type synthesizeRequest struct {
	// Text is the content to be spoken; it travels under the "text" key.
	Text string `json:"text"`
}
// synthesizeResponse is the JSON body returned after a successful synthesis.
type synthesizeResponse struct {
	// TempID identifies the temporary file that holds the audio.
	TempID string `json:"temp_id"`
	// ContentType is the content type reported by the TTS service.
	ContentType string `json:"content_type"`
	// Size is the file size as reported by the temp store.
	Size int64 `json:"size"`
}
// Synthesize godoc
//
// Synthesize validates the request (non-empty bot_id, non-empty text of at
// most 500 characters), looks up the bot's TTS model, streams the synthesized
// audio into a freshly created temp file, and responds with the temp file ID,
// content type, and size. On a synthesis or close failure the temp file is
// deleted before the error is returned.
//
// @Summary Synthesize speech for a bot
// @Description Stream-synthesize text using the bot's configured TTS model, write to temp file
// @Tags bots
// @Accept json
// @Produce json
// @Param bot_id path string true "Bot ID"
// @Param request body synthesizeRequest true "Text to synthesize"
// @Success 200 {object} synthesizeResponse
// @Failure 400 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/tts/synthesize [post].
func (h *BotTtsHandler) Synthesize(c echo.Context) error {
	botID := strings.TrimSpace(c.Param("bot_id"))
	if botID == "" {
		return echo.NewHTTPError(http.StatusBadRequest, "bot_id is required")
	}

	var req synthesizeRequest
	if err := c.Bind(&req); err != nil {
		return echo.NewHTTPError(http.StatusBadRequest, err.Error())
	}
	text := strings.TrimSpace(req.Text)
	if text == "" {
		return echo.NewHTTPError(http.StatusBadRequest, "text is required")
	}
	// The limit is counted in runes, not bytes, so multi-byte characters
	// each count as one.
	const maxTextLen = 500
	if len([]rune(text)) > maxTextLen {
		return echo.NewHTTPError(http.StatusBadRequest, "text too long, max 500 characters")
	}

	ctx := c.Request().Context()

	botSettings, err := h.settingsService.GetBot(ctx, botID)
	if err != nil {
		h.logger.Error("failed to load bot settings", slog.String("bot_id", botID), slog.Any("error", err))
		return echo.NewHTTPError(http.StatusInternalServerError, "failed to load bot settings")
	}
	if botSettings.TtsModelID == "" {
		return echo.NewHTTPError(http.StatusBadRequest, "bot has no TTS model configured")
	}

	tempID, f, err := h.tempStore.Create()
	if err != nil {
		h.logger.Error("failed to create temp file", slog.Any("error", err))
		return echo.NewHTTPError(http.StatusInternalServerError, "failed to create temp file")
	}

	contentType, streamErr := h.ttsService.StreamToFile(ctx, botSettings.TtsModelID, text, f)
	// Close unconditionally before inspecting streamErr so the file handle is
	// released on every path; the close error is only reported if streaming
	// itself succeeded.
	closeErr := f.Close()
	if streamErr != nil {
		h.logger.Error("tts synthesis failed", slog.String("bot_id", botID), slog.String("model_id", botSettings.TtsModelID), slog.Any("error", streamErr))
		h.tempStore.Delete(tempID)
		return echo.NewHTTPError(http.StatusInternalServerError, streamErr.Error())
	}
	if closeErr != nil {
		h.logger.Error("failed to finalize audio file", slog.String("bot_id", botID), slog.Any("error", closeErr))
		h.tempStore.Delete(tempID)
		return echo.NewHTTPError(http.StatusInternalServerError, "failed to finalize audio file")
	}

	// The audio has already been written and closed at this point, so a
	// failure to read its size is not fatal: keep the best-effort response
	// but leave a trace instead of silently reporting whatever size came back.
	size, sizeErr := h.tempStore.FileSize(tempID)
	if sizeErr != nil {
		h.logger.Warn("failed to read audio file size", slog.String("bot_id", botID), slog.String("temp_id", tempID), slog.Any("error", sizeErr))
	}

	return c.JSON(http.StatusOK, synthesizeResponse{
		TempID:      tempID,
		ContentType: contentType,
		Size:        size,
	})
}