mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-25 07:00:48 +09:00
112 lines
3.6 KiB
Go
112 lines
3.6 KiB
Go
package handlers
|
|
|
|
import (
|
|
"log/slog"
|
|
"net/http"
|
|
"strings"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
|
|
"github.com/memohai/memoh/internal/settings"
|
|
"github.com/memohai/memoh/internal/tts"
|
|
)
|
|
|
|
// BotTtsHandler handles per-bot TTS synthesis requests from the agent tool.
|
|
type BotTtsHandler struct {
|
|
ttsService *tts.Service
|
|
settingsService *settings.Service
|
|
tempStore *tts.TempStore
|
|
logger *slog.Logger
|
|
}
|
|
|
|
func NewBotTtsHandler(log *slog.Logger, ttsService *tts.Service, settingsService *settings.Service, tempStore *tts.TempStore) *BotTtsHandler {
|
|
return &BotTtsHandler{
|
|
ttsService: ttsService,
|
|
settingsService: settingsService,
|
|
tempStore: tempStore,
|
|
logger: log.With(slog.String("handler", "bot_tts")),
|
|
}
|
|
}
|
|
|
|
func (h *BotTtsHandler) Register(e *echo.Echo) {
|
|
e.POST("/bots/:bot_id/tts/synthesize", h.Synthesize)
|
|
}
|
|
|
|
// synthesizeRequest is the JSON body accepted by the synthesize endpoint.
type synthesizeRequest struct {
	// Text is the text to turn into speech, read from the "text" key.
	Text string `json:"text"`
}
|
|
|
|
// synthesizeResponse is the JSON body returned by the synthesize endpoint on
// success. It identifies the temp file that received the audio rather than
// carrying the audio bytes themselves.
type synthesizeResponse struct {
	// TempID is the temp-store identifier of the written audio file.
	TempID string `json:"temp_id"`
	// ContentType is the content type reported by the TTS service for the audio.
	ContentType string `json:"content_type"`
	// Size is the size of the audio file as reported by the temp store.
	Size int64 `json:"size"`
}
|
|
|
|
// Synthesize godoc
|
|
// @Summary Synthesize speech for a bot
|
|
// @Description Stream-synthesize text using the bot's configured TTS model, write to temp file
|
|
// @Tags bots
|
|
// @Accept json
|
|
// @Produce json
|
|
// @Param bot_id path string true "Bot ID"
|
|
// @Param request body synthesizeRequest true "Text to synthesize"
|
|
// @Success 200 {object} synthesizeResponse
|
|
// @Failure 400 {object} ErrorResponse
|
|
// @Failure 500 {object} ErrorResponse
|
|
// @Router /bots/{bot_id}/tts/synthesize [post].
|
|
func (h *BotTtsHandler) Synthesize(c echo.Context) error {
|
|
botID := strings.TrimSpace(c.Param("bot_id"))
|
|
if botID == "" {
|
|
return echo.NewHTTPError(http.StatusBadRequest, "bot_id is required")
|
|
}
|
|
|
|
var req synthesizeRequest
|
|
if err := c.Bind(&req); err != nil {
|
|
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
|
|
}
|
|
text := strings.TrimSpace(req.Text)
|
|
if text == "" {
|
|
return echo.NewHTTPError(http.StatusBadRequest, "text is required")
|
|
}
|
|
const maxTextLen = 500
|
|
if len([]rune(text)) > maxTextLen {
|
|
return echo.NewHTTPError(http.StatusBadRequest, "text too long, max 500 characters")
|
|
}
|
|
|
|
botSettings, err := h.settingsService.GetBot(c.Request().Context(), botID)
|
|
if err != nil {
|
|
h.logger.Error("failed to load bot settings", slog.String("bot_id", botID), slog.Any("error", err))
|
|
return echo.NewHTTPError(http.StatusInternalServerError, "failed to load bot settings")
|
|
}
|
|
if botSettings.TtsModelID == "" {
|
|
return echo.NewHTTPError(http.StatusBadRequest, "bot has no TTS model configured")
|
|
}
|
|
|
|
tempID, f, err := h.tempStore.Create()
|
|
if err != nil {
|
|
h.logger.Error("failed to create temp file", slog.Any("error", err))
|
|
return echo.NewHTTPError(http.StatusInternalServerError, "failed to create temp file")
|
|
}
|
|
|
|
contentType, streamErr := h.ttsService.StreamToFile(c.Request().Context(), botSettings.TtsModelID, text, f)
|
|
closeErr := f.Close()
|
|
if streamErr != nil {
|
|
h.logger.Error("tts synthesis failed", slog.String("bot_id", botID), slog.String("model_id", botSettings.TtsModelID), slog.Any("error", streamErr))
|
|
h.tempStore.Delete(tempID)
|
|
return echo.NewHTTPError(http.StatusInternalServerError, streamErr.Error())
|
|
}
|
|
if closeErr != nil {
|
|
h.logger.Error("failed to finalize audio file", slog.String("bot_id", botID), slog.Any("error", closeErr))
|
|
h.tempStore.Delete(tempID)
|
|
return echo.NewHTTPError(http.StatusInternalServerError, "failed to finalize audio file")
|
|
}
|
|
|
|
size, _ := h.tempStore.FileSize(tempID)
|
|
|
|
return c.JSON(http.StatusOK, synthesizeResponse{
|
|
TempID: tempID,
|
|
ContentType: contentType,
|
|
Size: size,
|
|
})
|
|
}
|