Files
Memoh/internal/handlers/bot_tts.go
Acbox 925fdee478 feat: transcription support (#394)
* feat: expand speech provider support with new client types and configuration schema

* feat: add icon support for speech providers and update related configurations

* feat: add SVG support for Deepgram and Elevenlabs with Vue components

* feat: except *-speech client type in llm provider

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: remove go.mod replace

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: update go module dependencies

* feat: Ear and Mouth

* fix: separate ear/mouth page

* fix: separate audio domain and restore transcription templates

Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs.

---------

Co-authored-by: aki <arisu@ieee.org>
2026-04-22 00:12:01 +08:00

112 lines
3.6 KiB
Go

package handlers
import (
"log/slog"
"net/http"
"strings"
"github.com/labstack/echo/v4"
audiopkg "github.com/memohai/memoh/internal/audio"
"github.com/memohai/memoh/internal/settings"
)
// BotAudioHandler serves the per-bot text-to-speech endpoint that the agent
// tool calls to turn text into an audio temp file.
type BotAudioHandler struct {
	// audioService streams synthesized audio for a model into a file.
	audioService *audiopkg.Service
	// settingsService is used to look up the bot's configured TTS model.
	settingsService *settings.Service
	// tempStore creates, sizes and deletes the temp files that hold the audio.
	tempStore *audiopkg.TempStore
	// logger is pre-tagged with handler=bot_audio by the constructor.
	logger *slog.Logger
}
// NewBotAudioHandler wires a BotAudioHandler from its dependencies. The
// supplied logger is decorated with a handler=bot_audio attribute so every
// log line emitted by this handler can be attributed to it.
func NewBotAudioHandler(log *slog.Logger, audioService *audiopkg.Service, settingsService *settings.Service, tempStore *audiopkg.TempStore) *BotAudioHandler {
	scopedLogger := log.With(slog.String("handler", "bot_audio"))

	handler := new(BotAudioHandler)
	handler.audioService = audioService
	handler.settingsService = settingsService
	handler.tempStore = tempStore
	handler.logger = scopedLogger
	return handler
}
// Register attaches the handler's routes to the given Echo instance.
func (h *BotAudioHandler) Register(e *echo.Echo) {
	// :bot_id is read back in Synthesize via c.Param("bot_id").
	const synthesizeRoute = "/bots/:bot_id/tts/synthesize"
	e.POST(synthesizeRoute, h.Synthesize)
}
// synthesizeRequest is the JSON body bound by the synthesize endpoint.
type synthesizeRequest struct {
	// Text is the content to be spoken. The handler trims surrounding
	// whitespace and rejects empty or over-long values.
	Text string `json:"text"`
}
// synthesizeResponse is the JSON payload returned after a successful
// synthesis; it describes the temp file that now holds the audio.
type synthesizeResponse struct {
	// TempID identifies the temp file created for this synthesis.
	TempID string `json:"temp_id"`
	// ContentType is the media type reported by the audio service.
	ContentType string `json:"content_type"`
	// Size is the temp file's size as reported by the temp store.
	Size int64 `json:"size"`
}
// Synthesize godoc
// @Summary Synthesize speech for a bot
// @Description Stream-synthesize text using the bot's configured TTS model, write to temp file
// @Tags bots
// @Accept json
// @Produce json
// @Param bot_id path string true "Bot ID"
// @Param request body synthesizeRequest true "Text to synthesize"
// @Success 200 {object} synthesizeResponse
// @Failure 400 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/tts/synthesize [post].
func (h *BotAudioHandler) Synthesize(c echo.Context) error {
	botID := strings.TrimSpace(c.Param("bot_id"))
	if botID == "" {
		return echo.NewHTTPError(http.StatusBadRequest, "bot_id is required")
	}

	var req synthesizeRequest
	if err := c.Bind(&req); err != nil {
		return echo.NewHTTPError(http.StatusBadRequest, err.Error())
	}
	text := strings.TrimSpace(req.Text)
	if text == "" {
		return echo.NewHTTPError(http.StatusBadRequest, "text is required")
	}
	// The limit is counted in runes rather than bytes, so a multi-byte
	// character consumes one unit of the budget.
	const maxTextLen = 500
	if len([]rune(text)) > maxTextLen {
		return echo.NewHTTPError(http.StatusBadRequest, "text too long, max 500 characters")
	}

	// One request context is shared by the settings lookup and the synthesis.
	ctx := c.Request().Context()

	botSettings, err := h.settingsService.GetBot(ctx, botID)
	if err != nil {
		h.logger.Error("failed to load bot settings", slog.String("bot_id", botID), slog.Any("error", err))
		return echo.NewHTTPError(http.StatusInternalServerError, "failed to load bot settings")
	}
	if botSettings.TtsModelID == "" {
		return echo.NewHTTPError(http.StatusBadRequest, "bot has no TTS model configured")
	}

	tempID, f, err := h.tempStore.Create()
	if err != nil {
		h.logger.Error("failed to create temp file", slog.Any("error", err))
		return echo.NewHTTPError(http.StatusInternalServerError, "failed to create temp file")
	}

	// Close unconditionally before inspecting either error so the file handle
	// is released on every path; the stream error takes precedence when both
	// the synthesis and the close fail.
	contentType, streamErr := h.audioService.StreamToFile(ctx, botSettings.TtsModelID, text, f)
	closeErr := f.Close()
	if streamErr != nil {
		h.logger.Error("speech synthesis failed", slog.String("bot_id", botID), slog.String("model_id", botSettings.TtsModelID), slog.Any("error", streamErr))
		// Discard the temp entry created above; the audio is incomplete.
		h.tempStore.Delete(tempID)
		return echo.NewHTTPError(http.StatusInternalServerError, streamErr.Error())
	}
	if closeErr != nil {
		h.logger.Error("failed to finalize audio file", slog.String("bot_id", botID), slog.Any("error", closeErr))
		h.tempStore.Delete(tempID)
		return echo.NewHTTPError(http.StatusInternalServerError, "failed to finalize audio file")
	}

	// The size is informational only: the audio has already been written, so
	// a failed lookup is logged and reported as 0 instead of failing the
	// request.
	size, sizeErr := h.tempStore.FileSize(tempID)
	if sizeErr != nil {
		h.logger.Warn("failed to determine audio file size", slog.String("bot_id", botID), slog.String("temp_id", tempID), slog.Any("error", sizeErr))
		size = 0
	}

	return c.JSON(http.StatusOK, synthesizeResponse{
		TempID:      tempID,
		ContentType: contentType,
		Size:        size,
	})
}