Revert "Feat/speech support (#392)" (#393)

This reverts commit c9dcfe287f.
This commit is contained in:
Acbox
2026-04-22 00:11:16 +08:00
committed by GitHub
parent c9dcfe287f
commit fd8f1ec078
70 changed files with 1689 additions and 6609 deletions
+13 -13
View File
@@ -7,28 +7,28 @@ import (
"github.com/labstack/echo/v4"
audiopkg "github.com/memohai/memoh/internal/audio"
"github.com/memohai/memoh/internal/settings"
"github.com/memohai/memoh/internal/tts"
)
// BotAudioHandler handles per-bot speech synthesis requests from the agent tool.
type BotAudioHandler struct {
audioService *audiopkg.Service
// BotTtsHandler handles per-bot TTS synthesis requests from the agent tool.
type BotTtsHandler struct {
ttsService *tts.Service
settingsService *settings.Service
tempStore *audiopkg.TempStore
tempStore *tts.TempStore
logger *slog.Logger
}
func NewBotAudioHandler(log *slog.Logger, audioService *audiopkg.Service, settingsService *settings.Service, tempStore *audiopkg.TempStore) *BotAudioHandler {
return &BotAudioHandler{
audioService: audioService,
func NewBotTtsHandler(log *slog.Logger, ttsService *tts.Service, settingsService *settings.Service, tempStore *tts.TempStore) *BotTtsHandler {
return &BotTtsHandler{
ttsService: ttsService,
settingsService: settingsService,
tempStore: tempStore,
logger: log.With(slog.String("handler", "bot_audio")),
logger: log.With(slog.String("handler", "bot_tts")),
}
}
func (h *BotAudioHandler) Register(e *echo.Echo) {
func (h *BotTtsHandler) Register(e *echo.Echo) {
e.POST("/bots/:bot_id/tts/synthesize", h.Synthesize)
}
@@ -54,7 +54,7 @@ type synthesizeResponse struct {
// @Failure 400 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /bots/{bot_id}/tts/synthesize [post].
func (h *BotAudioHandler) Synthesize(c echo.Context) error {
func (h *BotTtsHandler) Synthesize(c echo.Context) error {
botID := strings.TrimSpace(c.Param("bot_id"))
if botID == "" {
return echo.NewHTTPError(http.StatusBadRequest, "bot_id is required")
@@ -88,10 +88,10 @@ func (h *BotAudioHandler) Synthesize(c echo.Context) error {
return echo.NewHTTPError(http.StatusInternalServerError, "failed to create temp file")
}
contentType, streamErr := h.audioService.StreamToFile(c.Request().Context(), botSettings.TtsModelID, text, f)
contentType, streamErr := h.ttsService.StreamToFile(c.Request().Context(), botSettings.TtsModelID, text, f)
closeErr := f.Close()
if streamErr != nil {
h.logger.Error("speech synthesis failed", slog.String("bot_id", botID), slog.String("model_id", botSettings.TtsModelID), slog.Any("error", streamErr))
h.logger.Error("tts synthesis failed", slog.String("bot_id", botID), slog.String("model_id", botSettings.TtsModelID), slog.Any("error", streamErr))
h.tempStore.Delete(tempID)
return echo.NewHTTPError(http.StatusInternalServerError, streamErr.Error())
}
+24 -24
View File
@@ -30,30 +30,30 @@ import (
messagepkg "github.com/memohai/memoh/internal/message"
)
// localSpeechSynthesizer synthesizes text to speech audio.
type localSpeechSynthesizer interface {
// localTtsSynthesizer synthesizes text to speech audio.
type localTtsSynthesizer interface {
Synthesize(ctx context.Context, modelID string, text string, overrideCfg map[string]any) ([]byte, string, error)
}
// localSpeechModelResolver resolves speech model IDs for bots.
type localSpeechModelResolver interface {
ResolveSpeechModelID(ctx context.Context, botID string) (string, error)
// localTtsModelResolver resolves TTS model IDs for bots.
type localTtsModelResolver interface {
ResolveTtsModelID(ctx context.Context, botID string) (string, error)
}
// LocalChannelHandler handles local channel routes (WebUI / API) backed by bot history.
type LocalChannelHandler struct {
channelType channel.ChannelType
channelManager *channel.Manager
channelStore *channel.Store
chatService *conversation.Service
routeHub *local.RouteHub
botService *bots.Service
accountService *accounts.Service
resolver *flow.Resolver
mediaService *media.Service
speechService localSpeechSynthesizer
speechModelResolver localSpeechModelResolver
logger *slog.Logger
channelType channel.ChannelType
channelManager *channel.Manager
channelStore *channel.Store
chatService *conversation.Service
routeHub *local.RouteHub
botService *bots.Service
accountService *accounts.Service
resolver *flow.Resolver
mediaService *media.Service
ttsService localTtsSynthesizer
ttsModelResolver localTtsModelResolver
logger *slog.Logger
}
// NewLocalChannelHandler creates a local channel handler.
@@ -80,10 +80,10 @@ func (h *LocalChannelHandler) SetMediaService(svc *media.Service) {
h.mediaService = svc
}
// SetSpeechService configures speech synthesis for handling speech_delta events.
func (h *LocalChannelHandler) SetSpeechService(synth localSpeechSynthesizer, resolver localSpeechModelResolver) {
h.speechService = synth
h.speechModelResolver = resolver
// SetTtsService configures TTS synthesis for handling speech_delta events.
func (h *LocalChannelHandler) SetTtsService(synth localTtsSynthesizer, resolver localTtsModelResolver) {
h.ttsService = synth
h.ttsModelResolver = resolver
}
// Register registers the local channel routes.
@@ -719,12 +719,12 @@ func (h *LocalChannelHandler) ingestSingleAttachment(ctx context.Context, botID,
// wsSynthesizeSpeech handles speech_delta events by synthesizing audio and
// injecting attachment_delta events with the resulting voice attachments.
func (h *LocalChannelHandler) wsSynthesizeSpeech(ctx context.Context, botID string, original json.RawMessage) []json.RawMessage {
if h.speechService == nil || h.speechModelResolver == nil {
if h.ttsService == nil || h.ttsModelResolver == nil {
h.logger.Warn("speech_delta received but TTS service not configured")
return nil
}
modelID, err := h.speechModelResolver.ResolveSpeechModelID(ctx, botID)
modelID, err := h.ttsModelResolver.ResolveTtsModelID(ctx, botID)
if err != nil || strings.TrimSpace(modelID) == "" {
h.logger.Warn("speech_delta: bot has no TTS model configured", slog.String("bot_id", botID))
return nil
@@ -746,7 +746,7 @@ func (h *LocalChannelHandler) wsSynthesizeSpeech(ctx context.Context, botID stri
continue
}
audioData, contentType, synthErr := h.speechService.Synthesize(ctx, modelID, text, nil)
audioData, contentType, synthErr := h.ttsService.Synthesize(ctx, modelID, text, nil)
if synthErr != nil {
h.logger.Warn("speech synthesis failed", slog.String("bot_id", botID), slog.Any("error", synthErr))
continue
+29 -327
View File
@@ -1,83 +1,55 @@
package handlers
import (
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"mime/multipart"
"net/http"
"strings"
"github.com/labstack/echo/v4"
audiopkg "github.com/memohai/memoh/internal/audio"
"github.com/memohai/memoh/internal/models"
"github.com/memohai/memoh/internal/tts"
)
type AudioHandler struct {
service *audiopkg.Service
type SpeechHandler struct {
service *tts.Service
modelsService *models.Service
logger *slog.Logger
}
func NewAudioHandler(log *slog.Logger, service *audiopkg.Service, modelsService *models.Service) *AudioHandler {
return &AudioHandler{
func NewSpeechHandler(log *slog.Logger, service *tts.Service, modelsService *models.Service) *SpeechHandler {
return &SpeechHandler{
service: service,
modelsService: modelsService,
logger: log.With(slog.String("handler", "audio")),
logger: log.With(slog.String("handler", "speech")),
}
}
func (h *AudioHandler) Register(e *echo.Echo) {
func (h *SpeechHandler) Register(e *echo.Echo) {
pg := e.Group("/speech-providers")
pg.GET("", h.ListProviders)
pg.GET("/:id", h.GetProvider)
pg.GET("/meta", h.ListSpeechMeta)
pg.GET("/meta", h.ListMeta)
pg.GET("/:id/models", h.ListModelsByProvider)
pg.POST("/:id/import-models", h.ImportModels)
tpg := e.Group("/transcription-providers")
tpg.GET("", h.ListTranscriptionProviders)
tpg.GET("/meta", h.ListTranscriptionMeta)
tpg.GET("/:id", h.GetProvider)
tpg.GET("/:id/models", h.ListTranscriptionModelsByProvider)
tpg.POST("/:id/import-models", h.ImportTranscriptionModels)
mg := e.Group("/speech-models")
mg.GET("", h.ListModels)
mg.GET("/:id", h.GetModel)
mg.PUT("/:id", h.UpdateModel)
mg.GET("/:id/capabilities", h.GetModelCapabilities)
mg.POST("/:id/test", h.TestModel)
tg := e.Group("/transcription-models")
tg.GET("", h.ListTranscriptionModels)
tg.GET("/:id", h.GetTranscriptionModel)
tg.PUT("/:id", h.UpdateTranscriptionModel)
tg.GET("/:id/capabilities", h.GetTranscriptionModelCapabilities)
tg.POST("/:id/test", h.TestTranscriptionModel)
}
// ListMeta godoc
// @Summary List speech provider metadata
// @Description List available speech provider types with their models and capabilities
// @Tags speech-providers
// @Success 200 {array} audiopkg.ProviderMetaResponse
// @Success 200 {array} tts.ProviderMetaResponse
// @Router /speech-providers/meta [get].
func (h *AudioHandler) ListSpeechMeta(c echo.Context) error {
return c.JSON(http.StatusOK, h.service.ListSpeechMeta(c.Request().Context()))
}
// ListTranscriptionMeta godoc
// @Summary List transcription provider metadata
// @Description List available transcription provider types with their models and capabilities
// @Tags transcription-providers
// @Success 200 {array} audiopkg.ProviderMetaResponse
// @Router /transcription-providers/meta [get].
func (h *AudioHandler) ListTranscriptionMeta(c echo.Context) error {
return c.JSON(http.StatusOK, h.service.ListTranscriptionMeta(c.Request().Context()))
func (h *SpeechHandler) ListMeta(c echo.Context) error {
return c.JSON(http.StatusOK, h.service.ListMeta(c.Request().Context()))
}
// ListProviders godoc
@@ -85,10 +57,10 @@ func (h *AudioHandler) ListTranscriptionMeta(c echo.Context) error {
// @Description List providers that support speech (filtered view of unified providers table)
// @Tags speech-providers
// @Produce json
// @Success 200 {array} audiopkg.SpeechProviderResponse
// @Success 200 {array} tts.SpeechProviderResponse
// @Failure 500 {object} ErrorResponse
// @Router /speech-providers [get].
func (h *AudioHandler) ListProviders(c echo.Context) error {
func (h *SpeechHandler) ListProviders(c echo.Context) error {
items, err := h.service.ListSpeechProviders(c.Request().Context())
if err != nil {
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
@@ -96,34 +68,17 @@ func (h *AudioHandler) ListProviders(c echo.Context) error {
return c.JSON(http.StatusOK, items)
}
// ListTranscriptionProviders godoc
// @Summary List transcription providers
// @Description List providers that support transcription (filtered view of unified providers table)
// @Tags transcription-providers
// @Produce json
// @Success 200 {array} audiopkg.SpeechProviderResponse
// @Failure 500 {object} ErrorResponse
// @Router /transcription-providers [get].
func (h *AudioHandler) ListTranscriptionProviders(c echo.Context) error {
	ctx := c.Request().Context()
	providers, listErr := h.service.ListTranscriptionProviders(ctx)
	if listErr == nil {
		return c.JSON(http.StatusOK, providers)
	}
	// Every service failure is reported as a 500 carrying the error text.
	return echo.NewHTTPError(http.StatusInternalServerError, listErr.Error())
}
// GetProvider godoc
// @Summary Get speech provider
// @Description Get a speech provider with masked config values
// @Tags speech-providers
// @Produce json
// @Param id path string true "Provider ID (UUID)"
// @Success 200 {object} audiopkg.SpeechProviderResponse
// @Success 200 {object} tts.SpeechProviderResponse
// @Failure 400 {object} ErrorResponse
// @Failure 404 {object} ErrorResponse
// @Router /speech-providers/{id} [get].
// @Router /transcription-providers/{id} [get].
func (h *AudioHandler) GetProvider(c echo.Context) error {
func (h *SpeechHandler) GetProvider(c echo.Context) error {
id := strings.TrimSpace(c.Param("id"))
if id == "" {
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
@@ -141,11 +96,11 @@ func (h *AudioHandler) GetProvider(c echo.Context) error {
// @Tags speech-providers
// @Produce json
// @Param id path string true "Provider ID (UUID)"
// @Success 200 {array} audiopkg.SpeechModelResponse
// @Success 200 {array} tts.SpeechModelResponse
// @Failure 400 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /speech-providers/{id}/models [get].
func (h *AudioHandler) ListModelsByProvider(c echo.Context) error {
func (h *SpeechHandler) ListModelsByProvider(c echo.Context) error {
id := strings.TrimSpace(c.Param("id"))
if id == "" {
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
@@ -164,12 +119,12 @@ func (h *AudioHandler) ListModelsByProvider(c echo.Context) error {
// @Accept json
// @Produce json
// @Param id path string true "Provider ID (UUID)"
// @Success 200 {object} audiopkg.ImportModelsResponse
// @Success 200 {object} tts.ImportModelsResponse
// @Failure 400 {object} ErrorResponse
// @Failure 404 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /speech-providers/{id}/import-models [post].
func (h *AudioHandler) ImportModels(c echo.Context) error {
func (h *SpeechHandler) ImportModels(c echo.Context) error {
id := strings.TrimSpace(c.Param("id"))
if id == "" {
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
@@ -180,7 +135,7 @@ func (h *AudioHandler) ImportModels(c echo.Context) error {
return echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("fetch remote speech models: %v", err))
}
resp := audiopkg.ImportModelsResponse{
resp := tts.ImportModelsResponse{
Models: make([]string, 0, len(remoteModels)),
}
@@ -212,92 +167,15 @@ func (h *AudioHandler) ImportModels(c echo.Context) error {
return c.JSON(http.StatusOK, resp)
}
// ListTranscriptionModelsByProvider godoc
// @Summary List transcription models by provider
// @Description List models of type 'transcription' for a specific transcription provider
// @Tags transcription-providers
// @Produce json
// @Param id path string true "Provider ID (UUID)"
// @Success 200 {array} audiopkg.TranscriptionModelResponse
// @Failure 400 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /transcription-providers/{id}/models [get].
func (h *AudioHandler) ListTranscriptionModelsByProvider(c echo.Context) error {
	// A blank (or whitespace-only) path parameter is rejected up front.
	providerID := strings.TrimSpace(c.Param("id"))
	if len(providerID) == 0 {
		return echo.NewHTTPError(http.StatusBadRequest, "id is required")
	}

	ctx := c.Request().Context()
	found, listErr := h.service.ListTranscriptionModelsByProvider(ctx, providerID)
	if listErr == nil {
		return c.JSON(http.StatusOK, found)
	}
	return echo.NewHTTPError(http.StatusInternalServerError, listErr.Error())
}
// ImportTranscriptionModels godoc
// @Summary Import transcription models from provider
// @Description Fetch models using the configured transcription provider and import them into the unified models table
// @Tags transcription-providers
// @Accept json
// @Produce json
// @Param id path string true "Provider ID (UUID)"
// @Success 200 {object} audiopkg.ImportModelsResponse
// @Failure 400 {object} ErrorResponse
// @Failure 404 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /transcription-providers/{id}/import-models [post].
func (h *AudioHandler) ImportTranscriptionModels(c echo.Context) error {
	providerID := strings.TrimSpace(c.Param("id"))
	if len(providerID) == 0 {
		return echo.NewHTTPError(http.StatusBadRequest, "id is required")
	}

	ctx := c.Request().Context()
	remote, fetchErr := h.service.FetchRemoteTranscriptionModels(ctx, providerID)
	if fetchErr != nil {
		return echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("fetch remote transcription models: %v", fetchErr))
	}

	summary := audiopkg.ImportModelsResponse{
		Models: make([]string, 0, len(remote)),
	}

	for _, candidate := range remote {
		// Fall back to the model ID when the provider supplies no usable name.
		displayName := strings.TrimSpace(candidate.Name)
		if displayName == "" {
			displayName = candidate.ID
		}

		_, createErr := h.modelsService.Create(ctx, models.AddRequest{
			ModelID:    candidate.ID,
			Name:       displayName,
			ProviderID: providerID,
			Type:       models.ModelTypeTranscription,
			Config:     models.ModelConfig{},
		})

		switch {
		case createErr == nil:
			summary.Created++
			summary.Models = append(summary.Models, candidate.ID)
		case errors.Is(createErr, models.ErrModelIDAlreadyExists):
			// Already imported earlier: counted as skipped, not as a failure.
			summary.Skipped++
		default:
			// Other failures are logged and the import moves on to the next model.
			h.logger.Warn("failed to import transcription model", slog.String("model_id", candidate.ID), slog.Any("error", createErr))
		}
	}

	return c.JSON(http.StatusOK, summary)
}
// ListModels godoc
// @Summary List all speech models
// @Description List all models of type 'speech' (filtered view of unified models table)
// @Tags speech-models
// @Produce json
// @Success 200 {array} audiopkg.SpeechModelResponse
// @Success 200 {array} tts.SpeechModelResponse
// @Failure 500 {object} ErrorResponse
// @Router /speech-models [get].
func (h *AudioHandler) ListModels(c echo.Context) error {
func (h *SpeechHandler) ListModels(c echo.Context) error {
items, err := h.service.ListSpeechModels(c.Request().Context())
if err != nil {
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
@@ -305,31 +183,15 @@ func (h *AudioHandler) ListModels(c echo.Context) error {
return c.JSON(http.StatusOK, items)
}
// ListTranscriptionModels godoc
// @Summary List all transcription models
// @Description List all models of type 'transcription' (filtered view of unified models table)
// @Tags transcription-models
// @Produce json
// @Success 200 {array} audiopkg.TranscriptionModelResponse
// @Failure 500 {object} ErrorResponse
// @Router /transcription-models [get].
func (h *AudioHandler) ListTranscriptionModels(c echo.Context) error {
	ctx := c.Request().Context()
	found, listErr := h.service.ListTranscriptionModels(ctx)
	if listErr == nil {
		return c.JSON(http.StatusOK, found)
	}
	// Every service failure is reported as a 500 carrying the error text.
	return echo.NewHTTPError(http.StatusInternalServerError, listErr.Error())
}
// GetModel godoc
// @Summary Get a speech model
// @Tags speech-models
// @Produce json
// @Param id path string true "Model ID"
// @Success 200 {object} audiopkg.SpeechModelResponse
// @Success 200 {object} tts.SpeechModelResponse
// @Failure 404 {object} ErrorResponse
// @Router /speech-models/{id} [get].
func (h *AudioHandler) GetModel(c echo.Context) error {
func (h *SpeechHandler) GetModel(c echo.Context) error {
id := strings.TrimSpace(c.Param("id"))
if id == "" {
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
@@ -341,89 +203,15 @@ func (h *AudioHandler) GetModel(c echo.Context) error {
return c.JSON(http.StatusOK, resp)
}
// UpdateModel godoc
// @Summary Update a speech model
// @Tags speech-models
// @Accept json
// @Produce json
// @Param id path string true "Model ID"
// @Param request body audiopkg.UpdateSpeechModelRequest true "Model update payload"
// @Success 200 {object} audiopkg.SpeechModelResponse
// @Failure 400 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /speech-models/{id} [put].
func (h *AudioHandler) UpdateModel(c echo.Context) error {
	modelID := strings.TrimSpace(c.Param("id"))
	if len(modelID) == 0 {
		return echo.NewHTTPError(http.StatusBadRequest, "id is required")
	}

	// A body that cannot be bound is a client error (400).
	var payload audiopkg.UpdateSpeechModelRequest
	if bindErr := c.Bind(&payload); bindErr != nil {
		return echo.NewHTTPError(http.StatusBadRequest, bindErr.Error())
	}

	ctx := c.Request().Context()
	updated, updateErr := h.service.UpdateSpeechModel(ctx, modelID, payload)
	if updateErr == nil {
		return c.JSON(http.StatusOK, updated)
	}
	return echo.NewHTTPError(http.StatusInternalServerError, updateErr.Error())
}
// GetTranscriptionModel godoc
// @Summary Get a transcription model
// @Tags transcription-models
// @Produce json
// @Param id path string true "Model ID"
// @Success 200 {object} audiopkg.TranscriptionModelResponse
// @Failure 404 {object} ErrorResponse
// @Router /transcription-models/{id} [get].
func (h *AudioHandler) GetTranscriptionModel(c echo.Context) error {
	modelID := strings.TrimSpace(c.Param("id"))
	if len(modelID) == 0 {
		return echo.NewHTTPError(http.StatusBadRequest, "id is required")
	}

	ctx := c.Request().Context()
	model, lookupErr := h.service.GetTranscriptionModel(ctx, modelID)
	if lookupErr == nil {
		return c.JSON(http.StatusOK, model)
	}
	// Every lookup failure is reported to the client as 404.
	return echo.NewHTTPError(http.StatusNotFound, lookupErr.Error())
}
// UpdateTranscriptionModel godoc
// @Summary Update a transcription model
// @Tags transcription-models
// @Accept json
// @Produce json
// @Param id path string true "Model ID"
// @Param request body audiopkg.UpdateSpeechModelRequest true "Model update payload"
// @Success 200 {object} audiopkg.TranscriptionModelResponse
// @Failure 400 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /transcription-models/{id} [put].
func (h *AudioHandler) UpdateTranscriptionModel(c echo.Context) error {
	modelID := strings.TrimSpace(c.Param("id"))
	if len(modelID) == 0 {
		return echo.NewHTTPError(http.StatusBadRequest, "id is required")
	}

	// The update payload reuses the speech-model request type.
	var payload audiopkg.UpdateSpeechModelRequest
	if bindErr := c.Bind(&payload); bindErr != nil {
		return echo.NewHTTPError(http.StatusBadRequest, bindErr.Error())
	}

	ctx := c.Request().Context()
	updated, updateErr := h.service.UpdateTranscriptionModel(ctx, modelID, payload)
	if updateErr == nil {
		return c.JSON(http.StatusOK, updated)
	}
	return echo.NewHTTPError(http.StatusInternalServerError, updateErr.Error())
}
// GetModelCapabilities godoc
// @Summary Get speech model capabilities
// @Tags speech-models
// @Produce json
// @Param id path string true "Model ID"
// @Success 200 {object} audiopkg.ModelCapabilities
// @Success 200 {object} tts.ModelCapabilities
// @Failure 404 {object} ErrorResponse
// @Router /speech-models/{id}/capabilities [get].
func (h *AudioHandler) GetModelCapabilities(c echo.Context) error {
func (h *SpeechHandler) GetModelCapabilities(c echo.Context) error {
id := strings.TrimSpace(c.Param("id"))
if id == "" {
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
@@ -435,26 +223,6 @@ func (h *AudioHandler) GetModelCapabilities(c echo.Context) error {
return c.JSON(http.StatusOK, caps)
}
// GetTranscriptionModelCapabilities godoc
// @Summary Get transcription model capabilities
// @Tags transcription-models
// @Produce json
// @Param id path string true "Model ID"
// @Success 200 {object} audiopkg.ModelCapabilities
// @Failure 404 {object} ErrorResponse
// @Router /transcription-models/{id}/capabilities [get].
func (h *AudioHandler) GetTranscriptionModelCapabilities(c echo.Context) error {
	modelID := strings.TrimSpace(c.Param("id"))
	if len(modelID) == 0 {
		return echo.NewHTTPError(http.StatusBadRequest, "id is required")
	}

	ctx := c.Request().Context()
	capabilities, lookupErr := h.service.GetTranscriptionModelCapabilities(ctx, modelID)
	if lookupErr == nil {
		return c.JSON(http.StatusOK, capabilities)
	}
	// Every lookup failure is reported to the client as 404.
	return echo.NewHTTPError(http.StatusNotFound, lookupErr.Error())
}
// TestModel godoc
// @Summary Test speech model synthesis
// @Description Synthesize text using a specific model's config and return audio
@@ -462,17 +230,17 @@ func (h *AudioHandler) GetTranscriptionModelCapabilities(c echo.Context) error {
// @Accept json
// @Produce application/octet-stream
// @Param id path string true "Model ID"
// @Param request body audiopkg.TestSynthesizeRequest true "Text to synthesize"
// @Param request body tts.TestSynthesizeRequest true "Text to synthesize"
// @Success 200 {file} binary "Audio data"
// @Failure 400 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /speech-models/{id}/test [post].
func (h *AudioHandler) TestModel(c echo.Context) error {
func (h *SpeechHandler) TestModel(c echo.Context) error {
id := strings.TrimSpace(c.Param("id"))
if id == "" {
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
}
var req audiopkg.TestSynthesizeRequest
var req tts.TestSynthesizeRequest
if err := c.Bind(&req); err != nil {
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
}
@@ -490,69 +258,3 @@ func (h *AudioHandler) TestModel(c echo.Context) error {
}
return c.Blob(http.StatusOK, contentType, audio)
}
// TestTranscriptionModel godoc
// @Summary Test transcription model recognition
// @Description Transcribe uploaded audio using a specific model's config and return structured text output
// @Tags transcription-models
// @Accept mpfd
// @Produce json
// @Param id path string true "Model ID"
// @Param file formData file true "Audio file"
// @Param config formData string false "Optional JSON config"
// @Success 200 {object} audiopkg.TestTranscriptionResponse
// @Failure 400 {object} ErrorResponse
// @Failure 500 {object} ErrorResponse
// @Router /transcription-models/{id}/test [post].
func (h *AudioHandler) TestTranscriptionModel(c echo.Context) error {
	modelID := strings.TrimSpace(c.Param("id"))
	if len(modelID) == 0 {
		return echo.NewHTTPError(http.StatusBadRequest, "id is required")
	}

	upload, formErr := c.FormFile("file")
	if formErr != nil {
		return echo.NewHTTPError(http.StatusBadRequest, "file is required")
	}

	reader, openErr := upload.Open()
	if openErr != nil {
		return echo.NewHTTPError(http.StatusBadRequest, openErr.Error())
	}
	// A failed close is only logged; it does not change the response.
	defer func(f multipart.File) {
		if closeErr := f.Close(); closeErr != nil {
			h.logger.Warn("failed to close uploaded file", slog.Any("error", closeErr))
		}
	}(reader)

	// The whole upload is read into memory before being handed to the service.
	payload, readErr := io.ReadAll(reader)
	if readErr != nil {
		return echo.NewHTTPError(http.StatusBadRequest, readErr.Error())
	}

	// The optional "config" form field must be valid JSON when present.
	var overrides map[string]any
	rawConfig := strings.TrimSpace(c.FormValue("config"))
	if rawConfig != "" {
		if decodeErr := json.Unmarshal([]byte(rawConfig), &overrides); decodeErr != nil {
			return echo.NewHTTPError(http.StatusBadRequest, "invalid config")
		}
	}

	outcome, transcribeErr := h.service.Transcribe(c.Request().Context(), modelID, payload, upload.Filename, upload.Header.Get("Content-Type"), overrides)
	if transcribeErr != nil {
		return echo.NewHTTPError(http.StatusInternalServerError, transcribeErr.Error())
	}

	body := audiopkg.TestTranscriptionResponse{
		Text:            outcome.Text,
		Language:        outcome.Language,
		DurationSeconds: outcome.DurationSeconds,
		Metadata:        outcome.ProviderMetadata,
	}
	// Words stays unset when the result has no word entries.
	if wordCount := len(outcome.Words); wordCount > 0 {
		body.Words = make([]audiopkg.TranscriptionWord, 0, wordCount)
		for _, w := range outcome.Words {
			body.Words = append(body.Words, audiopkg.TranscriptionWord{
				Text:      w.Text,
				Start:     w.Start,
				End:       w.End,
				SpeakerID: w.SpeakerID,
			})
		}
	}

	return c.JSON(http.StatusOK, body)
}