mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-25 07:00:48 +09:00
This reverts commit c9dcfe287f.
This commit is contained in:
@@ -7,28 +7,28 @@ import (
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
|
||||
audiopkg "github.com/memohai/memoh/internal/audio"
|
||||
"github.com/memohai/memoh/internal/settings"
|
||||
"github.com/memohai/memoh/internal/tts"
|
||||
)
|
||||
|
||||
// BotAudioHandler handles per-bot speech synthesis requests from the agent tool.
|
||||
type BotAudioHandler struct {
|
||||
audioService *audiopkg.Service
|
||||
// BotTtsHandler handles per-bot TTS synthesis requests from the agent tool.
|
||||
type BotTtsHandler struct {
|
||||
ttsService *tts.Service
|
||||
settingsService *settings.Service
|
||||
tempStore *audiopkg.TempStore
|
||||
tempStore *tts.TempStore
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
func NewBotAudioHandler(log *slog.Logger, audioService *audiopkg.Service, settingsService *settings.Service, tempStore *audiopkg.TempStore) *BotAudioHandler {
|
||||
return &BotAudioHandler{
|
||||
audioService: audioService,
|
||||
func NewBotTtsHandler(log *slog.Logger, ttsService *tts.Service, settingsService *settings.Service, tempStore *tts.TempStore) *BotTtsHandler {
|
||||
return &BotTtsHandler{
|
||||
ttsService: ttsService,
|
||||
settingsService: settingsService,
|
||||
tempStore: tempStore,
|
||||
logger: log.With(slog.String("handler", "bot_audio")),
|
||||
logger: log.With(slog.String("handler", "bot_tts")),
|
||||
}
|
||||
}
|
||||
|
||||
func (h *BotAudioHandler) Register(e *echo.Echo) {
|
||||
func (h *BotTtsHandler) Register(e *echo.Echo) {
|
||||
e.POST("/bots/:bot_id/tts/synthesize", h.Synthesize)
|
||||
}
|
||||
|
||||
@@ -54,7 +54,7 @@ type synthesizeResponse struct {
|
||||
// @Failure 400 {object} ErrorResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /bots/{bot_id}/tts/synthesize [post].
|
||||
func (h *BotAudioHandler) Synthesize(c echo.Context) error {
|
||||
func (h *BotTtsHandler) Synthesize(c echo.Context) error {
|
||||
botID := strings.TrimSpace(c.Param("bot_id"))
|
||||
if botID == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "bot_id is required")
|
||||
@@ -88,10 +88,10 @@ func (h *BotAudioHandler) Synthesize(c echo.Context) error {
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, "failed to create temp file")
|
||||
}
|
||||
|
||||
contentType, streamErr := h.audioService.StreamToFile(c.Request().Context(), botSettings.TtsModelID, text, f)
|
||||
contentType, streamErr := h.ttsService.StreamToFile(c.Request().Context(), botSettings.TtsModelID, text, f)
|
||||
closeErr := f.Close()
|
||||
if streamErr != nil {
|
||||
h.logger.Error("speech synthesis failed", slog.String("bot_id", botID), slog.String("model_id", botSettings.TtsModelID), slog.Any("error", streamErr))
|
||||
h.logger.Error("tts synthesis failed", slog.String("bot_id", botID), slog.String("model_id", botSettings.TtsModelID), slog.Any("error", streamErr))
|
||||
h.tempStore.Delete(tempID)
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, streamErr.Error())
|
||||
}
|
||||
|
||||
@@ -30,30 +30,30 @@ import (
|
||||
messagepkg "github.com/memohai/memoh/internal/message"
|
||||
)
|
||||
|
||||
// localSpeechSynthesizer synthesizes text to speech audio.
|
||||
type localSpeechSynthesizer interface {
|
||||
// localTtsSynthesizer synthesizes text to speech audio.
|
||||
type localTtsSynthesizer interface {
|
||||
Synthesize(ctx context.Context, modelID string, text string, overrideCfg map[string]any) ([]byte, string, error)
|
||||
}
|
||||
|
||||
// localSpeechModelResolver resolves speech model IDs for bots.
|
||||
type localSpeechModelResolver interface {
|
||||
ResolveSpeechModelID(ctx context.Context, botID string) (string, error)
|
||||
// localTtsModelResolver resolves TTS model IDs for bots.
|
||||
type localTtsModelResolver interface {
|
||||
ResolveTtsModelID(ctx context.Context, botID string) (string, error)
|
||||
}
|
||||
|
||||
// LocalChannelHandler handles local channel routes (WebUI / API) backed by bot history.
|
||||
type LocalChannelHandler struct {
|
||||
channelType channel.ChannelType
|
||||
channelManager *channel.Manager
|
||||
channelStore *channel.Store
|
||||
chatService *conversation.Service
|
||||
routeHub *local.RouteHub
|
||||
botService *bots.Service
|
||||
accountService *accounts.Service
|
||||
resolver *flow.Resolver
|
||||
mediaService *media.Service
|
||||
speechService localSpeechSynthesizer
|
||||
speechModelResolver localSpeechModelResolver
|
||||
logger *slog.Logger
|
||||
channelType channel.ChannelType
|
||||
channelManager *channel.Manager
|
||||
channelStore *channel.Store
|
||||
chatService *conversation.Service
|
||||
routeHub *local.RouteHub
|
||||
botService *bots.Service
|
||||
accountService *accounts.Service
|
||||
resolver *flow.Resolver
|
||||
mediaService *media.Service
|
||||
ttsService localTtsSynthesizer
|
||||
ttsModelResolver localTtsModelResolver
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
// NewLocalChannelHandler creates a local channel handler.
|
||||
@@ -80,10 +80,10 @@ func (h *LocalChannelHandler) SetMediaService(svc *media.Service) {
|
||||
h.mediaService = svc
|
||||
}
|
||||
|
||||
// SetSpeechService configures speech synthesis for handling speech_delta events.
|
||||
func (h *LocalChannelHandler) SetSpeechService(synth localSpeechSynthesizer, resolver localSpeechModelResolver) {
|
||||
h.speechService = synth
|
||||
h.speechModelResolver = resolver
|
||||
// SetTtsService configures TTS synthesis for handling speech_delta events.
|
||||
func (h *LocalChannelHandler) SetTtsService(synth localTtsSynthesizer, resolver localTtsModelResolver) {
|
||||
h.ttsService = synth
|
||||
h.ttsModelResolver = resolver
|
||||
}
|
||||
|
||||
// Register registers the local channel routes.
|
||||
@@ -719,12 +719,12 @@ func (h *LocalChannelHandler) ingestSingleAttachment(ctx context.Context, botID,
|
||||
// wsSynthesizeSpeech handles speech_delta events by synthesizing audio and
|
||||
// injecting attachment_delta events with the resulting voice attachments.
|
||||
func (h *LocalChannelHandler) wsSynthesizeSpeech(ctx context.Context, botID string, original json.RawMessage) []json.RawMessage {
|
||||
if h.speechService == nil || h.speechModelResolver == nil {
|
||||
if h.ttsService == nil || h.ttsModelResolver == nil {
|
||||
h.logger.Warn("speech_delta received but TTS service not configured")
|
||||
return nil
|
||||
}
|
||||
|
||||
modelID, err := h.speechModelResolver.ResolveSpeechModelID(ctx, botID)
|
||||
modelID, err := h.ttsModelResolver.ResolveTtsModelID(ctx, botID)
|
||||
if err != nil || strings.TrimSpace(modelID) == "" {
|
||||
h.logger.Warn("speech_delta: bot has no TTS model configured", slog.String("bot_id", botID))
|
||||
return nil
|
||||
@@ -746,7 +746,7 @@ func (h *LocalChannelHandler) wsSynthesizeSpeech(ctx context.Context, botID stri
|
||||
continue
|
||||
}
|
||||
|
||||
audioData, contentType, synthErr := h.speechService.Synthesize(ctx, modelID, text, nil)
|
||||
audioData, contentType, synthErr := h.ttsService.Synthesize(ctx, modelID, text, nil)
|
||||
if synthErr != nil {
|
||||
h.logger.Warn("speech synthesis failed", slog.String("bot_id", botID), slog.Any("error", synthErr))
|
||||
continue
|
||||
|
||||
@@ -1,83 +1,55 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
|
||||
audiopkg "github.com/memohai/memoh/internal/audio"
|
||||
"github.com/memohai/memoh/internal/models"
|
||||
"github.com/memohai/memoh/internal/tts"
|
||||
)
|
||||
|
||||
type AudioHandler struct {
|
||||
service *audiopkg.Service
|
||||
type SpeechHandler struct {
|
||||
service *tts.Service
|
||||
modelsService *models.Service
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
func NewAudioHandler(log *slog.Logger, service *audiopkg.Service, modelsService *models.Service) *AudioHandler {
|
||||
return &AudioHandler{
|
||||
func NewSpeechHandler(log *slog.Logger, service *tts.Service, modelsService *models.Service) *SpeechHandler {
|
||||
return &SpeechHandler{
|
||||
service: service,
|
||||
modelsService: modelsService,
|
||||
logger: log.With(slog.String("handler", "audio")),
|
||||
logger: log.With(slog.String("handler", "speech")),
|
||||
}
|
||||
}
|
||||
|
||||
func (h *AudioHandler) Register(e *echo.Echo) {
|
||||
func (h *SpeechHandler) Register(e *echo.Echo) {
|
||||
pg := e.Group("/speech-providers")
|
||||
pg.GET("", h.ListProviders)
|
||||
pg.GET("/:id", h.GetProvider)
|
||||
pg.GET("/meta", h.ListSpeechMeta)
|
||||
pg.GET("/meta", h.ListMeta)
|
||||
pg.GET("/:id/models", h.ListModelsByProvider)
|
||||
pg.POST("/:id/import-models", h.ImportModels)
|
||||
|
||||
tpg := e.Group("/transcription-providers")
|
||||
tpg.GET("", h.ListTranscriptionProviders)
|
||||
tpg.GET("/meta", h.ListTranscriptionMeta)
|
||||
tpg.GET("/:id", h.GetProvider)
|
||||
tpg.GET("/:id/models", h.ListTranscriptionModelsByProvider)
|
||||
tpg.POST("/:id/import-models", h.ImportTranscriptionModels)
|
||||
|
||||
mg := e.Group("/speech-models")
|
||||
mg.GET("", h.ListModels)
|
||||
mg.GET("/:id", h.GetModel)
|
||||
mg.PUT("/:id", h.UpdateModel)
|
||||
mg.GET("/:id/capabilities", h.GetModelCapabilities)
|
||||
mg.POST("/:id/test", h.TestModel)
|
||||
|
||||
tg := e.Group("/transcription-models")
|
||||
tg.GET("", h.ListTranscriptionModels)
|
||||
tg.GET("/:id", h.GetTranscriptionModel)
|
||||
tg.PUT("/:id", h.UpdateTranscriptionModel)
|
||||
tg.GET("/:id/capabilities", h.GetTranscriptionModelCapabilities)
|
||||
tg.POST("/:id/test", h.TestTranscriptionModel)
|
||||
}
|
||||
|
||||
// ListMeta godoc
|
||||
// @Summary List speech provider metadata
|
||||
// @Description List available speech provider types with their models and capabilities
|
||||
// @Tags speech-providers
|
||||
// @Success 200 {array} audiopkg.ProviderMetaResponse
|
||||
// @Success 200 {array} tts.ProviderMetaResponse
|
||||
// @Router /speech-providers/meta [get].
|
||||
func (h *AudioHandler) ListSpeechMeta(c echo.Context) error {
|
||||
return c.JSON(http.StatusOK, h.service.ListSpeechMeta(c.Request().Context()))
|
||||
}
|
||||
|
||||
// ListTranscriptionMeta godoc
|
||||
// @Summary List transcription provider metadata
|
||||
// @Description List available transcription provider types with their models and capabilities
|
||||
// @Tags transcription-providers
|
||||
// @Success 200 {array} audiopkg.ProviderMetaResponse
|
||||
// @Router /transcription-providers/meta [get].
|
||||
func (h *AudioHandler) ListTranscriptionMeta(c echo.Context) error {
|
||||
return c.JSON(http.StatusOK, h.service.ListTranscriptionMeta(c.Request().Context()))
|
||||
func (h *SpeechHandler) ListMeta(c echo.Context) error {
|
||||
return c.JSON(http.StatusOK, h.service.ListMeta(c.Request().Context()))
|
||||
}
|
||||
|
||||
// ListProviders godoc
|
||||
@@ -85,10 +57,10 @@ func (h *AudioHandler) ListTranscriptionMeta(c echo.Context) error {
|
||||
// @Description List providers that support speech (filtered view of unified providers table)
|
||||
// @Tags speech-providers
|
||||
// @Produce json
|
||||
// @Success 200 {array} audiopkg.SpeechProviderResponse
|
||||
// @Success 200 {array} tts.SpeechProviderResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /speech-providers [get].
|
||||
func (h *AudioHandler) ListProviders(c echo.Context) error {
|
||||
func (h *SpeechHandler) ListProviders(c echo.Context) error {
|
||||
items, err := h.service.ListSpeechProviders(c.Request().Context())
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
||||
@@ -96,34 +68,17 @@ func (h *AudioHandler) ListProviders(c echo.Context) error {
|
||||
return c.JSON(http.StatusOK, items)
|
||||
}
|
||||
|
||||
// ListTranscriptionProviders godoc
|
||||
// @Summary List transcription providers
|
||||
// @Description List providers that support transcription (filtered view of unified providers table)
|
||||
// @Tags transcription-providers
|
||||
// @Produce json
|
||||
// @Success 200 {array} audiopkg.SpeechProviderResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /transcription-providers [get].
|
||||
func (h *AudioHandler) ListTranscriptionProviders(c echo.Context) error {
|
||||
items, err := h.service.ListTranscriptionProviders(c.Request().Context())
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
||||
}
|
||||
return c.JSON(http.StatusOK, items)
|
||||
}
|
||||
|
||||
// GetProvider godoc
|
||||
// @Summary Get speech provider
|
||||
// @Description Get a speech provider with masked config values
|
||||
// @Tags speech-providers
|
||||
// @Produce json
|
||||
// @Param id path string true "Provider ID (UUID)"
|
||||
// @Success 200 {object} audiopkg.SpeechProviderResponse
|
||||
// @Success 200 {object} tts.SpeechProviderResponse
|
||||
// @Failure 400 {object} ErrorResponse
|
||||
// @Failure 404 {object} ErrorResponse
|
||||
// @Router /speech-providers/{id} [get].
|
||||
// @Router /transcription-providers/{id} [get].
|
||||
func (h *AudioHandler) GetProvider(c echo.Context) error {
|
||||
func (h *SpeechHandler) GetProvider(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
@@ -141,11 +96,11 @@ func (h *AudioHandler) GetProvider(c echo.Context) error {
|
||||
// @Tags speech-providers
|
||||
// @Produce json
|
||||
// @Param id path string true "Provider ID (UUID)"
|
||||
// @Success 200 {array} audiopkg.SpeechModelResponse
|
||||
// @Success 200 {array} tts.SpeechModelResponse
|
||||
// @Failure 400 {object} ErrorResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /speech-providers/{id}/models [get].
|
||||
func (h *AudioHandler) ListModelsByProvider(c echo.Context) error {
|
||||
func (h *SpeechHandler) ListModelsByProvider(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
@@ -164,12 +119,12 @@ func (h *AudioHandler) ListModelsByProvider(c echo.Context) error {
|
||||
// @Accept json
|
||||
// @Produce json
|
||||
// @Param id path string true "Provider ID (UUID)"
|
||||
// @Success 200 {object} audiopkg.ImportModelsResponse
|
||||
// @Success 200 {object} tts.ImportModelsResponse
|
||||
// @Failure 400 {object} ErrorResponse
|
||||
// @Failure 404 {object} ErrorResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /speech-providers/{id}/import-models [post].
|
||||
func (h *AudioHandler) ImportModels(c echo.Context) error {
|
||||
func (h *SpeechHandler) ImportModels(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
@@ -180,7 +135,7 @@ func (h *AudioHandler) ImportModels(c echo.Context) error {
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("fetch remote speech models: %v", err))
|
||||
}
|
||||
|
||||
resp := audiopkg.ImportModelsResponse{
|
||||
resp := tts.ImportModelsResponse{
|
||||
Models: make([]string, 0, len(remoteModels)),
|
||||
}
|
||||
|
||||
@@ -212,92 +167,15 @@ func (h *AudioHandler) ImportModels(c echo.Context) error {
|
||||
return c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
// ListTranscriptionModelsByProvider godoc
|
||||
// @Summary List transcription models by provider
|
||||
// @Description List models of type 'transcription' for a specific transcription provider
|
||||
// @Tags transcription-providers
|
||||
// @Produce json
|
||||
// @Param id path string true "Provider ID (UUID)"
|
||||
// @Success 200 {array} audiopkg.TranscriptionModelResponse
|
||||
// @Failure 400 {object} ErrorResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /transcription-providers/{id}/models [get].
|
||||
func (h *AudioHandler) ListTranscriptionModelsByProvider(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
}
|
||||
items, err := h.service.ListTranscriptionModelsByProvider(c.Request().Context(), id)
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
||||
}
|
||||
return c.JSON(http.StatusOK, items)
|
||||
}
|
||||
|
||||
// ImportTranscriptionModels godoc
|
||||
// @Summary Import transcription models from provider
|
||||
// @Description Fetch models using the configured transcription provider and import them into the unified models table
|
||||
// @Tags transcription-providers
|
||||
// @Accept json
|
||||
// @Produce json
|
||||
// @Param id path string true "Provider ID (UUID)"
|
||||
// @Success 200 {object} audiopkg.ImportModelsResponse
|
||||
// @Failure 400 {object} ErrorResponse
|
||||
// @Failure 404 {object} ErrorResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /transcription-providers/{id}/import-models [post].
|
||||
func (h *AudioHandler) ImportTranscriptionModels(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
}
|
||||
|
||||
remoteModels, err := h.service.FetchRemoteTranscriptionModels(c.Request().Context(), id)
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("fetch remote transcription models: %v", err))
|
||||
}
|
||||
|
||||
resp := audiopkg.ImportModelsResponse{
|
||||
Models: make([]string, 0, len(remoteModels)),
|
||||
}
|
||||
|
||||
for _, model := range remoteModels {
|
||||
name := strings.TrimSpace(model.Name)
|
||||
if name == "" {
|
||||
name = model.ID
|
||||
}
|
||||
|
||||
_, err := h.modelsService.Create(c.Request().Context(), models.AddRequest{
|
||||
ModelID: model.ID,
|
||||
Name: name,
|
||||
ProviderID: id,
|
||||
Type: models.ModelTypeTranscription,
|
||||
Config: models.ModelConfig{},
|
||||
})
|
||||
if err != nil {
|
||||
if errors.Is(err, models.ErrModelIDAlreadyExists) {
|
||||
resp.Skipped++
|
||||
continue
|
||||
}
|
||||
h.logger.Warn("failed to import transcription model", slog.String("model_id", model.ID), slog.Any("error", err))
|
||||
continue
|
||||
}
|
||||
resp.Created++
|
||||
resp.Models = append(resp.Models, model.ID)
|
||||
}
|
||||
|
||||
return c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
// ListModels godoc
|
||||
// @Summary List all speech models
|
||||
// @Description List all models of type 'speech' (filtered view of unified models table)
|
||||
// @Tags speech-models
|
||||
// @Produce json
|
||||
// @Success 200 {array} audiopkg.SpeechModelResponse
|
||||
// @Success 200 {array} tts.SpeechModelResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /speech-models [get].
|
||||
func (h *AudioHandler) ListModels(c echo.Context) error {
|
||||
func (h *SpeechHandler) ListModels(c echo.Context) error {
|
||||
items, err := h.service.ListSpeechModels(c.Request().Context())
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
||||
@@ -305,31 +183,15 @@ func (h *AudioHandler) ListModels(c echo.Context) error {
|
||||
return c.JSON(http.StatusOK, items)
|
||||
}
|
||||
|
||||
// ListTranscriptionModels godoc
|
||||
// @Summary List all transcription models
|
||||
// @Description List all models of type 'transcription' (filtered view of unified models table)
|
||||
// @Tags transcription-models
|
||||
// @Produce json
|
||||
// @Success 200 {array} audiopkg.TranscriptionModelResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /transcription-models [get].
|
||||
func (h *AudioHandler) ListTranscriptionModels(c echo.Context) error {
|
||||
items, err := h.service.ListTranscriptionModels(c.Request().Context())
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
||||
}
|
||||
return c.JSON(http.StatusOK, items)
|
||||
}
|
||||
|
||||
// GetModel godoc
|
||||
// @Summary Get a speech model
|
||||
// @Tags speech-models
|
||||
// @Produce json
|
||||
// @Param id path string true "Model ID"
|
||||
// @Success 200 {object} audiopkg.SpeechModelResponse
|
||||
// @Success 200 {object} tts.SpeechModelResponse
|
||||
// @Failure 404 {object} ErrorResponse
|
||||
// @Router /speech-models/{id} [get].
|
||||
func (h *AudioHandler) GetModel(c echo.Context) error {
|
||||
func (h *SpeechHandler) GetModel(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
@@ -341,89 +203,15 @@ func (h *AudioHandler) GetModel(c echo.Context) error {
|
||||
return c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
// UpdateModel godoc
|
||||
// @Summary Update a speech model
|
||||
// @Tags speech-models
|
||||
// @Accept json
|
||||
// @Produce json
|
||||
// @Param id path string true "Model ID"
|
||||
// @Param request body audiopkg.UpdateSpeechModelRequest true "Model update payload"
|
||||
// @Success 200 {object} audiopkg.SpeechModelResponse
|
||||
// @Failure 400 {object} ErrorResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /speech-models/{id} [put].
|
||||
func (h *AudioHandler) UpdateModel(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
}
|
||||
var req audiopkg.UpdateSpeechModelRequest
|
||||
if err := c.Bind(&req); err != nil {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
resp, err := h.service.UpdateSpeechModel(c.Request().Context(), id, req)
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
||||
}
|
||||
return c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
// GetTranscriptionModel godoc
|
||||
// @Summary Get a transcription model
|
||||
// @Tags transcription-models
|
||||
// @Produce json
|
||||
// @Param id path string true "Model ID"
|
||||
// @Success 200 {object} audiopkg.TranscriptionModelResponse
|
||||
// @Failure 404 {object} ErrorResponse
|
||||
// @Router /transcription-models/{id} [get].
|
||||
func (h *AudioHandler) GetTranscriptionModel(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
}
|
||||
resp, err := h.service.GetTranscriptionModel(c.Request().Context(), id)
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusNotFound, err.Error())
|
||||
}
|
||||
return c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
// UpdateTranscriptionModel godoc
|
||||
// @Summary Update a transcription model
|
||||
// @Tags transcription-models
|
||||
// @Accept json
|
||||
// @Produce json
|
||||
// @Param id path string true "Model ID"
|
||||
// @Param request body audiopkg.UpdateSpeechModelRequest true "Model update payload"
|
||||
// @Success 200 {object} audiopkg.TranscriptionModelResponse
|
||||
// @Failure 400 {object} ErrorResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /transcription-models/{id} [put].
|
||||
func (h *AudioHandler) UpdateTranscriptionModel(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
}
|
||||
var req audiopkg.UpdateSpeechModelRequest
|
||||
if err := c.Bind(&req); err != nil {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
resp, err := h.service.UpdateTranscriptionModel(c.Request().Context(), id, req)
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
||||
}
|
||||
return c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
// GetModelCapabilities godoc
|
||||
// @Summary Get speech model capabilities
|
||||
// @Tags speech-models
|
||||
// @Produce json
|
||||
// @Param id path string true "Model ID"
|
||||
// @Success 200 {object} audiopkg.ModelCapabilities
|
||||
// @Success 200 {object} tts.ModelCapabilities
|
||||
// @Failure 404 {object} ErrorResponse
|
||||
// @Router /speech-models/{id}/capabilities [get].
|
||||
func (h *AudioHandler) GetModelCapabilities(c echo.Context) error {
|
||||
func (h *SpeechHandler) GetModelCapabilities(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
@@ -435,26 +223,6 @@ func (h *AudioHandler) GetModelCapabilities(c echo.Context) error {
|
||||
return c.JSON(http.StatusOK, caps)
|
||||
}
|
||||
|
||||
// GetTranscriptionModelCapabilities godoc
|
||||
// @Summary Get transcription model capabilities
|
||||
// @Tags transcription-models
|
||||
// @Produce json
|
||||
// @Param id path string true "Model ID"
|
||||
// @Success 200 {object} audiopkg.ModelCapabilities
|
||||
// @Failure 404 {object} ErrorResponse
|
||||
// @Router /transcription-models/{id}/capabilities [get].
|
||||
func (h *AudioHandler) GetTranscriptionModelCapabilities(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
}
|
||||
caps, err := h.service.GetTranscriptionModelCapabilities(c.Request().Context(), id)
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusNotFound, err.Error())
|
||||
}
|
||||
return c.JSON(http.StatusOK, caps)
|
||||
}
|
||||
|
||||
// TestModel godoc
|
||||
// @Summary Test speech model synthesis
|
||||
// @Description Synthesize text using a specific model's config and return audio
|
||||
@@ -462,17 +230,17 @@ func (h *AudioHandler) GetTranscriptionModelCapabilities(c echo.Context) error {
|
||||
// @Accept json
|
||||
// @Produce application/octet-stream
|
||||
// @Param id path string true "Model ID"
|
||||
// @Param request body audiopkg.TestSynthesizeRequest true "Text to synthesize"
|
||||
// @Param request body tts.TestSynthesizeRequest true "Text to synthesize"
|
||||
// @Success 200 {file} binary "Audio data"
|
||||
// @Failure 400 {object} ErrorResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /speech-models/{id}/test [post].
|
||||
func (h *AudioHandler) TestModel(c echo.Context) error {
|
||||
func (h *SpeechHandler) TestModel(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
}
|
||||
var req audiopkg.TestSynthesizeRequest
|
||||
var req tts.TestSynthesizeRequest
|
||||
if err := c.Bind(&req); err != nil {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
@@ -490,69 +258,3 @@ func (h *AudioHandler) TestModel(c echo.Context) error {
|
||||
}
|
||||
return c.Blob(http.StatusOK, contentType, audio)
|
||||
}
|
||||
|
||||
// TestTranscriptionModel godoc
|
||||
// @Summary Test transcription model recognition
|
||||
// @Description Transcribe uploaded audio using a specific model's config and return structured text output
|
||||
// @Tags transcription-models
|
||||
// @Accept mpfd
|
||||
// @Produce json
|
||||
// @Param id path string true "Model ID"
|
||||
// @Param file formData file true "Audio file"
|
||||
// @Param config formData string false "Optional JSON config"
|
||||
// @Success 200 {object} audiopkg.TestTranscriptionResponse
|
||||
// @Failure 400 {object} ErrorResponse
|
||||
// @Failure 500 {object} ErrorResponse
|
||||
// @Router /transcription-models/{id}/test [post].
|
||||
func (h *AudioHandler) TestTranscriptionModel(c echo.Context) error {
|
||||
id := strings.TrimSpace(c.Param("id"))
|
||||
if id == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
||||
}
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "file is required")
|
||||
}
|
||||
src, err := file.Open()
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
defer func(src multipart.File) {
|
||||
err := src.Close()
|
||||
if err != nil {
|
||||
h.logger.Warn("failed to close uploaded file", slog.Any("error", err))
|
||||
}
|
||||
}(src)
|
||||
audio, err := io.ReadAll(src)
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
var cfg map[string]any
|
||||
if raw := strings.TrimSpace(c.FormValue("config")); raw != "" {
|
||||
if err := json.Unmarshal([]byte(raw), &cfg); err != nil {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "invalid config")
|
||||
}
|
||||
}
|
||||
result, err := h.service.Transcribe(c.Request().Context(), id, audio, file.Filename, file.Header.Get("Content-Type"), cfg)
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
||||
}
|
||||
resp := audiopkg.TestTranscriptionResponse{
|
||||
Text: result.Text,
|
||||
Language: result.Language,
|
||||
DurationSeconds: result.DurationSeconds,
|
||||
Metadata: result.ProviderMetadata,
|
||||
}
|
||||
if len(result.Words) > 0 {
|
||||
resp.Words = make([]audiopkg.TranscriptionWord, 0, len(result.Words))
|
||||
for _, word := range result.Words {
|
||||
resp.Words = append(resp.Words, audiopkg.TranscriptionWord{
|
||||
Text: word.Text,
|
||||
Start: word.Start,
|
||||
End: word.End,
|
||||
SpeakerID: word.SpeakerID,
|
||||
})
|
||||
}
|
||||
}
|
||||
return c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user