mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-25 07:00:48 +09:00
8d5c38f0e5
* refactor: unify providers and models tables
- Rename `llm_providers` → `providers`, `llm_provider_oauth_tokens` → `provider_oauth_tokens`
- Remove `tts_providers` and `tts_models` tables; speech models now live in the unified `models` table with `type = 'speech'`
- Replace top-level `api_key`/`base_url` columns with a JSONB `config` field on `providers`
- Rename `llm_provider_id` → `provider_id` across all references
- Add `edge-speech` client type and `conf/providers/edge.yaml` default provider
- Create new read-only speech endpoints (`/speech-providers`, `/speech-models`) backed by filtered views of the unified tables
- Remove old TTS CRUD handlers; simplify speech page to read-only + test
- Update registry loader to skip malformed YAML files instead of failing entirely
- Fix YAML quoting for model names containing colons in openrouter.yaml
- Regenerate sqlc, swagger, and TypeScript SDK
* fix: exclude speech providers from providers list endpoint
ListProviders now filters out client_type matching '%-speech' so Edge
and future speech providers no longer appear on the Providers page.
ListSpeechProviders uses the same pattern match instead of hard-coding
'edge-speech'.
* fix: use explicit client_type list instead of LIKE pattern
Replace '%-speech' pattern with explicit IN ('edge-speech') for both
ListProviders (exclusion) and ListSpeechProviders (inclusion). New
speech client types must be added to both queries.
* fix: use EXECUTE for dynamic SQL in migrations referencing old schema
PL/pgSQL pre-validates column/table references in static SQL statements
inside DO blocks before evaluating IF/RETURN guards. This caused
migrations 0010-0061 to fail on fresh databases where the canonical
schema uses `providers`/`provider_id` instead of `llm_providers`/
`llm_provider_id`.
Wrap all SQL that references potentially non-existent old schema objects
(llm_providers, llm_provider_id, tts_providers, tts_models, etc.) in
EXECUTE strings so they are only parsed at runtime when actually reached.
* fix: revert canonical schema to use llm_providers for migration compatibility
The CI migrations workflow (up → down → up) failed because 0061 down
renames `providers` back to `llm_providers`, but 0001 down only dropped
`providers` — leaving `llm_providers` as a remnant. On the second
migrate up, 0010 found the stale `llm_providers` and tried to reference
`models.llm_provider_id` which no longer existed.
Revert 0001 canonical schema to use original names (llm_providers,
tts_providers, tts_models) so incremental migrations work naturally and
0061 handles the final rename. Remove EXECUTE wrappers and unnecessary
guards from migrations that now always operate on llm_providers.
* fix: icons
* fix: sync canonical schema with 0061 migration to fix sqlc column mismatch
0001_init.up.sql still used old names (llm_providers, llm_provider_id)
and included dropped tts_providers/tts_models tables. sqlc could not
parse the PL/pgSQL EXECUTE in migration 0061, so generated code retained
stale columns (input_modalities, supports_reasoning) causing runtime
"column does not exist" errors when adding models.
- Update 0001_init.up.sql to current schema (providers, provider_id,
no tts tables, add provider_oauth_tokens)
- Use ALTER TABLE IF EXISTS in 0010/0041/0042 for backward compat
- Regenerate sqlc
* fix: guard all legacy migrations against fresh schema for CI compat
On fresh databases, 0001_init.up.sql creates providers/provider_id
(not llm_providers/llm_provider_id). Migrations 0013, 0041, 0046, 0047
referenced the old names without guards, causing CI migration failures.
- 0013: check llm_provider_id column exists before adding old constraint
- 0041: check llm_providers table exists before backfill/constraint DDL
- 0046: wrap CREATE TABLE in DO block with llm_providers existence check
- 0047: use ALTER TABLE IF EXISTS + DO block guard
154 lines
4.8 KiB
Go
154 lines
4.8 KiB
Go
package handlers
|
|
|
|
import (
|
|
"log/slog"
|
|
"net/http"
|
|
"strings"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
|
|
"github.com/memohai/memoh/internal/tts"
|
|
)
|
|
|
|
type SpeechHandler struct {
|
|
service *tts.Service
|
|
logger *slog.Logger
|
|
}
|
|
|
|
func NewSpeechHandler(log *slog.Logger, service *tts.Service) *SpeechHandler {
|
|
return &SpeechHandler{
|
|
service: service,
|
|
logger: log.With(slog.String("handler", "speech")),
|
|
}
|
|
}
|
|
|
|
func (h *SpeechHandler) Register(e *echo.Echo) {
|
|
pg := e.Group("/speech-providers")
|
|
pg.GET("", h.ListProviders)
|
|
pg.GET("/meta", h.ListMeta)
|
|
|
|
mg := e.Group("/speech-models")
|
|
mg.GET("", h.ListModels)
|
|
mg.GET("/:id", h.GetModel)
|
|
mg.GET("/:id/capabilities", h.GetModelCapabilities)
|
|
mg.POST("/:id/test", h.TestModel)
|
|
}
|
|
|
|
// ListMeta godoc
|
|
// @Summary List speech provider metadata
|
|
// @Description List available speech provider types with their models and capabilities
|
|
// @Tags speech-providers
|
|
// @Success 200 {array} tts.ProviderMetaResponse
|
|
// @Router /speech-providers/meta [get].
|
|
func (h *SpeechHandler) ListMeta(c echo.Context) error {
|
|
return c.JSON(http.StatusOK, h.service.ListMeta(c.Request().Context()))
|
|
}
|
|
|
|
// ListProviders godoc
|
|
// @Summary List speech providers
|
|
// @Description List providers that support speech (filtered view of unified providers table)
|
|
// @Tags speech-providers
|
|
// @Produce json
|
|
// @Success 200 {array} tts.SpeechProviderResponse
|
|
// @Failure 500 {object} ErrorResponse
|
|
// @Router /speech-providers [get].
|
|
func (h *SpeechHandler) ListProviders(c echo.Context) error {
|
|
items, err := h.service.ListSpeechProviders(c.Request().Context())
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
return c.JSON(http.StatusOK, items)
|
|
}
|
|
|
|
// ListModels godoc
|
|
// @Summary List all speech models
|
|
// @Description List all models of type 'speech' (filtered view of unified models table)
|
|
// @Tags speech-models
|
|
// @Produce json
|
|
// @Success 200 {array} tts.SpeechModelResponse
|
|
// @Failure 500 {object} ErrorResponse
|
|
// @Router /speech-models [get].
|
|
func (h *SpeechHandler) ListModels(c echo.Context) error {
|
|
items, err := h.service.ListSpeechModels(c.Request().Context())
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
return c.JSON(http.StatusOK, items)
|
|
}
|
|
|
|
// GetModel godoc
|
|
// @Summary Get a speech model
|
|
// @Tags speech-models
|
|
// @Produce json
|
|
// @Param id path string true "Model ID"
|
|
// @Success 200 {object} tts.SpeechModelResponse
|
|
// @Failure 404 {object} ErrorResponse
|
|
// @Router /speech-models/{id} [get].
|
|
func (h *SpeechHandler) GetModel(c echo.Context) error {
|
|
id := strings.TrimSpace(c.Param("id"))
|
|
if id == "" {
|
|
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
|
}
|
|
resp, err := h.service.GetSpeechModel(c.Request().Context(), id)
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusNotFound, err.Error())
|
|
}
|
|
return c.JSON(http.StatusOK, resp)
|
|
}
|
|
|
|
// GetModelCapabilities godoc
|
|
// @Summary Get speech model capabilities
|
|
// @Tags speech-models
|
|
// @Produce json
|
|
// @Param id path string true "Model ID"
|
|
// @Success 200 {object} tts.ModelCapabilities
|
|
// @Failure 404 {object} ErrorResponse
|
|
// @Router /speech-models/{id}/capabilities [get].
|
|
func (h *SpeechHandler) GetModelCapabilities(c echo.Context) error {
|
|
id := strings.TrimSpace(c.Param("id"))
|
|
if id == "" {
|
|
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
|
}
|
|
caps, err := h.service.GetModelCapabilities(c.Request().Context(), id)
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusNotFound, err.Error())
|
|
}
|
|
return c.JSON(http.StatusOK, caps)
|
|
}
|
|
|
|
// TestModel godoc
|
|
// @Summary Test speech model synthesis
|
|
// @Description Synthesize text using a specific model's config and return audio
|
|
// @Tags speech-models
|
|
// @Accept json
|
|
// @Produce application/octet-stream
|
|
// @Param id path string true "Model ID"
|
|
// @Param request body tts.TestSynthesizeRequest true "Text to synthesize"
|
|
// @Success 200 {file} binary "Audio data"
|
|
// @Failure 400 {object} ErrorResponse
|
|
// @Failure 500 {object} ErrorResponse
|
|
// @Router /speech-models/{id}/test [post].
|
|
func (h *SpeechHandler) TestModel(c echo.Context) error {
|
|
id := strings.TrimSpace(c.Param("id"))
|
|
if id == "" {
|
|
return echo.NewHTTPError(http.StatusBadRequest, "id is required")
|
|
}
|
|
var req tts.TestSynthesizeRequest
|
|
if err := c.Bind(&req); err != nil {
|
|
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
|
|
}
|
|
text := strings.TrimSpace(req.Text)
|
|
if text == "" {
|
|
return echo.NewHTTPError(http.StatusBadRequest, "text is required")
|
|
}
|
|
const maxTestTextLen = 500
|
|
if len([]rune(text)) > maxTestTextLen {
|
|
return echo.NewHTTPError(http.StatusBadRequest, "text too long, max 500 characters")
|
|
}
|
|
audio, contentType, err := h.service.Synthesize(c.Request().Context(), id, text, req.Config)
|
|
if err != nil {
|
|
return echo.NewHTTPError(http.StatusInternalServerError, err.Error())
|
|
}
|
|
return c.Blob(http.StatusOK, contentType, audio)
|
|
}
|