Feat/speech support (#392)

* feat: expand speech provider support with new client types and configuration schema

* feat: add icon support for speech providers and update related configurations

* feat: add SVG support for Deepgram and Elevenlabs with Vue components

* feat: exclude *-speech client types from llm providers

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: remove go.mod replace

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: update go module dependencies

* feat: Ear and Mouth

* fix: separate ear/mouth page

* fix: separate audio domain and restore transcription templates

Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs.

---------

Co-authored-by: aki <arisu@ieee.org>
This commit is contained in:
Acbox
2026-04-22 00:09:46 +08:00
committed by GitHub
parent 8d78925a23
commit c9dcfe287f
70 changed files with 6612 additions and 1692 deletions
+1 -1
View File
@@ -511,7 +511,7 @@ WITH updated AS (
SET display_name = $1,
updated_at = now()
WHERE bots.id = $2
RETURNING id, owner_user_id, display_name, avatar_url, timezone, is_active, status, language, reasoning_enabled, reasoning_effort, chat_model_id, search_provider_id, memory_provider_id, heartbeat_enabled, heartbeat_interval, heartbeat_prompt, heartbeat_model_id, compaction_enabled, compaction_threshold, compaction_ratio, compaction_model_id, title_model_id, image_model_id, discuss_probe_model_id, tts_model_id, browser_context_id, persist_full_tool_results, metadata, created_at, updated_at, acl_default_effect
RETURNING id, owner_user_id, display_name, avatar_url, timezone, is_active, status, language, reasoning_enabled, reasoning_effort, chat_model_id, search_provider_id, memory_provider_id, heartbeat_enabled, heartbeat_interval, heartbeat_prompt, heartbeat_model_id, compaction_enabled, compaction_threshold, compaction_ratio, compaction_model_id, title_model_id, image_model_id, discuss_probe_model_id, tts_model_id, transcription_model_id, browser_context_id, persist_full_tool_results, metadata, created_at, updated_at, acl_default_effect
)
SELECT
updated.id AS id,
+1
View File
@@ -34,6 +34,7 @@ type Bot struct {
ImageModelID pgtype.UUID `json:"image_model_id"`
DiscussProbeModelID pgtype.UUID `json:"discuss_probe_model_id"`
TtsModelID pgtype.UUID `json:"tts_model_id"`
TranscriptionModelID pgtype.UUID `json:"transcription_model_id"`
BrowserContextID pgtype.UUID `json:"browser_context_id"`
PersistFullToolResults bool `json:"persist_full_tool_results"`
Metadata []byte `json:"metadata"`
+225 -11
View File
@@ -13,7 +13,7 @@ import (
const countModels = `-- name: CountModels :one
SELECT COUNT(*) FROM models
WHERE type != 'speech'
WHERE type NOT IN ('speech', 'transcription')
`
func (q *Queries) CountModels(ctx context.Context) (int64, error) {
@@ -40,13 +40,19 @@ FROM providers
WHERE client_type NOT IN (
'edge-speech',
'openai-speech',
'openai-transcription',
'openrouter-speech',
'openrouter-transcription',
'elevenlabs-speech',
'elevenlabs-transcription',
'deepgram-speech',
'deepgram-transcription',
'minimax-speech',
'volcengine-speech',
'alibabacloud-speech',
'microsoft-speech'
'microsoft-speech',
'google-speech',
'google-transcription'
)
`
@@ -201,6 +207,24 @@ func (q *Queries) DeleteModelByModelID(ctx context.Context, modelID string) erro
return err
}
// deleteModelByProviderAndType is the SQL text for the DeleteModelByProviderAndType
// query. It deletes rows from models whose provider_id ($1), model_id ($2) and
// type ($3) all match the supplied values.
const deleteModelByProviderAndType = `-- name: DeleteModelByProviderAndType :exec
DELETE FROM models
WHERE provider_id = $1
AND model_id = $2
AND type = $3
`
// DeleteModelByProviderAndTypeParams carries the three filter values bound to
// the deleteModelByProviderAndType statement, in placeholder order
// (ProviderID -> $1, ModelID -> $2, Type -> $3).
type DeleteModelByProviderAndTypeParams struct {
ProviderID pgtype.UUID `json:"provider_id"`
ModelID string `json:"model_id"`
Type string `json:"type"`
}
// DeleteModelByProviderAndType executes the deleteModelByProviderAndType
// statement with the provider ID, model ID and type taken from arg.
// It returns whatever error the underlying Exec call reports, or nil.
func (q *Queries) DeleteModelByProviderAndType(ctx context.Context, arg DeleteModelByProviderAndTypeParams) error {
	if _, execErr := q.db.Exec(
		ctx,
		deleteModelByProviderAndType,
		arg.ProviderID,
		arg.ModelID,
		arg.Type,
	); execErr != nil {
		return execErr
	}
	return nil
}
const deleteModelByProviderIDAndModelID = `-- name: DeleteModelByProviderIDAndModelID :exec
DELETE FROM models
WHERE provider_id = $1
@@ -294,6 +318,27 @@ func (q *Queries) GetModelByProviderAndModelID(ctx context.Context, arg GetModel
return i, err
}
// getProviderByClientType is the SQL text for the GetProviderByClientType
// query. It selects the provider columns from providers where client_type
// equals $1. The statement has no ORDER BY or LIMIT.
const getProviderByClientType = `-- name: GetProviderByClientType :one
SELECT id, name, client_type, icon, enable, config, metadata, created_at, updated_at FROM providers WHERE client_type = $1
`
// GetProviderByClientType runs getProviderByClientType for the given client
// type and scans the resulting row into a Provider.
// The Provider value and the Scan error are returned together; on a non-nil
// error the Provider should not be relied upon.
func (q *Queries) GetProviderByClientType(ctx context.Context, clientType string) (Provider, error) {
	var provider Provider
	scanErr := q.db.QueryRow(ctx, getProviderByClientType, clientType).Scan(
		&provider.ID,
		&provider.Name,
		&provider.ClientType,
		&provider.Icon,
		&provider.Enable,
		&provider.Config,
		&provider.Metadata,
		&provider.CreatedAt,
		&provider.UpdatedAt,
	)
	return provider, scanErr
}
const getProviderByID = `-- name: GetProviderByID :one
SELECT id, name, client_type, icon, enable, config, metadata, created_at, updated_at FROM providers WHERE id = $1
`
@@ -375,12 +420,51 @@ func (q *Queries) GetSpeechModelWithProvider(ctx context.Context, id pgtype.UUID
return i, err
}
// getTranscriptionModelWithProvider is the SQL text for the
// GetTranscriptionModelWithProvider query. It selects the model columns plus
// the joined provider's client_type (aliased provider_type) for the model
// whose id equals $1, restricted to rows with type 'transcription'.
const getTranscriptionModelWithProvider = `-- name: GetTranscriptionModelWithProvider :one
SELECT
m.id, m.model_id, m.name, m.provider_id, m.type, m.config, m.created_at, m.updated_at,
p.client_type AS provider_type
FROM models m
JOIN providers p ON p.id = m.provider_id
WHERE m.id = $1
AND m.type = 'transcription'
`
// GetTranscriptionModelWithProviderRow is the row shape produced by
// GetTranscriptionModelWithProvider: the selected models columns followed by
// the provider's client_type, exposed as ProviderType.
type GetTranscriptionModelWithProviderRow struct {
ID pgtype.UUID `json:"id"`
ModelID string `json:"model_id"`
Name pgtype.Text `json:"name"`
ProviderID pgtype.UUID `json:"provider_id"`
Type string `json:"type"`
Config []byte `json:"config"`
CreatedAt pgtype.Timestamptz `json:"created_at"`
UpdatedAt pgtype.Timestamptz `json:"updated_at"`
// ProviderType is filled from the query's provider_type column (p.client_type).
ProviderType string `json:"provider_type"`
}
// GetTranscriptionModelWithProvider runs getTranscriptionModelWithProvider for
// the given model id and scans the row into a
// GetTranscriptionModelWithProviderRow.
// The row value and the Scan error are returned together; on a non-nil error
// the row should not be relied upon.
func (q *Queries) GetTranscriptionModelWithProvider(ctx context.Context, id pgtype.UUID) (GetTranscriptionModelWithProviderRow, error) {
	var model GetTranscriptionModelWithProviderRow
	scanErr := q.db.QueryRow(ctx, getTranscriptionModelWithProvider, id).Scan(
		&model.ID,
		&model.ModelID,
		&model.Name,
		&model.ProviderID,
		&model.Type,
		&model.Config,
		&model.CreatedAt,
		&model.UpdatedAt,
		&model.ProviderType,
	)
	return model, scanErr
}
const listEnabledModels = `-- name: ListEnabledModels :many
SELECT m.id, m.model_id, m.name, m.provider_id, m.type, m.config, m.created_at, m.updated_at
FROM models m
JOIN providers p ON m.provider_id = p.id
WHERE p.enable = true
AND m.type != 'speech'
AND m.type NOT IN ('speech', 'transcription')
ORDER BY m.created_at DESC
`
@@ -525,7 +609,7 @@ func (q *Queries) ListModelVariantsByModelUUID(ctx context.Context, modelUuid pg
const listModels = `-- name: ListModels :many
SELECT id, model_id, name, provider_id, type, config, created_at, updated_at FROM models
WHERE type != 'speech'
WHERE type NOT IN ('speech', 'transcription')
ORDER BY created_at DESC
`
@@ -633,7 +717,7 @@ func (q *Queries) ListModelsByProviderClientType(ctx context.Context, clientType
const listModelsByProviderID = `-- name: ListModelsByProviderID :many
SELECT id, model_id, name, provider_id, type, config, created_at, updated_at FROM models
WHERE provider_id = $1
AND type != 'speech'
AND type NOT IN ('speech', 'transcription')
ORDER BY created_at DESC
`
@@ -747,13 +831,19 @@ SELECT id, name, client_type, icon, enable, config, metadata, created_at, update
WHERE client_type NOT IN (
'edge-speech',
'openai-speech',
'openai-transcription',
'openrouter-speech',
'openrouter-transcription',
'elevenlabs-speech',
'elevenlabs-transcription',
'deepgram-speech',
'deepgram-transcription',
'minimax-speech',
'volcengine-speech',
'alibabacloud-speech',
'microsoft-speech'
'microsoft-speech',
'google-speech',
'google-transcription'
)
ORDER BY created_at DESC
`
@@ -921,6 +1011,135 @@ func (q *Queries) ListSpeechProviders(ctx context.Context) ([]Provider, error) {
return items, nil
}
// listTranscriptionModels is the SQL text for the ListTranscriptionModels
// query. It selects every model with type 'transcription', joined to its
// provider to expose client_type as provider_type, ordered by the model's
// created_at descending.
const listTranscriptionModels = `-- name: ListTranscriptionModels :many
SELECT m.id, m.model_id, m.name, m.provider_id, m.type, m.config, m.created_at, m.updated_at,
p.client_type AS provider_type
FROM models m
JOIN providers p ON p.id = m.provider_id
WHERE m.type = 'transcription'
ORDER BY m.created_at DESC
`
// ListTranscriptionModelsRow is one element of the ListTranscriptionModels
// result: the selected models columns followed by the provider's client_type,
// exposed as ProviderType.
type ListTranscriptionModelsRow struct {
ID pgtype.UUID `json:"id"`
ModelID string `json:"model_id"`
Name pgtype.Text `json:"name"`
ProviderID pgtype.UUID `json:"provider_id"`
Type string `json:"type"`
Config []byte `json:"config"`
CreatedAt pgtype.Timestamptz `json:"created_at"`
UpdatedAt pgtype.Timestamptz `json:"updated_at"`
// ProviderType is filled from the query's provider_type column (p.client_type).
ProviderType string `json:"provider_type"`
}
// ListTranscriptionModels runs listTranscriptionModels and collects every
// returned row into a slice.
// Any query, scan or iteration error aborts the call and is returned with a
// nil slice. When the query yields no rows the returned slice is nil.
func (q *Queries) ListTranscriptionModels(ctx context.Context) ([]ListTranscriptionModelsRow, error) {
	rows, err := q.db.Query(ctx, listTranscriptionModels)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var models []ListTranscriptionModelsRow
	for rows.Next() {
		var model ListTranscriptionModelsRow
		scanErr := rows.Scan(
			&model.ID,
			&model.ModelID,
			&model.Name,
			&model.ProviderID,
			&model.Type,
			&model.Config,
			&model.CreatedAt,
			&model.UpdatedAt,
			&model.ProviderType,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		models = append(models, model)
	}
	// Surface errors that ended iteration early.
	if err = rows.Err(); err != nil {
		return nil, err
	}
	return models, nil
}
// listTranscriptionModelsByProviderID is the SQL text for the
// ListTranscriptionModelsByProviderID query. It selects the models rows whose
// provider_id equals $1 and whose type is 'transcription', ordered by
// created_at descending.
const listTranscriptionModelsByProviderID = `-- name: ListTranscriptionModelsByProviderID :many
SELECT id, model_id, name, provider_id, type, config, created_at, updated_at FROM models
WHERE provider_id = $1
AND type = 'transcription'
ORDER BY created_at DESC
`
// ListTranscriptionModelsByProviderID runs listTranscriptionModelsByProviderID
// for the given provider and collects every returned row into a []Model.
// Any query, scan or iteration error aborts the call and is returned with a
// nil slice. When the query yields no rows the returned slice is nil.
func (q *Queries) ListTranscriptionModelsByProviderID(ctx context.Context, providerID pgtype.UUID) ([]Model, error) {
	rows, err := q.db.Query(ctx, listTranscriptionModelsByProviderID, providerID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var models []Model
	for rows.Next() {
		var model Model
		scanErr := rows.Scan(
			&model.ID,
			&model.ModelID,
			&model.Name,
			&model.ProviderID,
			&model.Type,
			&model.Config,
			&model.CreatedAt,
			&model.UpdatedAt,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		models = append(models, model)
	}
	// Surface errors that ended iteration early.
	if err = rows.Err(); err != nil {
		return nil, err
	}
	return models, nil
}
// listTranscriptionProviders is the SQL text for the ListTranscriptionProviders
// query. It selects providers whose client_type is one of the five
// '*-transcription' values listed in the IN clause, ordered by created_at
// descending.
const listTranscriptionProviders = `-- name: ListTranscriptionProviders :many
SELECT id, name, client_type, icon, enable, config, metadata, created_at, updated_at FROM providers
WHERE client_type IN (
'openai-transcription',
'openrouter-transcription',
'elevenlabs-transcription',
'deepgram-transcription',
'google-transcription'
)
ORDER BY created_at DESC
`
// ListTranscriptionProviders runs listTranscriptionProviders and collects
// every returned row into a []Provider.
// Any query, scan or iteration error aborts the call and is returned with a
// nil slice. When the query yields no rows the returned slice is nil.
func (q *Queries) ListTranscriptionProviders(ctx context.Context) ([]Provider, error) {
	rows, err := q.db.Query(ctx, listTranscriptionProviders)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var providers []Provider
	for rows.Next() {
		var provider Provider
		scanErr := rows.Scan(
			&provider.ID,
			&provider.Name,
			&provider.ClientType,
			&provider.Icon,
			&provider.Enable,
			&provider.Config,
			&provider.Metadata,
			&provider.CreatedAt,
			&provider.UpdatedAt,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		providers = append(providers, provider)
	}
	// Surface errors that ended iteration early.
	if err = rows.Err(); err != nil {
		return nil, err
	}
	return providers, nil
}
const updateModel = `-- name: UpdateModel :one
UPDATE models
SET
@@ -1062,11 +1281,6 @@ VALUES ($1, $2, $3, false, $4, '{}')
ON CONFLICT (name) DO UPDATE SET
icon = EXCLUDED.icon,
client_type = EXCLUDED.client_type,
config = CASE
WHEN providers.config->>'api_key' IS NOT NULL AND providers.config->>'api_key' != ''
THEN jsonb_set(EXCLUDED.config, '{api_key}', providers.config->'api_key')
ELSE EXCLUDED.config
END,
updated_at = now()
RETURNING id, name, client_type, icon, enable, config, metadata, created_at, updated_at
`
+16 -4
View File
@@ -30,6 +30,7 @@ SET language = 'auto',
search_provider_id = NULL,
memory_provider_id = NULL,
tts_model_id = NULL,
transcription_model_id = NULL,
browser_context_id = NULL,
persist_full_tool_results = false,
updated_at = now()
@@ -62,6 +63,7 @@ SELECT
memory_providers.id AS memory_provider_id,
image_models.id AS image_model_id,
tts_models.id AS tts_model_id,
transcription_models.id AS transcription_model_id,
browser_contexts.id AS browser_context_id,
bots.persist_full_tool_results
FROM bots
@@ -73,6 +75,7 @@ LEFT JOIN models AS image_models ON image_models.id = bots.image_model_id
LEFT JOIN search_providers ON search_providers.id = bots.search_provider_id
LEFT JOIN memory_providers ON memory_providers.id = bots.memory_provider_id
LEFT JOIN models AS tts_models ON tts_models.id = bots.tts_model_id
LEFT JOIN models AS transcription_models ON transcription_models.id = bots.transcription_model_id
LEFT JOIN browser_contexts ON browser_contexts.id = bots.browser_context_id
WHERE bots.id = $1
`
@@ -97,6 +100,7 @@ type GetSettingsByBotIDRow struct {
MemoryProviderID pgtype.UUID `json:"memory_provider_id"`
ImageModelID pgtype.UUID `json:"image_model_id"`
TtsModelID pgtype.UUID `json:"tts_model_id"`
TranscriptionModelID pgtype.UUID `json:"transcription_model_id"`
BrowserContextID pgtype.UUID `json:"browser_context_id"`
PersistFullToolResults bool `json:"persist_full_tool_results"`
}
@@ -124,6 +128,7 @@ func (q *Queries) GetSettingsByBotID(ctx context.Context, id pgtype.UUID) (GetSe
&i.MemoryProviderID,
&i.ImageModelID,
&i.TtsModelID,
&i.TranscriptionModelID,
&i.BrowserContextID,
&i.PersistFullToolResults,
)
@@ -151,11 +156,12 @@ WITH updated AS (
memory_provider_id = COALESCE($16::uuid, bots.memory_provider_id),
image_model_id = COALESCE($17::uuid, bots.image_model_id),
tts_model_id = COALESCE($18::uuid, bots.tts_model_id),
browser_context_id = COALESCE($19::uuid, bots.browser_context_id),
persist_full_tool_results = $20,
transcription_model_id = COALESCE($19::uuid, bots.transcription_model_id),
browser_context_id = COALESCE($20::uuid, bots.browser_context_id),
persist_full_tool_results = $21,
updated_at = now()
WHERE bots.id = $21
RETURNING bots.id, bots.language, bots.reasoning_enabled, bots.reasoning_effort, bots.heartbeat_enabled, bots.heartbeat_interval, bots.heartbeat_prompt, bots.compaction_enabled, bots.compaction_threshold, bots.compaction_ratio, bots.timezone, bots.chat_model_id, bots.heartbeat_model_id, bots.compaction_model_id, bots.title_model_id, bots.image_model_id, bots.search_provider_id, bots.memory_provider_id, bots.tts_model_id, bots.browser_context_id, bots.persist_full_tool_results
WHERE bots.id = $22
RETURNING bots.id, bots.language, bots.reasoning_enabled, bots.reasoning_effort, bots.heartbeat_enabled, bots.heartbeat_interval, bots.heartbeat_prompt, bots.compaction_enabled, bots.compaction_threshold, bots.compaction_ratio, bots.timezone, bots.chat_model_id, bots.heartbeat_model_id, bots.compaction_model_id, bots.title_model_id, bots.image_model_id, bots.search_provider_id, bots.memory_provider_id, bots.tts_model_id, bots.transcription_model_id, bots.browser_context_id, bots.persist_full_tool_results
)
SELECT
updated.id AS bot_id,
@@ -177,6 +183,7 @@ SELECT
memory_providers.id AS memory_provider_id,
image_models.id AS image_model_id,
tts_models.id AS tts_model_id,
transcription_models.id AS transcription_model_id,
browser_contexts.id AS browser_context_id,
updated.persist_full_tool_results
FROM updated
@@ -188,6 +195,7 @@ LEFT JOIN models AS image_models ON image_models.id = updated.image_model_id
LEFT JOIN search_providers ON search_providers.id = updated.search_provider_id
LEFT JOIN memory_providers ON memory_providers.id = updated.memory_provider_id
LEFT JOIN models AS tts_models ON tts_models.id = updated.tts_model_id
LEFT JOIN models AS transcription_models ON transcription_models.id = updated.transcription_model_id
LEFT JOIN browser_contexts ON browser_contexts.id = updated.browser_context_id
`
@@ -210,6 +218,7 @@ type UpsertBotSettingsParams struct {
MemoryProviderID pgtype.UUID `json:"memory_provider_id"`
ImageModelID pgtype.UUID `json:"image_model_id"`
TtsModelID pgtype.UUID `json:"tts_model_id"`
TranscriptionModelID pgtype.UUID `json:"transcription_model_id"`
BrowserContextID pgtype.UUID `json:"browser_context_id"`
PersistFullToolResults bool `json:"persist_full_tool_results"`
ID pgtype.UUID `json:"id"`
@@ -235,6 +244,7 @@ type UpsertBotSettingsRow struct {
MemoryProviderID pgtype.UUID `json:"memory_provider_id"`
ImageModelID pgtype.UUID `json:"image_model_id"`
TtsModelID pgtype.UUID `json:"tts_model_id"`
TranscriptionModelID pgtype.UUID `json:"transcription_model_id"`
BrowserContextID pgtype.UUID `json:"browser_context_id"`
PersistFullToolResults bool `json:"persist_full_tool_results"`
}
@@ -259,6 +269,7 @@ func (q *Queries) UpsertBotSettings(ctx context.Context, arg UpsertBotSettingsPa
arg.MemoryProviderID,
arg.ImageModelID,
arg.TtsModelID,
arg.TranscriptionModelID,
arg.BrowserContextID,
arg.PersistFullToolResults,
arg.ID,
@@ -284,6 +295,7 @@ func (q *Queries) UpsertBotSettings(ctx context.Context, arg UpsertBotSettingsPa
&i.MemoryProviderID,
&i.ImageModelID,
&i.TtsModelID,
&i.TranscriptionModelID,
&i.BrowserContextID,
&i.PersistFullToolResults,
)