feat: transcription support (#394)

* feat: expand speech provider support with new client types and configuration schema

* feat: add icon support for speech providers and update related configurations

* feat: add SVG support for Deepgram and Elevenlabs with Vue components

* feat: except *-speech client type in llm provider

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: remove go.mod replace

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: update go module dependencies

* feat: Ear and Mouth

* fix: separate ear/mouth page

* fix: separate audio domain and restore transcription templates

Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs.

---------

Co-authored-by: aki <arisu@ieee.org>
This commit is contained in:
Acbox
2026-04-22 00:12:01 +08:00
committed by GitHub
parent fd8f1ec078
commit 925fdee478
70 changed files with 6612 additions and 1692 deletions
+16 -4
View File
@@ -30,6 +30,7 @@ SET language = 'auto',
search_provider_id = NULL,
memory_provider_id = NULL,
tts_model_id = NULL,
transcription_model_id = NULL,
browser_context_id = NULL,
persist_full_tool_results = false,
updated_at = now()
@@ -62,6 +63,7 @@ SELECT
memory_providers.id AS memory_provider_id,
image_models.id AS image_model_id,
tts_models.id AS tts_model_id,
transcription_models.id AS transcription_model_id,
browser_contexts.id AS browser_context_id,
bots.persist_full_tool_results
FROM bots
@@ -73,6 +75,7 @@ LEFT JOIN models AS image_models ON image_models.id = bots.image_model_id
LEFT JOIN search_providers ON search_providers.id = bots.search_provider_id
LEFT JOIN memory_providers ON memory_providers.id = bots.memory_provider_id
LEFT JOIN models AS tts_models ON tts_models.id = bots.tts_model_id
LEFT JOIN models AS transcription_models ON transcription_models.id = bots.transcription_model_id
LEFT JOIN browser_contexts ON browser_contexts.id = bots.browser_context_id
WHERE bots.id = $1
`
@@ -97,6 +100,7 @@ type GetSettingsByBotIDRow struct {
MemoryProviderID pgtype.UUID `json:"memory_provider_id"`
ImageModelID pgtype.UUID `json:"image_model_id"`
TtsModelID pgtype.UUID `json:"tts_model_id"`
TranscriptionModelID pgtype.UUID `json:"transcription_model_id"`
BrowserContextID pgtype.UUID `json:"browser_context_id"`
PersistFullToolResults bool `json:"persist_full_tool_results"`
}
@@ -124,6 +128,7 @@ func (q *Queries) GetSettingsByBotID(ctx context.Context, id pgtype.UUID) (GetSe
&i.MemoryProviderID,
&i.ImageModelID,
&i.TtsModelID,
&i.TranscriptionModelID,
&i.BrowserContextID,
&i.PersistFullToolResults,
)
@@ -151,11 +156,12 @@ WITH updated AS (
memory_provider_id = COALESCE($16::uuid, bots.memory_provider_id),
image_model_id = COALESCE($17::uuid, bots.image_model_id),
tts_model_id = COALESCE($18::uuid, bots.tts_model_id),
browser_context_id = COALESCE($19::uuid, bots.browser_context_id),
persist_full_tool_results = $20,
transcription_model_id = COALESCE($19::uuid, bots.transcription_model_id),
browser_context_id = COALESCE($20::uuid, bots.browser_context_id),
persist_full_tool_results = $21,
updated_at = now()
WHERE bots.id = $21
RETURNING bots.id, bots.language, bots.reasoning_enabled, bots.reasoning_effort, bots.heartbeat_enabled, bots.heartbeat_interval, bots.heartbeat_prompt, bots.compaction_enabled, bots.compaction_threshold, bots.compaction_ratio, bots.timezone, bots.chat_model_id, bots.heartbeat_model_id, bots.compaction_model_id, bots.title_model_id, bots.image_model_id, bots.search_provider_id, bots.memory_provider_id, bots.tts_model_id, bots.browser_context_id, bots.persist_full_tool_results
WHERE bots.id = $22
RETURNING bots.id, bots.language, bots.reasoning_enabled, bots.reasoning_effort, bots.heartbeat_enabled, bots.heartbeat_interval, bots.heartbeat_prompt, bots.compaction_enabled, bots.compaction_threshold, bots.compaction_ratio, bots.timezone, bots.chat_model_id, bots.heartbeat_model_id, bots.compaction_model_id, bots.title_model_id, bots.image_model_id, bots.search_provider_id, bots.memory_provider_id, bots.tts_model_id, bots.transcription_model_id, bots.browser_context_id, bots.persist_full_tool_results
)
SELECT
updated.id AS bot_id,
@@ -177,6 +183,7 @@ SELECT
memory_providers.id AS memory_provider_id,
image_models.id AS image_model_id,
tts_models.id AS tts_model_id,
transcription_models.id AS transcription_model_id,
browser_contexts.id AS browser_context_id,
updated.persist_full_tool_results
FROM updated
@@ -188,6 +195,7 @@ LEFT JOIN models AS image_models ON image_models.id = updated.image_model_id
LEFT JOIN search_providers ON search_providers.id = updated.search_provider_id
LEFT JOIN memory_providers ON memory_providers.id = updated.memory_provider_id
LEFT JOIN models AS tts_models ON tts_models.id = updated.tts_model_id
LEFT JOIN models AS transcription_models ON transcription_models.id = updated.transcription_model_id
LEFT JOIN browser_contexts ON browser_contexts.id = updated.browser_context_id
`
@@ -210,6 +218,7 @@ type UpsertBotSettingsParams struct {
MemoryProviderID pgtype.UUID `json:"memory_provider_id"`
ImageModelID pgtype.UUID `json:"image_model_id"`
TtsModelID pgtype.UUID `json:"tts_model_id"`
TranscriptionModelID pgtype.UUID `json:"transcription_model_id"`
BrowserContextID pgtype.UUID `json:"browser_context_id"`
PersistFullToolResults bool `json:"persist_full_tool_results"`
ID pgtype.UUID `json:"id"`
@@ -235,6 +244,7 @@ type UpsertBotSettingsRow struct {
MemoryProviderID pgtype.UUID `json:"memory_provider_id"`
ImageModelID pgtype.UUID `json:"image_model_id"`
TtsModelID pgtype.UUID `json:"tts_model_id"`
TranscriptionModelID pgtype.UUID `json:"transcription_model_id"`
BrowserContextID pgtype.UUID `json:"browser_context_id"`
PersistFullToolResults bool `json:"persist_full_tool_results"`
}
@@ -259,6 +269,7 @@ func (q *Queries) UpsertBotSettings(ctx context.Context, arg UpsertBotSettingsPa
arg.MemoryProviderID,
arg.ImageModelID,
arg.TtsModelID,
arg.TranscriptionModelID,
arg.BrowserContextID,
arg.PersistFullToolResults,
arg.ID,
@@ -284,6 +295,7 @@ func (q *Queries) UpsertBotSettings(ctx context.Context, arg UpsertBotSettingsPa
&i.MemoryProviderID,
&i.ImageModelID,
&i.TtsModelID,
&i.TranscriptionModelID,
&i.BrowserContextID,
&i.PersistFullToolResults,
)