From 8d78925a238f8828d34eb261f90893bcc401b96f Mon Sep 17 00:00:00 2001 From: Yiming Qi Date: Sun, 19 Apr 2026 22:58:16 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20expand=20speech=20provider=20support=20?= =?UTF-8?q?with=20new=20client=20types=20and=20config=E2=80=A6=20(#389)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: expand speech provider support with new client types and configuration schema * feat: add icon support for speech providers and update related configurations * feat: add SVG support for Deepgram and Elevenlabs with Vue components * feat: exclude *-speech client types from llm provider list * feat: enhance speech provider functionality with advanced settings and model import capabilities * chore: remove go.mod replace * feat: enhance speech provider functionality with advanced settings and model import capabilities * chore: update go module dependencies --------- Co-authored-by: Acbox --- .../web/src/components/provider-icon/icons.ts | 8 + apps/web/src/constants/client-types.ts | 42 +- apps/web/src/i18n/locales/en.json | 5 + apps/web/src/i18n/locales/zh.json | 5 + .../speech/components/model-config-editor.vue | 500 +++++++-------- .../speech/components/provider-setting.vue | 349 ++++++++++- apps/web/src/pages/speech/index.vue | 18 +- cmd/agent/app.go | 18 +- cmd/agent/module.go | 1 + conf/providers/alibabacloud-speech.yaml | 8 + conf/providers/deepgram-speech.yaml | 8 + conf/providers/edge.yaml | 2 +- conf/providers/elevenlabs-speech.yaml | 8 + conf/providers/microsoft-speech.yaml | 8 + conf/providers/minimax-speech.yaml | 8 + conf/providers/openai-speech.yaml | 8 + conf/providers/openrouter-speech.yaml | 8 + conf/providers/volcengine-speech.yaml | 8 + db/migrations/0001_init.up.sql | 18 +- ...pand_speech_provider_client_types.down.sql | 29 + ...expand_speech_provider_client_types.up.sql | 25 + db/queries/models.sql | 44 +- go.mod | 25 +- go.sum | 46 +- internal/db/sqlc/models.sql.go | 55 +- 
internal/handlers/tts_providers.go | 117 +++- internal/models/models.go | 24 +- internal/models/types.go | 8 + internal/tts/bootstrap.go | 68 +++ internal/tts/config.go | 60 +- internal/tts/registry.go | 577 +++++++++++++++++- internal/tts/service.go | 380 +++++++----- internal/tts/types.go | 32 +- packages/icons/icons/deepgram.svg | 3 + packages/icons/icons/elevenlabs.svg | 4 + packages/icons/scripts/manifest.ts | 2 + packages/icons/src/icons/Deepgram.vue | 17 + packages/icons/src/icons/Elevenlabs.vue | 21 + packages/icons/src/index.ts | 2 + packages/sdk/src/@pinia/colada.gen.ts | 58 +- packages/sdk/src/index.ts | 4 +- packages/sdk/src/sdk.gen.ts | 23 +- packages/sdk/src/types.gen.ts | 138 +++++ spec/docs.go | 218 +++++++ spec/swagger.json | 218 +++++++ spec/swagger.yaml | 145 +++++ 46 files changed, 2808 insertions(+), 565 deletions(-) create mode 100644 conf/providers/alibabacloud-speech.yaml create mode 100644 conf/providers/deepgram-speech.yaml create mode 100644 conf/providers/elevenlabs-speech.yaml create mode 100644 conf/providers/microsoft-speech.yaml create mode 100644 conf/providers/minimax-speech.yaml create mode 100644 conf/providers/openai-speech.yaml create mode 100644 conf/providers/openrouter-speech.yaml create mode 100644 conf/providers/volcengine-speech.yaml create mode 100644 db/migrations/0068_expand_speech_provider_client_types.down.sql create mode 100644 db/migrations/0068_expand_speech_provider_client_types.up.sql create mode 100644 internal/tts/bootstrap.go create mode 100644 packages/icons/icons/deepgram.svg create mode 100644 packages/icons/icons/elevenlabs.svg create mode 100644 packages/icons/src/icons/Deepgram.vue create mode 100644 packages/icons/src/icons/Elevenlabs.vue diff --git a/apps/web/src/components/provider-icon/icons.ts b/apps/web/src/components/provider-icon/icons.ts index 6c76d4a0..a8599158 100644 --- a/apps/web/src/components/provider-icon/icons.ts +++ b/apps/web/src/components/provider-icon/icons.ts @@ -13,10 +13,12 @@ 
import { ClaudeColor, Cohere, CohereColor, + Deepgram, Deepseek, DeepseekColor, Doubao, DoubaoColor, + Elevenlabs, Fireworks, FireworksColor, Gemini, @@ -35,6 +37,8 @@ import { Lmstudio, Meta, MetaColor, + Microsoft, + MicrosoftColor, Minimax, MinimaxColor, Mistral, @@ -81,6 +85,8 @@ export const iconMap: Record = { 'google-brand-color': GoogleBrandColor, 'deepseek': Deepseek, 'deepseek-color': DeepseekColor, + 'deepgram': Deepgram, + 'elevenlabs': Elevenlabs, 'groq': Groq, 'huggingface': Huggingface, 'huggingface-color': HuggingfaceColor, @@ -105,6 +111,8 @@ export const iconMap: Record = { 'cohere-color': CohereColor, 'azure': Azure, 'azure-color': AzureColor, + 'microsoft': Microsoft, + 'microsoft-color': MicrosoftColor, 'nvidia': Nvidia, 'nvidia-color': NvidiaColor, 'fireworks': Fireworks, diff --git a/apps/web/src/constants/client-types.ts b/apps/web/src/constants/client-types.ts index ae751d87..812786b2 100644 --- a/apps/web/src/constants/client-types.ts +++ b/apps/web/src/constants/client-types.ts @@ -40,9 +40,49 @@ export const CLIENT_TYPE_META: Record = { label: 'Edge Speech', hint: 'Microsoft Edge Read Aloud TTS', }, + 'openai-speech': { + value: 'openai-speech', + label: 'OpenAI Speech', + hint: 'OpenAI /audio/speech compatible TTS', + }, + 'openrouter-speech': { + value: 'openrouter-speech', + label: 'OpenRouter Speech', + hint: 'OpenRouter audio modality TTS', + }, + 'elevenlabs-speech': { + value: 'elevenlabs-speech', + label: 'ElevenLabs Speech', + hint: 'ElevenLabs text-to-speech', + }, + 'deepgram-speech': { + value: 'deepgram-speech', + label: 'Deepgram Speech', + hint: 'Deepgram TTS', + }, + 'minimax-speech': { + value: 'minimax-speech', + label: 'MiniMax Speech', + hint: 'MiniMax TTS', + }, + 'volcengine-speech': { + value: 'volcengine-speech', + label: 'Volcengine Speech', + hint: 'Volcengine SAMI TTS', + }, + 'alibabacloud-speech': { + value: 'alibabacloud-speech', + label: 'Alibaba Cloud Speech', + hint: 'DashScope CosyVoice TTS', + }, + 
'microsoft-speech': { + value: 'microsoft-speech', + label: 'Microsoft Speech', + hint: 'Azure Cognitive Services TTS', + }, } export const CLIENT_TYPE_LIST: ClientTypeMeta[] = Object.values(CLIENT_TYPE_META) export const LLM_CLIENT_TYPE_LIST: ClientTypeMeta[] = CLIENT_TYPE_LIST - .filter(ct => ct.value !== 'edge-speech') + .filter(ct => !ct.value.endsWith('-speech')) diff --git a/apps/web/src/i18n/locales/en.json b/apps/web/src/i18n/locales/en.json index e4fc0362..59f83d6c 100644 --- a/apps/web/src/i18n/locales/en.json +++ b/apps/web/src/i18n/locales/en.json @@ -424,6 +424,11 @@ "modelIdPlaceholder": "Enter model identifier (e.g. custom-voice)", "noModels": "No models found. Click \"Import Models\" to discover available models or \"Add Model\" to create one manually.", "noCapabilities": "No capabilities available for this model.", + "saveSuccess": "Speech configuration saved", + "advanced": { + "title": "Advanced Settings", + "description": "These fields usually map to underlying vendor implementation details. Most users can keep the defaults." 
+ }, "fields": { "language": "Language", "languagePlaceholder": "Select language...", diff --git a/apps/web/src/i18n/locales/zh.json b/apps/web/src/i18n/locales/zh.json index 318ae3f5..8891cad6 100644 --- a/apps/web/src/i18n/locales/zh.json +++ b/apps/web/src/i18n/locales/zh.json @@ -420,6 +420,11 @@ "modelIdPlaceholder": "输入模型标识符(如 custom-voice)", "noModels": "暂无模型,点击\"导入模型\"发现可用模型,或点击\"新建模型\"手动创建。", "noCapabilities": "该模型暂无可用能力信息。", + "saveSuccess": "语音配置已保存", + "advanced": { + "title": "高级设置", + "description": "这些字段通常对应底层服务商实现细节。大多数情况下保留默认值即可。" + }, "fields": { "language": "语言", "languagePlaceholder": "选择语言...", diff --git a/apps/web/src/pages/speech/components/model-config-editor.vue b/apps/web/src/pages/speech/components/model-config-editor.vue index af9ccb81..334d8f0d 100644 --- a/apps/web/src/pages/speech/components/model-config-editor.vue +++ b/apps/web/src/pages/speech/components/model-config-editor.vue @@ -1,189 +1,198 @@