From b46e494d3aa63f47186aabfb62c13a1800fab775 Mon Sep 17 00:00:00 2001 From: Fodesu <75713465+Fodesu@users.noreply.github.com> Date: Fri, 13 Mar 2026 02:49:52 +0800 Subject: [PATCH] feat(tts): introduce `TTS` system (#195) --- apps/web/src/components/sidebar/index.vue | 5 + apps/web/src/i18n/locales/en.json | 39 + apps/web/src/i18n/locales/zh.json | 39 + .../pages/bots/components/bot-settings.vue | 42 +- .../bots/components/tts-model-select.vue | 107 +++ .../bots/components/tts-provider-select.vue | 84 ++ .../chat/components/attachment-block.vue | 20 + .../components/add-tts-model.vue | 130 +++ .../components/add-tts-provider.vue | 141 +++ .../components/model-config-editor.vue | 405 +++++++++ .../components/provider-setting.vue | 338 +++++++ apps/web/src/pages/tts-providers/index.vue | 150 ++++ apps/web/src/router.ts | 8 + cmd/agent/main.go | 66 +- cmd/memoh/serve.go | 94 +- db/migrations/0001_init.up.sql | 25 + db/migrations/0029_tts_provider.down.sql | 14 + db/migrations/0029_tts_provider.up.sql | 41 + .../0030_drop_tts_model_unique.down.sql | 5 + .../0030_drop_tts_model_unique.up.sql | 6 + db/queries/settings.sql | 8 +- db/queries/tts_models.sql | 50 ++ db/queries/tts_providers.sql | 36 + go.mod | 7 +- go.sum | 12 + internal/channel/adapters/discord/discord.go | 10 +- internal/channel/inbound/channel.go | 200 ++++- internal/channel/types.go | 7 + internal/conversation/flow/resolver.go | 2 + internal/db/sqlc/conversations.sql.go | 2 +- internal/db/sqlc/email_oauth_tokens.sql.go | 123 +-- internal/db/sqlc/models.go | 46 +- internal/db/sqlc/settings.sql.go | 18 +- internal/db/sqlc/tts_models.sql.go | 248 +++++ internal/db/sqlc/tts_providers.sql.go | 189 ++++ internal/email/oauth_token_store.go | 2 +- internal/handlers/bot_tts.go | 111 +++ internal/handlers/local_channel.go | 265 +++++- internal/handlers/tts_providers.go | 371 ++++++++ internal/mcp/dataio.go | 45 +- internal/mcp/mcpcontainer/mcpcontainer.pb.go | 5 +- .../mcp/mcpcontainer/mcpcontainer_grpc.pb.go | 1 + internal/mcp/providers/tts/provider.go | 199 +++++ internal/settings/service.go | 15 + internal/settings/types.go | 2 + internal/tts/adapter.go | 20 + internal/tts/adapter/edge/edge.go | 118 +++ internal/tts/adapter/edge/edge_test.go | 118 +++ internal/tts/adapter/edge/type.go | 49 + internal/tts/adapter/edge/voices.json | 180 ++++ internal/tts/adapter/edge/ws.go | 414 +++++++++ .../tts/adapter/edge/ws_integration_test.go | 130 +++ internal/tts/adapter/edge/ws_test.go | 220 +++++ internal/tts/config.go | 58 ++ internal/tts/registry.go | 48 + internal/tts/service.go | 552 ++++++++++++ internal/tts/tempstore.go | 125 +++ internal/tts/types.go | 62 ++ packages/agent/src/agent.ts | 20 +- packages/agent/src/prompts/system.ts | 17 + packages/agent/src/types/action.ts | 7 + packages/agent/src/utils/index.ts | 1 + packages/agent/src/utils/speech.ts | 16 + packages/sdk/src/@pinia/colada.gen.ts | 261 +++++- packages/sdk/src/index.ts | 4 +- packages/sdk/src/sdk.gen.ts | 136 ++- packages/sdk/src/types.gen.ts | 587 ++++++++++++ spec/docs.go | 845 +++++++++++++++++- spec/swagger.json | 843 +++++++++++++++++ spec/swagger.yaml | 552 ++++++++++++ typos.toml | 2 +- 71 files changed, 8959 insertions(+), 159 deletions(-) create mode 100644 apps/web/src/pages/bots/components/tts-model-select.vue create mode 100644 apps/web/src/pages/bots/components/tts-provider-select.vue create mode 100644 apps/web/src/pages/tts-providers/components/add-tts-model.vue create mode 100644 apps/web/src/pages/tts-providers/components/add-tts-provider.vue create mode 100644 apps/web/src/pages/tts-providers/components/model-config-editor.vue create mode 100644 apps/web/src/pages/tts-providers/components/provider-setting.vue create mode 100644 apps/web/src/pages/tts-providers/index.vue create mode 100644 db/migrations/0029_tts_provider.down.sql create mode 100644 db/migrations/0029_tts_provider.up.sql create mode 100644 db/migrations/0030_drop_tts_model_unique.down.sql create mode 100644 db/migrations/0030_drop_tts_model_unique.up.sql create mode 100644 db/queries/tts_models.sql create mode 100644 db/queries/tts_providers.sql create mode 100644 internal/db/sqlc/tts_models.sql.go create mode 100644 internal/db/sqlc/tts_providers.sql.go create mode 100644 internal/handlers/bot_tts.go create mode 100644 internal/handlers/tts_providers.go create mode 100644 internal/mcp/providers/tts/provider.go create mode 100644 internal/tts/adapter.go create mode 100644 internal/tts/adapter/edge/edge.go create mode 100644 internal/tts/adapter/edge/edge_test.go create mode 100644 internal/tts/adapter/edge/type.go create mode 100644 internal/tts/adapter/edge/voices.json create mode 100644 internal/tts/adapter/edge/ws.go create mode 100644 internal/tts/adapter/edge/ws_integration_test.go create mode 100644 internal/tts/adapter/edge/ws_test.go create mode 100644 internal/tts/config.go create mode 100644 internal/tts/registry.go create mode 100644 internal/tts/service.go create mode 100644 internal/tts/tempstore.go create mode 100644 internal/tts/types.go create mode 100644 packages/agent/src/utils/speech.ts diff --git a/apps/web/src/components/sidebar/index.vue b/apps/web/src/components/sidebar/index.vue index 175ce22f..f1e41e6d 100644 --- a/apps/web/src/components/sidebar/index.vue +++ b/apps/web/src/components/sidebar/index.vue @@ -131,6 +131,11 @@ const sidebarInfo = computed(() => [ name: 'memory-providers', icon: ['fas', 'brain'], }, + { + title: t('sidebar.ttsProvider'), + name: 'tts-providers', + icon: ['fas', 'volume-high'], + }, { title: t('sidebar.emailProvider'), name: 'email-providers', diff --git a/apps/web/src/i18n/locales/en.json b/apps/web/src/i18n/locales/en.json index af2ce4a4..7a6384a3 100644 --- a/apps/web/src/i18n/locales/en.json +++ b/apps/web/src/i18n/locales/en.json @@ -55,6 +55,7 @@ "models": "Models", "searchProvider": "Search Providers", "memoryProvider": "Memory", + "ttsProvider": "TTS Providers", "emailProvider": "Email Providers", "settings": "Settings", "home": "Home", @@ -265,6 +266,42 @@ "builtin": "Built-in" } }, + "ttsProvider": { + "title": "TTS Providers", + "add": "Add TTS Provider", + "providerType": "Provider Type", + "searchPlaceholder": "Search TTS providers...", + "emptyTitle": "No TTS Providers", + "emptyDescription": "Add a TTS provider to enable text-to-speech for your bots", + "deleteConfirm": "Are you sure you want to delete this TTS provider? This action cannot be undone.", + "models": "Models", + "importModels": "Import Models", + "importSuccess": "Models imported successfully", + "importFailed": "Failed to import models", + "addModel": "Add Model", + "modelId": "Model ID", + "modelIdPlaceholder": "Enter model identifier (e.g. custom-voice)", + "noModels": "No models found. Click \"Import Models\" to discover available models or \"Add Model\" to create one manually.", + "noCapabilities": "No capabilities available for this model.", + "fields": { + "language": "Language", + "languagePlaceholder": "Select language...", + "voice": "Voice", + "voicePlaceholder": "Select voice...", + "format": "Output Format", + "formatPlaceholder": "Select format...", + "speed": "Speed", + "speedDescription": "Playback speed (default: {default})", + "pitch": "Pitch", + "pitchDescription": "Voice pitch adjustment in Hz (default: {default})" + }, + "test": { + "title": "Test Synthesis", + "placeholder": "Enter text to synthesize...", + "generate": "Generate", + "failed": "Synthesis failed" + } + }, "emailProvider": { "title": "Email Providers", "add": "Add Email Provider", @@ -652,6 +689,8 @@ "searchProviderPlaceholder": "Select search provider", "memoryProvider": "Memory Provider", "memoryProviderPlaceholder": "Select memory provider (disabled if empty)", + "ttsModel": "TTS Model", + "ttsModelPlaceholder": "Select TTS model", "maxContextLoadTime": "Max Context Load Time", "maxContextTokens": "Max Context Tokens", "language": "Language", diff --git a/apps/web/src/i18n/locales/zh.json b/apps/web/src/i18n/locales/zh.json index a81e42ba..879a21ea 100644 --- a/apps/web/src/i18n/locales/zh.json +++ b/apps/web/src/i18n/locales/zh.json @@ -55,6 +55,7 @@ "models": "模型管理", "searchProvider": "搜索提供方", "memoryProvider": "记忆", + "ttsProvider": "语音合成", "emailProvider": "邮件提供方", "settings": "设置", "home": "首页", @@ -261,6 +262,42 @@ "builtin": "内置" } }, + "ttsProvider": { + "title": "语音合成", + "add": "添加语音合成提供方", + "providerType": "提供方类型", + "searchPlaceholder": "搜索语音合成...", + "emptyTitle": "暂无语音合成提供方", + "emptyDescription": "添加语音合成提供方以为 Bot 启用文字转语音功能", + "deleteConfirm": "确定要删除此语音合成提供方吗?此操作不可撤销。", + "models": "模型", + "importModels": "导入模型", + "importSuccess": "模型导入成功", + "importFailed": "模型导入失败", + "addModel": "新建模型", + "modelId": "模型 ID", + "modelIdPlaceholder": "输入模型标识符(如 custom-voice)", + "noModels": "暂无模型,点击\"导入模型\"发现可用模型,或点击\"新建模型\"手动创建。", + "noCapabilities": "该模型暂无可用能力信息。", + "fields": { + "language": "语言", + "languagePlaceholder": "选择语言...", + "voice": "声音", + "voicePlaceholder": "选择声音...", + "format": "输出格式", + "formatPlaceholder": "选择格式...", + "speed": "语速", + "speedDescription": "播放速度(默认:{default})", + "pitch": "音调", + "pitchDescription": "语音音调调整,单位 Hz(默认:{default})" + }, + "test": { + "title": "测试合成", + "placeholder": "输入要合成的文本...", + "generate": "生成", + "failed": "合成失败" + } + }, "emailProvider": { "title": "邮件提供方", "add": "添加邮件提供方", @@ -648,6 +685,8 @@ "searchProviderPlaceholder": "选择搜索提供方", "memoryProvider": "记忆提供方", "memoryProviderPlaceholder": "选择记忆提供方(为空则禁用)", + "ttsModel": "语音合成模型", + "ttsModelPlaceholder": "选择语音合成模型", "maxContextLoadTime": "最大上下文加载时间", "maxContextTokens": "最大上下文Token数", "language": "语言", diff --git a/apps/web/src/pages/bots/components/bot-settings.vue b/apps/web/src/pages/bots/components/bot-settings.vue index 43341d91..8303047e 100644 --- a/apps/web/src/pages/bots/components/bot-settings.vue +++ b/apps/web/src/pages/bots/components/bot-settings.vue @@ -32,6 +32,17 @@ /> + +
+ + +
+
@@ -199,9 +210,10 @@ import ConfirmPopover from '@/components/confirm-popover/index.vue' import ModelSelect from './model-select.vue' import SearchProviderSelect from './search-provider-select.vue' import MemoryProviderSelect from './memory-provider-select.vue' +import TtsModelSelect from './tts-model-select.vue' import BrowserContextSelect from './browser-context-select.vue' import { useQuery, useMutation, useQueryCache } from '@pinia/colada' -import { getBotsByBotIdSettings, putBotsByBotIdSettings, deleteBotsById, getModels, getProviders, getSearchProviders, getMemoryProviders, getBrowserContexts } from '@memoh/sdk' +import { getBotsByBotIdSettings, putBotsByBotIdSettings, deleteBotsById, getModels, getProviders, getSearchProviders, getMemoryProviders, getTtsProviders, getBrowserContexts } from '@memoh/sdk' import type { SettingsSettings } from '@memoh/sdk' import type { Ref } from 'vue' import { resolveApiErrorMessage } from '@/utils/api-error' @@ -262,6 +274,27 @@ const { data: memoryProviderData } = useQuery({ }, }) +const { data: ttsProviderData } = useQuery({ + key: ['tts-providers'], + query: async () => { + const { data } = await getTtsProviders({ throwOnError: true }) + return data + }, +}) + +const { data: ttsModelData } = useQuery({ + key: ['tts-models'], + query: async () => { + const apiBase = import.meta.env.VITE_API_URL?.trim() || '/api' + const token = localStorage.getItem('token') + const resp = await fetch(`${apiBase}/tts-models`, { + headers: token ? { Authorization: `Bearer ${token}` } : {}, + }) + if (!resp.ok) throw new Error('Failed to fetch TTS models') + return resp.json() + }, +}) + const { data: browserContextData } = useQuery({ key: ['all-browser-contexts'], query: async () => { @@ -296,6 +329,8 @@ const models = computed(() => modelData.value ?? []) const providers = computed(() => providerData.value ?? []) const searchProviders = computed(() => searchProviderData.value ?? []) const memoryProviders = computed(() => memoryProviderData.value ?? []) +const ttsProviders = computed(() => ttsProviderData.value ?? []) +const ttsModels = computed(() => ttsModelData.value ?? []) const browserContexts = computed(() => browserContextData.value ?? []) const chatModelSupportsReasoning = computed(() => { @@ -309,6 +344,7 @@ const form = reactive({ chat_model_id: '', search_provider_id: '', memory_provider_id: '', + tts_model_id: '', browser_context_id: '', max_context_load_time: 0, max_context_tokens: 0, @@ -323,6 +359,7 @@ watch(settings, (val) => { form.chat_model_id = val.chat_model_id ?? '' form.search_provider_id = val.search_provider_id ?? '' form.memory_provider_id = (val as any).memory_provider_id ?? '' + form.tts_model_id = (val as any).tts_model_id ?? '' form.browser_context_id = (val as any).browser_context_id ?? '' form.max_context_load_time = val.max_context_load_time ?? 0 form.max_context_tokens = val.max_context_tokens ?? 0 @@ -340,6 +377,7 @@ const hasChanges = computed(() => { form.chat_model_id !== (s.chat_model_id ?? '') || form.search_provider_id !== (s.search_provider_id ?? '') || form.memory_provider_id !== (s.memory_provider_id ?? '') + || form.tts_model_id !== (s.tts_model_id ?? '') || form.browser_context_id !== (s.browser_context_id ?? '') || form.max_context_load_time !== (s.max_context_load_time ?? 0) || form.max_context_tokens !== (s.max_context_tokens ?? 0) @@ -365,7 +403,7 @@ async function handleDeleteBot() { try { await deleteBot() await router.push({ name: 'bots' }) - toast.success(t('bots.deleteSuccess')) + toast.success(t('bots.deleteSuccess')) } catch (error) { toast.error(resolveApiErrorMessage(error, t('bots.lifecycle.deleteFailed'))) } diff --git a/apps/web/src/pages/bots/components/tts-model-select.vue b/apps/web/src/pages/bots/components/tts-model-select.vue new file mode 100644 index 00000000..9e8386ee --- /dev/null +++ b/apps/web/src/pages/bots/components/tts-model-select.vue @@ -0,0 +1,107 @@ + + + diff --git a/apps/web/src/pages/bots/components/tts-provider-select.vue b/apps/web/src/pages/bots/components/tts-provider-select.vue new file mode 100644 index 00000000..9fcfe9bd --- /dev/null +++ b/apps/web/src/pages/bots/components/tts-provider-select.vue @@ -0,0 +1,84 @@ + + + diff --git a/apps/web/src/pages/chat/components/attachment-block.vue b/apps/web/src/pages/chat/components/attachment-block.vue index 36727933..71b818fc 100644 --- a/apps/web/src/pages/chat/components/attachment-block.vue +++ b/apps/web/src/pages/chat/components/attachment-block.vue @@ -30,6 +30,19 @@ /> + +
+
+ ): boolean { + const type = String(att.type ?? '').toLowerCase() + if (type === 'audio' || type === 'voice') return true + const mime = String(att.mime ?? '').toLowerCase() + return mime.startsWith('audio/') +} + function getContainerPath(att: AttachmentItem): string { return String(att.path ?? '').trim() } diff --git a/apps/web/src/pages/tts-providers/components/add-tts-model.vue b/apps/web/src/pages/tts-providers/components/add-tts-model.vue new file mode 100644 index 00000000..ad392500 --- /dev/null +++ b/apps/web/src/pages/tts-providers/components/add-tts-model.vue @@ -0,0 +1,130 @@ + + + diff --git a/apps/web/src/pages/tts-providers/components/add-tts-provider.vue b/apps/web/src/pages/tts-providers/components/add-tts-provider.vue new file mode 100644 index 00000000..ec9742dc --- /dev/null +++ b/apps/web/src/pages/tts-providers/components/add-tts-provider.vue @@ -0,0 +1,141 @@ + + + diff --git a/apps/web/src/pages/tts-providers/components/model-config-editor.vue b/apps/web/src/pages/tts-providers/components/model-config-editor.vue new file mode 100644 index 00000000..bb05945b --- /dev/null +++ b/apps/web/src/pages/tts-providers/components/model-config-editor.vue @@ -0,0 +1,405 @@ +