diff --git a/apps/web/src/components/create-model/index.vue b/apps/web/src/components/create-model/index.vue index 93442bfb..d168332b 100644 --- a/apps/web/src/components/create-model/index.vue +++ b/apps/web/src/components/create-model/index.vue @@ -18,6 +18,7 @@
@@ -35,11 +36,12 @@ - - Chat - - - Embedding + + {{ opt.label }} @@ -181,6 +183,11 @@ import { COMPATIBILITY_OPTIONS } from '@/constants/compatibilities' import FormDialogShell from '@/components/form-dialog-shell/index.vue' import { useDialogMutation } from '@/composables/useDialogMutation' +interface ModelTypeOption { + value: string + label: string +} + const selectedCompat = ref([]) const { t } = useI18n() const { run } = useDialogMutation() @@ -193,14 +200,30 @@ const formSchema = toTypedSchema(z.object({ context_window: z.coerce.number().min(1).optional(), })) +const props = withDefaults(defineProps<{ + id: string + typeOptions?: ModelTypeOption[] + defaultType?: string + hideType?: boolean + invalidateKeys?: string[] +}>(), { + typeOptions: () => [ + { value: 'chat', label: 'Chat' }, + { value: 'embedding', label: 'Embedding' }, + ], + defaultType: 'chat', + hideType: false, + invalidateKeys: () => ['provider-models'], +}) + const form = useForm({ validationSchema: formSchema, initialValues: { - type: 'chat', + type: props.defaultType, }, }) -const selectedType = computed(() => form.values.type || 'chat') +const selectedType = computed(() => form.values.type || props.defaultType) const open = inject>('openModel', ref(false)) const title = inject>('openModelTitle', ref('title')) @@ -237,15 +260,19 @@ function onNameInput(e: Event) { form.setFieldValue('name', (e.target as HTMLInputElement).value) } -const { id } = defineProps<{ id: string }>() - const queryCache = useQueryCache() +function invalidateModelQueries() { + for (const key of props.invalidateKeys) { + queryCache.invalidateQueries({ key: [key] }) + } +} + const { mutateAsync: createModel, isLoading: createLoading } = useMutation({ mutation: async (data: Record) => { const { data: result } = await postModels({ body: data as ModelsAddRequest, throwOnError: true }) return result }, - onSettled: () => queryCache.invalidateQueries({ key: ['provider-models'] }), + onSettled: invalidateModelQueries, }) 
const { mutateAsync: updateModel, isLoading: updateLoading } = useMutation({ mutation: async ({ id, data }: { id: string; data: Record }) => { @@ -256,7 +283,7 @@ const { mutateAsync: updateModel, isLoading: updateLoading } = useMutation({ }) return result }, - onSettled: () => queryCache.invalidateQueries({ key: ['provider-models'] }), + onSettled: invalidateModelQueries, }) const { mutateAsync: updateModelByLegacyModelID, isLoading: updateLegacyLoading } = useMutation({ mutation: async ({ modelId, data }: { modelId: string; data: Record }) => { @@ -267,7 +294,7 @@ const { mutateAsync: updateModelByLegacyModelID, isLoading: updateLegacyLoading }) return result }, - onSettled: () => queryCache.invalidateQueries({ key: ['provider-models'] }), + onSettled: invalidateModelQueries, }) const isLoading = computed(() => createLoading.value || updateLoading.value || updateLegacyLoading.value) @@ -297,7 +324,7 @@ async function addModel() { const payload: Record = { type, model_id, - provider_id: id, + provider_id: props.id, config, } @@ -348,7 +375,15 @@ watch(open, async () => { selectedCompat.value = config?.compatibilities ?? 
[] userEditedName.value = !!(name && name !== model_id) } else { - form.resetForm({ values: { type: 'chat', model_id: '', name: '', dimensions: undefined, context_window: undefined } }) + form.resetForm({ + values: { + type: props.defaultType, + model_id: '', + name: '', + dimensions: undefined, + context_window: undefined, + }, + }) selectedCompat.value = [] userEditedName.value = false } diff --git a/apps/web/src/components/settings-sidebar/index.vue b/apps/web/src/components/settings-sidebar/index.vue index aef7f5fd..66e81ea0 100644 --- a/apps/web/src/components/settings-sidebar/index.vue +++ b/apps/web/src/components/settings-sidebar/index.vue @@ -52,7 +52,7 @@ import { computed, type Component } from 'vue' import { storeToRefs } from 'pinia' import { useRouter, useRoute } from 'vue-router' import { useI18n } from 'vue-i18n' -import { ChevronLeft, Bot, Boxes, Globe, Brain, Volume2, Mail, AppWindow, ChartLine, User, Store, Info } from 'lucide-vue-next' +import { ChevronLeft, Bot, Boxes, Globe, Brain, Volume2, AudioLines, Mail, AppWindow, ChartLine, User, Store, Info } from 'lucide-vue-next' import { useChatSelectionStore } from '@/store/chat-selection' import { Sidebar, @@ -118,6 +118,11 @@ const navItems = computed<{ title: string; name: string; icon: Component }[]>(() name: 'speech', icon: Volume2, }, + { + title: t('sidebar.transcription'), + name: 'transcription', + icon: AudioLines, + }, { title: t('sidebar.email'), name: 'email', diff --git a/apps/web/src/constants/client-types.ts b/apps/web/src/constants/client-types.ts index 812786b2..c69615c3 100644 --- a/apps/web/src/constants/client-types.ts +++ b/apps/web/src/constants/client-types.ts @@ -45,21 +45,41 @@ export const CLIENT_TYPE_META: Record = { label: 'OpenAI Speech', hint: 'OpenAI /audio/speech compatible TTS', }, + 'openai-transcription': { + value: 'openai-transcription', + label: 'OpenAI Transcription', + hint: 'OpenAI audio transcription', + }, 'openrouter-speech': { value: 'openrouter-speech', 
label: 'OpenRouter Speech', hint: 'OpenRouter audio modality TTS', }, + 'openrouter-transcription': { + value: 'openrouter-transcription', + label: 'OpenRouter Transcription', + hint: 'OpenRouter transcription models', + }, 'elevenlabs-speech': { value: 'elevenlabs-speech', label: 'ElevenLabs Speech', hint: 'ElevenLabs text-to-speech', }, + 'elevenlabs-transcription': { + value: 'elevenlabs-transcription', + label: 'ElevenLabs Transcription', + hint: 'ElevenLabs speech-to-text', + }, 'deepgram-speech': { value: 'deepgram-speech', label: 'Deepgram Speech', hint: 'Deepgram TTS', }, + 'deepgram-transcription': { + value: 'deepgram-transcription', + label: 'Deepgram Transcription', + hint: 'Deepgram speech-to-text', + }, 'minimax-speech': { value: 'minimax-speech', label: 'MiniMax Speech', @@ -80,9 +100,19 @@ export const CLIENT_TYPE_META: Record = { label: 'Microsoft Speech', hint: 'Azure Cognitive Services TTS', }, + 'google-speech': { + value: 'google-speech', + label: 'Google Speech', + hint: 'Gemini text-to-speech', + }, + 'google-transcription': { + value: 'google-transcription', + label: 'Google Transcription', + hint: 'Gemini speech transcription', + }, } export const CLIENT_TYPE_LIST: ClientTypeMeta[] = Object.values(CLIENT_TYPE_META) export const LLM_CLIENT_TYPE_LIST: ClientTypeMeta[] = CLIENT_TYPE_LIST - .filter(ct => !ct.value.endsWith('-speech')) + .filter(ct => !ct.value.endsWith('-speech') && !ct.value.endsWith('-transcription')) diff --git a/apps/web/src/i18n/locales/en.json b/apps/web/src/i18n/locales/en.json index 59f83d6c..bdaaefc9 100644 --- a/apps/web/src/i18n/locales/en.json +++ b/apps/web/src/i18n/locales/en.json @@ -63,6 +63,7 @@ "webSearch": "Web Search", "memory": "Memory", "speech": "Speech", + "transcription": "Transcription", "email": "Email", "settings": "Settings", "profile": "Profile", @@ -425,6 +426,9 @@ "noModels": "No models found. 
Click \"Import Models\" to discover available models or \"Add Model\" to create one manually.", "noCapabilities": "No capabilities available for this model.", "saveSuccess": "Speech configuration saved", + "synthesis": { + "models": "Synthesis Models" + }, "advanced": { "title": "Advanced Settings", "description": "These fields usually map to underlying vendor implementation details. Most users can keep the defaults." @@ -448,6 +452,27 @@ "failed": "Synthesis failed" } }, + "transcription": { + "title": "Transcription", + "emptyTitle": "No Transcription Providers", + "emptyDescription": "Add a transcription provider to enable speech-to-text for your bots", + "models": "Transcription Models", + "noModels": "No transcription models found. Import available models or keep the default template model.", + "noCapabilities": "No capabilities available for this model.", + "importModels": "Import Models", + "importSuccess": "Transcription models imported successfully", + "importFailed": "Failed to import transcription models", + "saveSuccess": "Transcription configuration saved", + "advanced": { + "title": "Advanced Settings", + "description": "These fields usually map to underlying vendor implementation details. Most users can keep the defaults." + }, + "test": { + "title": "Test Transcription", + "run": "Transcribe", + "failed": "Transcription failed" + } + }, "email": { "title": "Email", "add": "Add Email", @@ -920,6 +945,8 @@ "memoryHealthUnavailable": "Unavailable", "ttsModel": "TTS Model", "ttsModelPlaceholder": "Select TTS model", + "transcriptionModel": "Transcription Model", + "transcriptionModelPlaceholder": "Select transcription model", "imageModel": "Image Generation Model", "imageModelDescription": "Model used for the generate_image tool. 
Must support image-output compatibility.", "imageModelPlaceholder": "Select image model (optional)", diff --git a/apps/web/src/i18n/locales/zh.json b/apps/web/src/i18n/locales/zh.json index 8891cad6..382a3ee9 100644 --- a/apps/web/src/i18n/locales/zh.json +++ b/apps/web/src/i18n/locales/zh.json @@ -64,6 +64,7 @@ "webSearch": "搜索", "memory": "记忆", "speech": "语音", + "transcription": "转写", "email": "邮件", "profile": "用户", "home": "首页", @@ -421,6 +422,9 @@ "noModels": "暂无模型,点击\"导入模型\"发现可用模型,或点击\"新建模型\"手动创建。", "noCapabilities": "该模型暂无可用能力信息。", "saveSuccess": "语音配置已保存", + "synthesis": { + "models": "语音合成模型" + }, "advanced": { "title": "高级设置", "description": "这些字段通常对应底层服务商实现细节。大多数情况下保留默认值即可。" @@ -444,6 +448,27 @@ "failed": "合成失败" } }, + "transcription": { + "title": "语音转写", + "emptyTitle": "暂无转写提供方", + "emptyDescription": "添加转写提供方以为 Bot 启用语音转文字功能", + "models": "语音识别模型", + "noModels": "暂无语音识别模型,可导入可用模型,或保留默认模板模型。", + "importModels": "导入模型", + "importSuccess": "识别模型导入成功", + "importFailed": "识别模型导入失败", + "saveSuccess": "转写配置已保存", + "noCapabilities": "该模型暂无可用能力信息。", + "advanced": { + "title": "高级设置", + "description": "这些字段通常对应底层服务商实现细节。大多数情况下保留默认值即可。" + }, + "test": { + "title": "测试识别", + "run": "开始识别", + "failed": "识别失败" + } + }, "email": { "title": "邮件提供方", "add": "添加邮件提供方", @@ -916,6 +941,8 @@ "memoryHealthUnavailable": "暂不可用", "ttsModel": "语音合成模型", "ttsModelPlaceholder": "选择语音合成模型", + "transcriptionModel": "转写模型", + "transcriptionModelPlaceholder": "选择语音转写模型", "imageModel": "图片生成模型", "imageModelDescription": "用于 generate_image 工具的模型,必须支持 image-output 兼容性。", "imageModelPlaceholder": "选择图片模型(可选)", diff --git a/apps/web/src/pages/bots/components/bot-settings.vue b/apps/web/src/pages/bots/components/bot-settings.vue index 7fd4632b..7f1e3e84 100644 --- a/apps/web/src/pages/bots/components/bot-settings.vue +++ b/apps/web/src/pages/bots/components/bot-settings.vue @@ -187,6 +187,17 @@ />
+ +
+ + +
+
@@ -356,7 +367,7 @@ import MemoryProviderSelect from './memory-provider-select.vue' import TtsModelSelect from './tts-model-select.vue' import BrowserContextSelect from './browser-context-select.vue' import { useQuery, useMutation, useQueryCache } from '@pinia/colada' -import { getBotsById, putBotsById, getBotsByBotIdSettings, putBotsByBotIdSettings, deleteBotsById, getModels, getProviders, getSearchProviders, getMemoryProviders, getSpeechProviders, getSpeechModels, getBrowserContexts, getBotsByBotIdMemoryStatus, postBotsByBotIdMemoryRebuild } from '@memohai/sdk' +import { getBotsById, putBotsById, getBotsByBotIdSettings, putBotsByBotIdSettings, deleteBotsById, getModels, getProviders, getSearchProviders, getMemoryProviders, getSpeechProviders, getSpeechModels, getTranscriptionProviders, getTranscriptionModels, getBrowserContexts, getBotsByBotIdMemoryStatus, postBotsByBotIdMemoryRebuild } from '@memohai/sdk' import type { SettingsSettings } from '@memohai/sdk' import type { Ref } from 'vue' import { resolveApiErrorMessage } from '@/utils/api-error' @@ -440,6 +451,22 @@ const { data: ttsModelData } = useQuery({ }, }) +const { data: transcriptionModelData } = useQuery({ + key: ['transcription-models'], + query: async () => { + const { data } = await getTranscriptionModels({ throwOnError: true }) + return data + }, +}) + +const { data: transcriptionProviderData } = useQuery({ + key: ['transcription-providers'], + query: async () => { + const { data } = await getTranscriptionProviders({ throwOnError: true }) + return data + }, +}) + const { data: browserContextData } = useQuery({ key: ['all-browser-contexts'], query: async () => { @@ -494,7 +521,10 @@ const searchProviders = computed(() => (searchProviderData.value ?? []).filter(( const memoryProviders = computed(() => memoryProviderData.value ?? []) const ttsProviders = computed(() => (ttsProviderData.value ?? 
[]).filter((p) => p.enable !== false)) const enabledTtsProviderIds = computed(() => new Set(ttsProviders.value.map((p) => p.id))) +const transcriptionProviders = computed(() => (transcriptionProviderData.value ?? []).filter((p: Record) => p.enable !== false)) +const enabledTranscriptionProviderIds = computed(() => new Set(transcriptionProviders.value.map((p: Record) => p.id as string))) const ttsModels = computed(() => (ttsModelData.value ?? []).filter((m: Record) => enabledTtsProviderIds.value.has(m.provider_id as string))) +const transcriptionModels = computed(() => (transcriptionModelData.value ?? []).filter((m: Record) => enabledTranscriptionProviderIds.value.has(m.provider_id as string))) const browserContexts = computed(() => browserContextData.value ?? []) // ---- Form ---- @@ -505,6 +535,7 @@ const form = reactive({ search_provider_id: '', memory_provider_id: '', tts_model_id: '', + transcription_model_id: '', browser_context_id: '', timezone: '', language: '', @@ -644,6 +675,7 @@ watch(settings, (val) => { form.search_provider_id = val.search_provider_id ?? '' form.memory_provider_id = val.memory_provider_id ?? '' form.tts_model_id = val.tts_model_id ?? '' + form.transcription_model_id = val.transcription_model_id ?? '' form.browser_context_id = val.browser_context_id ?? '' form.language = val.language ?? '' form.timezone = val.timezone ?? '' @@ -666,6 +698,7 @@ const hasSettingsChanges = computed(() => { || form.search_provider_id !== (s.search_provider_id ?? '') || form.memory_provider_id !== (s.memory_provider_id ?? '') || form.tts_model_id !== (s.tts_model_id ?? '') + || form.transcription_model_id !== (s.transcription_model_id ?? '') || form.browser_context_id !== (s.browser_context_id ?? '') || form.language !== (s.language ?? '') || form.timezone !== (s.timezone ?? 
'') diff --git a/apps/web/src/pages/speech/components/model-config-editor.vue b/apps/web/src/pages/speech/components/model-config-editor.vue index 334d8f0d..d0b9c4cc 100644 --- a/apps/web/src/pages/speech/components/model-config-editor.vue +++ b/apps/web/src/pages/speech/components/model-config-editor.vue @@ -85,7 +85,7 @@ v-else-if="advancedFields.length === 0" class="text-xs text-muted-foreground" > - {{ $t('speech.noCapabilities') }} + {{ mode === 'transcription' ? $t('transcription.noCapabilities') : $t('speech.noCapabilities') }}
- {{ $t('speech.advanced.title') }} + {{ mode === 'transcription' ? $t('transcription.advanced.title') : $t('speech.advanced.title') }}

- {{ $t('speech.advanced.description') }} + {{ mode === 'transcription' ? $t('transcription.advanced.description') : $t('speech.advanced.description') }}

- {{ $t('speech.test.title') }} + {{ mode === 'transcription' ? $t('transcription.test.title') : $t('speech.test.title') }}

-
+