Feat/speech support (#392)

* feat: expand speech provider support with new client types and configuration schema
* feat: add icon support for speech providers and update related configurations
* feat: add SVG support for Deepgram and Elevenlabs with Vue components
* feat: exclude *-speech client types from llm provider
* feat: enhance speech provider functionality with advanced settings and model import capabilities
* chore: remove go.mod replace
* feat: enhance speech provider functionality with advanced settings and model import capabilities
* chore: update go module dependencies
* feat: Ear and Mouth
* fix: separate ear/mouth page
* fix: separate audio domain and restore transcription templates

Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs.

---------

Co-authored-by: aki <arisu@ieee.org>
2026-04-27 07:16:19 +09:00 · 2026-04-22 00:09:46 +08:00
parent 8d78925a23
commit c9dcfe287f
70 changed files with 6612 additions and 1692 deletions
@@ -18,6 +18,7 @@
        <div class="flex flex-col gap-3 mt-4">
          <!-- Type -->
          <FormField
+            v-if="!hideType"
            v-slot="{ componentField }"
            name="type"
          >
@@ -35,11 +36,12 @@
                  </SelectTrigger>
                  <SelectContent>
                    <SelectGroup>
-                      <SelectItem value="chat">
-                        Chat
-                      </SelectItem>
-                      <SelectItem value="embedding">
-                        Embedding
+                      <SelectItem
+                        v-for="opt in typeOptions"
+                        :key="opt.value"
+                        :value="opt.value"
+                      >
+                        {{ opt.label }}
                      </SelectItem>
                    </SelectGroup>
                  </SelectContent>
@@ -181,6 +183,11 @@ import { COMPATIBILITY_OPTIONS } from '@/constants/compatibilities'
 import FormDialogShell from '@/components/form-dialog-shell/index.vue'
 import { useDialogMutation } from '@/composables/useDialogMutation'

+interface ModelTypeOption {
+  value: string
+  label: string
+}
+
 const selectedCompat = ref<string[]>([])
 const { t } = useI18n()
 const { run } = useDialogMutation()
@@ -193,14 +200,30 @@ const formSchema = toTypedSchema(z.object({
  context_window: z.coerce.number().min(1).optional(),
 }))

+// Dialog props. Only `id` is required; the defaults below reproduce the
+// chat/embedding behavior for callers that pass nothing else.
+const props = withDefaults(defineProps<{
+  /** Provider the model belongs to; sent as `provider_id` when a model is added. */
+  id: string
+  /** Entries rendered in the model type select. */
+  typeOptions?: ModelTypeOption[]
+  /** Initial value of the `type` field, also used when the form is reset. */
+  defaultType?: string
+  /** When true the type form field is not rendered. */
+  hideType?: boolean
+  /** Query keys invalidated after create/update mutations settle. */
+  invalidateKeys?: string[]
+}>(), {
+  typeOptions: () => [
+    { value: 'chat', label: 'Chat' },
+    { value: 'embedding', label: 'Embedding' },
+  ],
+  defaultType: 'chat',
+  hideType: false,
+  invalidateKeys: () => ['provider-models'],
+})
+
 const form = useForm({
  validationSchema: formSchema,
  initialValues: {
-    type: 'chat',
+    type: props.defaultType,
  },
 })

-const selectedType = computed(() => form.values.type || 'chat')
+const selectedType = computed(() => form.values.type || props.defaultType)

 const open = inject<Ref<boolean>>('openModel', ref(false))
 const title = inject<Ref<'edit' | 'title'>>('openModelTitle', ref('title'))
@@ -237,15 +260,19 @@ function onNameInput(e: Event) {
  form.setFieldValue('name', (e.target as HTMLInputElement).value)
 }

-const { id } = defineProps<{ id: string }>()
-
 const queryCache = useQueryCache()
+/**
+ * Invalidates every query key listed in `props.invalidateKeys`.
+ *
+ * The combined promise is returned (instead of being dropped) so that a
+ * caller receiving this function as a hook, e.g. a mutation's `onSettled`,
+ * can wait until all invalidations have completed.
+ */
+function invalidateModelQueries() {
+  return Promise.all(
+    props.invalidateKeys.map(key => queryCache.invalidateQueries({ key: [key] })),
+  )
+}
+
 const { mutateAsync: createModel, isLoading: createLoading } = useMutation({
  mutation: async (data: Record<string, unknown>) => {
    const { data: result } = await postModels({ body: data as ModelsAddRequest, throwOnError: true })
    return result
  },
-  onSettled: () => queryCache.invalidateQueries({ key: ['provider-models'] }),
+  onSettled: invalidateModelQueries,
 })
 const { mutateAsync: updateModel, isLoading: updateLoading } = useMutation({
  mutation: async ({ id, data }: { id: string; data: Record<string, unknown> }) => {
@@ -256,7 +283,7 @@ const { mutateAsync: updateModel, isLoading: updateLoading } = useMutation({
    })
    return result
  },
-  onSettled: () => queryCache.invalidateQueries({ key: ['provider-models'] }),
+  onSettled: invalidateModelQueries,
 })
 const { mutateAsync: updateModelByLegacyModelID, isLoading: updateLegacyLoading } = useMutation({
  mutation: async ({ modelId, data }: { modelId: string; data: Record<string, unknown> }) => {
@@ -267,7 +294,7 @@ const { mutateAsync: updateModelByLegacyModelID, isLoading: updateLegacyLoading
    })
    return result
  },
-  onSettled: () => queryCache.invalidateQueries({ key: ['provider-models'] }),
+  onSettled: invalidateModelQueries,
 })
 const isLoading = computed(() => createLoading.value || updateLoading.value || updateLegacyLoading.value)

@@ -297,7 +324,7 @@ async function addModel() {
  const payload: Record<string, unknown> = {
    type,
    model_id,
-    provider_id: id,
+    provider_id: props.id,
    config,
  }

@@ -348,7 +375,15 @@ watch(open, async () => {
    selectedCompat.value = config?.compatibilities ?? []
    userEditedName.value = !!(name && name !== model_id)
  } else {
-    form.resetForm({ values: { type: 'chat', model_id: '', name: '', dimensions: undefined, context_window: undefined } })
+    form.resetForm({
+      values: {
+        type: props.defaultType,
+        model_id: '',
+        name: '',
+        dimensions: undefined,
+        context_window: undefined,
+      },
+    })
    selectedCompat.value = []
    userEditedName.value = false
  }
@@ -52,7 +52,7 @@ import { computed, type Component } from 'vue'
 import { storeToRefs } from 'pinia'
 import { useRouter, useRoute } from 'vue-router'
 import { useI18n } from 'vue-i18n'
-import { ChevronLeft, Bot, Boxes, Globe, Brain, Volume2, Mail, AppWindow, ChartLine, User, Store, Info } from 'lucide-vue-next'
+import { ChevronLeft, Bot, Boxes, Globe, Brain, Volume2, AudioLines, Mail, AppWindow, ChartLine, User, Store, Info } from 'lucide-vue-next'
 import { useChatSelectionStore } from '@/store/chat-selection'
 import {
  Sidebar,
@@ -118,6 +118,11 @@ const navItems = computed<{ title: string; name: string; icon: Component }[]>(()
    name: 'speech',
    icon: Volume2,
  },
+  {
+    title: t('sidebar.transcription'),
+    name: 'transcription',
+    icon: AudioLines,
+  },
  {
    title: t('sidebar.email'),
    name: 'email',
@@ -45,21 +45,41 @@ export const CLIENT_TYPE_META: Record<string, ClientTypeMeta> = {
    label: 'OpenAI Speech',
    hint: 'OpenAI /audio/speech compatible TTS',
  },
+  'openai-transcription': {
+    value: 'openai-transcription',
+    label: 'OpenAI Transcription',
+    hint: 'OpenAI audio transcription',
+  },
  'openrouter-speech': {
    value: 'openrouter-speech',
    label: 'OpenRouter Speech',
    hint: 'OpenRouter audio modality TTS',
  },
+  'openrouter-transcription': {
+    value: 'openrouter-transcription',
+    label: 'OpenRouter Transcription',
+    hint: 'OpenRouter transcription models',
+  },
  'elevenlabs-speech': {
    value: 'elevenlabs-speech',
    label: 'ElevenLabs Speech',
    hint: 'ElevenLabs text-to-speech',
  },
+  'elevenlabs-transcription': {
+    value: 'elevenlabs-transcription',
+    label: 'ElevenLabs Transcription',
+    hint: 'ElevenLabs speech-to-text',
+  },
  'deepgram-speech': {
    value: 'deepgram-speech',
    label: 'Deepgram Speech',
    hint: 'Deepgram TTS',
  },
+  'deepgram-transcription': {
+    value: 'deepgram-transcription',
+    label: 'Deepgram Transcription',
+    hint: 'Deepgram speech-to-text',
+  },
  'minimax-speech': {
    value: 'minimax-speech',
    label: 'MiniMax Speech',
@@ -80,9 +100,19 @@ export const CLIENT_TYPE_META: Record<string, ClientTypeMeta> = {
    label: 'Microsoft Speech',
    hint: 'Azure Cognitive Services TTS',
  },
+  'google-speech': {
+    value: 'google-speech',
+    label: 'Google Speech',
+    hint: 'Gemini text-to-speech', // TTS entry; speech-to-text is described by 'google-transcription'
+  },
+  'google-transcription': {
+    value: 'google-transcription',
+    label: 'Google Transcription',
+    hint: 'Gemini speech transcription',
+  },
 }

 export const CLIENT_TYPE_LIST: ClientTypeMeta[] = Object.values(CLIENT_TYPE_META)

 export const LLM_CLIENT_TYPE_LIST: ClientTypeMeta[] = CLIENT_TYPE_LIST
-  .filter(ct => !ct.value.endsWith('-speech'))
+  .filter(ct => !ct.value.endsWith('-speech') && !ct.value.endsWith('-transcription'))
@@ -63,6 +63,7 @@
    "webSearch": "Web Search",
    "memory": "Memory",
    "speech": "Speech",
+    "transcription": "Transcription",
    "email": "Email",
    "settings": "Settings",
    "profile": "Profile",
@@ -425,6 +426,9 @@
    "noModels": "No models found. Click \"Import Models\" to discover available models or \"Add Model\" to create one manually.",
    "noCapabilities": "No capabilities available for this model.",
    "saveSuccess": "Speech configuration saved",
+    "synthesis": {
+      "models": "Synthesis Models"
+    },
    "advanced": {
      "title": "Advanced Settings",
      "description": "These fields usually map to underlying vendor implementation details. Most users can keep the defaults."
@@ -448,6 +452,27 @@
      "failed": "Synthesis failed"
    }
  },
+  "transcription": {
+    "title": "Transcription",
+    "emptyTitle": "No Transcription Providers",
+    "emptyDescription": "Add a transcription provider to enable speech-to-text for your bots",
+    "models": "Transcription Models",
+    "noModels": "No transcription models found. Import available models or keep the default template model.",
+    "noCapabilities": "No capabilities available for this model.",
+    "importModels": "Import Models",
+    "importSuccess": "Transcription models imported successfully",
+    "importFailed": "Failed to import transcription models",
+    "saveSuccess": "Transcription configuration saved",
+    "advanced": {
+      "title": "Advanced Settings",
+      "description": "These fields usually map to underlying vendor implementation details. Most users can keep the defaults."
+    },
+    "test": {
+      "title": "Test Transcription",
+      "run": "Transcribe",
+      "failed": "Transcription failed"
+    }
+  },
  "email": {
    "title": "Email",
    "add": "Add Email",
@@ -920,6 +945,8 @@
      "memoryHealthUnavailable": "Unavailable",
      "ttsModel": "TTS Model",
      "ttsModelPlaceholder": "Select TTS model",
+      "transcriptionModel": "Transcription Model",
+      "transcriptionModelPlaceholder": "Select transcription model",
      "imageModel": "Image Generation Model",
      "imageModelDescription": "Model used for the generate_image tool. Must support image-output compatibility.",
      "imageModelPlaceholder": "Select image model (optional)",
@@ -64,6 +64,7 @@
    "webSearch": "搜索",
    "memory": "记忆",
    "speech": "语音",
+    "transcription": "转写",
    "email": "邮件",
    "profile": "用户",
    "home": "首页",
@@ -421,6 +422,9 @@
    "noModels": "暂无模型，点击\"导入模型\"发现可用模型，或点击\"新建模型\"手动创建。",
    "noCapabilities": "该模型暂无可用能力信息。",
    "saveSuccess": "语音配置已保存",
+    "synthesis": {
+      "models": "语音合成模型"
+    },
    "advanced": {
      "title": "高级设置",
      "description": "这些字段通常对应底层服务商实现细节。大多数情况下保留默认值即可。"
@@ -444,6 +448,27 @@
      "failed": "合成失败"
    }
  },
+  "transcription": {
+    "title": "语音转写",
+    "emptyTitle": "暂无转写提供方",
+    "emptyDescription": "添加转写提供方以为 Bot 启用语音转文字功能",
+    "models": "语音识别模型",
+    "noModels": "暂无语音识别模型，可导入可用模型，或保留默认模板模型。",
+    "importModels": "导入模型",
+    "importSuccess": "识别模型导入成功",
+    "importFailed": "识别模型导入失败",
+    "saveSuccess": "转写配置已保存",
+    "noCapabilities": "该模型暂无可用能力信息。",
+    "advanced": {
+      "title": "高级设置",
+      "description": "这些字段通常对应底层服务商实现细节。大多数情况下保留默认值即可。"
+    },
+    "test": {
+      "title": "测试识别",
+      "run": "开始识别",
+      "failed": "识别失败"
+    }
+  },
  "email": {
    "title": "邮件提供方",
    "add": "添加邮件提供方",
@@ -916,6 +941,8 @@
      "memoryHealthUnavailable": "暂不可用",
      "ttsModel": "语音合成模型",
      "ttsModelPlaceholder": "选择语音合成模型",
+      "transcriptionModel": "转写模型",
+      "transcriptionModelPlaceholder": "选择语音转写模型",
      "imageModel": "图片生成模型",
      "imageModelDescription": "用于 generate_image 工具的模型，必须支持 image-output 兼容性。",
      "imageModelPlaceholder": "选择图片模型（可选）",
@@ -187,6 +187,17 @@
      />
    </div>

+    <!-- Transcription Model: reuses the TTS select component, fed with transcription models and their providers -->
+    <div class="space-y-2">
+      <Label>{{ $t('bots.settings.transcriptionModel') }}</Label>
+      <TtsModelSelect
+        v-model="form.transcription_model_id"
+        :models="transcriptionModels"
+        :providers="transcriptionProviders"
+        :placeholder="$t('bots.settings.transcriptionModelPlaceholder')"
+      />
+    </div>
+
    <!-- Image Generation Model -->
    <div class="space-y-2">
      <Label>{{ $t('bots.settings.imageModel') }}</Label>
@@ -356,7 +367,7 @@ import MemoryProviderSelect from './memory-provider-select.vue'
 import TtsModelSelect from './tts-model-select.vue'
 import BrowserContextSelect from './browser-context-select.vue'
 import { useQuery, useMutation, useQueryCache } from '@pinia/colada'
-import { getBotsById, putBotsById, getBotsByBotIdSettings, putBotsByBotIdSettings, deleteBotsById, getModels, getProviders, getSearchProviders, getMemoryProviders, getSpeechProviders, getSpeechModels, getBrowserContexts, getBotsByBotIdMemoryStatus, postBotsByBotIdMemoryRebuild } from '@memohai/sdk'
+import { getBotsById, putBotsById, getBotsByBotIdSettings, putBotsByBotIdSettings, deleteBotsById, getModels, getProviders, getSearchProviders, getMemoryProviders, getSpeechProviders, getSpeechModels, getTranscriptionProviders, getTranscriptionModels, getBrowserContexts, getBotsByBotIdMemoryStatus, postBotsByBotIdMemoryRebuild } from '@memohai/sdk'
 import type { SettingsSettings } from '@memohai/sdk'
 import type { Ref } from 'vue'
 import { resolveApiErrorMessage } from '@/utils/api-error'
@@ -440,6 +451,22 @@ const { data: ttsModelData } = useQuery({
  },
 })

+const { data: transcriptionModelData } = useQuery({
+  key: ['transcription-models'],
+  query: async () => {
+    const { data } = await getTranscriptionModels({ throwOnError: true })
+    return data
+  },
+})
+
+const { data: transcriptionProviderData } = useQuery({
+  key: ['transcription-providers'],
+  query: async () => {
+    const { data } = await getTranscriptionProviders({ throwOnError: true })
+    return data
+  },
+})
+
 const { data: browserContextData } = useQuery({
  key: ['all-browser-contexts'],
  query: async () => {
@@ -494,7 +521,10 @@ const searchProviders = computed(() => (searchProviderData.value ?? []).filter((
 const memoryProviders = computed(() => memoryProviderData.value ?? [])
 const ttsProviders = computed(() => (ttsProviderData.value ?? []).filter((p) => p.enable !== false))
 const enabledTtsProviderIds = computed(() => new Set(ttsProviders.value.map((p) => p.id)))
+const transcriptionProviders = computed(() => (transcriptionProviderData.value ?? []).filter((p: Record<string, unknown>) => p.enable !== false))
+const enabledTranscriptionProviderIds = computed(() => new Set(transcriptionProviders.value.map((p: Record<string, unknown>) => p.id as string)))
 const ttsModels = computed(() => (ttsModelData.value ?? []).filter((m: Record<string, unknown>) => enabledTtsProviderIds.value.has(m.provider_id as string)))
+const transcriptionModels = computed(() => (transcriptionModelData.value ?? []).filter((m: Record<string, unknown>) => enabledTranscriptionProviderIds.value.has(m.provider_id as string)))
 const browserContexts = computed(() => browserContextData.value ?? [])

 // ---- Form ----
@@ -505,6 +535,7 @@ const form = reactive({
  search_provider_id: '',
  memory_provider_id: '',
  tts_model_id: '',
+  transcription_model_id: '',
  browser_context_id: '',
  timezone: '',
  language: '',
@@ -644,6 +675,7 @@ watch(settings, (val) => {
    form.search_provider_id = val.search_provider_id ?? ''
    form.memory_provider_id = val.memory_provider_id ?? ''
    form.tts_model_id = val.tts_model_id ?? ''
+    form.transcription_model_id = val.transcription_model_id ?? ''
    form.browser_context_id = val.browser_context_id ?? ''
    form.language = val.language ?? ''
    form.timezone = val.timezone ?? ''
@@ -666,6 +698,7 @@ const hasSettingsChanges = computed(() => {
    || form.search_provider_id !== (s.search_provider_id ?? '')
    || form.memory_provider_id !== (s.memory_provider_id ?? '')
    || form.tts_model_id !== (s.tts_model_id ?? '')
+    || form.transcription_model_id !== (s.transcription_model_id ?? '')
    || form.browser_context_id !== (s.browser_context_id ?? '')
    || form.language !== (s.language ?? '')
    || form.timezone !== (s.timezone ?? '')
@@ -85,7 +85,7 @@
      v-else-if="advancedFields.length === 0"
      class="text-xs text-muted-foreground"
    >
-      {{ $t('speech.noCapabilities') }}
+      {{ mode === 'transcription' ? $t('transcription.noCapabilities') : $t('speech.noCapabilities') }}
    </div>

    <div
@@ -97,7 +97,7 @@
        class="flex w-full items-center justify-between px-3 py-2 text-left text-xs font-medium"
        @click="showAdvanced = !showAdvanced"
      >
-        <span>{{ $t('speech.advanced.title') }}</span>
+        <span>{{ mode === 'transcription' ? $t('transcription.advanced.title') : $t('speech.advanced.title') }}</span>
        <component
          :is="showAdvanced ? ChevronUp : ChevronDown"
          class="size-3 text-muted-foreground"
@@ -108,7 +108,7 @@
        class="space-y-4 border-t border-border px-3 py-3"
      >
        <p class="text-xs text-muted-foreground">
-          {{ $t('speech.advanced.description') }}
+          {{ mode === 'transcription' ? $t('transcription.advanced.description') : $t('speech.advanced.description') }}
        </p>
        <section
          v-for="field in advancedFields"
@@ -195,9 +195,12 @@

    <div class="space-y-3">
      <h4 class="text-xs font-medium">
-        {{ $t('speech.test.title') }}
+        {{ mode === 'transcription' ? $t('transcription.test.title') : $t('speech.test.title') }}
      </h4>
-      <div class="relative">
+      <div
+        v-if="mode === 'synthesis'"
+        class="relative"
+      >
        <Textarea
          v-model="testText"
          :placeholder="$t('speech.test.placeholder')"
@@ -209,17 +212,36 @@
          {{ testText.length }}/{{ maxTestTextLen }}
        </span>
      </div>
+      <div
+        v-else
+        class="space-y-2"
+      >
+        <Input
+          type="file"
+          accept="audio/*"
+          @change="handleFileChange"
+        />
+        <p
+          v-if="selectedFileName"
+          class="text-xs text-muted-foreground"
+        >
+          {{ selectedFileName }}
+        </p>
+      </div>
      <div class="flex items-center gap-3">
        <LoadingButton
          type="button"
          variant="outline"
          size="sm"
          :loading="testLoading"
-          :disabled="!testText.trim() || testText.length > maxTestTextLen"
+          :disabled="mode === 'synthesis' ? (!testText.trim() || testText.length > maxTestTextLen) : !selectedFile"
          @click="handleTest"
        >
-          <Play class="mr-1.5" />
-          {{ $t('speech.test.generate') }}
+          <Play
+            v-if="mode === 'synthesis'"
+            class="mr-1.5"
+          />
+          {{ mode === 'transcription' ? $t('transcription.test.run') : $t('speech.test.generate') }}
        </LoadingButton>
        <span
          v-if="testError"
@@ -229,7 +251,7 @@
        </span>
      </div>
      <div
-        v-if="audioUrl"
+        v-if="mode === 'synthesis' && audioUrl"
        class="rounded-md border border-border bg-muted/30 p-3"
      >
        <audio
@@ -239,6 +261,20 @@
          class="w-full"
        />
      </div>
+      <div
+        v-if="mode === 'transcription' && transcriptionText"
+        class="rounded-md border border-border bg-muted/30 p-3 space-y-2"
+      >
+        <p class="text-sm whitespace-pre-wrap wrap-break-word">
+          {{ transcriptionText }}
+        </p>
+        <p
+          v-if="transcriptionLanguage"
+          class="text-xs text-muted-foreground"
+        >
+          {{ transcriptionLanguage }}
+        </p>
+      </div>
    </div>

    <Separator class="my-3" />
@@ -296,7 +332,8 @@ const props = defineProps<{
  modelName: string
  config: Record<string, unknown>
  schema: SpeechConfigSchema | null
-  onTest: (text: string, config: Record<string, unknown>) => Promise<Blob>
+  mode?: 'synthesis' | 'transcription'
+  onTest: (payload: string | File, config: Record<string, unknown>) => Promise<Blob | { text?: string, language?: string }>
 }>()

 const emit = defineEmits<{
@@ -309,11 +346,16 @@ const visibleSecrets = reactive<Record<string, boolean>>({})
 const saving = ref(false)
 const showAdvanced = ref(false)
 const testText = ref('')
+const selectedFile = ref<File | null>(null)
+const selectedFileName = ref('')
 const testLoading = ref(false)
 const testError = ref('')
 const audioUrl = ref('')
+const transcriptionText = ref('')
+const transcriptionLanguage = ref('')
 const audioEl = ref<HTMLAudioElement>()
 const maxTestTextLen = 500
+const mode = computed(() => props.mode ?? 'synthesis')

 const orderedFields = computed(() => {
  const fields = props.schema?.fields ?? []
@@ -348,6 +390,11 @@ function revokeAudio() {
  }
 }

+function resetTranscription() {
+  transcriptionText.value = ''
+  transcriptionLanguage.value = ''
+}
+
 onBeforeUnmount(revokeAudio)

 async function handleSaveConfig() {
@@ -360,23 +407,39 @@ async function handleSaveConfig() {
 }

 async function handleTest() {
-  if (!testText.value.trim()) return
+  if (mode.value === 'synthesis' && !testText.value.trim()) return
+  if (mode.value === 'transcription' && !selectedFile.value) return
  testLoading.value = true
  testError.value = ''
  revokeAudio()
+  resetTranscription()

  try {
-    const blob = await props.onTest(testText.value, buildConfig())
+    const result = await props.onTest(mode.value === 'synthesis' ? testText.value : selectedFile.value as File, buildConfig())

-    audioUrl.value = URL.createObjectURL(blob)
-    await new Promise<void>((resolve) => setTimeout(resolve, 50))
-    audioEl.value?.play()
+    if (mode.value === 'synthesis') {
+      const blob = result as Blob
+      audioUrl.value = URL.createObjectURL(blob)
+      await new Promise<void>((resolve) => setTimeout(resolve, 50))
+      audioEl.value?.play()
+    } else {
+      const payload = result as { text?: string, language?: string }
+      transcriptionText.value = payload.text ?? ''
+      transcriptionLanguage.value = payload.language ?? ''
+    }
  } catch (error: unknown) {
-    const msg = error instanceof Error ? error.message : t('speech.test.failed')
+    const msg = error instanceof Error ? error.message : t(mode.value === 'transcription' ? 'transcription.test.failed' : 'speech.test.failed')
    testError.value = msg
    toast.error(msg)
  } finally {
    testLoading.value = false
  }
 }
+
+// Change handler of the audio file input: remembers the first chosen file and
+// its name, or resets both refs when the selection is empty.
+function handleFileChange(event: Event) {
+  const picked = (event.target as HTMLInputElement).files?.[0]
+  selectedFile.value = picked ?? null
+  selectedFileName.value = picked?.name ?? ''
+}
 </script>
@@ -138,18 +138,29 @@
    <section>
      <div class="flex justify-between items-center mb-4">
        <h3 class="text-xs font-medium">
-          {{ $t('speech.models') }}
+          {{ $t('speech.synthesis.models') }}
        </h3>
-        <LoadingButton
+        <div
          v-if="curProviderId"
-          type="button"
-          variant="outline"
-          size="sm"
-          :loading="importLoading"
-          @click="handleImportModels"
+          class="flex items-center gap-2"
        >
-          {{ $t('speech.importModels') }}
-        </LoadingButton>
+          <LoadingButton
+            type="button"
+            variant="outline"
+            size="sm"
+            :loading="importLoading"
+            @click="handleImportModels"
+          >
+            {{ $t('speech.importModels') }}
+          </LoadingButton>
+          <CreateModel
+            :id="curProviderId"
+            default-type="speech"
+            hide-type
+            :type-options="speechTypeOptions"
+            :invalidate-keys="['speech-provider-models', 'speech-models']"
+          />
+        </div>
      </div>

      <div
@@ -191,7 +202,7 @@
            :model-name="model.model_id ?? ''"
            :config="model.config || {}"
            :schema="getModelSchema(model.model_id ?? '')"
-            :on-test="(text, cfg) => handleTestModel(model.id ?? '', text, cfg)"
+            :on-test="(text, cfg) => handleTestModel(model.id ?? '', text as string, cfg)"
            @save="(cfg) => handleSaveModel(model.id ?? '', cfg)"
          />
        </div>
@@ -218,10 +229,11 @@ import { computed, inject, reactive, ref, watch } from 'vue'
 import { toast } from 'vue-sonner'
 import { useI18n } from 'vue-i18n'
 import { useQuery, useQueryCache } from '@pinia/colada'
-import { getSpeechProvidersById, getSpeechProvidersByIdModels, getSpeechProvidersMeta, postSpeechProvidersByIdImportModels, putModelsById, putProvidersById } from '@memohai/sdk'
+import { getSpeechProvidersById, getSpeechProvidersByIdModels, getSpeechProvidersMeta, postSpeechProvidersByIdImportModels, putProvidersById } from '@memohai/sdk'
 import type { TtsSpeechModelResponse, TtsSpeechProviderResponse } from '@memohai/sdk'
 import LoadingButton from '@/components/loading-button/index.vue'
 import ProviderIcon from '@/components/provider-icon/index.vue'
+import CreateModel from '@/components/create-model/index.vue'

 interface SpeechFieldSchema {
  key: string
@@ -256,6 +268,8 @@ interface SpeechProviderMeta {
  config_schema?: SpeechConfigSchema
  default_model?: string
  models?: SpeechModelMeta[]
+  default_synthesis_model?: string
+  synthesis_models?: SpeechModelMeta[]
 }

 function getInitials(name: string | undefined) {
@@ -274,6 +288,9 @@ const enableLoading = ref(false)
 const saveLoading = ref(false)
 const importLoading = ref(false)
 const queryCache = useQueryCache()
+const speechTypeOptions = [
+  { value: 'speech', label: 'Speech' },
+]

 const { data: providerDetail } = useQuery({
  key: () => ['speech-provider-detail', curProviderId.value],
@@ -297,7 +314,7 @@ const { data: metaList } = useQuery({

 const currentMeta = computed(() => {
  if (!metaList.value || !curProvider.value?.client_type) return null
-  return (metaList.value as SpeechProviderMeta[]).find((m) => m.provider === curProvider.value?.client_type) ?? null
+  return (metaList.value as SpeechProviderMeta[]).find(m => m.provider === curProvider.value?.client_type) ?? null
 })

 const orderedProviderFields = computed(() => {
@@ -317,9 +334,7 @@ const { data: providerSpeechModels } = useQuery({
  },
 })

-const providerModels = computed(() => {
-  return (providerSpeechModels.value as TtsSpeechModelResponse[] | undefined) ?? []
-})
+const providerModels = computed(() => ((providerSpeechModels.value as TtsSpeechModelResponse[] | undefined) ?? []))

 watch(() => providerDetail.value, (provider) => {
  providerName.value = provider?.name ?? curProvider.value?.name ?? ''
@@ -328,12 +343,11 @@ watch(() => providerDetail.value, (provider) => {
 }, { immediate: true, deep: true })

 function getModelMeta(modelID: string): SpeechModelMeta | null {
-  const models = currentMeta.value?.models ?? []
+  const models = currentMeta.value?.synthesis_models ?? currentMeta.value?.models ?? []
  const exact = models.find(m => m.id === modelID)
  if (exact) return exact
-  if (currentMeta.value?.default_model) {
-    return models.find(m => m.id === currentMeta.value?.default_model) ?? null
-  }
+  const defaultModel = currentMeta.value?.default_synthesis_model ?? currentMeta.value?.default_model
+  if (defaultModel) return models.find(m => m.id === defaultModel) ?? null
  return models[0] ?? null
 }

@@ -398,20 +412,23 @@ async function handleSaveProvider() {
 }

 async function handleSaveModel(modelId: string, config: Record<string, unknown>) {
-  const model = providerModels.value.find((item) => item.id === modelId)
+  const model = providerModels.value.find(item => item.id === modelId)
  if (!model) return
  try {
-    await putModelsById({
-      path: { id: modelId },
-      body: {
-        model_id: model.model_id,
-        name: model.name ?? model.model_id,
-        provider_id: model.provider_id,
-        type: 'speech',
-        config,
+    const apiBase = import.meta.env.VITE_API_URL?.trim() || '/api'
+    const token = localStorage.getItem('token')
+    const resp = await fetch(`${apiBase}/speech-models/${modelId}`, {
+      method: 'PUT',
+      headers: {
+        'Content-Type': 'application/json',
+        ...(token ? { Authorization: `Bearer ${token}` } : {}),
      },
-      throwOnError: true,
+      body: JSON.stringify({
+        name: model.name ?? model.model_id,
+        config,
+      }),
    })
+    if (!resp.ok) throw new Error(await resp.text())
    toast.success(t('speech.saveSuccess'))
    queryCache.invalidateQueries({ key: ['speech-provider-models', curProviderId.value] })
    queryCache.invalidateQueries({ key: ['speech-models'] })
@@ -0,0 +1,126 @@
+<script setup lang="ts">
+import { computed, ref, provide, watch } from 'vue'
+import { useQuery } from '@pinia/colada'
+import {
+  ScrollArea,
+  SidebarMenu,
+  SidebarMenuButton,
+  SidebarMenuItem,
+  Toggle,
+  Empty,
+  EmptyDescription,
+  EmptyHeader,
+  EmptyMedia,
+  EmptyTitle,
+} from '@memohai/ui'
+import { getTranscriptionProviders } from '@memohai/sdk'
+import type { AudioSpeechProviderResponse } from '@memohai/sdk'
+import ProviderSetting from './provider-setting.vue'
+import { AudioLines } from 'lucide-vue-next'
+import MasterDetailSidebarLayout from '@/components/master-detail-sidebar-layout/index.vue'
+import ProviderIcon from '@/components/provider-icon/index.vue'
+
+// Fallback avatar text: first two characters of the trimmed name, or '?' when empty.
+function getInitials(name: string | undefined) {
+  const label = name?.trim() ?? ''
+  return label ? label.slice(0, 2).toUpperCase() : '?'
+}
+
+const { data: providerData } = useQuery({
+  key: () => ['transcription-providers'],
+  query: async () => {
+    const { data } = await getTranscriptionProviders({ throwOnError: true })
+    return (data ?? []) as AudioSpeechProviderResponse[]
+  },
+})
+
+// Currently selected provider, shared with descendants through provide().
+const curProvider = ref<AudioSpeechProviderResponse>()
+provide('curTranscriptionProvider', curProvider)
+
+// Selection is keyed by id (names are not guaranteed unique). A plain function
+// is enough here: it reads `curProvider` during render, so it stays reactive
+// without allocating a computed per list item.
+function isSelected(id: string | undefined) {
+  return !!id && curProvider.value?.id === id
+}
+
+// Providers with enabled ones (enable !== false) listed first; sorts a copy so
+// the query data itself is not mutated.
+const filteredProviders = computed(() => {
+  if (!Array.isArray(providerData.value)) return []
+  return [...providerData.value].sort((a, b) => Number(b.enable !== false) - Number(a.enable !== false))
+})
+
+// Keep the selection valid whenever the list changes: keep the current
+// provider if it still exists (picking up its fresh data), otherwise fall back
+// to the first entry, or to an empty placeholder when the list is empty.
+watch(filteredProviders, (list) => {
+  if (!list || list.length === 0) {
+    curProvider.value = { id: '' }
+    return
+  }
+  const currentId = curProvider.value?.id
+  if (currentId) {
+    const stillExists = list.find(p => p.id === currentId)
+    if (stillExists) {
+      curProvider.value = stillExists
+      return
+    }
+  }
+  curProvider.value = list[0]
+}, { immediate: true })
+</script>
+
+<template>
+  <MasterDetailSidebarLayout>
+    <template #sidebar-content>
+      <SidebarMenu
+        v-for="item in filteredProviders"
+        :key="item.id"
+      >
+        <SidebarMenuItem>
+          <SidebarMenuButton
+            as-child
+            class="justify-start py-5! px-4"
+          >
+            <Toggle
+              :class="['py-4 border', isSelected(item.id) ? 'border-border' : 'border-transparent']"
+              :model-value="isSelected(item.id)"
+              @update:model-value="(isSelect) => { if (isSelect) curProvider = item }"
+            >
+              <span class="relative shrink-0">
+                <span class="flex size-7 items-center justify-center rounded-full bg-muted">
+                  <ProviderIcon
+                    v-if="item.icon"
+                    :icon="item.icon"
+                    size="1.25em"
+                  />
+                  <span
+                    v-else
+                    class="text-xs font-medium text-muted-foreground"
+                  >
+                    {{ getInitials(item.name) }}
+                  </span>
+                </span>
+                <span
+                  v-if="item.enable !== false"
+                  class="absolute -bottom-0.5 -right-0.5 size-2.5 rounded-full bg-green-500 ring-2 ring-background"
+                />
+              </span>
+              <span class="truncate">{{ item.name }}</span>
+            </Toggle>
+          </SidebarMenuButton>
+        </SidebarMenuItem>
+      </SidebarMenu>
+    </template>
+
+    <template #detail>
+      <ScrollArea
+        v-if="curProvider?.id"
+        class="max-h-full h-full"
+      >
+        <ProviderSetting />
+      </ScrollArea>
+      <Empty
+        v-else
+        class="h-full flex justify-center items-center"
+      >
+        <EmptyHeader>
+          <EmptyMedia variant="icon">
+            <AudioLines />
+          </EmptyMedia>
+        </EmptyHeader>
+        <EmptyTitle>{{ $t('transcription.emptyTitle') }}</EmptyTitle>
+        <EmptyDescription>{{ $t('transcription.emptyDescription') }}</EmptyDescription>
+      </Empty>
+    </template>
+  </MasterDetailSidebarLayout>
+</template>
@@ -0,0 +1,480 @@
+<template>
+  <div class="p-4">
+    <!-- Header: provider avatar, name and type line, and the enable switch. -->
+    <section class="flex items-center gap-3">
+      <span class="flex size-10 shrink-0 items-center justify-center rounded-full bg-muted">
+        <!-- Avatar: the provider icon when one is set, otherwise the text from getInitials(). -->
+        <ProviderIcon
+          v-if="curProvider?.icon"
+          :icon="curProvider.icon"
+          size="1.5em"
+        />
+        <span
+          v-else
+          class="text-xs font-medium text-muted-foreground"
+        >
+          {{ getInitials(curProvider?.name) }}
+        </span>
+      </span>
+      <div class="min-w-0">
+        <h2 class="text-sm font-semibold truncate">
+          {{ curProvider?.name }}
+        </h2>
+        <!-- Type line: the display name from the matching meta entry, falling back to the raw client_type. -->
+        <p class="text-xs text-muted-foreground">
+          {{ currentMeta?.display_name ?? curProvider?.client_type }}
+        </p>
+      </div>
+      <div class="ml-auto flex items-center gap-2">
+        <span class="text-xs text-muted-foreground">
+          {{ $t('common.enable') }}
+        </span>
+        <!-- A missing enable value is shown as off; the switch is disabled while no provider is selected or a toggle request is in flight. -->
+        <Switch
+          :model-value="curProvider?.enable ?? false"
+          :disabled="!curProvider?.id || enableLoading"
+          @update:model-value="handleToggleEnable"
+        />
+      </div>
+    </section>
+    <Separator class="mt-4 mb-6" />
+
+    <!-- Provider settings form: the name plus one control per config-schema field; submitting calls handleSaveProvider. -->
+    <form
+      class="space-y-4"
+      @submit.prevent="handleSaveProvider"
+    >
+      <section class="space-y-2">
+        <Label for="transcription-provider-name">{{ $t('common.name') }}</Label>
+        <Input
+          id="transcription-provider-name"
+          v-model="providerName"
+          type="text"
+          :placeholder="$t('common.namePlaceholder')"
+        />
+      </section>
+
+      <!-- Schema-driven fields, rendered in orderedProviderFields order and bound to providerConfig[field.key]. -->
+      <section
+        v-for="field in orderedProviderFields"
+        :key="field.key"
+        class="space-y-2"
+      >
+        <!-- The bool and enum controls below are rendered without an id, so the label gets no `for` target for those types. -->
+        <Label :for="field.type === 'bool' || field.type === 'enum' ? undefined : `transcription-provider-${field.key}`">
+          {{ field.title || field.key }}
+        </Label>
+        <p
+          v-if="field.description"
+          class="text-xs text-muted-foreground"
+        >
+          {{ field.description }}
+        </p>
+        <!-- Control chosen by field.type: secret, bool, number, enum (only when an enum list exists), otherwise a plain text input. -->
+        <div
+          v-if="field.type === 'secret'"
+          class="relative"
+        >
+          <!-- Secret: masked unless visibleSecrets[field.key] is true; the button flips that flag. -->
+          <Input
+            :id="`transcription-provider-${field.key}`"
+            v-model="providerConfig[field.key] as string"
+            :type="visibleSecrets[field.key] ? 'text' : 'password'"
+          />
+          <button
+            type="button"
+            class="absolute right-2 top-1/2 -translate-y-1/2 text-muted-foreground hover:text-foreground"
+            @click="visibleSecrets[field.key] = !visibleSecrets[field.key]"
+          >
+            <component
+              :is="visibleSecrets[field.key] ? EyeOff : Eye"
+              class="size-3.5"
+            />
+          </button>
+        </div>
+        <Switch
+          v-else-if="field.type === 'bool'"
+          :model-value="!!providerConfig[field.key]"
+          @update:model-value="(val) => providerConfig[field.key] = !!val"
+        />
+        <Input
+          v-else-if="field.type === 'number'"
+          :id="`transcription-provider-${field.key}`"
+          v-model.number="providerConfig[field.key] as number"
+          type="number"
+        />
+        <Select
+          v-else-if="field.type === 'enum' && field.enum"
+          :model-value="String(providerConfig[field.key] ?? '')"
+          @update:model-value="(val) => providerConfig[field.key] = val"
+        >
+          <SelectTrigger>
+            <SelectValue :placeholder="field.title || field.key" />
+          </SelectTrigger>
+          <SelectContent>
+            <SelectItem
+              v-for="opt in field.enum"
+              :key="opt"
+              :value="opt"
+            >
+              {{ opt }}
+            </SelectItem>
+          </SelectContent>
+        </Select>
+        <Input
+          v-else
+          :id="`transcription-provider-${field.key}`"
+          v-model="providerConfig[field.key] as string"
+          type="text"
+        />
+      </section>
+
+      <!-- Submit button; shows its loading state while saveLoading is true. -->
+      <div class="flex justify-end">
+        <LoadingButton
+          type="submit"
+          :loading="saveLoading"
+        >
+          {{ $t('provider.saveChanges') }}
+        </LoadingButton>
+      </div>
+    </form>
+
+    <Separator class="mt-6 mb-6" />
+
+    <!-- Models of the current provider: import/create actions, an empty state, and one collapsible editor per model. -->
+    <section>
+      <div class="flex justify-between items-center mb-4">
+        <h3 class="text-xs font-medium">
+          {{ $t('transcription.models') }}
+        </h3>
+        <div
+          v-if="curProviderId"
+          class="flex items-center gap-2"
+        >
+          <!-- Actions are only rendered once a provider id is available. -->
+          <LoadingButton
+            type="button"
+            variant="outline"
+            size="sm"
+            :loading="importLoading"
+            @click="handleImportModels"
+          >
+            {{ $t('transcription.importModels') }}
+          </LoadingButton>
+          <!-- CreateModel is given a fixed "transcription" default type and has its type field hidden. -->
+          <CreateModel
+            :id="curProviderId"
+            default-type="transcription"
+            hide-type
+            :type-options="transcriptionTypeOptions"
+            :invalidate-keys="['transcription-provider-models', 'transcription-models']"
+          />
+        </div>
+      </div>
+
+      <!-- Empty state, shown while the provider has no models. -->
+      <div
+        v-if="providerModels.length === 0"
+        class="text-xs text-muted-foreground py-4 text-center"
+      >
+        {{ $t('transcription.noModels') }}
+      </div>
+
+      <!-- One card per model; only the card whose id equals expandedModelId shows its editor. -->
+      <div
+        v-for="model in providerModels"
+        :key="model.id"
+        class="border border-border rounded-lg mb-4"
+      >
+        <button
+          type="button"
+          class="w-full flex items-center justify-between p-3 text-left hover:bg-accent/50 rounded-t-lg transition-colors"
+          @click="toggleModel(model.id ?? '')"
+        >
+          <div>
+            <!-- Title is the model name, or model_id when the name is empty; model_id is repeated as a subtitle only when a name exists. -->
+            <span class="text-xs font-medium">{{ model.name || model.model_id }}</span>
+            <span
+              v-if="model.name"
+              class="text-xs text-muted-foreground ml-2"
+            >
+              {{ model.model_id }}
+            </span>
+          </div>
+          <component
+            :is="expandedModelId === model.id ? ChevronUp : ChevronDown"
+            class="size-3 text-muted-foreground"
+          />
+        </button>
+        <div
+          v-if="expandedModelId === model.id"
+          class="px-3 pb-3 space-y-4 border-t border-border pt-3"
+        >
+          <!-- The editor receives the schema resolved by getModelSchema(); testing and saving are delegated to handleTestModel / handleSaveModel. -->
+          <ModelConfigEditor
+            :model-id="model.id ?? ''"
+            :model-name="model.model_id ?? ''"
+            :config="model.config || {}"
+            :schema="getModelSchema(model.model_id ?? '')"
+            mode="transcription"
+            :on-test="(file, cfg) => handleTestModel(model.id ?? '', file as File, cfg)"
+            @save="(cfg) => handleSaveModel(model.id ?? '', cfg)"
+          />
+        </div>
+      </div>
+    </section>
+  </div>
+</template>
+
+<script setup lang="ts">
+import { computed, inject, reactive, ref, watch } from 'vue'
+import { useQuery, useQueryCache } from '@pinia/colada'
+import { toast } from 'vue-sonner'
+import { useI18n } from 'vue-i18n'
+import {
+  getTranscriptionProvidersById,
+  getTranscriptionProvidersMeta,
+  getTranscriptionProvidersByIdModels,
+  postTranscriptionProvidersByIdImportModels,
+  postTranscriptionModelsByIdTest,
+  putProvidersById,
+  putTranscriptionModelsById,
+} from '@memohai/sdk'
+import type {
+  AudioProviderMetaResponse,
+  AudioSpeechProviderResponse,
+  AudioTestTranscriptionResponse,
+  AudioTranscriptionModelResponse,
+} from '@memohai/sdk'
+import { ChevronDown, ChevronUp, Eye, EyeOff } from 'lucide-vue-next'
+import { Input, Label, Select, SelectContent, SelectItem, SelectTrigger, SelectValue, Separator, Switch } from '@memohai/ui'
+import ProviderIcon from '@/components/provider-icon/index.vue'
+import LoadingButton from '@/components/loading-button/index.vue'
+import ModelConfigEditor from '@/pages/speech/components/model-config-editor.vue'
+import CreateModel from '@/components/create-model/index.vue'
+
+/**
+ * One configurable field of a provider or model config schema.
+ *
+ * The settings form renders a dedicated control for the types `secret`,
+ * `bool`, `number` and `enum`; any other type gets a plain text input.
+ */
+interface FieldSchema {
+  /** Key under which the value is stored in the config object. */
+  key: string
+  type: string
+  /** Label text; the form falls back to `key` when it is empty. */
+  title?: string
+  description?: string
+  /** Selectable values for `enum` fields. */
+  enum?: string[]
+  /** Ascending sort position in the form; a missing value sorts as 0. */
+  order?: number
+}
+
+/** A list of configurable fields. */
+interface ConfigSchema {
+  fields?: FieldSchema[]
+}
+
+/**
+ * Normalized model entry of a provider meta record. A schema may be attached
+ * directly or under `capabilities`.
+ */
+interface ModelMeta {
+  id: string
+  name: string
+  config_schema?: ConfigSchema
+  capabilities?: {
+    config_schema?: ConfigSchema
+  }
+}
+
+/** Normalized provider meta record; `provider` is matched against a provider's `client_type`. */
+interface ProviderMeta {
+  provider: string
+  display_name?: string
+  config_schema?: ConfigSchema
+  default_transcription_model?: string
+  transcription_models?: ModelMeta[]
+  models?: ModelMeta[]
+}
+
+/**
+ * Build the avatar fallback text for a provider.
+ *
+ * @param name - Provider name; may be undefined.
+ * @returns The first two characters of the trimmed name, upper-cased, or
+ *   `'?'` when the name is missing or blank.
+ */
+function getInitials(name: string | undefined) {
+  const label = name?.trim() ?? ''
+  if (!label) return '?'
+  // Array.from splits by code point, so a character stored as a surrogate
+  // pair (emoji, some CJK) is never cut in half the way slice(0, 2) on the
+  // raw string would cut it.
+  return Array.from(label).slice(0, 2).join('').toUpperCase()
+}
+
+/**
+ * Convert an SDK config schema into the local `ConfigSchema` shape.
+ *
+ * Entries that are nullish or lack a `key` or a `type` are dropped; the
+ * remaining entries keep their original order.
+ *
+ * @param schema - Schema as returned by the SDK, if any.
+ * @returns `undefined` when no schema is given, otherwise an object whose
+ *   `fields` array may be empty.
+ */
+function normalizeConfigSchema(schema?: AudioProviderMetaResponse['config_schema']): ConfigSchema | undefined {
+  if (!schema) return undefined
+  const fields = (schema.fields ?? []).flatMap((raw): FieldSchema[] => {
+    if (!raw?.key || !raw.type) return []
+    const { key, type, title, description, enum: options, order } = raw
+    return [{ key, type, title, description, enum: options, order }]
+  })
+  return { fields }
+}
+
+/**
+ * Convert one SDK model entry into the local `ModelMeta` shape.
+ *
+ * @param model - Model entry from a provider meta response.
+ * @returns `null` when the entry is nullish or has no id. Otherwise the
+ *   normalized entry, whose name falls back to the id and whose
+ *   `capabilities` is set only when the source entry has one.
+ */
+function normalizeModelMeta(model: NonNullable<AudioProviderMetaResponse['models']>[number]): ModelMeta | null {
+  const id = model?.id
+  if (!id) return null
+
+  const configSchema = normalizeConfigSchema(model.config_schema)
+  const rawCapabilities = model.capabilities
+  let capabilities: ModelMeta['capabilities']
+  if (rawCapabilities) {
+    capabilities = { config_schema: normalizeConfigSchema(rawCapabilities.config_schema) }
+  }
+
+  return { id, name: model.name ?? id, config_schema: configSchema, capabilities }
+}
+
+/**
+ * Convert an SDK provider meta record into the local `ProviderMeta` shape.
+ *
+ * Both model lists are always arrays in the result (possibly empty); entries
+ * that `normalizeModelMeta` rejects are left out.
+ *
+ * @param meta - Provider meta record from the SDK.
+ * @returns The normalized record; `provider` defaults to an empty string.
+ */
+function normalizeProviderMeta(meta: AudioProviderMetaResponse): ProviderMeta {
+  const toModelList = (rawModels: AudioProviderMetaResponse['models']): ModelMeta[] => {
+    const kept: ModelMeta[] = []
+    for (const raw of rawModels ?? []) {
+      const normalized = normalizeModelMeta(raw)
+      if (normalized !== null) kept.push(normalized)
+    }
+    return kept
+  }
+
+  return {
+    provider: meta.provider ?? '',
+    display_name: meta.display_name,
+    config_schema: normalizeConfigSchema(meta.config_schema),
+    default_transcription_model: meta.default_transcription_model,
+    transcription_models: toModelList(meta.transcription_models),
+    models: toModelList(meta.models),
+  }
+}
+
+const { t } = useI18n()
+// Currently selected provider, injected under 'curTranscriptionProvider'; an empty ref is the default when nothing is provided.
+const curProvider = inject('curTranscriptionProvider', ref<AudioSpeechProviderResponse>())
+const curProviderId = computed(() => curProvider.value?.id)
+// Editable form state; refilled whenever the fetched provider detail changes.
+const providerName = ref('')
+const providerConfig = reactive<Record<string, unknown>>({})
+// Per field key: true while that secret input is shown as plain text.
+const visibleSecrets = reactive<Record<string, boolean>>({})
+// Id of the model whose editor is expanded; '' means none.
+const expandedModelId = ref('')
+// In-flight flags for the enable toggle, the form save and the model import.
+const enableLoading = ref(false)
+const saveLoading = ref(false)
+const importLoading = ref(false)
+const queryCache = useQueryCache()
+// Passed to CreateModel as its type options; only the transcription type is offered here.
+const transcriptionTypeOptions = [
+  { value: 'transcription', label: 'Transcription' },
+]
+
+// Detail of the selected provider, cached per provider id. Resolves to null
+// without a request while no provider is selected.
+const { data: providerDetail } = useQuery({
+  key: () => ['transcription-provider-detail', curProviderId.value ?? ''],
+  query: async () => {
+    const id = curProviderId.value
+    if (!id) return null
+    const response = await getTranscriptionProvidersById({
+      path: { id },
+      throwOnError: true,
+    })
+    return (response.data ?? null) as AudioSpeechProviderResponse | null
+  },
+})
+
+// Meta records of all transcription provider types, normalized to ProviderMeta.
+const { data: metaList } = useQuery({
+  key: () => ['transcription-providers-meta'],
+  query: async () => {
+    const response = await getTranscriptionProvidersMeta({ throwOnError: true })
+    const rawList = response.data ?? []
+    return rawList.map(normalizeProviderMeta)
+  },
+})
+
+// Meta record whose `provider` equals the selected provider's client_type, or null.
+const currentMeta = computed(() => {
+  const clientType = curProvider.value?.client_type
+  return (metaList.value ?? []).find(meta => meta.provider === clientType) ?? null
+})
+// Provider config fields in ascending `order` (missing order counts as 0); sorts a copy.
+const orderedProviderFields = computed(() => {
+  const fields = currentMeta.value?.config_schema?.fields ?? []
+  return [...fields].sort((left, right) => (left.order ?? 0) - (right.order ?? 0))
+})
+
+// Models of the selected provider, cached per provider id. Resolves to an
+// empty list without a request while no provider is selected.
+const { data: providerModelData } = useQuery({
+  key: () => ['transcription-provider-models', curProviderId.value ?? ''],
+  query: async () => {
+    const id = curProviderId.value
+    if (!id) return []
+    const response = await getTranscriptionProvidersByIdModels({
+      path: { id },
+      throwOnError: true,
+    })
+    return (response.data ?? []) as AudioTranscriptionModelResponse[]
+  },
+})
+
+// Always an array, so the template can iterate before the query has data.
+const providerModels = computed(() => providerModelData.value ?? [])
+
+// Refill the editable form from the fetched provider detail. Runs immediately
+// and again on every change of the detail. While no detail is available the
+// name falls back to the selected provider's name and the config is emptied.
+watch(() => providerDetail.value, (provider) => {
+  providerName.value = provider?.name ?? curProvider.value?.name ?? ''
+  // Clear first so keys missing from the new config do not linger.
+  Object.keys(providerConfig).forEach((key) => delete providerConfig[key])
+  Object.assign(providerConfig, { ...(provider?.config ?? {}) })
+}, { immediate: true, deep: true })
+
+// Reveal flags are keyed by field key only. Reset them when another provider
+// is selected so a secret revealed for one provider is not shown in plain
+// text for the next provider that happens to have the same field key.
+watch(curProviderId, () => {
+  for (const key of Object.keys(visibleSecrets)) delete visibleSecrets[key]
+})
+
+/**
+ * Resolve the config schema used to edit a model of the current provider.
+ *
+ * Looks the model up in the provider meta's transcription models. When that
+ * list is empty it uses the generic `models` list instead. If the model is
+ * not listed, the provider's default transcription model, and then the first
+ * listed model, is used as a stand-in.
+ *
+ * @param modelID - The model's `model_id` as known to the provider.
+ * @returns The model's own schema, else the schema under its capabilities,
+ *   else `null`.
+ */
+function getModelSchema(modelID: string): ConfigSchema | null {
+  const meta = currentMeta.value
+  // normalizeProviderMeta always yields an array for transcription_models, so
+  // `??` alone would never reach `models`; fall back on emptiness instead.
+  const transcriptionModels = meta?.transcription_models ?? []
+  const models = transcriptionModels.length > 0 ? transcriptionModels : (meta?.models ?? [])
+  const match = models.find(m => m.id === modelID)
+    ?? models.find(m => m.id === meta?.default_transcription_model)
+    ?? models[0]
+  return match?.config_schema ?? match?.capabilities?.config_schema ?? null
+}
+
+/**
+ * Expand the editor of the given model, or collapse it when it is already
+ * the expanded one. At most one model is expanded at a time.
+ *
+ * @param id - Id of the model whose header was clicked.
+ */
+function toggleModel(id: string) {
+  if (expandedModelId.value === id) {
+    expandedModelId.value = ''
+    return
+  }
+  expandedModelId.value = id
+}
+
+/**
+ * Enable or disable the selected provider.
+ *
+ * The switch is updated optimistically and rolled back with an error toast
+ * when the request fails. The request body carries the current form name and
+ * config as well, so the same values the save button would send are written.
+ *
+ * @param value - The new enable state.
+ */
+async function handleToggleEnable(value: boolean) {
+  // Snapshot the selection: it may change while the request is in flight.
+  const provider = curProvider.value
+  const id = provider?.id
+  if (!id || !provider?.client_type) return
+  const prev = provider.enable ?? false
+  curProvider.value = { ...provider, enable: value }
+  enableLoading.value = true
+  try {
+    await putProvidersById({
+      path: { id },
+      body: {
+        name: providerName.value.trim() || provider.name || '',
+        client_type: provider.client_type,
+        enable: value,
+        config: sanitizeConfig(providerConfig),
+      },
+      throwOnError: true,
+    })
+    queryCache.invalidateQueries({ key: ['transcription-providers'] })
+    queryCache.invalidateQueries({ key: ['transcription-provider-detail', id] })
+  } catch {
+    // Roll back only if the same provider is still selected; otherwise the
+    // previous state would be written onto a different provider.
+    if (curProvider.value?.id === id) {
+      curProvider.value = { ...curProvider.value, enable: prev }
+    }
+    toast.error(t('common.saveFailed'))
+  } finally {
+    enableLoading.value = false
+  }
+}
+
+/**
+ * Save the provider form (name and config) for the selected provider.
+ *
+ * Blank config values are omitted via `sanitizeConfig`, and a blank name
+ * falls back to the provider's current name. Shows a success or failure
+ * toast and refreshes the provider list and this provider's detail.
+ */
+async function handleSaveProvider() {
+  // Snapshot the selection so the cache entry refreshed after the request is
+  // the one that was saved, even if another provider gets selected meanwhile.
+  const provider = curProvider.value
+  const id = provider?.id
+  if (!id || !provider?.client_type) return
+  saveLoading.value = true
+  try {
+    await putProvidersById({
+      path: { id },
+      body: {
+        name: providerName.value.trim() || provider.name || '',
+        client_type: provider.client_type,
+        enable: provider.enable,
+        config: sanitizeConfig(providerConfig),
+      },
+      throwOnError: true,
+    })
+    toast.success(t('transcription.saveSuccess'))
+    queryCache.invalidateQueries({ key: ['transcription-providers'] })
+    queryCache.invalidateQueries({ key: ['transcription-provider-detail', id] })
+  } catch {
+    toast.error(t('common.saveFailed'))
+  } finally {
+    saveLoading.value = false
+  }
+}
+
+/**
+ * Save the config of one model of the selected provider.
+ *
+ * Does nothing when the model is not in the currently loaded model list.
+ * The model keeps its name (falling back to its model_id, then its id).
+ *
+ * @param modelId - Id of the model to update.
+ * @param config - New model config, sent as is.
+ */
+async function handleSaveModel(modelId: string, config: Record<string, unknown>) {
+  const model = providerModels.value.find(item => item.id === modelId)
+  if (!model) return
+  // Remember the owning provider now; the selection may change during the request.
+  const providerId = curProviderId.value ?? ''
+  try {
+    await putTranscriptionModelsById({
+      path: { id: modelId },
+      body: { name: model.name ?? model.model_id ?? modelId, config },
+      throwOnError: true,
+    })
+    toast.success(t('transcription.saveSuccess'))
+    queryCache.invalidateQueries({ key: ['transcription-provider-models', providerId] })
+    queryCache.invalidateQueries({ key: ['transcription-models'] })
+  } catch {
+    toast.error(t('common.saveFailed'))
+  }
+}
+
+/**
+ * Import models for the selected provider through the import-models endpoint.
+ *
+ * Shows a toast with the created/skipped counts from the response (missing
+ * counts are shown as 0), then refreshes the model lists and the provider
+ * meta. Shows a failure toast when the request fails.
+ */
+async function handleImportModels() {
+  // Snapshot the id so the refreshed model list belongs to the provider that
+  // was imported into, even if the selection changes during the request.
+  const id = curProviderId.value
+  if (!id) return
+  importLoading.value = true
+  try {
+    const { data } = await postTranscriptionProvidersByIdImportModels({
+      path: { id },
+      throwOnError: true,
+    })
+    const payload = (data ?? {}) as { created?: number, skipped?: number }
+    toast.success(t('transcription.importSuccess', {
+      created: payload.created ?? 0,
+      skipped: payload.skipped ?? 0,
+    }))
+    queryCache.invalidateQueries({ key: ['transcription-provider-models', id] })
+    queryCache.invalidateQueries({ key: ['transcription-models'] })
+    queryCache.invalidateQueries({ key: ['transcription-providers-meta'] })
+  } catch {
+    toast.error(t('transcription.importFailed'))
+  } finally {
+    importLoading.value = false
+  }
+}
+
+/**
+ * Run a test transcription of a file against a model.
+ *
+ * Errors are not caught here; a failed request rejects the returned promise.
+ *
+ * @param modelId - Id of the model to test.
+ * @param file - File to transcribe.
+ * @param config - Model config to test with; sent as a JSON string.
+ * @returns The response payload, or an empty object when there is none.
+ */
+async function handleTestModel(modelId: string, file: File, config: Record<string, unknown>) {
+  const serializedConfig = JSON.stringify(config)
+  const response = await postTranscriptionModelsByIdTest({
+    path: { id: modelId },
+    body: { file, config: serializedConfig },
+    throwOnError: true,
+  })
+  return (response.data ?? {}) as AudioTestTranscriptionResponse
+}
+
+/**
+ * Copy a config object without its blank entries.
+ *
+ * An entry is blank when its value is the empty string, `null` or
+ * `undefined`; values such as `0`, `false` or `' '` are kept. The input is
+ * not modified and key order is preserved.
+ *
+ * @param input - Config values keyed by field key.
+ * @returns A new object holding only the non-blank entries.
+ */
+function sanitizeConfig(input: Record<string, unknown>) {
+  return Object.keys(input).reduce<Record<string, unknown>>((kept, name) => {
+    const candidate = input[name]
+    const isBlank = candidate === '' || candidate === null || candidate === undefined
+    if (!isBlank) kept[name] = candidate
+    return kept
+  }, {})
+}
+</script>
@@ -89,6 +89,14 @@ const routes = [
          breadcrumb: i18nRef('sidebar.speech'),
        },
      },
+      {
+        name: 'transcription',
+        path: 'transcription',
+        component: () => import('@/pages/transcription/index.vue'),
+        meta: {
+          breadcrumb: i18nRef('sidebar.transcription'),
+        },
+      },
      {
        name: 'email',
        path: 'email',