Feat/speech support (#392)

* feat: expand speech provider support with new client types and configuration schema
* feat: add icon support for speech providers and update related configurations
* feat: add SVG support for Deepgram and ElevenLabs with Vue components
* feat: exclude *-speech client type in llm provider
* feat: enhance speech provider functionality with advanced settings and model import capabilities
* chore: remove go.mod replace
* feat: enhance speech provider functionality with advanced settings and model import capabilities
* chore: update go module dependencies
* feat: Ear and Mouth
* fix: separate ear/mouth page
* fix: separate audio domain and restore transcription templates

  Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs.

---------

Co-authored-by: aki <arisu@ieee.org>
2026-04-27 07:16:19 +09:00 · 2026-04-22 00:09:46 +08:00
parent 8d78925a23
commit c9dcfe287f
70 changed files with 6612 additions and 1692 deletions
@@ -138,18 +138,29 @@
    <section>
      <div class="flex justify-between items-center mb-4">
        <h3 class="text-xs font-medium">
-          {{ $t('speech.models') }}
+          {{ $t('speech.synthesis.models') }}
        </h3>
-        <LoadingButton
+        <div
          v-if="curProviderId"
-          type="button"
-          variant="outline"
-          size="sm"
-          :loading="importLoading"
-          @click="handleImportModels"
+          class="flex items-center gap-2"
        >
-          {{ $t('speech.importModels') }}
-        </LoadingButton>
+          <LoadingButton
+            type="button"
+            variant="outline"
+            size="sm"
+            :loading="importLoading"
+            @click="handleImportModels"
+          >
+            {{ $t('speech.importModels') }}
+          </LoadingButton>
+          <CreateModel
+            :id="curProviderId"
+            default-type="speech"
+            hide-type
+            :type-options="speechTypeOptions"
+            :invalidate-keys="['speech-provider-models', 'speech-models']"
+          />
+        </div>
      </div>

      <div
@@ -191,7 +202,7 @@
            :model-name="model.model_id ?? ''"
            :config="model.config || {}"
            :schema="getModelSchema(model.model_id ?? '')"
-            :on-test="(text, cfg) => handleTestModel(model.id ?? '', text, cfg)"
+            :on-test="(text, cfg) => handleTestModel(model.id ?? '', text as string, cfg)"
            @save="(cfg) => handleSaveModel(model.id ?? '', cfg)"
          />
        </div>
@@ -218,10 +229,11 @@ import { computed, inject, reactive, ref, watch } from 'vue'
 import { toast } from 'vue-sonner'
 import { useI18n } from 'vue-i18n'
 import { useQuery, useQueryCache } from '@pinia/colada'
-import { getSpeechProvidersById, getSpeechProvidersByIdModels, getSpeechProvidersMeta, postSpeechProvidersByIdImportModels, putModelsById, putProvidersById } from '@memohai/sdk'
+import { getSpeechProvidersById, getSpeechProvidersByIdModels, getSpeechProvidersMeta, postSpeechProvidersByIdImportModels, putProvidersById } from '@memohai/sdk'
 import type { TtsSpeechModelResponse, TtsSpeechProviderResponse } from '@memohai/sdk'
 import LoadingButton from '@/components/loading-button/index.vue'
 import ProviderIcon from '@/components/provider-icon/index.vue'
+import CreateModel from '@/components/create-model/index.vue'

 interface SpeechFieldSchema {
  key: string
@@ -256,6 +268,8 @@ interface SpeechProviderMeta {
  config_schema?: SpeechConfigSchema
  default_model?: string
  models?: SpeechModelMeta[]
+  default_synthesis_model?: string
+  synthesis_models?: SpeechModelMeta[]
 }

 function getInitials(name: string | undefined) {
@@ -274,6 +288,9 @@ const enableLoading = ref(false)
 const saveLoading = ref(false)
 const importLoading = ref(false)
 const queryCache = useQueryCache()
+const speechTypeOptions = [
+  { value: 'speech', label: 'Speech' },
+]

 const { data: providerDetail } = useQuery({
  key: () => ['speech-provider-detail', curProviderId.value],
@@ -297,7 +314,7 @@ const { data: metaList } = useQuery({

 const currentMeta = computed(() => {
  if (!metaList.value || !curProvider.value?.client_type) return null
-  return (metaList.value as SpeechProviderMeta[]).find((m) => m.provider === curProvider.value?.client_type) ?? null
+  return (metaList.value as SpeechProviderMeta[]).find(m => m.provider === curProvider.value?.client_type) ?? null
 })

 const orderedProviderFields = computed(() => {
@@ -317,9 +334,7 @@ const { data: providerSpeechModels } = useQuery({
  },
 })

-const providerModels = computed(() => {
-  return (providerSpeechModels.value as TtsSpeechModelResponse[] | undefined) ?? []
-})
+const providerModels = computed(() => ((providerSpeechModels.value as TtsSpeechModelResponse[] | undefined) ?? []))

 watch(() => providerDetail.value, (provider) => {
  providerName.value = provider?.name ?? curProvider.value?.name ?? ''
@@ -328,12 +343,11 @@ watch(() => providerDetail.value, (provider) => {
 }, { immediate: true, deep: true })

 function getModelMeta(modelID: string): SpeechModelMeta | null {
-  const models = currentMeta.value?.models ?? []
+  const models = currentMeta.value?.synthesis_models ?? currentMeta.value?.models ?? []
  const exact = models.find(m => m.id === modelID)
  if (exact) return exact
-  if (currentMeta.value?.default_model) {
-    return models.find(m => m.id === currentMeta.value?.default_model) ?? null
-  }
+  const defaultModel = currentMeta.value?.default_synthesis_model ?? currentMeta.value?.default_model
+  if (defaultModel) return models.find(m => m.id === defaultModel) ?? null
  return models[0] ?? null
 }

@@ -398,20 +412,23 @@ async function handleSaveProvider() {
 }

 async function handleSaveModel(modelId: string, config: Record<string, unknown>) {
-  const model = providerModels.value.find((item) => item.id === modelId)
+  const model = providerModels.value.find(item => item.id === modelId)
  if (!model) return
  try {
-    await putModelsById({
-      path: { id: modelId },
-      body: {
-        model_id: model.model_id,
-        name: model.name ?? model.model_id,
-        provider_id: model.provider_id,
-        type: 'speech',
-        config,
+    const apiBase = import.meta.env.VITE_API_URL?.trim() || '/api'
+    const token = localStorage.getItem('token')
+    const resp = await fetch(`${apiBase}/speech-models/${modelId}`, {
+      method: 'PUT',
+      headers: {
+        'Content-Type': 'application/json',
+        ...(token ? { Authorization: `Bearer ${token}` } : {}),
      },
-      throwOnError: true,
+      body: JSON.stringify({
+        name: model.name ?? model.model_id,
+        config,
+      }),
    })
+    if (!resp.ok) throw new Error(await resp.text())
    toast.success(t('speech.saveSuccess'))
    queryCache.invalidateQueries({ key: ['speech-provider-models', curProviderId.value] })
    queryCache.invalidateQueries({ key: ['speech-models'] })