Feat/speech support (#392)

* feat: expand speech provider support with new client types and configuration schema

* feat: add icon support for speech providers and update related configurations

* feat: add SVG support for Deepgram and Elevenlabs with Vue components

* feat: exclude *-speech client types from the LLM provider

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: remove go.mod replace

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: update go module dependencies

* feat: Ear and Mouth

* fix: separate ear/mouth page

* fix: separate audio domain and restore transcription templates

Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs.

---------

Co-authored-by: aki <arisu@ieee.org>
This commit is contained in:
Acbox
2026-04-22 00:09:46 +08:00
committed by GitHub
parent 8d78925a23
commit c9dcfe287f
70 changed files with 6612 additions and 1692 deletions
@@ -138,18 +138,29 @@
<section>
<div class="flex justify-between items-center mb-4">
<h3 class="text-xs font-medium">
{{ $t('speech.models') }}
{{ $t('speech.synthesis.models') }}
</h3>
<LoadingButton
<div
v-if="curProviderId"
type="button"
variant="outline"
size="sm"
:loading="importLoading"
@click="handleImportModels"
class="flex items-center gap-2"
>
{{ $t('speech.importModels') }}
</LoadingButton>
<LoadingButton
type="button"
variant="outline"
size="sm"
:loading="importLoading"
@click="handleImportModels"
>
{{ $t('speech.importModels') }}
</LoadingButton>
<CreateModel
:id="curProviderId"
default-type="speech"
hide-type
:type-options="speechTypeOptions"
:invalidate-keys="['speech-provider-models', 'speech-models']"
/>
</div>
</div>
<div
@@ -191,7 +202,7 @@
:model-name="model.model_id ?? ''"
:config="model.config || {}"
:schema="getModelSchema(model.model_id ?? '')"
:on-test="(text, cfg) => handleTestModel(model.id ?? '', text, cfg)"
:on-test="(text, cfg) => handleTestModel(model.id ?? '', text as string, cfg)"
@save="(cfg) => handleSaveModel(model.id ?? '', cfg)"
/>
</div>
@@ -218,10 +229,11 @@ import { computed, inject, reactive, ref, watch } from 'vue'
import { toast } from 'vue-sonner'
import { useI18n } from 'vue-i18n'
import { useQuery, useQueryCache } from '@pinia/colada'
import { getSpeechProvidersById, getSpeechProvidersByIdModels, getSpeechProvidersMeta, postSpeechProvidersByIdImportModels, putModelsById, putProvidersById } from '@memohai/sdk'
import { getSpeechProvidersById, getSpeechProvidersByIdModels, getSpeechProvidersMeta, postSpeechProvidersByIdImportModels, putProvidersById } from '@memohai/sdk'
import type { TtsSpeechModelResponse, TtsSpeechProviderResponse } from '@memohai/sdk'
import LoadingButton from '@/components/loading-button/index.vue'
import ProviderIcon from '@/components/provider-icon/index.vue'
import CreateModel from '@/components/create-model/index.vue'
interface SpeechFieldSchema {
key: string
@@ -256,6 +268,8 @@ interface SpeechProviderMeta {
config_schema?: SpeechConfigSchema
default_model?: string
models?: SpeechModelMeta[]
default_synthesis_model?: string
synthesis_models?: SpeechModelMeta[]
}
function getInitials(name: string | undefined) {
@@ -274,6 +288,9 @@ const enableLoading = ref(false)
const saveLoading = ref(false)
const importLoading = ref(false)
const queryCache = useQueryCache()
// Model-type choices handed to <CreateModel :type-options>; 'speech' is the only entry.
const speechTypeOptions = [
{ value: 'speech', label: 'Speech' },
]
const { data: providerDetail } = useQuery({
key: () => ['speech-provider-detail', curProviderId.value],
@@ -297,7 +314,7 @@ const { data: metaList } = useQuery({
// Meta entry whose `provider` equals the current provider's `client_type`.
// Yields null when the meta list or the client_type is missing, or when no entry matches.
// NOTE(review): the two `return` lines below differ only in the parentheses around the
// arrow parameter; this listing appears to show the removed and the added diff line
// together — confirm which one the file actually keeps.
const currentMeta = computed(() => {
if (!metaList.value || !curProvider.value?.client_type) return null
return (metaList.value as SpeechProviderMeta[]).find((m) => m.provider === curProvider.value?.client_type) ?? null
return (metaList.value as SpeechProviderMeta[]).find(m => m.provider === curProvider.value?.client_type) ?? null
})
const orderedProviderFields = computed(() => {
@@ -317,9 +334,7 @@ const { data: providerSpeechModels } = useQuery({
},
})
// Result of the provider speech-models query viewed as TtsSpeechModelResponse[];
// falls back to an empty array while the query has no value.
// NOTE(review): `providerModels` is declared twice here (block-bodied and expression-bodied
// arrow forms). This looks like the removed and added diff lines shown together — confirm
// that only one declaration exists in the real file.
const providerModels = computed(() => {
return (providerSpeechModels.value as TtsSpeechModelResponse[] | undefined) ?? []
})
const providerModels = computed(() => ((providerSpeechModels.value as TtsSpeechModelResponse[] | undefined) ?? []))
watch(() => providerDetail.value, (provider) => {
providerName.value = provider?.name ?? curProvider.value?.name ?? ''
@@ -328,12 +343,11 @@ watch(() => providerDetail.value, (provider) => {
}, { immediate: true, deep: true })
/**
 * Resolve the metadata entry for a model id from the current provider's meta.
 *
 * Lookup order:
 *  1. the entry whose `id` equals `modelID`;
 *  2. otherwise, if a default model id is configured, the entry with that id
 *     (null when the default id is not in the list);
 *  3. otherwise the first listed entry, or null when the list is empty.
 *
 * NOTE(review): this listing appears to interleave removed and added diff lines.
 * `models` and the default-model lookup each appear twice; the `synthesis_models` /
 * `default_synthesis_model` variants look like the newer form — confirm against the file.
 *
 * @param modelID - Model id to look up.
 * @returns The matching, default, or first metadata entry; null if none applies.
 */
function getModelMeta(modelID: string): SpeechModelMeta | null {
const models = currentMeta.value?.models ?? []
// Prefers `synthesis_models`, falling back to `models` when it is null/undefined.
const models = currentMeta.value?.synthesis_models ?? currentMeta.value?.models ?? []
const exact = models.find(m => m.id === modelID)
if (exact) return exact
if (currentMeta.value?.default_model) {
return models.find(m => m.id === currentMeta.value?.default_model) ?? null
}
// Same fallback order for the default id: `default_synthesis_model`, then `default_model`.
const defaultModel = currentMeta.value?.default_synthesis_model ?? currentMeta.value?.default_model
if (defaultModel) return models.find(m => m.id === defaultModel) ?? null
return models[0] ?? null
}
@@ -398,20 +412,23 @@ async function handleSaveProvider() {
}
async function handleSaveModel(modelId: string, config: Record<string, unknown>) {
const model = providerModels.value.find((item) => item.id === modelId)
const model = providerModels.value.find(item => item.id === modelId)
if (!model) return
try {
await putModelsById({
path: { id: modelId },
body: {
model_id: model.model_id,
name: model.name ?? model.model_id,
provider_id: model.provider_id,
type: 'speech',
config,
const apiBase = import.meta.env.VITE_API_URL?.trim() || '/api'
const token = localStorage.getItem('token')
const resp = await fetch(`${apiBase}/speech-models/${modelId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
...(token ? { Authorization: `Bearer ${token}` } : {}),
},
throwOnError: true,
body: JSON.stringify({
name: model.name ?? model.model_id,
config,
}),
})
if (!resp.ok) throw new Error(await resp.text())
toast.success(t('speech.saveSuccess'))
queryCache.invalidateQueries({ key: ['speech-provider-models', curProviderId.value] })
queryCache.invalidateQueries({ key: ['speech-models'] })