feat: expand speech provider support with new client types and configuration schema (#389)

* feat: expand speech provider support with new client types and configuration schema

* feat: add icon support for speech providers and update related configurations

* feat: add SVG support for Deepgram and Elevenlabs with Vue components

* feat: exclude *-speech client types from the LLM provider list

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: remove go.mod replace

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: update go module dependencies

---------

Co-authored-by: Acbox <acbox0328@gmail.com>
This commit is contained in:
Yiming Qi
2026-04-19 22:58:16 +09:00
committed by GitHub
parent 8e013ad1ad
commit 8d78925a23
46 changed files with 2808 additions and 565 deletions
@@ -13,10 +13,12 @@ import {
ClaudeColor,
Cohere,
CohereColor,
Deepgram,
Deepseek,
DeepseekColor,
Doubao,
DoubaoColor,
Elevenlabs,
Fireworks,
FireworksColor,
Gemini,
@@ -35,6 +37,8 @@ import {
Lmstudio,
Meta,
MetaColor,
Microsoft,
MicrosoftColor,
Minimax,
MinimaxColor,
Mistral,
@@ -81,6 +85,8 @@ export const iconMap: Record<string, Component> = {
'google-brand-color': GoogleBrandColor,
'deepseek': Deepseek,
'deepseek-color': DeepseekColor,
'deepgram': Deepgram,
'elevenlabs': Elevenlabs,
'groq': Groq,
'huggingface': Huggingface,
'huggingface-color': HuggingfaceColor,
@@ -105,6 +111,8 @@ export const iconMap: Record<string, Component> = {
'cohere-color': CohereColor,
'azure': Azure,
'azure-color': AzureColor,
'microsoft': Microsoft,
'microsoft-color': MicrosoftColor,
'nvidia': Nvidia,
'nvidia-color': NvidiaColor,
'fireworks': Fireworks,
+41 -1
View File
@@ -40,9 +40,49 @@ export const CLIENT_TYPE_META: Record<string, ClientTypeMeta> = {
label: 'Edge Speech',
hint: 'Microsoft Edge Read Aloud TTS',
},
'openai-speech': {
value: 'openai-speech',
label: 'OpenAI Speech',
hint: 'OpenAI /audio/speech compatible TTS',
},
'openrouter-speech': {
value: 'openrouter-speech',
label: 'OpenRouter Speech',
hint: 'OpenRouter audio modality TTS',
},
'elevenlabs-speech': {
value: 'elevenlabs-speech',
label: 'ElevenLabs Speech',
hint: 'ElevenLabs text-to-speech',
},
'deepgram-speech': {
value: 'deepgram-speech',
label: 'Deepgram Speech',
hint: 'Deepgram TTS',
},
'minimax-speech': {
value: 'minimax-speech',
label: 'MiniMax Speech',
hint: 'MiniMax TTS',
},
'volcengine-speech': {
value: 'volcengine-speech',
label: 'Volcengine Speech',
hint: 'Volcengine SAMI TTS',
},
'alibabacloud-speech': {
value: 'alibabacloud-speech',
label: 'Alibaba Cloud Speech',
hint: 'DashScope CosyVoice TTS',
},
'microsoft-speech': {
value: 'microsoft-speech',
label: 'Microsoft Speech',
hint: 'Azure Cognitive Services TTS',
},
}
export const CLIENT_TYPE_LIST: ClientTypeMeta[] = Object.values(CLIENT_TYPE_META)
/**
 * Client types that may be offered as LLM providers: every registered client
 * type whose `value` does not end in `-speech`.
 *
 * The suffix test already excludes `edge-speech`, so a dedicated filter for
 * that single value is unnecessary.
 */
export const LLM_CLIENT_TYPE_LIST: ClientTypeMeta[] = CLIENT_TYPE_LIST
  .filter(ct => !ct.value.endsWith('-speech'))
+5
View File
@@ -424,6 +424,11 @@
"modelIdPlaceholder": "Enter model identifier (e.g. custom-voice)",
"noModels": "No models found. Click \"Import Models\" to discover available models or \"Add Model\" to create one manually.",
"noCapabilities": "No capabilities available for this model.",
"saveSuccess": "Speech configuration saved",
"advanced": {
"title": "Advanced Settings",
"description": "These fields usually map to underlying vendor implementation details. Most users can keep the defaults."
},
"fields": {
"language": "Language",
"languagePlaceholder": "Select language...",
+5
View File
@@ -420,6 +420,11 @@
"modelIdPlaceholder": "输入模型标识符(如 custom-voice)",
"noModels": "暂无模型,点击\"导入模型\"发现可用模型,或点击\"新建模型\"手动创建。",
"noCapabilities": "该模型暂无可用能力信息。",
"saveSuccess": "语音配置已保存",
"advanced": {
"title": "高级设置",
"description": "这些字段通常对应底层服务商实现细节。大多数情况下保留默认值即可。"
},
"fields": {
"language": "语言",
"languagePlaceholder": "选择语言...",
@@ -1,189 +1,198 @@
<template>
<div class="space-y-4">
<template v-if="caps">
<!-- Language -->
<div class="space-y-2">
<Label for="tts-lang">{{ $t('speech.fields.language') }}</Label>
<Select
:model-value="configData.voice_lang ?? ''"
@update:model-value="onLangChange"
>
<SelectTrigger
id="tts-lang"
class="w-full"
>
<SelectValue :placeholder="$t('speech.fields.languagePlaceholder')" />
</SelectTrigger>
<SelectContent class="max-h-60">
<SelectItem
v-for="lang in availableLanguages"
:key="lang"
:value="lang"
>
{{ lang }}
</SelectItem>
</SelectContent>
</Select>
</div>
<!-- Voice -->
<div class="space-y-2">
<Label for="tts-voice">{{ $t('speech.fields.voice') }}</Label>
<Select
:model-value="configData.voice_id ?? ''"
@update:model-value="(val) => configData.voice_id = val"
>
<SelectTrigger
id="tts-voice"
class="w-full"
>
<SelectValue :placeholder="$t('speech.fields.voicePlaceholder')" />
</SelectTrigger>
<SelectContent class="max-h-60">
<SelectItem
v-for="voice in filteredVoices"
:key="voice.id"
:value="voice.id!"
>
{{ voice.name }} ({{ voice.id }})
</SelectItem>
</SelectContent>
</Select>
</div>
<!-- Format -->
<div
v-if="caps.formats && caps.formats.length > 0"
<template v-if="basicFields.length > 0">
<section
v-for="field in basicFields"
:key="field.key"
class="space-y-2"
>
<Label for="tts-format">{{ $t('speech.fields.format') }}</Label>
<Select
:model-value="configData.format ?? ''"
@update:model-value="(val) => configData.format = val"
<Label :for="field.type === 'bool' || field.type === 'enum' ? undefined : `tts-field-${field.key}`">
{{ field.title || field.key }}
</Label>
<p
v-if="field.description"
class="text-xs text-muted-foreground"
>
<SelectTrigger
id="tts-format"
class="w-full"
{{ field.description }}
</p>
<div
v-if="field.type === 'secret'"
class="relative"
>
<Input
:id="`tts-field-${field.key}`"
v-model="configData[field.key] as string"
:type="visibleSecrets[field.key] ? 'text' : 'password'"
:placeholder="field.example ? String(field.example) : ''"
/>
<button
type="button"
class="absolute right-2 top-1/2 -translate-y-1/2 text-muted-foreground hover:text-foreground"
@click="visibleSecrets[field.key] = !visibleSecrets[field.key]"
>
<SelectValue :placeholder="$t('speech.fields.formatPlaceholder')" />
<component
:is="visibleSecrets[field.key] ? EyeOff : Eye"
class="size-3.5"
/>
</button>
</div>
<Switch
v-else-if="field.type === 'bool'"
:model-value="!!configData[field.key]"
@update:model-value="(val) => configData[field.key] = !!val"
/>
<Input
v-else-if="field.type === 'number'"
:id="`tts-field-${field.key}`"
v-model.number="configData[field.key] as number"
type="number"
:placeholder="field.example ? String(field.example) : ''"
/>
<Select
v-else-if="field.type === 'enum' && field.enum"
:model-value="String(configData[field.key] ?? '')"
@update:model-value="(val) => configData[field.key] = val"
>
<SelectTrigger>
<SelectValue :placeholder="field.title || field.key" />
</SelectTrigger>
<SelectContent>
<SelectItem
v-for="fmt in caps.formats"
:key="fmt"
:value="fmt"
v-for="opt in field.enum"
:key="opt"
:value="opt"
>
{{ fmt }}
{{ opt }}
</SelectItem>
</SelectContent>
</Select>
</div>
<!-- Speed -->
<div
v-if="caps.speed"
class="space-y-2"
>
<Label>{{ $t('speech.fields.speed') }}</Label>
<p class="text-xs text-muted-foreground">
{{ $t('speech.fields.speedDescription', { default: caps.speed.default ?? 1 }) }}
</p>
<div v-if="caps.speed.options && caps.speed.options.length > 0">
<Select
:model-value="String(configData.speed ?? caps.speed.default ?? 1)"
@update:model-value="(val) => configData.speed = Number(val)"
>
<SelectTrigger class="w-full">
<SelectValue />
</SelectTrigger>
<SelectContent>
<SelectItem
v-for="opt in caps.speed.options"
:key="opt"
:value="String(opt)"
>
{{ opt }}x
</SelectItem>
</SelectContent>
</Select>
</div>
<div
<Input
v-else
class="flex items-center gap-3"
>
<Slider
:model-value="[Number(configData.speed ?? caps.speed.default ?? 1)]"
:min="caps.speed.min"
:max="caps.speed.max"
:step="0.1"
class="flex-1"
@update:model-value="(val) => configData.speed = val[0]"
/>
<span class="text-xs text-muted-foreground w-12 text-right">
{{ Number(configData.speed ?? caps.speed.default ?? 1).toFixed(1) }}x
</span>
</div>
</div>
<!-- Pitch -->
<div
v-if="caps.pitch"
class="space-y-2"
>
<Label>{{ $t('speech.fields.pitch') }}</Label>
<p class="text-xs text-muted-foreground">
{{ $t('speech.fields.pitchDescription', { default: caps.pitch.default ?? 0 }) }}
</p>
<div
v-if="caps.pitch.options && caps.pitch.options.length > 0"
>
<Select
:model-value="String(configData.pitch ?? caps.pitch.default ?? 0)"
@update:model-value="(val) => configData.pitch = Number(val)"
>
<SelectTrigger class="w-full">
<SelectValue />
</SelectTrigger>
<SelectContent>
<SelectItem
v-for="opt in caps.pitch.options"
:key="opt"
:value="String(opt)"
>
{{ opt }} Hz
</SelectItem>
</SelectContent>
</Select>
</div>
<div
v-else
class="flex items-center gap-3"
>
<Slider
:model-value="[Number(configData.pitch ?? caps.pitch.default ?? 0)]"
:min="caps.pitch.min"
:max="caps.pitch.max"
:step="1"
class="flex-1"
@update:model-value="(val) => configData.pitch = val[0]"
/>
<span class="text-xs text-muted-foreground w-16 text-right">
{{ Number(configData.pitch ?? caps.pitch.default ?? 0).toFixed(0) }} Hz
</span>
</div>
</div>
:id="`tts-field-${field.key}`"
v-model="configData[field.key] as string"
type="text"
:placeholder="field.example ? String(field.example) : ''"
/>
</section>
</template>
<div
v-else
v-else-if="advancedFields.length === 0"
class="text-xs text-muted-foreground"
>
{{ $t('speech.noCapabilities') }}
</div>
<div
v-if="advancedFields.length > 0"
class="rounded-lg border border-border"
>
<button
type="button"
class="flex w-full items-center justify-between px-3 py-2 text-left text-xs font-medium"
@click="showAdvanced = !showAdvanced"
>
<span>{{ $t('speech.advanced.title') }}</span>
<component
:is="showAdvanced ? ChevronUp : ChevronDown"
class="size-3 text-muted-foreground"
/>
</button>
<div
v-if="showAdvanced"
class="space-y-4 border-t border-border px-3 py-3"
>
<p class="text-xs text-muted-foreground">
{{ $t('speech.advanced.description') }}
</p>
<section
v-for="field in advancedFields"
:key="field.key"
class="space-y-2"
>
<Label :for="field.type === 'bool' || field.type === 'enum' ? undefined : `tts-field-${field.key}`">
{{ field.title || field.key }}
</Label>
<p
v-if="field.description"
class="text-xs text-muted-foreground"
>
{{ field.description }}
</p>
<div
v-if="field.type === 'secret'"
class="relative"
>
<Input
:id="`tts-field-${field.key}`"
v-model="configData[field.key] as string"
:type="visibleSecrets[field.key] ? 'text' : 'password'"
:placeholder="field.example ? String(field.example) : ''"
/>
<button
type="button"
class="absolute right-2 top-1/2 -translate-y-1/2 text-muted-foreground hover:text-foreground"
@click="visibleSecrets[field.key] = !visibleSecrets[field.key]"
>
<component
:is="visibleSecrets[field.key] ? EyeOff : Eye"
class="size-3.5"
/>
</button>
</div>
<Switch
v-else-if="field.type === 'bool'"
:model-value="!!configData[field.key]"
@update:model-value="(val) => configData[field.key] = !!val"
/>
<Input
v-else-if="field.type === 'number'"
:id="`tts-field-${field.key}`"
v-model.number="configData[field.key] as number"
type="number"
:placeholder="field.example ? String(field.example) : ''"
/>
<Select
v-else-if="field.type === 'enum' && field.enum"
:model-value="String(configData[field.key] ?? '')"
@update:model-value="(val) => configData[field.key] = val"
>
<SelectTrigger>
<SelectValue :placeholder="field.title || field.key" />
</SelectTrigger>
<SelectContent>
<SelectItem
v-for="opt in field.enum"
:key="opt"
:value="opt"
>
{{ opt }}
</SelectItem>
</SelectContent>
</Select>
<Input
v-else
:id="`tts-field-${field.key}`"
v-model="configData[field.key] as string"
type="text"
:placeholder="field.example ? String(field.example) : ''"
/>
</section>
</div>
</div>
<Separator class="my-3" />
<!-- Test Synthesis -->
<div class="space-y-3">
<h4 class="text-xs font-medium">
{{ $t('speech.test.title') }}
@@ -209,9 +218,7 @@
:disabled="!testText.trim() || testText.length > maxTestTextLen"
@click="handleTest"
>
<Play
class="mr-1.5"
/>
<Play class="mr-1.5" />
{{ $t('speech.test.generate') }}
</LoadingButton>
<span
@@ -251,104 +258,88 @@
<script setup lang="ts">
import {
Input,
Label,
Select,
SelectTrigger,
SelectValue,
SelectContent,
SelectItem,
Slider,
Textarea,
SelectTrigger,
SelectValue,
Separator,
Switch,
Textarea,
} from '@memohai/ui'
import { Play } from 'lucide-vue-next'
import LoadingButton from '@/components/loading-button/index.vue'
import type { TtsModelCapabilities, TtsVoiceInfo } from '@memohai/sdk'
import { ChevronDown, ChevronUp, Eye, EyeOff, Play } from 'lucide-vue-next'
import { computed, onBeforeUnmount, reactive, ref, watch } from 'vue'
import { toast } from 'vue-sonner'
import { useI18n } from 'vue-i18n'
import LoadingButton from '@/components/loading-button/index.vue'
/**
 * One configurable field of a speech model config. The template renders one
 * form control per field and binds it to `configData[key]`.
 */
interface SpeechFieldSchema {
// Property name used to read/write the value in the config object; also used to build the control's element id.
key: string
// Chooses the control: 'secret', 'bool', 'number' and 'enum' get dedicated controls, any other value falls back to a plain text input.
type: string
// Label text; the template falls back to `key` when this is empty.
title?: string
// Optional helper text rendered under the label.
description?: string
// NOTE(review): not read by the visible code — presumably marks mandatory fields; confirm against the backend schema.
required?: boolean
// Truthy fields are listed in the collapsible advanced section instead of the basic list.
advanced?: boolean
// Options offered by the select when `type` is 'enum'.
enum?: string[]
// When truthy, stringified and used as the input placeholder.
example?: unknown
// Ascending sort position; a missing value is treated as 0.
order?: number
}
/** Schema describing the editable fields of a speech configuration. */
interface SpeechConfigSchema {
// Field list; treated as empty when missing.
fields?: SpeechFieldSchema[]
}
const props = defineProps<{
modelId: string
modelName: string
config: Record<string, unknown>
capabilities: TtsModelCapabilities | null
schema: SpeechConfigSchema | null
onTest: (text: string, config: Record<string, unknown>) => Promise<Blob>
}>()
const emit = defineEmits<{
save: [config: Record<string, unknown>]
test: [text: string, config: Record<string, unknown>]
}>()
const { t } = useI18n()
const caps = computed(() => props.capabilities)
const configData = reactive<Record<string, unknown>>({})
watch(() => props.config, (cfg) => {
Object.keys(configData).forEach((k) => delete configData[k])
if (cfg.voice && typeof cfg.voice === 'object') {
const voice = cfg.voice as Record<string, unknown>
configData.voice_id = voice.id ?? ''
configData.voice_lang = voice.lang ?? ''
}
if (cfg.format) configData.format = cfg.format
if (cfg.speed != null) configData.speed = cfg.speed
if (cfg.pitch != null) configData.pitch = cfg.pitch
if (cfg.sample_rate != null) configData.sample_rate = cfg.sample_rate
}, { immediate: true })
const availableLanguages = computed(() => {
if (!caps.value?.voices) return []
const langs = new Set(caps.value.voices.map((v: TtsVoiceInfo) => v.lang ?? '').filter(Boolean))
return [...langs].sort()
})
const filteredVoices = computed(() => {
if (!caps.value?.voices) return []
const lang = configData.voice_lang
if (!lang) return caps.value.voices
return caps.value.voices.filter((v: TtsVoiceInfo) => v.lang === lang)
})
function onLangChange(lang: string) {
configData.voice_lang = lang
const voices = caps.value?.voices?.filter((v: TtsVoiceInfo) => v.lang === lang)
if (voices && voices.length > 0 && !voices.some((v: TtsVoiceInfo) => v.id === configData.voice_id)) {
configData.voice_id = voices[0].id ?? ''
}
}
function buildConfig(): Record<string, unknown> {
const result: Record<string, unknown> = {}
if (configData.voice_id || configData.voice_lang) {
result.voice = { id: configData.voice_id ?? '', lang: configData.voice_lang ?? '' }
}
if (configData.format) result.format = configData.format
if (configData.speed != null) result.speed = Number(configData.speed)
if (configData.pitch != null) result.pitch = Number(configData.pitch)
if (configData.sample_rate != null) result.sample_rate = Number(configData.sample_rate)
return result
}
const visibleSecrets = reactive<Record<string, boolean>>({})
const saving = ref(false)
async function handleSaveConfig() {
saving.value = true
try {
emit('save', buildConfig())
} finally {
saving.value = false
}
}
// Test synthesis
const maxTestTextLen = 500
const showAdvanced = ref(false)
const testText = ref('')
const testLoading = ref(false)
const testError = ref('')
const audioUrl = ref('')
const audioEl = ref<HTMLAudioElement>()
const maxTestTextLen = 500
// Schema fields in ascending `order` (a missing order counts as 0). A copy is
// sorted so the array owned by the `schema` prop is never mutated.
const orderedFields = computed(() => {
  const schemaFields = props.schema?.fields ?? []
  const byOrder = (left: SpeechFieldSchema, right: SpeechFieldSchema) =>
    (left.order ?? 0) - (right.order ?? 0)
  return schemaFields.slice().sort(byOrder)
})
// Fields rendered directly in the form.
const basicFields = computed(() => orderedFields.value.filter(({ advanced }) => !advanced))
// Fields rendered inside the collapsible advanced section.
const advancedFields = computed(() => orderedFields.value.filter(({ advanced }) => advanced))
// Mirror the incoming `config` prop into the local, editable `configData`.
// Runs once immediately and again on any deep change to the prop.
watch(() => props.config, (cfg) => {
// Drop every existing key first so values absent from the new config do not linger.
Object.keys(configData).forEach((key) => delete configData[key])
Object.assign(configData, { ...(cfg ?? {}) })
// The advanced section is expanded exactly when some advanced field already
// carries a non-empty value in the incoming config.
showAdvanced.value = advancedFields.value.some(field => {
const value = cfg?.[field.key]
return value !== '' && value != null
})
}, { immediate: true, deep: true })
/**
 * Produces the config payload from the local form state.
 *
 * @returns A plain copy of `configData` that omits every entry whose value is
 *   an empty string, `null` or `undefined`; all other values (including `0`
 *   and `false`) are kept as-is.
 */
function buildConfig(): Record<string, unknown> {
  const cleaned: Record<string, unknown> = {}
  Object.keys(configData).forEach((name) => {
    const entry = configData[name]
    const isBlank = entry === '' || entry === null || entry === undefined
    if (!isBlank) cleaned[name] = entry
  })
  return cleaned
}
function revokeAudio() {
if (audioUrl.value) {
@@ -359,6 +350,15 @@ function revokeAudio() {
onBeforeUnmount(revokeAudio)
/**
 * Emits `save` with the cleaned form state.
 *
 * `saving` is raised before the payload is built and lowered again in
 * `finally`, so it is reset even when building or emitting throws.
 */
async function handleSaveConfig() {
  saving.value = true
  try {
    const payload = buildConfig()
    emit('save', payload)
  } finally {
    saving.value = false
  }
}
async function handleTest() {
if (!testText.value.trim()) return
testLoading.value = true
@@ -366,39 +366,13 @@ async function handleTest() {
revokeAudio()
try {
const blob = await new Promise<Blob>((resolve, reject) => {
const handler = async () => {
try {
const apiBase = import.meta.env.VITE_API_URL?.trim() || '/api'
const token = localStorage.getItem('token')
const resp = await fetch(`${apiBase}/speech-models/${props.modelId}/test`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(token ? { Authorization: `Bearer ${token}` } : {}),
},
body: JSON.stringify({ text: testText.value, config: buildConfig() }),
})
if (!resp.ok) {
const errBody = await resp.text()
let msg: string
try { msg = JSON.parse(errBody)?.message ?? errBody } catch { msg = errBody }
reject(new Error(msg))
return
}
resolve(await resp.blob())
} catch (e) {
reject(e)
}
}
handler()
})
const blob = await props.onTest(testText.value, buildConfig())
audioUrl.value = URL.createObjectURL(blob)
await new Promise<void>((resolve) => setTimeout(resolve, 50))
audioEl.value?.play()
} catch (e: unknown) {
const msg = e instanceof Error ? e.message : t('speech.test.failed')
} catch (error: unknown) {
const msg = error instanceof Error ? error.message : t('speech.test.failed')
testError.value = msg
toast.error(msg)
} finally {
@@ -1,9 +1,19 @@
<template>
<div class="p-4">
<section class="flex items-center gap-3">
<Volume2
class="size-5"
/>
<span class="flex size-10 shrink-0 items-center justify-center rounded-full bg-muted">
<ProviderIcon
v-if="curProvider?.icon"
:icon="curProvider.icon"
size="1.5em"
/>
<span
v-else
class="text-xs font-medium text-muted-foreground"
>
{{ getInitials(curProvider?.name) }}
</span>
</span>
<div class="min-w-0">
<h2 class="text-sm font-semibold truncate">
{{ curProvider?.name }}
@@ -25,12 +35,121 @@
</section>
<Separator class="mt-4 mb-6" />
<!-- Models -->
<form
class="space-y-4"
@submit.prevent="handleSaveProvider"
>
<section class="space-y-2">
<Label for="speech-provider-name">{{ $t('common.name') }}</Label>
<Input
id="speech-provider-name"
v-model="providerName"
type="text"
:placeholder="$t('common.namePlaceholder')"
/>
</section>
<section
v-for="field in orderedProviderFields"
:key="field.key"
class="space-y-2"
>
<Label :for="field.type === 'bool' || field.type === 'enum' ? undefined : `speech-provider-${field.key}`">
{{ field.title || field.key }}
</Label>
<p
v-if="field.description"
class="text-xs text-muted-foreground"
>
{{ field.description }}
</p>
<div
v-if="field.type === 'secret'"
class="relative"
>
<Input
:id="`speech-provider-${field.key}`"
v-model="providerConfig[field.key] as string"
:type="visibleSecrets[field.key] ? 'text' : 'password'"
:placeholder="field.example ? String(field.example) : ''"
/>
<button
type="button"
class="absolute right-2 top-1/2 -translate-y-1/2 text-muted-foreground hover:text-foreground"
@click="visibleSecrets[field.key] = !visibleSecrets[field.key]"
>
<component
:is="visibleSecrets[field.key] ? EyeOff : Eye"
class="size-3.5"
/>
</button>
</div>
<Switch
v-else-if="field.type === 'bool'"
:model-value="!!providerConfig[field.key]"
@update:model-value="(val) => providerConfig[field.key] = !!val"
/>
<Input
v-else-if="field.type === 'number'"
:id="`speech-provider-${field.key}`"
v-model.number="providerConfig[field.key] as number"
type="number"
:placeholder="field.example ? String(field.example) : ''"
/>
<Select
v-else-if="field.type === 'enum' && field.enum"
:model-value="String(providerConfig[field.key] ?? '')"
@update:model-value="(val) => providerConfig[field.key] = val"
>
<SelectTrigger>
<SelectValue :placeholder="field.title || field.key" />
</SelectTrigger>
<SelectContent>
<SelectItem
v-for="opt in field.enum"
:key="opt"
:value="opt"
>
{{ opt }}
</SelectItem>
</SelectContent>
</Select>
<Input
v-else
:id="`speech-provider-${field.key}`"
v-model="providerConfig[field.key] as string"
type="text"
:placeholder="field.example ? String(field.example) : ''"
/>
</section>
<div class="flex justify-end">
<LoadingButton
type="submit"
:loading="saveLoading"
>
{{ $t('provider.saveChanges') }}
</LoadingButton>
</div>
</form>
<Separator class="mt-6 mb-6" />
<section>
<div class="flex justify-between items-center mb-4">
<h3 class="text-xs font-medium">
{{ $t('speech.models') }}
</h3>
<LoadingButton
v-if="curProviderId"
type="button"
variant="outline"
size="sm"
:loading="importLoading"
@click="handleImportModels"
>
{{ $t('speech.importModels') }}
</LoadingButton>
</div>
<div
@@ -71,8 +190,9 @@
:model-id="model.id ?? ''"
:model-name="model.model_id ?? ''"
:config="model.config || {}"
:capabilities="getModelCapabilities(model.model_id ?? '')"
@test="(text, cfg) => handleTestModel(model.id ?? '', text, cfg)"
:schema="getModelSchema(model.model_id ?? '')"
:on-test="(text, cfg) => handleTestModel(model.id ?? '', text, cfg)"
@save="(cfg) => handleSaveModel(model.id ?? '', cfg)"
/>
</div>
</div>
@@ -82,65 +202,152 @@
<script setup lang="ts">
import {
Input,
Label,
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
Separator,
Switch,
} from '@memohai/ui'
import ModelConfigEditor from './model-config-editor.vue'
import { Volume2, ChevronUp, ChevronDown } from 'lucide-vue-next'
import { computed, inject, ref } from 'vue'
import { ChevronDown, ChevronUp, Eye, EyeOff } from 'lucide-vue-next'
import { computed, inject, reactive, ref, watch } from 'vue'
import { toast } from 'vue-sonner'
import { useI18n } from 'vue-i18n'
import { useQuery, useQueryCache } from '@pinia/colada'
import { getSpeechProvidersMeta, getSpeechModels, putProvidersById } from '@memohai/sdk'
import type { TtsSpeechProviderResponse, TtsProviderMetaResponse, TtsModelInfo } from '@memohai/sdk'
import { getSpeechProvidersById, getSpeechProvidersByIdModels, getSpeechProvidersMeta, postSpeechProvidersByIdImportModels, putModelsById, putProvidersById } from '@memohai/sdk'
import type { TtsSpeechModelResponse, TtsSpeechProviderResponse } from '@memohai/sdk'
import LoadingButton from '@/components/loading-button/index.vue'
import ProviderIcon from '@/components/provider-icon/index.vue'
/**
 * One configurable field of a speech config schema. The provider form renders
 * one control per field and binds it to `providerConfig[key]`.
 */
interface SpeechFieldSchema {
// Property name used to read/write the value in the config object; also used to build the control's element id.
key: string
// Chooses the control: 'secret', 'bool', 'number' and 'enum' get dedicated controls, any other value falls back to a plain text input.
type: string
// Label text; the template falls back to `key` when this is empty.
title?: string
// Optional helper text rendered under the label.
description?: string
// NOTE(review): not read by the visible code — presumably marks mandatory fields; confirm.
required?: boolean
// NOTE(review): the provider form shown here renders every field regardless of this flag.
advanced?: boolean
// Options offered by the select when `type` is 'enum'.
enum?: string[]
// When truthy, stringified and used as the input placeholder.
example?: unknown
// Ascending sort position; a missing value is treated as 0.
order?: number
}
/** Schema describing the editable fields of a speech configuration. */
interface SpeechConfigSchema {
// Field list; treated as empty when missing.
fields?: SpeechFieldSchema[]
}
/** Metadata for one model offered by a speech provider. */
interface SpeechModelMeta {
// Compared against a stored model's `model_id` when looking up its metadata.
id: string
name: string
description?: string
// Preferred source of the model's config schema.
config_schema?: SpeechConfigSchema
// Fallback location of the schema, consulted only when the top-level `config_schema` is absent.
capabilities?: {
config_schema?: SpeechConfigSchema
}
}
/** Metadata for one speech provider type as returned by the providers-meta endpoint. */
interface SpeechProviderMeta {
// Matched against the current provider's `client_type` to select the metadata entry.
provider: string
display_name: string
description?: string
// Schema whose fields drive the provider-level settings form.
config_schema?: SpeechConfigSchema
// Id of the model whose metadata is used when a stored model has no exact match.
default_model?: string
models?: SpeechModelMeta[]
}
/**
 * Builds the short text shown in a provider avatar when no icon is available.
 *
 * @param name - Provider display name; may be undefined while data is loading.
 * @returns The first two characters of the trimmed name, upper-cased, or `'?'`
 *   when the name is missing or blank.
 */
function getInitials(name: string | undefined) {
  const label = name?.trim() ?? ''
  if (!label) return '?'
  // Slice by code point rather than UTF-16 code unit so a surrogate pair
  // (emoji, supplementary-plane CJK) is never cut in half into a lone surrogate.
  return Array.from(label).slice(0, 2).join('').toUpperCase()
}
const { t } = useI18n()
const curProvider = inject('curTtsProvider', ref<TtsSpeechProviderResponse>())
const curProviderId = computed(() => curProvider.value?.id)
const providerName = ref('')
const providerConfig = reactive<Record<string, unknown>>({})
const visibleSecrets = reactive<Record<string, boolean>>({})
const expandedModelId = ref('')
const enableLoading = ref(false)
const saveLoading = ref(false)
const importLoading = ref(false)
const queryCache = useQueryCache()
// Full record of the currently selected speech provider. The query key
// includes the provider id; the query yields null while no provider is selected.
const { data: providerDetail } = useQuery({
  key: () => ['speech-provider-detail', curProviderId.value],
  query: async () => {
    const id = curProviderId.value
    if (!id) return null
    const response = await getSpeechProvidersById({
      path: { id },
      throwOnError: true,
    })
    return response.data ?? null
  },
})
const { data: metaList } = useQuery({
key: () => ['speech-providers-meta'],
query: async () => {
const { data } = await getSpeechProvidersMeta({ throwOnError: true })
return data
return (data ?? []) as SpeechProviderMeta[]
},
})
const currentMeta = computed<TtsProviderMetaResponse | null>(() => {
const currentMeta = computed(() => {
if (!metaList.value || !curProvider.value?.client_type) return null
return (metaList.value as TtsProviderMetaResponse[]).find((m) => m.provider === curProvider.value?.client_type) ?? null
return (metaList.value as SpeechProviderMeta[]).find((m) => m.provider === curProvider.value?.client_type) ?? null
})
function getModelCapabilities(modelId: string) {
const meta = currentMeta.value
if (!meta?.models) return null
return meta.models.find((m: TtsModelInfo) => m.id === modelId)?.capabilities ?? null
}
// Provider-level schema fields in ascending `order` (a missing order counts
// as 0). Sorting happens on a copy so the cached metadata is left untouched.
const orderedProviderFields = computed(() => {
  const schemaFields = currentMeta.value?.config_schema?.fields ?? []
  return schemaFields
    .slice()
    .sort((left, right) => (left.order ?? 0) - (right.order ?? 0))
})
const { data: allSpeechModels } = useQuery({
key: () => ['speech-models'],
const { data: providerSpeechModels } = useQuery({
key: () => ['speech-provider-models', curProviderId.value],
query: async () => {
const { data } = await getSpeechModels({ throwOnError: true })
return data
if (!curProviderId.value) return []
const { data } = await getSpeechProvidersByIdModels({
path: { id: curProviderId.value },
throwOnError: true,
})
return data ?? []
},
})
const providerModels = computed(() => {
if (!allSpeechModels.value || !curProviderId.value) return []
return allSpeechModels.value.filter((m) => m.provider_id === curProviderId.value)
return (providerSpeechModels.value as TtsSpeechModelResponse[] | undefined) ?? []
})
const expandedModelId = ref('')
// Mirror the fetched provider record into the editable form state.
// Runs once immediately and again on any deep change to the query result.
watch(() => providerDetail.value, (provider) => {
// Prefer the fetched name, then the injected provider's name, then an empty string.
providerName.value = provider?.name ?? curProvider.value?.name ?? ''
// Drop every existing key first so values absent from the new config do not linger.
Object.keys(providerConfig).forEach((key) => delete providerConfig[key])
Object.assign(providerConfig, { ...(provider?.config ?? {}) })
}, { immediate: true, deep: true })
/**
 * Resolves the metadata entry to use for a stored model.
 *
 * Lookup order: the entry whose `id` equals `modelID`, then the provider's
 * `default_model` entry, then the first listed entry.
 *
 * @param modelID - The stored model's `model_id`.
 * @returns The matching metadata entry, or `null` when the current provider
 *   metadata lists no models at all.
 */
function getModelMeta(modelID: string): SpeechModelMeta | null {
  const meta = currentMeta.value
  const models = meta?.models ?? []
  const exact = models.find(m => m.id === modelID)
  if (exact) return exact
  const fallbackId = meta?.default_model
  const fallback = fallbackId ? models.find(m => m.id === fallbackId) : undefined
  // Keep falling through: a `default_model` that is not in `models` must not
  // short-circuit the chain to null while other entries are available.
  return fallback ?? models[0] ?? null
}
/**
 * Returns the config schema for a stored model.
 *
 * @param modelID - The stored model's `model_id`.
 * @returns The resolved metadata's top-level `config_schema`, else the one
 *   nested under `capabilities`, else `null`.
 */
function getModelSchema(modelID: string): SpeechConfigSchema | null {
  const modelMeta = getModelMeta(modelID)
  if (!modelMeta) return null
  return modelMeta.config_schema ?? modelMeta.capabilities?.config_schema ?? null
}
/** Expands the given model row, or collapses it when it is already the expanded one. */
function toggleModel(id: string) {
  const alreadyOpen = expandedModelId.value === id
  expandedModelId.value = alreadyOpen ? '' : id
}
const queryCache = useQueryCache()
async function handleToggleEnable(value: boolean) {
if (!curProviderId.value || !curProvider.value) return
const prev = curProvider.value.enable ?? false
curProvider.value = { ...curProvider.value, enable: value }
@@ -148,10 +355,16 @@ async function handleToggleEnable(value: boolean) {
try {
await putProvidersById({
path: { id: curProviderId.value },
body: { enable: value },
body: {
name: providerName.value.trim() || curProvider.value.name,
client_type: curProvider.value.client_type,
enable: value,
config: sanitizeConfig(providerConfig),
},
throwOnError: true,
})
queryCache.invalidateQueries({ key: ['speech-providers'] })
queryCache.invalidateQueries({ key: ['speech-provider-detail', curProviderId.value] })
} catch {
curProvider.value = { ...curProvider.value, enable: prev }
toast.error(t('common.saveFailed'))
@@ -160,6 +373,75 @@ async function handleToggleEnable(value: boolean) {
}
}
/**
 * Persists the provider form (name plus cleaned config) and refreshes the
 * provider list and detail queries. Shows a success or failure toast; does
 * nothing when no provider is selected.
 */
async function handleSaveProvider() {
  // Snapshot the selection before awaiting: if the user switches provider
  // while the request is in flight, the cache entry invalidated afterwards
  // must still be the one that was actually saved.
  const providerId = curProviderId.value
  const provider = curProvider.value
  if (!providerId || !provider) return
  saveLoading.value = true
  try {
    await putProvidersById({
      path: { id: providerId },
      body: {
        // A blank name falls back to the provider's current name.
        name: providerName.value.trim() || provider.name,
        client_type: provider.client_type,
        enable: provider.enable,
        config: sanitizeConfig(providerConfig),
      },
      throwOnError: true,
    })
    toast.success(t('speech.saveSuccess'))
    queryCache.invalidateQueries({ key: ['speech-providers'] })
    queryCache.invalidateQueries({ key: ['speech-provider-detail', providerId] })
  } catch {
    toast.error(t('common.saveFailed'))
  } finally {
    saveLoading.value = false
  }
}
/**
 * Persists a model's config and refreshes the model queries.
 *
 * @param modelId - Database id of the model to update; ignored when it is not
 *   among the current provider's models.
 * @param config - Cleaned config object emitted by the model editor.
 */
async function handleSaveModel(modelId: string, config: Record<string, unknown>) {
  const model = providerModels.value.find((item) => item.id === modelId)
  if (!model) return
  // Snapshot the provider id before awaiting so the invalidated cache entry is
  // the list this model belongs to, even if the selection changes meanwhile.
  const providerId = curProviderId.value
  try {
    await putModelsById({
      path: { id: modelId },
      body: {
        model_id: model.model_id,
        name: model.name ?? model.model_id,
        provider_id: model.provider_id,
        type: 'speech',
        config,
      },
      throwOnError: true,
    })
    toast.success(t('speech.saveSuccess'))
    queryCache.invalidateQueries({ key: ['speech-provider-models', providerId] })
    queryCache.invalidateQueries({ key: ['speech-models'] })
  } catch {
    toast.error(t('common.saveFailed'))
  }
}
/**
 * Asks the backend to import models for the current provider, reports the
 * created/skipped counts in a toast, and refreshes the model and metadata
 * queries. Does nothing when no provider is selected.
 */
async function handleImportModels() {
  // Snapshot the id before awaiting so the invalidated model list is the one
  // the import ran against, even if the selection changes meanwhile.
  const providerId = curProviderId.value
  if (!providerId) return
  importLoading.value = true
  try {
    const { data } = await postSpeechProvidersByIdImportModels({
      path: { id: providerId },
      throwOnError: true,
    })
    toast.success(t('speech.importSuccess', {
      created: data?.created ?? 0,
      skipped: data?.skipped ?? 0,
    }))
    queryCache.invalidateQueries({ key: ['speech-provider-models', providerId] })
    queryCache.invalidateQueries({ key: ['speech-models'] })
    queryCache.invalidateQueries({ key: ['speech-providers-meta'] })
  } catch {
    toast.error(t('speech.importFailed'))
  } finally {
    importLoading.value = false
  }
}
async function handleTestModel(modelId: string, text: string, config: Record<string, unknown>) {
const apiBase = import.meta.env.VITE_API_URL?.trim() || '/api'
const token = localStorage.getItem('token')
@@ -183,4 +465,13 @@ async function handleTestModel(modelId: string, text: string, config: Record<str
}
return resp.blob()
}
/**
 * Copies a config object while leaving out blank entries.
 *
 * @param input - Raw key/value pairs from the form state.
 * @returns A new object holding every entry of `input` whose value is not an
 *   empty string, `null` or `undefined`; other values (including `0` and
 *   `false`) are kept unchanged and in their original key order.
 */
function sanitizeConfig(input: Record<string, unknown>) {
  const kept: Record<string, unknown> = {}
  Object.keys(input).forEach((name) => {
    const entry = input[name]
    const isBlank = entry === '' || entry === null || entry === undefined
    if (!isBlank) kept[name] = entry
  })
  return kept
}
</script>
+16 -2
View File
@@ -18,6 +18,12 @@ import type { TtsSpeechProviderResponse } from '@memohai/sdk'
import ProviderSetting from './components/provider-setting.vue'
import { Volume2 } from 'lucide-vue-next'
import MasterDetailSidebarLayout from '@/components/master-detail-sidebar-layout/index.vue'
import ProviderIcon from '@/components/provider-icon/index.vue'
/**
 * Builds the short text shown in a provider's list avatar when it has no icon.
 *
 * @param name - Provider display name; may be undefined.
 * @returns The first two characters of the trimmed name, upper-cased, or `'?'`
 *   when the name is missing or blank.
 */
function getInitials(name: string | undefined) {
  const label = name?.trim() ?? ''
  if (!label) return '?'
  // Slice by code point rather than UTF-16 code unit so a surrogate pair
  // (emoji, supplementary-plane CJK) is never cut in half into a lone surrogate.
  return Array.from(label).slice(0, 2).join('').toUpperCase()
}
const { data: providerData } = useQuery({
key: () => ['speech-providers'],
@@ -79,9 +85,17 @@ watch(filteredProviders, (list) => {
>
<span class="relative shrink-0">
<span class="flex size-7 items-center justify-center rounded-full bg-muted">
<Volume2
class="size-3.5 text-muted-foreground"
<ProviderIcon
v-if="item.icon"
:icon="item.icon"
size="1.25em"
/>
<span
v-else
class="text-xs font-medium text-muted-foreground"
>
{{ getInitials(item.name) }}
</span>
</span>
<span
v-if="item.enable !== false"