Feat/speech support (#392)

* feat: expand speech provider support with new client types and configuration schema

* feat: add icon support for speech providers and update related configurations

* feat: add SVG support for Deepgram and Elevenlabs with Vue components

* feat: exclude *-speech client types from the LLM provider list

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: remove go.mod replace

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: update go module dependencies

* feat: Ear and Mouth

* fix: separate ear/mouth page

* fix: separate audio domain and restore transcription templates

Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs.

---------

Co-authored-by: aki <arisu@ieee.org>
This commit is contained in:
Acbox
2026-04-22 00:09:46 +08:00
committed by GitHub
parent 8d78925a23
commit c9dcfe287f
70 changed files with 6612 additions and 1692 deletions
+49 -14
View File
@@ -18,6 +18,7 @@
<div class="flex flex-col gap-3 mt-4">
<!-- Type -->
<FormField
v-if="!hideType"
v-slot="{ componentField }"
name="type"
>
@@ -35,11 +36,12 @@
</SelectTrigger>
<SelectContent>
<SelectGroup>
<SelectItem value="chat">
Chat
</SelectItem>
<SelectItem value="embedding">
Embedding
<SelectItem
v-for="opt in typeOptions"
:key="opt.value"
:value="opt.value"
>
{{ opt.label }}
</SelectItem>
</SelectGroup>
</SelectContent>
@@ -181,6 +183,11 @@ import { COMPATIBILITY_OPTIONS } from '@/constants/compatibilities'
import FormDialogShell from '@/components/form-dialog-shell/index.vue'
import { useDialogMutation } from '@/composables/useDialogMutation'
/** One selectable entry of the model "type" dropdown. */
interface ModelTypeOption {
// Bound as the <SelectItem> value, i.e. what ends up in the form's `type` field.
value: string
// Text rendered inside the <SelectItem>.
label: string
}
const selectedCompat = ref<string[]>([])
const { t } = useI18n()
const { run } = useDialogMutation()
@@ -193,14 +200,30 @@ const formSchema = toTypedSchema(z.object({
context_window: z.coerce.number().min(1).optional(),
}))
// Component props. Everything except `id` is optional and falls back to the
// defaults in the second argument.
const props = withDefaults(defineProps<{
// Sent as `provider_id` when a model is created.
id: string
// Entries rendered in the type <Select>.
typeOptions?: ModelTypeOption[]
// Initial value of the form's `type` field; also used when the form is reset.
defaultType?: string
// When true the type <FormField> is not rendered.
hideType?: boolean
// Query-cache keys invalidated after a create/update mutation settles.
invalidateKeys?: string[]
}>(), {
// Array defaults are supplied through factory functions.
typeOptions: () => [
{ value: 'chat', label: 'Chat' },
{ value: 'embedding', label: 'Embedding' },
],
defaultType: 'chat',
hideType: false,
invalidateKeys: () => ['provider-models'],
})
const form = useForm({
validationSchema: formSchema,
initialValues: {
type: 'chat',
type: props.defaultType,
},
})
const selectedType = computed(() => form.values.type || 'chat')
const selectedType = computed(() => form.values.type || props.defaultType)
const open = inject<Ref<boolean>>('openModel', ref(false))
const title = inject<Ref<'edit' | 'title'>>('openModelTitle', ref('title'))
@@ -237,15 +260,19 @@ function onNameInput(e: Event) {
form.setFieldValue('name', (e.target as HTMLInputElement).value)
}
const { id } = defineProps<{ id: string }>()
const queryCache = useQueryCache()
/**
 * Asks the query cache to invalidate the entries for every key listed in
 * `props.invalidateKeys`. Used as the `onSettled` hook of the model mutations.
 */
function invalidateModelQueries() {
  props.invalidateKeys.forEach((cacheKey) => {
    queryCache.invalidateQueries({ key: [cacheKey] })
  })
}
const { mutateAsync: createModel, isLoading: createLoading } = useMutation({
mutation: async (data: Record<string, unknown>) => {
const { data: result } = await postModels({ body: data as ModelsAddRequest, throwOnError: true })
return result
},
onSettled: () => queryCache.invalidateQueries({ key: ['provider-models'] }),
onSettled: invalidateModelQueries,
})
const { mutateAsync: updateModel, isLoading: updateLoading } = useMutation({
mutation: async ({ id, data }: { id: string; data: Record<string, unknown> }) => {
@@ -256,7 +283,7 @@ const { mutateAsync: updateModel, isLoading: updateLoading } = useMutation({
})
return result
},
onSettled: () => queryCache.invalidateQueries({ key: ['provider-models'] }),
onSettled: invalidateModelQueries,
})
const { mutateAsync: updateModelByLegacyModelID, isLoading: updateLegacyLoading } = useMutation({
mutation: async ({ modelId, data }: { modelId: string; data: Record<string, unknown> }) => {
@@ -267,7 +294,7 @@ const { mutateAsync: updateModelByLegacyModelID, isLoading: updateLegacyLoading
})
return result
},
onSettled: () => queryCache.invalidateQueries({ key: ['provider-models'] }),
onSettled: invalidateModelQueries,
})
const isLoading = computed(() => createLoading.value || updateLoading.value || updateLegacyLoading.value)
@@ -297,7 +324,7 @@ async function addModel() {
const payload: Record<string, unknown> = {
type,
model_id,
provider_id: id,
provider_id: props.id,
config,
}
@@ -348,7 +375,15 @@ watch(open, async () => {
selectedCompat.value = config?.compatibilities ?? []
userEditedName.value = !!(name && name !== model_id)
} else {
form.resetForm({ values: { type: 'chat', model_id: '', name: '', dimensions: undefined, context_window: undefined } })
form.resetForm({
values: {
type: props.defaultType,
model_id: '',
name: '',
dimensions: undefined,
context_window: undefined,
},
})
selectedCompat.value = []
userEditedName.value = false
}
@@ -52,7 +52,7 @@ import { computed, type Component } from 'vue'
import { storeToRefs } from 'pinia'
import { useRouter, useRoute } from 'vue-router'
import { useI18n } from 'vue-i18n'
import { ChevronLeft, Bot, Boxes, Globe, Brain, Volume2, Mail, AppWindow, ChartLine, User, Store, Info } from 'lucide-vue-next'
import { ChevronLeft, Bot, Boxes, Globe, Brain, Volume2, AudioLines, Mail, AppWindow, ChartLine, User, Store, Info } from 'lucide-vue-next'
import { useChatSelectionStore } from '@/store/chat-selection'
import {
Sidebar,
@@ -118,6 +118,11 @@ const navItems = computed<{ title: string; name: string; icon: Component }[]>(()
name: 'speech',
icon: Volume2,
},
{
title: t('sidebar.transcription'),
name: 'transcription',
icon: AudioLines,
},
{
title: t('sidebar.email'),
name: 'email',
+31 -1
View File
@@ -45,21 +45,41 @@ export const CLIENT_TYPE_META: Record<string, ClientTypeMeta> = {
label: 'OpenAI Speech',
hint: 'OpenAI /audio/speech compatible TTS',
},
'openai-transcription': {
value: 'openai-transcription',
label: 'OpenAI Transcription',
hint: 'OpenAI audio transcription',
},
'openrouter-speech': {
value: 'openrouter-speech',
label: 'OpenRouter Speech',
hint: 'OpenRouter audio modality TTS',
},
'openrouter-transcription': {
value: 'openrouter-transcription',
label: 'OpenRouter Transcription',
hint: 'OpenRouter transcription models',
},
'elevenlabs-speech': {
value: 'elevenlabs-speech',
label: 'ElevenLabs Speech',
hint: 'ElevenLabs text-to-speech',
},
'elevenlabs-transcription': {
value: 'elevenlabs-transcription',
label: 'ElevenLabs Transcription',
hint: 'ElevenLabs speech-to-text',
},
'deepgram-speech': {
value: 'deepgram-speech',
label: 'Deepgram Speech',
hint: 'Deepgram TTS',
},
'deepgram-transcription': {
value: 'deepgram-transcription',
label: 'Deepgram Transcription',
hint: 'Deepgram speech-to-text',
},
'minimax-speech': {
value: 'minimax-speech',
label: 'MiniMax Speech',
@@ -80,9 +100,19 @@ export const CLIENT_TYPE_META: Record<string, ClientTypeMeta> = {
label: 'Microsoft Speech',
hint: 'Azure Cognitive Services TTS',
},
// Text-to-speech client type; the speech-to-text counterpart is 'google-transcription'.
'google-speech': {
  value: 'google-speech',
  label: 'Google Speech',
  hint: 'Gemini text-to-speech',
},
'google-transcription': {
value: 'google-transcription',
label: 'Google Transcription',
hint: 'Gemini speech transcription',
},
}
export const CLIENT_TYPE_LIST: ClientTypeMeta[] = Object.values(CLIENT_TYPE_META)
export const LLM_CLIENT_TYPE_LIST: ClientTypeMeta[] = CLIENT_TYPE_LIST
.filter(ct => !ct.value.endsWith('-speech'))
.filter(ct => !ct.value.endsWith('-speech') && !ct.value.endsWith('-transcription'))
+27
View File
@@ -63,6 +63,7 @@
"webSearch": "Web Search",
"memory": "Memory",
"speech": "Speech",
"transcription": "Transcription",
"email": "Email",
"settings": "Settings",
"profile": "Profile",
@@ -425,6 +426,9 @@
"noModels": "No models found. Click \"Import Models\" to discover available models or \"Add Model\" to create one manually.",
"noCapabilities": "No capabilities available for this model.",
"saveSuccess": "Speech configuration saved",
"synthesis": {
"models": "Synthesis Models"
},
"advanced": {
"title": "Advanced Settings",
"description": "These fields usually map to underlying vendor implementation details. Most users can keep the defaults."
@@ -448,6 +452,27 @@
"failed": "Synthesis failed"
}
},
"transcription": {
"title": "Transcription",
"emptyTitle": "No Transcription Providers",
"emptyDescription": "Add a transcription provider to enable speech-to-text for your bots",
"models": "Transcription Models",
"noModels": "No transcription models found. Import available models or keep the default template model.",
"noCapabilities": "No capabilities available for this model.",
"importModels": "Import Models",
"importSuccess": "Transcription models imported successfully",
"importFailed": "Failed to import transcription models",
"saveSuccess": "Transcription configuration saved",
"advanced": {
"title": "Advanced Settings",
"description": "These fields usually map to underlying vendor implementation details. Most users can keep the defaults."
},
"test": {
"title": "Test Transcription",
"run": "Transcribe",
"failed": "Transcription failed"
}
},
"email": {
"title": "Email",
"add": "Add Email",
@@ -920,6 +945,8 @@
"memoryHealthUnavailable": "Unavailable",
"ttsModel": "TTS Model",
"ttsModelPlaceholder": "Select TTS model",
"transcriptionModel": "Transcription Model",
"transcriptionModelPlaceholder": "Select transcription model",
"imageModel": "Image Generation Model",
"imageModelDescription": "Model used for the generate_image tool. Must support image-output compatibility.",
"imageModelPlaceholder": "Select image model (optional)",
+27
View File
@@ -64,6 +64,7 @@
"webSearch": "搜索",
"memory": "记忆",
"speech": "语音",
"transcription": "转写",
"email": "邮件",
"profile": "用户",
"home": "首页",
@@ -421,6 +422,9 @@
"noModels": "暂无模型,点击\"导入模型\"发现可用模型,或点击\"新建模型\"手动创建。",
"noCapabilities": "该模型暂无可用能力信息。",
"saveSuccess": "语音配置已保存",
"synthesis": {
"models": "语音合成模型"
},
"advanced": {
"title": "高级设置",
"description": "这些字段通常对应底层服务商实现细节。大多数情况下保留默认值即可。"
@@ -444,6 +448,27 @@
"failed": "合成失败"
}
},
"transcription": {
"title": "语音转写",
"emptyTitle": "暂无转写提供方",
"emptyDescription": "添加转写提供方以为 Bot 启用语音转文字功能",
"models": "语音识别模型",
"noModels": "暂无语音识别模型,可导入可用模型,或保留默认模板模型。",
"importModels": "导入模型",
"importSuccess": "识别模型导入成功",
"importFailed": "识别模型导入失败",
"saveSuccess": "转写配置已保存",
"noCapabilities": "该模型暂无可用能力信息。",
"advanced": {
"title": "高级设置",
"description": "这些字段通常对应底层服务商实现细节。大多数情况下保留默认值即可。"
},
"test": {
"title": "测试识别",
"run": "开始识别",
"failed": "识别失败"
}
},
"email": {
"title": "邮件提供方",
"add": "添加邮件提供方",
@@ -916,6 +941,8 @@
"memoryHealthUnavailable": "暂不可用",
"ttsModel": "语音合成模型",
"ttsModelPlaceholder": "选择语音合成模型",
"transcriptionModel": "转写模型",
"transcriptionModelPlaceholder": "选择语音转写模型",
"imageModel": "图片生成模型",
"imageModelDescription": "用于 generate_image 工具的模型,必须支持 image-output 兼容性。",
"imageModelPlaceholder": "选择图片模型(可选)",
@@ -187,6 +187,17 @@
/>
</div>
<!-- Transcription Model: both the model options and the provider list come from the transcription queries, not the TTS ones -->
<div class="space-y-2">
  <Label>{{ $t('bots.settings.transcriptionModel') }}</Label>
  <TtsModelSelect
    v-model="form.transcription_model_id"
    :models="transcriptionModels"
    :providers="transcriptionProviders"
    :placeholder="$t('bots.settings.transcriptionModelPlaceholder')"
  />
</div>
<!-- Image Generation Model -->
<div class="space-y-2">
<Label>{{ $t('bots.settings.imageModel') }}</Label>
@@ -356,7 +367,7 @@ import MemoryProviderSelect from './memory-provider-select.vue'
import TtsModelSelect from './tts-model-select.vue'
import BrowserContextSelect from './browser-context-select.vue'
import { useQuery, useMutation, useQueryCache } from '@pinia/colada'
import { getBotsById, putBotsById, getBotsByBotIdSettings, putBotsByBotIdSettings, deleteBotsById, getModels, getProviders, getSearchProviders, getMemoryProviders, getSpeechProviders, getSpeechModels, getBrowserContexts, getBotsByBotIdMemoryStatus, postBotsByBotIdMemoryRebuild } from '@memohai/sdk'
import { getBotsById, putBotsById, getBotsByBotIdSettings, putBotsByBotIdSettings, deleteBotsById, getModels, getProviders, getSearchProviders, getMemoryProviders, getSpeechProviders, getSpeechModels, getTranscriptionProviders, getTranscriptionModels, getBrowserContexts, getBotsByBotIdMemoryStatus, postBotsByBotIdMemoryRebuild } from '@memohai/sdk'
import type { SettingsSettings } from '@memohai/sdk'
import type { Ref } from 'vue'
import { resolveApiErrorMessage } from '@/utils/api-error'
@@ -440,6 +451,22 @@ const { data: ttsModelData } = useQuery({
},
})
// Transcription models, cached under 'transcription-models'.
const { data: transcriptionModelData } = useQuery({
  key: ['transcription-models'],
  query: async () => (await getTranscriptionModels({ throwOnError: true })).data,
})
// Transcription providers, cached under 'transcription-providers'.
const { data: transcriptionProviderData } = useQuery({
  key: ['transcription-providers'],
  query: async () => (await getTranscriptionProviders({ throwOnError: true })).data,
})
const { data: browserContextData } = useQuery({
key: ['all-browser-contexts'],
query: async () => {
@@ -494,7 +521,10 @@ const searchProviders = computed(() => (searchProviderData.value ?? []).filter((
const memoryProviders = computed(() => memoryProviderData.value ?? [])
const ttsProviders = computed(() => (ttsProviderData.value ?? []).filter((p) => p.enable !== false))
const enabledTtsProviderIds = computed(() => new Set(ttsProviders.value.map((p) => p.id)))
const transcriptionProviders = computed(() => (transcriptionProviderData.value ?? []).filter((p: Record<string, unknown>) => p.enable !== false))
const enabledTranscriptionProviderIds = computed(() => new Set(transcriptionProviders.value.map((p: Record<string, unknown>) => p.id as string)))
const ttsModels = computed(() => (ttsModelData.value ?? []).filter((m: Record<string, unknown>) => enabledTtsProviderIds.value.has(m.provider_id as string)))
const transcriptionModels = computed(() => (transcriptionModelData.value ?? []).filter((m: Record<string, unknown>) => enabledTranscriptionProviderIds.value.has(m.provider_id as string)))
const browserContexts = computed(() => browserContextData.value ?? [])
// ---- Form ----
@@ -505,6 +535,7 @@ const form = reactive({
search_provider_id: '',
memory_provider_id: '',
tts_model_id: '',
transcription_model_id: '',
browser_context_id: '',
timezone: '',
language: '',
@@ -644,6 +675,7 @@ watch(settings, (val) => {
form.search_provider_id = val.search_provider_id ?? ''
form.memory_provider_id = val.memory_provider_id ?? ''
form.tts_model_id = val.tts_model_id ?? ''
form.transcription_model_id = val.transcription_model_id ?? ''
form.browser_context_id = val.browser_context_id ?? ''
form.language = val.language ?? ''
form.timezone = val.timezone ?? ''
@@ -666,6 +698,7 @@ const hasSettingsChanges = computed(() => {
|| form.search_provider_id !== (s.search_provider_id ?? '')
|| form.memory_provider_id !== (s.memory_provider_id ?? '')
|| form.tts_model_id !== (s.tts_model_id ?? '')
|| form.transcription_model_id !== (s.transcription_model_id ?? '')
|| form.browser_context_id !== (s.browser_context_id ?? '')
|| form.language !== (s.language ?? '')
|| form.timezone !== (s.timezone ?? '')
@@ -85,7 +85,7 @@
v-else-if="advancedFields.length === 0"
class="text-xs text-muted-foreground"
>
{{ $t('speech.noCapabilities') }}
{{ mode === 'transcription' ? $t('transcription.noCapabilities') : $t('speech.noCapabilities') }}
</div>
<div
@@ -97,7 +97,7 @@
class="flex w-full items-center justify-between px-3 py-2 text-left text-xs font-medium"
@click="showAdvanced = !showAdvanced"
>
<span>{{ $t('speech.advanced.title') }}</span>
<span>{{ mode === 'transcription' ? $t('transcription.advanced.title') : $t('speech.advanced.title') }}</span>
<component
:is="showAdvanced ? ChevronUp : ChevronDown"
class="size-3 text-muted-foreground"
@@ -108,7 +108,7 @@
class="space-y-4 border-t border-border px-3 py-3"
>
<p class="text-xs text-muted-foreground">
{{ $t('speech.advanced.description') }}
{{ mode === 'transcription' ? $t('transcription.advanced.description') : $t('speech.advanced.description') }}
</p>
<section
v-for="field in advancedFields"
@@ -195,9 +195,12 @@
<div class="space-y-3">
<h4 class="text-xs font-medium">
{{ $t('speech.test.title') }}
{{ mode === 'transcription' ? $t('transcription.test.title') : $t('speech.test.title') }}
</h4>
<div class="relative">
<div
v-if="mode === 'synthesis'"
class="relative"
>
<Textarea
v-model="testText"
:placeholder="$t('speech.test.placeholder')"
@@ -209,17 +212,36 @@
{{ testText.length }}/{{ maxTestTextLen }}
</span>
</div>
<div
v-else
class="space-y-2"
>
<Input
type="file"
accept="audio/*"
@change="handleFileChange"
/>
<p
v-if="selectedFileName"
class="text-xs text-muted-foreground"
>
{{ selectedFileName }}
</p>
</div>
<div class="flex items-center gap-3">
<LoadingButton
type="button"
variant="outline"
size="sm"
:loading="testLoading"
:disabled="!testText.trim() || testText.length > maxTestTextLen"
:disabled="mode === 'synthesis' ? (!testText.trim() || testText.length > maxTestTextLen) : !selectedFile"
@click="handleTest"
>
<Play class="mr-1.5" />
{{ $t('speech.test.generate') }}
<Play
v-if="mode === 'synthesis'"
class="mr-1.5"
/>
{{ mode === 'transcription' ? $t('transcription.test.run') : $t('speech.test.generate') }}
</LoadingButton>
<span
v-if="testError"
@@ -229,7 +251,7 @@
</span>
</div>
<div
v-if="audioUrl"
v-if="mode === 'synthesis' && audioUrl"
class="rounded-md border border-border bg-muted/30 p-3"
>
<audio
@@ -239,6 +261,20 @@
class="w-full"
/>
</div>
<div
v-if="mode === 'transcription' && transcriptionText"
class="rounded-md border border-border bg-muted/30 p-3 space-y-2"
>
<p class="text-sm whitespace-pre-wrap wrap-break-word">
{{ transcriptionText }}
</p>
<p
v-if="transcriptionLanguage"
class="text-xs text-muted-foreground"
>
{{ transcriptionLanguage }}
</p>
</div>
</div>
<Separator class="my-3" />
@@ -296,7 +332,8 @@ const props = defineProps<{
modelName: string
config: Record<string, unknown>
schema: SpeechConfigSchema | null
onTest: (text: string, config: Record<string, unknown>) => Promise<Blob>
mode?: 'synthesis' | 'transcription'
onTest: (payload: string | File, config: Record<string, unknown>) => Promise<Blob | { text?: string, language?: string }>
}>()
const emit = defineEmits<{
@@ -309,11 +346,16 @@ const visibleSecrets = reactive<Record<string, boolean>>({})
const saving = ref(false)
const showAdvanced = ref(false)
const testText = ref('')
const selectedFile = ref<File | null>(null)
const selectedFileName = ref('')
const testLoading = ref(false)
const testError = ref('')
const audioUrl = ref('')
const transcriptionText = ref('')
const transcriptionLanguage = ref('')
const audioEl = ref<HTMLAudioElement>()
const maxTestTextLen = 500
const mode = computed(() => props.mode ?? 'synthesis')
const orderedFields = computed(() => {
const fields = props.schema?.fields ?? []
@@ -348,6 +390,11 @@ function revokeAudio() {
}
}
/** Clears the text and language left over from a previous transcription test. */
function resetTranscription() {
  for (const field of [transcriptionText, transcriptionLanguage]) {
    field.value = ''
  }
}
onBeforeUnmount(revokeAudio)
async function handleSaveConfig() {
@@ -360,23 +407,39 @@ async function handleSaveConfig() {
}
async function handleTest() {
if (!testText.value.trim()) return
if (mode.value === 'synthesis' && !testText.value.trim()) return
if (mode.value === 'transcription' && !selectedFile.value) return
testLoading.value = true
testError.value = ''
revokeAudio()
resetTranscription()
try {
const blob = await props.onTest(testText.value, buildConfig())
const result = await props.onTest(mode.value === 'synthesis' ? testText.value : selectedFile.value as File, buildConfig())
audioUrl.value = URL.createObjectURL(blob)
await new Promise<void>((resolve) => setTimeout(resolve, 50))
audioEl.value?.play()
if (mode.value === 'synthesis') {
const blob = result as Blob
audioUrl.value = URL.createObjectURL(blob)
await new Promise<void>((resolve) => setTimeout(resolve, 50))
audioEl.value?.play()
} else {
const payload = result as { text?: string, language?: string }
transcriptionText.value = payload.text ?? ''
transcriptionLanguage.value = payload.language ?? ''
}
} catch (error: unknown) {
const msg = error instanceof Error ? error.message : t('speech.test.failed')
const msg = error instanceof Error ? error.message : t(mode.value === 'transcription' ? 'transcription.test.failed' : 'speech.test.failed')
testError.value = msg
toast.error(msg)
} finally {
testLoading.value = false
}
}
/**
 * `change` handler of the audio file input: remembers the first chosen file
 * (or `null` when the selection is empty) together with its display name.
 */
function handleFileChange(event: Event) {
  const { files } = event.target as HTMLInputElement
  const picked = files?.[0] ?? null
  selectedFile.value = picked
  selectedFileName.value = picked?.name ?? ''
}
</script>
@@ -138,18 +138,29 @@
<section>
<div class="flex justify-between items-center mb-4">
<h3 class="text-xs font-medium">
{{ $t('speech.models') }}
{{ $t('speech.synthesis.models') }}
</h3>
<LoadingButton
<div
v-if="curProviderId"
type="button"
variant="outline"
size="sm"
:loading="importLoading"
@click="handleImportModels"
class="flex items-center gap-2"
>
{{ $t('speech.importModels') }}
</LoadingButton>
<LoadingButton
type="button"
variant="outline"
size="sm"
:loading="importLoading"
@click="handleImportModels"
>
{{ $t('speech.importModels') }}
</LoadingButton>
<CreateModel
:id="curProviderId"
default-type="speech"
hide-type
:type-options="speechTypeOptions"
:invalidate-keys="['speech-provider-models', 'speech-models']"
/>
</div>
</div>
<div
@@ -191,7 +202,7 @@
:model-name="model.model_id ?? ''"
:config="model.config || {}"
:schema="getModelSchema(model.model_id ?? '')"
:on-test="(text, cfg) => handleTestModel(model.id ?? '', text, cfg)"
:on-test="(text, cfg) => handleTestModel(model.id ?? '', text as string, cfg)"
@save="(cfg) => handleSaveModel(model.id ?? '', cfg)"
/>
</div>
@@ -218,10 +229,11 @@ import { computed, inject, reactive, ref, watch } from 'vue'
import { toast } from 'vue-sonner'
import { useI18n } from 'vue-i18n'
import { useQuery, useQueryCache } from '@pinia/colada'
import { getSpeechProvidersById, getSpeechProvidersByIdModels, getSpeechProvidersMeta, postSpeechProvidersByIdImportModels, putModelsById, putProvidersById } from '@memohai/sdk'
import { getSpeechProvidersById, getSpeechProvidersByIdModels, getSpeechProvidersMeta, postSpeechProvidersByIdImportModels, putProvidersById } from '@memohai/sdk'
import type { TtsSpeechModelResponse, TtsSpeechProviderResponse } from '@memohai/sdk'
import LoadingButton from '@/components/loading-button/index.vue'
import ProviderIcon from '@/components/provider-icon/index.vue'
import CreateModel from '@/components/create-model/index.vue'
interface SpeechFieldSchema {
key: string
@@ -256,6 +268,8 @@ interface SpeechProviderMeta {
config_schema?: SpeechConfigSchema
default_model?: string
models?: SpeechModelMeta[]
default_synthesis_model?: string
synthesis_models?: SpeechModelMeta[]
}
function getInitials(name: string | undefined) {
@@ -274,6 +288,9 @@ const enableLoading = ref(false)
const saveLoading = ref(false)
const importLoading = ref(false)
const queryCache = useQueryCache()
const speechTypeOptions = [
{ value: 'speech', label: 'Speech' },
]
const { data: providerDetail } = useQuery({
key: () => ['speech-provider-detail', curProviderId.value],
@@ -297,7 +314,7 @@ const { data: metaList } = useQuery({
const currentMeta = computed(() => {
if (!metaList.value || !curProvider.value?.client_type) return null
return (metaList.value as SpeechProviderMeta[]).find((m) => m.provider === curProvider.value?.client_type) ?? null
return (metaList.value as SpeechProviderMeta[]).find(m => m.provider === curProvider.value?.client_type) ?? null
})
const orderedProviderFields = computed(() => {
@@ -317,9 +334,7 @@ const { data: providerSpeechModels } = useQuery({
},
})
const providerModels = computed(() => {
return (providerSpeechModels.value as TtsSpeechModelResponse[] | undefined) ?? []
})
const providerModels = computed(() => ((providerSpeechModels.value as TtsSpeechModelResponse[] | undefined) ?? []))
watch(() => providerDetail.value, (provider) => {
providerName.value = provider?.name ?? curProvider.value?.name ?? ''
@@ -328,12 +343,11 @@ watch(() => providerDetail.value, (provider) => {
}, { immediate: true, deep: true })
function getModelMeta(modelID: string): SpeechModelMeta | null {
const models = currentMeta.value?.models ?? []
const models = currentMeta.value?.synthesis_models ?? currentMeta.value?.models ?? []
const exact = models.find(m => m.id === modelID)
if (exact) return exact
if (currentMeta.value?.default_model) {
return models.find(m => m.id === currentMeta.value?.default_model) ?? null
}
const defaultModel = currentMeta.value?.default_synthesis_model ?? currentMeta.value?.default_model
if (defaultModel) return models.find(m => m.id === defaultModel) ?? null
return models[0] ?? null
}
@@ -398,20 +412,23 @@ async function handleSaveProvider() {
}
async function handleSaveModel(modelId: string, config: Record<string, unknown>) {
const model = providerModels.value.find((item) => item.id === modelId)
const model = providerModels.value.find(item => item.id === modelId)
if (!model) return
try {
await putModelsById({
path: { id: modelId },
body: {
model_id: model.model_id,
name: model.name ?? model.model_id,
provider_id: model.provider_id,
type: 'speech',
config,
const apiBase = import.meta.env.VITE_API_URL?.trim() || '/api'
const token = localStorage.getItem('token')
const resp = await fetch(`${apiBase}/speech-models/${modelId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
...(token ? { Authorization: `Bearer ${token}` } : {}),
},
throwOnError: true,
body: JSON.stringify({
name: model.name ?? model.model_id,
config,
}),
})
if (!resp.ok) throw new Error(await resp.text())
toast.success(t('speech.saveSuccess'))
queryCache.invalidateQueries({ key: ['speech-provider-models', curProviderId.value] })
queryCache.invalidateQueries({ key: ['speech-models'] })
+126
View File
@@ -0,0 +1,126 @@
<script setup lang="ts">
import { computed, ref, provide, watch } from 'vue'
import { useQuery } from '@pinia/colada'
import {
ScrollArea,
SidebarMenu,
SidebarMenuButton,
SidebarMenuItem,
Toggle,
Empty,
EmptyDescription,
EmptyHeader,
EmptyMedia,
EmptyTitle,
} from '@memohai/ui'
import { getTranscriptionProviders } from '@memohai/sdk'
import type { AudioSpeechProviderResponse } from '@memohai/sdk'
import ProviderSetting from './provider-setting.vue'
import { AudioLines } from 'lucide-vue-next'
import MasterDetailSidebarLayout from '@/components/master-detail-sidebar-layout/index.vue'
import ProviderIcon from '@/components/provider-icon/index.vue'
/**
 * Builds the avatar fallback for a provider: the first two characters of the
 * trimmed name in upper case, or '?' when the name is missing or blank.
 */
function getInitials(name: string | undefined) {
  const trimmed = (name ?? '').trim()
  if (trimmed.length === 0) return '?'
  return trimmed.substring(0, 2).toUpperCase()
}
// Loads the transcription provider list; a missing payload is normalised to [].
const { data: providerData } = useQuery({
key: () => ['transcription-providers'],
query: async () => {
const { data } = await getTranscriptionProviders({ throwOnError: true })
return (data ?? []) as AudioSpeechProviderResponse[]
},
})
// Currently selected provider, shared with descendants under 'curTranscriptionProvider'.
const curProvider = ref<AudioSpeechProviderResponse>()
provide('curTranscriptionProvider', curProvider)
// NOTE(review): the pressed state is matched by name while the template's border
// highlight compares ids — confirm provider names are unique.
const selectProvider = (name: string) => computed(() => curProvider.value?.name === name)
// Copy of the provider list with entries whose `enable !== false` sorted first.
const filteredProviders = computed(() => {
if (!Array.isArray(providerData.value)) return []
return [...providerData.value].sort((a, b) => Number(b.enable !== false) - Number(a.enable !== false))
})
// Keeps the selection valid whenever the list changes (also runs once immediately).
watch(filteredProviders, (list) => {
// Nothing to select: store a placeholder with an empty id so `curProvider?.id` is falsy.
if (!list || list.length === 0) {
curProvider.value = { id: '' }
return
}
// Same id still present: re-point at the refreshed object from the new list.
const currentId = curProvider.value?.id
if (currentId) {
const stillExists = list.find(p => p.id === currentId)
if (stillExists) {
curProvider.value = stillExists
return
}
}
// Otherwise fall back to the first entry of the sorted list.
curProvider.value = list[0]
}, { immediate: true })
</script>
<template>
<MasterDetailSidebarLayout>
<template #sidebar-content>
<SidebarMenu
v-for="item in filteredProviders"
:key="item.id"
>
<SidebarMenuItem>
<SidebarMenuButton
as-child
class="justify-start py-5! px-4"
>
<Toggle
:class="['py-4 border', curProvider?.id === item.id ? 'border-border' : 'border-transparent']"
:model-value="selectProvider(item.name ?? '').value"
@update:model-value="(isSelect) => { if (isSelect) curProvider = item }"
>
<span class="relative shrink-0">
<span class="flex size-7 items-center justify-center rounded-full bg-muted">
<ProviderIcon
v-if="item.icon"
:icon="item.icon"
size="1.25em"
/>
<span
v-else
class="text-xs font-medium text-muted-foreground"
>
{{ getInitials(item.name) }}
</span>
</span>
<span
v-if="item.enable !== false"
class="absolute -bottom-0.5 -right-0.5 size-2.5 rounded-full bg-green-500 ring-2 ring-background"
/>
</span>
<span class="truncate">{{ item.name }}</span>
</Toggle>
</SidebarMenuButton>
</SidebarMenuItem>
</SidebarMenu>
</template>
<template #detail>
<ScrollArea
v-if="curProvider?.id"
class="max-h-full h-full"
>
<ProviderSetting />
</ScrollArea>
<Empty
v-else
class="h-full flex justify-center items-center"
>
<EmptyHeader>
<EmptyMedia variant="icon">
<AudioLines />
</EmptyMedia>
</EmptyHeader>
<EmptyTitle>{{ $t('transcription.emptyTitle') }}</EmptyTitle>
<EmptyDescription>{{ $t('transcription.emptyDescription') }}</EmptyDescription>
</Empty>
</template>
</MasterDetailSidebarLayout>
</template>
@@ -0,0 +1,480 @@
<template>
<div class="p-4">
<section class="flex items-center gap-3">
<span class="flex size-10 shrink-0 items-center justify-center rounded-full bg-muted">
<ProviderIcon
v-if="curProvider?.icon"
:icon="curProvider.icon"
size="1.5em"
/>
<span
v-else
class="text-xs font-medium text-muted-foreground"
>
{{ getInitials(curProvider?.name) }}
</span>
</span>
<div class="min-w-0">
<h2 class="text-sm font-semibold truncate">
{{ curProvider?.name }}
</h2>
<p class="text-xs text-muted-foreground">
{{ currentMeta?.display_name ?? curProvider?.client_type }}
</p>
</div>
<div class="ml-auto flex items-center gap-2">
<span class="text-xs text-muted-foreground">
{{ $t('common.enable') }}
</span>
<Switch
:model-value="curProvider?.enable ?? false"
:disabled="!curProvider?.id || enableLoading"
@update:model-value="handleToggleEnable"
/>
</div>
</section>
<Separator class="mt-4 mb-6" />
<form
class="space-y-4"
@submit.prevent="handleSaveProvider"
>
<section class="space-y-2">
<Label for="transcription-provider-name">{{ $t('common.name') }}</Label>
<Input
id="transcription-provider-name"
v-model="providerName"
type="text"
:placeholder="$t('common.namePlaceholder')"
/>
</section>
<section
v-for="field in orderedProviderFields"
:key="field.key"
class="space-y-2"
>
<Label :for="field.type === 'bool' || field.type === 'enum' ? undefined : `transcription-provider-${field.key}`">
{{ field.title || field.key }}
</Label>
<p
v-if="field.description"
class="text-xs text-muted-foreground"
>
{{ field.description }}
</p>
<div
v-if="field.type === 'secret'"
class="relative"
>
<Input
:id="`transcription-provider-${field.key}`"
v-model="providerConfig[field.key] as string"
:type="visibleSecrets[field.key] ? 'text' : 'password'"
/>
<button
type="button"
class="absolute right-2 top-1/2 -translate-y-1/2 text-muted-foreground hover:text-foreground"
@click="visibleSecrets[field.key] = !visibleSecrets[field.key]"
>
<component
:is="visibleSecrets[field.key] ? EyeOff : Eye"
class="size-3.5"
/>
</button>
</div>
<Switch
v-else-if="field.type === 'bool'"
:model-value="!!providerConfig[field.key]"
@update:model-value="(val) => providerConfig[field.key] = !!val"
/>
<Input
v-else-if="field.type === 'number'"
:id="`transcription-provider-${field.key}`"
v-model.number="providerConfig[field.key] as number"
type="number"
/>
<Select
v-else-if="field.type === 'enum' && field.enum"
:model-value="String(providerConfig[field.key] ?? '')"
@update:model-value="(val) => providerConfig[field.key] = val"
>
<SelectTrigger>
<SelectValue :placeholder="field.title || field.key" />
</SelectTrigger>
<SelectContent>
<SelectItem
v-for="opt in field.enum"
:key="opt"
:value="opt"
>
{{ opt }}
</SelectItem>
</SelectContent>
</Select>
<Input
v-else
:id="`transcription-provider-${field.key}`"
v-model="providerConfig[field.key] as string"
type="text"
/>
</section>
<div class="flex justify-end">
<LoadingButton
type="submit"
:loading="saveLoading"
>
{{ $t('provider.saveChanges') }}
</LoadingButton>
</div>
</form>
<Separator class="mt-6 mb-6" />
<section>
<div class="flex justify-between items-center mb-4">
<h3 class="text-xs font-medium">
{{ $t('transcription.models') }}
</h3>
<div
v-if="curProviderId"
class="flex items-center gap-2"
>
<LoadingButton
type="button"
variant="outline"
size="sm"
:loading="importLoading"
@click="handleImportModels"
>
{{ $t('transcription.importModels') }}
</LoadingButton>
<CreateModel
:id="curProviderId"
default-type="transcription"
hide-type
:type-options="transcriptionTypeOptions"
:invalidate-keys="['transcription-provider-models', 'transcription-models']"
/>
</div>
</div>
<div
v-if="providerModels.length === 0"
class="text-xs text-muted-foreground py-4 text-center"
>
{{ $t('transcription.noModels') }}
</div>
<div
v-for="model in providerModels"
:key="model.id"
class="border border-border rounded-lg mb-4"
>
<button
type="button"
class="w-full flex items-center justify-between p-3 text-left hover:bg-accent/50 rounded-t-lg transition-colors"
@click="toggleModel(model.id ?? '')"
>
<div>
<span class="text-xs font-medium">{{ model.name || model.model_id }}</span>
<span
v-if="model.name"
class="text-xs text-muted-foreground ml-2"
>
{{ model.model_id }}
</span>
</div>
<component
:is="expandedModelId === model.id ? ChevronUp : ChevronDown"
class="size-3 text-muted-foreground"
/>
</button>
<div
v-if="expandedModelId === model.id"
class="px-3 pb-3 space-y-4 border-t border-border pt-3"
>
<ModelConfigEditor
:model-id="model.id ?? ''"
:model-name="model.model_id ?? ''"
:config="model.config || {}"
:schema="getModelSchema(model.model_id ?? '')"
mode="transcription"
:on-test="(file, cfg) => handleTestModel(model.id ?? '', file as File, cfg)"
@save="(cfg) => handleSaveModel(model.id ?? '', cfg)"
/>
</div>
</div>
</section>
</div>
</template>
<script setup lang="ts">
import { computed, inject, reactive, ref, watch } from 'vue'
import { useQuery, useQueryCache } from '@pinia/colada'
import { toast } from 'vue-sonner'
import { useI18n } from 'vue-i18n'
import {
getTranscriptionProvidersById,
getTranscriptionProvidersMeta,
getTranscriptionProvidersByIdModels,
postTranscriptionProvidersByIdImportModels,
postTranscriptionModelsByIdTest,
putProvidersById,
putTranscriptionModelsById,
} from '@memohai/sdk'
import type {
AudioProviderMetaResponse,
AudioSpeechProviderResponse,
AudioTestTranscriptionResponse,
AudioTranscriptionModelResponse,
} from '@memohai/sdk'
import { ChevronDown, ChevronUp, Eye, EyeOff } from 'lucide-vue-next'
import { Input, Label, Select, SelectContent, SelectItem, SelectTrigger, SelectValue, Separator, Switch } from '@memohai/ui'
import ProviderIcon from '@/components/provider-icon/index.vue'
import LoadingButton from '@/components/loading-button/index.vue'
import ModelConfigEditor from '@/pages/speech/components/model-config-editor.vue'
import CreateModel from '@/components/create-model/index.vue'
/** One configurable field described by a provider or model config schema. */
interface FieldSchema {
  /** Config key the field's value is stored under. */
  key: string
  /** Field kind; the template renders 'secret', 'bool', 'number' and 'enum' specially and everything else as text. */
  type: string
  /** Label shown for the field; the key is shown when absent. */
  title?: string
  /** Optional help text shown under the label. */
  description?: string
  /** Selectable values for fields of type 'enum'. */
  enum?: string[]
  /** Ascending sort weight; a missing order is treated as 0 when sorting. */
  order?: number
}

/** A set of configurable fields. */
interface ConfigSchema {
  fields?: FieldSchema[]
}

/** Normalized metadata for one model template; the schema may live at the top level or under capabilities. */
interface ModelMeta {
  id: string
  name: string
  config_schema?: ConfigSchema
  capabilities?: {
    config_schema?: ConfigSchema
  }
}

/** Normalized metadata for one transcription provider type. */
interface ProviderMeta {
  /** Provider identifier; compared against the selected provider's client_type. */
  provider: string
  display_name?: string
  /** Schema of the provider-level config form. */
  config_schema?: ConfigSchema
  /** Id of the model whose schema is used when a model has no exact match. */
  default_transcription_model?: string
  transcription_models?: ModelMeta[]
  models?: ModelMeta[]
}
/**
 * Builds a short upper-cased label from a display name.
 *
 * The first two characters of the trimmed name are used. Characters are
 * counted as Unicode code points rather than UTF-16 code units, so a name
 * containing an emoji or another astral-plane character is never cut in the
 * middle of a surrogate pair.
 *
 * @param name - Display name; may be undefined, empty or whitespace only.
 * @returns The upper-cased initials, or '?' when there is nothing to show.
 */
function getInitials(name: string | undefined) {
  const label = name?.trim() ?? ''
  if (!label) return '?'
  // Array.from iterates a string by code point, keeping surrogate pairs intact.
  return Array.from(label).slice(0, 2).join('').toUpperCase()
}
/**
 * Converts an SDK config schema into the local ConfigSchema shape.
 *
 * Entries that lack a key or a type are dropped; the remaining entries keep
 * their original order and only the six known properties are copied.
 *
 * @param schema - Schema as returned by the SDK, if any.
 * @returns undefined when no schema is given, otherwise a schema whose
 *   fields array is always present (possibly empty).
 */
function normalizeConfigSchema(schema?: AudioProviderMetaResponse['config_schema']): ConfigSchema | undefined {
  if (!schema) return undefined
  const fields = (schema.fields ?? []).flatMap((field): FieldSchema[] => {
    // Unusable entries contribute nothing to the flattened result.
    if (!field?.key || !field.type) return []
    return [{
      key: field.key,
      type: field.type,
      title: field.title,
      description: field.description,
      enum: field.enum,
      order: field.order,
    }]
  })
  return { fields }
}
/**
 * Converts one SDK model entry into the local ModelMeta shape.
 *
 * @param model - Model entry as returned by the SDK.
 * @returns null when the entry has no id; otherwise the normalized model,
 *   whose name falls back to the id and whose capabilities are only set when
 *   the source entry has them.
 */
function normalizeModelMeta(model: NonNullable<AudioProviderMetaResponse['models']>[number]): ModelMeta | null {
  if (!model?.id) return null

  let capabilities: ModelMeta['capabilities'] = undefined
  if (model.capabilities) {
    capabilities = { config_schema: normalizeConfigSchema(model.capabilities.config_schema) }
  }

  return {
    id: model.id,
    name: model.name ?? model.id,
    config_schema: normalizeConfigSchema(model.config_schema),
    capabilities,
  }
}
/**
 * Converts one SDK provider metadata entry into the local ProviderMeta shape.
 *
 * Both model lists are always arrays in the result; entries that cannot be
 * normalized (no id) are removed.
 *
 * @param meta - Provider metadata as returned by the SDK.
 * @returns The normalized provider metadata; provider defaults to ''.
 */
function normalizeProviderMeta(meta: AudioProviderMetaResponse): ProviderMeta {
  const isModelMeta = (model: ModelMeta | null): model is ModelMeta => model !== null

  const transcriptionModels = (meta.transcription_models ?? [])
    .map(normalizeModelMeta)
    .filter(isModelMeta)
  const models = (meta.models ?? [])
    .map(normalizeModelMeta)
    .filter(isModelMeta)

  return {
    provider: meta.provider ?? '',
    display_name: meta.display_name,
    config_schema: normalizeConfigSchema(meta.config_schema),
    default_transcription_model: meta.default_transcription_model,
    transcription_models: transcriptionModels,
    models,
  }
}
// Translation function used for the toast messages below.
const { t } = useI18n()
// Currently selected provider, injected under 'curTranscriptionProvider'.
// The second argument is the fallback used when nothing is provided: an empty ref.
const curProvider = inject('curTranscriptionProvider', ref<AudioSpeechProviderResponse>())
// Id of the selected provider; undefined while no provider is selected.
const curProviderId = computed(() => curProvider.value?.id)
// Editable copy of the provider name, bound to the name input.
const providerName = ref('')
// Editable copy of the provider config, keyed by schema field key.
const providerConfig = reactive<Record<string, unknown>>({})
// Per-field flag toggled by the eye button; when true the secret input is rendered as plain text.
const visibleSecrets = reactive<Record<string, boolean>>({})
// Id of the model whose editor is expanded; '' means none is expanded.
const expandedModelId = ref('')
// In-flight flags for the enable toggle, the provider save and the model import.
const enableLoading = ref(false)
const saveLoading = ref(false)
const importLoading = ref(false)
// Query cache handle used to invalidate cached queries after mutations.
const queryCache = useQueryCache()
// Type options passed to the CreateModel dialog; only 'transcription' is offered here.
const transcriptionTypeOptions = [
{ value: 'transcription', label: 'Transcription' },
]
// Full record of the selected provider, cached per provider id.
// Resolves to null when no provider is selected or the response carries no data.
const { data: providerDetail } = useQuery({
  key: () => ['transcription-provider-detail', curProviderId.value ?? ''],
  query: async () => {
    const id = curProviderId.value
    if (!id) return null
    const response = await getTranscriptionProvidersById({
      path: { id },
      throwOnError: true,
    })
    return (response.data ?? null) as AudioSpeechProviderResponse | null
  },
})
// Metadata of all transcription provider types, normalized to ProviderMeta.
const { data: metaList } = useQuery({
  key: () => ['transcription-providers-meta'],
  query: async () => {
    const response = await getTranscriptionProvidersMeta({ throwOnError: true })
    const rawMetas = response.data ?? []
    return rawMetas.map(raw => normalizeProviderMeta(raw))
  },
})
// Metadata entry whose provider matches the selected provider's client_type, or null.
const currentMeta = computed(() => {
  const metas = metaList.value ?? []
  const match = metas.find(meta => meta.provider === curProvider.value?.client_type)
  return match ?? null
})
// Provider config fields sorted by ascending order (missing order counts as 0).
// The schema's own array is copied first so it is never reordered in place.
const orderedProviderFields = computed(() => {
  const fields = currentMeta.value?.config_schema?.fields ?? []
  return fields.slice().sort((left, right) => {
    const leftOrder = left.order ?? 0
    const rightOrder = right.order ?? 0
    return leftOrder - rightOrder
  })
})
// Models belonging to the selected provider, cached per provider id.
// Resolves to an empty list when no provider is selected or the response carries no data.
const { data: providerModelData } = useQuery({
  key: () => ['transcription-provider-models', curProviderId.value ?? ''],
  query: async () => {
    const id = curProviderId.value
    if (!id) return []
    const response = await getTranscriptionProvidersByIdModels({
      path: { id },
      throwOnError: true,
    })
    return (response.data ?? []) as AudioTranscriptionModelResponse[]
  },
})
// Model list for rendering; an empty array while the query has no data.
const providerModels = computed(() => {
  const loaded = providerModelData.value
  return loaded ?? []
})
// Re-seed the editable form state whenever the fetched provider detail changes.
// Runs once immediately and again on any deep change of the detail.
watch(
  () => providerDetail.value,
  (detail) => {
    providerName.value = detail?.name ?? curProvider.value?.name ?? ''
    // Empty the reactive object in place so existing bindings keep pointing at it.
    for (const staleKey of Object.keys(providerConfig)) {
      delete providerConfig[staleKey]
    }
    Object.assign(providerConfig, { ...(detail?.config ?? {}) })
  },
  { immediate: true, deep: true },
)
/**
 * Resolves the config schema used to edit a model of the current provider.
 *
 * Candidate models are the provider's transcription models, or its generic
 * models when the transcription list is empty. normalizeProviderMeta always
 * produces an array for transcription_models, so an emptiness check is used
 * here; a nullish check alone would never reach the generic list.
 *
 * Lookup order: exact id match, then the provider's default transcription
 * model, then the first candidate.
 *
 * @param modelID - Provider-side model id (model_id) to look up.
 * @returns The model's own config schema, else the one under its
 *   capabilities, else null.
 */
function getModelSchema(modelID: string): ConfigSchema | null {
  const meta = currentMeta.value
  const transcriptionModels = meta?.transcription_models ?? []
  const models = transcriptionModels.length > 0 ? transcriptionModels : (meta?.models ?? [])
  const match = models.find(m => m.id === modelID)
    ?? models.find(m => m.id === meta?.default_transcription_model)
    ?? models[0]
  return match?.config_schema ?? match?.capabilities?.config_schema ?? null
}
/**
 * Expands the given model's editor, or collapses it when it is already the
 * expanded one. Only one model is expanded at a time.
 *
 * @param id - Id of the model whose header was clicked.
 */
function toggleModel(id: string) {
  if (expandedModelId.value === id) {
    expandedModelId.value = ''
    return
  }
  expandedModelId.value = id
}
/**
 * Persists a change of the provider's enable switch.
 *
 * The injected provider is updated optimistically before the request; when the
 * request fails the flag is restored and an error toast is shown. The request
 * also carries the current form name and config, not only the flag.
 *
 * @param value - New state of the enable switch.
 */
async function handleToggleEnable(value: boolean) {
  if (!curProviderId.value) return
  if (!curProvider.value?.client_type) return

  const previousEnable = curProvider.value.enable ?? false
  curProvider.value = { ...curProvider.value, enable: value }
  enableLoading.value = true

  try {
    const body = {
      name: providerName.value.trim() || curProvider.value.name || '',
      client_type: curProvider.value.client_type,
      enable: value,
      config: sanitizeConfig(providerConfig),
    }
    await putProvidersById({
      path: { id: curProviderId.value },
      body,
      throwOnError: true,
    })
    // Refresh both the provider list and this provider's detail.
    queryCache.invalidateQueries({ key: ['transcription-providers'] })
    queryCache.invalidateQueries({ key: ['transcription-provider-detail', curProviderId.value ?? ''] })
  } catch {
    // Roll the optimistic update back.
    curProvider.value = { ...curProvider.value, enable: previousEnable }
    toast.error(t('common.saveFailed'))
  } finally {
    enableLoading.value = false
  }
}
/**
 * Saves the provider form (name and config) for the selected provider.
 *
 * A blank name falls back to the provider's current name. The enable flag is
 * sent unchanged. Shows a success or failure toast and refreshes the provider
 * list and detail queries on success.
 */
async function handleSaveProvider() {
  if (!curProviderId.value) return
  if (!curProvider.value?.client_type) return

  saveLoading.value = true
  try {
    const body = {
      name: providerName.value.trim() || curProvider.value.name || '',
      client_type: curProvider.value.client_type,
      enable: curProvider.value.enable,
      config: sanitizeConfig(providerConfig),
    }
    await putProvidersById({
      path: { id: curProviderId.value },
      body,
      throwOnError: true,
    })
    toast.success(t('transcription.saveSuccess'))
    queryCache.invalidateQueries({ key: ['transcription-providers'] })
    queryCache.invalidateQueries({ key: ['transcription-provider-detail', curProviderId.value ?? ''] })
  } catch {
    toast.error(t('common.saveFailed'))
  } finally {
    saveLoading.value = false
  }
}
/**
 * Saves the edited config of one model of the current provider.
 *
 * Does nothing when the model is not in the loaded list. The model's name is
 * sent along unchanged, falling back to its model_id and then to the id.
 *
 * @param modelId - Id of the model being saved.
 * @param config - Config emitted by the model editor.
 */
async function handleSaveModel(modelId: string, config: Record<string, unknown>) {
  const target = providerModels.value.find(candidate => candidate.id === modelId)
  if (!target) return

  const name = target.name ?? target.model_id ?? modelId
  try {
    await putTranscriptionModelsById({
      path: { id: modelId },
      body: { name, config },
      throwOnError: true,
    })
    toast.success(t('transcription.saveSuccess'))
    // Refresh this provider's model list and the global model list.
    queryCache.invalidateQueries({ key: ['transcription-provider-models', curProviderId.value ?? ''] })
    queryCache.invalidateQueries({ key: ['transcription-models'] })
  } catch {
    toast.error(t('common.saveFailed'))
  }
}
/**
 * Imports models for the selected provider and reports how many were created
 * and skipped. Counts missing from the response are shown as 0.
 *
 * Refreshes the provider's model list, the global model list and the provider
 * metadata on success; shows an error toast on failure.
 */
async function handleImportModels() {
  const id = curProviderId.value
  if (!id) return

  importLoading.value = true
  try {
    const response = await postTranscriptionProvidersByIdImportModels({
      path: { id },
      throwOnError: true,
    })
    const summary = (response.data ?? {}) as { created?: number, skipped?: number }
    const counts = {
      created: summary.created ?? 0,
      skipped: summary.skipped ?? 0,
    }
    toast.success(t('transcription.importSuccess', counts))
    queryCache.invalidateQueries({ key: ['transcription-provider-models', curProviderId.value ?? ''] })
    queryCache.invalidateQueries({ key: ['transcription-models'] })
    queryCache.invalidateQueries({ key: ['transcription-providers-meta'] })
  } catch {
    toast.error(t('transcription.importFailed'))
  } finally {
    importLoading.value = false
  }
}
/**
 * Runs a test transcription of a file against one model.
 *
 * Errors are not caught here; they propagate to the caller.
 *
 * @param modelId - Id of the model to test.
 * @param file - File to transcribe.
 * @param config - Model config to test with; sent as a JSON string.
 * @returns The test response, or an empty object when the response has no data.
 */
async function handleTestModel(modelId: string, file: File, config: Record<string, unknown>) {
  const serializedConfig = JSON.stringify(config)
  const response = await postTranscriptionModelsByIdTest({
    path: { id: modelId },
    body: { file, config: serializedConfig },
    throwOnError: true,
  })
  return (response.data ?? {}) as AudioTestTranscriptionResponse
}
/**
 * Returns a copy of a config object without blank entries.
 *
 * An entry is blank when its value is the empty string, null or undefined.
 * Other falsy values such as 0 and false are kept. The input is not modified
 * and key order is preserved.
 *
 * @param input - Config values keyed by field key.
 * @returns A new object holding only the non-blank entries.
 */
function sanitizeConfig(input: Record<string, unknown>) {
  const cleaned: Record<string, unknown> = {}
  Object.keys(input).forEach((name) => {
    const entry = input[name]
    const isBlank = entry === '' || entry === null || entry === undefined
    if (!isBlank) cleaned[name] = entry
  })
  return cleaned
}
</script>
+8
View File
@@ -89,6 +89,14 @@ const routes = [
breadcrumb: i18nRef('sidebar.speech'),
},
},
{
name: 'transcription',
path: 'transcription',
component: () => import('@/pages/transcription/index.vue'),
meta: {
breadcrumb: i18nRef('sidebar.transcription'),
},
},
{
name: 'email',
path: 'email',