Files
Memoh/internal/audio/config.go
T
Acbox c9dcfe287f Feat/speech support (#392)
* feat: expand speech provider support with new client types and configuration schema

* feat: add icon support for speech providers and update related configurations

* feat: add SVG support for Deepgram and Elevenlabs with Vue components

* feat: exclude *-speech client types in llm provider

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: remove go.mod replace

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: update go module dependencies

* feat: Ear and Mouth

* fix: separate ear/mouth page

* fix: separate audio domain and restore transcription templates

Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs.

---------

Co-authored-by: aki <arisu@ieee.org>
2026-04-22 00:09:46 +08:00

71 lines
2.4 KiB
Go

package audio
// VoiceConfig holds a voice identifier together with a language value.
//
// It is kept for backward compatibility with the legacy Edge adapter tests.
type VoiceConfig struct {
	// ID is the voice identifier, serialized under the JSON key "id".
	ID string `json:"id"`

	// Lang is serialized under the JSON key "lang". Presumably a language or
	// locale code; the accepted format is not defined in this file.
	Lang string `json:"lang"`
}
// AudioConfig groups output-audio settings with the voice to use.
//
// It is kept for backward compatibility with the legacy Edge adapter tests.
type AudioConfig struct {
	// Format is serialized as "format". The set of accepted format names is
	// not defined in this file.
	Format string `json:"format"`

	// SampleRate is serialized as "sample_rate". Presumably expressed in Hz;
	// TODO confirm against the adapter that consumes it.
	SampleRate int `json:"sample_rate"`

	// Speed is serialized as "speed". Units and valid range are not defined
	// in this file.
	Speed float64 `json:"speed"`

	// Pitch is serialized as "pitch". Units and valid range are not defined
	// in this file.
	Pitch float64 `json:"pitch"`

	// Voice is the nested voice selection, serialized as "voice".
	Voice VoiceConfig `json:"voice"`
}
// Validate performs no checks on the receiver and always returns a nil error.
func (AudioConfig) Validate() error {
	return nil
}
// FieldSchema describes a single dynamic speech config field.
//
// Key, Type and Order are always present in the JSON encoding; every other
// member carries omitempty and is dropped by encoding/json when it holds its
// zero value.
type FieldSchema struct {
	// Key is the field's identifier, serialized as "key".
	Key string `json:"key"`

	// Type names the field's value type, serialized as "type". The set of
	// recognized type names is not defined in this file.
	Type string `json:"type"`

	// Title is an optional title for the field.
	Title string `json:"title,omitempty"`

	// Description is an optional longer explanation of the field.
	Description string `json:"description,omitempty"`

	// Required flags the field as required. Because of omitempty, false is
	// not emitted.
	Required bool `json:"required,omitempty"`

	// Advanced flags the field as an advanced setting. Because of omitempty,
	// false is not emitted.
	Advanced bool `json:"advanced,omitempty"`

	// Enum optionally lists string values for the field. Presumably the
	// allowed choices; verify against the form renderer.
	Enum []string `json:"enum,omitempty"`

	// Example is an optional sample value of any type.
	Example any `json:"example,omitempty"`

	// Order is always serialized, including when zero. Presumably a sort
	// position for display; TODO confirm.
	Order int `json:"order"`
}
// ConfigSchema is a list of FieldSchema entries describing a dynamic
// configuration.
type ConfigSchema struct {
	// Fields holds the field descriptions, serialized as "fields". There is
	// no omitempty, so a nil slice is encoded as JSON null by encoding/json.
	Fields []FieldSchema `json:"fields"`
}
// ParamConstraint describes valid values for a numeric parameter.
//
// When Options is non-empty, only those discrete values are allowed.
type ParamConstraint struct {
	// Options is the discrete set of permitted values; omitted from JSON
	// when empty.
	Options []float64 `json:"options,omitempty"`

	// Min is serialized as "min". Presumably the lower bound used when
	// Options is empty; whether it is inclusive is not defined here.
	// NOTE(review): because of omitempty, a Min of exactly 0 is dropped from
	// encoding/json output — confirm consumers treat a missing "min" as 0.
	Min float64 `json:"min,omitempty"`

	// Max is serialized as "max". Presumably the upper bound used when
	// Options is empty; whether it is inclusive is not defined here.
	// NOTE(review): as with Min, a Max of exactly 0 is dropped from the
	// encoded JSON.
	Max float64 `json:"max,omitempty"`

	// Default is always serialized as "default", including when zero.
	Default float64 `json:"default"`
}
// ModelCapabilities exposes optional UX hints for speech config forms.
type ModelCapabilities struct {
	// ConfigSchema is serialized as "config_schema".
	// NOTE(review): ConfigSchema is a struct value, and encoding/json's
	// omitempty never treats a struct as empty, so this key is emitted even
	// when no fields are set — confirm that is intended.
	ConfigSchema ConfigSchema `json:"config_schema,omitempty"`

	// Voices optionally lists voices; omitted from JSON when empty.
	Voices []VoiceInfo `json:"voices,omitempty"`

	// Formats optionally lists format names; omitted from JSON when empty.
	Formats []string `json:"formats,omitempty"`

	// Speed optionally constrains the speed parameter; a nil pointer is
	// omitted from JSON.
	Speed *ParamConstraint `json:"speed,omitempty"`

	// Pitch optionally constrains the pitch parameter; a nil pointer is
	// omitted from JSON.
	Pitch *ParamConstraint `json:"pitch,omitempty"`

	// Metadata carries optional string key/value pairs; omitted from JSON
	// when empty. The recognized keys are not defined in this file.
	Metadata map[string]string `json:"metadata,omitempty"`
}
// ModelInfo describes a single speech model exposed by a provider definition.
type ModelInfo struct {
	// ID is the model identifier, serialized as "id".
	ID string `json:"id"`

	// Name is the model's name, serialized as "name".
	Name string `json:"name"`

	// Description is optional explanatory text; omitted from JSON when empty.
	Description string `json:"description,omitempty"`

	// TemplateOnly is serialized as "template_only" and omitted when false.
	// Its exact meaning is not defined in this file; presumably it marks
	// models usable only as templates — TODO confirm.
	TemplateOnly bool `json:"template_only,omitempty"`

	// ConfigSchema is serialized as "config_schema".
	// NOTE(review): omitempty has no effect on a struct value under
	// encoding/json, so this key is always emitted. Capabilities carries a
	// ConfigSchema of its own; how the two relate is not defined here.
	ConfigSchema ConfigSchema `json:"config_schema,omitempty"`

	// Capabilities holds the model's capability hints and is always
	// serialized as "capabilities".
	Capabilities ModelCapabilities `json:"capabilities"`
}
// VoiceInfo describes one voice entry by identifier, name and language value.
type VoiceInfo struct {
	// ID is the voice identifier, serialized as "id".
	ID string `json:"id"`

	// Name is the voice's name, serialized as "name".
	Name string `json:"name"`

	// Lang is serialized as "lang". Presumably a language or locale code;
	// the accepted format is not defined in this file.
	Lang string `json:"lang"`
}