mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-25 07:00:48 +09:00
c9dcfe287f
* feat: expand speech provider support with new client types and configuration schema * feat: add icon support for speech providers and update related configurations * feat: add SVG support for Deepgram and Elevenlabs with Vue components * feat: except *-speech client type in llm provider * feat: enhance speech provider functionality with advanced settings and model import capabilities * chore: remove go.mod replace * feat: enhance speech provider functionality with advanced settings and model import capabilities * chore: update go module dependencies * feat: Ear and Mouth * fix: separate ear/mouth page * fix: separate audio domain and restore transcription templates Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs. --------- Co-authored-by: aki <arisu@ieee.org>
103 lines
4.2 KiB
Go
103 lines
4.2 KiB
Go
package audio
|
|
|
|
import "time"
|
|
|
|
// ProviderMetaResponse exposes adapter metadata (from the registry, not DB).
|
|
type ProviderMetaResponse struct {
|
|
Provider string `json:"provider"`
|
|
DisplayName string `json:"display_name"`
|
|
Description string `json:"description"`
|
|
ConfigSchema ConfigSchema `json:"config_schema,omitempty"`
|
|
DefaultModel string `json:"default_model,omitempty"`
|
|
Models []ModelInfo `json:"models,omitempty"`
|
|
DefaultSynthesisModel string `json:"default_synthesis_model,omitempty"`
|
|
SynthesisModels []ModelInfo `json:"synthesis_models,omitempty"`
|
|
SupportsSynthesisList bool `json:"supports_synthesis_list,omitempty"`
|
|
DefaultTranscriptionModel string `json:"default_transcription_model,omitempty"`
|
|
TranscriptionModels []ModelInfo `json:"transcription_models,omitempty"`
|
|
SupportsTranscriptionList bool `json:"supports_transcription_list,omitempty"`
|
|
}
|
|
|
|
// SpeechProviderResponse represents a speech-capable provider from the unified providers table.
//
// Icon and Config are tagged omitempty and are dropped from the JSON output
// when empty (Config also when nil). All other fields, including a false
// Enable, are always serialized. The timestamps use time.Time's default JSON
// encoding (RFC 3339).
type SpeechProviderResponse struct {
	ID         string         `json:"id"`
	Name       string         `json:"name"`
	ClientType string         `json:"client_type"`
	Icon       string         `json:"icon,omitempty"`
	Enable     bool           `json:"enable"`
	Config     map[string]any `json:"config,omitempty"`
	CreatedAt  time.Time      `json:"created_at"`
	UpdatedAt  time.Time      `json:"updated_at"`
}
|
|
|
|
// SpeechModelResponse represents a speech model from the unified models table.
//
// ProviderType and Config are tagged omitempty and are dropped from the JSON
// output when empty (Config also when nil). All other fields are always
// serialized. The timestamps use time.Time's default JSON encoding (RFC 3339).
type SpeechModelResponse struct {
	ID           string         `json:"id"`
	ModelID      string         `json:"model_id"`
	Name         string         `json:"name"`
	ProviderID   string         `json:"provider_id"`
	ProviderType string         `json:"provider_type,omitempty"`
	Config       map[string]any `json:"config,omitempty"`
	CreatedAt    time.Time      `json:"created_at"`
	UpdatedAt    time.Time      `json:"updated_at"`
}
|
|
|
|
// TranscriptionModelResponse represents a transcription model from the unified models table.
//
// ProviderType and Config are tagged omitempty and are dropped from the JSON
// output when empty (Config also when nil). All other fields are always
// serialized. The timestamps use time.Time's default JSON encoding (RFC 3339).
type TranscriptionModelResponse struct {
	ID           string         `json:"id"`
	ModelID      string         `json:"model_id"`
	Name         string         `json:"name"`
	ProviderID   string         `json:"provider_id"`
	ProviderType string         `json:"provider_type,omitempty"`
	Config       map[string]any `json:"config,omitempty"`
	CreatedAt    time.Time      `json:"created_at"`
	UpdatedAt    time.Time      `json:"updated_at"`
}
|
|
|
|
// UpdateSpeechProviderRequest is used for updating a speech provider.
//
// Both fields are pointers: after decoding, a nil pointer means the key was
// absent (or null) in the request body, while a non-nil pointer carries the
// supplied value, including "" or false.
// NOTE(review): presumably a nil field means "leave unchanged"; confirm
// against the handler that consumes this request.
type UpdateSpeechProviderRequest struct {
	Name   *string `json:"name,omitempty"`
	Enable *bool   `json:"enable,omitempty"`
}
|
|
|
|
// UpdateSpeechModelRequest is used for updating a speech model.
//
// Name is a pointer, so a nil value after decoding means the "name" key was
// absent (or null) in the request body. Config stays nil when the "config"
// key is absent.
// NOTE(review): presumably nil fields mean "leave unchanged"; confirm
// against the handler that consumes this request.
type UpdateSpeechModelRequest struct {
	Name   *string        `json:"name,omitempty"`
	Config map[string]any `json:"config,omitempty"`
}
|
|
|
|
// TestSynthesizeRequest represents a text-to-speech test request.
//
// Text is always serialized under "text". Config is optional and is dropped
// from the JSON output when nil or empty.
type TestSynthesizeRequest struct {
	Text   string         `json:"text"`
	Config map[string]any `json:"config,omitempty"`
}
|
|
|
|
// TestTranscriptionRequest represents an audio-to-text test request.
//
// The struct carries only the optional Config map; it has no field for the
// audio payload itself. Config is dropped from the JSON output when nil or
// empty.
type TestTranscriptionRequest struct {
	Config map[string]any `json:"config,omitempty"`
}
|
|
|
|
// TestTranscriptionResponse represents the result of a transcription test.
|
|
type TestTranscriptionResponse struct {
|
|
Text string `json:"text"`
|
|
Language string `json:"language,omitempty"`
|
|
DurationSeconds float64 `json:"duration_seconds,omitempty"`
|
|
Words []TranscriptionWord `json:"words,omitempty"`
|
|
Metadata map[string]any `json:"metadata,omitempty"`
|
|
}
|
|
|
|
// TranscriptionWord represents a single word alignment from a transcription result.
//
// Text is always serialized. Start, End and SpeakerID are tagged omitempty,
// so a word whose Start (or End) is exactly 0 is encoded without that key;
// consumers cannot tell "starts at 0" from "no timestamp" in the JSON.
// NOTE(review): the unit of Start/End is not stated in this file; presumably
// seconds, matching DurationSeconds on the response — confirm.
type TranscriptionWord struct {
	Text      string  `json:"text"`
	Start     float64 `json:"start,omitempty"`
	End       float64 `json:"end,omitempty"`
	SpeakerID string  `json:"speaker_id,omitempty"`
}
|
|
|
|
// ImportModelsResponse represents the response for importing speech models.
//
// None of the fields use omitempty: zero counts are serialized as 0, and a
// nil Models slice is serialized as JSON null (an empty non-nil slice as []).
// NOTE(review): whether Models lists only created models or all processed
// ones is not visible here; confirm against the import handler.
type ImportModelsResponse struct {
	Created int      `json:"created"`
	Skipped int      `json:"skipped"`
	Models  []string `json:"models"`
}
|