feat: transcription support (#394)

* feat: expand speech provider support with new client types and configuration schema

* feat: add icon support for speech providers and update related configurations

* feat: add SVG support for Deepgram and Elevenlabs with Vue components

* feat: exclude *-speech client types from the LLM provider

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: remove go.mod replace

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: update go module dependencies

* feat: Ear and Mouth

* fix: separate ear/mouth page

* fix: separate audio domain and restore transcription templates

Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs.

---------

Co-authored-by: aki <arisu@ieee.org>
This commit is contained in:
Acbox
2026-04-22 00:12:01 +08:00
committed by GitHub
parent fd8f1ec078
commit 925fdee478
70 changed files with 6612 additions and 1692 deletions
+15 -7
View File
@@ -126,9 +126,9 @@ func (s *Service) List(ctx context.Context) ([]GetResponse, error) {
return s.convertToGetResponseList(dbModels), nil
}
// ListByType returns models filtered by type (chat, embedding, or speech).
// ListByType returns models filtered by type.
func (s *Service) ListByType(ctx context.Context, modelType ModelType) ([]GetResponse, error) {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech && modelType != ModelTypeTranscription {
return nil, fmt.Errorf("invalid model type: %s", modelType)
}
@@ -165,7 +165,7 @@ func (s *Service) ListEnabled(ctx context.Context) ([]GetResponse, error) {
// ListEnabledByType returns models from enabled providers filtered by type.
func (s *Service) ListEnabledByType(ctx context.Context, modelType ModelType) ([]GetResponse, error) {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech && modelType != ModelTypeTranscription {
return nil, fmt.Errorf("invalid model type: %s", modelType)
}
dbModels, err := s.queries.ListEnabledModelsByType(ctx, string(modelType))
@@ -206,7 +206,7 @@ func (s *Service) ListByProviderID(ctx context.Context, providerID string) ([]Ge
// ListByProviderIDAndType returns models filtered by provider ID and type.
func (s *Service) ListByProviderIDAndType(ctx context.Context, providerID string, modelType ModelType) ([]GetResponse, error) {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech && modelType != ModelTypeTranscription {
return nil, fmt.Errorf("invalid model type: %s", modelType)
}
if strings.TrimSpace(providerID) == "" {
@@ -361,7 +361,7 @@ func (s *Service) Count(ctx context.Context) (int64, error) {
// CountByType returns the number of models of a specific type.
func (s *Service) CountByType(ctx context.Context, modelType ModelType) (int64, error) {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech && modelType != ModelTypeTranscription {
return 0, fmt.Errorf("invalid model type: %s", modelType)
}
@@ -432,13 +432,19 @@ func IsValidClientType(clientType ClientType) bool {
ClientTypeGitHubCopilot,
ClientTypeEdgeSpeech,
ClientTypeOpenAISpeech,
ClientTypeOpenAITranscription,
ClientTypeOpenRouterSpeech,
ClientTypeOpenRouterTranscription,
ClientTypeElevenLabsSpeech,
ClientTypeElevenLabsTranscription,
ClientTypeDeepgramSpeech,
ClientTypeDeepgramTranscription,
ClientTypeMiniMaxSpeech,
ClientTypeVolcengineSpeech,
ClientTypeAlibabaSpeech,
ClientTypeMicrosoftSpeech:
ClientTypeMicrosoftSpeech,
ClientTypeGoogleSpeech,
ClientTypeGoogleTranscription:
return true
default:
return false
@@ -448,7 +454,9 @@ func IsValidClientType(clientType ClientType) bool {
// IsLLMClientType returns true if the client type belongs to the LLM domain
// (chat/embedding), excluding audio-only types (any type ending in "-speech"
// or "-transcription").
func IsLLMClientType(clientType ClientType) bool {
return IsValidClientType(clientType) && !strings.HasSuffix(string(clientType), "-speech")
return IsValidClientType(clientType) &&
!strings.HasSuffix(string(clientType), "-speech") &&
!strings.HasSuffix(string(clientType), "-transcription")
}
// SelectMemoryModel selects a chat model for memory operations.
+26 -19
View File
@@ -9,29 +9,36 @@ import (
type ModelType string
const (
ModelTypeChat ModelType = "chat"
ModelTypeEmbedding ModelType = "embedding"
ModelTypeSpeech ModelType = "speech"
ModelTypeChat ModelType = "chat"
ModelTypeEmbedding ModelType = "embedding"
ModelTypeSpeech ModelType = "speech"
ModelTypeTranscription ModelType = "transcription"
)
type ClientType string
const (
ClientTypeOpenAIResponses ClientType = "openai-responses"
ClientTypeOpenAICompletions ClientType = "openai-completions"
ClientTypeAnthropicMessages ClientType = "anthropic-messages"
ClientTypeGoogleGenerativeAI ClientType = "google-generative-ai"
ClientTypeOpenAICodex ClientType = "openai-codex"
ClientTypeGitHubCopilot ClientType = "github-copilot"
ClientTypeEdgeSpeech ClientType = "edge-speech"
ClientTypeOpenAISpeech ClientType = "openai-speech"
ClientTypeOpenRouterSpeech ClientType = "openrouter-speech"
ClientTypeElevenLabsSpeech ClientType = "elevenlabs-speech"
ClientTypeDeepgramSpeech ClientType = "deepgram-speech"
ClientTypeMiniMaxSpeech ClientType = "minimax-speech"
ClientTypeVolcengineSpeech ClientType = "volcengine-speech"
ClientTypeAlibabaSpeech ClientType = "alibabacloud-speech"
ClientTypeMicrosoftSpeech ClientType = "microsoft-speech"
ClientTypeOpenAIResponses ClientType = "openai-responses"
ClientTypeOpenAICompletions ClientType = "openai-completions"
ClientTypeAnthropicMessages ClientType = "anthropic-messages"
ClientTypeGoogleGenerativeAI ClientType = "google-generative-ai"
ClientTypeOpenAICodex ClientType = "openai-codex"
ClientTypeGitHubCopilot ClientType = "github-copilot"
ClientTypeEdgeSpeech ClientType = "edge-speech"
ClientTypeOpenAISpeech ClientType = "openai-speech"
ClientTypeOpenAITranscription ClientType = "openai-transcription"
ClientTypeOpenRouterSpeech ClientType = "openrouter-speech"
ClientTypeOpenRouterTranscription ClientType = "openrouter-transcription"
ClientTypeElevenLabsSpeech ClientType = "elevenlabs-speech"
ClientTypeElevenLabsTranscription ClientType = "elevenlabs-transcription"
ClientTypeDeepgramSpeech ClientType = "deepgram-speech"
ClientTypeDeepgramTranscription ClientType = "deepgram-transcription"
ClientTypeMiniMaxSpeech ClientType = "minimax-speech"
ClientTypeVolcengineSpeech ClientType = "volcengine-speech"
ClientTypeAlibabaSpeech ClientType = "alibabacloud-speech"
ClientTypeMicrosoftSpeech ClientType = "microsoft-speech"
ClientTypeGoogleSpeech ClientType = "google-speech"
ClientTypeGoogleTranscription ClientType = "google-transcription"
)
const (
@@ -88,7 +95,7 @@ func (m *Model) Validate() error {
if _, err := uuid.Parse(m.ProviderID); err != nil {
return errors.New("provider ID must be a valid UUID")
}
if m.Type != ModelTypeChat && m.Type != ModelTypeEmbedding && m.Type != ModelTypeSpeech {
if m.Type != ModelTypeChat && m.Type != ModelTypeEmbedding && m.Type != ModelTypeSpeech && m.Type != ModelTypeTranscription {
return errors.New("invalid model type")
}
if m.Type == ModelTypeEmbedding {