Feat/speech support (#392)

* feat: expand speech provider support with new client types and configuration schema

* feat: add icon support for speech providers and update related configurations

* feat: add SVG support for Deepgram and Elevenlabs with Vue components

* feat: except *-speech client type in llm provider

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: remove go.mod replace

* feat: enhance speech provider functionality with advanced settings and model import capabilities

* chore: update go module dependencies

* feat: Ear and Mouth

* fix: separate ear/mouth page

* fix: separate audio domain and restore transcription templates

Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs.

---------

Co-authored-by: aki <arisu@ieee.org>
This commit is contained in:
Acbox
2026-04-22 00:09:46 +08:00
committed by GitHub
parent 8d78925a23
commit c9dcfe287f
70 changed files with 6612 additions and 1692 deletions
+15 -7
View File
@@ -126,9 +126,9 @@ func (s *Service) List(ctx context.Context) ([]GetResponse, error) {
return s.convertToGetResponseList(dbModels), nil
}
// ListByType returns models filtered by type (chat, embedding, or speech).
// ListByType returns models filtered by type.
func (s *Service) ListByType(ctx context.Context, modelType ModelType) ([]GetResponse, error) {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech && modelType != ModelTypeTranscription {
return nil, fmt.Errorf("invalid model type: %s", modelType)
}
@@ -165,7 +165,7 @@ func (s *Service) ListEnabled(ctx context.Context) ([]GetResponse, error) {
// ListEnabledByType returns models from enabled providers filtered by type.
func (s *Service) ListEnabledByType(ctx context.Context, modelType ModelType) ([]GetResponse, error) {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech && modelType != ModelTypeTranscription {
return nil, fmt.Errorf("invalid model type: %s", modelType)
}
dbModels, err := s.queries.ListEnabledModelsByType(ctx, string(modelType))
@@ -206,7 +206,7 @@ func (s *Service) ListByProviderID(ctx context.Context, providerID string) ([]Ge
// ListByProviderIDAndType returns models filtered by provider ID and type.
func (s *Service) ListByProviderIDAndType(ctx context.Context, providerID string, modelType ModelType) ([]GetResponse, error) {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech && modelType != ModelTypeTranscription {
return nil, fmt.Errorf("invalid model type: %s", modelType)
}
if strings.TrimSpace(providerID) == "" {
@@ -361,7 +361,7 @@ func (s *Service) Count(ctx context.Context) (int64, error) {
// CountByType returns the number of models of a specific type.
func (s *Service) CountByType(ctx context.Context, modelType ModelType) (int64, error) {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech {
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech && modelType != ModelTypeTranscription {
return 0, fmt.Errorf("invalid model type: %s", modelType)
}
@@ -432,13 +432,19 @@ func IsValidClientType(clientType ClientType) bool {
ClientTypeGitHubCopilot,
ClientTypeEdgeSpeech,
ClientTypeOpenAISpeech,
ClientTypeOpenAITranscription,
ClientTypeOpenRouterSpeech,
ClientTypeOpenRouterTranscription,
ClientTypeElevenLabsSpeech,
ClientTypeElevenLabsTranscription,
ClientTypeDeepgramSpeech,
ClientTypeDeepgramTranscription,
ClientTypeMiniMaxSpeech,
ClientTypeVolcengineSpeech,
ClientTypeAlibabaSpeech,
ClientTypeMicrosoftSpeech:
ClientTypeMicrosoftSpeech,
ClientTypeGoogleSpeech,
ClientTypeGoogleTranscription:
return true
default:
return false
@@ -448,7 +454,9 @@ func IsValidClientType(clientType ClientType) bool {
// IsLLMClientType returns true if the client type belongs to the LLM domain
// (chat/embedding), excluding speech-only types (any type ending in "-speech").
func IsLLMClientType(clientType ClientType) bool {
return IsValidClientType(clientType) && !strings.HasSuffix(string(clientType), "-speech")
return IsValidClientType(clientType) &&
!strings.HasSuffix(string(clientType), "-speech") &&
!strings.HasSuffix(string(clientType), "-transcription")
}
// SelectMemoryModel selects a chat model for memory operations.