mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-25 07:00:48 +09:00
feat: transcription support (#394)
* feat: expand speech provider support with new client types and configuration schema * feat: add icon support for speech providers and update related configurations * feat: add SVG support for Deepgram and Elevenlabs with Vue components * feat: except *-speech client type in llm provider * feat: enhance speech provider functionality with advanced settings and model import capabilities * chore: remove go.mod replace * feat: enhance speech provider functionality with advanced settings and model import capabilities * chore: update go module dependencies * feat: Ear and Mouth * fix: separate ear/mouth page * fix: separate audio domain and restore transcription templates Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs. --------- Co-authored-by: aki <arisu@ieee.org>
This commit is contained in:
@@ -126,9 +126,9 @@ func (s *Service) List(ctx context.Context) ([]GetResponse, error) {
|
||||
return s.convertToGetResponseList(dbModels), nil
|
||||
}
|
||||
|
||||
// ListByType returns models filtered by type (chat, embedding, or speech).
|
||||
// ListByType returns models filtered by type.
|
||||
func (s *Service) ListByType(ctx context.Context, modelType ModelType) ([]GetResponse, error) {
|
||||
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech {
|
||||
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech && modelType != ModelTypeTranscription {
|
||||
return nil, fmt.Errorf("invalid model type: %s", modelType)
|
||||
}
|
||||
|
||||
@@ -165,7 +165,7 @@ func (s *Service) ListEnabled(ctx context.Context) ([]GetResponse, error) {
|
||||
|
||||
// ListEnabledByType returns models from enabled providers filtered by type.
|
||||
func (s *Service) ListEnabledByType(ctx context.Context, modelType ModelType) ([]GetResponse, error) {
|
||||
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech {
|
||||
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech && modelType != ModelTypeTranscription {
|
||||
return nil, fmt.Errorf("invalid model type: %s", modelType)
|
||||
}
|
||||
dbModels, err := s.queries.ListEnabledModelsByType(ctx, string(modelType))
|
||||
@@ -206,7 +206,7 @@ func (s *Service) ListByProviderID(ctx context.Context, providerID string) ([]Ge
|
||||
|
||||
// ListByProviderIDAndType returns models filtered by provider ID and type.
|
||||
func (s *Service) ListByProviderIDAndType(ctx context.Context, providerID string, modelType ModelType) ([]GetResponse, error) {
|
||||
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech {
|
||||
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech && modelType != ModelTypeTranscription {
|
||||
return nil, fmt.Errorf("invalid model type: %s", modelType)
|
||||
}
|
||||
if strings.TrimSpace(providerID) == "" {
|
||||
@@ -361,7 +361,7 @@ func (s *Service) Count(ctx context.Context) (int64, error) {
|
||||
|
||||
// CountByType returns the number of models of a specific type.
|
||||
func (s *Service) CountByType(ctx context.Context, modelType ModelType) (int64, error) {
|
||||
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech {
|
||||
if modelType != ModelTypeChat && modelType != ModelTypeEmbedding && modelType != ModelTypeSpeech && modelType != ModelTypeTranscription {
|
||||
return 0, fmt.Errorf("invalid model type: %s", modelType)
|
||||
}
|
||||
|
||||
@@ -432,13 +432,19 @@ func IsValidClientType(clientType ClientType) bool {
|
||||
ClientTypeGitHubCopilot,
|
||||
ClientTypeEdgeSpeech,
|
||||
ClientTypeOpenAISpeech,
|
||||
ClientTypeOpenAITranscription,
|
||||
ClientTypeOpenRouterSpeech,
|
||||
ClientTypeOpenRouterTranscription,
|
||||
ClientTypeElevenLabsSpeech,
|
||||
ClientTypeElevenLabsTranscription,
|
||||
ClientTypeDeepgramSpeech,
|
||||
ClientTypeDeepgramTranscription,
|
||||
ClientTypeMiniMaxSpeech,
|
||||
ClientTypeVolcengineSpeech,
|
||||
ClientTypeAlibabaSpeech,
|
||||
ClientTypeMicrosoftSpeech:
|
||||
ClientTypeMicrosoftSpeech,
|
||||
ClientTypeGoogleSpeech,
|
||||
ClientTypeGoogleTranscription:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
@@ -448,7 +454,9 @@ func IsValidClientType(clientType ClientType) bool {
|
||||
// IsLLMClientType returns true if the client type belongs to the LLM domain
|
||||
// (chat/embedding), excluding audio-only types (any type ending in "-speech" or "-transcription").
|
||||
func IsLLMClientType(clientType ClientType) bool {
|
||||
return IsValidClientType(clientType) && !strings.HasSuffix(string(clientType), "-speech")
|
||||
return IsValidClientType(clientType) &&
|
||||
!strings.HasSuffix(string(clientType), "-speech") &&
|
||||
!strings.HasSuffix(string(clientType), "-transcription")
|
||||
}
|
||||
|
||||
// SelectMemoryModel selects a chat model for memory operations.
|
||||
|
||||
+26
-19
@@ -9,29 +9,36 @@ import (
|
||||
type ModelType string
|
||||
|
||||
const (
|
||||
ModelTypeChat ModelType = "chat"
|
||||
ModelTypeEmbedding ModelType = "embedding"
|
||||
ModelTypeSpeech ModelType = "speech"
|
||||
ModelTypeChat ModelType = "chat"
|
||||
ModelTypeEmbedding ModelType = "embedding"
|
||||
ModelTypeSpeech ModelType = "speech"
|
||||
ModelTypeTranscription ModelType = "transcription"
|
||||
)
|
||||
|
||||
type ClientType string
|
||||
|
||||
const (
|
||||
ClientTypeOpenAIResponses ClientType = "openai-responses"
|
||||
ClientTypeOpenAICompletions ClientType = "openai-completions"
|
||||
ClientTypeAnthropicMessages ClientType = "anthropic-messages"
|
||||
ClientTypeGoogleGenerativeAI ClientType = "google-generative-ai"
|
||||
ClientTypeOpenAICodex ClientType = "openai-codex"
|
||||
ClientTypeGitHubCopilot ClientType = "github-copilot"
|
||||
ClientTypeEdgeSpeech ClientType = "edge-speech"
|
||||
ClientTypeOpenAISpeech ClientType = "openai-speech"
|
||||
ClientTypeOpenRouterSpeech ClientType = "openrouter-speech"
|
||||
ClientTypeElevenLabsSpeech ClientType = "elevenlabs-speech"
|
||||
ClientTypeDeepgramSpeech ClientType = "deepgram-speech"
|
||||
ClientTypeMiniMaxSpeech ClientType = "minimax-speech"
|
||||
ClientTypeVolcengineSpeech ClientType = "volcengine-speech"
|
||||
ClientTypeAlibabaSpeech ClientType = "alibabacloud-speech"
|
||||
ClientTypeMicrosoftSpeech ClientType = "microsoft-speech"
|
||||
ClientTypeOpenAIResponses ClientType = "openai-responses"
|
||||
ClientTypeOpenAICompletions ClientType = "openai-completions"
|
||||
ClientTypeAnthropicMessages ClientType = "anthropic-messages"
|
||||
ClientTypeGoogleGenerativeAI ClientType = "google-generative-ai"
|
||||
ClientTypeOpenAICodex ClientType = "openai-codex"
|
||||
ClientTypeGitHubCopilot ClientType = "github-copilot"
|
||||
ClientTypeEdgeSpeech ClientType = "edge-speech"
|
||||
ClientTypeOpenAISpeech ClientType = "openai-speech"
|
||||
ClientTypeOpenAITranscription ClientType = "openai-transcription"
|
||||
ClientTypeOpenRouterSpeech ClientType = "openrouter-speech"
|
||||
ClientTypeOpenRouterTranscription ClientType = "openrouter-transcription"
|
||||
ClientTypeElevenLabsSpeech ClientType = "elevenlabs-speech"
|
||||
ClientTypeElevenLabsTranscription ClientType = "elevenlabs-transcription"
|
||||
ClientTypeDeepgramSpeech ClientType = "deepgram-speech"
|
||||
ClientTypeDeepgramTranscription ClientType = "deepgram-transcription"
|
||||
ClientTypeMiniMaxSpeech ClientType = "minimax-speech"
|
||||
ClientTypeVolcengineSpeech ClientType = "volcengine-speech"
|
||||
ClientTypeAlibabaSpeech ClientType = "alibabacloud-speech"
|
||||
ClientTypeMicrosoftSpeech ClientType = "microsoft-speech"
|
||||
ClientTypeGoogleSpeech ClientType = "google-speech"
|
||||
ClientTypeGoogleTranscription ClientType = "google-transcription"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -88,7 +95,7 @@ func (m *Model) Validate() error {
|
||||
if _, err := uuid.Parse(m.ProviderID); err != nil {
|
||||
return errors.New("provider ID must be a valid UUID")
|
||||
}
|
||||
if m.Type != ModelTypeChat && m.Type != ModelTypeEmbedding && m.Type != ModelTypeSpeech {
|
||||
if m.Type != ModelTypeChat && m.Type != ModelTypeEmbedding && m.Type != ModelTypeSpeech && m.Type != ModelTypeTranscription {
|
||||
return errors.New("invalid model type")
|
||||
}
|
||||
if m.Type == ModelTypeEmbedding {
|
||||
|
||||
Reference in New Issue
Block a user