mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-27 07:16:19 +09:00
Feat/speech support (#392)
* feat: expand speech provider support with new client types and configuration schema
* feat: add icon support for speech providers and update related configurations
* feat: add SVG support for Deepgram and Elevenlabs with Vue components
* feat: exclude *-speech client type in llm provider
* feat: enhance speech provider functionality with advanced settings and model import capabilities
* chore: remove go.mod replace
* feat: enhance speech provider functionality with advanced settings and model import capabilities
* chore: update go module dependencies
* feat: Ear and Mouth
* fix: separate ear/mouth page
* fix: separate audio domain and restore transcription templates

  Move speech and transcription internals into the audio domain, restore template-driven transcription providers, and regenerate Swagger/SDK so the frontend can stop hand-calling /transcription-* APIs.

---------

Co-authored-by: aki <arisu@ieee.org>
This commit is contained in:
@@ -77,13 +77,19 @@ CREATE TABLE IF NOT EXISTS providers (
|
||||
'github-copilot',
|
||||
'edge-speech',
|
||||
'openai-speech',
|
||||
'openai-transcription',
|
||||
'openrouter-speech',
|
||||
'openrouter-transcription',
|
||||
'elevenlabs-speech',
|
||||
'elevenlabs-transcription',
|
||||
'deepgram-speech',
|
||||
'deepgram-transcription',
|
||||
'minimax-speech',
|
||||
'volcengine-speech',
|
||||
'alibabacloud-speech',
|
||||
'microsoft-speech'
|
||||
'microsoft-speech',
|
||||
'google-speech',
|
||||
'google-transcription'
|
||||
))
|
||||
);
|
||||
|
||||
@@ -108,7 +114,7 @@ CREATE TABLE IF NOT EXISTS models (
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
CONSTRAINT models_provider_id_model_id_unique UNIQUE (provider_id, model_id),
|
||||
CONSTRAINT models_type_check CHECK (type IN ('chat', 'embedding', 'speech'))
|
||||
CONSTRAINT models_type_check CHECK (type IN ('chat', 'embedding', 'speech', 'transcription'))
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS model_variants (
|
||||
@@ -170,6 +176,7 @@ CREATE TABLE IF NOT EXISTS bots (
|
||||
image_model_id UUID REFERENCES models(id) ON DELETE SET NULL,
|
||||
discuss_probe_model_id UUID REFERENCES models(id) ON DELETE SET NULL,
|
||||
tts_model_id UUID REFERENCES models(id) ON DELETE SET NULL,
|
||||
transcription_model_id UUID REFERENCES models(id) ON DELETE SET NULL,
|
||||
browser_context_id UUID REFERENCES browser_contexts(id) ON DELETE SET NULL,
|
||||
persist_full_tool_results BOOLEAN NOT NULL DEFAULT false,
|
||||
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
-- 0069_add_transcription_models_and_speech_domain
-- Rollback: remove the 'transcription' model type and the 'google-speech'
-- client type, then restore the narrower CHECK constraints.

-- Rows the restored constraints would reject are deleted first; adding a
-- CHECK constraint validates existing rows, so they must be gone by then.
DELETE FROM models
WHERE type = 'transcription';

DELETE FROM providers
WHERE client_type = 'google-speech';

-- Each constraint is swapped (drop + re-add) inside one ALTER TABLE statement.
ALTER TABLE models
    DROP CONSTRAINT IF EXISTS models_type_check,
    ADD CONSTRAINT models_type_check
        CHECK (type IN ('chat', 'embedding', 'speech'));

ALTER TABLE providers
    DROP CONSTRAINT IF EXISTS providers_client_type_check,
    ADD CONSTRAINT providers_client_type_check CHECK (client_type IN (
        -- client types without a '-speech' suffix
        'openai-responses',
        'openai-completions',
        'anthropic-messages',
        'google-generative-ai',
        'openai-codex',
        'github-copilot',
        -- '-speech' client types ('google-speech' intentionally absent)
        'edge-speech',
        'openai-speech',
        'openrouter-speech',
        'elevenlabs-speech',
        'deepgram-speech',
        'minimax-speech',
        'volcengine-speech',
        'alibabacloud-speech',
        'microsoft-speech'
    ));
|
||||
@@ -0,0 +1,31 @@
|
||||
-- 0069_add_transcription_models_and_speech_domain
-- Forward migration: allow the 'google-speech' provider client type and the
-- 'transcription' model type by widening both CHECK constraints.

-- Each constraint is swapped (drop + re-add) inside one ALTER TABLE statement.
ALTER TABLE providers
    DROP CONSTRAINT IF EXISTS providers_client_type_check,
    ADD CONSTRAINT providers_client_type_check CHECK (client_type IN (
        -- client types without a '-speech' suffix
        'openai-responses',
        'openai-completions',
        'anthropic-messages',
        'google-generative-ai',
        'openai-codex',
        'github-copilot',
        -- '-speech' client types ('google-speech' is the new entry)
        'edge-speech',
        'openai-speech',
        'openrouter-speech',
        'elevenlabs-speech',
        'deepgram-speech',
        'minimax-speech',
        'volcengine-speech',
        'alibabacloud-speech',
        'microsoft-speech',
        'google-speech'
    ));

ALTER TABLE models
    DROP CONSTRAINT IF EXISTS models_type_check,
    ADD CONSTRAINT models_type_check
        CHECK (type IN ('chat', 'embedding', 'speech', 'transcription'));
|
||||
@@ -0,0 +1,8 @@
|
||||
-- 0070_add_bot_transcription_model
-- Rollback: drop bots.transcription_model_id together with its foreign key.

-- The foreign-key constraint is dropped before the column, in the same order
-- as the original two statements, but within a single ALTER TABLE.
ALTER TABLE bots
    DROP CONSTRAINT IF EXISTS bots_transcription_model_id_fkey,
    DROP COLUMN IF EXISTS transcription_model_id;
|
||||
@@ -0,0 +1,5 @@
|
||||
-- 0070_add_bot_transcription_model
-- Forward migration: give each bot an optional default speech-to-text model.

-- Nullable reference to models(id); deleting the referenced model resets the
-- column to NULL instead of blocking the delete.
ALTER TABLE bots
    ADD COLUMN IF NOT EXISTS transcription_model_id UUID
        REFERENCES models (id)
        ON DELETE SET NULL;
|
||||
@@ -0,0 +1,33 @@
|
||||
-- 0071_split_transcription_providers
-- Rollback: remove the dedicated '-transcription' provider client types.

-- Providers using a '-transcription' client type are deleted first, because
-- the restored CHECK constraint below no longer permits those values.
-- NOTE(review): rows in other tables that reference these providers are not
-- handled here; presumably their foreign keys cascade — confirm.
DELETE FROM providers
WHERE client_type IN (
    'openai-transcription',
    'openrouter-transcription',
    'elevenlabs-transcription',
    'deepgram-transcription',
    'google-transcription'
);

-- The constraint is swapped (drop + re-add) inside one ALTER TABLE statement.
ALTER TABLE providers
    DROP CONSTRAINT IF EXISTS providers_client_type_check,
    ADD CONSTRAINT providers_client_type_check CHECK (client_type IN (
        -- client types without a '-speech' suffix
        'openai-responses',
        'openai-completions',
        'anthropic-messages',
        'google-generative-ai',
        'openai-codex',
        'github-copilot',
        -- '-speech' client types
        'edge-speech',
        'openai-speech',
        'openrouter-speech',
        'elevenlabs-speech',
        'deepgram-speech',
        'minimax-speech',
        'volcengine-speech',
        'alibabacloud-speech',
        'microsoft-speech',
        'google-speech'
    ));
|
||||
@@ -0,0 +1,29 @@
|
||||
-- 0071_split_transcription_providers
-- Forward migration: permit dedicated '-transcription' provider client types
-- alongside the existing '-speech' ones.

-- The constraint is swapped (drop + re-add) inside one ALTER TABLE statement.
ALTER TABLE providers
    DROP CONSTRAINT IF EXISTS providers_client_type_check,
    ADD CONSTRAINT providers_client_type_check CHECK (client_type IN (
        -- client types without a '-speech' / '-transcription' suffix
        'openai-responses',
        'openai-completions',
        'anthropic-messages',
        'google-generative-ai',
        'openai-codex',
        'github-copilot',
        -- '-speech' client types, each followed by its '-transcription'
        -- counterpart where one is allowed
        'edge-speech',
        'openai-speech',
        'openai-transcription',
        'openrouter-speech',
        'openrouter-transcription',
        'elevenlabs-speech',
        'elevenlabs-transcription',
        'deepgram-speech',
        'deepgram-transcription',
        'minimax-speech',
        'volcengine-speech',
        'alibabacloud-speech',
        'microsoft-speech',
        'google-speech',
        'google-transcription'
    ));
|
||||
Reference in New Issue
Block a user