mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-27 07:16:19 +09:00
8d5c38f0e5
* refactor: unify providers and models tables
- Rename `llm_providers` → `providers`, `llm_provider_oauth_tokens` → `provider_oauth_tokens`
- Remove `tts_providers` and `tts_models` tables; speech models now live in the unified `models` table with `type = 'speech'`
- Replace top-level `api_key`/`base_url` columns with a JSONB `config` field on `providers`
- Rename `llm_provider_id` → `provider_id` across all references
- Add `edge-speech` client type and `conf/providers/edge.yaml` default provider
- Create new read-only speech endpoints (`/speech-providers`, `/speech-models`) backed by filtered views of the unified tables
- Remove old TTS CRUD handlers; simplify speech page to read-only + test
- Update registry loader to skip malformed YAML files instead of failing entirely
- Fix YAML quoting for model names containing colons in openrouter.yaml
- Regenerate sqlc, swagger, and TypeScript SDK
* fix: exclude speech providers from providers list endpoint
ListProviders now filters out client_type matching '%-speech' so Edge
and future speech providers no longer appear on the Providers page.
ListSpeechProviders uses the same pattern match instead of hard-coding
'edge-speech'.
* fix: use explicit client_type list instead of LIKE pattern
Replace '%-speech' pattern with explicit IN ('edge-speech') for both
ListProviders (exclusion) and ListSpeechProviders (inclusion). New
speech client types must be added to both queries.
* fix: use EXECUTE for dynamic SQL in migrations referencing old schema
PL/pgSQL pre-validates column/table references in static SQL statements
inside DO blocks before evaluating IF/RETURN guards. This caused
migrations 0010-0061 to fail on fresh databases where the canonical
schema uses `providers`/`provider_id` instead of `llm_providers`/
`llm_provider_id`.
Wrap all SQL that references potentially non-existent old schema objects
(llm_providers, llm_provider_id, tts_providers, tts_models, etc.) in
EXECUTE strings so they are only parsed at runtime when actually reached.
* fix: revert canonical schema to use llm_providers for migration compatibility
The CI migrations workflow (up → down → up) failed because 0061 down
renames `providers` back to `llm_providers`, but 0001 down only dropped
`providers` — leaving `llm_providers` as a remnant. On the second
migrate up, 0010 found the stale `llm_providers` and tried to reference
`models.llm_provider_id` which no longer existed.
Revert 0001 canonical schema to use original names (llm_providers,
tts_providers, tts_models) so incremental migrations work naturally and
0061 handles the final rename. Remove EXECUTE wrappers and unnecessary
guards from migrations that now always operate on llm_providers.
* fix: icons
* fix: sync canonical schema with 0061 migration to fix sqlc column mismatch
0001_init.up.sql still used old names (llm_providers, llm_provider_id)
and included dropped tts_providers/tts_models tables. sqlc could not
parse the PL/pgSQL EXECUTE in migration 0061, so generated code retained
stale columns (input_modalities, supports_reasoning) causing runtime
"column does not exist" errors when adding models.
- Update 0001_init.up.sql to current schema (providers, provider_id,
no tts tables, add provider_oauth_tokens)
- Use ALTER TABLE IF EXISTS in 0010/0041/0042 for backward compat
- Regenerate sqlc
* fix: guard all legacy migrations against fresh schema for CI compat
On fresh databases, 0001_init.up.sql creates providers/provider_id
(not llm_providers/llm_provider_id). Migrations 0013, 0041, 0046, 0047
referenced the old names without guards, causing CI migration failures.
- 0013: check llm_provider_id column exists before adding old constraint
- 0041: check llm_providers table exists before backfill/constraint DDL
- 0046: wrap CREATE TABLE in DO block with llm_providers existence check
- 0047: use ALTER TABLE IF EXISTS + DO block guard
199 lines
5.8 KiB
Go
199 lines
5.8 KiB
Go
package tools
|
|
|
|
import (
|
|
"context"
|
|
"encoding/base64"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"strings"
|
|
"time"
|
|
|
|
sdk "github.com/memohai/twilight-ai/sdk"
|
|
|
|
"github.com/memohai/memoh/internal/db/sqlc"
|
|
"github.com/memohai/memoh/internal/models"
|
|
"github.com/memohai/memoh/internal/providers"
|
|
"github.com/memohai/memoh/internal/settings"
|
|
"github.com/memohai/memoh/internal/workspace/bridge"
|
|
)
|
|
|
|
// imageGenDir is the directory generated images are written to; it is created
// with mkdir -p through the bot's container client before each write.
const imageGenDir = "/data/generated-images"
|
|
|
|
type ImageGenProvider struct {
|
|
logger *slog.Logger
|
|
settings *settings.Service
|
|
models *models.Service
|
|
queries *sqlc.Queries
|
|
containers bridge.Provider
|
|
dataMount string
|
|
}
|
|
|
|
func NewImageGenProvider(
|
|
log *slog.Logger,
|
|
settingsSvc *settings.Service,
|
|
modelsSvc *models.Service,
|
|
queries *sqlc.Queries,
|
|
containers bridge.Provider,
|
|
dataMount string,
|
|
) *ImageGenProvider {
|
|
if log == nil {
|
|
log = slog.Default()
|
|
}
|
|
return &ImageGenProvider{
|
|
logger: log.With(slog.String("tool", "image_gen")),
|
|
settings: settingsSvc,
|
|
models: modelsSvc,
|
|
queries: queries,
|
|
containers: containers,
|
|
dataMount: dataMount,
|
|
}
|
|
}
|
|
|
|
func (p *ImageGenProvider) Tools(ctx context.Context, session SessionContext) ([]sdk.Tool, error) {
|
|
if session.IsSubagent || p.settings == nil || p.models == nil || p.queries == nil {
|
|
return nil, nil
|
|
}
|
|
botID := strings.TrimSpace(session.BotID)
|
|
if botID == "" {
|
|
return nil, nil
|
|
}
|
|
botSettings, err := p.settings.GetBot(ctx, botID)
|
|
if err != nil {
|
|
return nil, nil
|
|
}
|
|
if strings.TrimSpace(botSettings.ImageModelID) == "" {
|
|
return nil, nil
|
|
}
|
|
sess := session
|
|
return []sdk.Tool{
|
|
{
|
|
Name: "generate_image",
|
|
Description: "Generate an image from a text description using the configured image generation model. Returns the file path of the generated image in the workspace.",
|
|
Parameters: map[string]any{
|
|
"type": "object",
|
|
"properties": map[string]any{
|
|
"prompt": map[string]any{"type": "string", "description": "Detailed description of the image to generate"},
|
|
"size": map[string]any{"type": "string", "description": "Image size, e.g. 1024x1024, 1792x1024, 1024x1792. Defaults to 1024x1024."},
|
|
},
|
|
"required": []string{"prompt"},
|
|
},
|
|
Execute: func(execCtx *sdk.ToolExecContext, input any) (any, error) {
|
|
return p.execGenerateImage(execCtx.Context, sess, inputAsMap(input))
|
|
},
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
func (p *ImageGenProvider) execGenerateImage(ctx context.Context, session SessionContext, args map[string]any) (any, error) {
|
|
botID := strings.TrimSpace(session.BotID)
|
|
if botID == "" {
|
|
return nil, errors.New("bot_id is required")
|
|
}
|
|
prompt := strings.TrimSpace(StringArg(args, "prompt"))
|
|
if prompt == "" {
|
|
return nil, errors.New("prompt is required")
|
|
}
|
|
size := strings.TrimSpace(StringArg(args, "size"))
|
|
if size == "" {
|
|
size = "1024x1024"
|
|
}
|
|
|
|
botSettings, err := p.settings.GetBot(ctx, botID)
|
|
if err != nil {
|
|
return nil, errors.New("failed to load bot settings")
|
|
}
|
|
imageModelID := strings.TrimSpace(botSettings.ImageModelID)
|
|
if imageModelID == "" {
|
|
return nil, errors.New("no image generation model configured")
|
|
}
|
|
|
|
modelResp, err := p.models.GetByID(ctx, imageModelID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to load image model: %w", err)
|
|
}
|
|
if !modelResp.HasCompatibility(models.CompatImageOutput) {
|
|
return nil, errors.New("configured model does not support image generation")
|
|
}
|
|
|
|
provider, err := models.FetchProviderByID(ctx, p.queries, modelResp.ProviderID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to load model provider: %w", err)
|
|
}
|
|
|
|
authResolver := providers.NewService(nil, p.queries, "")
|
|
creds, err := authResolver.ResolveModelCredentials(ctx, provider)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to resolve provider credentials: %w", err)
|
|
}
|
|
|
|
sdkModel := models.NewSDKChatModel(models.SDKModelConfig{
|
|
ModelID: modelResp.ModelID,
|
|
ClientType: provider.ClientType,
|
|
APIKey: creds.APIKey,
|
|
BaseURL: providers.ProviderConfigString(provider, "base_url"),
|
|
})
|
|
|
|
userMsg := fmt.Sprintf("Generate an image with the following description. Size: %s\n\n%s", size, prompt)
|
|
result, err := sdk.GenerateTextResult(ctx,
|
|
sdk.WithModel(sdkModel),
|
|
sdk.WithMessages([]sdk.Message{
|
|
{Role: sdk.MessageRoleUser, Content: []sdk.MessagePart{sdk.TextPart{Text: userMsg}}},
|
|
}),
|
|
)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("image generation failed: %w", err)
|
|
}
|
|
|
|
if len(result.Files) == 0 {
|
|
if result.Text != "" {
|
|
return map[string]any{"error": "no image generated", "model_response": result.Text}, nil
|
|
}
|
|
return nil, errors.New("no image was generated by the model")
|
|
}
|
|
|
|
file := result.Files[0]
|
|
imgBytes, err := base64.StdEncoding.DecodeString(file.Data)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to decode generated image: %w", err)
|
|
}
|
|
|
|
ext := "png"
|
|
switch {
|
|
case strings.Contains(file.MediaType, "jpeg"), strings.Contains(file.MediaType, "jpg"):
|
|
ext = "jpg"
|
|
case strings.Contains(file.MediaType, "webp"):
|
|
ext = "webp"
|
|
}
|
|
|
|
containerPath := fmt.Sprintf("%s/%d.%s", imageGenDir, time.Now().UnixMilli(), ext)
|
|
|
|
client, clientErr := p.containers.MCPClient(ctx, botID)
|
|
if clientErr != nil {
|
|
return map[string]any{
|
|
"content": []map[string]any{
|
|
{"type": "text", "text": "Image generated (container not reachable, not saved to disk)"},
|
|
{"type": "image", "data": file.Data, "mimeType": file.MediaType},
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
mkdirCmd := fmt.Sprintf("mkdir -p %s", imageGenDir)
|
|
_, _ = client.Exec(ctx, mkdirCmd, "/", 5)
|
|
|
|
if writeErr := client.WriteFile(ctx, containerPath, imgBytes); writeErr != nil {
|
|
return map[string]any{
|
|
"content": []map[string]any{
|
|
{"type": "text", "text": fmt.Sprintf("Image generated (failed to save: %s)", writeErr.Error())},
|
|
{"type": "image", "data": file.Data, "mimeType": file.MediaType},
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
return map[string]any{
|
|
"path": containerPath,
|
|
"media_type": file.MediaType,
|
|
"size_bytes": len(imgBytes),
|
|
}, nil
|
|
}
|