mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-25 07:00:48 +09:00
feat: add image generation model and generate_image agent tool
Bots can now be configured with an image generation model (must have image-output compatibility). When set, the agent exposes a generate_image tool that calls the model via the Twilight AI SDK, saves the result to the bot container filesystem, and returns the file path.

- Add image_model_id column to bots table (migration 0053)
- Update settings SQL queries, service, and types
- New ImageGenProvider tool provider in internal/agent/tools/
- Wire provider in both cmd/agent and cmd/memoh entry points
- Add image model selector to frontend bot settings with compat filtering
- Regenerate swagger, SDK types, and sqlc code
This commit is contained in:
@@ -0,0 +1,198 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
sdk "github.com/memohai/twilight-ai/sdk"
|
||||
|
||||
"github.com/memohai/memoh/internal/db/sqlc"
|
||||
"github.com/memohai/memoh/internal/models"
|
||||
"github.com/memohai/memoh/internal/providers"
|
||||
"github.com/memohai/memoh/internal/settings"
|
||||
"github.com/memohai/memoh/internal/workspace/bridge"
|
||||
)
|
||||
|
||||
// imageGenDir is the directory under which generated images are written. It is
// used both for the "mkdir -p" command and as the prefix of the file path
// handed to the container client's WriteFile.
const imageGenDir = "/data/generated-images"
|
||||
|
||||
type ImageGenProvider struct {
|
||||
logger *slog.Logger
|
||||
settings *settings.Service
|
||||
models *models.Service
|
||||
queries *sqlc.Queries
|
||||
containers bridge.Provider
|
||||
dataMount string
|
||||
}
|
||||
|
||||
func NewImageGenProvider(
|
||||
log *slog.Logger,
|
||||
settingsSvc *settings.Service,
|
||||
modelsSvc *models.Service,
|
||||
queries *sqlc.Queries,
|
||||
containers bridge.Provider,
|
||||
dataMount string,
|
||||
) *ImageGenProvider {
|
||||
if log == nil {
|
||||
log = slog.Default()
|
||||
}
|
||||
return &ImageGenProvider{
|
||||
logger: log.With(slog.String("tool", "image_gen")),
|
||||
settings: settingsSvc,
|
||||
models: modelsSvc,
|
||||
queries: queries,
|
||||
containers: containers,
|
||||
dataMount: dataMount,
|
||||
}
|
||||
}
|
||||
|
||||
func (p *ImageGenProvider) Tools(ctx context.Context, session SessionContext) ([]sdk.Tool, error) {
|
||||
if session.IsSubagent || p.settings == nil || p.models == nil || p.queries == nil {
|
||||
return nil, nil
|
||||
}
|
||||
botID := strings.TrimSpace(session.BotID)
|
||||
if botID == "" {
|
||||
return nil, nil
|
||||
}
|
||||
botSettings, err := p.settings.GetBot(ctx, botID)
|
||||
if err != nil {
|
||||
return nil, nil
|
||||
}
|
||||
if strings.TrimSpace(botSettings.ImageModelID) == "" {
|
||||
return nil, nil
|
||||
}
|
||||
sess := session
|
||||
return []sdk.Tool{
|
||||
{
|
||||
Name: "generate_image",
|
||||
Description: "Generate an image from a text description using the configured image generation model. Returns the file path of the generated image in the workspace.",
|
||||
Parameters: map[string]any{
|
||||
"type": "object",
|
||||
"properties": map[string]any{
|
||||
"prompt": map[string]any{"type": "string", "description": "Detailed description of the image to generate"},
|
||||
"size": map[string]any{"type": "string", "description": "Image size, e.g. 1024x1024, 1792x1024, 1024x1792. Defaults to 1024x1024."},
|
||||
},
|
||||
"required": []string{"prompt"},
|
||||
},
|
||||
Execute: func(execCtx *sdk.ToolExecContext, input any) (any, error) {
|
||||
return p.execGenerateImage(execCtx.Context, sess, inputAsMap(input))
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (p *ImageGenProvider) execGenerateImage(ctx context.Context, session SessionContext, args map[string]any) (any, error) {
|
||||
botID := strings.TrimSpace(session.BotID)
|
||||
if botID == "" {
|
||||
return nil, errors.New("bot_id is required")
|
||||
}
|
||||
prompt := strings.TrimSpace(StringArg(args, "prompt"))
|
||||
if prompt == "" {
|
||||
return nil, errors.New("prompt is required")
|
||||
}
|
||||
size := strings.TrimSpace(StringArg(args, "size"))
|
||||
if size == "" {
|
||||
size = "1024x1024"
|
||||
}
|
||||
|
||||
botSettings, err := p.settings.GetBot(ctx, botID)
|
||||
if err != nil {
|
||||
return nil, errors.New("failed to load bot settings")
|
||||
}
|
||||
imageModelID := strings.TrimSpace(botSettings.ImageModelID)
|
||||
if imageModelID == "" {
|
||||
return nil, errors.New("no image generation model configured")
|
||||
}
|
||||
|
||||
modelResp, err := p.models.GetByID(ctx, imageModelID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load image model: %w", err)
|
||||
}
|
||||
if !modelResp.HasCompatibility(models.CompatImageOutput) {
|
||||
return nil, errors.New("configured model does not support image generation")
|
||||
}
|
||||
|
||||
provider, err := models.FetchProviderByID(ctx, p.queries, modelResp.LlmProviderID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load model provider: %w", err)
|
||||
}
|
||||
|
||||
authResolver := providers.NewService(nil, p.queries, "")
|
||||
creds, err := authResolver.ResolveModelCredentials(ctx, provider)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to resolve provider credentials: %w", err)
|
||||
}
|
||||
|
||||
sdkModel := models.NewSDKChatModel(models.SDKModelConfig{
|
||||
ModelID: modelResp.ModelID,
|
||||
ClientType: provider.ClientType,
|
||||
APIKey: creds.APIKey,
|
||||
BaseURL: provider.BaseUrl,
|
||||
})
|
||||
|
||||
userMsg := fmt.Sprintf("Generate an image with the following description. Size: %s\n\n%s", size, prompt)
|
||||
result, err := sdk.GenerateTextResult(ctx,
|
||||
sdk.WithModel(sdkModel),
|
||||
sdk.WithMessages([]sdk.Message{
|
||||
{Role: sdk.MessageRoleUser, Content: []sdk.MessagePart{sdk.TextPart{Text: userMsg}}},
|
||||
}),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("image generation failed: %w", err)
|
||||
}
|
||||
|
||||
if len(result.Files) == 0 {
|
||||
if result.Text != "" {
|
||||
return map[string]any{"error": "no image generated", "model_response": result.Text}, nil
|
||||
}
|
||||
return nil, errors.New("no image was generated by the model")
|
||||
}
|
||||
|
||||
file := result.Files[0]
|
||||
imgBytes, err := base64.StdEncoding.DecodeString(file.Data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode generated image: %w", err)
|
||||
}
|
||||
|
||||
ext := "png"
|
||||
switch {
|
||||
case strings.Contains(file.MediaType, "jpeg"), strings.Contains(file.MediaType, "jpg"):
|
||||
ext = "jpg"
|
||||
case strings.Contains(file.MediaType, "webp"):
|
||||
ext = "webp"
|
||||
}
|
||||
|
||||
containerPath := fmt.Sprintf("%s/%d.%s", imageGenDir, time.Now().UnixMilli(), ext)
|
||||
|
||||
client, clientErr := p.containers.MCPClient(ctx, botID)
|
||||
if clientErr != nil {
|
||||
return map[string]any{
|
||||
"content": []map[string]any{
|
||||
{"type": "text", "text": "Image generated (container not reachable, not saved to disk)"},
|
||||
{"type": "image", "data": file.Data, "mimeType": file.MediaType},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
mkdirCmd := fmt.Sprintf("mkdir -p %s", imageGenDir)
|
||||
_, _ = client.Exec(ctx, mkdirCmd, "/", 5)
|
||||
|
||||
if writeErr := client.WriteFile(ctx, containerPath, imgBytes); writeErr != nil {
|
||||
return map[string]any{
|
||||
"content": []map[string]any{
|
||||
{"type": "text", "text": fmt.Sprintf("Image generated (failed to save: %s)", writeErr.Error())},
|
||||
{"type": "image", "data": file.Data, "mimeType": file.MediaType},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
return map[string]any{
|
||||
"path": containerPath,
|
||||
"media_type": file.MediaType,
|
||||
"size_bytes": len(imgBytes),
|
||||
}, nil
|
||||
}
|
||||
Reference in New Issue
Block a user