package tools import ( "context" "encoding/base64" "errors" "fmt" "log/slog" "strings" "time" sdk "github.com/memohai/twilight-ai/sdk" "github.com/memohai/memoh/internal/db/sqlc" "github.com/memohai/memoh/internal/models" "github.com/memohai/memoh/internal/providers" "github.com/memohai/memoh/internal/settings" "github.com/memohai/memoh/internal/workspace/bridge" ) const imageGenDir = "/data/generated-images" type ImageGenProvider struct { logger *slog.Logger settings *settings.Service models *models.Service queries *sqlc.Queries containers bridge.Provider dataMount string } type generatedImageFile struct { Data string MediaType string } func NewImageGenProvider( log *slog.Logger, settingsSvc *settings.Service, modelsSvc *models.Service, queries *sqlc.Queries, containers bridge.Provider, dataMount string, ) *ImageGenProvider { if log == nil { log = slog.Default() } return &ImageGenProvider{ logger: log.With(slog.String("tool", "image_gen")), settings: settingsSvc, models: modelsSvc, queries: queries, containers: containers, dataMount: dataMount, } } func (p *ImageGenProvider) Tools(ctx context.Context, session SessionContext) ([]sdk.Tool, error) { if session.IsSubagent || p.settings == nil || p.models == nil || p.queries == nil { return nil, nil } botID := strings.TrimSpace(session.BotID) if botID == "" { return nil, nil } botSettings, err := p.settings.GetBot(ctx, botID) if err != nil { return nil, nil } if strings.TrimSpace(botSettings.ImageModelID) == "" { return nil, nil } modelResp, err := p.models.GetByID(ctx, botSettings.ImageModelID) if err != nil || !supportsImageGeneration(modelResp) { return nil, nil } sess := session return []sdk.Tool{ { Name: "generate_image", Description: "Generate an image from a text description using the configured image generation model. Returns the file path of the generated image in the workspace.", Parameters: map[string]any{ "type": "object", "properties": map[string]any{ "prompt": map[string]any{"type": "string", "description": "Detailed description of the image to generate"}, "size": map[string]any{"type": "string", "description": "Image size, e.g. 1024x1024, 1792x1024, 1024x1792. Defaults to 1024x1024."}, }, "required": []string{"prompt"}, }, Execute: func(execCtx *sdk.ToolExecContext, input any) (any, error) { return p.execGenerateImage(execCtx.Context, sess, inputAsMap(input)) }, }, }, nil } func (p *ImageGenProvider) execGenerateImage(ctx context.Context, session SessionContext, args map[string]any) (any, error) { botID := strings.TrimSpace(session.BotID) if botID == "" { return nil, errors.New("bot_id is required") } prompt := strings.TrimSpace(StringArg(args, "prompt")) if prompt == "" { return nil, errors.New("prompt is required") } size := strings.TrimSpace(StringArg(args, "size")) if size == "" { size = "1024x1024" } botSettings, err := p.settings.GetBot(ctx, botID) if err != nil { return nil, errors.New("failed to load bot settings") } imageModelID := strings.TrimSpace(botSettings.ImageModelID) if imageModelID == "" { return nil, errors.New("no image generation model configured") } modelResp, err := p.models.GetByID(ctx, imageModelID) if err != nil { return nil, fmt.Errorf("failed to load image model: %w", err) } if !supportsImageGeneration(modelResp) { return nil, errors.New("configured model does not support image generation") } provider, err := models.FetchProviderByID(ctx, p.queries, modelResp.ProviderID) if err != nil { return nil, fmt.Errorf("failed to load model provider: %w", err) } authResolver := providers.NewService(nil, p.queries, "") creds, err := authResolver.ResolveModelCredentials(ctx, provider) if err != nil { return nil, fmt.Errorf("failed to resolve provider credentials: %w", err) } file, imgBytes, ext, err := generateImage(ctx, modelResp, provider, creds, prompt, size) if err != nil { return nil, err } containerPath := fmt.Sprintf("%s/%d.%s", imageGenDir, time.Now().UnixMilli(), ext) client, clientErr := p.containers.MCPClient(ctx, botID) if clientErr != nil { return map[string]any{ "content": []map[string]any{ {"type": "text", "text": "Image generated (container not reachable, not saved to disk)"}, {"type": "image", "data": file.Data, "mimeType": file.MediaType}, }, }, nil } mkdirCmd := fmt.Sprintf("mkdir -p %s", imageGenDir) _, _ = client.Exec(ctx, mkdirCmd, "/", 5) if writeErr := client.WriteFile(ctx, containerPath, imgBytes); writeErr != nil { return map[string]any{ "content": []map[string]any{ {"type": "text", "text": fmt.Sprintf("Image generated (failed to save: %s)", writeErr.Error())}, {"type": "image", "data": file.Data, "mimeType": file.MediaType}, }, }, nil } return map[string]any{ "path": containerPath, "media_type": file.MediaType, "size_bytes": len(imgBytes), }, nil } func supportsImageGeneration(model models.GetResponse) bool { switch model.Type { case models.ModelTypeChat: return model.HasCompatibility(models.CompatImageOutput) case models.ModelTypeImage: return model.HasCompatibility(models.CompatGenerate) default: return false } } func generateImage( ctx context.Context, modelResp models.GetResponse, provider sqlc.Provider, creds providers.ModelCredentials, prompt string, size string, ) (generatedImageFile, []byte, string, error) { switch modelResp.Type { case models.ModelTypeChat: return generateImageFromChatModel(ctx, modelResp, provider, creds, prompt, size) case models.ModelTypeImage: return generateImageFromImageModel(ctx, modelResp, provider, creds, prompt, size) default: return generatedImageFile{}, nil, "", fmt.Errorf("unsupported image model type: %s", modelResp.Type) } } func generateImageFromChatModel( ctx context.Context, modelResp models.GetResponse, provider sqlc.Provider, creds providers.ModelCredentials, prompt string, size string, ) (generatedImageFile, []byte, string, error) { sdkModel := models.NewSDKChatModel(models.SDKModelConfig{ ModelID: modelResp.ModelID, ClientType: provider.ClientType, APIKey: creds.APIKey, BaseURL: providers.ProviderConfigString(provider, "base_url"), }) userMsg := fmt.Sprintf("Generate an image with the following description. Size: %s\n\n%s", size, prompt) result, err := sdk.GenerateTextResult(ctx, sdk.WithModel(sdkModel), sdk.WithMessages([]sdk.Message{ {Role: sdk.MessageRoleUser, Content: []sdk.MessagePart{sdk.TextPart{Text: userMsg}}}, }), ) if err != nil { return generatedImageFile{}, nil, "", fmt.Errorf("image generation failed: %w", err) } if len(result.Files) == 0 { if result.Text != "" { return generatedImageFile{}, nil, "", fmt.Errorf("no image generated: %s", result.Text) } return generatedImageFile{}, nil, "", errors.New("no image was generated by the model") } file := generatedImageFile{ Data: result.Files[0].Data, MediaType: result.Files[0].MediaType, } imgBytes, ext, err := decodeGeneratedImage(file) if err != nil { return generatedImageFile{}, nil, "", err } return file, imgBytes, ext, nil } func generateImageFromImageModel( ctx context.Context, modelResp models.GetResponse, provider sqlc.Provider, creds providers.ModelCredentials, prompt string, size string, ) (generatedImageFile, []byte, string, error) { imageModel := models.NewSDKImageGenerationModel(models.SDKModelConfig{ ModelID: modelResp.ModelID, ClientType: provider.ClientType, APIKey: creds.APIKey, BaseURL: providers.ProviderConfigString(provider, "base_url"), }) if imageModel == nil { return generatedImageFile{}, nil, "", errors.New("configured provider does not support image generation API") } result, err := sdk.GenerateImage(ctx, sdk.WithImageGenerationModel(imageModel), sdk.WithImagePrompt(prompt), sdk.WithImageSize(size), sdk.WithImageResponseFormat("b64_json"), sdk.WithImageOutputFormat("png"), ) if err != nil { return generatedImageFile{}, nil, "", fmt.Errorf("image generation failed: %w", err) } if len(result.Data) == 0 { return generatedImageFile{}, nil, "", errors.New("no image was generated by the model") } if strings.TrimSpace(result.Data[0].B64JSON) == "" { return generatedImageFile{}, nil, "", errors.New("image model did not return inline image data") } file := generatedImageFile{ Data: result.Data[0].B64JSON, MediaType: "image/png", } imgBytes, ext, err := decodeGeneratedImage(file) if err != nil { return generatedImageFile{}, nil, "", err } return file, imgBytes, ext, nil } func decodeGeneratedImage(file generatedImageFile) ([]byte, string, error) { imgBytes, err := base64.StdEncoding.DecodeString(file.Data) if err != nil { return nil, "", fmt.Errorf("failed to decode generated image: %w", err) } ext := "png" switch { case strings.Contains(file.MediaType, "jpeg"), strings.Contains(file.MediaType, "jpg"): ext = "jpg" case strings.Contains(file.MediaType, "webp"): ext = "webp" } return imgBytes, ext, nil }