fix(agent): skip tools for models without tool-call capability and parse image output

- Add SupportsToolCall to RunConfig; tools are only injected into the SDK when it is set
- Update twilight-ai to 497ad09, which adds a 10MB SSE scanner buffer
  (fixes token-too-long errors on large image payloads) and parses the images
  array from OpenAI-compatible chat completions into StreamFilePart
This commit is contained in:
Acbox
2026-04-03 00:01:14 +08:00
parent 574bc1fb59
commit fc2b603018
7 changed files with 18 additions and 8 deletions
+1 -1
View File
@@ -26,7 +26,7 @@ require (
github.com/larksuite/oapi-sdk-go/v3 v3.5.3 github.com/larksuite/oapi-sdk-go/v3 v3.5.3
github.com/mailgun/mailgun-go/v5 v5.14.0 github.com/mailgun/mailgun-go/v5 v5.14.0
github.com/memohai/acgo v0.0.0-20260221232113-babac0d6acd7 github.com/memohai/acgo v0.0.0-20260221232113-babac0d6acd7
github.com/memohai/twilight-ai v0.3.4-0.20260329101319-3ebcc563f5d9 github.com/memohai/twilight-ai v0.3.4-0.20260402155501-497ad09c6724
github.com/modelcontextprotocol/go-sdk v1.4.1 github.com/modelcontextprotocol/go-sdk v1.4.1
github.com/opencontainers/image-spec v1.1.1 github.com/opencontainers/image-spec v1.1.1
github.com/opencontainers/runtime-spec v1.3.0 github.com/opencontainers/runtime-spec v1.3.0
+4
View File
@@ -230,6 +230,10 @@ github.com/memohai/acgo v0.0.0-20260221232113-babac0d6acd7 h1:beehwOQperqGWj4m4E
github.com/memohai/acgo v0.0.0-20260221232113-babac0d6acd7/go.mod h1:OvmxM7JmnXBmwJWWVqtreL3HSHSKuzPbtbhlg5MvBg0= github.com/memohai/acgo v0.0.0-20260221232113-babac0d6acd7/go.mod h1:OvmxM7JmnXBmwJWWVqtreL3HSHSKuzPbtbhlg5MvBg0=
github.com/memohai/twilight-ai v0.3.4-0.20260329101319-3ebcc563f5d9 h1:vpq3FgZ7UJAWr63M4mAtB8wvUWHSAdrgWibTFocXFBk= github.com/memohai/twilight-ai v0.3.4-0.20260329101319-3ebcc563f5d9 h1:vpq3FgZ7UJAWr63M4mAtB8wvUWHSAdrgWibTFocXFBk=
github.com/memohai/twilight-ai v0.3.4-0.20260329101319-3ebcc563f5d9/go.mod h1:GZTT9GUT3uSs6zram/FcF24GLTZMFSpiybbYmjr+gH8= github.com/memohai/twilight-ai v0.3.4-0.20260329101319-3ebcc563f5d9/go.mod h1:GZTT9GUT3uSs6zram/FcF24GLTZMFSpiybbYmjr+gH8=
github.com/memohai/twilight-ai v0.3.4-0.20260402145441-9e9f798cbd2d h1:tZYwJ0YDc6FUlh3mXYv+Tkw5dLfMD6IgP8VY5/+AfB0=
github.com/memohai/twilight-ai v0.3.4-0.20260402145441-9e9f798cbd2d/go.mod h1:GZTT9GUT3uSs6zram/FcF24GLTZMFSpiybbYmjr+gH8=
github.com/memohai/twilight-ai v0.3.4-0.20260402155501-497ad09c6724 h1:/Hw5vHfAeHRGx+duPKyetT5n2t6J5cYMfUysN/Xh9U0=
github.com/memohai/twilight-ai v0.3.4-0.20260402155501-497ad09c6724/go.mod h1:GZTT9GUT3uSs6zram/FcF24GLTZMFSpiybbYmjr+gH8=
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=
+1 -1
View File
@@ -366,7 +366,7 @@ func (*Agent) buildGenerateOptions(cfg RunConfig, tools []sdk.Tool, prepareStep
sdk.WithSystem(cfg.System), sdk.WithSystem(cfg.System),
sdk.WithMaxSteps(-1), sdk.WithMaxSteps(-1),
} }
if len(tools) > 0 { if len(tools) > 0 && cfg.SupportsToolCall {
opts = append(opts, sdk.WithTools(tools)) opts = append(opts, sdk.WithTools(tools))
} }
if prepareStep != nil { if prepareStep != nil {
+3
View File
@@ -246,6 +246,7 @@ func TestAgentGenerateReadMediaInjectsImageIntoNextStep(t *testing.T) {
Model: &sdk.Model{ID: "mock-model", Provider: modelProvider}, Model: &sdk.Model{ID: "mock-model", Provider: modelProvider},
Messages: []sdk.Message{sdk.UserMessage("look at the image")}, Messages: []sdk.Message{sdk.UserMessage("look at the image")},
SupportsImageInput: true, SupportsImageInput: true,
SupportsToolCall: true,
Identity: SessionContext{ Identity: SessionContext{
BotID: "bot-1", BotID: "bot-1",
}, },
@@ -321,6 +322,7 @@ func TestAgentGenerateReadMediaInjectsAnthropicSafeImageIntoNextStep(t *testing.
Model: &sdk.Model{ID: "mock-model", Provider: modelProvider}, Model: &sdk.Model{ID: "mock-model", Provider: modelProvider},
Messages: []sdk.Message{sdk.UserMessage("look at the image")}, Messages: []sdk.Message{sdk.UserMessage("look at the image")},
SupportsImageInput: true, SupportsImageInput: true,
SupportsToolCall: true,
Identity: SessionContext{ Identity: SessionContext{
BotID: "bot-1", BotID: "bot-1",
}, },
@@ -367,6 +369,7 @@ func TestAgentStreamReadMediaPersistsInjectedImageInTerminalMessages(t *testing.
Model: &sdk.Model{ID: "mock-model", Provider: modelProvider}, Model: &sdk.Model{ID: "mock-model", Provider: modelProvider},
Messages: []sdk.Message{sdk.UserMessage("look at the image")}, Messages: []sdk.Message{sdk.UserMessage("look at the image")},
SupportsImageInput: true, SupportsImageInput: true,
SupportsToolCall: true,
Identity: SessionContext{ Identity: SessionContext{
BotID: "bot-1", BotID: "bot-1",
}, },
+7 -6
View File
@@ -31,12 +31,13 @@ func (s *SpawnAdapter) Generate(ctx context.Context, cfg tools.SpawnRunConfig) (
} }
rc := RunConfig{ rc := RunConfig{
Model: cfg.Model, Model: cfg.Model,
System: cfg.System, System: cfg.System,
Query: cfg.Query, Query: cfg.Query,
SessionType: cfg.SessionType, SessionType: cfg.SessionType,
Messages: messages, Messages: messages,
ReasoningEffort: cfg.ReasoningEffort, ReasoningEffort: cfg.ReasoningEffort,
SupportsToolCall: true,
Identity: SessionContext{ Identity: SessionContext{
BotID: cfg.Identity.BotID, BotID: cfg.Identity.BotID,
ChatID: cfg.Identity.ChatID, ChatID: cfg.Identity.ChatID,
+1
View File
@@ -55,6 +55,7 @@ type RunConfig struct {
System string System string
SessionType string SessionType string
SupportsImageInput bool SupportsImageInput bool
SupportsToolCall bool
InlineImages []sdk.ImagePart InlineImages []sdk.ImagePart
Identity SessionContext Identity SessionContext
Skills []SkillEntry Skills []SkillEntry
+1
View File
@@ -274,6 +274,7 @@ func (r *Resolver) resolve(ctx context.Context, req conversation.ChatRequest) (r
Messages: sdkMessages, Messages: sdkMessages,
Query: headerifiedQuery, Query: headerifiedQuery,
SupportsImageInput: chatModel.HasCompatibility(models.CompatVision), SupportsImageInput: chatModel.HasCompatibility(models.CompatVision),
SupportsToolCall: chatModel.HasCompatibility(models.CompatToolCall),
InlineImages: inlineImages, InlineImages: inlineImages,
Identity: agentpkg.SessionContext{ Identity: agentpkg.SessionContext{
BotID: req.BotID, BotID: req.BotID,