fix(agent): skip tools for models without tool-call capability and parse image output

- Add SupportsToolCall to RunConfig; inject tools into the SDK only when it is set
- Update twilight-ai to 497ad09, which adds a 10MB buffer to the SSE scanner
  (fixes token-too-long errors on large image payloads) and parses the images
  array from OpenAI-compatible chat completions into StreamFilePart
This commit is contained in:
Acbox
2026-04-03 00:01:14 +08:00
parent 574bc1fb59
commit fc2b603018
7 changed files with 18 additions and 8 deletions
+1 -1
View File
@@ -26,7 +26,7 @@ require (
github.com/larksuite/oapi-sdk-go/v3 v3.5.3
github.com/mailgun/mailgun-go/v5 v5.14.0
github.com/memohai/acgo v0.0.0-20260221232113-babac0d6acd7
github.com/memohai/twilight-ai v0.3.4-0.20260329101319-3ebcc563f5d9
github.com/memohai/twilight-ai v0.3.4-0.20260402155501-497ad09c6724
github.com/modelcontextprotocol/go-sdk v1.4.1
github.com/opencontainers/image-spec v1.1.1
github.com/opencontainers/runtime-spec v1.3.0
+4
View File
@@ -230,6 +230,10 @@ github.com/memohai/acgo v0.0.0-20260221232113-babac0d6acd7 h1:beehwOQperqGWj4m4E
github.com/memohai/acgo v0.0.0-20260221232113-babac0d6acd7/go.mod h1:OvmxM7JmnXBmwJWWVqtreL3HSHSKuzPbtbhlg5MvBg0=
github.com/memohai/twilight-ai v0.3.4-0.20260329101319-3ebcc563f5d9 h1:vpq3FgZ7UJAWr63M4mAtB8wvUWHSAdrgWibTFocXFBk=
github.com/memohai/twilight-ai v0.3.4-0.20260329101319-3ebcc563f5d9/go.mod h1:GZTT9GUT3uSs6zram/FcF24GLTZMFSpiybbYmjr+gH8=
github.com/memohai/twilight-ai v0.3.4-0.20260402145441-9e9f798cbd2d h1:tZYwJ0YDc6FUlh3mXYv+Tkw5dLfMD6IgP8VY5/+AfB0=
github.com/memohai/twilight-ai v0.3.4-0.20260402145441-9e9f798cbd2d/go.mod h1:GZTT9GUT3uSs6zram/FcF24GLTZMFSpiybbYmjr+gH8=
github.com/memohai/twilight-ai v0.3.4-0.20260402155501-497ad09c6724 h1:/Hw5vHfAeHRGx+duPKyetT5n2t6J5cYMfUysN/Xh9U0=
github.com/memohai/twilight-ai v0.3.4-0.20260402155501-497ad09c6724/go.mod h1:GZTT9GUT3uSs6zram/FcF24GLTZMFSpiybbYmjr+gH8=
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=
+1 -1
View File
@@ -366,7 +366,7 @@ func (*Agent) buildGenerateOptions(cfg RunConfig, tools []sdk.Tool, prepareStep
sdk.WithSystem(cfg.System),
sdk.WithMaxSteps(-1),
}
if len(tools) > 0 {
if len(tools) > 0 && cfg.SupportsToolCall {
opts = append(opts, sdk.WithTools(tools))
}
if prepareStep != nil {
+3
View File
@@ -246,6 +246,7 @@ func TestAgentGenerateReadMediaInjectsImageIntoNextStep(t *testing.T) {
Model: &sdk.Model{ID: "mock-model", Provider: modelProvider},
Messages: []sdk.Message{sdk.UserMessage("look at the image")},
SupportsImageInput: true,
SupportsToolCall: true,
Identity: SessionContext{
BotID: "bot-1",
},
@@ -321,6 +322,7 @@ func TestAgentGenerateReadMediaInjectsAnthropicSafeImageIntoNextStep(t *testing.
Model: &sdk.Model{ID: "mock-model", Provider: modelProvider},
Messages: []sdk.Message{sdk.UserMessage("look at the image")},
SupportsImageInput: true,
SupportsToolCall: true,
Identity: SessionContext{
BotID: "bot-1",
},
@@ -367,6 +369,7 @@ func TestAgentStreamReadMediaPersistsInjectedImageInTerminalMessages(t *testing.
Model: &sdk.Model{ID: "mock-model", Provider: modelProvider},
Messages: []sdk.Message{sdk.UserMessage("look at the image")},
SupportsImageInput: true,
SupportsToolCall: true,
Identity: SessionContext{
BotID: "bot-1",
},
+7 -6
View File
@@ -31,12 +31,13 @@ func (s *SpawnAdapter) Generate(ctx context.Context, cfg tools.SpawnRunConfig) (
}
rc := RunConfig{
Model: cfg.Model,
System: cfg.System,
Query: cfg.Query,
SessionType: cfg.SessionType,
Messages: messages,
ReasoningEffort: cfg.ReasoningEffort,
Model: cfg.Model,
System: cfg.System,
Query: cfg.Query,
SessionType: cfg.SessionType,
Messages: messages,
ReasoningEffort: cfg.ReasoningEffort,
SupportsToolCall: true,
Identity: SessionContext{
BotID: cfg.Identity.BotID,
ChatID: cfg.Identity.ChatID,
+1
View File
@@ -55,6 +55,7 @@ type RunConfig struct {
System string
SessionType string
SupportsImageInput bool
SupportsToolCall bool
InlineImages []sdk.ImagePart
Identity SessionContext
Skills []SkillEntry
+1
View File
@@ -274,6 +274,7 @@ func (r *Resolver) resolve(ctx context.Context, req conversation.ChatRequest) (r
Messages: sdkMessages,
Query: headerifiedQuery,
SupportsImageInput: chatModel.HasCompatibility(models.CompatVision),
SupportsToolCall: chatModel.HasCompatibility(models.CompatToolCall),
InlineImages: inlineImages,
Identity: agentpkg.SessionContext{
BotID: req.BotID,