feat: add image generation model and generate_image agent tool

Bots can now be configured with an image generation model (must have image-output compatibility). When set, the agent exposes a generate_image tool that calls the model via the Twilight AI SDK, saves the result to the bot container's filesystem, and returns the file path.

- Add image_model_id column to bots table (migration 0053)
- Update settings SQL queries, service, and types
- New ImageGenProvider tool provider in internal/agent/tools/
- Wire provider in both cmd/agent and cmd/memoh entry points
- Add image model selector to frontend bot settings with compat filtering
- Regenerate swagger, SDK types, and sqlc code
2026-04-25 07:00:48 +09:00 · 2026-04-03 01:15:57 +08:00
parent 7ce1306505
commit a9a9f7e955
19 changed files with 294 additions and 6 deletions
@@ -843,6 +843,9 @@
      "memoryHealthUnavailable": "Unavailable",
      "ttsModel": "TTS Model",
      "ttsModelPlaceholder": "Select TTS model",
+      "imageModel": "Image Generation Model",
+      "imageModelDescription": "Model used for the generate_image tool. Must support image-output compatibility.",
+      "imageModelPlaceholder": "Select image model (optional)",
      "language": "Language",
      "reasoningEnabled": "Enable Reasoning",
      "reasoningEffort": "Reasoning Effort",
@@ -839,6 +839,9 @@
      "memoryHealthUnavailable": "暂不可用",
      "ttsModel": "语音合成模型",
      "ttsModelPlaceholder": "选择语音合成模型",
+      "imageModel": "图片生成模型",
+      "imageModelDescription": "用于 generate_image 工具的模型，必须支持 image-output 兼容性。",
+      "imageModelPlaceholder": "选择图片模型（可选）",
      "language": "语言",
      "reasoningEnabled": "启用推理",
      "reasoningEffort": "推理等级",
@@ -187,6 +187,21 @@
      />
    </div>

+    <!-- Image Generation Model -->
+    <div class="space-y-2">
+      <Label>{{ $t('bots.settings.imageModel') }}</Label>
+      <p class="text-xs text-muted-foreground">
+        {{ $t('bots.settings.imageModelDescription') }}
+      </p>
+      <ModelSelect
+        v-model="form.image_model_id"
+        :models="imageCapableModels"
+        :providers="providers"
+        model-type="chat"
+        :placeholder="$t('bots.settings.imageModelPlaceholder')"
+      />
+    </div>
+
    <!-- Browser Context -->
    <div class="space-y-2">
      <Label>{{ $t('bots.settings.browserContext') }}</Label>
@@ -426,6 +441,9 @@ const { mutateAsync: deleteBot, isLoading: deleteLoading } = useMutation({

 const models = computed(() => modelData.value ?? [])
 const providers = computed(() => providerData.value ?? [])
+const imageCapableModels = computed(() =>
+  models.value.filter((m) => m.config?.compatibilities?.includes('image-output')),
+)
 const searchProviders = computed(() => (searchProviderData.value ?? []).filter((p) => p.enable !== false))
 const memoryProviders = computed(() => memoryProviderData.value ?? [])
 const ttsProviders = computed(() => (ttsProviderData.value ?? []).filter((p) => p.enable !== false))
@@ -437,6 +455,7 @@ const browserContexts = computed(() => browserContextData.value ?? [])
 const form = reactive({
  chat_model_id: '',
  title_model_id: '',
+  image_model_id: '',
  search_provider_id: '',
  memory_provider_id: '',
  tts_model_id: '',
@@ -574,6 +593,7 @@ watch(settings, (val) => {
  if (val) {
    form.chat_model_id = val.chat_model_id ?? ''
    form.title_model_id = val.title_model_id ?? ''
+    form.image_model_id = val.image_model_id ?? ''
    form.search_provider_id = val.search_provider_id ?? ''
    form.memory_provider_id = val.memory_provider_id ?? ''
    form.tts_model_id = val.tts_model_id ?? ''
@@ -590,6 +610,7 @@ const hasChanges = computed(() => {
  let changed =
    form.chat_model_id !== (s.chat_model_id ?? '')
    || form.title_model_id !== (s.title_model_id ?? '')
+    || form.image_model_id !== (s.image_model_id ?? '')
    || form.search_provider_id !== (s.search_provider_id ?? '')
    || form.memory_provider_id !== (s.memory_provider_id ?? '')
    || form.tts_model_id !== (s.tts_model_id ?? '')