feat: add image generation model and generate_image agent tool

Bots can now be configured with an image generation model (the model must
have image-output compatibility). When one is set, the agent exposes a
generate_image tool that calls the model via the Twilight AI SDK, saves the
result to the bot's container filesystem, and returns the file path.

- Add image_model_id column to bots table (migration 0053)
- Update settings SQL queries, service, and types
- New ImageGenProvider tool provider in internal/agent/tools/
- Wire provider in both cmd/agent and cmd/memoh entry points
- Add image model selector to frontend bot settings with compat filtering
- Regenerate swagger, SDK types, and sqlc code
This commit is contained in:
Acbox
2026-04-03 01:15:57 +08:00
parent 7ce1306505
commit a9a9f7e955
19 changed files with 294 additions and 6 deletions
+3
View File
@@ -843,6 +843,9 @@
"memoryHealthUnavailable": "Unavailable",
"ttsModel": "TTS Model",
"ttsModelPlaceholder": "Select TTS model",
"imageModel": "Image Generation Model",
"imageModelDescription": "Model used for the generate_image tool. Must support image-output compatibility.",
"imageModelPlaceholder": "Select image model (optional)",
"language": "Language",
"reasoningEnabled": "Enable Reasoning",
"reasoningEffort": "Reasoning Effort",
+3
View File
@@ -839,6 +839,9 @@
"memoryHealthUnavailable": "暂不可用",
"ttsModel": "语音合成模型",
"ttsModelPlaceholder": "选择语音合成模型",
"imageModel": "图片生成模型",
"imageModelDescription": "用于 generate_image 工具的模型,必须支持 image-output 兼容性。",
"imageModelPlaceholder": "选择图片模型(可选)",
"language": "语言",
"reasoningEnabled": "启用推理",
"reasoningEffort": "推理等级",
@@ -187,6 +187,21 @@
/>
</div>
<!--
Image Generation Model: selector bound to form.image_model_id.
Options are limited to imageCapableModels rather than the full model list.
NOTE(review): model-type is "chat" here; presumably image-capable models are
registered under the chat type. Confirm against ModelSelect.
-->
<div class="space-y-2">
<Label>{{ $t('bots.settings.imageModel') }}</Label>
<p class="text-xs text-muted-foreground">
{{ $t('bots.settings.imageModelDescription') }}
</p>
<ModelSelect
v-model="form.image_model_id"
:models="imageCapableModels"
:providers="providers"
model-type="chat"
:placeholder="$t('bots.settings.imageModelPlaceholder')"
/>
</div>
<!-- Browser Context -->
<div class="space-y-2">
<Label>{{ $t('bots.settings.browserContext') }}</Label>
@@ -426,6 +441,9 @@ const { mutateAsync: deleteBot, isLoading: deleteLoading } = useMutation({
// Derived option lists for the settings form. Each source ref may still be
// nullish (e.g. before its data arrives), so every list falls back to [].

// All models, unfiltered.
const models = computed(() => {
  const loaded = modelData.value
  return loaded ?? []
})

// All providers, unfiltered.
const providers = computed(() => {
  const loaded = providerData.value
  return loaded ?? []
})

// Models whose config lists the 'image-output' compatibility. Models with no
// config or no compatibilities are excluded.
const imageCapableModels = computed(() => {
  const supportsImageOutput = ({ config }) => config?.compatibilities?.includes('image-output')
  return models.value.filter(supportsImageOutput)
})

// Search providers, minus those explicitly disabled (enable === false).
// Entries without an `enable` field are kept.
const searchProviders = computed(() => {
  const all = searchProviderData.value ?? []
  return all.filter(({ enable }) => enable !== false)
})

// All memory providers, unfiltered.
const memoryProviders = computed(() => {
  const loaded = memoryProviderData.value
  return loaded ?? []
})

// TTS providers, minus those explicitly disabled (enable === false).
// Entries without an `enable` field are kept.
const ttsProviders = computed(() => {
  const all = ttsProviderData.value ?? []
  return all.filter(({ enable }) => enable !== false)
})
@@ -437,6 +455,7 @@ const browserContexts = computed(() => browserContextData.value ?? [])
const form = reactive({
chat_model_id: '',
title_model_id: '',
image_model_id: '',
search_provider_id: '',
memory_provider_id: '',
tts_model_id: '',
@@ -574,6 +593,7 @@ watch(settings, (val) => {
if (val) {
form.chat_model_id = val.chat_model_id ?? ''
form.title_model_id = val.title_model_id ?? ''
form.image_model_id = val.image_model_id ?? ''
form.search_provider_id = val.search_provider_id ?? ''
form.memory_provider_id = val.memory_provider_id ?? ''
form.tts_model_id = val.tts_model_id ?? ''
@@ -590,6 +610,7 @@ const hasChanges = computed(() => {
let changed =
form.chat_model_id !== (s.chat_model_id ?? '')
|| form.title_model_id !== (s.title_model_id ?? '')
|| form.image_model_id !== (s.image_model_id ?? '')
|| form.search_provider_id !== (s.search_provider_id ?? '')
|| form.memory_provider_id !== (s.memory_provider_id ?? '')
|| form.tts_model_id !== (s.tts_model_id ?? '')