feat: Add GPU CDI support for workspace containers (#332)

* feat: add CDI GPU support for workspace containers

* feat: expose GPU CDI settings in bot container UI

* feat: move GPU settings into advanced container options

* docs: document advanced CDI device configuration
This commit is contained in:
Ming Lin
2026-04-10 14:52:17 +08:00
committed by GitHub
parent 19619d73a9
commit 4d3f2de7e2
22 changed files with 752 additions and 84 deletions
+11
View File
@@ -748,6 +748,16 @@
"createRestoreDataDescription": "If a previously exported backup or legacy bind-mounted data exists, it will be restored into `/data` after the container is created.",
"createImageLabel": "Base image",
"createImageDescription": "Docker image to use as the container base (e.g. debian:bookworm-slim, alpine:latest, ubuntu:24.04). Leave empty for the default.",
"createAdvancedTitle": "Advanced options",
"createAdvancedDescription": "Configure optional GPU access and raw CDI device mappings for this container.",
"createGpuLabel": "Enable GPU",
"createGpuDescription": "Attach host GPU access to the new container.",
"createGpuDevicesLabel": "CDI devices",
"createGpuDevicesDescription": "Enter one CDI device per line or separate them with commas, for example `nvidia.com/gpu=0` or `amd.com/gpu=0`. Turning GPU off explicitly clears the saved GPU preference.",
"createGpuDevicesPlaceholder": "nvidia.com/gpu=0\namd.com/gpu=0",
"gpuDevicesRequired": "At least one CDI device is required when GPU is enabled.",
"cdiDevicesEmpty": "No GPU attached",
"gpuRecreateHint": "Changing GPU settings requires recreating the container. A simple start or stop will not change the devices already attached.",
"deleteConfirm": "Are you sure you want to permanently delete this container? Unpreserved data cannot be recovered.",
"deletePreserveConfirm": "Are you sure you want to export `/data` and then delete this container?",
"restoreConfirm": "Are you sure you want to restore preserved data into this container's `/data`?",
@@ -797,6 +807,7 @@
"task": "Task",
"namespace": "Namespace",
"image": "Image",
"cdiDevices": "CDI Devices",
"hostPath": "Host Path",
"containerPath": "Container Path",
"preservedData": "Preserved Data",
+11
View File
@@ -744,6 +744,16 @@
"createRestoreDataDescription": "如果存在之前导出的备份或旧版 bind mount 数据,将在容器创建后恢复到 `/data`。",
"createImageLabel": "基础镜像",
"createImageDescription": "作为容器基础环境的 Docker 镜像(如 debian:bookworm-slim、alpine:latest、ubuntu:24.04)。留空则使用默认镜像。",
"createAdvancedTitle": "高级选项",
"createAdvancedDescription": "配置该容器的可选 GPU 访问能力与原始 CDI 设备映射。",
"createGpuLabel": "启用 GPU",
"createGpuDescription": "为新容器开启宿主机 GPU 访问。",
"createGpuDevicesLabel": "CDI 设备",
"createGpuDevicesDescription": "每行填写一个 CDI 设备名,或用逗号分隔多个设备,例如 `nvidia.com/gpu=0` 或 `amd.com/gpu=0`。关闭 GPU 后会显式清空已保存的 GPU 偏好。",
"createGpuDevicesPlaceholder": "nvidia.com/gpu=0\namd.com/gpu=0",
"gpuDevicesRequired": "已启用 GPU 时,至少需要填写一个 CDI 设备名。",
"cdiDevicesEmpty": "未附加 GPU",
"gpuRecreateHint": "GPU 配置变更需要重建容器后才会生效,单纯启动或停止不会更新当前已附加的设备。",
"deleteConfirm": "确定要彻底删除这个容器吗?未保留的数据将无法恢复。",
"deletePreserveConfirm": "确定要先导出 `/data` 再删除这个容器吗?",
"restoreConfirm": "确定要将已保留的数据恢复到当前容器的 `/data` 吗?",
@@ -793,6 +803,7 @@
"task": "任务状态",
"namespace": "命名空间",
"image": "镜像",
"cdiDevices": "CDI 设备",
"hostPath": "主机路径",
"containerPath": "容器路径",
"preservedData": "保留数据",
@@ -4,6 +4,7 @@ import { toast } from 'vue-sonner'
import { useI18n } from 'vue-i18n'
import { useRoute } from 'vue-router'
import { useQuery } from '@pinia/colada'
import { ChevronRight } from 'lucide-vue-next'
import {
deleteBotsByBotIdContainer,
getBotsByBotIdContainer,
@@ -25,7 +26,7 @@ import {
type ContainerCreateLayerStatus,
type ContainerCreateStreamEvent,
} from '@/composables/api/useContainerStream'
import { Button, Input, Label, Separator, Spinner, Switch } from '@memohai/ui'
import { Button, Collapsible, CollapsibleContent, CollapsibleTrigger, Input, Label, Separator, Spinner, Switch, Textarea } from '@memohai/ui'
import ConfirmPopover from '@/components/confirm-popover/index.vue'
import ContainerCreateProgress from './container-create-progress.vue'
import { useSyncedQueryParam } from '@/composables/useSyncedQueryParam'
@@ -59,6 +60,10 @@ const rollbackVersion = ref<number | null>(null)
// Form state for the "create container" panel.
const createRestoreData = ref(false)
const createImage = ref('')
const createImagePrefilled = ref(false)
// GPU toggle shown in the advanced options; when off, no device list is sent.
const createGPUEnabled = ref(false)
// Raw textarea content; parsed into a device list by parseCDIDevices on submit.
const createGPUDevices = ref('')
// One-shot guard so the remembered GPU settings are copied into the form only once.
const createGPUPrefilled = ref(false)
// Open/closed state of the collapsible advanced options section.
const createAdvancedOpen = ref(false)
const newSnapshotName = ref('')
const importInputRef = ref<HTMLInputElement | null>(null)
@@ -178,7 +183,7 @@ async function handleRefreshContainer() {
await runContainerAction('refresh', () => loadContainerData(false))
}
const { data: bot } = useQuery({
const { data: bot, refetch: refetchBot } = useQuery({
key: () => ['bot', botId.value],
query: async () => {
const { data } = await getBotsById({ path: { id: botId.value }, throwOnError: true })
@@ -194,8 +199,48 @@ function rememberedWorkspaceImage(metadata: Record<string, unknown> | undefined)
return typeof image === 'string' ? shortenImageRef(image) : ''
}
/**
 * GPU preference read back from bot metadata.
 * `exists` is true when `workspace.gpu` is present at all (even if empty or
 * malformed); `devices` holds the cleaned, de-duplicated device names.
 */
type RememberedWorkspaceGPU = {
  exists: boolean
  devices: string[]
}

/**
 * Extracts the saved GPU device list from `metadata.workspace.gpu`.
 *
 * @param metadata - Bot metadata object, or `undefined` when not loaded.
 * @returns `exists: false` when `workspace` is not a plain object or has no own
 *   `gpu` key; otherwise `exists: true` with the trimmed, non-empty, unique
 *   string entries of `gpu.devices` in first-seen order (empty when `gpu` or
 *   `gpu.devices` has an unexpected shape).
 */
function rememberedWorkspaceGPU(metadata: Record<string, unknown> | undefined): RememberedWorkspaceGPU {
  // Truthy, typeof 'object', and not an array.
  const isPlainObject = (candidate: unknown): candidate is Record<string, unknown> =>
    Boolean(candidate) && typeof candidate === 'object' && !Array.isArray(candidate)

  const section = metadata?.workspace
  if (!isPlainObject(section) || !Object.prototype.hasOwnProperty.call(section, 'gpu')) {
    return { exists: false, devices: [] }
  }

  // The key is present from here on, so the preference "exists" even if unusable.
  const gpuEntry = section.gpu
  const listed = isPlainObject(gpuEntry) ? gpuEntry.devices : undefined
  if (!Array.isArray(listed)) {
    return { exists: true, devices: [] }
  }

  const unique = new Set<string>()
  for (const entry of listed) {
    if (typeof entry !== 'string') continue
    const name = entry.trim()
    if (name) unique.add(name)
  }
  return { exists: true, devices: Array.from(unique) }
}
/**
 * Splits free-form textarea input into a list of CDI device names.
 *
 * Entries may be separated by newlines or commas. Surrounding whitespace is
 * trimmed, blank entries are dropped, and duplicates are removed while keeping
 * first-seen order.
 *
 * @param value - Raw text entered by the user.
 * @returns The unique, non-empty device names.
 */
function parseCDIDevices(value: string): string[] {
  const seen = new Set<string>()
  for (const chunk of value.split(/[\n,]/)) {
    const device = chunk.trim()
    if (device) seen.add(device)
  }
  return Array.from(seen)
}
// Image remembered in the bot's metadata, as returned by rememberedWorkspaceImage.
const rememberedCreateImage = computed(() => rememberedWorkspaceImage(bot.value?.metadata as Record<string, unknown> | undefined))
// GPU preference remembered in the bot's metadata (existence flag + device list).
const rememberedCreateGPU = computed(() => rememberedWorkspaceGPU(bot.value?.metadata as Record<string, unknown> | undefined))
const displayedContainerImage = computed(() => shortenImageRef(containerInfo.value?.image))
// CDI devices reported for the current container; empty list when none are reported.
const displayedCDIDevices = computed(() => containerInfo.value?.cdi_devices ?? [])
const { isPending: botLifecyclePending } = useBotStatusMeta(bot, t)
@@ -248,16 +293,29 @@ async function handleCreateContainer() {
containerAction.value = 'create'
createProgress.value = { phase: 'pulling' }
try {
const gpuDevices = parseCDIDevices(createGPUDevices.value)
if (createGPUEnabled.value && gpuDevices.length === 0) {
throw new Error(t('bots.container.gpuDevicesRequired'))
}
const body: HandlersCreateContainerRequest = {
restore_data: createRestoreData.value,
}
const trimmedImage = createImage.value.trim()
if (trimmedImage) body.image = trimmedImage
if (createGPUEnabled.value || rememberedCreateGPU.value.exists) {
body.gpu = {
devices: createGPUEnabled.value ? gpuDevices : [],
}
}
const { dataRestored } = await createContainerSSE(body)
createRestoreData.value = false
createImage.value = ''
createGPUEnabled.value = false
createGPUDevices.value = ''
await loadContainerData(false)
await refetchBot()
toast.success(dataRestored
? t('bots.container.createRestoreSuccess')
: t('bots.container.createSuccess'))
@@ -567,6 +625,8 @@ const activeTab = useSyncedQueryParam('tab', 'overview')
// When the container is no longer missing, re-arm the one-shot prefill guards
// and collapse the advanced options panel.
watch(containerMissing, (isMissing) => {
  if (isMissing) return
  createImagePrefilled.value = false
  createGPUPrefilled.value = false
  createAdvancedOpen.value = false
})
@@ -577,6 +637,15 @@ watch([containerMissing, rememberedCreateImage], ([missing, remembered]) => {
createImagePrefilled.value = true
}, { immediate: true })
// Prefill the GPU part of the create form from the remembered preference.
// Runs only while the container is missing, only once per missing period, and
// never overwrites values the form already holds.
watch([containerMissing, rememberedCreateGPU], ([isMissing, saved]) => {
  const canPrefill = isMissing && !createGPUPrefilled.value && saved.exists
  if (!canPrefill) return

  const formTouched = createGPUEnabled.value || createGPUDevices.value.trim() !== ''
  if (formTouched) return

  const savedDevices = saved.devices
  createGPUEnabled.value = savedDevices.length > 0
  createGPUDevices.value = savedDevices.join('\n')
  createGPUPrefilled.value = true
}, { immediate: true })
watch([activeTab, botId], ([tab]) => {
if (!botId.value) return
if (tab === 'container') {
@@ -685,6 +754,59 @@ watch([activeTab, botId], ([tab]) => {
</p>
</div>
<!-- Advanced options for container creation: GPU toggle plus CDI device list. -->
<Collapsible v-model:open="createAdvancedOpen">
<div class="rounded-md border">
<CollapsibleTrigger class="flex w-full items-center justify-between gap-3 px-3 py-2 text-left hover:bg-accent/40">
<div class="space-y-1">
<p class="text-xs font-medium">
{{ $t('bots.container.createAdvancedTitle') }}
</p>
<p class="text-xs text-muted-foreground">
{{ $t('bots.container.createAdvancedDescription') }}
</p>
</div>
<!-- Chevron rotates 90 degrees while the section is open. -->
<ChevronRight
class="size-4 shrink-0 text-muted-foreground transition-transform"
:class="{ 'rotate-90': createAdvancedOpen }"
/>
</CollapsibleTrigger>
<CollapsibleContent>
<div class="space-y-4 border-t px-3 py-3">
<div class="flex items-start justify-between gap-4 rounded-md border p-3">
<div class="space-y-1">
<Label>{{ $t('bots.container.createGpuLabel') }}</Label>
<p class="text-xs text-muted-foreground">
{{ $t('bots.container.createGpuDescription') }}
</p>
</div>
<!-- One-way binding with an explicit handler so the emitted value is coerced to a boolean. -->
<Switch
:model-value="createGPUEnabled"
:disabled="containerBusy || botLifecyclePending"
@update:model-value="(value) => createGPUEnabled = !!value"
/>
</div>
<!-- Device list input is rendered only while the GPU switch is on. -->
<div
v-if="createGPUEnabled"
class="space-y-2"
>
<Label>{{ $t('bots.container.createGpuDevicesLabel') }}</Label>
<Textarea
v-model="createGPUDevices"
:placeholder="$t('bots.container.createGpuDevicesPlaceholder')"
:disabled="containerBusy || botLifecyclePending"
class="min-h-24 font-mono text-xs"
/>
<p class="text-xs text-muted-foreground">
{{ $t('bots.container.createGpuDevicesDescription') }}
</p>
</div>
</div>
</CollapsibleContent>
</div>
</Collapsible>
<div class="flex justify-end">
<Button
:disabled="containerBusy || botLifecyclePending"
@@ -784,6 +906,29 @@ watch([activeTab, botId], ([tab]) => {
{{ displayedContainerImage }}
</dd>
</div>
<!-- CDI devices of the current container: empty-state text, or one row per device. -->
<div class="space-y-1 sm:col-span-2">
<dt class="text-muted-foreground">
{{ $t('bots.container.fields.cdiDevices') }}
</dt>
<dd
v-if="displayedCDIDevices.length === 0"
class="text-muted-foreground"
>
{{ $t('bots.container.cdiDevicesEmpty') }}
</dd>
<dd
v-else
class="space-y-1 font-mono text-xs"
>
<div
v-for="device in displayedCDIDevices"
:key="device"
class="break-all"
>
{{ device }}
</div>
</dd>
</div>
<div class="space-y-1 sm:col-span-2">
<dt class="text-muted-foreground">
{{ $t('bots.container.fields.containerPath') }}
@@ -813,6 +958,10 @@ watch([activeTab, botId], ([tab]) => {
</dl>
</div>
<!-- Static hint: GPU changes only take effect after the container is recreated. -->
<div class="rounded-md border px-3 py-2 text-xs text-muted-foreground">
{{ $t('bots.container.gpuRecreateHint') }}
</div>
<div class="space-y-4 rounded-md border p-4">
<div class="space-y-1">
<h4 class="text-xs font-medium">