mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-27 07:16:19 +09:00
feat(container): add current container metrics view
Expose a dedicated container metrics endpoint and surface current CPU, memory, and root filesystem usage in the bot container view. This gives operators a quick health snapshot while degrading cleanly on unsupported backends.
This commit is contained in:
@@ -51,6 +51,26 @@ type ContainerStatus struct {
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
|
||||
// ContainerMetricsStatus describes the container state that accompanies a
// metrics sample.
type ContainerMetricsStatus struct {
	// Exists is true when the container backing the bot was found.
	Exists bool `json:"exists"`

	// TaskRunning is true when the container's task reported a running status.
	TaskRunning bool `json:"task_running"`
}
|
||||
|
||||
// ContainerStorageMetrics reports how many bytes are in use under a path of
// the container filesystem.
type ContainerStorageMetrics struct {
	// Path is the measured location as seen from inside the container.
	Path string `json:"path"`

	// UsedBytes is the summed size, in bytes, of the entries found under Path.
	UsedBytes uint64 `json:"used_bytes"`
}
|
||||
|
||||
type ContainerMetricsResult struct {
|
||||
Supported bool
|
||||
UnsupportedReason string
|
||||
Status ContainerMetricsStatus
|
||||
SampledAt time.Time
|
||||
CPU *ctr.CPUMetrics
|
||||
Memory *ctr.MemoryMetrics
|
||||
Storage *ContainerStorageMetrics
|
||||
}
|
||||
|
||||
type Manager struct {
|
||||
service ctr.Service
|
||||
cfg config.WorkspaceConfig
|
||||
|
||||
@@ -111,6 +111,10 @@ func (*legacyRouteTestService) GetTaskInfo(context.Context, string) (ctr.TaskInf
|
||||
return ctr.TaskInfo{}, errdefs.ErrNotFound
|
||||
}
|
||||
|
||||
func (*legacyRouteTestService) GetContainerMetrics(context.Context, string) (ctr.ContainerMetrics, error) {
|
||||
return ctr.ContainerMetrics{}, ctr.ErrNotSupported
|
||||
}
|
||||
|
||||
func (*legacyRouteTestService) ListTasks(context.Context, *ctr.ListTasksOptions) ([]ctr.TaskInfo, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
package workspace
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd/v2/core/mount"
|
||||
"github.com/containerd/errdefs"
|
||||
|
||||
ctr "github.com/memohai/memoh/internal/containerd"
|
||||
)
|
||||
|
||||
// unsupportedReasonBackend is the UnsupportedReason recorded on a
// ContainerMetricsResult when the container service answers a metrics request
// with ctr.ErrNotSupported.
const unsupportedReasonBackend = "backend_not_supported"
|
||||
|
||||
func (m *Manager) GetContainerMetrics(ctx context.Context, botID string) (*ContainerMetricsResult, error) {
|
||||
result := &ContainerMetricsResult{
|
||||
Supported: true,
|
||||
Status: ContainerMetricsStatus{
|
||||
Exists: false,
|
||||
},
|
||||
}
|
||||
|
||||
containerID, err := m.ContainerID(ctx, botID)
|
||||
if err != nil {
|
||||
if errors.Is(err, ErrContainerNotFound) {
|
||||
return result, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
info, err := m.service.GetContainer(ctx, containerID)
|
||||
if err != nil {
|
||||
if errdefs.IsNotFound(err) {
|
||||
return result, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
result.Status.Exists = true
|
||||
|
||||
taskInfo, err := m.service.GetTaskInfo(ctx, containerID)
|
||||
if err == nil {
|
||||
result.Status.TaskRunning = taskInfo.Status == ctr.TaskStatusRunning
|
||||
} else if !errdefs.IsNotFound(err) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
runtimeMetrics, err := m.service.GetContainerMetrics(ctx, containerID)
|
||||
switch {
|
||||
case err == nil:
|
||||
result.CPU = runtimeMetrics.CPU
|
||||
result.Memory = runtimeMetrics.Memory
|
||||
result.SampledAt = runtimeMetrics.SampledAt
|
||||
case errors.Is(err, ctr.ErrNotSupported):
|
||||
result.Supported = false
|
||||
result.UnsupportedReason = unsupportedReasonBackend
|
||||
case errdefs.IsNotFound(err):
|
||||
// Task is not running, so CPU and memory metrics are unavailable.
|
||||
default:
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if result.Supported {
|
||||
storage, err := m.collectStorageMetrics(ctx, info)
|
||||
if err != nil {
|
||||
if errors.Is(err, ctr.ErrNotSupported) {
|
||||
return result, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
result.Storage = storage
|
||||
if result.SampledAt.IsZero() {
|
||||
result.SampledAt = time.Now()
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (m *Manager) collectStorageMetrics(ctx context.Context, info ctr.ContainerInfo) (*ContainerStorageMetrics, error) {
|
||||
mounts, err := m.snapshotMounts(ctx, info)
|
||||
if err != nil {
|
||||
if errors.Is(err, errMountNotSupported) {
|
||||
return nil, ctr.ErrNotSupported
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var usedBytes uint64
|
||||
if err := mount.WithReadonlyTempMount(ctx, mounts, func(root string) error {
|
||||
if _, statErr := os.Stat(root); statErr != nil {
|
||||
if os.IsNotExist(statErr) {
|
||||
return nil
|
||||
}
|
||||
return statErr
|
||||
}
|
||||
|
||||
size, sizeErr := dirSize(root)
|
||||
if sizeErr != nil {
|
||||
return sizeErr
|
||||
}
|
||||
usedBytes = size
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &ContainerStorageMetrics{
|
||||
Path: "/",
|
||||
UsedBytes: usedBytes,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// dirSize returns the combined size in bytes of every non-directory entry
// found beneath root.
//
// Sizes are the lengths reported by fs.FileInfo.Size; directories themselves
// contribute nothing. The walk uses filepath.WalkDir, which does not follow
// symbolic links.
//
// The tree may change while it is being walked. An entry that disappears
// between the directory listing and the size lookup, or a nested directory
// that disappears before it can be read, is skipped instead of failing the
// whole measurement. A root that cannot be inspected at all, and any other
// error, aborts the walk; the function then returns 0 and that error.
func dirSize(root string) (uint64, error) {
	var total uint64

	err := filepath.WalkDir(root, func(_ string, entry fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			// entry is nil only when the initial stat of root failed, and that
			// failure is always reported. A directory that was listed but
			// vanished before it could be read is skipped.
			if entry != nil && errors.Is(walkErr, fs.ErrNotExist) {
				return nil
			}
			return walkErr
		}
		if entry.IsDir() {
			return nil
		}

		info, err := entry.Info()
		if err != nil {
			// Info reports fs.ErrNotExist when the entry was removed or
			// renamed after the directory read; such entries are skipped.
			if errors.Is(err, fs.ErrNotExist) {
				return nil
			}
			return err
		}

		if n := info.Size(); n > 0 {
			total += uint64(n) //nolint:gosec // file sizes are checked to be positive before conversion
		}
		return nil
	})
	if err != nil {
		return 0, err
	}
	return total, nil
}
|
||||
Reference in New Issue
Block a user