From 318bd87f652cb1da9b70e66b4db1fa745749dcf5 Mon Sep 17 00:00:00 2001 From: Acbox Date: Sun, 8 Feb 2026 01:57:06 +0800 Subject: [PATCH] feat: skills --- agent/src/agent.ts | 29 ++++++- agent/src/modules/chat.ts | 7 +- agent/src/prompts/system.ts | 2 + agent/src/tools/index.ts | 10 ++- agent/src/tools/skill.ts | 12 +-- agent/src/types/agent.ts | 4 +- cmd/agent/main.go | 23 ++++++ docs/docs.go | 4 + docs/swagger.json | 4 + docs/swagger.yaml | 3 + internal/chat/resolver.go | 51 +++++++++++++ internal/handlers/skills.go | 148 ++++++++++++++++++++++++++++++------ 12 files changed, 254 insertions(+), 43 deletions(-) diff --git a/agent/src/agent.ts b/agent/src/agent.ts index fd15ec83..a536ce51 100644 --- a/agent/src/agent.ts +++ b/agent/src/agent.ts @@ -1,5 +1,5 @@ import { generateText, ImagePart, LanguageModelUsage, ModelMessage, stepCountIs, streamText, UserModelMessage } from 'ai' -import { AgentInput, AgentParams, allActions, HTTPMCPConnection, MCPConnection, Schedule } from './types' +import { AgentInput, AgentParams, AgentSkill, allActions, HTTPMCPConnection, MCPConnection, Schedule } from './types' import { system, schedule, user, subagentSystem } from './prompts' import { AuthFetcher } from './index' import { createModel } from './model' @@ -22,6 +22,7 @@ export const createAgent = ({ allowedActions = allActions, channels = [], mcpConnections = [], + skills = [], currentChannel = 'Unknown Channel', identity = { botId: '', @@ -33,6 +34,18 @@ export const createAgent = ({ auth, }: AgentParams, fetch: AuthFetcher) => { const model = createModel(modelConfig) + const enabledSkills: AgentSkill[] = [] + + const enableSkill = (skill: string) => { + const agentSkill = skills.find(s => s.name === skill) + if (agentSkill) { + enabledSkills.push(agentSkill) + } + } + + const getEnabledSkills = () => { + return enabledSkills.map(skill => skill.name) + } const getDefaultMCPConnections = (): MCPConnection[] => { const fs: HTTPMCPConnection = { @@ -52,8 +65,8 @@ export const createAgent = ({ language, maxContextLoadTime: activeContextTime, channels, - skills: [], - enabledSkills: [], + skills, + enabledSkills, }) } @@ -63,6 +76,7 @@ export const createAgent = ({ model: modelConfig, brave, identity, + enableSkill, }) const defaultMCPConnections = getDefaultMCPConnections() const { tools: mcpTools, close: closeMCP } = await getMCPTools([ @@ -99,6 +113,7 @@ export const createAgent = ({ const ask = async (input: AgentInput) => { const userPrompt = generateUserPrompt(input) const messages = [...input.messages, userPrompt] + input.skills.forEach(skill => enableSkill(skill)) const systemPrompt = generateSystemPrompt() const { tools, close } = await getAgentTools() const { response, reasoning, text, usage } = await generateText({ @@ -125,6 +140,7 @@ export const createAgent = ({ usage, text: cleanedText, attachments: allAttachments, + skills: getEnabledSkills(), } } @@ -169,12 +185,14 @@ export const createAgent = ({ reasoning: reasoning.map(part => part.text), usage, text, + skills: getEnabledSkills(), } } const triggerSchedule = async (params: { schedule: Schedule messages: ModelMessage[] + skills: string[] }) => { const scheduleMessage: UserModelMessage = { role: 'user', @@ -183,6 +201,7 @@ export const createAgent = ({ ] } const messages = [...params.messages, scheduleMessage] + params.skills.forEach(skill => enableSkill(skill)) const { tools, close } = await getAgentTools() const { response, reasoning, text, usage } = await generateText({ model, @@ -199,12 +218,14 @@ export const createAgent = ({ reasoning: reasoning.map(part => part.text), usage, text, + skills: getEnabledSkills(), } } async function* stream(input: AgentInput): AsyncGenerator { const userPrompt = generateUserPrompt(input) const messages = [...input.messages, userPrompt] + input.skills.forEach(skill => enableSkill(skill)) const systemPrompt = generateSystemPrompt() const attachmentsExtractor = new AttachmentsStreamExtractor() const result: { @@ -320,9 +341,9 @@ export const createAgent = ({ yield { type: 'agent_end', messages: [userPrompt, ...strippedMessages], - skills: [], reasoning: result.reasoning, usage: result.usage!, + skills: getEnabledSkills(), } } diff --git a/agent/src/modules/chat.ts b/agent/src/modules/chat.ts index 2ea49fc6..a67fac09 100644 --- a/agent/src/modules/chat.ts +++ b/agent/src/modules/chat.ts @@ -4,7 +4,7 @@ import { createAgent } from '../agent' import { createAuthFetcher, getBaseUrl, getBraveConfig } from '../index' import { ModelConfig } from '../types' import { bearerMiddleware } from '../middlewares/bearer' -import { AllowedActionModel, AttachmentModel, IdentityContextModel, MCPConnectionModel, ModelConfigModel, ScheduleModel } from '../models' +import { AgentSkillModel, AllowedActionModel, AttachmentModel, IdentityContextModel, MCPConnectionModel, ModelConfigModel, ScheduleModel } from '../models' import { allActions } from '../types' const AgentModel = z.object({ @@ -14,6 +14,7 @@ const AgentModel = z.object({ currentChannel: z.string(), allowedActions: z.array(AllowedActionModel).optional().default(allActions), messages: z.array(z.any()), + usableSkills: z.array(AgentSkillModel).optional().default([]), skills: z.array(z.string()), identity: IdentityContextModel, attachments: z.array(AttachmentModel).optional().default([]), @@ -37,6 +38,7 @@ export const chatModule = new Elysia({ prefix: '/chat' }) bearer: bearer!, baseUrl: getBaseUrl(), }, + skills: body.usableSkills, brave: getBraveConfig(), }, authFetcher) return ask({ @@ -66,6 +68,7 @@ export const chatModule = new Elysia({ prefix: '/chat' }) bearer: bearer!, baseUrl: getBaseUrl(), }, + skills: body.usableSkills, brave: getBraveConfig(), }, authFetcher) for await (const action of stream({ @@ -101,11 +104,13 @@ export const chatModule = new Elysia({ prefix: '/chat' }) bearer: bearer!, baseUrl: getBaseUrl(), }, + skills: body.usableSkills, brave: getBraveConfig(), }, authFetcher) return triggerSchedule({ schedule: body.schedule, messages: body.messages, + skills: body.skills, }) }, { body: AgentModel.extend({ diff --git a/agent/src/prompts/system.ts b/agent/src/prompts/system.ts index 75e77606..05772a6e 100644 --- a/agent/src/prompts/system.ts +++ b/agent/src/prompts/system.ts @@ -35,6 +35,8 @@ export const system = ({ 'time-now': date.toISOString(), } + console.log('enabledSkills', enabledSkills) + return ` --- ${Bun.YAML.stringify(headers)} diff --git a/agent/src/tools/index.ts b/agent/src/tools/index.ts index 821d7c28..34422aed 100644 --- a/agent/src/tools/index.ts +++ b/agent/src/tools/index.ts @@ -7,17 +7,19 @@ import { getMemoryTools } from './memory' import { getSubagentTools } from './subagent' import { getContactTools } from './contact' import { getMessageTools } from './message' +import { getSkillTools } from './skill' export interface ToolsParams { fetch: AuthFetcher model: ModelConfig brave?: BraveConfig identity: IdentityContext + enableSkill: (skill: string) => void } export const getTools = ( actions: AgentAction[], - { fetch, model, brave, identity }: ToolsParams + { fetch, model, brave, identity, enableSkill }: ToolsParams ) => { const tools: ToolSet = {} if (actions.includes(AgentAction.Web) && brave) { @@ -44,5 +46,9 @@ export const getTools = ( const messageTools = getMessageTools({ fetch, identity }) Object.assign(tools, messageTools) } - return tools + if (actions.includes(AgentAction.Skill)) { + const skillTools = getSkillTools({ useSkill: enableSkill }) + Object.assign(tools, skillTools) + } + return tools } \ No newline at end of file diff --git a/agent/src/tools/skill.ts b/agent/src/tools/skill.ts index f669d074..d911b841 100644 --- a/agent/src/tools/skill.ts +++ b/agent/src/tools/skill.ts @@ -1,13 +1,11 @@ -import { AgentSkill } from '../types' import { tool } from 'ai' import { z } from 'zod' interface SkillToolParams { - skills: AgentSkill[] - useSkill: (skill: AgentSkill, reason: string) => void + useSkill: (skill: string) => void } -export const getSkillTools = ({ skills, useSkill }: SkillToolParams) => { +export const getSkillTools = ({ useSkill }: SkillToolParams) => { const useSkillTool = tool({ description: 'Use a skill if you think it is relevant to the current task', inputSchema: z.object({ @@ -15,11 +13,7 @@ export const getSkillTools = ({ skills, useSkill }: SkillToolParams) => { reason: z.string().describe('The reason why you think this skill is relevant to the current task'), }), execute: async ({ skillName, reason }) => { - const skill = skills.find((s) => s.name === skillName) - if (!skill) { - return { error: 'Skill not found' } - } - await useSkill(skill, reason) + useSkill(skillName) return { success: true, skillName, diff --git a/agent/src/types/agent.ts b/agent/src/types/agent.ts index 6d935dbe..430c3850 100644 --- a/agent/src/types/agent.ts +++ b/agent/src/types/agent.ts @@ -51,6 +51,7 @@ export interface AgentParams { mcpConnections?: MCPConnection[] identity?: IdentityContext auth: AgentAuthContext + skills?: AgentSkill[] } export interface AgentInput { @@ -64,6 +65,5 @@ export interface AgentSkill { name: string description: string content: string - // eslint-disable-next-line @typescript-eslint/no-explicit-any - metadata: Record + metadata?: Record } diff --git a/cmd/agent/main.go b/cmd/agent/main.go index dc3beec0..ef18c7d0 100644 --- a/cmd/agent/main.go +++ b/cmd/agent/main.go @@ -156,6 +156,7 @@ func main() { preauthHandler := handlers.NewPreauthHandler(preauthService, botService, usersService) chatResolver = chat.NewResolver(logger.L, modelsService, queries, memoryService, historyService, settingsService, cfg.AgentGateway.BaseURL(), 120*time.Second) + chatResolver.SetSkillLoader(&skillLoaderAdapter{handler: containerdHandler}) embeddingsHandler := handlers.NewEmbeddingsHandler(logger.L, modelsService, queries) swaggerHandler := handlers.NewSwaggerHandler(logger.L) chatHandler := handlers.NewChatHandler(logger.L, chatResolver, botService, usersService) @@ -338,3 +339,25 @@ func (c *lazyLLMClient) resolve(ctx context.Context) (memory.LLM, error) { } return memory.NewLLMClient(c.logger, memoryProvider.BaseUrl, memoryProvider.ApiKey, memoryModel.ModelID, c.timeout) } + +// skillLoaderAdapter bridges handlers.ContainerdHandler to chat.SkillLoader. +type skillLoaderAdapter struct { + handler *handlers.ContainerdHandler +} + +func (a *skillLoaderAdapter) LoadSkills(ctx context.Context, botID string) ([]chat.SkillEntry, error) { + items, err := a.handler.LoadSkills(ctx, botID) + if err != nil { + return nil, err + } + entries := make([]chat.SkillEntry, len(items)) + for i, item := range items { + entries[i] = chat.SkillEntry{ + Name: item.Name, + Description: item.Description, + Content: item.Content, + Metadata: item.Metadata, + } + } + return entries, nil +} diff --git a/docs/docs.go b/docs/docs.go index 0000cab8..f1df0f4e 100644 --- a/docs/docs.go +++ b/docs/docs.go @@ -4942,6 +4942,10 @@ const docTemplate = `{ "description": { "type": "string" }, + "metadata": { + "type": "object", + "additionalProperties": {} + }, "name": { "type": "string" } diff --git a/docs/swagger.json b/docs/swagger.json index 52b2ce65..81af9224 100644 --- a/docs/swagger.json +++ b/docs/swagger.json @@ -4933,6 +4933,10 @@ "description": { "type": "string" }, + "metadata": { + "type": "object", + "additionalProperties": {} + }, "name": { "type": "string" } diff --git a/docs/swagger.yaml b/docs/swagger.yaml index 2f63e7c6..ff7551bb 100644 --- a/docs/swagger.yaml +++ b/docs/swagger.yaml @@ -654,6 +654,9 @@ definitions: type: string description: type: string + metadata: + additionalProperties: {} + type: object name: type: string type: object diff --git a/internal/chat/resolver.go b/internal/chat/resolver.go index bdf11cf6..44cbb0c5 100644 --- a/internal/chat/resolver.go +++ b/internal/chat/resolver.go @@ -24,6 +24,19 @@ import ( const defaultMaxContextMinutes = 24 * 60 +// SkillEntry represents a skill loaded from the container. +type SkillEntry struct { + Name string + Description string + Content string + Metadata map[string]any +} + +// SkillLoader loads skills for a given bot from its container. +type SkillLoader interface { + LoadSkills(ctx context.Context, botID string) ([]SkillEntry, error) +} + // Resolver orchestrates chat with the agent gateway. type Resolver struct { modelsService *models.Service @@ -31,6 +44,7 @@ type Resolver struct { memoryService *memory.Service historyService *history.Service settingsService *settings.Service + skillLoader SkillLoader gatewayBaseURL string timeout time.Duration logger *slog.Logger @@ -70,6 +84,11 @@ func NewResolver( } } +// SetSkillLoader sets the skill loader used to populate usable skills in gateway requests. +func (r *Resolver) SetSkillLoader(sl SkillLoader) { + r.skillLoader = sl +} + // --- gateway payload --- type gatewayModelConfig struct { @@ -93,6 +112,13 @@ type gatewayIdentity struct { SessionToken string `json:"sessionToken,omitempty"` } +type gatewaySkill struct { + Name string `json:"name"` + Description string `json:"description"` + Content string `json:"content"` + Metadata map[string]any `json:"metadata,omitempty"` +} + type gatewayRequest struct { Model gatewayModelConfig `json:"model"` ActiveContextTime int `json:"activeContextTime"` @@ -101,6 +127,7 @@ type gatewayRequest struct { AllowedActions []string `json:"allowedActions,omitempty"` Messages []ModelMessage `json:"messages"` Skills []string `json:"skills"` + UsableSkills []gatewaySkill `json:"usableSkills"` Query string `json:"query"` Identity gatewayIdentity `json:"identity"` Attachments []any `json:"attachments"` @@ -130,6 +157,7 @@ type triggerScheduleRequest struct { AllowedActions []string `json:"allowedActions,omitempty"` Messages []ModelMessage `json:"messages"` Skills []string `json:"skills"` + UsableSkills []gatewaySkill `json:"usableSkills"` Identity gatewayIdentity `json:"identity"` Attachments []any `json:"attachments"` Schedule gatewaySchedule `json:"schedule"` @@ -191,6 +219,27 @@ func (r *Resolver) resolve(ctx context.Context, req ChatRequest) (resolvedContex skills := dedup(append(historySkills, req.Skills...)) containerID := r.resolveContainerID(ctx, req.BotID, req.ContainerID) + var usableSkills []gatewaySkill + if r.skillLoader != nil { + entries, err := r.skillLoader.LoadSkills(ctx, req.BotID) + if err != nil { + r.logger.Warn("failed to load usable skills", slog.String("bot_id", req.BotID), slog.Any("error", err)) + } else { + usableSkills = make([]gatewaySkill, 0, len(entries)) + for _, e := range entries { + usableSkills = append(usableSkills, gatewaySkill{ + Name: e.Name, + Description: e.Description, + Content: e.Content, + Metadata: e.Metadata, + }) + } + } + } + if usableSkills == nil { + usableSkills = []gatewaySkill{} + } + payload := gatewayRequest{ Model: gatewayModelConfig{ ModelID: chatModel.ModelID, @@ -205,6 +254,7 @@ func (r *Resolver) resolve(ctx context.Context, req ChatRequest) (resolvedContex AllowedActions: req.AllowedActions, Messages: nonNilMessages(messages), Skills: nonNilStrings(skills), + UsableSkills: usableSkills, Query: req.Query, Identity: gatewayIdentity{ BotID: req.BotID, @@ -279,6 +329,7 @@ func (r *Resolver) TriggerSchedule(ctx context.Context, botID string, payload sc AllowedActions: rc.payload.AllowedActions, Messages: rc.payload.Messages, Skills: rc.payload.Skills, + UsableSkills: rc.payload.UsableSkills, Identity: gatewayIdentity{ BotID: rc.payload.Identity.BotID, SessionID: rc.payload.Identity.SessionID, diff --git a/internal/handlers/skills.go b/internal/handlers/skills.go index 80284b29..3fd4169f 100644 --- a/internal/handlers/skills.go +++ b/internal/handlers/skills.go @@ -11,15 +11,17 @@ import ( "time" "github.com/labstack/echo/v4" + "gopkg.in/yaml.v3" "github.com/memohai/memoh/internal/config" mcptools "github.com/memohai/memoh/internal/mcp" ) type SkillItem struct { - Name string `json:"name"` - Description string `json:"description"` - Content string `json:"content"` + Name string `json:"name"` + Description string `json:"description"` + Content string `json:"content"` + Metadata map[string]any `json:"metadata,omitempty"` } type SkillsResponse struct { @@ -64,7 +66,7 @@ func (h *ContainerdHandler) ListSkills(c echo.Context) error { return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } - listPayload, err := h.callMCPTool(ctx, containerID, "fs.list", map[string]any{ + listPayload, err := h.callMCPTool(ctx, containerID, "list", map[string]any{ "path": ".skills", "recursive": false, }) @@ -82,14 +84,16 @@ func (h *ContainerdHandler) ListSkills(c echo.Context) error { if skillPath == "" { continue } - content, err := h.readSkillFile(ctx, containerID, skillPath) + raw, err := h.readSkillFile(ctx, containerID, skillPath) if err != nil { continue } + parsed := parseSkillFile(raw, name) skills = append(skills, SkillItem{ - Name: name, - Description: skillDescription(content), - Content: content, + Name: parsed.Name, + Description: parsed.Description, + Content: parsed.Content, + Metadata: parsed.Metadata, }) } @@ -137,7 +141,7 @@ func (h *ContainerdHandler) UpsertSkills(c echo.Context) error { content = buildSkillContent(name, strings.TrimSpace(skill.Description)) } filePath := path.Join(".skills", name, "SKILL.md") - if _, err := h.callMCPTool(ctx, containerID, "fs.write", map[string]any{ + if _, err := h.callMCPTool(ctx, containerID, "write", map[string]any{ "path": filePath, "content": content, }); err != nil { @@ -186,7 +190,7 @@ func (h *ContainerdHandler) DeleteSkills(c echo.Context) error { return echo.NewHTTPError(http.StatusBadRequest, "invalid skill name") } deletePath := path.Join(".skills", skillName) - if _, err := h.callMCPTool(ctx, containerID, "fs.delete", map[string]any{ + if _, err := h.callMCPTool(ctx, containerID, "delete", map[string]any{ "path": deletePath, }); err != nil { return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) @@ -196,6 +200,53 @@ func (h *ContainerdHandler) DeleteSkills(c echo.Context) error { return c.JSON(http.StatusOK, skillsOpResponse{OK: true}) } +// LoadSkills loads all skills from the container for the given bot. +// This implements chat.SkillLoader. +func (h *ContainerdHandler) LoadSkills(ctx context.Context, botID string) ([]SkillItem, error) { + containerID, err := h.botContainerID(ctx, botID) + if err != nil { + return nil, err + } + if err := h.ensureTaskRunning(ctx, containerID); err != nil { + return nil, err + } + if err := h.ensureSkillsDirHost(botID); err != nil { + return nil, err + } + + listPayload, err := h.callMCPTool(ctx, containerID, "list", map[string]any{ + "path": ".skills", + "recursive": false, + }) + if err != nil { + return nil, err + } + entries, err := extractListEntries(listPayload) + if err != nil { + return nil, err + } + + skills := make([]SkillItem, 0, len(entries)) + for _, entry := range entries { + skillPath, name := skillPathForEntry(entry) + if skillPath == "" { + continue + } + raw, err := h.readSkillFile(ctx, containerID, skillPath) + if err != nil { + continue + } + parsed := parseSkillFile(raw, name) + skills = append(skills, SkillItem{ + Name: parsed.Name, + Description: parsed.Description, + Content: parsed.Content, + Metadata: parsed.Metadata, + }) + } + return skills, nil +} + func (h *ContainerdHandler) ensureSkillsDirHost(botID string) error { dataRoot := strings.TrimSpace(h.cfg.DataRoot) if dataRoot == "" { @@ -206,7 +257,7 @@ func (h *ContainerdHandler) ensureSkillsDirHost(botID string) error { } func (h *ContainerdHandler) readSkillFile(ctx context.Context, containerID, filePath string) (string, error) { - payload, err := h.callMCPTool(ctx, containerID, "fs.read", map[string]any{ + payload, err := h.callMCPTool(ctx, containerID, "read", map[string]any{ "path": filePath, }) if err != nil { @@ -309,26 +360,73 @@ func skillPathForEntry(entry skillEntry) (string, string) { return "", "" } -func skillDescription(content string) string { - lines := strings.Split(content, "\n") - for _, line := range lines { - line = strings.TrimSpace(line) - if line == "" { - continue - } - if strings.HasPrefix(line, "#") { - return strings.TrimSpace(strings.TrimPrefix(line, "#")) - } - return line +// parsedSkill holds the result of parsing a SKILL.md file with YAML frontmatter. +type parsedSkill struct { + Name string + Description string + Content string // body after frontmatter + Metadata map[string]any // "metadata" key from frontmatter +} + +// parseSkillFile parses a SKILL.md file with YAML frontmatter delimited by "---". +// Format: +// +// --- +// name: your-skill-name +// description: Brief description +// metadata: +// key: value +// --- +// # Body content ... +func parseSkillFile(raw string, fallbackName string) parsedSkill { + result := parsedSkill{Name: fallbackName} + + trimmed := strings.TrimSpace(raw) + if !strings.HasPrefix(trimmed, "---") { + return result } - return "" + + // Find closing "---". + rest := trimmed[3:] + rest = strings.TrimLeft(rest, " \t") + if len(rest) > 0 && rest[0] == '\n' { + rest = rest[1:] + } else if len(rest) > 1 && rest[0] == '\r' && rest[1] == '\n' { + rest = rest[2:] + } + closingIdx := strings.Index(rest, "\n---") + if closingIdx < 0 { + return result + } + + frontmatterRaw := rest[:closingIdx] + body := rest[closingIdx+4:] + body = strings.TrimLeft(body, "\r\n") + result.Content = body + + var fm struct { + Name string `yaml:"name"` + Description string `yaml:"description"` + Metadata map[string]any `yaml:"metadata"` + } + if err := yaml.Unmarshal([]byte(frontmatterRaw), &fm); err != nil { + return result + } + + if strings.TrimSpace(fm.Name) != "" { + result.Name = strings.TrimSpace(fm.Name) + } + result.Description = strings.TrimSpace(fm.Description) + result.Metadata = fm.Metadata + + return result } func buildSkillContent(name, description string) string { if description == "" { - return "# " + name + description = name } - return "# " + name + "\n\n" + description + return "---\nname: " + name + "\ndescription: " + description + "\n---\n\n# " + name + "\n\n" + description } func isValidSkillName(name string) bool {