Files
Memoh/assets/zh_getting-started_compaction.md.Hrp0kpsz.js
2026-04-24 06:37:27 +00:00

2 lines
6.1 KiB
JavaScript
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Prebuilt VitePress page module for zh/getting-started/compaction.md.
//
// NOTE(review): this file is generated build output. The wording fixes made
// here (拉長 -> 拉长, the missing closing parenthesis, the missing colons in the
// "接下来" list) should also be applied to the markdown source, otherwise the
// next build will reintroduce them.

// The local aliases below are inferred from how each helper is called in this
// file — confirm against the framework chunk before relying on the names.
import {
  _ as exportSfc,
  o as openBlock,
  c as createElementBlock,
  ag as createStaticVNode,
} from "./chunks/framework.CAXxHpAX.js";

// Page metadata, exported as `__pageData`.
const pageData = JSON.parse(
  '{"title":"会话上下文压缩","description":"","frontmatter":{},"headers":[],"relativePath":"zh/getting-started/compaction.md","filePath":"zh/getting-started/compaction.md","lastUpdated":1777012567000}',
);

// Component options object; the render function is attached to it below.
const pageComponent = { name: "zh/getting-started/compaction.md" };

// Second argument handed to createStaticVNode. It equals the number of
// top-level elements in PAGE_HTML_PARTS, so keep one array entry per element.
const STATIC_NODE_COUNT = 40;

// Pre-rendered page body, one array entry per top-level HTML element. Every
// literal stays on a single physical line: a raw line break inside a quoted
// string literal is a syntax error.
//
// NOTE(review): the repository viewer reports invisible Unicode characters in
// this file. Presumably these are the zero-width spaces that VitePress places
// inside each header anchor; they are written as explicit \u200B escapes here
// so they are visible in review — confirm against the raw bytes.
const PAGE_HTML_PARTS = [
  '<h1 id="会话上下文压缩" tabindex="-1">会话上下文压缩 <a class="header-anchor" href="#会话上下文压缩" aria-label="Permalink to &quot;会话上下文压缩&quot;">\u200B</a></h1>',
  '<p><strong>上下文压缩</strong>只针对<strong>当前这一会话</strong>:把早先轮次压成摘要,让后面模型调用时带的<strong>活跃窗口</strong>小一点。</p>',
  '<p>这和<strong>改记忆库里存的长记忆</strong>不是一码事。要动存储条目,看 <a href="/zh/getting-started/memory.html">长期记忆</a>。</p>',
  '<hr>',
  '<h2 id="为什么需要" tabindex="-1">为什么需要 <a class="header-anchor" href="#为什么需要" aria-label="Permalink to &quot;为什么需要&quot;">\u200B</a></h2>',
  '<p>对话一拉长,回给模型的历史就膨胀:<strong>token、延迟、吃满 context</strong> 都难受,老内容还可能挤掉新内容。压缩用摘要换掉一部分细节,<strong>还保留点连贯性</strong>即可。</p>',
  '<hr>',
  '<h2 id="它动什么、不动什么" tabindex="-1">它动什么、不动什么 <a class="header-anchor" href="#它动什么、不动什么" aria-label="Permalink to &quot;它动什么、不动什么&quot;">\u200B</a></h2>',
  '<p>动的是<strong>本会话的活跃上下文</strong>。</p>',
  '<p>不动:</p>',
  '<ul><li>不删机器人本身</li><li>不换记忆提供方配置</li><li>不合并长期记忆条目的主流程(那是记忆 tab 里另一类 compaction)</li><li>不替代「去记忆库里搜」</li></ul>',
  '<hr>',
  '<h2 id="自动" tabindex="-1">自动 <a class="header-anchor" href="#自动" aria-label="Permalink to &quot;自动&quot;">\u200B</a></h2>',
  '<p>在 <strong>General</strong>(或你版本里放压缩设置的地方)里配:</p>',
  '<table tabindex="0"><thead><tr><th>字段</th><th>说明</th></tr></thead><tbody><tr><td><strong>Compaction Enabled</strong></td><td>开不开自动压</td></tr><tr><td><strong>Compaction Threshold</strong></td><td>估算超多少 token 就触发后台压缩</td></tr><tr><td><strong>Compaction Ratio</strong></td><td>压多狠</td></tr><tr><td><strong>Compaction Model</strong></td><td>用谁写摘要,可与主 chat 不同</td></tr></tbody></table>',
  '<p>打开后,Memoh 在某一<strong>轮</strong>之后,若估摸输入已超阈值,会后台做压缩。<code>context_window</code> 会参与「快满了」的感觉。</p>',
  '<hr>',
  '<h2 id="立刻压" tabindex="-1">立刻压 <a class="header-anchor" href="#立刻压" aria-label="Permalink to &quot;立刻压&quot;">\u200B</a></h2>',
  '<p>两路:</p>',
  '<h3 id="状态区" tabindex="-1">状态区 <a class="header-anchor" href="#状态区" aria-label="Permalink to &quot;状态区&quot;">\u200B</a></h3>',
  '<ol><li>打开当前对话。</li><li>打开会话状态区。</li><li>点 <strong>Compact Now</strong>。</li></ol>',
  '<p>上面也有上下文占用、缓存、技能等,方便你判断要不要现在压。</p>',
  '<h3 id="斜杠命令" tabindex="-1">斜杠命令 <a class="header-anchor" href="#斜杠命令" aria-label="Permalink to &quot;斜杠命令&quot;">\u200B</a></h3>',
  '<div class="language-text vp-adaptive-theme"><button title="Copy Code" class="copy"></button><span class="lang">text</span><pre class="shiki shiki-themes github-light github-dark vp-code" tabindex="0"><code><span class="line"><span>/compact</span></span></code></pre></div>',
  '<p>或</p>',
  '<div class="language-text vp-adaptive-theme"><button title="Copy Code" class="copy"></button><span class="lang">text</span><pre class="shiki shiki-themes github-light github-dark vp-code" tabindex="0"><code><span class="line"><span>/compact run</span></span></code></pre></div>',
  '<p>同步压当前会话,结果会回到聊天里。</p>',
  '<hr>',
  '<h2 id="记录" tabindex="-1">记录 <a class="header-anchor" href="#记录" aria-label="Permalink to &quot;记录&quot;">\u200B</a></h2>',
  '<p>机器人详情 <strong>Compaction</strong> tab 可看各次:成功/失败、摘要或预览、涉多少条消息、起止时间、若有的模型与用量。用来确认<strong>自动</strong>有没有在跑、失败原因等。</p>',
  '<hr>',
  '<h2 id="和-context-window" tabindex="-1">和 <code>context_window</code> <a class="header-anchor" href="#和-context-window" aria-label="Permalink to &quot;和 `context_window`&quot;">\u200B</a></h2>',
  '<p>Memoh 会拿当前选中的 chat 模型的 <code>context_window</code> 对照本会话。网页状态区、<code>/status</code> 都能看到离上限多近。越满,<strong>专门建一个便宜点的压缩模型</strong>写摘要,往往越划算。</p>',
  '<hr>',
  '<h2 id="和「记忆压缩」的区别" tabindex="-1">和「记忆压缩」的区别 <a class="header-anchor" href="#和「记忆压缩」的区别" aria-label="Permalink to &quot;和「记忆压缩」的区别&quot;">\u200B</a></h2>',
  '<table tabindex="0"><thead><tr><th></th><th>作用范围</th><th>怎么触发</th><th>结果</th></tr></thead><tbody><tr><td><strong>上下文压缩</strong></td><td>当前活跃会话</td><td>状态区 / <code>/compact</code> / 自动</td><td>本会话里较早内容变摘要,方便后面几轮继续聊</td></tr><tr><td><strong>记忆压缩</strong></td><td>长期记忆提供方</td><td>Memory tab</td><td>改库里记忆条目,不是单会话 prompt</td></tr></tbody></table>',
  '<p><strong>一路聊太长了</strong> → 上下文压缩。<br><strong>存下来的记忆又脏又重</strong> → 去 Memory 里做那类维护。</p>',
  '<hr>',
  '<h2 id="接下来" tabindex="-1">接下来 <a class="header-anchor" href="#接下来" aria-label="Permalink to &quot;接下来&quot;">\u200B</a></h2>',
  '<ul><li>会话、Discuss:<a href="/zh/getting-started/sessions.html">会话</a></li><li>斜杠与 <code>/compact</code>:<a href="/zh/getting-started/slash-commands.html">斜杠命令</a></li><li>长期记忆维护:<a href="/zh/getting-started/memory.html">长期记忆</a></li></ul>',
];

const PAGE_HTML = PAGE_HTML_PARTS.join("");

/**
 * Render function for the page: a single <div> wrapping the static page body.
 *
 * @param {unknown} _ctx - First render argument; not used by this page.
 * @param {Array} cache - Per-instance cache; slot 0 holds the static vnode
 *   list so it is created once and reused on later renders.
 * @returns {unknown} Whatever createElementBlock returns for the wrapper div.
 */
function render(_ctx, cache) {
  // Keep the original call order: open the block first, then build children.
  openBlock();
  if (!cache[0]) {
    cache[0] = [createStaticVNode(PAGE_HTML, STATIC_NODE_COUNT)];
  }
  // Pass a fresh array each time, as the original spread expression did.
  return createElementBlock("div", null, [...cache[0]]);
}

const page = exportSfc(pageComponent, [["render", render]]);

export { pageData as __pageData, page as default };