diff --git a/apps/cli/ai/ui.ts b/apps/cli/ai/ui.ts
index 06d703620d..18a2994b34 100644
--- a/apps/cli/ai/ui.ts
+++ b/apps/cli/ai/ui.ts
@@ -83,6 +83,8 @@ class PromptEditor implements Component, Focusable {
 	busyMessage: string | null = null;
 	hints: string[] = [];
 	statusMessage: string | null = null;
+	/** Pre-formatted context usage text shown dimmed in the bottom bar; null hides it. */
+	contextUsageLabel: string | null = null;
 	showBottomBar = true;
 
 	get focused(): boolean {
@@ -193,7 +195,17 @@ class PromptEditor implements Component, Focusable {
 				activeHints.length > 0
 					? ' ' + activeHints.map( ( h ) => chalk.dim( h ) ).join( chalk.dim( ' · ' ) )
 					: '';
-			const rightPart = this.statusMessage ? chalk.dim( this.statusMessage ) + ' ' : '';
+			const rightSegments: string[] = [];
+			if ( this.contextUsageLabel ) {
+				rightSegments.push( this.contextUsageLabel );
+			}
+			if ( this.statusMessage ) {
+				rightSegments.push( this.statusMessage );
+			}
+			const rightPart =
+				rightSegments.length > 0
+					? rightSegments.map( ( s ) => chalk.dim( s ) ).join( chalk.dim( ' · ' ) ) + ' '
+					: '';
 			if ( leftPart || rightPart ) {
 				const leftLen = visibleWidth( leftPart );
 				const rightLen = visibleWidth( rightPart );
@@ -1597,6 +1609,127 @@ export class AiChatUI {
 		this.tui.requestRender();
 	}
 
+	/**
+	 * Tracks the last-call prompt size (input + cache reads + cache creations)
+	 * from the most recent assistant message. Unlike `result.modelUsage` — which
+	 * is summed across all agentic iterations in a turn and therefore overstates
+	 * how full the context actually is — this mirrors what was actually sent to
+	 * the model on the latest API call. That's the closest available signal for
+	 * "how full is the context right now".
+	 */
+	private lastPromptTokens: number | null = null;
+
+	/**
+	 * Largest `contextWindow` observed in any `result.modelUsage` this session.
+	 * Cached so the footer can render mid-turn (while only `assistant` messages
+	 * are streaming in) without waiting for the next `result`.
+	 */
+	private knownContextWindow: number | null = null;
+
+	/**
+	 * Record the prompt-size snapshot from an assistant message's `usage` block.
+	 * Called from `handleMessage` on every `assistant` message so the indicator
+	 * reflects the latest iteration even mid-turn.
+	 */
+	recordAssistantUsage( usage: unknown ): void {
+		if ( ! usage || typeof usage !== 'object' ) {
+			return;
+		}
+		const u = usage as Record< string, unknown >;
+		// The payload is untyped at runtime, so only count finite, positive
+		// numbers; a stray string or NaN would otherwise poison the sum.
+		const tokens = ( value: unknown ): number =>
+			typeof value === 'number' && Number.isFinite( value ) && value > 0 ? value : 0;
+		const total =
+			tokens( u.input_tokens ) +
+			tokens( u.cache_read_input_tokens ) +
+			tokens( u.cache_creation_input_tokens );
+		if ( total > 0 ) {
+			this.lastPromptTokens = total;
+			this.renderContextUsageLabel();
+		}
+	}
+
+	/**
+	 * Cache the largest `contextWindow` from an SDK `result.modelUsage` map.
+	 * Called on every `result` message. We pick the largest window because
+	 * resume/fork sessions can list multiple models and we want the main
+	 * agent's budget, not a side sub-agent's.
+	 */
+	updateContextUsage(
+		modelUsage:
+			| Record<
+					string,
+					{
+						contextWindow?: number;
+					}
+			  >
+			| undefined
+	): void {
+		if ( ! modelUsage ) {
+			return;
+		}
+
+		// Named `largest` rather than `window` so the global is not shadowed.
+		let largest = this.knownContextWindow ?? 0;
+		for ( const usage of Object.values( modelUsage ) ) {
+			// Entries come from the SDK at runtime; tolerate null entries and
+			// non-finite values instead of throwing or caching garbage.
+			const candidate = usage?.contextWindow;
+			if (
+				typeof candidate === 'number' &&
+				Number.isFinite( candidate ) &&
+				candidate > largest
+			) {
+				largest = candidate;
+			}
+		}
+
+		if ( largest > 0 ) {
+			this.knownContextWindow = largest;
+			this.renderContextUsageLabel();
+		}
+	}
+
+	/**
+	 * Push the "Context N%" label to the editor footer once both the latest
+	 * prompt size and a context window are known; otherwise leave it untouched.
+	 */
+	private renderContextUsageLabel(): void {
+		if ( this.lastPromptTokens === null || ! this.knownContextWindow ) {
+			return;
+		}
+		const percent = Math.min(
+			100,
+			Math.round( ( this.lastPromptTokens / this.knownContextWindow ) * 100 )
+		);
+		const label = sprintf(
+			/* translators: %d: percentage of context window consumed */
+			__( 'Context %d%%' ),
+			percent
+		);
+		// Assistant messages can arrive in quick succession; skip the repaint
+		// when the rounded percentage has not moved.
+		if ( label === this.editor.contextUsageLabel ) {
+			return;
+		}
+		this.editor.contextUsageLabel = label;
+		this.tui.requestRender();
+	}
+
+	/**
+	 * Forget the last prompt-size snapshot and hide the footer label. Used after
+	 * compaction, when the previous snapshot no longer describes the transcript
+	 * and no fresh measurement exists until the next assistant message.
+	 */
+	private clearContextUsage(): void {
+		this.lastPromptTokens = null;
+		if ( this.editor.contextUsageLabel !== null ) {
+			this.editor.contextUsageLabel = null;
+			this.tui.requestRender();
+		}
+	}
+
 	private busyTimer: ReturnType< typeof setInterval > | null = null;
 	private busyFrameIndex = 0;
 	private static readonly BUSY_FRAMES = [ '⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏' ];
@@ -2034,7 +2167,76 @@ export class AiChatUI {
 		| { sessionId: string; maxTurnsReached: true; numTurns: number }
 		| undefined {
 		switch ( message.type ) {
+			case 'system': {
+				// Surface context-management events the SDK emits but that would
+				// otherwise be invisible to the user (compaction, micro-compaction,
+				// compacting status). Without this, long turns can appear to "stop
+				// suddenly" while the SDK is silently shrinking context.
+				if ( ! ( 'subtype' in message ) ) {
+					return undefined;
+				}
+
+				if ( message.subtype === 'status' ) {
+					if ( message.status === 'compacting' ) {
+						this.setLoaderMessage( __( 'Compacting context…' ) );
+					}
+					return undefined;
+				}
+
+				if ( message.subtype === 'compact_boundary' ) {
+					const meta = message.compact_metadata;
+					const trigger = meta?.trigger ?? 'auto';
+					const preTokens = meta?.pre_tokens ?? 0;
+					// The pre-compaction snapshot is stale now; hide the footer
+					// label until the next assistant message reports a fresh size.
+					this.clearContextUsage();
+					this.showInfo(
+						sprintf(
+							/* translators: 1: trigger (auto|manual), 2: token count before compaction */
+							__( 'Context compacted (%1$s, %2$d tokens summarized into a shorter form).' ),
+							trigger,
+							preTokens
+						)
+					);
+					return undefined;
+				}
+
+				// `microcompact_boundary` is emitted by the SDK runtime but not in
+				// the public type, so narrow via a defensive cast. It drops old
+				// tool-result attachments (e.g. screenshots) to free tokens without
+				// summarizing the whole transcript.
+				const systemMessage = message as {
+					subtype?: string;
+					microcompactMetadata?: {
+						tokensSaved?: number;
+						clearedAttachmentUUIDs?: string[];
+					};
+				};
+				if ( systemMessage.subtype === 'microcompact_boundary' ) {
+					const tokensSaved = systemMessage.microcompactMetadata?.tokensSaved ?? 0;
+					const cleared = systemMessage.microcompactMetadata?.clearedAttachmentUUIDs?.length ?? 0;
+					this.showInfo(
+						sprintf(
+							/* translators: 1: number of old attachments dropped, 2: tokens freed */
+							_n(
+								'Dropped %1$d old attachment to free %2$d tokens.',
+								'Dropped %1$d old attachments to free %2$d tokens.',
+								cleared
+							),
+							cleared,
+							tokensSaved
+						)
+					);
+					return undefined;
+				}
+
+				return undefined;
+			}
+
 			case 'assistant': {
+				// Capture per-call prompt size so the footer reflects the latest
+				// iteration's context fill, not the per-turn cumulative total.
+				this.recordAssistantUsage( ( message.message as { usage?: unknown } ).usage );
 				for ( const block of message.message.content ) {
 					if ( block.type === 'text' ) {
 						this.hideLoader();
@@ -2123,6 +2325,7 @@ export class AiChatUI {
 			}
 			case 'result': {
 				this.hideLoader();
+				this.updateContextUsage( message.modelUsage );
 				if ( message.subtype === 'success' ) {
 					const thinkingSec = Math.round( ( this.nowMs() - this.turnStartTime ) / 1000 );
 					if ( ! this.hasShownResponseMarker ) {