Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 170 additions & 1 deletion apps/cli/ai/ui.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class PromptEditor implements Component, Focusable {
busyMessage: string | null = null;
hints: string[] = [];
statusMessage: string | null = null;
contextUsageLabel: string | null = null;
showBottomBar = true;

get focused(): boolean {
Expand Down Expand Up @@ -193,7 +194,17 @@ class PromptEditor implements Component, Focusable {
activeHints.length > 0
? ' ' + activeHints.map( ( h ) => chalk.dim( h ) ).join( chalk.dim( ' · ' ) )
: '';
const rightPart = this.statusMessage ? chalk.dim( this.statusMessage ) + ' ' : '';
const rightSegments: string[] = [];
if ( this.contextUsageLabel ) {
rightSegments.push( this.contextUsageLabel );
}
if ( this.statusMessage ) {
rightSegments.push( this.statusMessage );
}
const rightPart =
rightSegments.length > 0
? rightSegments.map( ( s ) => chalk.dim( s ) ).join( chalk.dim( ' · ' ) ) + ' '
: '';
if ( leftPart || rightPart ) {
const leftLen = visibleWidth( leftPart );
const rightLen = visibleWidth( rightPart );
Expand Down Expand Up @@ -1597,6 +1608,97 @@ export class AiChatUI {
this.tui.requestRender();
}

/**
* Tracks the last-call prompt size (input + cache reads + cache creations)
* from the most recent assistant message. Unlike `result.modelUsage` — which
* is summed across all agentic iterations in a turn and therefore overstates
* how full the context actually is — this mirrors what was actually sent to
* the model on the latest API call. That's the closest available signal for
* "how full is the context right now".
*/
private lastPromptTokens: number | null = null;

/**
* Largest `contextWindow` observed in any `result.modelUsage` this session.
* Cached so the footer can render mid-turn (while only `assistant` messages
* are streaming in) without waiting for the next `result`.
*/
private knownContextWindow: number | null = null;

/**
 * Record the prompt-size snapshot from an assistant message's `usage` block.
 * Called from `handleMessage` on every `assistant` message so the indicator
 * reflects the latest iteration even mid-turn.
 */
recordAssistantUsage( usage: unknown ): void {
	if ( usage === null || typeof usage !== 'object' ) {
		return;
	}
	// Only the three prompt-side counters matter here; output tokens do not
	// occupy the context window being measured.
	const { input_tokens, cache_read_input_tokens, cache_creation_input_tokens } =
		usage as {
			input_tokens?: number;
			cache_read_input_tokens?: number;
			cache_creation_input_tokens?: number;
		};
	const promptTotal =
		( input_tokens ?? 0 ) +
		( cache_read_input_tokens ?? 0 ) +
		( cache_creation_input_tokens ?? 0 );
	// A zero total means the message carried no usable usage data — keep the
	// previous snapshot rather than blanking the indicator.
	if ( promptTotal <= 0 ) {
		return;
	}
	this.lastPromptTokens = promptTotal;
	this.renderContextUsageLabel();
}

/**
* Cache the largest `contextWindow` from an SDK `result.modelUsage` map.
* Called on every `result` message. We pick the largest window because
* resume/fork sessions can list multiple models and we want the main
* agent's budget, not a side sub-agent's.
*/
updateContextUsage(
modelUsage:
| Record<
string,
{
contextWindow?: number;
}
>
| undefined
): void {
if ( ! modelUsage ) {
return;
}

let window = this.knownContextWindow ?? 0;
for ( const usage of Object.values( modelUsage ) ) {
const candidate = usage.contextWindow ?? 0;
if ( candidate > window ) {
window = candidate;
}
}

if ( window > 0 ) {
this.knownContextWindow = window;
this.renderContextUsageLabel();
}
}

// Render the "Context N%" footer label once both halves of the ratio are
// known: the latest prompt size and the model's context-window budget.
// No-op until both have been observed.
private renderContextUsageLabel(): void {
	if ( this.lastPromptTokens === null || ! this.knownContextWindow ) {
		return;
	}
	// Cap at 100 — compaction races can briefly report a prompt larger than
	// the window, and ">100%" would read as a bug.
	const ratio = this.lastPromptTokens / this.knownContextWindow;
	const percent = Math.min( 100, Math.round( ratio * 100 ) );
	this.editor.contextUsageLabel = sprintf(
		/* translators: %d: percentage of context window consumed */
		__( 'Context %d%%' ),
		percent
	);
	this.tui.requestRender();
}

private busyTimer: ReturnType< typeof setInterval > | null = null;
private busyFrameIndex = 0;
private static readonly BUSY_FRAMES = [ '⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏' ];
Expand Down Expand Up @@ -2034,7 +2136,73 @@ export class AiChatUI {
| { sessionId: string; maxTurnsReached: true; numTurns: number }
| undefined {
switch ( message.type ) {
case 'system': {
// Surface context-management events the SDK emits but that would
// otherwise be invisible to the user (compaction, micro-compaction,
// compacting status). Without this, long turns can appear to "stop
// suddenly" while the SDK is silently shrinking context.
if ( ! ( 'subtype' in message ) ) {
return undefined;
}

if ( message.subtype === 'status' ) {
if ( message.status === 'compacting' ) {
this.setLoaderMessage( __( 'Compacting context…' ) );
}
return undefined;
}

if ( message.subtype === 'compact_boundary' ) {
const meta = message.compact_metadata;
const trigger = meta?.trigger ?? 'auto';
const preTokens = meta?.pre_tokens ?? 0;
this.showInfo(
sprintf(
/* translators: 1: trigger (auto|manual), 2: token count before compaction */
__( 'Context compacted (%1$s, %2$d tokens summarized into a shorter form).' ),
trigger,
preTokens
)
);
return undefined;
}

// `microcompact_boundary` is emitted by the SDK runtime but not in
// the public type, so narrow via a defensive cast. It drops old
// tool-result attachments (e.g. screenshots) to free tokens without
// summarizing the whole transcript.
const systemMessage = message as {
subtype?: string;
microcompactMetadata?: {
tokensSaved?: number;
clearedAttachmentUUIDs?: string[];
};
};
if ( systemMessage.subtype === 'microcompact_boundary' ) {
const tokensSaved = systemMessage.microcompactMetadata?.tokensSaved ?? 0;
const cleared = systemMessage.microcompactMetadata?.clearedAttachmentUUIDs?.length ?? 0;
this.showInfo(
sprintf(
/* translators: 1: number of old attachments dropped, 2: tokens freed */
_n(
'Dropped %1$d old attachment to free %2$d tokens.',
'Dropped %1$d old attachments to free %2$d tokens.',
cleared
),
cleared,
tokensSaved
)
);
return undefined;
}

return undefined;
}

case 'assistant': {
// Capture per-call prompt size so the footer reflects the latest
// iteration's context fill, not the per-turn cumulative total.
this.recordAssistantUsage( ( message.message as { usage?: unknown } ).usage );
for ( const block of message.message.content ) {
if ( block.type === 'text' ) {
this.hideLoader();
Expand Down Expand Up @@ -2123,6 +2291,7 @@ export class AiChatUI {
}
case 'result': {
this.hideLoader();
this.updateContextUsage( message.modelUsage );
if ( message.subtype === 'success' ) {
const thinkingSec = Math.round( ( this.nowMs() - this.turnStartTime ) / 1000 );
if ( ! this.hasShownResponseMarker ) {
Expand Down
Loading