Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 170 additions & 1 deletion apps/cli/ai/ui.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class PromptEditor implements Component, Focusable {
busyMessage: string | null = null;
hints: string[] = [];
statusMessage: string | null = null;
contextUsageLabel: string | null = null;
showBottomBar = true;

get focused(): boolean {
Expand Down Expand Up @@ -193,7 +194,17 @@ class PromptEditor implements Component, Focusable {
activeHints.length > 0
? ' ' + activeHints.map( ( h ) => chalk.dim( h ) ).join( chalk.dim( ' · ' ) )
: '';
const rightPart = this.statusMessage ? chalk.dim( this.statusMessage ) + ' ' : '';
const rightSegments: string[] = [];
if ( this.contextUsageLabel ) {
rightSegments.push( this.contextUsageLabel );
}
if ( this.statusMessage ) {
rightSegments.push( this.statusMessage );
}
const rightPart =
rightSegments.length > 0
? rightSegments.map( ( s ) => chalk.dim( s ) ).join( chalk.dim( ' · ' ) ) + ' '
: '';
if ( leftPart || rightPart ) {
const leftLen = visibleWidth( leftPart );
const rightLen = visibleWidth( rightPart );
Expand Down Expand Up @@ -1597,6 +1608,97 @@ export class AiChatUI {
this.tui.requestRender();
}

/**
* Tracks the last-call prompt size (input + cache reads + cache creations)
* from the most recent assistant message. Unlike `result.modelUsage` — which
* is summed across all agentic iterations in a turn and therefore overstates
* how full the context actually is — this mirrors what was actually sent to
* the model on the latest API call. That's the closest available signal for
* "how full is the context right now".
*/
private lastPromptTokens: number | null = null;

/**
* Largest `contextWindow` observed in any `result.modelUsage` this session.
* Cached so the footer can render mid-turn (while only `assistant` messages
* are streaming in) without waiting for the next `result`.
*/
private knownContextWindow: number | null = null;

/**
 * Record the prompt-size snapshot from an assistant message's `usage` block.
 * Called from `handleMessage` on every `assistant` message so the indicator
 * reflects the latest iteration even mid-turn.
 */
recordAssistantUsage( usage: unknown ): void {
	if ( usage === null || typeof usage !== 'object' ) {
		return;
	}
	// Only the three prompt-side counters matter here; output tokens do not
	// occupy the context window being measured.
	const { input_tokens, cache_read_input_tokens, cache_creation_input_tokens } =
		usage as {
			input_tokens?: number;
			cache_read_input_tokens?: number;
			cache_creation_input_tokens?: number;
		};
	const promptTotal =
		( input_tokens ?? 0 ) +
		( cache_read_input_tokens ?? 0 ) +
		( cache_creation_input_tokens ?? 0 );
	// A zero total means the message carried no usable usage data — keep the
	// previous snapshot rather than blanking the indicator.
	if ( promptTotal <= 0 ) {
		return;
	}
	this.lastPromptTokens = promptTotal;
	this.renderContextUsageLabel();
}

/**
* Cache the largest `contextWindow` from an SDK `result.modelUsage` map.
* Called on every `result` message. We pick the largest window because
* resume/fork sessions can list multiple models and we want the main
* agent's budget, not a side sub-agent's.
*/
updateContextUsage(
modelUsage:
| Record<
string,
{
contextWindow?: number;
}
>
| undefined
): void {
if ( ! modelUsage ) {
return;
}

let window = this.knownContextWindow ?? 0;
for ( const usage of Object.values( modelUsage ) ) {
const candidate = usage.contextWindow ?? 0;
if ( candidate > window ) {
window = candidate;
}
}

if ( window > 0 ) {
this.knownContextWindow = window;
this.renderContextUsageLabel();
}
}

// Render the "Context N%" footer label once both halves of the ratio are
// known: the latest prompt size and the model's context-window budget.
// No-op until both have been observed.
private renderContextUsageLabel(): void {
	if ( this.lastPromptTokens === null || ! this.knownContextWindow ) {
		return;
	}
	// Cap at 100 — compaction races can briefly report a prompt larger than
	// the window, and ">100%" would read as a bug.
	const ratio = this.lastPromptTokens / this.knownContextWindow;
	const percent = Math.min( 100, Math.round( ratio * 100 ) );
	this.editor.contextUsageLabel = sprintf(
		/* translators: %d: percentage of context window consumed */
		__( 'Context %d%%' ),
		percent
	);
	this.tui.requestRender();
}

private busyTimer: ReturnType< typeof setInterval > | null = null;
private busyFrameIndex = 0;
private static readonly BUSY_FRAMES = [ '⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏' ];
Expand Down Expand Up @@ -2034,7 +2136,73 @@ export class AiChatUI {
| { sessionId: string; maxTurnsReached: true; numTurns: number }
| undefined {
switch ( message.type ) {
case 'system': {
// Surface context-management events the SDK emits but that would
// otherwise be invisible to the user (compaction, micro-compaction,
// compacting status). Without this, long turns can appear to "stop
// suddenly" while the SDK is silently shrinking context.
if ( ! ( 'subtype' in message ) ) {
return undefined;
}

if ( message.subtype === 'status' ) {
if ( message.status === 'compacting' ) {
this.setLoaderMessage( __( 'Compacting context…' ) );
}
return undefined;
}

if ( message.subtype === 'compact_boundary' ) {
const meta = message.compact_metadata;
const trigger = meta?.trigger ?? 'auto';
const preTokens = meta?.pre_tokens ?? 0;
this.showInfo(
sprintf(
/* translators: 1: trigger (auto|manual), 2: token count before compaction */
__( 'Context compacted (%1$s, %2$d tokens summarized into a shorter form).' ),
trigger,
preTokens
)
);
return undefined;
}

// `microcompact_boundary` is emitted by the SDK runtime but not in
// the public type, so narrow via a defensive cast. It drops old
// tool-result attachments (e.g. screenshots) to free tokens without
// summarizing the whole transcript.
const systemMessage = message as {
subtype?: string;
microcompactMetadata?: {
tokensSaved?: number;
clearedAttachmentUUIDs?: string[];
};
};
if ( systemMessage.subtype === 'microcompact_boundary' ) {
const tokensSaved = systemMessage.microcompactMetadata?.tokensSaved ?? 0;
const cleared = systemMessage.microcompactMetadata?.clearedAttachmentUUIDs?.length ?? 0;
this.showInfo(
sprintf(
/* translators: 1: number of old attachments dropped, 2: tokens freed */
_n(
'Dropped %1$d old attachment to free %2$d tokens.',
'Dropped %1$d old attachments to free %2$d tokens.',
cleared
),
cleared,
tokensSaved
)
);
return undefined;
}

return undefined;
}

case 'assistant': {
// Capture per-call prompt size so the footer reflects the latest
// iteration's context fill, not the per-turn cumulative total.
this.recordAssistantUsage( ( message.message as { usage?: unknown } ).usage );
for ( const block of message.message.content ) {
if ( block.type === 'text' ) {
this.hideLoader();
Expand Down Expand Up @@ -2123,6 +2291,7 @@ export class AiChatUI {
}
case 'result': {
this.hideLoader();
this.updateContextUsage( message.modelUsage );
if ( message.subtype === 'success' ) {
const thinkingSec = Math.round( ( this.nowMs() - this.turnStartTime ) / 1000 );
if ( ! this.hasShownResponseMarker ) {
Expand Down
Loading