diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 61d618e4e..6370f27b7 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -1548,7 +1548,9 @@ async function preflight() { console.log(" ⓘ Running under WSL"); } - // OpenShell CLI + // OpenShell CLI — install if missing, upgrade if below minimum version. + // MIN_VERSION in install-openshell.sh handles the version gate; calling it + // when openshell already exists is safe (it exits early if version is OK). let openshellInstall = { localBin: null, futureShellPathHint: null }; if (!isOpenshellInstalled()) { console.log(" openshell CLI not found. Installing..."); @@ -1558,6 +1560,37 @@ async function preflight() { console.error(" Install manually: https://github.com/NVIDIA/OpenShell/releases"); process.exit(1); } + } else { + // Ensure the installed version meets the minimum required by install-openshell.sh. + // The script itself is idempotent — it exits early if the version is already sufficient. + const currentVersion = getInstalledOpenshellVersion(); + if (!currentVersion) { + console.log(" openshell version could not be determined. Reinstalling..."); + openshellInstall = installOpenshell(); + if (!openshellInstall.installed) { + console.error(" Failed to reinstall openshell CLI."); + console.error(" Install manually: https://github.com/NVIDIA/OpenShell/releases"); + process.exit(1); + } + } else { + const parts = currentVersion.split(".").map(Number); + const minParts = [0, 0, 24]; // must match MIN_VERSION in scripts/install-openshell.sh + const needsUpgrade = + parts[0] < minParts[0] || + (parts[0] === minParts[0] && parts[1] < minParts[1]) || + (parts[0] === minParts[0] && parts[1] === minParts[1] && parts[2] < minParts[2]); + if (needsUpgrade) { + console.log( + ` openshell ${currentVersion} is below minimum required version. Upgrading...`, + ); + openshellInstall = installOpenshell(); + if (!openshellInstall.installed) { + console.error(" Failed to upgrade openshell CLI."); + console.error(" Install manually: https://github.com/NVIDIA/OpenShell/releases"); + process.exit(1); + } + } + } } console.log( ` ✓ openshell CLI: ${runCaptureOpenshell(["--version"], { ignoreError: true }) || "unknown"}`, diff --git a/scripts/brev-launchable-ci-cpu.sh b/scripts/brev-launchable-ci-cpu.sh index 2a1dace33..ec3c82ee2 100755 --- a/scripts/brev-launchable-ci-cpu.sh +++ b/scripts/brev-launchable-ci-cpu.sh @@ -40,7 +40,7 @@ set -euo pipefail # ── Configuration ──────────────────────────────────────────────────── -OPENSHELL_VERSION="${OPENSHELL_VERSION:-v0.0.20}" +OPENSHELL_VERSION="${OPENSHELL_VERSION:-v0.0.24}" NEMOCLAW_REF="${NEMOCLAW_REF:-main}" TARGET_USER="${SUDO_USER:-$(id -un)}" TARGET_HOME="$(getent passwd "$TARGET_USER" | cut -d: -f6)" @@ -161,7 +161,27 @@ fi # 4. OpenShell CLI # ══════════════════════════════════════════════════════════════════════ if command -v openshell >/dev/null 2>&1; then - info "OpenShell CLI already installed: $(openshell --version 2>&1 || echo unknown)" + _installed_ver="$(openshell --version 2>&1 | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo '0.0.0')" + _pinned_ver="${OPENSHELL_VERSION#v}" # strip leading 'v' + if [ "$_installed_ver" = "$_pinned_ver" ]; then + info "OpenShell CLI already installed at pinned version: $_installed_ver" + else + info "OpenShell CLI $_installed_ver does not match pinned ${_pinned_ver} — reinstalling..." + ARCH="$(uname -m)" + case "$ARCH" in + x86_64 | amd64) ASSET="openshell-x86_64-unknown-linux-musl.tar.gz" ;; + aarch64 | arm64) ASSET="openshell-aarch64-unknown-linux-musl.tar.gz" ;; + *) fail "Unsupported architecture: $ARCH" ;; + esac + tmpdir="$(mktemp -d)" + retry 3 10 "download openshell" \ + curl -fsSL -o "$tmpdir/$ASSET" \ + "https://github.com/NVIDIA/OpenShell/releases/download/${OPENSHELL_VERSION}/${ASSET}" + tar xzf "$tmpdir/$ASSET" -C "$tmpdir" + sudo install -m 755 "$tmpdir/openshell" /usr/local/bin/openshell + rm -rf "$tmpdir" + info "OpenShell CLI upgraded: $(openshell --version 2>&1 || echo unknown)" + fi else info "Installing OpenShell CLI ${OPENSHELL_VERSION}..." ARCH="$(uname -m)" diff --git a/scripts/install-openshell.sh b/scripts/install-openshell.sh index e0118436f..a5bfae37a 100755 --- a/scripts/install-openshell.sh +++ b/scripts/install-openshell.sh @@ -35,7 +35,7 @@ info "Detected $OS_LABEL ($ARCH_LABEL)" # Minimum version required for sandbox persistence across gateway restarts # (deterministic k3s node name + workspace PVC: NVIDIA/OpenShell#739, #488) -MIN_VERSION="0.0.22" +MIN_VERSION="0.0.24" version_gte() { # Returns 0 (true) if $1 >= $2 — portable, no sort -V (BSD compat) diff --git a/test/e2e/test-sandbox-survival.sh b/test/e2e/test-sandbox-survival.sh index 01cecdc72..b84473faf 100755 --- a/test/e2e/test-sandbox-survival.sh +++ b/test/e2e/test-sandbox-survival.sh @@ -2,28 +2,41 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -# Sandbox survival across gateway restart (REAL inference, no mocks): -# - prove sandbox pods survive gateway stop/start (laptop close/open) -# - prove workspace files persist across the restart cycle -# - prove SSH connectivity resumes without re-onboarding -# - prove LIVE inference through NVIDIA Endpoints works after gateway resume +# Sandbox survival across gateway restart — end-to-end proof. # -# Requires OpenShell >= 0.0.22 (deterministic node name + workspace PVC). +# Validates EVERY complaint from NVIDIA/NemoClaw#486, #888, #859, #1086: +# 1. Sandbox is discoverable after restart (not "No sandboxes registered") +# 2. SSH connectivity resumes (no handshake verification failure) +# 3. Workspace files in /sandbox/ persist +# 4. OpenClaw agent data persists (/sandbox/.openclaw-data/) +# 5. No re-onboard required (nemoclaw status/connect work) +# 6. Live inference works end-to-end after restart +# 7. NemoClaw registry retains sandbox entry +# 8. Gateway stop/start is non-destructive +# +# This test uses NemoClaw's own install.sh to set up everything including +# OpenShell — we are the installer, we test the installer. +# +# Requires OpenShell >= 0.0.24 (gateway resume + SSH secret persistence + +# sandbox state persistence: NVIDIA/OpenShell#488, #739). # # Prerequisites: # - Docker running -# - openshell >= 0.0.22 installed # - NVIDIA_API_KEY set (real key, starts with nvapi-) # - Network access to integrate.api.nvidia.com # # Environment variables: -# NEMOCLAW_NON_INTERACTIVE=1 — required -# NVIDIA_API_KEY — required for real NVIDIA Endpoints inference -# NEMOCLAW_SANDBOX_NAME — sandbox name (default: e2e-survival) -# NEMOCLAW_E2E_TIMEOUT_SECONDS — overall timeout (default: 900) +# NEMOCLAW_NON_INTERACTIVE=1 — required +# NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required +# NVIDIA_API_KEY — required for real NVIDIA Endpoints inference +# NEMOCLAW_SANDBOX_NAME — sandbox name (default: e2e-survival) +# NEMOCLAW_E2E_TIMEOUT_SECONDS — overall timeout (default: 900) # # Usage: -# NEMOCLAW_NON_INTERACTIVE=1 NVIDIA_API_KEY=nvapi-... bash test/e2e/test-sandbox-survival.sh +# NEMOCLAW_NON_INTERACTIVE=1 \ +# NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ +# NVIDIA_API_KEY=nvapi-... \ +# bash test/e2e/test-sandbox-survival.sh set -uo pipefail @@ -79,38 +92,38 @@ except Exception as e: " } +# Compare semver: returns 0 if $1 >= $2 +version_gte() { + [ "$(printf '%s\n%s\n' "$2" "$1" | sort -V | head -1)" = "$2" ] +} + SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-survival}" REGISTRY="$HOME/.nemoclaw/sandboxes.json" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" -MIN_OPENSHELL="0.0.22" +MIN_OPENSHELL="0.0.24" MODEL="nvidia/nemotron-3-super-120b-a12b" -# Resolve nemoclaw command — prefer local repo checkout over PATH -if command -v node >/dev/null 2>&1 && [ -f "$REPO_ROOT/bin/nemoclaw.js" ]; then - NEMOCLAW_CMD=(node "$REPO_ROOT/bin/nemoclaw.js") -else - NEMOCLAW_CMD=(nemoclaw) -fi - -run_nemoclaw() { "${NEMOCLAW_CMD[@]}" "$@"; } - -registry_has() { - local name="$1" - [ -f "$REGISTRY" ] && python3 - "$REGISTRY" "$name" <<'PY' -import json, sys -with open(sys.argv[1], encoding="utf-8") as fh: - sandboxes = json.load(fh).get("sandboxes", {}) -if isinstance(sandboxes, dict): - sys.exit(0 if sys.argv[2] in sandboxes else 1) -else: - sys.exit(0 if any(sb.get("name") == sys.argv[2] for sb in sandboxes) else 1) -PY +# SSH helper — sets up SSH config and common options for sandbox access +# Sets: ssh_config, SSH_OPTS, SSH_TARGET, TIMEOUT_CMD +setup_ssh() { + ssh_config="$(mktemp)" + if ! openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config" 2>/dev/null; then + rm -f "$ssh_config" + ssh_config="" + return 1 + fi + SSH_OPTS=(-F "$ssh_config" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o LogLevel=ERROR) + SSH_TARGET="openshell-${SANDBOX_NAME}" + TIMEOUT_CMD="" + command -v timeout >/dev/null 2>&1 && TIMEOUT_CMD="timeout 90" + command -v gtimeout >/dev/null 2>&1 && TIMEOUT_CMD="gtimeout 90" + return 0 } -# Compare semver: returns 0 if $1 >= $2 -version_gte() { - [ "$(printf '%s\n%s\n' "$2" "$1" | sort -V | head -1)" = "$2" ] +cleanup_ssh() { + [ -n "${ssh_config:-}" ] && rm -f "$ssh_config" + ssh_config="" } # ══════════════════════════════════════════════════════════════════ @@ -125,20 +138,6 @@ else exit 1 fi -if ! command -v openshell >/dev/null 2>&1; then - fail "openshell not found on PATH" - exit 1 -fi - -OPENSHELL_VERSION=$(openshell --version 2>&1 | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1) -if version_gte "$OPENSHELL_VERSION" "$MIN_OPENSHELL"; then - pass "openshell $OPENSHELL_VERSION >= $MIN_OPENSHELL (gateway resume + workspace PVC)" -else - fail "openshell $OPENSHELL_VERSION < $MIN_OPENSHELL — sandbox survival requires v0.0.22+" - info "Install latest: curl -LsSf https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install.sh | OPENSHELL_VERSION=v0.0.22 sh" - exit 1 -fi - if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then pass "NVIDIA_API_KEY is set (starts with nvapi-)" else @@ -158,80 +157,163 @@ if [ "${NEMOCLAW_NON_INTERACTIVE:-}" != "1" ]; then exit 1 fi +if [ "${NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE:-}" != "1" ]; then + fail "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required" + exit 1 +fi + +if [ ! -f "$REPO_ROOT/install.sh" ]; then + fail "Cannot find install.sh at $REPO_ROOT/install.sh" + exit 1 +fi +pass "Repo root found: $REPO_ROOT" + # ══════════════════════════════════════════════════════════════════ # Phase 1: Pre-cleanup # ══════════════════════════════════════════════════════════════════ section "Phase 1: Pre-cleanup" info "Destroying any leftover sandbox/gateway from previous runs..." -run_nemoclaw "$SANDBOX_NAME" destroy --yes 2>/dev/null || true -openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true -openshell gateway destroy -g nemoclaw 2>/dev/null || true +if command -v nemoclaw >/dev/null 2>&1; then + nemoclaw "$SANDBOX_NAME" destroy --yes 2>/dev/null || true +fi +if command -v openshell >/dev/null 2>&1; then + openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true + openshell gateway destroy -g nemoclaw 2>/dev/null || true +fi rm -f "$HOME/.nemoclaw/onboard.lock" 2>/dev/null || true pass "Pre-cleanup complete" # ══════════════════════════════════════════════════════════════════ -# Phase 2: Onboard sandbox with real NVIDIA inference +# Phase 2: Install NemoClaw (which installs OpenShell) # ══════════════════════════════════════════════════════════════════ -section "Phase 2: Onboard sandbox (NVIDIA Endpoints)" +section "Phase 2: Install NemoClaw via install.sh" -info "Running nemoclaw onboard with real NVIDIA inference..." -info "Model: $MODEL" +info "Running install.sh --non-interactive (installs Node.js, OpenShell, NemoClaw, runs onboard)..." -ONBOARD_LOG="$(mktemp)" -# Stream output in real-time (avoid buffering that hides progress/hangs). -# Use tail --pid to auto-stop when onboard exits. +cd "$REPO_ROOT" || { + fail "Could not cd to repo root: $REPO_ROOT" + exit 1 +} + +INSTALL_LOG="$(mktemp)" env \ NEMOCLAW_NON_INTERACTIVE=1 \ + NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ NEMOCLAW_RECREATE_SANDBOX=1 \ - "${NEMOCLAW_CMD[@]}" onboard --non-interactive >"$ONBOARD_LOG" 2>&1 & -onboard_pid=$! -tail -f "$ONBOARD_LOG" --pid=$onboard_pid 2>/dev/null & + bash install.sh --non-interactive >"$INSTALL_LOG" 2>&1 & +install_pid=$! +tail -f "$INSTALL_LOG" --pid=$install_pid 2>/dev/null & tail_pid=$! -wait $onboard_pid -onboard_exit=$? +wait $install_pid +install_exit=$? kill $tail_pid 2>/dev/null || true wait $tail_pid 2>/dev/null || true -rm -f "$ONBOARD_LOG" +rm -f "$INSTALL_LOG" + +# Source shell profile to pick up nvm/PATH changes from install.sh +if [ -f "$HOME/.bashrc" ]; then + # shellcheck source=/dev/null + source "$HOME/.bashrc" 2>/dev/null || true +fi +export NVM_DIR="${NVM_DIR:-$HOME/.nvm}" +if [ -s "$NVM_DIR/nvm.sh" ]; then + # shellcheck source=/dev/null + . "$NVM_DIR/nvm.sh" +fi +if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then + export PATH="$HOME/.local/bin:$PATH" +fi -if [ $onboard_exit -eq 0 ]; then - pass "Onboard completed successfully" +if [ $install_exit -eq 0 ]; then + pass "install.sh completed (exit 0)" else - fail "Onboard failed (exit $onboard_exit)" + fail "install.sh failed (exit $install_exit)" exit 1 fi -if registry_has "$SANDBOX_NAME"; then - pass "Sandbox '$SANDBOX_NAME' registered" +# Verify nemoclaw is on PATH +if command -v nemoclaw >/dev/null 2>&1; then + pass "nemoclaw on PATH: $(command -v nemoclaw)" else - fail "Sandbox '$SANDBOX_NAME' not found in registry" + fail "nemoclaw not found on PATH after install" + exit 1 +fi + +# Verify openshell was installed and meets minimum version +if ! command -v openshell >/dev/null 2>&1; then + fail "openshell not found on PATH after install" + exit 1 +fi + +OPENSHELL_VERSION=$(openshell --version 2>&1 | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1) +if version_gte "$OPENSHELL_VERSION" "$MIN_OPENSHELL"; then + pass "openshell $OPENSHELL_VERSION >= $MIN_OPENSHELL (gateway resume + SSH secret + state persistence)" +else + fail "openshell $OPENSHELL_VERSION < $MIN_OPENSHELL — sandbox survival requires $MIN_OPENSHELL+" exit 1 fi # ══════════════════════════════════════════════════════════════════ -# Phase 3: Prove live inference works BEFORE restart (baseline) +# Phase 3: Verify sandbox is live after install # ══════════════════════════════════════════════════════════════════ -section "Phase 3: Baseline — live inference before restart" +section "Phase 3: Post-install verification" -ssh_config="$(mktemp)" -if openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config" 2>/dev/null; then - pass "Got SSH config for sandbox" +# 3a: NemoClaw registry has it +if [ -f "$REGISTRY" ] && grep -Fq "\"${SANDBOX_NAME}\"" "$REGISTRY"; then + pass "NemoClaw registry contains '$SANDBOX_NAME'" else - fail "Could not get SSH config" - rm -f "$ssh_config" + fail "NemoClaw registry missing '$SANDBOX_NAME' — onboard may have failed" exit 1 fi -SSH_OPTS=(-F "$ssh_config" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o LogLevel=ERROR) -SSH_TARGET="openshell-${SANDBOX_NAME}" +# 3b: nemoclaw list shows it +if list_output=$(nemoclaw list 2>&1) && grep -Fq "$SANDBOX_NAME" <<<"$list_output"; then + pass "nemoclaw list shows '$SANDBOX_NAME'" +else + fail "nemoclaw list doesn't show '$SANDBOX_NAME': ${list_output:0:200}" + exit 1 +fi -# Use timeout if available -TIMEOUT_CMD="" -command -v timeout >/dev/null 2>&1 && TIMEOUT_CMD="timeout 90" -command -v gtimeout >/dev/null 2>&1 && TIMEOUT_CMD="gtimeout 90" +# 3c: openshell sandbox list shows it +if os_list=$(openshell sandbox list 2>&1) && grep -q "$SANDBOX_NAME" <<<"$os_list"; then + pass "openshell sandbox list shows '$SANDBOX_NAME'" +else + fail "openshell sandbox list doesn't show '$SANDBOX_NAME': ${os_list:0:200}" + exit 1 +fi + +# 3d: nemoclaw status works +if status_output=$(nemoclaw "$SANDBOX_NAME" status 2>&1); then + pass "nemoclaw $SANDBOX_NAME status exits 0" +else + fail "nemoclaw $SANDBOX_NAME status failed: ${status_output:0:200}" +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 4: Baseline — prove live inference BEFORE restart +# ══════════════════════════════════════════════════════════════════ +section "Phase 4: Baseline — live inference before restart" + +if ! setup_ssh; then + fail "Could not get SSH config for sandbox" + exit 1 +fi +pass "SSH config obtained" + +# 4a: SSH connectivity +if ssh "${SSH_OPTS[@]}" "$SSH_TARGET" "echo alive" >/dev/null 2>&1; then + pass "SSH into sandbox works (baseline)" +else + fail "SSH into sandbox failed (baseline) — cannot continue" + cleanup_ssh + exit 1 +fi +# 4b: Live inference through sandbox info "[LIVE] Baseline inference: user → sandbox → gateway → NVIDIA Endpoints..." +# shellcheck disable=SC2029 # client-side expansion is intentional baseline_response=$($TIMEOUT_CMD ssh "${SSH_OPTS[@]}" "$SSH_TARGET" \ "curl -s --max-time 60 https://inference.local/v1/chat/completions \ -H 'Content-Type: application/json' \ @@ -249,44 +331,81 @@ else fail "[LIVE] Baseline: expected PONG, got: ${baseline_content:0:200}" info "Raw response: ${baseline_response:0:300}" info "Cannot establish baseline — aborting (survival test meaningless without it)" - rm -f "$ssh_config" + cleanup_ssh exit 1 fi # ══════════════════════════════════════════════════════════════════ -# Phase 4: Plant marker file inside sandbox +# Phase 5: Plant state markers inside sandbox # ══════════════════════════════════════════════════════════════════ -section "Phase 4: Plant marker file in sandbox workspace" +section "Phase 5: Plant state markers in sandbox" MARKER_VALUE="nemoclaw-survival-$(date +%s)" -# Write a marker file into /sandbox (the persistent workspace mount) -# shellcheck disable=SC2029 # client-side expansion is intentional +# 5a: Workspace file in /sandbox/ +# shellcheck disable=SC2029 if ssh "${SSH_OPTS[@]}" "$SSH_TARGET" "echo ${MARKER_VALUE} > /sandbox/.survival-marker" 2>/dev/null; then - pass "Planted marker file: /sandbox/.survival-marker = $MARKER_VALUE" + pass "Planted workspace marker: /sandbox/.survival-marker" else - fail "Could not plant marker file inside sandbox" - rm -f "$ssh_config" - exit 1 + fail "Could not plant workspace marker" fi -# Verify the marker is readable before we restart +# Verify read-back before restart readback=$(ssh "${SSH_OPTS[@]}" "$SSH_TARGET" "cat /sandbox/.survival-marker" 2>/dev/null) if [ "$readback" = "$MARKER_VALUE" ]; then - pass "Marker file read-back verified before restart" + pass "Workspace marker verified before restart" else - fail "Marker file read-back mismatch: expected '$MARKER_VALUE', got '$readback'" + fail "Workspace marker read-back mismatch: expected '$MARKER_VALUE', got '$readback'" fi -rm -f "$ssh_config" +# 5b: Agent data directory — plant marker in .openclaw-data if it exists +# This tests the complaint from #1086 and @Koneisto: agent state loss +# shellcheck disable=SC2029 +agent_data_exists=$(ssh "${SSH_OPTS[@]}" "$SSH_TARGET" \ + "[ -d /sandbox/.openclaw-data ] && echo yes || echo no" 2>/dev/null) +if [ "$agent_data_exists" = "yes" ]; then + # shellcheck disable=SC2029 + if ssh "${SSH_OPTS[@]}" "$SSH_TARGET" \ + "echo ${MARKER_VALUE} > /sandbox/.openclaw-data/.survival-marker" 2>/dev/null; then + pass "Planted agent data marker: /sandbox/.openclaw-data/.survival-marker" + else + fail "Could not plant agent data marker" + fi +else + info "No .openclaw-data directory yet — will check if sandbox itself survives" +fi + +# 5c: Snapshot which agent identity files exist (to verify they survive) +agent_files_before=$(ssh "${SSH_OPTS[@]}" "$SSH_TARGET" \ + "ls -la /sandbox/.openclaw-data/ 2>/dev/null | head -20" 2>/dev/null) || true +if [ -n "$agent_files_before" ]; then + info "Agent data directory contents before restart:" + echo "$agent_files_before" | while IFS= read -r line; do + info " $line" + done +fi + +# 5d: Record a deeper workspace file to test nested persistence +# shellcheck disable=SC2029 +if ssh "${SSH_OPTS[@]}" "$SSH_TARGET" \ + "mkdir -p /sandbox/test-data && echo ${MARKER_VALUE} > /sandbox/test-data/nested-marker.txt" \ + 2>/dev/null; then + pass "Planted nested marker: /sandbox/test-data/nested-marker.txt" +else + fail "Could not plant nested workspace marker" +fi + +cleanup_ssh # ══════════════════════════════════════════════════════════════════ -# Phase 5: Gateway stop/start cycle (simulates laptop close/open) +# Phase 6: Gateway stop/start cycle (simulates reboot) # ══════════════════════════════════════════════════════════════════ -section "Phase 5: Gateway stop/start cycle" +section "Phase 6: Gateway stop/start cycle (simulates host reboot)" -info "Stopping gateway (simulates laptop close / Docker stop)..." +# Stop any port forwards first openshell forward stop 18789 2>/dev/null || true + +info "Stopping gateway (simulates laptop close / VM shutdown)..." if openshell gateway stop -g nemoclaw 2>/dev/null; then pass "Gateway stopped" else @@ -306,10 +425,10 @@ else fail "Docker container still running: state=$container_state" fi -info "Waiting 5 seconds to simulate delay (laptop lid close)..." +info "Waiting 5 seconds to simulate delay (laptop lid close / VM hibernate)..." sleep 5 -info "Starting gateway (simulates laptop open / Docker restart)..." +info "Starting gateway (simulates laptop open / VM boot)..." if openshell gateway start --name nemoclaw 2>&1; then pass "Gateway start command succeeded" else @@ -317,8 +436,7 @@ else info "Gateway start returned non-zero — checking health..." fi -# Wait for gateway to become healthy — verify both "Connected" status and -# active gateway is "nemoclaw" (matches production health predicate). +# Wait for gateway to become healthy info "Waiting for gateway to become healthy..." HEALTHY=0 for attempt in $(seq 1 60); do @@ -339,19 +457,19 @@ else fi # ══════════════════════════════════════════════════════════════════ -# Phase 6: Verify sandbox survived +# Phase 7: Verify sandbox survived — every complaint from #486/#888/#859/#1086 # ══════════════════════════════════════════════════════════════════ -section "Phase 6: Verify sandbox survived restart" +section "Phase 7: Verify sandbox survived restart" -# 6a: Sandbox exists in openshell +# 7a: openshell sandbox list — #486 "No sandboxes found" if openshell sandbox list 2>&1 | grep -q "$SANDBOX_NAME"; then - pass "Sandbox '$SANDBOX_NAME' still listed in openshell after restart" + pass "openshell sandbox list shows '$SANDBOX_NAME' after restart" else - fail "Sandbox '$SANDBOX_NAME' not found in openshell after restart" + fail "openshell sandbox list: '$SANDBOX_NAME' NOT FOUND after restart (#486)" openshell sandbox list 2>&1 || true fi -# 6b: Sandbox pod is running (not just listed) +# 7b: Sandbox pod is running, not just listed sandbox_phase="" for attempt in $(seq 1 30); do sandbox_phase=$(openshell sandbox list 2>&1 | grep "$SANDBOX_NAME" | grep -oiE 'running|ready' | head -1) @@ -362,100 +480,169 @@ for attempt in $(seq 1 30); do done if [ -n "$sandbox_phase" ]; then - pass "Sandbox pod is in '$sandbox_phase' state" + pass "Sandbox pod is '$sandbox_phase' after restart" else - fail "Sandbox pod did not reach Running/Ready state after restart" + fail "Sandbox pod did not reach Running/Ready after restart" openshell sandbox list 2>&1 || true fi -# 6c: NemoClaw registry still knows about it -if registry_has "$SANDBOX_NAME"; then - pass "NemoClaw registry still contains '$SANDBOX_NAME'" +# 7c: NemoClaw registry still has it — #486 "No sandboxes registered" +if [ -f "$REGISTRY" ] && grep -Fq "\"${SANDBOX_NAME}\"" "$REGISTRY"; then + pass "NemoClaw registry still contains '$SANDBOX_NAME' after restart" else - fail "NemoClaw registry lost '$SANDBOX_NAME' after restart" + fail "NemoClaw registry lost '$SANDBOX_NAME' after restart (#486)" fi -# 6d: nemoclaw status works -if status_output=$(run_nemoclaw "$SANDBOX_NAME" status 2>&1); then - pass "nemoclaw status exits 0 after restart" +# 7d: nemoclaw list shows it — the actual user-facing command +if list_output=$(nemoclaw list 2>&1) && grep -Fq "$SANDBOX_NAME" <<<"$list_output"; then + pass "nemoclaw list shows '$SANDBOX_NAME' after restart" else - fail "nemoclaw status failed after restart: ${status_output:0:200}" + fail "nemoclaw list doesn't show '$SANDBOX_NAME' after restart: ${list_output:0:200}" +fi + +# 7e: nemoclaw status works — #859 "unclear CLI behavior" +# No special intervention should be required after gateway restart. +# If nemoclaw status hangs, that IS the bug — use timeout to detect it. +# Write to a temp file instead of $() to avoid pipe FD inheritance: +# nemoclaw's SSH recovery can spawn background processes that hold the +# pipe open, preventing $() from returning even after timeout kills nemoclaw. +STATUS_TMP="$(mktemp)" +TIMEOUT_STATUS="" +command -v timeout >/dev/null 2>&1 && TIMEOUT_STATUS="timeout 120" +command -v gtimeout >/dev/null 2>&1 && TIMEOUT_STATUS="gtimeout 120" +$TIMEOUT_STATUS nemoclaw "$SANDBOX_NAME" status >"$STATUS_TMP" 2>&1 +status_exit=$? +status_output=$(cat "$STATUS_TMP") +rm -f "$STATUS_TMP" +if [ "$status_exit" -eq 0 ]; then + pass "nemoclaw $SANDBOX_NAME status exits 0 after restart (no re-onboard needed)" +elif [ "$status_exit" -eq 124 ]; then + fail "nemoclaw $SANDBOX_NAME status TIMED OUT after restart (port forward or SSH recovery hung)" +else + fail "nemoclaw $SANDBOX_NAME status failed after restart (exit $status_exit): ${status_output:0:200}" fi # ══════════════════════════════════════════════════════════════════ -# Phase 7: Verify workspace data persisted +# Phase 8: Verify SSH connectivity — #888/#1086 handshake failure # ══════════════════════════════════════════════════════════════════ -section "Phase 7: Verify workspace data persisted" +section "Phase 8: Verify SSH connectivity after restart" + +if ! setup_ssh; then + fail "Could not get SSH config after restart (#888 handshake failure?)" + skip "Workspace marker check (SSH unavailable)" + skip "Agent data marker check (SSH unavailable)" + skip "Nested marker check (SSH unavailable)" + skip "Post-restart inference (SSH unavailable)" + + # Jump to cleanup + section "Phase 11: Cleanup" + nemoclaw "$SANDBOX_NAME" destroy --yes 2>&1 | tail -3 || true + openshell gateway destroy -g nemoclaw 2>/dev/null || true + echo "" + echo "========================================" + echo " Sandbox Survival E2E Results:" + echo " Passed: $PASS" + echo " Failed: $FAIL" + echo " Skipped: $SKIP" + echo " Total: $TOTAL" + echo "========================================" + printf '\n\033[1;31m %d test(s) failed.\033[0m\n' "$FAIL" + exit 1 +fi +pass "SSH config available after restart" -ssh_config="$(mktemp)" -if openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config" 2>/dev/null; then - pass "SSH config available after restart" +# 8a: Raw SSH connectivity — the #888/#1086 handshake test +if ssh "${SSH_OPTS[@]}" "$SSH_TARGET" "echo alive" >/dev/null 2>&1; then + pass "SSH into sandbox works after restart (no handshake failure — #888/#1086)" else - fail "Could not get SSH config after restart" - rm -f "$ssh_config" - ssh_config="" + fail "SSH into sandbox FAILED after restart — handshake verification likely failed (#888/#1086)" + info "This is the core bug: gateway regenerated secrets, sandbox has stale ones" + cleanup_ssh + # Still try to get logs for diagnosis + nemoclaw "$SANDBOX_NAME" logs 2>&1 | grep -i "handshake" | head -5 || true fi -SSH_OPTS=(-F "$ssh_config" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o LogLevel=ERROR) +# ══════════════════════════════════════════════════════════════════ +# Phase 9: Verify workspace and agent state persisted — #1086/@Koneisto +# ══════════════════════════════════════════════════════════════════ +section "Phase 9: Verify state persisted across restart" -# 7a: SSH connectivity works -if [ -n "$ssh_config" ]; then - if ssh "${SSH_OPTS[@]}" "$SSH_TARGET" "echo alive" >/dev/null 2>&1; then - pass "SSH into sandbox works after restart" +# 9a: Workspace marker +post_restart_marker=$(ssh "${SSH_OPTS[@]}" "$SSH_TARGET" "cat /sandbox/.survival-marker" 2>/dev/null) +if [ "$post_restart_marker" = "$MARKER_VALUE" ]; then + pass "Workspace marker survived restart: $MARKER_VALUE" +else + fail "Workspace marker LOST: expected '$MARKER_VALUE', got '${post_restart_marker:-}' (#1086 state loss)" +fi + +# 9b: Agent data marker +if [ "$agent_data_exists" = "yes" ]; then + agent_marker=$(ssh "${SSH_OPTS[@]}" "$SSH_TARGET" "cat /sandbox/.openclaw-data/.survival-marker" 2>/dev/null) + if [ "$agent_marker" = "$MARKER_VALUE" ]; then + pass "Agent data marker survived restart" else - fail "SSH into sandbox failed after restart" - rm -f "$ssh_config" - skip "Marker file check (SSH unavailable)" - skip "Post-restart inference (SSH unavailable)" - ssh_config="" + fail "Agent data marker LOST: expected '$MARKER_VALUE', got '${agent_marker:-}' (agent state destroyed)" fi fi -# 7b: Marker file survived -if [ -n "$ssh_config" ]; then - post_restart_marker=$(ssh "${SSH_OPTS[@]}" "$SSH_TARGET" "cat /sandbox/.survival-marker" 2>/dev/null) - if [ "$post_restart_marker" = "$MARKER_VALUE" ]; then - pass "Marker file survived restart: $MARKER_VALUE" +# 9c: Nested workspace file +nested_marker=$(ssh "${SSH_OPTS[@]}" "$SSH_TARGET" "cat /sandbox/test-data/nested-marker.txt" 2>/dev/null) +if [ "$nested_marker" = "$MARKER_VALUE" ]; then + pass "Nested workspace marker survived restart" +else + fail "Nested workspace marker LOST: expected '$MARKER_VALUE', got '${nested_marker:-}'" +fi + +# 9d: Agent data directory still populated (not wiped to image defaults) +if [ "$agent_data_exists" = "yes" ]; then + agent_files_after=$(ssh "${SSH_OPTS[@]}" "$SSH_TARGET" \ + "ls -la /sandbox/.openclaw-data/ 2>/dev/null | head -20" 2>/dev/null) || true + if [ -n "$agent_files_after" ]; then + info "Agent data directory contents after restart:" + echo "$agent_files_after" | while IFS= read -r line; do + info " $line" + done + pass "Agent data directory still populated after restart" else - fail "Marker file lost or changed: expected '$MARKER_VALUE', got '${post_restart_marker:-}'" + fail "Agent data directory is empty after restart (@Koneisto overlay wipe)" fi fi # ══════════════════════════════════════════════════════════════════ -# Phase 8: Prove live inference works AFTER restart (the real proof) +# Phase 10: Prove live inference works AFTER restart (the definitive proof) # ══════════════════════════════════════════════════════════════════ -section "Phase 8: Live inference after restart (THE definitive test)" - -if [ -n "$ssh_config" ]; then - info "[LIVE] Post-restart inference: user → sandbox → gateway → NVIDIA Endpoints..." - post_response=$($TIMEOUT_CMD ssh "${SSH_OPTS[@]}" "$SSH_TARGET" \ - "curl -s --max-time 60 https://inference.local/v1/chat/completions \ - -H 'Content-Type: application/json' \ - -d '{\"model\":\"$MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \ - 2>&1) || true - - post_content="" - if [ -n "$post_response" ]; then - post_content=$(echo "$post_response" | parse_chat_content 2>/dev/null) || true - fi +section "Phase 10: Live inference after restart (THE definitive test)" - if grep -qi "PONG" <<<"$post_content"; then - pass "[LIVE] Post-restart: model responded with PONG through sandbox" - info "Full path proven: user → sandbox → openshell gateway (resumed) → NVIDIA Endpoints → response" - else - fail "[LIVE] Post-restart: expected PONG, got: ${post_content:0:200}" - info "Raw response: ${post_response:0:300}" - fi +info "[LIVE] Post-restart inference: user → sandbox → gateway → NVIDIA Endpoints..." +# shellcheck disable=SC2029 +post_response=$($TIMEOUT_CMD ssh "${SSH_OPTS[@]}" "$SSH_TARGET" \ + "curl -s --max-time 60 https://inference.local/v1/chat/completions \ + -H 'Content-Type: application/json' \ + -d '{\"model\":\"$MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \ + 2>&1) || true + +post_content="" +if [ -n "$post_response" ]; then + post_content=$(echo "$post_response" | parse_chat_content 2>/dev/null) || true +fi + +if grep -qi "PONG" <<<"$post_content"; then + pass "[LIVE] Post-restart: model responded with PONG through sandbox" + info "Full path proven: user → sandbox → openshell gateway (resumed) → NVIDIA Endpoints → response" + info "This proves #859's ask: reliable non-destructive gateway lifecycle with working inference" +else + fail "[LIVE] Post-restart: expected PONG, got: ${post_content:0:200}" + info "Raw response: ${post_response:0:300}" fi -[ -n "${ssh_config:-}" ] && rm -f "$ssh_config" +cleanup_ssh # ══════════════════════════════════════════════════════════════════ -# Phase 9: Cleanup +# Phase 11: Cleanup # ══════════════════════════════════════════════════════════════════ -section "Phase 9: Cleanup" +section "Phase 11: Cleanup" -run_nemoclaw "$SANDBOX_NAME" destroy --yes 2>&1 | tail -3 || true +nemoclaw "$SANDBOX_NAME" destroy --yes 2>&1 | tail -3 || true openshell gateway destroy -g nemoclaw 2>/dev/null || true if [ -f "$REGISTRY" ] && grep -Fq "\"${SANDBOX_NAME}\"" "$REGISTRY"; then @@ -477,7 +664,8 @@ echo " Total: $TOTAL" echo "========================================" if [ "$FAIL" -eq 0 ]; then - printf '\n\033[1;32m Sandbox survival PASSED — live inference verified before AND after gateway restart.\033[0m\n' + printf '\n\033[1;32m Sandbox survival PASSED — all state persisted, live inference verified before AND after gateway restart.\033[0m\n' + printf '\033[1;32m Issues validated: #486, #888, #859, #1086\033[0m\n' exit 0 else printf '\n\033[1;31m %d test(s) failed.\033[0m\n' "$FAIL"