Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions src/service/core/data/data_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import base64
import datetime
import json
import mimetypes
import shlex
from typing import Any, Dict, List, Sequence
import uuid
Expand Down Expand Up @@ -988,6 +989,65 @@ def get_info(
versions=rows)


@router.get('/{bucket}/dataset/{name}/manifest')
def get_manifest(
    bucket: objects.DatasetPattern,
    name: objects.DatasetPattern,
    version: str = fastapi.Query(...),
) -> List:
    """ This api returns the manifest for a dataset version. """
    postgres = connectors.PostgresConnector.get_instance()
    dataset_info = get_dataset(postgres, bucket=bucket, name=name)

    # Look up the storage location of the requested version; only READY
    # versions have a usable manifest.
    fetch_command = '''
    SELECT location FROM dataset_version
    WHERE dataset_id = %s AND version_id = %s AND status = %s;
    '''
    version_rows = postgres.execute_fetch_command(
        fetch_command,
        (dataset_info.id, version, objects.DatasetStatus.READY.name),
    )
    if not version_rows:
        raise osmo_errors.OSMODatabaseError(
            f'Version {version} not found for dataset {name} in bucket {bucket}.')

    # Read the manifest object from storage using the bucket's default
    # credential and parse it as JSON.
    bucket_config = postgres.get_dataset_configs().get_bucket_config(bucket)
    storage_client = storage.Client.create(
        storage_uri=version_rows[0].location,
        data_credential=bucket_config.default_credential,
    )
    return json.loads(storage_client.get_object_stream(as_io=True).read())


@router.api_route('/{bucket}/dataset/{name}/file-content', methods=['GET', 'HEAD'])
def get_file_content(
    bucket: objects.DatasetPattern,
    name: objects.DatasetPattern,
    storage_path: str = fastapi.Query(...),
) -> fastapi.responses.StreamingResponse:
    """ This api streams file content from storage for the dataset file preview.

    Raises:
        OSMOUserError: if storage_path does not live under this dataset's
            storage prefix (prevents reading arbitrary objects).
    """
    postgres = connectors.PostgresConnector.get_instance()
    dataset_info = get_dataset(postgres, bucket=bucket, name=name)

    # Validate that the storage path belongs to this dataset's storage prefix.
    # Container equality alone would let a caller read ANY object in the
    # shared container, so additionally require the path to sit under the
    # dataset's hash_location prefix. The trailing '/' on the prefix check
    # prevents sibling-prefix matches (e.g. '/data/abc-evil' vs '/data/abc').
    requested_backend = storage.construct_storage_backend(storage_path)
    dataset_backend = storage.construct_storage_backend(dataset_info.hash_location)
    dataset_prefix = dataset_info.hash_location.rstrip('/')
    if requested_backend.container != dataset_backend.container or \
            not (storage_path == dataset_prefix or
                 storage_path.startswith(dataset_prefix + '/')):
        raise osmo_errors.OSMOUserError(
            'Storage path does not belong to this dataset.')

    bucket_config = postgres.get_dataset_configs().get_bucket_config(bucket)
    client = storage.Client.create(
        storage_uri=storage_path,
        data_credential=bucket_config.default_credential,
    )

    # Guess the content type from the file extension so the browser can
    # preview known types; fall back to a generic binary type otherwise.
    content_type = mimetypes.guess_type(storage_path)[0] or 'application/octet-stream'
    return fastapi.responses.StreamingResponse(
        client.get_object_stream(),
        media_type=content_type,
    )


@router.get('/list_dataset')
def list_dataset_from_bucket(name: objects.DatasetPattern | None = None,
user: List[str] | None = fastapi.Query(default = None),
Expand Down
3 changes: 0 additions & 3 deletions src/ui/next.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,6 @@ const nextConfig: NextConfig = {
// This allows Turbopack aliasing to work (aliases work for imports, not file discovery)
"@/app/api/[...path]/route.impl": "@/app/api/[...path]/route.impl.production",

// Dataset manifest server action - alias to production version (zero mock code)
"@/lib/api/server/dataset-actions": "@/lib/api/server/dataset-actions.production",

// Dataset file proxy route - alias to production version (zero mock code)
"@/app/proxy/dataset/file/route.impl":
"@/app/proxy/dataset/file/route.impl.production",
Expand Down
95 changes: 68 additions & 27 deletions src/ui/src/app/proxy/dataset/file/route.impl.production.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,55 +18,96 @@
* Dataset File Proxy — Production Implementation
*
* Server-side proxy for fetching dataset files from storage URLs.
* Routes requests through the server to avoid CSP restrictions.
*
* GET /proxy/dataset/file?url={encodedFileUrl} → streams file content
* HEAD /proxy/dataset/file?url={encodedFileUrl} → returns headers only
* When bucket/name/storagePath params are present, routes through the backend
* service's file-content endpoint (handles private buckets with credentials).
* Falls back to direct fetch for legacy callers that only provide a url param.
*
* GET /proxy/dataset/file?bucket=...&name=...&storagePath=... → service proxy
* GET /proxy/dataset/file?url={encodedFileUrl} → direct fetch (legacy)
* HEAD variants of the above → headers only
*/

import type { NextRequest } from "next/server";
import { getServerApiBaseUrl } from "@/lib/api/server/config";
import { forwardAuthHeaders } from "@/lib/api/server/proxy-headers";

const FORWARDED_HEADERS = ["content-type", "content-length", "last-modified", "etag", "cache-control"] as const;

function parseAndValidateUrl(request: Request): { url: string } | Response {
/** Copy the allow-listed upstream response headers into a fresh Headers object. */
function forwardResponseHeaders(upstream: Response): Headers {
  const forwarded = new Headers();
  for (const name of FORWARDED_HEADERS) {
    const value = upstream.headers.get(name);
    if (value) {
      forwarded.set(name, value);
    }
  }
  return forwarded;
}

// Parameters for routing through the backend service's file-content
// endpoint (supports private buckets via server-side credentials).
interface ServiceParams {
  bucket: string;
  name: string;
  storagePath: string;
}

// Legacy parameters: a direct, publicly fetchable file URL.
interface LegacyParams {
  url: string;
}

function parseParams(request: Request): ServiceParams | LegacyParams | Response {
const { searchParams } = new URL(request.url);
const url = searchParams.get("url");

if (!url) {
return Response.json({ error: "url parameter is required" }, { status: 400 });
const bucket = searchParams.get("bucket");
const name = searchParams.get("name");
const storagePath = searchParams.get("storagePath");

if (bucket && name && storagePath) {
return { bucket, name, storagePath };
}

// Legacy fallback: direct URL fetch (works for public buckets only)
const url = searchParams.get("url");
if (!url) {
return Response.json({ error: "bucket/name/storagePath or url parameter is required" }, { status: 400 });
}
if (!url.startsWith("http://") && !url.startsWith("https://")) {
return Response.json({ error: "Only http/https URLs are supported" }, { status: 400 });
}

return { url };
}

export async function GET(request: Request) {
const result = parseAndValidateUrl(request);
if (result instanceof Response) return result;

const upstream = await fetch(result.url);
/** Type guard: service-proxy params carry a storagePath; legacy params do not. */
function isServiceParams(params: ServiceParams | LegacyParams): params is ServiceParams {
  return Object.prototype.hasOwnProperty.call(params, "storagePath");
}

const headers = new Headers();
for (const header of FORWARDED_HEADERS) {
const value = upstream.headers.get(header);
if (value) headers.set(header, value);
/**
 * Fetch the file from upstream. Service params route through the backend's
 * file-content endpoint with forwarded auth headers (handles private
 * buckets); legacy params are fetched directly.
 */
async function fetchUpstream(
  request: NextRequest,
  params: ServiceParams | LegacyParams,
  method: string,
): Promise<Response> {
  if (!isServiceParams(params)) {
    return fetch(params.url, { method });
  }

  const backendUrl = getServerApiBaseUrl();
  const query = new URLSearchParams({ storage_path: params.storagePath });
  const serviceUrl =
    `${backendUrl}/api/bucket/${encodeURIComponent(params.bucket)}` +
    `/dataset/${encodeURIComponent(params.name)}/file-content?${query}`;
  return fetch(serviceUrl, { method, headers: forwardAuthHeaders(request) });
}

export async function HEAD(request: Request) {
const result = parseAndValidateUrl(request);
export async function GET(request: NextRequest) {
const result = parseParams(request);
if (result instanceof Response) return result;

const upstream = await fetch(result.url, { method: "HEAD" });
const upstream = await fetchUpstream(request, result, "GET");
const headers = forwardResponseHeaders(upstream);
return new Response(upstream.body, { status: upstream.status, headers });
}

/** HEAD: return upstream status and forwarded headers with no body. */
export async function HEAD(request: NextRequest) {
  const parsed = parseParams(request);
  if (parsed instanceof Response) return parsed;

  const upstream = await fetchUpstream(request, parsed, "HEAD");
  return new Response(null, {
    status: upstream.status,
    headers: forwardResponseHeaders(upstream),
  });
}
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ export function DatasetDetailContent({ bucket, name }: Props) {
const {
versions,
location,
resolvedName,
resolvedVersion,
files: virtualFiles,
memberSubPath,
segmentLabels,
Expand All @@ -184,6 +186,8 @@ export function DatasetDetailContent({ bucket, name }: Props) {
return {
versions: [],
location: null as string | null,
resolvedName: name,
resolvedVersion: "",
files: null as DatasetFile[] | null,
memberSubPath: "",
segmentLabels: {} as Record<string, string>,
Expand All @@ -197,6 +201,8 @@ export function DatasetDetailContent({ bucket, name }: Props) {
return {
versions: detail.versions,
location: currentVersionData?.location ?? null,
resolvedName: name,
resolvedVersion: currentVersionData?.version ?? "",
files: null,
memberSubPath: path,
segmentLabels: {},
Expand All @@ -221,6 +227,8 @@ export function DatasetDetailContent({ bucket, name }: Props) {
return {
versions: [],
location: null,
resolvedName: name,
resolvedVersion: "",
files: memberEntries,
memberSubPath: "",
segmentLabels: labels,
Expand All @@ -234,11 +242,13 @@ export function DatasetDetailContent({ bucket, name }: Props) {
return {
versions: [],
location: member?.location ?? null,
resolvedName: member?.name ?? name,
resolvedVersion: member?.version ?? "",
files: null,
memberSubPath: subPath,
segmentLabels: labels,
};
}, [detail, version, path]);
}, [detail, version, path, name]);

// ==========================================================================
// File listing — fetch manifest for selected version/member
Expand All @@ -249,7 +259,7 @@ export function DatasetDetailContent({ bucket, name }: Props) {
isLoading: isFilesLoading,
error: filesError,
refetch: refetchFiles,
} = useDatasetFiles(location);
} = useDatasetFiles(bucket, resolvedName, resolvedVersion, location);

// Normal (unfiltered) directory listing — used for FilterBar suggestions and as base view
const normalFiles = useMemo(
Expand Down Expand Up @@ -573,6 +583,8 @@ export function DatasetDetailContent({ bucket, name }: Props) {
<FilePreviewPanel
file={panelFileData}
path={fileDirPath}
bucket={bucket}
datasetName={resolvedName}
onClose={handleClosePanel}
/>
)}
Expand Down
Loading