From b649a800ef2d2696eb10187d8e3b3d9143af76de Mon Sep 17 00:00:00 2001 From: Keita Watanabe Date: Thu, 2 Apr 2026 23:36:56 +0000 Subject: [PATCH 1/3] fix: route dataset file browser and preview through service to support private buckets (#793) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The UI's fetchManifest and file preview proxy performed unsigned fetch() against S3 HTTPS URLs, which fails with 403 on private buckets. Added two service-side proxy endpoints: - GET /{bucket}/dataset/{name}/manifest — reads manifest JSON from storage using bucket credentials (supports S3, GCS, Azure, Swift, TOS) - GET /{bucket}/dataset/{name}/file-content — streams individual file content with storage_path validation against dataset container Updated UI to call these endpoints through the existing /api catch-all proxy instead of direct unsigned fetch. Removed unused fetchManifest server action files. --- src/service/core/data/data_service.py | 60 ++++++++++++ src/ui/next.config.ts | 3 - .../dataset/file/route.impl.production.ts | 95 +++++++++++++------ .../components/dataset-detail-content.tsx | 16 +++- .../detail/components/file-preview-panel.tsx | 67 +++++++++---- src/ui/src/lib/api/adapter/datasets-hooks.ts | 7 +- src/ui/src/lib/api/adapter/datasets.ts | 22 ++++- .../api/server/dataset-actions.production.ts | 31 ------ src/ui/src/lib/api/server/dataset-actions.ts | 59 ------------ src/ui/src/mocks/handlers.ts | 41 ++++++-- 10 files changed, 246 insertions(+), 155 deletions(-) delete mode 100644 src/ui/src/lib/api/server/dataset-actions.production.ts delete mode 100644 src/ui/src/lib/api/server/dataset-actions.ts diff --git a/src/service/core/data/data_service.py b/src/service/core/data/data_service.py index 8f68b3577..3a3fe8d35 100755 --- a/src/service/core/data/data_service.py +++ b/src/service/core/data/data_service.py @@ -19,6 +19,7 @@ import base64 import datetime import json +import mimetypes import shlex from typing import Any, Dict, List, Sequence import uuid @@ -988,6 +989,65 @@ def get_info( versions=rows) +@router.get('/{bucket}/dataset/{name}/manifest') +def get_manifest( + bucket: objects.DatasetPattern, + name: objects.DatasetPattern, + version: str = fastapi.Query(...), +) -> List: + """ This api returns the manifest for a dataset version. """ + postgres = connectors.PostgresConnector.get_instance() + dataset_info = get_dataset(postgres, bucket=bucket, name=name) + + fetch_command = ''' + SELECT location FROM dataset_version + WHERE dataset_id = %s AND version_id = %s AND status = %s; + ''' + rows = postgres.execute_fetch_command( + fetch_command, (dataset_info.id, version, objects.DatasetStatus.READY.name)) + if not rows: + raise osmo_errors.OSMODatabaseError( + f'Version {version} not found for dataset {name} in bucket {bucket}.') + + bucket_config = postgres.get_dataset_configs().get_bucket_config(bucket) + client = storage.Client.create( + storage_uri=rows[0].location, + data_credential=bucket_config.default_credential, + ) + manifest_content = client.get_object_stream(as_io=True).read() + return json.loads(manifest_content) + + +@router.get('/{bucket}/dataset/{name}/file-content') +def get_file_content( + bucket: objects.DatasetPattern, + name: objects.DatasetPattern, + storage_path: str = fastapi.Query(...), +) -> fastapi.responses.StreamingResponse: + """ This api streams file content from storage for the dataset file preview. """ + postgres = connectors.PostgresConnector.get_instance() + dataset_info = get_dataset(postgres, bucket=bucket, name=name) + + # Validate that the storage path belongs to this dataset's storage prefix + requested_backend = storage.construct_storage_backend(storage_path) + dataset_backend = storage.construct_storage_backend(dataset_info.hash_location) + if requested_backend.container != dataset_backend.container: + raise osmo_errors.OSMOUserError( + 'Storage path does not belong to this dataset.') + + bucket_config = postgres.get_dataset_configs().get_bucket_config(bucket) + client = storage.Client.create( + storage_uri=storage_path, + data_credential=bucket_config.default_credential, + ) + + content_type = mimetypes.guess_type(storage_path)[0] or 'application/octet-stream' + return fastapi.responses.StreamingResponse( + client.get_object_stream(), + media_type=content_type, + ) + + @router.get('/list_dataset') def list_dataset_from_bucket(name: objects.DatasetPattern | None = None, user: List[str] | None = fastapi.Query(default = None), diff --git a/src/ui/next.config.ts b/src/ui/next.config.ts index d65685c4e..32732d8d5 100644 --- a/src/ui/next.config.ts +++ b/src/ui/next.config.ts @@ -198,9 +198,6 @@ const nextConfig: NextConfig = { // This allows Turbopack aliasing to work (aliases work for imports, not file discovery) "@/app/api/[...path]/route.impl": "@/app/api/[...path]/route.impl.production", - // Dataset manifest server action - alias to production version (zero mock code) - "@/lib/api/server/dataset-actions": "@/lib/api/server/dataset-actions.production", - // Dataset file proxy route - alias to production version (zero mock code) "@/app/proxy/dataset/file/route.impl": "@/app/proxy/dataset/file/route.impl.production", diff --git a/src/ui/src/app/proxy/dataset/file/route.impl.production.ts b/src/ui/src/app/proxy/dataset/file/route.impl.production.ts index 17c7ef928..2c5e3c0f5 100644 --- a/src/ui/src/app/proxy/dataset/file/route.impl.production.ts +++ b/src/ui/src/app/proxy/dataset/file/route.impl.production.ts @@ -18,55 +18,96 @@ * Dataset File Proxy — Production Implementation * * Server-side proxy for fetching dataset files from storage URLs. - * Routes requests through the server to avoid CSP restrictions. * - * GET /proxy/dataset/file?url={encodedFileUrl} → streams file content - * HEAD /proxy/dataset/file?url={encodedFileUrl} → returns headers only + * When bucket/name/storagePath params are present, routes through the backend + * service's file-content endpoint (handles private buckets with credentials). + * Falls back to direct fetch for legacy callers that only provide a url param. + * + * GET /proxy/dataset/file?bucket=...&name=...&storagePath=... → service proxy + * GET /proxy/dataset/file?url={encodedFileUrl} → direct fetch (legacy) + * HEAD variants of the above → headers only */ +import type { NextRequest } from "next/server"; +import { getServerApiBaseUrl } from "@/lib/api/server/config"; +import { forwardAuthHeaders } from "@/lib/api/server/proxy-headers"; + const FORWARDED_HEADERS = ["content-type", "content-length", "last-modified", "etag", "cache-control"] as const; -function parseAndValidateUrl(request: Request): { url: string } | Response { +function forwardResponseHeaders(upstream: Response): Headers { + const headers = new Headers(); + for (const header of FORWARDED_HEADERS) { + const value = upstream.headers.get(header); + if (value) headers.set(header, value); + } + return headers; +} + +interface ServiceParams { + bucket: string; + name: string; + storagePath: string; +} + +interface LegacyParams { + url: string; +} + +function parseParams(request: Request): ServiceParams | LegacyParams | Response { const { searchParams } = new URL(request.url); - const url = searchParams.get("url"); - if (!url) { - return Response.json({ error: "url parameter is required" }, { status: 400 }); + const bucket = searchParams.get("bucket"); + const name = searchParams.get("name"); + const storagePath = searchParams.get("storagePath"); + + if (bucket && name && storagePath) { + return { bucket, name, storagePath }; } + // Legacy fallback: direct URL fetch (works for public buckets only) + const url = searchParams.get("url"); + if (!url) { + return Response.json({ error: "bucket/name/storagePath or url parameter is required" }, { status: 400 }); + } if (!url.startsWith("http://") && !url.startsWith("https://")) { return Response.json({ error: "Only http/https URLs are supported" }, { status: 400 }); } - return { url }; } -export async function GET(request: Request) { - const result = parseAndValidateUrl(request); - if (result instanceof Response) return result; - - const upstream = await fetch(result.url); +function isServiceParams(params: ServiceParams | LegacyParams): params is ServiceParams { + return "storagePath" in params; +} - const headers = new Headers(); - for (const header of FORWARDED_HEADERS) { - const value = upstream.headers.get(header); - if (value) headers.set(header, value); +async function fetchUpstream( + request: NextRequest, + params: ServiceParams | LegacyParams, + method: string, +): Promise { + if (isServiceParams(params)) { + const backendUrl = getServerApiBaseUrl(); + const query = new URLSearchParams({ storage_path: params.storagePath }); + const serviceUrl = `${backendUrl}/api/bucket/${encodeURIComponent(params.bucket)}/dataset/${encodeURIComponent(params.name)}/file-content?${query}`; + const headers = forwardAuthHeaders(request); + return fetch(serviceUrl, { method, headers }); } - - return new Response(upstream.body, { status: upstream.status, headers }); + return fetch(params.url, { method }); } -export async function HEAD(request: Request) { - const result = parseAndValidateUrl(request); +export async function GET(request: NextRequest) { + const result = parseParams(request); if (result instanceof Response) return result; - const upstream = await fetch(result.url, { method: "HEAD" }); + const upstream = await fetchUpstream(request, result, "GET"); + const headers = forwardResponseHeaders(upstream); + return new Response(upstream.body, { status: upstream.status, headers }); +} - const headers = new Headers(); - for (const header of FORWARDED_HEADERS) { - const value = upstream.headers.get(header); - if (value) headers.set(header, value); - } +export async function HEAD(request: NextRequest) { + const result = parseParams(request); + if (result instanceof Response) return result; + const upstream = await fetchUpstream(request, result, "HEAD"); + const headers = forwardResponseHeaders(upstream); return new Response(null, { status: upstream.status, headers }); } diff --git a/src/ui/src/features/datasets/detail/components/dataset-detail-content.tsx b/src/ui/src/features/datasets/detail/components/dataset-detail-content.tsx index 2e3c5e741..c05137402 100644 --- a/src/ui/src/features/datasets/detail/components/dataset-detail-content.tsx +++ b/src/ui/src/features/datasets/detail/components/dataset-detail-content.tsx @@ -176,6 +176,8 @@ export function DatasetDetailContent({ bucket, name }: Props) { const { versions, location, + resolvedName, + resolvedVersion, files: virtualFiles, memberSubPath, segmentLabels, @@ -184,6 +186,8 @@ export function DatasetDetailContent({ bucket, name }: Props) { return { versions: [], location: null as string | null, + resolvedName: name, + resolvedVersion: "", files: null as DatasetFile[] | null, memberSubPath: "", segmentLabels: {} as Record, @@ -197,6 +201,8 @@ export function DatasetDetailContent({ bucket, name }: Props) { return { versions: detail.versions, location: currentVersionData?.location ?? null, + resolvedName: name, + resolvedVersion: currentVersionData?.version ?? "", files: null, memberSubPath: path, segmentLabels: {}, @@ -221,6 +227,8 @@ export function DatasetDetailContent({ bucket, name }: Props) { return { versions: [], location: null, + resolvedName: name, + resolvedVersion: "", files: memberEntries, memberSubPath: "", segmentLabels: labels, @@ -234,11 +242,13 @@ export function DatasetDetailContent({ bucket, name }: Props) { return { versions: [], location: member?.location ?? null, + resolvedName: member?.name ?? name, + resolvedVersion: member?.version ?? "", files: null, memberSubPath: subPath, segmentLabels: labels, }; - }, [detail, version, path]); + }, [detail, version, path, name]); // ========================================================================== // File listing — fetch manifest for selected version/member @@ -249,7 +259,7 @@ export function DatasetDetailContent({ bucket, name }: Props) { isLoading: isFilesLoading, error: filesError, refetch: refetchFiles, - } = useDatasetFiles(location); + } = useDatasetFiles(bucket, resolvedName, resolvedVersion, location); // Normal (unfiltered) directory listing — used for FilterBar suggestions and as base view const normalFiles = useMemo( @@ -573,6 +583,8 @@ export function DatasetDetailContent({ bucket, name }: Props) { )} diff --git a/src/ui/src/features/datasets/detail/components/file-preview-panel.tsx b/src/ui/src/features/datasets/detail/components/file-preview-panel.tsx index 9274ecdef..90dd58e8e 100644 --- a/src/ui/src/features/datasets/detail/components/file-preview-panel.tsx +++ b/src/ui/src/features/datasets/detail/components/file-preview-panel.tsx @@ -24,7 +24,7 @@ * - image/* → via proxy * - video/* →