Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def create_api() -> FastAPI:
app.include_router(task_router)
app.include_router(flows_router)
app.include_router(study_router)
app.include_router(run_router)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): The run_router is being included twice, which will cause route duplication conflicts.

It’s now included once in the new line you added and again in the existing line below. FastAPI does not raise an error for duplicate registrations — Starlette matches routes in registration order, so the second registration simply becomes unreachable, and the duplicated paths are also emitted twice in the OpenAPI schema (producing duplicate operation-ID warnings). Please keep only one app.include_router(run_router) here.

app.include_router(setup_router)
app.include_router(run_router)
return app
Expand Down
134 changes: 134 additions & 0 deletions src/routers/openml/runs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
"""Endpoints for OpenML Run resources."""

from typing import Annotated, Any

from fastapi import APIRouter, Body, Depends
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncConnection

from core.errors import NoResultsError
from routers.dependencies import Pagination, expdb_connection
from routers.types import SystemString64
"""Endpoints for run-related data."""

from typing import Annotated
Expand All @@ -13,6 +24,129 @@
router = APIRouter(prefix="/run", tags=["run"])


def _add_in_filter(
filters: list[str],
params: dict[str, Any],
column: str,
param_prefix: str,
values: list[int],
) -> None:
"""Append an IN filter clause and its bind parameters to the query builder.

Builds named placeholders (:prefix_0, :prefix_1, ...) for safe binding
of multiple integer values without SQL injection risk.

Args:
filters: List of WHERE clause fragments to append to.
params: Bind parameter dict to update in-place.
column: SQL column expression (e.g. "r.rid", "a.implementation_id").
param_prefix: Prefix for named bind params (e.g. "run_id", "flow_id").
values: List of integer values to filter by.

"""
placeholders = ", ".join(f":{param_prefix}_{i}" for i in range(len(values)))
filters.append(f"{column} IN ({placeholders})")
params |= {f"{param_prefix}_{i}": v for i, v in enumerate(values)}


@router.post(path="/list", description="Provided for convenience, same as `GET` endpoint.")
@router.get(path="/list")
async def list_runs(  # noqa: PLR0913
    pagination: Annotated[Pagination, Body(default_factory=Pagination)],
    run_id: Annotated[
        list[int] | None,
        Body(
            description="The run(s) to include in the search. "
            "If none are specified, all runs are included.",
        ),
    ] = None,
    task_id: Annotated[
        list[int] | None,
        Body(description="Only include runs for these task id(s)."),
    ] = None,
    flow_id: Annotated[
        list[int] | None,
        Body(description="Only include runs using these flow id(s)."),
    ] = None,
    setup_id: Annotated[
        list[int] | None,
        Body(description="Only include runs with these setup id(s)."),
    ] = None,
    uploader: Annotated[
        list[int] | None,
        Body(description="Only include runs uploaded by these user id(s)."),
    ] = None,
    tag: Annotated[str | None, SystemString64] = None,
    expdb: Annotated[AsyncConnection, Depends(expdb_connection)] = None,
) -> list[dict[str, Any]]:
    """List runs, optionally filtered by one or more criteria.

    Filters are combinable — all provided filters are applied with AND logic.
    List filters (run_id, task_id, flow_id, setup_id, uploader) accept multiple
    values and are applied with IN logic within each filter. A filter that is
    omitted (None) is ignored; a filter explicitly set to an empty list can
    match nothing, so it raises 404 rather than silently broadening the search.

    Returns a flat list of run objects. Raises 404 if no runs match the filters.

    PHP equivalent: GET /run/list/[run/{ids}][/task/{ids}][/flow/{ids}]...
    Note: Unlike PHP (which requires at least one filter), this endpoint allows
    an empty filter set and returns all runs paginated.
    """
    filters: list[str] = []
    params: dict[str, Any] = {"limit": pagination.limit, "offset": pagination.offset}

    # Each list filter maps a user-facing param to a SQL column and a bind
    # parameter prefix.
    # flow_id maps to algorithm_setup.implementation_id (aliased as `a`).
    # setup_id maps to run.setup — the FK column stored on the run row.
    list_filters: list[tuple[list[int] | None, str, str]] = [
        (run_id, "r.rid", "run_id"),
        (task_id, "r.task_id", "task_id"),
        (flow_id, "a.implementation_id", "flow_id"),
        (setup_id, "r.setup", "setup_id"),
        (uploader, "r.uploader", "uploader"),
    ]
    for values, column, prefix in list_filters:
        if values is None:
            # Filter not provided at all — ignore it.
            continue
        if not values:
            # An explicitly empty list can match nothing. Without this guard
            # a falsy [] would fall through to the no-filter path and return
            # unrelated runs, contradicting the AND-filter contract.
            msg = "No runs match the search criteria."
            raise NoResultsError(msg)
        _add_in_filter(filters, params, column, prefix, values)

    if tag is not None:
        # run_tag.id is the run FK (not a surrogate PK), so we join on run.rid
        filters.append("r.rid IN (SELECT id FROM run_tag WHERE tag = :tag)")
        params["tag"] = tag

    where_clause = f"WHERE {' AND '.join(filters)}" if filters else ""

    query = text(
        f"""
        SELECT
            r.rid AS run_id,
            r.task_id AS task_id,
            r.setup AS setup_id,
            a.implementation_id AS flow_id,
            r.uploader AS uploader,
            r.start_time AS upload_time,
            IFNULL(r.error_message, '') AS error_message,
            IFNULL(r.run_details, '') AS run_details
        FROM run r
        JOIN algorithm_setup a ON r.setup = a.sid
        {where_clause}
        ORDER BY r.rid
        LIMIT :limit OFFSET :offset
        """,  # noqa: S608
    )

    result = await expdb.execute(query, params)
    rows = result.mappings().all()

    if not rows:
        msg = "No runs match the search criteria."
        raise NoResultsError(msg)

    # SQLAlchemy returns start_time as a datetime object. Format to match PHP
    # response shape: "YYYY-MM-DD HH:MM:SS" (no T separator, no timezone).
    # dict unpacking with a later key overrides the earlier one from **row.
    return [
        {**row, "upload_time": row["upload_time"].strftime("%Y-%m-%d %H:%M:%S")} for row in rows
    ]
@router.get("/trace/{run_id}")
async def get_run_trace(
run_id: int,
Expand Down
Loading
Loading