Skip to content
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
LTRIM(`t0`.`string_col`, ' \t\n\r\v\f') AS `LStrip(string_col)`
REGEXP_REPLACE(`t0`.`string_col`, '^\\s+', '') AS `LStrip(string_col)`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
RTRIM(`t0`.`string_col`, ' \t\n\r\v\f') AS `RStrip(string_col)`
REGEXP_REPLACE(`t0`.`string_col`, '\\s+$', '') AS `RStrip(string_col)`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
RTRIM(LTRIM(`t0`.`string_col`, ' \t\n\r\v\f'), ' \t\n\r\v\f') AS `Strip(string_col)`
REGEXP_REPLACE(`t0`.`string_col`, '^\\s+|\\s+$', '') AS `Strip(string_col)`
FROM `functional_alltypes` AS `t0`
11 changes: 5 additions & 6 deletions ibis/backends/polars/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from collections.abc import Iterable, Mapping
from collections.abc import Mapping
from functools import lru_cache
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal
Expand Down Expand Up @@ -31,7 +31,6 @@
class Backend(SupportsTempTables, BaseBackend, NoUrl, DirectExampleLoader):
name = "polars"
dialect = Polars
supports_temporary_tables = True

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down Expand Up @@ -71,7 +70,7 @@ def do_connect(
if tables is not None and not isinstance(tables, Mapping):
raise TypeError("Input to ibis.polars.connect must be a mapping")

# tables are emphemeral
# tables are ephemeral
self._tables.clear()

for name, table in (tables or {}).items():
Expand Down Expand Up @@ -375,7 +374,7 @@ def drop_table(self, name: str, /, *, force: bool = False) -> None:
del self._tables[name]
self._context.unregister(name)
elif not force:
raise com.IbisError(f"Table {name!r} does not exist")
raise com.TableNotFound(name)

def drop_view(self, name: str, /, *, force: bool = False) -> None:
self.drop_table(name, force=force)
Expand Down Expand Up @@ -441,7 +440,7 @@ def _to_dataframe(
self,
expr: ir.Expr,
params: Mapping[ir.Expr, object] | None = None,
limit: int | None = None,
limit: int | str | None = None,
engine: Literal["cpu", "gpu", "streaming"] | pl.GPUEngine = "cpu",
**kwargs: Any,
) -> pl.DataFrame:
Expand All @@ -465,7 +464,7 @@ def execute(
expr: ir.Expr,
/,
*,
params: Mapping[ir.Expr, object] | None = None,
params: Mapping[ir.Scalar, Any] | None = None,
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The execute() params type annotation was narrowed to Mapping[ir.Scalar, Any], but this method passes params through to _to_dataframe()/compile(), which are annotated to accept Mapping[ir.Expr, ...]. Because Mapping is invariant in its key type, a Mapping[ir.Scalar, Any] is not assignable to a Mapping[ir.Expr, ...] parameter even though ir.Scalar is a subclass of ir.Expr, so this can introduce static type-checking errors. Consider aligning these annotations (e.g., keep Mapping[ir.Expr, object] everywhere, or update the internal helpers to also accept Mapping[ir.Scalar, Any]).

Suggested change
params: Mapping[ir.Scalar, Any] | None = None,
params: Mapping[ir.Expr, object] | None = None,

Copilot uses AI. Check for mistakes.
limit: int | None = None,
engine: Literal["cpu", "gpu", "streaming"] | pl.GPUEngine = "cpu",
**kwargs: Any,
Expand Down
15 changes: 5 additions & 10 deletions ibis/backends/sql/compilers/impala.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

from string import whitespace as WHITESPACE

import sqlglot as sg
import sqlglot.expressions as sge

Expand Down Expand Up @@ -45,6 +43,7 @@ class ImpalaCompiler(SQLGlotCompiler):
ops.ArgMin,
ops.Covariance,
ops.ExtractDayOfYear,
ops.Kurtosis,
ops.Levenshtein,
ops.Map,
ops.Median,
Expand All @@ -59,7 +58,6 @@ class ImpalaCompiler(SQLGlotCompiler):
ops.TimestampBucket,
ops.TimestampDelta,
ops.Unnest,
ops.Kurtosis,
)

SIMPLE_OPS = {
Expand All @@ -78,8 +76,8 @@ class ImpalaCompiler(SQLGlotCompiler):
ops.ExtractEpochSeconds: "unix_timestamp",
ops.Hash: "fnv_hash",
ops.Ln: "ln",
ops.TypeOf: "typeof",
ops.RegexReplace: "regexp_replace",
ops.TypeOf: "typeof",
}

@staticmethod
Expand Down Expand Up @@ -325,16 +323,13 @@ def visit_DateDelta(self, op, *, left, right, part):
return self.f.datediff(left, right)

def visit_LStrip(self, op, *, arg):
return self.f.anon.ltrim(arg, WHITESPACE)
return self.f.regexp_replace(arg, r"^\s+", "")

def visit_RStrip(self, op, *, arg):
return self.f.anon.rtrim(arg, WHITESPACE)
return self.f.regexp_replace(arg, r"\s+$", "")

def visit_Strip(self, op, *, arg):
# Impala's `TRIM` doesn't allow specifying characters to trim off, unlike
# Impala's `RTRIM` and `LTRIM` which accept a set of characters to
# remove.
return self.f.anon.rtrim(self.f.anon.ltrim(arg, WHITESPACE), WHITESPACE)
return self.f.regexp_replace(arg, r"^\s+|\s+$", "")


compiler = ImpalaCompiler()
11 changes: 9 additions & 2 deletions ibis/backends/sql/compilers/pyspark.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,6 @@ class PySparkCompiler(SQLGlotCompiler):
ops.EndsWith: "endswith",
ops.Hash: "hash",
ops.Log10: "log10",
ops.LStrip: "ltrim",
ops.RStrip: "rtrim",
ops.MapLength: "size",
ops.MapContains: "map_contains_key",
ops.MapMerge: "map_concat",
Expand Down Expand Up @@ -684,5 +682,14 @@ def visit_ArraySum(self, op, *, arg):
def visit_ArrayMean(self, op, *, arg):
return self._array_reduction(dtype=op.dtype, arg=arg, output=operator.truediv)

def visit_LStrip(self, op, *, arg):
return self.f.regexp_replace(arg, r"^\s+", "")

def visit_RStrip(self, op, *, arg):
return self.f.regexp_replace(arg, r"\s+$", "")

def visit_Strip(self, op, *, arg):
return self.f.regexp_replace(arg, r"^\s+|\s+$", "")


compiler = PySparkCompiler()
22 changes: 5 additions & 17 deletions ibis/backends/tests/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1144,8 +1144,9 @@ def string_temp_table(backend, con):
"aBc",
"🐍",
"ÉéÈèêç",
"fluf\f",
],
"index_col": [0, 1, 2, 3, 4, 5, 6],
"index_col": [0, 1, 2, 3, 4, 5, 6, 7],
}
)

Expand Down Expand Up @@ -1277,7 +1278,7 @@ def string_temp_table(backend, con):
),
param(
lambda t: t.string_col.find_in_set(["aBc", "123"]),
lambda _: pd.Series([-1, -1, -1, 1, 0, -1, -1], name="tmp"),
lambda _: pd.Series([-1, -1, -1, 1, 0, -1, -1, -1], name="tmp"),
id="find_in_set",
marks=[
pytest.mark.notyet(
Expand Down Expand Up @@ -1306,7 +1307,7 @@ def string_temp_table(backend, con):
),
param(
lambda t: t.string_col.find_in_set(["abc, 123"]),
lambda _: pd.Series([-1, -1, -1, -1, -1, -1, -1], name="tmp"),
lambda _: pd.Series([-1, -1, -1, -1, -1, -1, -1, -1], name="tmp"),
id="find_in_set_w_comma",
marks=[
pytest.mark.notyet(
Expand Down Expand Up @@ -1346,25 +1347,11 @@ def string_temp_table(backend, con):
lambda t: t.string_col.lstrip(),
lambda t: t.str.lstrip(),
id="lstrip",
marks=[
pytest.mark.notyet(
["pyspark", "databricks"],
raises=AssertionError,
reason="Spark SQL LTRIM doesn't accept characters to trim",
),
],
),
param(
lambda t: t.string_col.rstrip(),
lambda t: t.str.rstrip(),
id="rstrip",
marks=[
pytest.mark.notyet(
["pyspark", "databricks"],
raises=AssertionError,
reason="Spark SQL RTRIM doesn't accept characters to trim",
),
],
),
param(
lambda t: t.string_col.strip(),
Expand Down Expand Up @@ -1423,6 +1410,7 @@ def test_string_methods_accents_and_emoji(
│ aBc │
│ 🐍 │
│ ÉéÈèêç │
│ fluf\f │
└────────────┘
"""
t = string_temp_table
Expand Down
Loading