From b78c2030877e4b007aa73f28a434b1b28361495b Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Sun, 5 Apr 2026 18:41:59 -0600 Subject: [PATCH 01/13] fix(pyspark): make sure `trim` does not remove f's --- ibis/backends/polars/__init__.py | 11 +++++------ ibis/backends/tests/test_string.py | 3 ++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 38ab2ba9fc25..92e7dc1d15f3 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -1,6 +1,6 @@ from __future__ import annotations -from collections.abc import Iterable, Mapping +from collections.abc import Mapping from functools import lru_cache from pathlib import Path from typing import TYPE_CHECKING, Any, Literal @@ -31,7 +31,6 @@ class Backend(SupportsTempTables, BaseBackend, NoUrl, DirectExampleLoader): name = "polars" dialect = Polars - supports_temporary_tables = True def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -71,7 +70,7 @@ def do_connect( if tables is not None and not isinstance(tables, Mapping): raise TypeError("Input to ibis.polars.connect must be a mapping") - # tables are emphemeral + # tables are ephemeral self._tables.clear() for name, table in (tables or {}).items(): @@ -375,7 +374,7 @@ def drop_table(self, name: str, /, *, force: bool = False) -> None: del self._tables[name] self._context.unregister(name) elif not force: - raise com.IbisError(f"Table {name!r} does not exist") + raise com.TableNotFound(name) def drop_view(self, name: str, /, *, force: bool = False) -> None: self.drop_table(name, force=force) @@ -441,7 +440,7 @@ def _to_dataframe( self, expr: ir.Expr, params: Mapping[ir.Expr, object] | None = None, - limit: int | None = None, + limit: int | str | None = None, engine: Literal["cpu", "gpu", "streaming"] | pl.GPUEngine = "cpu", **kwargs: Any, ) -> pl.DataFrame: @@ -465,7 +464,7 @@ def execute( expr: ir.Expr, /, *, - params: Mapping[ir.Expr, object] | None = None, + params: Mapping[ir.Scalar, Any] | None = None, limit: int | None = None, engine: Literal["cpu", "gpu", "streaming"] | pl.GPUEngine = "cpu", **kwargs: Any, diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index da33489ba703..f90f1a4f8f95 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -1144,8 +1144,9 @@ def string_temp_table(backend, con): "aBc", "🐍", "ÉéÈèêç", + "fluff", ], - "index_col": [0, 1, 2, 3, 4, 5, 6], + "index_col": [0, 1, 2, 3, 4, 5, 6, 7], } ) From 43e200bbff89b00e77c686ed94d7148c10c350a7 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Sun, 5 Apr 2026 19:55:46 -0600 Subject: [PATCH 02/13] test: update the expected values for `find_in_set` --- ibis/backends/tests/test_string.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index f90f1a4f8f95..b1c6ae92c943 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -1278,7 +1278,7 @@ def string_temp_table(backend, con): ), param( lambda t: t.string_col.find_in_set(["aBc", "123"]), - lambda _: pd.Series([-1, -1, -1, 1, 0, -1, -1], name="tmp"), + lambda _: pd.Series([-1, -1, -1, 1, 0, -1, -1, -1], name="tmp"), id="find_in_set", marks=[ pytest.mark.notyet( From 6fa9b0789c12db5a68e3c2ff506ad7987525d1d1 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Sun, 5 Apr 2026 22:23:16 -0600 Subject: [PATCH 03/13] fix(pyspark): make sure `trim` does not remove f's --- ibis/backends/sql/compilers/pyspark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index c4a4d741d659..038e1e4dbd84 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -85,6 +85,7 @@ class PySparkCompiler(SQLGlotCompiler): ops.EndsWith: "endswith", ops.Hash: "hash", ops.Log10: "log10", + ops.Strip: "trim", ops.LStrip: "ltrim", ops.RStrip: "rtrim", ops.MapLength: "size", From 616620eca73445750f4ded2a581b10cbc5d0868d Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Sun, 5 Apr 2026 22:29:57 -0600 Subject: [PATCH 04/13] fix(impala): make sure `trim` does not remove _f_s --- ibis/backends/sql/compilers/impala.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index 244daca67c89..1645f219ad5d 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -45,6 +45,7 @@ class ImpalaCompiler(SQLGlotCompiler): ops.ArgMin, ops.Covariance, ops.ExtractDayOfYear, + ops.Kurtosis, ops.Levenshtein, ops.Map, ops.Median, @@ -59,7 +60,6 @@ class ImpalaCompiler(SQLGlotCompiler): ops.TimestampBucket, ops.TimestampDelta, ops.Unnest, - ops.Kurtosis, ) SIMPLE_OPS = { @@ -77,9 +77,12 @@ class ImpalaCompiler(SQLGlotCompiler): ops.DayOfWeekName: "dayname", ops.ExtractEpochSeconds: "unix_timestamp", ops.Hash: "fnv_hash", + ops.LStrip: "ltrim", ops.Ln: "ln", - ops.TypeOf: "typeof", + ops.RStrip: "rtrim", ops.RegexReplace: "regexp_replace", + ops.Strip: "trim", + ops.TypeOf: "typeof", } @staticmethod From b5ad8f618ec0f9129ceab46d9787875b48d9e16e Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Sun, 5 Apr 2026 22:33:08 -0600 Subject: [PATCH 05/13] test: update the expected values for `find_in_set` --- ibis/backends/tests/test_string.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index b1c6ae92c943..9b9eeb5ee552 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -1307,7 +1307,7 @@ def string_temp_table(backend, con): ), param( lambda t: t.string_col.find_in_set(["abc, 123"]), - lambda _: pd.Series([-1, -1, -1, -1, -1, -1, -1], name="tmp"), + lambda _: pd.Series([-1, -1, -1, -1, -1, -1, -1, -1], name="tmp"), id="find_in_set_w_comma", marks=[ pytest.mark.notyet( From 7b4809f865f59fa6414ea65e1f9f0379538c7702 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Mon, 6 Apr 2026 19:31:29 -0600 Subject: [PATCH 06/13] fix(backends): use regexp_replace to strip strings --- ibis/backends/sql/compilers/impala.py | 12 +++--------- ibis/backends/sql/compilers/pyspark.py | 13 ++++++++++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index 1645f219ad5d..a6cf0705188c 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -77,11 +77,8 @@ class ImpalaCompiler(SQLGlotCompiler): ops.DayOfWeekName: "dayname", ops.ExtractEpochSeconds: "unix_timestamp", ops.Hash: "fnv_hash", - ops.LStrip: "ltrim", ops.Ln: "ln", - ops.RStrip: "rtrim", ops.RegexReplace: "regexp_replace", - ops.Strip: "trim", ops.TypeOf: "typeof", } @@ -328,16 +325,13 @@ def visit_DateDelta(self, op, *, left, right, part): return self.f.datediff(left, right) def visit_LStrip(self, op, *, arg): - return self.f.anon.ltrim(arg, WHITESPACE) + return self.f.regexp_replace(arg, rf"^[{WHITESPACE}]+", "") def visit_RStrip(self, op, *, arg): - return self.f.anon.rtrim(arg, WHITESPACE) + return self.f.regexp_replace(arg, rf"[{WHITESPACE}]+$", "") def visit_Strip(self, op, *, arg): - # Impala's `TRIM` doesn't allow specifying characters to trim off, unlike - # Impala's `RTRIM` and `LTRIM` which accept a set of characters to - # remove. - return self.f.anon.rtrim(self.f.anon.ltrim(arg, WHITESPACE), WHITESPACE) + return self.visit_RStrip(self.visit_LStrip(op, arg=arg), arg=arg) compiler = ImpalaCompiler() diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index 038e1e4dbd84..1d9e0e9319b4 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -4,6 +4,7 @@ import itertools import operator import re +from string import whitespace as WHITESPACE import sqlglot as sg import sqlglot.expressions as sge @@ -85,9 +86,6 @@ class PySparkCompiler(SQLGlotCompiler): ops.EndsWith: "endswith", ops.Hash: "hash", ops.Log10: "log10", - ops.Strip: "trim", - ops.LStrip: "ltrim", - ops.RStrip: "rtrim", ops.MapLength: "size", ops.MapContains: "map_contains_key", ops.MapMerge: "map_concat", @@ -685,5 +683,14 @@ def visit_ArraySum(self, op, *, arg): def visit_ArrayMean(self, op, *, arg): return self._array_reduction(dtype=op.dtype, arg=arg, output=operator.truediv) + def visit_LStrip(self, op, *, arg): + return self.f.regexp_replace(arg, rf"^[{WHITESPACE}]+", "") + + def visit_RStrip(self, op, *, arg): + return self.f.regexp_replace(arg, rf"[{WHITESPACE}]+$", "") + + def visit_Strip(self, op, *, arg): + return self.visit_RStrip(self.visit_LStrip(op, arg=arg), arg=arg) + compiler = PySparkCompiler() From 1cd23c41446c1fabed119793a74d0f1a0ec7d9b2 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Mon, 6 Apr 2026 22:26:01 -0600 Subject: [PATCH 07/13] fix(impala/pyspark): use `regexp_replace` to strip --- ibis/backends/sql/compilers/impala.py | 8 +++----- ibis/backends/sql/compilers/pyspark.py | 7 +++---- ibis/backends/tests/test_string.py | 2 +- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index a6cf0705188c..f0381c9fac20 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -1,7 +1,5 @@ from __future__ import annotations -from string import whitespace as WHITESPACE - import sqlglot as sg import sqlglot.expressions as sge @@ -325,13 +323,13 @@ def visit_DateDelta(self, op, *, left, right, part): return self.f.datediff(left, right) def visit_LStrip(self, op, *, arg): - return self.f.regexp_replace(arg, rf"^[{WHITESPACE}]+", "") + return self.f.regexp_replace(arg, r"^\s+", "") def visit_RStrip(self, op, *, arg): - return self.f.regexp_replace(arg, rf"[{WHITESPACE}]+$", "") + return self.f.regexp_replace(arg, r"\s+$", "") def visit_Strip(self, op, *, arg): - return self.visit_RStrip(self.visit_LStrip(op, arg=arg), arg=arg) + return self.f.regexp_replace(arg, r"^\s+|\s+$", "") compiler = ImpalaCompiler() diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index 1d9e0e9319b4..b85267be2b8b 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -4,7 +4,6 @@ import itertools import operator import re -from string import whitespace as WHITESPACE import sqlglot as sg import sqlglot.expressions as sge @@ -684,13 +683,13 @@ def visit_ArrayMean(self, op, *, arg): return self._array_reduction(dtype=op.dtype, arg=arg, output=operator.truediv) def visit_LStrip(self, op, *, arg): - return self.f.regexp_replace(arg, rf"^[{WHITESPACE}]+", "") + return self.f.regexp_replace(arg, r"^\s+", "") def visit_RStrip(self, op, *, arg): - return self.f.regexp_replace(arg, rf"[{WHITESPACE}]+$", "") + return self.f.regexp_replace(arg, r"\s+$", "") def visit_Strip(self, op, *, arg): - return self.visit_RStrip(self.visit_LStrip(op, arg=arg), arg=arg) + return self.f.regexp_replace(arg, r"^\s+|\s+$", "") compiler = PySparkCompiler() diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 9b9eeb5ee552..b5d92d8635be 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -1144,7 +1144,7 @@ def string_temp_table(backend, con): "aBc", "🐍", "ÉéÈèêç", - "fluff", + "fluf\f", ], "index_col": [0, 1, 2, 3, 4, 5, 6, 7], } From a65ad04eac63acc6c43eed40c4e36a3d7e2afe35 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Mon, 6 Apr 2026 22:55:15 -0600 Subject: [PATCH 08/13] Revert "fix(impala/pyspark): use `regexp_replace` to strip" This reverts commit 1cd23c41446c1fabed119793a74d0f1a0ec7d9b2. --- ibis/backends/sql/compilers/impala.py | 8 +++++--- ibis/backends/sql/compilers/pyspark.py | 7 ++++--- ibis/backends/tests/test_string.py | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index f0381c9fac20..a6cf0705188c 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -1,5 +1,7 @@ from __future__ import annotations +from string import whitespace as WHITESPACE + import sqlglot as sg import sqlglot.expressions as sge @@ -323,13 +325,13 @@ def visit_DateDelta(self, op, *, left, right, part): return self.f.datediff(left, right) def visit_LStrip(self, op, *, arg): - return self.f.regexp_replace(arg, r"^\s+", "") + return self.f.regexp_replace(arg, rf"^[{WHITESPACE}]+", "") def visit_RStrip(self, op, *, arg): - return self.f.regexp_replace(arg, r"\s+$", "") + return self.f.regexp_replace(arg, rf"[{WHITESPACE}]+$", "") def visit_Strip(self, op, *, arg): - return self.f.regexp_replace(arg, r"^\s+|\s+$", "") + return self.visit_RStrip(self.visit_LStrip(op, arg=arg), arg=arg) compiler = ImpalaCompiler() diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index b85267be2b8b..1d9e0e9319b4 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -4,6 +4,7 @@ import itertools import operator import re +from string import whitespace as WHITESPACE import sqlglot as sg import sqlglot.expressions as sge @@ -683,13 +684,13 @@ def visit_ArrayMean(self, op, *, arg): return self._array_reduction(dtype=op.dtype, arg=arg, output=operator.truediv) def visit_LStrip(self, op, *, arg): - return self.f.regexp_replace(arg, r"^\s+", "") + return self.f.regexp_replace(arg, rf"^[{WHITESPACE}]+", "") def visit_RStrip(self, op, *, arg): - return self.f.regexp_replace(arg, r"\s+$", "") + return self.f.regexp_replace(arg, rf"[{WHITESPACE}]+$", "") def visit_Strip(self, op, *, arg): - return self.f.regexp_replace(arg, r"^\s+|\s+$", "") + return self.visit_RStrip(self.visit_LStrip(op, arg=arg), arg=arg) compiler = PySparkCompiler() diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index b5d92d8635be..9b9eeb5ee552 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -1144,7 +1144,7 @@ def string_temp_table(backend, con): "aBc", "🐍", "ÉéÈèêç", - "fluf\f", + "fluff", ], "index_col": [0, 1, 2, 3, 4, 5, 6, 7], } From c689adc92c839e6301feeef8cf0c6e7ca1af1658 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Mon, 6 Apr 2026 22:56:45 -0600 Subject: [PATCH 09/13] Revert "fix(backends): use regexp_replace to strip strings" This reverts commit 7b4809f865f59fa6414ea65e1f9f0379538c7702. --- ibis/backends/sql/compilers/impala.py | 12 +++++++++--- ibis/backends/sql/compilers/pyspark.py | 13 +++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index a6cf0705188c..1645f219ad5d 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -77,8 +77,11 @@ class ImpalaCompiler(SQLGlotCompiler): ops.DayOfWeekName: "dayname", ops.ExtractEpochSeconds: "unix_timestamp", ops.Hash: "fnv_hash", + ops.LStrip: "ltrim", ops.Ln: "ln", + ops.RStrip: "rtrim", ops.RegexReplace: "regexp_replace", + ops.Strip: "trim", ops.TypeOf: "typeof", } @@ -325,13 +328,16 @@ def visit_DateDelta(self, op, *, left, right, part): return self.f.datediff(left, right) def visit_LStrip(self, op, *, arg): - return self.f.regexp_replace(arg, rf"^[{WHITESPACE}]+", "") + return self.f.anon.ltrim(arg, WHITESPACE) def visit_RStrip(self, op, *, arg): - return self.f.regexp_replace(arg, rf"[{WHITESPACE}]+$", "") + return self.f.anon.rtrim(arg, WHITESPACE) def visit_Strip(self, op, *, arg): - return self.visit_RStrip(self.visit_LStrip(op, arg=arg), arg=arg) + # Impala's `TRIM` doesn't allow specifying characters to trim off, unlike + # Impala's `RTRIM` and `LTRIM` which accept a set of characters to + # remove. + return self.f.anon.rtrim(self.f.anon.ltrim(arg, WHITESPACE), WHITESPACE) compiler = ImpalaCompiler() diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index 1d9e0e9319b4..038e1e4dbd84 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -4,7 +4,6 @@ import itertools import operator import re -from string import whitespace as WHITESPACE import sqlglot as sg import sqlglot.expressions as sge @@ -86,6 +85,9 @@ class PySparkCompiler(SQLGlotCompiler): ops.EndsWith: "endswith", ops.Hash: "hash", ops.Log10: "log10", + ops.Strip: "trim", + ops.LStrip: "ltrim", + ops.RStrip: "rtrim", ops.MapLength: "size", ops.MapContains: "map_contains_key", ops.MapMerge: "map_concat", @@ -683,14 +685,5 @@ def visit_ArraySum(self, op, *, arg): def visit_ArrayMean(self, op, *, arg): return self._array_reduction(dtype=op.dtype, arg=arg, output=operator.truediv) - def visit_LStrip(self, op, *, arg): - return self.f.regexp_replace(arg, rf"^[{WHITESPACE}]+", "") - - def visit_RStrip(self, op, *, arg): - return self.f.regexp_replace(arg, rf"[{WHITESPACE}]+$", "") - - def visit_Strip(self, op, *, arg): - return self.visit_RStrip(self.visit_LStrip(op, arg=arg), arg=arg) - compiler = PySparkCompiler() From f6d7c35198663c08fc802d3fc9dcf5888ac4aa42 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Mon, 6 Apr 2026 23:06:27 -0600 Subject: [PATCH 10/13] fix(impala/pyspark): try with repr of `WHITESPACE` --- ibis/backends/sql/compilers/impala.py | 11 +++++------ ibis/backends/sql/compilers/pyspark.py | 18 +++++++++++++++--- ibis/backends/tests/test_string.py | 9 +++++---- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index 1645f219ad5d..ca4baf85d2f4 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -77,11 +77,8 @@ class ImpalaCompiler(SQLGlotCompiler): ops.DayOfWeekName: "dayname", ops.ExtractEpochSeconds: "unix_timestamp", ops.Hash: "fnv_hash", - ops.LStrip: "ltrim", ops.Ln: "ln", - ops.RStrip: "rtrim", ops.RegexReplace: "regexp_replace", - ops.Strip: "trim", ops.TypeOf: "typeof", } @@ -328,16 +325,18 @@ def visit_DateDelta(self, op, *, left, right, part): return self.f.datediff(left, right) def visit_LStrip(self, op, *, arg): - return self.f.anon.ltrim(arg, WHITESPACE) + return self.f.anon.ltrim(arg, repr(WHITESPACE)) def visit_RStrip(self, op, *, arg): - return self.f.anon.rtrim(arg, WHITESPACE) + return self.f.anon.rtrim(arg, repr(WHITESPACE)) def visit_Strip(self, op, *, arg): # Impala's `TRIM` doesn't allow specifying characters to trim off, unlike # Impala's `RTRIM` and `LTRIM` which accept a set of characters to # remove. - return self.f.anon.rtrim(self.f.anon.ltrim(arg, WHITESPACE), WHITESPACE) + return self.f.anon.rtrim( + self.f.anon.ltrim(arg, repr(WHITESPACE)), repr(WHITESPACE) + ) compiler = ImpalaCompiler() diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index 038e1e4dbd84..e810a8a5d162 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -4,6 +4,7 @@ import itertools import operator import re +from string import whitespace as WHITESPACE import sqlglot as sg import sqlglot.expressions as sge @@ -85,9 +86,6 @@ class PySparkCompiler(SQLGlotCompiler): ops.EndsWith: "endswith", ops.Hash: "hash", ops.Log10: "log10", - ops.Strip: "trim", - ops.LStrip: "ltrim", - ops.RStrip: "rtrim", ops.MapLength: "size", ops.MapContains: "map_contains_key", ops.MapMerge: "map_concat", @@ -685,5 +683,19 @@ def visit_ArraySum(self, op, *, arg): def visit_ArrayMean(self, op, *, arg): return self._array_reduction(dtype=op.dtype, arg=arg, output=operator.truediv) + def visit_LStrip(self, op, *, arg): + return self.f.anon.ltrim(arg, repr(WHITESPACE)) + + def visit_RStrip(self, op, *, arg): + return self.f.anon.rtrim(arg, repr(WHITESPACE)) + + def visit_Strip(self, op, *, arg): + # PySpark's `TRIM` didn't allow specifying characters to trim off, unlike + # PySpark's `RTRIM` and `LTRIM` which accept a set of characters to + # remove. + return self.f.anon.rtrim( + self.f.anon.ltrim(arg, repr(WHITESPACE)), repr(WHITESPACE) + ) + compiler = PySparkCompiler() diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 9b9eeb5ee552..e1a3972b6b2b 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -1144,9 +1144,10 @@ def string_temp_table(backend, con): "aBc", "🐍", "ÉéÈèêç", - "fluff", + "fluf\f", + "'fluf\f'", ], - "index_col": [0, 1, 2, 3, 4, 5, 6, 7], + "index_col": [0, 1, 2, 3, 4, 5, 6, 7, 8], } ) @@ -1278,7 +1279,7 @@ def string_temp_table(backend, con): ), param( lambda t: t.string_col.find_in_set(["aBc", "123"]), - lambda _: pd.Series([-1, -1, -1, 1, 0, -1, -1, -1], name="tmp"), + lambda _: pd.Series([-1, -1, -1, 1, 0, -1, -1, -1, -1], name="tmp"), id="find_in_set", marks=[ pytest.mark.notyet( @@ -1307,7 +1308,7 @@ def string_temp_table(backend, con): ), param( lambda t: t.string_col.find_in_set(["abc, 123"]), - lambda _: pd.Series([-1, -1, -1, -1, -1, -1, -1, -1], name="tmp"), + lambda _: pd.Series([-1, -1, -1, -1, -1, -1, -1, -1, -1], name="tmp"), id="find_in_set_w_comma", marks=[ pytest.mark.notyet( From fea92d54814176e7b006158524d067cc5bc290df Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Tue, 7 Apr 2026 08:27:13 -0600 Subject: [PATCH 11/13] fix(impala/pyspark): escape regex passed to `trim` --- ibis/backends/sql/compilers/impala.py | 12 ++++++------ ibis/backends/sql/compilers/pyspark.py | 15 ++++++--------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index ca4baf85d2f4..7b120021e5f7 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -1,6 +1,6 @@ from __future__ import annotations -from string import whitespace as WHITESPACE +from string import whitespace import sqlglot as sg import sqlglot.expressions as sge @@ -20,6 +20,8 @@ split_select_distinct_with_order_by, ) +WHITESPACE = whitespace.encode("unicode-escape").decode() + class ImpalaCompiler(SQLGlotCompiler): __slots__ = () @@ -325,18 +327,16 @@ def visit_DateDelta(self, op, *, left, right, part): return self.f.datediff(left, right) def visit_LStrip(self, op, *, arg): - return self.f.anon.ltrim(arg, repr(WHITESPACE)) + return self.f.anon.ltrim(arg, WHITESPACE) def visit_RStrip(self, op, *, arg): - return self.f.anon.rtrim(arg, repr(WHITESPACE)) + return self.f.anon.rtrim(arg, WHITESPACE) def visit_Strip(self, op, *, arg): # Impala's `TRIM` doesn't allow specifying characters to trim off, unlike # Impala's `RTRIM` and `LTRIM` which accept a set of characters to # remove. - return self.f.anon.rtrim( - self.f.anon.ltrim(arg, repr(WHITESPACE)), repr(WHITESPACE) - ) + return self.f.anon.rtrim(self.f.anon.ltrim(arg, WHITESPACE), WHITESPACE) compiler = ImpalaCompiler() diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index e810a8a5d162..6ebe223fe707 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -4,7 +4,7 @@ import itertools import operator import re -from string import whitespace as WHITESPACE +from string import whitespace import sqlglot as sg import sqlglot.expressions as sge @@ -28,6 +28,8 @@ from ibis.expr.operations.udf import InputType from ibis.util import gen_name +WHITESPACE = whitespace.encode("unicode-escape").decode() + @replace(p.Limit) def offset_to_filter(_): @@ -684,18 +686,13 @@ def visit_ArrayMean(self, op, *, arg): return self._array_reduction(dtype=op.dtype, arg=arg, output=operator.truediv) def visit_LStrip(self, op, *, arg): - return self.f.anon.ltrim(arg, repr(WHITESPACE)) + return self.f.anon.ltrim(arg, WHITESPACE) def visit_RStrip(self, op, *, arg): - return self.f.anon.rtrim(arg, repr(WHITESPACE)) + return self.f.anon.rtrim(arg, WHITESPACE) def visit_Strip(self, op, *, arg): - # PySpark's `TRIM` didn't allow specifying characters to trim off, unlike - # PySpark's `RTRIM` and `LTRIM` which accept a set of characters to - # remove. - return self.f.anon.rtrim( - self.f.anon.ltrim(arg, repr(WHITESPACE)), repr(WHITESPACE) - ) + return self.f.anon.trim(arg, WHITESPACE) compiler = PySparkCompiler() From 11f7e8a8a933dcf119c6bc1111c6d2d14a19a641 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Tue, 7 Apr 2026 09:05:16 -0600 Subject: [PATCH 12/13] fix(impala/pyspark): use `regexp_replace` to strip --- .../test_string_builtins/lstrip/out.sql | 2 +- .../test_string_builtins/rstrip/out.sql | 2 +- .../test_string_builtins/strip/out.sql | 2 +- ibis/backends/sql/compilers/impala.py | 13 +++-------- ibis/backends/sql/compilers/pyspark.py | 9 +++----- ibis/backends/tests/test_string.py | 22 ++++--------------- 6 files changed, 13 insertions(+), 37 deletions(-) diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lstrip/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lstrip/out.sql index 361d6f5fb2e9..411a49821957 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lstrip/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lstrip/out.sql @@ -1,3 +1,3 @@ SELECT - LTRIM(`t0`.`string_col`, ' \t\n\r\v\f') AS `LStrip(string_col)` + REGEXP_REPLACE(`t0`.`string_col`, '^\\s+', '') AS `LStrip(string_col)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rstrip/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rstrip/out.sql index 4506448e7e27..93ddcb55c9ae 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rstrip/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rstrip/out.sql @@ -1,3 +1,3 @@ SELECT - RTRIM(`t0`.`string_col`, ' \t\n\r\v\f') AS `RStrip(string_col)` + REGEXP_REPLACE(`t0`.`string_col`, '\\s+$', '') AS `RStrip(string_col)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strip/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strip/out.sql index bb26534ade86..64c67f6a524e 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strip/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strip/out.sql @@ -1,3 +1,3 @@ SELECT - RTRIM(LTRIM(`t0`.`string_col`, ' \t\n\r\v\f'), ' \t\n\r\v\f') AS `Strip(string_col)` + REGEXP_REPLACE(`t0`.`string_col`, '^\\s+|\\s+$', '') AS `Strip(string_col)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index 7b120021e5f7..f0381c9fac20 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -1,7 +1,5 @@ from __future__ import annotations -from string import whitespace - import sqlglot as sg import sqlglot.expressions as sge @@ -20,8 +18,6 @@ split_select_distinct_with_order_by, ) -WHITESPACE = whitespace.encode("unicode-escape").decode() - class ImpalaCompiler(SQLGlotCompiler): __slots__ = () @@ -327,16 +323,13 @@ def visit_DateDelta(self, op, *, left, right, part): return self.f.datediff(left, right) def visit_LStrip(self, op, *, arg): - return self.f.anon.ltrim(arg, WHITESPACE) + return self.f.regexp_replace(arg, r"^\s+", "") def visit_RStrip(self, op, *, arg): - return self.f.anon.rtrim(arg, WHITESPACE) + return self.f.regexp_replace(arg, r"\s+$", "") def visit_Strip(self, op, *, arg): - # Impala's `TRIM` doesn't allow specifying characters to trim off, unlike - # Impala's `RTRIM` and `LTRIM` which accept a set of characters to - # remove. - return self.f.anon.rtrim(self.f.anon.ltrim(arg, WHITESPACE), WHITESPACE) + return self.f.regexp_replace(arg, r"^\s+|\s+$", "") compiler = ImpalaCompiler() diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index 6ebe223fe707..b85267be2b8b 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -4,7 +4,6 @@ import itertools import operator import re -from string import whitespace import sqlglot as sg import sqlglot.expressions as sge @@ -28,8 +27,6 @@ from ibis.expr.operations.udf import InputType from ibis.util import gen_name -WHITESPACE = whitespace.encode("unicode-escape").decode() - @replace(p.Limit) def offset_to_filter(_): @@ -686,13 +683,13 @@ def visit_ArrayMean(self, op, *, arg): return self._array_reduction(dtype=op.dtype, arg=arg, output=operator.truediv) def visit_LStrip(self, op, *, arg): - return self.f.anon.ltrim(arg, WHITESPACE) + return self.f.regexp_replace(arg, r"^\s+", "") def visit_RStrip(self, op, *, arg): - return self.f.anon.rtrim(arg, WHITESPACE) + return self.f.regexp_replace(arg, r"\s+$", "") def visit_Strip(self, op, *, arg): - return self.f.anon.trim(arg, WHITESPACE) + return self.f.regexp_replace(arg, r"^\s+|\s+$", "") compiler = PySparkCompiler() diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index e1a3972b6b2b..1346b404316f 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -1145,9 +1145,8 @@ def string_temp_table(backend, con): "🐍", "ÉéÈèêç", "fluf\f", - "'fluf\f'", ], - "index_col": [0, 1, 2, 3, 4, 5, 6, 7, 8], + "index_col": [0, 1, 2, 3, 4, 5, 6, 7], } ) @@ -1279,7 +1278,7 @@ def string_temp_table(backend, con): ), param( lambda t: t.string_col.find_in_set(["aBc", "123"]), - lambda _: pd.Series([-1, -1, -1, 1, 0, -1, -1, -1, -1], name="tmp"), + lambda _: pd.Series([-1, -1, -1, 1, 0, -1, -1, -1], name="tmp"), id="find_in_set", marks=[ pytest.mark.notyet( @@ -1308,7 +1307,7 @@ def string_temp_table(backend, con): ), param( lambda t: t.string_col.find_in_set(["abc, 123"]), - lambda _: pd.Series([-1, -1, -1, -1, -1, -1, -1, -1, -1], name="tmp"), + lambda _: pd.Series([-1, -1, -1, -1, -1, -1, -1, -1], name="tmp"), id="find_in_set_w_comma", marks=[ pytest.mark.notyet( @@ -1348,25 +1347,11 @@ def string_temp_table(backend, con): lambda t: t.string_col.lstrip(), lambda t: t.str.lstrip(), id="lstrip", - marks=[ - pytest.mark.notyet( - ["pyspark", "databricks"], - raises=AssertionError, - reason="Spark SQL LTRIM doesn't accept characters to trim", - ), - ], ), param( lambda t: t.string_col.rstrip(), lambda t: t.str.rstrip(), id="rstrip", - marks=[ - pytest.mark.notyet( - ["pyspark", "databricks"], - raises=AssertionError, - reason="Spark SQL RTRIM doesn't accept characters to trim", - ), - ], ), param( lambda t: t.string_col.strip(), @@ -1425,6 +1410,7 @@ def test_string_methods_accents_and_emoji( │ aBc │ │ 🐍 │ │ ÉéÈèêç │ + │ fluf\f │ └────────────┘ """ t = string_temp_table From 5e34b811806fd2fd4b1d357972cd4ed6761ae43e Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Tue, 7 Apr 2026 09:45:37 -0600 Subject: [PATCH 13/13] revert(polars): undo incidental changes to backend --- ibis/backends/polars/__init__.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 92e7dc1d15f3..38ab2ba9fc25 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -1,6 +1,6 @@ from __future__ import annotations -from collections.abc import Mapping +from collections.abc import Iterable, Mapping from functools import lru_cache from pathlib import Path from typing import TYPE_CHECKING, Any, Literal @@ -31,6 +31,7 @@ class Backend(SupportsTempTables, BaseBackend, NoUrl, DirectExampleLoader): name = "polars" dialect = Polars + supports_temporary_tables = True def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -70,7 +71,7 @@ def do_connect( if tables is not None and not isinstance(tables, Mapping): raise TypeError("Input to ibis.polars.connect must be a mapping") - # tables are ephemeral + # tables are emphemeral self._tables.clear() for name, table in (tables or {}).items(): @@ -374,7 +375,7 @@ def drop_table(self, name: str, /, *, force: bool = False) -> None: del self._tables[name] self._context.unregister(name) elif not force: - raise com.TableNotFound(name) + raise com.IbisError(f"Table {name!r} does not exist") def drop_view(self, name: str, /, *, force: bool = False) -> None: self.drop_table(name, force=force) @@ -440,7 +441,7 @@ def _to_dataframe( self, expr: ir.Expr, params: Mapping[ir.Expr, object] | None = None, - limit: int | str | None = None, + limit: int | None = None, engine: Literal["cpu", "gpu", "streaming"] | pl.GPUEngine = "cpu", **kwargs: Any, ) -> pl.DataFrame: @@ -464,7 +465,7 @@ def execute( expr: ir.Expr, /, *, - params: Mapping[ir.Scalar, Any] | None = None, + params: Mapping[ir.Expr, object] | None = None, limit: int | None = None, engine: Literal["cpu", "gpu", "streaming"] | pl.GPUEngine = "cpu", **kwargs: Any,