Checks
Reproducible example
d = {"initial_data_date": "2025-03-07T08:00:00", "other": [1,2,3]}
pl.LazyFrame(d, schema_overrides={"initial_data_date": pl.Datetime(time_zone="UTC")}, strict=False)
Log output
In [28]: pl.LazyFrame(d, schema_overrides={"initial_data_date": pl.Datetime(time_zone="UTC")}, strict=False)
---------------------------------------------------------------------------
InvalidOperationError Traceback (most recent call last)
Cell In[28], line 1
----> 1 pl.LazyFrame(d, schema_overrides={"initial_data_date": pl.Datetime(time_zone="UTC")}, strict=False)
File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/lazyframe/frame.py:430, in LazyFrame.__init__(self, data, schema, schema_overrides, strict, orient, infer_schema_length, nan_to_null, height)
415 def __init__(
416 self,
417 data: FrameInitTypes | None = None,
(...) 425 height: int | None = None,
426 ) -> None:
427 from polars.dataframe import DataFrame
429 self._ldf = (
--> 430 DataFrame(
431 data=data,
432 schema=schema,
433 schema_overrides=schema_overrides,
434 strict=strict,
435 orient=orient,
436 infer_schema_length=infer_schema_length,
437 nan_to_null=nan_to_null,
438 height=height,
439 )
440 .lazy()
441 ._ldf
442 )
File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/dataframe/frame.py:400, in DataFrame.__init__(self, data, schema, schema_overrides, strict, orient, infer_schema_length, nan_to_null, height)
396 if height is not None and self.width == 0:
397 self._df = PyDataFrame.empty_with_height(height)
398
399 elif isinstance(data, dict):
--> 400 self._df = dict_to_pydf(
401 data,
402 schema=schema,
403 schema_overrides=schema_overrides,
File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/_utils/construction/dataframe.py:161, in dict_to_pydf(data, schema, schema_overrides, strict, nan_to_null, allow_multithreaded)
148 data_series = [
149 pl.Series(
150 name,
(...) 156 for name in column_names
157 ]
158 else:
159 data_series = [
160 s._s
--> 161 for s in _expand_dict_values(
162 data,
163 schema_overrides=schema_overrides,
164 strict=strict,
165 nan_to_null=nan_to_null,
166 ).values()
167 ]
169 data_series = _handle_columns_arg(data_series, columns=column_names, from_dict=True)
170 pydf = PyDataFrame(data_series)
File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/_utils/construction/dataframe.py:404, in _expand_dict_values(data, schema_overrides, strict, order, nan_to_null)
394 updated_data[name] = pl.Series(
395 name=name,
396 values=val,
(...) 399 nan_to_null=nan_to_null,
400 )
401 elif val is None or isinstance( # type: ignore[redundant-expr]
402 val, (int, float, str, bool, date, datetime, time, timedelta)
403 ):
--> 404 updated_data[name] = F.repeat(
405 val, array_len, dtype=dtype, eager=True
406 ).alias(name)
407 else:
408 updated_data[name] = pl.Series(
409 name=name, values=[val] * array_len, dtype=dtype, strict=strict
410 )
File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/functions/repeat.py:145, in repeat(value, n, dtype, eager)
143 msg = f"`n` parameter of `repeat expected a `int` or `Expr` got a `{qualified_type_name(n)}`"
144 raise TypeError(msg)
--> 145 value_pyexpr = parse_into_expression(value, str_as_lit=True, dtype=dtype)
146 expr = wrap_expr(plr.repeat(value_pyexpr, n._pyexpr, dtype))
147 if eager:
File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/_utils/parse/expr.py:68, in parse_into_expression(input, str_as_lit, list_as_series, structify, dtype, require_selector)
66 expr = F.lit(pl.Series(input), dtype=dtype)
67 else:
---> 68 expr = F.lit(input, dtype=dtype)
70 return expr._pyexpr
File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/functions/lit.py:197, in lit(value, dtype, allow_object)
194 return lit(value.value, dtype=dtype)
196 if dtype:
--> 197 value_s = pl.Series("literal", [value]).cast(dtype)
198 return wrap_expr(plr.lit(value_s._s, allow_object, is_scalar=True))
200 if _check_for_numpy(value) and isinstance(value, np.generic):
201 # note: the item() is a py-native datetime/timedelta when units < 'ns'
File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/series/series.py:4603, in Series.cast(self, dtype, strict, wrap_numerical)
4601 # Do not dispatch cast as it is expensive and used in other functions.
4602 dtype = parse_into_dtype(dtype)
-> 4603 return self._from_pyseries(self._s.cast(dtype, strict, wrap_numerical))
InvalidOperationError: conversion from `str` to `datetime[μs, UTC]` failed in column 'literal' for 1 out of 1 values: ["2025-03-07T08:00:00"]
You might want to try:
- setting `strict=False` to set values that cannot be converted to `null`
- using `str.strptime`, `str.to_date`, or `str.to_datetime` and providing a format string
Issue description
When setting the schema override for string -> datetime, on a mixed nested dict, the construction fails. An error might be valid here but not for the reason it claims. Additionally, strict is set to False and the data is not cast to null.
If I remove the schema_overrides, the output is:
shape: (3, 2)
┌─────────────────────┬───────┐
│ initial_data_date ┆ other │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════════════════════╪═══════╡
│ 2025-03-07T08:00:00 ┆ 1 │
│ 2025-03-07T08:00:00 ┆ 2 │
│ 2025-03-07T08:00:00 ┆ 3 │
Not quite what I had in mind but fine. If I wrap the dict in an array:
In [31]: pl.LazyFrame([d], strict=False).collect()
Out[31]:
shape: (1, 2)
┌─────────────────────┬───────────┐
│ initial_data_date ┆ other │
│ --- ┆ --- │
│ str ┆ list[i64] │
╞═════════════════════╪═══════════╡
│ 2025-03-07T08:00:00 ┆ [1, 2, 3] │
└─────────────────────┴───────────┘
Closer. And now if I add back the schema_overrides:
New compute error
In [32]: pl.LazyFrame([d], schema_overrides={"initial_data_date": pl.Datetime(time_zone="UTC")}, strict=False)
---------------------------------------------------------------------------
ComputeError Traceback (most recent call last)
Cell In[32], line 1
----> 1 pl.LazyFrame([d], schema_overrides={"initial_data_date": pl.Datetime(time_zone="UTC")}, strict=False)
File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/lazyframe/frame.py:430, in LazyFrame.__init__(self, data, schema, schema_overrides, strict, orient, infer_schema_length, nan_to_null, height)
415 def __init__(
416 self,
417 data: FrameInitTypes | None = None,
(...) 425 height: int | None = None,
426 ) -> None:
427 from polars.dataframe import DataFrame
429 self._ldf = (
--> 430 DataFrame(
431 data=data,
432 schema=schema,
433 schema_overrides=schema_overrides,
434 strict=strict,
435 orient=orient,
436 infer_schema_length=infer_schema_length,
437 nan_to_null=nan_to_null,
438 height=height,
439 )
440 .lazy()
441 ._ldf
442 )
File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/dataframe/frame.py:409, in DataFrame.__init__(self, data, schema, schema_overrides, strict, orient, infer_schema_length, nan_to_null, height)
405 nan_to_null=nan_to_null,
406 )
407
408 elif isinstance(data, (list, tuple, Sequence)):
--> 409 self._df = sequence_to_pydf(
410 data,
411 schema=schema,
412 schema_overrides=schema_overrides,
File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/_utils/construction/dataframe.py:466, in sequence_to_pydf(data, schema, schema_overrides, strict, orient, infer_schema_length, nan_to_null)
463 if not data:
464 return dict_to_pydf({}, schema=schema, schema_overrides=schema_overrides)
--> 466 return _sequence_to_pydf_dispatcher(
467 get_first_non_none(data),
468 data=data,
469 schema=schema,
470 schema_overrides=schema_overrides,
471 strict=strict,
472 orient=orient,
473 infer_schema_length=infer_schema_length,
474 nan_to_null=nan_to_null,
475 )
File /opt/homebrew/Cellar/python@3.14/3.14.3_1/Frameworks/Python.framework/Versions/3.14/lib/python3.14/functools.py:982, in singledispatch.<locals>.wrapper(*args, **kw)
979 if not args:
980 raise TypeError(f'{funcname} requires at least '
981 '1 positional argument')
--> 982 return dispatch(args[0].__class__)(*args, **kw)
File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/_utils/construction/dataframe.py:722, in _sequence_of_dict_to_pydf(first_element, data, schema, schema_overrides, strict, infer_schema_length, **kwargs)
713 column_names, schema_overrides = _unpack_schema(
714 schema, schema_overrides=schema_overrides
715 )
716 dicts_schema = (
717 _include_unknowns(schema_overrides, column_names or list(schema_overrides))
718 if column_names
719 else None
720 )
--> 722 pydf = PyDataFrame.from_dicts(
723 data,
724 dicts_schema,
725 schema_overrides,
726 strict=strict,
727 infer_schema_length=infer_schema_length,
728 )
729 return pydf
ComputeError: could not append value: "2025-03-07T08:00:00" of type: str to the builder; make sure that all rows have the same schema or consider increasing `infer_schema_length`
it might also be that a value overflows the data-type's capacity
The behavior in general is inconsistent. I am fine with the logic of a bare dict that has some nested data and some not resulting in the data splayed out. But even so, the schema_overrides should still be possible and these errors should not happen. They are wrong.
Expected behavior
Not 100% sure. For one, the error is wrong. Ideally, the dataframe gets built with no errors.
Installed versions
Details
In [33]: pl.show_versions()
--------Version info---------
Polars: 1.39.3
Index type: UInt32
Platform: macOS-26.3.1-arm64-arm-64bit-Mach-O
Python: 3.14.3 (main, Feb 3 2026, 15:32:20) [Clang 17.0.0 (clang-1700.6.3.2)]
Runtime: rt32
----Optional dependencies----
Azure CLI <not installed>
adbc_driver_manager <not installed>
altair <not installed>
azure.identity <not installed>
boto3 <not installed>
cloudpickle <not installed>
connectorx <not installed>
deltalake 1.5.0
fastexcel <not installed>
fsspec <not installed>
gevent 25.9.1
google.auth <not installed>
great_tables <not installed>
matplotlib <not installed>
numpy 2.4.4
openpyxl <not installed>
pandas <not installed>
polars_cloud <not installed>
pyarrow 23.0.1
pydantic <not installed>
pyiceberg <not installed>
sqlalchemy <not installed>
torch <not installed>
xlsx2csv <not installed>
xlsxwriter <not installed>
Checks
Reproducible example
Log output
In [28]: pl.LazyFrame(d, schema_overrides={"initial_data_date": pl.Datetime(time_zone="UTC")}, strict=False) --------------------------------------------------------------------------- InvalidOperationError Traceback (most recent call last) Cell In[28], line 1 ----> 1 pl.LazyFrame(d, schema_overrides={"initial_data_date": pl.Datetime(time_zone="UTC")}, strict=False) File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/lazyframe/frame.py:430, in LazyFrame.__init__(self, data, schema, schema_overrides, strict, orient, infer_schema_length, nan_to_null, height) 415 def __init__( 416 self, 417 data: FrameInitTypes | None = None, (...) 425 height: int | None = None, 426 ) -> None: 427 from polars.dataframe import DataFrame 429 self._ldf = ( --> 430 DataFrame( 431 data=data, 432 schema=schema, 433 schema_overrides=schema_overrides, 434 strict=strict, 435 orient=orient, 436 infer_schema_length=infer_schema_length, 437 nan_to_null=nan_to_null, 438 height=height, 439 ) 440 .lazy() 441 ._ldf 442 ) File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/dataframe/frame.py:400, in DataFrame.__init__(self, data, schema, schema_overrides, strict, orient, infer_schema_length, nan_to_null, height) 396 if height is not None and self.width == 0: 397 self._df = PyDataFrame.empty_with_height(height) 398 399 elif isinstance(data, dict): --> 400 self._df = dict_to_pydf( 401 data, 402 schema=schema, 403 schema_overrides=schema_overrides, File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/_utils/construction/dataframe.py:161, in dict_to_pydf(data, schema, schema_overrides, strict, nan_to_null, allow_multithreaded) 148 data_series = [ 149 pl.Series( 150 name, (...) 
156 for name in column_names 157 ] 158 else: 159 data_series = [ 160 s._s --> 161 for s in _expand_dict_values( 162 data, 163 schema_overrides=schema_overrides, 164 strict=strict, 165 nan_to_null=nan_to_null, 166 ).values() 167 ] 169 data_series = _handle_columns_arg(data_series, columns=column_names, from_dict=True) 170 pydf = PyDataFrame(data_series) File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/_utils/construction/dataframe.py:404, in _expand_dict_values(data, schema_overrides, strict, order, nan_to_null) 394 updated_data[name] = pl.Series( 395 name=name, 396 values=val, (...) 399 nan_to_null=nan_to_null, 400 ) 401 elif val is None or isinstance( # type: ignore[redundant-expr] 402 val, (int, float, str, bool, date, datetime, time, timedelta) 403 ): --> 404 updated_data[name] = F.repeat( 405 val, array_len, dtype=dtype, eager=True 406 ).alias(name) 407 else: 408 updated_data[name] = pl.Series( 409 name=name, values=[val] * array_len, dtype=dtype, strict=strict 410 ) File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/functions/repeat.py:145, in repeat(value, n, dtype, eager) 143 msg = f"`n` parameter of `repeat expected a `int` or `Expr` got a `{qualified_type_name(n)}`" 144 raise TypeError(msg) --> 145 value_pyexpr = parse_into_expression(value, str_as_lit=True, dtype=dtype) 146 expr = wrap_expr(plr.repeat(value_pyexpr, n._pyexpr, dtype)) 147 if eager: File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/_utils/parse/expr.py:68, in parse_into_expression(input, str_as_lit, list_as_series, structify, dtype, require_selector) 66 expr = F.lit(pl.Series(input), dtype=dtype) 67 else: ---> 68 expr = F.lit(input, dtype=dtype) 70 return expr._pyexpr File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/functions/lit.py:197, in lit(value, dtype, allow_object) 194 return lit(value.value, dtype=dtype) 196 if dtype: --> 197 value_s = pl.Series("literal", [value]).cast(dtype) 198 return wrap_expr(plr.lit(value_s._s, 
allow_object, is_scalar=True)) 200 if _check_for_numpy(value) and isinstance(value, np.generic): 201 # note: the item() is a py-native datetime/timedelta when units < 'ns' File ~/work/kroo-tran/.venv/lib/python3.14/site-packages/polars/series/series.py:4603, in Series.cast(self, dtype, strict, wrap_numerical) 4601 # Do not dispatch cast as it is expensive and used in other functions. 4602 dtype = parse_into_dtype(dtype) -> 4603 return self._from_pyseries(self._s.cast(dtype, strict, wrap_numerical)) InvalidOperationError: conversion from `str` to `datetime[μs, UTC]` failed in column 'literal' for 1 out of 1 values: ["2025-03-07T08:00:00"] You might want to try: - setting `strict=False` to set values that cannot be converted to `null` - using `str.strptime`, `str.to_date`, or `str.to_datetime` and providing a format stringIssue description
When setting the schema override for string -> datetime, on a mixed nested dict, the construction fails. An error might be valid here but not for the reason it claims. Additionally, `strict` is set to
`False` and the data is not cast to `null`. If I remove the
`schema_overrides`, the output is: Not quite what I had in mind but fine. If I wrap the dict in an array:
Closer. And now if I add back the schema_overrides:
New compute error
The behavior in general is inconsistent. I am fine with the logic of a bare dict that has some nested data and some not resulting in the data splayed out. But even so, the schema_overrides should still be possible and these errors should not happen. They are wrong.
Expected behavior
Not 100% sure. For one, the error is wrong. Ideally, the dataframe gets built with no errors.
Installed versions
Details