Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions docs/source/python/api/formats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,11 @@ CSV Files

.. _api.feather:

Feather Files
-------------
Feather Files (Deprecated)
--------------------------

.. deprecated:: 24.0.0
The Feather API is deprecated. Use the :ref:`IPC <ipc>` API instead.

.. currentmodule:: pyarrow.feather

Expand Down
36 changes: 36 additions & 0 deletions docs/source/python/feather.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
Feather File Format
===================

.. deprecated:: 24.0.0
The ``pyarrow.feather`` module is deprecated. Feather V2 is the Arrow IPC
file format. Use :mod:`pyarrow.ipc` instead. See :ref:`ipc` for details.

Feather is a portable file format for storing Arrow tables or data frames (from
languages like Python or R) that utilizes the :ref:`Arrow IPC format <ipc>`
internally. Feather was created early in the Arrow project as a proof of
Expand Down Expand Up @@ -107,3 +111,35 @@ Writing Version 1 (V1) Files
For compatibility with libraries without support for Version 2 files, you can
write the version 1 format by passing ``version=1`` to ``write_feather``. We
intend to maintain read support for V1 for the foreseeable future.

Migration to IPC
----------------

Since Feather V2 is the Arrow IPC file format, you can use the
:mod:`pyarrow.ipc` module as a direct replacement:

.. code-block:: python

import pyarrow as pa
import pyarrow.ipc

table = pa.table({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})

# Writing (replaces feather.write_feather)
options = pa.ipc.IpcWriteOptions(compression='lz4')
with pa.ipc.new_file("data.arrow", table.schema, options=options) as writer:
writer.write_table(table)

# Reading (replaces feather.read_table)
with pa.ipc.open_file("data.arrow") as reader:
result = reader.read_all()

.. note::

``feather.write_feather`` defaults to LZ4 compression, while
``ipc.new_file`` does not compress by default. To preserve the same
behavior, pass ``compression='lz4'`` via
:class:`~pyarrow.ipc.IpcWriteOptions` as shown above.

For reading multiple files, use the :mod:`pyarrow.dataset` module with
``format='ipc'`` instead of :class:`~pyarrow.feather.FeatherDataset`.
101 changes: 79 additions & 22 deletions python/pyarrow/feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from collections.abc import Sequence
import os
import warnings

from pyarrow.pandas_compat import _pandas_api # noqa
from pyarrow.lib import (Codec, Table, # noqa
Expand All @@ -31,6 +32,9 @@ class FeatherDataset:
"""
Encapsulates details of reading a list of Feather files.

.. deprecated:: 24.0.0
Use :func:`pyarrow.dataset.dataset` with ``format='ipc'`` instead.

Parameters
----------
path_or_paths : List[str]
Expand All @@ -40,6 +44,12 @@ class FeatherDataset:
"""

def __init__(self, path_or_paths, validate_schema=True):
    """
    Record the file paths to read; emits the module deprecation warning.

    Parameters
    ----------
    path_or_paths : List[str]
        A list of file names.
    validate_schema : bool, default True
        Check that all files share the same schema when reading.
    """
    # Warn at construction time so callers see the notice before any I/O.
    warnings.warn(
        "pyarrow.feather.FeatherDataset is deprecated as of 24.0.0. "
        "Use pyarrow.dataset.dataset() with format='ipc' instead.",
        FutureWarning,
        stacklevel=2,
    )
    self.paths = path_or_paths
    self.validate_schema = validate_schema

Expand All @@ -57,12 +67,12 @@ def read_table(self, columns=None):
pyarrow.Table
Content of the file as a table (of columns)
"""
_fil = read_table(self.paths[0], columns=columns)
_fil = _read_table_internal(self.paths[0], columns=columns)
self._tables = [_fil]
self.schema = _fil.schema

for path in self.paths[1:]:
table = read_table(path, columns=columns)
table = _read_table_internal(path, columns=columns)
if self.validate_schema:
self.validate_schemas(path, table)
self._tables.append(table)
Expand Down Expand Up @@ -117,6 +127,11 @@ def write_feather(df, dest, compression=None, compression_level=None,
"""
Write a pandas.DataFrame to Feather format.

.. deprecated:: 24.0.0
Use :func:`pyarrow.ipc.new_file` /
:class:`pyarrow.ipc.RecordBatchFileWriter` instead.
Feather V2 is the Arrow IPC file format.

Parameters
----------
df : pandas.DataFrame or pyarrow.Table
Expand All @@ -137,6 +152,13 @@ def write_feather(df, dest, compression=None, compression_level=None,
Feather file version. Version 2 is the current. Version 1 is the more
limited legacy format
"""
warnings.warn(
"pyarrow.feather.write_feather is deprecated as of 24.0.0. "
"Use pyarrow.ipc.new_file() / RecordBatchFileWriter instead. "
"Feather V2 is the Arrow IPC file format.",
FutureWarning,
stacklevel=2
)
if _pandas_api.have_pandas:
if (_pandas_api.has_sparse and
isinstance(df, _pandas_api.pd.SparseDataFrame)):
Expand Down Expand Up @@ -201,6 +223,11 @@ def read_feather(source, columns=None, use_threads=True,
Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
feather.read_table.

.. deprecated:: 24.0.0
Use :func:`pyarrow.ipc.open_file` /
:class:`pyarrow.ipc.RecordBatchFileReader` instead.
Feather V2 is the Arrow IPC file format.

Parameters
----------
source : str file path, or file-like object
Expand All @@ -222,31 +249,23 @@ def read_feather(source, columns=None, use_threads=True,
df : pandas.DataFrame
The contents of the Feather file as a pandas.DataFrame
"""
return (read_table(
warnings.warn(
"pyarrow.feather.read_feather is deprecated as of 24.0.0. "
"Use pyarrow.ipc.open_file() / RecordBatchFileReader instead. "
"Feather V2 is the Arrow IPC file format.",
FutureWarning,
stacklevel=2
)
return (_read_table_internal(
source, columns=columns, memory_map=memory_map,
use_threads=use_threads).to_pandas(use_threads=use_threads, **kwargs))


def read_table(source, columns=None, memory_map=False, use_threads=True):
def _read_table_internal(source, columns=None, memory_map=False,
use_threads=True):
"""
Read a pyarrow.Table from Feather format

Parameters
----------
source : str file path, or file-like object
You can use MemoryMappedFile as source, for explicitly use memory map.
columns : sequence, optional
Only read a specific set of columns. If not provided, all columns are
read.
memory_map : boolean, default False
Use memory mapping when opening file on disk, when source is a str
use_threads : bool, default True
Whether to parallelize reading using multiple threads.

Returns
-------
table : pyarrow.Table
The contents of the Feather file as a pyarrow.Table
Internal implementation for reading a Feather file as a pyarrow.Table.
Does not emit deprecation warnings.
"""
reader = _feather.FeatherReader(
source, use_memory_map=memory_map, use_threads=use_threads)
Expand Down Expand Up @@ -277,3 +296,41 @@ def read_table(source, columns=None, memory_map=False, use_threads=True):
else:
# follow exact order / selection of names
return table.select(columns)


def read_table(source, columns=None, memory_map=False, use_threads=True):
    """
    Read a pyarrow.Table from Feather format

    .. deprecated:: 24.0.0
        Use :func:`pyarrow.ipc.open_file` /
        :class:`pyarrow.ipc.RecordBatchFileReader` instead.
        Feather V2 is the Arrow IPC file format.

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.

    Returns
    -------
    table : pyarrow.Table
        The contents of the Feather file as a pyarrow.Table
    """
    # Public entry point only warns, then defers to the internal reader so
    # that in-package callers (read_feather, FeatherDataset) warn exactly once.
    warnings.warn(
        "pyarrow.feather.read_table is deprecated as of 24.0.0. "
        "Use pyarrow.ipc.open_file() / RecordBatchFileReader instead. "
        "Feather V2 is the Arrow IPC file format.",
        FutureWarning,
        stacklevel=2,
    )
    return _read_table_internal(
        source,
        columns=columns,
        memory_map=memory_map,
        use_threads=use_threads,
    )
5 changes: 5 additions & 0 deletions python/pyarrow/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1927,6 +1927,7 @@ def test_fragments_parquet_subset_with_nested_fields(tempdir):

@pytest.mark.pandas
@pytest.mark.parquet
@pytest.mark.filterwarnings("ignore:pyarrow.feather:FutureWarning")
def test_fragments_repr(tempdir, dataset):
# partitioned parquet dataset
fragment = list(dataset.get_fragments())[0]
Expand Down Expand Up @@ -3699,6 +3700,7 @@ def test_column_names_encoding(tempdir, dataset_reader):
assert dataset_transcoded.to_table().equals(expected_table)


@pytest.mark.filterwarnings("ignore:pyarrow.feather:FutureWarning")
def test_feather_format(tempdir, dataset_reader):
from pyarrow.feather import write_feather

Expand Down Expand Up @@ -4080,6 +4082,7 @@ def test_dataset_project_null_column(tempdir, dataset_reader):
assert dataset_reader.to_table(dataset).equals(expected)


@pytest.mark.filterwarnings("ignore:pyarrow.feather:FutureWarning")
def test_dataset_project_columns(tempdir, dataset_reader):
# basic column re-projection with expressions
from pyarrow import feather
Expand Down Expand Up @@ -4431,6 +4434,7 @@ def test_write_dataset_with_dataset(tempdir):


@pytest.mark.pandas
@pytest.mark.filterwarnings("ignore:pyarrow.feather:FutureWarning")
def test_write_dataset_existing_data(tempdir):
directory = tempdir / 'ds'
table = pa.table({'b': ['x', 'y', 'z'], 'c': [1, 2, 3]})
Expand Down Expand Up @@ -5054,6 +5058,7 @@ def test_write_dataset_arrow_schema_metadata(tempdir):
assert result["a"].type.tz == "Europe/Brussels"


@pytest.mark.filterwarnings("ignore:pyarrow.feather:FutureWarning")
def test_write_dataset_schema_metadata(tempdir):
# ensure that schema metadata gets written
from pyarrow import feather
Expand Down
62 changes: 62 additions & 0 deletions python/pyarrow/tests/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import os
import sys
import tempfile
import warnings
import pytest
import hypothesis as h
import hypothesis.strategies as st
Expand All @@ -40,6 +41,12 @@
except ImportError:
pass

# Suppress deprecation warnings for existing tests since pyarrow.feather
# is deprecated as of 24.0.0
pytestmark = pytest.mark.filterwarnings(
"ignore:pyarrow.feather:FutureWarning"
)


@pytest.fixture(scope='module')
def datadir(base_datadir):
Expand Down Expand Up @@ -882,3 +889,58 @@ def test_feather_datetime_resolution_arrow_to_pandas(tempdir):

assert expected_0 == result['date'][0]
assert expected_1 == result['date'][1]


# --- Deprecation warning tests ---

@pytest.mark.filterwarnings("default:pyarrow.feather:FutureWarning")
def test_write_feather_deprecated(tempdir):
    # Writing through the deprecated API must raise a FutureWarning.
    data = pa.table({"a": [1, 2, 3]})
    target = str(tempdir / "test.feather")
    with pytest.warns(FutureWarning, match="write_feather is deprecated"):
        write_feather(data, target)


@pytest.mark.filterwarnings("default:pyarrow.feather:FutureWarning")
def test_read_table_deprecated(tempdir):
    # Prepare a file while muting the write-side deprecation warning.
    path = str(tempdir / "test.feather")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        write_feather(pa.table({"a": [1, 2, 3]}), path)
    # Reading via the public wrapper must then warn.
    with pytest.warns(FutureWarning, match="read_table is deprecated"):
        read_table(path)


@pytest.mark.pandas
@pytest.mark.filterwarnings("default:pyarrow.feather:FutureWarning")
def test_read_feather_deprecated(tempdir):
    # Prepare a file while muting the write-side deprecation warning.
    path = str(tempdir / "test.feather")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        write_feather(pa.table({"a": [1, 2, 3]}), path)
    # The pandas-returning reader must also warn.
    with pytest.warns(FutureWarning, match="read_feather is deprecated"):
        read_feather(path)


@pytest.mark.filterwarnings("default:pyarrow.feather:FutureWarning")
def test_feather_dataset_deprecated():
    # Merely constructing the wrapper class is enough to trigger the warning;
    # no files need to exist for this check.
    with pytest.warns(FutureWarning, match="FeatherDataset is deprecated"):
        FeatherDataset([])


@pytest.mark.pandas
@pytest.mark.filterwarnings("default:pyarrow.feather:FutureWarning")
def test_read_feather_no_double_warning(tempdir):
    """read_feather should warn once, not once per internal helper call."""
    path = str(tempdir / "test.feather")
    # Create the fixture file with write-side warnings suppressed.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        write_feather(pa.table({"a": [1, 2, 3]}), path)
    # Record every warning read_feather emits and count the FutureWarnings.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        read_feather(path)
    future_warnings = [
        w for w in caught if issubclass(w.category, FutureWarning)
    ]
    assert len(future_warnings) == 1
Loading