Skip to content

Commit c82e90b

Browse files
committed
Change default value and update the requirements doc
1 parent 04bbe1b commit c82e90b

File tree

2 files changed

+16
-17
lines changed

2 files changed

+16
-17
lines changed

protocol/dataframe_protocol_summary.md

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ libraries, the example above can change to:
4040
def get_df_module(df):
4141
"""Utility function to support programming against a dataframe API"""
4242
if hasattr(df, '__dataframe_namespace__'):
43-
# Retrieve the namespace
44-
pdx = df.__dataframe_namespace__()
43+
# Retrieve the namespace
44+
pdx = df.__dataframe_namespace__()
4545
else:
4646
# Here we can raise an exception if we only want to support compliant dataframes,
4747
# or convert to our default choice of dataframe if we want to accept (e.g.) dicts
@@ -168,13 +168,12 @@ We'll also list some things that were discussed but are not requirements:
168168
3. Extension dtypes, i.e. a way to extend the set of dtypes that is
169169
explicitly supported, are out of scope.
170170
_Rationale: complex to support, not used enough to justify that complexity._
171-
4. "virtual columns", i.e. columns for which the data is not yet in memory
172-
because it uses lazy evaluation, are not supported other than through
173-
letting the producer materialize the data in memory when the consumer
174-
calls `__dataframe__`.
175-
_Rationale: the full dataframe API will support this use case by
176-
"programming to an interface"; this data interchange protocol is
177-
fundamentally built around describing data in memory_.
171+
4. Support for strided storage in buffers.
172+
_Rationale: this is supported by a subset of dataframes only, mainly those
173+
that use NumPy arrays. In many real-world use cases, strided arrays will
174+
force a copy at some point, so requiring contiguous memory layout (and hence
175+
an extra copy at the moment `__dataframe__` is used) is considered a good
176+
trade-off for reduced implementation complexity._
178177

179178
### To be decided
180179

@@ -245,7 +244,7 @@ library that implements `__array__` must depend (optionally at least) on
245244
NumPy, and call a NumPy `ndarray` constructor itself from within `__array__`.
246245

247246

248-
### What is wrong with `.to_numpy?` and `.to_arrow()`?
247+
### What is wrong with `.to_numpy()` and `.to_arrow()`?
249248

250249
Such methods ask the object they are attached to to turn itself into a NumPy or
251250
Arrow array. Which means each library must have at least an optional

protocol/pandas_implementation.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636

3737

3838
def from_dataframe(df : DataFrameObject,
39-
allow_copy : bool = False) -> pd.DataFrame:
39+
allow_copy : bool = True) -> pd.DataFrame:
4040
"""
4141
Construct a pandas DataFrame from ``df`` if it supports ``__dataframe__``
4242
"""
@@ -162,7 +162,7 @@ def convert_categorical_column(col : ColumnObject) -> pd.Series:
162162

163163

164164
def __dataframe__(cls, nan_as_null : bool = False,
165-
allow_copy : bool = False) -> dict:
165+
allow_copy : bool = True) -> dict:
166166
"""
167167
The public method to attach to pd.DataFrame
168168
@@ -195,11 +195,11 @@ class _PandasBuffer:
195195
Data in the buffer is guaranteed to be contiguous in memory.
196196
"""
197197

198-
def __init__(self, x : np.ndarray, allow_copy : bool = False) -> None:
198+
def __init__(self, x : np.ndarray, allow_copy : bool = True) -> None:
199199
"""
200200
Handle only regular columns (= numpy arrays) for now.
201201
"""
202-
if allow_copy:
202+
if not allow_copy:
203203
# Array is not contiguous and strided buffers do not need to be
204204
# supported. It brings some extra complexity for libraries that
205205
# don't support it (e.g. Arrow).
@@ -260,7 +260,7 @@ class _PandasColumn:
260260
"""
261261

262262
def __init__(self, column : pd.Series,
263-
allow_copy : bool = False) -> None:
263+
allow_copy : bool = True) -> None:
264264
"""
265265
Note: doesn't deal with extension arrays yet, just assume a regular
266266
Series/ndarray for now.
@@ -496,7 +496,7 @@ class _PandasDataFrame:
496496
attributes defined on this class.
497497
"""
498498
def __init__(self, df : pd.DataFrame, nan_as_null : bool = False,
499-
allow_copy : bool = False) -> None:
499+
allow_copy : bool = True) -> None:
500500
"""
501501
Constructor - an instance of this (private) class is returned from
502502
`pd.DataFrame.__dataframe__`.
@@ -574,7 +574,7 @@ def test_noncontiguous_columns():
574574
df = pd.DataFrame(arr)
575575
assert df[0].to_numpy().strides == (24,)
576576
with pytest.raises(RuntimeError):
577-
df2 = from_dataframe(df, allow_copy=True)
577+
df2 = from_dataframe(df, allow_copy=False)
578578

579579

580580
def test_categorical_dtype():

0 commit comments

Comments
 (0)