-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ERR: Check that dtype_backend is valid #51871
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
1958b53
d054ae4
0298f60
111b42a
8e112b1
998b807
0858a27
fb42c2e
04ca968
d90989f
200757a
c6f21c5
f251c8e
6b83aa0
2eb5c88
2e26467
2186e5b
612df73
40c4fb5
59518e9
73592be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -339,7 +339,6 @@ cdef class TextReader: | |
object index_col | ||
object skiprows | ||
object dtype | ||
bint use_nullable_dtypes | ||
object usecols | ||
set unnamed_cols # set[str] | ||
str dtype_backend | ||
|
@@ -379,8 +378,7 @@ cdef class TextReader: | |
float_precision=None, | ||
bint skip_blank_lines=True, | ||
encoding_errors=b"strict", | ||
use_nullable_dtypes=False, | ||
dtype_backend="pandas"): | ||
dtype_backend="numpy"): | ||
|
||
# set encoding for native Python and C library | ||
if isinstance(encoding_errors, str): | ||
|
@@ -501,7 +499,6 @@ cdef class TextReader: | |
# - DtypeObj | ||
# - dict[Any, DtypeObj] | ||
self.dtype = dtype | ||
self.use_nullable_dtypes = use_nullable_dtypes | ||
self.dtype_backend = dtype_backend | ||
|
||
self.noconvert = set() | ||
|
@@ -928,7 +925,6 @@ cdef class TextReader: | |
bint na_filter = 0 | ||
int64_t num_cols | ||
dict results | ||
bint use_nullable_dtypes | ||
|
||
start = self.parser_start | ||
|
||
|
@@ -1049,12 +1045,12 @@ cdef class TextReader: | |
# don't try to upcast EAs | ||
if ( | ||
na_count > 0 and not is_extension_array_dtype(col_dtype) | ||
or self.use_nullable_dtypes | ||
or self.dtype_backend != "numpy" | ||
): | ||
use_nullable_dtypes = self.use_nullable_dtypes and col_dtype is None | ||
use_dtype_backend = self.dtype_backend != "numpy" and col_dtype is None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure if the previous variable name […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As long as no_default is used for numpy, I like this more. |
||
col_res = _maybe_upcast( | ||
col_res, | ||
use_nullable_dtypes=use_nullable_dtypes, | ||
use_dtype_backend=use_dtype_backend, | ||
dtype_backend=self.dtype_backend, | ||
) | ||
|
||
|
@@ -1389,11 +1385,11 @@ _NA_VALUES = _ensure_encoded(list(STR_NA_VALUES)) | |
|
||
|
||
def _maybe_upcast( | ||
arr, use_nullable_dtypes: bool = False, dtype_backend: str = "pandas" | ||
arr, use_dtype_backend: bool = False, dtype_backend: str = "numpy" | ||
): | ||
"""Sets nullable dtypes or upcasts if nans are present. | ||
|
||
Upcast, if use_nullable_dtypes is false and nans are present so that the | ||
Upcast, if use_dtype_backend is false and nans are present so that the | ||
current dtype can not hold the na value. We use nullable dtypes if the | ||
flag is true for every array. | ||
|
||
|
@@ -1402,7 +1398,7 @@ def _maybe_upcast( | |
arr: ndarray | ||
Numpy array that is potentially being upcast. | ||
|
||
use_nullable_dtypes: bool, default False | ||
use_dtype_backend: bool, default False | ||
If true, we cast to the associated nullable dtypes. | ||
|
||
Returns | ||
|
@@ -1419,7 +1415,7 @@ def _maybe_upcast( | |
if issubclass(arr.dtype.type, np.integer): | ||
mask = arr == na_value | ||
|
||
if use_nullable_dtypes: | ||
if use_dtype_backend: | ||
arr = IntegerArray(arr, mask) | ||
else: | ||
arr = arr.astype(float) | ||
|
@@ -1428,22 +1424,22 @@ def _maybe_upcast( | |
elif arr.dtype == np.bool_: | ||
mask = arr.view(np.uint8) == na_value | ||
|
||
if use_nullable_dtypes: | ||
if use_dtype_backend: | ||
arr = BooleanArray(arr, mask) | ||
else: | ||
arr = arr.astype(object) | ||
np.putmask(arr, mask, np.nan) | ||
|
||
elif issubclass(arr.dtype.type, float) or arr.dtype.type == np.float32: | ||
if use_nullable_dtypes: | ||
if use_dtype_backend: | ||
mask = np.isnan(arr) | ||
arr = FloatingArray(arr, mask) | ||
|
||
elif arr.dtype == np.object_: | ||
if use_nullable_dtypes: | ||
if use_dtype_backend: | ||
arr = StringDtype().construct_array_type()._from_sequence(arr) | ||
|
||
if use_nullable_dtypes and dtype_backend == "pyarrow": | ||
if use_dtype_backend and dtype_backend == "pyarrow": | ||
import pyarrow as pa | ||
if isinstance(arr, IntegerArray) and arr.isna().all(): | ||
# use null instead of int64 in pyarrow | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this needs to be changed, no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Already changed in the other PR; rebased now.