|
29 | 29 | from pandas.util._decorators import doc
|
30 | 30 | from pandas.util._validators import validate_fillna_kwargs
|
31 | 31 |
|
| 32 | +from pandas.core.dtypes.cast import can_hold_element |
32 | 33 | from pandas.core.dtypes.common import (
|
33 | 34 | is_array_like,
|
34 | 35 | is_bool_dtype,
|
35 | 36 | is_integer,
|
36 | 37 | is_list_like,
|
37 |
| - is_object_dtype, |
38 | 38 | is_scalar,
|
39 | 39 | )
|
40 | 40 | from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
@@ -1240,46 +1240,50 @@ def to_numpy(
|
1240 | 1240 | ) -> np.ndarray:
|
1241 | 1241 | if dtype is not None:
|
1242 | 1242 | dtype = np.dtype(dtype)
|
1243 |
| - elif self._hasna: |
1244 |
| - dtype = np.dtype(object) |
1245 | 1243 |
|
1246 | 1244 | if na_value is lib.no_default:
|
1247 | 1245 | na_value = self.dtype.na_value
|
1248 | 1246 |
|
1249 | 1247 | pa_type = self._pa_array.type
|
| 1248 | + if not self._hasna or isna(na_value) or pa.types.is_null(pa_type): |
| 1249 | + data = self |
| 1250 | + else: |
| 1251 | + data = self.fillna(na_value) |
| 1252 | + copy = False |
| 1253 | + |
1250 | 1254 | if pa.types.is_timestamp(pa_type) or pa.types.is_duration(pa_type):
|
1251 |
| - result = self._maybe_convert_datelike_array() |
| 1255 | + result = data._maybe_convert_datelike_array() |
1252 | 1256 | if dtype is None or dtype.kind == "O":
|
1253 | 1257 | result = result.to_numpy(dtype=object, na_value=na_value)
|
1254 | 1258 | else:
|
1255 | 1259 | result = result.to_numpy(dtype=dtype)
|
1256 |
| - return result |
1257 | 1260 | elif pa.types.is_time(pa_type) or pa.types.is_date(pa_type):
|
1258 | 1261 | # convert to list of python datetime.time objects before
|
1259 | 1262 | # wrapping in ndarray
|
1260 |
| - result = np.array(list(self), dtype=dtype) |
1261 |
| - elif is_object_dtype(dtype) and self._hasna: |
1262 |
| - result = np.empty(len(self), dtype=object) |
1263 |
| - mask = ~self.isna() |
1264 |
| - result[mask] = np.asarray(self[mask]._pa_array) |
1265 |
| - elif pa.types.is_null(self._pa_array.type): |
1266 |
| - fill_value = None if isna(na_value) else na_value |
1267 |
| - return np.full(len(self), fill_value=fill_value, dtype=dtype) |
1268 |
| - elif self._hasna: |
1269 |
| - data = self.fillna(na_value) |
| 1263 | + result = np.array(list(data), dtype=dtype) |
| 1264 | + if data._hasna: |
| 1265 | + result[data.isna()] = na_value |
| 1266 | + elif pa.types.is_null(pa_type): |
| 1267 | + if dtype is not None and isna(na_value): |
| 1268 | + na_value = None |
| 1269 | + result = np.full(len(data), fill_value=na_value, dtype=dtype) |
| 1270 | + elif not data._hasna or (pa.types.is_floating(pa_type) and na_value is np.nan): |
1270 | 1271 | result = data._pa_array.to_numpy()
|
1271 |
| - if dtype is not None: |
1272 |
| - result = result.astype(dtype, copy=False) |
1273 |
| - return result |
1274 |
| - else: |
1275 |
| - result = self._pa_array.to_numpy() |
1276 | 1272 | if dtype is not None:
|
1277 | 1273 | result = result.astype(dtype, copy=False)
|
1278 | 1274 | if copy:
|
1279 | 1275 | result = result.copy()
|
1280 |
| - return result |
1281 |
| - if self._hasna: |
1282 |
| - result[self.isna()] = na_value |
| 1276 | + else: |
| 1277 | + if dtype is None: |
| 1278 | + empty = pa.array([], type=pa_type).to_numpy(zero_copy_only=False) |
| 1279 | + if can_hold_element(empty, na_value): |
| 1280 | + dtype = empty.dtype |
| 1281 | + else: |
| 1282 | + dtype = np.object_ |
| 1283 | + result = np.empty(len(data), dtype=dtype) |
| 1284 | + mask = data.isna() |
| 1285 | + result[mask] = na_value |
| 1286 | + result[~mask] = data[~mask]._pa_array.to_numpy() |
1283 | 1287 | return result
|
1284 | 1288 |
|
1285 | 1289 | def unique(self) -> Self:
|
|
0 commit comments