Skip to content

Commit 8e502ea

Browse files
jbrockmendel authored and luckyvs1 committed
REF: collected dtypes.cast and construction simplifications (pandas-dev#38629)
1 parent 2a98c09 commit 8e502ea

File tree

4 files changed, with 44 additions and 54 deletions

pandas/core/construction.py

+6-13
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
is_extension_array_dtype,
3333
is_float_dtype,
3434
is_integer_dtype,
35-
is_iterator,
3635
is_list_like,
3736
is_object_dtype,
3837
is_sparse,
@@ -462,10 +461,7 @@ def sanitize_array(
462461
try:
463462
subarr = _try_cast(data, dtype, copy, True)
464463
except ValueError:
465-
if copy:
466-
subarr = data.copy()
467-
else:
468-
subarr = np.array(data, copy=False)
464+
subarr = np.array(data, copy=copy)
469465
else:
470466
# we will try to copy by-definition here
471467
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
@@ -600,6 +596,10 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo
600596
subarr = array_type(arr, dtype=dtype, copy=copy)
601597
return subarr
602598

599+
if is_object_dtype(dtype) and not isinstance(arr, np.ndarray):
600+
subarr = construct_1d_object_array_from_listlike(arr)
601+
return subarr
602+
603603
try:
604604
# GH#15832: Check if we are requesting a numeric dtype and
605605
# that we can convert the data to the requested dtype.
@@ -610,14 +610,7 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo
610610
else:
611611
subarr = maybe_cast_to_datetime(arr, dtype)
612612

613-
# Take care in creating object arrays (but iterators are not
614-
# supported):
615-
if is_object_dtype(dtype) and (
616-
is_list_like(subarr)
617-
and not (is_iterator(subarr) or isinstance(subarr, np.ndarray))
618-
):
619-
subarr = construct_1d_object_array_from_listlike(subarr)
620-
elif not is_extension_array_dtype(subarr):
613+
if not isinstance(subarr, (ABCExtensionArray, ABCIndex)):
621614
subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy)
622615
except OutOfBoundsDatetime:
623616
# in case of out of bound datetime64 -> always raise

pandas/core/dtypes/cast.py

+27-34
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
is_datetime64_dtype,
5454
is_datetime64_ns_dtype,
5555
is_datetime64tz_dtype,
56-
is_datetime_or_timedelta_dtype,
5756
is_dtype_equal,
5857
is_extension_array_dtype,
5958
is_float,
@@ -219,14 +218,11 @@ def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]):
219218
# a datetimelike
220219
# GH12821, iNaT is cast to float
221220
if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]:
222-
if hasattr(dtype, "tz"):
223-
# not a numpy dtype
224-
if dtype.tz:
225-
# convert to datetime and change timezone
226-
from pandas import to_datetime
227-
228-
result = to_datetime(result).tz_localize("utc")
229-
result = result.tz_convert(dtype.tz)
221+
if isinstance(dtype, DatetimeTZDtype):
222+
# convert to datetime and change timezone
223+
i8values = result.astype("i8", copy=False)
224+
cls = dtype.construct_array_type()
225+
result = cls._simple_new(i8values, dtype=dtype)
230226
else:
231227
result = result.astype(dtype)
232228

@@ -609,13 +605,12 @@ def maybe_promote(dtype, fill_value=np.nan):
609605
dtype = mst
610606

611607
elif fill_value is None or fill_value is libmissing.NA:
608+
# Note: we already excluded dt64/td64 dtypes above
612609
if is_float_dtype(dtype) or is_complex_dtype(dtype):
613610
fill_value = np.nan
614611
elif is_integer_dtype(dtype):
615612
dtype = np.float64
616613
fill_value = np.nan
617-
elif is_datetime_or_timedelta_dtype(dtype):
618-
fill_value = dtype.type("NaT", "ns")
619614
else:
620615
dtype = np.dtype(np.object_)
621616
if fill_value is not libmissing.NA:
@@ -951,7 +946,7 @@ def astype_td64_unit_conversion(
951946

952947

953948
def astype_nansafe(
954-
arr, dtype: DtypeObj, copy: bool = True, skipna: bool = False
949+
arr: np.ndarray, dtype: DtypeObj, copy: bool = True, skipna: bool = False
955950
) -> ArrayLike:
956951
"""
957952
Cast the elements of an array to a given dtype a nan-safe manner.
@@ -979,6 +974,9 @@ def astype_nansafe(
979974
order = "F" if flags.f_contiguous else "C"
980975
return result.reshape(arr.shape, order=order)
981976

977+
# We get here with 0-dim from sparse
978+
arr = np.atleast_1d(arr)
979+
982980
# dispatch on extension dtype if needed
983981
if isinstance(dtype, ExtensionDtype):
984982
return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)
@@ -995,9 +993,7 @@ def astype_nansafe(
995993
return arr.astype(dtype, copy=copy)
996994

997995
if issubclass(dtype.type, str):
998-
return lib.ensure_string_array(
999-
arr.ravel(), skipna=skipna, convert_na_value=False
1000-
).reshape(arr.shape)
996+
return lib.ensure_string_array(arr, skipna=skipna, convert_na_value=False)
1001997

1002998
elif is_datetime64_dtype(arr):
1003999
if dtype == np.int64:
@@ -1031,7 +1027,7 @@ def astype_nansafe(
10311027

10321028
# work around NumPy brokenness, #1987
10331029
if np.issubdtype(dtype.type, np.integer):
1034-
return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
1030+
return lib.astype_intsafe(arr, dtype)
10351031

10361032
# if we have a datetime/timedelta array of objects
10371033
# then coerce to a proper dtype and recall astype_nansafe
@@ -1092,27 +1088,22 @@ def soft_convert_objects(
10921088
raise ValueError("At least one of datetime, numeric or timedelta must be True.")
10931089

10941090
# Soft conversions
1095-
if datetime:
1091+
if datetime or timedelta:
10961092
# GH 20380, when datetime is beyond year 2262, hence outside
10971093
# bound of nanosecond-resolution 64-bit integers.
10981094
try:
1099-
values = lib.maybe_convert_objects(values, convert_datetime=True)
1095+
values = lib.maybe_convert_objects(
1096+
values, convert_datetime=datetime, convert_timedelta=timedelta
1097+
)
11001098
except OutOfBoundsDatetime:
1101-
pass
1102-
1103-
if timedelta and is_object_dtype(values.dtype):
1104-
# Object check to ensure only run if previous did not convert
1105-
values = lib.maybe_convert_objects(values, convert_timedelta=True)
1099+
return values
11061100

11071101
if numeric and is_object_dtype(values.dtype):
1108-
try:
1109-
converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True)
1110-
except (ValueError, TypeError):
1111-
pass
1112-
else:
1113-
# If all NaNs, then do not-alter
1114-
values = converted if not isna(converted).all() else values
1115-
values = values.copy() if copy else values
1102+
converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True)
1103+
1104+
# If all NaNs, then do not-alter
1105+
values = converted if not isna(converted).all() else values
1106+
values = values.copy() if copy else values
11161107

11171108
return values
11181109

@@ -1274,6 +1265,7 @@ def try_datetime(v):
12741265
# safe coerce to datetime64
12751266
try:
12761267
# GH19671
1268+
# tznaive only
12771269
v = tslib.array_to_datetime(v, require_iso8601=True, errors="raise")[0]
12781270
except ValueError:
12791271

@@ -1285,11 +1277,12 @@ def try_datetime(v):
12851277
try:
12861278

12871279
values, tz = conversion.datetime_to_datetime64(v)
1288-
return DatetimeIndex(values).tz_localize("UTC").tz_convert(tz=tz)
12891280
except (ValueError, TypeError):
12901281
pass
1291-
1292-
except Exception:
1282+
else:
1283+
return DatetimeIndex(values).tz_localize("UTC").tz_convert(tz=tz)
1284+
except TypeError:
1285+
# e.g. <class 'numpy.timedelta64'> is not convertible to datetime
12931286
pass
12941287

12951288
return v.reshape(shape)

pandas/tests/dtypes/cast/test_downcast.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,9 @@ def test_datetime_likes_nan(klass):
9191
def test_datetime_with_timezone(as_asi):
9292
# see gh-15426
9393
ts = Timestamp("2016-01-01 12:00:00", tz="US/Pacific")
94-
exp = DatetimeIndex([ts, ts])
94+
exp = DatetimeIndex([ts, ts])._data
9595

9696
obj = exp.asi8 if as_asi else exp
9797
res = maybe_downcast_to_dtype(obj, exp.dtype)
9898

99-
tm.assert_index_equal(res, exp)
99+
tm.assert_datetime_array_equal(res, exp)

pandas/tests/io/parser/test_c_parser_only.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,15 @@ def test_buffer_rd_bytes(c_parser_only):
4949
)
5050
parser = c_parser_only
5151

52-
for _ in range(100):
53-
try:
54-
parser.read_csv(StringIO(data), compression="gzip", delim_whitespace=True)
55-
except Exception:
56-
pass
52+
with tm.assert_produces_warning(RuntimeWarning):
53+
# compression has no effect when passing a non-binary object as input
54+
for _ in range(100):
55+
try:
56+
parser.read_csv(
57+
StringIO(data), compression="gzip", delim_whitespace=True
58+
)
59+
except Exception:
60+
pass
5761

5862

5963
def test_delim_whitespace_custom_terminator(c_parser_only):

0 commit comments

Comments
 (0)