Skip to content

Commit 9b65719

Browse files
TomAugspurger authored and No-Stream committed
Use argument dtype to inform coercion (pandas-dev#17779)
* Use argument dtype to inform coercion

Master:

```python
>>> import dask.dataframe as dd
>>> s = dd.core.Scalar({('s', 0): 10}, 's', 'i8')
>>> pdf = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7],
...                     'b': [7, 6, 5, 4, 3, 2, 1]})
>>> (pdf + s).dtypes
a    object
b    object
dtype: object
```

Head:

```python
>>> (pdf + s).dtypes
a    int64
b    int64
dtype: object
```

This is more consistent with 0.20.3, while still keeping most of the changes in pandas-dev#16821.

Closes pandas-dev#17767

* Compat for older numpy where bool(dtype) is False
* Added timedelta
1 parent 6f0fe34 commit 9b65719

File tree

3 files changed

+122
-27
lines changed

3 files changed

+122
-27
lines changed

pandas/core/dtypes/cast.py

+33
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,39 @@ def infer_dtype_from_array(arr, pandas_dtype=False):
483483
return arr.dtype, arr
484484

485485

486+
def maybe_infer_dtype_type(element):
    """Try to infer an object's dtype, for use in arithmetic ops

    Uses `element.dtype` if that's available.
    Otherwise, list-like objects are cast to a NumPy array,
    and from there the array's dtype is used.

    Parameters
    ----------
    element : object
        Possibly has a `.dtype` attribute, and possibly the iterator
        protocol.

    Returns
    -------
    tipo : numpy.dtype or None
        ``element.dtype`` (returned unchanged) when the attribute exists,
        else the dtype of ``np.asarray(element)`` for list-likes, else
        ``None``. Callers that need the scalar type use ``tipo.type``.

    Examples
    --------
    >>> from collections import namedtuple
    >>> Foo = namedtuple("Foo", "dtype")
    >>> maybe_infer_dtype_type(Foo(np.dtype("i8")))
    dtype('int64')
    >>> maybe_infer_dtype_type([1.5, 2.5])
    dtype('float64')
    >>> maybe_infer_dtype_type(1) is None
    True
    """
    tipo = None
    # An explicit ``.dtype`` attribute wins, even when the object is also
    # iterable (e.g. the namedtuple in the example above).
    if hasattr(element, 'dtype'):
        tipo = element.dtype
    elif is_list_like(element):
        element = np.asarray(element)
        tipo = element.dtype
    return tipo
517+
518+
486519
def maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
487520
""" provide explict type promotion and coercion
488521

pandas/core/internals.py

+27-27
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@
4444
soft_convert_objects,
4545
maybe_convert_objects,
4646
astype_nansafe,
47-
find_common_type)
47+
find_common_type,
48+
maybe_infer_dtype_type)
4849
from pandas.core.dtypes.missing import (
4950
isna, notna, array_equivalent,
5051
_isna_compat,
@@ -629,10 +630,9 @@ def convert(self, copy=True, **kwargs):
629630
def _can_hold_element(self, element):
630631
""" require the same dtype as ourselves """
631632
dtype = self.values.dtype.type
632-
if is_list_like(element):
633-
element = np.asarray(element)
634-
tipo = element.dtype.type
635-
return issubclass(tipo, dtype)
633+
tipo = maybe_infer_dtype_type(element)
634+
if tipo is not None:
635+
return issubclass(tipo.type, dtype)
636636
return isinstance(element, dtype)
637637

638638
def _try_cast_result(self, result, dtype=None):
@@ -1806,11 +1806,10 @@ class FloatBlock(FloatOrComplexBlock):
18061806
_downcast_dtype = 'int64'
18071807

18081808
def _can_hold_element(self, element):
1809-
if is_list_like(element):
1810-
element = np.asarray(element)
1811-
tipo = element.dtype.type
1812-
return (issubclass(tipo, (np.floating, np.integer)) and
1813-
not issubclass(tipo, (np.datetime64, np.timedelta64)))
1809+
tipo = maybe_infer_dtype_type(element)
1810+
if tipo is not None:
1811+
return (issubclass(tipo.type, (np.floating, np.integer)) and
1812+
not issubclass(tipo.type, (np.datetime64, np.timedelta64)))
18141813
return (isinstance(element, (float, int, np.floating, np.int_)) and
18151814
not isinstance(element, (bool, np.bool_, datetime, timedelta,
18161815
np.datetime64, np.timedelta64)))
@@ -1856,9 +1855,9 @@ class ComplexBlock(FloatOrComplexBlock):
18561855
is_complex = True
18571856

18581857
def _can_hold_element(self, element):
1859-
if is_list_like(element):
1860-
element = np.array(element)
1861-
return issubclass(element.dtype.type,
1858+
tipo = maybe_infer_dtype_type(element)
1859+
if tipo is not None:
1860+
return issubclass(tipo.type,
18621861
(np.floating, np.integer, np.complexfloating))
18631862
return (isinstance(element,
18641863
(float, int, complex, np.float_, np.int_)) and
@@ -1874,12 +1873,12 @@ class IntBlock(NumericBlock):
18741873
_can_hold_na = False
18751874

18761875
def _can_hold_element(self, element):
1877-
if is_list_like(element):
1878-
element = np.array(element)
1879-
tipo = element.dtype.type
1880-
return (issubclass(tipo, np.integer) and
1881-
not issubclass(tipo, (np.datetime64, np.timedelta64)) and
1882-
self.dtype.itemsize >= element.dtype.itemsize)
1876+
tipo = maybe_infer_dtype_type(element)
1877+
if tipo is not None:
1878+
return (issubclass(tipo.type, np.integer) and
1879+
not issubclass(tipo.type, (np.datetime64,
1880+
np.timedelta64)) and
1881+
self.dtype.itemsize >= tipo.itemsize)
18831882
return is_integer(element)
18841883

18851884
def should_store(self, value):
@@ -1917,10 +1916,9 @@ def _box_func(self):
19171916
return lambda x: tslib.Timedelta(x, unit='ns')
19181917

19191918
def _can_hold_element(self, element):
1920-
if is_list_like(element):
1921-
element = np.array(element)
1922-
tipo = element.dtype.type
1923-
return issubclass(tipo, np.timedelta64)
1919+
tipo = maybe_infer_dtype_type(element)
1920+
if tipo is not None:
1921+
return issubclass(tipo.type, np.timedelta64)
19241922
return isinstance(element, (timedelta, np.timedelta64))
19251923

19261924
def fillna(self, value, **kwargs):
@@ -2018,9 +2016,9 @@ class BoolBlock(NumericBlock):
20182016
_can_hold_na = False
20192017

20202018
def _can_hold_element(self, element):
2021-
if is_list_like(element):
2022-
element = np.asarray(element)
2023-
return issubclass(element.dtype.type, np.bool_)
2019+
tipo = maybe_infer_dtype_type(element)
2020+
if tipo is not None:
2021+
return issubclass(tipo.type, np.bool_)
20242022
return isinstance(element, (bool, np.bool_))
20252023

20262024
def should_store(self, value):
@@ -2450,7 +2448,9 @@ def _astype(self, dtype, mgr=None, **kwargs):
24502448
return super(DatetimeBlock, self)._astype(dtype=dtype, **kwargs)
24512449

24522450
def _can_hold_element(self, element):
2453-
if is_list_like(element):
2451+
tipo = maybe_infer_dtype_type(element)
2452+
if tipo is not None:
2453+
# TODO: this still uses asarray, instead of dtype.type
24542454
element = np.array(element)
24552455
return element.dtype == _NS_DTYPE or element.dtype == np.int64
24562456
return (is_integer(element) or isinstance(element, datetime) or

pandas/tests/internals/test_internals.py

+62
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# pylint: disable=W0102
33

44
from datetime import datetime, date
5+
import operator
56
import sys
67
import pytest
78
import numpy as np
@@ -1213,3 +1214,64 @@ def assert_add_equals(val, inc, result):
12131214

12141215
with pytest.raises(ValueError):
12151216
BlockPlacement(slice(2, None, -1)).add(-1)
1217+
1218+
1219+
class DummyElement(object):
    """Minimal scalar-like operand that advertises a ``dtype`` attribute.

    Stores a raw ``value`` next to a ``dtype`` and implements only the
    few array-like methods defined below.

    Parameters
    ----------
    value : scalar
        The wrapped value (Python or NumPy scalar).
    dtype : str or numpy.dtype
        Anything accepted by ``np.dtype``.
    """

    def __init__(self, value, dtype):
        self.value = value
        self.dtype = np.dtype(dtype)

    def __array__(self, dtype=None, copy=None):
        """Return the value as a 0-d array of ``self.dtype``.

        ``dtype`` and ``copy`` are accepted because NumPy's ``__array__``
        protocol may pass them. A new array is always built, so ``copy``
        needs no further handling; an explicit ``dtype`` is honoured by
        casting the result.
        """
        result = np.array(self.value, dtype=self.dtype)
        if dtype is not None:
            result = result.astype(dtype)
        return result

    def __str__(self):
        return "DummyElement({}, {})".format(self.value, self.dtype)

    def __repr__(self):
        return str(self)

    def astype(self, dtype, copy=False):
        """Record ``dtype`` on this instance and return the same instance.

        The wrapped value itself is not converted and ``copy`` is ignored.
        """
        self.dtype = dtype
        return self

    def view(self, dtype):
        """Return a new element whose value is reinterpreted as ``dtype``."""
        raw = self.value
        if not hasattr(raw, 'view'):
            # Plain Python scalars have no ``.view``; lift them to the
            # NumPy scalar of our own dtype first.
            raw = np.array(raw, dtype=self.dtype)[()]
        return type(self)(raw.view(dtype), dtype)

    def any(self, axis=None):
        """Truthiness of the wrapped value; ``axis`` is ignored."""
        return bool(self.value)
1242+
1243+
1244+
class TestCanHoldElement(object):
    """Binary ops between a DataFrame and an operand exposing ``.dtype``."""

    @pytest.mark.parametrize('value, dtype', [
        (1, 'i8'),
        (1.0, 'f8'),
        (1j, 'complex128'),
        (True, 'bool'),
        (np.timedelta64(20, 'ns'), '<m8[ns]'),
        (np.datetime64(20, 'ns'), '<M8[ns]'),
    ])
    @pytest.mark.parametrize('op', [
        operator.add,
        operator.sub,
        operator.mul,
        operator.truediv,
        operator.mod,
        operator.pow,
    ], ids=lambda fn: fn.__name__)
    def test_binop_other(self, op, value, dtype):
        """``op(frame, DummyElement)`` must give the same dtypes as
        ``op(frame, value)`` with the bare scalar."""
        # Operator/dtype pairs that are skipped as invalid combinations.
        bool_ops = (operator.add, operator.sub, operator.mul,
                    operator.truediv, operator.pow)
        mod_dtypes = ('i8', 'complex128', '<M8[ns]', '<m8[ns]')
        invalid = {(fn, 'bool') for fn in bool_ops}
        invalid |= {(operator.mod, name) for name in mod_dtypes}

        if (op, dtype) in invalid:
            pytest.skip("Invalid combination {},{}".format(op, dtype))

        elem = DummyElement(value, dtype)
        frame = pd.DataFrame({"A": [elem.value] * 2}, dtype=elem.dtype)
        # The wrapped operand is evaluated first, then the bare scalar.
        assert_series_equal(op(frame, elem).dtypes,
                            op(frame, value).dtypes)

0 commit comments

Comments
 (0)