-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: multi-type SparseDataFrame fixes and improvements #13917
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2e833fa
fb6237c
c7fb0f2
114217e
33973a5
93d2de6
6782bc7
2104948
ac790d7
eebcb23
8d675ad
442b8c1
926ca1e
057d56b
8c7d1ea
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -104,15 +104,21 @@ def test_as_matrix_lcd(self): | |
values = self.mixed_float.as_matrix(['C']) | ||
self.assertEqual(values.dtype, np.float16) | ||
|
||
# GH 10364 | ||
# B uint64 forces float because there are other signed int types | ||
values = self.mixed_int.as_matrix(['A', 'B', 'C', 'D']) | ||
self.assertEqual(values.dtype, np.int64) | ||
self.assertEqual(values.dtype, np.float64) | ||
|
||
values = self.mixed_int.as_matrix(['A', 'D']) | ||
self.assertEqual(values.dtype, np.int64) | ||
|
||
# guess all ints are cast to uints.... | ||
# B uint64 forces float because there are other signed int types | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this might fix another bug, can you search for uint64 issues and see? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add the issue as a reference here |
||
values = self.mixed_int.as_matrix(['A', 'B', 'C']) | ||
self.assertEqual(values.dtype, np.int64) | ||
self.assertEqual(values.dtype, np.float64) | ||
|
||
# as B and C are both unsigned, no forcing to float is needed | ||
values = self.mixed_int.as_matrix(['B', 'C']) | ||
self.assertEqual(values.dtype, np.uint64) | ||
|
||
values = self.mixed_int.as_matrix(['A', 'C']) | ||
self.assertEqual(values.dtype, np.int32) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,7 +15,10 @@ | |
_possibly_convert_objects, | ||
_infer_dtype_from_scalar, | ||
_maybe_convert_string_to_object, | ||
_maybe_convert_scalar) | ||
_maybe_convert_scalar, | ||
_find_common_type) | ||
from pandas.types.dtypes import (CategoricalDtype, | ||
DatetimeTZDtype) | ||
from pandas.util import testing as tm | ||
|
||
_multiprocess_can_split_ = True | ||
|
@@ -188,6 +191,46 @@ def test_possibly_convert_objects_copy(self): | |
self.assertTrue(values is not out) | ||
|
||
|
||
class TestCommonTypes(tm.TestCase): | ||
def test_numpy_dtypes(self): | ||
# (source_types, destination_type) | ||
testcases = ( | ||
# identity | ||
((np.int64,), np.int64), | ||
((np.uint64,), np.uint64), | ||
((np.float32,), np.float32), | ||
((np.object,), np.object), | ||
|
||
# into ints | ||
((np.int16, np.int64), np.int64), | ||
((np.int32, np.uint32), np.int64), | ||
((np.uint16, np.uint64), np.uint64), | ||
|
||
# into floats | ||
((np.float16, np.float32), np.float32), | ||
((np.float16, np.int16), np.float32), | ||
((np.float32, np.int16), np.float32), | ||
((np.uint64, np.int64), np.float64), | ||
((np.int16, np.float64), np.float64), | ||
((np.float16, np.int64), np.float64), | ||
|
||
# into others | ||
((np.complex128, np.int32), np.complex128), | ||
((np.object, np.float32), np.object), | ||
((np.object, np.int16), np.object), | ||
) | ||
for src, common in testcases: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice! |
||
self.assertEqual(_find_common_type(src), common) | ||
|
||
def test_pandas_dtypes(self): | ||
# TODO: not implemented yet | ||
with self.assertRaises(TypeError): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. put a TODO: this is not implemented |
||
self.assertEqual(_find_common_type([CategoricalDtype()]), | ||
CategoricalDtype) | ||
with self.assertRaises(TypeError): | ||
self.assertEqual(_find_common_type([DatetimeTZDtype()]), | ||
DatetimeTZDtype) | ||
|
||
if __name__ == '__main__': | ||
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], | ||
exit=False) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
_ensure_int32, _ensure_int64, | ||
_NS_DTYPE, _TD_DTYPE, _INT64_DTYPE, | ||
_DATELIKE_DTYPES, _POSSIBLY_CAST_DTYPES) | ||
from .dtypes import ExtensionDtype | ||
from .generic import ABCDatetimeIndex, ABCPeriodIndex, ABCSeries | ||
from .missing import isnull, notnull | ||
from .inference import is_list_like | ||
|
@@ -861,3 +862,12 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'): | |
value = _possibly_infer_to_datetimelike(value) | ||
|
||
return value | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add some tests for validation on this (there might be some existing) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IOW, just run it thru the standard types for now, and asserting that it raises for pandas dtypes |
||
def _find_common_type(types): | ||
"""Find a common data type among the given dtypes.""" | ||
# TODO: enable using pandas-specific types | ||
if any(isinstance(t, ExtensionDtype) for t in types): | ||
raise TypeError("Common type discovery is currently only " | ||
"supported for pure numpy dtypes.") | ||
return np.find_common_type(types, []) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what I meant was can start off by simply moving this (your version or new one with
np.find_common_type
topandas.types.cast
(and if we have specific tests move similarly; if not, ideally add some). we can add the pandas specific functionaility later.