From 3d2ce5b8acafe52a9ea3c53db44ebb6cf310d157 Mon Sep 17 00:00:00 2001 From: Nicholas Ver Halen Date: Wed, 12 Oct 2016 16:13:38 -0500 Subject: [PATCH 1/7] Made it so that 0 was included in uint8 --- pandas/tools/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/util.py b/pandas/tools/util.py index fec56328c1721..b50bf9dc448bc 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -205,7 +205,7 @@ def to_numeric(arg, errors='raise', downcast=None): if downcast in ('integer', 'signed'): typecodes = np.typecodes['Integer'] - elif downcast == 'unsigned' and np.min(values) > 0: + elif downcast == 'unsigned' and np.min(values) >= 0: typecodes = np.typecodes['UnsignedInteger'] elif downcast == 'float': typecodes = np.typecodes['Float'] From 81b49650b0200858351df5d240cd6a4d24718e12 Mon Sep 17 00:00:00 2001 From: Nicholas Ver Halen Date: Wed, 12 Oct 2016 18:49:19 -0500 Subject: [PATCH 2/7] Added a test to check uint8 with 0 --- pandas/tools/tests/test_util.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index 8c16308d79a31..e5286ccdbe2a4 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -391,6 +391,14 @@ def test_downcast(self): res = pd.to_numeric(data, downcast=downcast) tm.assert_numpy_array_equal(res, expected) + #check that 0 works as a unsigned downcast + + data = [0, 1, 2, 3] + res = pd.to_numeric(data, downcast=downcast) + expected = np.array(data, dtype=np.uint8) + tm.assert_numpy_array_equal(res, expected) + + # the smallest integer dtype need not be np.(u)int8 data = ['256', 257, 258] From b6331a5fef151bfd9669ec2db71e05b3dcb68d47 Mon Sep 17 00:00:00 2001 From: Nicholas Ver Halen Date: Wed, 12 Oct 2016 19:07:04 -0500 Subject: [PATCH 3/7] Added release note to issue 14401 resolve. --- doc/source/whatsnew/v0.19.1.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 5180b9a092f6c..66c1e9f766087 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -48,3 +48,4 @@ Bug Fixes - Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`) - Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`) - Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`) +- Bug in ``pd.to_numeric`` where it would not downcast a 0 to a uint8 (:issue:`14404`) From 8a836b2c8a7a4269e792041fc6657484326b2af4 Mon Sep 17 00:00:00 2001 From: Nicholas Ver Halen Date: Thu, 13 Oct 2016 12:01:28 -0500 Subject: [PATCH 4/7] Edited mistakes in whatsnew --- doc/source/whatsnew/v0.19.1.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 66c1e9f766087..14a81e473f9f0 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -49,3 +49,4 @@ Bug Fixes - Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`) - Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`) - Bug in ``pd.to_numeric`` where it would not downcast a 0 to a uint8 (:issue:`14404`) +- Bug in ``pd.to_numeric`` where it would not downcast a 0 properly. (:issue:`14401`) From 3427e4fcf145546b9d746ecafc0af2a85ca22457 Mon Sep 17 00:00:00 2001 From: Nicholas Ver Halen Date: Thu, 13 Oct 2016 20:42:36 -0500 Subject: [PATCH 5/7] Added tests for the max and min values of all dtypes to to_numeric --- pandas/tools/tests/test_util.py | 34 +++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index e5286ccdbe2a4..59f8d31104a61 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -391,14 +391,6 @@ def test_downcast(self): res = pd.to_numeric(data, downcast=downcast) tm.assert_numpy_array_equal(res, expected) - #check that 0 works as a unsigned downcast - - data = [0, 1, 2, 3] - res = pd.to_numeric(data, downcast=downcast) - expected = np.array(data, dtype=np.uint8) - tm.assert_numpy_array_equal(res, expected) - - # the smallest integer dtype need not be np.(u)int8 data = ['256', 257, 258] @@ -409,6 +401,32 @@ def test_downcast(self): res = pd.to_numeric(data, downcast=downcast) tm.assert_numpy_array_equal(res, expected) + # check that the smallest and largest values in each integer type pass to each type. + int8 = [-128, 127] + int8_Series = pd.to_numeric(int8, downcast = 'integer') + tm.assert_equal(int8_Series.dtype, 'int8') + int16 = [-32768, 32767] + int16_Series = pd.to_numeric(int16, downcast = 'integer') + tm.assert_equal(int16_Series.dtype, 'int16') + int32 = [-2147483648, 2147483647] + int32_Series = pd.to_numeric(int32, downcast = 'integer') + tm.assert_equal(int32_Series.dtype, 'int32') + int64 = [-9223372036854775808, 9223372036854775807] + int64_Series = pd.to_numeric(int64, downcast = 'integer') + tm.assert_equal(int64_Series.dtype, 'int64') + uint8 = [0, 255] + uint8_Series = pd.to_numeric(uint8, downcast = 'unsigned') + tm.assert_equal(uint8_Series.dtype, 'uint8') + uint16 = [0, 65535] + uint16_Series = pd.to_numeric(uint16, downcast = 'unsigned') + tm.assert_equal(uint16_Series.dtype, 'uint16') + uint32 = [0, 4294967295] + uint32_Series = pd.to_numeric(uint32, downcast = 'unsigned') + tm.assert_equal(uint32_Series.dtype, 'uint32') + # uint64 = [0, 18446744073709551615] + # uint64_Series = pd.to_numeric(uint64, downcast = 'unsigned') + # tm.assert_equal(uint64_Series.dtype, 'uint64') + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) From 0da19185563080eb7e0fcebddde89945c6e76216 Mon Sep 17 00:00:00 2001 From: Nicholas Ver Halen Date: Fri, 14 Oct 2016 20:18:47 -0500 Subject: [PATCH 6/7] Changed the tests so that it iterated through a dictionary. I also added a series of other tests that included making sure that a dtype shifted dtype once it reached the next value and once again edited the whatsnew. --- doc/source/whatsnew/v0.19.1.txt | 1 + pandas/tools/tests/test_util.py | 78 +++++++++++++++++++++++---------- 2 files changed, 55 insertions(+), 24 deletions(-) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 14a81e473f9f0..1a86ecae2faf3 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -50,3 +50,4 @@ Bug Fixes - Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`) - Bug in ``pd.to_numeric`` where it would not downcast a 0 to a uint8 (:issue:`14404`) - Bug in ``pd.to_numeric`` where it would not downcast a 0 properly. (:issue:`14401`) +- Bug in ``pd.to_numeric`` where a 0 was not unsigned on a downcast = 'unsigned' argument (:issue:`14401`) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index 59f8d31104a61..856ed8431c548 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -402,30 +402,60 @@ def test_downcast(self): tm.assert_numpy_array_equal(res, expected) # check that the smallest and largest values in each integer type pass to each type. - int8 = [-128, 127] - int8_Series = pd.to_numeric(int8, downcast = 'integer') - tm.assert_equal(int8_Series.dtype, 'int8') - int16 = [-32768, 32767] - int16_Series = pd.to_numeric(int16, downcast = 'integer') - tm.assert_equal(int16_Series.dtype, 'int16') - int32 = [-2147483648, 2147483647] - int32_Series = pd.to_numeric(int32, downcast = 'integer') - tm.assert_equal(int32_Series.dtype, 'int32') - int64 = [-9223372036854775808, 9223372036854775807] - int64_Series = pd.to_numeric(int64, downcast = 'integer') - tm.assert_equal(int64_Series.dtype, 'int64') - uint8 = [0, 255] - uint8_Series = pd.to_numeric(uint8, downcast = 'unsigned') - tm.assert_equal(uint8_Series.dtype, 'uint8') - uint16 = [0, 65535] - uint16_Series = pd.to_numeric(uint16, downcast = 'unsigned') - tm.assert_equal(uint16_Series.dtype, 'uint16') - uint32 = [0, 4294967295] - uint32_Series = pd.to_numeric(uint32, downcast = 'unsigned') - tm.assert_equal(uint32_Series.dtype, 'uint32') - # uint64 = [0, 18446744073709551615] - # uint64_Series = pd.to_numeric(uint64, downcast = 'unsigned') - # tm.assert_equal(uint64_Series.dtype, 'uint64') + integer_dtype_min_max = { + 'int8': [np.iinfo(np.int8).min, np.iinfo(np.int8).max], + 'int16': [np.iinfo(np.int16).min, np.iinfo(np.int16).max], + 'int32': [np.iinfo(np.int32).min, np.iinfo(np.int32).max], + 'int64': [np.iinfo(np.int64).min, np.iinfo(np.int64).max] + } + + for dtype, min_max in integer_dtype_min_max.iteritems(): + series = pd.to_numeric(pd.Series(min_max), downcast = 'integer') + tm.assert_equal(series.dtype, dtype) + + + unsigned_dtype_min_max = { + 'uint8': [np.iinfo(np.uint8).min, np.iinfo(np.uint8).max], + 'uint16': [np.iinfo(np.uint16).min, np.iinfo(np.uint16).max], + 'uint32': [np.iinfo(np.uint32).min, np.iinfo(np.uint32).max], + # 'uint64': [np.iinfo(np.uint64).min, np.iinfo(np.uint64).max] + } + + for dtype, min_max in unsigned_dtype_min_max.iteritems(): + series = pd.to_numeric(pd.Series(min_max), downcast = 'unsigned') + tm.assert_equal(series.dtype, dtype) + + #check to see if the minimum number to shift integer types actually shifts + + integer_dtype_min_max_plus = { + 'int16': [np.iinfo(np.int8).min, np.iinfo(np.int8).max + 1], + 'int32': [np.iinfo(np.int16).min, np.iinfo(np.int16).max + 1], + 'int64': [np.iinfo(np.int32).min, np.iinfo(np.int32).max + 1], + } + + for dtype, min_max in integer_dtype_min_max_plus.iteritems(): + series = pd.to_numeric(pd.Series(min_max), downcast = 'integer') + tm.assert_equal(series.dtype, dtype) + + integer_dtype_min_max_minus = { + 'int16': [np.iinfo(np.int8).min - 1, np.iinfo(np.int16).max], + 'int32': [np.iinfo(np.int16).min - 1, np.iinfo(np.int32).max], + 'int64': [np.iinfo(np.int32).min - 1, np.iinfo(np.int64).max] + } + + for dtype, min_max in integer_dtype_min_max_minus.iteritems(): + series = pd.to_numeric(pd.Series(min_max), downcast = 'integer') + tm.assert_equal(series.dtype, dtype) + + unsigned_dtype_min_max_plus = { + 'uint16': [np.iinfo(np.uint8).min, np.iinfo(np.uint8).max + 1], + 'uint32': [np.iinfo(np.uint16).min, np.iinfo(np.uint16).max + 1], + # 'uint64': [np.iinfo(np.uint32).min, np.iinfo(np.uint32).max + 1], + } + + for dtype, min_max in unsigned_dtype_min_max_plus.iteritems(): + series = pd.to_numeric(pd.Series(min_max), downcast = 'unsigned') + tm.assert_equal(series.dtype, dtype) if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], From c6be0ebce0c8670b824bc055dd612921898e0ec9 Mon Sep 17 00:00:00 2001 From: Nicholas Ver Halen Date: Mon, 17 Oct 2016 18:20:05 -0500 Subject: [PATCH 7/7] Changed the test to work with python 3.x --- pandas/tools/tests/test_util.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index 856ed8431c548..54cfd1dacb87e 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -409,7 +409,7 @@ def test_downcast(self): 'int64': [np.iinfo(np.int64).min, np.iinfo(np.int64).max] } - for dtype, min_max in integer_dtype_min_max.iteritems(): + for dtype, min_max in integer_dtype_min_max.items(): series = pd.to_numeric(pd.Series(min_max), downcast = 'integer') tm.assert_equal(series.dtype, dtype) @@ -421,7 +421,7 @@ def test_downcast(self): # 'uint64': [np.iinfo(np.uint64).min, np.iinfo(np.uint64).max] } - for dtype, min_max in unsigned_dtype_min_max.iteritems(): + for dtype, min_max in unsigned_dtype_min_max.items(): series = pd.to_numeric(pd.Series(min_max), downcast = 'unsigned') tm.assert_equal(series.dtype, dtype) @@ -433,7 +433,7 @@ def test_downcast(self): 'int64': [np.iinfo(np.int32).min, np.iinfo(np.int32).max + 1], } - for dtype, min_max in integer_dtype_min_max_plus.iteritems(): + for dtype, min_max in integer_dtype_min_max_plus.items(): series = pd.to_numeric(pd.Series(min_max), downcast = 'integer') tm.assert_equal(series.dtype, dtype) @@ -443,7 +443,7 @@ def test_downcast(self): 'int64': [np.iinfo(np.int32).min - 1, np.iinfo(np.int64).max] } - for dtype, min_max in integer_dtype_min_max_minus.iteritems(): + for dtype, min_max in integer_dtype_min_max_minus.items(): series = pd.to_numeric(pd.Series(min_max), downcast = 'integer') tm.assert_equal(series.dtype, dtype) @@ -453,7 +453,7 @@ def test_downcast(self): # 'uint64': [np.iinfo(np.uint32).min, np.iinfo(np.uint32).max + 1], } - for dtype, min_max in unsigned_dtype_min_max_plus.iteritems(): + for dtype, min_max in unsigned_dtype_min_max_plus.items(): series = pd.to_numeric(pd.Series(min_max), downcast = 'unsigned') tm.assert_equal(series.dtype, dtype)