Skip to content

Commit 5da8df7

Browse files
committed
ENH: raise an exception if NA values are found when an explicit integer dtype is passed to read_* functions. Closes #2631
1 parent dd439c9 commit 5da8df7

File tree

3 files changed

+21
-6
lines changed

3 files changed

+21
-6
lines changed

RELEASE.rst

+3
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ pandas 0.10.1
5757
- ``pivot_table`` aggfunc can be anything used in GroupBy.aggregate (GH2643_)
5858
- Implement DataFrame merges in case where set cardinalities might overflow
5959
64-bit integer (GH2690_)
60+
- Raise exception in C file parser if integer dtype is specified and data has NA
61+
values. (GH2631_)
6062

6163
**Bug fixes**
6264

@@ -103,6 +105,7 @@ pandas 0.10.1
103105
.. _GH2616: https://github.com/pydata/pandas/issues/2616
104106
.. _GH2625: https://github.com/pydata/pandas/issues/2625
105107
.. _GH2643: https://github.com/pydata/pandas/issues/2643
108+
.. _GH2631: https://github.com/pydata/pandas/issues/2631
106109
.. _GH2633: https://github.com/pydata/pandas/issues/2633
107110
.. _GH2637: https://github.com/pydata/pandas/issues/2637
108111
.. _GH2690: https://github.com/pydata/pandas/issues/2690

pandas/io/tests/test_parsers.py

+10
Original file line numberDiff line numberDiff line change
@@ -2007,6 +2007,16 @@ def test_custom_lineterminator(self):
20072007
result = self.assertRaises(ValueError, read_csv, StringIO(data2),
20082008
lineterminator='~~')
20092009

2010+
def test_raise_on_passed_int_dtype_with_nas(self):
2011+
# #2631
2012+
data = """YEAR, DOY, a
2013+
2001,106380451,10
2014+
2001,,11
2015+
2001,106380451,67"""
2016+
self.assertRaises(Exception, read_csv, StringIO(data), sep=",",
2017+
skipinitialspace=True,
2018+
dtype={'DOY': np.int64})
2019+
20102020

20112021
class TestParseSQL(unittest.TestCase):
20122022

pandas/src/parser.pyx

+8-6
Original file line numberDiff line numberDiff line change
@@ -870,7 +870,7 @@ cdef class TextReader:
870870
col_dtype = np.dtype(col_dtype).str
871871

872872
return self._convert_with_dtype(col_dtype, i, start, end,
873-
na_filter, na_hashset)
873+
na_filter, 1, na_hashset)
874874

875875
if i in self.noconvert:
876876
return self._string_convert(i, start, end, na_filter, na_hashset)
@@ -879,10 +879,10 @@ cdef class TextReader:
879879
for dt in dtype_cast_order:
880880
try:
881881
col_res, na_count = self._convert_with_dtype(
882-
dt, i, start, end, na_filter, na_hashset)
882+
dt, i, start, end, na_filter, 0, na_hashset)
883883
except OverflowError:
884884
col_res, na_count = self._convert_with_dtype(
885-
'|O8', i, start, end, na_filter, na_hashset)
885+
'|O8', i, start, end, na_filter, 0, na_hashset)
886886

887887
if col_res is not None:
888888
break
@@ -891,14 +891,16 @@ cdef class TextReader:
891891

892892
cdef _convert_with_dtype(self, object dtype, Py_ssize_t i,
893893
int start, int end,
894-
bint na_filter, kh_str_t *na_hashset):
894+
bint na_filter,
895+
bint user_dtype,
896+
kh_str_t *na_hashset):
895897
cdef kh_str_t *true_set, *false_set
896898

897899
if dtype[1] == 'i' or dtype[1] == 'u':
898900
result, na_count = _try_int64(self.parser, i, start, end,
899901
na_filter, na_hashset)
900-
# if na_count > 0:
901-
# raise Exception('Integer column has NA values')
902+
if user_dtype and na_count > 0:
903+
raise Exception('Integer column has NA values')
902904

903905
if dtype[1:] != 'i8':
904906
result = result.astype(dtype)

0 commit comments

Comments
 (0)