Skip to content

Commit 4937b22

Browse files
committed
Require EAs to implement _from_sequence_of_strings to be used in parsers
1 parent cfd55fe commit 4937b22

File tree

6 files changed, with 49 additions and 7 deletions

pandas/_libs/parsers.pyx

+6 -2
Original file line numberDiff line numberDiff line change
@@ -1223,8 +1223,12 @@ cdef class TextReader:
12231223
result = dtype.construct_array_type() \
12241224
._from_sequence_of_strings(result, dtype=dtype)
12251225
except NotImplementedError:
1226-
result = dtype.construct_array_type() \
1227-
._from_sequence(result, dtype=dtype)
1226+
raise NotImplementedError(
1227+
"Extension Array: {ea} must implement "
1228+
"_from_sequence_of_strings in order "
1229+
"to be used in parser methods".format(
1230+
ea=dtype.construct_array_type()))
1231+
12281232
return result, na_count
12291233

12301234
elif is_integer_dtype(dtype):

pandas/core/arrays/base.py

+2 -2
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
125125

126126
@classmethod
127127
def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
128-
"""Construct a new ExtensionArray from a sequence of scalars.
128+
"""Construct a new ExtensionArray from a sequence of strings.
129129
130130
.. versionadded:: 0.24.0
131131
@@ -145,7 +145,7 @@ def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
145145
ExtensionArray
146146
147147
"""
148-
raise AbstractMethodError(cls)
148+
raise NotImplementedError(cls)
149149

150150
@classmethod
151151
def _from_factorized(cls, values, original):

pandas/core/arrays/integer.py

+1 -1
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
267267

268268
@classmethod
269269
def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
270-
scalars = to_numeric(strings, errors='raise')
270+
scalars = to_numeric(strings, errors="raise")
271271
return cls._from_sequence(scalars, dtype, copy)
272272

273273
@classmethod

pandas/core/dtypes/cast.py

-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
import numpy as np
66

7-
from pandas.errors import AbstractMethodError
87
from pandas._libs import lib, tslib, tslibs
98
from pandas._libs.tslibs import OutOfBoundsDatetime, Period, iNaT
109
from pandas.compat import PY3, string_types, text_type, to_str

pandas/tests/extension/base/io.py

-1
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,3 @@ def test_EA_types(self, engine, data):
3535
result = pd.read_csv(StringIO(data), dtype={'Int': str(data.dtype)},
3636
engine=engine)
3737
assert result is not None
38-

pandas/tests/extension/test_common.py

+40
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,27 @@
77
import pandas as pd
88
from pandas.core.arrays import ExtensionArray
99
import pandas.util.testing as tm
10+
from pandas.compat import StringIO
11+
from pandas.core.arrays.integer import (
12+
Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype,
13+
UInt32Dtype, UInt64Dtype, integer_array,
14+
)
15+
16+
17+
def make_data():
18+
return (list(range(1, 9)) + [np.nan] + list(range(10, 98))
19+
+ [np.nan] + [99, 100])
20+
21+
22+
@pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
23+
UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype])
24+
def dtype(request):
25+
return request.param()
26+
27+
28+
@pytest.fixture
29+
def data(dtype):
30+
return integer_array(make_data(), dtype=dtype)
1031

1132

1233
class DummyDtype(dtypes.ExtensionDtype):
@@ -92,3 +113,22 @@ def test_is_not_extension_array_dtype(dtype):
92113
def test_is_extension_array_dtype(dtype):
93114
assert isinstance(dtype, dtypes.ExtensionDtype)
94115
assert is_extension_array_dtype(dtype)
116+
117+
118+
@pytest.mark.parametrize('engine', ['c', 'python'])
119+
def test_EA_types(engine):
120+
df = pd.DataFrame({'Int': pd.Series([1, 2, 3], dtype='Int64'),
121+
'A': [1, 2, 1]})
122+
data = df.to_csv(index=False)
123+
result = pd.read_csv(StringIO(data), dtype={'Int': Int64Dtype},
124+
engine=engine)
125+
assert result is not None
126+
tm.assert_frame_equal(result, df)
127+
128+
df = pd.DataFrame({'Int': pd.Series([1, 2, 3], dtype='Int8'),
129+
'A': [1, 2, 1]})
130+
data = df.to_csv(index=False)
131+
result = pd.read_csv(StringIO(data), dtype={'Int': 'Int8'},
132+
engine=engine)
133+
assert result is not None
134+
tm.assert_frame_equal(result, df)

0 commit comments

Comments
 (0)