Skip to content

Commit 30ac610

Browse files
committed
BUG: Respect the dtype parameter for empty CSV
Closes gh-14712.
1 parent fdb70a9 commit 30ac610

File tree

3 files changed

+45
-8
lines changed

3 files changed

+45
-8
lines changed

doc/source/whatsnew/v0.20.0.txt

+2
Original file line number | Diff line number | Diff line change
@@ -81,3 +81,5 @@ Performance Improvements
8181

8282
Bug Fixes
8383
~~~~~~~~~
84+
85+
- Bug in ``pd.read_csv()`` in which the ``dtype`` parameter was not being respected for empty data (:issue:`14712`)

pandas/io/parsers.py

+16 -8
Original file line number | Diff line number | Diff line change
@@ -2791,27 +2791,35 @@ def _clean_index_names(columns, index_col):
27912791
def _get_empty_meta(columns, index_col, index_names, dtype=None):
27922792
columns = list(columns)
27932793

2794-
if dtype is None:
2795-
dtype = {}
2794+
# Convert `dtype` to a defaultdict of some kind.
2795+
# This will enable us to write `dtype[col_name]`
2796+
# without worrying about KeyError issues later on.
2797+
if not isinstance(dtype, dict):
2798+
# if dtype == None, default will be np.object.
2799+
default_dtype = dtype or np.object
2800+
dtype = defaultdict(lambda: default_dtype)
27962801
else:
2797-
if not isinstance(dtype, dict):
2798-
dtype = defaultdict(lambda: dtype)
2802+
# Save a copy of the dictionary.
2803+
_dtype = dtype.copy()
2804+
dtype = defaultdict(lambda: np.object)
2805+
27992806
# Convert column indexes to column names.
2800-
dtype = dict((columns[k] if is_integer(k) else k, v)
2801-
for k, v in compat.iteritems(dtype))
2807+
for k, v in compat.iteritems(_dtype):
2808+
col = columns[k] if is_integer(k) else k
2809+
dtype[col] = v
28022810

28032811
if index_col is None or index_col is False:
28042812
index = Index([])
28052813
else:
2806-
index = [np.empty(0, dtype=dtype.get(index_name, np.object))
2814+
index = [np.empty(0, dtype=dtype[index_name])
28072815
for index_name in index_names]
28082816
index = MultiIndex.from_arrays(index, names=index_names)
28092817
index_col.sort()
28102818
for i, n in enumerate(index_col):
28112819
columns.pop(n - i)
28122820

28132821
col_dict = dict((col_name,
2814-
np.empty(0, dtype=dtype.get(col_name, np.object)))
2822+
np.empty(0, dtype=dtype[col_name]))
28152823
for col_name in columns)
28162824

28172825
return index, columns, col_dict

pandas/io/tests/parser/c_parser_only.py

+27
Original file line number | Diff line number | Diff line change
@@ -561,3 +561,30 @@ def test_internal_null_byte(self):
561561

562562
result = self.read_csv(StringIO(data), names=names)
563563
tm.assert_frame_equal(result, expected)
564+
565+
def test_empty_dtype(self):
566+
# see gh-14712
567+
data = 'a,b'
568+
569+
expected = pd.DataFrame(columns=['a', 'b'], dtype=np.float64)
570+
result = self.read_csv(StringIO(data), header=0, dtype=np.float64)
571+
tm.assert_frame_equal(result, expected)
572+
573+
expected = pd.DataFrame(columns=['a', 'b'])
574+
expected['a'] = expected['a'].astype(np.float64)
575+
result = self.read_csv(StringIO(data), header=0,
576+
dtype={'a': np.float64})
577+
tm.assert_frame_equal(result, expected)
578+
579+
expected = pd.DataFrame(columns=['a', 'b'])
580+
expected['a'] = expected['a'].astype(np.float64)
581+
result = self.read_csv(StringIO(data), header=0,
582+
dtype={0: np.float64})
583+
tm.assert_frame_equal(result, expected)
584+
585+
expected = pd.DataFrame(columns=['a', 'b'])
586+
expected['a'] = expected['a'].astype(np.int32)
587+
expected['b'] = expected['b'].astype(np.float64)
588+
result = self.read_csv(StringIO(data), header=0,
589+
dtype={'a': np.int32, 1: np.float64})
590+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)