Skip to content

Commit b89c88a

Browse files
committed
Merge pull request #4406 from tr11/fix_csv_parser_dtypes
BUG: Fix an issue with the csv cparser when usecols is used
2 parents cc9bff6 + d05f3b1 commit b89c88a

File tree

3 files changed

+25
-4
lines changed

3 files changed

+25
-4
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ pandas 0.13
9494
- Fixed an issue where ``PeriodIndex`` joining with self was returning a new
9595
instance rather than the same instance (:issue:`4379`); also adds a test
9696
for this for the other index types
97+
- Fixed a bug where all dtypes were converted to object when using the CSV cparser
98+
with the ``usecols`` parameter (:issue:`3192`)
9799

98100
pandas 0.12
99101
===========

pandas/io/tests/test_parsers.py

+22
Original file line numberDiff line numberDiff line change
@@ -2138,6 +2138,28 @@ def test_usecols(self):
21382138
self.assertRaises(ValueError, self.read_csv, StringIO(data),
21392139
names=['a', 'b'], usecols=[1], header=None)
21402140

2141+
def test_usecols_dtypes(self):
2142+
data = """\
2143+
1,2,3
2144+
4,5,6
2145+
7,8,9
2146+
10,11,12"""
2147+
result = self.read_csv(StringIO(data), usecols=(0, 1, 2),
2148+
names=('a', 'b', 'c'),
2149+
header=None,
2150+
converters={'a': str},
2151+
dtype={'b': int, 'c': float},
2152+
)
2153+
result2 = self.read_csv(StringIO(data), usecols=(0, 2),
2154+
names=('a', 'b', 'c'),
2155+
header=None,
2156+
converters={'a': str},
2157+
dtype={'b': int, 'c': float},
2158+
)
2159+
self.assertTrue((result.dtypes == [object, np.int, np.float]).all())
2160+
self.assertTrue((result2.dtypes == [object, np.float]).all())
2161+
2162+
21412163
def test_usecols_implicit_index_col(self):
21422164
# #2654
21432165
data = 'a,b,c\n4,apple,bat,5.7\n8,orange,cow,10'

pandas/parser.pyx

+1-4
Original file line numberDiff line numberDiff line change
@@ -869,6 +869,7 @@ cdef class TextReader:
869869
if self.has_usecols and not (i in self.usecols or
870870
name in self.usecols):
871871
continue
872+
nused += 1
872873

873874
conv = self._get_converter(i, name)
874875

@@ -907,10 +908,6 @@ cdef class TextReader:
907908

908909
results[i] = col_res
909910

910-
# number of used column names
911-
if i > self.leading_cols:
912-
nused += 1
913-
914911
self.parser_start += end - start
915912

916913
return results

0 commit comments

Comments
 (0)