Skip to content

Commit af7fc60

Browse files
committed
CLN: Drop compact_ints/use_unsigned from read_csv
Deprecated in v0.19.0 xref gh-13323
1 parent 8e33a71 commit af7fc60

File tree

6 files changed

+2
-111
lines changed

6 files changed

+2
-111
lines changed

doc/source/io.rst

-15
Original file line number | Diff line number | Diff line change
@@ -199,21 +199,6 @@ low_memory : boolean, default ``True``
199199
Note that the entire file is read into a single DataFrame regardless,
200200
use the ``chunksize`` or ``iterator`` parameter to return the data in chunks.
201201
(Only valid with C parser)
202-
compact_ints : boolean, default False
203-
.. deprecated:: 0.19.0
204-
205-
Argument moved to ``pd.to_numeric``
206-
207-
If ``compact_ints`` is ``True``, then for any column that is of integer dtype, the
208-
parser will attempt to cast it as the smallest integer ``dtype`` possible, either
209-
signed or unsigned depending on the specification from the ``use_unsigned`` parameter.
210-
use_unsigned : boolean, default False
211-
.. deprecated:: 0.18.2
212-
213-
Argument moved to ``pd.to_numeric``
214-
215-
If integer columns are being compacted (i.e. ``compact_ints=True``), specify whether
216-
the column should be compacted to the smallest signed or unsigned integer dtype.
217202
memory_map : boolean, default False
218203
If a filepath is provided for ``filepath_or_buffer``, map the file object
219204
directly onto memory and access the data directly from there. Using this

doc/source/whatsnew/v0.22.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -233,6 +233,7 @@ Removal of prior version deprecations/changes
233233
- :func:`read_csv` has dropped the ``skip_footer`` parameter (:issue:`13386`)
234234
- :func:`read_csv` has dropped the ``as_recarray`` parameter (:issue:`13373`)
235235
- :func:`read_csv` has dropped the ``buffer_lines`` parameter (:issue:`13360`)
236+
- :func:`read_csv` has dropped the ``compact_ints`` and ``use_unsigned`` parameters (:issue:`13323`)
236237

237238
.. _whatsnew_0220.performance:
238239

pandas/_libs/parsers.pyx

-12
Original file line number | Diff line number | Diff line change
@@ -305,7 +305,6 @@ cdef class TextReader:
305305
object index_col
306306
object low_memory
307307
object skiprows
308-
object compact_ints, use_unsigned
309308
object dtype
310309
object encoding
311310
object compression
@@ -355,10 +354,7 @@ cdef class TextReader:
355354
na_fvalues=None,
356355
true_values=None,
357356
false_values=None,
358-
359-
compact_ints=False,
360357
allow_leading_cols=True,
361-
use_unsigned=False,
362358
low_memory=False,
363359
skiprows=None,
364360
skipfooter=0,
@@ -482,10 +478,7 @@ cdef class TextReader:
482478
self.false_set = kset_from_list(self.false_values)
483479

484480
self.converters = converters
485-
486481
self.na_filter = na_filter
487-
self.compact_ints = compact_ints
488-
self.use_unsigned = use_unsigned
489482

490483
self.verbose = verbose
491484
self.low_memory = low_memory
@@ -1122,11 +1115,6 @@ cdef class TextReader:
11221115
if upcast_na and na_count > 0:
11231116
col_res = _maybe_upcast(col_res)
11241117

1125-
if issubclass(col_res.dtype.type,
1126-
np.integer) and self.compact_ints:
1127-
col_res = lib.downcast_int64(col_res, na_values,
1128-
self.use_unsigned)
1129-
11301118
if col_res is None:
11311119
raise ParserError('Unable to parse column %d' % i)
11321120

pandas/io/parsers.py

-32
Original file line number | Diff line number | Diff line change
@@ -273,21 +273,6 @@
273273
Note that the entire file is read into a single DataFrame regardless,
274274
use the `chunksize` or `iterator` parameter to return the data in chunks.
275275
(Only valid with C parser)
276-
compact_ints : boolean, default False
277-
.. deprecated:: 0.19.0
278-
Argument moved to ``pd.to_numeric``
279-
280-
If compact_ints is True, then for any column that is of integer dtype,
281-
the parser will attempt to cast it as the smallest integer dtype possible,
282-
either signed or unsigned depending on the specification from the
283-
`use_unsigned` parameter.
284-
use_unsigned : boolean, default False
285-
.. deprecated:: 0.19.0
286-
Argument moved to ``pd.to_numeric``
287-
288-
If integer columns are being compacted (i.e. `compact_ints=True`), specify
289-
whether the column should be compacted to the smallest signed or unsigned
290-
integer dtype.
291276
memory_map : boolean, default False
292277
If a filepath is provided for `filepath_or_buffer`, map the file object
293278
directly onto memory and access the data directly from there. Using this
@@ -496,8 +481,6 @@ def _read(filepath_or_buffer, kwds):
496481
_c_parser_defaults = {
497482
'delim_whitespace': False,
498483
'na_filter': True,
499-
'compact_ints': False,
500-
'use_unsigned': False,
501484
'low_memory': True,
502485
'memory_map': False,
503486
'error_bad_lines': True,
@@ -518,13 +501,9 @@ def _read(filepath_or_buffer, kwds):
518501
}
519502

520503
_deprecated_defaults = {
521-
'compact_ints': None,
522-
'use_unsigned': None,
523504
'tupleize_cols': None
524505
}
525506
_deprecated_args = {
526-
'compact_ints',
527-
'use_unsigned',
528507
'tupleize_cols',
529508
}
530509

@@ -596,8 +575,6 @@ def parser_f(filepath_or_buffer,
596575
# Internal
597576
doublequote=True,
598577
delim_whitespace=False,
599-
compact_ints=None,
600-
use_unsigned=None,
601578
low_memory=_c_parser_defaults['low_memory'],
602579
memory_map=False,
603580
float_precision=None):
@@ -662,8 +639,6 @@ def parser_f(filepath_or_buffer,
662639
float_precision=float_precision,
663640

664641
na_filter=na_filter,
665-
compact_ints=compact_ints,
666-
use_unsigned=use_unsigned,
667642
delim_whitespace=delim_whitespace,
668643
warn_bad_lines=warn_bad_lines,
669644
error_bad_lines=error_bad_lines,
@@ -1569,11 +1544,6 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
15691544
if cast_type and not is_dtype_equal(cvals, cast_type):
15701545
cvals = self._cast_types(cvals, cast_type, c)
15711546

1572-
if issubclass(cvals.dtype.type, np.integer) and self.compact_ints:
1573-
cvals = lib.downcast_int64(
1574-
cvals, parsers.na_values,
1575-
self.use_unsigned)
1576-
15771547
result[c] = cvals
15781548
if verbose and na_count:
15791549
print('Filled %d NA values in column %s' % (na_count, str(c)))
@@ -2064,8 +2034,6 @@ def __init__(self, f, **kwds):
20642034
self.converters = kwds['converters']
20652035
self.dtype = kwds['dtype']
20662036

2067-
self.compact_ints = kwds['compact_ints']
2068-
self.use_unsigned = kwds['use_unsigned']
20692037
self.thousands = kwds['thousands']
20702038
self.decimal = kwds['decimal']
20712039

pandas/tests/io/parser/common.py

-43
Original file line number | Diff line number | Diff line change
@@ -1371,49 +1371,6 @@ def test_raise_on_no_columns(self):
13711371
data = "\n\n\n"
13721372
pytest.raises(EmptyDataError, self.read_csv, StringIO(data))
13731373

1374-
def test_compact_ints_use_unsigned(self):
1375-
# see gh-13323
1376-
data = 'a,b,c\n1,9,258'
1377-
1378-
# sanity check
1379-
expected = DataFrame({
1380-
'a': np.array([1], dtype=np.int64),
1381-
'b': np.array([9], dtype=np.int64),
1382-
'c': np.array([258], dtype=np.int64),
1383-
})
1384-
out = self.read_csv(StringIO(data))
1385-
tm.assert_frame_equal(out, expected)
1386-
1387-
expected = DataFrame({
1388-
'a': np.array([1], dtype=np.int8),
1389-
'b': np.array([9], dtype=np.int8),
1390-
'c': np.array([258], dtype=np.int16),
1391-
})
1392-
1393-
# default behaviour for 'use_unsigned'
1394-
with tm.assert_produces_warning(
1395-
FutureWarning, check_stacklevel=False):
1396-
out = self.read_csv(StringIO(data), compact_ints=True)
1397-
tm.assert_frame_equal(out, expected)
1398-
1399-
with tm.assert_produces_warning(
1400-
FutureWarning, check_stacklevel=False):
1401-
out = self.read_csv(StringIO(data), compact_ints=True,
1402-
use_unsigned=False)
1403-
tm.assert_frame_equal(out, expected)
1404-
1405-
expected = DataFrame({
1406-
'a': np.array([1], dtype=np.uint8),
1407-
'b': np.array([9], dtype=np.uint8),
1408-
'c': np.array([258], dtype=np.uint16),
1409-
})
1410-
1411-
with tm.assert_produces_warning(
1412-
FutureWarning, check_stacklevel=False):
1413-
out = self.read_csv(StringIO(data), compact_ints=True,
1414-
use_unsigned=True)
1415-
tm.assert_frame_equal(out, expected)
1416-
14171374
def test_memory_map(self):
14181375
mmap_file = os.path.join(self.dirpath, 'test_mmap.csv')
14191376
expected = DataFrame({

pandas/tests/io/parser/test_unsupported.py

+1 -9
Original file line number | Diff line number | Diff line change
@@ -128,20 +128,12 @@ def read(self):
128128
class TestDeprecatedFeatures(object):
129129

130130
@pytest.mark.parametrize("engine", ["c", "python"])
131-
@pytest.mark.parametrize("kwargs", [{"compact_ints": True},
132-
{"compact_ints": False},
133-
{"use_unsigned": True},
134-
{"use_unsigned": False},
135-
{"tupleize_cols": True},
131+
@pytest.mark.parametrize("kwargs", [{"tupleize_cols": True},
136132
{"tupleize_cols": False}])
137133
def test_deprecated_args(self, engine, kwargs):
138134
data = "1,2,3"
139135
arg, _ = list(kwargs.items())[0]
140136

141-
if engine == "python" and arg == "buffer_lines":
142-
# unsupported --> exception is raised
143-
return
144-
145137
with tm.assert_produces_warning(
146138
FutureWarning, check_stacklevel=False):
147139
read_csv(StringIO(data), engine=engine, **kwargs)

0 commit comments

Comments
 (0)