
Commit 0112267

gfyoung authored and jreback committed
CLN: Drop compact_ints/use_unsigned from read_csv (#18851)
* CLN: Drop compact_ints/use_unsigned from read_csv

  Deprecated in v0.19.0, xref gh-13323

* CLN: Remove downcast_int64 from inference.pyx

  It was only being used for the compact_ints and use_unsigned parameters in read_csv.
1 parent 8a5e085 commit 0112267
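
The removed behaviour is now expressed through ``pd.to_numeric``. A minimal sketch of the replacement workflow, assuming pandas >= 0.22.0; the sample data and expected dtypes mirror the test removed from pandas/tests/io/parser/common.py below:

    from io import StringIO

    import pandas as pd

    data = 'a,b,c\n1,9,258'

    # Previously (deprecated since 0.19.0):
    #     pd.read_csv(StringIO(data), compact_ints=True, use_unsigned=True)

    # Now: read normally, then downcast column by column with pd.to_numeric.
    df = pd.read_csv(StringIO(data))
    signed = df.apply(pd.to_numeric, downcast='integer')     # expect int8, int8, int16
    unsigned = df.apply(pd.to_numeric, downcast='unsigned')  # expect uint8, uint8, uint16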

File tree

8 files changed: +2 -218 lines


doc/source/io.rst

-15
@@ -199,21 +199,6 @@ low_memory : boolean, default ``True``
     Note that the entire file is read into a single DataFrame regardless,
     use the ``chunksize`` or ``iterator`` parameter to return the data in chunks.
     (Only valid with C parser)
-compact_ints : boolean, default False
-    .. deprecated:: 0.19.0
-
-       Argument moved to ``pd.to_numeric``
-
-    If ``compact_ints`` is ``True``, then for any column that is of integer dtype, the
-    parser will attempt to cast it as the smallest integer ``dtype`` possible, either
-    signed or unsigned depending on the specification from the ``use_unsigned`` parameter.
-use_unsigned : boolean, default False
-    .. deprecated:: 0.18.2
-
-       Argument moved to ``pd.to_numeric``
-
-    If integer columns are being compacted (i.e. ``compact_ints=True``), specify whether
-    the column should be compacted to the smallest signed or unsigned integer dtype.
 memory_map : boolean, default False
     If a filepath is provided for ``filepath_or_buffer``, map the file object
     directly onto memory and access the data directly from there. Using this

doc/source/whatsnew/v0.22.0.txt

+1
@@ -233,6 +233,7 @@ Removal of prior version deprecations/changes
 - :func:`read_csv` has dropped the ``skip_footer`` parameter (:issue:`13386`)
 - :func:`read_csv` has dropped the ``as_recarray`` parameter (:issue:`13373`)
 - :func:`read_csv` has dropped the ``buffer_lines`` parameter (:issue:`13360`)
+- :func:`read_csv` has dropped the ``compact_ints`` and ``use_unsigned`` parameters (:issue:`13323`)

 .. _whatsnew_0220.performance:
pandas/_libs/parsers.pyx

-12
@@ -305,7 +305,6 @@ cdef class TextReader:
         object index_col
         object low_memory
         object skiprows
-        object compact_ints, use_unsigned
         object dtype
         object encoding
         object compression
@@ -355,10 +354,7 @@ cdef class TextReader:
                   na_fvalues=None,
                   true_values=None,
                   false_values=None,
-
-                  compact_ints=False,
                   allow_leading_cols=True,
-                  use_unsigned=False,
                   low_memory=False,
                   skiprows=None,
                   skipfooter=0,
@@ -482,10 +478,7 @@ cdef class TextReader:
         self.false_set = kset_from_list(self.false_values)

         self.converters = converters
-
         self.na_filter = na_filter
-        self.compact_ints = compact_ints
-        self.use_unsigned = use_unsigned

         self.verbose = verbose
         self.low_memory = low_memory
@@ -1122,11 +1115,6 @@ cdef class TextReader:
         if upcast_na and na_count > 0:
             col_res = _maybe_upcast(col_res)

-        if issubclass(col_res.dtype.type,
-                      np.integer) and self.compact_ints:
-            col_res = lib.downcast_int64(col_res, na_values,
-                                         self.use_unsigned)
-
         if col_res is None:
             raise ParserError('Unable to parse column %d' % i)

pandas/_libs/src/inference.pyx

-71
@@ -1657,74 +1657,3 @@ def fast_multiget(dict mapping, ndarray keys, default=np.nan):
             output[i] = default

     return maybe_convert_objects(output)
-
-
-def downcast_int64(ndarray[int64_t] arr, object na_values,
-                   bint use_unsigned=0):
-    cdef:
-        Py_ssize_t i, n = len(arr)
-        int64_t mx = INT64_MIN + 1, mn = INT64_MAX
-        int64_t NA = na_values[np.int64]
-        int64_t val
-        ndarray[uint8_t] mask
-        int na_count = 0
-
-    _mask = np.empty(n, dtype=bool)
-    mask = _mask.view(np.uint8)
-
-    for i in range(n):
-        val = arr[i]
-
-        if val == NA:
-            mask[i] = 1
-            na_count += 1
-            continue
-
-        # not NA
-        mask[i] = 0
-
-        if val > mx:
-            mx = val
-
-        if val < mn:
-            mn = val
-
-    if mn >= 0 and use_unsigned:
-        if mx <= UINT8_MAX - 1:
-            result = arr.astype(np.uint8)
-            if na_count:
-                np.putmask(result, _mask, na_values[np.uint8])
-            return result
-
-        if mx <= UINT16_MAX - 1:
-            result = arr.astype(np.uint16)
-            if na_count:
-                np.putmask(result, _mask, na_values[np.uint16])
-            return result
-
-        if mx <= UINT32_MAX - 1:
-            result = arr.astype(np.uint32)
-            if na_count:
-                np.putmask(result, _mask, na_values[np.uint32])
-            return result
-
-    else:
-        if mn >= INT8_MIN + 1 and mx <= INT8_MAX:
-            result = arr.astype(np.int8)
-            if na_count:
-                np.putmask(result, _mask, na_values[np.int8])
-            return result
-
-        if mn >= INT16_MIN + 1 and mx <= INT16_MAX:
-            result = arr.astype(np.int16)
-            if na_count:
-                np.putmask(result, _mask, na_values[np.int16])
-            return result
-
-        if mn >= INT32_MIN + 1 and mx <= INT32_MAX:
-            result = arr.astype(np.int32)
-            if na_count:
-                np.putmask(result, _mask, na_values[np.int32])
-            return result
-
-    return arr
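
For reference, the removed ``downcast_int64`` boiled down to a min/max scan followed by an ``astype`` to the narrowest dtype that fits, reserving one boundary value as an NA sentinel. A rough pure-NumPy sketch of the same idea (illustrative only, not the removed Cython code, and without the NA-sentinel masking):

    import numpy as np

    def downcast_int64_sketch(arr, use_unsigned=False):
        # Simplification: assumes arr holds no NA sentinel values.
        mn, mx = arr.min(), arr.max()
        if mn >= 0 and use_unsigned:
            candidates = (np.uint8, np.uint16, np.uint32)
        else:
            candidates = (np.int8, np.int16, np.int32)
        for dtype in candidates:
            info = np.iinfo(dtype)
            if info.min <= mn and mx <= info.max:
                return arr.astype(dtype)
        return arr  # nothing narrower than 64 bits fits

    downcast_int64_sketch(np.array([1, 2, 7, 8, 300], dtype=np.int64)).dtype  # int16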

pandas/io/parsers.py

-32
@@ -273,21 +273,6 @@
     Note that the entire file is read into a single DataFrame regardless,
     use the `chunksize` or `iterator` parameter to return the data in chunks.
     (Only valid with C parser)
-compact_ints : boolean, default False
-    .. deprecated:: 0.19.0
-        Argument moved to ``pd.to_numeric``
-
-    If compact_ints is True, then for any column that is of integer dtype,
-    the parser will attempt to cast it as the smallest integer dtype possible,
-    either signed or unsigned depending on the specification from the
-    `use_unsigned` parameter.
-use_unsigned : boolean, default False
-    .. deprecated:: 0.19.0
-        Argument moved to ``pd.to_numeric``
-
-    If integer columns are being compacted (i.e. `compact_ints=True`), specify
-    whether the column should be compacted to the smallest signed or unsigned
-    integer dtype.
 memory_map : boolean, default False
     If a filepath is provided for `filepath_or_buffer`, map the file object
     directly onto memory and access the data directly from there. Using this
@@ -496,8 +481,6 @@ def _read(filepath_or_buffer, kwds):
 _c_parser_defaults = {
     'delim_whitespace': False,
     'na_filter': True,
-    'compact_ints': False,
-    'use_unsigned': False,
     'low_memory': True,
     'memory_map': False,
     'error_bad_lines': True,
@@ -518,13 +501,9 @@ def _read(filepath_or_buffer, kwds):
 }

 _deprecated_defaults = {
-    'compact_ints': None,
-    'use_unsigned': None,
     'tupleize_cols': None
 }
 _deprecated_args = {
-    'compact_ints',
-    'use_unsigned',
     'tupleize_cols',
 }

@@ -596,8 +575,6 @@ def parser_f(filepath_or_buffer,
              # Internal
              doublequote=True,
              delim_whitespace=False,
-             compact_ints=None,
-             use_unsigned=None,
              low_memory=_c_parser_defaults['low_memory'],
              memory_map=False,
              float_precision=None):
@@ -662,8 +639,6 @@ def parser_f(filepath_or_buffer,
         float_precision=float_precision,

         na_filter=na_filter,
-        compact_ints=compact_ints,
-        use_unsigned=use_unsigned,
         delim_whitespace=delim_whitespace,
         warn_bad_lines=warn_bad_lines,
         error_bad_lines=error_bad_lines,
@@ -1569,11 +1544,6 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
         if cast_type and not is_dtype_equal(cvals, cast_type):
             cvals = self._cast_types(cvals, cast_type, c)

-        if issubclass(cvals.dtype.type, np.integer) and self.compact_ints:
-            cvals = lib.downcast_int64(
-                cvals, parsers.na_values,
-                self.use_unsigned)
-
         result[c] = cvals
         if verbose and na_count:
             print('Filled %d NA values in column %s' % (na_count, str(c)))
@@ -2064,8 +2034,6 @@ def __init__(self, f, **kwds):
         self.converters = kwds['converters']
         self.dtype = kwds['dtype']

-        self.compact_ints = kwds['compact_ints']
-        self.use_unsigned = kwds['use_unsigned']
         self.thousands = kwds['thousands']
         self.decimal = kwds['decimal']
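
For context, ``_deprecated_args`` and ``_deprecated_defaults`` feed the warning machinery in ``parser_f``: any listed argument whose value differs from its sentinel default triggers a ``FutureWarning``. A simplified sketch of that check (not the verbatim pandas code; the helper name and message are illustrative):

    import warnings

    _deprecated_defaults = {'tupleize_cols': None}
    _deprecated_args = {'tupleize_cols'}

    def _warn_deprecated(kwds):
        # kwds: keyword arguments collected by parser_f
        for arg in _deprecated_args:
            if kwds.get(arg, _deprecated_defaults[arg]) != _deprecated_defaults[arg]:
                warnings.warn("the '%s' keyword is deprecated" % arg,
                              FutureWarning, stacklevel=3)

With ``compact_ints`` and ``use_unsigned`` dropped from both mappings, they no longer reach this path at all.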

pandas/tests/dtypes/test_io.py

-36
@@ -71,39 +71,3 @@ def test_convert_sql_column_decimals(self):
         result = lib.convert_sql_column(arr)
         expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8')
         tm.assert_numpy_array_equal(result, expected)
-
-    def test_convert_downcast_int64(self):
-        from pandas._libs.parsers import na_values
-
-        arr = np.array([1, 2, 7, 8, 10], dtype=np.int64)
-        expected = np.array([1, 2, 7, 8, 10], dtype=np.int8)
-
-        # default argument
-        result = lib.downcast_int64(arr, na_values)
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = lib.downcast_int64(arr, na_values, use_unsigned=False)
-        tm.assert_numpy_array_equal(result, expected)
-
-        expected = np.array([1, 2, 7, 8, 10], dtype=np.uint8)
-        result = lib.downcast_int64(arr, na_values, use_unsigned=True)
-        tm.assert_numpy_array_equal(result, expected)
-
-        # still cast to int8 despite use_unsigned=True
-        # because of the negative number as an element
-        arr = np.array([1, 2, -7, 8, 10], dtype=np.int64)
-        expected = np.array([1, 2, -7, 8, 10], dtype=np.int8)
-        result = lib.downcast_int64(arr, na_values, use_unsigned=True)
-        tm.assert_numpy_array_equal(result, expected)
-
-        arr = np.array([1, 2, 7, 8, 300], dtype=np.int64)
-        expected = np.array([1, 2, 7, 8, 300], dtype=np.int16)
-        result = lib.downcast_int64(arr, na_values)
-        tm.assert_numpy_array_equal(result, expected)
-
-        int8_na = na_values[np.int8]
-        int64_na = na_values[np.int64]
-        arr = np.array([int64_na, 2, 3, 10, 15], dtype=np.int64)
-        expected = np.array([int8_na, 2, 3, 10, 15], dtype=np.int8)
-        result = lib.downcast_int64(arr, na_values)
-        tm.assert_numpy_array_equal(result, expected)

pandas/tests/io/parser/common.py

-43
@@ -1371,49 +1371,6 @@ def test_raise_on_no_columns(self):
         data = "\n\n\n"
         pytest.raises(EmptyDataError, self.read_csv, StringIO(data))

-    def test_compact_ints_use_unsigned(self):
-        # see gh-13323
-        data = 'a,b,c\n1,9,258'
-
-        # sanity check
-        expected = DataFrame({
-            'a': np.array([1], dtype=np.int64),
-            'b': np.array([9], dtype=np.int64),
-            'c': np.array([258], dtype=np.int64),
-        })
-        out = self.read_csv(StringIO(data))
-        tm.assert_frame_equal(out, expected)
-
-        expected = DataFrame({
-            'a': np.array([1], dtype=np.int8),
-            'b': np.array([9], dtype=np.int8),
-            'c': np.array([258], dtype=np.int16),
-        })
-
-        # default behaviour for 'use_unsigned'
-        with tm.assert_produces_warning(
-                FutureWarning, check_stacklevel=False):
-            out = self.read_csv(StringIO(data), compact_ints=True)
-        tm.assert_frame_equal(out, expected)
-
-        with tm.assert_produces_warning(
-                FutureWarning, check_stacklevel=False):
-            out = self.read_csv(StringIO(data), compact_ints=True,
-                                use_unsigned=False)
-        tm.assert_frame_equal(out, expected)
-
-        expected = DataFrame({
-            'a': np.array([1], dtype=np.uint8),
-            'b': np.array([9], dtype=np.uint8),
-            'c': np.array([258], dtype=np.uint16),
-        })
-
-        with tm.assert_produces_warning(
-                FutureWarning, check_stacklevel=False):
-            out = self.read_csv(StringIO(data), compact_ints=True,
-                                use_unsigned=True)
-        tm.assert_frame_equal(out, expected)
-
     def test_memory_map(self):
         mmap_file = os.path.join(self.dirpath, 'test_mmap.csv')
         expected = DataFrame({

pandas/tests/io/parser/test_unsupported.py

+1 -9

@@ -128,20 +128,12 @@ def read(self):
 class TestDeprecatedFeatures(object):

     @pytest.mark.parametrize("engine", ["c", "python"])
-    @pytest.mark.parametrize("kwargs", [{"compact_ints": True},
-                                        {"compact_ints": False},
-                                        {"use_unsigned": True},
-                                        {"use_unsigned": False},
-                                        {"tupleize_cols": True},
+    @pytest.mark.parametrize("kwargs", [{"tupleize_cols": True},
                                         {"tupleize_cols": False}])
     def test_deprecated_args(self, engine, kwargs):
         data = "1,2,3"
         arg, _ = list(kwargs.items())[0]

-        if engine == "python" and arg == "buffer_lines":
-            # unsupported --> exception is raised
-            return
-
         with tm.assert_produces_warning(
                 FutureWarning, check_stacklevel=False):
             read_csv(StringIO(data), engine=engine, **kwargs)
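
With the two keywords removed from ``parser_f``'s signature, passing them should now fail outright rather than merely warn. A hedged sketch of the expected post-removal behaviour (not a test added by this commit):

    import pytest
    from io import StringIO
    from pandas import read_csv

    # parser_f no longer accepts the keyword, so this presumably raises
    # TypeError: ... got an unexpected keyword argument 'compact_ints'
    with pytest.raises(TypeError):
        read_csv(StringIO('1,2,3'), compact_ints=True)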
