Skip to content

Commit 2b71592

Browse files
committed
Merge remote-tracking branch 'upstream/master' into bug/categorical-indexing-1row-df
2 parents 39c95f4 + db60ab6 commit 2b71592

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+626
-876
lines changed

asv_bench/benchmarks/categoricals.py

+27-15
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,6 @@
1414
pass
1515

1616

17-
class Concat:
18-
def setup(self):
19-
N = 10 ** 5
20-
self.s = pd.Series(list("aabbcd") * N).astype("category")
21-
22-
self.a = pd.Categorical(list("aabbcd") * N)
23-
self.b = pd.Categorical(list("bbcdjk") * N)
24-
25-
def time_concat(self):
26-
pd.concat([self.s, self.s])
27-
28-
def time_union(self):
29-
union_categoricals([self.a, self.b])
30-
31-
3217
class Constructor:
3318
def setup(self):
3419
N = 10 ** 5
@@ -77,6 +62,33 @@ def time_existing_series(self):
7762
pd.Categorical(self.series)
7863

7964

65+
class CategoricalOps:
66+
params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
67+
param_names = ["op"]
68+
69+
def setup(self, op):
70+
N = 10 ** 5
71+
self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)
72+
73+
def time_categorical_op(self, op):
74+
getattr(self.cat, op)("b")
75+
76+
77+
class Concat:
78+
def setup(self):
79+
N = 10 ** 5
80+
self.s = pd.Series(list("aabbcd") * N).astype("category")
81+
82+
self.a = pd.Categorical(list("aabbcd") * N)
83+
self.b = pd.Categorical(list("bbcdjk") * N)
84+
85+
def time_concat(self):
86+
pd.concat([self.s, self.s])
87+
88+
def time_union(self):
89+
union_categoricals([self.a, self.b])
90+
91+
8092
class ValueCounts:
8193

8294
params = [True, False]

ci/azure/posix.yml

+7-10
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,13 @@ jobs:
4444
PATTERN: "not slow and not network"
4545
LOCALE_OVERRIDE: "zh_CN.UTF-8"
4646

47-
# https://github.com/pandas-dev/pandas/issues/29432
48-
# py37_np_dev:
49-
# ENV_FILE: ci/deps/azure-37-numpydev.yaml
50-
# CONDA_PY: "37"
51-
# PATTERN: "not slow and not network"
52-
# TEST_ARGS: "-W error"
53-
# PANDAS_TESTING_MODE: "deprecate"
54-
# EXTRA_APT: "xsel"
55-
# # TODO:
56-
# continueOnError: true
47+
py37_np_dev:
48+
ENV_FILE: ci/deps/azure-37-numpydev.yaml
49+
CONDA_PY: "37"
50+
PATTERN: "not slow and not network"
51+
TEST_ARGS: "-W error"
52+
PANDAS_TESTING_MODE: "deprecate"
53+
EXTRA_APT: "xsel"
5754

5855
steps:
5956
- script: |

ci/deps/azure-macos-36.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ dependencies:
2020
- matplotlib=2.2.3
2121
- nomkl
2222
- numexpr
23-
- numpy=1.13.3
23+
- numpy=1.14
2424
- openpyxl
25-
- pyarrow
25+
- pyarrow>=0.12.0
2626
- pytables
2727
- python-dateutil==2.6.1
2828
- pytz

ci/deps/azure-windows-36.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ dependencies:
2020
- numexpr
2121
- numpy=1.15.*
2222
- openpyxl
23-
- pyarrow
23+
- pyarrow>=0.12.0
2424
- pytables
2525
- python-dateutil
2626
- pytz

doc/redirects.csv

-1
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,6 @@ generated/pandas.MultiIndex.sortlevel,../reference/api/pandas.MultiIndex.sortlev
828828
generated/pandas.MultiIndex.swaplevel,../reference/api/pandas.MultiIndex.swaplevel
829829
generated/pandas.MultiIndex.to_flat_index,../reference/api/pandas.MultiIndex.to_flat_index
830830
generated/pandas.MultiIndex.to_frame,../reference/api/pandas.MultiIndex.to_frame
831-
generated/pandas.MultiIndex.to_hierarchical,../reference/api/pandas.MultiIndex.to_hierarchical
832831
generated/pandas.notna,../reference/api/pandas.notna
833832
generated/pandas.notnull,../reference/api/pandas.notnull
834833
generated/pandas.option_context,../reference/api/pandas.option_context

doc/source/getting_started/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ matplotlib 2.2.2 Visualization
258258
openpyxl 2.4.8 Reading / writing for xlsx files
259259
pandas-gbq 0.8.0 Google Big Query access
260260
psycopg2 PostgreSQL engine for sqlalchemy
261-
pyarrow 0.9.0 Parquet and feather reading / writing
261+
pyarrow 0.12.0 Parquet and feather reading / writing
262262
pymysql 0.7.11 MySQL engine for sqlalchemy
263263
pyreadstat SPSS files (.sav) reading
264264
pytables 3.4.2 HDF5 reading / writing

doc/source/reference/indexing.rst

-1
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,6 @@ MultiIndex components
305305

306306
MultiIndex.set_levels
307307
MultiIndex.set_codes
308-
MultiIndex.to_hierarchical
309308
MultiIndex.to_flat_index
310309
MultiIndex.to_frame
311310
MultiIndex.is_lexsorted

doc/source/user_guide/scale.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ Use efficient datatypes
9393
-----------------------
9494

9595
The default pandas data types are not the most memory efficient. This is
96-
especially true for high-cardinality text data (columns with relatively few
97-
unique values). By using more efficient data types you can store larger datasets
98-
in memory.
96+
especially true for text data columns with relatively few unique values (commonly
97+
referred to as "low-cardinality" data). By using more efficient data types you
98+
can store larger datasets in memory.
9999

100100
.. ipython:: python
101101

doc/source/whatsnew/v1.0.0.rst

+68-52
Original file line numberDiff line numberDiff line change
@@ -265,62 +265,62 @@ The following methods now also correctly output values for unobserved categories
265265
Increased minimum versions for dependencies
266266
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
267267

268-
Some minimum supported versions of dependencies were updated (:issue:`29723`).
268+
Some minimum supported versions of dependencies were updated (:issue:`29766`, :issue:`29723`).
269269
If installed, we now require:
270270

271-
+-----------------+-----------------+----------+
272-
| Package | Minimum Version | Required |
273-
+=================+=================+==========+
274-
| numpy | 1.13.3 | X |
275-
+-----------------+-----------------+----------+
276-
| pytz | 2015.4 | X |
277-
+-----------------+-----------------+----------+
278-
| python-dateutil | 2.6.1 | X |
279-
+-----------------+-----------------+----------+
280-
| bottleneck | 1.2.1 | |
281-
+-----------------+-----------------+----------+
282-
| numexpr | 2.6.2 | |
283-
+-----------------+-----------------+----------+
284-
| pytest (dev) | 4.0.2 | |
285-
+-----------------+-----------------+----------+
271+
+-----------------+-----------------+----------+---------+
272+
| Package | Minimum Version | Required | Changed |
273+
+=================+=================+==========+=========+
274+
| numpy | 1.13.3 | X | |
275+
+-----------------+-----------------+----------+---------+
276+
| pytz | 2015.4 | X | |
277+
+-----------------+-----------------+----------+---------+
278+
| python-dateutil | 2.6.1 | X | |
279+
+-----------------+-----------------+----------+---------+
280+
| bottleneck | 1.2.1 | | |
281+
+-----------------+-----------------+----------+---------+
282+
| numexpr | 2.6.2 | | |
283+
+-----------------+-----------------+----------+---------+
284+
| pytest (dev) | 4.0.2 | | |
285+
+-----------------+-----------------+----------+---------+
286286

287287
For `optional libraries <https://dev.pandas.io/docs/install.html#dependencies>`_ the general recommendation is to use the latest version.
288288
The following table lists the lowest version per library that is currently being tested throughout the development of pandas.
289289
Optional libraries below the lowest tested version may still work, but are not considered supported.
290290

291-
+-----------------+-----------------+
292-
| Package | Minimum Version |
293-
+=================+=================+
294-
| beautifulsoup4 | 4.6.0 |
295-
+-----------------+-----------------+
296-
| fastparquet | 0.3.2 |
297-
+-----------------+-----------------+
298-
| gcsfs | 0.2.2 |
299-
+-----------------+-----------------+
300-
| lxml | 3.8.0 |
301-
+-----------------+-----------------+
302-
| matplotlib | 2.2.2 |
303-
+-----------------+-----------------+
304-
| openpyxl | 2.4.8 |
305-
+-----------------+-----------------+
306-
| pyarrow | 0.9.0 |
307-
+-----------------+-----------------+
308-
| pymysql | 0.7.1 |
309-
+-----------------+-----------------+
310-
| pytables | 3.4.2 |
311-
+-----------------+-----------------+
312-
| scipy | 0.19.0 |
313-
+-----------------+-----------------+
314-
| sqlalchemy | 1.1.4 |
315-
+-----------------+-----------------+
316-
| xarray | 0.8.2 |
317-
+-----------------+-----------------+
318-
| xlrd | 1.1.0 |
319-
+-----------------+-----------------+
320-
| xlsxwriter | 0.9.8 |
321-
+-----------------+-----------------+
322-
| xlwt | 1.2.0 |
323-
+-----------------+-----------------+
291+
+-----------------+-----------------+---------+
292+
| Package | Minimum Version | Changed |
293+
+=================+=================+=========+
294+
| beautifulsoup4 | 4.6.0 | |
295+
+-----------------+-----------------+---------+
296+
| fastparquet | 0.3.2 | X |
297+
+-----------------+-----------------+---------+
298+
| gcsfs | 0.2.2 | |
299+
+-----------------+-----------------+---------+
300+
| lxml | 3.8.0 | |
301+
+-----------------+-----------------+---------+
302+
| matplotlib | 2.2.2 | |
303+
+-----------------+-----------------+---------+
304+
| openpyxl | 2.4.8 | |
305+
+-----------------+-----------------+---------+
306+
| pyarrow | 0.12.0 | X |
307+
+-----------------+-----------------+---------+
308+
| pymysql | 0.7.1 | |
309+
+-----------------+-----------------+---------+
310+
| pytables | 3.4.2 | |
311+
+-----------------+-----------------+---------+
312+
| scipy | 0.19.0 | |
313+
+-----------------+-----------------+---------+
314+
| sqlalchemy | 1.1.4 | |
315+
+-----------------+-----------------+---------+
316+
| xarray | 0.8.2 | |
317+
+-----------------+-----------------+---------+
318+
| xlrd | 1.1.0 | |
319+
+-----------------+-----------------+---------+
320+
| xlsxwriter | 0.9.8 | |
321+
+-----------------+-----------------+---------+
322+
| xlwt | 1.2.0 | |
323+
+-----------------+-----------------+---------+
324324

325325
See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.
326326

@@ -364,7 +364,7 @@ Deprecations
364364
value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)``
365365
is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`).
366366
- :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`)
367-
367+
- :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`)
368368

369369
.. _whatsnew_1000.prior_deprecations:
370370

@@ -401,10 +401,12 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
401401

402402
**Other removals**
403403

404+
- Floordiv of integer-dtyped array by :class:`Timedelta` now raises ``TypeError`` (:issue:`21036`)
404405
- Removed the previously deprecated :meth:`Index.summary` (:issue:`18217`)
405406
- Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`)
406407
- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to False (:issue:`27600`)
407408
- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`)
409+
- A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`)
408410
- Removed :meth:`Series.from_array` (:issue:`18258`)
409411
- Removed :meth:`DataFrame.from_items` (:issue:`18458`)
410412
- Removed :meth:`DataFrame.as_matrix`, :meth:`Series.as_matrix` (:issue:`18458`)
@@ -415,6 +417,11 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
415417
- :func:`core.internals.blocks.make_block` no longer accepts the "fastpath" keyword (:issue:`19265`)
416418
- :meth:`Block.make_block_same_class` no longer accepts the "dtype" keyword (:issue:`19434`)
417419
- Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`)
420+
- Removed the previously deprecated :meth:`MultiIndex.to_hierarchical` (:issue:`21613`)
421+
- Removed the previously deprecated :attr:`MultiIndex.labels`, use :attr:`MultiIndex.codes` instead (:issue:`23752`)
422+
- Removed the previously deprecated "labels" keyword from the :class:`MultiIndex` constructor, use "codes" instead (:issue:`23752`)
423+
- Removed the previously deprecated :meth:`MultiIndex.set_labels`, use :meth:`MultiIndex.set_codes` instead (:issue:`23752`)
424+
- Removed the previously deprecated "labels" keyword from :meth:`MultiIndex.set_codes`, :meth:`MultiIndex.copy`, :meth:`MultiIndex.drop`, use "codes" instead (:issue:`23752`)
418425
- Removed support for legacy HDF5 formats (:issue:`29787`)
419426
- :func:`read_excel` removed support for "skip_footer" argument, use "skipfooter" instead (:issue:`18836`)
420427
- :func:`read_excel` no longer allows an integer value for the parameter ``usecols``, instead pass a list of integers from 0 to ``usecols`` inclusive (:issue:`23635`)
@@ -434,11 +441,17 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
434441
- Removed the previously deprecated :meth:`DataFrame.get_ftype_counts`, :meth:`Series.get_ftype_counts` (:issue:`18243`)
435442
- Removed the previously deprecated :meth:`Index.get_duplicated`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
436443
- Removed the previously deprecated :meth:`Series.clip_upper`, :meth:`Series.clip_lower`, :meth:`DataFrame.clip_upper`, :meth:`DataFrame.clip_lower` (:issue:`24203`)
444+
- Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`)
445+
- Removed the previously deprecated :attr:`DatetimeIndex.offset` (:issue:`20730`)
446+
- Removed the previously deprecated :meth:`DatetimeIndex.asobject`, :meth:`TimedeltaIndex.asobject`, :meth:`PeriodIndex.asobject`, use ``astype(object)`` instead (:issue:`29801`)
437447
- Removed previously deprecated "order" argument from :func:`factorize` (:issue:`19751`)
438448
- Removed previously deprecated "v" argument from :meth:`FrozenNDarray.searchsorted`, use "value" instead (:issue:`22672`)
439449
- :func:`read_stata` and :meth:`DataFrame.to_stata` no longer support the "encoding" argument (:issue:`21400`)
450+
- In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`)
440451
- Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`)
441452
- Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`)
453+
- Changed the default value for the `raw` argument in :func:`Series.rolling().apply() <pandas.core.window.Rolling.apply>`, :func:`DataFrame.rolling().apply() <pandas.core.window.Rolling.apply>`,
454+
:func:`Series.expanding().apply() <pandas.core.window.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <pandas.core.window.Expanding.apply>` to ``False`` (:issue:`20584`)
442455
-
443456

444457
.. _whatsnew_1000.performance:
@@ -453,7 +466,9 @@ Performance improvements
453466
- Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`)
454467
- Performance improvement in :meth:`DataFrame.select_dtypes` by using vectorization instead of iterating over a loop (:issue:`28317`)
455468
- Performance improvement in :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` (:issue:`28795`)
456-
- Performance improvement when comparing a :meth:`Categorical` with a scalar and the scalar is not found in the categories (:issue:`29750`)
469+
- Performance improvement when comparing a :class:`Categorical` with a scalar and the scalar is not found in the categories (:issue:`29750`)
470+
- Performance improvement when checking whether values in a :class:`Categorical` are equal to, greater than or equal to, or greater than a given scalar.
471+
The improvement is not present when checking whether the :class:`Categorical` is less than, or less than or equal to, the scalar (:issue:`29820`)
457472

458473
.. _whatsnew_1000.bug_fixes:
459474

@@ -549,6 +564,7 @@ Indexing
549564
- Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`)
550565
- :meth:`Index.union` could fail when the left contained duplicates (:issue:`28257`)
551566
- :meth:`Index.get_indexer_non_unique` could fail with `TypeError` in some cases, such as when searching for ints in a string index (:issue:`28257`)
567+
- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`)
552568
- Bug in :meth:`DataFrame.loc` with incorrect dtype when setting Categorical value in 1-row DataFrame (:issue:`25495`)
553569

554570
Missing
@@ -664,4 +680,4 @@ Other
664680
.. _whatsnew_1000.contributors:
665681

666682
Contributors
667-
~~~~~~~~~~~~
683+
~~~~~~~~~~~~

pandas/_libs/index.pyx

+6-2
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,12 @@ cdef class IndexEngine:
141141

142142
if self.is_monotonic_increasing:
143143
values = self._get_index_values()
144-
left = values.searchsorted(val, side='left')
145-
right = values.searchsorted(val, side='right')
144+
try:
145+
left = values.searchsorted(val, side='left')
146+
right = values.searchsorted(val, side='right')
147+
except TypeError:
148+
# e.g. GH#29189 get_loc(None) with a Float64Index
149+
raise KeyError(val)
146150

147151
diff = right - left
148152
if diff == 0:

pandas/_libs/tslibs/period.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -2201,7 +2201,7 @@ cdef class _Period:
22012201
return self.days_in_month
22022202

22032203
@property
2204-
def is_leap_year(self):
2204+
def is_leap_year(self) -> bool:
22052205
return bool(is_leapyear(self.year))
22062206

22072207
@classmethod

pandas/_libs/tslibs/timedeltas.pyx

+2-12
Original file line numberDiff line numberDiff line change
@@ -1509,18 +1509,8 @@ class Timedelta(_Timedelta):
15091509
if other.dtype.kind == 'm':
15101510
# also timedelta-like
15111511
return _broadcast_floordiv_td64(self.value, other, _rfloordiv)
1512-
elif other.dtype.kind == 'i':
1513-
# Backwards compatibility
1514-
# GH-19761
1515-
msg = textwrap.dedent("""\
1516-
Floor division between integer array and Timedelta is
1517-
deprecated. Use 'array // timedelta.value' instead.
1518-
If you want to obtain epochs from an array of timestamps,
1519-
you can rather use
1520-
'(array - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")'.
1521-
""")
1522-
warnings.warn(msg, FutureWarning)
1523-
return other // self.value
1512+
1513+
# Includes integer array // Timedelta, deprecated in GH#19761
15241514
raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__')
15251515

15261516
elif is_float_object(other) and util.is_nan(other):

0 commit comments

Comments
 (0)