Skip to content

Commit 5ba464c

Browse files
committed
Merge remote-tracking branch 'upstream_main_pandas/master' into changes_to_documentation
2 parents f48b2dd + 49bc8d8 commit 5ba464c

File tree

119 files changed

+3029
-2824
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

119 files changed

+3029
-2824
lines changed

asv_bench/benchmarks/frame_methods.py

+13
Original file line numberDiff line numberDiff line change
@@ -619,4 +619,17 @@ def time_select_dtypes(self, n):
619619
self.df.select_dtypes(include="int")
620620

621621

622+
class MemoryUsage:
623+
def setup(self):
624+
self.df = DataFrame(np.random.randn(100000, 2), columns=list("AB"))
625+
self.df2 = self.df.copy()
626+
self.df2["A"] = self.df2["A"].astype("object")
627+
628+
def time_memory_usage(self):
629+
self.df.memory_usage(deep=True)
630+
631+
def time_memory_usage_object_dtype(self):
632+
self.df2.memory_usage(deep=True)
633+
634+
622635
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/indexing.py

+25
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,31 @@ def time_frame_getitem_single_column_int(self):
308308
self.df_int_col[0]
309309

310310

311+
class IndexSingleRow:
312+
params = [True, False]
313+
param_names = ["unique_cols"]
314+
315+
def setup(self, unique_cols):
316+
arr = np.arange(10 ** 7).reshape(-1, 10)
317+
df = DataFrame(arr)
318+
dtypes = ["u1", "u2", "u4", "u8", "i1", "i2", "i4", "i8", "f8", "f4"]
319+
for i, d in enumerate(dtypes):
320+
df[i] = df[i].astype(d)
321+
322+
if not unique_cols:
323+
# GH#33032 single-row lookups with non-unique columns were
324+
# 15x slower than with unique columns
325+
df.columns = ["A", "A"] + list(df.columns[2:])
326+
327+
self.df = df
328+
329+
def time_iloc_row(self, unique_cols):
330+
self.df.iloc[10000]
331+
332+
def time_loc_row(self, unique_cols):
333+
self.df.loc[10000]
334+
335+
311336
class AssignTimeseriesIndex:
312337
def setup(self):
313338
N = 100000

asv_bench/benchmarks/sparse.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
import scipy.sparse
33

44
import pandas as pd
5-
from pandas import MultiIndex, Series, SparseArray, date_range
5+
from pandas import MultiIndex, Series, date_range
6+
from pandas.arrays import SparseArray
67

78

89
def make_array(size, dense_proportion, fill_value, dtype):

asv_bench/benchmarks/timeseries.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -336,15 +336,33 @@ def time_infer_quarter(self):
336336

337337
class ToDatetimeFormat:
338338
def setup(self):
339-
self.s = Series(["19MAY11", "19MAY11:00:00:00"] * 100000)
339+
N = 100000
340+
self.s = Series(["19MAY11", "19MAY11:00:00:00"] * N)
340341
self.s2 = self.s.str.replace(":\\S+$", "")
341342

343+
self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N
344+
self.diff_offset = [
345+
f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10)
346+
] * int(N / 10)
347+
342348
def time_exact(self):
343349
to_datetime(self.s2, format="%d%b%y")
344350

345351
def time_no_exact(self):
346352
to_datetime(self.s, format="%d%b%y", exact=False)
347353

354+
def time_same_offset(self):
355+
to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z")
356+
357+
def time_different_offset(self):
358+
to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z")
359+
360+
def time_same_offset_to_utc(self):
361+
to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True)
362+
363+
def time_different_offset_to_utc(self):
364+
to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True)
365+
348366

349367
class ToDatetimeCache:
350368

ci/code_checks.sh

+6-2
Original file line numberDiff line numberDiff line change
@@ -279,8 +279,8 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
279279
pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
280280
RET=$(($RET + $?)) ; echo $MSG "DONE"
281281

282-
MSG='Doctests datetimes.py' ; echo $MSG
283-
pytest -q --doctest-modules pandas/core/tools/datetimes.py
282+
MSG='Doctests tools' ; echo $MSG
283+
pytest -q --doctest-modules pandas/core/tools/
284284
RET=$(($RET + $?)) ; echo $MSG "DONE"
285285

286286
MSG='Doctests reshaping functions' ; echo $MSG
@@ -323,6 +323,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
323323
MSG='Doctests tseries' ; echo $MSG
324324
pytest -q --doctest-modules pandas/tseries/
325325
RET=$(($RET + $?)) ; echo $MSG "DONE"
326+
327+
MSG='Doctests computation' ; echo $MSG
328+
pytest -q --doctest-modules pandas/core/computation/
329+
RET=$(($RET + $?)) ; echo $MSG "DONE"
326330
fi
327331

328332
### DOCSTRINGS ###

doc/source/development/contributing_docstring.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -998,4 +998,4 @@ mapping function names to docstrings. Wherever possible, we prefer using
998998

999999
See ``pandas.core.generic.NDFrame.fillna`` for an example template, and
10001000
``pandas.core.series.Series.fillna`` and ``pandas.core.frame.DataFrame.fillna``
1001-
for the filled versions.
1001+
for the filled versions.

doc/source/development/developer.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -182,4 +182,4 @@ As an example of fully-formed metadata:
182182
'creator': {
183183
'library': 'pyarrow',
184184
'version': '0.13.0'
185-
}}
185+
}}

doc/source/development/extending.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -501,4 +501,4 @@ registers the default "matplotlib" backend as follows.
501501
502502
503503
More information on how to implement a third-party plotting backend can be found at
504-
https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py#L1.
504+
https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py#L1.

doc/source/development/meeting.rst

-1
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,3 @@ You can subscribe to this calendar with the following links:
2929

3030
Additionally, we'll sometimes have one-off meetings on specific topics.
3131
These will be published on the same calendar.
32-

doc/source/getting_started/intro_tutorials/01_table_oriented.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ documentation.
2626
</li>
2727
</ul>
2828

29-
Pandas data table representation
29+
pandas data table representation
3030
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3131

3232
.. image:: ../../_static/schemas/01_table_dataframe.svg

doc/source/getting_started/tutorials.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
{{ header }}
44

55
*******************
6-
Community Tutorials
6+
Community tutorials
77
*******************
88

99
This is a guide to many pandas tutorials by the community, geared mainly for new users.
1010

11-
pandas Cookbook by Julia Evans
11+
pandas cookbook by Julia Evans
1212
------------------------------
1313

1414
The goal of this 2015 cookbook (by `Julia Evans <https://jvns.ca>`_) is to
@@ -18,7 +18,7 @@ entails.
1818
For the table of contents, see the `pandas-cookbook GitHub
1919
repository <https://github.com/jvns/pandas-cookbook>`_.
2020

21-
Learn Pandas by Hernan Rojas
21+
Learn pandas by Hernan Rojas
2222
----------------------------
2323

2424
A set of lessons for new pandas users: https://bitbucket.org/hrojas/learn-pandas

doc/source/reference/general_utility_functions.rst

+8
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,11 @@ Scalar introspection
108108
api.types.is_re
109109
api.types.is_re_compilable
110110
api.types.is_scalar
111+
112+
Bug report function
113+
-------------------
114+
.. autosummary::
115+
:toctree: api/
116+
117+
show_versions
118+

doc/source/user_guide/boolean.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ If you would prefer to keep the ``NA`` values you can manually fill them with ``
3939
4040
.. _boolean.kleene:
4141

42-
Kleene Logical Operations
42+
Kleene logical operations
4343
-------------------------
4444

4545
:class:`arrays.BooleanArray` implements `Kleene Logic`_ (sometimes called three-value logic) for

doc/source/user_guide/options.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ More information can be found in the `ipython documentation
140140
141141
.. _options.frequently_used:
142142

143-
Frequently Used Options
143+
Frequently used options
144144
-----------------------
145145
The following is a walk-through of the more frequently used display options.
146146

doc/source/user_guide/reshaping.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ the right thing:
272272
273273
.. _reshaping.melt:
274274

275-
Reshaping by Melt
275+
Reshaping by melt
276276
-----------------
277277

278278
.. image:: ../_static/reshaping_melt.png

doc/source/user_guide/text.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Working with text data
88

99
.. _text.types:
1010

11-
Text Data Types
11+
Text data types
1212
---------------
1313

1414
.. versionadded:: 1.0.0
@@ -113,7 +113,7 @@ Everything else that follows in the rest of this document applies equally to
113113

114114
.. _text.string_methods:
115115

116-
String Methods
116+
String methods
117117
--------------
118118

119119
Series and Index are equipped with a set of string processing methods
@@ -633,7 +633,7 @@ same result as a ``Series.str.extractall`` with a default index (starts from 0).
633633
pd.Series(["a1a2", "b1", "c1"], dtype="string").str.extractall(two_groups)
634634
635635
636-
Testing for Strings that match or contain a pattern
636+
Testing for strings that match or contain a pattern
637637
---------------------------------------------------
638638

639639
You can check whether elements contain a pattern:

doc/source/user_guide/timeseries.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ as ``np.nan`` does for float data.
122122
123123
.. _timeseries.representation:
124124

125-
Timestamps vs. Time Spans
125+
Timestamps vs. time spans
126126
-------------------------
127127

128128
Timestamped data is the most basic type of time series data that associates
@@ -1434,7 +1434,7 @@ or calendars with additional rules.
14341434
14351435
.. _timeseries.advanced_datetime:
14361436

1437-
Time Series-Related Instance Methods
1437+
Time series-related instance methods
14381438
------------------------------------
14391439

14401440
Shifting / lagging

doc/source/user_guide/visualization.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -796,7 +796,7 @@ before plotting.
796796

797797
.. _visualization.tools:
798798

799-
Plotting Tools
799+
Plotting tools
800800
--------------
801801

802802
These functions can be imported from ``pandas.plotting``
@@ -1045,7 +1045,7 @@ for more information.
10451045
10461046
.. _visualization.formatting:
10471047

1048-
Plot Formatting
1048+
Plot formatting
10491049
---------------
10501050

10511051
Setting the plot style

doc/source/whatsnew/v1.1.0.rst

+20-2
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,22 @@ For example:
5858
5959
For more on working with fold, see :ref:`Fold subsection <timeseries.fold>` in the user guide.
6060

61+
.. _whatsnew_110.to_datetime_multiple_tzname_tzoffset_support:
62+
63+
Parsing timezone-aware format with different timezones in to_datetime
64+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
65+
66+
:func:`to_datetime` now supports parsing formats containing timezone names (``%Z``) and UTC offsets (``%z``) from different timezones and then converting them to UTC by setting ``utc=True``. This returns a :class:`DatetimeIndex` with a UTC timezone, whereas an :class:`Index` with ``object`` dtype is returned if ``utc=True`` is not set (:issue:`32792`).
67+
68+
For example:
69+
70+
.. ipython:: python
71+
72+
tz_strs = ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100",
73+
"2010-01-01 12:00:00 +0300", "2010-01-01 12:00:00 +0400"]
74+
pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z', utc=True)
75+
pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z')
76+
6177
.. _whatsnew_110.enhancements.other:
6278

6379
Other enhancements
@@ -237,6 +253,7 @@ Deprecations
237253
- Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
238254
- Accepting short names for ``orient`` in :meth:`DataFrame.to_dict` is deprecated and will be removed in a future version (:issue:`32515`)
239255
- :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`)
256+
- The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`)
240257

241258
.. ---------------------------------------------------------------------------
242259
@@ -296,7 +313,7 @@ Timedelta
296313
Timezones
297314
^^^^^^^^^
298315

299-
-
316+
- Bug in :func:`to_datetime` with ``infer_datetime_format=True`` where timezone names (e.g. ``UTC``) would not be parsed correctly (:issue:`33133`)
300317
-
301318

302319

@@ -389,7 +406,7 @@ I/O
389406
- Bug in :meth:`read_csv` was causing a segfault when there were blank lines between the header and data rows (:issue:`28071`)
390407
- Bug in :meth:`read_csv` was raising a misleading exception on a permissions issue (:issue:`23784`)
391408
- Bug in :meth:`read_csv` was raising an ``IndexError`` when ``header=None`` and there were 2 extra data columns
392-
409+
- Bug in :meth:`DataFrame.to_sql` where an ``AttributeError`` was raised when saving an out of bounds date (:issue:`26761`)
393410

394411
Plotting
395412
^^^^^^^^
@@ -454,6 +471,7 @@ Other
454471
- Fixed bug in :func:`pandas.testing.assert_series_equal` where dtypes were checked for ``Interval`` and ``ExtensionArray`` operands when ``check_dtype`` was ``False`` (:issue:`32747`)
455472
- Bug in :meth:`Series.map` not raising on invalid ``na_action`` (:issue:`32815`)
456473
- Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`)
474+
- Bug in :meth:`DataFrame.plot.scatter` caused an error when plotting variable marker sizes (:issue:`32904`)
457475

458476
.. ---------------------------------------------------------------------------
459477

pandas/_libs/__init__.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
1-
# flake8: noqa
1+
__all__ = [
2+
"NaT",
3+
"NaTType",
4+
"OutOfBoundsDatetime",
5+
"Period",
6+
"Timedelta",
7+
"Timestamp",
8+
"iNaT",
9+
]
210

3-
from .tslibs import (
11+
12+
from pandas._libs.tslibs import (
413
NaT,
514
NaTType,
615
OutOfBoundsDatetime,

pandas/_libs/index.pyx

-1
Original file line numberDiff line numberDiff line change
@@ -479,7 +479,6 @@ cdef class PeriodEngine(Int64Engine):
479479
return scalar.value
480480
if isinstance(scalar, Period):
481481
# NB: we assume that we have the correct freq here.
482-
# TODO: potential optimize by checking for _Period?
483482
return scalar.ordinal
484483
raise TypeError(scalar)
485484

0 commit comments

Comments
 (0)