Skip to content

Commit fc91118

Browse files
committed
Merge branch 'master' into doc-fix-flake8-issue-in-groupby.rst-pandas-dev#24178
2 parents b80ff17 + 08c920e commit fc91118

File tree

166 files changed

+6496
-11352
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

166 files changed

+6496
-11352
lines changed

asv_bench/benchmarks/frame_methods.py

+62-1
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ def setup(self):
103103
self.df2 = DataFrame(np.random.randn(N * 50, 10))
104104
self.df3 = DataFrame(np.random.randn(N, 5 * N),
105105
columns=['C' + str(c) for c in range(N * 5)])
106+
self.df4 = DataFrame(np.random.randn(N * 1000, 10))
106107

107108
def time_iteritems(self):
108109
# (monitor no-copying behaviour)
@@ -119,10 +120,70 @@ def time_iteritems_indexing(self):
119120
for col in self.df3:
120121
self.df3[col]
121122

123+
def time_itertuples_start(self):
124+
self.df4.itertuples()
125+
126+
def time_itertuples_read_first(self):
127+
next(self.df4.itertuples())
128+
122129
def time_itertuples(self):
123-
for row in self.df2.itertuples():
130+
for row in self.df4.itertuples():
131+
pass
132+
133+
def time_itertuples_to_list(self):
134+
list(self.df4.itertuples())
135+
136+
def mem_itertuples_start(self):
137+
return self.df4.itertuples()
138+
139+
def peakmem_itertuples_start(self):
140+
self.df4.itertuples()
141+
142+
def mem_itertuples_read_first(self):
143+
return next(self.df4.itertuples())
144+
145+
def peakmem_itertuples(self):
146+
for row in self.df4.itertuples():
147+
pass
148+
149+
def mem_itertuples_to_list(self):
150+
return list(self.df4.itertuples())
151+
152+
def peakmem_itertuples_to_list(self):
153+
list(self.df4.itertuples())
154+
155+
def time_itertuples_raw_start(self):
156+
self.df4.itertuples(index=False, name=None)
157+
158+
def time_itertuples_raw_read_first(self):
159+
next(self.df4.itertuples(index=False, name=None))
160+
161+
def time_itertuples_raw_tuples(self):
162+
for row in self.df4.itertuples(index=False, name=None):
124163
pass
125164

165+
def time_itertuples_raw_tuples_to_list(self):
166+
list(self.df4.itertuples(index=False, name=None))
167+
168+
def mem_itertuples_raw_start(self):
169+
return self.df4.itertuples(index=False, name=None)
170+
171+
def peakmem_itertuples_raw_start(self):
172+
self.df4.itertuples(index=False, name=None)
173+
174+
def peakmem_itertuples_raw_read_first(self):
175+
next(self.df4.itertuples(index=False, name=None))
176+
177+
def peakmem_itertuples_raw(self):
178+
for row in self.df4.itertuples(index=False, name=None):
179+
pass
180+
181+
def mem_itertuples_raw_to_list(self):
182+
return list(self.df4.itertuples(index=False, name=None))
183+
184+
def peakmem_itertuples_raw_to_list(self):
185+
list(self.df4.itertuples(index=False, name=None))
186+
126187
def time_iterrows(self):
127188
for row in self.df.iterrows():
128189
pass

asv_bench/benchmarks/join_merge.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def setup(self, axis):
5050
self.empty_right = [df, DataFrame()]
5151

5252
def time_concat_series(self, axis):
53-
concat(self.series, axis=axis)
53+
concat(self.series, axis=axis, sort=False)
5454

5555
def time_concat_small_frames(self, axis):
5656
concat(self.small_frames, axis=axis)

asv_bench/benchmarks/panel_ctor.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import warnings
22
from datetime import datetime, timedelta
33

4-
from pandas import DataFrame, Panel, DatetimeIndex, date_range
4+
from pandas import DataFrame, Panel, date_range
55

66

77
class DifferentIndexes(object):
@@ -23,9 +23,9 @@ def time_from_dict(self):
2323
class SameIndexes(object):
2424

2525
def setup(self):
26-
idx = DatetimeIndex(start=datetime(1990, 1, 1),
27-
end=datetime(2012, 1, 1),
28-
freq='D')
26+
idx = date_range(start=datetime(1990, 1, 1),
27+
end=datetime(2012, 1, 1),
28+
freq='D')
2929
df = DataFrame({'a': 0, 'b': 1, 'c': 2}, index=idx)
3030
self.data_frames = dict(enumerate([df] * 100))
3131

@@ -40,10 +40,10 @@ def setup(self):
4040
start = datetime(1990, 1, 1)
4141
end = datetime(2012, 1, 1)
4242
df1 = DataFrame({'a': 0, 'b': 1, 'c': 2},
43-
index=DatetimeIndex(start=start, end=end, freq='D'))
43+
index=date_range(start=start, end=end, freq='D'))
4444
end += timedelta(days=1)
4545
df2 = DataFrame({'a': 0, 'b': 1, 'c': 2},
46-
index=DatetimeIndex(start=start, end=end, freq='D'))
46+
index=date_range(start=start, end=end, freq='D'))
4747
dfs = [df1] * 50 + [df2] * 50
4848
self.data_frames = dict(enumerate(dfs))
4949

asv_bench/benchmarks/reindex.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
import numpy as np
22
import pandas.util.testing as tm
3-
from pandas import (DataFrame, Series, DatetimeIndex, MultiIndex, Index,
3+
from pandas import (DataFrame, Series, MultiIndex, Index,
44
date_range)
55
from .pandas_vb_common import lib
66

77

88
class Reindex(object):
99

1010
def setup(self):
11-
rng = DatetimeIndex(start='1/1/1970', periods=10000, freq='1min')
11+
rng = date_range(start='1/1/1970', periods=10000, freq='1min')
1212
self.df = DataFrame(np.random.rand(10000, 10), index=rng,
1313
columns=range(10))
1414
self.df['foo'] = 'bar'

asv_bench/benchmarks/timedelta.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import datetime
22

33
import numpy as np
4-
from pandas import Series, timedelta_range, to_timedelta, Timestamp, \
5-
Timedelta, TimedeltaIndex, DataFrame
4+
5+
from pandas import (
6+
DataFrame, Series, Timedelta, Timestamp, timedelta_range, to_timedelta)
67

78

89
class TimedeltaConstructor(object):
@@ -122,8 +123,8 @@ def time_timedelta_nanoseconds(self, series):
122123
class TimedeltaIndexing(object):
123124

124125
def setup(self):
125-
self.index = TimedeltaIndex(start='1985', periods=1000, freq='D')
126-
self.index2 = TimedeltaIndex(start='1986', periods=1000, freq='D')
126+
self.index = timedelta_range(start='1985', periods=1000, freq='D')
127+
self.index2 = timedelta_range(start='1986', periods=1000, freq='D')
127128
self.series = Series(range(1000), index=self.index)
128129
self.timedelta = self.index[500]
129130

asv_bench/benchmarks/timestamp.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import datetime
22

3-
from pandas import Timestamp
4-
import pytz
53
import dateutil
4+
import pytz
5+
6+
from pandas import Timestamp
67

78

89
class TimestampConstruction(object):
@@ -46,7 +47,7 @@ def time_dayofweek(self, tz, freq):
4647
self.ts.dayofweek
4748

4849
def time_weekday_name(self, tz, freq):
49-
self.ts.weekday_name
50+
self.ts.day_name
5051

5152
def time_dayofyear(self, tz, freq):
5253
self.ts.dayofyear

azure-pipelines.yml

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ jobs:
4343
ci/incremental/install_miniconda.sh
4444
ci/incremental/setup_conda_environment.sh
4545
displayName: 'Set up environment'
46+
condition: true
4647
4748
# Do not require pandas
4849
- script: |

ci/code_checks.sh

+7-2
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
145145
RET=$(($RET + $?)) ; echo $MSG "DONE"
146146

147147
MSG='Check that the deprecated `assert_raises_regex` is not used (`pytest.raises(match=pattern)` should be used instead)' ; echo $MSG
148-
invgrep -R --exclude=*.pyc --exclude=testing.py --exclude=test_testing.py assert_raises_regex pandas
148+
invgrep -R --exclude=*.pyc --exclude=testing.py --exclude=test_util.py assert_raises_regex pandas
149149
RET=$(($RET + $?)) ; echo $MSG "DONE"
150150

151151
# Check that we use pytest.raises only as a context manager
@@ -158,7 +158,12 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
158158
# RET=$(($RET + $?)) ; echo $MSG "DONE"
159159

160160
MSG='Check that no file in the repo contains tailing whitespaces' ; echo $MSG
161-
invgrep --exclude="*.svg" -RI "\s$" *
161+
set -o pipefail
162+
if [[ "$AZURE" == "true" ]]; then
163+
! grep -n --exclude="*.svg" -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
164+
else
165+
! grep -n --exclude="*.svg" -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
166+
fi
162167
RET=$(($RET + $?)) ; echo $MSG "DONE"
163168
fi
164169

doc/source/advanced.rst

+8-7
Original file line numberDiff line numberDiff line change
@@ -778,12 +778,12 @@ a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the c
778778
of the **passed** ``Categorical`` dtype. This allows one to arbitrarily index these even with
779779
values **not** in the categories, similarly to how you can reindex **any** pandas index.
780780

781-
.. ipython :: python
781+
.. ipython:: python
782782
783-
df2.reindex(['a','e'])
784-
df2.reindex(['a','e']).index
785-
df2.reindex(pd.Categorical(['a','e'],categories=list('abcde')))
786-
df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))).index
783+
df2.reindex(['a', 'e'])
784+
df2.reindex(['a', 'e']).index
785+
df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde')))
786+
df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))).index
787787
788788
.. warning::
789789

@@ -1040,7 +1040,8 @@ than integer locations. Therefore, with an integer axis index *only*
10401040
label-based indexing is possible with the standard tools like ``.loc``. The
10411041
following code will generate exceptions:
10421042

1043-
.. code-block:: python
1043+
.. ipython:: python
1044+
:okexcept:
10441045
10451046
s = pd.Series(range(5))
10461047
s[-1]
@@ -1130,7 +1131,7 @@ index can be somewhat complicated. For example, the following does not work:
11301131

11311132
::
11321133

1133-
s.loc['c':'e'+1]
1134+
s.loc['c':'e' + 1]
11341135

11351136
A very common use case is to limit a time series to start and end at two
11361137
specific dates. To enable this, we made the design choice to make label-based

doc/source/basics.rst

+1-3
Original file line numberDiff line numberDiff line change
@@ -374,9 +374,7 @@ To evaluate single-element pandas objects in a boolean context, use the method
374374
375375
>>> df and df2
376376
377-
These will both raise errors, as you are trying to compare multiple values.
378-
379-
.. code-block:: python-traceback
377+
These will both raise errors, as you are trying to compare multiple values::
380378

381379
ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().
382380

doc/source/categorical.rst

+6-8
Original file line numberDiff line numberDiff line change
@@ -977,21 +977,17 @@ categorical (categories and ordering). So if you read back the CSV file you have
977977
relevant columns back to `category` and assign the right categories and categories ordering.
978978

979979
.. ipython:: python
980-
:suppress:
981980
982-
983-
.. ipython:: python
984-
985-
from pandas.compat import StringIO
981+
import io
986982
s = pd.Series(pd.Categorical(['a', 'b', 'b', 'a', 'a', 'd']))
987983
# rename the categories
988984
s.cat.categories = ["very good", "good", "bad"]
989985
# reorder the categories and add missing categories
990986
s = s.cat.set_categories(["very bad", "bad", "medium", "good", "very good"])
991987
df = pd.DataFrame({"cats": s, "vals": [1, 2, 3, 4, 5, 6]})
992-
csv = StringIO()
988+
csv = io.StringIO()
993989
df.to_csv(csv)
994-
df2 = pd.read_csv(StringIO(csv.getvalue()))
990+
df2 = pd.read_csv(io.StringIO(csv.getvalue()))
995991
df2.dtypes
996992
df2["cats"]
997993
# Redo the category
@@ -1145,7 +1141,8 @@ dtype in apply
11451141

11461142
Pandas currently does not preserve the dtype in apply functions: If you apply along rows you get
11471143
a `Series` of ``object`` `dtype` (same as getting a row -> getting one element will return a
1148-
basic type) and applying along columns will also convert to object.
1144+
basic type) and applying along columns will also convert to object. ``NaN`` values are unaffected.
1145+
You can use ``fillna`` to handle missing values before applying a function.
11491146

11501147
.. ipython:: python
11511148
@@ -1205,6 +1202,7 @@ Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categorica
12051202
cat
12061203
12071204
.. note::
1205+
12081206
This also happens in some cases when you supply a NumPy array instead of a ``Categorical``:
12091207
using an int array (e.g. ``np.array([1,2,3,4])``) will exhibit the same behavior, while using
12101208
a string array (e.g. ``np.array(["a","b","c","a"])``) will not.

doc/source/conf.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,10 @@
296296
np.random.seed(123456)
297297
np.set_printoptions(precision=4, suppress=True)
298298
pd.options.display.max_rows = 15
299-
"""
299+
300+
import os
301+
os.chdir('{}')
302+
""".format(os.path.dirname(os.path.dirname(__file__)))
300303

301304

302305
html_context = {

doc/source/contributing.rst

+9-5
Original file line numberDiff line numberDiff line change
@@ -125,21 +125,25 @@ requires a C compiler and Python environment. If you're making documentation
125125
changes, you can skip to :ref:`contributing.documentation` but you won't be able
126126
to build the documentation locally before pushing your changes.
127127

128-
.. _contributiong.dev_c:
128+
.. _contributing.dev_c:
129129

130130
Installing a C Compiler
131131
~~~~~~~~~~~~~~~~~~~~~~~
132132

133133
Pandas uses C extensions (mostly written using Cython) to speed up certain
134134
operations. To install pandas from source, you need to compile these C
135135
extensions, which means you need a C compiler. This process depends on which
136-
platform you're using. Follow the `CPython contributing guidelines
137-
<https://docs.python.org/devguide/setup.html#build-dependencies>`_ for getting a
136+
platform you're using. Follow the `CPython contributing guide
137+
<https://devguide.python.org/setup/#compile-and-build>`_ for getting a
138138
compiler installed. You don't need to do any of the ``./configure`` or ``make``
139139
steps; you only need to install the compiler.
140140

141-
For Windows developers, the following links may be helpful.
141+
For Windows developers, when using Python 3.5 and later, it is sufficient to
142+
install `Visual Studio 2017 <https://visualstudio.com/>`_ with the
143+
**Python development workload** and the **Python native development tools**
144+
option. Otherwise, the following links may be helpful.
142145

146+
* https://blogs.msdn.microsoft.com/pythonengineering/2017/03/07/python-support-in-vs2017/
143147
* https://blogs.msdn.microsoft.com/pythonengineering/2016/04/11/unable-to-find-vcvarsall-bat/
144148
* https://github.com/conda/conda-recipes/wiki/Building-from-Source-on-Windows-32-bit-and-64-bit
145149
* https://cowboyprogrammer.org/building-python-wheels-for-windows/
@@ -149,7 +153,7 @@ For Windows developers, the following links may be helpful.
149153
Let us know if you have any difficulties by opening an issue or reaching out on
150154
`Gitter`_.
151155

152-
.. _contributiong.dev_python:
156+
.. _contributing.dev_python:
153157

154158
Creating a Python Environment
155159
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

doc/source/cookbook.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -1236,7 +1236,7 @@ the following Python code will read the binary file ``'binary.dat'`` into a
12361236
pandas ``DataFrame``, where each element of the struct corresponds to a column
12371237
in the frame:
12381238

1239-
.. code-block:: python
1239+
.. ipython:: python
12401240
12411241
names = 'count', 'avg', 'scale'
12421242
@@ -1399,7 +1399,6 @@ of the data values:
13991399

14001400
.. ipython:: python
14011401
1402-
14031402
def expand_grid(data_dict):
14041403
rows = itertools.product(*data_dict.values())
14051404
return pd.DataFrame.from_records(rows, columns=data_dict.keys())

doc/source/gotchas.rst

+1-3
Original file line numberDiff line numberDiff line change
@@ -301,9 +301,7 @@ Byte-Ordering Issues
301301
--------------------
302302
Occasionally you may have to deal with data that were created on a machine with
303303
a different byte order than the one on which you are running Python. A common
304-
symptom of this issue is an error like:
305-
306-
.. code-block:: python-traceback
304+
symptom of this issue is an error like::
307305

308306
Traceback
309307
...

0 commit comments

Comments
 (0)