Commit 72fe4fc

Merge remote-tracking branch 'upstream/master' into disown-tz-only-rebased

2 parents: 0b570b1 + 08c920e

File tree

102 files changed: +1251 −7252 lines


asv_bench/benchmarks/frame_methods.py (+62 −1)

@@ -103,6 +103,7 @@ def setup(self):
         self.df2 = DataFrame(np.random.randn(N * 50, 10))
         self.df3 = DataFrame(np.random.randn(N, 5 * N),
                              columns=['C' + str(c) for c in range(N * 5)])
+        self.df4 = DataFrame(np.random.randn(N * 1000, 10))
 
     def time_iteritems(self):
         # (monitor no-copying behaviour)
@@ -119,10 +120,70 @@ def time_iteritems_indexing(self):
         for col in self.df3:
             self.df3[col]
 
+    def time_itertuples_start(self):
+        self.df4.itertuples()
+
+    def time_itertuples_read_first(self):
+        next(self.df4.itertuples())
+
     def time_itertuples(self):
-        for row in self.df2.itertuples():
+        for row in self.df4.itertuples():
+            pass
+
+    def time_itertuples_to_list(self):
+        list(self.df4.itertuples())
+
+    def mem_itertuples_start(self):
+        return self.df4.itertuples()
+
+    def peakmem_itertuples_start(self):
+        self.df4.itertuples()
+
+    def mem_itertuples_read_first(self):
+        return next(self.df4.itertuples())
+
+    def peakmem_itertuples(self):
+        for row in self.df4.itertuples():
+            pass
+
+    def mem_itertuples_to_list(self):
+        return list(self.df4.itertuples())
+
+    def peakmem_itertuples_to_list(self):
+        list(self.df4.itertuples())
+
+    def time_itertuples_raw_start(self):
+        self.df4.itertuples(index=False, name=None)
+
+    def time_itertuples_raw_read_first(self):
+        next(self.df4.itertuples(index=False, name=None))
+
+    def time_itertuples_raw_tuples(self):
+        for row in self.df4.itertuples(index=False, name=None):
             pass
 
+    def time_itertuples_raw_tuples_to_list(self):
+        list(self.df4.itertuples(index=False, name=None))
+
+    def mem_itertuples_raw_start(self):
+        return self.df4.itertuples(index=False, name=None)
+
+    def peakmem_itertuples_raw_start(self):
+        self.df4.itertuples(index=False, name=None)
+
+    def peakmem_itertuples_raw_read_first(self):
+        next(self.df4.itertuples(index=False, name=None))
+
+    def peakmem_itertuples_raw(self):
+        for row in self.df4.itertuples(index=False, name=None):
+            pass
+
+    def mem_itertuples_raw_to_list(self):
+        return list(self.df4.itertuples(index=False, name=None))
+
+    def peakmem_itertuples_raw_to_list(self):
+        list(self.df4.itertuples(index=False, name=None))
+
     def time_iterrows(self):
         for row in self.df.iterrows():
             pass
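
The new benchmarks split `itertuples` cost into phases: building the iterator (`*_start`), drawing the first row (`*_read_first`), and full iteration or materialization, with `raw` variants passing `index=False, name=None` so plain tuples are yielded instead of namedtuples. A minimal sketch of the API being measured (not part of the commit; `df` stands in for `self.df4`):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(1000, 10))

    it = df.itertuples()              # *_start: creating the iterator is cheap
    first = next(it)                  # *_read_first: setup cost is paid here
    all_rows = list(df.itertuples())  # *_to_list: full materialization

    # The "raw" variants skip namedtuple construction and yield plain tuples:
    raw = next(df.itertuples(index=False, name=None))
    print(type(first).__name__, type(raw).__name__)  # Pandas tuple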

azure-pipelines.yml (+1)

@@ -43,6 +43,7 @@ jobs:
       ci/incremental/install_miniconda.sh
       ci/incremental/setup_conda_environment.sh
     displayName: 'Set up environment'
+    condition: true
 
   # Do not require pandas
   - script: |

ci/code_checks.sh (+6 −1)

@@ -158,7 +158,12 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     # RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Check that no file in the repo contains tailing whitespaces' ; echo $MSG
-    invgrep --exclude="*.svg" -RI "\s$" *
+    set -o pipefail
+    if [[ "$AZURE" == "true" ]]; then
+        ! grep -n --exclude="*.svg" -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
+    else
+        ! grep -n --exclude="*.svg" -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
+    fi
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 fi
 
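
The `##vso[task.logissue ...]` prefix is an Azure Pipelines logging command that turns each match into an annotated CI error. A hedged Python sketch of the same record formatting (the function name is hypothetical, not part of the commit):

    def format_issue(record, azure):
        # Split a grep-style "path:lineno:text" record, roughly as awk -F ":" does.
        path, lineno, text = record.split(":", 2)
        if azure:
            return ("##vso[task.logissue type=error;sourcepath=" + path +
                    ";linenumber=" + lineno + ";] Tailing whitespaces found: " + text)
        return path + ":" + lineno + ":Tailing whitespaces found: " + text

    print(format_issue("pandas/core/frame.py:42:x = 1 ", azure=True))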

doc/source/advanced.rst (+8 −7)

@@ -778,12 +778,12 @@ a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the c
 of the **passed** ``Categorical`` dtype. This allows one to arbitrarily index these even with
 values **not** in the categories, similarly to how you can reindex **any** pandas index.
 
-.. ipython :: python
+.. ipython:: python
 
-   df2.reindex(['a','e'])
-   df2.reindex(['a','e']).index
-   df2.reindex(pd.Categorical(['a','e'],categories=list('abcde')))
-   df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))).index
+   df2.reindex(['a', 'e'])
+   df2.reindex(['a', 'e']).index
+   df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde')))
+   df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))).index
 
 .. warning::
 
@@ -1040,7 +1040,8 @@ than integer locations. Therefore, with an integer axis index *only*
 label-based indexing is possible with the standard tools like ``.loc``. The
 following code will generate exceptions:
 
-.. code-block:: python
+.. ipython:: python
+   :okexcept:
 
    s = pd.Series(range(5))
    s[-1]
@@ -1130,7 +1131,7 @@ index can be somewhat complicated. For example, the following does not work:
 
 ::
 
-    s.loc['c':'e'+1]
+    s.loc['c':'e' + 1]
 
 A very common use case is to limit a time series to start and end at two
 specific dates. To enable this, we made the design to make label-based
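
Context for the `:okexcept:` change above (my sketch, not part of the diff): with an integer axis index, `s[-1]` is interpreted as the label -1, not the last position, which is why the snippet raises:

    import pandas as pd

    s = pd.Series(range(5))   # integer-valued index: 0..4
    try:
        s[-1]                 # looked up as the *label* -1, which does not exist
    except KeyError:
        print("KeyError: -1")
    print(s.iloc[-1])         # positional access works: prints 4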

doc/source/basics.rst (+1 −3)

@@ -374,9 +374,7 @@ To evaluate single-element pandas objects in a boolean context, use the method
 
     >>> df and df2
 
-These will both raise errors, as you are trying to compare multiple values.
-
-.. code-block:: python-traceback
+These will both raise errors, as you are trying to compare multiple values.::
 
     ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().
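
As the error message suggests, the fix (a sketch of mine, not part of the diff) is to ask a specific question of the object instead of evaluating it as a whole:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]})
    print(df.empty)              # False: is the frame empty?
    print((df["a"] > 0).all())   # True: are all values positive?
    print((df["a"] > 2).any())   # True: is any value greater than 2?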

doc/source/categorical.rst (+4 −7)

@@ -977,21 +977,17 @@ categorical (categories and ordering). So if you read back the CSV file you have
 relevant columns back to `category` and assign the right categories and categories ordering.
 
 .. ipython:: python
-    :suppress:
 
-
-.. ipython:: python
-
-
-    from pandas.compat import StringIO
+    import io
     s = pd.Series(pd.Categorical(['a', 'b', 'b', 'a', 'a', 'd']))
     # rename the categories
     s.cat.categories = ["very good", "good", "bad"]
     # reorder the categories and add missing categories
     s = s.cat.set_categories(["very bad", "bad", "medium", "good", "very good"])
     df = pd.DataFrame({"cats": s, "vals": [1, 2, 3, 4, 5, 6]})
-    csv = StringIO()
+    csv = io.StringIO()
     df.to_csv(csv)
-    df2 = pd.read_csv(StringIO(csv.getvalue()))
+    df2 = pd.read_csv(io.StringIO(csv.getvalue()))
     df2.dtypes
     df2["cats"]
     # Redo the category
@@ -1206,6 +1202,7 @@ Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categorica
     cat
 
 .. note::
+
     This also happens in some cases when you supply a NumPy array instead of a ``Categorical``:
     using an int array (e.g. ``np.array([1,2,3,4])``) will exhibit the same behavior, while using
     a string array (e.g. ``np.array(["a","b","c","a"])``) will not.
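
The surrounding page explains that a CSV round-trip drops the categorical dtype; a short sketch (not part of the commit) of restoring it afterwards:

    import io
    import pandas as pd

    s = pd.Series(["a", "b", "b", "a"], dtype="category")
    buf = io.StringIO()
    s.to_frame("cats").to_csv(buf)

    df2 = pd.read_csv(io.StringIO(buf.getvalue()))
    print(df2["cats"].dtype)                      # object: category dtype was lost
    df2["cats"] = df2["cats"].astype("category")  # restore it by hand
    print(df2["cats"].dtype)                      # category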

doc/source/conf.py (+4 −1)

@@ -296,7 +296,10 @@
 np.random.seed(123456)
 np.set_printoptions(precision=4, suppress=True)
 pd.options.display.max_rows = 15
-"""
+
+import os
+os.chdir('{}')
+""".format(os.path.dirname(os.path.dirname(__file__)))
 
 
 html_context = {
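
This change formats the repository root into the IPython preamble string, so documentation examples always execute from a fixed working directory. A minimal, hypothetical re-creation of the `str.format` pattern (names are mine, not from the commit):

    import os

    # Fill the '{}' placeholder in an exec'd preamble with a computed path.
    preamble = """
    import os
    os.chdir('{}')
    """.format(os.path.dirname(os.getcwd()))
    print(preamble)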

doc/source/cookbook.rst (+1 −2)

@@ -1236,7 +1236,7 @@ the following Python code will read the binary file ``'binary.dat'`` into a
 pandas ``DataFrame``, where each element of the struct corresponds to a column
 in the frame:
 
-.. code-block:: python
+.. ipython:: python
 
    names = 'count', 'avg', 'scale'
@@ -1399,7 +1399,6 @@ of the data values:
 
 .. ipython:: python
 
-
    def expand_grid(data_dict):
        rows = itertools.product(*data_dict.values())
        return pd.DataFrame.from_records(rows, columns=data_dict.keys())
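
For reference, `expand_grid` (defined in the hunk above) builds the Cartesian product of its inputs, one column per key; a usage sketch assuming the same definition:

    import itertools
    import pandas as pd

    def expand_grid(data_dict):
        rows = itertools.product(*data_dict.values())
        return pd.DataFrame.from_records(rows, columns=data_dict.keys())

    df = expand_grid({"height": [60, 70], "weight": [100, 140], "sex": ["M", "F"]})
    print(df.shape)  # (8, 3): the 2 * 2 * 2 product of the value lists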

doc/source/gotchas.rst (+1 −3)

@@ -301,9 +301,7 @@ Byte-Ordering Issues
 --------------------
 Occasionally you may have to deal with data that were created on a machine with
 a different byte order than the one on which you are running Python. A common
-symptom of this issue is an error like:
-
-.. code-block:: python-traceback
+symptom of this issue is an error like:::
 
     Traceback
     ...
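
The usual remedy, which the surrounding gotchas page describes, is to swap the array to native byte order before constructing pandas objects; a sketch of mine, not from the diff:

    import numpy as np

    big_endian = np.array([1.0, 2.0, 3.0], dtype=">f8")  # big-endian float64
    native = big_endian.byteswap().newbyteorder()        # convert before use
    print(native.dtype)  # float64 in the machine's native byte order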

doc/source/io.rst (+4 −1)

@@ -1,5 +1,8 @@
 .. _io:
 
+.. currentmodule:: pandas
+
+
 {{ header }}
 
 .. ipython:: python
@@ -4876,7 +4879,7 @@ below and the SQLAlchemy `documentation <https://docs.sqlalchemy.org/en/latest/c
 
 If you want to manage your own connections you can pass one of those instead:
 
-.. code-block:: python
+.. ipython:: python
 
    with engine.connect() as conn, conn.begin():
        data = pd.read_sql_table('data', conn)
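
A self-contained sketch of the connection-managing pattern shown above, using an in-memory SQLite engine (my example, not part of the commit):

    import pandas as pd
    from sqlalchemy import create_engine

    engine = create_engine("sqlite://")  # in-memory database
    pd.DataFrame({"x": [1, 2]}).to_sql("data", engine, index=False)

    with engine.connect() as conn:
        data = pd.read_sql_table("data", conn)
    print(data)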

doc/source/merging.rst (+2)

@@ -1122,6 +1122,8 @@ This is equivalent but less verbose and more memory efficient / faster than this
           labels=['left', 'right'], vertical=False);
    plt.close('all');
 
+.. _merging.join_with_two_multi_indexes:
+
 Joining with two MultiIndexes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
