Skip to content

Commit 6a7d7df

Browse files
committed
Merge branch 'master' into feature/groupby-repr-ellipses-1135
2 parents 0746c3b + aa08416 commit 6a7d7df

File tree

237 files changed

+5795
-7933
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

237 files changed

+5795
-7933
lines changed

asv_bench/benchmarks/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Pandas benchmarks."""

asv_bench/benchmarks/io/hdf.py

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
import warnings
2-
31
import numpy as np
4-
from pandas import DataFrame, Panel, date_range, HDFStore, read_hdf
2+
from pandas import DataFrame, date_range, HDFStore, read_hdf
53
import pandas.util.testing as tm
64

75
from ..pandas_vb_common import BaseIO
@@ -99,31 +97,6 @@ def time_store_info(self):
9997
self.store.info()
10098

10199

102-
class HDFStorePanel(BaseIO):
103-
104-
def setup(self):
105-
self.fname = '__test__.h5'
106-
with warnings.catch_warnings(record=True):
107-
self.p = Panel(np.random.randn(20, 1000, 25),
108-
items=['Item%03d' % i for i in range(20)],
109-
major_axis=date_range('1/1/2000', periods=1000),
110-
minor_axis=['E%03d' % i for i in range(25)])
111-
self.store = HDFStore(self.fname)
112-
self.store.append('p1', self.p)
113-
114-
def teardown(self):
115-
self.store.close()
116-
self.remove(self.fname)
117-
118-
def time_read_store_table_panel(self):
119-
with warnings.catch_warnings(record=True):
120-
self.store.select('p1')
121-
122-
def time_write_store_table_panel(self):
123-
with warnings.catch_warnings(record=True):
124-
self.store.append('p2', self.p)
125-
126-
127100
class HDF(BaseIO):
128101

129102
params = ['table', 'fixed']

asv_bench/benchmarks/series_methods.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,25 @@ def time_dropna(self, dtype):
124124
self.s.dropna()
125125

126126

127+
class SearchSorted(object):
128+
129+
goal_time = 0.2
130+
params = ['int8', 'int16', 'int32', 'int64',
131+
'uint8', 'uint16', 'uint32', 'uint64',
132+
'float16', 'float32', 'float64',
133+
'str']
134+
param_names = ['dtype']
135+
136+
def setup(self, dtype):
137+
N = 10**5
138+
data = np.array([1] * N + [2] * N + [3] * N).astype(dtype)
139+
self.s = Series(data)
140+
141+
def time_searchsorted(self, dtype):
142+
key = '2' if dtype == 'str' else 2
143+
self.s.searchsorted(key)
144+
145+
127146
class Map(object):
128147

129148
params = ['dict', 'Series']

ci/code_checks.sh

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
9393
# this particular codebase (e.g. src/headers, src/klib, src/msgpack). However,
9494
# we can lint all header files since they aren't "generated" like C files are.
9595
MSG='Linting .c and .h' ; echo $MSG
96-
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime
96+
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/io/msgpack pandas/_libs/*.cpp pandas/util
9797
RET=$(($RET + $?)) ; echo $MSG "DONE"
9898

9999
echo "isort --version-number"
@@ -174,9 +174,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
174174
MSG='Check that no file in the repo contains tailing whitespaces' ; echo $MSG
175175
set -o pipefail
176176
if [[ "$AZURE" == "true" ]]; then
177-
! grep -n --exclude="*.svg" -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
177+
# we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files
178+
! grep -n '--exclude=*.'{svg,c,cpp,html} -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
178179
else
179-
! grep -n --exclude="*.svg" -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
180+
! grep -n '--exclude=*.'{svg,c,cpp,html} -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
180181
fi
181182
RET=$(($RET + $?)) ; echo $MSG "DONE"
182183
fi
@@ -240,8 +241,8 @@ fi
240241
### DOCSTRINGS ###
241242
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
242243

243-
MSG='Validate docstrings (GL06, GL07, GL09, SS04, PR03, PR05, EX04, RT04, SS05)' ; echo $MSG
244-
$BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL06,GL07,GL09,SS04,PR03,PR05,EX04,RT04,SS05
244+
MSG='Validate docstrings (GL06, GL07, GL09, SS04, PR03, PR05, PR10, EX04, RT04, RT05, SS05, SA05)' ; echo $MSG
245+
$BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL06,GL07,GL09,SS04,PR03,PR04,PR05,EX04,RT04,RT05,SS05,SA05
245246
RET=$(($RET + $?)) ; echo $MSG "DONE"
246247

247248
fi

ci/deps/azure-27-compat.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ dependencies:
2020
# universal
2121
- pytest
2222
- pytest-xdist
23+
- pytest-mock
2324
- pip:
2425
- html5lib==1.0b2
2526
- beautifulsoup4==4.2.1

ci/deps/azure-27-locale.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ dependencies:
2222
# universal
2323
- pytest
2424
- pytest-xdist
25+
- pytest-mock
2526
- hypothesis>=3.58.0
2627
- pip:
2728
- html5lib==1.0b2

ci/deps/azure-36-locale_slow.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ dependencies:
2828
# universal
2929
- pytest
3030
- pytest-xdist
31+
- pytest-mock
3132
- moto
3233
- pip:
3334
- hypothesis>=3.58.0

ci/deps/azure-37-locale.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ dependencies:
2727
# universal
2828
- pytest
2929
- pytest-xdist
30+
- pytest-mock
3031
- pip:
3132
- hypothesis>=3.58.0
3233
- moto # latest moto in conda-forge fails with 3.7, move to conda dependencies when this is fixed

ci/deps/azure-37-numpydev.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies:
88
# universal
99
- pytest
1010
- pytest-xdist
11+
- pytest-mock
1112
- hypothesis>=3.58.0
1213
- pip:
1314
- "git+git://github.com/dateutil/dateutil.git"

ci/deps/azure-macos-35.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ dependencies:
2424
# universal
2525
- pytest
2626
- pytest-xdist
27+
- pytest-mock
2728
- pip:
2829
- python-dateutil==2.5.3
2930
- hypothesis>=3.58.0

ci/deps/azure-windows-27.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,6 @@ dependencies:
2727
- cython>=0.28.2
2828
- pytest
2929
- pytest-xdist
30+
- pytest-mock
3031
- moto
3132
- hypothesis>=3.58.0

ci/deps/azure-windows-36.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@ dependencies:
2525
- cython>=0.28.2
2626
- pytest
2727
- pytest-xdist
28+
- pytest-mock
2829
- hypothesis>=3.58.0

ci/deps/travis-27.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ dependencies:
4141
# universal
4242
- pytest
4343
- pytest-xdist
44+
- pytest-mock
4445
- moto==1.3.4
4546
- hypothesis>=3.58.0
4647
- pip:

ci/deps/travis-36-locale.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ dependencies:
3030
# universal
3131
- pytest
3232
- pytest-xdist
33+
- pytest-mock
3334
- moto
3435
- pip:
3536
- hypothesis>=3.58.0

ci/deps/travis-36-slow.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,6 @@ dependencies:
2727
# universal
2828
- pytest
2929
- pytest-xdist
30+
- pytest-mock
3031
- moto
3132
- hypothesis>=3.58.0

ci/deps/travis-36.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ dependencies:
3636
- pytest
3737
- pytest-xdist
3838
- pytest-cov
39+
- pytest-mock
3940
- hypothesis>=3.58.0
4041
- pip:
4142
- brotlipy

ci/deps/travis-37.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ dependencies:
1414
- pytz
1515
- pytest
1616
- pytest-xdist
17+
- pytest-mock
1718
- hypothesis>=3.58.0
1819
- s3fs
1920
- pip:

doc/cheatsheet/Pandas_Cheat_Sheet.pdf

6.61 KB
Binary file not shown.
13 Bytes
Binary file not shown.
209 KB
Binary file not shown.
5.93 KB
Binary file not shown.

doc/source/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,9 @@
9898
if (fname == 'index.rst'
9999
and os.path.abspath(dirname) == source_path):
100100
continue
101-
elif pattern == '-api' and dirname == 'api':
101+
elif pattern == '-api' and dirname == 'reference':
102102
exclude_patterns.append(fname)
103-
elif fname != pattern:
103+
elif pattern != '-api' and fname != pattern:
104104
exclude_patterns.append(fname)
105105

106106
with open(os.path.join(source_path, 'index.rst.template')) as f:

doc/source/development/contributing.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Bug reports must:
5454
...
5555
```
5656

57-
#. Include the full version string of *pandas* and its dependencies. You can use the built in function::
57+
#. Include the full version string of *pandas* and its dependencies. You can use the built-in function::
5858

5959
>>> import pandas as pd
6060
>>> pd.show_versions()
@@ -211,7 +211,7 @@ See the full conda docs `here <http://conda.pydata.org/docs>`__.
211211
Creating a Python Environment (pip)
212212
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
213213

214-
If you aren't using conda for you development environment, follow these instructions.
214+
If you aren't using conda for your development environment, follow these instructions.
215215
You'll need to have at least python3.5 installed on your system.
216216

217217
.. code-block:: none
@@ -484,7 +484,7 @@ contributing them to the project::
484484

485485
./ci/code_checks.sh
486486

487-
The script verify the linting of code files, it looks for common mistake patterns
487+
The script verifies the linting of code files; it looks for common mistake patterns
488488
(like missing spaces around sphinx directives that make the documentation not
489489
being rendered properly) and it also validates the doctests. It is possible to
490490
run the checks independently by using the parameters ``lint``, ``patterns`` and
@@ -675,7 +675,7 @@ Otherwise, you need to do it manually:
675675
676676
You'll also need to
677677

678-
1. write a new test that asserts a warning is issued when calling with the deprecated argument
678+
1. Write a new test that asserts a warning is issued when calling with the deprecated argument
679679
2. Update all of pandas' existing tests and code to use the new argument
680680

681681
See :ref:`contributing.warnings` for more.

doc/source/development/extending.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,9 @@ decorate a class, providing the name of attribute to add. The class's
3333
3434
@staticmethod
3535
def _validate(obj):
36-
if 'lat' not in obj.columns or 'lon' not in obj.columns:
37-
raise AttributeError("Must have 'lat' and 'lon'.")
36+
# verify there is a column latitude and a column longitude
37+
if 'latitude' not in obj.columns or 'longitude' not in obj.columns:
38+
raise AttributeError("Must have 'latitude' and 'longitude'.")
3839
3940
@property
4041
def center(self):

doc/source/getting_started/basics.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ So, for instance, to reproduce :meth:`~DataFrame.combine_first` as above:
505505
.. ipython:: python
506506
507507
def combiner(x, y):
508-
np.where(pd.isna(x), y, x)
508+
return np.where(pd.isna(x), y, x)
509509
df1.combine(df2, combiner)
510510
511511
.. _basics.stats:

doc/source/reference/arrays.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ Methods
120120
Timestamp.timetuple
121121
Timestamp.timetz
122122
Timestamp.to_datetime64
123+
Timestamp.to_numpy
123124
Timestamp.to_julian_date
124125
Timestamp.to_period
125126
Timestamp.to_pydatetime
@@ -191,6 +192,7 @@ Methods
191192
Timedelta.round
192193
Timedelta.to_pytimedelta
193194
Timedelta.to_timedelta64
195+
Timedelta.to_numpy
194196
Timedelta.total_seconds
195197

196198
A collection of timedeltas may be stored in a :class:`TimedeltaArray`.

doc/source/styled.xlsx

5.55 KB
Binary file not shown.

doc/source/user_guide/groupby.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1317,7 +1317,7 @@ arbitrary function, for example:
13171317
df.groupby(['Store', 'Product']).pipe(mean)
13181318
13191319
where ``mean`` takes a GroupBy object and finds the mean of the Revenue and Quantity
1320-
columns repectively for each Store-Product combination. The ``mean`` function can
1320+
columns respectively for each Store-Product combination. The ``mean`` function can
13211321
be any function that takes in a GroupBy object; the ``.pipe`` will pass the GroupBy
13221322
object as a parameter into the function you specify.
13231323

doc/source/user_guide/missing_data.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ examined :ref:`in the API <api.dataframe.missing>`.
335335
Interpolation
336336
~~~~~~~~~~~~~
337337

338-
.. versionadded:: 0.21.0
338+
.. versionadded:: 0.23.0
339339

340340
The ``limit_area`` keyword argument was added.
341341

doc/source/user_guide/timeseries.rst

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,15 @@ which can be specified. These are computed from the starting point specified by
321321
pd.to_datetime([1349720105100, 1349720105200, 1349720105300,
322322
1349720105400, 1349720105500], unit='ms')
323323
324+
Constructing a :class:`Timestamp` or :class:`DatetimeIndex` with an epoch timestamp
325+
with the ``tz`` argument specified will localize the epoch timestamps to UTC
326+
first and then convert the result to the specified time zone.
327+
328+
.. ipython:: python
329+
330+
pd.Timestamp(1262347200000000000, tz='US/Pacific')
331+
pd.DatetimeIndex([1262347200000000000], tz='US/Pacific')
332+
324333
.. note::
325334

326335
Epoch times will be rounded to the nearest nanosecond.
@@ -624,6 +633,16 @@ We are stopping on the included end-point as it is part of the index:
624633
dft2 = dft2.swaplevel(0, 1).sort_index()
625634
dft2.loc[idx[:, '2013-01-05'], :]
626635
636+
.. versionadded:: 0.25.0
637+
638+
Slicing with string indexing also honors UTC offset.
639+
640+
.. ipython:: python
641+
642+
df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific'))
643+
df
644+
df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00']
645+
627646
.. _timeseries.slice_vs_exact_match:
628647

629648
Slice vs. Exact Match
@@ -2205,6 +2224,21 @@ you can use the ``tz_convert`` method.
22052224
22062225
rng_pytz.tz_convert('US/Eastern')
22072226
2227+
.. note::
2228+
2229+
When using ``pytz`` time zones, :class:`DatetimeIndex` will construct a different
2230+
time zone object than a :class:`Timestamp` for the same time zone input. A :class:`DatetimeIndex`
2231+
can hold a collection of :class:`Timestamp` objects that may have different UTC offsets and cannot be
2232+
succinctly represented by one ``pytz`` time zone instance, while one :class:`Timestamp`
2233+
represents one point in time with a specific UTC offset.
2234+
2235+
.. ipython:: python
2236+
2237+
dti = pd.date_range('2019-01-01', periods=3, freq='D', tz='US/Pacific')
2238+
dti.tz
2239+
ts = pd.Timestamp('2019-01-01', tz='US/Pacific')
2240+
ts.tz
2241+
22082242
.. warning::
22092243

22102244
Be wary of conversions between libraries. For some time zones, ``pytz`` and ``dateutil`` have different

doc/source/whatsnew/v0.10.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ Updated PyTables Support
370370
df1.get_dtype_counts()
371371
372372
- performance improvements on table writing
373-
- support for arbitrarly indexed dimensions
373+
- support for arbitrarily indexed dimensions
374374
- ``SparseSeries`` now has a ``density`` property (:issue:`2384`)
375375
- enable ``Series.str.strip/lstrip/rstrip`` methods to take an input argument
376376
to strip arbitrary characters (:issue:`2411`)

doc/source/whatsnew/v0.16.1.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ groupby operations on the index will preserve the index nature as well
136136
reindexing operations, will return a resulting index based on the type of the passed
137137
indexer, meaning that passing a list will return a plain-old-``Index``; indexing with
138138
a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories
139-
of the PASSED ``Categorical`` dtype. This allows one to arbitrarly index these even with
139+
of the PASSED ``Categorical`` dtype. This allows one to arbitrarily index these even with
140140
values NOT in the categories, similarly to how you can reindex ANY pandas index.
141141

142142
.. code-block:: ipython

0 commit comments

Comments
 (0)