Skip to content

Commit a7f3815

Browse files
author
tp
committed
solve merge conflict
2 parents da4b494 + 37086a0 commit a7f3815

File tree

115 files changed

+7113
-6041
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

115 files changed

+7113
-6041
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,4 @@ doc/build/html/index.html
106106
doc/tmp.sv
107107
doc/source/styled.xlsx
108108
doc/source/templates/
109+
env/

asv_bench/benchmarks/frame_ctor.py

+19-72
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
import pandas.util.testing as tm
33
from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range
44
try:
5-
from pandas.tseries import offsets
6-
except:
5+
from pandas.tseries.offsets import Nano, Hour
6+
except ImportError:
7+
# For compatibility with older versions
78
from pandas.core.datetools import * # noqa
89

910
from .pandas_vb_common import setup # noqa
@@ -24,16 +25,16 @@ def setup(self):
2425
self.data2 = {i: {j: float(j) for j in range(100)}
2526
for i in range(2000)}
2627

27-
def time_frame_ctor_list_of_dict(self):
28+
def time_list_of_dict(self):
2829
DataFrame(self.dict_list)
2930

30-
def time_frame_ctor_nested_dict(self):
31+
def time_nested_dict(self):
3132
DataFrame(self.data)
3233

33-
def time_series_ctor_from_dict(self):
34+
def time_dict(self):
3435
Series(self.some_dict)
3536

36-
def time_frame_ctor_nested_dict_int64(self):
37+
def time_nested_dict_int64(self):
3738
# nested dict, integer indexes, regression described in #621
3839
DataFrame(self.data2)
3940

@@ -46,78 +47,24 @@ def setup(self):
4647
mi = MultiIndex.from_product([range(100), range(100)])
4748
self.s = Series(np.random.randn(10000), index=mi)
4849

49-
def time_frame_from_mi_series(self):
50+
def time_mi_series(self):
5051
DataFrame(self.s)
5152

52-
# ----------------------------------------------------------------------
53-
# From dict with DatetimeIndex with all offsets
5453

55-
# dynamically generate benchmarks for every offset
56-
#
57-
# get_period_count & get_index_for_offset are there because blindly taking each
58-
# offset times 1000 can easily go out of Timestamp bounds and raise errors.
54+
class FromDictwithTimestamp(object):
5955

56+
goal_time = 0.2
57+
params = [Nano(1), Hour(1)]
58+
param_names = ['offset']
6059

61-
def get_period_count(start_date, off):
62-
ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days
63-
if (ten_offsets_in_days == 0):
64-
return 1000
65-
else:
66-
periods = 9 * (Timestamp.max - start_date).days // ten_offsets_in_days
67-
return min(periods, 1000)
68-
69-
70-
def get_index_for_offset(off):
71-
start_date = Timestamp('1/1/1900')
72-
return date_range(start_date,
73-
periods=get_period_count(start_date, off),
74-
freq=off)
75-
76-
77-
all_offsets = offsets.__all__
78-
# extra cases
79-
for off in ['FY5253', 'FY5253Quarter']:
80-
all_offsets.pop(all_offsets.index(off))
81-
all_offsets.extend([off + '_1', off + '_2'])
82-
83-
84-
class FromDictwithTimestampOffsets(object):
85-
86-
params = [all_offsets, [1, 2]]
87-
param_names = ['offset', 'n_steps']
88-
89-
offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1},
90-
'LastWeekOfMonth': {'weekday': 1, 'week': 1},
91-
'FY5253': {'startingMonth': 1, 'weekday': 1},
92-
'FY5253Quarter': {'qtr_with_extra_week': 1,
93-
'startingMonth': 1,
94-
'weekday': 1}}
95-
96-
offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']},
97-
'FY5253Quarter': {'variation': ['nearest', 'last']}}
98-
99-
def setup(self, offset, n_steps):
60+
def setup(self, offset):
61+
N = 10**3
10062
np.random.seed(1234)
101-
extra = False
102-
if offset.endswith("_", None, -1):
103-
extra = int(offset[-1])
104-
offset = offset[:-2]
105-
106-
kwargs = {}
107-
if offset in self.offset_kwargs:
108-
kwargs = self.offset_kwargs[offset]
109-
110-
if extra:
111-
extras = self.offset_extra_cases[offset]
112-
for extra_arg in extras:
113-
kwargs[extra_arg] = extras[extra_arg][extra - 1]
114-
115-
offset = getattr(offsets, offset)
116-
self.idx = get_index_for_offset(offset(n_steps, **kwargs))
117-
self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx)
118-
self.d = self.df.to_dict()
119-
120-
def time_frame_ctor(self, offset, n_steps):
63+
idx = date_range(Timestamp('1/1/1900'), freq=offset, periods=N)
64+
df = DataFrame(np.random.randn(N, 10), index=idx)
65+
self.d = df.to_dict()
66+
67+
def time_dict_with_timestamp_offsets(self, offset):
12168
DataFrame(self.d)
12269

12370

asv_bench/benchmarks/timedelta.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
from .pandas_vb_common import *
2-
from pandas import to_timedelta, Timestamp
1+
import numpy as np
2+
import pandas as pd
3+
4+
from pandas import to_timedelta, Timestamp, Timedelta
35

46

57
class ToTimedelta(object):
@@ -67,8 +69,8 @@ class DatetimeAccessor(object):
6769
def setup(self):
6870
self.N = 100000
6971
self.series = pd.Series(
70-
pd.timedelta_range('1 days', periods=self.N, freq='h')
71-
)
72+
pd.timedelta_range('1 days', periods=self.N, freq='h'))
73+
7274
def time_dt_accessor(self):
7375
self.series.dt
7476

asv_bench/benchmarks/timeseries.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def time_dti_tz_factorize(self):
8585
self.dti_tz.factorize()
8686

8787
def time_dti_time(self):
88-
self.rng.time
88+
self.dst_rng.time
8989

9090
def time_timestamp_tzinfo_cons(self):
9191
self.rng5[0]

asv_bench/benchmarks/timestamp.py

+23-5
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,33 @@
1-
from pandas import to_timedelta, Timestamp
1+
from pandas import Timestamp
22
import pytz
33
import datetime
44

55

6+
class TimestampConstruction(object):
7+
# TODO: classmethod constructors: fromordinal, fromtimestamp...
8+
9+
def time_parse_iso8601_no_tz(self):
10+
Timestamp('2017-08-25 08:16:14')
11+
12+
def time_parse_iso8601_tz(self):
13+
Timestamp('2017-08-25 08:16:14-0500')
14+
15+
def time_parse_dateutil(self):
16+
Timestamp('2017/08/25 08:16:14 AM')
17+
18+
def time_parse_today(self):
19+
Timestamp('today')
20+
21+
def time_parse_now(self):
22+
Timestamp('now')
23+
24+
625
class TimestampProperties(object):
726
goal_time = 0.2
827

9-
params = [(None, None),
10-
(pytz.timezone('Europe/Amsterdam'), None),
11-
(None, 'B'),
12-
(pytz.timezone('Europe/Amsterdam'), 'B')]
28+
_tzs = [None, pytz.timezone('Europe/Amsterdam')]
29+
_freqs = [None, 'B']
30+
params = [_tzs, _freqs]
1331
param_names = ['tz', 'freq']
1432

1533
def setup(self, tz, freq):

ci/check_imports.py

-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
'ipython',
1010
'jinja2'
1111
'lxml',
12-
'matplotlib',
1312
'numexpr',
1413
'openpyxl',
1514
'py',

ci/environment-dev.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@ dependencies:
1010
- python-dateutil>=2.5.0
1111
- python=3
1212
- pytz
13-
- setuptools
13+
- setuptools>=3.3
1414
- sphinx

ci/lint.sh

+7
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ if [ "$LINT" ]; then
2323
fi
2424
echo "Linting setup.py DONE"
2525

26+
echo "Linting asv_bench/benchmarks/"
27+
flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/[ghijoprs]*.py --ignore=F811
28+
if [ $? -ne "0" ]; then
29+
RET=1
30+
fi
31+
echo "Linting asv_bench/benchmarks/*.py DONE"
32+
2633
echo "Linting *.pyx"
2734
flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403
2835
if [ $? -ne "0" ]; then

ci/requirements_dev.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,5 @@ moto
66
pytest>=3.1
77
python-dateutil>=2.5.0
88
pytz
9-
setuptools
10-
sphinx
9+
setuptools>=3.3
10+
sphinx

conda.recipe/meta.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ requirements:
1515
- python
1616
- cython
1717
- numpy x.x
18-
- setuptools
18+
- setuptools >=3.3
1919

2020
run:
2121
- python

doc/source/api.rst

+11
Original file line numberDiff line numberDiff line change
@@ -2373,6 +2373,17 @@ Style Export and Import
23732373
Styler.use
23742374
Styler.to_excel
23752375

2376+
Plotting
2377+
~~~~~~~~
2378+
2379+
.. currentmodule:: pandas
2380+
2381+
.. autosummary::
2382+
:toctree: generated/
2383+
2384+
plotting.register_matplotlib_converters
2385+
plotting.deregister_matplotlib_converters
2386+
23762387
.. currentmodule:: pandas
23772388

23782389
General utility functions

doc/source/computation.rst

-6
Original file line numberDiff line numberDiff line change
@@ -253,12 +253,6 @@ accept the following arguments:
253253
result is NA)
254254
- ``center``: boolean, whether to set the labels at the center (default is False)
255255

256-
.. warning::
257-
258-
The ``freq`` and ``how`` arguments were in the API prior to 0.18.0 changes. These are deprecated in the new API. You can simply resample the input prior to creating a window function.
259-
260-
For example, instead of ``s.rolling(window=5,freq='D').max()`` to get the max value on a rolling 5 Day window, one could use ``s.resample('D').max().rolling(window=5).max()``, which first resamples the data to daily data, then provides a rolling 5 day window.
261-
262256
We can then call methods on these ``rolling`` objects. These return like-indexed objects:
263257

264258
.. ipython:: python

doc/source/indexing.rst

+17-5
Original file line numberDiff line numberDiff line change
@@ -1833,15 +1833,27 @@ that you've done this:
18331833
18341834
Yikes!
18351835

1836+
.. _indexing.evaluation_order:
1837+
18361838
Evaluation order matters
18371839
~~~~~~~~~~~~~~~~~~~~~~~~
18381840

1839-
Furthermore, in chained expressions, the order may determine whether a copy is returned or not.
1840-
If an expression will set values on a copy of a slice, then a ``SettingWithCopy``
1841-
warning will be issued.
1841+
When you use chained indexing, the order and type of the indexing operation
1842+
partially determine whether the result is a slice into the original object, or
1843+
a copy of the slice.
1844+
1845+
Pandas has the ``SettingWithCopyWarning`` because assigning to a copy of a
1846+
slice is frequently not intentional, but a mistake caused by chained indexing
1847+
returning a copy where a slice was expected.
1848+
1849+
If you would like pandas to be more or less trusting about assignment to a
1850+
chained indexing expression, you can set the :ref:`option <options>`
1851+
``mode.chained_assignment`` to one of these values:
18421852

1843-
You can control the action of a chained assignment via the option ``mode.chained_assignment``,
1844-
which can take the values ``['raise','warn',None]``, where showing a warning is the default.
1853+
* ``'warn'``, the default, means a ``SettingWithCopyWarning`` is printed.
1854+
* ``'raise'`` means pandas will raise a ``SettingWithCopyException``
1855+
you have to deal with.
1856+
* ``None`` will suppress the warnings entirely.
18451857

18461858
.. ipython:: python
18471859
:okwarning:

doc/source/install.rst

+12-16
Original file line numberDiff line numberDiff line change
@@ -141,28 +141,24 @@ and can take a few minutes to complete.
141141
Installing using your Linux distribution's package manager.
142142
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
143143

144-
The commands in this table will install pandas for Python 2 from your distribution.
145-
To install pandas for Python 3 you may need to use the package ``python3-pandas``.
144+
The commands in this table will install pandas for Python 3 from your distribution.
145+
To install pandas for Python 2 you may need to use the package ``python-pandas``.
146146

147147
.. csv-table::
148148
:header: "Distribution", "Status", "Download / Repository Link", "Install method"
149149
:widths: 10, 10, 20, 50
150150

151151

152-
Debian, stable, `official Debian repository <http://packages.debian.org/search?keywords=pandas&searchon=names&suite=all&section=all>`__ , ``sudo apt-get install python-pandas``
153-
Debian & Ubuntu, unstable (latest packages), `NeuroDebian <http://neuro.debian.net/index.html#how-to-use-this-repository>`__ , ``sudo apt-get install python-pandas``
154-
Ubuntu, stable, `official Ubuntu repository <http://packages.ubuntu.com/search?keywords=pandas&searchon=names&suite=all&section=all>`__ , ``sudo apt-get install python-pandas``
155-
Ubuntu, unstable (daily builds), `PythonXY PPA <https://code.launchpad.net/~pythonxy/+archive/pythonxy-devel>`__; activate by: ``sudo add-apt-repository ppa:pythonxy/pythonxy-devel && sudo apt-get update``, ``sudo apt-get install python-pandas``
156-
OpenSuse, stable, `OpenSuse Repository <http://software.opensuse.org/package/python-pandas?search_term=pandas>`__ , ``zypper in python-pandas``
157-
Fedora, stable, `official Fedora repository <https://admin.fedoraproject.org/pkgdb/package/rpms/python-pandas/>`__ , ``dnf install python-pandas``
158-
Centos/RHEL, stable, `EPEL repository <https://admin.fedoraproject.org/pkgdb/package/rpms/python-pandas/>`__ , ``yum install python-pandas``
159-
160-
161-
162-
163-
164-
152+
Debian, stable, `official Debian repository <http://packages.debian.org/search?keywords=pandas&searchon=names&suite=all&section=all>`__ , ``sudo apt-get install python3-pandas``
153+
Debian & Ubuntu, unstable (latest packages), `NeuroDebian <http://neuro.debian.net/index.html#how-to-use-this-repository>`__ , ``sudo apt-get install python3-pandas``
154+
Ubuntu, stable, `official Ubuntu repository <http://packages.ubuntu.com/search?keywords=pandas&searchon=names&suite=all&section=all>`__ , ``sudo apt-get install python3-pandas``
155+
OpenSuse, stable, `OpenSuse Repository <http://software.opensuse.org/package/python-pandas?search_term=pandas>`__ , ``zypper in python3-pandas``
156+
Fedora, stable, `official Fedora repository <https://admin.fedoraproject.org/pkgdb/package/rpms/python-pandas/>`__ , ``dnf install python3-pandas``
157+
Centos/RHEL, stable, `EPEL repository <https://admin.fedoraproject.org/pkgdb/package/rpms/python-pandas/>`__ , ``yum install python3-pandas``
165158

159+
**However**, the packages in the Linux package managers are often a few versions behind, so
160+
to get the newest version of pandas, it's recommended to install using the ``pip`` or ``conda``
161+
methods described above.
166162

167163

168164
Installing from source
@@ -198,7 +194,7 @@ installed), make sure you have `pytest
198194
Dependencies
199195
------------
200196

201-
* `setuptools <https://setuptools.readthedocs.io/en/latest/>`__
197+
* `setuptools <https://setuptools.readthedocs.io/en/latest/>`__: 3.3.0 or higher
202198
* `NumPy <http://www.numpy.org>`__: 1.9.0 or higher
203199
* `python-dateutil <https://dateutil.readthedocs.io/en/stable/>`__: 2.5.0 or higher
204200
* `pytz <http://pytz.sourceforge.net/>`__

doc/source/io.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -4522,6 +4522,7 @@ See the documentation for `pyarrow <http://arrow.apache.org/docs/python/>`__ and
45224522
.. note::
45234523

45244524
These engines are very similar and should read/write nearly identical parquet format files.
4525+
Currently ``pyarrow`` does not support timedelta data, and ``fastparquet`` does not support timezone aware datetimes (they are coerced to UTC).
45254526
These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library).
45264527

45274528
.. ipython:: python
@@ -4548,16 +4549,15 @@ Read from a parquet file.
45484549

45494550
.. ipython:: python
45504551
4551-
result = pd.read_parquet('example_pa.parquet', engine='pyarrow')
45524552
result = pd.read_parquet('example_fp.parquet', engine='fastparquet')
4553+
result = pd.read_parquet('example_pa.parquet', engine='pyarrow')
45534554
45544555
result.dtypes
45554556
45564557
Read only certain columns of a parquet file.
45574558

45584559
.. ipython:: python
45594560
4560-
result = pd.read_parquet('example_pa.parquet', engine='pyarrow', columns=['a', 'b'])
45614561
result = pd.read_parquet('example_fp.parquet', engine='fastparquet', columns=['a', 'b'])
45624562
45634563
result.dtypes

0 commit comments

Comments
 (0)