Skip to content

Commit 689d50c

Browse files
committed
Add test for timedeltas data
2 parents 7e7cb93 + 9a2e821 commit 689d50c

File tree

122 files changed

+1826
-1830
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

122 files changed

+1826
-1830
lines changed

asv_bench/benchmarks/arithmetic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ def setup(self, offset):
466466
self.rng = rng
467467

468468
def time_apply_index(self, offset):
469-
offset.apply_index(self.rng)
469+
self.rng + offset
470470

471471

472472
class BinaryOpsMultiIndex:

asv_bench/benchmarks/io/json.py

+6
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,18 @@ def time_read_json_lines(self, index):
5353
def time_read_json_lines_concat(self, index):
5454
concat(read_json(self.fname, orient="records", lines=True, chunksize=25000))
5555

56+
def time_read_json_lines_nrows(self, index):
57+
read_json(self.fname, orient="records", lines=True, nrows=25000)
58+
5659
def peakmem_read_json_lines(self, index):
5760
read_json(self.fname, orient="records", lines=True)
5861

5962
def peakmem_read_json_lines_concat(self, index):
6063
concat(read_json(self.fname, orient="records", lines=True, chunksize=25000))
6164

65+
def peakmem_read_json_lines_nrows(self, index):
66+
read_json(self.fname, orient="records", lines=True, nrows=15000)
67+
6268

6369
class ToJSON(BaseIO):
6470

ci/deps/travis-36-locale.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ dependencies:
2727
- numexpr
2828
- numpy
2929
- openpyxl
30-
- pandas-gbq=0.8.0
30+
- pandas-gbq=0.12.0
3131
- psycopg2=2.6.2
3232
- pymysql=0.7.11
3333
- pytables

doc/source/development/contributing.rst

+27-3
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ Creating a Python environment (pip)
270270
If you aren't using conda for your development environment, follow these instructions.
271271
You'll need to have at least Python 3.6.1 installed on your system.
272272

273-
**Unix**/**Mac OS**
273+
**Unix**/**Mac OS with virtualenv**
274274

275275
.. code-block:: bash
276276
@@ -286,7 +286,31 @@ You'll need to have at least Python 3.6.1 installed on your system.
286286
python -m pip install -r requirements-dev.txt
287287
288288
# Build and install pandas
289-
python setup.py build_ext --inplace -j 0
289+
python setup.py build_ext --inplace -j 4
290+
python -m pip install -e . --no-build-isolation --no-use-pep517
291+
292+
**Unix**/**Mac OS with pyenv**
293+
294+
Consult the docs for setting up pyenv `here <https://github.com/pyenv/pyenv>`__.
295+
296+
.. code-block:: bash
297+
298+
# Create a virtual environment
299+
# Use an ENV_DIR of your choice. We'll use ~/.pyenv/versions/pandas-dev
300+
301+
pyenv virtualenv <version> <name-to-give-it>
302+
303+
# For instance:
304+
pyenv virtualenv 3.7.6 pandas-dev
305+
306+
# Activate the virtualenv
307+
pyenv activate pandas-dev
308+
309+
# Now install the build dependencies in the cloned pandas repo
310+
python -m pip install -r requirements-dev.txt
311+
312+
# Build and install pandas
313+
python setup.py build_ext --inplace -j 4
290314
python -m pip install -e . --no-build-isolation --no-use-pep517
291315
292316
**Windows**
@@ -312,7 +336,7 @@ should already exist.
312336
python -m pip install -r requirements-dev.txt
313337
314338
# Build and install pandas
315-
python setup.py build_ext --inplace -j 0
339+
python setup.py build_ext --inplace -j 4
316340
python -m pip install -e . --no-build-isolation --no-use-pep517
317341
318342
Creating a branch

doc/source/getting_started/comparison/comparison_with_sas.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ Reading external data
115115

116116
Like SAS, pandas provides utilities for reading in data from
117117
many formats. The ``tips`` dataset, found within the pandas
118-
tests (`csv <https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/tips.csv>`_)
118+
tests (`csv <https://raw.github.com/pandas-dev/pandas/master/pandas/tests/io/data/csv/tips.csv>`_)
119119
will be used in many of the following examples.
120120

121121
SAS provides ``PROC IMPORT`` to read csv data into a data set.
@@ -131,7 +131,7 @@ The pandas method is :func:`read_csv`, which works similarly.
131131
.. ipython:: python
132132
133133
url = ('https://raw.github.com/pandas-dev/'
134-
'pandas/master/pandas/tests/data/tips.csv')
134+
'pandas/master/pandas/tests/io/data/csv/tips.csv')
135135
tips = pd.read_csv(url)
136136
tips.head()
137137

doc/source/getting_started/comparison/comparison_with_sql.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ structure.
2525
.. ipython:: python
2626
2727
url = ('https://raw.github.com/pandas-dev'
28-
'/pandas/master/pandas/tests/data/tips.csv')
28+
'/pandas/master/pandas/tests/io/data/csv/tips.csv')
2929
tips = pd.read_csv(url)
3030
tips.head()
3131

doc/source/getting_started/comparison/comparison_with_stata.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ Reading external data
112112

113113
Like Stata, pandas provides utilities for reading in data from
114114
many formats. The ``tips`` data set, found within the pandas
115-
tests (`csv <https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/tips.csv>`_)
115+
tests (`csv <https://raw.github.com/pandas-dev/pandas/master/pandas/tests/io/data/csv/tips.csv>`_)
116116
will be used in many of the following examples.
117117

118118
Stata provides ``import delimited`` to read csv data into a data set in memory.
@@ -128,7 +128,7 @@ the data set if presented with a url.
128128
.. ipython:: python
129129
130130
url = ('https://raw.github.com/pandas-dev'
131-
'/pandas/master/pandas/tests/data/tips.csv')
131+
'/pandas/master/pandas/tests/io/data/csv/tips.csv')
132132
tips = pd.read_csv(url)
133133
tips.head()
134134

doc/source/getting_started/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ lxml 3.8.0 HTML parser for read_html (see :ref
274274
matplotlib 2.2.2 Visualization
275275
numba 0.46.0 Alternative execution engine for rolling operations
276276
openpyxl 2.5.7 Reading / writing for xlsx files
277-
pandas-gbq 0.8.0 Google Big Query access
277+
pandas-gbq 0.12.0 Google Big Query access
278278
psycopg2 PostgreSQL engine for sqlalchemy
279279
pyarrow 0.12.0 Parquet, ORC (requires 0.13.0), and feather reading / writing
280280
pymysql 0.7.11 MySQL engine for sqlalchemy

doc/source/reference/frame.rst

+9-3
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ Conversion
4747
DataFrame.convert_dtypes
4848
DataFrame.infer_objects
4949
DataFrame.copy
50-
DataFrame.isna
51-
DataFrame.notna
5250
DataFrame.bool
5351

5452
Indexing, iteration
@@ -211,10 +209,18 @@ Missing data handling
211209
.. autosummary::
212210
:toctree: api/
213211

212+
DataFrame.backfill
213+
DataFrame.bfill
214214
DataFrame.dropna
215+
DataFrame.ffill
215216
DataFrame.fillna
216-
DataFrame.replace
217217
DataFrame.interpolate
218+
DataFrame.isna
219+
DataFrame.isnull
220+
DataFrame.notna
221+
DataFrame.notnull
222+
DataFrame.pad
223+
DataFrame.replace
218224

219225
Reshaping, sorting, transposing
220226
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

doc/source/reference/groupby.rst

+5
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Computations / descriptive stats
5050
GroupBy.all
5151
GroupBy.any
5252
GroupBy.bfill
53+
GroupBy.backfill
5354
GroupBy.count
5455
GroupBy.cumcount
5556
GroupBy.cummax
@@ -67,6 +68,7 @@ Computations / descriptive stats
6768
GroupBy.ngroup
6869
GroupBy.nth
6970
GroupBy.ohlc
71+
GroupBy.pad
7072
GroupBy.prod
7173
GroupBy.rank
7274
GroupBy.pct_change
@@ -88,10 +90,12 @@ application to columns of a specific data type.
8890

8991
DataFrameGroupBy.all
9092
DataFrameGroupBy.any
93+
DataFrameGroupBy.backfill
9194
DataFrameGroupBy.bfill
9295
DataFrameGroupBy.corr
9396
DataFrameGroupBy.count
9497
DataFrameGroupBy.cov
98+
DataFrameGroupBy.cumcount
9599
DataFrameGroupBy.cummax
96100
DataFrameGroupBy.cummin
97101
DataFrameGroupBy.cumprod
@@ -106,6 +110,7 @@ application to columns of a specific data type.
106110
DataFrameGroupBy.idxmin
107111
DataFrameGroupBy.mad
108112
DataFrameGroupBy.nunique
113+
DataFrameGroupBy.pad
109114
DataFrameGroupBy.pct_change
110115
DataFrameGroupBy.plot
111116
DataFrameGroupBy.quantile

doc/source/reference/series.rst

+9-2
Original file line numberDiff line numberDiff line change
@@ -214,11 +214,18 @@ Missing data handling
214214
.. autosummary::
215215
:toctree: api/
216216

217-
Series.isna
218-
Series.notna
217+
Series.backfill
218+
Series.bfill
219219
Series.dropna
220+
Series.ffill
220221
Series.fillna
221222
Series.interpolate
223+
Series.isna
224+
Series.isnull
225+
Series.notna
226+
Series.notnull
227+
Series.pad
228+
Series.replace
222229

223230
Reshaping, sorting
224231
------------------

doc/source/user_guide/merging.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1273,7 +1273,7 @@ columns:
12731273
12741274
.. ipython:: python
12751275
1276-
result = pd.merge(left, right, on='k', suffixes=['_l', '_r'])
1276+
result = pd.merge(left, right, on='k', suffixes=('_l', '_r'))
12771277
12781278
.. ipython:: python
12791279
:suppress:

doc/source/user_guide/visualization.rst

+4-3
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,7 @@ for more information. By coloring these curves differently for each class
865865
it is possible to visualize data clustering. Curves belonging to samples
866866
of the same class will usually be closer together and form larger structures.
867867

868-
**Note**: The "Iris" dataset is available `here <https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/iris.csv>`__.
868+
**Note**: The "Iris" dataset is available `here <https://raw.github.com/pandas-dev/pandas/master/pandas/tests/io/data/csv/iris.csv>`__.
869869

870870
.. ipython:: python
871871
@@ -1025,7 +1025,7 @@ be colored differently.
10251025
See the R package `Radviz <https://cran.r-project.org/package=Radviz/>`__
10261026
for more information.
10271027

1028-
**Note**: The "Iris" dataset is available `here <https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/iris.csv>`__.
1028+
**Note**: The "Iris" dataset is available `here <https://raw.github.com/pandas-dev/pandas/master/pandas/tests/io/data/csv/iris.csv>`__.
10291029

10301030
.. ipython:: python
10311031
@@ -1332,7 +1332,7 @@ otherwise you will see a warning.
13321332

13331333
.. ipython:: python
13341334
1335-
fig, axes = plt.subplots(4, 4, figsize=(6, 6))
1335+
fig, axes = plt.subplots(4, 4, figsize=(9, 9))
13361336
plt.subplots_adjust(wspace=0.5, hspace=0.5)
13371337
target1 = [axes[0][0], axes[1][1], axes[2][2], axes[3][3]]
13381338
target2 = [axes[3][0], axes[2][1], axes[1][2], axes[0][3]]
@@ -1369,6 +1369,7 @@ Another option is passing an ``ax`` argument to :meth:`Series.plot` to plot on a
13691369
.. ipython:: python
13701370
13711371
fig, axes = plt.subplots(nrows=2, ncols=2)
1372+
plt.subplots_adjust(wspace=0.2, hspace=0.5)
13721373
df['A'].plot(ax=axes[0, 0]);
13731374
axes[0, 0].set_title('A');
13741375
df['B'].plot(ax=axes[0, 1]);

doc/source/whatsnew/v1.1.0.rst

+11
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,9 @@ Other enhancements
288288
- :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`).
289289
- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable (:issue:`11704`)
290290
- Make ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`).
291+
- :meth:`groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`).
292+
- :meth:`~pandas.io.json.read_json` now accepts an ``nrows`` parameter (:issue:`33916`).
293+
- :meth:`~pandas.io.gbq.read_gbq` now allows disabling the progress bar (:issue:`33360`).
291294

292295
.. ---------------------------------------------------------------------------
293296
@@ -354,6 +357,8 @@ Optional libraries below the lowest tested version may still work, but are not c
354357
+-----------------+-----------------+---------+
355358
| xlwt | 1.2.0 | |
356359
+-----------------+-----------------+---------+
360+
| pandas-gbq | 0.12.0 | X |
361+
+-----------------+-----------------+---------+
357362

358363
See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.
359364

@@ -396,6 +401,7 @@ Backwards incompatible API changes
396401
- :func:`pandas.api.dtypes.is_string_dtype` no longer incorrectly identifies categorical series as string.
397402
- :func:`read_excel` no longer takes ``**kwds`` arguments. This means that passing in keyword ``chunksize`` now raises a ``TypeError``
398403
(previously raised a ``NotImplementedError``), while passing in keyword ``encoding`` now raises a ``TypeError`` (:issue:`34464`)
404+
- :func:`merge` now checks that the ``suffixes`` parameter is a ``tuple`` and raises ``TypeError`` otherwise. Previously a ``list`` or ``set`` was also accepted, and a ``set`` could produce unexpected results (:issue:`33740`)
399405

400406
``MultiIndex.get_indexer`` interprets `method` argument differently
401407
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -821,6 +827,7 @@ Timedelta
821827
- Bug in :func:`timedelta_range` that produced an extra point on a edge case (:issue:`30353`, :issue:`33498`)
822828
- Bug in :meth:`DataFrame.resample` that produced an extra point on a edge case (:issue:`30353`, :issue:`13022`, :issue:`33498`)
823829
- Bug in :meth:`DataFrame.resample` that ignored the ``loffset`` argument when dealing with timedelta (:issue:`7687`, :issue:`33498`)
830+
- Bug in :class:`Timedelta` and :func:`pandas.to_timedelta` that ignored the ``unit`` argument for string input (:issue:`12136`)
824831

825832
Timezones
826833
^^^^^^^^^
@@ -887,6 +894,7 @@ Indexing
887894
- Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` where index was assumed to be monotone increasing (:issue:`33756`)
888895
- Indexing with a list of strings representing datetimes failed on :class:`DatetimeIndex` or :class:`PeriodIndex` (:issue:`11278`)
889896
- Bug in :meth:`Series.at` when used with a :class:`MultiIndex` would raise an exception on valid inputs (:issue:`26989`)
897+
- Bug in :meth:`Series.loc` when used with a :class:`MultiIndex` would raise an IndexingError when accessing a None value (:issue:`34318`)
890898

891899
Missing
892900
^^^^^^^
@@ -977,6 +985,7 @@ Groupby/resample/rolling
977985
to the input DataFrame is inconsistent. An internal heuristic to detect index mutation would behave differently for equal but not identical
978986
indices. In particular, the result index shape might change if a copy of the input would be returned.
979987
The behaviour now is consistent, independent of internal heuristics. (:issue:`31612`, :issue:`14927`, :issue:`13056`)
988+
- Bug in :meth:`SeriesGroupBy.agg` where any column name was accepted in the named aggregation of ``SeriesGroupBy`` previously. The behaviour now allows only ``str`` and callables else would raise ``TypeError``. (:issue:`34422`)
980989

981990
Reshaping
982991
^^^^^^^^^
@@ -1005,7 +1014,9 @@ Reshaping
10051014
- Bug in :func:`concat` was not allowing for concatenation of ``DataFrame`` and ``Series`` with duplicate keys (:issue:`33654`)
10061015
- Bug in :func:`cut` raised an error when non-unique labels (:issue:`33141`)
10071016
- Ensure only named functions can be used in :func:`eval()` (:issue:`32460`)
1017+
- Bug in :meth:`DataFrame.aggregate` and :meth:`Series.aggregate` was causing a recursive loop in some cases (:issue:`34224`)
10081018
- Fixed bug in :func:`melt` where melting MultiIndex columns with ``col_level`` > 0 would raise a ``KeyError`` on ``id_vars`` (:issue:`34129`)
1019+
- Bug in :meth:`Series.where` with an empty Series and empty ``cond`` having non-bool dtype (:issue:`34592`)
10091020

10101021
Sparse
10111022
^^^^^^

pandas/_libs/index.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ cnp.import_array()
2222
from pandas._libs cimport util
2323

2424
from pandas._libs.tslibs.nattype cimport c_NaT as NaT
25-
from pandas._libs.tslibs.base cimport ABCTimedelta
2625
from pandas._libs.tslibs.period cimport is_period_object
2726
from pandas._libs.tslibs.timestamps cimport _Timestamp
27+
from pandas._libs.tslibs.timedeltas cimport _Timedelta
2828

2929
from pandas._libs.hashtable cimport HashTable
3030

@@ -471,7 +471,7 @@ cdef class TimedeltaEngine(DatetimeEngine):
471471
return 'm8[ns]'
472472

473473
cdef int64_t _unbox_scalar(self, scalar) except? -1:
474-
if not (isinstance(scalar, ABCTimedelta) or scalar is NaT):
474+
if not (isinstance(scalar, _Timedelta) or scalar is NaT):
475475
raise TypeError(scalar)
476476
return scalar.value
477477

pandas/_libs/interval.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ from pandas._libs.tslibs.util cimport (
4242
is_timedelta64_object,
4343
)
4444

45-
from pandas._libs.tslibs.base cimport ABCTimedelta
4645
from pandas._libs.tslibs.timezones cimport tz_compare
4746
from pandas._libs.tslibs.timestamps cimport _Timestamp
47+
from pandas._libs.tslibs.timedeltas cimport _Timedelta
4848

4949
_VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither'])
5050

@@ -340,7 +340,7 @@ cdef class Interval(IntervalMixin):
340340
def _validate_endpoint(self, endpoint):
341341
# GH 23013
342342
if not (is_integer_object(endpoint) or is_float_object(endpoint) or
343-
isinstance(endpoint, (_Timestamp, ABCTimedelta))):
343+
isinstance(endpoint, (_Timestamp, _Timedelta))):
344344
raise ValueError("Only numeric, Timestamp and Timedelta endpoints "
345345
"are allowed when constructing an Interval.")
346346

pandas/_libs/tslibs/__init__.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
__all__ = [
2+
"dtypes",
23
"localize_pydatetime",
34
"NaT",
45
"NaTType",
@@ -14,12 +15,14 @@
1415
"ints_to_pytimedelta",
1516
"Timestamp",
1617
"tz_convert_single",
18+
"to_offset",
1719
]
1820

19-
21+
from . import dtypes # type: ignore
2022
from .conversion import localize_pydatetime
2123
from .nattype import NaT, NaTType, iNaT, is_null_datetimelike, nat_strings
2224
from .np_datetime import OutOfBoundsDatetime
25+
from .offsets import to_offset
2326
from .period import IncompatibleFrequency, Period
2427
from .resolution import Resolution
2528
from .timedeltas import Timedelta, delta_to_nanoseconds, ints_to_pytimedelta

0 commit comments

Comments
 (0)